/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2018 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "dbxout.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
#include "rtx-vector-builder.h"

/* This file should be included last.  */
#include "target-def.h"

#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B)	((A) < (B) ? (A) : (B))
#define max(A,B)	((A) > (B) ? (A) : (B))
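
/* As with any function-style macro, the arguments can be evaluated more
   than once: with the definition above, min (x++, y) increments X twice
   whenever X < Y, so side-effecting operands must be avoided.  */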

static pad_direction rs6000_function_arg_padding (machine_mode, const_tree);

/* Structure used to define the rs6000 stack.  */
typedef struct rs6000_stack {
  int reload_completed;		/* stack info won't change from here on */
  int first_gp_reg_save;	/* first callee saved GP register used */
  int first_fp_reg_save;	/* first callee saved FP register used */
  int first_altivec_reg_save;	/* first callee saved AltiVec register used */
  int lr_save_p;		/* true if the link reg needs to be saved */
  int cr_save_p;		/* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;	/* mask of vec registers to save */
  int push_p;			/* true if we need to allocate stack space */
  int calls_p;			/* true if the function makes any calls */
  int world_save_p;		/* true if we're saving *everything*:
				   r13-r31, cr, f14-f31, vrsave, v20-v31  */
  enum rs6000_abi abi;		/* which ABI to use */
  int gp_save_offset;		/* offset to save GP regs from initial SP */
  int fp_save_offset;		/* offset to save FP regs from initial SP */
  int altivec_save_offset;	/* offset to save AltiVec regs from initial SP */
  int lr_save_offset;		/* offset to save LR from initial SP */
  int cr_save_offset;		/* offset to save CR from initial SP */
  int vrsave_save_offset;	/* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;	/* offset to save spe 64-bit gprs  */
  int varargs_save_offset;	/* offset to save the varargs registers */
  int ehrd_offset;		/* offset to EH return data */
  int ehcr_offset;		/* offset to EH CR field data */
  int reg_size;			/* register size (4 or 8) */
  HOST_WIDE_INT vars_size;	/* variable save area size */
  int parm_size;		/* outgoing parameter size */
  int save_size;		/* save area size */
  int fixed_size;		/* fixed size of stack frame */
  int gp_size;			/* size of saved GP registers */
  int fp_size;			/* size of saved FP registers */
  int altivec_size;		/* size of saved AltiVec registers */
  int cr_size;			/* size to hold CR if not in fixed area */
  int vrsave_size;		/* size to hold VRSAVE */
  int altivec_padding_size;	/* size of altivec alignment padding */
  int spe_gp_size;		/* size of the 64-bit GPR save area for SPE */
  int spe_padding_size;
  HOST_WIDE_INT total_size;	/* total bytes allocated for stack */
  int spe_64bit_regs_used;
  int savres_strategy;
} rs6000_stack_t;
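
/* Illustrative sketch (not part of the build): prologue/epilogue code
   consumes this record along the lines of

     rs6000_stack_t *info = rs6000_stack_info ();
     if (info->lr_save_p)
       ... save the link register at info->lr_save_offset from the
	   initial stack pointer ...

   using the *_save_p / *_p flags to decide what needs saving and the
   *_offset fields to locate the save slots.  */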

/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already.  */
  int spe_insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Alternative internal arg pointer for -fsplit-stack.  */
  rtx split_stack_arg_pointer;
  bool split_stack_argp_used;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
  /* The number of components we use for separate shrink-wrapping.  */
  int n_components;
  /* The components already handled by separate shrink-wrapping, which should
     not be considered by the prologue and epilogue.  */
  bool gpr_is_wrapped_separately[32];
  bool fpr_is_wrapped_separately[32];
  bool lr_is_wrapped_separately;
} machine_function;

/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of the label created for -mrelocatable; we call to it
   so we can get the address of the GOT section.  */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
static bool rs6000_passes_float;
static bool rs6000_passes_long_double;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached result of the rs6000_variable_issue hook, returned from
   rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;		/* option name */
  unsigned int mask;		/* mask bits to set */
} recip_options[] = {
  { "all",	 RECIP_ALL },
  { "none",	 RECIP_NONE },
  { "div",	 (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		  | RECIP_V2DF_DIV) },
  { "divf",	 (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	 (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	 (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		  | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	 (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	 (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
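
/* For example, -mrecip=divf,rsqrtd selects the union of the "divf" and
   "rsqrtd" rows above, i.e. RECIP_SF_DIV | RECIP_V4SF_DIV
   | RECIP_DF_RSQRT | RECIP_V2DF_RSQRT.  */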

/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values.  */
static const struct
{
  const char *cpu;
  unsigned int cpuid;
} cpu_is_info[] = {
  { "power9",	   PPC_PLATFORM_POWER9 },
  { "power8",	   PPC_PLATFORM_POWER8 },
  { "power7",	   PPC_PLATFORM_POWER7 },
  { "power6x",	   PPC_PLATFORM_POWER6X },
  { "power6",	   PPC_PLATFORM_POWER6 },
  { "power5+",	   PPC_PLATFORM_POWER5_PLUS },
  { "power5",	   PPC_PLATFORM_POWER5 },
  { "ppc970",	   PPC_PLATFORM_PPC970 },
  { "power4",	   PPC_PLATFORM_POWER4 },
  { "ppca2",	   PPC_PLATFORM_PPCA2 },
  { "ppc476",	   PPC_PLATFORM_PPC476 },
  { "ppc464",	   PPC_PLATFORM_PPC464 },
  { "ppc440",	   PPC_PLATFORM_PPC440 },
  { "ppc405",	   PPC_PLATFORM_PPC405 },
  { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
};

/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks.  */
static const struct
{
  const char *hwcap;
  int mask;
  unsigned int id;
} cpu_supports_info[] = {
  /* AT_HWCAP masks.  */
  { "4xxmac",		PPC_FEATURE_HAS_4xxMAC,		0 },
  { "altivec",		PPC_FEATURE_HAS_ALTIVEC,	0 },
  { "arch_2_05",	PPC_FEATURE_ARCH_2_05,		0 },
  { "arch_2_06",	PPC_FEATURE_ARCH_2_06,		0 },
  { "archpmu",		PPC_FEATURE_PERFMON_COMPAT,	0 },
  { "booke",		PPC_FEATURE_BOOKE,		0 },
  { "cellbe",		PPC_FEATURE_CELL_BE,		0 },
  { "dfp",		PPC_FEATURE_HAS_DFP,		0 },
  { "efpdouble",	PPC_FEATURE_HAS_EFP_DOUBLE,	0 },
  { "efpsingle",	PPC_FEATURE_HAS_EFP_SINGLE,	0 },
  { "fpu",		PPC_FEATURE_HAS_FPU,		0 },
  { "ic_snoop",		PPC_FEATURE_ICACHE_SNOOP,	0 },
  { "mmu",		PPC_FEATURE_HAS_MMU,		0 },
  { "notb",		PPC_FEATURE_NO_TB,		0 },
  { "pa6t",		PPC_FEATURE_PA6T,		0 },
  { "power4",		PPC_FEATURE_POWER4,		0 },
  { "power5",		PPC_FEATURE_POWER5,		0 },
  { "power5+",		PPC_FEATURE_POWER5_PLUS,	0 },
  { "power6x",		PPC_FEATURE_POWER6_EXT,		0 },
  { "ppc32",		PPC_FEATURE_32,			0 },
  { "ppc601",		PPC_FEATURE_601_INSTR,		0 },
  { "ppc64",		PPC_FEATURE_64,			0 },
  { "ppcle",		PPC_FEATURE_PPC_LE,		0 },
  { "smt",		PPC_FEATURE_SMT,		0 },
  { "spe",		PPC_FEATURE_HAS_SPE,		0 },
  { "true_le",		PPC_FEATURE_TRUE_LE,		0 },
  { "ucache",		PPC_FEATURE_UNIFIED_CACHE,	0 },
  { "vsx",		PPC_FEATURE_HAS_VSX,		0 },

  /* AT_HWCAP2 masks.  */
  { "arch_2_07",	PPC_FEATURE2_ARCH_2_07,		1 },
  { "dscr",		PPC_FEATURE2_HAS_DSCR,		1 },
  { "ebb",		PPC_FEATURE2_HAS_EBB,		1 },
  { "htm",		PPC_FEATURE2_HAS_HTM,		1 },
  { "htm-nosc",		PPC_FEATURE2_HTM_NOSC,		1 },
  { "isel",		PPC_FEATURE2_HAS_ISEL,		1 },
  { "tar",		PPC_FEATURE2_HAS_TAR,		1 },
  { "vcrypto",		PPC_FEATURE2_HAS_VEC_CRYPTO,	1 },
  { "arch_3_00",	PPC_FEATURE2_ARCH_3_00,		1 },
  { "ieee128",		PPC_FEATURE2_HAS_IEEE128,	1 }
};
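
/* Illustrative use from user code (not part of this file): the tables
   above back the GCC builtins, e.g.

     if (__builtin_cpu_supports ("vsx"))
       ... take a VSX code path ...
     if (__builtin_cpu_is ("power9"))
       ... take a POWER9-tuned path ...

   The strings are looked up in these tables and compiled into tests
   against the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values that LIBC
   stores in the TCB (see tcb_verification_symbol below).  */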

/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p;

/* Pointer to function (in powerpcspe-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in powerpcspe-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)


/* Register classes we care about in secondary reload or in legitimate-address
   checking.  We only need to worry about GPR, FPR, and AltiVec registers here,
   along with an ANY field that is the OR of the 3 register classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,			/* General purpose registers.  */
  RELOAD_REG_FPR,			/* Traditional floating point regs.  */
  RELOAD_REG_VMX,			/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,			/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;			/* Register class name.  */
  int reg;				/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   than PRE_INC and PRE_DEC, so keep track of separate bits for these two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */
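
/* Illustrative sketch: a mask is tested per mode and per reload register
   class, in the style of the helpers defined below, e.g. a hypothetical

     static inline bool
     mode_supports_indexed_in_gprs (machine_mode mode)
     {
       return ((reg_addr[mode].addr_mask[RELOAD_REG_GPR]
		& RELOAD_REG_INDEXED) != 0);
     }

   (reg_addr itself is declared just below).  */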

/* Per-mode reload and addressing information, including masks of the valid
   addressing modes for each register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  enum insn_code fusion_gpr_ld;		/* INSN for fusing gpr ADDIS/loads.  */
					/* INSNs for fusing addi with loads
					   or stores for each reg. class.  */
  enum insn_code fusion_addi_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addi_st[(int)N_RELOAD_REG];
					/* INSNs for fusing addis with loads
					   or stores for each reg. class.  */
  enum insn_code fusion_addis_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addis_st[(int)N_RELOAD_REG];
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
  bool fused_toc;			/* Mode supports TOC fusion.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

int
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;
		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  return store_data_bypass_p (out_insn, in_insn);
}
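
/* For example, an IN_INSN whose pattern is a PARALLEL of a SET plus a
   CLOBBER is scanned element by element above, while a PARALLEL
   containing anything other than SET, CLOBBER, or USE elements makes
   this function return false without calling the generic
   store_data_bypass_p at all.  */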

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_vsx_dform_quad (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
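
/* The quad-offset restriction means only 16-byte-aligned displacements
   are representable: offsets such as 0, 16, or 32 from the base register
   qualify, while 8 or 20 do not.  */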


/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;	  /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;	  /* cost of DImode multiplication.  */
  const int divsi;	  /* cost of SImode division.  */
  const int divdi;	  /* cost of DImode division.  */
  const int fp;		  /* cost of simple SFmode and DFmode insns.  */
  const int dmul;	  /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;	  /* cost of SFmode division (fdivs).  */
  const int ddiv;	  /* cost of DFmode division (fdiv).  */
  const int cache_line_size;    /* cache line size in bytes.  */
  const int l1_cache_size;	/* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;	/* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
					operations.  */
  const int sfdf_convert;	/* cost of SF->DF conversion.  */
};

const struct processor_costs *rs6000_cost;

/* Processor costs (relative to an add).  */
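
/* A note on reading the tables below: COSTS_N_INSNS expresses a cost in
   units of a single instruction, so an entry such as COSTS_N_INSNS (18)
   for divsi models an SImode divide as roughly eighteen times the cost
   of an integer add.  */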

/* Instruction size costs on 32-bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64-bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),    /* mulsi_const */
  COSTS_N_INSNS (6/2),    /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),   /* divsi */
  COSTS_N_INSNS (70/2),   /* divdi */
  COSTS_N_INSNS (10/2),   /* fp */
  COSTS_N_INSNS (10/2),   /* dmul */
  COSTS_N_INSNS (74/2),   /* sdiv */
  COSTS_N_INSNS (74/2),   /* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  6,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),    /* mulsi */
  COSTS_N_INSNS (16),    /* mulsi_const */
  COSTS_N_INSNS (16),    /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};


/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)  \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)  \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)  \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)  \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)  \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)  \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)  \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)  \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)  \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)  \
  { NAME, ICODE, MASK, ATTR },

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "powerpcspe-builtin.def"
};

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X
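
/* To illustrate the expansion above with a hypothetical .def entry
   (the names here are made up):

     RS6000_BUILTIN_2 (FOO, "foo", FOO_MASK, FOO_ATTR, CODE_FOR_foo)

   expands via the RS6000_BUILTIN_2 definition into the initializer

     { "foo", CODE_FOR_foo, FOO_MASK, FOO_ATTR },

   so rs6000_builtin_info gets one row per entry in
   powerpcspe-builtin.def, indexed by the builtin's enum value.  */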

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);


static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
				   machine_mode, machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
				       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
					     int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
						   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
						  reg_class_t,
						  reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
						machine_mode,
						reg_class_t);
static bool rs6000_save_toc_in_prologue_p (void);
static rtx rs6000_internal_arg_pointer (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
					     int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused));
static tree rs6000_fold_builtin (tree, int, tree *, bool);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;
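
/* Illustrative sketch (assumed shape, following common hash_table usage
   in GCC): an entry is looked up or created with

     toc_hash_struct **slot = toc_hash_table->find_slot (h, INSERT);

   where toc_hasher::hash and toc_hasher::equal, defined later in this
   file, key on the (key, key_mode) pair.  */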
1449 
1450 /* Hash table to keep track of the argument types for builtin functions.  */
1451 
1452 struct GTY((for_user)) builtin_hash_struct
1453 {
1454   tree type;
1455   machine_mode mode[4];	/* return value + 3 arguments.  */
1456   unsigned char uns_p[4];	/* and whether the types are unsigned.  */
1457 };
1458 
1459 struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
1460 {
1461   static hashval_t hash (builtin_hash_struct *);
1462   static bool equal (builtin_hash_struct *, builtin_hash_struct *);
1463 };
1464 
1465 static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
1466 
1467 
1468 /* Default register names.  */
1469 char rs6000_reg_names[][8] =
1470 {
1471       "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
1472       "8",  "9", "10", "11", "12", "13", "14", "15",
1473      "16", "17", "18", "19", "20", "21", "22", "23",
1474      "24", "25", "26", "27", "28", "29", "30", "31",
1475       "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
1476       "8",  "9", "10", "11", "12", "13", "14", "15",
1477      "16", "17", "18", "19", "20", "21", "22", "23",
1478      "24", "25", "26", "27", "28", "29", "30", "31",
1479      "mq", "lr", "ctr","ap",
1480       "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
1481       "ca",
1482       /* AltiVec registers.  */
1483       "0",  "1",  "2",  "3",  "4",  "5",  "6", "7",
1484       "8",  "9",  "10", "11", "12", "13", "14", "15",
1485       "16", "17", "18", "19", "20", "21", "22", "23",
1486       "24", "25", "26", "27", "28", "29", "30", "31",
1487       "vrsave", "vscr",
1488       /* SPE registers.  */
1489       "spe_acc", "spefscr",
1490       /* Soft frame pointer.  */
1491       "sfp",
1492       /* HTM SPR registers.  */
1493       "tfhar", "tfiar", "texasr",
1494       /* SPE High registers.  */
1495       "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
1496       "8",  "9", "10", "11", "12", "13", "14", "15",
1497      "16", "17", "18", "19", "20", "21", "22", "23",
1498      "24", "25", "26", "27", "28", "29", "30", "31"
1499 };
1500 
1501 #ifdef TARGET_REGNAMES
1502 static const char alt_reg_names[][8] =
1503 {
1504    "%r0",   "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
1505    "%r8",   "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
1506   "%r16",  "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
1507   "%r24",  "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
1508    "%f0",   "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
1509    "%f8",   "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
1510   "%f16",  "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
1511   "%f24",  "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
1512     "mq",    "lr",  "ctr",   "ap",
1513   "%cr0",  "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
1514    "ca",
1515   /* AltiVec registers.  */
1516    "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6", "%v7",
1517    "%v8",  "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
1518   "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
1519   "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
1520   "vrsave", "vscr",
1521   /* SPE registers.  */
1522   "spe_acc", "spefscr",
1523   /* Soft frame pointer.  */
1524   "sfp",
1525   /* HTM SPR registers.  */
1526   "tfhar", "tfiar", "texasr",
1527   /* SPE High registers.  */
1528   "%rh0",  "%rh1",  "%rh2",  "%rh3",  "%rh4",  "%rh5",  "%rh6",   "%rh7",
1529   "%rh8",  "%rh9",  "%rh10", "%r11",  "%rh12", "%rh13", "%rh14", "%rh15",
1530   "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
1531   "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
1532 };
1533 #endif
1534 
1535 /* Table of valid machine attributes.  */
1536 
1537 static const struct attribute_spec rs6000_attribute_table[] =
1538 {
1539   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
1540        affects_type_identity, handler, exclude } */
1541   { "altivec",   1, 1, false, true,  false, false,
1542     rs6000_handle_altivec_attribute, NULL },
1543   { "longcall",  0, 0, false, true,  true,  false,
1544     rs6000_handle_longcall_attribute, NULL },
1545   { "shortcall", 0, 0, false, true,  true,  false,
1546     rs6000_handle_longcall_attribute, NULL },
1547   { "ms_struct", 0, 0, false, false, false, false,
1548     rs6000_handle_struct_attribute, NULL },
1549   { "gcc_struct", 0, 0, false, false, false, false,
1550     rs6000_handle_struct_attribute, NULL },
1551 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1552   SUBTARGET_ATTRIBUTE_TABLE,
1553 #endif
1554   { NULL,        0, 0, false, false, false, false, NULL, NULL }
1555 };
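/* As a hedged illustration (not code from this file), the type attributes
   in the table above are applied in user source roughly as:

     void far_away_fn (void) __attribute__ ((longcall));
     struct s { char c; int i; } __attribute__ ((ms_struct));

   The handler functions named in each entry validate such uses.  */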
1556 
1557 #ifndef TARGET_PROFILE_KERNEL
1558 #define TARGET_PROFILE_KERNEL 0
1559 #endif
1560 
1561 /* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
1562 #define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
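/* Worked examples of the mapping, following directly from the macro's
   arithmetic:
     ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO)      == 0x80000000   (%v0, MSB)
     ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 31) == 0x00000001   (%v31, LSB)  */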
1563 
1564 /* Initialize the GCC target structure.  */
1565 #undef TARGET_ATTRIBUTE_TABLE
1566 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1567 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1568 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1569 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1570 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1571 
1572 #undef TARGET_ASM_ALIGNED_DI_OP
1573 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1574 
1575 /* Default unaligned ops are only provided for ELF.  Find the ops needed
1576    for non-ELF systems.  */
1577 #ifndef OBJECT_FORMAT_ELF
1578 #if TARGET_XCOFF
1579 /* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
1580    64-bit targets.  */
1581 #undef TARGET_ASM_UNALIGNED_HI_OP
1582 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1583 #undef TARGET_ASM_UNALIGNED_SI_OP
1584 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1585 #undef TARGET_ASM_UNALIGNED_DI_OP
1586 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1587 #else
1588 /* For Darwin.  */
1589 #undef TARGET_ASM_UNALIGNED_HI_OP
1590 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1591 #undef TARGET_ASM_UNALIGNED_SI_OP
1592 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1593 #undef TARGET_ASM_UNALIGNED_DI_OP
1594 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1595 #undef TARGET_ASM_ALIGNED_DI_OP
1596 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1597 #endif
1598 #endif
1599 
1600 /* This hook deals with fixups for relocatable code and DI-mode objects
1601    in 64-bit code.  */
1602 #undef TARGET_ASM_INTEGER
1603 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1604 
1605 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1606 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1607 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1608 #endif
1609 
1610 #undef TARGET_SET_UP_BY_PROLOGUE
1611 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1612 
1613 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1614 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1615 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1616 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1617 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1618 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1619 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1620 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1621 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1622 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1623 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1624 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1625 
1626 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1627 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1628 
1629 #undef TARGET_INTERNAL_ARG_POINTER
1630 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1631 
1632 #undef TARGET_HAVE_TLS
1633 #define TARGET_HAVE_TLS HAVE_AS_TLS
1634 
1635 #undef TARGET_CANNOT_FORCE_CONST_MEM
1636 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1637 
1638 #undef TARGET_DELEGITIMIZE_ADDRESS
1639 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1640 
1641 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1642 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1643 
1644 #undef TARGET_LEGITIMATE_COMBINED_INSN
1645 #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1646 
1647 #undef TARGET_ASM_FUNCTION_PROLOGUE
1648 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1649 #undef TARGET_ASM_FUNCTION_EPILOGUE
1650 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1651 
1652 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1653 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1654 
1655 #undef TARGET_LEGITIMIZE_ADDRESS
1656 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1657 
1658 #undef  TARGET_SCHED_VARIABLE_ISSUE
1659 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1660 
1661 #undef TARGET_SCHED_ISSUE_RATE
1662 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1663 #undef TARGET_SCHED_ADJUST_COST
1664 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1665 #undef TARGET_SCHED_ADJUST_PRIORITY
1666 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1667 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1668 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1669 #undef TARGET_SCHED_INIT
1670 #define TARGET_SCHED_INIT rs6000_sched_init
1671 #undef TARGET_SCHED_FINISH
1672 #define TARGET_SCHED_FINISH rs6000_sched_finish
1673 #undef TARGET_SCHED_REORDER
1674 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1675 #undef TARGET_SCHED_REORDER2
1676 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1677 
1678 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1679 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1680 
1681 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1682 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1683 
1684 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1685 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1686 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1687 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1688 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1689 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1690 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1691 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1692 
1693 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1694 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1695 
1696 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1697 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1698 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1699 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT		\
1700   rs6000_builtin_support_vector_misalignment
1701 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1702 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1703 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1704 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1705   rs6000_builtin_vectorization_cost
1706 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1707 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1708   rs6000_preferred_simd_mode
1709 #undef TARGET_VECTORIZE_INIT_COST
1710 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1711 #undef TARGET_VECTORIZE_ADD_STMT_COST
1712 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1713 #undef TARGET_VECTORIZE_FINISH_COST
1714 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1715 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1716 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1717 
1718 #undef TARGET_INIT_BUILTINS
1719 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1720 #undef TARGET_BUILTIN_DECL
1721 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1722 
1723 #undef TARGET_FOLD_BUILTIN
1724 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1725 #undef TARGET_GIMPLE_FOLD_BUILTIN
1726 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1727 
1728 #undef TARGET_EXPAND_BUILTIN
1729 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1730 
1731 #undef TARGET_MANGLE_TYPE
1732 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1733 
1734 #undef TARGET_INIT_LIBFUNCS
1735 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1736 
1737 #if TARGET_MACHO
1738 #undef TARGET_BINDS_LOCAL_P
1739 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1740 #endif
1741 
1742 #undef TARGET_MS_BITFIELD_LAYOUT_P
1743 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1744 
1745 #undef TARGET_ASM_OUTPUT_MI_THUNK
1746 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1747 
1748 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1749 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1750 
1751 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1752 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1753 
1754 #undef TARGET_REGISTER_MOVE_COST
1755 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1756 #undef TARGET_MEMORY_MOVE_COST
1757 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1758 #undef TARGET_CANNOT_COPY_INSN_P
1759 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1760 #undef TARGET_RTX_COSTS
1761 #define TARGET_RTX_COSTS rs6000_rtx_costs
1762 #undef TARGET_ADDRESS_COST
1763 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1764 
1765 #undef TARGET_DWARF_REGISTER_SPAN
1766 #define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span
1767 
1768 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1769 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1770 
1771 #undef TARGET_MEMBER_TYPE_FORCES_BLK
1772 #define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk
1773 
1774 #undef TARGET_PROMOTE_FUNCTION_MODE
1775 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1776 
1777 #undef TARGET_RETURN_IN_MEMORY
1778 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1779 
1780 #undef TARGET_RETURN_IN_MSB
1781 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1782 
1783 #undef TARGET_SETUP_INCOMING_VARARGS
1784 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1785 
1786 /* Always strict argument naming on rs6000.  */
1787 #undef TARGET_STRICT_ARGUMENT_NAMING
1788 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1789 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1790 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1791 #undef TARGET_SPLIT_COMPLEX_ARG
1792 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1793 #undef TARGET_MUST_PASS_IN_STACK
1794 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1795 #undef TARGET_PASS_BY_REFERENCE
1796 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1797 #undef TARGET_ARG_PARTIAL_BYTES
1798 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1799 #undef TARGET_FUNCTION_ARG_ADVANCE
1800 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1801 #undef TARGET_FUNCTION_ARG
1802 #define TARGET_FUNCTION_ARG rs6000_function_arg
1803 #undef TARGET_FUNCTION_ARG_PADDING
1804 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1805 #undef TARGET_FUNCTION_ARG_BOUNDARY
1806 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1807 
1808 #undef TARGET_BUILD_BUILTIN_VA_LIST
1809 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1810 
1811 #undef TARGET_EXPAND_BUILTIN_VA_START
1812 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1813 
1814 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1815 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1816 
1817 #undef TARGET_EH_RETURN_FILTER_MODE
1818 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1819 
1820 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1821 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1822 
1823 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1824 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1825 
1826 #undef TARGET_FLOATN_MODE
1827 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1828 
1829 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1830 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1831 
1832 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1833 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1834 
1835 #undef TARGET_MD_ASM_ADJUST
1836 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1837 
1838 #undef TARGET_OPTION_OVERRIDE
1839 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1840 
1841 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1842 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1843   rs6000_builtin_vectorized_function
1844 
1845 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1846 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1847   rs6000_builtin_md_vectorized_function
1848 
1849 #undef TARGET_STACK_PROTECT_GUARD
1850 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1851 
1852 #if !TARGET_MACHO
1853 #undef TARGET_STACK_PROTECT_FAIL
1854 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1855 #endif
1856 
1857 #ifdef HAVE_AS_TLS
1858 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1859 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1860 #endif
1861 
1862 /* Use a 32-bit anchor range.  This leads to sequences like:
1863 
1864 	addis	tmp,anchor,high
1865 	add	dest,tmp,low
1866 
1867    where tmp itself acts as an anchor, and can be shared between
1868    accesses to the same 64k page.  */
1869 #undef TARGET_MIN_ANCHOR_OFFSET
1870 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1871 #undef TARGET_MAX_ANCHOR_OFFSET
1872 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
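/* A sketch of the high/low split behind the addis/add sequence above: the
   "low" half is a signed 16-bit immediate, so an anchor offset OFF would
   typically be split as
     high = (OFF + 0x8000) >> 16;
     low  = OFF - (high << 16);
   with addis materializing high << 16 and add supplying low.  */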
1873 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1874 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1875 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1876 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1877 
1878 #undef TARGET_BUILTIN_RECIPROCAL
1879 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1880 
1881 #undef TARGET_EXPAND_TO_RTL_HOOK
1882 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1883 
1884 #undef TARGET_INSTANTIATE_DECLS
1885 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1886 
1887 #undef TARGET_SECONDARY_RELOAD
1888 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1889 #undef TARGET_SECONDARY_MEMORY_NEEDED
1890 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1891 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1892 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1893 
1894 #undef TARGET_LEGITIMATE_ADDRESS_P
1895 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1896 
1897 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1898 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1899 
1900 #undef TARGET_LRA_P
1901 #define TARGET_LRA_P rs6000_lra_p
1902 
1903 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1904 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1905 
1906 #undef TARGET_CAN_ELIMINATE
1907 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1908 
1909 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1910 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1911 
1912 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1913 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1914 
1915 #undef TARGET_TRAMPOLINE_INIT
1916 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1917 
1918 #undef TARGET_FUNCTION_VALUE
1919 #define TARGET_FUNCTION_VALUE rs6000_function_value
1920 
1921 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1922 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1923 
1924 #undef TARGET_OPTION_SAVE
1925 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1926 
1927 #undef TARGET_OPTION_RESTORE
1928 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1929 
1930 #undef TARGET_OPTION_PRINT
1931 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1932 
1933 #undef TARGET_CAN_INLINE_P
1934 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1935 
1936 #undef TARGET_SET_CURRENT_FUNCTION
1937 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1938 
1939 #undef TARGET_LEGITIMATE_CONSTANT_P
1940 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1941 
1942 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1943 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1944 
1945 #undef TARGET_CAN_USE_DOLOOP_P
1946 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1947 
1948 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1949 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1950 
1951 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1952 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1953 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1954 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1955 #undef TARGET_UNWIND_WORD_MODE
1956 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1957 
1958 #undef TARGET_OFFLOAD_OPTIONS
1959 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1960 
1961 #undef TARGET_C_MODE_FOR_SUFFIX
1962 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1963 
1964 #undef TARGET_INVALID_BINARY_OP
1965 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1966 
1967 #undef TARGET_OPTAB_SUPPORTED_P
1968 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1969 
1970 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1971 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1972 
1973 #undef TARGET_HARD_REGNO_NREGS
1974 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1975 #undef TARGET_HARD_REGNO_MODE_OK
1976 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1977 
1978 #undef TARGET_MODES_TIEABLE_P
1979 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1980 
1981 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1982 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1983   rs6000_hard_regno_call_part_clobbered
1984 
1985 #undef TARGET_SLOW_UNALIGNED_ACCESS
1986 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1987 
1988 #undef TARGET_CAN_CHANGE_MODE_CLASS
1989 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1990 
1991 #undef TARGET_CONSTANT_ALIGNMENT
1992 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1993 
1994 #undef TARGET_STARTING_FRAME_OFFSET
1995 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1996 
1997 
1998 /* Processor table.  */
1999 struct rs6000_ptt
2000 {
2001   const char *const name;		/* Canonical processor name.  */
2002   const enum processor_type processor;	/* Processor type enum value.  */
2003   const HOST_WIDE_INT target_enable;	/* Target flags to enable.  */
2004 };
2005 
2006 static struct rs6000_ptt const processor_target_table[] =
2007 {
2008 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
2009 #include "powerpcspe-cpus.def"
2010 #undef RS6000_CPU
2011 };
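/* Sketch of how the X-macro populates the table; the entry below is
   hypothetical (the real ones live in powerpcspe-cpus.def):

     RS6000_CPU ("power8", PROCESSOR_POWER8, POWER8_MASKS)

   which the #define above turns into the initializer
   { "power8", PROCESSOR_POWER8, POWER8_MASKS },.  */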
2012 
2013 /* Look up a processor name for -mcpu=xxx and -mtune=xxx.  Return -1 if the
2014    name is invalid.  */
2015 
2016 static int
2017 rs6000_cpu_name_lookup (const char *name)
2018 {
2019   size_t i;
2020 
2021   if (name != NULL)
2022     {
2023       for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
2024 	if (! strcmp (name, processor_target_table[i].name))
2025 	  return (int)i;
2026     }
2027 
2028   return -1;
2029 }
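/* Usage sketch (the cpu name is illustrative; the real names come from
   powerpcspe-cpus.def):

     int idx = rs6000_cpu_name_lookup ("power8");   matching table index
     int bad = rs6000_cpu_name_lookup (NULL);       always -1  */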
2030 
2031 
2032 /* Return number of consecutive hard regs needed starting at reg REGNO
2033    to hold something of mode MODE.
2034    This is ordinarily the length in words of a value of mode MODE
2035    but can be less for certain modes in special long registers.
2036 
2037    For the SPE, GPRs are 64 bits but only 32 bits are visible in
2038    scalar instructions.  The upper 32 bits are only available to the
2039    SIMD instructions.
2040 
2041    POWER and PowerPC GPRs hold 32 bits worth;
2042    PowerPC64 GPRs and FPRs hold 64 bits worth.  */
2043 
2044 static int
2045 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
2046 {
2047   unsigned HOST_WIDE_INT reg_size;
2048 
2049   /* 128-bit floating point usually takes 2 registers, unless it is IEEE
2050      128-bit floating point that can go in vector registers, which has VSX
2051      memory addressing.  */
2052   if (FP_REGNO_P (regno))
2053     reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
2054 		? UNITS_PER_VSX_WORD
2055 		: UNITS_PER_FP_WORD);
2056 
2057   else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
2058     reg_size = UNITS_PER_SPE_WORD;
2059 
2060   else if (ALTIVEC_REGNO_P (regno))
2061     reg_size = UNITS_PER_ALTIVEC_WORD;
2062 
2063   /* The value returned for SCmode in the E500 double case is 2 for
2064      ABI compatibility; storing an SCmode value in a single register
2065      would require function_arg and rs6000_spe_function_arg to handle
2066      SCmode so as to pass the value correctly in a pair of
2067      registers.  */
2068   else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
2069 	   && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
2070     reg_size = UNITS_PER_FP_WORD;
2071 
2072   else
2073     reg_size = UNITS_PER_WORD;
2074 
2075   return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
2076 }
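/* Worked example of the rounding division above, assuming a 64-bit target:
   a 16-byte TImode value in GPRs (reg_size == 8) needs
   (16 + 8 - 1) / 8 == 2 consecutive registers, while an 8-byte DFmode
   value in an FPR needs exactly 1.  */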
2077 
2078 /* Value is 1 if hard register REGNO can hold a value of machine-mode
2079    MODE.  */
2080 static int
2081 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
2082 {
2083   int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
2084 
2085   if (COMPLEX_MODE_P (mode))
2086     mode = GET_MODE_INNER (mode);
2087 
2088   /* PTImode can only go in GPRs.  Quad word memory operations require even/odd
2089      register combinations, and use PTImode where we need to deal with quad
2090      word memory operations.  Don't allow quad words in the argument or frame
2091      pointer registers, just registers 0..31.  */
2092   if (mode == PTImode)
2093     return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
2094 	    && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
2095 	    && ((regno & 1) == 0));
2096 
2097   /* VSX registers that overlap the FPR registers are larger than for non-VSX
2098      implementations.  Don't allow an item to be split between a FP register
2099      and an Altivec register.  Allow TImode in all VSX registers if the user
2100      asked for it.  */
2101   if (TARGET_VSX && VSX_REGNO_P (regno)
2102       && (VECTOR_MEM_VSX_P (mode)
2103 	  || FLOAT128_VECTOR_P (mode)
2104 	  || reg_addr[mode].scalar_in_vmx_p
2105 	  || (TARGET_VSX_TIMODE && mode == TImode)
2106 	  || (TARGET_VADDUQM && mode == V1TImode)))
2107     {
2108       if (FP_REGNO_P (regno))
2109 	return FP_REGNO_P (last_regno);
2110 
2111       if (ALTIVEC_REGNO_P (regno))
2112 	{
2113 	  if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
2114 	    return 0;
2115 
2116 	  return ALTIVEC_REGNO_P (last_regno);
2117 	}
2118     }
2119 
2120   /* The GPRs can hold any mode, but values bigger than one register
2121      cannot go past R31.  */
2122   if (INT_REGNO_P (regno))
2123     return INT_REGNO_P (last_regno);
2124 
2125   /* The float registers (except for VSX vector modes) can only hold floating
2126      modes and DImode.  */
2127   if (FP_REGNO_P (regno))
2128     {
2129       if (FLOAT128_VECTOR_P (mode))
2130 	return false;
2131 
2132       if (SCALAR_FLOAT_MODE_P (mode)
2133 	  && (mode != TDmode || (regno % 2) == 0)
2134 	  && FP_REGNO_P (last_regno))
2135 	return 1;
2136 
2137       if (GET_MODE_CLASS (mode) == MODE_INT)
2138 	{
2139 	  if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
2140 	    return 1;
2141 
2142 	  if (TARGET_VSX_SMALL_INTEGER)
2143 	    {
2144 	      if (mode == SImode)
2145 		return 1;
2146 
2147 	      if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
2148 		return 1;
2149 	    }
2150 	}
2151 
2152       if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
2153 	  && PAIRED_VECTOR_MODE (mode))
2154 	return 1;
2155 
2156       return 0;
2157     }
2158 
2159   /* The CR register can only hold CC modes.  */
2160   if (CR_REGNO_P (regno))
2161     return GET_MODE_CLASS (mode) == MODE_CC;
2162 
2163   if (CA_REGNO_P (regno))
2164     return mode == Pmode || mode == SImode;
2165 
2166   /* AltiVec modes only in AltiVec registers.  */
2167   if (ALTIVEC_REGNO_P (regno))
2168     return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
2169 	    || mode == V1TImode);
2170 
2171   /* ...but GPRs can hold SIMD data on the SPE in one register.  */
2172   if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
2173     return 1;
2174 
2175   /* We cannot put non-VSX TImode or PTImode anywhere except the general
2176      registers, and the value must fit within the register set.  */
2177 
2178   return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
2179 }
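/* For instance, per the PTImode test above on a 64-bit target, a PTImode
   value may start at an even GPR such as r10 (occupying r10/r11) but not
   at an odd one such as r11; the register numbers are only illustrative.  */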
2180 
2181 /* Implement TARGET_HARD_REGNO_NREGS.  */
2182 
2183 static unsigned int
2184 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
2185 {
2186   return rs6000_hard_regno_nregs[mode][regno];
2187 }
2188 
2189 /* Implement TARGET_HARD_REGNO_MODE_OK.  */
2190 
2191 static bool
2192 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
2193 {
2194   return rs6000_hard_regno_mode_ok_p[mode][regno];
2195 }
2196 
2197 /* Implement TARGET_MODES_TIEABLE_P.
2198 
2199    PTImode cannot tie with other modes because PTImode is restricted to even
2200    GPR registers, and TImode can go in any GPR as well as VSX registers (PR
2201    57744).
2202 
2203    Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
2204    128-bit floating point on VSX systems ties with other vectors.  */
2205 
2206 static bool
2207 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
2208 {
2209   if (mode1 == PTImode)
2210     return mode2 == PTImode;
2211   if (mode2 == PTImode)
2212     return false;
2213 
2214   if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
2215     return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
2216   if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
2217     return false;
2218 
2219   if (SCALAR_FLOAT_MODE_P (mode1))
2220     return SCALAR_FLOAT_MODE_P (mode2);
2221   if (SCALAR_FLOAT_MODE_P (mode2))
2222     return false;
2223 
2224   if (GET_MODE_CLASS (mode1) == MODE_CC)
2225     return GET_MODE_CLASS (mode2) == MODE_CC;
2226   if (GET_MODE_CLASS (mode2) == MODE_CC)
2227     return false;
2228 
2229   if (SPE_VECTOR_MODE (mode1))
2230     return SPE_VECTOR_MODE (mode2);
2231   if (SPE_VECTOR_MODE (mode2))
2232     return false;
2233 
2234   return true;
2235 }
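/* Examples that follow directly from the ordering above: two Altivec/VSX
   vector modes such as V4SImode and V2DFmode tie, as do two scalar float
   modes such as SFmode and DFmode; DImode and DFmode do not tie because
   only one of them is a scalar float mode, and PTImode ties only with
   itself.  */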
2236 
2237 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  */
2238 
2239 static bool
2240 rs6000_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
2241 {
2242   if (TARGET_32BIT
2243       && TARGET_POWERPC64
2244       && GET_MODE_SIZE (mode) > 4
2245       && INT_REGNO_P (regno))
2246     return true;
2247 
2248   if (TARGET_VSX
2249       && FP_REGNO_P (regno)
2250       && GET_MODE_SIZE (mode) > 8
2251       && !FLOAT128_2REG_P (mode))
2252     return true;
2253 
2254   return false;
2255 }
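/* Concretely, the first test covers -m32 code on a 64-bit PowerPC: a
   DImode value (8 bytes) held in a GPR is only partly preserved across a
   call, since the 32-bit ABI saves only the low 32 bits of the call-saved
   GPRs.  */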
2256 
2257 /* Print interesting facts about registers.  */
2258 static void
2259 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2260 {
2261   int r, m;
2262 
2263   for (r = first_regno; r <= last_regno; ++r)
2264     {
2265       const char *comma = "";
2266       int len;
2267 
2268       if (first_regno == last_regno)
2269 	fprintf (stderr, "%s:\t", reg_name);
2270       else
2271 	fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2272 
2273       len = 8;
2274       for (m = 0; m < NUM_MACHINE_MODES; ++m)
2275 	if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2276 	  {
2277 	    if (len > 70)
2278 	      {
2279 		fprintf (stderr, ",\n\t");
2280 		len = 8;
2281 		comma = "";
2282 	      }
2283 
2284 	    if (rs6000_hard_regno_nregs[m][r] > 1)
2285 	      len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2286 			     rs6000_hard_regno_nregs[m][r]);
2287 	    else
2288 	      len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2289 
2290 	    comma = ", ";
2291 	  }
2292 
2293       if (call_used_regs[r])
2294 	{
2295 	  if (len > 70)
2296 	    {
2297 	      fprintf (stderr, ",\n\t");
2298 	      len = 8;
2299 	      comma = "";
2300 	    }
2301 
2302 	  len += fprintf (stderr, "%s%s", comma, "call-used");
2303 	  comma = ", ";
2304 	}
2305 
2306       if (fixed_regs[r])
2307 	{
2308 	  if (len > 70)
2309 	    {
2310 	      fprintf (stderr, ",\n\t");
2311 	      len = 8;
2312 	      comma = "";
2313 	    }
2314 
2315 	  len += fprintf (stderr, "%s%s", comma, "fixed");
2316 	  comma = ", ";
2317 	}
2318 
2319       if (len > 70)
2320 	{
2321 	  fprintf (stderr, ",\n\t");
2322 	  comma = "";
2323 	}
2324 
2325       len += fprintf (stderr, "%sreg-class = %s", comma,
2326 		      reg_class_names[(int)rs6000_regno_regclass[r]]);
2327       comma = ", ";
2328 
2329       if (len > 70)
2330 	{
2331 	  fprintf (stderr, ",\n\t");
2332 	  comma = "";
2333 	}
2334 
2335       fprintf (stderr, "%sregno = %d\n", comma, r);
2336     }
2337 }
2338 
2339 static const char *
2340 rs6000_debug_vector_unit (enum rs6000_vector v)
2341 {
2342   const char *ret;
2343 
2344   switch (v)
2345     {
2346     case VECTOR_NONE:	   ret = "none";      break;
2347     case VECTOR_ALTIVEC:   ret = "altivec";   break;
2348     case VECTOR_VSX:	   ret = "vsx";       break;
2349     case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2350     case VECTOR_PAIRED:	   ret = "paired";    break;
2351     case VECTOR_SPE:	   ret = "spe";       break;
2352     case VECTOR_OTHER:	   ret = "other";     break;
2353     default:		   ret = "unknown";   break;
2354     }
2355 
2356   return ret;
2357 }
2358 
2359 /* Inner function printing just the address mask for a particular reload
2360    register class.  */
2361 DEBUG_FUNCTION char *
2362 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2363 {
2364   static char ret[8];
2365   char *p = ret;
2366 
2367   if ((mask & RELOAD_REG_VALID) != 0)
2368     *p++ = 'v';
2369   else if (keep_spaces)
2370     *p++ = ' ';
2371 
2372   if ((mask & RELOAD_REG_MULTIPLE) != 0)
2373     *p++ = 'm';
2374   else if (keep_spaces)
2375     *p++ = ' ';
2376 
2377   if ((mask & RELOAD_REG_INDEXED) != 0)
2378     *p++ = 'i';
2379   else if (keep_spaces)
2380     *p++ = ' ';
2381 
2382   if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2383     *p++ = 'O';
2384   else if ((mask & RELOAD_REG_OFFSET) != 0)
2385     *p++ = 'o';
2386   else if (keep_spaces)
2387     *p++ = ' ';
2388 
2389   if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2390     *p++ = '+';
2391   else if (keep_spaces)
2392     *p++ = ' ';
2393 
2394   if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2395     *p++ = '+';
2396   else if (keep_spaces)
2397     *p++ = ' ';
2398 
2399   if ((mask & RELOAD_REG_AND_M16) != 0)
2400     *p++ = '&';
2401   else if (keep_spaces)
2402     *p++ = ' ';
2403 
2404   *p = '\0';
2405 
2406   return ret;
2407 }
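/* Sample of the encoding, assuming keep_spaces is true: a mask with
   RELOAD_REG_VALID, RELOAD_REG_INDEXED, RELOAD_REG_OFFSET and
   RELOAD_REG_PRE_INCDEC set prints as "v io+" followed by blanks for the
   unset PRE_MODIFY and AND_M16 columns.  */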
2408 
2409 /* Print the address masks in a human readable fashion.  */
2410 DEBUG_FUNCTION void
2411 rs6000_debug_print_mode (ssize_t m)
2412 {
2413   ssize_t rc;
2414   int spaces = 0;
2415   bool fuse_extra_p;
2416 
2417   fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2418   for (rc = 0; rc < N_RELOAD_REG; rc++)
2419     fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2420 	     rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2421 
2422   if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2423       || (reg_addr[m].reload_load != CODE_FOR_nothing))
2424     fprintf (stderr, "  Reload=%c%c",
2425 	     (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2426 	     (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2427   else
2428     spaces += sizeof ("  Reload=sl") - 1;
2429 
2430   if (reg_addr[m].scalar_in_vmx_p)
2431     {
2432       fprintf (stderr, "%*s  Upper=y", spaces, "");
2433       spaces = 0;
2434     }
2435   else
2436     spaces += sizeof ("  Upper=y") - 1;
2437 
2438   fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2439 		  || reg_addr[m].fused_toc);
2440   if (!fuse_extra_p)
2441     {
2442       for (rc = 0; rc < N_RELOAD_REG; rc++)
2443 	{
2444 	  if (rc != RELOAD_REG_ANY)
2445 	    {
2446 	      if (reg_addr[m].fusion_addi_ld[rc]     != CODE_FOR_nothing
2448 		  || reg_addr[m].fusion_addi_st[rc]  != CODE_FOR_nothing
2449 		  || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing
2450 		  || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2451 		{
2452 		  fuse_extra_p = true;
2453 		  break;
2454 		}
2455 	    }
2456 	}
2457     }
2458 
2459   if (fuse_extra_p)
2460     {
2461       fprintf (stderr, "%*s  Fuse:", spaces, "");
2462       spaces = 0;
2463 
2464       for (rc = 0; rc < N_RELOAD_REG; rc++)
2465 	{
2466 	  if (rc != RELOAD_REG_ANY)
2467 	    {
2468 	      char load, store;
2469 
2470 	      if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing)
2471 		load = 'l';
2472 	      else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing)
2473 		load = 'L';
2474 	      else
2475 		load = '-';
2476 
2477 	      if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2478 		store = 's';
2479 	      else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing)
2480 		store = 'S';
2481 	      else
2482 		store = '-';
2483 
2484 	      if (load == '-' && store == '-')
2485 		spaces += 5;
2486 	      else
2487 		{
2488 		  fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "",
2489 			   reload_reg_map[rc].name[0], load, store);
2490 		  spaces = 0;
2491 		}
2492 	    }
2493 	}
2494 
2495       if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2496 	{
2497 	  fprintf (stderr, "%*sP8gpr", (spaces + 1), "");
2498 	  spaces = 0;
2499 	}
2500       else
2501 	spaces += sizeof (" P8gpr") - 1;
2502 
2503       if (reg_addr[m].fused_toc)
2504 	{
2505 	  fprintf (stderr, "%*sToc", (spaces + 1), "");
2506 	  spaces = 0;
2507 	}
2508       else
2509 	spaces += sizeof (" Toc") - 1;
2510     }
2511   else
2512     spaces += sizeof ("  Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;
2513 
2514   if (rs6000_vector_unit[m] != VECTOR_NONE
2515       || rs6000_vector_mem[m] != VECTOR_NONE)
2516     {
2517       fprintf (stderr, "%*s  vector: arith=%-10s mem=%s",
2518 	       spaces, "",
2519 	       rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2520 	       rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2521     }
2522 
2523   fputs ("\n", stderr);
2524 }
2525 
2526 #define DEBUG_FMT_ID "%-32s= "
2527 #define DEBUG_FMT_D   DEBUG_FMT_ID "%d\n"
2528 #define DEBUG_FMT_WX  DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2529 #define DEBUG_FMT_S   DEBUG_FMT_ID "%s\n"
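/* For example, fprintf (stderr, DEBUG_FMT_S, "cmodel", "small") prints the
   name left-justified in a 32-column field followed by "= small", which is
   what keeps the -mdebug=reg output below lined up.  */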
2530 
2531 /* Print various interesting information with -mdebug=reg.  */
2532 static void
2533 rs6000_debug_reg_global (void)
2534 {
2535   static const char *const tf[2] = { "false", "true" };
2536   const char *nl = (const char *)0;
2537   int m;
2538   size_t m1, m2, v;
2539   char costly_num[20];
2540   char nop_num[20];
2541   char flags_buffer[40];
2542   const char *costly_str;
2543   const char *nop_str;
2544   const char *trace_str;
2545   const char *abi_str;
2546   const char *cmodel_str;
2547   struct cl_target_option cl_opts;
2548 
2549   /* Modes we want tieable information on.  */
2550   static const machine_mode print_tieable_modes[] = {
2551     QImode,
2552     HImode,
2553     SImode,
2554     DImode,
2555     TImode,
2556     PTImode,
2557     SFmode,
2558     DFmode,
2559     TFmode,
2560     IFmode,
2561     KFmode,
2562     SDmode,
2563     DDmode,
2564     TDmode,
2565     V8QImode,
2566     V4HImode,
2567     V2SImode,
2568     V16QImode,
2569     V8HImode,
2570     V4SImode,
2571     V2DImode,
2572     V1TImode,
2573     V32QImode,
2574     V16HImode,
2575     V8SImode,
2576     V4DImode,
2577     V2TImode,
2578     V2SFmode,
2579     V4SFmode,
2580     V2DFmode,
2581     V8SFmode,
2582     V4DFmode,
2583     CCmode,
2584     CCUNSmode,
2585     CCEQmode,
2586   };
2587 
2588   /* Virtual regs we are interested in.  */
2589   static const struct {
2590     int regno;			/* register number.  */
2591     const char *name;		/* register name.  */
2592   } virtual_regs[] = {
2593     { STACK_POINTER_REGNUM,			"stack pointer:" },
2594     { TOC_REGNUM,				"toc:          " },
2595     { STATIC_CHAIN_REGNUM,			"static chain: " },
2596     { RS6000_PIC_OFFSET_TABLE_REGNUM,		"pic offset:   " },
2597     { HARD_FRAME_POINTER_REGNUM,		"hard frame:   " },
2598     { ARG_POINTER_REGNUM,			"arg pointer:  " },
2599     { FRAME_POINTER_REGNUM,			"frame pointer:" },
2600     { FIRST_PSEUDO_REGISTER,			"first pseudo: " },
2601     { FIRST_VIRTUAL_REGISTER,			"first virtual:" },
2602     { VIRTUAL_INCOMING_ARGS_REGNUM,		"incoming_args:" },
2603     { VIRTUAL_STACK_VARS_REGNUM,		"stack_vars:   " },
2604     { VIRTUAL_STACK_DYNAMIC_REGNUM,		"stack_dynamic:" },
2605     { VIRTUAL_OUTGOING_ARGS_REGNUM,		"outgoing_args:" },
2606     { VIRTUAL_CFA_REGNUM,			"cfa (frame):  " },
2607     { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM,	"stack boundary:" },
2608     { LAST_VIRTUAL_REGISTER,			"last virtual: " },
2609   };
2610 
2611   fputs ("\nHard register information:\n", stderr);
2612   rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2613   rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2614   rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2615 			  LAST_ALTIVEC_REGNO,
2616 			  "vs");
2617   rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2618   rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2619   rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2620   rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2621   rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2622   rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2623   rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2624   rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2625 
2626   fputs ("\nVirtual/stack/frame registers:\n", stderr);
2627   for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2628     fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2629 
2630   fprintf (stderr,
2631 	   "\n"
2632 	   "d  reg_class = %s\n"
2633 	   "f  reg_class = %s\n"
2634 	   "v  reg_class = %s\n"
2635 	   "wa reg_class = %s\n"
2636 	   "wb reg_class = %s\n"
2637 	   "wd reg_class = %s\n"
2638 	   "we reg_class = %s\n"
2639 	   "wf reg_class = %s\n"
2640 	   "wg reg_class = %s\n"
2641 	   "wh reg_class = %s\n"
2642 	   "wi reg_class = %s\n"
2643 	   "wj reg_class = %s\n"
2644 	   "wk reg_class = %s\n"
2645 	   "wl reg_class = %s\n"
2646 	   "wm reg_class = %s\n"
2647 	   "wo reg_class = %s\n"
2648 	   "wp reg_class = %s\n"
2649 	   "wq reg_class = %s\n"
2650 	   "wr reg_class = %s\n"
2651 	   "ws reg_class = %s\n"
2652 	   "wt reg_class = %s\n"
2653 	   "wu reg_class = %s\n"
2654 	   "wv reg_class = %s\n"
2655 	   "ww reg_class = %s\n"
2656 	   "wx reg_class = %s\n"
2657 	   "wy reg_class = %s\n"
2658 	   "wz reg_class = %s\n"
2659 	   "wA reg_class = %s\n"
2660 	   "wH reg_class = %s\n"
2661 	   "wI reg_class = %s\n"
2662 	   "wJ reg_class = %s\n"
2663 	   "wK reg_class = %s\n"
2664 	   "\n",
2665 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2666 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2667 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2668 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2669 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
2670 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2671 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2672 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2673 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2674 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2675 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2676 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2677 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2678 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2679 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2680 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]],
2681 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
2682 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
2683 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2684 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2685 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2686 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2687 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2688 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2689 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2690 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2691 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]],
2692 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]],
2693 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wH]],
2694 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wI]],
2695 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wJ]],
2696 	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wK]]);
2697 
2698   nl = "\n";
2699   for (m = 0; m < NUM_MACHINE_MODES; ++m)
2700     rs6000_debug_print_mode (m);
2701 
2702   fputs ("\n", stderr);
2703 
2704   for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2705     {
2706       machine_mode mode1 = print_tieable_modes[m1];
2707       bool first_time = true;
2708 
2709       nl = (const char *)0;
2710       for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2711 	{
2712 	  machine_mode mode2 = print_tieable_modes[m2];
2713 	  if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2714 	    {
2715 	      if (first_time)
2716 		{
2717 		  fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2718 		  nl = "\n";
2719 		  first_time = false;
2720 		}
2721 
2722 	      fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2723 	    }
2724 	}
2725 
2726       if (!first_time)
2727 	fputs ("\n", stderr);
2728     }
2729 
2730   if (nl)
2731     fputs (nl, stderr);
2732 
2733   if (rs6000_recip_control)
2734     {
2735       fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2736 
2737       for (m = 0; m < NUM_MACHINE_MODES; ++m)
2738 	if (rs6000_recip_bits[m])
2739 	  {
2740 	    fprintf (stderr,
2741 		     "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2742 		     GET_MODE_NAME (m),
2743 		     (RS6000_RECIP_AUTO_RE_P (m)
2744 		      ? "auto"
2745 		      : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2746 		     (RS6000_RECIP_AUTO_RSQRTE_P (m)
2747 		      ? "auto"
2748 		      : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2749 	  }
2750 
2751       fputs ("\n", stderr);
2752     }
2753 
2754   if (rs6000_cpu_index >= 0)
2755     {
2756       const char *name = processor_target_table[rs6000_cpu_index].name;
2757       HOST_WIDE_INT flags
2758 	= processor_target_table[rs6000_cpu_index].target_enable;
2759 
2760       sprintf (flags_buffer, "-mcpu=%s flags", name);
2761       rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2762     }
2763   else
2764     fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2765 
2766   if (rs6000_tune_index >= 0)
2767     {
2768       const char *name = processor_target_table[rs6000_tune_index].name;
2769       HOST_WIDE_INT flags
2770 	= processor_target_table[rs6000_tune_index].target_enable;
2771 
2772       sprintf (flags_buffer, "-mtune=%s flags", name);
2773       rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2774     }
2775   else
2776     fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2777 
2778   cl_target_option_save (&cl_opts, &global_options);
2779   rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2780 			    rs6000_isa_flags);
2781 
2782   rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2783 			    rs6000_isa_flags_explicit);
2784 
2785   rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2786 				rs6000_builtin_mask);
2787 
2788   rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2789 
2790   fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2791 	   OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2792 
2793   switch (rs6000_sched_costly_dep)
2794     {
2795     case max_dep_latency:
2796       costly_str = "max_dep_latency";
2797       break;
2798 
2799     case no_dep_costly:
2800       costly_str = "no_dep_costly";
2801       break;
2802 
2803     case all_deps_costly:
2804       costly_str = "all_deps_costly";
2805       break;
2806 
2807     case true_store_to_load_dep_costly:
2808       costly_str = "true_store_to_load_dep_costly";
2809       break;
2810 
2811     case store_to_load_dep_costly:
2812       costly_str = "store_to_load_dep_costly";
2813       break;
2814 
2815     default:
2816       costly_str = costly_num;
2817       sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2818       break;
2819     }
2820 
2821   fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2822 
2823   switch (rs6000_sched_insert_nops)
2824     {
2825     case sched_finish_regroup_exact:
2826       nop_str = "sched_finish_regroup_exact";
2827       break;
2828 
2829     case sched_finish_pad_groups:
2830       nop_str = "sched_finish_pad_groups";
2831       break;
2832 
2833     case sched_finish_none:
2834       nop_str = "sched_finish_none";
2835       break;
2836 
2837     default:
2838       nop_str = nop_num;
2839       sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2840       break;
2841     }
2842 
2843   fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2844 
2845   switch (rs6000_sdata)
2846     {
2847     default:
2848     case SDATA_NONE:
2849       break;
2850 
2851     case SDATA_DATA:
2852       fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2853       break;
2854 
2855     case SDATA_SYSV:
2856       fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2857       break;
2858 
2859     case SDATA_EABI:
2860       fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2861       break;
2862 
2863     }
2864 
2865   switch (rs6000_traceback)
2866     {
2867     case traceback_default:	trace_str = "default";	break;
2868     case traceback_none:	trace_str = "none";	break;
2869     case traceback_part:	trace_str = "part";	break;
2870     case traceback_full:	trace_str = "full";	break;
2871     default:			trace_str = "unknown";	break;
2872     }
2873 
2874   fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2875 
2876   switch (rs6000_current_cmodel)
2877     {
2878     case CMODEL_SMALL:	cmodel_str = "small";	break;
2879     case CMODEL_MEDIUM:	cmodel_str = "medium";	break;
2880     case CMODEL_LARGE:	cmodel_str = "large";	break;
2881     default:		cmodel_str = "unknown";	break;
2882     }
2883 
2884   fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2885 
2886   switch (rs6000_current_abi)
2887     {
2888     case ABI_NONE:	abi_str = "none";	break;
2889     case ABI_AIX:	abi_str = "aix";	break;
2890     case ABI_ELFv2:	abi_str = "ELFv2";	break;
2891     case ABI_V4:	abi_str = "V4";		break;
2892     case ABI_DARWIN:	abi_str = "darwin";	break;
2893     default:		abi_str = "unknown";	break;
2894     }
2895 
2896   fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2897 
2898   if (rs6000_altivec_abi)
2899     fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2900 
2901   if (rs6000_spe_abi)
2902     fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2903 
2904   if (rs6000_darwin64_abi)
2905     fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2906 
2907   if (rs6000_float_gprs)
2908     fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2909 
2910   fprintf (stderr, DEBUG_FMT_S, "fprs",
2911 	   (TARGET_FPRS ? "true" : "false"));
2912 
2913   fprintf (stderr, DEBUG_FMT_S, "single_float",
2914 	   (TARGET_SINGLE_FLOAT ? "true" : "false"));
2915 
2916   fprintf (stderr, DEBUG_FMT_S, "double_float",
2917 	   (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2918 
2919   fprintf (stderr, DEBUG_FMT_S, "soft_float",
2920 	   (TARGET_SOFT_FLOAT ? "true" : "false"));
2921 
2922   fprintf (stderr, DEBUG_FMT_S, "e500_single",
2923 	   (TARGET_E500_SINGLE ? "true" : "false"));
2924 
2925   fprintf (stderr, DEBUG_FMT_S, "e500_double",
2926 	   (TARGET_E500_DOUBLE ? "true" : "false"));
2927 
2928   if (TARGET_LINK_STACK)
2929     fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2930 
2931   fprintf (stderr, DEBUG_FMT_S, "lra", TARGET_LRA ? "true" : "false");
2932 
2933   if (TARGET_P8_FUSION)
2934     {
2935       char options[80];
2936 
2937       strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
2938       if (TARGET_TOC_FUSION)
2939 	strcat (options, ", toc");
2940 
2941       if (TARGET_P8_FUSION_SIGN)
2942 	strcat (options, ", sign");
2943 
2944       fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2945     }
2946 
2947   fprintf (stderr, DEBUG_FMT_S, "plt-format",
2948 	   TARGET_SECURE_PLT ? "secure" : "bss");
2949   fprintf (stderr, DEBUG_FMT_S, "struct-return",
2950 	   aix_struct_return ? "aix" : "sysv");
2951   fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2952   fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2953   fprintf (stderr, DEBUG_FMT_S, "align_branch",
2954 	   tf[!!rs6000_align_branch_targets]);
2955   fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2956   fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2957 	   rs6000_long_double_type_size);
2958   fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2959 	   (int)rs6000_sched_restricted_insns_priority);
2960   fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2961 	   (int)END_BUILTINS);
2962   fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2963 	   (int)RS6000_BUILTIN_COUNT);
2964 
2965   fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2966 	   (int)TARGET_FLOAT128_ENABLE_TYPE);
2967 
2968   if (TARGET_VSX)
2969     fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2970 	     (int)VECTOR_ELEMENT_SCALAR_64BIT);
2971 
2972   if (TARGET_DIRECT_MOVE_128)
2973     fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2974 	     (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2975 }
2976 
2977 
2978 /* Update the addr mask bits in reg_addr to help secondary reload and the
2979    legitimate address support figure out the appropriate addressing to
2980    use.  */
2981 
2982 static void
2983 rs6000_setup_reg_addr_masks (void)
2984 {
2985   ssize_t rc, reg, m, nregs;
2986   addr_mask_type any_addr_mask, addr_mask;
2987 
2988   for (m = 0; m < NUM_MACHINE_MODES; ++m)
2989     {
2990       machine_mode m2 = (machine_mode) m;
2991       bool complex_p = false;
2992       bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2993       size_t msize;
2994 
2995       if (COMPLEX_MODE_P (m2))
2996 	{
2997 	  complex_p = true;
2998 	  m2 = GET_MODE_INNER (m2);
2999 	}
3000 
3001       msize = GET_MODE_SIZE (m2);
3002 
3003       /* SDmode is special in that we want to access it only via REG+REG
3004 	 addressing on power7 and above, since we want to use the LFIWZX and
3005 	 STFIWZX instructions to load it.  */
3006       bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
3007 
3008       any_addr_mask = 0;
3009       for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
3010 	{
3011 	  addr_mask = 0;
3012 	  reg = reload_reg_map[rc].reg;
3013 
3014 	  /* Can mode values go in the GPR/FPR/Altivec registers?  */
3015 	  if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
3016 	    {
3017 	      bool small_int_vsx_p = (small_int_p
3018 				      && (rc == RELOAD_REG_FPR
3019 					  || rc == RELOAD_REG_VMX));
3020 
3021 	      nregs = rs6000_hard_regno_nregs[m][reg];
3022 	      addr_mask |= RELOAD_REG_VALID;
3023 
3024 	      /* Indicate if the mode takes more than 1 physical register.  If
3025 		 it takes a single register, indicate it can do REG+REG
3026 		 addressing.  Small integers in VSX registers can only do
3027 		 REG+REG addressing.  */
3028 	      if (small_int_vsx_p)
3029 		addr_mask |= RELOAD_REG_INDEXED;
3030 	      else if (nregs > 1 || m == BLKmode || complex_p)
3031 		addr_mask |= RELOAD_REG_MULTIPLE;
3032 	      else
3033 		addr_mask |= RELOAD_REG_INDEXED;
3034 
3035 	      /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
3036 		 addressing.  Restrict addressing on SPE for 64-bit types
3037 		 because of the SUBREG hackery used to address 64-bit floats in
3038 		 '32-bit' GPRs.  If we allow scalars into Altivec registers,
3039 		 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.  */
3040 
3041 	      if (TARGET_UPDATE
3042 		  && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
3043 		  && msize <= 8
3044 		  && !VECTOR_MODE_P (m2)
3045 		  && !FLOAT128_VECTOR_P (m2)
3046 		  && !complex_p
3047 		  && !small_int_vsx_p
3048 		  && (m2 != DFmode || !TARGET_UPPER_REGS_DF)
3049 		  && (m2 != SFmode || !TARGET_UPPER_REGS_SF)
3050 		  && !(TARGET_E500_DOUBLE && msize == 8))
3051 		{
3052 		  addr_mask |= RELOAD_REG_PRE_INCDEC;
3053 
3054 		  /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
3055 		     we don't allow PRE_MODIFY for some multi-register
3056 		     operations.  */
3057 		  switch (m)
3058 		    {
3059 		    default:
3060 		      addr_mask |= RELOAD_REG_PRE_MODIFY;
3061 		      break;
3062 
3063 		    case E_DImode:
3064 		      if (TARGET_POWERPC64)
3065 			addr_mask |= RELOAD_REG_PRE_MODIFY;
3066 		      break;
3067 
3068 		    case E_DFmode:
3069 		    case E_DDmode:
3070 		      if (TARGET_DF_INSN)
3071 			addr_mask |= RELOAD_REG_PRE_MODIFY;
3072 		      break;
3073 		    }
3074 		}
3075 	    }
3076 
3077 	  /* GPR and FPR registers can do REG+OFFSET addressing, except
3078 	     possibly for SDmode.  ISA 3.0 (i.e. power9) adds D-form addressing
3079 	     for 64-bit scalars and 32-bit SFmode to altivec registers.  */
3080 	  if ((addr_mask != 0) && !indexed_only_p
3081 	      && msize <= 8
3082 	      && (rc == RELOAD_REG_GPR
3083 		  || ((msize == 8 || m2 == SFmode)
3084 		      && (rc == RELOAD_REG_FPR
3085 			  || (rc == RELOAD_REG_VMX
3086 			      && TARGET_P9_DFORM_SCALAR)))))
3087 	    addr_mask |= RELOAD_REG_OFFSET;
3088 
3089 	  /* VSX registers can do REG+OFFSET addressing if ISA 3.0
3090 	     instructions are enabled.  The offset for 128-bit VSX registers is
3091 	     only 12 bits.  While GPRs can handle the full offset range, VSX
3092 	     registers can only handle the restricted range.  */
3093 	  else if ((addr_mask != 0) && !indexed_only_p
3094 		   && msize == 16 && TARGET_P9_DFORM_VECTOR
3095 		   && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
3096 		       || (m2 == TImode && TARGET_VSX_TIMODE)))
3097 	    {
3098 	      addr_mask |= RELOAD_REG_OFFSET;
3099 	      if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
3100 		addr_mask |= RELOAD_REG_QUAD_OFFSET;
3101 	    }
3102 
3103 	  /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
3104 	     addressing on 128-bit types.  */
3105 	  if (rc == RELOAD_REG_VMX && msize == 16
3106 	      && (addr_mask & RELOAD_REG_VALID) != 0)
3107 	    addr_mask |= RELOAD_REG_AND_M16;
3108 
3109 	  reg_addr[m].addr_mask[rc] = addr_mask;
3110 	  any_addr_mask |= addr_mask;
3111 	}
3112 
3113       reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
3114     }
3115 }
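/* Editorial sketch (not part of GCC): the function above accumulates
   per-mode, per-register-class capability bits into a small bitmask that is
   later queried by the reload and address-legitimization code.  The
   standalone program below illustrates the same pattern with hypothetical
   flag values; the real RELOAD_REG_* masks are defined elsewhere.  Kept
   under #if 0 so it does not affect the build.  */
#if 0
#include <stdio.h>

#define EX_VALID    0x01	/* mode is valid in this register class */
#define EX_INDEXED  0x02	/* REG+REG addressing allowed */
#define EX_OFFSET   0x04	/* REG+OFFSET addressing allowed */

int
main (void)
{
  unsigned int mask = 0;

  mask |= EX_VALID;		/* the mode fits in the class */
  mask |= EX_INDEXED;		/* single register => REG+REG is fine */

  /* Querying the mask mirrors how reg_addr[m].addr_mask[rc] is used.  */
  if ((mask & EX_OFFSET) == 0)
    printf ("REG+OFFSET addressing not allowed\n");
  return 0;
}
#endif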
3116 
3117 
3118 /* Initialize the various global tables that are based on register size.  */
3119 static void
3120 rs6000_init_hard_regno_mode_ok (bool global_init_p)
3121 {
3122   ssize_t r, m, c;
3123   int align64;
3124   int align32;
3125 
3126   /* Precalculate REGNO_REG_CLASS.  */
3127   rs6000_regno_regclass[0] = GENERAL_REGS;
3128   for (r = 1; r < 32; ++r)
3129     rs6000_regno_regclass[r] = BASE_REGS;
3130 
3131   for (r = 32; r < 64; ++r)
3132     rs6000_regno_regclass[r] = FLOAT_REGS;
3133 
3134   for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
3135     rs6000_regno_regclass[r] = NO_REGS;
3136 
3137   for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
3138     rs6000_regno_regclass[r] = ALTIVEC_REGS;
3139 
3140   rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
3141   for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
3142     rs6000_regno_regclass[r] = CR_REGS;
3143 
3144   rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
3145   rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
3146   rs6000_regno_regclass[CA_REGNO] = NO_REGS;
3147   rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
3148   rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
3149   rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
3150   rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
3151   rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
3152   rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
3153   rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
3154   rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
3155   rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
3156 
3157   /* Precalculate register class to simpler reload register class.  We don't
3158      need all of the register classes that are combinations of different
3159      classes, just the simple ones that have constraint letters.  */
3160   for (c = 0; c < N_REG_CLASSES; c++)
3161     reg_class_to_reg_type[c] = NO_REG_TYPE;
3162 
3163   reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
3164   reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
3165   reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
3166   reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
3167   reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
3168   reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
3169   reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
3170   reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
3171   reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
3172   reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
3173   reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
3174   reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
3175 
3176   if (TARGET_VSX)
3177     {
3178       reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
3179       reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
3180     }
3181   else
3182     {
3183       reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
3184       reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
3185     }
3186 
3187   /* Precalculate the valid memory formats as well as the vector information,
3188      this must be set up before the rs6000_hard_regno_nregs_internal calls
3189      below.  */
3190   gcc_assert ((int)VECTOR_NONE == 0);
3191   memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
3192   memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
3193 
3194   gcc_assert ((int)CODE_FOR_nothing == 0);
3195   memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
3196 
3197   gcc_assert ((int)NO_REGS == 0);
3198   memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
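  /* Editorial sketch (not part of GCC): the gcc_assert/memset pairs above
     rely on the enumerators VECTOR_NONE, CODE_FOR_nothing and NO_REGS having
     the value zero, so zero-filling the arrays is the same as filling them
     with those enumerators.  A minimal illustration of the idiom with a
     made-up enum and plain assert, kept under #if 0 so it does not affect
     the build.  */
#if 0
#include <assert.h>
#include <string.h>

enum example_kind { EX_NONE = 0, EX_A, EX_B };

static enum example_kind table[16];

static void
init_table (void)
{
  /* If EX_NONE were ever renumbered, the memset below would silently fill
     the table with the wrong enumerator; the assert catches that.  */
  assert ((int) EX_NONE == 0);
  memset (table, '\0', sizeof (table));
}
#endif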
3199 
3200   /* The VSX hardware allows native alignment for vectors; these settings
3201      control whether the compiler uses native or 128-bit alignment.  */
3202   if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
3203     {
3204       align64 = 64;
3205       align32 = 32;
3206     }
3207   else
3208     {
3209       align64 = 128;
3210       align32 = 128;
3211     }
3212 
3213   /* KF mode (IEEE 128-bit in VSX registers).  We do not have arithmetic, so
3214      only set the memory modes.  Include TFmode if -mabi=ieeelongdouble.  */
3215   if (TARGET_FLOAT128_TYPE)
3216     {
3217       rs6000_vector_mem[KFmode] = VECTOR_VSX;
3218       rs6000_vector_align[KFmode] = 128;
3219 
3220       if (FLOAT128_IEEE_P (TFmode))
3221 	{
3222 	  rs6000_vector_mem[TFmode] = VECTOR_VSX;
3223 	  rs6000_vector_align[TFmode] = 128;
3224 	}
3225     }
3226 
3227   /* V2DF mode, VSX only.  */
3228   if (TARGET_VSX)
3229     {
3230       rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
3231       rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
3232       rs6000_vector_align[V2DFmode] = align64;
3233     }
3234 
3235   /* V4SF mode, either VSX or Altivec.  */
3236   if (TARGET_VSX)
3237     {
3238       rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
3239       rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
3240       rs6000_vector_align[V4SFmode] = align32;
3241     }
3242   else if (TARGET_ALTIVEC)
3243     {
3244       rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
3245       rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
3246       rs6000_vector_align[V4SFmode] = align32;
3247     }
3248 
3249   /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
3250      and stores. */
3251   if (TARGET_ALTIVEC)
3252     {
3253       rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
3254       rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
3255       rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
3256       rs6000_vector_align[V4SImode] = align32;
3257       rs6000_vector_align[V8HImode] = align32;
3258       rs6000_vector_align[V16QImode] = align32;
3259 
3260       if (TARGET_VSX)
3261 	{
3262 	  rs6000_vector_mem[V4SImode] = VECTOR_VSX;
3263 	  rs6000_vector_mem[V8HImode] = VECTOR_VSX;
3264 	  rs6000_vector_mem[V16QImode] = VECTOR_VSX;
3265 	}
3266       else
3267 	{
3268 	  rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
3269 	  rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
3270 	  rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
3271 	}
3272     }
3273 
3274   /* V2DImode, full mode depends on ISA 2.07 vector mode.  Allow under VSX to
3275      do insert/splat/extract.  Altivec doesn't have 64-bit integer support.  */
3276   if (TARGET_VSX)
3277     {
3278       rs6000_vector_mem[V2DImode] = VECTOR_VSX;
3279       rs6000_vector_unit[V2DImode]
3280 	= (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3281       rs6000_vector_align[V2DImode] = align64;
3282 
3283       rs6000_vector_mem[V1TImode] = VECTOR_VSX;
3284       rs6000_vector_unit[V1TImode]
3285 	= (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3286       rs6000_vector_align[V1TImode] = 128;
3287     }
3288 
3289   /* DFmode, see if we want to use the VSX unit.  Memory is handled
3290      differently, so don't set rs6000_vector_mem.  */
3291   if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
3292     {
3293       rs6000_vector_unit[DFmode] = VECTOR_VSX;
3294       rs6000_vector_align[DFmode] = 64;
3295     }
3296 
3297   /* SFmode, see if we want to use the VSX unit.  */
3298   if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
3299     {
3300       rs6000_vector_unit[SFmode] = VECTOR_VSX;
3301       rs6000_vector_align[SFmode] = 32;
3302     }
3303 
3304   /* Allow TImode in VSX register and set the VSX memory macros.  */
3305   if (TARGET_VSX && TARGET_VSX_TIMODE)
3306     {
3307       rs6000_vector_mem[TImode] = VECTOR_VSX;
3308       rs6000_vector_align[TImode] = align64;
3309     }
3310 
3311   /* TODO add SPE and paired floating point vector support.  */
3312 
3313   /* Register class constraints for the constraints that depend on compile
3314      switches. When the VSX code was added, different constraints were added
3315      based on the type (DFmode, V2DFmode, V4SFmode).  For the vector types, all
3316      of the VSX registers are used.  The register classes for scalar floating
3317      point types are set based on whether we allow that type into the upper
3318      (Altivec) registers.  GCC has register classes to target the Altivec
3319      registers for load/store operations, to select using a VSX memory
3320      operation instead of the traditional floating point operation.  The
3321      constraints are:
3322 
3323 	d  - Register class to use with traditional DFmode instructions.
3324 	f  - Register class to use with traditional SFmode instructions.
3325 	v  - Altivec register.
3326 	wa - Any VSX register.
3327 	wc - Reserved to represent individual CR bits (used in LLVM).
3328 	wd - Preferred register class for V2DFmode.
3329 	wf - Preferred register class for V4SFmode.
3330 	wg - Float register for power6x move insns.
3331 	wh - FP register for direct move instructions.
3332 	wi - FP or VSX register to hold 64-bit integers for VSX insns.
3333 	wj - FP or VSX register to hold 64-bit integers for direct moves.
3334 	wk - FP or VSX register to hold 64-bit doubles for direct moves.
3335 	wl - Float register if we can do 32-bit signed int loads.
3336 	wm - VSX register for ISA 2.07 direct move operations.
3337 	wn - always NO_REGS.
3338 	wr - GPR if 64-bit mode is permitted.
3339 	ws - Register class to do ISA 2.06 DF operations.
3340 	wt - VSX register for TImode in VSX registers.
3341 	wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
3342 	wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
3343 	ww - Register class to do SF conversions in with VSX operations.
3344 	wx - Float register if we can do 32-bit int stores.
3345 	wy - Register class to do ISA 2.07 SF operations.
3346 	wz - Float register if we can do 32-bit unsigned int loads.
3347 	wH - Altivec register if SImode is allowed in VSX registers.
3348 	wI - VSX register if SImode is allowed in VSX registers.
3349 	wJ - VSX register if QImode/HImode are allowed in VSX registers.
3350 	wK - Altivec register if QImode/HImode are allowed in VSX registers.  */
3351 
3352   if (TARGET_HARD_FLOAT && TARGET_FPRS)
3353     rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS;	/* SFmode  */
3354 
3355   if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
3356     rs6000_constraints[RS6000_CONSTRAINT_d]  = FLOAT_REGS;	/* DFmode  */
3357 
3358   if (TARGET_VSX)
3359     {
3360       rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3361       rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS;	/* V2DFmode  */
3362       rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS;	/* V4SFmode  */
3363 
3364       if (TARGET_VSX_TIMODE)
3365 	rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS;	/* TImode  */
3366 
3367       if (TARGET_UPPER_REGS_DF)					/* DFmode  */
3368 	{
3369 	  rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
3370 	  rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
3371 	}
3372       else
3373 	rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
3374 
3375       if (TARGET_UPPER_REGS_DI)					/* DImode  */
3376 	rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS;
3377       else
3378 	rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS;
3379     }
3380 
3381   /* Add conditional constraints based on various options, to allow us to
3382      collapse multiple insn patterns.  */
3383   if (TARGET_ALTIVEC)
3384     rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3385 
3386   if (TARGET_MFPGPR)						/* DFmode  */
3387     rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
3388 
3389   if (TARGET_LFIWAX)
3390     rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS;	/* DImode  */
3391 
3392   if (TARGET_DIRECT_MOVE)
3393     {
3394       rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
3395       rs6000_constraints[RS6000_CONSTRAINT_wj]			/* DImode  */
3396 	= rs6000_constraints[RS6000_CONSTRAINT_wi];
3397       rs6000_constraints[RS6000_CONSTRAINT_wk]			/* DFmode  */
3398 	= rs6000_constraints[RS6000_CONSTRAINT_ws];
3399       rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
3400     }
3401 
3402   if (TARGET_POWERPC64)
3403     {
3404       rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3405       rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
3406     }
3407 
3408   if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF)			/* SFmode  */
3409     {
3410       rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
3411       rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
3412       rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
3413     }
3414   else if (TARGET_P8_VECTOR)
3415     {
3416       rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
3417       rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3418     }
3419   else if (TARGET_VSX)
3420     rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3421 
3422   if (TARGET_STFIWX)
3423     rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS;	/* DImode  */
3424 
3425   if (TARGET_LFIWZX)
3426     rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS;	/* DImode  */
3427 
3428   if (TARGET_FLOAT128_TYPE)
3429     {
3430       rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS;	/* KFmode  */
3431       if (FLOAT128_IEEE_P (TFmode))
3432 	rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS;	/* TFmode  */
3433     }
3434 
3435   /* Support for new D-form instructions.  */
3436   if (TARGET_P9_DFORM_SCALAR)
3437     rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;
3438 
3439   /* Support for ISA 3.0 (power9) vectors.  */
3440   if (TARGET_P9_VECTOR)
3441     rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS;
3442 
3443   /* Support for new direct moves (ISA 3.0 + 64bit).  */
3444   if (TARGET_DIRECT_MOVE_128)
3445     rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3446 
3447   /* Support small integers in VSX registers.  */
3448   if (TARGET_VSX_SMALL_INTEGER)
3449     {
3450       rs6000_constraints[RS6000_CONSTRAINT_wH] = ALTIVEC_REGS;
3451       rs6000_constraints[RS6000_CONSTRAINT_wI] = FLOAT_REGS;
3452       if (TARGET_P9_VECTOR)
3453 	{
3454 	  rs6000_constraints[RS6000_CONSTRAINT_wJ] = FLOAT_REGS;
3455 	  rs6000_constraints[RS6000_CONSTRAINT_wK] = ALTIVEC_REGS;
3456 	}
3457     }
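  /* Editorial sketch (not part of GCC): rs6000_constraints[] above maps each
     option-dependent constraint letter to a register class, leaving the
     default NO_REGS when the feature is off.  The hypothetical lookup below
     shows how such a table is typically consumed; all names are illustrative
     only.  Kept under #if 0 so it does not affect the build.  */
#if 0
enum ex_reg_class { EX_NO_REGS = 0, EX_FLOAT_REGS, EX_VSX_REGS };

/* One slot per letter; static storage zero-initializes to EX_NO_REGS.  */
static enum ex_reg_class ex_constraints[26];

/* Return the class for constraint letter C, or EX_NO_REGS if the letter is
   not enabled for the current target options.  */
static enum ex_reg_class
ex_constraint_class (char c)
{
  if (c < 'a' || c > 'z')
    return EX_NO_REGS;
  return ex_constraints[c - 'a'];
}
#endif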
3458 
3459   /* Set up the reload helper and direct move functions.  */
3460   if (TARGET_VSX || TARGET_ALTIVEC)
3461     {
3462       if (TARGET_64BIT)
3463 	{
3464 	  reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3465 	  reg_addr[V16QImode].reload_load  = CODE_FOR_reload_v16qi_di_load;
3466 	  reg_addr[V8HImode].reload_store  = CODE_FOR_reload_v8hi_di_store;
3467 	  reg_addr[V8HImode].reload_load   = CODE_FOR_reload_v8hi_di_load;
3468 	  reg_addr[V4SImode].reload_store  = CODE_FOR_reload_v4si_di_store;
3469 	  reg_addr[V4SImode].reload_load   = CODE_FOR_reload_v4si_di_load;
3470 	  reg_addr[V2DImode].reload_store  = CODE_FOR_reload_v2di_di_store;
3471 	  reg_addr[V2DImode].reload_load   = CODE_FOR_reload_v2di_di_load;
3472 	  reg_addr[V1TImode].reload_store  = CODE_FOR_reload_v1ti_di_store;
3473 	  reg_addr[V1TImode].reload_load   = CODE_FOR_reload_v1ti_di_load;
3474 	  reg_addr[V4SFmode].reload_store  = CODE_FOR_reload_v4sf_di_store;
3475 	  reg_addr[V4SFmode].reload_load   = CODE_FOR_reload_v4sf_di_load;
3476 	  reg_addr[V2DFmode].reload_store  = CODE_FOR_reload_v2df_di_store;
3477 	  reg_addr[V2DFmode].reload_load   = CODE_FOR_reload_v2df_di_load;
3478 	  reg_addr[DFmode].reload_store    = CODE_FOR_reload_df_di_store;
3479 	  reg_addr[DFmode].reload_load     = CODE_FOR_reload_df_di_load;
3480 	  reg_addr[DDmode].reload_store    = CODE_FOR_reload_dd_di_store;
3481 	  reg_addr[DDmode].reload_load     = CODE_FOR_reload_dd_di_load;
3482 	  reg_addr[SFmode].reload_store    = CODE_FOR_reload_sf_di_store;
3483 	  reg_addr[SFmode].reload_load     = CODE_FOR_reload_sf_di_load;
3484 
3485 	  if (FLOAT128_VECTOR_P (KFmode))
3486 	    {
3487 	      reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3488 	      reg_addr[KFmode].reload_load  = CODE_FOR_reload_kf_di_load;
3489 	    }
3490 
3491 	  if (FLOAT128_VECTOR_P (TFmode))
3492 	    {
3493 	      reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3494 	      reg_addr[TFmode].reload_load  = CODE_FOR_reload_tf_di_load;
3495 	    }
3496 
3497 	  /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3498 	     available.  */
3499 	  if (TARGET_NO_SDMODE_STACK)
3500 	    {
3501 	      reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3502 	      reg_addr[SDmode].reload_load  = CODE_FOR_reload_sd_di_load;
3503 	    }
3504 
3505 	  if (TARGET_VSX_TIMODE)
3506 	    {
3507 	      reg_addr[TImode].reload_store  = CODE_FOR_reload_ti_di_store;
3508 	      reg_addr[TImode].reload_load   = CODE_FOR_reload_ti_di_load;
3509 	    }
3510 
3511 	  if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3512 	    {
3513 	      reg_addr[TImode].reload_gpr_vsx    = CODE_FOR_reload_gpr_from_vsxti;
3514 	      reg_addr[V1TImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv1ti;
3515 	      reg_addr[V2DFmode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv2df;
3516 	      reg_addr[V2DImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv2di;
3517 	      reg_addr[V4SFmode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv4sf;
3518 	      reg_addr[V4SImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv4si;
3519 	      reg_addr[V8HImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv8hi;
3520 	      reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3521 	      reg_addr[SFmode].reload_gpr_vsx    = CODE_FOR_reload_gpr_from_vsxsf;
3522 
3523 	      reg_addr[TImode].reload_vsx_gpr    = CODE_FOR_reload_vsx_from_gprti;
3524 	      reg_addr[V1TImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv1ti;
3525 	      reg_addr[V2DFmode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv2df;
3526 	      reg_addr[V2DImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv2di;
3527 	      reg_addr[V4SFmode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv4sf;
3528 	      reg_addr[V4SImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv4si;
3529 	      reg_addr[V8HImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv8hi;
3530 	      reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3531 	      reg_addr[SFmode].reload_vsx_gpr    = CODE_FOR_reload_vsx_from_gprsf;
3532 
3533 	      if (FLOAT128_VECTOR_P (KFmode))
3534 		{
3535 		  reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3536 		  reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3537 		}
3538 
3539 	      if (FLOAT128_VECTOR_P (TFmode))
3540 		{
3541 		  reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3542 		  reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3543 		}
3544 	    }
3545 	}
3546       else
3547 	{
3548 	  reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3549 	  reg_addr[V16QImode].reload_load  = CODE_FOR_reload_v16qi_si_load;
3550 	  reg_addr[V8HImode].reload_store  = CODE_FOR_reload_v8hi_si_store;
3551 	  reg_addr[V8HImode].reload_load   = CODE_FOR_reload_v8hi_si_load;
3552 	  reg_addr[V4SImode].reload_store  = CODE_FOR_reload_v4si_si_store;
3553 	  reg_addr[V4SImode].reload_load   = CODE_FOR_reload_v4si_si_load;
3554 	  reg_addr[V2DImode].reload_store  = CODE_FOR_reload_v2di_si_store;
3555 	  reg_addr[V2DImode].reload_load   = CODE_FOR_reload_v2di_si_load;
3556 	  reg_addr[V1TImode].reload_store  = CODE_FOR_reload_v1ti_si_store;
3557 	  reg_addr[V1TImode].reload_load   = CODE_FOR_reload_v1ti_si_load;
3558 	  reg_addr[V4SFmode].reload_store  = CODE_FOR_reload_v4sf_si_store;
3559 	  reg_addr[V4SFmode].reload_load   = CODE_FOR_reload_v4sf_si_load;
3560 	  reg_addr[V2DFmode].reload_store  = CODE_FOR_reload_v2df_si_store;
3561 	  reg_addr[V2DFmode].reload_load   = CODE_FOR_reload_v2df_si_load;
3562 	  reg_addr[DFmode].reload_store    = CODE_FOR_reload_df_si_store;
3563 	  reg_addr[DFmode].reload_load     = CODE_FOR_reload_df_si_load;
3564 	  reg_addr[DDmode].reload_store    = CODE_FOR_reload_dd_si_store;
3565 	  reg_addr[DDmode].reload_load     = CODE_FOR_reload_dd_si_load;
3566 	  reg_addr[SFmode].reload_store    = CODE_FOR_reload_sf_si_store;
3567 	  reg_addr[SFmode].reload_load     = CODE_FOR_reload_sf_si_load;
3568 
3569 	  if (FLOAT128_VECTOR_P (KFmode))
3570 	    {
3571 	      reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3572 	      reg_addr[KFmode].reload_load  = CODE_FOR_reload_kf_si_load;
3573 	    }
3574 
3575 	  if (FLOAT128_IEEE_P (TFmode))
3576 	    {
3577 	      reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3578 	      reg_addr[TFmode].reload_load  = CODE_FOR_reload_tf_si_load;
3579 	    }
3580 
3581 	  /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3582 	     available.  */
3583 	  if (TARGET_NO_SDMODE_STACK)
3584 	    {
3585 	      reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3586 	      reg_addr[SDmode].reload_load  = CODE_FOR_reload_sd_si_load;
3587 	    }
3588 
3589 	  if (TARGET_VSX_TIMODE)
3590 	    {
3591 	      reg_addr[TImode].reload_store  = CODE_FOR_reload_ti_si_store;
3592 	      reg_addr[TImode].reload_load   = CODE_FOR_reload_ti_si_load;
3593 	    }
3594 
3595 	  if (TARGET_DIRECT_MOVE)
3596 	    {
3597 	      reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3598 	      reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3599 	      reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3600 	    }
3601 	}
3602 
3603       if (TARGET_UPPER_REGS_DF)
3604 	reg_addr[DFmode].scalar_in_vmx_p = true;
3605 
3606       if (TARGET_UPPER_REGS_DI)
3607 	reg_addr[DImode].scalar_in_vmx_p = true;
3608 
3609       if (TARGET_UPPER_REGS_SF)
3610 	reg_addr[SFmode].scalar_in_vmx_p = true;
3611 
3612       if (TARGET_VSX_SMALL_INTEGER)
3613 	{
3614 	  reg_addr[SImode].scalar_in_vmx_p = true;
3615 	  if (TARGET_P9_VECTOR)
3616 	    {
3617 	      reg_addr[HImode].scalar_in_vmx_p = true;
3618 	      reg_addr[QImode].scalar_in_vmx_p = true;
3619 	    }
3620 	}
3621     }
3622 
3623   /* Setup the fusion operations.  */
3624   if (TARGET_P8_FUSION)
3625     {
3626       reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi;
3627       reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi;
3628       reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si;
3629       if (TARGET_64BIT)
3630 	reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di;
3631     }
3632 
3633   if (TARGET_P9_FUSION)
3634     {
3635       struct fuse_insns {
3636 	enum machine_mode mode;			/* mode of the fused type.  */
3637 	enum machine_mode pmode;		/* pointer mode.  */
3638 	enum rs6000_reload_reg_type rtype;	/* register type.  */
3639 	enum insn_code load;			/* load insn.  */
3640 	enum insn_code store;			/* store insn.  */
3641       };
3642 
3643       static const struct fuse_insns addis_insns[] = {
3644 	{ E_SFmode, E_DImode, RELOAD_REG_FPR,
3645 	  CODE_FOR_fusion_vsx_di_sf_load,
3646 	  CODE_FOR_fusion_vsx_di_sf_store },
3647 
3648 	{ E_SFmode, E_SImode, RELOAD_REG_FPR,
3649 	  CODE_FOR_fusion_vsx_si_sf_load,
3650 	  CODE_FOR_fusion_vsx_si_sf_store },
3651 
3652 	{ E_DFmode, E_DImode, RELOAD_REG_FPR,
3653 	  CODE_FOR_fusion_vsx_di_df_load,
3654 	  CODE_FOR_fusion_vsx_di_df_store },
3655 
3656 	{ E_DFmode, E_SImode, RELOAD_REG_FPR,
3657 	  CODE_FOR_fusion_vsx_si_df_load,
3658 	  CODE_FOR_fusion_vsx_si_df_store },
3659 
3660 	{ E_DImode, E_DImode, RELOAD_REG_FPR,
3661 	  CODE_FOR_fusion_vsx_di_di_load,
3662 	  CODE_FOR_fusion_vsx_di_di_store },
3663 
3664 	{ E_DImode, E_SImode, RELOAD_REG_FPR,
3665 	  CODE_FOR_fusion_vsx_si_di_load,
3666 	  CODE_FOR_fusion_vsx_si_di_store },
3667 
3668 	{ E_QImode, E_DImode, RELOAD_REG_GPR,
3669 	  CODE_FOR_fusion_gpr_di_qi_load,
3670 	  CODE_FOR_fusion_gpr_di_qi_store },
3671 
3672 	{ E_QImode, E_SImode, RELOAD_REG_GPR,
3673 	  CODE_FOR_fusion_gpr_si_qi_load,
3674 	  CODE_FOR_fusion_gpr_si_qi_store },
3675 
3676 	{ E_HImode, E_DImode, RELOAD_REG_GPR,
3677 	  CODE_FOR_fusion_gpr_di_hi_load,
3678 	  CODE_FOR_fusion_gpr_di_hi_store },
3679 
3680 	{ E_HImode, E_SImode, RELOAD_REG_GPR,
3681 	  CODE_FOR_fusion_gpr_si_hi_load,
3682 	  CODE_FOR_fusion_gpr_si_hi_store },
3683 
3684 	{ E_SImode, E_DImode, RELOAD_REG_GPR,
3685 	  CODE_FOR_fusion_gpr_di_si_load,
3686 	  CODE_FOR_fusion_gpr_di_si_store },
3687 
3688 	{ E_SImode, E_SImode, RELOAD_REG_GPR,
3689 	  CODE_FOR_fusion_gpr_si_si_load,
3690 	  CODE_FOR_fusion_gpr_si_si_store },
3691 
3692 	{ E_SFmode, E_DImode, RELOAD_REG_GPR,
3693 	  CODE_FOR_fusion_gpr_di_sf_load,
3694 	  CODE_FOR_fusion_gpr_di_sf_store },
3695 
3696 	{ E_SFmode, E_SImode, RELOAD_REG_GPR,
3697 	  CODE_FOR_fusion_gpr_si_sf_load,
3698 	  CODE_FOR_fusion_gpr_si_sf_store },
3699 
3700 	{ E_DImode, E_DImode, RELOAD_REG_GPR,
3701 	  CODE_FOR_fusion_gpr_di_di_load,
3702 	  CODE_FOR_fusion_gpr_di_di_store },
3703 
3704 	{ E_DFmode, E_DImode, RELOAD_REG_GPR,
3705 	  CODE_FOR_fusion_gpr_di_df_load,
3706 	  CODE_FOR_fusion_gpr_di_df_store },
3707       };
3708 
3709       machine_mode cur_pmode = Pmode;
3710       size_t i;
3711 
3712       for (i = 0; i < ARRAY_SIZE (addis_insns); i++)
3713 	{
3714 	  machine_mode xmode = addis_insns[i].mode;
3715 	  enum rs6000_reload_reg_type rtype = addis_insns[i].rtype;
3716 
3717 	  if (addis_insns[i].pmode != cur_pmode)
3718 	    continue;
3719 
3720 	  if (rtype == RELOAD_REG_FPR
3721 	      && (!TARGET_HARD_FLOAT || !TARGET_FPRS))
3722 	    continue;
3723 
3724 	  reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
3725 	  reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;
3726 
3727 	  if (rtype == RELOAD_REG_FPR && TARGET_P9_DFORM_SCALAR)
3728 	    {
3729 	      reg_addr[xmode].fusion_addis_ld[RELOAD_REG_VMX]
3730 		= addis_insns[i].load;
3731 	      reg_addr[xmode].fusion_addis_st[RELOAD_REG_VMX]
3732 		= addis_insns[i].store;
3733 	    }
3734 	}
3735     }
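  /* Editorial sketch (not part of GCC): the addis_insns[] walk above is
     table-driven initialization -- each row is tried, rows that do not match
     the current pointer mode or register-file configuration are skipped, and
     matching rows fill in the reg_addr[] hooks.  A minimal version of the
     idiom with made-up types, kept under #if 0 so it does not affect the
     build.  */
#if 0
#include <stddef.h>

struct ex_row { int pmode; int value; };

static const struct ex_row ex_table[] = {
  { 32, 1 },
  { 64, 2 },
};

static int
ex_lookup (int cur_pmode)
{
  size_t i;
  for (i = 0; i < sizeof (ex_table) / sizeof (ex_table[0]); i++)
    {
      if (ex_table[i].pmode != cur_pmode)
	continue;		/* row does not apply, like the pmode test */
      return ex_table[i].value;
    }
  return 0;
}
#endif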
3736 
3737   /* Note which types support fusing a TOC setup with a memory insn.  We only
3738      do fused TOCs for medium/large code models.  */
3739   if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64
3740       && (TARGET_CMODEL != CMODEL_SMALL))
3741     {
3742       reg_addr[QImode].fused_toc = true;
3743       reg_addr[HImode].fused_toc = true;
3744       reg_addr[SImode].fused_toc = true;
3745       reg_addr[DImode].fused_toc = true;
3746       if (TARGET_HARD_FLOAT && TARGET_FPRS)
3747 	{
3748 	  if (TARGET_SINGLE_FLOAT)
3749 	    reg_addr[SFmode].fused_toc = true;
3750 	  if (TARGET_DOUBLE_FLOAT)
3751 	    reg_addr[DFmode].fused_toc = true;
3752 	}
3753     }
3754 
3755   /* Precalculate HARD_REGNO_NREGS.  */
3756   for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3757     for (m = 0; m < NUM_MACHINE_MODES; ++m)
3758       rs6000_hard_regno_nregs[m][r]
3759 	= rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
3760 
3761   /* Precalculate TARGET_HARD_REGNO_MODE_OK.  */
3762   for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3763     for (m = 0; m < NUM_MACHINE_MODES; ++m)
3764       if (rs6000_hard_regno_mode_ok_uncached (r, (machine_mode)m))
3765 	rs6000_hard_regno_mode_ok_p[m][r] = true;
3766 
3767   /* Precalculate CLASS_MAX_NREGS sizes.  */
3768   for (c = 0; c < LIM_REG_CLASSES; ++c)
3769     {
3770       int reg_size;
3771 
3772       if (TARGET_VSX && VSX_REG_CLASS_P (c))
3773 	reg_size = UNITS_PER_VSX_WORD;
3774 
3775       else if (c == ALTIVEC_REGS)
3776 	reg_size = UNITS_PER_ALTIVEC_WORD;
3777 
3778       else if (c == FLOAT_REGS)
3779 	reg_size = UNITS_PER_FP_WORD;
3780 
3781       else
3782 	reg_size = UNITS_PER_WORD;
3783 
3784       for (m = 0; m < NUM_MACHINE_MODES; ++m)
3785 	{
3786 	  machine_mode m2 = (machine_mode)m;
3787 	  int reg_size2 = reg_size;
3788 
3789 	  /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3790 	     in VSX.  */
3791 	  if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3792 	    reg_size2 = UNITS_PER_FP_WORD;
3793 
3794 	  rs6000_class_max_nregs[m][c]
3795 	    = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3796 	}
3797     }
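  /* Editorial sketch (not part of GCC): the nregs computation above is the
     usual ceiling division -- a mode of SIZE bytes needs
     (SIZE + REG_SIZE - 1) / REG_SIZE registers of REG_SIZE bytes each.
     Illustrative values below; kept under #if 0 so it does not affect the
     build.  */
#if 0
#include <assert.h>

static int
ex_nregs (int mode_size, int reg_size)
{
  return (mode_size + reg_size - 1) / reg_size;
}

static void
ex_nregs_check (void)
{
  assert (ex_nregs (16, 8) == 2);	/* 16-byte value in 8-byte regs */
  assert (ex_nregs (4, 8) == 1);	/* 4-byte value still takes one reg */
}
#endif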
3798 
3799   if (TARGET_E500_DOUBLE)
3800     rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
3801 
3802   /* Calculate which modes to automatically generate code to use the
3803      reciprocal divide and square root instructions.  In the future, possibly
3804      automatically generate the instructions even if the user did not specify
3805      -mrecip.  The older machines' double precision reciprocal sqrt estimate is
3806      not accurate enough.  */
3807   memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3808   if (TARGET_FRES)
3809     rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3810   if (TARGET_FRE)
3811     rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3812   if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3813     rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3814   if (VECTOR_UNIT_VSX_P (V2DFmode))
3815     rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3816 
3817   if (TARGET_FRSQRTES)
3818     rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3819   if (TARGET_FRSQRTE)
3820     rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3821   if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3822     rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3823   if (VECTOR_UNIT_VSX_P (V2DFmode))
3824     rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3825 
3826   if (rs6000_recip_control)
3827     {
3828       if (!flag_finite_math_only)
3829 	warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3830       if (flag_trapping_math)
3831 	warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3832       if (!flag_reciprocal_math)
3833 	warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
3834       if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3835 	{
3836 	  if (RS6000_RECIP_HAVE_RE_P (SFmode)
3837 	      && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3838 	    rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3839 
3840 	  if (RS6000_RECIP_HAVE_RE_P (DFmode)
3841 	      && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3842 	    rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3843 
3844 	  if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3845 	      && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3846 	    rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3847 
3848 	  if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3849 	      && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3850 	    rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3851 
3852 	  if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3853 	      && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3854 	    rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3855 
3856 	  if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3857 	      && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3858 	    rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3859 
3860 	  if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3861 	      && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3862 	    rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3863 
3864 	  if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3865 	      && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3866 	    rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3867 	}
3868     }
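  /* Editorial sketch (not part of GCC): the RS6000_RECIP_* bits above record
     which modes have hardware reciprocal (fre/fres) and reciprocal square
     root (frsqrte/frsqrtes) estimate instructions.  Generated code refines
     such an estimate with Newton-Raphson iterations; for a reciprocal,
     x1 = x0 * (2 - d * x0) roughly doubles the number of correct bits per
     step.  A scalar illustration, kept under #if 0 so it does not affect
     the build.  */
#if 0
static double
ex_refine_recip (double d, double estimate)
{
  /* One Newton-Raphson step toward 1/d starting from ESTIMATE.  */
  return estimate * (2.0 - d * estimate);
}
#endif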
3869 
3870   /* Update the addr mask bits in reg_addr to help secondary reload and go if
3871      legitimate address support to figure out the appropriate addressing to
3872      use.  */
3873   rs6000_setup_reg_addr_masks ();
3874 
3875   if (global_init_p || TARGET_DEBUG_TARGET)
3876     {
3877       if (TARGET_DEBUG_REG)
3878 	rs6000_debug_reg_global ();
3879 
3880       if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3881 	fprintf (stderr,
3882 		 "SImode variable mult cost       = %d\n"
3883 		 "SImode constant mult cost       = %d\n"
3884 		 "SImode short constant mult cost = %d\n"
3885 		 "DImode multiplication cost      = %d\n"
3886 		 "SImode division cost            = %d\n"
3887 		 "DImode division cost            = %d\n"
3888 		 "Simple fp operation cost        = %d\n"
3889 		 "DFmode multiplication cost      = %d\n"
3890 		 "SFmode division cost            = %d\n"
3891 		 "DFmode division cost            = %d\n"
3892 		 "cache line size                 = %d\n"
3893 		 "l1 cache size                   = %d\n"
3894 		 "l2 cache size                   = %d\n"
3895 		 "simultaneous prefetches         = %d\n"
3896 		 "\n",
3897 		 rs6000_cost->mulsi,
3898 		 rs6000_cost->mulsi_const,
3899 		 rs6000_cost->mulsi_const9,
3900 		 rs6000_cost->muldi,
3901 		 rs6000_cost->divsi,
3902 		 rs6000_cost->divdi,
3903 		 rs6000_cost->fp,
3904 		 rs6000_cost->dmul,
3905 		 rs6000_cost->sdiv,
3906 		 rs6000_cost->ddiv,
3907 		 rs6000_cost->cache_line_size,
3908 		 rs6000_cost->l1_cache_size,
3909 		 rs6000_cost->l2_cache_size,
3910 		 rs6000_cost->simultaneous_prefetches);
3911     }
3912 }
3913 
3914 #if TARGET_MACHO
3915 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS.  */
3916 
3917 static void
3918 darwin_rs6000_override_options (void)
3919 {
3920   /* The Darwin ABI always includes AltiVec, can't be (validly) turned
3921      off.  */
3922   rs6000_altivec_abi = 1;
3923   TARGET_ALTIVEC_VRSAVE = 1;
3924   rs6000_current_abi = ABI_DARWIN;
3925 
3926   if (DEFAULT_ABI == ABI_DARWIN
3927       && TARGET_64BIT)
3928       darwin_one_byte_bool = 1;
3929 
3930   if (TARGET_64BIT && ! TARGET_POWERPC64)
3931     {
3932       rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3933       warning (0, "-m64 requires PowerPC64 architecture, enabling");
3934     }
3935   if (flag_mkernel)
3936     {
3937       rs6000_default_long_calls = 1;
3938       rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3939     }
3940 
3941   /* Make -m64 imply -maltivec.  Darwin's 64-bit ABI includes
3942      Altivec.  */
3943   if (!flag_mkernel && !flag_apple_kext
3944       && TARGET_64BIT
3945       && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3946     rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3947 
3948   /* Unless the user (not the configurer) has explicitly overridden
3949      it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
3950      G4 unless targeting the kernel.  */
3951   if (!flag_mkernel
3952       && !flag_apple_kext
3953       && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3954       && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3955       && ! global_options_set.x_rs6000_cpu_index)
3956     {
3957       rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3958     }
3959 }
3960 #endif
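/* Editorial sketch (not part of GCC): strverscmp, used above to compare
   darwin_macosx_version_min against "10.5", is the GNU version-aware string
   comparison, so "10.10" correctly sorts after "10.5" where plain strcmp
   would not.  Kept under #if 0 so it does not affect the build.  */
#if 0
#define _GNU_SOURCE
#include <string.h>
#include <assert.h>

static void
ex_version_check (void)
{
  assert (strverscmp ("10.4", "10.5") < 0);	/* older minimum version */
  assert (strverscmp ("10.10", "10.5") > 0);	/* strcmp would say less */
}
#endif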
3961 
3962 /* If not otherwise specified by a target, make 'long double' equivalent to
3963    'double'.  */
3964 
3965 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3966 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3967 #endif
3968 
3969 /* Return the builtin mask of the various options used that could affect which
3970    builtins were used.  In the past we used target_flags, but we've run out of
3971    bits, and some options like SPE and PAIRED are no longer in
3972    target_flags.  */
3973 
3974 HOST_WIDE_INT
3975 rs6000_builtin_mask_calculate (void)
3976 {
3977   return (((TARGET_ALTIVEC)		    ? RS6000_BTM_ALTIVEC   : 0)
3978 	  | ((TARGET_CMPB)		    ? RS6000_BTM_CMPB	   : 0)
3979 	  | ((TARGET_VSX)		    ? RS6000_BTM_VSX	   : 0)
3980 	  | ((TARGET_SPE)		    ? RS6000_BTM_SPE	   : 0)
3981 	  | ((TARGET_PAIRED_FLOAT)	    ? RS6000_BTM_PAIRED	   : 0)
3982 	  | ((TARGET_FRE)		    ? RS6000_BTM_FRE	   : 0)
3983 	  | ((TARGET_FRES)		    ? RS6000_BTM_FRES	   : 0)
3984 	  | ((TARGET_FRSQRTE)		    ? RS6000_BTM_FRSQRTE   : 0)
3985 	  | ((TARGET_FRSQRTES)		    ? RS6000_BTM_FRSQRTES  : 0)
3986 	  | ((TARGET_POPCNTD)		    ? RS6000_BTM_POPCNTD   : 0)
3987 	  | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL      : 0)
3988 	  | ((TARGET_P8_VECTOR)		    ? RS6000_BTM_P8_VECTOR : 0)
3989 	  | ((TARGET_P9_VECTOR)		    ? RS6000_BTM_P9_VECTOR : 0)
3990 	  | ((TARGET_P9_MISC)		    ? RS6000_BTM_P9_MISC   : 0)
3991 	  | ((TARGET_MODULO)		    ? RS6000_BTM_MODULO    : 0)
3992 	  | ((TARGET_64BIT)		    ? RS6000_BTM_64BIT     : 0)
3993 	  | ((TARGET_CRYPTO)		    ? RS6000_BTM_CRYPTO	   : 0)
3994 	  | ((TARGET_HTM)		    ? RS6000_BTM_HTM	   : 0)
3995 	  | ((TARGET_DFP)		    ? RS6000_BTM_DFP	   : 0)
3996 	  | ((TARGET_HARD_FLOAT)	    ? RS6000_BTM_HARD_FLOAT : 0)
3997 	  | ((TARGET_LONG_DOUBLE_128)	    ? RS6000_BTM_LDBL128   : 0)
3998 	  | ((TARGET_FLOAT128_TYPE)	    ? RS6000_BTM_FLOAT128  : 0));
3999 }
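/* Editorial sketch (not part of GCC): the function above packs the current
   target options into a wide-integer bitmask; builtin expansion then tests
   individual bits.  The hypothetical check below shows the consuming side;
   the EX_BTM_* values are illustrative only.  Kept under #if 0 so it does
   not affect the build.  */
#if 0
#define EX_BTM_ALTIVEC 0x1ULL
#define EX_BTM_VSX     0x2ULL

static int
ex_builtin_enabled_p (unsigned long long mask, unsigned long long required)
{
  /* A builtin is usable only if every mask bit it requires is set.  */
  return (mask & required) == required;
}
#endif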
4000 
4001 /* Implement TARGET_MD_ASM_ADJUST.  All asm statements are considered
4002    to clobber the XER[CA] bit because clobbering that bit without telling
4003    the compiler worked just fine with versions of GCC before GCC 5, and
4004    breaking a lot of older code in ways that are hard to track down is
4005    not such a great idea.  */
4006 
4007 static rtx_insn *
4008 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
4009 		      vec<const char *> &/*constraints*/,
4010 		      vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
4011 {
4012   clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
4013   SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
4014   return NULL;
4015 }
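/* Editorial sketch (not part of GCC): the adjustment above exists because
   old inline asm frequently used carry-setting instructions without
   declaring a clobber of XER[CA].  A hypothetical example of such code is
   shown below -- "addic" sets the carry bit, yet the asm names no clobber,
   so the compiler must assume one.  Kept under #if 0 so it does not affect
   the build.  */
#if 0
static long
ex_add_one_with_carry (long x)
{
  long r;
  /* Pre-GCC 5 code like this "worked" only because the compiler silently
     treated CA as clobbered by every asm statement.  */
  __asm__ ("addic %0,%1,1" : "=r" (r) : "r" (x));
  return r;
}
#endif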
4016 
4017 /* Override command line options.
4018 
4019    Combine build-specific configuration information with options
4020    specified on the command line to set various state variables which
4021    influence code generation, optimization, and expansion of built-in
4022    functions.  Assure that command-line configuration preferences are
4023    compatible with each other and with the build configuration; issue
4024    warnings while adjusting configuration or error messages while
4025    rejecting configuration.
4026 
4027    Upon entry to this function:
4028 
4029      This function is called once at the beginning of
4030      compilation, and then again at the start and end of compiling
4031      each section of code that has a different configuration, as
4032      indicated, for example, by adding the
4033 
4034        __attribute__((__target__("cpu=power9")))
4035 
4036      qualifier to a function definition or, for example, by bracketing
4037      code between
4038 
4039        #pragma GCC target("altivec")
4040 
4041      and
4042 
4043        #pragma GCC reset_options
4044 
4045      directives.  Parameter global_init_p is true for the initial
4046      invocation, which initializes global variables, and false for all
4047      subsequent invocations.
4048 
4049 
4050      Various global state information is assumed to be valid.  This
4051      includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
4052      default CPU specified at build configure time, TARGET_DEFAULT,
4053      representing the default set of option flags for the default
4054      target, and global_options_set.x_rs6000_isa_flags, representing
4055      which options were requested on the command line.
4056 
4057    Upon return from this function:
4058 
4059      rs6000_isa_flags_explicit has a non-zero bit for each flag that
4060      was set by name on the command line.  Additionally, if certain
4061      attributes are automatically enabled or disabled by this function
4062      in order to assure compatibility between options and
4063      configuration, the flags associated with those attributes are
4064      also set.  By setting these "explicit bits", we avoid the risk
4065      that other code might accidentally overwrite these particular
4066      attributes with "default values".
4067 
4068      The various bits of rs6000_isa_flags are set to indicate the
4069      target options that have been selected for the most current
4070      compilation efforts.  This has the effect of also turning on the
4071      associated TARGET_XXX values since these are macros which are
4072      generally defined to test the corresponding bit of the
4073      rs6000_isa_flags variable.
4074 
4075      The variable rs6000_builtin_mask is set to represent the target
4076      options for the most current compilation efforts, consistent with
4077      the current contents of rs6000_isa_flags.  This variable controls
4078      expansion of built-in functions.
4079 
4080      Various other global variables and fields of global structures
4081      (over 50 in all) are initialized to reflect the desired options
4082      for the most current compilation efforts.  */
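/* Editorial sketch (not part of GCC): user code that triggers the repeated
   invocations described above might look like the following -- a
   per-function target attribute and a pragma-bracketed region.  Kept under
   #if 0 so it does not affect the build.  */
#if 0
__attribute__((__target__("cpu=power9")))
int
ex_power9_only (int x)
{
  return x + 1;		/* compiled with the power9 options in force */
}

#pragma GCC target("altivec")
/* Code here is compiled as if -maltivec were given.  */
#pragma GCC reset_options
#endif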
4083 
4084 static bool
4085 rs6000_option_override_internal (bool global_init_p)
4086 {
4087   bool ret = true;
4088   bool have_cpu = false;
4089 
4090   /* The default cpu requested at configure time, if any.  */
4091   const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
4092 
4093   HOST_WIDE_INT set_masks;
4094   HOST_WIDE_INT ignore_masks;
4095   int cpu_index;
4096   int tune_index;
4097   struct cl_target_option *main_target_opt
4098     = ((global_init_p || target_option_default_node == NULL)
4099        ? NULL : TREE_TARGET_OPTION (target_option_default_node));
4100 
4101   /* Print defaults.  */
4102   if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
4103     rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
4104 
4105   /* Remember the explicit arguments.  */
4106   if (global_init_p)
4107     rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
4108 
4109   /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
4110      library functions, so warn about it. The flag may be useful for
4111      performance studies from time to time though, so don't disable it
4112      entirely.  */
4113   if (global_options_set.x_rs6000_alignment_flags
4114       && rs6000_alignment_flags == MASK_ALIGN_POWER
4115       && DEFAULT_ABI == ABI_DARWIN
4116       && TARGET_64BIT)
4117     warning (0, "-malign-power is not supported for 64-bit Darwin;"
4118 	     " it is incompatible with the installed C and C++ libraries");
4119 
4120   /* Numerous experiments show that IRA based loop pressure
4121      calculation works better for RTL loop invariant motion on targets
4122      with enough (>= 32) registers.  It is an expensive optimization.
4123      So it is on only for peak performance.  */
4124   if (optimize >= 3 && global_init_p
4125       && !global_options_set.x_flag_ira_loop_pressure)
4126     flag_ira_loop_pressure = 1;
4127 
4128   /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
4129      for tracebacks to be complete, but not if -fasynchronous-unwind-tables was
4130      already specified on the command line.  */
4131   if (flag_sanitize & SANITIZE_USER_ADDRESS
4132       && !global_options_set.x_flag_asynchronous_unwind_tables)
4133     flag_asynchronous_unwind_tables = 1;
4134 
4135   /* Set the pointer size.  */
4136   if (TARGET_64BIT)
4137     {
4138       rs6000_pmode = DImode;
4139       rs6000_pointer_size = 64;
4140     }
4141   else
4142     {
4143       rs6000_pmode = SImode;
4144       rs6000_pointer_size = 32;
4145     }
4146 
4147   /* Some OSs don't support saving the high part of 64-bit registers on context
4148      switch.  Other OSs don't support saving Altivec registers.  On those OSs,
4149      we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
4150      if the user wants either, the user must explicitly specify them and we
4151      won't interfere with the user's specification.  */
4152 
4153   set_masks = POWERPC_MASKS;
4154 #ifdef OS_MISSING_POWERPC64
4155   if (OS_MISSING_POWERPC64)
4156     set_masks &= ~OPTION_MASK_POWERPC64;
4157 #endif
4158 #ifdef OS_MISSING_ALTIVEC
4159   if (OS_MISSING_ALTIVEC)
4160     set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
4161 		   | OTHER_VSX_VECTOR_MASKS);
4162 #endif
4163 
4164   /* Don't override by the processor default if given explicitly.  */
4165   set_masks &= ~rs6000_isa_flags_explicit;
4166 
4167   /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments.  If the user changed
4168      the cpu in a target attribute or pragma, but did not specify a tuning
4169      option, use the cpu for the tuning option rather than the option specified
4170      with -mtune on the command line.  Process a '--with-cpu' configuration
4171      request as an implicit -mcpu.  */
4172   if (rs6000_cpu_index >= 0)
4173     {
4174       cpu_index = rs6000_cpu_index;
4175       have_cpu = true;
4176     }
4177   else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
4178     {
4179       rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
4180       have_cpu = true;
4181     }
4182   else if (implicit_cpu)
4183     {
4184       rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
4185       have_cpu = true;
4186     }
4187   else
4188     {
4189       /* PowerPC 64-bit LE requires at least ISA 2.07.  */
4190       const char *default_cpu = ((!TARGET_POWERPC64)
4191 				 ? "powerpc"
4192 				 : ((BYTES_BIG_ENDIAN)
4193 				    ? "powerpc64"
4194 				    : "powerpc64le"));
4195 
4196       rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
4197       have_cpu = false;
4198     }
4199 
4200   gcc_assert (cpu_index >= 0);
4201 
4202   if (have_cpu)
4203     {
4204 #ifndef HAVE_AS_POWER9
4205       if (processor_target_table[rs6000_cpu_index].processor
4206 	  == PROCESSOR_POWER9)
4207 	{
4208 	  have_cpu = false;
4209 	  warning (0, "will not generate power9 instructions because "
4210 		   "assembler lacks power9 support");
4211 	}
4212 #endif
4213 #ifndef HAVE_AS_POWER8
4214       if (processor_target_table[rs6000_cpu_index].processor
4215 	  == PROCESSOR_POWER8)
4216 	{
4217 	  have_cpu = false;
4218 	  warning (0, "will not generate power8 instructions because "
4219 		   "assembler lacks power8 support");
4220 	}
4221 #endif
4222 #ifndef HAVE_AS_POPCNTD
4223       if (processor_target_table[rs6000_cpu_index].processor
4224 	  == PROCESSOR_POWER7)
4225 	{
4226 	  have_cpu = false;
4227 	  warning (0, "will not generate power7 instructions because "
4228 		   "assembler lacks power7 support");
4229 	}
4230 #endif
4231 #ifndef HAVE_AS_DFP
4232       if (processor_target_table[rs6000_cpu_index].processor
4233 	  == PROCESSOR_POWER6)
4234 	{
4235 	  have_cpu = false;
4236 	  warning (0, "will not generate power6 instructions because "
4237 		   "assembler lacks power6 support");
4238 	}
4239 #endif
4240 #ifndef HAVE_AS_POPCNTB
4241       if (processor_target_table[rs6000_cpu_index].processor
4242 	  == PROCESSOR_POWER5)
4243 	{
4244 	  have_cpu = false;
4245 	  warning (0, "will not generate power5 instructions because "
4246 		   "assembler lacks power5 support");
4247 	}
4248 #endif
4249 
4250       if (!have_cpu)
4251 	{
4252 	  /* PowerPC 64-bit LE requires at least ISA 2.07.  */
4253 	  const char *default_cpu = (!TARGET_POWERPC64
4254 				     ? "powerpc"
4255 				     : (BYTES_BIG_ENDIAN
4256 					? "powerpc64"
4257 					: "powerpc64le"));
4258 
4259 	  rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
4260 	}
4261     }
4262 
4263   /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
4264      compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
4265      with those from the cpu, except for options that were explicitly set.  If
4266      we don't have a cpu, do not override the target bits set in
4267      TARGET_DEFAULT.  */
4268   if (have_cpu)
4269     {
4270       rs6000_isa_flags &= ~set_masks;
4271       rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
4272 			   & set_masks);
4273     }
4274   else
4275     {
4276       /* If no -mcpu=<xxx>, inherit any default options that were cleared via
4277 	 POWERPC_MASKS.  Originally, TARGET_DEFAULT was used to initialize
4278 	 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook.  Since we switched
4279 	 to using rs6000_isa_flags, we do the initialization here.
4280 
4281 	 If there is a TARGET_DEFAULT, use that.  Otherwise fall back to using
4282 	 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults.  */
4283       HOST_WIDE_INT flags = ((TARGET_DEFAULT) ? TARGET_DEFAULT
4284 			     : processor_target_table[cpu_index].target_enable);
4285       rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
4286     }
4287 
4288   if (rs6000_tune_index >= 0)
4289     tune_index = rs6000_tune_index;
4290   else if (have_cpu)
4291     rs6000_tune_index = tune_index = cpu_index;
4292   else
4293     {
4294       size_t i;
4295       enum processor_type tune_proc
4296 	= (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
4297 
4298       tune_index = -1;
4299       for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
4300 	if (processor_target_table[i].processor == tune_proc)
4301 	  {
4302 	    rs6000_tune_index = tune_index = i;
4303 	    break;
4304 	  }
4305     }
4306 
4307   gcc_assert (tune_index >= 0);
4308   rs6000_cpu = processor_target_table[tune_index].processor;
4309 
4310   /* Pick defaults for SPE related control flags.  Do this early to make sure
4311      that the TARGET_ macros are representative ASAP.  */
4312   {
4313     int spe_capable_cpu =
4314       (rs6000_cpu == PROCESSOR_PPC8540
4315        || rs6000_cpu == PROCESSOR_PPC8548);
4316 
4317     if (!global_options_set.x_rs6000_spe_abi)
4318       rs6000_spe_abi = spe_capable_cpu;
4319 
4320     if (!global_options_set.x_rs6000_spe)
4321       rs6000_spe = spe_capable_cpu;
4322 
4323     if (!global_options_set.x_rs6000_float_gprs)
4324       rs6000_float_gprs =
4325         (rs6000_cpu == PROCESSOR_PPC8540 ? 1
4326          : rs6000_cpu == PROCESSOR_PPC8548 ? 2
4327          : 0);
4328   }
4329 
4330   if (global_options_set.x_rs6000_spe_abi
4331       && rs6000_spe_abi
4332       && !TARGET_SPE_ABI)
4333     error ("not configured for SPE ABI");
4334 
4335   if (global_options_set.x_rs6000_spe
4336       && rs6000_spe
4337       && !TARGET_SPE)
4338     error ("not configured for SPE instruction set");
4339 
4340   if (main_target_opt != NULL
4341       && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
4342           || (main_target_opt->x_rs6000_spe != rs6000_spe)
4343           || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
4344     error ("target attribute or pragma changes SPE ABI");
4345 
4346   if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
4347       || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
4348       || rs6000_cpu == PROCESSOR_PPCE5500)
4349     {
4350       if (TARGET_ALTIVEC)
4351 	error ("AltiVec not supported in this target");
4352       if (TARGET_SPE)
4353 	error ("SPE not supported in this target");
4354     }
4355   if (rs6000_cpu == PROCESSOR_PPCE6500)
4356     {
4357       if (TARGET_SPE)
4358 	error ("SPE not supported in this target");
4359     }
4360 
4361   /* Disable Cell microcode if we are optimizing for the Cell
4362      and not optimizing for size.  */
4363   if (rs6000_gen_cell_microcode == -1)
4364     rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
4365                                   && !optimize_size);
4366 
4367   /* If we are optimizing big endian systems for space and it's OK to
4368      use instructions that would be microcoded on the Cell, use the
4369      load/store multiple and string instructions.  */
4370   if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
4371     rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
4372 						      | OPTION_MASK_STRING);
4373 
4374   /* Don't allow -mmultiple or -mstring on little endian systems
4375      unless the cpu is a 750, because the hardware doesn't support the
4376      instructions used in little endian mode, and causes an alignment
4377      trap.  The 750 does not cause an alignment trap (except when the
4378      target is unaligned).  */
4379 
4380   if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
4381     {
4382       if (TARGET_MULTIPLE)
4383 	{
4384 	  rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
4385 	  if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
4386 	    warning (0, "-mmultiple is not supported on little endian systems");
4387 	}
4388 
4389       if (TARGET_STRING)
4390 	{
4391 	  rs6000_isa_flags &= ~OPTION_MASK_STRING;
4392 	  if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
4393 	    warning (0, "-mstring is not supported on little endian systems");
4394 	}
4395     }
4396 
4397   /* If little-endian, default to -mstrict-align on older processors.
4398      Testing for htm matches power8 and later.  */
4399   if (!BYTES_BIG_ENDIAN
4400       && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
4401     rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
4402 
4403   /* -maltivec={le,be} implies -maltivec.  */
4404   if (rs6000_altivec_element_order != 0)
4405     rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
4406 
4407   /* Disallow -maltivec=le in big endian mode for now.  This is not
4408      known to be useful for anyone.  */
4409   if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
4410     {
4411       warning (0, N_("-maltivec=le not allowed for big-endian targets"));
4412       rs6000_altivec_element_order = 0;
4413     }
4414 
4415   /* Add some warnings for VSX.  */
4416   if (TARGET_VSX)
4417     {
4418       const char *msg = NULL;
4419       if (!TARGET_HARD_FLOAT || !TARGET_FPRS
4420 	  || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
4421 	{
4422 	  if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4423 	    msg = N_("-mvsx requires hardware floating point");
4424 	  else
4425 	    {
4426 	      rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4427 	      rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4428 	    }
4429 	}
4430       else if (TARGET_PAIRED_FLOAT)
4431 	msg = N_("-mvsx and -mpaired are incompatible");
4432       else if (TARGET_AVOID_XFORM > 0)
4433 	msg = N_("-mvsx needs indexed addressing");
4434       else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
4435 				   & OPTION_MASK_ALTIVEC))
4436         {
4437 	  if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4438 	    msg = N_("-mvsx and -mno-altivec are incompatible");
4439 	  else
4440 	    msg = N_("-mno-altivec disables vsx");
4441         }
4442 
4443       if (msg)
4444 	{
4445 	  warning (0, msg);
4446 	  rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4447 	  rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4448 	}
4449     }
4450 
4451   /* If hard-float/altivec/vsx were explicitly turned off then don't allow
4452      the -mcpu setting to enable options that conflict.  */
4453   if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
4454       && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
4455 				       | OPTION_MASK_ALTIVEC
4456 				       | OPTION_MASK_VSX)) != 0)
4457     rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
4458 			   | OPTION_MASK_DIRECT_MOVE)
4459 		         & ~rs6000_isa_flags_explicit);
4460 
4461   if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4462     rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
4463 
4464   /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
4465      off all of the options that depend on those flags.  */
4466   ignore_masks = rs6000_disable_incompatible_switches ();
4467 
4468   /* For the newer switches (vsx, dfp, etc.) set some of the older options,
4469      unless the user explicitly used the -mno-<option> to disable the code.  */
4470   if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_DFORM_SCALAR
4471       || TARGET_P9_DFORM_VECTOR || TARGET_P9_DFORM_BOTH > 0)
4472     rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
4473   else if (TARGET_P9_MINMAX)
4474     {
4475       if (have_cpu)
4476 	{
4477 	  if (cpu_index == PROCESSOR_POWER9)
4478 	    {
4479 	      /* Legacy behavior: allow -mcpu=power9 with certain
4480 		 capabilities explicitly disabled.  */
4481 	      rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
4482 	      /* However, reject this automatic fix if certain
4483 		 capabilities required for TARGET_P9_MINMAX support
4484 		 have been explicitly disabled.  */
4485 	      if (((OPTION_MASK_VSX | OPTION_MASK_UPPER_REGS_SF
4486 		    | OPTION_MASK_UPPER_REGS_DF) & rs6000_isa_flags)
4487 		  != (OPTION_MASK_VSX | OPTION_MASK_UPPER_REGS_SF
4488 		      | OPTION_MASK_UPPER_REGS_DF))
4489 		error ("-mpower9-minmax incompatible with explicitly disabled options");
4490 	    }
4491 	  else
4492 	    error ("power9 target option is incompatible with -mcpu=<xxx> for "
4493 		   "<xxx> less than power9");
4494 	}
4495       else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
4496 	       != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
4497 		   & rs6000_isa_flags_explicit))
4498 	/* Enforce that none of the ISA_3_0_MASKS_SERVER flags
4499 	   were explicitly cleared.  */
4500 	error ("-mpower9-minmax incompatible with explicitly disabled options");
4501       else
4502 	rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
4503     }
4504   else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
4505     rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
4506   else if (TARGET_VSX)
4507     rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
4508   else if (TARGET_POPCNTD)
4509     rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
4510   else if (TARGET_DFP)
4511     rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
4512   else if (TARGET_CMPB)
4513     rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
4514   else if (TARGET_FPRND)
4515     rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
4516   else if (TARGET_POPCNTB)
4517     rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
4518   else if (TARGET_ALTIVEC)
4519     rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
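  /* Worked example of the cascade above: a bare -mvsx (no -mcpu) falls
     through to the TARGET_VSX arm and ORs in ISA_2_6_MASKS_SERVER minus
     ignore_masks, so an option the user explicitly negated stays off.
     Assuming only popcntd was explicitly disabled, the arithmetic is:

	rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~OPTION_MASK_POPCNTD);  */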
4520 
4521   if (TARGET_CRYPTO && !TARGET_ALTIVEC)
4522     {
4523       if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
4524 	error ("-mcrypto requires -maltivec");
4525       rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
4526     }
4527 
4528   if (TARGET_DIRECT_MOVE && !TARGET_VSX)
4529     {
4530       if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4531 	error ("-mdirect-move requires -mvsx");
4532       rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
4533     }
4534 
4535   if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
4536     {
4537       if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4538 	error ("-mpower8-vector requires -maltivec");
4539       rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4540     }
4541 
4542   if (TARGET_P8_VECTOR && !TARGET_VSX)
4543     {
4544       if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4545 	  && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
4546 	error ("-mpower8-vector requires -mvsx");
4547       else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
4548 	{
4549 	  rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4550 	  if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4551 	    rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4552 	}
4553       else
4554 	{
4555 	  /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
4556 	     not explicit.  */
4557 	  rs6000_isa_flags |= OPTION_MASK_VSX;
4558 	  rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4559 	}
4560     }
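  /* Worked example of the three arms above: if both -mpower8-vector and
     -mno-vsx were given explicitly, we error; if power8-vector was only
     implied, it is silently dropped (and recorded as explicit when
     -mno-vsx was explicit, so later implications keep it off); otherwise
     an explicit -mpower8-vector wins and VSX is switched back on.  */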
4561 
4562   if (TARGET_VSX_TIMODE && !TARGET_VSX)
4563     {
4564       if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
4565 	error ("-mvsx-timode requires -mvsx");
4566       rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4567     }
4568 
4569   if (TARGET_DFP && !TARGET_HARD_FLOAT)
4570     {
4571       if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
4572 	error ("-mhard-dfp requires -mhard-float");
4573       rs6000_isa_flags &= ~OPTION_MASK_DFP;
4574     }
4575 
4576   /* Allow an explicit -mupper-regs to set -mupper-regs-df, -mupper-regs-di,
4577      and -mupper-regs-sf, depending on the cpu, unless the user explicitly also
4578      set the individual option.  */
4579   if (TARGET_UPPER_REGS > 0)
4580     {
4581       if (TARGET_VSX
4582 	  && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4583 	{
4584 	  rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
4585 	  rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4586 	}
4587       if (TARGET_VSX
4588 	  && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4589 	{
4590 	  rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DI;
4591 	  rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4592 	}
4593       if (TARGET_P8_VECTOR
4594 	  && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4595 	{
4596 	  rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
4597 	  rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4598 	}
4599     }
4600   else if (TARGET_UPPER_REGS == 0)
4601     {
4602       if (TARGET_VSX
4603 	  && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4604 	{
4605 	  rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4606 	  rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4607 	}
4608       if (TARGET_VSX
4609 	  && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4610 	{
4611 	  rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4612 	  rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4613 	}
4614       if (TARGET_P8_VECTOR
4615 	  && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4616 	{
4617 	  rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4618 	  rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4619 	}
4620     }
4621 
4622   if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
4623     {
4624       if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4625 	error ("-mupper-regs-df requires -mvsx");
4626       rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4627     }
4628 
4629   if (TARGET_UPPER_REGS_DI && !TARGET_VSX)
4630     {
4631       if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI)
4632 	error ("-mupper-regs-di requires -mvsx");
4633       rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4634     }
4635 
4636   if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
4637     {
4638       if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4639 	error ("-mupper-regs-sf requires -mpower8-vector");
4640       rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4641     }
4642 
4643   /* The quad memory instructions only work in 64-bit mode.  In 32-bit mode,
4644      silently turn off quad memory mode.  */
4645   if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4646     {
4647       if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4648 	warning (0, N_("-mquad-memory requires 64-bit mode"));
4649 
4650       if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4651 	warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
4652 
4653       rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4654 			    | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4655     }
4656 
4657   /* Non-atomic quad memory load/store are disabled for little endian, since
4658      the words are reversed, but atomic operations can still be done by
4659      swapping the words.  */
4660   if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4661     {
4662       if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4663 	warning (0, N_("-mquad-memory is not available in little endian mode"));
4664 
4665       rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4666     }
4667 
4668   /* Assume if the user asked for normal quad memory instructions, they want
4669      the atomic versions as well, unless they explicitly told us not to use quad
4670      word atomic instructions.  */
4671   if (TARGET_QUAD_MEMORY
4672       && !TARGET_QUAD_MEMORY_ATOMIC
4673       && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4674     rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
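  /* For example, -mquad-memory alone on a 64-bit big-endian target
     behaves like -mquad-memory -mquad-memory-atomic; only an explicit
     -mno-quad-memory-atomic keeps the atomic forms disabled.  */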
4675 
4676   /* Enable power8 fusion if we are tuning for power8, even if we aren't
4677      generating power8 instructions.  */
4678   if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4679     rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4680 			 & OPTION_MASK_P8_FUSION);
4681 
4682   /* Setting additional fusion flags turns on base fusion.  */
4683   if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION))
4684     {
4685       if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4686 	{
4687 	  if (TARGET_P8_FUSION_SIGN)
4688 	    error ("-mpower8-fusion-sign requires -mpower8-fusion");
4689 
4690 	  if (TARGET_TOC_FUSION)
4691 	    error ("-mtoc-fusion requires -mpower8-fusion");
4692 
4693 	  rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4694 	}
4695       else
4696 	rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4697     }
4698 
4699   /* Power9 fusion is a superset of power8 fusion.  */
4700   if (TARGET_P9_FUSION && !TARGET_P8_FUSION)
4701     {
4702       if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4703 	{
4704 	  /* We prefer to not mention undocumented options in
4705 	     error messages.  However, if users have managed to select
4706 	     power9-fusion without selecting power8-fusion, they
4707 	     already know about undocumented flags.  */
4708 	  error ("-mpower9-fusion requires -mpower8-fusion");
4709 	  rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION;
4710 	}
4711       else
4712 	rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4713     }
4714 
4715   /* Enable power9 fusion if we are tuning for power9, even if we aren't
4716      generating power9 instructions.  */
4717   if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION))
4718     rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4719 			 & OPTION_MASK_P9_FUSION);
4720 
4721   /* Power8 does not fuse sign extended loads with the addis.  If we are
4722      optimizing at high levels for speed, convert a sign extended load into a
4723      zero extending load, and an explicit sign extension.  */
4724   if (TARGET_P8_FUSION
4725       && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4726       && optimize_function_for_speed_p (cfun)
4727       && optimize >= 3)
4728     rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
4729 
4730   /* TOC fusion requires 64-bit and medium/large code model.  */
4731   if (TARGET_TOC_FUSION && !TARGET_POWERPC64)
4732     {
4733       rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4734       if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4735 	warning (0, N_("-mtoc-fusion requires 64-bit"));
4736     }
4737 
4738   if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL))
4739     {
4740       rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4741       if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4742 	warning (0, N_("-mtoc-fusion requires medium/large code model"));
4743     }
4744 
4745   /* Turn on -mtoc-fusion by default if p8-fusion and 64-bit medium/large code
4746      model.  */
4747   if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64
4748       && (TARGET_CMODEL != CMODEL_SMALL)
4749       && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION))
4750     rs6000_isa_flags |= OPTION_MASK_TOC_FUSION;
4751 
4752   /* ISA 3.0 vector instructions include ISA 2.07.  */
4753   if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4754     {
4755       /* We prefer to not mention undocumented options in
4756 	 error messages.  However, if users have managed to select
4757 	 power9-vector without selecting power8-vector, they
4758 	 already know about undocumented flags.  */
4759       if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4760 	  && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4761 	error ("-mpower9-vector requires -mpower8-vector");
4762       else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4763 	{
4764 	  rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4765 	  if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4766 	    rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4767 	}
4768       else
4769 	{
4770 	  /* OPTION_MASK_P9_VECTOR is explicit and
4771 	     OPTION_MASK_P8_VECTOR is not explicit.  */
4772 	  rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4773 	  rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4774 	}
4775     }
4776 
4777   /* -mpower9-dform turns on both -mpower9-dform-scalar and
4778      -mpower9-dform-vector.  */
4779   if (TARGET_P9_DFORM_BOTH > 0)
4780     {
4781       if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4782 	rs6000_isa_flags |= OPTION_MASK_P9_DFORM_VECTOR;
4783 
4784       if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4785 	rs6000_isa_flags |= OPTION_MASK_P9_DFORM_SCALAR;
4786     }
4787   else if (TARGET_P9_DFORM_BOTH == 0)
4788     {
4789       if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4790 	rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_VECTOR;
4791 
4792       if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4793 	rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4794     }
4795 
4796   /* ISA 3.0 D-form instructions require p9-vector and upper-regs.  */
4797   if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR) && !TARGET_P9_VECTOR)
4798     {
4799       /* We prefer to not mention undocumented options in
4800 	 error messages.  However, if users have managed to select
4801 	 power9-dform without selecting power9-vector, they
4802 	 already know about undocumented flags.  */
4803       if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4804 	  && (rs6000_isa_flags_explicit & (OPTION_MASK_P9_DFORM_SCALAR
4805 					   | OPTION_MASK_P9_DFORM_VECTOR)))
4806 	error ("-mpower9-dform requires -mpower9-vector");
4807       else if (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4808 	{
4809 	  rs6000_isa_flags &=
4810 	    ~(OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4811 	  rs6000_isa_flags_explicit |=
4812 	    (OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4813 	}
4814       else
4815 	{
4816 	  /* We know that OPTION_MASK_P9_VECTOR is not explicit and
4817 	     OPTION_MASK_P9_DFORM_SCALAR or OPTION_MASK_P9_DFORM_VECTOR
4818 	     may be explicit.  */
4819 	  rs6000_isa_flags |= OPTION_MASK_P9_VECTOR;
4820 	  rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4821 	}
4822     }
4823 
4824   if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR)
4825       && !TARGET_DIRECT_MOVE)
4826     {
4827       /* We prefer to not mention undocumented options in
4828 	 error messages.  However, if users have managed to select
4829 	 power9-dform without selecting direct-move, they
4830 	 already know about undocumented flags.  */
4831       if ((rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4832 	  && ((rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR)
4833 	      || (rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR)
4834 	      || (TARGET_P9_DFORM_BOTH == 1)))
4835 	error ("-mpower9-dform, -mpower9-dform-vector, -mpower9-dform-scalar"
4836 	       " require -mdirect-move");
4837       else if ((rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE) == 0)
4838 	{
4839 	  rs6000_isa_flags |= OPTION_MASK_DIRECT_MOVE;
4840 	  rs6000_isa_flags_explicit |= OPTION_MASK_DIRECT_MOVE;
4841 	}
4842       else
4843 	{
4844 	  rs6000_isa_flags &=
4845 	    ~(OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4846 	  rs6000_isa_flags_explicit |=
4847 	    (OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4848 	}
4849     }
4850 
4851   if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_DF)
4852     {
4853       /* We prefer to not mention undocumented options in
4854 	 error messages.  However, if users have managed to select
4855 	 power9-dform without selecting upper-regs-df, they
4856 	 already know about undocumented flags.  */
4857       if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4858 	error ("-mpower9-dform requires -mupper-regs-df");
4859       rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4860     }
4861 
4862   if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_SF)
4863     {
4864       if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4865 	error ("-mpower9-dform requires -mupper-regs-sf");
4866       rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4867     }
4868 
4869   /* Enable LRA by default.  */
4870   if ((rs6000_isa_flags_explicit & OPTION_MASK_LRA) == 0)
4871     rs6000_isa_flags |= OPTION_MASK_LRA;
4872 
4873   /* There have been bugs with -mvsx-timode that don't show up with -mlra,
4874      but do show up with -mno-lra.  Given -mlra will become the default once
4875      PR 69847 is fixed, turn off the options with problems by default if
4876      -mno-lra was used, and warn if the user explicitly asked for the option.
4877 
4878      Enable -mpower9-dform-vector by default if LRA and other power9 options.
4879      Enable -mvsx-timode by default if LRA and VSX.  */
4880   if (!TARGET_LRA)
4881     {
4882       if (TARGET_VSX_TIMODE)
4883 	{
4884 	  if ((rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) != 0)
4885 	    warning (0, "-mvsx-timode might need -mlra");
4886 
4887 	  else
4888 	    rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4889 	}
4890     }
4891 
4892   else
4893     {
4894       if (TARGET_VSX && !TARGET_VSX_TIMODE
4895 	  && (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) == 0)
4896 	rs6000_isa_flags |= OPTION_MASK_VSX_TIMODE;
4897     }
4898 
4899   /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
4900      support.  If we only have ISA 2.06 support and the user did not specify
4901      the switch, leave it set to -1 so the movmisalign patterns are enabled,
4902      but we don't enable the full vectorization support.  */
4903   if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4904     TARGET_ALLOW_MOVMISALIGN = 1;
4905 
4906   else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4907     {
4908       if (TARGET_ALLOW_MOVMISALIGN > 0
4909 	  && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4910 	error ("-mallow-movmisalign requires -mvsx");
4911 
4912       TARGET_ALLOW_MOVMISALIGN = 0;
4913     }
4914 
4915   /* Determine when unaligned vector accesses are permitted, and when
4916      they are preferred over masked Altivec loads.  Note that if
4917      TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4918      TARGET_EFFICIENT_UNALIGNED_VSX must be as well.  The converse is
4919      not true.  */
4920   if (TARGET_EFFICIENT_UNALIGNED_VSX)
4921     {
4922       if (!TARGET_VSX)
4923 	{
4924 	  if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4925 	    error ("-mefficient-unaligned-vsx requires -mvsx");
4926 
4927 	  rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4928 	}
4929 
4930       else if (!TARGET_ALLOW_MOVMISALIGN)
4931 	{
4932 	  if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4933 	    error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
4934 
4935 	  rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4936 	}
4937     }
4938 
4939   /* Check whether we should allow small integers into VSX registers.  We
4940      require direct move to prevent the register allocator from having to move
4941      variables through memory to do moves.  SImode can be used on ISA 2.07,
4942      while HImode and QImode require ISA 3.0.  */
4943   if (TARGET_VSX_SMALL_INTEGER
4944       && (!TARGET_DIRECT_MOVE || !TARGET_P8_VECTOR || !TARGET_UPPER_REGS_DI))
4945     {
4946       if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_SMALL_INTEGER)
4947 	error ("-mvsx-small-integer requires -mpower8-vector, "
4948 	       "-mupper-regs-di, and -mdirect-move");
4949 
4950       rs6000_isa_flags &= ~OPTION_MASK_VSX_SMALL_INTEGER;
4951     }
4952 
4953   /* Set long double size before the IEEE 128-bit tests.  */
4954   if (!global_options_set.x_rs6000_long_double_type_size)
4955     {
4956       if (main_target_opt != NULL
4957 	  && (main_target_opt->x_rs6000_long_double_type_size
4958 	      != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
4959 	error ("target attribute or pragma changes long double size");
4960       else
4961 	rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
4962     }
4963 
4964   /* Set -mabi=ieeelongdouble on some old targets.  Note, AIX and Darwin
4965      explicitly redefine TARGET_IEEEQUAD to 0, so those systems will not
4966      pick up this default.  */
4967 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
4968   if (!global_options_set.x_rs6000_ieeequad)
4969     rs6000_ieeequad = 1;
4970 #endif
4971 
4972   /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4973      systems, but don't enable the __float128 keyword.  */
4974   if (TARGET_VSX && TARGET_LONG_DOUBLE_128
4975       && (TARGET_FLOAT128_ENABLE_TYPE || TARGET_IEEEQUAD)
4976       && ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) == 0))
4977     rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE;
4978 
4979   /* IEEE 128-bit floating point requires VSX support.  */
4980   if (!TARGET_VSX)
4981     {
4982       if (TARGET_FLOAT128_KEYWORD)
4983 	{
4984 	  if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4985 	    error ("-mfloat128 requires VSX support");
4986 
4987 	  rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4988 				| OPTION_MASK_FLOAT128_KEYWORD
4989 				| OPTION_MASK_FLOAT128_HW);
4990 	}
4991 
4992       else if (TARGET_FLOAT128_TYPE)
4993 	{
4994 	  if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) != 0)
4995 	    error ("-mfloat128-type requires VSX support");
4996 
4997 	  rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4998 				| OPTION_MASK_FLOAT128_KEYWORD
4999 				| OPTION_MASK_FLOAT128_HW);
5000 	}
5001     }
5002 
5003   /* -mfloat128 and -mfloat128-hardware internally require the underlying IEEE
5004      128-bit floating point support to be enabled.  */
5005   if (!TARGET_FLOAT128_TYPE)
5006     {
5007       if (TARGET_FLOAT128_KEYWORD)
5008 	{
5009 	  if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
5010 	    {
5011 	      error ("-mfloat128 requires -mfloat128-type");
5012 	      rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
5013 				    | OPTION_MASK_FLOAT128_KEYWORD
5014 				    | OPTION_MASK_FLOAT128_HW);
5015 	    }
5016 	  else
5017 	    rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE;
5018 	}
5019 
5020       if (TARGET_FLOAT128_HW)
5021 	{
5022 	  if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
5023 	    {
5024 	      error ("-mfloat128-hardware requires -mfloat128-type");
5025 	      rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
5026 	    }
5027 	  else
5028 	    rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
5029 				  | OPTION_MASK_FLOAT128_KEYWORD
5030 				  | OPTION_MASK_FLOAT128_HW);
5031 	}
5032     }
5033 
5034   /* If we have -mfloat128-type and full ISA 3.0 support, enable
5035      -mfloat128-hardware by default.  However, don't enable the __float128
5036      keyword.  If the user explicitly turned on -mfloat128-hardware, enable the
5037      -mfloat128 option as well if it was not already set.  */
5038   if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW
5039       && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
5040       && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
5041     rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
5042 
5043   if (TARGET_FLOAT128_HW
5044       && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
5045     {
5046       if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
5047 	error ("-mfloat128-hardware requires full ISA 3.0 support");
5048 
5049       rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
5050     }
5051 
5052   if (TARGET_FLOAT128_HW && !TARGET_64BIT)
5053     {
5054       if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
5055 	error ("-mfloat128-hardware requires -m64");
5056 
5057       rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
5058     }
5059 
5060   if (TARGET_FLOAT128_HW && !TARGET_FLOAT128_KEYWORD
5061       && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0
5062       && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
5063     rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
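  /* Worked example: with -mfloat128-type and all of ISA_3_0_MASKS_IEEE
     enabled (e.g. a power9 server target), -mfloat128-hardware comes on
     implicitly but the __float128 keyword does not; an explicit
     -mfloat128-hardware also turns the keyword on, unless the user gave
     -mno-float128.  */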
5064 
5065   /* Print the options after updating the defaults.  */
5066   if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
5067     rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
5068 
5069   /* E500mc does "better" if we inline more aggressively.  Respect the
5070      user's opinion, though.  */
5071   if (rs6000_block_move_inline_limit == 0
5072       && (rs6000_cpu == PROCESSOR_PPCE500MC
5073 	  || rs6000_cpu == PROCESSOR_PPCE500MC64
5074 	  || rs6000_cpu == PROCESSOR_PPCE5500
5075 	  || rs6000_cpu == PROCESSOR_PPCE6500))
5076     rs6000_block_move_inline_limit = 128;
5077 
5078   /* store_one_arg depends on expand_block_move to handle at least the
5079      size of reg_parm_stack_space.  */
5080   if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
5081     rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
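  /* For example, -mcpu=e500mc with no -mblock-move-inline-limit= gives a
     128-byte limit, while any smaller setting is raised to the
     reg_parm_stack_space floor of 32 bytes (64 for 64-bit).  */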
5082 
5083   if (global_init_p)
5084     {
5085       /* If the appropriate debug option is enabled, replace the target hooks
5086 	 with debug versions that call the real version and then print
5087 	 debugging information.  */
5088       if (TARGET_DEBUG_COST)
5089 	{
5090 	  targetm.rtx_costs = rs6000_debug_rtx_costs;
5091 	  targetm.address_cost = rs6000_debug_address_cost;
5092 	  targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
5093 	}
5094 
5095       if (TARGET_DEBUG_ADDR)
5096 	{
5097 	  targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
5098 	  targetm.legitimize_address = rs6000_debug_legitimize_address;
5099 	  rs6000_secondary_reload_class_ptr
5100 	    = rs6000_debug_secondary_reload_class;
5101 	  targetm.secondary_memory_needed
5102 	    = rs6000_debug_secondary_memory_needed;
5103 	  targetm.can_change_mode_class
5104 	    = rs6000_debug_can_change_mode_class;
5105 	  rs6000_preferred_reload_class_ptr
5106 	    = rs6000_debug_preferred_reload_class;
5107 	  rs6000_legitimize_reload_address_ptr
5108 	    = rs6000_debug_legitimize_reload_address;
5109 	  rs6000_mode_dependent_address_ptr
5110 	    = rs6000_debug_mode_dependent_address;
5111 	}
5112 
5113       if (rs6000_veclibabi_name)
5114 	{
5115 	  if (strcmp (rs6000_veclibabi_name, "mass") == 0)
5116 	    rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
5117 	  else
5118 	    {
5119 	      error ("unknown vectorization library ABI type (%s) for "
5120 		     "-mveclibabi= switch", rs6000_veclibabi_name);
5121 	      ret = false;
5122 	    }
5123 	}
5124     }
5125 
5126   /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
5127      target attribute or pragma which automatically enables both options,
5128      unless the altivec ABI was set.  This is set by default for 64-bit, but
5129      not for 32-bit.  */
5130   if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
5131     rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
5132 			   | OPTION_MASK_FLOAT128_TYPE
5133 			   | OPTION_MASK_FLOAT128_KEYWORD)
5134 			  & ~rs6000_isa_flags_explicit);
5135 
5136   /* Enable Altivec ABI for AIX -maltivec.  */
5137   if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
5138     {
5139       if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
5140 	error ("target attribute or pragma changes AltiVec ABI");
5141       else
5142 	rs6000_altivec_abi = 1;
5143     }
5144 
5145   /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux.  For
5146      PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI.  It can
5147      be explicitly overridden in either case.  */
5148   if (TARGET_ELF)
5149     {
5150       if (!global_options_set.x_rs6000_altivec_abi
5151 	  && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
5152 	{
5153 	  if (main_target_opt != NULL &&
5154 	      !main_target_opt->x_rs6000_altivec_abi)
5155 	    error ("target attribute or pragma changes AltiVec ABI");
5156 	  else
5157 	    rs6000_altivec_abi = 1;
5158 	}
5159     }
5160 
5161   /* Set the Darwin64 ABI as default for 64-bit Darwin.
5162      So far, the only darwin64 targets are also Mach-O.  */
5163   if (TARGET_MACHO
5164       && DEFAULT_ABI == ABI_DARWIN
5165       && TARGET_64BIT)
5166     {
5167       if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
5168 	error ("target attribute or pragma changes darwin64 ABI");
5169       else
5170 	{
5171 	  rs6000_darwin64_abi = 1;
5172 	  /* Default to natural alignment, for better performance.  */
5173 	  rs6000_alignment_flags = MASK_ALIGN_NATURAL;
5174 	}
5175     }
5176 
5177   /* Place FP constants in the constant pool instead of TOC
5178      if section anchors enabled.  */
5179   if (flag_section_anchors
5180       && !global_options_set.x_TARGET_NO_FP_IN_TOC)
5181     TARGET_NO_FP_IN_TOC = 1;
5182 
5183   if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
5184     rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
5185 
5186 #ifdef SUBTARGET_OVERRIDE_OPTIONS
5187   SUBTARGET_OVERRIDE_OPTIONS;
5188 #endif
5189 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
5190   SUBSUBTARGET_OVERRIDE_OPTIONS;
5191 #endif
5192 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
5193   SUB3TARGET_OVERRIDE_OPTIONS;
5194 #endif
5195 
5196   if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
5197     rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
5198 
5199   /* For the E500 family of cores, reset the single/double FP flags to let us
5200      check that they remain constant across attributes or pragmas.  Also,
5201      clear a possible request for string instructions, which are not
5202      supported and which we might have silently enabled above for -Os.
5203 
5204      For other families, clear ISEL in case it was set implicitly.  */
5206 
5207   switch (rs6000_cpu)
5208     {
5209     case PROCESSOR_PPC8540:
5210     case PROCESSOR_PPC8548:
5211     case PROCESSOR_PPCE500MC:
5212     case PROCESSOR_PPCE500MC64:
5213     case PROCESSOR_PPCE5500:
5214     case PROCESSOR_PPCE6500:
5215 
5216       rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
5217       rs6000_double_float = TARGET_E500_DOUBLE;
5218 
5219       rs6000_isa_flags &= ~OPTION_MASK_STRING;
5220 
5221       break;
5222 
5223     default:
5224 
5225       if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
5226 	rs6000_isa_flags &= ~OPTION_MASK_ISEL;
5227 
5228       break;
5229     }
5230 
5231   if (main_target_opt)
5232     {
5233       if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
5234 	error ("target attribute or pragma changes single precision floating "
5235 	       "point");
5236       if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
5237 	error ("target attribute or pragma changes double precision floating "
5238 	       "point");
5239     }
5240 
5241   /* Detect invalid option combinations with E500.  */
5242   CHECK_E500_OPTIONS;
5243 
5244   rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
5245 			&& rs6000_cpu != PROCESSOR_POWER5
5246 			&& rs6000_cpu != PROCESSOR_POWER6
5247 			&& rs6000_cpu != PROCESSOR_POWER7
5248 			&& rs6000_cpu != PROCESSOR_POWER8
5249 			&& rs6000_cpu != PROCESSOR_POWER9
5250 			&& rs6000_cpu != PROCESSOR_PPCA2
5251 			&& rs6000_cpu != PROCESSOR_CELL
5252 			&& rs6000_cpu != PROCESSOR_PPC476);
5253   rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
5254 			 || rs6000_cpu == PROCESSOR_POWER5
5255 			 || rs6000_cpu == PROCESSOR_POWER7
5256 			 || rs6000_cpu == PROCESSOR_POWER8);
5257   rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
5258 				 || rs6000_cpu == PROCESSOR_POWER5
5259 				 || rs6000_cpu == PROCESSOR_POWER6
5260 				 || rs6000_cpu == PROCESSOR_POWER7
5261 				 || rs6000_cpu == PROCESSOR_POWER8
5262 				 || rs6000_cpu == PROCESSOR_POWER9
5263 				 || rs6000_cpu == PROCESSOR_PPCE500MC
5264 				 || rs6000_cpu == PROCESSOR_PPCE500MC64
5265 				 || rs6000_cpu == PROCESSOR_PPCE5500
5266 				 || rs6000_cpu == PROCESSOR_PPCE6500);
5267 
5268   /* Allow debug switches to override the above settings.  These are set to -1
5269      in powerpcspe.opt to indicate the user hasn't directly set the switch.  */
5270   if (TARGET_ALWAYS_HINT >= 0)
5271     rs6000_always_hint = TARGET_ALWAYS_HINT;
5272 
5273   if (TARGET_SCHED_GROUPS >= 0)
5274     rs6000_sched_groups = TARGET_SCHED_GROUPS;
5275 
5276   if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
5277     rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
5278 
5279   rs6000_sched_restricted_insns_priority
5280     = (rs6000_sched_groups ? 1 : 0);
5281 
5282   /* Handle -msched-costly-dep option.  */
5283   rs6000_sched_costly_dep
5284     = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
5285 
5286   if (rs6000_sched_costly_dep_str)
5287     {
5288       if (! strcmp (rs6000_sched_costly_dep_str, "no"))
5289 	rs6000_sched_costly_dep = no_dep_costly;
5290       else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
5291 	rs6000_sched_costly_dep = all_deps_costly;
5292       else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
5293 	rs6000_sched_costly_dep = true_store_to_load_dep_costly;
5294       else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
5295 	rs6000_sched_costly_dep = store_to_load_dep_costly;
5296       else
5297 	rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
5298 				   atoi (rs6000_sched_costly_dep_str));
5299     }
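  /* Usage: -msched-costly-dep=store_to_load selects an enumerator by
     name, while a bare integer such as -msched-costly-dep=2 is converted
     with atoi and cast to the rs6000_dependence_cost enumeration.  */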
5300 
5301   /* Handle -minsert-sched-nops option.  */
5302   rs6000_sched_insert_nops
5303     = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
5304 
5305   if (rs6000_sched_insert_nops_str)
5306     {
5307       if (! strcmp (rs6000_sched_insert_nops_str, "no"))
5308 	rs6000_sched_insert_nops = sched_finish_none;
5309       else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
5310 	rs6000_sched_insert_nops = sched_finish_pad_groups;
5311       else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
5312 	rs6000_sched_insert_nops = sched_finish_regroup_exact;
5313       else
5314 	rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
5315 				    atoi (rs6000_sched_insert_nops_str));
5316     }
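  /* Usage: -minsert-sched-nops=pad or =regroup_exact select a strategy
     by name; a bare integer is likewise converted with atoi and cast to
     the rs6000_nop_insertion enumeration.  */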
5317 
5318   /* Handle stack protector */
5319   if (!global_options_set.x_rs6000_stack_protector_guard)
5320 #ifdef TARGET_THREAD_SSP_OFFSET
5321     rs6000_stack_protector_guard = SSP_TLS;
5322 #else
5323     rs6000_stack_protector_guard = SSP_GLOBAL;
5324 #endif
5325 
5326 #ifdef TARGET_THREAD_SSP_OFFSET
5327   rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
5328   rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
5329 #endif
5330 
5331   if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
5332     {
5333       char *endp;
5334       const char *str = rs6000_stack_protector_guard_offset_str;
5335 
5336       errno = 0;
5337       long offset = strtol (str, &endp, 0);
5338       if (!*str || *endp || errno)
5339 	error ("%qs is not a valid number "
5340 	       "in -mstack-protector-guard-offset=", str);
5341 
5342       if (!IN_RANGE (offset, -0x8000, 0x7fff)
5343 	  || (TARGET_64BIT && (offset & 3)))
5344 	error ("%qs is not a valid offset "
5345 	       "in -mstack-protector-guard-offset=", str);
5346 
5347       rs6000_stack_protector_guard_offset = offset;
5348     }
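  /* For example, -mstack-protector-guard-offset=0x7010 is accepted,
     while 0x10000 is rejected as outside the signed 16-bit range, and
     under -m64 any offset with the low two bits set is rejected as well
     (the 64-bit load uses a DS-form displacement).  */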
5349 
5350   if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
5351     {
5352       const char *str = rs6000_stack_protector_guard_reg_str;
5353       int reg = decode_reg_name (str);
5354 
5355       if (!IN_RANGE (reg, 1, 31))
5356 	error ("%qs is not a valid base register "
5357 	       "in -mstack-protector-guard-reg=", str);
5358 
5359       rs6000_stack_protector_guard_reg = reg;
5360     }
5361 
5362   if (rs6000_stack_protector_guard == SSP_TLS
5363       && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
5364     error ("-mstack-protector-guard=tls needs a valid base register");
5365 
5366   if (global_init_p)
5367     {
5368 #ifdef TARGET_REGNAMES
5369       /* If the user desires alternate register names, copy in the
5370 	 alternate names now.  */
5371       if (TARGET_REGNAMES)
5372 	memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
5373 #endif
5374 
5375       /* Set aix_struct_return last, after the ABI is determined.
5376 	 If -maix-struct-return or -msvr4-struct-return was explicitly
5377 	 used, don't override with the ABI default.  */
5378       if (!global_options_set.x_aix_struct_return)
5379 	aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
5380 
5381 #if 0
5382       /* IBM XL compiler defaults to unsigned bitfields.  */
5383       if (TARGET_XL_COMPAT)
5384 	flag_signed_bitfields = 0;
5385 #endif
5386 
5387       if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
5388 	REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
5389 
5390       ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
5391 
5392       /* We can only guarantee the availability of DI pseudo-ops when
5393 	 assembling for 64-bit targets.  */
5394       if (!TARGET_64BIT)
5395 	{
5396 	  targetm.asm_out.aligned_op.di = NULL;
5397 	  targetm.asm_out.unaligned_op.di = NULL;
5398 	}
5399 
5400 
5401       /* Set branch target alignment, if not optimizing for size.  */
5402       if (!optimize_size)
5403 	{
5404 	  /* Cell wants to be 8-byte aligned for dual issue.  Titan wants to be
5405 	     8-byte aligned to avoid misprediction by the branch predictor.  */
5406 	  if (rs6000_cpu == PROCESSOR_TITAN
5407 	      || rs6000_cpu == PROCESSOR_CELL)
5408 	    {
5409 	      if (align_functions <= 0)
5410 		align_functions = 8;
5411 	      if (align_jumps <= 0)
5412 		align_jumps = 8;
5413 	      if (align_loops <= 0)
5414 		align_loops = 8;
5415 	    }
5416 	  if (rs6000_align_branch_targets)
5417 	    {
5418 	      if (align_functions <= 0)
5419 		align_functions = 16;
5420 	      if (align_jumps <= 0)
5421 		align_jumps = 16;
5422 	      if (align_loops <= 0)
5423 		{
5424 		  can_override_loop_align = 1;
5425 		  align_loops = 16;
5426 		}
5427 	    }
5428 	  if (align_jumps_max_skip <= 0)
5429 	    align_jumps_max_skip = 15;
5430 	  if (align_loops_max_skip <= 0)
5431 	    align_loops_max_skip = 15;
5432 	}
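      /* Worked example: when tuning for a cpu that sets
	 rs6000_align_branch_targets (e.g. power8) and no -falign-*
	 options were given, functions, jumps and loops all default to
	 16-byte alignment with up to 15 bytes of padding skipped; Cell
	 and Titan default to 8 bytes instead.  */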
5433 
5434       /* Arrange to save and restore machine status around nested functions.  */
5435       init_machine_status = rs6000_init_machine_status;
5436 
5437       /* We should always be splitting complex arguments, but we can't break
5438 	 Linux and Darwin ABIs at the moment.  For now, only AIX is fixed.  */
5439       if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
5440 	targetm.calls.split_complex_arg = NULL;
5441 
5442       /* The AIX and ELFv1 ABIs define standard function descriptors.  */
5443       if (DEFAULT_ABI == ABI_AIX)
5444 	targetm.calls.custom_function_descriptors = 0;
5445     }
5446 
5447   /* Initialize rs6000_cost with the appropriate target costs.  */
5448   if (optimize_size)
5449     rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
5450   else
5451     switch (rs6000_cpu)
5452       {
5453       case PROCESSOR_RS64A:
5454 	rs6000_cost = &rs64a_cost;
5455 	break;
5456 
5457       case PROCESSOR_MPCCORE:
5458 	rs6000_cost = &mpccore_cost;
5459 	break;
5460 
5461       case PROCESSOR_PPC403:
5462 	rs6000_cost = &ppc403_cost;
5463 	break;
5464 
5465       case PROCESSOR_PPC405:
5466 	rs6000_cost = &ppc405_cost;
5467 	break;
5468 
5469       case PROCESSOR_PPC440:
5470 	rs6000_cost = &ppc440_cost;
5471 	break;
5472 
5473       case PROCESSOR_PPC476:
5474 	rs6000_cost = &ppc476_cost;
5475 	break;
5476 
5477       case PROCESSOR_PPC601:
5478 	rs6000_cost = &ppc601_cost;
5479 	break;
5480 
5481       case PROCESSOR_PPC603:
5482 	rs6000_cost = &ppc603_cost;
5483 	break;
5484 
5485       case PROCESSOR_PPC604:
5486 	rs6000_cost = &ppc604_cost;
5487 	break;
5488 
5489       case PROCESSOR_PPC604e:
5490 	rs6000_cost = &ppc604e_cost;
5491 	break;
5492 
5493       case PROCESSOR_PPC620:
5494 	rs6000_cost = &ppc620_cost;
5495 	break;
5496 
5497       case PROCESSOR_PPC630:
5498 	rs6000_cost = &ppc630_cost;
5499 	break;
5500 
5501       case PROCESSOR_CELL:
5502 	rs6000_cost = &ppccell_cost;
5503 	break;
5504 
5505       case PROCESSOR_PPC750:
5506       case PROCESSOR_PPC7400:
5507 	rs6000_cost = &ppc750_cost;
5508 	break;
5509 
5510       case PROCESSOR_PPC7450:
5511 	rs6000_cost = &ppc7450_cost;
5512 	break;
5513 
5514       case PROCESSOR_PPC8540:
5515       case PROCESSOR_PPC8548:
5516 	rs6000_cost = &ppc8540_cost;
5517 	break;
5518 
5519       case PROCESSOR_PPCE300C2:
5520       case PROCESSOR_PPCE300C3:
5521 	rs6000_cost = &ppce300c2c3_cost;
5522 	break;
5523 
5524       case PROCESSOR_PPCE500MC:
5525 	rs6000_cost = &ppce500mc_cost;
5526 	break;
5527 
5528       case PROCESSOR_PPCE500MC64:
5529 	rs6000_cost = &ppce500mc64_cost;
5530 	break;
5531 
5532       case PROCESSOR_PPCE5500:
5533 	rs6000_cost = &ppce5500_cost;
5534 	break;
5535 
5536       case PROCESSOR_PPCE6500:
5537 	rs6000_cost = &ppce6500_cost;
5538 	break;
5539 
5540       case PROCESSOR_TITAN:
5541 	rs6000_cost = &titan_cost;
5542 	break;
5543 
5544       case PROCESSOR_POWER4:
5545       case PROCESSOR_POWER5:
5546 	rs6000_cost = &power4_cost;
5547 	break;
5548 
5549       case PROCESSOR_POWER6:
5550 	rs6000_cost = &power6_cost;
5551 	break;
5552 
5553       case PROCESSOR_POWER7:
5554 	rs6000_cost = &power7_cost;
5555 	break;
5556 
5557       case PROCESSOR_POWER8:
5558 	rs6000_cost = &power8_cost;
5559 	break;
5560 
5561       case PROCESSOR_POWER9:
5562 	rs6000_cost = &power9_cost;
5563 	break;
5564 
5565       case PROCESSOR_PPCA2:
5566 	rs6000_cost = &ppca2_cost;
5567 	break;
5568 
5569       default:
5570 	gcc_unreachable ();
5571       }
5572 
5573   if (global_init_p)
5574     {
5575       maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
5576 			     rs6000_cost->simultaneous_prefetches,
5577 			     global_options.x_param_values,
5578 			     global_options_set.x_param_values);
5579       maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
5580 			     global_options.x_param_values,
5581 			     global_options_set.x_param_values);
5582       maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
5583 			     rs6000_cost->cache_line_size,
5584 			     global_options.x_param_values,
5585 			     global_options_set.x_param_values);
5586       maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
5587 			     global_options.x_param_values,
5588 			     global_options_set.x_param_values);
5589 
5590       /* Increase loop peeling limits based on performance analysis.  */
5591       maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
5592 			     global_options.x_param_values,
5593 			     global_options_set.x_param_values);
5594       maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
5595 			     global_options.x_param_values,
5596 			     global_options_set.x_param_values);
5597 
5598       /* Use the 'model' -fsched-pressure algorithm by default.  */
5599       maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM,
5600 			     SCHED_PRESSURE_MODEL,
5601 			     global_options.x_param_values,
5602 			     global_options_set.x_param_values);
5603 
5604       /* If using typedef char *va_list, signal that
5605 	 __builtin_va_start (&ap, 0) can be optimized to
5606 	 ap = __builtin_next_arg (0).  */
5607       if (DEFAULT_ABI != ABI_V4)
5608 	targetm.expand_builtin_va_start = NULL;
5609     }
5610 
5611   /* Set up single/double float flags.
5612      If TARGET_HARD_FLOAT is set, but neither single nor double is set,
5613      then set both flags.  */
5614   if (TARGET_HARD_FLOAT && TARGET_FPRS
5615       && rs6000_single_float == 0 && rs6000_double_float == 0)
5616     rs6000_single_float = rs6000_double_float = 1;
5617 
5618   /* If not explicitly specified via option, decide whether to generate indexed
5619      load/store instructions.  A value of -1 indicates that the
5620      initial value of this variable has not been overwritten.  During
5621      compilation, TARGET_AVOID_XFORM is either 0 or 1.  */
5622   if (TARGET_AVOID_XFORM == -1)
5623     /* Avoid indexed addressing when targeting Power6 in order to avoid the
5624      DERAT mispredict penalty.  However the LVE and STVE altivec instructions
5625      need indexed accesses and the type used is the scalar type of the element
5626      being loaded or stored.  */
5627     TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
5628 			  && !TARGET_ALTIVEC);
5629 
5630   /* Set the -mrecip options.  */
5631   if (rs6000_recip_name)
5632     {
5633       char *p = ASTRDUP (rs6000_recip_name);
5634       char *q;
5635       unsigned int mask, i;
5636       bool invert;
5637 
5638       while ((q = strtok (p, ",")) != NULL)
5639 	{
5640 	  p = NULL;
5641 	  if (*q == '!')
5642 	    {
5643 	      invert = true;
5644 	      q++;
5645 	    }
5646 	  else
5647 	    invert = false;
5648 
5649 	  if (!strcmp (q, "default"))
5650 	    mask = ((TARGET_RECIP_PRECISION)
5651 		    ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
5652 	  else
5653 	    {
5654 	      for (i = 0; i < ARRAY_SIZE (recip_options); i++)
5655 		if (!strcmp (q, recip_options[i].string))
5656 		  {
5657 		    mask = recip_options[i].mask;
5658 		    break;
5659 		  }
5660 
5661 	      if (i == ARRAY_SIZE (recip_options))
5662 		{
5663 		  error ("unknown option for -mrecip=%s", q);
5664 		  invert = false;
5665 		  mask = 0;
5666 		  ret = false;
5667 		}
5668 	    }
5669 
5670 	  if (invert)
5671 	    rs6000_recip_control &= ~mask;
5672 	  else
5673 	    rs6000_recip_control |= mask;
5674 	}
5675     }
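  /* Usage sketch: -mrecip=default,!div would enable the default
     reciprocal-estimate mask for the available precision and then mask
     out the division entries (assuming "div" is one of the
     recip_options strings); a '!' prefix inverts the named mask.  */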
5676 
5677   /* Set the builtin mask of the various options used that could affect which
5678      builtins were used.  In the past we used target_flags, but we've run out
5679      of bits, and some options like SPE and PAIRED are no longer in
5680      target_flags.  */
5681   rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
5682   if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
5683     rs6000_print_builtin_options (stderr, 0, "builtin mask",
5684 				  rs6000_builtin_mask);
5685 
5686   /* Initialize all of the registers.  */
5687   rs6000_init_hard_regno_mode_ok (global_init_p);
5688 
5689   /* Save the initial options in case the user uses function-specific options.  */
5690   if (global_init_p)
5691     target_option_default_node = target_option_current_node
5692       = build_target_option_node (&global_options);
5693 
5694   /* If not explicitly specified via option, decide whether to generate the
5695      extra blr's required to preserve the link stack on some cpus (e.g., 476).  */
5696   if (TARGET_LINK_STACK == -1)
5697     SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
5698 
5699   return ret;
5700 }
5701 
5702 /* Implement TARGET_OPTION_OVERRIDE.  On the RS/6000 this is used to
5703    define the target cpu type.  */
5704 
5705 static void
5706 rs6000_option_override (void)
5707 {
5708   (void) rs6000_option_override_internal (true);
5709 }
5710 
5711 
5712 /* Implement targetm.vectorize.builtin_mask_for_load.  */
5713 static tree
5714 rs6000_builtin_mask_for_load (void)
5715 {
5716   /* Don't use lvsl/vperm for P8 and similarly efficient machines.  */
5717   if ((TARGET_ALTIVEC && !TARGET_VSX)
5718       || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
5719     return altivec_builtin_mask_for_load;
5720   else
5721     return 0;
5722 }
5723 
5724 /* Implement LOOP_ALIGN. */
5725 int
5726 rs6000_loop_align (rtx label)
5727 {
5728   basic_block bb;
5729   int ninsns;
5730 
5731   /* Don't override loop alignment if -falign-loops was specified.  */
5732   if (!can_override_loop_align)
5733     return align_loops_log;
5734 
5735   bb = BLOCK_FOR_INSN (label);
5736   ninsns = num_loop_insns (bb->loop_father);
5737 
5738   /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default.  */
5739   if (ninsns > 4 && ninsns <= 8
5740       && (rs6000_cpu == PROCESSOR_POWER4
5741 	  || rs6000_cpu == PROCESSOR_POWER5
5742 	  || rs6000_cpu == PROCESSOR_POWER6
5743 	  || rs6000_cpu == PROCESSOR_POWER7
5744 	  || rs6000_cpu == PROCESSOR_POWER8
5745 	  || rs6000_cpu == PROCESSOR_POWER9))
5746     return 5;
5747   else
5748     return align_loops_log;
5749 }
5750 
5751 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
5752 static int
5753 rs6000_loop_align_max_skip (rtx_insn *label)
5754 {
5755   return (1 << rs6000_loop_align (label)) - 1;
5756 }
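/* Worked example: with no -falign-loops, a 6-insn loop on power9 hits
   the small-loop case above, so rs6000_loop_align returns 5 (2**5 = 32
   byte alignment) and rs6000_loop_align_max_skip allows up to
   (1 << 5) - 1 = 31 bytes of padding.  */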
5757 
5758 /* Return true iff a data reference of TYPE can reach vector alignment (16)
5759    after applying N iterations.  This routine does not determine
5760    how many iterations are required to reach the desired alignment.  */
5761 
5762 static bool
5763 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
5764 {
5765   if (is_packed)
5766     return false;
5767 
5768   if (TARGET_32BIT)
5769     {
5770       if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
5771         return true;
5772 
5773       if (rs6000_alignment_flags ==  MASK_ALIGN_POWER)
5774         return true;
5775 
5776       return false;
5777     }
5778   else
5779     {
5780       if (TARGET_MACHO)
5781         return false;
5782 
5783       /* Assuming that all other types are naturally aligned. CHECKME!  */
5784       return true;
5785     }
5786 }
5787 
5788 /* Return true if the vector misalignment factor is supported by the
5789    target.  */
5790 static bool
5791 rs6000_builtin_support_vector_misalignment (machine_mode mode,
5792 					    const_tree type,
5793 					    int misalignment,
5794 					    bool is_packed)
5795 {
5796   if (TARGET_VSX)
5797     {
5798       if (TARGET_EFFICIENT_UNALIGNED_VSX)
5799 	return true;
5800 
5801       /* Return false if the movmisalign pattern is not supported for this mode.  */
5802       if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5803         return false;
5804 
5805       if (misalignment == -1)
5806 	{
5807 	  /* Misalignment factor is unknown at compile time but we know
5808 	     it's word aligned.  */
5809 	  if (rs6000_vector_alignment_reachable (type, is_packed))
5810             {
5811               int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5812 
5813               if (element_size == 64 || element_size == 32)
5814                 return true;
5815             }
5816 
5817 	  return false;
5818 	}
5819 
5820       /* VSX supports word-aligned vectors.  */
5821       if (misalignment % 4 == 0)
5822 	return true;
5823     }
5824   return false;
5825 }
5826 
5827 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
5828 static int
5829 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5830                                    tree vectype, int misalign)
5831 {
5832   unsigned elements;
5833   tree elem_type;
5834 
5835   switch (type_of_cost)
5836     {
5837       case scalar_stmt:
5838       case scalar_load:
5839       case scalar_store:
5840       case vector_stmt:
5841       case vector_load:
5842       case vector_store:
5843       case vec_to_scalar:
5844       case scalar_to_vec:
5845       case cond_branch_not_taken:
5846         return 1;
5847 
5848       case vec_perm:
5849 	if (TARGET_VSX)
5850 	  return 3;
5851 	else
5852 	  return 1;
5853 
5854       case vec_promote_demote:
5855         if (TARGET_VSX)
5856           return 4;
5857         else
5858           return 1;
5859 
5860       case cond_branch_taken:
5861         return 3;
5862 
5863       case unaligned_load:
5864       case vector_gather_load:
5865 	if (TARGET_P9_VECTOR)
5866 	  return 3;
5867 
5868 	if (TARGET_EFFICIENT_UNALIGNED_VSX)
5869 	  return 1;
5870 
5871         if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5872           {
5873             elements = TYPE_VECTOR_SUBPARTS (vectype);
5874             if (elements == 2)
5875               /* Double word aligned.  */
5876               return 2;
5877 
5878             if (elements == 4)
5879               {
5880                 switch (misalign)
5881                   {
5882                     case 8:
5883                       /* Double word aligned.  */
5884                       return 2;
5885 
5886                     case -1:
5887                       /* Unknown misalignment.  */
5888                     case 4:
5889                     case 12:
5890                       /* Word aligned.  */
5891                       return 22;
5892 
5893                     default:
5894                       gcc_unreachable ();
5895                   }
5896               }
5897           }
5898 
5899         if (TARGET_ALTIVEC)
5900           /* Misaligned loads are not supported.  */
5901           gcc_unreachable ();
5902 
5903         return 2;
5904 
5905       case unaligned_store:
5906       case vector_scatter_store:
5907 	if (TARGET_EFFICIENT_UNALIGNED_VSX)
5908 	  return 1;
5909 
5910         if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5911           {
5912             elements = TYPE_VECTOR_SUBPARTS (vectype);
5913             if (elements == 2)
5914               /* Double word aligned.  */
5915               return 2;
5916 
5917             if (elements == 4)
5918               {
5919                 switch (misalign)
5920                   {
5921                     case 8:
5922                       /* Double word aligned.  */
5923                       return 2;
5924 
5925                     case -1:
5926                       /* Unknown misalignment.  */
5927                     case 4:
5928                     case 12:
5929                       /* Word aligned.  */
5930                       return 23;
5931 
5932                     default:
5933                       gcc_unreachable ();
5934                   }
5935               }
5936           }
5937 
5938         if (TARGET_ALTIVEC)
5939           /* Misaligned stores are not supported.  */
5940           gcc_unreachable ();
5941 
5942         return 2;
5943 
5944       case vec_construct:
5945 	/* This is a rough approximation assuming non-constant elements
5946 	   constructed into a vector via element insertion.  FIXME:
5947 	   vec_construct is not granular enough for uniformly good
5948 	   decisions.  If the initialization is a splat, this is
5949 	   cheaper than we estimate.  Improve this someday.  */
5950 	elem_type = TREE_TYPE (vectype);
5951 	/* 32-bit vectors loaded into registers are stored as double
5952 	   precision, so we need 2 permutes, 2 converts, and 1 merge
5953 	   to construct a vector of short floats from them.  */
5954 	if (SCALAR_FLOAT_TYPE_P (elem_type)
5955 	    && TYPE_PRECISION (elem_type) == 32)
5956 	  return 5;
5957 	/* On POWER9, integer vector types are built up in GPRs and then
5958 	   use a direct move (2 cycles).  For POWER8 this is even worse,
5959 	   as we need two direct moves and a merge, and the direct moves
5960 	   are five cycles.  */
5961 	else if (INTEGRAL_TYPE_P (elem_type))
5962 	  {
5963 	    if (TARGET_P9_VECTOR)
5964 	      return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5965 	    else
5966 	      return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 11;
5967 	  }
5968 	else
5969 	  /* V2DFmode doesn't need a direct move.  */
5970 	  return 2;
5971 
5972       default:
5973         gcc_unreachable ();
5974     }
5975 }
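
/* For illustration: on a pre-POWER9 VSX target that allows misaligned
   moves, an unaligned V4SF load with misalign == 8 is double-word aligned
   and costs 2, while misalign 4, 12, or unknown (-1) is only word aligned
   and costs 22, roughly the cost of a load-and-realign sequence.  */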
5976 
5977 /* Implement targetm.vectorize.preferred_simd_mode.  */
5978 
5979 static machine_mode
rs6000_preferred_simd_mode (scalar_mode mode)
5981 {
5982   if (TARGET_VSX)
5983     switch (mode)
5984       {
5985       case E_DFmode:
5986 	return V2DFmode;
5987       default:;
5988       }
5989   if (TARGET_ALTIVEC || TARGET_VSX)
5990     switch (mode)
5991       {
5992       case E_SFmode:
5993 	return V4SFmode;
5994       case E_TImode:
5995 	return V1TImode;
5996       case E_DImode:
5997 	return V2DImode;
5998       case E_SImode:
5999 	return V4SImode;
6000       case E_HImode:
6001 	return V8HImode;
6002       case E_QImode:
6003 	return V16QImode;
6004       default:;
6005       }
6006   if (TARGET_SPE)
6007     switch (mode)
6008       {
6009       case E_SFmode:
6010 	return V2SFmode;
6011       case E_SImode:
6012 	return V2SImode;
6013       default:;
6014       }
6015   if (TARGET_PAIRED_FLOAT
6016       && mode == SFmode)
6017     return V2SFmode;
6018   return word_mode;
6019 }
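
/* For illustration: with VSX enabled, DFmode loops prefer V2DFmode; with
   only AltiVec, DFmode matches neither switch and word_mode is returned,
   disabling vectorization of doubles, while SImode still maps to
   V4SImode.  */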
6020 
6021 typedef struct _rs6000_cost_data
6022 {
6023   struct loop *loop_info;
6024   unsigned cost[3];
6025 } rs6000_cost_data;
6026 
6027 /* Test for likely overcommitment of vector hardware resources.  If a
6028    loop iteration is relatively large, and too large a percentage of
6029    instructions in the loop are vectorized, the cost model may not
6030    adequately reflect delays from unavailable vector resources.
6031    Penalize the loop body cost for this case.  */
6032 
6033 static void
rs6000_density_test (rs6000_cost_data *data)
6035 {
6036   const int DENSITY_PCT_THRESHOLD = 85;
6037   const int DENSITY_SIZE_THRESHOLD = 70;
6038   const int DENSITY_PENALTY = 10;
6039   struct loop *loop = data->loop_info;
6040   basic_block *bbs = get_loop_body (loop);
6041   int nbbs = loop->num_nodes;
6042   int vec_cost = data->cost[vect_body], not_vec_cost = 0;
6043   int i, density_pct;
6044 
6045   for (i = 0; i < nbbs; i++)
6046     {
6047       basic_block bb = bbs[i];
6048       gimple_stmt_iterator gsi;
6049 
6050       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6051 	{
6052 	  gimple *stmt = gsi_stmt (gsi);
6053 	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6054 
6055 	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
6056 	      && !STMT_VINFO_IN_PATTERN_P (stmt_info))
6057 	    not_vec_cost++;
6058 	}
6059     }
6060 
6061   free (bbs);
6062   density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
6063 
6064   if (density_pct > DENSITY_PCT_THRESHOLD
6065       && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
6066     {
6067       data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
6068       if (dump_enabled_p ())
6069 	dump_printf_loc (MSG_NOTE, vect_location,
6070 			 "density %d%%, cost %d exceeds threshold, penalizing "
6071 			 "loop body cost by %d%%", density_pct,
6072 			 vec_cost + not_vec_cost, DENSITY_PENALTY);
6073     }
6074 }
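
/* Worked example: with vec_cost == 90 and not_vec_cost == 10, density_pct
   is 90 and the loop size is 100, so both thresholds are exceeded and the
   body cost becomes 90 * 110 / 100 == 99.  With vec_cost == 60 and
   not_vec_cost == 10, density_pct is 85, which does not exceed the 85%
   threshold, and no penalty is applied.  */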
6075 
6076 /* Implement targetm.vectorize.init_cost.  */
6077 
6078 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
6079    instruction is needed by the vectorization.  */
6080 static bool rs6000_vect_nonmem;
6081 
6082 static void *
rs6000_init_cost (struct loop *loop_info)
6084 {
6085   rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
6086   data->loop_info = loop_info;
6087   data->cost[vect_prologue] = 0;
6088   data->cost[vect_body]     = 0;
6089   data->cost[vect_epilogue] = 0;
6090   rs6000_vect_nonmem = false;
6091   return data;
6092 }
6093 
6094 /* Implement targetm.vectorize.add_stmt_cost.  */
6095 
6096 static unsigned
rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6098 		      struct _stmt_vec_info *stmt_info, int misalign,
6099 		      enum vect_cost_model_location where)
6100 {
6101   rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
6102   unsigned retval = 0;
6103 
6104   if (flag_vect_cost_model)
6105     {
6106       tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6107       int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
6108 							 misalign);
6109       /* Statements in an inner loop relative to the loop being
6110 	 vectorized are weighted more heavily.  The value here is
6111 	 arbitrary and could potentially be improved with analysis.  */
6112       if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6113 	count *= 50;  /* FIXME.  */
6114 
6115       retval = (unsigned) (count * stmt_cost);
6116       cost_data->cost[where] += retval;
6117 
6118       /* Check whether we're doing something other than just a copy loop.
6119 	 Not all such loops may be profitably vectorized; see
6120 	 rs6000_finish_cost.  */
6121       if ((kind == vec_to_scalar || kind == vec_perm
6122 	   || kind == vec_promote_demote || kind == vec_construct
6123 	   || kind == scalar_to_vec)
6124 	  || (where == vect_body && kind == vector_stmt))
6125 	rs6000_vect_nonmem = true;
6126     }
6127 
6128   return retval;
6129 }
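
/* For illustration: a vector_stmt whose base cost is 1, appearing in the
   body of a loop nested inside the loop being vectorized, is charged
   1 * 50 == 50 units here, and, being a non-memory vector operation in
   the body, it also sets rs6000_vect_nonmem.  */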
6130 
6131 /* Implement targetm.vectorize.finish_cost.  */
6132 
6133 static void
rs6000_finish_cost (void *data, unsigned *prologue_cost,
6135 		    unsigned *body_cost, unsigned *epilogue_cost)
6136 {
6137   rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
6138 
6139   if (cost_data->loop_info)
6140     rs6000_density_test (cost_data);
6141 
6142   /* Don't vectorize minimum-vectorization-factor, simple copy loops
6143      that require versioning for any reason.  The vectorization is at
6144      best a wash inside the loop, and the versioning checks make
6145      profitability highly unlikely and potentially quite harmful.  */
6146   if (cost_data->loop_info)
6147     {
6148       loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
6149       if (!rs6000_vect_nonmem
6150 	  && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
6151 	  && LOOP_REQUIRES_VERSIONING (vec_info))
6152 	cost_data->cost[vect_body] += 10000;
6153     }
6154 
6155   *prologue_cost = cost_data->cost[vect_prologue];
6156   *body_cost     = cost_data->cost[vect_body];
6157   *epilogue_cost = cost_data->cost[vect_epilogue];
6158 }
6159 
6160 /* Implement targetm.vectorize.destroy_cost_data.  */
6161 
6162 static void
rs6000_destroy_cost_data (void *data)
6164 {
6165   free (data);
6166 }
6167 
6168 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
6169    library with vectorized intrinsics.  */
6170 
6171 static tree
rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
6173 				   tree type_in)
6174 {
6175   char name[32];
6176   const char *suffix = NULL;
6177   tree fntype, new_fndecl, bdecl = NULL_TREE;
6178   int n_args = 1;
6179   const char *bname;
6180   machine_mode el_mode, in_mode;
6181   int n, in_n;
6182 
  /* Libmass is suitable for unsafe math only, as it does not correctly
     support parts of IEEE (such as denormals) with the required precision.
     Only support it if we have VSX to use the simd d2 or f4 functions.
     XXX: Add variable length support.  */
6187   if (!flag_unsafe_math_optimizations || !TARGET_VSX)
6188     return NULL_TREE;
6189 
6190   el_mode = TYPE_MODE (TREE_TYPE (type_out));
6191   n = TYPE_VECTOR_SUBPARTS (type_out);
6192   in_mode = TYPE_MODE (TREE_TYPE (type_in));
6193   in_n = TYPE_VECTOR_SUBPARTS (type_in);
6194   if (el_mode != in_mode
6195       || n != in_n)
6196     return NULL_TREE;
6197 
6198   switch (fn)
6199     {
6200     CASE_CFN_ATAN2:
6201     CASE_CFN_HYPOT:
6202     CASE_CFN_POW:
6203       n_args = 2;
6204       gcc_fallthrough ();
6205 
6206     CASE_CFN_ACOS:
6207     CASE_CFN_ACOSH:
6208     CASE_CFN_ASIN:
6209     CASE_CFN_ASINH:
6210     CASE_CFN_ATAN:
6211     CASE_CFN_ATANH:
6212     CASE_CFN_CBRT:
6213     CASE_CFN_COS:
6214     CASE_CFN_COSH:
6215     CASE_CFN_ERF:
6216     CASE_CFN_ERFC:
6217     CASE_CFN_EXP2:
6218     CASE_CFN_EXP:
6219     CASE_CFN_EXPM1:
6220     CASE_CFN_LGAMMA:
6221     CASE_CFN_LOG10:
6222     CASE_CFN_LOG1P:
6223     CASE_CFN_LOG2:
6224     CASE_CFN_LOG:
6225     CASE_CFN_SIN:
6226     CASE_CFN_SINH:
6227     CASE_CFN_SQRT:
6228     CASE_CFN_TAN:
6229     CASE_CFN_TANH:
6230       if (el_mode == DFmode && n == 2)
6231 	{
6232 	  bdecl = mathfn_built_in (double_type_node, fn);
6233 	  suffix = "d2";				/* pow -> powd2 */
6234 	}
6235       else if (el_mode == SFmode && n == 4)
6236 	{
6237 	  bdecl = mathfn_built_in (float_type_node, fn);
6238 	  suffix = "4";					/* powf -> powf4 */
6239 	}
6240       else
6241 	return NULL_TREE;
6242       if (!bdecl)
6243 	return NULL_TREE;
6244       break;
6245 
6246     default:
6247       return NULL_TREE;
6248     }
6249 
6250   gcc_assert (suffix != NULL);
6251   bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
6252   if (!bname)
6253     return NULL_TREE;
6254 
6255   strcpy (name, bname + sizeof ("__builtin_") - 1);
6256   strcat (name, suffix);
6257 
6258   if (n_args == 1)
6259     fntype = build_function_type_list (type_out, type_in, NULL);
6260   else if (n_args == 2)
6261     fntype = build_function_type_list (type_out, type_in, type_in, NULL);
6262   else
6263     gcc_unreachable ();
6264 
6265   /* Build a function declaration for the vectorized function.  */
6266   new_fndecl = build_decl (BUILTINS_LOCATION,
6267 			   FUNCTION_DECL, get_identifier (name), fntype);
6268   TREE_PUBLIC (new_fndecl) = 1;
6269   DECL_EXTERNAL (new_fndecl) = 1;
6270   DECL_IS_NOVOPS (new_fndecl) = 1;
6271   TREE_READONLY (new_fndecl) = 1;
6272 
6273   return new_fndecl;
6274 }
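
/* For illustration of the name mangling above: vectorizing powf for V4SF
   starts from __builtin_powf, strips the "__builtin_" prefix and appends
   "4", producing a declaration for the MASS routine "powf4"; the V2DF
   variant of pow would similarly produce "powd2".  */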
6275 
6276 /* Returns a function decl for a vectorized version of the builtin function
6277    with builtin function code FN and the result vector type TYPE, or NULL_TREE
6278    if it is not available.  */
6279 
6280 static tree
rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
6282 				    tree type_in)
6283 {
6284   machine_mode in_mode, out_mode;
6285   int in_n, out_n;
6286 
6287   if (TARGET_DEBUG_BUILTIN)
6288     fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
6289 	     combined_fn_name (combined_fn (fn)),
6290 	     GET_MODE_NAME (TYPE_MODE (type_out)),
6291 	     GET_MODE_NAME (TYPE_MODE (type_in)));
6292 
6293   if (TREE_CODE (type_out) != VECTOR_TYPE
6294       || TREE_CODE (type_in) != VECTOR_TYPE
6295       || !TARGET_VECTORIZE_BUILTINS)
6296     return NULL_TREE;
6297 
6298   out_mode = TYPE_MODE (TREE_TYPE (type_out));
6299   out_n = TYPE_VECTOR_SUBPARTS (type_out);
6300   in_mode = TYPE_MODE (TREE_TYPE (type_in));
6301   in_n = TYPE_VECTOR_SUBPARTS (type_in);
6302 
6303   switch (fn)
6304     {
6305     CASE_CFN_COPYSIGN:
6306       if (VECTOR_UNIT_VSX_P (V2DFmode)
6307 	  && out_mode == DFmode && out_n == 2
6308 	  && in_mode == DFmode && in_n == 2)
6309 	return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
6310       if (VECTOR_UNIT_VSX_P (V4SFmode)
6311 	  && out_mode == SFmode && out_n == 4
6312 	  && in_mode == SFmode && in_n == 4)
6313 	return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
6314       if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6315 	  && out_mode == SFmode && out_n == 4
6316 	  && in_mode == SFmode && in_n == 4)
6317 	return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
6318       break;
6319     CASE_CFN_CEIL:
6320       if (VECTOR_UNIT_VSX_P (V2DFmode)
6321 	  && out_mode == DFmode && out_n == 2
6322 	  && in_mode == DFmode && in_n == 2)
6323 	return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
6324       if (VECTOR_UNIT_VSX_P (V4SFmode)
6325 	  && out_mode == SFmode && out_n == 4
6326 	  && in_mode == SFmode && in_n == 4)
6327 	return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
6328       if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6329 	  && out_mode == SFmode && out_n == 4
6330 	  && in_mode == SFmode && in_n == 4)
6331 	return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
6332       break;
6333     CASE_CFN_FLOOR:
6334       if (VECTOR_UNIT_VSX_P (V2DFmode)
6335 	  && out_mode == DFmode && out_n == 2
6336 	  && in_mode == DFmode && in_n == 2)
6337 	return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
6338       if (VECTOR_UNIT_VSX_P (V4SFmode)
6339 	  && out_mode == SFmode && out_n == 4
6340 	  && in_mode == SFmode && in_n == 4)
6341 	return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
6342       if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6343 	  && out_mode == SFmode && out_n == 4
6344 	  && in_mode == SFmode && in_n == 4)
6345 	return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
6346       break;
6347     CASE_CFN_FMA:
6348       if (VECTOR_UNIT_VSX_P (V2DFmode)
6349 	  && out_mode == DFmode && out_n == 2
6350 	  && in_mode == DFmode && in_n == 2)
6351 	return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
6352       if (VECTOR_UNIT_VSX_P (V4SFmode)
6353 	  && out_mode == SFmode && out_n == 4
6354 	  && in_mode == SFmode && in_n == 4)
6355 	return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
6356       if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6357 	  && out_mode == SFmode && out_n == 4
6358 	  && in_mode == SFmode && in_n == 4)
6359 	return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
6360       break;
6361     CASE_CFN_TRUNC:
6362       if (VECTOR_UNIT_VSX_P (V2DFmode)
6363 	  && out_mode == DFmode && out_n == 2
6364 	  && in_mode == DFmode && in_n == 2)
6365 	return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
6366       if (VECTOR_UNIT_VSX_P (V4SFmode)
6367 	  && out_mode == SFmode && out_n == 4
6368 	  && in_mode == SFmode && in_n == 4)
6369 	return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
6370       if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6371 	  && out_mode == SFmode && out_n == 4
6372 	  && in_mode == SFmode && in_n == 4)
6373 	return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
6374       break;
6375     CASE_CFN_NEARBYINT:
6376       if (VECTOR_UNIT_VSX_P (V2DFmode)
6377 	  && flag_unsafe_math_optimizations
6378 	  && out_mode == DFmode && out_n == 2
6379 	  && in_mode == DFmode && in_n == 2)
6380 	return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
6381       if (VECTOR_UNIT_VSX_P (V4SFmode)
6382 	  && flag_unsafe_math_optimizations
6383 	  && out_mode == SFmode && out_n == 4
6384 	  && in_mode == SFmode && in_n == 4)
6385 	return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
6386       break;
6387     CASE_CFN_RINT:
6388       if (VECTOR_UNIT_VSX_P (V2DFmode)
6389 	  && !flag_trapping_math
6390 	  && out_mode == DFmode && out_n == 2
6391 	  && in_mode == DFmode && in_n == 2)
6392 	return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
6393       if (VECTOR_UNIT_VSX_P (V4SFmode)
6394 	  && !flag_trapping_math
6395 	  && out_mode == SFmode && out_n == 4
6396 	  && in_mode == SFmode && in_n == 4)
6397 	return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
6398       break;
6399     default:
6400       break;
6401     }
6402 
6403   /* Generate calls to libmass if appropriate.  */
6404   if (rs6000_veclib_handler)
6405     return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
6406 
6407   return NULL_TREE;
6408 }
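
/* For illustration: a ceil on V2DF with VSX enabled maps to
   VSX_BUILTIN_XVRDPIP (the xvrdpip instruction), while the same call on a
   target with only AltiVec has no V2DF form, so NULL_TREE is returned
   unless the veclib handler can supply a MASS routine.  */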
6409 
6410 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION.  */
6411 
6412 static tree
rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
6414 				       tree type_in)
6415 {
6416   machine_mode in_mode, out_mode;
6417   int in_n, out_n;
6418 
6419   if (TARGET_DEBUG_BUILTIN)
6420     fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
6421 	     IDENTIFIER_POINTER (DECL_NAME (fndecl)),
6422 	     GET_MODE_NAME (TYPE_MODE (type_out)),
6423 	     GET_MODE_NAME (TYPE_MODE (type_in)));
6424 
6425   if (TREE_CODE (type_out) != VECTOR_TYPE
6426       || TREE_CODE (type_in) != VECTOR_TYPE
6427       || !TARGET_VECTORIZE_BUILTINS)
6428     return NULL_TREE;
6429 
6430   out_mode = TYPE_MODE (TREE_TYPE (type_out));
6431   out_n = TYPE_VECTOR_SUBPARTS (type_out);
6432   in_mode = TYPE_MODE (TREE_TYPE (type_in));
6433   in_n = TYPE_VECTOR_SUBPARTS (type_in);
6434 
6435   enum rs6000_builtins fn
6436     = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
6437   switch (fn)
6438     {
6439     case RS6000_BUILTIN_RSQRTF:
6440       if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
6441 	  && out_mode == SFmode && out_n == 4
6442 	  && in_mode == SFmode && in_n == 4)
6443 	return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
6444       break;
6445     case RS6000_BUILTIN_RSQRT:
6446       if (VECTOR_UNIT_VSX_P (V2DFmode)
6447 	  && out_mode == DFmode && out_n == 2
6448 	  && in_mode == DFmode && in_n == 2)
6449 	return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
6450       break;
6451     case RS6000_BUILTIN_RECIPF:
6452       if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
6453 	  && out_mode == SFmode && out_n == 4
6454 	  && in_mode == SFmode && in_n == 4)
6455 	return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
6456       break;
6457     case RS6000_BUILTIN_RECIP:
6458       if (VECTOR_UNIT_VSX_P (V2DFmode)
6459 	  && out_mode == DFmode && out_n == 2
6460 	  && in_mode == DFmode && in_n == 2)
6461 	return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
6462       break;
6463     default:
6464       break;
6465     }
6466   return NULL_TREE;
6467 }
6468 
6469 /* Default CPU string for rs6000*_file_start functions.  */
6470 static const char *rs6000_default_cpu;
6471 
6472 /* Do anything needed at the start of the asm file.  */
6473 
6474 static void
rs6000_file_start (void)
6476 {
6477   char buffer[80];
6478   const char *start = buffer;
6479   FILE *file = asm_out_file;
6480 
6481   rs6000_default_cpu = TARGET_CPU_DEFAULT;
6482 
6483   default_file_start ();
6484 
6485   if (flag_verbose_asm)
6486     {
6487       sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
6488 
6489       if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
6490 	{
6491 	  fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
6492 	  start = "";
6493 	}
6494 
6495       if (global_options_set.x_rs6000_cpu_index)
6496 	{
6497 	  fprintf (file, "%s -mcpu=%s", start,
6498 		   processor_target_table[rs6000_cpu_index].name);
6499 	  start = "";
6500 	}
6501 
6502       if (global_options_set.x_rs6000_tune_index)
6503 	{
6504 	  fprintf (file, "%s -mtune=%s", start,
6505 		   processor_target_table[rs6000_tune_index].name);
6506 	  start = "";
6507 	}
6508 
6509       if (PPC405_ERRATUM77)
6510 	{
6511 	  fprintf (file, "%s PPC405CR_ERRATUM77", start);
6512 	  start = "";
6513 	}
6514 
6515 #ifdef USING_ELFOS_H
6516       switch (rs6000_sdata)
6517 	{
6518 	case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
6519 	case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
6520 	case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
6521 	case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
6522 	}
6523 
6524       if (rs6000_sdata && g_switch_value)
6525 	{
	  fprintf (file, "%s -G " HOST_WIDE_INT_PRINT_UNSIGNED, start,
		   g_switch_value);
6528 	  start = "";
6529 	}
6530 #endif
6531 
6532       if (*start == '\0')
6533 	putc ('\n', file);
6534     }
6535 
6536 #ifdef USING_ELFOS_H
6537   if (!(rs6000_default_cpu && rs6000_default_cpu[0])
6538       && !global_options_set.x_rs6000_cpu_index)
6539     {
6540       fputs ("\t.machine ", asm_out_file);
6541       if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
6542 	fputs ("power9\n", asm_out_file);
6543       else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
6544 	fputs ("power8\n", asm_out_file);
6545       else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
6546 	fputs ("power7\n", asm_out_file);
6547       else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
6548 	fputs ("power6\n", asm_out_file);
6549       else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
6550 	fputs ("power5\n", asm_out_file);
6551       else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
6552 	fputs ("power4\n", asm_out_file);
6553       else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
6554 	fputs ("ppc64\n", asm_out_file);
6555       else
6556 	fputs ("ppc\n", asm_out_file);
6557     }
6558 #endif
6559 
6560   if (DEFAULT_ABI == ABI_ELFv2)
6561     fprintf (file, "\t.abiversion 2\n");
6562 }
6563 
6564 
6565 /* Return nonzero if this function is known to have a null epilogue.  */
6566 
6567 int
direct_return (void)
6569 {
6570   if (reload_completed)
6571     {
6572       rs6000_stack_t *info = rs6000_stack_info ();
6573 
6574       if (info->first_gp_reg_save == 32
6575 	  && info->first_fp_reg_save == 64
6576 	  && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
6577 	  && ! info->lr_save_p
6578 	  && ! info->cr_save_p
6579 	  && info->vrsave_size == 0
6580 	  && ! info->push_p)
6581 	return 1;
6582     }
6583 
6584   return 0;
6585 }
6586 
6587 /* Return the number of instructions it takes to form a constant in an
6588    integer register.  */
6589 
6590 int
num_insns_constant_wide (HOST_WIDE_INT value)
6592 {
6593   /* signed constant loadable with addi */
6594   if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
6595     return 1;
6596 
6597   /* constant loadable with addis */
6598   else if ((value & 0xffff) == 0
6599 	   && (value >> 31 == -1 || value >> 31 == 0))
6600     return 1;
6601 
6602   else if (TARGET_POWERPC64)
6603     {
6604       HOST_WIDE_INT low  = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
6605       HOST_WIDE_INT high = value >> 31;
6606 
6607       if (high == 0 || high == -1)
6608 	return 2;
6609 
6610       high >>= 1;
6611 
6612       if (low == 0)
6613 	return num_insns_constant_wide (high) + 1;
6614       else if (high == 0)
6615 	return num_insns_constant_wide (low) + 1;
6616       else
6617 	return (num_insns_constant_wide (high)
6618 		+ num_insns_constant_wide (low) + 1);
6619     }
6620 
6621   else
6622     return 2;
6623 }
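
/* Worked example: on a 64-bit target, 0x1234567800000000 splits into
   low == 0 and (after the shifts) high == 0x12345678, so the result is
   num_insns_constant_wide (0x12345678) + 1 == 3, matching a sequence such
   as lis/ori to form the high part followed by a 32-bit shift.  */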
6624 
6625 int
num_insns_constant (rtx op, machine_mode mode)
6627 {
6628   HOST_WIDE_INT low, high;
6629 
6630   switch (GET_CODE (op))
6631     {
6632     case CONST_INT:
6633       if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
6634 	  && rs6000_is_valid_and_mask (op, mode))
6635 	return 2;
6636       else
6637 	return num_insns_constant_wide (INTVAL (op));
6638 
6639     case CONST_WIDE_INT:
6640       {
6641 	int i;
6642 	int ins = CONST_WIDE_INT_NUNITS (op) - 1;
6643 	for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6644 	  ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
6645 	return ins;
6646       }
6647 
    case CONST_DOUBLE:
      if (mode == SFmode || mode == SDmode)
        {
          long l;

          if (DECIMAL_FLOAT_MODE_P (mode))
            REAL_VALUE_TO_TARGET_DECIMAL32
              (*CONST_DOUBLE_REAL_VALUE (op), l);
          else
            REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l);
          return num_insns_constant_wide ((HOST_WIDE_INT) l);
        }

      long l[2];
      if (DECIMAL_FLOAT_MODE_P (mode))
        REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l);
      else
        REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l);
      high = l[WORDS_BIG_ENDIAN == 0];
      low  = l[WORDS_BIG_ENDIAN != 0];

      if (TARGET_32BIT)
        return (num_insns_constant_wide (low)
                + num_insns_constant_wide (high));
      else
        {
          if ((high == 0 && low >= 0)
              || (high == -1 && low < 0))
            return num_insns_constant_wide (low);

          else if (rs6000_is_valid_and_mask (op, mode))
            return 2;

          else if (low == 0)
            return num_insns_constant_wide (high) + 1;

          else
            return (num_insns_constant_wide (high)
                    + num_insns_constant_wide (low) + 1);
        }
6688 
6689     default:
6690       gcc_unreachable ();
6691     }
6692 }
6693 
6694 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6695    If the mode of OP is MODE_VECTOR_INT, this simply returns the
6696    corresponding element of the vector, but for V4SFmode and V2SFmode,
6697    the corresponding "float" is interpreted as an SImode integer.  */
6698 
6699 HOST_WIDE_INT
const_vector_elt_as_int (rtx op, unsigned int elt)
6701 {
6702   rtx tmp;
6703 
6704   /* We can't handle V2DImode and V2DFmode vector constants here yet.  */
6705   gcc_assert (GET_MODE (op) != V2DImode
6706 	      && GET_MODE (op) != V2DFmode);
6707 
6708   tmp = CONST_VECTOR_ELT (op, elt);
6709   if (GET_MODE (op) == V4SFmode
6710       || GET_MODE (op) == V2SFmode)
6711     tmp = gen_lowpart (SImode, tmp);
6712   return INTVAL (tmp);
6713 }
6714 
6715 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6716    or vspltisw instruction.  OP is a CONST_VECTOR.  Which instruction is used
6717    depends on STEP and COPIES, one of which will be 1.  If COPIES > 1,
6718    all items are set to the same value and contain COPIES replicas of the
6719    vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6720    operand and the others are set to the value of the operand's msb.  */
6721 
6722 static bool
vspltis_constant (rtx op, unsigned step, unsigned copies)
6724 {
6725   machine_mode mode = GET_MODE (op);
6726   machine_mode inner = GET_MODE_INNER (mode);
6727 
6728   unsigned i;
6729   unsigned nunits;
6730   unsigned bitsize;
6731   unsigned mask;
6732 
6733   HOST_WIDE_INT val;
6734   HOST_WIDE_INT splat_val;
6735   HOST_WIDE_INT msb_val;
6736 
6737   if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6738     return false;
6739 
6740   nunits = GET_MODE_NUNITS (mode);
6741   bitsize = GET_MODE_BITSIZE (inner);
6742   mask = GET_MODE_MASK (inner);
6743 
6744   val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6745   splat_val = val;
6746   msb_val = val >= 0 ? 0 : -1;
6747 
  /* Construct the value to be splatted, if possible.  If not, return false.  */
6749   for (i = 2; i <= copies; i *= 2)
6750     {
6751       HOST_WIDE_INT small_val;
6752       bitsize /= 2;
6753       small_val = splat_val >> bitsize;
6754       mask >>= bitsize;
6755       if (splat_val != ((HOST_WIDE_INT)
6756           ((unsigned HOST_WIDE_INT) small_val << bitsize)
6757           | (small_val & mask)))
6758 	return false;
6759       splat_val = small_val;
6760     }
6761 
6762   /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw].  */
6763   if (EASY_VECTOR_15 (splat_val))
6764     ;
6765 
  /* Also check if we can splat, and then add the result to itself.  Do so if
     the value is positive, or if the splat instruction is using OP's mode;
     for splat_val < 0, the splat and the add should use the same mode.  */
6769   else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6770            && (splat_val >= 0 || (step == 1 && copies == 1)))
6771     ;
6772 
  /* Also check if we are loading up the most significant bit, which can be
     done by loading up -1 and shifting the value left by -1.  */
6775   else if (EASY_VECTOR_MSB (splat_val, inner))
6776     ;
6777 
6778   else
6779     return false;
6780 
6781   /* Check if VAL is present in every STEP-th element, and the
6782      other elements are filled with its most significant bit.  */
6783   for (i = 1; i < nunits; ++i)
6784     {
6785       HOST_WIDE_INT desired_val;
6786       unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6787       if ((i & (step - 1)) == 0)
6788 	desired_val = val;
6789       else
6790 	desired_val = msb_val;
6791 
6792       if (desired_val != const_vector_elt_as_int (op, elt))
6793 	return false;
6794     }
6795 
6796   return true;
6797 }
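
/* Worked example: the V8HImode vector with every element 0x0505 succeeds
   with step == 1 and copies == 2: halving 0x0505 gives small_val == 5,
   which reassembles to 0x0505 and satisfies EASY_VECTOR_15, so the vector
   can be generated with vspltisb 5.  */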
6798 
/* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6800    instruction, filling in the bottom elements with 0 or -1.
6801 
6802    Return 0 if the constant cannot be generated with VSLDOI.  Return positive
6803    for the number of zeroes to shift in, or negative for the number of 0xff
6804    bytes to shift in.
6805 
6806    OP is a CONST_VECTOR.  */
6807 
6808 int
vspltis_shifted (rtx op)
6810 {
6811   machine_mode mode = GET_MODE (op);
6812   machine_mode inner = GET_MODE_INNER (mode);
6813 
6814   unsigned i, j;
6815   unsigned nunits;
6816   unsigned mask;
6817 
6818   HOST_WIDE_INT val;
6819 
6820   if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
    return 0;
6822 
6823   /* We need to create pseudo registers to do the shift, so don't recognize
6824      shift vector constants after reload.  */
6825   if (!can_create_pseudo_p ())
    return 0;
6827 
6828   nunits = GET_MODE_NUNITS (mode);
6829   mask = GET_MODE_MASK (inner);
6830 
6831   val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6832 
6833   /* Check if the value can really be the operand of a vspltis[bhw].  */
6834   if (EASY_VECTOR_15 (val))
6835     ;
6836 
  /* Also check if we are loading up the most significant bit, which can be
     done by loading up -1 and shifting the value left by -1.  */
6839   else if (EASY_VECTOR_MSB (val, inner))
6840     ;
6841 
6842   else
6843     return 0;
6844 
6845   /* Check if VAL is present in every STEP-th element until we find elements
6846      that are 0 or all 1 bits.  */
6847   for (i = 1; i < nunits; ++i)
6848     {
6849       unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6850       HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6851 
6852       /* If the value isn't the splat value, check for the remaining elements
6853 	 being 0/-1.  */
6854       if (val != elt_val)
6855 	{
6856 	  if (elt_val == 0)
6857 	    {
6858 	      for (j = i+1; j < nunits; ++j)
6859 		{
6860 		  unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6861 		  if (const_vector_elt_as_int (op, elt2) != 0)
6862 		    return 0;
6863 		}
6864 
6865 	      return (nunits - i) * GET_MODE_SIZE (inner);
6866 	    }
6867 
6868 	  else if ((elt_val & mask) == mask)
6869 	    {
6870 	      for (j = i+1; j < nunits; ++j)
6871 		{
6872 		  unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6873 		  if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6874 		    return 0;
6875 		}
6876 
6877 	      return -((nunits - i) * GET_MODE_SIZE (inner));
6878 	    }
6879 
6880 	  else
6881 	    return 0;
6882 	}
6883     }
6884 
  /* If all elements are equal, we don't need to do VSLDOI.  */
6886   return 0;
6887 }
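
/* Worked example: on a big-endian target, the V4SImode vector { 5, 0, 0, 0 }
   splats 5 from element 0 and finds zeros from i == 1 on, so the return
   value is (4 - 1) * 4 == 12, i.e. vspltisw 5 followed by a VSLDOI that
   shifts in 12 zero bytes.  */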
6888 
6889 
6890 /* Return true if OP is of the given MODE and can be synthesized
6891    with a vspltisb, vspltish or vspltisw.  */
6892 
6893 bool
easy_altivec_constant (rtx op, machine_mode mode)
6895 {
6896   unsigned step, copies;
6897 
6898   if (mode == VOIDmode)
6899     mode = GET_MODE (op);
6900   else if (mode != GET_MODE (op))
6901     return false;
6902 
6903   /* V2DI/V2DF was added with VSX.  Only allow 0 and all 1's as easy
6904      constants.  */
6905   if (mode == V2DFmode)
6906     return zero_constant (op, mode);
6907 
6908   else if (mode == V2DImode)
6909     {
6910       if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
6911 	  || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
6912 	return false;
6913 
6914       if (zero_constant (op, mode))
6915 	return true;
6916 
6917       if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6918 	  && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6919 	return true;
6920 
6921       return false;
6922     }
6923 
6924   /* V1TImode is a special container for TImode.  Ignore for now.  */
6925   else if (mode == V1TImode)
6926     return false;
6927 
6928   /* Start with a vspltisw.  */
6929   step = GET_MODE_NUNITS (mode) / 4;
6930   copies = 1;
6931 
6932   if (vspltis_constant (op, step, copies))
6933     return true;
6934 
6935   /* Then try with a vspltish.  */
6936   if (step == 1)
6937     copies <<= 1;
6938   else
6939     step >>= 1;
6940 
6941   if (vspltis_constant (op, step, copies))
6942     return true;
6943 
6944   /* And finally a vspltisb.  */
6945   if (step == 1)
6946     copies <<= 1;
6947   else
6948     step >>= 1;
6949 
6950   if (vspltis_constant (op, step, copies))
6951     return true;
6952 
6953   if (vspltis_shifted (op) != 0)
6954     return true;
6955 
6956   return false;
6957 }
6958 
6959 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6960    result is OP.  Abort if it is not possible.  */
6961 
6962 rtx
gen_easy_altivec_constant (rtx op)
6964 {
6965   machine_mode mode = GET_MODE (op);
6966   int nunits = GET_MODE_NUNITS (mode);
6967   rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6968   unsigned step = nunits / 4;
6969   unsigned copies = 1;
6970 
6971   /* Start with a vspltisw.  */
6972   if (vspltis_constant (op, step, copies))
6973     return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6974 
6975   /* Then try with a vspltish.  */
6976   if (step == 1)
6977     copies <<= 1;
6978   else
6979     step >>= 1;
6980 
6981   if (vspltis_constant (op, step, copies))
6982     return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6983 
6984   /* And finally a vspltisb.  */
6985   if (step == 1)
6986     copies <<= 1;
6987   else
6988     step >>= 1;
6989 
6990   if (vspltis_constant (op, step, copies))
6991     return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6992 
6993   gcc_unreachable ();
6994 }
6995 
6996 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6997    instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6998 
   Return the number of instructions needed (1 or 2) in the location pointed
   to by NUM_INSNS_PTR.

   Return the constant byte that is being splatted via CONSTANT_PTR.  */
7003 
7004 bool
xxspltib_constant_p (rtx op,
7006 		     machine_mode mode,
7007 		     int *num_insns_ptr,
7008 		     int *constant_ptr)
7009 {
7010   size_t nunits = GET_MODE_NUNITS (mode);
7011   size_t i;
7012   HOST_WIDE_INT value;
7013   rtx element;
7014 
  /* Set the returned values to out-of-range values.  */
7016   *num_insns_ptr = -1;
7017   *constant_ptr = 256;
7018 
7019   if (!TARGET_P9_VECTOR)
7020     return false;
7021 
7022   if (mode == VOIDmode)
7023     mode = GET_MODE (op);
7024 
7025   else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
7026     return false;
7027 
7028   /* Handle (vec_duplicate <constant>).  */
7029   if (GET_CODE (op) == VEC_DUPLICATE)
7030     {
7031       if (mode != V16QImode && mode != V8HImode && mode != V4SImode
7032 	  && mode != V2DImode)
7033 	return false;
7034 
7035       element = XEXP (op, 0);
7036       if (!CONST_INT_P (element))
7037 	return false;
7038 
7039       value = INTVAL (element);
7040       if (!IN_RANGE (value, -128, 127))
7041 	return false;
7042     }
7043 
7044   /* Handle (const_vector [...]).  */
7045   else if (GET_CODE (op) == CONST_VECTOR)
7046     {
7047       if (mode != V16QImode && mode != V8HImode && mode != V4SImode
7048 	  && mode != V2DImode)
7049 	return false;
7050 
7051       element = CONST_VECTOR_ELT (op, 0);
7052       if (!CONST_INT_P (element))
7053 	return false;
7054 
7055       value = INTVAL (element);
7056       if (!IN_RANGE (value, -128, 127))
7057 	return false;
7058 
7059       for (i = 1; i < nunits; i++)
7060 	{
7061 	  element = CONST_VECTOR_ELT (op, i);
7062 	  if (!CONST_INT_P (element))
7063 	    return false;
7064 
7065 	  if (value != INTVAL (element))
7066 	    return false;
7067 	}
7068     }
7069 
7070   /* Handle integer constants being loaded into the upper part of the VSX
7071      register as a scalar.  If the value isn't 0/-1, only allow it if the mode
     can go in Altivec registers.  Prefer VSPLTISW/VUPKHSW over XXSPLTIB.  */
7073   else if (CONST_INT_P (op))
7074     {
7075       if (!SCALAR_INT_MODE_P (mode))
7076 	return false;
7077 
7078       value = INTVAL (op);
7079       if (!IN_RANGE (value, -128, 127))
7080 	return false;
7081 
7082       if (!IN_RANGE (value, -1, 0))
7083 	{
7084 	  if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
7085 	    return false;
7086 
7087 	  if (EASY_VECTOR_15 (value))
7088 	    return false;
7089 	}
7090     }
7091 
7092   else
7093     return false;
7094 
7095   /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
7096      sign extend.  Special case 0/-1 to allow getting any VSX register instead
7097      of an Altivec register.  */
7098   if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
7099       && EASY_VECTOR_15 (value))
7100     return false;
7101 
7102   /* Return # of instructions and the constant byte for XXSPLTIB.  */
7103   if (mode == V16QImode)
7104     *num_insns_ptr = 1;
7105 
7106   else if (IN_RANGE (value, -1, 0))
7107     *num_insns_ptr = 1;
7108 
7109   else
7110     *num_insns_ptr = 2;
7111 
7112   *constant_ptr = (int) value;
7113   return true;
7114 }
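
/* For illustration: splatting 37 across V8HImode is out of range for
   vspltish (EASY_VECTOR_15 fails), so this returns true with *CONSTANT_PTR
   == 37 and *NUM_INSNS_PTR == 2: an xxspltib of the byte followed by a
   sign-extending unpack such as vupkhsb.  */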
7115 
7116 const char *
output_vec_const_move (rtx *operands)
7118 {
7119   int cst, cst2, shift;
7120   machine_mode mode;
7121   rtx dest, vec;
7122 
7123   dest = operands[0];
7124   vec = operands[1];
7125   mode = GET_MODE (dest);
7126 
7127   if (TARGET_VSX)
7128     {
7129       bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
7130       int xxspltib_value = 256;
7131       int num_insns = -1;
7132 
7133       if (zero_constant (vec, mode))
7134 	{
7135 	  if (TARGET_P9_VECTOR)
7136 	    return "xxspltib %x0,0";
7137 
7138 	  else if (dest_vmx_p)
7139 	    return "vspltisw %0,0";
7140 
7141 	  else
7142 	    return "xxlxor %x0,%x0,%x0";
7143 	}
7144 
7145       if (all_ones_constant (vec, mode))
7146 	{
7147 	  if (TARGET_P9_VECTOR)
7148 	    return "xxspltib %x0,255";
7149 
7150 	  else if (dest_vmx_p)
7151 	    return "vspltisw %0,-1";
7152 
7153 	  else if (TARGET_P8_VECTOR)
7154 	    return "xxlorc %x0,%x0,%x0";
7155 
7156 	  else
7157 	    gcc_unreachable ();
7158 	}
7159 
7160       if (TARGET_P9_VECTOR
7161 	  && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
7162 	{
7163 	  if (num_insns == 1)
7164 	    {
7165 	      operands[2] = GEN_INT (xxspltib_value & 0xff);
7166 	      return "xxspltib %x0,%2";
7167 	    }
7168 
7169 	  return "#";
7170 	}
7171     }
7172 
7173   if (TARGET_ALTIVEC)
7174     {
7175       rtx splat_vec;
7176 
7177       gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
7178       if (zero_constant (vec, mode))
7179 	return "vspltisw %0,0";
7180 
7181       if (all_ones_constant (vec, mode))
7182 	return "vspltisw %0,-1";
7183 
7184       /* Do we need to construct a value using VSLDOI?  */
7185       shift = vspltis_shifted (vec);
7186       if (shift != 0)
7187 	return "#";
7188 
7189       splat_vec = gen_easy_altivec_constant (vec);
7190       gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
7191       operands[1] = XEXP (splat_vec, 0);
7192       if (!EASY_VECTOR_15 (INTVAL (operands[1])))
7193 	return "#";
7194 
7195       switch (GET_MODE (splat_vec))
7196 	{
7197 	case E_V4SImode:
7198 	  return "vspltisw %0,%1";
7199 
7200 	case E_V8HImode:
7201 	  return "vspltish %0,%1";
7202 
7203 	case E_V16QImode:
7204 	  return "vspltisb %0,%1";
7205 
7206 	default:
7207 	  gcc_unreachable ();
7208 	}
7209     }
7210 
7211   gcc_assert (TARGET_SPE);
7212 
  /* Vector constant 0 is handled by a splitter for V2SI, and directly in
     the move patterns for V1DI, V4HI, and V2SF.

     FIXME: We should probably return # and add post reload
     splitters for these, but this way is so easy ;-).  */
7218   cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
7219   cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
7220   operands[1] = CONST_VECTOR_ELT (vec, 0);
7221   operands[2] = CONST_VECTOR_ELT (vec, 1);
7222   if (cst == cst2)
7223     return "li %0,%1\n\tevmergelo %0,%0,%0";
7224   else if (WORDS_BIG_ENDIAN)
7225     return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
7226   else
7227     return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
7228 }
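
/* Example outputs: a zero V4SImode constant becomes "xxspltib %x0,0" on
   ISA 3.0, "vspltisw %0,0" when the destination is an AltiVec register,
   and "xxlxor %x0,%x0,%x0" otherwise; a splat of 7 into an AltiVec
   register becomes "vspltisw %0,7".  */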
7229 
7230 /* Initialize TARGET of vector PAIRED to VALS.  */
7231 
7232 void
paired_expand_vector_init (rtx target, rtx vals)
7234 {
7235   machine_mode mode = GET_MODE (target);
7236   int n_elts = GET_MODE_NUNITS (mode);
7237   int n_var = 0;
7238   rtx x, new_rtx, tmp, constant_op, op1, op2;
7239   int i;
7240 
7241   for (i = 0; i < n_elts; ++i)
7242     {
7243       x = XVECEXP (vals, 0, i);
7244       if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
7245 	++n_var;
7246     }
7247   if (n_var == 0)
7248     {
7249       /* Load from constant pool.  */
7250       emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
7251       return;
7252     }
7253 
7254   if (n_var == 2)
7255     {
7256       /* The vector is initialized only with non-constants.  */
7257       new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
7258 				XVECEXP (vals, 0, 1));
7259 
7260       emit_move_insn (target, new_rtx);
7261       return;
7262     }
7263 
7264   /* One field is non-constant and the other one is a constant.  Load the
7265      constant from the constant pool and use ps_merge instruction to
7266      construct the whole vector.  */
7267   op1 = XVECEXP (vals, 0, 0);
7268   op2 = XVECEXP (vals, 0, 1);
7269 
7270   constant_op = (CONSTANT_P (op1)) ? op1 : op2;
7271 
7272   tmp = gen_reg_rtx (GET_MODE (constant_op));
7273   emit_move_insn (tmp, constant_op);
7274 
7275   if (CONSTANT_P (op1))
7276     new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
7277   else
7278     new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
7279 
7280   emit_move_insn (target, new_rtx);
7281 }
7282 
7283 void
paired_expand_vector_move (rtx operands[])
7285 {
7286   rtx op0 = operands[0], op1 = operands[1];
7287 
7288   emit_move_insn (op0, op1);
7289 }
7290 
7291 /* Emit vector compare for code RCODE.  DEST is destination, OP1 and
7292    OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
7293    operands for the relation operation COND.  This is a recursive
7294    function.  */
7295 
7296 static void
paired_emit_vector_compare (enum rtx_code rcode,
7298                             rtx dest, rtx op0, rtx op1,
7299                             rtx cc_op0, rtx cc_op1)
7300 {
7301   rtx tmp = gen_reg_rtx (V2SFmode);
7302   rtx tmp1, max, min;
7303 
7304   gcc_assert (TARGET_PAIRED_FLOAT);
7305   gcc_assert (GET_MODE (op0) == GET_MODE (op1));
7306 
7307   switch (rcode)
7308     {
7309     case LT:
7310     case LTU:
7311       paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
7312       return;
7313     case GE:
7314     case GEU:
7315       emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
7316       emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
7317       return;
7318     case LE:
7319     case LEU:
7320       paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
7321       return;
7322     case GT:
7323       paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
7324       return;
7325     case EQ:
7326       tmp1 = gen_reg_rtx (V2SFmode);
7327       max = gen_reg_rtx (V2SFmode);
7328       min = gen_reg_rtx (V2SFmode);
7330 
7331       emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
7332       emit_insn (gen_selv2sf4
7333                  (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
7334       emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
7335       emit_insn (gen_selv2sf4
7336                  (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
7337       emit_insn (gen_subv2sf3 (tmp1, min, max));
7338       emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
7339       return;
7340     case NE:
7341       paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
7342       return;
7343     case UNLE:
7344       paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
7345       return;
7346     case UNLT:
7347       paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
7348       return;
7349     case UNGE:
7350       paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
7351       return;
7352     case UNGT:
7353       paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
7354       return;
7355     default:
7356       gcc_unreachable ();
7357     }
7360 }
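
/* Note on the EQ case above: the two subtract/select pairs leave the
   elementwise maximum in MAX and minimum in MIN, so MIN - MAX >= 0 holds
   exactly where the operands are equal, which is the condition fed to the
   final select.  */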
7361 
7362 /* Emit vector conditional expression.
7363    DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
7364    CC_OP0 and CC_OP1 are the two operands for the relation operation COND.  */
7365 
7366 int
paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
7368 			      rtx cond, rtx cc_op0, rtx cc_op1)
7369 {
7370   enum rtx_code rcode = GET_CODE (cond);
7371 
7372   if (!TARGET_PAIRED_FLOAT)
7373     return 0;
7374 
7375   paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
7376 
7377   return 1;
7378 }
7379 
7380 /* Initialize vector TARGET to VALS.  */
7381 
7382 void
rs6000_expand_vector_init (rtx target, rtx vals)
7384 {
7385   machine_mode mode = GET_MODE (target);
7386   machine_mode inner_mode = GET_MODE_INNER (mode);
7387   int n_elts = GET_MODE_NUNITS (mode);
7388   int n_var = 0, one_var = -1;
7389   bool all_same = true, all_const_zero = true;
7390   rtx x, mem;
7391   int i;
7392 
7393   for (i = 0; i < n_elts; ++i)
7394     {
7395       x = XVECEXP (vals, 0, i);
7396       if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
7397 	++n_var, one_var = i;
7398       else if (x != CONST0_RTX (inner_mode))
7399 	all_const_zero = false;
7400 
7401       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7402 	all_same = false;
7403     }
7404 
7405   if (n_var == 0)
7406     {
7407       rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7408       bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
7409       if ((int_vector_p || TARGET_VSX) && all_const_zero)
7410 	{
7411 	  /* Zero register.  */
7412 	  emit_move_insn (target, CONST0_RTX (mode));
7413 	  return;
7414 	}
7415       else if (int_vector_p && easy_vector_constant (const_vec, mode))
7416 	{
7417 	  /* Splat immediate.  */
7418 	  emit_insn (gen_rtx_SET (target, const_vec));
7419 	  return;
7420 	}
7421       else
7422 	{
7423 	  /* Load from constant pool.  */
7424 	  emit_move_insn (target, const_vec);
7425 	  return;
7426 	}
7427     }
7428 
7429   /* Double word values on VSX can use xxpermdi or lxvdsx.  */
7430   if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
7431     {
7432       rtx op[2];
7433       size_t i;
7434       size_t num_elements = all_same ? 1 : 2;
7435       for (i = 0; i < num_elements; i++)
7436 	{
7437 	  op[i] = XVECEXP (vals, 0, i);
7438 	  /* Just in case there is a SUBREG with a smaller mode, do a
7439 	     conversion.  */
7440 	  if (GET_MODE (op[i]) != inner_mode)
7441 	    {
7442 	      rtx tmp = gen_reg_rtx (inner_mode);
7443 	      convert_move (tmp, op[i], 0);
7444 	      op[i] = tmp;
7445 	    }
7446 	  /* Allow load with splat double word.  */
7447 	  else if (MEM_P (op[i]))
7448 	    {
7449 	      if (!all_same)
7450 		op[i] = force_reg (inner_mode, op[i]);
7451 	    }
7452 	  else if (!REG_P (op[i]))
7453 	    op[i] = force_reg (inner_mode, op[i]);
7454 	}
7455 
7456       if (all_same)
7457 	{
7458 	  if (mode == V2DFmode)
7459 	    emit_insn (gen_vsx_splat_v2df (target, op[0]));
7460 	  else
7461 	    emit_insn (gen_vsx_splat_v2di (target, op[0]));
7462 	}
7463       else
7464 	{
7465 	  if (mode == V2DFmode)
7466 	    emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
7467 	  else
7468 	    emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
7469 	}
7470       return;
7471     }
7472 
7473   /* Special case initializing vector int if we are on 64-bit systems with
7474      direct move or we have the ISA 3.0 instructions.  */
7475   if (mode == V4SImode  && VECTOR_MEM_VSX_P (V4SImode)
7476       && TARGET_DIRECT_MOVE_64BIT)
7477     {
7478       if (all_same)
7479 	{
7480 	  rtx element0 = XVECEXP (vals, 0, 0);
7481 	  if (MEM_P (element0))
7482 	    element0 = rs6000_address_for_fpconvert (element0);
7483 	  else
7484 	    element0 = force_reg (SImode, element0);
7485 
7486 	  if (TARGET_P9_VECTOR)
7487 	    emit_insn (gen_vsx_splat_v4si (target, element0));
7488 	  else
7489 	    {
7490 	      rtx tmp = gen_reg_rtx (DImode);
7491 	      emit_insn (gen_zero_extendsidi2 (tmp, element0));
7492 	      emit_insn (gen_vsx_splat_v4si_di (target, tmp));
7493 	    }
7494 	  return;
7495 	}
7496       else
7497 	{
7498 	  rtx elements[4];
7499 	  size_t i;
7500 
7501 	  for (i = 0; i < 4; i++)
7502 	    {
7503 	      elements[i] = XVECEXP (vals, 0, i);
7504 	      if (!CONST_INT_P (elements[i]) && !REG_P (elements[i]))
7505 		elements[i] = copy_to_mode_reg (SImode, elements[i]);
7506 	    }
7507 
7508 	  emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
7509 					elements[2], elements[3]));
7510 	  return;
7511 	}
7512     }
7513 
  /* With single precision floating point on VSX, we know that internally
     single precision is actually represented as a double, so either make two
     V2DF vectors and convert these vectors to single precision, or do one
     conversion and splat the result to the other elements.  */
7518   if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
7519     {
7520       if (all_same)
7521 	{
7522 	  rtx element0 = XVECEXP (vals, 0, 0);
7523 
7524 	  if (TARGET_P9_VECTOR)
7525 	    {
7526 	      if (MEM_P (element0))
7527 		element0 = rs6000_address_for_fpconvert (element0);
7528 
7529 	      emit_insn (gen_vsx_splat_v4sf (target, element0));
7530 	    }
7531 
7532 	  else
7533 	    {
7534 	      rtx freg = gen_reg_rtx (V4SFmode);
7535 	      rtx sreg = force_reg (SFmode, element0);
7536 	      rtx cvt  = (TARGET_XSCVDPSPN
7537 			  ? gen_vsx_xscvdpspn_scalar (freg, sreg)
7538 			  : gen_vsx_xscvdpsp_scalar (freg, sreg));
7539 
7540 	      emit_insn (cvt);
7541 	      emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
7542 						      const0_rtx));
7543 	    }
7544 	}
7545       else
7546 	{
7547 	  rtx dbl_even = gen_reg_rtx (V2DFmode);
7548 	  rtx dbl_odd  = gen_reg_rtx (V2DFmode);
7549 	  rtx flt_even = gen_reg_rtx (V4SFmode);
7550 	  rtx flt_odd  = gen_reg_rtx (V4SFmode);
7551 	  rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
7552 	  rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
7553 	  rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
7554 	  rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
7555 
7556 	  /* Use VMRGEW if we can instead of doing a permute.  */
7557 	  if (TARGET_P8_VECTOR)
7558 	    {
7559 	      emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
7560 	      emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
7561 	      emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
7562 	      emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
7563 	      if (BYTES_BIG_ENDIAN)
7564 		emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
7565 	      else
7566 		emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
7567 	    }
7568 	  else
7569 	    {
7570 	      emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
7571 	      emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
7572 	      emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
7573 	      emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
7574 	      rs6000_expand_extract_even (target, flt_even, flt_odd);
7575 	    }
7576 	}
7577       return;
7578     }
7579 
7580   /* Special case initializing vector short/char that are splats if we are on
7581      64-bit systems with direct move.  */
7582   if (all_same && TARGET_DIRECT_MOVE_64BIT
7583       && (mode == V16QImode || mode == V8HImode))
7584     {
7585       rtx op0 = XVECEXP (vals, 0, 0);
7586       rtx di_tmp = gen_reg_rtx (DImode);
7587 
7588       if (!REG_P (op0))
7589 	op0 = force_reg (GET_MODE_INNER (mode), op0);
7590 
7591       if (mode == V16QImode)
7592 	{
7593 	  emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
7594 	  emit_insn (gen_vsx_vspltb_di (target, di_tmp));
7595 	  return;
7596 	}
7597 
7598       if (mode == V8HImode)
7599 	{
7600 	  emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
7601 	  emit_insn (gen_vsx_vsplth_di (target, di_tmp));
7602 	  return;
7603 	}
7604     }
7605 
7606   /* Store value to stack temp.  Load vector element.  Splat.  However, splat
7607      of 64-bit items is not supported on Altivec.  */
7608   if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
7609     {
7610       mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7611       emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
7612 		      XVECEXP (vals, 0, 0));
7613       x = gen_rtx_UNSPEC (VOIDmode,
7614 			  gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7615       emit_insn (gen_rtx_PARALLEL (VOIDmode,
7616 				   gen_rtvec (2,
7617 					      gen_rtx_SET (target, mem),
7618 					      x)));
7619       x = gen_rtx_VEC_SELECT (inner_mode, target,
7620 			      gen_rtx_PARALLEL (VOIDmode,
7621 						gen_rtvec (1, const0_rtx)));
7622       emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
7623       return;
7624     }
7625 
7626   /* One field is non-constant.  Load constant then overwrite
7627      varying field.  */
7628   if (n_var == 1)
7629     {
7630       rtx copy = copy_rtx (vals);
7631 
7632       /* Load constant part of vector, substitute neighboring value for
7633 	 varying element.  */
7634       XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
7635       rs6000_expand_vector_init (target, copy);
7636 
7637       /* Insert variable.  */
7638       rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
7639       return;
7640     }
7641 
7642   /* Construct the vector in memory one field at a time
7643      and load the whole vector.  */
7644   mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7645   for (i = 0; i < n_elts; i++)
7646     emit_move_insn (adjust_address_nv (mem, inner_mode,
7647 				    i * GET_MODE_SIZE (inner_mode)),
7648 		    XVECEXP (vals, 0, i));
7649   emit_move_insn (target, mem);
7650 }
7651 
7652 /* Set field ELT of TARGET to VAL.  */
7653 
7654 void
rs6000_expand_vector_set (rtx target, rtx val, int elt)
7656 {
7657   machine_mode mode = GET_MODE (target);
7658   machine_mode inner_mode = GET_MODE_INNER (mode);
7659   rtx reg = gen_reg_rtx (mode);
7660   rtx mask, mem, x;
7661   int width = GET_MODE_SIZE (inner_mode);
7662   int i;
7663 
7664   val = force_reg (GET_MODE (val), val);
7665 
7666   if (VECTOR_MEM_VSX_P (mode))
7667     {
7668       rtx insn = NULL_RTX;
7669       rtx elt_rtx = GEN_INT (elt);
7670 
7671       if (mode == V2DFmode)
7672 	insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7673 
7674       else if (mode == V2DImode)
7675 	insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7676 
7677       else if (TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
7678 	       && TARGET_UPPER_REGS_DI && TARGET_POWERPC64)
7679 	{
7680 	  if (mode == V4SImode)
7681 	    insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7682 	  else if (mode == V8HImode)
7683 	    insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7684 	  else if (mode == V16QImode)
7685 	    insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7686 	}
7687 
7688       if (insn)
7689 	{
7690 	  emit_insn (insn);
7691 	  return;
7692 	}
7693     }
7694 
7695   /* Simplify setting single element vectors like V1TImode.  */
7696   if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
7697     {
7698       emit_move_insn (target, gen_lowpart (mode, val));
7699       return;
7700     }
7701 
7702   /* Load single variable value.  */
7703   mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7704   emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7705   x = gen_rtx_UNSPEC (VOIDmode,
7706 		      gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7707   emit_insn (gen_rtx_PARALLEL (VOIDmode,
7708 			       gen_rtvec (2,
7709 					  gen_rtx_SET (reg, mem),
7710 					  x)));
7711 
7712   /* Linear sequence.  */
7713   mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7714   for (i = 0; i < 16; ++i)
7715     XVECEXP (mask, 0, i) = GEN_INT (i);
7716 
7717   /* Set permute mask to insert element into target.  */
7718   for (i = 0; i < width; ++i)
7719     XVECEXP (mask, 0, elt*width + i)
7720       = GEN_INT (i + 0x10);
7721   x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7722 
7723   if (BYTES_BIG_ENDIAN)
7724     x = gen_rtx_UNSPEC (mode,
7725 			gen_rtvec (3, target, reg,
7726 				   force_reg (V16QImode, x)),
7727 			UNSPEC_VPERM);
7728   else
7729     {
7730       if (TARGET_P9_VECTOR)
7731 	x = gen_rtx_UNSPEC (mode,
7732 			    gen_rtvec (3, target, reg,
7733 				       force_reg (V16QImode, x)),
7734 			    UNSPEC_VPERMR);
7735       else
7736 	{
7737 	  /* Invert selector.  We prefer to generate VNAND on P8 so
7738 	     that future fusion opportunities can kick in, but must
7739 	     generate VNOR elsewhere.  */
7740 	  rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7741 	  rtx iorx = (TARGET_P8_VECTOR
7742 		      ? gen_rtx_IOR (V16QImode, notx, notx)
7743 		      : gen_rtx_AND (V16QImode, notx, notx));
7744 	  rtx tmp = gen_reg_rtx (V16QImode);
7745 	  emit_insn (gen_rtx_SET (tmp, iorx));
7746 
7747 	  /* Permute with operands reversed and adjusted selector.  */
7748 	  x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7749 			      UNSPEC_VPERM);
7750 	}
7751     }
7752 
7753   emit_insn (gen_rtx_SET (target, x));
7754 }
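
/* Illustrative note on the permute mask built in rs6000_expand_vector_set:
   inserting element 1 of a V4SImode vector (width 4) on big endian yields
   the byte selector { 0 1 2 3 16 17 18 19 8 9 10 11 12 13 14 15 }, where
   selector bytes 16-19 pick the newly loaded value and the remaining bytes
   keep the original vector's contents.  */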
7755 
7756 /* Extract field ELT from VEC into TARGET.  */
7757 
7758 void
7759 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7760 {
7761   machine_mode mode = GET_MODE (vec);
7762   machine_mode inner_mode = GET_MODE_INNER (mode);
7763   rtx mem;
7764 
7765   if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7766     {
7767       switch (mode)
7768 	{
7769 	default:
7770 	  break;
7771 	case E_V1TImode:
7772 	  gcc_assert (INTVAL (elt) == 0 && inner_mode == TImode);
7773 	  emit_move_insn (target, gen_lowpart (TImode, vec));
7774 	  break;
7775 	case E_V2DFmode:
7776 	  emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7777 	  return;
7778 	case E_V2DImode:
7779 	  emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7780 	  return;
7781 	case E_V4SFmode:
7782 	  emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7783 	  return;
7784 	case E_V16QImode:
7785 	  if (TARGET_DIRECT_MOVE_64BIT)
7786 	    {
7787 	      emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7788 	      return;
7789 	    }
7790 	  else
7791 	    break;
7792 	case E_V8HImode:
7793 	  if (TARGET_DIRECT_MOVE_64BIT)
7794 	    {
7795 	      emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7796 	      return;
7797 	    }
7798 	  else
7799 	    break;
7800 	case E_V4SImode:
7801 	  if (TARGET_DIRECT_MOVE_64BIT)
7802 	    {
7803 	      emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7804 	      return;
7805 	    }
7806 	  break;
7807 	}
7808     }
7809   else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7810 	   && TARGET_DIRECT_MOVE_64BIT)
7811     {
7812       if (GET_MODE (elt) != DImode)
7813 	{
7814 	  rtx tmp = gen_reg_rtx (DImode);
7815 	  convert_move (tmp, elt, 0);
7816 	  elt = tmp;
7817 	}
7818       else if (!REG_P (elt))
7819 	elt = force_reg (DImode, elt);
7820 
7821       switch (mode)
7822 	{
7823 	case E_V2DFmode:
7824 	  emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7825 	  return;
7826 
7827 	case E_V2DImode:
7828 	  emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7829 	  return;
7830 
7831 	case E_V4SFmode:
7832 	  emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7833 	  return;
7834 
7835 	case E_V4SImode:
7836 	  emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7837 	  return;
7838 
7839 	case E_V8HImode:
7840 	  emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7841 	  return;
7842 
7843 	case E_V16QImode:
7844 	  emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7845 	  return;
7846 
7847 	default:
7848 	  gcc_unreachable ();
7849 	}
7850     }
7851 
7852   gcc_assert (CONST_INT_P (elt));
7853 
7854   /* Allocate mode-sized buffer.  */
7855   mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7856 
7857   emit_move_insn (mem, vec);
7858 
7859   /* Add offset to field within buffer matching vector element.  */
7860   mem = adjust_address_nv (mem, inner_mode,
7861 			   INTVAL (elt) * GET_MODE_SIZE (inner_mode));
7862 
7863   emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7864 }
7865 
7866 /* Helper function to return the register number of a RTX.  */
7867 static inline int
7868 regno_or_subregno (rtx op)
7869 {
7870   if (REG_P (op))
7871     return REGNO (op);
7872   else if (SUBREG_P (op))
7873     return subreg_regno (op);
7874   else
7875     gcc_unreachable ();
7876 }
7877 
7878 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7879    within the vector (ELEMENT) with a mode (SCALAR_MODE).  Use a base register
7880    temporary (BASE_TMP) to fix up the address.  Return the new memory address
7881    that is valid for reads or writes to a given register (SCALAR_REG).  */
7882 
7883 rtx
7884 rs6000_adjust_vec_address (rtx scalar_reg,
7885 			   rtx mem,
7886 			   rtx element,
7887 			   rtx base_tmp,
7888 			   machine_mode scalar_mode)
7889 {
7890   unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7891   rtx addr = XEXP (mem, 0);
7892   rtx element_offset;
7893   rtx new_addr;
7894   bool valid_addr_p;
7895 
7896   /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY.  */
7897   gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7898 
7899   /* Calculate what we need to add to the address to get the element
7900      address.  */
7901   if (CONST_INT_P (element))
7902     element_offset = GEN_INT (INTVAL (element) * scalar_size);
7903   else
7904     {
7905       int byte_shift = exact_log2 (scalar_size);
7906       gcc_assert (byte_shift >= 0);
7907 
7908       if (byte_shift == 0)
7909 	element_offset = element;
7910 
7911       else
7912 	{
7913 	  if (TARGET_POWERPC64)
7914 	    emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
7915 	  else
7916 	    emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
7917 
7918 	  element_offset = base_tmp;
7919 	}
7920     }
7921 
7922   /* Create the new address pointing to the element within the vector.  If we
7923      are adding 0, we don't have to change the address.  */
7924   if (element_offset == const0_rtx)
7925     new_addr = addr;
7926 
7927   /* A simple indirect address can be converted into a reg + offset
7928      address.  */
7929   else if (REG_P (addr) || SUBREG_P (addr))
7930     new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7931 
7932   /* Optimize D-FORM addresses with a constant offset and a constant
7933      element number, folding the element offset directly into the address.  */
7934   else if (GET_CODE (addr) == PLUS)
7935     {
7936       rtx op0 = XEXP (addr, 0);
7937       rtx op1 = XEXP (addr, 1);
7938       rtx insn;
7939 
7940       gcc_assert (REG_P (op0) || SUBREG_P (op0));
7941       if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7942 	{
7943 	  HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7944 	  rtx offset_rtx = GEN_INT (offset);
7945 
7946 	  if (IN_RANGE (offset, -32768, 32767)
7947 	      && (scalar_size < 8 || (offset & 0x3) == 0))
7948 	    new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7949 	  else
7950 	    {
7951 	      emit_move_insn (base_tmp, offset_rtx);
7952 	      new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7953 	    }
7954 	}
7955       else
7956 	{
7957 	  bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
7958 	  bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
7959 
7960 	  /* Note, ADDI requires the register being added to be a base
7961 	     register.  If the register was R0, load it up into the temporary
7962 	     and do the add.  */
7963 	  if (op1_reg_p
7964 	      && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
7965 	    {
7966 	      insn = gen_add3_insn (base_tmp, op1, element_offset);
7967 	      gcc_assert (insn != NULL_RTX);
7968 	      emit_insn (insn);
7969 	    }
7970 
7971 	  else if (ele_reg_p
7972 		   && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
7973 	    {
7974 	      insn = gen_add3_insn (base_tmp, element_offset, op1);
7975 	      gcc_assert (insn != NULL_RTX);
7976 	      emit_insn (insn);
7977 	    }
7978 
7979 	  else
7980 	    {
7981 	      emit_move_insn (base_tmp, op1);
7982 	      emit_insn (gen_add2_insn (base_tmp, element_offset));
7983 	    }
7984 
7985 	  new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7986 	}
7987     }
7988 
7989   else
7990     {
7991       emit_move_insn (base_tmp, addr);
7992       new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7993     }
7994 
7995   /* If we have a PLUS, we need to see whether the particular register class
7996      allows for D-FORM or X-FORM addressing.  */
7997   if (GET_CODE (new_addr) == PLUS)
7998     {
7999       rtx op1 = XEXP (new_addr, 1);
8000       addr_mask_type addr_mask;
8001       int scalar_regno = regno_or_subregno (scalar_reg);
8002 
8003       gcc_assert (scalar_regno < FIRST_PSEUDO_REGISTER);
8004       if (INT_REGNO_P (scalar_regno))
8005 	addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
8006 
8007       else if (FP_REGNO_P (scalar_regno))
8008 	addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
8009 
8010       else if (ALTIVEC_REGNO_P (scalar_regno))
8011 	addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
8012 
8013       else
8014 	gcc_unreachable ();
8015 
8016       if (REG_P (op1) || SUBREG_P (op1))
8017 	valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
8018       else
8019 	valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
8020     }
8021 
8022   else if (REG_P (new_addr) || SUBREG_P (new_addr))
8023     valid_addr_p = true;
8024 
8025   else
8026     valid_addr_p = false;
8027 
8028   if (!valid_addr_p)
8029     {
8030       emit_move_insn (base_tmp, new_addr);
8031       new_addr = base_tmp;
8032     }
8033 
8034   return change_address (mem, scalar_mode, new_addr);
8035 }
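
/* Illustrative example for rs6000_adjust_vec_address (names arbitrary): if
   MEM's address is (plus rB (const_int 16)), ELEMENT is (const_int 2) and
   SCALAR_MODE is DImode, the element offset is 2 * 8 = 16 and the folded
   offset 32 passes the range and alignment checks above, so the result
   addresses (plus rB (const_int 32)), assuming the register class allows
   offset addressing.  */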
8036 
8037 /* Split a variable vec_extract operation into the component instructions.  */
8038 
8039 void
8040 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
8041 			      rtx tmp_altivec)
8042 {
8043   machine_mode mode = GET_MODE (src);
8044   machine_mode scalar_mode = GET_MODE (dest);
8045   unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
8046   int byte_shift = exact_log2 (scalar_size);
8047 
8048   gcc_assert (byte_shift >= 0);
8049 
8050   /* If we are given a memory address, optimize to load just the element.  We
8051      don't have to adjust the vector element number on little endian
8052      systems.  */
8053   if (MEM_P (src))
8054     {
8055       gcc_assert (REG_P (tmp_gpr));
8056       emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
8057 						       tmp_gpr, scalar_mode));
8058       return;
8059     }
8060 
8061   else if (REG_P (src) || SUBREG_P (src))
8062     {
8063       int bit_shift = byte_shift + 3;
8064       rtx element2;
8065       int dest_regno = regno_or_subregno (dest);
8066       int src_regno = regno_or_subregno (src);
8067       int element_regno = regno_or_subregno (element);
8068 
8069       gcc_assert (REG_P (tmp_gpr));
8070 
8071       /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
8072 	 a general purpose register.  */
8073       if (TARGET_P9_VECTOR
8074 	  && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
8075 	  && INT_REGNO_P (dest_regno)
8076 	  && ALTIVEC_REGNO_P (src_regno)
8077 	  && INT_REGNO_P (element_regno))
8078 	{
8079 	  rtx dest_si = gen_rtx_REG (SImode, dest_regno);
8080 	  rtx element_si = gen_rtx_REG (SImode, element_regno);
8081 
8082 	  if (mode == V16QImode)
8083 	    emit_insn (VECTOR_ELT_ORDER_BIG
8084 		       ? gen_vextublx (dest_si, element_si, src)
8085 		       : gen_vextubrx (dest_si, element_si, src));
8086 
8087 	  else if (mode == V8HImode)
8088 	    {
8089 	      rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
8090 	      emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
8091 	      emit_insn (VECTOR_ELT_ORDER_BIG
8092 			 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
8093 			 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
8094 	    }
8095 
8096 
8097 	  else
8098 	    {
8099 	      rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
8100 	      emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
8101 	      emit_insn (VECTOR_ELT_ORDER_BIG
8102 			 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
8103 			 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
8104 	    }
8105 
8106 	  return;
8107 	}
8108 
8109 
8110       gcc_assert (REG_P (tmp_altivec));
8111 
8112       /* For little endian, adjust element ordering.  For V2DI/V2DF, we can use
8113 	 an XOR, otherwise we need to subtract.  The shift amount is chosen so
8114 	 that VSLO will shift the element into the upper position (adding 3
8115 	 converts a byte shift count into a bit shift count).  */
8116       if (scalar_size == 8)
8117 	{
8118 	  if (!VECTOR_ELT_ORDER_BIG)
8119 	    {
8120 	      emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
8121 	      element2 = tmp_gpr;
8122 	    }
8123 	  else
8124 	    element2 = element;
8125 
8126 	  /* Generate RLDIC directly to shift left 6 bits and retrieve 1
8127 	     bit.  */
8128 	  emit_insn (gen_rtx_SET (tmp_gpr,
8129 				  gen_rtx_AND (DImode,
8130 					       gen_rtx_ASHIFT (DImode,
8131 							       element2,
8132 							       GEN_INT (6)),
8133 					       GEN_INT (64))));
8134 	}
8135       else
8136 	{
8137 	  if (!VECTOR_ELT_ORDER_BIG)
8138 	    {
8139 	      rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
8140 
8141 	      emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
8142 	      emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
8143 	      element2 = tmp_gpr;
8144 	    }
8145 	  else
8146 	    element2 = element;
8147 
8148 	  emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
8149 	}
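
      /* Illustrative example: for V4SImode on little endian, ELEMENT 1 maps
	 to big-endian element 3 - 1 = 2 via the subtract above, and the
	 shift by bit_shift (5) gives 2 << 5 = 64, i.e. the 8-byte shift
	 count VSLO expects for byte offset 8.  */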
8150 
8151       /* Get the value into the lower byte of the Altivec register where VSLO
8152 	 expects it.  */
8153       if (TARGET_P9_VECTOR)
8154 	emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
8155       else if (can_create_pseudo_p ())
8156 	emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
8157       else
8158 	{
8159 	  rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8160 	  emit_move_insn (tmp_di, tmp_gpr);
8161 	  emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
8162 	}
8163 
8164       /* Do the VSLO to get the value into the final location.  */
8165       switch (mode)
8166 	{
8167 	case E_V2DFmode:
8168 	  emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
8169 	  return;
8170 
8171 	case E_V2DImode:
8172 	  emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
8173 	  return;
8174 
8175 	case E_V4SFmode:
8176 	  {
8177 	    rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8178 	    rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
8179 	    rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
8180 	    emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
8181 					  tmp_altivec));
8182 
8183 	    emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
8184 	    return;
8185 	  }
8186 
8187 	case E_V4SImode:
8188 	case E_V8HImode:
8189 	case E_V16QImode:
8190 	  {
8191 	    rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8192 	    rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
8193 	    rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
8194 	    emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
8195 					  tmp_altivec));
8196 	    emit_move_insn (tmp_gpr_di, tmp_altivec_di);
8197 	    emit_insn (gen_ashrdi3 (tmp_gpr_di, tmp_gpr_di,
8198 				    GEN_INT (64 - (8 * scalar_size))));
8199 	    return;
8200 	  }
8201 
8202 	default:
8203 	  gcc_unreachable ();
8204 	}
8205 
8206       return;
8207     }
8208   else
8209     gcc_unreachable ();
8210 }
8211 
8212 /* Helper function for rs6000_split_v4si_init to build up a DImode value from
8213    two SImode values.  */
8214 
8215 static void
8216 rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp)
8217 {
8218   const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff);
8219 
8220   if (CONST_INT_P (si1) && CONST_INT_P (si2))
8221     {
8222       unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32;
8223       unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit;
8224 
8225       emit_move_insn (dest, GEN_INT (const1 | const2));
8226       return;
8227     }
8228 
8229   /* Put si1 into upper 32-bits of dest.  */
8230   if (CONST_INT_P (si1))
8231     emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32));
8232   else
8233     {
8234       /* Generate RLDIC.  */
8235       rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1));
8236       rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32));
8237       rtx mask_rtx = GEN_INT (mask_32bit << 32);
8238       rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx);
8239       gcc_assert (!reg_overlap_mentioned_p (dest, si1));
8240       emit_insn (gen_rtx_SET (dest, and_rtx));
8241     }
8242 
8243   /* Put si2 into the temporary.  */
8244   gcc_assert (!reg_overlap_mentioned_p (dest, tmp));
8245   if (CONST_INT_P (si2))
8246     emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit));
8247   else
8248     emit_insn (gen_zero_extendsidi2 (tmp, si2));
8249 
8250   /* Combine the two parts.  */
8251   emit_insn (gen_iordi3 (dest, dest, tmp));
8252   return;
8253 }
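
/* Illustrative example for the constant path above: with si1 = 0x11112222
   and si2 = 0x33334444, a single move of 0x1111222233334444 into DEST is
   emitted.  */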
8254 
8255 /* Split a V4SI initialization.  */
8256 
8257 void
8258 rs6000_split_v4si_init (rtx operands[])
8259 {
8260   rtx dest = operands[0];
8261 
8262   /* Destination is a GPR, build up the two DImode parts in place.  */
8263   if (REG_P (dest) || SUBREG_P (dest))
8264     {
8265       int d_regno = regno_or_subregno (dest);
8266       rtx scalar1 = operands[1];
8267       rtx scalar2 = operands[2];
8268       rtx scalar3 = operands[3];
8269       rtx scalar4 = operands[4];
8270       rtx tmp1 = operands[5];
8271       rtx tmp2 = operands[6];
8272 
8273       /* Even though we only need one temporary (plus the destination, which
8274 	 has an early clobber constraint), try to use two temporaries, one for
8275 	 each double word created.  That way the 2nd insn scheduling pass can
8276 	 rearrange things so the two parts are done in parallel.  */
8277       if (BYTES_BIG_ENDIAN)
8278 	{
8279 	  rtx di_lo = gen_rtx_REG (DImode, d_regno);
8280 	  rtx di_hi = gen_rtx_REG (DImode, d_regno + 1);
8281 	  rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1);
8282 	  rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2);
8283 	}
8284       else
8285 	{
8286 	  rtx di_lo = gen_rtx_REG (DImode, d_regno + 1);
8287 	  rtx di_hi = gen_rtx_REG (DImode, d_regno);
8288 	  gcc_assert (!VECTOR_ELT_ORDER_BIG);
8289 	  rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1);
8290 	  rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2);
8291 	}
8292       return;
8293     }
8294 
8295   else
8296     gcc_unreachable ();
8297 }
8298 
8299 /* Return TRUE if OP is an invalid SUBREG operation on the e500.  */
8300 
8301 bool
8302 invalid_e500_subreg (rtx op, machine_mode mode)
8303 {
8304   if (TARGET_E500_DOUBLE)
8305     {
8306       /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
8307 	 subreg:TI and reg:TF.  Decimal float modes are like integer
8308 	 modes (only low part of each register used) for this
8309 	 purpose.  */
8310       if (GET_CODE (op) == SUBREG
8311 	  && (mode == SImode || mode == DImode || mode == TImode
8312 	      || mode == DDmode || mode == TDmode || mode == PTImode)
8313 	  && REG_P (SUBREG_REG (op))
8314 	  && (GET_MODE (SUBREG_REG (op)) == DFmode
8315 	      || GET_MODE (SUBREG_REG (op)) == TFmode
8316 	      || GET_MODE (SUBREG_REG (op)) == IFmode
8317 	      || GET_MODE (SUBREG_REG (op)) == KFmode))
8318 	return true;
8319 
8320       /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
8321 	 reg:TI.  */
8322       if (GET_CODE (op) == SUBREG
8323 	  && (mode == DFmode || mode == TFmode || mode == IFmode
8324 	      || mode == KFmode)
8325 	  && REG_P (SUBREG_REG (op))
8326 	  && (GET_MODE (SUBREG_REG (op)) == DImode
8327 	      || GET_MODE (SUBREG_REG (op)) == TImode
8328 	      || GET_MODE (SUBREG_REG (op)) == PTImode
8329 	      || GET_MODE (SUBREG_REG (op)) == DDmode
8330 	      || GET_MODE (SUBREG_REG (op)) == TDmode))
8331 	return true;
8332     }
8333 
8334   if (TARGET_SPE
8335       && GET_CODE (op) == SUBREG
8336       && mode == SImode
8337       && REG_P (SUBREG_REG (op))
8338       && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
8339     return true;
8340 
8341   return false;
8342 }
8343 
8344 /* Return alignment of TYPE.  Existing alignment is ALIGN.  HOW
8345    selects whether to apply the ABI-mandated alignment rules, the
8346    optional alignment rules, or both.  */
8347 
8348 unsigned int
8349 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
8350 {
8351   if (how != align_opt)
8352     {
8353       if (TREE_CODE (type) == VECTOR_TYPE)
8354 	{
8355 	  if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
8356 	      || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
8357 	    {
8358 	      if (align < 64)
8359 		align = 64;
8360 	    }
8361 	  else if (align < 128)
8362 	    align = 128;
8363 	}
8364       else if (TARGET_E500_DOUBLE
8365 	       && TREE_CODE (type) == REAL_TYPE
8366 	       && TYPE_MODE (type) == DFmode)
8367 	{
8368 	  if (align < 64)
8369 	    align = 64;
8370 	}
8371     }
8372 
8373   if (how != align_abi)
8374     {
8375       if (TREE_CODE (type) == ARRAY_TYPE
8376 	  && TYPE_MODE (TREE_TYPE (type)) == QImode)
8377 	{
8378 	  if (align < BITS_PER_WORD)
8379 	    align = BITS_PER_WORD;
8380 	}
8381     }
8382 
8383   return align;
8384 }
8385 
8386 /* Implement TARGET_SLOW_UNALIGNED_ACCESS.  Altivec vector memory
8387    instructions simply ignore the low bits; SPE vector memory
8388    instructions trap on unaligned accesses; VSX memory instructions are
8389    aligned to 4 or 8 bytes.  */
8390 
8391 static bool
8392 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
8393 {
8394   return (STRICT_ALIGNMENT
8395 	  || (!TARGET_EFFICIENT_UNALIGNED_VSX
8396 	      && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
8397 		  || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
8398 		      && (int) align < VECTOR_ALIGN (mode)))));
8399 }
8400 
8401 /* Previous GCC releases forced all vector types to have 16-byte alignment.  */
8402 
8403 bool
8404 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
8405 {
8406   if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
8407     {
8408       if (computed != 128)
8409 	{
8410 	  static bool warned;
8411 	  if (!warned && warn_psabi)
8412 	    {
8413 	      warned = true;
8414 	      inform (input_location,
8415 		      "the layout of aggregates containing vectors with"
8416 		      " %d-byte alignment has changed in GCC 5",
8417 		      computed / BITS_PER_UNIT);
8418 	    }
8419 	}
8420       /* In current GCC there is no special case.  */
8421       return false;
8422     }
8423 
8424   return false;
8425 }
8426 
8427 /* AIX increases natural record alignment to doubleword if the first
8428    field is an FP double while the FP fields remain word aligned.  */
8429 
8430 unsigned int
8431 rs6000_special_round_type_align (tree type, unsigned int computed,
8432 				 unsigned int specified)
8433 {
8434   unsigned int align = MAX (computed, specified);
8435   tree field = TYPE_FIELDS (type);
8436 
8437   /* Skip all non field decls */
8438   while (field != NULL && TREE_CODE (field) != FIELD_DECL)
8439     field = DECL_CHAIN (field);
8440 
8441   if (field != NULL && field != type)
8442     {
8443       type = TREE_TYPE (field);
8444       while (TREE_CODE (type) == ARRAY_TYPE)
8445 	type = TREE_TYPE (type);
8446 
8447       if (type != error_mark_node && TYPE_MODE (type) == DFmode)
8448 	align = MAX (align, 64);
8449     }
8450 
8451   return align;
8452 }
8453 
8454 /* Darwin increases record alignment to the natural alignment of
8455    the first field.  */
8456 
8457 unsigned int
8458 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
8459 					unsigned int specified)
8460 {
8461   unsigned int align = MAX (computed, specified);
8462 
8463   if (TYPE_PACKED (type))
8464     return align;
8465 
8466   /* Find the first field, looking down into aggregates.  */
8467   do {
8468     tree field = TYPE_FIELDS (type);
8469     /* Skip all non field decls */
8470     while (field != NULL && TREE_CODE (field) != FIELD_DECL)
8471       field = DECL_CHAIN (field);
8472     if (! field)
8473       break;
8474     /* A packed field does not contribute any extra alignment.  */
8475     if (DECL_PACKED (field))
8476       return align;
8477     type = TREE_TYPE (field);
8478     while (TREE_CODE (type) == ARRAY_TYPE)
8479       type = TREE_TYPE (type);
8480   } while (AGGREGATE_TYPE_P (type));
8481 
8482   if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
8483     align = MAX (align, TYPE_ALIGN (type));
8484 
8485   return align;
8486 }
8487 
8488 /* Return 1 for an operand in small memory on V.4/eabi.  */
8489 
8490 int
8491 small_data_operand (rtx op ATTRIBUTE_UNUSED,
8492 		    machine_mode mode ATTRIBUTE_UNUSED)
8493 {
8494 #if TARGET_ELF
8495   rtx sym_ref;
8496 
8497   if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
8498     return 0;
8499 
8500   if (DEFAULT_ABI != ABI_V4)
8501     return 0;
8502 
8503   /* Vector and float memory instructions have a limited offset on the
8504      SPE, so using a vector or float variable directly as an operand is
8505      not useful.  */
8506   if (TARGET_SPE
8507       && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
8508     return 0;
8509 
8510   if (GET_CODE (op) == SYMBOL_REF)
8511     sym_ref = op;
8512 
8513   else if (GET_CODE (op) != CONST
8514 	   || GET_CODE (XEXP (op, 0)) != PLUS
8515 	   || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
8516 	   || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
8517     return 0;
8518 
8519   else
8520     {
8521       rtx sum = XEXP (op, 0);
8522       HOST_WIDE_INT summand;
8523 
8524       /* We have to be careful here, because it is the referenced address
8525 	 that must be 32k from _SDA_BASE_, not just the symbol.  */
8526       summand = INTVAL (XEXP (sum, 1));
8527       if (summand < 0 || summand > g_switch_value)
8528 	return 0;
8529 
8530       sym_ref = XEXP (sum, 0);
8531     }
8532 
8533   return SYMBOL_REF_SMALL_P (sym_ref);
8534 #else
8535   return 0;
8536 #endif
8537 }
8538 
8539 /* Return true if either operand is a general purpose register.  */
8540 
8541 bool
8542 gpr_or_gpr_p (rtx op0, rtx op1)
8543 {
8544   return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
8545 	  || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
8546 }
8547 
8548 /* Return true if this is a move direct operation between GPR registers and
8549    floating point/VSX registers.  */
8550 
8551 bool
8552 direct_move_p (rtx op0, rtx op1)
8553 {
8554   int regno0, regno1;
8555 
8556   if (!REG_P (op0) || !REG_P (op1))
8557     return false;
8558 
8559   if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
8560     return false;
8561 
8562   regno0 = REGNO (op0);
8563   regno1 = REGNO (op1);
8564   if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
8565     return false;
8566 
8567   if (INT_REGNO_P (regno0))
8568     return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
8569 
8570   else if (INT_REGNO_P (regno1))
8571     {
8572       if (TARGET_MFPGPR && FP_REGNO_P (regno0))
8573 	return true;
8574 
8575       else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
8576 	return true;
8577     }
8578 
8579   return false;
8580 }
8581 
8582 /* Return true if the OFFSET is valid for the quad address instructions that
8583    use d-form (register + offset) addressing.  */
8584 
8585 static inline bool
8586 quad_address_offset_p (HOST_WIDE_INT offset)
8587 {
8588   return (IN_RANGE (offset, -32768, 32767) && ((offset) & 0xf) == 0);
8589 }
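
/* For example, offsets 0, 16 and -32768 satisfy quad_address_offset_p,
   while 24 (not a multiple of 16) and 32768 (outside the signed 16-bit
   range) do not.  */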
8590 
8591 /* Return true if the ADDR is an acceptable address for a quad memory
8592    operation of mode MODE (either LQ/STQ for general purpose registers, or
8593    LXV/STXV for vector registers under ISA 3.0).  STRICT is true if the
8594    address must be strictly legitimate, i.e. if only hard registers are
8595    acceptable as base registers, as after reload.  */
8596 
8597 bool
8598 quad_address_p (rtx addr, machine_mode mode, bool strict)
8599 {
8600   rtx op0, op1;
8601 
8602   if (GET_MODE_SIZE (mode) != 16)
8603     return false;
8604 
8605   if (legitimate_indirect_address_p (addr, strict))
8606     return true;
8607 
8608   if (VECTOR_MODE_P (mode) && !mode_supports_vsx_dform_quad (mode))
8609     return false;
8610 
8611   if (GET_CODE (addr) != PLUS)
8612     return false;
8613 
8614   op0 = XEXP (addr, 0);
8615   if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
8616     return false;
8617 
8618   op1 = XEXP (addr, 1);
8619   if (!CONST_INT_P (op1))
8620     return false;
8621 
8622   return quad_address_offset_p (INTVAL (op1));
8623 }
8624 
8625 /* Return true if this is a load or store quad operation.  This function does
8626    not handle the atomic quad memory instructions.  */
8627 
8628 bool
8629 quad_load_store_p (rtx op0, rtx op1)
8630 {
8631   bool ret;
8632 
8633   if (!TARGET_QUAD_MEMORY)
8634     ret = false;
8635 
8636   else if (REG_P (op0) && MEM_P (op1))
8637     ret = (quad_int_reg_operand (op0, GET_MODE (op0))
8638 	   && quad_memory_operand (op1, GET_MODE (op1))
8639 	   && !reg_overlap_mentioned_p (op0, op1));
8640 
8641   else if (MEM_P (op0) && REG_P (op1))
8642     ret = (quad_memory_operand (op0, GET_MODE (op0))
8643 	   && quad_int_reg_operand (op1, GET_MODE (op1)));
8644 
8645   else
8646     ret = false;
8647 
8648   if (TARGET_DEBUG_ADDR)
8649     {
8650       fprintf (stderr, "\n========== quad_load_store, return %s\n",
8651 	       ret ? "true" : "false");
8652       debug_rtx (gen_rtx_SET (op0, op1));
8653     }
8654 
8655   return ret;
8656 }
8657 
8658 /* Given an address, return a constant offset term if one exists.  */
8659 
8660 static rtx
8661 address_offset (rtx op)
8662 {
8663   if (GET_CODE (op) == PRE_INC
8664       || GET_CODE (op) == PRE_DEC)
8665     op = XEXP (op, 0);
8666   else if (GET_CODE (op) == PRE_MODIFY
8667 	   || GET_CODE (op) == LO_SUM)
8668     op = XEXP (op, 1);
8669 
8670   if (GET_CODE (op) == CONST)
8671     op = XEXP (op, 0);
8672 
8673   if (GET_CODE (op) == PLUS)
8674     op = XEXP (op, 1);
8675 
8676   if (CONST_INT_P (op))
8677     return op;
8678 
8679   return NULL_RTX;
8680 }
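
/* For example, (plus (reg) (const_int 64)) yields (const_int 64),
   (lo_sum (reg) (const (plus (symbol_ref) (const_int 8)))) yields
   (const_int 8), and a plain (reg) yields NULL_RTX.  */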
8681 
8682 /* Return true if the MEM operand is a memory operand suitable for use
8683    with a (full width, possibly multiple) gpr load/store.  On
8684    powerpc64 this means the offset must be divisible by 4.
8685    Implements 'Y' constraint.
8686 
8687    Accept direct, indexed, offset, lo_sum and tocref.  Since this is
8688    a constraint function we know the operand has satisfied a suitable
8689    memory predicate.  Also accept some odd rtl generated by reload
8690    (see rs6000_legitimize_reload_address for various forms).  It is
8691    important that reload rtl be accepted by appropriate constraints
8692    but not by the operand predicate.
8693 
8694    Offsetting a lo_sum should not be allowed, except where we know by
8695    alignment that a 32k boundary is not crossed, but see the ???
8696    comment in rs6000_legitimize_reload_address.  Note that by
8697    "offsetting" here we mean a further offset to access parts of the
8698    MEM.  It's fine to have a lo_sum where the inner address is offset
8699    from a sym, since the same sym+offset will appear in the high part
8700    of the address calculation.  */
8701 
8702 bool
8703 mem_operand_gpr (rtx op, machine_mode mode)
8704 {
8705   unsigned HOST_WIDE_INT offset;
8706   int extra;
8707   rtx addr = XEXP (op, 0);
8708 
8709   op = address_offset (addr);
8710   if (op == NULL_RTX)
8711     return true;
8712 
8713   offset = INTVAL (op);
8714   if (TARGET_POWERPC64 && (offset & 3) != 0)
8715     return false;
8716 
8717   extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8718   if (extra < 0)
8719     extra = 0;
8720 
8721   if (GET_CODE (addr) == LO_SUM)
8722     /* For lo_sum addresses, we must allow any offset except one that
8723        causes a wrap, so test only the low 16 bits.  */
8724     offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8725 
8726   return offset + 0x8000 < 0x10000u - extra;
8727 }
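
/* For example, a TImode access on powerpc64 gives extra = 8, so offset
   32756 is accepted but 32764 is rejected: the second doubleword at
   offset + 8 must still fit in the signed 16-bit displacement.  Any
   offset not divisible by 4 is rejected outright on powerpc64.  */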
8728 
8729 /* As above, but for DS-FORM VSX insns.  Unlike mem_operand_gpr,
8730    enforce an offset divisible by 4 even for 32-bit.  */
8731 
8732 bool
8733 mem_operand_ds_form (rtx op, machine_mode mode)
8734 {
8735   unsigned HOST_WIDE_INT offset;
8736   int extra;
8737   rtx addr = XEXP (op, 0);
8738 
8739   if (!offsettable_address_p (false, mode, addr))
8740     return false;
8741 
8742   op = address_offset (addr);
8743   if (op == NULL_RTX)
8744     return true;
8745 
8746   offset = INTVAL (op);
8747   if ((offset & 3) != 0)
8748     return false;
8749 
8750   extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8751   if (extra < 0)
8752     extra = 0;
8753 
8754   if (GET_CODE (addr) == LO_SUM)
8755     /* For lo_sum addresses, we must allow any offset except one that
8756        causes a wrap, so test only the low 16 bits.  */
8757     offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8758 
8759   return offset + 0x8000 < 0x10000u - extra;
8760 }
8761 
8762 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p.  */
8763 
8764 static bool
8765 reg_offset_addressing_ok_p (machine_mode mode)
8766 {
8767   switch (mode)
8768     {
8769     case E_V16QImode:
8770     case E_V8HImode:
8771     case E_V4SFmode:
8772     case E_V4SImode:
8773     case E_V2DFmode:
8774     case E_V2DImode:
8775     case E_V1TImode:
8776     case E_TImode:
8777     case E_TFmode:
8778     case E_KFmode:
8779       /* AltiVec/VSX vector modes.  Only reg+reg addressing was valid until the
8780 	 ISA 3.0 vector d-form addressing mode was added.  While TImode is not
8781 	 a vector mode, if we want to use the VSX registers to move it around,
8782 	 we need to restrict ourselves to reg+reg addressing.  Similarly for
8783 	 IEEE 128-bit floating point that is passed in a single vector
8784 	 register.  */
8785       if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8786 	return mode_supports_vsx_dform_quad (mode);
8787       break;
8788 
8789     case E_V4HImode:
8790     case E_V2SImode:
8791     case E_V1DImode:
8792     case E_V2SFmode:
8793        /* Paired vector modes.  Only reg+reg addressing is valid.  */
8794       if (TARGET_PAIRED_FLOAT)
8795         return false;
8796       break;
8797 
8798     case E_SDmode:
8799       /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8800 	 addressing for the LFIWZX and STFIWX instructions.  */
8801       if (TARGET_NO_SDMODE_STACK)
8802 	return false;
8803       break;
8804 
8805     default:
8806       break;
8807     }
8808 
8809   return true;
8810 }
8811 
8812 static bool
8813 virtual_stack_registers_memory_p (rtx op)
8814 {
8815   int regnum;
8816 
8817   if (GET_CODE (op) == REG)
8818     regnum = REGNO (op);
8819 
8820   else if (GET_CODE (op) == PLUS
8821 	   && GET_CODE (XEXP (op, 0)) == REG
8822 	   && GET_CODE (XEXP (op, 1)) == CONST_INT)
8823     regnum = REGNO (XEXP (op, 0));
8824 
8825   else
8826     return false;
8827 
8828   return (regnum >= FIRST_VIRTUAL_REGISTER
8829 	  && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8830 }
8831 
8832 /* Return true if a MODE-sized memory access to OP plus OFFSET
8833    is known not to straddle a 32k boundary.  This function is used
8834    to determine whether -mcmodel=medium code can use TOC pointer
8835    relative addressing for OP.  This means the alignment of the TOC
8836    pointer must also be taken into account, and unfortunately that is
8837    only 8 bytes.  */
8838 
8839 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8840 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8841 #endif
8842 
8843 static bool
8844 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8845 			     machine_mode mode)
8846 {
8847   tree decl;
8848   unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8849 
8850   if (GET_CODE (op) != SYMBOL_REF)
8851     return false;
8852 
8853   /* ISA 3.0 vector d-form addressing is restricted, don't allow
8854      SYMBOL_REF.  */
8855   if (mode_supports_vsx_dform_quad (mode))
8856     return false;
8857 
8858   dsize = GET_MODE_SIZE (mode);
8859   decl = SYMBOL_REF_DECL (op);
8860   if (!decl)
8861     {
8862       if (dsize == 0)
8863 	return false;
8864 
8865       /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8866 	 replacing memory addresses with an anchor plus offset.  We
8867 	 could find the decl by rummaging around in the block->objects
8868 	 VEC for the given offset but that seems like too much work.  */
8869       dalign = BITS_PER_UNIT;
8870       if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8871 	  && SYMBOL_REF_ANCHOR_P (op)
8872 	  && SYMBOL_REF_BLOCK (op) != NULL)
8873 	{
8874 	  struct object_block *block = SYMBOL_REF_BLOCK (op);
8875 
8876 	  dalign = block->alignment;
8877 	  offset += SYMBOL_REF_BLOCK_OFFSET (op);
8878 	}
8879       else if (CONSTANT_POOL_ADDRESS_P (op))
8880 	{
8881 	  /* It would be nice to have get_pool_align ().  */
8882 	  machine_mode cmode = get_pool_mode (op);
8883 
8884 	  dalign = GET_MODE_ALIGNMENT (cmode);
8885 	}
8886     }
8887   else if (DECL_P (decl))
8888     {
8889       dalign = DECL_ALIGN (decl);
8890 
8891       if (dsize == 0)
8892 	{
8893 	  /* Allow BLKmode when the entire object is known to not
8894 	     cross a 32k boundary.  */
8895 	  if (!DECL_SIZE_UNIT (decl))
8896 	    return false;
8897 
8898 	  if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8899 	    return false;
8900 
8901 	  dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8902 	  if (dsize > 32768)
8903 	    return false;
8904 
8905 	  dalign /= BITS_PER_UNIT;
8906 	  if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8907 	    dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8908 	  return dalign >= dsize;
8909 	}
8910     }
8911   else
8912     gcc_unreachable ();
8913 
8914   /* Find how many bits of the alignment we know for this access.  */
8915   dalign /= BITS_PER_UNIT;
8916   if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8917     dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8918   mask = dalign - 1;
8919   lsb = offset & -offset;
8920   mask &= lsb - 1;
8921   dalign = mask + 1;
8922 
8923   return dalign >= dsize;
8924 }
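
/* Illustrative example: for a decl with 8-byte known alignment and
   OFFSET 20, lsb is 4, so the access is only guaranteed 4-byte
   alignment; a 4-byte mode passes while an 8-byte mode does not.  */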
8925 
8926 static bool
8927 constant_pool_expr_p (rtx op)
8928 {
8929   rtx base, offset;
8930 
8931   split_const (op, &base, &offset);
8932   return (GET_CODE (base) == SYMBOL_REF
8933 	  && CONSTANT_POOL_ADDRESS_P (base)
8934 	  && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8935 }
8936 
8937 static const_rtx tocrel_base, tocrel_offset;
8938 
8939 /* Return true if OP is a toc pointer relative address (the output
8940    of create_TOC_reference).  If STRICT, do not match non-split
8941    -mcmodel=large/medium toc pointer relative addresses.  */
8942 
8943 bool
8944 toc_relative_expr_p (const_rtx op, bool strict)
8945 {
8946   if (!TARGET_TOC)
8947     return false;
8948 
8949   if (TARGET_CMODEL != CMODEL_SMALL)
8950     {
8951       /* When strict ensure we have everything tidy.  */
8952       if (strict
8953 	  && !(GET_CODE (op) == LO_SUM
8954 	       && REG_P (XEXP (op, 0))
8955 	       && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8956 	return false;
8957 
8958       /* When not strict, allow non-split TOC addresses and also allow
8959 	 (lo_sum (high ..)) TOC addresses created during reload.  */
8960       if (GET_CODE (op) == LO_SUM)
8961 	op = XEXP (op, 1);
8962     }
8963 
8964   tocrel_base = op;
8965   tocrel_offset = const0_rtx;
8966   if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8967     {
8968       tocrel_base = XEXP (op, 0);
8969       tocrel_offset = XEXP (op, 1);
8970     }
8971 
8972   return (GET_CODE (tocrel_base) == UNSPEC
8973 	  && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
8974 }
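
/* For example, (plus (unspec [(symbol_ref)] UNSPEC_TOCREL) (const_int 8))
   matches, setting tocrel_base to the UNSPEC and tocrel_offset to
   (const_int 8).  */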
8975 
8976 /* Return true if X is a constant pool address, and also for cmodel=medium
8977    if X is a toc-relative address known to be offsettable within MODE.  */
8978 
8979 bool
8980 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8981 				    bool strict)
8982 {
8983   return (toc_relative_expr_p (x, strict)
8984 	  && (TARGET_CMODEL != CMODEL_MEDIUM
8985 	      || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8986 	      || mode == QImode
8987 	      || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8988 					      INTVAL (tocrel_offset), mode)));
8989 }
8990 
8991 static bool
8992 legitimate_small_data_p (machine_mode mode, rtx x)
8993 {
8994   return (DEFAULT_ABI == ABI_V4
8995 	  && !flag_pic && !TARGET_TOC
8996 	  && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
8997 	  && small_data_operand (x, mode));
8998 }
8999 
9000 /* SPE offset addressing is limited to 5-bits worth of double words.  */
9001 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
9002 
9003 bool
9004 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
9005 				    bool strict, bool worst_case)
9006 {
9007   unsigned HOST_WIDE_INT offset;
9008   unsigned int extra;
9009 
9010   if (GET_CODE (x) != PLUS)
9011     return false;
9012   if (!REG_P (XEXP (x, 0)))
9013     return false;
9014   if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
9015     return false;
9016   if (mode_supports_vsx_dform_quad (mode))
9017     return quad_address_p (x, mode, strict);
9018   if (!reg_offset_addressing_ok_p (mode))
9019     return virtual_stack_registers_memory_p (x);
9020   if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
9021     return true;
9022   if (GET_CODE (XEXP (x, 1)) != CONST_INT)
9023     return false;
9024 
9025   offset = INTVAL (XEXP (x, 1));
9026   extra = 0;
9027   switch (mode)
9028     {
9029     case E_V4HImode:
9030     case E_V2SImode:
9031     case E_V1DImode:
9032     case E_V2SFmode:
9033       /* SPE vector modes.  */
9034       return SPE_CONST_OFFSET_OK (offset);
9035 
9036     case E_DFmode:
9037     case E_DDmode:
9038     case E_DImode:
9039       /* On e500v2, we may have:
9040 
9041 	   (subreg:DF (mem:DI (plus (reg) (const_int))) 0).
9042 
9043          Which gets addressed with evldd instructions.  */
9044       if (TARGET_E500_DOUBLE)
9045 	return SPE_CONST_OFFSET_OK (offset);
9046 
9047       /* If we are using VSX scalar loads, restrict ourselves to reg+reg
9048 	 addressing.  */
9049       if (VECTOR_MEM_VSX_P (mode))
9050 	return false;
9051 
9052       if (!worst_case)
9053 	break;
9054       if (!TARGET_POWERPC64)
9055 	extra = 4;
9056       else if (offset & 3)
9057 	return false;
9058       break;
9059 
9060     case E_TFmode:
9061     case E_IFmode:
9062     case E_KFmode:
9063     case E_TDmode:
9064     case E_TImode:
9065     case E_PTImode:
9066       if (TARGET_E500_DOUBLE)
9067 	return (SPE_CONST_OFFSET_OK (offset)
9068 		&& SPE_CONST_OFFSET_OK (offset + 8));
9069 
9070       extra = 8;
9071       if (!worst_case)
9072 	break;
9073       if (!TARGET_POWERPC64)
9074 	extra = 12;
9075       else if (offset & 3)
9076 	return false;
9077       break;
9078 
9079     default:
9080       break;
9081     }
9082 
9083   offset += 0x8000;
9084   return offset < 0x10000 - extra;
9085 }
9086 
9087 bool
9088 legitimate_indexed_address_p (rtx x, int strict)
9089 {
9090   rtx op0, op1;
9091 
9092   if (GET_CODE (x) != PLUS)
9093     return false;
9094 
9095   op0 = XEXP (x, 0);
9096   op1 = XEXP (x, 1);
9097 
9098   /* Recognize the rtl generated by reload which we know will later be
9099      replaced with proper base and index regs.  */
9100   if (!strict
9101       && reload_in_progress
9102       && (REG_P (op0) || GET_CODE (op0) == PLUS)
9103       && REG_P (op1))
9104     return true;
9105 
9106   return (REG_P (op0) && REG_P (op1)
9107 	  && ((INT_REG_OK_FOR_BASE_P (op0, strict)
9108 	       && INT_REG_OK_FOR_INDEX_P (op1, strict))
9109 	      || (INT_REG_OK_FOR_BASE_P (op1, strict)
9110 		  && INT_REG_OK_FOR_INDEX_P (op0, strict))));
9111 }
9112 
9113 bool
9114 avoiding_indexed_address_p (machine_mode mode)
9115 {
9116   /* Avoid indexed addressing for modes that have non-indexed
9117      load/store instruction forms.  */
9118   return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
9119 }
9120 
9121 bool
9122 legitimate_indirect_address_p (rtx x, int strict)
9123 {
9124   return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
9125 }
9126 
9127 bool
9128 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
9129 {
9130   if (!TARGET_MACHO || !flag_pic
9131       || mode != SImode || GET_CODE (x) != MEM)
9132     return false;
9133   x = XEXP (x, 0);
9134 
9135   if (GET_CODE (x) != LO_SUM)
9136     return false;
9137   if (GET_CODE (XEXP (x, 0)) != REG)
9138     return false;
9139   if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
9140     return false;
9141   x = XEXP (x, 1);
9142 
9143   return CONSTANT_P (x);
9144 }
9145 
9146 static bool
9147 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
9148 {
9149   if (GET_CODE (x) != LO_SUM)
9150     return false;
9151   if (GET_CODE (XEXP (x, 0)) != REG)
9152     return false;
9153   if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
9154     return false;
9155   /* Quad-word addresses are restricted; we can't use LO_SUM.  */
9156   if (mode_supports_vsx_dform_quad (mode))
9157     return false;
9158   /* Restrict addressing for DI because of our SUBREG hackery.  */
9159   if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
9160     return false;
9161   x = XEXP (x, 1);
9162 
9163   if (TARGET_ELF || TARGET_MACHO)
9164     {
9165       bool large_toc_ok;
9166 
9167       if (DEFAULT_ABI == ABI_V4 && flag_pic)
9168 	return false;
9169       /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, as it usually calls
9170 	 push_reload from the reload pass code.  LEGITIMIZE_RELOAD_ADDRESS
9171 	 recognizes some LO_SUM addresses as valid although this
9172 	 function says the opposite.  In most cases LRA can generate
9173 	 correct code for address reloads through its own transformations,
9174 	 but it cannot manage some LO_SUM cases.  So we need code here,
9175 	 analogous to that in rs6000_legitimize_reload_address for
9176 	 LO_SUM, saying that some addresses are still valid.  */
9177       large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
9178 		      && small_toc_ref (x, VOIDmode));
9179       if (TARGET_TOC && ! large_toc_ok)
9180 	return false;
9181       if (GET_MODE_NUNITS (mode) != 1)
9182 	return false;
9183       if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
9184 	  && !(/* ??? Assume floating point reg based on mode?  */
9185 	       TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
9186 	       && (mode == DFmode || mode == DDmode)))
9187 	return false;
9188 
9189       return CONSTANT_P (x) || large_toc_ok;
9190     }
9191 
9192   return false;
9193 }
9194 
9195 
9196 /* Try machine-dependent ways of modifying an illegitimate address
9197    to be legitimate.  If we find one, return the new, valid address.
9198    This is used from only one place: `memory_address' in explow.c.
9199 
9200    OLDX is the address as it was before break_out_memory_refs was
9201    called.  In some cases it is useful to look at this to decide what
9202    needs to be done.
9203 
9204    It is always safe for this function to do nothing.  It exists to
9205    recognize opportunities to optimize the output.
9206 
9207    On RS/6000, first check for the sum of a register with a constant
9208    integer that is out of range.  If so, generate code to add the
9209    constant with the low-order 16 bits masked to the register and force
9210    this result into another register (this can be done with `cau').
9211    Then generate an address of REG+(CONST&0xffff), allowing for the
9212    possibility of bit 16 being a one.
9213 
9214    Then check for the sum of a register and something not constant, try to
9215    load the other things into a register and return the sum.  */
9216 
9217 static rtx
9218 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
9219 			   machine_mode mode)
9220 {
9221   unsigned int extra;
9222 
9223   if (!reg_offset_addressing_ok_p (mode)
9224       || mode_supports_vsx_dform_quad (mode))
9225     {
9226       if (virtual_stack_registers_memory_p (x))
9227 	return x;
9228 
9229       /* In theory we should not be seeing addresses of the form reg+0,
9230 	 but just in case it is generated, optimize it away.  */
9231       if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
9232 	return force_reg (Pmode, XEXP (x, 0));
9233 
9234       /* For TImode with load/store quad, restrict addresses to just a single
9235 	 pointer, so it works with both GPRs and VSX registers.  */
9236       /* Make sure both operands are registers.  */
9237       else if (GET_CODE (x) == PLUS
9238 	       && (mode != TImode || !TARGET_VSX_TIMODE))
9239 	return gen_rtx_PLUS (Pmode,
9240 			     force_reg (Pmode, XEXP (x, 0)),
9241 			     force_reg (Pmode, XEXP (x, 1)));
9242       else
9243 	return force_reg (Pmode, x);
9244     }
9245   if (GET_CODE (x) == SYMBOL_REF)
9246     {
9247       enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
9248       if (model != 0)
9249 	return rs6000_legitimize_tls_address (x, model);
9250     }
9251 
9252   extra = 0;
9253   switch (mode)
9254     {
9255     case E_TFmode:
9256     case E_TDmode:
9257     case E_TImode:
9258     case E_PTImode:
9259     case E_IFmode:
9260     case E_KFmode:
9261       /* As in legitimate_offset_address_p we do not assume
9262 	 worst-case.  The mode here is just a hint as to the registers
9263 	 used.  A TImode is usually in gprs, but may actually be in
9264 	 fprs.  Leave worst-case scenario for reload to handle via
9265 	 insn constraints.  PTImode is only GPRs.  */
9266       extra = 8;
9267       break;
9268     default:
9269       break;
9270     }
9271 
9272   if (GET_CODE (x) == PLUS
9273       && GET_CODE (XEXP (x, 0)) == REG
9274       && GET_CODE (XEXP (x, 1)) == CONST_INT
9275       && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
9276 	  >= 0x10000 - extra)
9277       && !(SPE_VECTOR_MODE (mode)
9278 	   || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
9279     {
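      /* Illustrative example: for (plus (reg) (const_int 0x12345)) with
	 extra = 0, low_int becomes 0x2345 and high_int 0x10000, so we
	 force reg + 0x10000 into a new register and return a (plus ...)
	 of that register and 0x2345.  */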
9280       HOST_WIDE_INT high_int, low_int;
9281       rtx sum;
9282       low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
9283       if (low_int >= 0x8000 - extra)
9284 	low_int = 0;
9285       high_int = INTVAL (XEXP (x, 1)) - low_int;
9286       sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
9287 					 GEN_INT (high_int)), 0);
9288       return plus_constant (Pmode, sum, low_int);
9289     }
9290   else if (GET_CODE (x) == PLUS
9291 	   && GET_CODE (XEXP (x, 0)) == REG
9292 	   && GET_CODE (XEXP (x, 1)) != CONST_INT
9293 	   && GET_MODE_NUNITS (mode) == 1
9294 	   && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9295 	       || (/* ??? Assume floating point reg based on mode?  */
9296 		   (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
9297 		   && (mode == DFmode || mode == DDmode)))
9298 	   && !avoiding_indexed_address_p (mode))
9299     {
9300       return gen_rtx_PLUS (Pmode, XEXP (x, 0),
9301 			   force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
9302     }
9303   else if (SPE_VECTOR_MODE (mode)
9304 	   || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
9305     {
9306       if (mode == DImode)
9307 	return x;
9308       /* We accept [reg + reg] and [reg + OFFSET].  */
9309 
9310       if (GET_CODE (x) == PLUS)
9311        {
9312          rtx op1 = XEXP (x, 0);
9313          rtx op2 = XEXP (x, 1);
9314          rtx y;
9315 
9316          op1 = force_reg (Pmode, op1);
9317 
9318          if (GET_CODE (op2) != REG
9319              && (GET_CODE (op2) != CONST_INT
9320                  || !SPE_CONST_OFFSET_OK (INTVAL (op2))
9321                  || (GET_MODE_SIZE (mode) > 8
9322                      && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
9323            op2 = force_reg (Pmode, op2);
9324 
9325          /* We can't always do [reg + reg] for these, because [reg +
9326             reg + offset] is not a legitimate addressing mode.  */
9327          y = gen_rtx_PLUS (Pmode, op1, op2);
9328 
9329          if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
9330            return force_reg (Pmode, y);
9331          else
9332            return y;
9333        }
9334 
9335       return force_reg (Pmode, x);
9336     }
9337   else if ((TARGET_ELF
9338 #if TARGET_MACHO
9339 	    || !MACHO_DYNAMIC_NO_PIC_P
9340 #endif
9341 	    )
9342 	   && TARGET_32BIT
9343 	   && TARGET_NO_TOC
9344 	   && ! flag_pic
9345 	   && GET_CODE (x) != CONST_INT
9346 	   && GET_CODE (x) != CONST_WIDE_INT
9347 	   && GET_CODE (x) != CONST_DOUBLE
9348 	   && CONSTANT_P (x)
9349 	   && GET_MODE_NUNITS (mode) == 1
9350 	   && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9351 	       || (/* ??? Assume floating point reg based on mode?  */
9352 		   (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
9353 		   && (mode == DFmode || mode == DDmode))))
9354     {
9355       rtx reg = gen_reg_rtx (Pmode);
9356       if (TARGET_ELF)
9357 	emit_insn (gen_elf_high (reg, x));
9358       else
9359 	emit_insn (gen_macho_high (reg, x));
9360       return gen_rtx_LO_SUM (Pmode, reg, x);
9361     }
9362   else if (TARGET_TOC
9363 	   && GET_CODE (x) == SYMBOL_REF
9364 	   && constant_pool_expr_p (x)
9365 	   && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
9366     return create_TOC_reference (x, NULL_RTX);
9367   else
9368     return x;
9369 }
9370 
9371 /* Debug version of rs6000_legitimize_address.  */
9372 static rtx
9373 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9374 {
9375   rtx ret;
9376   rtx_insn *insns;
9377 
9378   start_sequence ();
9379   ret = rs6000_legitimize_address (x, oldx, mode);
9380   insns = get_insns ();
9381   end_sequence ();
9382 
9383   if (ret != x)
9384     {
9385       fprintf (stderr,
9386 	       "\nrs6000_legitimize_address: mode %s, old code %s, "
9387 	       "new code %s, modified\n",
9388 	       GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
9389 	       GET_RTX_NAME (GET_CODE (ret)));
9390 
9391       fprintf (stderr, "Original address:\n");
9392       debug_rtx (x);
9393 
9394       fprintf (stderr, "oldx:\n");
9395       debug_rtx (oldx);
9396 
9397       fprintf (stderr, "New address:\n");
9398       debug_rtx (ret);
9399 
9400       if (insns)
9401 	{
9402 	  fprintf (stderr, "Insns added:\n");
9403 	  debug_rtx_list (insns, 20);
9404 	}
9405     }
9406   else
9407     {
9408       fprintf (stderr,
9409 	       "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
9410 	       GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
9411 
9412       debug_rtx (x);
9413     }
9414 
9415   if (insns)
9416     emit_insn (insns);
9417 
9418   return ret;
9419 }
9420 
9421 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9422    We need to emit DTP-relative relocations.  */
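/* For instance, with SIZE == 8 on 64-bit ELF the entry for a symbol X
   comes out as ".quad X@dtprel+0x8000" (DOUBLE_INT_ASM_OP expands to the
   .quad directive there); the 0x8000 bias is the conventional PowerPC
   DTP-relative offset adjustment.  (Example is illustrative.)  */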
9423 
9424 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
9425 static void
9426 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
9427 {
9428   switch (size)
9429     {
9430     case 4:
9431       fputs ("\t.long\t", file);
9432       break;
9433     case 8:
9434       fputs (DOUBLE_INT_ASM_OP, file);
9435       break;
9436     default:
9437       gcc_unreachable ();
9438     }
9439   output_addr_const (file, x);
9440   if (TARGET_ELF)
9441     fputs ("@dtprel+0x8000", file);
9442   else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF)
9443     {
9444       switch (SYMBOL_REF_TLS_MODEL (x))
9445 	{
9446 	case 0:
9447 	  break;
9448 	case TLS_MODEL_LOCAL_EXEC:
9449 	  fputs ("@le", file);
9450 	  break;
9451 	case TLS_MODEL_INITIAL_EXEC:
9452 	  fputs ("@ie", file);
9453 	  break;
9454 	case TLS_MODEL_GLOBAL_DYNAMIC:
9455 	case TLS_MODEL_LOCAL_DYNAMIC:
9456 	  fputs ("@m", file);
9457 	  break;
9458 	default:
9459 	  gcc_unreachable ();
9460 	}
9461     }
9462 }
9463 
9464 /* Return true if X is a symbol that refers to real (rather than emulated)
9465    TLS.  */
9466 
9467 static bool
9468 rs6000_real_tls_symbol_ref_p (rtx x)
9469 {
9470   return (GET_CODE (x) == SYMBOL_REF
9471 	  && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
9472 }
9473 
9474 /* In the name of slightly smaller debug output, and to cater to
9475    general assembler lossage, recognize various UNSPEC sequences
9476    and turn them back into a direct symbol reference.  */
9477 
9478 static rtx
9479 rs6000_delegitimize_address (rtx orig_x)
9480 {
9481   rtx x, y, offset;
9482 
9483   orig_x = delegitimize_mem_from_attrs (orig_x);
9484   x = orig_x;
9485   if (MEM_P (x))
9486     x = XEXP (x, 0);
9487 
9488   y = x;
9489   if (TARGET_CMODEL != CMODEL_SMALL
9490       && GET_CODE (y) == LO_SUM)
9491     y = XEXP (y, 1);
9492 
9493   offset = NULL_RTX;
9494   if (GET_CODE (y) == PLUS
9495       && GET_MODE (y) == Pmode
9496       && CONST_INT_P (XEXP (y, 1)))
9497     {
9498       offset = XEXP (y, 1);
9499       y = XEXP (y, 0);
9500     }
9501 
9502   if (GET_CODE (y) == UNSPEC
9503       && XINT (y, 1) == UNSPEC_TOCREL)
9504     {
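      /* The UNSPEC wraps the real symbol, e.g.
	 (unspec [(symbol_ref "x") (reg 2)] UNSPEC_TOCREL); element 0 of
	 its vector is the SYMBOL_REF we want back.  */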
9505       y = XVECEXP (y, 0, 0);
9506 
9507 #ifdef HAVE_AS_TLS
9508       /* Do not associate thread-local symbols with the original
9509 	 constant pool symbol.  */
9510       if (TARGET_XCOFF
9511 	  && GET_CODE (y) == SYMBOL_REF
9512 	  && CONSTANT_POOL_ADDRESS_P (y)
9513 	  && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
9514 	return orig_x;
9515 #endif
9516 
9517       if (offset != NULL_RTX)
9518 	y = gen_rtx_PLUS (Pmode, y, offset);
9519       if (!MEM_P (orig_x))
9520 	return y;
9521       else
9522 	return replace_equiv_address_nv (orig_x, y);
9523     }
9524 
9525   if (TARGET_MACHO
9526       && GET_CODE (orig_x) == LO_SUM
9527       && GET_CODE (XEXP (orig_x, 1)) == CONST)
9528     {
9529       y = XEXP (XEXP (orig_x, 1), 0);
9530       if (GET_CODE (y) == UNSPEC
9531 	  && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
9532 	return XVECEXP (y, 0, 0);
9533     }
9534 
9535   return orig_x;
9536 }
9537 
9538 /* Return true if X shouldn't be emitted into the debug info.
9539    The linker doesn't like .toc section references from
9540    .debug_* sections, so reject .toc section symbols.  */
9541 
9542 static bool
9543 rs6000_const_not_ok_for_debug_p (rtx x)
9544 {
9545   if (GET_CODE (x) == UNSPEC)
9546     return true;
9547   if (GET_CODE (x) == SYMBOL_REF
9548       && CONSTANT_POOL_ADDRESS_P (x))
9549     {
9550       rtx c = get_pool_constant (x);
9551       machine_mode cmode = get_pool_mode (x);
9552       if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
9553 	return true;
9554     }
9555 
9556   return false;
9557 }
9558 
9559 
9560 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  */
9561 
9562 static bool
9563 rs6000_legitimate_combined_insn (rtx_insn *insn)
9564 {
9565   int icode = INSN_CODE (insn);
9566 
9567   /* Reject creating doloop insns.  Combine should not be allowed
9568      to create these for a number of reasons:
9569      1) In a nested loop, if combine creates one of these in an
9570      outer loop and the register allocator happens to allocate ctr
9571      to the outer loop insn, then the inner loop can't use ctr.
9572      Inner loops ought to be more highly optimized.
9573      2) Combine often wants to create one of these from what was
9574      originally a three insn sequence, first combining the three
9575      insns to two, then to ctrsi/ctrdi.  When ctrsi/ctrdi is not
9576      allocated ctr, the splitter takes us back to the three insn
9577      sequence.  It's better to stop combine at the two insn
9578      sequence.
9579      3) Faced with not being able to allocate ctr for ctrsi/ctrdi
9580      insns, the register allocator sometimes uses floating point
9581      or vector registers for the pseudo.  Since ctrsi/ctrdi is a
9582      jump insn and output reloads are not implemented for jumps,
9583      the ctrsi/ctrdi splitters need to handle all possible cases.
9584      That's a pain, and it gets to be seriously difficult when a
9585      splitter that runs after reload needs memory to transfer from
9586      a gpr to fpr.  See PR70098 and PR71763 which are not fixed
9587      for the difficult case.  It's better to not create problems
9588      in the first place.  */
9589   if (icode != CODE_FOR_nothing
9590       && (icode == CODE_FOR_ctrsi_internal1
9591 	  || icode == CODE_FOR_ctrdi_internal1
9592 	  || icode == CODE_FOR_ctrsi_internal2
9593 	  || icode == CODE_FOR_ctrdi_internal2
9594 	  || icode == CODE_FOR_ctrsi_internal3
9595 	  || icode == CODE_FOR_ctrdi_internal3
9596 	  || icode == CODE_FOR_ctrsi_internal4
9597 	  || icode == CODE_FOR_ctrdi_internal4))
9598     return false;
9599 
9600   return true;
9601 }
9602 
9603 /* Construct the SYMBOL_REF for the tls_get_addr function.  */
9604 
9605 static GTY(()) rtx rs6000_tls_symbol;
9606 static rtx
9607 rs6000_tls_get_addr (void)
9608 {
9609   if (!rs6000_tls_symbol)
9610     rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
9611 
9612   return rs6000_tls_symbol;
9613 }
9614 
9615 /* Construct the SYMBOL_REF for TLS GOT references.  */
9616 
9617 static GTY(()) rtx rs6000_got_symbol;
9618 static rtx
9619 rs6000_got_sym (void)
9620 {
9621   if (!rs6000_got_symbol)
9622     {
9623       rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9624       SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
9625       SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
9626     }
9627 
9628   return rs6000_got_symbol;
9629 }
9630 
9631 /* AIX Thread-Local Address support.  */
9632 
9633 static rtx
9634 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
9635 {
9636   rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
9637   const char *name;
9638   char *tlsname;
9639 
9640   name = XSTR (addr, 0);
9641   /* Append TLS CSECT qualifier, unless the symbol already is qualified
9642      or the symbol will be in TLS private data section.  */
9643   if (name[strlen (name) - 1] != ']'
9644       && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
9645 	  || bss_initializer_p (SYMBOL_REF_DECL (addr))))
9646     {
9647       tlsname = XALLOCAVEC (char, strlen (name) + 4);
9648       strcpy (tlsname, name);
9649       strcat (tlsname,
9650 	      bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
9651       tlsaddr = copy_rtx (addr);
9652       XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
9653     }
9654   else
9655     tlsaddr = addr;
9656 
9657   /* Place addr into TOC constant pool.  */
9658   sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
9659 
9660   /* Output the TOC entry and create the MEM referencing the value.  */
9661   if (constant_pool_expr_p (XEXP (sym, 0))
9662       && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
9663     {
9664       tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
9665       mem = gen_const_mem (Pmode, tocref);
9666       set_mem_alias_set (mem, get_TOC_alias_set ());
9667     }
9668   else
9669     return sym;
9670 
9671   /* Use global-dynamic for local-dynamic.  */
9672   if (model == TLS_MODEL_GLOBAL_DYNAMIC
9673       || model == TLS_MODEL_LOCAL_DYNAMIC)
9674     {
9675       /* Create new TOC reference for @m symbol.  */
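      /* E.g. a pool label "*LC..5" becomes "*LCM..5": "name + 3" skips
	 the "*LC" prefix, giving a distinct module-handle entry
	 alongside the offset entry.  (Label spelling is illustrative.)  */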
9676       name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
9677       tlsname = XALLOCAVEC (char, strlen (name) + 1);
9678       strcpy (tlsname, "*LCM");
9679       strcat (tlsname, name + 3);
9680       rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
9681       SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
9682       tocref = create_TOC_reference (modaddr, NULL_RTX);
9683       rtx modmem = gen_const_mem (Pmode, tocref);
9684       set_mem_alias_set (modmem, get_TOC_alias_set ());
9685 
9686       rtx modreg = gen_reg_rtx (Pmode);
9687       emit_insn (gen_rtx_SET (modreg, modmem));
9688 
9689       tmpreg = gen_reg_rtx (Pmode);
9690       emit_insn (gen_rtx_SET (tmpreg, mem));
9691 
9692       dest = gen_reg_rtx (Pmode);
9693       if (TARGET_32BIT)
9694 	emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
9695       else
9696 	emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
9697       return dest;
9698     }
9699   /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13.  */
9700   else if (TARGET_32BIT)
9701     {
9702       tlsreg = gen_reg_rtx (SImode);
9703       emit_insn (gen_tls_get_tpointer (tlsreg));
9704     }
9705   else
9706     tlsreg = gen_rtx_REG (DImode, 13);
9707 
9708   /* Load the TOC value into temporary register.  */
9709   tmpreg = gen_reg_rtx (Pmode);
9710   emit_insn (gen_rtx_SET (tmpreg, mem));
9711   set_unique_reg_note (get_last_insn (), REG_EQUAL,
9712 		       gen_rtx_MINUS (Pmode, addr, tlsreg));
9713 
9714   /* Add TOC symbol value to TLS pointer.  */
9715   dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9716 
9717   return dest;
9718 }
9719 
9720 /* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
9721    this (thread-local) address.  */
9722 
9723 static rtx
9724 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9725 {
9726   rtx dest, insn;
9727 
9728   if (TARGET_XCOFF)
9729     return rs6000_legitimize_tls_address_aix (addr, model);
9730 
9731   dest = gen_reg_rtx (Pmode);
9732   if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
9733     {
9734       rtx tlsreg;
9735 
9736       if (TARGET_64BIT)
9737 	{
9738 	  tlsreg = gen_rtx_REG (Pmode, 13);
9739 	  insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9740 	}
9741       else
9742 	{
9743 	  tlsreg = gen_rtx_REG (Pmode, 2);
9744 	  insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9745 	}
9746       emit_insn (insn);
9747     }
9748   else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9749     {
9750       rtx tlsreg, tmp;
9751 
9752       tmp = gen_reg_rtx (Pmode);
9753       if (TARGET_64BIT)
9754 	{
9755 	  tlsreg = gen_rtx_REG (Pmode, 13);
9756 	  insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9757 	}
9758       else
9759 	{
9760 	  tlsreg = gen_rtx_REG (Pmode, 2);
9761 	  insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9762 	}
9763       emit_insn (insn);
9764       if (TARGET_64BIT)
9765 	insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9766       else
9767 	insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9768       emit_insn (insn);
9769     }
9770   else
9771     {
9772       rtx r3, got, tga, tmp1, tmp2, call_insn;
9773 
9774       /* We currently use relocations like @got@tlsgd for tls, which
9775 	 means the linker will handle allocation of tls entries, placing
9776 	 them in the .got section.  So use a pointer to the .got section,
9777 	 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9778 	 or to secondary GOT sections used by 32-bit -fPIC.  */
9779       if (TARGET_64BIT)
9780 	got = gen_rtx_REG (Pmode, 2);
9781       else
9782 	{
9783 	  if (flag_pic == 1)
9784 	    got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9785 	  else
9786 	    {
9787 	      rtx gsym = rs6000_got_sym ();
9788 	      got = gen_reg_rtx (Pmode);
9789 	      if (flag_pic == 0)
9790 		rs6000_emit_move (got, gsym, Pmode);
9791 	      else
9792 		{
9793 		  rtx mem, lab;
9794 
9795 		  tmp1 = gen_reg_rtx (Pmode);
9796 		  tmp2 = gen_reg_rtx (Pmode);
9797 		  mem = gen_const_mem (Pmode, tmp1);
9798 		  lab = gen_label_rtx ();
9799 		  emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9800 		  emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9801 		  if (TARGET_LINK_STACK)
9802 		    emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9803 		  emit_move_insn (tmp2, mem);
9804 		  rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9805 		  set_unique_reg_note (last, REG_EQUAL, gsym);
9806 		}
9807 	    }
9808 	}
9809 
9810       if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9811 	{
9812 	  tga = rs6000_tls_get_addr ();
9813 	  emit_library_call_value (tga, dest, LCT_CONST, Pmode,
9814 				   const0_rtx, Pmode);
9815 
9816 	  r3 = gen_rtx_REG (Pmode, 3);
9817 	  if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9818 	    {
9819 	      if (TARGET_64BIT)
9820 		insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
9821 	      else
9822 		insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
9823 	    }
9824 	  else if (DEFAULT_ABI == ABI_V4)
9825 	    insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
9826 	  else
9827 	    gcc_unreachable ();
9828 	  call_insn = last_call_insn ();
9829 	  PATTERN (call_insn) = insn;
9830 	  if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9831 	    use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9832 		     pic_offset_table_rtx);
9833 	}
9834       else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9835 	{
9836 	  tga = rs6000_tls_get_addr ();
9837 	  tmp1 = gen_reg_rtx (Pmode);
9838 	  emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
9839 				   const0_rtx, Pmode);
9840 
9841 	  r3 = gen_rtx_REG (Pmode, 3);
9842 	  if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9843 	    {
9844 	      if (TARGET_64BIT)
9845 		insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
9846 	      else
9847 		insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
9848 	    }
9849 	  else if (DEFAULT_ABI == ABI_V4)
9850 	    insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
9851 	  else
9852 	    gcc_unreachable ();
9853 	  call_insn = last_call_insn ();
9854 	  PATTERN (call_insn) = insn;
9855 	  if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9856 	    use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9857 		     pic_offset_table_rtx);
9858 
9859 	  if (rs6000_tls_size == 16)
9860 	    {
9861 	      if (TARGET_64BIT)
9862 		insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9863 	      else
9864 		insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9865 	    }
9866 	  else if (rs6000_tls_size == 32)
9867 	    {
9868 	      tmp2 = gen_reg_rtx (Pmode);
9869 	      if (TARGET_64BIT)
9870 		insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9871 	      else
9872 		insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9873 	      emit_insn (insn);
9874 	      if (TARGET_64BIT)
9875 		insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9876 	      else
9877 		insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9878 	    }
9879 	  else
9880 	    {
9881 	      tmp2 = gen_reg_rtx (Pmode);
9882 	      if (TARGET_64BIT)
9883 		insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9884 	      else
9885 		insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9886 	      emit_insn (insn);
9887 	      insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9888 	    }
9889 	  emit_insn (insn);
9890 	}
9891       else
9892 	{
9893 	  /* IE, or 64-bit offset LE.  */
9894 	  tmp2 = gen_reg_rtx (Pmode);
9895 	  if (TARGET_64BIT)
9896 	    insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9897 	  else
9898 	    insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9899 	  emit_insn (insn);
9900 	  if (TARGET_64BIT)
9901 	    insn = gen_tls_tls_64 (dest, tmp2, addr);
9902 	  else
9903 	    insn = gen_tls_tls_32 (dest, tmp2, addr);
9904 	  emit_insn (insn);
9905 	}
9906     }
9907 
9908   return dest;
9909 }
9910 
9911 /* Only create the global variable for the stack protect guard if we are using
9912    the global flavor of that guard.  */
9913 static tree
9914 rs6000_init_stack_protect_guard (void)
9915 {
9916   if (rs6000_stack_protector_guard == SSP_GLOBAL)
9917     return default_stack_protect_guard ();
9918 
9919   return NULL_TREE;
9920 }
9921 
9922 /* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
9923 
9924 static bool
9925 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9926 {
9927   if (GET_CODE (x) == HIGH
9928       && GET_CODE (XEXP (x, 0)) == UNSPEC)
9929     return true;
9930 
9931   /* A TLS symbol in the TOC cannot contain a sum.  */
9932   if (GET_CODE (x) == CONST
9933       && GET_CODE (XEXP (x, 0)) == PLUS
9934       && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9935       && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9936     return true;
9937 
9938   /* Do not place an ELF TLS symbol in the constant pool.  */
9939   return TARGET_ELF && tls_referenced_p (x);
9940 }
9941 
9942 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9943    that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9944    can be addressed relative to the toc pointer.  */
9945 
9946 static bool
9947 use_toc_relative_ref (rtx sym, machine_mode mode)
9948 {
9949   return ((constant_pool_expr_p (sym)
9950 	   && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9951 					       get_pool_mode (sym)))
9952 	  || (TARGET_CMODEL == CMODEL_MEDIUM
9953 	      && SYMBOL_REF_LOCAL_P (sym)
9954 	      && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9955 }
9956 
9957 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS.  Returns a value to
9958    replace the input X, or the original X if no replacement is called for.
9959    The output parameter *WIN is 1 if the calling macro should goto WIN,
9960    0 if it should not.
9961 
9962    For RS/6000, we wish to handle large displacements off a base
9963    register by splitting the addend across an addi/addis pair and the
9964    mem insn.  This cuts the number of extra insns needed from 3 to 1.
9965 
9966    On Darwin, we use this to generate code for floating point constants.
9967    A movsf_low is generated so we wind up with 2 instructions rather than 3.
9968    The Darwin code is inside #if TARGET_MACHO because only then are the
9969    machopic_* functions defined.  */
9970 static rtx
9971 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
9972 				  int opnum, int type,
9973 				  int ind_levels ATTRIBUTE_UNUSED, int *win)
9974 {
9975   bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9976   bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
9977 
9978   /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
9979      DFmode/DImode MEM.  Ditto for ISA 3.0 vsx_splat_v4sf/v4si.  */
9980   if (reg_offset_p
9981       && opnum == 1
9982       && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
9983 	  || (mode == DImode && recog_data.operand_mode[0] == V2DImode)
9984 	  || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode
9985 	      && TARGET_P9_VECTOR)
9986 	  || (mode == SImode && recog_data.operand_mode[0] == V4SImode
9987 	      && TARGET_P9_VECTOR)))
9988     reg_offset_p = false;
9989 
9990   /* We must recognize output that we have already generated ourselves.  */
9991   if (GET_CODE (x) == PLUS
9992       && GET_CODE (XEXP (x, 0)) == PLUS
9993       && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9994       && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9995       && GET_CODE (XEXP (x, 1)) == CONST_INT)
9996     {
9997       if (TARGET_DEBUG_ADDR)
9998 	{
9999 	  fprintf (stderr, "\nlegitimize_reload_address push_reload #1:\n");
10000 	  debug_rtx (x);
10001 	}
10002       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10003 		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
10004 		   opnum, (enum reload_type) type);
10005       *win = 1;
10006       return x;
10007     }
10008 
10009   /* Likewise for (lo_sum (high ...) ...) output we have generated.  */
10010   if (GET_CODE (x) == LO_SUM
10011       && GET_CODE (XEXP (x, 0)) == HIGH)
10012     {
10013       if (TARGET_DEBUG_ADDR)
10014 	{
10015 	  fprintf (stderr, "\nlegitimize_reload_address push_reload #2:\n");
10016 	  debug_rtx (x);
10017 	}
10018       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10019 		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10020 		   opnum, (enum reload_type) type);
10021       *win = 1;
10022       return x;
10023     }
10024 
10025 #if TARGET_MACHO
10026   if (DEFAULT_ABI == ABI_DARWIN && flag_pic
10027       && GET_CODE (x) == LO_SUM
10028       && GET_CODE (XEXP (x, 0)) == PLUS
10029       && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
10030       && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
10031       && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
10032       && machopic_operand_p (XEXP (x, 1)))
10033     {
10034       /* Result of previous invocation of this function on Darwin
10035 	 floating point constant.  */
10036       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10037 		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10038 		   opnum, (enum reload_type) type);
10039       *win = 1;
10040       return x;
10041     }
10042 #endif
10043 
10044   if (TARGET_CMODEL != CMODEL_SMALL
10045       && reg_offset_p
10046       && !quad_offset_p
10047       && small_toc_ref (x, VOIDmode))
10048     {
10049       rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
10050       x = gen_rtx_LO_SUM (Pmode, hi, x);
10051       if (TARGET_DEBUG_ADDR)
10052 	{
10053 	  fprintf (stderr, "\nlegitimize_reload_address push_reload #3:\n");
10054 	  debug_rtx (x);
10055 	}
10056       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10057 		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10058 		   opnum, (enum reload_type) type);
10059       *win = 1;
10060       return x;
10061     }
10062 
10063   if (GET_CODE (x) == PLUS
10064       && REG_P (XEXP (x, 0))
10065       && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
10066       && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
10067       && CONST_INT_P (XEXP (x, 1))
10068       && reg_offset_p
10069       && !SPE_VECTOR_MODE (mode)
10070       && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
10071       && (quad_offset_p || !VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
10072     {
10073       HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
10074       HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
10075       HOST_WIDE_INT high
10076 	= (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
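      /* Worked example: val = 0x12348 gives low = 0x2348 and
	 high = 0x10000, which recombine exactly.  For val = 0x7fff8000,
	 low sign-extends to -0x8000 and high wraps to -0x80000000, so
	 high + low != val and the test below rejects the split.  */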
10077 
10078       /* Check for 32-bit overflow or quad addresses with one of the
10079 	 four least significant bits set.  */
10080       if (high + low != val
10081 	  || (quad_offset_p && (low & 0xf)))
10082 	{
10083 	  *win = 0;
10084 	  return x;
10085 	}
10086 
10087       /* Reload the high part into a base reg; leave the low part
10088 	 in the mem directly.  */
10089 
10090       x = gen_rtx_PLUS (GET_MODE (x),
10091 			gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
10092 				      GEN_INT (high)),
10093 			GEN_INT (low));
10094 
10095       if (TARGET_DEBUG_ADDR)
10096 	{
10097 	  fprintf (stderr, "\nlegitimize_reload_address push_reload #4:\n");
10098 	  debug_rtx (x);
10099 	}
10100       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10101 		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
10102 		   opnum, (enum reload_type) type);
10103       *win = 1;
10104       return x;
10105     }
10106 
10107   if (GET_CODE (x) == SYMBOL_REF
10108       && reg_offset_p
10109       && !quad_offset_p
10110       && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
10111       && !SPE_VECTOR_MODE (mode)
10112 #if TARGET_MACHO
10113       && DEFAULT_ABI == ABI_DARWIN
10114       && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
10115       && machopic_symbol_defined_p (x)
10116 #else
10117       && DEFAULT_ABI == ABI_V4
10118       && !flag_pic
10119 #endif
10120       /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
10121 	 The same goes for DImode without 64-bit gprs and DFmode and DDmode
10122 	 without fprs.
10123 	 ??? Assume floating point reg based on mode?  This assumption is
10124 	 violated by eg. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
10125 	 where reload ends up doing a DFmode load of a constant from
10126 	 mem using two gprs.  Unfortunately, at this point reload
10127 	 hasn't yet selected regs so poking around in reload data
10128 	 won't help and even if we could figure out the regs reliably,
10129 	 we'd still want to allow this transformation when the mem is
10130 	 naturally aligned.  Since we say the address is good here, we
10131 	 can't disable offsets from LO_SUMs in mem_operand_gpr.
10132 	 FIXME: Allow offset from lo_sum for other modes too, when
10133 	 mem is sufficiently aligned.
10134 
10135 	 Also disallow this if the type can go in VMX/Altivec registers, since
10136 	 those registers do not have d-form (reg+offset) address modes.  */
10137       && !reg_addr[mode].scalar_in_vmx_p
10138       && mode != TFmode
10139       && mode != TDmode
10140       && mode != IFmode
10141       && mode != KFmode
10142       && (mode != TImode || !TARGET_VSX_TIMODE)
10143       && mode != PTImode
10144       && (mode != DImode || TARGET_POWERPC64)
10145       && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
10146 	  || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
10147     {
10148 #if TARGET_MACHO
10149       if (flag_pic)
10150 	{
10151 	  rtx offset = machopic_gen_offset (x);
10152 	  x = gen_rtx_LO_SUM (GET_MODE (x),
10153 		gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
10154 		  gen_rtx_HIGH (Pmode, offset)), offset);
10155 	}
10156       else
10157 #endif
10158 	x = gen_rtx_LO_SUM (GET_MODE (x),
10159 	      gen_rtx_HIGH (Pmode, x), x);
10160 
10161       if (TARGET_DEBUG_ADDR)
10162 	{
10163 	  fprintf (stderr, "\nlegitimize_reload_address push_reload #5:\n");
10164 	  debug_rtx (x);
10165 	}
10166       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10167 		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10168 		   opnum, (enum reload_type) type);
10169       *win = 1;
10170       return x;
10171     }
10172 
10173   /* Reload an offset address wrapped by an AND that represents the
10174      masking of the lower bits.  Strip the outer AND and let reload
10175      convert the offset address into an indirect address.  For VSX,
10176      force reload to create the address with an AND in a separate
10177      register, because we can't guarantee an altivec register will
10178      be used.  */
10179   if (VECTOR_MEM_ALTIVEC_P (mode)
10180       && GET_CODE (x) == AND
10181       && GET_CODE (XEXP (x, 0)) == PLUS
10182       && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
10183       && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
10184       && GET_CODE (XEXP (x, 1)) == CONST_INT
10185       && INTVAL (XEXP (x, 1)) == -16)
10186     {
10187       x = XEXP (x, 0);
10188       *win = 1;
10189       return x;
10190     }
10191 
10192   if (TARGET_TOC
10193       && reg_offset_p
10194       && !quad_offset_p
10195       && GET_CODE (x) == SYMBOL_REF
10196       && use_toc_relative_ref (x, mode))
10197     {
10198       x = create_TOC_reference (x, NULL_RTX);
10199       if (TARGET_CMODEL != CMODEL_SMALL)
10200 	{
10201 	  if (TARGET_DEBUG_ADDR)
10202 	    {
10203 	      fprintf (stderr, "\nlegitimize_reload_address push_reload #6:\n");
10204 	      debug_rtx (x);
10205 	    }
10206 	  push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10207 		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10208 		       opnum, (enum reload_type) type);
10209 	}
10210       *win = 1;
10211       return x;
10212     }
10213   *win = 0;
10214   return x;
10215 }
10216 
10217 /* Debug version of rs6000_legitimize_reload_address.  */
10218 static rtx
10219 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
10220 					int opnum, int type,
10221 					int ind_levels, int *win)
10222 {
10223   rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
10224 					      ind_levels, win);
10225   fprintf (stderr,
10226 	   "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
10227 	   "type = %d, ind_levels = %d, win = %d, original addr:\n",
10228 	   GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
10229   debug_rtx (x);
10230 
10231   if (x == ret)
10232     fprintf (stderr, "Same address returned\n");
10233   else if (!ret)
10234     fprintf (stderr, "NULL returned\n");
10235   else
10236     {
10237       fprintf (stderr, "New address:\n");
10238       debug_rtx (ret);
10239     }
10240 
10241   return ret;
10242 }
10243 
10244 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
10245    that is a valid memory address for an instruction.
10246    The MODE argument is the machine mode for the MEM expression
10247    that wants to use this address.
10248 
10249    On the RS/6000, there are four valid address forms: a SYMBOL_REF that
10250    refers to a constant pool entry of an address (or the sum of it
10251    plus a constant), a short (16-bit signed) constant plus a register,
10252    the sum of two registers, or a register indirect, possibly with an
10253    auto-increment.  For DFmode, DDmode and DImode with a constant plus
10254    register, we must ensure that both words are addressable or PowerPC64
10255    with offset word aligned.
10256 
10257    For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
10258    32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
10259    because adjacent memory cells are accessed by adding word-sized offsets
10260    during assembly output.  */
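/* As concrete (illustrative) instances of the four forms above:
   (symbol_ref) addressed via the TOC, (plus (reg 9) (const_int 100)),
   (plus (reg 9) (reg 10)), and (reg 9) itself, optionally as
   (pre_inc (reg 9)) when the mode supports update forms.  */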
10261 static bool
10262 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
10263 {
10264   bool reg_offset_p = reg_offset_addressing_ok_p (mode);
10265   bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
10266 
10267   /* If this is an unaligned stvx/ldvx type address, discard the outer AND.  */
10268   if (VECTOR_MEM_ALTIVEC_P (mode)
10269       && GET_CODE (x) == AND
10270       && GET_CODE (XEXP (x, 1)) == CONST_INT
10271       && INTVAL (XEXP (x, 1)) == -16)
10272     x = XEXP (x, 0);
10273 
10274   if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
10275     return 0;
10276   if (legitimate_indirect_address_p (x, reg_ok_strict))
10277     return 1;
10278   if (TARGET_UPDATE
10279       && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
10280       && mode_supports_pre_incdec_p (mode)
10281       && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
10282     return 1;
10283   /* Handle restricted vector d-form offsets in ISA 3.0.  */
10284   if (quad_offset_p)
10285     {
10286       if (quad_address_p (x, mode, reg_ok_strict))
10287 	return 1;
10288     }
10289   else if (virtual_stack_registers_memory_p (x))
10290     return 1;
10291 
10292   else if (reg_offset_p)
10293     {
10294       if (legitimate_small_data_p (mode, x))
10295 	return 1;
10296       if (legitimate_constant_pool_address_p (x, mode,
10297 					     reg_ok_strict || lra_in_progress))
10298 	return 1;
10299       if (reg_addr[mode].fused_toc && GET_CODE (x) == UNSPEC
10300 	  && XINT (x, 1) == UNSPEC_FUSION_ADDIS)
10301 	return 1;
10302     }
10303 
10304   /* For TImode, if we have TImode in VSX registers, only allow register
10305      indirect addresses.  This will allow the values to go in either GPRs
10306      or VSX registers without reloading.  The vector types would tend to
10307      go into VSX registers, so we allow REG+REG, while TImode seems
10308      somewhat split, in that some uses are GPR based, and some VSX based.  */
10309   /* FIXME: We could loosen this by changing the following to
10310        if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
10311      but currently we cannot allow REG+REG addressing for TImode.  See
10312      PR72827 for complete details on how this ends up hoodwinking DSE.  */
10313   if (mode == TImode && TARGET_VSX_TIMODE)
10314     return 0;
10315   /* If not REG_OK_STRICT (before reload) let pass any stack offset.  */
10316   if (! reg_ok_strict
10317       && reg_offset_p
10318       && GET_CODE (x) == PLUS
10319       && GET_CODE (XEXP (x, 0)) == REG
10320       && (XEXP (x, 0) == virtual_stack_vars_rtx
10321 	  || XEXP (x, 0) == arg_pointer_rtx)
10322       && GET_CODE (XEXP (x, 1)) == CONST_INT)
10323     return 1;
10324   if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
10325     return 1;
10326   if (!FLOAT128_2REG_P (mode)
10327       && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
10328 	  || TARGET_POWERPC64
10329 	  || (mode != DFmode && mode != DDmode)
10330 	  || (TARGET_E500_DOUBLE && mode != DDmode))
10331       && (TARGET_POWERPC64 || mode != DImode)
10332       && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
10333       && mode != PTImode
10334       && !avoiding_indexed_address_p (mode)
10335       && legitimate_indexed_address_p (x, reg_ok_strict))
10336     return 1;
10337   if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
10338       && mode_supports_pre_modify_p (mode)
10339       && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
10340       && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
10341 					      reg_ok_strict, false)
10342 	  || (!avoiding_indexed_address_p (mode)
10343 	      && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
10344       && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
10345     return 1;
10346   if (reg_offset_p && !quad_offset_p
10347       && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
10348     return 1;
10349   return 0;
10350 }
10351 
10352 /* Debug version of rs6000_legitimate_address_p.  */
10353 static bool
10354 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
10355 				   bool reg_ok_strict)
10356 {
10357   bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
10358   fprintf (stderr,
10359 	   "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
10360 	   "strict = %d, reload = %s, code = %s\n",
10361 	   ret ? "true" : "false",
10362 	   GET_MODE_NAME (mode),
10363 	   reg_ok_strict,
10364 	   (reload_completed
10365 	    ? "after"
10366 	    : (reload_in_progress ? "progress" : "before")),
10367 	   GET_RTX_NAME (GET_CODE (x)));
10368   debug_rtx (x);
10369 
10370   return ret;
10371 }
10372 
10373 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P.  */
10374 
10375 static bool
10376 rs6000_mode_dependent_address_p (const_rtx addr,
10377 				 addr_space_t as ATTRIBUTE_UNUSED)
10378 {
10379   return rs6000_mode_dependent_address_ptr (addr);
10380 }
10381 
10382 /* Go to LABEL if ADDR (a legitimate address expression)
10383    has an effect that depends on the machine mode it is used for.
10384 
10385    On the RS/6000 this is true of all integral offsets (since AltiVec
10386    and VSX modes don't allow them) or is a pre-increment or decrement.
10387 
10388    ??? Except that due to conceptual problems in offsettable_address_p
10389    we can't really report the problems of integral offsets.  So leave
10390    this assuming that the adjustable offset must be valid for the
10391    sub-words of a TFmode operand, which is what we had before.  */
10392 
10393 static bool
10394 rs6000_mode_dependent_address (const_rtx addr)
10395 {
10396   switch (GET_CODE (addr))
10397     {
10398     case PLUS:
10399       /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
10400 	 is considered a legitimate address before reload, so there
10401 	 are no offset restrictions in that case.  Note that this
10402 	 condition is safe in strict mode because any address involving
10403 	 virtual_stack_vars_rtx or arg_pointer_rtx would already have
10404 	 been rejected as illegitimate.  */
10405       if (XEXP (addr, 0) != virtual_stack_vars_rtx
10406 	  && XEXP (addr, 0) != arg_pointer_rtx
10407 	  && GET_CODE (XEXP (addr, 1)) == CONST_INT)
10408 	{
10409 	  unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
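	  /* The unsigned compare below flags offsets outside roughly
	     [-0x8000, 0x7fff - slack], where the slack (8 bytes on
	     64-bit, 12 on 32-bit) leaves room to address the remaining
	     words of a multi-word value; e.g. on 64-bit, 0x7ff8 is
	     flagged because offset + 8 no longer fits in 16 bits.  */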
10410 	  return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
10411 	}
10412       break;
10413 
10414     case LO_SUM:
10415       /* Anything in the constant pool is sufficiently aligned that
10416 	 all bytes have the same high part address.  */
10417       return !legitimate_constant_pool_address_p (addr, QImode, false);
10418 
10419     /* Auto-increment cases are now treated generically in recog.c.  */
10420     case PRE_MODIFY:
10421       return TARGET_UPDATE;
10422 
10423     /* AND is only allowed in Altivec loads.  */
10424     case AND:
10425       return true;
10426 
10427     default:
10428       break;
10429     }
10430 
10431   return false;
10432 }
10433 
10434 /* Debug version of rs6000_mode_dependent_address.  */
10435 static bool
10436 rs6000_debug_mode_dependent_address (const_rtx addr)
10437 {
10438   bool ret = rs6000_mode_dependent_address (addr);
10439 
10440   fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
10441 	   ret ? "true" : "false");
10442   debug_rtx (addr);
10443 
10444   return ret;
10445 }
10446 
10447 /* Implement FIND_BASE_TERM.  */
10448 
10449 rtx
10450 rs6000_find_base_term (rtx op)
10451 {
10452   rtx base;
10453 
10454   base = op;
10455   if (GET_CODE (base) == CONST)
10456     base = XEXP (base, 0);
10457   if (GET_CODE (base) == PLUS)
10458     base = XEXP (base, 0);
10459   if (GET_CODE (base) == UNSPEC)
10460     switch (XINT (base, 1))
10461       {
10462       case UNSPEC_TOCREL:
10463       case UNSPEC_MACHOPIC_OFFSET:
10464 	/* OP represents SYM [+ OFFSET] - ANCHOR.  SYM is the base term
10465 	   for aliasing purposes.  */
10466 	return XVECEXP (base, 0, 0);
10467       }
10468 
10469   return op;
10470 }
10471 
10472 /* More elaborate version of recog's offsettable_memref_p predicate
10473    that works around the ??? note of rs6000_mode_dependent_address.
10474    In particular it accepts
10475 
10476      (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
10477 
10478    in 32-bit mode, that the recog predicate rejects.  */
10479 
10480 static bool
10481 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
10482 {
10483   bool worst_case;
10484 
10485   if (!MEM_P (op))
10486     return false;
10487 
10488   /* First mimic offsettable_memref_p.  */
10489   if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
10490     return true;
10491 
10492   /* offsettable_address_p invokes rs6000_mode_dependent_address, but
10493      the latter predicate knows nothing about the mode of the memory
10494      reference and, therefore, assumes that it is the largest supported
10495      mode (TFmode).  As a consequence, legitimate offsettable memory
10496      references are rejected.  rs6000_legitimate_offset_address_p contains
10497      the correct logic for the PLUS case of rs6000_mode_dependent_address,
10498      at least with a little bit of help here given that we know the
10499      actual registers used.  */
10500   worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
10501 		|| GET_MODE_SIZE (reg_mode) == 4);
10502   return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
10503 					     true, worst_case);
10504 }
10505 
10506 /* Determine the reassociation width to be used in reassociate_bb.
10507    This takes into account how many parallel operations we
10508    can actually do of a given type, and also the latency.
10509    P8:
10510      int add/sub 6/cycle
10511          mul 2/cycle
10512      vect add/sub/mul 2/cycle
10513      fp   add/sub/mul 2/cycle
10514      dfp  1/cycle
10515 */
10516 
10517 static int
10518 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
10519                             machine_mode mode)
10520 {
10521   switch (rs6000_cpu)
10522     {
10523     case PROCESSOR_POWER8:
10524     case PROCESSOR_POWER9:
10525       if (DECIMAL_FLOAT_MODE_P (mode))
10526 	return 1;
10527       if (VECTOR_MODE_P (mode))
10528 	return 4;
10529       if (INTEGRAL_MODE_P (mode))
10530 	return opc == MULT_EXPR ? 4 : 6;
10531       if (FLOAT_MODE_P (mode))
10532 	return 4;
10533       break;
10534     default:
10535       break;
10536     }
10537   return 1;
10538 }
10539 
10540 /* Change register usage conditional on target flags.  */
10541 static void
10542 rs6000_conditional_register_usage (void)
10543 {
10544   int i;
10545 
10546   if (TARGET_DEBUG_TARGET)
10547     fprintf (stderr, "rs6000_conditional_register_usage called\n");
10548 
10549   /* Set MQ register fixed (already call_used) so that it will not be
10550      allocated.  */
10551   fixed_regs[64] = 1;
10552 
10553   /* 64-bit AIX and Linux reserve GPR13 for thread-private data.  */
10554   if (TARGET_64BIT)
10555     fixed_regs[13] = call_used_regs[13]
10556       = call_really_used_regs[13] = 1;
10557 
10558   /* Conditionally disable FPRs.  */
10559   if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
10560     for (i = 32; i < 64; i++)
10561       fixed_regs[i] = call_used_regs[i]
10562 	= call_really_used_regs[i] = 1;
10563 
10564   /* The TOC register is not killed across calls in a way that is
10565      visible to the compiler.  */
10566   if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10567     call_really_used_regs[2] = 0;
10568 
10569   if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
10570     fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10571 
10572   if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
10573     fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10574       = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10575       = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10576 
10577   if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
10578     fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10579       = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10580       = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10581 
10582   if (TARGET_TOC && TARGET_MINIMAL_TOC)
10583     fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10584       = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10585 
10586   if (TARGET_SPE)
10587     {
10588       global_regs[SPEFSCR_REGNO] = 1;
10589       /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
10590          registers in prologues and epilogues.  We no longer use r14
10591          for FIXED_SCRATCH, but we're keeping r14 out of the allocation
10592          pool for link-compatibility with older versions of GCC.  Once
10593          "old" code has died out, we can return r14 to the allocation
10594          pool.  */
10595       fixed_regs[14]
10596 	= call_used_regs[14]
10597 	= call_really_used_regs[14] = 1;
10598     }
10599 
10600   if (!TARGET_ALTIVEC && !TARGET_VSX)
10601     {
10602       for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
10603 	fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
10604       call_really_used_regs[VRSAVE_REGNO] = 1;
10605     }
10606 
10607   if (TARGET_ALTIVEC || TARGET_VSX)
10608     global_regs[VSCR_REGNO] = 1;
10609 
10610   if (TARGET_ALTIVEC_ABI)
10611     {
10612       for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
10613 	call_used_regs[i] = call_really_used_regs[i] = 1;
10614 
10615       /* AIX reserves VR20:31 in non-extended ABI mode.  */
10616       if (TARGET_XCOFF)
10617 	for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
10618 	  fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
10619     }
10620 }
10621 
10622 
10623 /* Output insns to set DEST equal to the constant SOURCE as a series of
10624    lis, ori and shl instructions and return TRUE.  */
10625 
10626 bool
10627 rs6000_emit_set_const (rtx dest, rtx source)
10628 {
10629   machine_mode mode = GET_MODE (dest);
10630   rtx temp, set;
10631   rtx_insn *insn;
10632   HOST_WIDE_INT c;
10633 
10634   gcc_checking_assert (CONST_INT_P (source));
10635   c = INTVAL (source);
10636   switch (mode)
10637     {
10638     case E_QImode:
10639     case E_HImode:
10640       emit_insn (gen_rtx_SET (dest, source));
10641       return true;
10642 
10643     case E_SImode:
10644       temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
10645 
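      /* E.g. c = 0x12345678 becomes the familiar two-insn sequence
	 lis temp,0x1234 followed by ori dest,temp,0x5678.  */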
10646       emit_insn (gen_rtx_SET (copy_rtx (temp),
10647 			      GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
10648       emit_insn (gen_rtx_SET (dest,
10649 			      gen_rtx_IOR (SImode, copy_rtx (temp),
10650 					   GEN_INT (c & 0xffff))));
10651       break;
10652 
10653     case E_DImode:
10654       if (!TARGET_POWERPC64)
10655 	{
10656 	  rtx hi, lo;
10657 
10658 	  hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
10659 				      DImode);
10660 	  lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
10661 				      DImode);
10662 	  emit_move_insn (hi, GEN_INT (c >> 32));
10663 	  c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
10664 	  emit_move_insn (lo, GEN_INT (c));
10665 	}
10666       else
10667 	rs6000_emit_set_long_const (dest, c);
10668       break;
10669 
10670     default:
10671       gcc_unreachable ();
10672     }
10673 
10674   insn = get_last_insn ();
10675   set = single_set (insn);
10676   if (! CONSTANT_P (SET_SRC (set)))
10677     set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
10678 
10679   return true;
10680 }
10681 
10682 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10683    Output insns to set DEST equal to the constant C as a series of
10684    lis, ori and shl instructions.  */
10685 
10686 static void
10687 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
10688 {
10689   rtx temp;
10690   HOST_WIDE_INT ud1, ud2, ud3, ud4;
10691 
10692   ud1 = c & 0xffff;
10693   c = c >> 16;
10694   ud2 = c & 0xffff;
10695   c = c >> 16;
10696   ud3 = c & 0xffff;
10697   c = c >> 16;
10698   ud4 = c & 0xffff;
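  /* Worked example: c = 0x123456789abcdef0 decomposes into
     ud4 = 0x1234, ud3 = 0x5678, ud2 = 0x9abc, ud1 = 0xdef0; the
     general case at the end builds it with lis/ori for the high
     32 bits, a 32-bit left shift, then oris/ori for the low half.  */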
10699 
10700   if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
10701       || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
10702     emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
10703 
10704   else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
10705 	   || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
10706     {
10707       temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10708 
10709       emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10710 		      GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10711       if (ud1 != 0)
10712 	emit_move_insn (dest,
10713 			gen_rtx_IOR (DImode, copy_rtx (temp),
10714 				     GEN_INT (ud1)));
10715     }
10716   else if (ud3 == 0 && ud4 == 0)
10717     {
10718       temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10719 
10720       gcc_assert (ud2 & 0x8000);
10721       emit_move_insn (copy_rtx (temp),
10722 		      GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10723       if (ud1 != 0)
10724 	emit_move_insn (copy_rtx (temp),
10725 			gen_rtx_IOR (DImode, copy_rtx (temp),
10726 				     GEN_INT (ud1)));
10727       emit_move_insn (dest,
10728 		      gen_rtx_ZERO_EXTEND (DImode,
10729 					   gen_lowpart (SImode,
10730 							copy_rtx (temp))));
10731     }
10732   else if ((ud4 == 0xffff && (ud3 & 0x8000))
10733 	   || (ud4 == 0 && ! (ud3 & 0x8000)))
10734     {
10735       temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10736 
10737       emit_move_insn (copy_rtx (temp),
10738 		      GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
10739       if (ud2 != 0)
10740 	emit_move_insn (copy_rtx (temp),
10741 			gen_rtx_IOR (DImode, copy_rtx (temp),
10742 				     GEN_INT (ud2)));
10743       emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10744 		      gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10745 				      GEN_INT (16)));
10746       if (ud1 != 0)
10747 	emit_move_insn (dest,
10748 			gen_rtx_IOR (DImode, copy_rtx (temp),
10749 				     GEN_INT (ud1)));
10750     }
10751   else
10752     {
10753       temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10754 
10755       emit_move_insn (copy_rtx (temp),
10756 		      GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
10757       if (ud3 != 0)
10758 	emit_move_insn (copy_rtx (temp),
10759 			gen_rtx_IOR (DImode, copy_rtx (temp),
10760 				     GEN_INT (ud3)));
10761 
10762       emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
10763 		      gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10764 				      GEN_INT (32)));
10765       if (ud2 != 0)
10766 	emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10767 			gen_rtx_IOR (DImode, copy_rtx (temp),
10768 				     GEN_INT (ud2 << 16)));
10769       if (ud1 != 0)
10770 	emit_move_insn (dest,
10771 			gen_rtx_IOR (DImode, copy_rtx (temp),
10772 				     GEN_INT (ud1)));
10773     }
10774 }
10775 
10776 /* Helper for the following.  Get rid of [r+r] memory refs
10777    in cases where it won't work (TImode, TFmode, TDmode, PTImode).  */
10778 
10779 static void
10780 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10781 {
10782   if (reload_in_progress)
10783     return;
10784 
10785   if (GET_CODE (operands[0]) == MEM
10786       && GET_CODE (XEXP (operands[0], 0)) != REG
10787       && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10788 					       GET_MODE (operands[0]), false))
10789     operands[0]
10790       = replace_equiv_address (operands[0],
10791 			       copy_addr_to_reg (XEXP (operands[0], 0)));
10792 
10793   if (GET_CODE (operands[1]) == MEM
10794       && GET_CODE (XEXP (operands[1], 0)) != REG
10795       && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10796 					       GET_MODE (operands[1]), false))
10797     operands[1]
10798       = replace_equiv_address (operands[1],
10799 			       copy_addr_to_reg (XEXP (operands[1], 0)));
10800 }
10801 
10802 /* Generate a vector of constants to permute MODE for a little-endian
10803    storage operation by swapping the two halves of a vector.  */
10804 static rtvec
10805 rs6000_const_vec (machine_mode mode)
10806 {
10807   int i, subparts;
10808   rtvec v;
10809 
10810   switch (mode)
10811     {
10812     case E_V1TImode:
10813       subparts = 1;
10814       break;
10815     case E_V2DFmode:
10816     case E_V2DImode:
10817       subparts = 2;
10818       break;
10819     case E_V4SFmode:
10820     case E_V4SImode:
10821       subparts = 4;
10822       break;
10823     case E_V8HImode:
10824       subparts = 8;
10825       break;
10826     case E_V16QImode:
10827       subparts = 16;
10828       break;
10829     default:
10830       gcc_unreachable ();
10831     }
10832 
10833   v = rtvec_alloc (subparts);
10834 
10835   for (i = 0; i < subparts / 2; ++i)
10836     RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10837   for (i = subparts / 2; i < subparts; ++i)
10838     RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
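  /* E.g. for V4SImode this builds the selector {2, 3, 0, 1}, and for
     V16QImode {8, ..., 15, 0, ..., 7}: the doubleword halves of the
     vector are exchanged.  */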
10839 
10840   return v;
10841 }
10842 
10843 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
10844    for a VSX load or store operation.  */
10845 rtx
10846 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
10847 {
10848   /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
10849      128-bit integers if they are allowed in VSX registers.  */
10850   if (FLOAT128_VECTOR_P (mode) || mode == TImode || mode == V1TImode)
10851     return gen_rtx_ROTATE (mode, source, GEN_INT (64));
10852   else
10853     {
10854       rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10855       return gen_rtx_VEC_SELECT (mode, source, par);
10856     }
10857 }
10858 
10859 /* Emit a little-endian load from vector memory location SOURCE to VSX
10860    register DEST in mode MODE.  The load is done with two permuting
10861    insns that represent an lxvd2x and an xxpermdi.  */
10862 void
10863 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10864 {
10865   rtx tmp, permute_mem, permute_reg;
10866 
10867   /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10868      V1TImode).  */
10869   if (mode == TImode || mode == V1TImode)
10870     {
10871       mode = V2DImode;
10872       dest = gen_lowpart (V2DImode, dest);
10873       source = adjust_address (source, V2DImode, 0);
10874     }
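  /* The two permutes cancel the element reversal: the lxvd2x-style
     load brings the doubleword halves in swapped, and the
     xxpermdi-style register permute swaps them back into element
     order.  */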
10875 
10876   tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10877   permute_mem = rs6000_gen_le_vsx_permute (source, mode);
10878   permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
10879   emit_insn (gen_rtx_SET (tmp, permute_mem));
10880   emit_insn (gen_rtx_SET (dest, permute_reg));
10881 }
10882 
10883 /* Emit a little-endian store to vector memory location DEST from VSX
10884    register SOURCE in mode MODE.  The store is done with two permuting
10885    insns that represent an xxpermdi and an stxvd2x.  */
10886 void
10887 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10888 {
10889   rtx tmp, permute_src, permute_tmp;
10890 
10891   /* This should never be called during or after reload, because it does
10892      not re-permute the source register.  It is intended only for use
10893      during expand.  */
10894   gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
10895 
10896   /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10897      V1TImode).  */
10898   if (mode == TImode || mode == V1TImode)
10899     {
10900       mode = V2DImode;
10901       dest = adjust_address (dest, V2DImode, 0);
10902       source = gen_lowpart (V2DImode, source);
10903     }
10904 
10905   tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
10906   permute_src = rs6000_gen_le_vsx_permute (source, mode);
10907   permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
10908   emit_insn (gen_rtx_SET (tmp, permute_src));
10909   emit_insn (gen_rtx_SET (dest, permute_tmp));
10910 }
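
/* Mirror image of the load case: the first SET above is the xxpermdi
   that pre-swaps the doublewords into a temporary, and the second is
   the stxvd2x, which swaps them again on the way to memory, leaving
   the stored image in true element order.  */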
10911 
10912 /* Emit a sequence representing a little-endian VSX load or store,
10913    moving data from SOURCE to DEST in mode MODE.  This is done
10914    separately from rs6000_emit_move to ensure it is called only
10915    during expand.  LE VSX loads and stores introduced later are
10916    handled with a split.  The expand-time RTL generation allows
10917    us to optimize away redundant pairs of register-permutes.  */
10918 void
10919 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10920 {
10921   gcc_assert (!BYTES_BIG_ENDIAN
10922 	      && VECTOR_MEM_VSX_P (mode)
10923 	      && !TARGET_P9_VECTOR
10924 	      && !gpr_or_gpr_p (dest, source)
10925 	      && (MEM_P (source) ^ MEM_P (dest)));
10926 
10927   if (MEM_P (source))
10928     {
10929       gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
10930       rs6000_emit_le_vsx_load (dest, source, mode);
10931     }
10932   else
10933     {
10934       if (!REG_P (source))
10935 	source = force_reg (mode, source);
10936       rs6000_emit_le_vsx_store (dest, source, mode);
10937     }
10938 }
10939 
10940 /* Return whether a SFmode or SImode move can be done without converting one
10941    mode to another.  This arises when we have:
10942 
10943 	(SUBREG:SF (REG:SI ...))
10944 	(SUBREG:SI (REG:SF ...))
10945 
10946    and one of the values is in a floating point/vector register, where SFmode
10947    scalars are stored in DFmode format.  */
10948 
10949 bool
10950 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
10951 {
10952   if (TARGET_ALLOW_SF_SUBREG)
10953     return true;
10954 
10955   if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
10956     return true;
10957 
10958   if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10959     return true;
10960 
10961   /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))).  */
10962   if (SUBREG_P (dest))
10963     {
10964       rtx dest_subreg = SUBREG_REG (dest);
10965       rtx src_subreg = SUBREG_REG (src);
10966       return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10967     }
10968 
10969   return false;
10970 }
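
/* Examples of the distinction made above:
     (set (subreg:SI (reg:SF a)) (subreg:SI (reg:SF b)))
   is valid (matching inner modes, so no format conversion is needed),
   while
     (set (reg:SI r) (subreg:SI (reg:SF f)))
   is not; moves like the latter must go through the UNSPEC patterns
   generated by rs6000_emit_move_si_sf_subreg below.  */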
10971 
10972 
10973 /* Helper function to change moves with:
10974 
10975 	(SUBREG:SF (REG:SI)) and
10976 	(SUBREG:SI (REG:SF))
10977 
10978    into separate UNSPEC insns.  In the PowerPC architecture, scalar SFmode
10979    values are stored as DFmode values in the VSX registers.  We need to convert
10980    the bits before we can use a direct move or operate on the bits in the
10981    vector register as an integer type.
10982 
10983    Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))).  */
10984 
10985 static bool
10986 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10987 {
10988   if (TARGET_DIRECT_MOVE_64BIT && !reload_in_progress && !reload_completed
10989       && !lra_in_progress
10990       && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10991       && SUBREG_P (source) && sf_subreg_operand (source, mode))
10992     {
10993       rtx inner_source = SUBREG_REG (source);
10994       machine_mode inner_mode = GET_MODE (inner_source);
10995 
10996       if (mode == SImode && inner_mode == SFmode)
10997 	{
10998 	  emit_insn (gen_movsi_from_sf (dest, inner_source));
10999 	  return true;
11000 	}
11001 
11002       if (mode == SFmode && inner_mode == SImode)
11003 	{
11004 	  emit_insn (gen_movsf_from_si (dest, inner_source));
11005 	  return true;
11006 	}
11007     }
11008 
11009   return false;
11010 }
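
/* For instance, (set (reg:SI r) (subreg:SI (reg:SF f) 0)) is rewritten
   as a movsi_from_sf insn, whose pattern converts the DFmode image kept
   in the VSX register into SFmode bits before handing them over as an
   integer.  */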
11011 
11012 /* Emit a move from SOURCE to DEST in mode MODE.  */
11013 void
11014 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
11015 {
11016   rtx operands[2];
11017   operands[0] = dest;
11018   operands[1] = source;
11019 
11020   if (TARGET_DEBUG_ADDR)
11021     {
11022       fprintf (stderr,
11023 	       "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
11024 	       "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
11025 	       GET_MODE_NAME (mode),
11026 	       reload_in_progress,
11027 	       reload_completed,
11028 	       can_create_pseudo_p ());
11029       debug_rtx (dest);
11030       fprintf (stderr, "source:\n");
11031       debug_rtx (source);
11032     }
11033 
11034   /* Sanity checks.  Check that we get CONST_DOUBLE only when we should.  */
11035   if (CONST_WIDE_INT_P (operands[1])
11036       && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
11037     {
11038       /* This should be fixed with the introduction of CONST_WIDE_INT.  */
11039       gcc_unreachable ();
11040     }
11041 
11042   /* See if we need to special case SImode/SFmode SUBREG moves.  */
11043   if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
11044       && rs6000_emit_move_si_sf_subreg (dest, source, mode))
11045     return;
11046 
11047   /* Check if GCC is setting up a block move that will end up using FP
11048      registers as temporaries.  We must make sure this is acceptable.  */
11049   if (GET_CODE (operands[0]) == MEM
11050       && GET_CODE (operands[1]) == MEM
11051       && mode == DImode
11052       && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
11053 	  || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
11054       && ! (rs6000_slow_unaligned_access (SImode,
11055 					  (MEM_ALIGN (operands[0]) > 32
11056 					   ? 32 : MEM_ALIGN (operands[0])))
11057 	    || rs6000_slow_unaligned_access (SImode,
11058 					     (MEM_ALIGN (operands[1]) > 32
11059 					      ? 32 : MEM_ALIGN (operands[1]))))
11060       && ! MEM_VOLATILE_P (operands [0])
11061       && ! MEM_VOLATILE_P (operands [1]))
11062     {
11063       emit_move_insn (adjust_address (operands[0], SImode, 0),
11064 		      adjust_address (operands[1], SImode, 0));
11065       emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
11066 		      adjust_address (copy_rtx (operands[1]), SImode, 4));
11067       return;
11068     }
11069 
11070   if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
11071       && !gpc_reg_operand (operands[1], mode))
11072     operands[1] = force_reg (mode, operands[1]);
11073 
11074   /* Recognize the case where operand[1] is a reference to thread-local
11075      data and load its address to a register.  */
11076   if (tls_referenced_p (operands[1]))
11077     {
11078       enum tls_model model;
11079       rtx tmp = operands[1];
11080       rtx addend = NULL;
11081 
11082       if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
11083 	{
11084           addend = XEXP (XEXP (tmp, 0), 1);
11085 	  tmp = XEXP (XEXP (tmp, 0), 0);
11086 	}
11087 
11088       gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
11089       model = SYMBOL_REF_TLS_MODEL (tmp);
11090       gcc_assert (model != 0);
11091 
11092       tmp = rs6000_legitimize_tls_address (tmp, model);
11093       if (addend)
11094 	{
11095 	  tmp = gen_rtx_PLUS (mode, tmp, addend);
11096 	  tmp = force_operand (tmp, operands[0]);
11097 	}
11098       operands[1] = tmp;
11099     }
11100 
11101   /* Handle the case where reload calls us with an invalid address.  */
11102   if (reload_in_progress && mode == Pmode
11103       && (! general_operand (operands[1], mode)
11104 	  || ! nonimmediate_operand (operands[0], mode)))
11105     goto emit_set;
11106 
11107   /* 128-bit constant floating-point values on Darwin should really be loaded
11108      as two parts.  However, this premature splitting is a problem when DFmode
11109      values can go into Altivec registers.  */
11110   if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p
11111       && GET_CODE (operands[1]) == CONST_DOUBLE)
11112     {
11113       rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
11114 			simplify_gen_subreg (DFmode, operands[1], mode, 0),
11115 			DFmode);
11116       rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
11117 					     GET_MODE_SIZE (DFmode)),
11118 			simplify_gen_subreg (DFmode, operands[1], mode,
11119 					     GET_MODE_SIZE (DFmode)),
11120 			DFmode);
11121       return;
11122     }
11123 
11124   if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
11125     cfun->machine->sdmode_stack_slot =
11126       eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
11127 
11128 
11129   /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
11130      p1:SD) if p1 is not of floating point class and p0 is spilled as
11131      we can have no analogous movsd_store for this.  */
11132   if (lra_in_progress && mode == DDmode
11133       && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
11134       && reg_preferred_class (REGNO (operands[0])) == NO_REGS
11135       && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
11136       && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
11137     {
11138       enum reg_class cl;
11139       int regno = REGNO (SUBREG_REG (operands[1]));
11140 
11141       if (regno >= FIRST_PSEUDO_REGISTER)
11142 	{
11143 	  cl = reg_preferred_class (regno);
11144 	  regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
11145 	}
11146       if (regno >= 0 && ! FP_REGNO_P (regno))
11147 	{
11148 	  mode = SDmode;
11149 	  operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
11150 	  operands[1] = SUBREG_REG (operands[1]);
11151 	}
11152     }
11153   if (lra_in_progress
11154       && mode == SDmode
11155       && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
11156       && reg_preferred_class (REGNO (operands[0])) == NO_REGS
11157       && (REG_P (operands[1])
11158 	  || (GET_CODE (operands[1]) == SUBREG
11159 	      && REG_P (SUBREG_REG (operands[1])))))
11160     {
11161       int regno = REGNO (GET_CODE (operands[1]) == SUBREG
11162 			 ? SUBREG_REG (operands[1]) : operands[1]);
11163       enum reg_class cl;
11164 
11165       if (regno >= FIRST_PSEUDO_REGISTER)
11166 	{
11167 	  cl = reg_preferred_class (regno);
11168 	  gcc_assert (cl != NO_REGS);
11169 	  regno = ira_class_hard_regs[cl][0];
11170 	}
11171       if (FP_REGNO_P (regno))
11172 	{
11173 	  if (GET_MODE (operands[0]) != DDmode)
11174 	    operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
11175 	  emit_insn (gen_movsd_store (operands[0], operands[1]));
11176 	}
11177       else if (INT_REGNO_P (regno))
11178 	emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
11179       else
11180 	gcc_unreachable ();
11181       return;
11182     }
11183   /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
11184      p:DD)) if p0 is not of floating point class and p1 is spilled as
11185      we can have no analogous movsd_load for this.  */
11186   if (lra_in_progress && mode == DDmode
11187       && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
11188       && GET_MODE (SUBREG_REG (operands[0])) == SDmode
11189       && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
11190       && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
11191     {
11192       enum reg_class cl;
11193       int regno = REGNO (SUBREG_REG (operands[0]));
11194 
11195       if (regno >= FIRST_PSEUDO_REGISTER)
11196 	{
11197 	  cl = reg_preferred_class (regno);
11198 	  regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
11199 	}
11200       if (regno >= 0 && ! FP_REGNO_P (regno))
11201 	{
11202 	  mode = SDmode;
11203 	  operands[0] = SUBREG_REG (operands[0]);
11204 	  operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
11205 	}
11206     }
11207   if (lra_in_progress
11208       && mode == SDmode
11209       && (REG_P (operands[0])
11210 	  || (GET_CODE (operands[0]) == SUBREG
11211 	      && REG_P (SUBREG_REG (operands[0]))))
11212       && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
11213       && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
11214     {
11215       int regno = REGNO (GET_CODE (operands[0]) == SUBREG
11216 			 ? SUBREG_REG (operands[0]) : operands[0]);
11217       enum reg_class cl;
11218 
11219       if (regno >= FIRST_PSEUDO_REGISTER)
11220 	{
11221 	  cl = reg_preferred_class (regno);
11222 	  gcc_assert (cl != NO_REGS);
11223 	  regno = ira_class_hard_regs[cl][0];
11224 	}
11225       if (FP_REGNO_P (regno))
11226 	{
11227 	  if (GET_MODE (operands[1]) != DDmode)
11228 	    operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
11229 	  emit_insn (gen_movsd_load (operands[0], operands[1]));
11230 	}
11231       else if (INT_REGNO_P (regno))
11232 	emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
11233       else
11234 	gcc_unreachable ();
11235       return;
11236     }
11237 
11238   if (reload_in_progress
11239       && mode == SDmode
11240       && cfun->machine->sdmode_stack_slot != NULL_RTX
11241       && MEM_P (operands[0])
11242       && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
11243       && REG_P (operands[1]))
11244     {
11245       if (FP_REGNO_P (REGNO (operands[1])))
11246 	{
11247 	  rtx mem = adjust_address_nv (operands[0], DDmode, 0);
11248 	  mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
11249 	  emit_insn (gen_movsd_store (mem, operands[1]));
11250 	}
11251       else if (INT_REGNO_P (REGNO (operands[1])))
11252 	{
11253 	  rtx mem = operands[0];
11254 	  if (BYTES_BIG_ENDIAN)
11255 	    mem = adjust_address_nv (mem, mode, 4);
11256 	  mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
11257 	  emit_insn (gen_movsd_hardfloat (mem, operands[1]));
11258 	}
11259       else
11260 	gcc_unreachable ();
11261       return;
11262     }
11263   if (reload_in_progress
11264       && mode == SDmode
11265       && REG_P (operands[0])
11266       && MEM_P (operands[1])
11267       && cfun->machine->sdmode_stack_slot != NULL_RTX
11268       && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
11269     {
11270       if (FP_REGNO_P (REGNO (operands[0])))
11271 	{
11272 	  rtx mem = adjust_address_nv (operands[1], DDmode, 0);
11273 	  mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
11274 	  emit_insn (gen_movsd_load (operands[0], mem));
11275 	}
11276       else if (INT_REGNO_P (REGNO (operands[0])))
11277 	{
11278 	  rtx mem = operands[1];
11279 	  if (BYTES_BIG_ENDIAN)
11280 	    mem = adjust_address_nv (mem, mode, 4);
11281 	  mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
11282 	  emit_insn (gen_movsd_hardfloat (operands[0], mem));
11283 	}
11284       else
11285 	gcc_unreachable ();
11286       return;
11287     }
11288 
11289   /* FIXME:  In the long term, this switch statement should go away
11290      and be replaced by a sequence of tests based on things like
11291      mode == Pmode.  */
11292   switch (mode)
11293     {
11294     case E_HImode:
11295     case E_QImode:
11296       if (CONSTANT_P (operands[1])
11297 	  && GET_CODE (operands[1]) != CONST_INT)
11298 	operands[1] = force_const_mem (mode, operands[1]);
11299       break;
11300 
11301     case E_TFmode:
11302     case E_TDmode:
11303     case E_IFmode:
11304     case E_KFmode:
11305       if (FLOAT128_2REG_P (mode))
11306 	rs6000_eliminate_indexed_memrefs (operands);
11307       /* fall through */
11308 
11309     case E_DFmode:
11310     case E_DDmode:
11311     case E_SFmode:
11312     case E_SDmode:
11313       if (CONSTANT_P (operands[1])
11314 	  && ! easy_fp_constant (operands[1], mode))
11315 	operands[1] = force_const_mem (mode, operands[1]);
11316       break;
11317 
11318     case E_V16QImode:
11319     case E_V8HImode:
11320     case E_V4SFmode:
11321     case E_V4SImode:
11322     case E_V4HImode:
11323     case E_V2SFmode:
11324     case E_V2SImode:
11325     case E_V1DImode:
11326     case E_V2DFmode:
11327     case E_V2DImode:
11328     case E_V1TImode:
11329       if (CONSTANT_P (operands[1])
11330 	  && !easy_vector_constant (operands[1], mode))
11331 	operands[1] = force_const_mem (mode, operands[1]);
11332       break;
11333 
11334     case E_SImode:
11335     case E_DImode:
11336       /* Use default pattern for address of ELF small data.  */
11337       if (TARGET_ELF
11338 	  && mode == Pmode
11339 	  && DEFAULT_ABI == ABI_V4
11340 	  && (GET_CODE (operands[1]) == SYMBOL_REF
11341 	      || GET_CODE (operands[1]) == CONST)
11342 	  && small_data_operand (operands[1], mode))
11343 	{
11344 	  emit_insn (gen_rtx_SET (operands[0], operands[1]));
11345 	  return;
11346 	}
11347 
11348       if (DEFAULT_ABI == ABI_V4
11349 	  && mode == Pmode && mode == SImode
11350 	  && flag_pic == 1 && got_operand (operands[1], mode))
11351 	{
11352 	  emit_insn (gen_movsi_got (operands[0], operands[1]));
11353 	  return;
11354 	}
11355 
11356       if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
11357 	  && TARGET_NO_TOC
11358 	  && ! flag_pic
11359 	  && mode == Pmode
11360 	  && CONSTANT_P (operands[1])
11361 	  && GET_CODE (operands[1]) != HIGH
11362 	  && GET_CODE (operands[1]) != CONST_INT)
11363 	{
11364 	  rtx target = (!can_create_pseudo_p ()
11365 			? operands[0]
11366 			: gen_reg_rtx (mode));
11367 
11368 	  /* If this is a function address on -mcall-aixdesc,
11369 	     convert it to the address of the descriptor.  */
11370 	  if (DEFAULT_ABI == ABI_AIX
11371 	      && GET_CODE (operands[1]) == SYMBOL_REF
11372 	      && XSTR (operands[1], 0)[0] == '.')
11373 	    {
11374 	      const char *name = XSTR (operands[1], 0);
11375 	      rtx new_ref;
11376 	      while (*name == '.')
11377 		name++;
11378 	      new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
11379 	      CONSTANT_POOL_ADDRESS_P (new_ref)
11380 		= CONSTANT_POOL_ADDRESS_P (operands[1]);
11381 	      SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
11382 	      SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
11383 	      SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
11384 	      operands[1] = new_ref;
11385 	    }
11386 
11387 	  if (DEFAULT_ABI == ABI_DARWIN)
11388 	    {
11389 #if TARGET_MACHO
11390 	      if (MACHO_DYNAMIC_NO_PIC_P)
11391 		{
11392 		  /* Take care of any required data indirection.  */
11393 		  operands[1] = rs6000_machopic_legitimize_pic_address (
11394 				  operands[1], mode, operands[0]);
11395 		  if (operands[0] != operands[1])
11396 		    emit_insn (gen_rtx_SET (operands[0], operands[1]));
11397 		  return;
11398 		}
11399 #endif
11400 	      emit_insn (gen_macho_high (target, operands[1]));
11401 	      emit_insn (gen_macho_low (operands[0], target, operands[1]));
11402 	      return;
11403 	    }
11404 
11405 	  emit_insn (gen_elf_high (target, operands[1]));
11406 	  emit_insn (gen_elf_low (operands[0], target, operands[1]));
11407 	  return;
11408 	}
11409 
11410       /* If this is a SYMBOL_REF that refers to a constant pool entry,
11411 	 and we have put it in the TOC, we just need to make a TOC-relative
11412 	 reference to it.  */
11413       if (TARGET_TOC
11414 	  && GET_CODE (operands[1]) == SYMBOL_REF
11415 	  && use_toc_relative_ref (operands[1], mode))
11416 	operands[1] = create_TOC_reference (operands[1], operands[0]);
11417       else if (mode == Pmode
11418 	       && CONSTANT_P (operands[1])
11419 	       && GET_CODE (operands[1]) != HIGH
11420 	       && ((GET_CODE (operands[1]) != CONST_INT
11421 		    && ! easy_fp_constant (operands[1], mode))
11422 		   || (GET_CODE (operands[1]) == CONST_INT
11423 		       && (num_insns_constant (operands[1], mode)
11424 			   > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
11425 		   || (GET_CODE (operands[0]) == REG
11426 		       && FP_REGNO_P (REGNO (operands[0]))))
11427 	       && !toc_relative_expr_p (operands[1], false)
11428 	       && (TARGET_CMODEL == CMODEL_SMALL
11429 		   || can_create_pseudo_p ()
11430 		   || (REG_P (operands[0])
11431 		       && INT_REG_OK_FOR_BASE_P (operands[0], true))))
11432 	{
11433 
11434 #if TARGET_MACHO
11435 	  /* Darwin uses a special PIC legitimizer.  */
11436 	  if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
11437 	    {
11438 	      operands[1] =
11439 		rs6000_machopic_legitimize_pic_address (operands[1], mode,
11440 							operands[0]);
11441 	      if (operands[0] != operands[1])
11442 		emit_insn (gen_rtx_SET (operands[0], operands[1]));
11443 	      return;
11444 	    }
11445 #endif
11446 
11447 	  /* If we are to limit the number of things we put in the TOC and
11448 	     this is a symbol plus a constant we can add in one insn,
11449 	     just put the symbol in the TOC and add the constant.  Don't do
11450 	     this if reload is in progress.  */
11451 	  if (GET_CODE (operands[1]) == CONST
11452 	      && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
11453 	      && GET_CODE (XEXP (operands[1], 0)) == PLUS
11454 	      && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
11455 	      && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
11456 		  || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
11457 	      && ! side_effects_p (operands[0]))
11458 	    {
11459 	      rtx sym =
11460 		force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
11461 	      rtx other = XEXP (XEXP (operands[1], 0), 1);
11462 
11463 	      sym = force_reg (mode, sym);
11464 	      emit_insn (gen_add3_insn (operands[0], sym, other));
11465 	      return;
11466 	    }
11467 
11468 	  operands[1] = force_const_mem (mode, operands[1]);
11469 
11470 	  if (TARGET_TOC
11471 	      && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
11472 	      && use_toc_relative_ref (XEXP (operands[1], 0), mode))
11473 	    {
11474 	      rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
11475 						 operands[0]);
11476 	      operands[1] = gen_const_mem (mode, tocref);
11477 	      set_mem_alias_set (operands[1], get_TOC_alias_set ());
11478 	    }
11479 	}
11480       break;
11481 
11482     case E_TImode:
11483       if (!VECTOR_MEM_VSX_P (TImode))
11484 	rs6000_eliminate_indexed_memrefs (operands);
11485       break;
11486 
11487     case E_PTImode:
11488       rs6000_eliminate_indexed_memrefs (operands);
11489       break;
11490 
11491     default:
11492       fatal_insn ("bad move", gen_rtx_SET (dest, source));
11493     }
11494 
11495   /* Above, we may have called force_const_mem which may have returned
11496      an invalid address.  If we can, fix this up; otherwise, reload will
11497      have to deal with it.  */
11498   if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
11499     operands[1] = validize_mem (operands[1]);
11500 
11501  emit_set:
11502   emit_insn (gen_rtx_SET (operands[0], operands[1]));
11503 }
11504 
11505 /* Return true if a structure, union or array containing FIELD should be
11506    accessed using `BLKMODE'.
11507 
11508    For the SPE, simd types are V2SI, and gcc can be tempted to put the
11509    entire thing in a DI and use subregs to access the internals.
11510    store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
11511    back-end.  Because a single GPR can hold a V2SI, but not a DI, the
11512    best thing to do is set structs to BLKmode and avoid Severe Tire
11513    Damage.
11514 
11515    On e500 v2, DF and DI modes suffer from the same anomaly.  DF can
11516    fit into one GPR, whereas DI still needs two.  */
11517 
11518 static bool
11519 rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
11520 {
11521   return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
11522 	  || (TARGET_E500_DOUBLE && mode == DFmode));
11523 }
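
/* E.g. with -mspe, a struct containing a single V2SI field gets BLKmode
   instead of DImode, so the middle end never materializes a
   (subreg:DI (reg:V2SI ...)) that no single GPR could hold.  */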
11524 
11525 /* Nonzero if we can use a floating-point register to pass this arg.  */
11526 #define USE_FP_FOR_ARG_P(CUM,MODE)		\
11527   (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE)		\
11528    && (CUM)->fregno <= FP_ARG_MAX_REG		\
11529    && TARGET_HARD_FLOAT && TARGET_FPRS)
11530 
11531 /* Nonzero if we can use an AltiVec register to pass this arg.  */
11532 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED)			\
11533   (ALTIVEC_OR_VSX_VECTOR_MODE (MODE)				\
11534    && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG			\
11535    && TARGET_ALTIVEC_ABI					\
11536    && (NAMED))
11537 
11538 /* Walk down the type tree of TYPE counting consecutive base elements.
11539    If *MODEP is VOIDmode, then set it to the first valid floating point
11540    or vector type.  If a non-floating point or vector type is found, or
11541    if a floating point or vector type that doesn't match a non-VOIDmode
11542    *MODEP is found, then return -1, otherwise return the count in the
11543    sub-tree.  */
11544 
11545 static int
11546 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
11547 {
11548   machine_mode mode;
11549   HOST_WIDE_INT size;
11550 
11551   switch (TREE_CODE (type))
11552     {
11553     case REAL_TYPE:
11554       mode = TYPE_MODE (type);
11555       if (!SCALAR_FLOAT_MODE_P (mode))
11556 	return -1;
11557 
11558       if (*modep == VOIDmode)
11559 	*modep = mode;
11560 
11561       if (*modep == mode)
11562 	return 1;
11563 
11564       break;
11565 
11566     case COMPLEX_TYPE:
11567       mode = TYPE_MODE (TREE_TYPE (type));
11568       if (!SCALAR_FLOAT_MODE_P (mode))
11569 	return -1;
11570 
11571       if (*modep == VOIDmode)
11572 	*modep = mode;
11573 
11574       if (*modep == mode)
11575 	return 2;
11576 
11577       break;
11578 
11579     case VECTOR_TYPE:
11580       if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
11581 	return -1;
11582 
11583       /* Use V4SImode as representative of all 128-bit vector types.  */
11584       size = int_size_in_bytes (type);
11585       switch (size)
11586 	{
11587 	case 16:
11588 	  mode = V4SImode;
11589 	  break;
11590 	default:
11591 	  return -1;
11592 	}
11593 
11594       if (*modep == VOIDmode)
11595 	*modep = mode;
11596 
11597       /* Vector modes are considered to be opaque: two vectors are
11598 	 equivalent for the purposes of being homogeneous aggregates
11599 	 if they are the same size.  */
11600       if (*modep == mode)
11601 	return 1;
11602 
11603       break;
11604 
11605     case ARRAY_TYPE:
11606       {
11607 	int count;
11608 	tree index = TYPE_DOMAIN (type);
11609 
11610 	/* Can't handle incomplete types nor sizes that are not
11611 	   fixed.  */
11612 	if (!COMPLETE_TYPE_P (type)
11613 	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11614 	  return -1;
11615 
11616 	count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
11617 	if (count == -1
11618 	    || !index
11619 	    || !TYPE_MAX_VALUE (index)
11620 	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
11621 	    || !TYPE_MIN_VALUE (index)
11622 	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
11623 	    || count < 0)
11624 	  return -1;
11625 
11626 	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
11627 		      - tree_to_uhwi (TYPE_MIN_VALUE (index)));
11628 
11629 	/* There must be no padding.  */
11630 	if (wi::to_wide (TYPE_SIZE (type))
11631 	    != count * GET_MODE_BITSIZE (*modep))
11632 	  return -1;
11633 
11634 	return count;
11635       }
11636 
11637     case RECORD_TYPE:
11638       {
11639 	int count = 0;
11640 	int sub_count;
11641 	tree field;
11642 
11643 	/* Can't handle incomplete types nor sizes that are not
11644 	   fixed.  */
11645 	if (!COMPLETE_TYPE_P (type)
11646 	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11647 	  return -1;
11648 
11649 	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
11650 	  {
11651 	    if (TREE_CODE (field) != FIELD_DECL)
11652 	      continue;
11653 
11654 	    sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
11655 	    if (sub_count < 0)
11656 	      return -1;
11657 	    count += sub_count;
11658 	  }
11659 
11660 	/* There must be no padding.  */
11661 	if (wi::to_wide (TYPE_SIZE (type))
11662 	    != count * GET_MODE_BITSIZE (*modep))
11663 	  return -1;
11664 
11665 	return count;
11666       }
11667 
11668     case UNION_TYPE:
11669     case QUAL_UNION_TYPE:
11670       {
11671 	/* These aren't very interesting except in a degenerate case.  */
11672 	int count = 0;
11673 	int sub_count;
11674 	tree field;
11675 
11676 	/* Can't handle incomplete types nor sizes that are not
11677 	   fixed.  */
11678 	if (!COMPLETE_TYPE_P (type)
11679 	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11680 	  return -1;
11681 
11682 	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
11683 	  {
11684 	    if (TREE_CODE (field) != FIELD_DECL)
11685 	      continue;
11686 
11687 	    sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
11688 	    if (sub_count < 0)
11689 	      return -1;
11690 	    count = count > sub_count ? count : sub_count;
11691 	  }
11692 
11693 	/* There must be no padding.  */
11694 	if (wi::to_wide (TYPE_SIZE (type))
11695 	    != count * GET_MODE_BITSIZE (*modep))
11696 	  return -1;
11697 
11698 	return count;
11699       }
11700 
11701     default:
11702       break;
11703     }
11704 
11705   return -1;
11706 }
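
/* Worked examples:
     struct { double a, b; }      -> *modep = DFmode, return 2
     double x[4]                  -> *modep = DFmode, return 4
     struct { double d; int i; }  -> return -1 (mixed element types)  */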
11707 
11708 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
11709    float or vector aggregate that shall be passed in FP/vector registers
11710    according to the ELFv2 ABI, return the homogeneous element mode in
11711    *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
11712 
11713    Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE.  */
11714 
11715 static bool
11716 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
11717 				       machine_mode *elt_mode,
11718 				       int *n_elts)
11719 {
11720   /* Note that we do not accept complex types at the top level as
11721      homogeneous aggregates; these types are handled via the
11722      targetm.calls.split_complex_arg mechanism.  Complex types
11723      can be elements of homogeneous aggregates, however.  */
11724   if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
11725     {
11726       machine_mode field_mode = VOIDmode;
11727       int field_count = rs6000_aggregate_candidate (type, &field_mode);
11728 
11729       if (field_count > 0)
11730 	{
11731 	  int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
11732 			(GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
11733 
11734 	  /* The ELFv2 ABI allows homogeneous aggregates to occupy
11735 	     up to AGGR_ARG_NUM_REG registers.  */
11736 	  if (field_count * n_regs <= AGGR_ARG_NUM_REG)
11737 	    {
11738 	      if (elt_mode)
11739 		*elt_mode = field_mode;
11740 	      if (n_elts)
11741 		*n_elts = field_count;
11742 	      return true;
11743 	    }
11744 	}
11745     }
11746 
11747   if (elt_mode)
11748     *elt_mode = mode;
11749   if (n_elts)
11750     *n_elts = 1;
11751   return false;
11752 }
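
/* For example, under ELFv2 a struct of four doubles gives *ELT_MODE =
   DFmode and *N_ELTS = 4 and so is passed in four FPRs, whereas a
   struct of nine doubles needs more than AGGR_ARG_NUM_REG registers
   and falls back to the normal aggregate rules.  */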
11753 
11754 /* Return a nonzero value to say to return the function value in
11755    memory, just as large structures are always returned.  TYPE will be
11756    the data type of the value, and FNTYPE will be the type of the
11757    function doing the returning, or @code{NULL} for libcalls.
11758 
11759    The AIX ABI for the RS/6000 specifies that all structures are
11760    returned in memory.  The Darwin ABI does the same.
11761 
11762    For the Darwin 64 Bit ABI, a function result can be returned in
11763    registers or in memory, depending on the size of the return data
11764    type.  If it is returned in registers, the value occupies the same
11765    registers as it would if it were the first and only function
11766    argument.  Otherwise, the function places its result in memory at
11767    the location pointed to by GPR3.
11768 
11769    The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
11770    but a draft put them in memory, and GCC used to implement the draft
11771    instead of the final standard.  Therefore, aix_struct_return
11772    controls this instead of DEFAULT_ABI; V.4 targets needing backward
11773    compatibility can change DRAFT_V4_STRUCT_RET to override the
11774    default, and -m switches get the final word.  See
11775    rs6000_option_override_internal for more details.
11776 
11777    The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
11778    long double support is enabled.  These values are returned in memory.
11779 
11780    int_size_in_bytes returns -1 for variable size objects, which go in
11781    memory always.  The cast to unsigned makes -1 > 8.  */
11782 
11783 static bool
11784 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
11785 {
11786   /* For the Darwin64 ABI, test if we can fit the return value in regs.  */
11787   if (TARGET_MACHO
11788       && rs6000_darwin64_abi
11789       && TREE_CODE (type) == RECORD_TYPE
11790       && int_size_in_bytes (type) > 0)
11791     {
11792       CUMULATIVE_ARGS valcum;
11793       rtx valret;
11794 
11795       valcum.words = 0;
11796       valcum.fregno = FP_ARG_MIN_REG;
11797       valcum.vregno = ALTIVEC_ARG_MIN_REG;
11798       /* Do a trial code generation as if this were going to be passed
11799 	 as an argument; if any part goes in memory, we return NULL.  */
11800       valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
11801       if (valret)
11802 	return false;
11803       /* Otherwise fall through to more conventional ABI rules.  */
11804     }
11805 
11806   /* The ELFv2 ABI returns homogeneous float/vector aggregates in
	 registers.  */
11807   if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
11808 					     NULL, NULL))
11809     return false;
11810 
11811   /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers.  */
11812   if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
11813       && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
11814     return false;
11815 
11816   if (AGGREGATE_TYPE_P (type)
11817       && (aix_struct_return
11818 	  || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
11819     return true;
11820 
11821   /* Allow -maltivec -mabi=no-altivec without warning.  Altivec vector
11822      modes only exist for GCC vector types if -maltivec.  */
11823   if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
11824       && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
11825     return false;
11826 
11827   /* Return synthetic vectors in memory.  */
11828   if (TREE_CODE (type) == VECTOR_TYPE
11829       && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
11830     {
11831       static bool warned_for_return_big_vectors = false;
11832       if (!warned_for_return_big_vectors)
11833 	{
11834 	  warning (OPT_Wpsabi, "GCC vector returned by reference: "
11835 		   "non-standard ABI extension with no compatibility guarantee");
11836 	  warned_for_return_big_vectors = true;
11837 	}
11838       return true;
11839     }
11840 
11841   if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
11842       && FLOAT128_IEEE_P (TYPE_MODE (type)))
11843     return true;
11844 
11845   return false;
11846 }
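
/* Putting the ELFv2 cases together: struct { double a, b; } comes back
   in two FPRs (a homogeneous aggregate), an 8-byte struct of ints comes
   back in a GPR (<= 16 bytes), and a 24-byte struct of mixed members is
   returned in memory.  */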
11847 
11848 /* Specify whether values returned in registers should be at the most
11849    significant end of a register.  We want aggregates returned by
11850    value to match the way aggregates are passed to functions.  */
11851 
11852 static bool
11853 rs6000_return_in_msb (const_tree valtype)
11854 {
11855   return (DEFAULT_ABI == ABI_ELFv2
11856 	  && BYTES_BIG_ENDIAN
11857 	  && AGGREGATE_TYPE_P (valtype)
11858 	  && rs6000_function_arg_padding (TYPE_MODE (valtype),
11859 					  valtype) == PAD_UPWARD);
11860 }
11861 
11862 #ifdef HAVE_AS_GNU_ATTRIBUTE
11863 /* Return TRUE if a call to function FNDECL may be one that
11864    potentially affects the function calling ABI of the object file.  */
11865 
11866 static bool
11867 call_ABI_of_interest (tree fndecl)
11868 {
11869   if (rs6000_gnu_attr && symtab->state == EXPANSION)
11870     {
11871       struct cgraph_node *c_node;
11872 
11873       /* Libcalls are always interesting.  */
11874       if (fndecl == NULL_TREE)
11875 	return true;
11876 
11877       /* Any call to an external function is interesting.  */
11878       if (DECL_EXTERNAL (fndecl))
11879 	return true;
11880 
11881       /* Interesting functions that we are emitting in this object file.  */
11882       c_node = cgraph_node::get (fndecl);
11883       c_node = c_node->ultimate_alias_target ();
11884       return !c_node->only_called_directly_p ();
11885     }
11886   return false;
11887 }
11888 #endif
11889 
11890 /* Initialize a variable CUM of type CUMULATIVE_ARGS
11891    for a call to a function whose data type is FNTYPE.
11892    For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
11893 
11894    For incoming args we set the number of arguments in the prototype large
11895    so we never return a PARALLEL.  */
11896 
11897 void
11898 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
11899 		      rtx libname ATTRIBUTE_UNUSED, int incoming,
11900 		      int libcall, int n_named_args,
11901 		      tree fndecl ATTRIBUTE_UNUSED,
11902 		      machine_mode return_mode ATTRIBUTE_UNUSED)
11903 {
11904   static CUMULATIVE_ARGS zero_cumulative;
11905 
11906   *cum = zero_cumulative;
11907   cum->words = 0;
11908   cum->fregno = FP_ARG_MIN_REG;
11909   cum->vregno = ALTIVEC_ARG_MIN_REG;
11910   cum->prototype = (fntype && prototype_p (fntype));
11911   cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
11912 		      ? CALL_LIBCALL : CALL_NORMAL);
11913   cum->sysv_gregno = GP_ARG_MIN_REG;
11914   cum->stdarg = stdarg_p (fntype);
11915   cum->libcall = libcall;
11916 
11917   cum->nargs_prototype = 0;
11918   if (incoming || cum->prototype)
11919     cum->nargs_prototype = n_named_args;
11920 
11921   /* Check for a longcall attribute.  */
11922   if ((!fntype && rs6000_default_long_calls)
11923       || (fntype
11924 	  && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
11925 	  && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
11926     cum->call_cookie |= CALL_LONG;
11927 
11928   if (TARGET_DEBUG_ARG)
11929     {
11930       fprintf (stderr, "\ninit_cumulative_args:");
11931       if (fntype)
11932 	{
11933 	  tree ret_type = TREE_TYPE (fntype);
11934 	  fprintf (stderr, " ret code = %s,",
11935 		   get_tree_code_name (TREE_CODE (ret_type)));
11936 	}
11937 
11938       if (cum->call_cookie & CALL_LONG)
11939 	fprintf (stderr, " longcall,");
11940 
11941       fprintf (stderr, " proto = %d, nargs = %d\n",
11942 	       cum->prototype, cum->nargs_prototype);
11943     }
11944 
11945 #ifdef HAVE_AS_GNU_ATTRIBUTE
11946   if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4))
11947     {
11948       cum->escapes = call_ABI_of_interest (fndecl);
11949       if (cum->escapes)
11950 	{
11951 	  tree return_type;
11952 
11953 	  if (fntype)
11954 	    {
11955 	      return_type = TREE_TYPE (fntype);
11956 	      return_mode = TYPE_MODE (return_type);
11957 	    }
11958 	  else
11959 	    return_type = lang_hooks.types.type_for_mode (return_mode, 0);
11960 
11961 	  if (return_type != NULL)
11962 	    {
11963 	      if (TREE_CODE (return_type) == RECORD_TYPE
11964 		  && TYPE_TRANSPARENT_AGGR (return_type))
11965 		{
11966 		  return_type = TREE_TYPE (first_field (return_type));
11967 		  return_mode = TYPE_MODE (return_type);
11968 		}
11969 	      if (AGGREGATE_TYPE_P (return_type)
11970 		  && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
11971 		      <= 8))
11972 		rs6000_returns_struct = true;
11973 	    }
11974 	  if (SCALAR_FLOAT_MODE_P (return_mode))
11975 	    {
11976 	      rs6000_passes_float = true;
11977 	      if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11978 		  && (FLOAT128_IBM_P (return_mode)
11979 		      || FLOAT128_IEEE_P (return_mode)
11980 		      || (return_type != NULL
11981 			  && (TYPE_MAIN_VARIANT (return_type)
11982 			      == long_double_type_node))))
11983 		rs6000_passes_long_double = true;
11984 	    }
11985 	  if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
11986 	      || SPE_VECTOR_MODE (return_mode))
11987 	    rs6000_passes_vector = true;
11988 	}
11989     }
11990 #endif
11991 
11992   if (fntype
11993       && !TARGET_ALTIVEC
11994       && TARGET_ALTIVEC_ABI
11995       && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
11996     {
11997       error ("cannot return value in vector register because"
11998 	     " altivec instructions are disabled, use -maltivec"
11999 	     " to enable them");
12000     }
12001 }
12002 
12003 /* The mode the ABI uses for a word.  This is not the same as word_mode
12004    for -m32 -mpowerpc64.  This is used to implement various target hooks.  */
12005 
12006 static scalar_int_mode
12007 rs6000_abi_word_mode (void)
12008 {
12009   return TARGET_32BIT ? SImode : DImode;
12010 }
12011 
12012 /* Implement the TARGET_OFFLOAD_OPTIONS hook.  */
12013 static char *
12014 rs6000_offload_options (void)
12015 {
12016   if (TARGET_64BIT)
12017     return xstrdup ("-foffload-abi=lp64");
12018   else
12019     return xstrdup ("-foffload-abi=ilp32");
12020 }
12021 
12022 /* On rs6000, function arguments are promoted, as are function return
12023    values.  */
12024 
12025 static machine_mode
12026 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
12027 			      machine_mode mode,
12028 			      int *punsignedp ATTRIBUTE_UNUSED,
12029 			      const_tree, int)
12030 {
12031   PROMOTE_MODE (mode, *punsignedp, type);
12032 
12033   return mode;
12034 }
12035 
12036 /* Return true if TYPE must be passed on the stack and not in registers.  */
12037 
12038 static bool
12039 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
12040 {
12041   if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
12042     return must_pass_in_stack_var_size (mode, type);
12043   else
12044     return must_pass_in_stack_var_size_or_pad (mode, type);
12045 }
12046 
12047 static inline bool
12048 is_complex_IBM_long_double (machine_mode mode)
12049 {
12050   return mode == ICmode || (!TARGET_IEEEQUAD && mode == TCmode);
12051 }
12052 
12053 /* Whether ABI_V4 passes MODE args to a function in floating point
12054    registers.  */
12055 
12056 static bool
12057 abi_v4_pass_in_fpr (machine_mode mode)
12058 {
12059   if (!TARGET_FPRS || !TARGET_HARD_FLOAT)
12060     return false;
12061   if (TARGET_SINGLE_FLOAT && mode == SFmode)
12062     return true;
12063   if (TARGET_DOUBLE_FLOAT && mode == DFmode)
12064     return true;
12065   /* ABI_V4 passes complex IBM long double in 8 gprs.
12066      Stupid, but we can't change the ABI now.  */
12067   if (is_complex_IBM_long_double (mode))
12068     return false;
12069   if (FLOAT128_2REG_P (mode))
12070     return true;
12071   if (DECIMAL_FLOAT_MODE_P (mode))
12072     return true;
12073   return false;
12074 }
12075 
12076 /* Implement TARGET_FUNCTION_ARG_PADDING
12077 
12078    For the AIX ABI structs are always stored left shifted in their
12079    argument slot.  */
12080 
12081 static pad_direction
12082 rs6000_function_arg_padding (machine_mode mode, const_tree type)
12083 {
12084 #ifndef AGGREGATE_PADDING_FIXED
12085 #define AGGREGATE_PADDING_FIXED 0
12086 #endif
12087 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
12088 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
12089 #endif
12090 
12091   if (!AGGREGATE_PADDING_FIXED)
12092     {
12093       /* GCC used to pass structures of the same size as integer types as
12094 	 if they were in fact integers, ignoring TARGET_FUNCTION_ARG_PADDING.
12095 	 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
12096 	 passed padded downward, except that -mstrict-align further
12097 	 muddied the water in that multi-component structures of 2 and 4
12098 	 bytes in size were passed padded upward.
12099 
12100 	 The following arranges for best compatibility with previous
12101 	 versions of gcc, but removes the -mstrict-align dependency.  */
12102       if (BYTES_BIG_ENDIAN)
12103 	{
12104 	  HOST_WIDE_INT size = 0;
12105 
12106 	  if (mode == BLKmode)
12107 	    {
12108 	      if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
12109 		size = int_size_in_bytes (type);
12110 	    }
12111 	  else
12112 	    size = GET_MODE_SIZE (mode);
12113 
12114 	  if (size == 1 || size == 2 || size == 4)
12115 	    return PAD_DOWNWARD;
12116 	}
12117       return PAD_UPWARD;
12118     }
12119 
12120   if (AGGREGATES_PAD_UPWARD_ALWAYS)
12121     {
12122       if (type != 0 && AGGREGATE_TYPE_P (type))
12123 	return PAD_UPWARD;
12124     }
12125 
12126   /* Fall back to the default.  */
12127   return default_function_arg_padding (mode, type);
12128 }
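
/* Concretely, on a big-endian target a 2-byte struct is padded downward
   (the value sits in the least significant end of its slot, as if it
   were a short), while a 3-byte struct is padded upward.  */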
12129 
12130 /* If defined, a C expression that gives the alignment boundary, in bits,
12131    of an argument with the specified mode and type.  If it is not defined,
12132    PARM_BOUNDARY is used for all arguments.
12133 
12134    V.4 wants long longs and doubles to be double word aligned.  Just
12135    testing the mode size is a boneheaded way to do this as it means
12136    that other types such as complex int are also double word aligned.
12137    However, we're stuck with this because changing the ABI might break
12138    existing library interfaces.
12139 
12140    Doubleword align SPE vectors.
12141    Quadword align Altivec/VSX vectors.
12142    Quadword align large synthetic vector types.   */
12143 
12144 static unsigned int
12145 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
12146 {
12147   machine_mode elt_mode;
12148   int n_elts;
12149 
12150   rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12151 
12152   if (DEFAULT_ABI == ABI_V4
12153       && (GET_MODE_SIZE (mode) == 8
12154 	  || (TARGET_HARD_FLOAT
12155 	      && TARGET_FPRS
12156 	      && !is_complex_IBM_long_double (mode)
12157 	      && FLOAT128_2REG_P (mode))))
12158     return 64;
12159   else if (FLOAT128_VECTOR_P (mode))
12160     return 128;
12161   else if (SPE_VECTOR_MODE (mode)
12162 	   || (type && TREE_CODE (type) == VECTOR_TYPE
12163 	       && int_size_in_bytes (type) >= 8
12164 	       && int_size_in_bytes (type) < 16))
12165     return 64;
12166   else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
12167 	   || (type && TREE_CODE (type) == VECTOR_TYPE
12168 	       && int_size_in_bytes (type) >= 16))
12169     return 128;
12170 
12171   /* Aggregate types that need > 8 byte alignment are quadword-aligned
12172      in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
12173      -mcompat-align-parm is used.  */
12174   if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
12175        || DEFAULT_ABI == ABI_ELFv2)
12176       && type && TYPE_ALIGN (type) > 64)
12177     {
12178       /* "Aggregate" means any AGGREGATE_TYPE except for single-element
12179          or homogeneous float/vector aggregates here.  We already handled
12180          vector aggregates above, but still need to check for float here. */
12181       bool aggregate_p = (AGGREGATE_TYPE_P (type)
12182 			  && !SCALAR_FLOAT_MODE_P (elt_mode));
12183 
12184       /* We used to check for BLKmode instead of the above aggregate type
12185 	 check.  Warn when this results in any difference to the ABI.  */
12186       if (aggregate_p != (mode == BLKmode))
12187 	{
12188 	  static bool warned;
12189 	  if (!warned && warn_psabi)
12190 	    {
12191 	      warned = true;
12192 	      inform (input_location,
12193 		      "the ABI of passing aggregates with %d-byte alignment"
12194 		      " has changed in GCC 5",
12195 		      (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
12196 	    }
12197 	}
12198 
12199       if (aggregate_p)
12200 	return 128;
12201     }
12202 
12203   /* Similar for the Darwin64 ABI.  Note that for historical reasons we
12204      implement the "aggregate type" check as a BLKmode check here; this
12205      means certain aggregate types are in fact not aligned.  */
12206   if (TARGET_MACHO && rs6000_darwin64_abi
12207       && mode == BLKmode
12208       && type && TYPE_ALIGN (type) > 64)
12209     return 128;
12210 
12211   return PARM_BOUNDARY;
12212 }
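
/* E.g. under the V.4 ABI a long long argument is aligned to 64 bits, an
   Altivec or VSX vector to 128 bits, and most everything else falls
   through to PARM_BOUNDARY.  */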
12213 
12214 /* The offset in words to the start of the parameter save area.  */
12215 
12216 static unsigned int
12217 rs6000_parm_offset (void)
12218 {
12219   return (DEFAULT_ABI == ABI_V4 ? 2
12220 	  : DEFAULT_ABI == ABI_ELFv2 ? 4
12221 	  : 6);
12222 }
12223 
12224 /* For a function parm of MODE and TYPE, return the starting word in
12225    the parameter area.  NWORDS of the parameter area are already used.  */
12226 
12227 static unsigned int
12228 rs6000_parm_start (machine_mode mode, const_tree type,
12229 		   unsigned int nwords)
12230 {
12231   unsigned int align;
12232 
12233   align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
12234   return nwords + (-(rs6000_parm_offset () + nwords) & align);
12235 }
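
/* Worked example for ELFv2 on 64-bit, where the parameter area starts 4
   words into the frame: a 128-bit-aligned argument has ALIGN = 128/64-1
   = 1, so with NWORDS = 1 it starts at word 1 + (-(4 + 1) & 1) = 2,
   bumping it to the next even word and hence to a 16-byte boundary.  */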
12236 
12237 /* Compute the size (in words) of a function argument.  */
12238 
12239 static unsigned long
12240 rs6000_arg_size (machine_mode mode, const_tree type)
12241 {
12242   unsigned long size;
12243 
12244   if (mode != BLKmode)
12245     size = GET_MODE_SIZE (mode);
12246   else
12247     size = int_size_in_bytes (type);
12248 
12249   if (TARGET_32BIT)
12250     return (size + 3) >> 2;
12251   else
12252     return (size + 7) >> 3;
12253 }
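
/* E.g. a 9-byte BLKmode struct occupies (9 + 7) >> 3 = 2 doublewords on
   64-bit targets and (9 + 3) >> 2 = 3 words on 32-bit targets.  */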
12254 
12255 /* Use this to flush pending int fields.  */
12256 
12257 static void
12258 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
12259 					  HOST_WIDE_INT bitpos, int final)
12260 {
12261   unsigned int startbit, endbit;
12262   int intregs, intoffset;
12263 
12264   /* Handle the situations where a float is taking up the first half
12265      of the GPR, and the other half is empty (typically due to
12266      alignment restrictions). We can detect this by an 8-byte-aligned
12267      int field, or by seeing that this is the final flush for this
12268      argument. Count the word and continue on.  */
12269   if (cum->floats_in_gpr == 1
12270       && (cum->intoffset % 64 == 0
12271 	  || (cum->intoffset == -1 && final)))
12272     {
12273       cum->words++;
12274       cum->floats_in_gpr = 0;
12275     }
12276 
12277   if (cum->intoffset == -1)
12278     return;
12279 
12280   intoffset = cum->intoffset;
12281   cum->intoffset = -1;
12282   cum->floats_in_gpr = 0;
12283 
12284   if (intoffset % BITS_PER_WORD != 0)
12285     {
12286       unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
12287       if (!int_mode_for_size (bits, 0).exists ())
12288 	{
12289 	  /* We couldn't find an appropriate mode, which happens,
12290 	     e.g., in packed structs when there are 3 bytes to load.
12291 	     Back intoffset back to the beginning of the word in this
12292 	     case.  */
12293 	  intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
12294 	}
12295     }
12296 
12297   startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
12298   endbit = ROUND_UP (bitpos, BITS_PER_WORD);
12299   intregs = (endbit - startbit) / BITS_PER_WORD;
12300   cum->words += intregs;
12301   /* words should be unsigned. */
12302   if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
12303     {
12304       int pad = (endbit/BITS_PER_WORD) - cum->words;
12305       cum->words += pad;
12306     }
12307 }
12308 
12309 /* The darwin64 ABI calls for us to recurse down through structs,
12310    looking for elements passed in registers.  Unfortunately, we have
12311    to track int register count here also because of misalignments
12312    in powerpc alignment mode.  */
12313 
12314 static void
12315 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
12316 					    const_tree type,
12317 					    HOST_WIDE_INT startbitpos)
12318 {
12319   tree f;
12320 
12321   for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
12322     if (TREE_CODE (f) == FIELD_DECL)
12323       {
12324 	HOST_WIDE_INT bitpos = startbitpos;
12325 	tree ftype = TREE_TYPE (f);
12326 	machine_mode mode;
12327 	if (ftype == error_mark_node)
12328 	  continue;
12329 	mode = TYPE_MODE (ftype);
12330 
12331 	if (DECL_SIZE (f) != 0
12332 	    && tree_fits_uhwi_p (bit_position (f)))
12333 	  bitpos += int_bit_position (f);
12334 
12335 	/* ??? FIXME: else assume zero offset.  */
12336 
12337 	if (TREE_CODE (ftype) == RECORD_TYPE)
12338 	  rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
12339 	else if (USE_FP_FOR_ARG_P (cum, mode))
12340 	  {
12341 	    unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
12342 	    rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
12343 	    cum->fregno += n_fpregs;
12344 	    /* Single-precision floats present a special problem for
12345 	       us, because they are smaller than an 8-byte GPR, and so
12346 	       the structure-packing rules combined with the standard
12347 	       varargs behavior mean that we want to pack float/float
12348 	       and float/int combinations into a single register's
12349 	       space. This is complicated by the arg advance flushing,
12350 	       which works on arbitrarily large groups of int-type
12351 	       fields.  */
12352 	    if (mode == SFmode)
12353 	      {
12354 		if (cum->floats_in_gpr == 1)
12355 		  {
12356 		    /* Two floats in a word; count the word and reset
12357 		       the float count.  */
12358 		    cum->words++;
12359 		    cum->floats_in_gpr = 0;
12360 		  }
12361 		else if (bitpos % 64 == 0)
12362 		  {
12363 		    /* A float at the beginning of an 8-byte word;
12364 		       count it and put off adjusting cum->words until
12365 		       we see if an arg advance flush is going to do it
12366 		       for us.  */
12367 		    cum->floats_in_gpr++;
12368 		  }
12369 		else
12370 		  {
12371 		    /* The float is at the end of a word, preceded
12372 		       by integer fields, so the arg advance flush
12373 		       just above has already set cum->words and
12374 		       everything is taken care of.  */
12375 		  }
12376 	      }
12377 	    else
12378 	      cum->words += n_fpregs;
12379 	  }
12380 	else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
12381 	  {
12382 	    rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
12383 	    cum->vregno++;
12384 	    cum->words += 2;
12385 	  }
12386 	else if (cum->intoffset == -1)
12387 	  cum->intoffset = bitpos;
12388       }
12389 }
12390 
12391 /* Check for an item that needs to be considered specially under the darwin 64
12392    bit ABI.  These are record types where the mode is BLK or the structure is
12393    8 bytes in size.  */
12394 static int
12395 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
12396 {
12397   return rs6000_darwin64_abi
12398 	 && ((mode == BLKmode
12399 	      && TREE_CODE (type) == RECORD_TYPE
12400 	      && int_size_in_bytes (type) > 0)
12401 	  || (type && TREE_CODE (type) == RECORD_TYPE
12402 	      && int_size_in_bytes (type) == 8)) ? 1 : 0;
12403 }

/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)

   Note that for args passed by reference, function_arg will be called
   with MODE and TYPE set to that of the pointer to the arg, not the arg
   itself.  */

static void
rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
			       const_tree type, bool named, int depth)
{
  machine_mode elt_mode;
  int n_elts;

  rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);

  /* Only tick off an argument if we're not recursing.  */
  if (depth == 0)
    cum->nargs_prototype--;

#ifdef HAVE_AS_GNU_ATTRIBUTE
  if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)
      && cum->escapes)
    {
      if (SCALAR_FLOAT_MODE_P (mode))
	{
	  rs6000_passes_float = true;
	  if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
	      && (FLOAT128_IBM_P (mode)
		  || FLOAT128_IEEE_P (mode)
		  || (type != NULL
		      && TYPE_MAIN_VARIANT (type) == long_double_type_node)))
	    rs6000_passes_long_double = true;
	}
      if ((named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
	  || (SPE_VECTOR_MODE (mode)
	      && !cum->stdarg
	      && cum->sysv_gregno <= GP_ARG_MAX_REG))
	rs6000_passes_vector = true;
    }
#endif

  if (TARGET_ALTIVEC_ABI
      && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
	  || (type && TREE_CODE (type) == VECTOR_TYPE
	      && int_size_in_bytes (type) == 16)))
    {
      bool stack = false;

      if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
	{
	  cum->vregno += n_elts;

	  if (!TARGET_ALTIVEC)
	    error ("cannot pass argument in vector register because"
		   " altivec instructions are disabled, use -maltivec"
		   " to enable them");

	  /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
	     even if it is going to be passed in a vector register.
	     Darwin does the same for variable-argument functions.  */
	  if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
	       && TARGET_64BIT)
	      || (cum->stdarg && DEFAULT_ABI != ABI_V4))
	    stack = true;
	}
      else
	stack = true;

      if (stack)
	{
	  int align;

	  /* Vector parameters must be 16-byte aligned.  In 32-bit
	     mode this means we need to take into account the offset
	     to the parameter save area.  In 64-bit mode, they just
	     have to start on an even word, since the parameter save
	     area is 16-byte aligned.  */
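	  /* For example, if rs6000_parm_offset () + cum->words is 5,
	     then -(5) & 3 yields 3 padding words, bringing the total
	     to 8 words -- a 16-byte boundary in 32-bit mode.  */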
	  if (TARGET_32BIT)
	    align = -(rs6000_parm_offset () + cum->words) & 3;
	  else
	    align = cum->words & 1;
	  cum->words += align + rs6000_arg_size (mode, type);

	  if (TARGET_DEBUG_ARG)
	    {
	      fprintf (stderr, "function_adv: words = %2d, align=%d, ",
		       cum->words, align);
	      fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
		       cum->nargs_prototype, cum->prototype,
		       GET_MODE_NAME (mode));
	    }
	}
    }
  else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
	   && !cum->stdarg
	   && cum->sysv_gregno <= GP_ARG_MAX_REG)
    cum->sysv_gregno++;

  else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
    {
      int size = int_size_in_bytes (type);
      /* Variable sized types have size == -1 and are
	 treated as if consisting entirely of ints.
	 Pad to 16 byte boundary if needed.  */
      if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
	  && (cum->words % 2) != 0)
	cum->words++;
      /* For varargs, we can just go up by the size of the struct.  */
      if (!named)
	cum->words += (size + 7) / 8;
      else
	{
	  /* It is tempting to say int register count just goes up by
	     sizeof(type)/8, but this is wrong in a case such as
	     { int; double; int; } [powerpc alignment].  We have to
	     grovel through the fields for these too.  */
	  cum->intoffset = 0;
	  cum->floats_in_gpr = 0;
	  rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
	  rs6000_darwin64_record_arg_advance_flush (cum,
						    size * BITS_PER_UNIT, 1);
	}
      if (TARGET_DEBUG_ARG)
	{
	  fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d, ",
		   cum->words, TYPE_ALIGN (type), size);
	  fprintf (stderr,
		   "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
		   cum->nargs_prototype, cum->prototype,
		   GET_MODE_NAME (mode));
	}
    }
  else if (DEFAULT_ABI == ABI_V4)
    {
      if (abi_v4_pass_in_fpr (mode))
	{
	  /* _Decimal128 must use an even/odd register pair.  This assumes
	     that the register number is odd when fregno is odd.  */
	  if (mode == TDmode && (cum->fregno % 2) == 1)
	    cum->fregno++;

	  if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
	      <= FP_ARG_V4_MAX_REG)
	    cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
	  else
	    {
	      cum->fregno = FP_ARG_V4_MAX_REG + 1;
	      if (mode == DFmode || FLOAT128_IBM_P (mode)
		  || mode == DDmode || mode == TDmode)
		cum->words += cum->words & 1;
	      cum->words += rs6000_arg_size (mode, type);
	    }
	}
      else
	{
	  int n_words = rs6000_arg_size (mode, type);
	  int gregno = cum->sysv_gregno;

	  /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
	     (r7,r8) or (r9,r10).  As does any other 2 word item such
	     as complex int due to a historical mistake.  */
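	  /* The expression (1 - gregno) & 1 below rounds an even
	     register number up to the next odd one (e.g. r4 becomes
	     r5), so that a pair always starts on an odd register.  */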
	  if (n_words == 2)
	    gregno += (1 - gregno) & 1;

	  /* Multi-reg args are not split between registers and stack.  */
	  if (gregno + n_words - 1 > GP_ARG_MAX_REG)
	    {
	      /* Long long and SPE vectors are aligned on the stack.
		 So are other 2 word items such as complex int due to
		 a historical mistake.  */
	      if (n_words == 2)
		cum->words += cum->words & 1;
	      cum->words += n_words;
	    }

	  /* Note: we continue to accumulate gregno even after we have
	     started spilling to the stack; this is how
	     expand_builtin_saveregs learns that spilling has begun.  */
	  cum->sysv_gregno = gregno + n_words;
	}

      if (TARGET_DEBUG_ARG)
	{
	  fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
		   cum->words, cum->fregno);
	  fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
		   cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
	  fprintf (stderr, "mode = %4s, named = %d\n",
		   GET_MODE_NAME (mode), named);
	}
    }
  else
    {
      int n_words = rs6000_arg_size (mode, type);
      int start_words = cum->words;
      int align_words = rs6000_parm_start (mode, type, start_words);

      cum->words = align_words + n_words;

      if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
	{
	  /* _Decimal128 must be passed in an even/odd float register pair.
	     This assumes that the register number is odd when fregno is
	     odd.  */
	  if (elt_mode == TDmode && (cum->fregno % 2) == 1)
	    cum->fregno++;
	  cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
	}

      if (TARGET_DEBUG_ARG)
	{
	  fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
		   cum->words, cum->fregno);
	  fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
		   cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
	  fprintf (stderr, "named = %d, align = %d, depth = %d\n",
		   named, align_words - start_words, depth);
	}
    }
}

static void
rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
			     const_tree type, bool named)
{
  rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
				 0);
}

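/* Build a PARALLEL describing how a value of MODE is passed in the
   64-bit GPRs starting at GREGNO.  Each EXPR_LIST pairs one DImode
   chunk with its byte offset within the value; successive chunks go
   in every second register, matching the GPR-pair layout noted in
   rs6000_spe_function_arg below.  */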
static rtx
spe_build_register_parallel (machine_mode mode, int gregno)
{
  rtx r1, r3, r5, r7;

  switch (mode)
    {
    case E_DFmode:
      r1 = gen_rtx_REG (DImode, gregno);
      r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
      return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));

    case E_DCmode:
    case E_TFmode:
      r1 = gen_rtx_REG (DImode, gregno);
      r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
      r3 = gen_rtx_REG (DImode, gregno + 2);
      r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
      return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));

    case E_TCmode:
      r1 = gen_rtx_REG (DImode, gregno);
      r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
      r3 = gen_rtx_REG (DImode, gregno + 2);
      r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
      r5 = gen_rtx_REG (DImode, gregno + 4);
      r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
      r7 = gen_rtx_REG (DImode, gregno + 6);
      r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
      return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));

    default:
      gcc_unreachable ();
    }
}

/* Determine where to put a SIMD argument on the SPE.  */
static rtx
rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
			 const_tree type)
{
  int gregno = cum->sysv_gregno;

  /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
     doubles are passed and returned in a pair of GPRs for ABI
     compatibility.  */
  if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
			     || mode == DCmode || mode == TCmode))
    {
      int n_words = rs6000_arg_size (mode, type);

      /* Doubles go in an odd/even register pair (r5/r6, etc).  */
      if (mode == DFmode)
	gregno += (1 - gregno) & 1;

      /* Multi-reg args are not split between registers and stack.  */
      if (gregno + n_words - 1 > GP_ARG_MAX_REG)
	return NULL_RTX;

      return spe_build_register_parallel (mode, gregno);
    }
  if (cum->stdarg)
    {
      int n_words = rs6000_arg_size (mode, type);

      /* SPE vectors are put in odd registers.  */
      if (n_words == 2 && (gregno & 1) == 0)
	gregno += 1;

      if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
	{
	  rtx r1, r2;
	  machine_mode m = SImode;

	  r1 = gen_rtx_REG (m, gregno);
	  r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
	  r2 = gen_rtx_REG (m, gregno + 1);
	  r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
	}
      else
	return NULL_RTX;
    }
  else
    {
      if (gregno <= GP_ARG_MAX_REG)
	return gen_rtx_REG (mode, gregno);
      else
	return NULL_RTX;
    }
}

/* A subroutine of rs6000_darwin64_record_arg.  Assign the bits of the
   structure between cum->intoffset and bitpos to integer registers.  */

static void
rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
				  HOST_WIDE_INT bitpos, rtx rvec[], int *k)
{
  machine_mode mode;
  unsigned int regno;
  unsigned int startbit, endbit;
  int this_regno, intregs, intoffset;
  rtx reg;

  if (cum->intoffset == -1)
    return;

  intoffset = cum->intoffset;
  cum->intoffset = -1;

  /* If this is the trailing part of a word, try to only load that
     much into the register.  Otherwise load the whole register.  Note
     that in the latter case we may pick up unwanted bits.  It's not a
     problem at the moment but we may wish to revisit this.  */

  if (intoffset % BITS_PER_WORD != 0)
    {
      unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
      if (!int_mode_for_size (bits, 0).exists (&mode))
	{
	  /* We couldn't find an appropriate mode, which happens,
	     e.g., in packed structs when there are 3 bytes to load.
	     Move intoffset back to the beginning of the word in this
	     case.  */
	  intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
	  mode = word_mode;
	}
    }
  else
    mode = word_mode;

  startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
  endbit = ROUND_UP (bitpos, BITS_PER_WORD);
  intregs = (endbit - startbit) / BITS_PER_WORD;
  this_regno = cum->words + intoffset / BITS_PER_WORD;

  if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
    cum->use_stack = 1;

  intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
  if (intregs <= 0)
    return;

  intoffset /= BITS_PER_UNIT;
  do
    {
      regno = GP_ARG_MIN_REG + this_regno;
      reg = gen_rtx_REG (mode, regno);
      rvec[(*k)++]
	= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));

      this_regno += 1;
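      /* Round intoffset up to the next word boundary; after the first
	 (possibly partial) register, the remaining chunks are whole
	 words.  */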
      intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
      mode = word_mode;
      intregs -= 1;
    }
  while (intregs > 0);
}

/* Recursive workhorse for the following.  */

static void
rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
				    HOST_WIDE_INT startbitpos, rtx rvec[],
				    int *k)
{
  tree f;

  for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
    if (TREE_CODE (f) == FIELD_DECL)
      {
	HOST_WIDE_INT bitpos = startbitpos;
	tree ftype = TREE_TYPE (f);
	machine_mode mode;
	if (ftype == error_mark_node)
	  continue;
	mode = TYPE_MODE (ftype);

	if (DECL_SIZE (f) != 0
	    && tree_fits_uhwi_p (bit_position (f)))
	  bitpos += int_bit_position (f);

	/* ??? FIXME: else assume zero offset.  */

	if (TREE_CODE (ftype) == RECORD_TYPE)
	  rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
	else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
	  {
	    unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
#if 0
	    switch (mode)
	      {
	      case E_SCmode: mode = SFmode; break;
	      case E_DCmode: mode = DFmode; break;
	      case E_TCmode: mode = TFmode; break;
	      default: break;
	      }
#endif
	    rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
	    if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
	      {
		gcc_assert (cum->fregno == FP_ARG_MAX_REG
			    && (mode == TFmode || mode == TDmode));
		/* Long double or _Decimal128 split over regs and memory.  */
		mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
		cum->use_stack = 1;
	      }
	    rvec[(*k)++]
	      = gen_rtx_EXPR_LIST (VOIDmode,
				   gen_rtx_REG (mode, cum->fregno++),
				   GEN_INT (bitpos / BITS_PER_UNIT));
	    if (FLOAT128_2REG_P (mode))
	      cum->fregno++;
	  }
	else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
	  {
	    rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
	    rvec[(*k)++]
	      = gen_rtx_EXPR_LIST (VOIDmode,
				   gen_rtx_REG (mode, cum->vregno++),
				   GEN_INT (bitpos / BITS_PER_UNIT));
	  }
	else if (cum->intoffset == -1)
	  cum->intoffset = bitpos;
      }
}

/* For the darwin64 ABI, we want to construct a PARALLEL consisting of
   the register(s) to be used for each field and subfield of a struct
   being passed by value, along with the offset of where the
   register's value may be found in the block.  FP fields go in FP
   registers, vector fields go in vector registers, and everything
   else goes in int registers, packed as in memory.

   This code is also used for function return values.  RETVAL indicates
   whether this is the case.

   Much of this is taken from the SPARC V9 port, which has a similar
   calling convention.  */
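/* For example, a named argument of type struct { double d; int i; }
   yields a PARALLEL with D in a floating-point register at byte
   offset 0 and the doubleword containing I in a GPR at byte offset 8.  */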

static rtx
rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
			    bool named, bool retval)
{
  rtx rvec[FIRST_PSEUDO_REGISTER];
  int k = 1, kbase = 1;
  HOST_WIDE_INT typesize = int_size_in_bytes (type);
  /* This is a copy; modifications are not visible to our caller.  */
  CUMULATIVE_ARGS copy_cum = *orig_cum;
  CUMULATIVE_ARGS *cum = &copy_cum;

  /* Pad to 16 byte boundary if needed.  */
  if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
      && (cum->words % 2) != 0)
    cum->words++;

  cum->intoffset = 0;
  cum->use_stack = 0;
  cum->named = named;

  /* Put entries into rvec[] for individual FP and vector fields, and
     for the chunks of memory that go in int regs.  Note we start at
     element 1; 0 is reserved for an indication of using memory, and
     may or may not be filled in below.  */
  rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
  rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);

  /* If any part of the struct went on the stack put all of it there.
     This hack is because the generic code for
     FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
     parts of the struct are not at the beginning.  */
  if (cum->use_stack)
    {
      if (retval)
	return NULL_RTX;    /* doesn't go in registers at all */
      kbase = 0;
      rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
    }
  if (k > 1 || cum->use_stack)
    return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
  else
    return NULL_RTX;
}

/* Determine where to place an argument in 64-bit mode with 32-bit ABI.  */

static rtx
rs6000_mixed_function_arg (machine_mode mode, const_tree type,
			   int align_words)
{
  int n_units;
  int i, k;
  rtx rvec[GP_ARG_NUM_REG + 1];

  if (align_words >= GP_ARG_NUM_REG)
    return NULL_RTX;

  n_units = rs6000_arg_size (mode, type);

  /* Optimize the simple case where the arg fits in one gpr, except in
     the case of BLKmode due to assign_parms assuming that registers are
     BITS_PER_WORD wide.  */
  if (n_units == 0
      || (n_units == 1 && mode != BLKmode))
    return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);

  k = 0;
  if (align_words + n_units > GP_ARG_NUM_REG)
    /* Not all of the arg fits in gprs.  Say that it goes in memory too,
       using a magic NULL_RTX component.
       This is not strictly correct.  Only some of the arg belongs in
       memory, not all of it.  However, the normal scheme using
       function_arg_partial_nregs can result in unusual subregs, e.g.
       (subreg:SI (reg:DF) 4), which are not handled well.  The code to
       store the whole arg to memory is often more efficient than code
       to store pieces, and we know that space is available in the right
       place for the whole arg.  */
    rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);

  i = 0;
  do
    {
      rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
      rtx off = GEN_INT (i++ * 4);
      rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
    }
  while (++align_words < GP_ARG_NUM_REG && --n_units != 0);

  return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
}

/* We have an argument of MODE and TYPE that goes into FPRs or VRs,
   but must also be copied into the parameter save area starting at
   offset ALIGN_WORDS.  Fill in RVEC with the elements corresponding
   to the GPRs and/or memory.  Return the number of elements used.  */

static int
rs6000_psave_function_arg (machine_mode mode, const_tree type,
			   int align_words, rtx *rvec)
{
  int k = 0;

  if (align_words < GP_ARG_NUM_REG)
    {
      int n_words = rs6000_arg_size (mode, type);

      if (align_words + n_words > GP_ARG_NUM_REG
	  || mode == BLKmode
	  || (TARGET_32BIT && TARGET_POWERPC64))
	{
	  /* If this is partially on the stack, then we only
	     include the portion actually in registers here.  */
	  machine_mode rmode = TARGET_32BIT ? SImode : DImode;
	  int i = 0;

	  if (align_words + n_words > GP_ARG_NUM_REG)
	    {
	      /* Not all of the arg fits in gprs.  Say that it goes in memory
		 too, using a magic NULL_RTX component.  Also see comment in
		 rs6000_mixed_function_arg for why the normal
		 function_arg_partial_nregs scheme doesn't work in this case.  */
	      rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
	    }

	  do
	    {
	      rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
	      rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
	      rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
	    }
	  while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
	}
      else
	{
	  /* The whole arg fits in gprs.  */
	  rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
	  rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
	}
    }
  else
    {
      /* It's entirely in memory.  */
      rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
    }

  return k;
}
/* RVEC is a vector of K components of an argument of mode MODE.
   Construct the final function_arg return value from it.  */

static rtx
rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
{
  gcc_assert (k >= 1);

  /* Avoid returning a PARALLEL in the trivial cases.  */
  if (k == 1)
    {
      if (XEXP (rvec[0], 0) == NULL_RTX)
	return NULL_RTX;

      if (GET_MODE (XEXP (rvec[0], 0)) == mode)
	return XEXP (rvec[0], 0);
    }

  return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
}

/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.  It is
    not modified in this routine.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).

   On RS/6000 the first eight words of non-FP are normally in registers
   and the rest are pushed.  Under AIX, the first 13 FP args are in registers.
   Under V.4, the first 8 FP args are in registers.

   If this is floating-point and no prototype is specified, we use
   both an FP and integer register (or possibly FP reg and stack).  Library
   functions (when CALL_LIBCALL is set) always have the proper types for args,
   so we can pass the FP value just in one register.  emit_library_function
   doesn't support PARALLEL anyway.

   Note that for args passed by reference, function_arg will be called
   with MODE and TYPE set to that of the pointer to the arg, not the arg
   itself.  */

static rtx
rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
		     const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  enum rs6000_abi abi = DEFAULT_ABI;
  machine_mode elt_mode;
  int n_elts;

  /* Return a marker to indicate whether the bit that V.4 uses to say
     fp args were passed in registers needs to be set or cleared in
     CR1.  Assume that we don't need the marker for software floating
     point, or compiler generated library calls.  */
  if (mode == VOIDmode)
    {
      if (abi == ABI_V4
	  && (cum->call_cookie & CALL_LIBCALL) == 0
	  && (cum->stdarg
	      || (cum->nargs_prototype < 0
		  && (cum->prototype || TARGET_NO_PROTOTYPE))))
	{
	  /* For the SPE, we need to crxor CR6 always.  */
	  if (TARGET_SPE_ABI)
	    return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
	  else if (TARGET_HARD_FLOAT && TARGET_FPRS)
	    return GEN_INT (cum->call_cookie
			    | ((cum->fregno == FP_ARG_MIN_REG)
			       ? CALL_V4_SET_FP_ARGS
			       : CALL_V4_CLEAR_FP_ARGS));
	}

      return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
    }

  rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);

  if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
    {
      rtx rslt = rs6000_darwin64_record_arg (cum, type, named,
					     /* retval = */ false);
      if (rslt != NULL_RTX)
	return rslt;
      /* Else fall through to usual handling.  */
    }

  if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
    {
      rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
      rtx r, off;
      int i, k = 0;

      /* Do we also need to pass this argument in the parameter save area?
	 Library support functions for IEEE 128-bit are assumed to not need the
	 value passed both in GPRs and in vector registers.  */
      if (TARGET_64BIT && !cum->prototype
	  && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
	{
	  int align_words = ROUND_UP (cum->words, 2);
	  k = rs6000_psave_function_arg (mode, type, align_words, rvec);
	}

      /* Describe where this argument goes in the vector registers.  */
      for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
	{
	  r = gen_rtx_REG (elt_mode, cum->vregno + i);
	  off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
	  rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
	}

      return rs6000_finish_function_arg (mode, rvec, k);
    }
  else if (TARGET_ALTIVEC_ABI
	   && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
	       || (type && TREE_CODE (type) == VECTOR_TYPE
		   && int_size_in_bytes (type) == 16)))
    {
      if (named || abi == ABI_V4)
	return NULL_RTX;
      else
	{
	  /* Vector parameters to varargs functions under AIX or Darwin
	     get passed in memory and possibly also in GPRs.  */
	  int align, align_words, n_words;
	  machine_mode part_mode;

	  /* Vector parameters must be 16-byte aligned.  In 32-bit
	     mode this means we need to take into account the offset
	     to the parameter save area.  In 64-bit mode, they just
	     have to start on an even word, since the parameter save
	     area is 16-byte aligned.  */
	  if (TARGET_32BIT)
	    align = -(rs6000_parm_offset () + cum->words) & 3;
	  else
	    align = cum->words & 1;
	  align_words = cum->words + align;

	  /* Out of registers?  Memory, then.  */
	  if (align_words >= GP_ARG_NUM_REG)
	    return NULL_RTX;

	  if (TARGET_32BIT && TARGET_POWERPC64)
	    return rs6000_mixed_function_arg (mode, type, align_words);

	  /* The vector value goes in GPRs.  Only the part of the
	     value in GPRs is reported here.  */
	  part_mode = mode;
	  n_words = rs6000_arg_size (mode, type);
	  if (align_words + n_words > GP_ARG_NUM_REG)
	    /* Fortunately, there are only two possibilities, the value
	       is either wholly in GPRs or half in GPRs and half not.  */
	    part_mode = DImode;

	  return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
	}
    }
  else if (TARGET_SPE_ABI && TARGET_SPE
	   && (SPE_VECTOR_MODE (mode)
	       || (TARGET_E500_DOUBLE && (mode == DFmode
					  || mode == DCmode
					  || mode == TFmode
					  || mode == TCmode))))
    return rs6000_spe_function_arg (cum, mode, type);

  else if (abi == ABI_V4)
    {
      if (abi_v4_pass_in_fpr (mode))
	{
	  /* _Decimal128 must use an even/odd register pair.  This assumes
	     that the register number is odd when fregno is odd.  */
	  if (mode == TDmode && (cum->fregno % 2) == 1)
	    cum->fregno++;

	  if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
	      <= FP_ARG_V4_MAX_REG)
	    return gen_rtx_REG (mode, cum->fregno);
	  else
	    return NULL_RTX;
	}
      else
	{
	  int n_words = rs6000_arg_size (mode, type);
	  int gregno = cum->sysv_gregno;

	  /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
	     (r7,r8) or (r9,r10).  As does any other 2 word item such
	     as complex int due to a historical mistake.  */
	  if (n_words == 2)
	    gregno += (1 - gregno) & 1;

	  /* Multi-reg args are not split between registers and stack.  */
	  if (gregno + n_words - 1 > GP_ARG_MAX_REG)
	    return NULL_RTX;

	  if (TARGET_32BIT && TARGET_POWERPC64)
	    return rs6000_mixed_function_arg (mode, type,
					      gregno - GP_ARG_MIN_REG);
	  return gen_rtx_REG (mode, gregno);
	}
    }
  else
    {
      int align_words = rs6000_parm_start (mode, type, cum->words);

      /* _Decimal128 must be passed in an even/odd float register pair.
	 This assumes that the register number is odd when fregno is odd.  */
      if (elt_mode == TDmode && (cum->fregno % 2) == 1)
	cum->fregno++;

      if (USE_FP_FOR_ARG_P (cum, elt_mode))
	{
	  rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
	  rtx r, off;
	  int i, k = 0;
	  unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
	  int fpr_words;

	  /* Do we also need to pass this argument in the parameter
	     save area?  */
	  if (type && (cum->nargs_prototype <= 0
		       || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
			   && TARGET_XL_COMPAT
			   && align_words >= GP_ARG_NUM_REG)))
	    k = rs6000_psave_function_arg (mode, type, align_words, rvec);

	  /* Describe where this argument goes in the fprs.  */
	  for (i = 0; i < n_elts
		      && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
	    {
	      /* Check if the argument is split over registers and memory.
		 This can only ever happen for long double or _Decimal128;
		 complex types are handled via split_complex_arg.  */
	      machine_mode fmode = elt_mode;
	      if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
		{
		  gcc_assert (FLOAT128_2REG_P (fmode));
		  fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
		}

	      r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
	      off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
	      rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
	    }

	  /* If there were not enough FPRs to hold the argument, the rest
	     usually goes into memory.  However, if the current position
	     is still within the register parameter area, a portion may
	     actually have to go into GPRs.

	     Note that it may happen that the portion of the argument
	     passed in the first "half" of the first GPR was already
	     passed in the last FPR as well.

	     For unnamed arguments, we already set up GPRs to cover the
	     whole argument in rs6000_psave_function_arg, so there is
	     nothing further to do at this point.  */
	  fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
	  if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
	      && cum->nargs_prototype > 0)
	    {
	      static bool warned;

	      machine_mode rmode = TARGET_32BIT ? SImode : DImode;
	      int n_words = rs6000_arg_size (mode, type);

	      align_words += fpr_words;
	      n_words -= fpr_words;

	      do
		{
		  r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
		  off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
		  rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
		}
	      while (++align_words < GP_ARG_NUM_REG && --n_words != 0);

	      if (!warned && warn_psabi)
		{
		  warned = true;
		  inform (input_location,
			  "the ABI of passing homogeneous float aggregates"
			  " has changed in GCC 5");
		}
	    }

	  return rs6000_finish_function_arg (mode, rvec, k);
	}
      else if (align_words < GP_ARG_NUM_REG)
	{
	  if (TARGET_32BIT && TARGET_POWERPC64)
	    return rs6000_mixed_function_arg (mode, type, align_words);

	  return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
	}
      else
	return NULL_RTX;
    }
}

/* For an arg passed partly in registers and partly in memory, this is
   the number of bytes passed in registers.  For args passed entirely in
   registers or entirely in memory, zero.  When an arg is described by a
   PARALLEL, perhaps using more than one register type, this function
   returns the number of bytes used by the first element of the PARALLEL.  */
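/* For instance, in 64-bit mode a two-doubleword aggregate whose first
   word falls in r10 (align_words == 7 below) has 8 bytes in registers
   and the rest in memory, so the value returned is 8.  */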

static int
rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
			  tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  bool passed_in_gprs = true;
  int ret = 0;
  int align_words;
  machine_mode elt_mode;
  int n_elts;

  rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);

  if (DEFAULT_ABI == ABI_V4)
    return 0;

  if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
    {
      /* If we are passing this arg in the fixed parameter save area (gprs or
	 memory) as well as VRs, we do not use the partial bytes mechanism;
	 instead, rs6000_function_arg will return a PARALLEL including a memory
	 element as necessary.  Library support functions for IEEE 128-bit are
	 assumed to not need the value passed both in GPRs and in vector
	 registers.  */
      if (TARGET_64BIT && !cum->prototype
	  && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
	return 0;

      /* Otherwise, we pass in VRs only.  Check for partial copies.  */
      passed_in_gprs = false;
      if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
	ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
    }

  /* In this complicated case we just disable the partial_nregs code.  */
  if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
    return 0;

  align_words = rs6000_parm_start (mode, type, cum->words);

  if (USE_FP_FOR_ARG_P (cum, elt_mode))
    {
      unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;

      /* If we are passing this arg in the fixed parameter save area
	 (gprs or memory) as well as FPRs, we do not use the partial
	 bytes mechanism; instead, rs6000_function_arg will return a
	 PARALLEL including a memory element as necessary.  */
      if (type
	  && (cum->nargs_prototype <= 0
	      || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
		  && TARGET_XL_COMPAT
		  && align_words >= GP_ARG_NUM_REG)))
	return 0;

      /* Otherwise, we pass in FPRs only.  Check for partial copies.  */
      passed_in_gprs = false;
      if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
	{
	  /* Compute number of bytes / words passed in FPRs.  If there
	     is still space available in the register parameter area
	     *after* that amount, a part of the argument will be passed
	     in GPRs.  In that case, the total amount passed in any
	     registers is equal to the amount that would have been passed
	     in GPRs if everything were passed there, so we fall back to
	     the GPR code below to compute the appropriate value.  */
	  int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
		     * MIN (8, GET_MODE_SIZE (elt_mode)));
	  int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);

	  if (align_words + fpr_words < GP_ARG_NUM_REG)
	    passed_in_gprs = true;
	  else
	    ret = fpr;
	}
    }

  if (passed_in_gprs
      && align_words < GP_ARG_NUM_REG
      && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
    ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);

  if (ret != 0 && TARGET_DEBUG_ARG)
    fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);

  return ret;
}

/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.

   Under V.4, aggregates and long double are passed by reference.

   As an extension to all 32-bit ABIs, AltiVec vectors are passed by
   reference unless the AltiVec vector extension ABI is in force.

   As an extension to all ABIs, variable sized types are passed by
   reference.  */

static bool
rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
			  machine_mode mode, const_tree type,
			  bool named ATTRIBUTE_UNUSED)
{
  if (!type)
    return 0;

  if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
      && FLOAT128_IEEE_P (TYPE_MODE (type)))
    {
      if (TARGET_DEBUG_ARG)
	fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
      return 1;
    }

  if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
    {
      if (TARGET_DEBUG_ARG)
	fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
      return 1;
    }

  if (int_size_in_bytes (type) < 0)
    {
      if (TARGET_DEBUG_ARG)
	fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
      return 1;
    }

  /* Allow -maltivec -mabi=no-altivec without warning.  Altivec vector
     modes only exist for GCC vector types if -maltivec.  */
  if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
    {
      if (TARGET_DEBUG_ARG)
	fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
      return 1;
    }

  /* Pass synthetic vectors in memory.  */
  if (TREE_CODE (type) == VECTOR_TYPE
      && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
    {
      static bool warned_for_pass_big_vectors = false;
      if (TARGET_DEBUG_ARG)
	fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
      if (!warned_for_pass_big_vectors)
	{
	  warning (OPT_Wpsabi, "GCC vector passed by reference: "
		   "non-standard ABI extension with no compatibility guarantee");
	  warned_for_pass_big_vectors = true;
	}
      return 1;
    }

  return 0;
}

/* Process parameter of type TYPE after ARGS_SO_FAR parameters were
   already processed.  Return true if the parameter must be passed
   (fully or partially) on the stack.  */

static bool
rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
{
  machine_mode mode;
  int unsignedp;
  rtx entry_parm;

  /* Catch errors.  */
  if (type == NULL || type == error_mark_node)
    return true;

  /* Handle types with no storage requirement.  */
  if (TYPE_MODE (type) == VOIDmode)
    return false;

  /* Handle complex types.  */
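  /* The same expression appears twice below on purpose: each call
     also advances ARGS_SO_FAR past one component, once for the real
     part and once for the imaginary part.  */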
  if (TREE_CODE (type) == COMPLEX_TYPE)
    return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
	    || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));

  /* Handle transparent aggregates.  */
  if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
      && TYPE_TRANSPARENT_AGGR (type))
    type = TREE_TYPE (first_field (type));

  /* See if this arg was passed by invisible reference.  */
  if (pass_by_reference (get_cumulative_args (args_so_far),
			 TYPE_MODE (type), type, true))
    type = build_pointer_type (type);

  /* Find mode as it is passed by the ABI.  */
  unsignedp = TYPE_UNSIGNED (type);
  mode = promote_mode (type, TYPE_MODE (type), &unsignedp);

  /* If we must pass in stack, we need a stack.  */
  if (rs6000_must_pass_in_stack (mode, type))
    return true;

  /* If there is no incoming register, we need a stack.  */
  entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
  if (entry_parm == NULL)
    return true;

  /* Likewise if we need to pass both in registers and on the stack.  */
  if (GET_CODE (entry_parm) == PARALLEL
      && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
    return true;

  /* Also true if we're partially in registers and partially not.  */
  if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
    return true;

  /* Update info on where next arg arrives in registers.  */
  rs6000_function_arg_advance (args_so_far, mode, type, true);
  return false;
}

/* Return true if FUN has no prototype, has a variable argument
   list, or passes any parameter in memory.  */

static bool
rs6000_function_parms_need_stack (tree fun, bool incoming)
{
  tree fntype, result;
  CUMULATIVE_ARGS args_so_far_v;
  cumulative_args_t args_so_far;

  if (!fun)
    /* Must be a libcall, all of which only use reg parms.  */
    return false;

  fntype = fun;
  if (!TYPE_P (fun))
    fntype = TREE_TYPE (fun);

  /* Varargs functions need the parameter save area.  */
  if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
    return true;

  INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
  args_so_far = pack_cumulative_args (&args_so_far_v);

  /* When incoming, we will have been passed the function decl.
     It is necessary to use the decl to handle K&R style functions,
     where TYPE_ARG_TYPES may not be available.  */
  if (incoming)
    {
      gcc_assert (DECL_P (fun));
      result = DECL_RESULT (fun);
    }
  else
    result = TREE_TYPE (fntype);

  if (result && aggregate_value_p (result, fntype))
    {
      if (!TYPE_P (result))
	result = TREE_TYPE (result);
      result = build_pointer_type (result);
      rs6000_parm_needs_stack (args_so_far, result);
    }

  if (incoming)
    {
      tree parm;

      for (parm = DECL_ARGUMENTS (fun);
	   parm && parm != void_list_node;
	   parm = TREE_CHAIN (parm))
	if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
	  return true;
    }
  else
    {
      function_args_iterator args_iter;
      tree arg_type;

      FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
	if (rs6000_parm_needs_stack (args_so_far, arg_type))
	  return true;
    }

  return false;
}

/* Return the size of the REG_PARM_STACK_SPACE area for FUN.  This is
   usually a constant depending on the ABI.  However, in the ELFv2 ABI
   the register parameter area is optional when calling a function that
   has a prototype in scope, has no variable argument list, and passes
   all parameters in registers.  */

int
rs6000_reg_parm_stack_space (tree fun, bool incoming)
{
  int reg_parm_stack_space;

  switch (DEFAULT_ABI)
    {
    default:
      reg_parm_stack_space = 0;
      break;

    case ABI_AIX:
    case ABI_DARWIN:
      reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
      break;

    case ABI_ELFv2:
      /* ??? Recomputing this every time is a bit expensive.  Is there
	 a place to cache this information?  */
      if (rs6000_function_parms_need_stack (fun, incoming))
	reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
      else
	reg_parm_stack_space = 0;
      break;
    }

  return reg_parm_stack_space;
}

static void
rs6000_move_block_from_reg (int regno, rtx x, int nregs)
{
  int i;
  machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;

  if (nregs == 0)
    return;

  for (i = 0; i < nregs; i++)
    {
      rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
      if (reload_completed)
	{
	  if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
	    tem = NULL_RTX;
	  else
	    tem = simplify_gen_subreg (reg_mode, x, BLKmode,
				       i * GET_MODE_SIZE (reg_mode));
	}
      else
	tem = replace_equiv_address (tem, XEXP (tem, 0));

      gcc_assert (tem);

      emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
    }
}

/* Perform any actions needed for a function that is receiving a
   variable number of arguments.

   CUM is as above.

   MODE and TYPE are the mode and type of the current parameter.

   PRETEND_SIZE is a variable that should be set to the amount of stack
   that must be pushed by the prolog to pretend that our caller pushed
   it.

   Normally, this macro will push all remaining incoming registers on the
   stack and set PRETEND_SIZE to the length of the registers pushed.  */

static void
setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
			tree type, int *pretend_size ATTRIBUTE_UNUSED,
			int no_rtl)
{
  CUMULATIVE_ARGS next_cum;
  int reg_size = TARGET_32BIT ? 4 : 8;
  rtx save_area = NULL_RTX, mem;
  int first_reg_offset;
  alias_set_type set;

  /* Skip the last named argument.  */
  next_cum = *get_cumulative_args (cum);
  rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);

  if (DEFAULT_ABI == ABI_V4)
    {
      first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;

      if (! no_rtl)
	{
	  int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
	  HOST_WIDE_INT offset = 0;

	  /* Try to optimize the size of the varargs save area.
	     The ABI requires that ap.reg_save_area is doubleword
	     aligned, but we don't need to allocate space for all
	     the bytes, only those to which we actually will save
	     anything.  */
	  if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
	    gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
	  if (TARGET_HARD_FLOAT && TARGET_FPRS
	      && next_cum.fregno <= FP_ARG_V4_MAX_REG
	      && cfun->va_list_fpr_size)
	    {
	      if (gpr_reg_num)
		fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
			   * UNITS_PER_FP_WORD;
	      if (cfun->va_list_fpr_size
		  < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
		fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
	      else
		fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
			    * UNITS_PER_FP_WORD;
	    }
	  if (gpr_reg_num)
	    {
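	      /* Masking with ~7 rounds the first saved register's byte
		 offset down to a doubleword boundary, keeping
		 ap.reg_save_area doubleword aligned as the ABI
		 requires.  */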
	      offset = -((first_reg_offset * reg_size) & ~7);
	      if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
		{
		  gpr_reg_num = cfun->va_list_gpr_size;
		  if (reg_size == 4 && (first_reg_offset & 1))
		    gpr_reg_num++;
		}
	      gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
	    }
	  else if (fpr_size)
	    offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
		       * UNITS_PER_FP_WORD
		     - (int) (GP_ARG_NUM_REG * reg_size);

	  if (gpr_size + fpr_size)
	    {
	      rtx reg_save_area
		= assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
	      gcc_assert (GET_CODE (reg_save_area) == MEM);
	      reg_save_area = XEXP (reg_save_area, 0);
	      if (GET_CODE (reg_save_area) == PLUS)
		{
		  gcc_assert (XEXP (reg_save_area, 0)
			      == virtual_stack_vars_rtx);
		  gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
		  offset += INTVAL (XEXP (reg_save_area, 1));
		}
	      else
		gcc_assert (reg_save_area == virtual_stack_vars_rtx);
	    }

	  cfun->machine->varargs_save_offset = offset;
	  save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
	}
    }
  else
    {
      first_reg_offset = next_cum.words;
      save_area = crtl->args.internal_arg_pointer;

      if (targetm.calls.must_pass_in_stack (mode, type))
	first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
    }

  set = get_varargs_alias_set ();
  if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
      && cfun->va_list_gpr_size)
    {
      int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;

      if (va_list_gpr_counter_field)
	/* V4 va_list_gpr_size counts number of registers needed.  */
	n_gpr = cfun->va_list_gpr_size;
      else
	/* char * va_list instead counts number of bytes needed.  */
	n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;

      if (nregs > n_gpr)
	nregs = n_gpr;

      mem = gen_rtx_MEM (BLKmode,
			 plus_constant (Pmode, save_area,
					first_reg_offset * reg_size));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      set_mem_align (mem, BITS_PER_WORD);

      rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
				  nregs);
    }

  /* Save FP registers if needed.  */
  if (DEFAULT_ABI == ABI_V4
      && TARGET_HARD_FLOAT && TARGET_FPRS
      && ! no_rtl
      && next_cum.fregno <= FP_ARG_V4_MAX_REG
      && cfun->va_list_fpr_size)
    {
      int fregno = next_cum.fregno, nregs;
      rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
      rtx lab = gen_label_rtx ();
      int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
					       * UNITS_PER_FP_WORD);

      emit_jump_insn
	(gen_rtx_SET (pc_rtx,
		      gen_rtx_IF_THEN_ELSE (VOIDmode,
					    gen_rtx_NE (VOIDmode, cr1,
							const0_rtx),
					    gen_rtx_LABEL_REF (VOIDmode, lab),
					    pc_rtx)));

      for (nregs = 0;
	   fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
	   fregno++, off += UNITS_PER_FP_WORD, nregs++)
	{
	  mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
			     ? DFmode : SFmode,
			     plus_constant (Pmode, save_area, off));
	  MEM_NOTRAP_P (mem) = 1;
	  set_mem_alias_set (mem, set);
	  set_mem_align (mem, GET_MODE_ALIGNMENT (
			 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
			  ? DFmode : SFmode));
	  emit_move_insn (mem, gen_rtx_REG (
			  (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
			   ? DFmode : SFmode, fregno));
	}

      emit_label (lab);
    }
}

/* Create the va_list data type.  */

static tree
rs6000_build_builtin_va_list (void)
{
  tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;

  /* For AIX, prefer 'char *' because that's what the system
     header files like.  */
  if (DEFAULT_ABI != ABI_V4)
    return build_pointer_type (char_type_node);

13874   record = (*lang_hooks.types.make_type) (RECORD_TYPE);
13875   type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
13876       			  get_identifier ("__va_list_tag"), record);
13877 
13878   f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
13879 		      unsigned_char_type_node);
13880   f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
13881 		      unsigned_char_type_node);
13882   /* Give the two bytes of padding a name, so that -Wpadded won't warn on
13883      every user file.  */
13884   f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13885       		      get_identifier ("reserved"), short_unsigned_type_node);
13886   f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13887       		      get_identifier ("overflow_arg_area"),
13888 		      ptr_type_node);
13889   f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13890       		      get_identifier ("reg_save_area"),
13891 		      ptr_type_node);
13892 
13893   va_list_gpr_counter_field = f_gpr;
13894   va_list_fpr_counter_field = f_fpr;
13895 
13896   DECL_FIELD_CONTEXT (f_gpr) = record;
13897   DECL_FIELD_CONTEXT (f_fpr) = record;
13898   DECL_FIELD_CONTEXT (f_res) = record;
13899   DECL_FIELD_CONTEXT (f_ovf) = record;
13900   DECL_FIELD_CONTEXT (f_sav) = record;
13901 
13902   TYPE_STUB_DECL (record) = type_decl;
13903   TYPE_NAME (record) = type_decl;
13904   TYPE_FIELDS (record) = f_gpr;
13905   DECL_CHAIN (f_gpr) = f_fpr;
13906   DECL_CHAIN (f_fpr) = f_res;
13907   DECL_CHAIN (f_res) = f_ovf;
13908   DECL_CHAIN (f_ovf) = f_sav;
13909 
13910   layout_type (record);
13911 
13912   /* The correct type is an array type of one element.  */
13913   return build_array_type (record, build_index_type (size_zero_node));
13914 }
13915 
13916 /* Implement va_start.  */
13917 
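/* For ABI_V4 this initializes every used field of the record sketched
   above; the effect, roughly in source form (a sketch, not the exact
   trees built below), is:

	va->gpr = <GPRs consumed by named arguments>;
	va->fpr = <FPRs consumed by named arguments>;
	va->overflow_arg_area
	  = <incoming arg area> + words * MIN_UNITS_PER_WORD;
	va->reg_save_area = <frame base> + varargs_save_offset;

   The reg_save_area assignment is skipped when no va_arg use could
   need it.  */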
13918 static void
13919 rs6000_va_start (tree valist, rtx nextarg)
13920 {
13921   HOST_WIDE_INT words, n_gpr, n_fpr;
13922   tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13923   tree gpr, fpr, ovf, sav, t;
13924 
13925   /* Only SVR4 needs something special.  */
13926   if (DEFAULT_ABI != ABI_V4)
13927     {
13928       std_expand_builtin_va_start (valist, nextarg);
13929       return;
13930     }
13931 
13932   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13933   f_fpr = DECL_CHAIN (f_gpr);
13934   f_res = DECL_CHAIN (f_fpr);
13935   f_ovf = DECL_CHAIN (f_res);
13936   f_sav = DECL_CHAIN (f_ovf);
13937 
13938   valist = build_simple_mem_ref (valist);
13939   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13940   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13941 		f_fpr, NULL_TREE);
13942   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13943 		f_ovf, NULL_TREE);
13944   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13945 		f_sav, NULL_TREE);
13946 
13947   /* Count number of gp and fp argument registers used.  */
13948   words = crtl->args.info.words;
13949   n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
13950 	       GP_ARG_NUM_REG);
13951   n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
13952 	       FP_ARG_NUM_REG);
13953 
13954   if (TARGET_DEBUG_ARG)
13955     fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
13956 	     HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
13957 	     words, n_gpr, n_fpr);
13958 
13959   if (cfun->va_list_gpr_size)
13960     {
13961       t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
13962 		  build_int_cst (NULL_TREE, n_gpr));
13963       TREE_SIDE_EFFECTS (t) = 1;
13964       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13965     }
13966 
13967   if (cfun->va_list_fpr_size)
13968     {
13969       t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
13970 		  build_int_cst (NULL_TREE, n_fpr));
13971       TREE_SIDE_EFFECTS (t) = 1;
13972       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13973 
13974 #ifdef HAVE_AS_GNU_ATTRIBUTE
13975       if (call_ABI_of_interest (cfun->decl))
13976 	rs6000_passes_float = true;
13977 #endif
13978     }
13979 
13980   /* Find the overflow area.  */
13981   t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
13982   if (words != 0)
13983     t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
13984   t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
13985   TREE_SIDE_EFFECTS (t) = 1;
13986   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13987 
13988   /* If there were no va_arg invocations, don't set up the register
13989      save area.  */
13990   if (!cfun->va_list_gpr_size
13991       && !cfun->va_list_fpr_size
13992       && n_gpr < GP_ARG_NUM_REG
13993       && n_fpr < FP_ARG_V4_MAX_REG)
13994     return;
13995 
13996   /* Find the register save area.  */
13997   t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
13998   if (cfun->machine->varargs_save_offset)
13999     t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
14000   t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
14001   TREE_SIDE_EFFECTS (t) = 1;
14002   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
14003 }
14004 
14005 /* Implement va_arg.  */
14006 
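/* For ABI_V4 the expansion below tests whether enough argument
   registers remain and reads the value from either the register save
   area or the overflow (stack) area; very roughly, as a sketch:

	if (counter + n_reg <= 8)
	  addr = va->reg_save_area + sav_ofs + counter * sav_scale,
	  counter += n_reg;
	else
	  addr = align (va->overflow_arg_area),
	  va->overflow_arg_area = addr + size;
	result = *(type *) addr;

   where counter is va->gpr or va->fpr as appropriate.  The real code
   additionally handles right-alignment of small args on big-endian,
   _Decimal32/_Decimal128 register placement, and strict-alignment
   copies through a temporary.  */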
14007 static tree
14008 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
14009 			gimple_seq *post_p)
14010 {
14011   tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
14012   tree gpr, fpr, ovf, sav, reg, t, u;
14013   int size, rsize, n_reg, sav_ofs, sav_scale;
14014   tree lab_false, lab_over, addr;
14015   int align;
14016   tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
14017   int regalign = 0;
14018   gimple *stmt;
14019 
14020   if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
14021     {
14022       t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
14023       return build_va_arg_indirect_ref (t);
14024     }
14025 
14026   /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
14027      earlier version of gcc, with the property that it always applied alignment
14028      adjustments to the va-args (even for zero-sized types).  The cheapest way
14029      to deal with this is to replicate the effect of the part of
14030      std_gimplify_va_arg_expr that carries out the align adjust, for the
14031      relevant case.
14032      We don't need to check for pass-by-reference because of the test above.
14033      We can return a simplified answer, since we know there's no offset to add.  */
14034 
14035   if (((TARGET_MACHO
14036         && rs6000_darwin64_abi)
14037        || DEFAULT_ABI == ABI_ELFv2
14038        || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
14039       && integer_zerop (TYPE_SIZE (type)))
14040     {
14041       unsigned HOST_WIDE_INT align, boundary;
14042       tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
14043       align = PARM_BOUNDARY / BITS_PER_UNIT;
14044       boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
14045       if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
14046 	boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
14047       boundary /= BITS_PER_UNIT;
14048       if (boundary > align)
14049 	{
14050 	  tree t;
14051 	  /* This updates arg ptr by the amount that would be necessary
14052 	     to align the zero-sized (but not zero-alignment) item.  */
14053 	  t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
14054 		      fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
14055 	  gimplify_and_add (t, pre_p);
14056 
14057 	  t = fold_convert (sizetype, valist_tmp);
14058 	  t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
14059 		  fold_convert (TREE_TYPE (valist),
14060 				fold_build2 (BIT_AND_EXPR, sizetype, t,
14061 					     size_int (-boundary))));
14062 	  t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
14063 	  gimplify_and_add (t, pre_p);
14064 	}
14065       /* Since it is zero-sized, there's no increment for the item itself.  */
14066       valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
14067       return build_va_arg_indirect_ref (valist_tmp);
14068     }
14069 
14070   if (DEFAULT_ABI != ABI_V4)
14071     {
14072       if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
14073 	{
14074 	  tree elem_type = TREE_TYPE (type);
14075 	  machine_mode elem_mode = TYPE_MODE (elem_type);
14076 	  int elem_size = GET_MODE_SIZE (elem_mode);
14077 
14078 	  if (elem_size < UNITS_PER_WORD)
14079 	    {
14080 	      tree real_part, imag_part;
14081 	      gimple_seq post = NULL;
14082 
14083 	      real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
14084 						  &post);
14085 	      /* Copy the value into a temporary, lest the formal temporary
14086 		 be reused out from under us.  */
14087 	      real_part = get_initialized_tmp_var (real_part, pre_p, &post);
14088 	      gimple_seq_add_seq (pre_p, post);
14089 
14090 	      imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
14091 						  post_p);
14092 
14093 	      return build2 (COMPLEX_EXPR, type, real_part, imag_part);
14094 	    }
14095 	}
14096 
14097       return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
14098     }
14099 
14100   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
14101   f_fpr = DECL_CHAIN (f_gpr);
14102   f_res = DECL_CHAIN (f_fpr);
14103   f_ovf = DECL_CHAIN (f_res);
14104   f_sav = DECL_CHAIN (f_ovf);
14105 
14106   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
14107   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
14108 		f_fpr, NULL_TREE);
14109   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
14110 		f_ovf, NULL_TREE);
14111   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
14112 		f_sav, NULL_TREE);
14113 
14114   size = int_size_in_bytes (type);
14115   rsize = (size + 3) / 4;
14116   int pad = 4 * rsize - size;
14117   align = 1;
14118 
14119   machine_mode mode = TYPE_MODE (type);
14120   if (abi_v4_pass_in_fpr (mode))
14121     {
14122       /* FP args go in FP registers, if present.  */
14123       reg = fpr;
14124       n_reg = (size + 7) / 8;
14125       sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
14126       sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
14127       if (mode != SFmode && mode != SDmode)
14128 	align = 8;
14129     }
14130   else
14131     {
14132       /* Otherwise into GP registers.  */
14133       reg = gpr;
14134       n_reg = rsize;
14135       sav_ofs = 0;
14136       sav_scale = 4;
14137       if (n_reg == 2)
14138 	align = 8;
14139     }
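  /* A worked example, assuming a 32-bit SVR4 target with hard
     double-precision floats: for a double, size is 8, so reg = fpr,
     n_reg = 1, sav_ofs = 32 (past the eight 4-byte GPR slots),
     sav_scale = 8 and align = 8.  */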
14140 
14141   /* Pull the value out of the saved registers....  */
14142 
14143   lab_over = NULL;
14144   addr = create_tmp_var (ptr_type_node, "addr");
14145 
14146   /*  AltiVec vectors never go in registers when -mabi=altivec.  */
14147   if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
14148     align = 16;
14149   else
14150     {
14151       lab_false = create_artificial_label (input_location);
14152       lab_over = create_artificial_label (input_location);
14153 
14154       /* Long long and SPE vectors are aligned in the registers.
14155 	 So is any other two-GPR item, such as complex int, due to a
14156 	 historical mistake.  */
14157       u = reg;
14158       if (n_reg == 2 && reg == gpr)
14159 	{
14160 	  regalign = 1;
14161 	  u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
14162 		     build_int_cst (TREE_TYPE (reg), n_reg - 1));
14163 	  u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
14164 		      unshare_expr (reg), u);
14165 	}
14166       /* _Decimal128 is passed in even/odd fpr pairs; the stored
14167 	 reg number is 0 for f1, so we want to make it odd.  */
14168       else if (reg == fpr && mode == TDmode)
14169 	{
14170 	  t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
14171 		      build_int_cst (TREE_TYPE (reg), 1));
14172 	  u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
14173 	}
14174 
14175       t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
14176       t = build2 (GE_EXPR, boolean_type_node, u, t);
14177       u = build1 (GOTO_EXPR, void_type_node, lab_false);
14178       t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
14179       gimplify_and_add (t, pre_p);
14180 
14181       t = sav;
14182       if (sav_ofs)
14183 	t = fold_build_pointer_plus_hwi (sav, sav_ofs);
14184 
14185       u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
14186 		  build_int_cst (TREE_TYPE (reg), n_reg));
14187       u = fold_convert (sizetype, u);
14188       u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
14189       t = fold_build_pointer_plus (t, u);
14190 
14191       /* _Decimal32 varargs are located in the second word of the 64-bit
14192 	 FP register for 32-bit binaries.  */
14193       if (TARGET_32BIT
14194 	  && TARGET_HARD_FLOAT && TARGET_FPRS
14195 	  && mode == SDmode)
14196 	t = fold_build_pointer_plus_hwi (t, size);
14197 
14198       /* Args are passed right-aligned.  */
14199       if (BYTES_BIG_ENDIAN)
14200 	t = fold_build_pointer_plus_hwi (t, pad);
14201 
14202       gimplify_assign (addr, t, pre_p);
14203 
14204       gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
14205 
14206       stmt = gimple_build_label (lab_false);
14207       gimple_seq_add_stmt (pre_p, stmt);
14208 
14209       if ((n_reg == 2 && !regalign) || n_reg > 2)
14210 	{
14211 	  /* Ensure that we don't find any more args in regs.
14212 	     Alignment has been taken care of for the special cases.  */
14213 	  gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
14214 	}
14215     }
14216 
14217   /* ... otherwise out of the overflow area.  */
14218 
14219   /* Care for on-stack alignment if needed.  */
14220   t = ovf;
14221   if (align != 1)
14222     {
14223       t = fold_build_pointer_plus_hwi (t, align - 1);
14224       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
14225 		  build_int_cst (TREE_TYPE (t), -align));
14226     }
14227 
14228   /* Args are passed right-aligned.  */
14229   if (BYTES_BIG_ENDIAN)
14230     t = fold_build_pointer_plus_hwi (t, pad);
14231 
14232   gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
14233 
14234   gimplify_assign (unshare_expr (addr), t, pre_p);
14235 
14236   t = fold_build_pointer_plus_hwi (t, size);
14237   gimplify_assign (unshare_expr (ovf), t, pre_p);
14238 
14239   if (lab_over)
14240     {
14241       stmt = gimple_build_label (lab_over);
14242       gimple_seq_add_stmt (pre_p, stmt);
14243     }
14244 
14245   if (STRICT_ALIGNMENT
14246       && (TYPE_ALIGN (type)
14247 	  > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
14248     {
14249       /* The value (of type complex double, for example) may not be
14250 	 aligned in memory in the saved registers, so copy via a
14251 	 temporary.  (This is the same code as used for SPARC.)  */
14252       tree tmp = create_tmp_var (type, "va_arg_tmp");
14253       tree dest_addr = build_fold_addr_expr (tmp);
14254 
14255       tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
14256 				   3, dest_addr, addr, size_int (rsize * 4));
14257       TREE_ADDRESSABLE (tmp) = 1;
14258 
14259       gimplify_and_add (copy, pre_p);
14260       addr = dest_addr;
14261     }
14262 
14263   addr = fold_convert (ptrtype, addr);
14264   return build_va_arg_indirect_ref (addr);
14265 }
14266 
14267 /* Builtins.  */
14268 
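/* Register a single target builtin: NAME is the user-visible name,
   TYPE its function type and CODE its rs6000_builtins enumerator.
   The attribute bits recorded in rs6000_builtin_info decide whether
   the resulting decl is marked const, pure or rounding-sensitive.  */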
14269 static void
14270 def_builtin (const char *name, tree type, enum rs6000_builtins code)
14271 {
14272   tree t;
14273   unsigned classify = rs6000_builtin_info[(int)code].attr;
14274   const char *attr_string = "";
14275 
14276   gcc_assert (name != NULL);
14277   gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
14278 
14279   if (rs6000_builtin_decls[(int)code])
14280     fatal_error (input_location,
14281 		 "internal error: builtin function %s already processed", name);
14282 
14283   rs6000_builtin_decls[(int)code] = t =
14284     add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
14285 
14286   /* Set any special attributes.  */
14287   if ((classify & RS6000_BTC_CONST) != 0)
14288     {
14289       /* const function, function only depends on the inputs.  */
14290       TREE_READONLY (t) = 1;
14291       TREE_NOTHROW (t) = 1;
14292       attr_string = ", const";
14293     }
14294   else if ((classify & RS6000_BTC_PURE) != 0)
14295     {
14296       /* pure function, function can read global memory, but does not set any
14297 	 external state.  */
14298       DECL_PURE_P (t) = 1;
14299       TREE_NOTHROW (t) = 1;
14300       attr_string = ", pure";
14301     }
14302   else if ((classify & RS6000_BTC_FP) != 0)
14303     {
14304       /* Function is a math function.  If rounding mode is on, then treat the
14305 	 function as not reading global memory, but it can have arbitrary side
14306 	 effects.  If it is off, then assume the function is a const function.
14307 	 This mimics the ATTR_MATHFN_FPROUNDING attribute in
14308 	 builtin-attribute.def that is used for the math functions. */
14309       TREE_NOTHROW (t) = 1;
14310       if (flag_rounding_math)
14311 	{
14312 	  DECL_PURE_P (t) = 1;
14313 	  DECL_IS_NOVOPS (t) = 1;
14314 	  attr_string = ", fp, pure";
14315 	}
14316       else
14317 	{
14318 	  TREE_READONLY (t) = 1;
14319 	  attr_string = ", fp, const";
14320 	}
14321     }
14322   else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
14323     gcc_unreachable ();
14324 
14325   if (TARGET_DEBUG_BUILTIN)
14326     fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
14327 	     (int)code, name, attr_string);
14328 }
14329 
14330 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc).  */
14331 
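/* The builtin description tables below are all generated the same
   way: redefine the RS6000_BUILTIN_* macros so that exactly one class
   expands to an initializer entry and every other class expands to
   nothing, then re-include powerpcspe-builtin.def.  Here only
   RS6000_BUILTIN_3 (the ternary class) produces
   "{ MASK, ICODE, NAME, ENUM },".  */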
14332 #undef RS6000_BUILTIN_0
14333 #undef RS6000_BUILTIN_1
14334 #undef RS6000_BUILTIN_2
14335 #undef RS6000_BUILTIN_3
14336 #undef RS6000_BUILTIN_A
14337 #undef RS6000_BUILTIN_D
14338 #undef RS6000_BUILTIN_E
14339 #undef RS6000_BUILTIN_H
14340 #undef RS6000_BUILTIN_P
14341 #undef RS6000_BUILTIN_Q
14342 #undef RS6000_BUILTIN_S
14343 #undef RS6000_BUILTIN_X
14344 
14345 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14346 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14347 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14348 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
14349   { MASK, ICODE, NAME, ENUM },
14350 
14351 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14352 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14353 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14354 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14355 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14356 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14357 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14358 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14359 
14360 static const struct builtin_description bdesc_3arg[] =
14361 {
14362 #include "powerpcspe-builtin.def"
14363 };
14364 
14365 /* DST operations: void foo (void *, const int, const char).  */
14366 
14367 #undef RS6000_BUILTIN_0
14368 #undef RS6000_BUILTIN_1
14369 #undef RS6000_BUILTIN_2
14370 #undef RS6000_BUILTIN_3
14371 #undef RS6000_BUILTIN_A
14372 #undef RS6000_BUILTIN_D
14373 #undef RS6000_BUILTIN_E
14374 #undef RS6000_BUILTIN_H
14375 #undef RS6000_BUILTIN_P
14376 #undef RS6000_BUILTIN_Q
14377 #undef RS6000_BUILTIN_S
14378 #undef RS6000_BUILTIN_X
14379 
14380 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14381 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14382 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14383 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14384 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14385 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
14386   { MASK, ICODE, NAME, ENUM },
14387 
14388 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14389 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14390 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14391 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14392 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14393 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14394 
14395 static const struct builtin_description bdesc_dst[] =
14396 {
14397 #include "powerpcspe-builtin.def"
14398 };
14399 
14400 /* Simple binary operations: VECc = foo (VECa, VECb).  */
14401 
14402 #undef RS6000_BUILTIN_0
14403 #undef RS6000_BUILTIN_1
14404 #undef RS6000_BUILTIN_2
14405 #undef RS6000_BUILTIN_3
14406 #undef RS6000_BUILTIN_A
14407 #undef RS6000_BUILTIN_D
14408 #undef RS6000_BUILTIN_E
14409 #undef RS6000_BUILTIN_H
14410 #undef RS6000_BUILTIN_P
14411 #undef RS6000_BUILTIN_Q
14412 #undef RS6000_BUILTIN_S
14413 #undef RS6000_BUILTIN_X
14414 
14415 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14416 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14417 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
14418   { MASK, ICODE, NAME, ENUM },
14419 
14420 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14421 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14422 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14423 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14424 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14425 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14426 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14427 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14428 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14429 
14430 static const struct builtin_description bdesc_2arg[] =
14431 {
14432 #include "powerpcspe-builtin.def"
14433 };
14434 
14435 #undef RS6000_BUILTIN_0
14436 #undef RS6000_BUILTIN_1
14437 #undef RS6000_BUILTIN_2
14438 #undef RS6000_BUILTIN_3
14439 #undef RS6000_BUILTIN_A
14440 #undef RS6000_BUILTIN_D
14441 #undef RS6000_BUILTIN_E
14442 #undef RS6000_BUILTIN_H
14443 #undef RS6000_BUILTIN_P
14444 #undef RS6000_BUILTIN_Q
14445 #undef RS6000_BUILTIN_S
14446 #undef RS6000_BUILTIN_X
14447 
14448 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14449 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14450 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14451 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14452 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14453 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14454 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14455 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14456 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
14457   { MASK, ICODE, NAME, ENUM },
14458 
14459 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14460 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14461 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14462 
14463 /* AltiVec predicates.  */
14464 
14465 static const struct builtin_description bdesc_altivec_preds[] =
14466 {
14467 #include "powerpcspe-builtin.def"
14468 };
14469 
14470 /* SPE predicates.  */
14471 #undef RS6000_BUILTIN_0
14472 #undef RS6000_BUILTIN_1
14473 #undef RS6000_BUILTIN_2
14474 #undef RS6000_BUILTIN_3
14475 #undef RS6000_BUILTIN_A
14476 #undef RS6000_BUILTIN_D
14477 #undef RS6000_BUILTIN_E
14478 #undef RS6000_BUILTIN_H
14479 #undef RS6000_BUILTIN_P
14480 #undef RS6000_BUILTIN_Q
14481 #undef RS6000_BUILTIN_S
14482 #undef RS6000_BUILTIN_X
14483 
14484 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14485 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14486 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14487 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14488 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14489 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14490 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14491 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14492 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14493 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14494 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
14495   { MASK, ICODE, NAME, ENUM },
14496 
14497 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14498 
14499 static const struct builtin_description bdesc_spe_predicates[] =
14500 {
14501 #include "powerpcspe-builtin.def"
14502 };
14503 
14504 /* SPE evsel predicates.  */
14505 #undef RS6000_BUILTIN_0
14506 #undef RS6000_BUILTIN_1
14507 #undef RS6000_BUILTIN_2
14508 #undef RS6000_BUILTIN_3
14509 #undef RS6000_BUILTIN_A
14510 #undef RS6000_BUILTIN_D
14511 #undef RS6000_BUILTIN_E
14512 #undef RS6000_BUILTIN_H
14513 #undef RS6000_BUILTIN_P
14514 #undef RS6000_BUILTIN_Q
14515 #undef RS6000_BUILTIN_S
14516 #undef RS6000_BUILTIN_X
14517 
14518 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14519 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14520 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14521 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14522 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14523 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14524 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
14525   { MASK, ICODE, NAME, ENUM },
14526 
14527 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14528 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14529 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14530 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14531 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14532 
14533 static const struct builtin_description bdesc_spe_evsel[] =
14534 {
14535 #include "powerpcspe-builtin.def"
14536 };
14537 
14538 /* PAIRED predicates.  */
14539 #undef RS6000_BUILTIN_0
14540 #undef RS6000_BUILTIN_1
14541 #undef RS6000_BUILTIN_2
14542 #undef RS6000_BUILTIN_3
14543 #undef RS6000_BUILTIN_A
14544 #undef RS6000_BUILTIN_D
14545 #undef RS6000_BUILTIN_E
14546 #undef RS6000_BUILTIN_H
14547 #undef RS6000_BUILTIN_P
14548 #undef RS6000_BUILTIN_Q
14549 #undef RS6000_BUILTIN_S
14550 #undef RS6000_BUILTIN_X
14551 
14552 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14553 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14554 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14555 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14556 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14557 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14558 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14559 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14560 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14561 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
14562   { MASK, ICODE, NAME, ENUM },
14563 
14564 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14565 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14566 
14567 static const struct builtin_description bdesc_paired_preds[] =
14568 {
14569 #include "powerpcspe-builtin.def"
14570 };
14571 
14572 /* ABS* operations.  */
14573 
14574 #undef RS6000_BUILTIN_0
14575 #undef RS6000_BUILTIN_1
14576 #undef RS6000_BUILTIN_2
14577 #undef RS6000_BUILTIN_3
14578 #undef RS6000_BUILTIN_A
14579 #undef RS6000_BUILTIN_D
14580 #undef RS6000_BUILTIN_E
14581 #undef RS6000_BUILTIN_H
14582 #undef RS6000_BUILTIN_P
14583 #undef RS6000_BUILTIN_Q
14584 #undef RS6000_BUILTIN_S
14585 #undef RS6000_BUILTIN_X
14586 
14587 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14588 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14589 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14590 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14591 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
14592   { MASK, ICODE, NAME, ENUM },
14593 
14594 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14595 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14596 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14597 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14598 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14599 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14600 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14601 
14602 static const struct builtin_description bdesc_abs[] =
14603 {
14604 #include "powerpcspe-builtin.def"
14605 };
14606 
14607 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
14608    foo (VECa).  */
14609 
14610 #undef RS6000_BUILTIN_0
14611 #undef RS6000_BUILTIN_1
14612 #undef RS6000_BUILTIN_2
14613 #undef RS6000_BUILTIN_3
14614 #undef RS6000_BUILTIN_A
14615 #undef RS6000_BUILTIN_D
14616 #undef RS6000_BUILTIN_E
14617 #undef RS6000_BUILTIN_H
14618 #undef RS6000_BUILTIN_P
14619 #undef RS6000_BUILTIN_Q
14620 #undef RS6000_BUILTIN_S
14621 #undef RS6000_BUILTIN_X
14622 
14623 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14624 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
14625   { MASK, ICODE, NAME, ENUM },
14626 
14627 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14628 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14629 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14630 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14631 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14632 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14633 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14634 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14635 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14636 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14637 
14638 static const struct builtin_description bdesc_1arg[] =
14639 {
14640 #include "powerpcspe-builtin.def"
14641 };
14642 
14643 /* Simple no-argument operations: result = __builtin_darn_32 () */
14644 
14645 #undef RS6000_BUILTIN_0
14646 #undef RS6000_BUILTIN_1
14647 #undef RS6000_BUILTIN_2
14648 #undef RS6000_BUILTIN_3
14649 #undef RS6000_BUILTIN_A
14650 #undef RS6000_BUILTIN_D
14651 #undef RS6000_BUILTIN_E
14652 #undef RS6000_BUILTIN_H
14653 #undef RS6000_BUILTIN_P
14654 #undef RS6000_BUILTIN_Q
14655 #undef RS6000_BUILTIN_S
14656 #undef RS6000_BUILTIN_X
14657 
14658 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
14659   { MASK, ICODE, NAME, ENUM },
14660 
14661 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14662 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14663 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14664 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14665 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14666 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14667 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14668 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14669 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14670 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14671 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14672 
14673 static const struct builtin_description bdesc_0arg[] =
14674 {
14675 #include "powerpcspe-builtin.def"
14676 };
14677 
14678 /* HTM builtins.  */
14679 #undef RS6000_BUILTIN_0
14680 #undef RS6000_BUILTIN_1
14681 #undef RS6000_BUILTIN_2
14682 #undef RS6000_BUILTIN_3
14683 #undef RS6000_BUILTIN_A
14684 #undef RS6000_BUILTIN_D
14685 #undef RS6000_BUILTIN_E
14686 #undef RS6000_BUILTIN_H
14687 #undef RS6000_BUILTIN_P
14688 #undef RS6000_BUILTIN_Q
14689 #undef RS6000_BUILTIN_S
14690 #undef RS6000_BUILTIN_X
14691 
14692 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14693 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14694 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14695 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14696 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14697 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14698 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14699 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
14700   { MASK, ICODE, NAME, ENUM },
14701 
14702 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14703 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14704 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14705 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14706 
14707 static const struct builtin_description bdesc_htm[] =
14708 {
14709 #include "powerpcspe-builtin.def"
14710 };
14711 
14712 #undef RS6000_BUILTIN_0
14713 #undef RS6000_BUILTIN_1
14714 #undef RS6000_BUILTIN_2
14715 #undef RS6000_BUILTIN_3
14716 #undef RS6000_BUILTIN_A
14717 #undef RS6000_BUILTIN_D
14718 #undef RS6000_BUILTIN_E
14719 #undef RS6000_BUILTIN_H
14720 #undef RS6000_BUILTIN_P
14721 #undef RS6000_BUILTIN_Q
14722 #undef RS6000_BUILTIN_S
14723 
14724 /* Return true if a builtin function is overloaded.  */
14725 bool
14726 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
14727 {
14728   return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
14729 }
14730 
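/* Return the internal name of the builtin function FNCODE.  */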
14731 const char *
14732 rs6000_overloaded_builtin_name (enum rs6000_builtins fncode)
14733 {
14734   return rs6000_builtin_info[(int)fncode].name;
14735 }
14736 
14737 /* Expand an expression EXP that calls a builtin without arguments.  */
14738 static rtx
14739 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
14740 {
14741   rtx pat;
14742   machine_mode tmode = insn_data[icode].operand[0].mode;
14743 
14744   if (icode == CODE_FOR_nothing)
14745     /* Builtin not supported on this processor.  */
14746     return 0;
14747 
14748   if (target == 0
14749       || GET_MODE (target) != tmode
14750       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14751     target = gen_reg_rtx (tmode);
14752 
14753   pat = GEN_FCN (icode) (target);
14754   if (! pat)
14755     return 0;
14756   emit_insn (pat);
14757 
14758   return target;
14759 }
14760 
14761 
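/* Expand a call to the mtfsf builtin, whose first argument must be a
   constant 8-bit field mask.  The insn produces no result, so this
   returns NULL_RTX on success.  */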
14762 static rtx
14763 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
14764 {
14765   rtx pat;
14766   tree arg0 = CALL_EXPR_ARG (exp, 0);
14767   tree arg1 = CALL_EXPR_ARG (exp, 1);
14768   rtx op0 = expand_normal (arg0);
14769   rtx op1 = expand_normal (arg1);
14770   machine_mode mode0 = insn_data[icode].operand[0].mode;
14771   machine_mode mode1 = insn_data[icode].operand[1].mode;
14772 
14773   if (icode == CODE_FOR_nothing)
14774     /* Builtin not supported on this processor.  */
14775     return 0;
14776 
14777   /* If we got invalid arguments bail out before generating bad rtl.  */
14778   if (arg0 == error_mark_node || arg1 == error_mark_node)
14779     return const0_rtx;
14780 
14781   if (GET_CODE (op0) != CONST_INT
14782       || INTVAL (op0) > 255
14783       || INTVAL (op0) < 0)
14784     {
14785       error ("argument 1 must be an 8-bit field value");
14786       return const0_rtx;
14787     }
14788 
14789   if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14790     op0 = copy_to_mode_reg (mode0, op0);
14791 
14792   if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14793     op1 = copy_to_mode_reg (mode1, op1);
14794 
14795   pat = GEN_FCN (icode) (op0, op1);
14796   if (! pat)
14797     return const0_rtx;
14798   emit_insn (pat);
14799 
14800   return NULL_RTX;
14801 }
14802 
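/* Expand a unary builtin.  The vspltis* and evsplat* forms require
   their operand to be a 5-bit signed literal, which is checked here
   before any rtl is generated.  */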
14803 static rtx
14804 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
14805 {
14806   rtx pat;
14807   tree arg0 = CALL_EXPR_ARG (exp, 0);
14808   rtx op0 = expand_normal (arg0);
14809   machine_mode tmode = insn_data[icode].operand[0].mode;
14810   machine_mode mode0 = insn_data[icode].operand[1].mode;
14811 
14812   if (icode == CODE_FOR_nothing)
14813     /* Builtin not supported on this processor.  */
14814     return 0;
14815 
14816   /* If we got invalid arguments bail out before generating bad rtl.  */
14817   if (arg0 == error_mark_node)
14818     return const0_rtx;
14819 
14820   if (icode == CODE_FOR_altivec_vspltisb
14821       || icode == CODE_FOR_altivec_vspltish
14822       || icode == CODE_FOR_altivec_vspltisw
14823       || icode == CODE_FOR_spe_evsplatfi
14824       || icode == CODE_FOR_spe_evsplati)
14825     {
14826       /* Only allow 5-bit *signed* literals.  */
14827       if (GET_CODE (op0) != CONST_INT
14828 	  || INTVAL (op0) > 15
14829 	  || INTVAL (op0) < -16)
14830 	{
14831 	  error ("argument 1 must be a 5-bit signed literal");
14832 	  return CONST0_RTX (tmode);
14833 	}
14834     }
14835 
14836   if (target == 0
14837       || GET_MODE (target) != tmode
14838       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14839     target = gen_reg_rtx (tmode);
14840 
14841   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14842     op0 = copy_to_mode_reg (mode0, op0);
14843 
14844   pat = GEN_FCN (icode) (target, op0);
14845   if (! pat)
14846     return 0;
14847   emit_insn (pat);
14848 
14849   return target;
14850 }
14851 
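/* Expand an ABS* builtin; the underlying insns need two scratch
   registers in addition to the target and the source operand.  */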
14852 static rtx
14853 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
14854 {
14855   rtx pat, scratch1, scratch2;
14856   tree arg0 = CALL_EXPR_ARG (exp, 0);
14857   rtx op0 = expand_normal (arg0);
14858   machine_mode tmode = insn_data[icode].operand[0].mode;
14859   machine_mode mode0 = insn_data[icode].operand[1].mode;
14860 
14861   /* If we have invalid arguments, bail out before generating bad rtl.  */
14862   if (arg0 == error_mark_node)
14863     return const0_rtx;
14864 
14865   if (target == 0
14866       || GET_MODE (target) != tmode
14867       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14868     target = gen_reg_rtx (tmode);
14869 
14870   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14871     op0 = copy_to_mode_reg (mode0, op0);
14872 
14873   scratch1 = gen_reg_rtx (mode0);
14874   scratch2 = gen_reg_rtx (mode0);
14875 
14876   pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
14877   if (! pat)
14878     return 0;
14879   emit_insn (pat);
14880 
14881   return target;
14882 }
14883 
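/* Expand a binary builtin.  Several insns require a literal operand
   (5-, 6- or 7-bit unsigned, depending on the insn), which is
   validated here before any rtl is generated.  */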
14884 static rtx
14885 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
14886 {
14887   rtx pat;
14888   tree arg0 = CALL_EXPR_ARG (exp, 0);
14889   tree arg1 = CALL_EXPR_ARG (exp, 1);
14890   rtx op0 = expand_normal (arg0);
14891   rtx op1 = expand_normal (arg1);
14892   machine_mode tmode = insn_data[icode].operand[0].mode;
14893   machine_mode mode0 = insn_data[icode].operand[1].mode;
14894   machine_mode mode1 = insn_data[icode].operand[2].mode;
14895 
14896   if (icode == CODE_FOR_nothing)
14897     /* Builtin not supported on this processor.  */
14898     return 0;
14899 
14900   /* If we got invalid arguments bail out before generating bad rtl.  */
14901   if (arg0 == error_mark_node || arg1 == error_mark_node)
14902     return const0_rtx;
14903 
14904   if (icode == CODE_FOR_altivec_vcfux
14905       || icode == CODE_FOR_altivec_vcfsx
14906       || icode == CODE_FOR_altivec_vctsxs
14907       || icode == CODE_FOR_altivec_vctuxs
14908       || icode == CODE_FOR_altivec_vspltb
14909       || icode == CODE_FOR_altivec_vsplth
14910       || icode == CODE_FOR_altivec_vspltw
14911       || icode == CODE_FOR_spe_evaddiw
14912       || icode == CODE_FOR_spe_evldd
14913       || icode == CODE_FOR_spe_evldh
14914       || icode == CODE_FOR_spe_evldw
14915       || icode == CODE_FOR_spe_evlhhesplat
14916       || icode == CODE_FOR_spe_evlhhossplat
14917       || icode == CODE_FOR_spe_evlhhousplat
14918       || icode == CODE_FOR_spe_evlwhe
14919       || icode == CODE_FOR_spe_evlwhos
14920       || icode == CODE_FOR_spe_evlwhou
14921       || icode == CODE_FOR_spe_evlwhsplat
14922       || icode == CODE_FOR_spe_evlwwsplat
14923       || icode == CODE_FOR_spe_evrlwi
14924       || icode == CODE_FOR_spe_evslwi
14925       || icode == CODE_FOR_spe_evsrwis
14926       || icode == CODE_FOR_spe_evsubifw
14927       || icode == CODE_FOR_spe_evsrwiu)
14928     {
14929       /* Only allow 5-bit unsigned literals.  */
14930       STRIP_NOPS (arg1);
14931       if (TREE_CODE (arg1) != INTEGER_CST
14932 	  || TREE_INT_CST_LOW (arg1) & ~0x1f)
14933 	{
14934 	  error ("argument 2 must be a 5-bit unsigned literal");
14935 	  return CONST0_RTX (tmode);
14936 	}
14937     }
14938   else if (icode == CODE_FOR_dfptstsfi_eq_dd
14939       || icode == CODE_FOR_dfptstsfi_lt_dd
14940       || icode == CODE_FOR_dfptstsfi_gt_dd
14941       || icode == CODE_FOR_dfptstsfi_unordered_dd
14942       || icode == CODE_FOR_dfptstsfi_eq_td
14943       || icode == CODE_FOR_dfptstsfi_lt_td
14944       || icode == CODE_FOR_dfptstsfi_gt_td
14945       || icode == CODE_FOR_dfptstsfi_unordered_td)
14946     {
14947       /* Only allow 6-bit unsigned literals.  */
14948       STRIP_NOPS (arg0);
14949       if (TREE_CODE (arg0) != INTEGER_CST
14950 	  || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
14951 	{
14952 	  error ("argument 1 must be a 6-bit unsigned literal");
14953 	  return CONST0_RTX (tmode);
14954 	}
14955     }
14956   else if (icode == CODE_FOR_xststdcdp
14957 	   || icode == CODE_FOR_xststdcsp
14958 	   || icode == CODE_FOR_xvtstdcdp
14959 	   || icode == CODE_FOR_xvtstdcsp)
14960     {
14961       /* Only allow 7-bit unsigned literals. */
14962       STRIP_NOPS (arg1);
14963       if (TREE_CODE (arg1) != INTEGER_CST
14964 	  || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127))
14965 	{
14966 	  error ("argument 2 must be a 7-bit unsigned literal");
14967 	  return CONST0_RTX (tmode);
14968 	}
14969     }
14970 
14971   if (target == 0
14972       || GET_MODE (target) != tmode
14973       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14974     target = gen_reg_rtx (tmode);
14975 
14976   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14977     op0 = copy_to_mode_reg (mode0, op0);
14978   if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14979     op1 = copy_to_mode_reg (mode1, op1);
14980 
14981   pat = GEN_FCN (icode) (target, op0, op1);
14982   if (! pat)
14983     return 0;
14984   emit_insn (pat);
14985 
14986   return target;
14987 }
14988 
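/* Expand an AltiVec predicate builtin.  The first argument selects
   which CR6 condition to test after the underlying vector compare;
   see the switch at the end of this function.  */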
14989 static rtx
14990 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
14991 {
14992   rtx pat, scratch;
14993   tree cr6_form = CALL_EXPR_ARG (exp, 0);
14994   tree arg0 = CALL_EXPR_ARG (exp, 1);
14995   tree arg1 = CALL_EXPR_ARG (exp, 2);
14996   rtx op0 = expand_normal (arg0);
14997   rtx op1 = expand_normal (arg1);
14998   machine_mode tmode = SImode;
14999   machine_mode mode0 = insn_data[icode].operand[1].mode;
15000   machine_mode mode1 = insn_data[icode].operand[2].mode;
15001   int cr6_form_int;
15002 
15003   if (TREE_CODE (cr6_form) != INTEGER_CST)
15004     {
15005       error ("argument 1 of __builtin_altivec_predicate must be a constant");
15006       return const0_rtx;
15007     }
15008   else
15009     cr6_form_int = TREE_INT_CST_LOW (cr6_form);
15010 
15011   gcc_assert (mode0 == mode1);
15012 
15013   /* If we have invalid arguments, bail out before generating bad rtl.  */
15014   if (arg0 == error_mark_node || arg1 == error_mark_node)
15015     return const0_rtx;
15016 
15017   if (target == 0
15018       || GET_MODE (target) != tmode
15019       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15020     target = gen_reg_rtx (tmode);
15021 
15022   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15023     op0 = copy_to_mode_reg (mode0, op0);
15024   if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15025     op1 = copy_to_mode_reg (mode1, op1);
15026 
15027   /* Note that for many of the relevant operations (e.g. cmpne or
15028      cmpeq) with float or double operands, it makes more sense for the
15029      mode of the allocated scratch register to select a vector of
15030      integer.  But the choice to copy the mode of operand 0 was made
15031      long ago and there are no plans to change it.  */
15032   scratch = gen_reg_rtx (mode0);
15033 
15034   pat = GEN_FCN (icode) (scratch, op0, op1);
15035   if (! pat)
15036     return 0;
15037   emit_insn (pat);
15038 
15039   /* The vec_any* and vec_all* predicates use the same opcodes for two
15040      different operations, but the bits in CR6 will be different
15041      depending on what information we want.  So we have to play tricks
15042      with CR6 to get the right bits out.
15043 
15044      If you think this is disgusting, look at the specs for the
15045      AltiVec predicates.  */
15046 
15047   switch (cr6_form_int)
15048     {
15049     case 0:
15050       emit_insn (gen_cr6_test_for_zero (target));
15051       break;
15052     case 1:
15053       emit_insn (gen_cr6_test_for_zero_reverse (target));
15054       break;
15055     case 2:
15056       emit_insn (gen_cr6_test_for_lt (target));
15057       break;
15058     case 3:
15059       emit_insn (gen_cr6_test_for_lt_reverse (target));
15060       break;
15061     default:
15062       error ("argument 1 of __builtin_altivec_predicate is out of range");
15063       break;
15064     }
15065 
15066   return target;
15067 }
15068 
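/* Expand a paired-single load builtin: the two arguments of EXP form
   a reg+reg address and the loaded value ends up in TARGET.  */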
15069 static rtx
15070 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
15071 {
15072   rtx pat, addr;
15073   tree arg0 = CALL_EXPR_ARG (exp, 0);
15074   tree arg1 = CALL_EXPR_ARG (exp, 1);
15075   machine_mode tmode = insn_data[icode].operand[0].mode;
15076   machine_mode mode0 = Pmode;
15077   machine_mode mode1 = Pmode;
15078   rtx op0 = expand_normal (arg0);
15079   rtx op1 = expand_normal (arg1);
15080 
15081   if (icode == CODE_FOR_nothing)
15082     /* Builtin not supported on this processor.  */
15083     return 0;
15084 
15085   /* If we got invalid arguments bail out before generating bad rtl.  */
15086   if (arg0 == error_mark_node || arg1 == error_mark_node)
15087     return const0_rtx;
15088 
15089   if (target == 0
15090       || GET_MODE (target) != tmode
15091       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15092     target = gen_reg_rtx (tmode);
15093 
15094   op1 = copy_to_mode_reg (mode1, op1);
15095 
15096   if (op0 == const0_rtx)
15097     {
15098       addr = gen_rtx_MEM (tmode, op1);
15099     }
15100   else
15101     {
15102       op0 = copy_to_mode_reg (mode0, op0);
15103       addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
15104     }
15105 
15106   pat = GEN_FCN (icode) (target, addr);
15107 
15108   if (! pat)
15109     return 0;
15110   emit_insn (pat);
15111 
15112   return target;
15113 }
15114 
15115 /* Return a constant vector for use as a little-endian permute control vector
15116    to reverse the order of elements of the given vector mode.  */
15117 static rtx
15118 swap_selector_for_mode (machine_mode mode)
15119 {
15120   /* These are little endian vectors, so their elements are reversed
15121      from what you would normally expect for a permute control vector.  */
15122   unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
15123   unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
15124   unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
15125   unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
15126   unsigned int *swaparray, i;
15127   rtx perm[16];
15128 
15129   switch (mode)
15130     {
15131     case E_V2DFmode:
15132     case E_V2DImode:
15133       swaparray = swap2;
15134       break;
15135     case E_V4SFmode:
15136     case E_V4SImode:
15137       swaparray = swap4;
15138       break;
15139     case E_V8HImode:
15140       swaparray = swap8;
15141       break;
15142     case E_V16QImode:
15143       swaparray = swap16;
15144       break;
15145     default:
15146       gcc_unreachable ();
15147     }
15148 
15149   for (i = 0; i < 16; ++i)
15150     perm[i] = GEN_INT (swaparray[i]);
15151 
15152   return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
15153 }
15154 
15155 /* Generate code for an "lvxl", or "lve*x" built-in for a little endian target
15156    with -maltivec=be specified.  Issue the load followed by an element-
15157    reversing permute.  */
15158 void
15159 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
15160 {
15161   rtx tmp = gen_reg_rtx (mode);
15162   rtx load = gen_rtx_SET (tmp, op1);
15163   rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
15164   rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
15165   rtx sel = swap_selector_for_mode (mode);
15166   rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
15167 
15168   gcc_assert (REG_P (op0));
15169   emit_insn (par);
15170   emit_insn (gen_rtx_SET (op0, vperm));
15171 }
15172 
15173 /* Generate code for a "stvxl" built-in for a little endian target with
15174    -maltivec=be specified.  Issue the store preceded by an element-reversing
15175    permute.  */
15176 void
15177 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
15178 {
15179   rtx tmp = gen_reg_rtx (mode);
15180   rtx store = gen_rtx_SET (op0, tmp);
15181   rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
15182   rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
15183   rtx sel = swap_selector_for_mode (mode);
15184   rtx vperm;
15185 
15186   gcc_assert (REG_P (op1));
15187   vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
15188   emit_insn (gen_rtx_SET (tmp, vperm));
15189   emit_insn (par);
15190 }
15191 
15192 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
15193    specified.  Issue the store preceded by an element-reversing permute.  */
15194 void
15195 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
15196 {
15197   machine_mode inner_mode = GET_MODE_INNER (mode);
15198   rtx tmp = gen_reg_rtx (mode);
15199   rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
15200   rtx sel = swap_selector_for_mode (mode);
15201   rtx vperm;
15202 
15203   gcc_assert (REG_P (op1));
15204   vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
15205   emit_insn (gen_rtx_SET (tmp, vperm));
15206   emit_insn (gen_rtx_SET (op0, stvx));
15207 }
15208 
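/* Expand an AltiVec load builtin.  BLK selects a BLKmode memory
   reference; for the plain lvx forms the address is ANDed with -16 so
   the RTL matches the insn's implicit 16-byte alignment (see below).  */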
15209 static rtx
15210 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
15211 {
15212   rtx pat, addr;
15213   tree arg0 = CALL_EXPR_ARG (exp, 0);
15214   tree arg1 = CALL_EXPR_ARG (exp, 1);
15215   machine_mode tmode = insn_data[icode].operand[0].mode;
15216   machine_mode mode0 = Pmode;
15217   machine_mode mode1 = Pmode;
15218   rtx op0 = expand_normal (arg0);
15219   rtx op1 = expand_normal (arg1);
15220 
15221   if (icode == CODE_FOR_nothing)
15222     /* Builtin not supported on this processor.  */
15223     return 0;
15224 
15225   /* If we got invalid arguments bail out before generating bad rtl.  */
15226   if (arg0 == error_mark_node || arg1 == error_mark_node)
15227     return const0_rtx;
15228 
15229   if (target == 0
15230       || GET_MODE (target) != tmode
15231       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15232     target = gen_reg_rtx (tmode);
15233 
15234   op1 = copy_to_mode_reg (mode1, op1);
15235 
15236   /* For LVX, express the RTL accurately by ANDing the address with -16.
15237      LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
15238      so the raw address is fine.  */
15239   if (icode == CODE_FOR_altivec_lvx_v2df_2op
15240       || icode == CODE_FOR_altivec_lvx_v2di_2op
15241       || icode == CODE_FOR_altivec_lvx_v4sf_2op
15242       || icode == CODE_FOR_altivec_lvx_v4si_2op
15243       || icode == CODE_FOR_altivec_lvx_v8hi_2op
15244       || icode == CODE_FOR_altivec_lvx_v16qi_2op)
15245     {
15246       rtx rawaddr;
15247       if (op0 == const0_rtx)
15248 	rawaddr = op1;
15249       else
15250 	{
15251 	  op0 = copy_to_mode_reg (mode0, op0);
15252 	  rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
15253 	}
15254       addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
15255       addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
15256 
15257       /* For -maltivec=be, emit the load and follow it up with a
15258 	 permute to swap the elements.  */
15259       if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
15260 	{
15261 	  rtx temp = gen_reg_rtx (tmode);
15262 	  emit_insn (gen_rtx_SET (temp, addr));
15263 
15264 	  rtx sel = swap_selector_for_mode (tmode);
15265 	  rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, temp, temp, sel),
15266 				      UNSPEC_VPERM);
15267 	  emit_insn (gen_rtx_SET (target, vperm));
15268 	}
15269       else
15270 	emit_insn (gen_rtx_SET (target, addr));
15271     }
15272   else
15273     {
15274       if (op0 == const0_rtx)
15275 	addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
15276       else
15277 	{
15278 	  op0 = copy_to_mode_reg (mode0, op0);
15279 	  addr = gen_rtx_MEM (blk ? BLKmode : tmode,
15280 			      gen_rtx_PLUS (Pmode, op1, op0));
15281 	}
15282 
15283       pat = GEN_FCN (icode) (target, addr);
15284       if (! pat)
15285 	return 0;
15286       emit_insn (pat);
15287     }
15288 
15289   return target;
15290 }
15291 
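/* Expand an SPE vector store builtin from the three arguments of EXP;
   stores produce no value, so return NULL_RTX.  */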
15292 static rtx
15293 spe_expand_stv_builtin (enum insn_code icode, tree exp)
15294 {
15295   tree arg0 = CALL_EXPR_ARG (exp, 0);
15296   tree arg1 = CALL_EXPR_ARG (exp, 1);
15297   tree arg2 = CALL_EXPR_ARG (exp, 2);
15298   rtx op0 = expand_normal (arg0);
15299   rtx op1 = expand_normal (arg1);
15300   rtx op2 = expand_normal (arg2);
15301   rtx pat;
15302   machine_mode mode0 = insn_data[icode].operand[0].mode;
15303   machine_mode mode1 = insn_data[icode].operand[1].mode;
15304   machine_mode mode2 = insn_data[icode].operand[2].mode;
15305 
15306   /* Invalid arguments.  Bail before doing anything stoopid!  */
15307   if (arg0 == error_mark_node
15308       || arg1 == error_mark_node
15309       || arg2 == error_mark_node)
15310     return const0_rtx;
15311 
15312   if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
15313     op0 = copy_to_mode_reg (mode2, op0);
15314   if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
15315     op1 = copy_to_mode_reg (mode0, op1);
15316   if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
15317     op2 = copy_to_mode_reg (mode1, op2);
15318 
15319   pat = GEN_FCN (icode) (op1, op2, op0);
15320   if (pat)
15321     emit_insn (pat);
15322   return NULL_RTX;
15323 }
15324 
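/* Expand a paired-single store builtin: the first argument of EXP is
   the value to store, the other two form the reg+reg address.  */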
15325 static rtx
15326 paired_expand_stv_builtin (enum insn_code icode, tree exp)
15327 {
15328   tree arg0 = CALL_EXPR_ARG (exp, 0);
15329   tree arg1 = CALL_EXPR_ARG (exp, 1);
15330   tree arg2 = CALL_EXPR_ARG (exp, 2);
15331   rtx op0 = expand_normal (arg0);
15332   rtx op1 = expand_normal (arg1);
15333   rtx op2 = expand_normal (arg2);
15334   rtx pat, addr;
15335   machine_mode tmode = insn_data[icode].operand[0].mode;
15336   machine_mode mode1 = Pmode;
15337   machine_mode mode2 = Pmode;
15338 
15339   /* Invalid arguments.  Bail before doing anything stoopid!  */
15340   if (arg0 == error_mark_node
15341       || arg1 == error_mark_node
15342       || arg2 == error_mark_node)
15343     return const0_rtx;
15344 
15345   if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
15346     op0 = copy_to_mode_reg (tmode, op0);
15347 
15348   op2 = copy_to_mode_reg (mode2, op2);
15349 
15350   if (op1 == const0_rtx)
15351     {
15352       addr = gen_rtx_MEM (tmode, op2);
15353     }
15354   else
15355     {
15356       op1 = copy_to_mode_reg (mode1, op1);
15357       addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
15358     }
15359 
15360   pat = GEN_FCN (icode) (addr, op0);
15361   if (pat)
15362     emit_insn (pat);
15363   return NULL_RTX;
15364 }
15365 
15366 static rtx
15367 altivec_expand_stxvl_builtin (enum insn_code icode, tree exp)
15368 {
15369   rtx pat;
15370   tree arg0 = CALL_EXPR_ARG (exp, 0);
15371   tree arg1 = CALL_EXPR_ARG (exp, 1);
15372   tree arg2 = CALL_EXPR_ARG (exp, 2);
15373   rtx op0 = expand_normal (arg0);
15374   rtx op1 = expand_normal (arg1);
15375   rtx op2 = expand_normal (arg2);
15376   machine_mode mode0 = insn_data[icode].operand[0].mode;
15377   machine_mode mode1 = insn_data[icode].operand[1].mode;
15378   machine_mode mode2 = insn_data[icode].operand[2].mode;
15379 
15380   if (icode == CODE_FOR_nothing)
15381     /* Builtin not supported on this processor.  */
15382     return NULL_RTX;
15383 
15384   /* If we got invalid arguments bail out before generating bad rtl.  */
15385   if (arg0 == error_mark_node
15386       || arg1 == error_mark_node
15387       || arg2 == error_mark_node)
15388     return NULL_RTX;
15389 
15390   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15391     op0 = copy_to_mode_reg (mode0, op0);
15392   if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15393     op1 = copy_to_mode_reg (mode1, op1);
15394   if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15395     op2 = copy_to_mode_reg (mode2, op2);
15396 
15397   pat = GEN_FCN (icode) (op0, op1, op2);
15398   if (pat)
15399     emit_insn (pat);
15400 
15401   return NULL_RTX;
15402 }
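
/* Illustrative sketch, assuming the vec_xst_len spelling of the user
   intrinsic: "vec_xst_len (v, p, n)" reaches this expander as
   __builtin_altivec_stxvl with op0 = v, op1 = p and op2 = n.  All
   three operands are simply forced into registers; unlike the stvx
   case handled below, no address masking is needed here.  */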
15403 
15404 static rtx
15405 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
15406 {
15407   tree arg0 = CALL_EXPR_ARG (exp, 0);
15408   tree arg1 = CALL_EXPR_ARG (exp, 1);
15409   tree arg2 = CALL_EXPR_ARG (exp, 2);
15410   rtx op0 = expand_normal (arg0);
15411   rtx op1 = expand_normal (arg1);
15412   rtx op2 = expand_normal (arg2);
15413   rtx pat, addr, rawaddr;
15414   machine_mode tmode = insn_data[icode].operand[0].mode;
15415   machine_mode smode = insn_data[icode].operand[1].mode;
15416   machine_mode mode1 = Pmode;
15417   machine_mode mode2 = Pmode;
15418 
15419   /* Invalid arguments.  Bail before doing anything stupid!  */
15420   if (arg0 == error_mark_node
15421       || arg1 == error_mark_node
15422       || arg2 == error_mark_node)
15423     return const0_rtx;
15424 
15425   op2 = copy_to_mode_reg (mode2, op2);
15426 
15427   /* For STVX, express the RTL accurately by ANDing the address with -16.
15428      STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
15429      so the raw address is fine.  */
15430   if (icode == CODE_FOR_altivec_stvx_v2df_2op
15431       || icode == CODE_FOR_altivec_stvx_v2di_2op
15432       || icode == CODE_FOR_altivec_stvx_v4sf_2op
15433       || icode == CODE_FOR_altivec_stvx_v4si_2op
15434       || icode == CODE_FOR_altivec_stvx_v8hi_2op
15435       || icode == CODE_FOR_altivec_stvx_v16qi_2op)
15436     {
15437       if (op1 == const0_rtx)
15438 	rawaddr = op2;
15439       else
15440 	{
15441 	  op1 = copy_to_mode_reg (mode1, op1);
15442 	  rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
15443 	}
15444 
15445       addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
15446       addr = gen_rtx_MEM (tmode, addr);
15447 
15448       op0 = copy_to_mode_reg (tmode, op0);
15449 
15450       /* For -maltivec=be, emit a permute to swap the elements, followed
15451 	 by the store.  */
15452       if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
15453 	{
15454 	  rtx temp = gen_reg_rtx (tmode);
15455 	  rtx sel = swap_selector_for_mode (tmode);
15456 	  rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, op0, op0, sel),
15457 				      UNSPEC_VPERM);
15458 	  emit_insn (gen_rtx_SET (temp, vperm));
15459 	  emit_insn (gen_rtx_SET (addr, temp));
15460 	}
15461       else
15462 	emit_insn (gen_rtx_SET (addr, op0));
15463     }
15464   else
15465     {
15466       if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
15467 	op0 = copy_to_mode_reg (smode, op0);
15468 
15469       if (op1 == const0_rtx)
15470 	addr = gen_rtx_MEM (tmode, op2);
15471       else
15472 	{
15473 	  op1 = copy_to_mode_reg (mode1, op1);
15474 	  addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
15475 	}
15476 
15477       pat = GEN_FCN (icode) (addr, op0);
15478       if (pat)
15479 	emit_insn (pat);
15480     }
15481 
15482   return NULL_RTX;
15483 }
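
/* Illustrative sketch: through the stvx leg above,
   "vec_st (v, 0, p)" (__builtin_altivec_stvx) on a 64-bit target
   becomes roughly

     (set (mem:V4SI (and:DI (reg p) (const_int -16))) (reg:V4SI v))

   while stvxl and stve*x take the generic leg and keep the raw
   address, since their patterns already hide the special behavior
   behind an UNSPEC.  */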
15484 
15485 /* Return the appropriate SPR number associated with the given builtin.  */
15486 static inline HOST_WIDE_INT
15487 htm_spr_num (enum rs6000_builtins code)
15488 {
15489   if (code == HTM_BUILTIN_GET_TFHAR
15490       || code == HTM_BUILTIN_SET_TFHAR)
15491     return TFHAR_SPR;
15492   else if (code == HTM_BUILTIN_GET_TFIAR
15493 	   || code == HTM_BUILTIN_SET_TFIAR)
15494     return TFIAR_SPR;
15495   else if (code == HTM_BUILTIN_GET_TEXASR
15496 	   || code == HTM_BUILTIN_SET_TEXASR)
15497     return TEXASR_SPR;
15498   gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
15499 	      || code == HTM_BUILTIN_SET_TEXASRU);
15500   return TEXASRU_SPR;
15501 }
15502 
15503 /* Return the appropriate SPR regno associated with the given builtin.  */
15504 static inline HOST_WIDE_INT
15505 htm_spr_regno (enum rs6000_builtins code)
15506 {
15507   if (code == HTM_BUILTIN_GET_TFHAR
15508       || code == HTM_BUILTIN_SET_TFHAR)
15509     return TFHAR_REGNO;
15510   else if (code == HTM_BUILTIN_GET_TFIAR
15511 	   || code == HTM_BUILTIN_SET_TFIAR)
15512     return TFIAR_REGNO;
15513   gcc_assert (code == HTM_BUILTIN_GET_TEXASR
15514 	      || code == HTM_BUILTIN_SET_TEXASR
15515 	      || code == HTM_BUILTIN_GET_TEXASRU
15516 	      || code == HTM_BUILTIN_SET_TEXASRU);
15517   return TEXASR_REGNO;
15518 }
15519 
15520 /* Return the correct ICODE value depending on whether we are
15521    setting or reading the HTM SPRs.  */
15522 static inline enum insn_code
15523 rs6000_htm_spr_icode (bool nonvoid)
15524 {
15525   if (nonvoid)
15526     return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
15527   else
15528     return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
15529 }
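
/* Illustrative sketch: on a 64-bit target, a read such as
   __builtin_get_texasr () therefore expands via
   CODE_FOR_htm_mfspr_di, with the SPR number TEXASR_SPR and register
   TEXASR_REGNO supplied by the two helpers above as the final
   operands of the pattern.  */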
15530 
15531 /* Expand the HTM builtin in EXP and store the result in TARGET.
15532    Store true in *EXPANDEDP if we found a builtin to expand.  */
15533 static rtx
15534 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
15535 {
15536   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15537   bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
15538   enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15539   const struct builtin_description *d;
15540   size_t i;
15541 
15542   *expandedp = true;
15543 
15544   if (!TARGET_POWERPC64
15545       && (fcode == HTM_BUILTIN_TABORTDC
15546 	  || fcode == HTM_BUILTIN_TABORTDCI))
15547     {
15548       size_t uns_fcode = (size_t)fcode;
15549       const char *name = rs6000_builtin_info[uns_fcode].name;
15550       error ("builtin %s is only valid in 64-bit mode", name);
15551       return const0_rtx;
15552     }
15553 
15554   /* Expand the HTM builtins.  */
15555   d = bdesc_htm;
15556   for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
15557     if (d->code == fcode)
15558       {
15559 	rtx op[MAX_HTM_OPERANDS], pat;
15560 	int nopnds = 0;
15561 	tree arg;
15562 	call_expr_arg_iterator iter;
15563 	unsigned attr = rs6000_builtin_info[fcode].attr;
15564 	enum insn_code icode = d->icode;
15565 	const struct insn_operand_data *insn_op;
15566 	bool uses_spr = (attr & RS6000_BTC_SPR);
15567 	rtx cr = NULL_RTX;
15568 
15569 	if (uses_spr)
15570 	  icode = rs6000_htm_spr_icode (nonvoid);
15571 	insn_op = &insn_data[icode].operand[0];
15572 
15573 	if (nonvoid)
15574 	  {
15575 	    machine_mode tmode = (uses_spr) ? insn_op->mode : E_SImode;
15576 	    if (!target
15577 		|| GET_MODE (target) != tmode
15578 		|| (uses_spr && !(*insn_op->predicate) (target, tmode)))
15579 	      target = gen_reg_rtx (tmode);
15580 	    if (uses_spr)
15581 	      op[nopnds++] = target;
15582 	  }
15583 
15584 	FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
15585 	{
15586 	  if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
15587 	    return const0_rtx;
15588 
15589 	  insn_op = &insn_data[icode].operand[nopnds];
15590 
15591 	  op[nopnds] = expand_normal (arg);
15592 
15593 	  if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
15594 	    {
15595 	      if (!strcmp (insn_op->constraint, "n"))
15596 		{
15597 		  int arg_num = (nonvoid) ? nopnds : nopnds + 1;
15598 		  if (!CONST_INT_P (op[nopnds]))
15599 		    error ("argument %d must be an unsigned literal", arg_num);
15600 		  else
15601 		    error ("argument %d is an unsigned literal that is "
15602 			   "out of range", arg_num);
15603 		  return const0_rtx;
15604 		}
15605 	      op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
15606 	    }
15607 
15608 	  nopnds++;
15609 	}
15610 
15611 	/* Handle the builtins for extended mnemonics.  These accept
15612 	   no arguments, but map to builtins that take arguments.  */
15613 	switch (fcode)
15614 	  {
15615 	  case HTM_BUILTIN_TENDALL:  /* Alias for: tend. 1  */
15616 	  case HTM_BUILTIN_TRESUME:  /* Alias for: tsr. 1  */
15617 	    op[nopnds++] = GEN_INT (1);
15618 	    if (flag_checking)
15619 	      attr |= RS6000_BTC_UNARY;
15620 	    break;
15621 	  case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0  */
15622 	    op[nopnds++] = GEN_INT (0);
15623 	    if (flag_checking)
15624 	      attr |= RS6000_BTC_UNARY;
15625 	    break;
15626 	  default:
15627 	    break;
15628 	  }
15629 
15630 	/* If this builtin accesses SPRs, then pass in the appropriate
15631 	   SPR number and SPR regno as the last two operands.  */
15632 	if (uses_spr)
15633 	  {
15634 	    machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
15635 	    op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
15636 	    op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
15637 	  }
15638 	/* If this builtin accesses a CR, then pass in a scratch
15639 	   CR as the last operand.  */
15640 	else if (attr & RS6000_BTC_CR)
15641 	  { cr = gen_reg_rtx (CCmode);
15642 	    op[nopnds++] = cr;
15643 	  }
15644 
15645 	if (flag_checking)
15646 	  {
15647 	    int expected_nopnds = 0;
15648 	    if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
15649 	      expected_nopnds = 1;
15650 	    else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
15651 	      expected_nopnds = 2;
15652 	    else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
15653 	      expected_nopnds = 3;
15654 	    if (!(attr & RS6000_BTC_VOID))
15655 	      expected_nopnds += 1;
15656 	    if (uses_spr)
15657 	      expected_nopnds += 2;
15658 
15659 	    gcc_assert (nopnds == expected_nopnds
15660 			&& nopnds <= MAX_HTM_OPERANDS);
15661 	  }
15662 
15663 	switch (nopnds)
15664 	  {
15665 	  case 1:
15666 	    pat = GEN_FCN (icode) (op[0]);
15667 	    break;
15668 	  case 2:
15669 	    pat = GEN_FCN (icode) (op[0], op[1]);
15670 	    break;
15671 	  case 3:
15672 	    pat = GEN_FCN (icode) (op[0], op[1], op[2]);
15673 	    break;
15674 	  case 4:
15675 	    pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
15676 	    break;
15677 	  default:
15678 	    gcc_unreachable ();
15679 	  }
15680 	if (!pat)
15681 	  return NULL_RTX;
15682 	emit_insn (pat);
15683 
15684 	if (attr & RS6000_BTC_CR)
15685 	  {
15686 	    if (fcode == HTM_BUILTIN_TBEGIN)
15687 	      {
15688 		/* Emit code to set TARGET to true or false depending on
15689 		   whether the tbegin. instruction succeeded or failed
15690 		   to start a transaction.  We do this by placing the 1's
15691 		   complement of CR's EQ bit into TARGET.  */
15692 		rtx scratch = gen_reg_rtx (SImode);
15693 		emit_insn (gen_rtx_SET (scratch,
15694 					gen_rtx_EQ (SImode, cr,
15695 						     const0_rtx)));
15696 		emit_insn (gen_rtx_SET (target,
15697 					gen_rtx_XOR (SImode, scratch,
15698 						     GEN_INT (1))));
15699 	      }
15700 	    else
15701 	      {
15702 		/* Emit code to copy the 4-bit condition register field
15703 		   CR into the least significant end of register TARGET.  */
15704 		rtx scratch1 = gen_reg_rtx (SImode);
15705 		rtx scratch2 = gen_reg_rtx (SImode);
15706 		rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
15707 		emit_insn (gen_movcc (subreg, cr));
15708 		emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
15709 		emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
15710 	      }
15711 	  }
15712 
15713 	if (nonvoid)
15714 	  return target;
15715 	return const0_rtx;
15716       }
15717 
15718   *expandedp = false;
15719   return NULL_RTX;
15720 }
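
/* Illustrative sketch of the CR handling above: for
   "int ok = __builtin_tbegin (0)" the expander emits, in effect,

     scratch = (cr == 0);	the EQ bit of the tbegin. result
     ok = scratch ^ 1;		its one's complement

   so OK is nonzero exactly when the transaction was successfully
   started.  */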
15721 
15722 /* Expand the CPU builtin in FCODE and store the result in TARGET.  */
15723 
15724 static rtx
15725 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
15726 		    rtx target)
15727 {
15728   /* __builtin_cpu_init () is a nop, so expand to nothing.  */
15729   if (fcode == RS6000_BUILTIN_CPU_INIT)
15730     return const0_rtx;
15731 
15732   if (target == 0 || GET_MODE (target) != SImode)
15733     target = gen_reg_rtx (SImode);
15734 
15735 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
15736   tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
15737   if (TREE_CODE (arg) != STRING_CST)
15738     {
15739       error ("builtin %s only accepts a string argument",
15740 	     rs6000_builtin_info[(size_t) fcode].name);
15741       return const0_rtx;
15742     }
15743 
15744   if (fcode == RS6000_BUILTIN_CPU_IS)
15745     {
15746       const char *cpu = TREE_STRING_POINTER (arg);
15747       rtx cpuid = NULL_RTX;
15748       for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
15749 	if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
15750 	  {
15751 	    /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM.  */
15752 	    cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
15753 	    break;
15754 	  }
15755       if (cpuid == NULL_RTX)
15756 	{
15757 	  /* Invalid CPU argument.  */
15758 	  error ("cpu %s is an invalid argument to builtin %s",
15759 		 cpu, rs6000_builtin_info[(size_t) fcode].name);
15760 	  return const0_rtx;
15761 	}
15762 
15763       rtx platform = gen_reg_rtx (SImode);
15764       rtx tcbmem = gen_const_mem (SImode,
15765 				  gen_rtx_PLUS (Pmode,
15766 						gen_rtx_REG (Pmode, TLS_REGNUM),
15767 						GEN_INT (TCB_PLATFORM_OFFSET)));
15768       emit_move_insn (platform, tcbmem);
15769       emit_insn (gen_eqsi3 (target, platform, cpuid));
15770     }
15771   else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
15772     {
15773       const char *hwcap = TREE_STRING_POINTER (arg);
15774       rtx mask = NULL_RTX;
15775       int hwcap_offset;
15776       for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
15777 	if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
15778 	  {
15779 	    mask = GEN_INT (cpu_supports_info[i].mask);
15780 	    hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
15781 	    break;
15782 	  }
15783       if (mask == NULL_RTX)
15784 	{
15785 	  /* Invalid HWCAP argument.  */
15786 	  error ("hwcap %s is an invalid argument to builtin %s",
15787 		 hwcap, rs6000_builtin_info[(size_t) fcode].name);
15788 	  return const0_rtx;
15789 	}
15790 
15791       rtx tcb_hwcap = gen_reg_rtx (SImode);
15792       rtx tcbmem = gen_const_mem (SImode,
15793 				  gen_rtx_PLUS (Pmode,
15794 						gen_rtx_REG (Pmode, TLS_REGNUM),
15795 						GEN_INT (hwcap_offset)));
15796       emit_move_insn (tcb_hwcap, tcbmem);
15797       rtx scratch1 = gen_reg_rtx (SImode);
15798       emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
15799       rtx scratch2 = gen_reg_rtx (SImode);
15800       emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
15801       emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
15802     }
15803 
15804   /* Record that we have expanded a CPU builtin, so that we can later
15805      emit a reference to the special symbol exported by LIBC to ensure we
15806      do not link against an old LIBC that doesn't support this feature.  */
15807   cpu_builtin_p = true;
15808 
15809 #else
15810   /* For old LIBCs, always return FALSE.  */
15811   emit_move_insn (target, GEN_INT (0));
15812 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
15813 
15814   return target;
15815 }
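
/* Illustrative sketch, assuming "power9" is among the cpu_is_info
   entries: with a glibc that provides the HWCAP/platform words in
   the TCB, __builtin_cpu_is ("power9") compiles to an SImode load
   from TLS_REGNUM + TCB_PLATFORM_OFFSET followed by an eqsi3 against
   GEN_INT (cpuid + _DL_FIRST_PLATFORM); no library call is made at
   run time.  */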
15816 
15817 static rtx
15818 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
15819 {
15820   rtx pat;
15821   tree arg0 = CALL_EXPR_ARG (exp, 0);
15822   tree arg1 = CALL_EXPR_ARG (exp, 1);
15823   tree arg2 = CALL_EXPR_ARG (exp, 2);
15824   rtx op0 = expand_normal (arg0);
15825   rtx op1 = expand_normal (arg1);
15826   rtx op2 = expand_normal (arg2);
15827   machine_mode tmode = insn_data[icode].operand[0].mode;
15828   machine_mode mode0 = insn_data[icode].operand[1].mode;
15829   machine_mode mode1 = insn_data[icode].operand[2].mode;
15830   machine_mode mode2 = insn_data[icode].operand[3].mode;
15831 
15832   if (icode == CODE_FOR_nothing)
15833     /* Builtin not supported on this processor.  */
15834     return 0;
15835 
15836   /* If we got invalid arguments bail out before generating bad rtl.  */
15837   if (arg0 == error_mark_node
15838       || arg1 == error_mark_node
15839       || arg2 == error_mark_node)
15840     return const0_rtx;
15841 
15842   /* Check and prepare argument depending on the instruction code.
15843 
15844      Note that a switch statement instead of the sequence of tests
15845      would be incorrect as many of the CODE_FOR values could be
15846      CODE_FOR_nothing and that would yield multiple alternatives
15847      with identical values.  We'd never reach here at runtime in
15848      this case.  */
15849   if (icode == CODE_FOR_altivec_vsldoi_v4sf
15850       || icode == CODE_FOR_altivec_vsldoi_v2df
15851       || icode == CODE_FOR_altivec_vsldoi_v4si
15852       || icode == CODE_FOR_altivec_vsldoi_v8hi
15853       || icode == CODE_FOR_altivec_vsldoi_v16qi)
15854     {
15855       /* Only allow 4-bit unsigned literals.  */
15856       STRIP_NOPS (arg2);
15857       if (TREE_CODE (arg2) != INTEGER_CST
15858 	  || TREE_INT_CST_LOW (arg2) & ~0xf)
15859 	{
15860 	  error ("argument 3 must be a 4-bit unsigned literal");
15861 	  return CONST0_RTX (tmode);
15862 	}
15863     }
15864   else if (icode == CODE_FOR_vsx_xxpermdi_v2df
15865            || icode == CODE_FOR_vsx_xxpermdi_v2di
15866            || icode == CODE_FOR_vsx_xxpermdi_v2df_be
15867            || icode == CODE_FOR_vsx_xxpermdi_v2di_be
15868            || icode == CODE_FOR_vsx_xxpermdi_v1ti
15869            || icode == CODE_FOR_vsx_xxpermdi_v4sf
15870            || icode == CODE_FOR_vsx_xxpermdi_v4si
15871            || icode == CODE_FOR_vsx_xxpermdi_v8hi
15872            || icode == CODE_FOR_vsx_xxpermdi_v16qi
15873            || icode == CODE_FOR_vsx_xxsldwi_v16qi
15874            || icode == CODE_FOR_vsx_xxsldwi_v8hi
15875            || icode == CODE_FOR_vsx_xxsldwi_v4si
15876            || icode == CODE_FOR_vsx_xxsldwi_v4sf
15877            || icode == CODE_FOR_vsx_xxsldwi_v2di
15878            || icode == CODE_FOR_vsx_xxsldwi_v2df)
15879     {
15880       /* Only allow 2-bit unsigned literals.  */
15881       STRIP_NOPS (arg2);
15882       if (TREE_CODE (arg2) != INTEGER_CST
15883 	  || TREE_INT_CST_LOW (arg2) & ~0x3)
15884 	{
15885 	  error ("argument 3 must be a 2-bit unsigned literal");
15886 	  return CONST0_RTX (tmode);
15887 	}
15888     }
15889   else if (icode == CODE_FOR_vsx_set_v2df
15890            || icode == CODE_FOR_vsx_set_v2di
15891 	   || icode == CODE_FOR_bcdadd
15892 	   || icode == CODE_FOR_bcdadd_lt
15893 	   || icode == CODE_FOR_bcdadd_eq
15894 	   || icode == CODE_FOR_bcdadd_gt
15895 	   || icode == CODE_FOR_bcdsub
15896 	   || icode == CODE_FOR_bcdsub_lt
15897 	   || icode == CODE_FOR_bcdsub_eq
15898 	   || icode == CODE_FOR_bcdsub_gt)
15899     {
15900       /* Only allow 1-bit unsigned literals.  */
15901       STRIP_NOPS (arg2);
15902       if (TREE_CODE (arg2) != INTEGER_CST
15903 	  || TREE_INT_CST_LOW (arg2) & ~0x1)
15904 	{
15905 	  error ("argument 3 must be a 1-bit unsigned literal");
15906 	  return CONST0_RTX (tmode);
15907 	}
15908     }
15909   else if (icode == CODE_FOR_dfp_ddedpd_dd
15910            || icode == CODE_FOR_dfp_ddedpd_td)
15911     {
15912       /* Only allow 2-bit unsigned literals where the value is 0 or 2.  */
15913       STRIP_NOPS (arg0);
15914       if (TREE_CODE (arg0) != INTEGER_CST
15915 	  || TREE_INT_CST_LOW (arg0) & ~0x3)
15916 	{
15917 	  error ("argument 1 must be 0 or 2");
15918 	  return CONST0_RTX (tmode);
15919 	}
15920     }
15921   else if (icode == CODE_FOR_dfp_denbcd_dd
15922 	   || icode == CODE_FOR_dfp_denbcd_td)
15923     {
15924       /* Only allow 1-bit unsigned literals.  */
15925       STRIP_NOPS (arg0);
15926       if (TREE_CODE (arg0) != INTEGER_CST
15927 	  || TREE_INT_CST_LOW (arg0) & ~0x1)
15928 	{
15929 	  error ("argument 1 must be a 1-bit unsigned literal");
15930 	  return CONST0_RTX (tmode);
15931 	}
15932     }
15933   else if (icode == CODE_FOR_dfp_dscli_dd
15934            || icode == CODE_FOR_dfp_dscli_td
15935 	   || icode == CODE_FOR_dfp_dscri_dd
15936 	   || icode == CODE_FOR_dfp_dscri_td)
15937     {
15938       /* Only allow 6-bit unsigned literals.  */
15939       STRIP_NOPS (arg1);
15940       if (TREE_CODE (arg1) != INTEGER_CST
15941 	  || TREE_INT_CST_LOW (arg1) & ~0x3f)
15942 	{
15943 	  error ("argument 2 must be a 6-bit unsigned literal");
15944 	  return CONST0_RTX (tmode);
15945 	}
15946     }
15947   else if (icode == CODE_FOR_crypto_vshasigmaw
15948 	   || icode == CODE_FOR_crypto_vshasigmad)
15949     {
15950       /* Check whether the 2nd and 3rd arguments are integer constants and in
15951 	 range and prepare arguments.  */
15952       STRIP_NOPS (arg1);
15953       if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (wi::to_wide (arg1), 2))
15954 	{
15955 	  error ("argument 2 must be 0 or 1");
15956 	  return CONST0_RTX (tmode);
15957 	}
15958 
15959       STRIP_NOPS (arg2);
15960       if (TREE_CODE (arg2) != INTEGER_CST
15961 	  || wi::geu_p (wi::to_wide (arg2), 16))
15962 	{
15963 	  error ("argument 3 must be in the range 0..15");
15964 	  return CONST0_RTX (tmode);
15965 	}
15966     }
15967 
15968   if (target == 0
15969       || GET_MODE (target) != tmode
15970       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15971     target = gen_reg_rtx (tmode);
15972 
15973   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15974     op0 = copy_to_mode_reg (mode0, op0);
15975   if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15976     op1 = copy_to_mode_reg (mode1, op1);
15977   if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15978     op2 = copy_to_mode_reg (mode2, op2);
15979 
15980   if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
15981     pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
15982   else
15983     pat = GEN_FCN (icode) (target, op0, op1, op2);
15984   if (! pat)
15985     return 0;
15986   emit_insn (pat);
15987 
15988   return target;
15989 }
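
/* Illustrative sketch of the literal checks above: vsldoi takes a
   4-bit shift count, so "vec_sld (a, b, 3)" is accepted while
   "vec_sld (a, b, 17)" is rejected at expand time with "argument 3
   must be a 4-bit unsigned literal", instead of silently generating
   bad RTL.  */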
15990 
15991 /* Expand the lvx builtins.  */
15992 static rtx
15993 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
15994 {
15995   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15996   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15997   tree arg0;
15998   machine_mode tmode, mode0;
15999   rtx pat, op0;
16000   enum insn_code icode;
16001 
16002   switch (fcode)
16003     {
16004     case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
16005       icode = CODE_FOR_vector_altivec_load_v16qi;
16006       break;
16007     case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
16008       icode = CODE_FOR_vector_altivec_load_v8hi;
16009       break;
16010     case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
16011       icode = CODE_FOR_vector_altivec_load_v4si;
16012       break;
16013     case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
16014       icode = CODE_FOR_vector_altivec_load_v4sf;
16015       break;
16016     case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
16017       icode = CODE_FOR_vector_altivec_load_v2df;
16018       break;
16019     case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
16020       icode = CODE_FOR_vector_altivec_load_v2di;
16021       break;
16022     case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
16023       icode = CODE_FOR_vector_altivec_load_v1ti;
16024       break;
16025     default:
16026       *expandedp = false;
16027       return NULL_RTX;
16028     }
16029 
16030   *expandedp = true;
16031 
16032   arg0 = CALL_EXPR_ARG (exp, 0);
16033   op0 = expand_normal (arg0);
16034   tmode = insn_data[icode].operand[0].mode;
16035   mode0 = insn_data[icode].operand[1].mode;
16036 
16037   if (target == 0
16038       || GET_MODE (target) != tmode
16039       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16040     target = gen_reg_rtx (tmode);
16041 
16042   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16043     op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16044 
16045   pat = GEN_FCN (icode) (target, op0);
16046   if (! pat)
16047     return 0;
16048   emit_insn (pat);
16049   return target;
16050 }
16051 
16052 /* Expand the stvx builtins.  */
16053 static rtx
16054 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
16055 			   bool *expandedp)
16056 {
16057   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16058   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
16059   tree arg0, arg1;
16060   machine_mode mode0, mode1;
16061   rtx pat, op0, op1;
16062   enum insn_code icode;
16063 
16064   switch (fcode)
16065     {
16066     case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
16067       icode = CODE_FOR_vector_altivec_store_v16qi;
16068       break;
16069     case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
16070       icode = CODE_FOR_vector_altivec_store_v8hi;
16071       break;
16072     case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
16073       icode = CODE_FOR_vector_altivec_store_v4si;
16074       break;
16075     case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
16076       icode = CODE_FOR_vector_altivec_store_v4sf;
16077       break;
16078     case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
16079       icode = CODE_FOR_vector_altivec_store_v2df;
16080       break;
16081     case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
16082       icode = CODE_FOR_vector_altivec_store_v2di;
16083       break;
16084     case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
16085       icode = CODE_FOR_vector_altivec_store_v1ti;
16086       break;
16087     default:
16088       *expandedp = false;
16089       return NULL_RTX;
16090     }
16091 
16092   arg0 = CALL_EXPR_ARG (exp, 0);
16093   arg1 = CALL_EXPR_ARG (exp, 1);
16094   op0 = expand_normal (arg0);
16095   op1 = expand_normal (arg1);
16096   mode0 = insn_data[icode].operand[0].mode;
16097   mode1 = insn_data[icode].operand[1].mode;
16098 
16099   if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16100     op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16101   if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16102     op1 = copy_to_mode_reg (mode1, op1);
16103 
16104   pat = GEN_FCN (icode) (op0, op1);
16105   if (pat)
16106     emit_insn (pat);
16107 
16108   *expandedp = true;
16109   return NULL_RTX;
16110 }
16111 
16112 /* Expand the dst builtins.  */
16113 static rtx
16114 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
16115 			    bool *expandedp)
16116 {
16117   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16118   enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16119   tree arg0, arg1, arg2;
16120   machine_mode mode0, mode1;
16121   rtx pat, op0, op1, op2;
16122   const struct builtin_description *d;
16123   size_t i;
16124 
16125   *expandedp = false;
16126 
16127   /* Handle DST variants.  */
16128   d = bdesc_dst;
16129   for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
16130     if (d->code == fcode)
16131       {
16132 	arg0 = CALL_EXPR_ARG (exp, 0);
16133 	arg1 = CALL_EXPR_ARG (exp, 1);
16134 	arg2 = CALL_EXPR_ARG (exp, 2);
16135 	op0 = expand_normal (arg0);
16136 	op1 = expand_normal (arg1);
16137 	op2 = expand_normal (arg2);
16138 	mode0 = insn_data[d->icode].operand[0].mode;
16139 	mode1 = insn_data[d->icode].operand[1].mode;
16140 
16141 	/* Invalid arguments, bail out before generating bad rtl.  */
16142 	if (arg0 == error_mark_node
16143 	    || arg1 == error_mark_node
16144 	    || arg2 == error_mark_node)
16145 	  return const0_rtx;
16146 
16147 	*expandedp = true;
16148 	STRIP_NOPS (arg2);
16149 	if (TREE_CODE (arg2) != INTEGER_CST
16150 	    || TREE_INT_CST_LOW (arg2) & ~0x3)
16151 	  {
16152 	    error ("argument to %qs must be a 2-bit unsigned literal", d->name);
16153 	    return const0_rtx;
16154 	  }
16155 
16156 	if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
16157 	  op0 = copy_to_mode_reg (Pmode, op0);
16158 	if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
16159 	  op1 = copy_to_mode_reg (mode1, op1);
16160 
16161 	pat = GEN_FCN (d->icode) (op0, op1, op2);
16162 	if (pat != 0)
16163 	  emit_insn (pat);
16164 
16165 	return NULL_RTX;
16166       }
16167 
16168   return NULL_RTX;
16169 }
16170 
16171 /* Expand vec_init builtin.  */
16172 static rtx
16173 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
16174 {
16175   machine_mode tmode = TYPE_MODE (type);
16176   machine_mode inner_mode = GET_MODE_INNER (tmode);
16177   int i, n_elt = GET_MODE_NUNITS (tmode);
16178 
16179   gcc_assert (VECTOR_MODE_P (tmode));
16180   gcc_assert (n_elt == call_expr_nargs (exp));
16181 
16182   if (!target || !register_operand (target, tmode))
16183     target = gen_reg_rtx (tmode);
16184 
16185   /* If we have a vector composed of a single element, such as V1TImode, do
16186      the initialization directly.  */
16187   if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
16188     {
16189       rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
16190       emit_move_insn (target, gen_lowpart (tmode, x));
16191     }
16192   else
16193     {
16194       rtvec v = rtvec_alloc (n_elt);
16195 
16196       for (i = 0; i < n_elt; ++i)
16197 	{
16198 	  rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
16199 	  RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
16200 	}
16201 
16202       rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
16203     }
16204 
16205   return target;
16206 }
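
/* Illustrative sketch: a four-element init, e.g. the builtin call
   behind "(vector int) {a, b, c, d}", takes the rtvec path above,
   building (parallel [a b c d]) for rs6000_expand_vector_init, while
   a single-element V1TI init is just a move through gen_lowpart.  */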
16207 
16208 /* Return the integer constant in ARG.  Constrain it to be in the range
16209    of the subparts of VEC_TYPE; issue an error if not.  */
16210 
16211 static int
16212 get_element_number (tree vec_type, tree arg)
16213 {
16214   unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
16215 
16216   if (!tree_fits_uhwi_p (arg)
16217       || (elt = tree_to_uhwi (arg), elt > max))
16218     {
16219       error ("selector must be an integer constant in the range 0..%wi", max);
16220       return 0;
16221     }
16222 
16223   return elt;
16224 }
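
/* Illustrative sketch: for a V4SI vector TYPE_VECTOR_SUBPARTS is 4,
   so MAX is 3 and a constant selector of, say, 7 reaching this
   helper is diagnosed as "selector must be an integer constant in
   the range 0..3".  */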
16225 
16226 /* Expand vec_set builtin.  */
16227 static rtx
16228 altivec_expand_vec_set_builtin (tree exp)
16229 {
16230   machine_mode tmode, mode1;
16231   tree arg0, arg1, arg2;
16232   int elt;
16233   rtx op0, op1;
16234 
16235   arg0 = CALL_EXPR_ARG (exp, 0);
16236   arg1 = CALL_EXPR_ARG (exp, 1);
16237   arg2 = CALL_EXPR_ARG (exp, 2);
16238 
16239   tmode = TYPE_MODE (TREE_TYPE (arg0));
16240   mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16241   gcc_assert (VECTOR_MODE_P (tmode));
16242 
16243   op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
16244   op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
16245   elt = get_element_number (TREE_TYPE (arg0), arg2);
16246 
16247   if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16248     op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16249 
16250   op0 = force_reg (tmode, op0);
16251   op1 = force_reg (mode1, op1);
16252 
16253   rs6000_expand_vector_set (op0, op1, elt);
16254 
16255   return op0;
16256 }
16257 
16258 /* Expand vec_ext builtin.  */
16259 static rtx
16260 altivec_expand_vec_ext_builtin (tree exp, rtx target)
16261 {
16262   machine_mode tmode, mode0;
16263   tree arg0, arg1;
16264   rtx op0;
16265   rtx op1;
16266 
16267   arg0 = CALL_EXPR_ARG (exp, 0);
16268   arg1 = CALL_EXPR_ARG (exp, 1);
16269 
16270   op0 = expand_normal (arg0);
16271   op1 = expand_normal (arg1);
16272 
16273   /* Call get_element_number to validate arg1 if it is a constant.  */
16274   if (TREE_CODE (arg1) == INTEGER_CST)
16275     (void) get_element_number (TREE_TYPE (arg0), arg1);
16276 
16277   tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16278   mode0 = TYPE_MODE (TREE_TYPE (arg0));
16279   gcc_assert (VECTOR_MODE_P (mode0));
16280 
16281   op0 = force_reg (mode0, op0);
16282 
16283   if (optimize || !target || !register_operand (target, tmode))
16284     target = gen_reg_rtx (tmode);
16285 
16286   rs6000_expand_vector_extract (target, op0, op1);
16287 
16288   return target;
16289 }
16290 
16291 /* Expand the builtin in EXP and store the result in TARGET.  Store
16292    true in *EXPANDEDP if we found a builtin to expand.  */
16293 static rtx
16294 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
16295 {
16296   const struct builtin_description *d;
16297   size_t i;
16298   enum insn_code icode;
16299   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16300   tree arg0, arg1, arg2;
16301   rtx op0, pat;
16302   machine_mode tmode, mode0;
16303   enum rs6000_builtins fcode
16304     = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16305 
16306   if (rs6000_overloaded_builtin_p (fcode))
16307     {
16308       *expandedp = true;
16309       error ("unresolved overload for Altivec builtin %qF", fndecl);
16310 
16311       /* Given it is invalid, just generate a normal call.  */
16312       return expand_call (exp, target, false);
16313     }
16314 
16315   target = altivec_expand_ld_builtin (exp, target, expandedp);
16316   if (*expandedp)
16317     return target;
16318 
16319   target = altivec_expand_st_builtin (exp, target, expandedp);
16320   if (*expandedp)
16321     return target;
16322 
16323   target = altivec_expand_dst_builtin (exp, target, expandedp);
16324   if (*expandedp)
16325     return target;
16326 
16327   *expandedp = true;
16328 
16329   switch (fcode)
16330     {
16331     case ALTIVEC_BUILTIN_STVX_V2DF:
16332       return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df_2op, exp);
16333     case ALTIVEC_BUILTIN_STVX_V2DI:
16334       return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di_2op, exp);
16335     case ALTIVEC_BUILTIN_STVX_V4SF:
16336       return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf_2op, exp);
16337     case ALTIVEC_BUILTIN_STVX:
16338     case ALTIVEC_BUILTIN_STVX_V4SI:
16339       return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si_2op, exp);
16340     case ALTIVEC_BUILTIN_STVX_V8HI:
16341       return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi_2op, exp);
16342     case ALTIVEC_BUILTIN_STVX_V16QI:
16343       return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi_2op, exp);
16344     case ALTIVEC_BUILTIN_STVEBX:
16345       return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
16346     case ALTIVEC_BUILTIN_STVEHX:
16347       return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
16348     case ALTIVEC_BUILTIN_STVEWX:
16349       return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
16350     case ALTIVEC_BUILTIN_STVXL_V2DF:
16351       return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
16352     case ALTIVEC_BUILTIN_STVXL_V2DI:
16353       return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
16354     case ALTIVEC_BUILTIN_STVXL_V4SF:
16355       return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
16356     case ALTIVEC_BUILTIN_STVXL:
16357     case ALTIVEC_BUILTIN_STVXL_V4SI:
16358       return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
16359     case ALTIVEC_BUILTIN_STVXL_V8HI:
16360       return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
16361     case ALTIVEC_BUILTIN_STVXL_V16QI:
16362       return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
16363 
16364     case ALTIVEC_BUILTIN_STVLX:
16365       return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
16366     case ALTIVEC_BUILTIN_STVLXL:
16367       return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
16368     case ALTIVEC_BUILTIN_STVRX:
16369       return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
16370     case ALTIVEC_BUILTIN_STVRXL:
16371       return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
16372 
16373     case P9V_BUILTIN_STXVL:
16374       return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp);
16375 
16376     case VSX_BUILTIN_STXVD2X_V1TI:
16377       return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
16378     case VSX_BUILTIN_STXVD2X_V2DF:
16379       return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
16380     case VSX_BUILTIN_STXVD2X_V2DI:
16381       return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
16382     case VSX_BUILTIN_STXVW4X_V4SF:
16383       return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
16384     case VSX_BUILTIN_STXVW4X_V4SI:
16385       return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
16386     case VSX_BUILTIN_STXVW4X_V8HI:
16387       return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
16388     case VSX_BUILTIN_STXVW4X_V16QI:
16389       return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
16390 
16391     /* For the following on big endian, it's ok to use any appropriate
16392        unaligned-supporting store, so use a generic expander.  For
16393        little-endian, the exact element-reversing instruction must
16394        be used.  */
16395     case VSX_BUILTIN_ST_ELEMREV_V2DF:
16396       {
16397 	enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
16398 			       : CODE_FOR_vsx_st_elemrev_v2df);
16399 	return altivec_expand_stv_builtin (code, exp);
16400       }
16401     case VSX_BUILTIN_ST_ELEMREV_V2DI:
16402       {
16403 	enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
16404 			       : CODE_FOR_vsx_st_elemrev_v2di);
16405 	return altivec_expand_stv_builtin (code, exp);
16406       }
16407     case VSX_BUILTIN_ST_ELEMREV_V4SF:
16408       {
16409 	enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
16410 			       : CODE_FOR_vsx_st_elemrev_v4sf);
16411 	return altivec_expand_stv_builtin (code, exp);
16412       }
16413     case VSX_BUILTIN_ST_ELEMREV_V4SI:
16414       {
16415 	enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
16416 			       : CODE_FOR_vsx_st_elemrev_v4si);
16417 	return altivec_expand_stv_builtin (code, exp);
16418       }
16419     case VSX_BUILTIN_ST_ELEMREV_V8HI:
16420       {
16421 	enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
16422 			       : CODE_FOR_vsx_st_elemrev_v8hi);
16423 	return altivec_expand_stv_builtin (code, exp);
16424       }
16425     case VSX_BUILTIN_ST_ELEMREV_V16QI:
16426       {
16427 	enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
16428 			       : CODE_FOR_vsx_st_elemrev_v16qi);
16429 	return altivec_expand_stv_builtin (code, exp);
16430       }
16431 
16432     case ALTIVEC_BUILTIN_MFVSCR:
16433       icode = CODE_FOR_altivec_mfvscr;
16434       tmode = insn_data[icode].operand[0].mode;
16435 
16436       if (target == 0
16437 	  || GET_MODE (target) != tmode
16438 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16439 	target = gen_reg_rtx (tmode);
16440 
16441       pat = GEN_FCN (icode) (target);
16442       if (! pat)
16443 	return 0;
16444       emit_insn (pat);
16445       return target;
16446 
16447     case ALTIVEC_BUILTIN_MTVSCR:
16448       icode = CODE_FOR_altivec_mtvscr;
16449       arg0 = CALL_EXPR_ARG (exp, 0);
16450       op0 = expand_normal (arg0);
16451       mode0 = insn_data[icode].operand[0].mode;
16452 
16453       /* If we got invalid arguments bail out before generating bad rtl.  */
16454       if (arg0 == error_mark_node)
16455 	return const0_rtx;
16456 
16457       if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16458 	op0 = copy_to_mode_reg (mode0, op0);
16459 
16460       pat = GEN_FCN (icode) (op0);
16461       if (pat)
16462 	emit_insn (pat);
16463       return NULL_RTX;
16464 
16465     case ALTIVEC_BUILTIN_DSSALL:
16466       emit_insn (gen_altivec_dssall ());
16467       return NULL_RTX;
16468 
16469     case ALTIVEC_BUILTIN_DSS:
16470       icode = CODE_FOR_altivec_dss;
16471       arg0 = CALL_EXPR_ARG (exp, 0);
16472       STRIP_NOPS (arg0);
16473       op0 = expand_normal (arg0);
16474       mode0 = insn_data[icode].operand[0].mode;
16475 
16476       /* If we got invalid arguments bail out before generating bad rtl.  */
16477       if (arg0 == error_mark_node)
16478 	return const0_rtx;
16479 
16480       if (TREE_CODE (arg0) != INTEGER_CST
16481 	  || TREE_INT_CST_LOW (arg0) & ~0x3)
16482 	{
16483 	  error ("argument to dss must be a 2-bit unsigned literal");
16484 	  return const0_rtx;
16485 	}
16486 
16487       if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16488 	op0 = copy_to_mode_reg (mode0, op0);
16489 
16490       emit_insn (gen_altivec_dss (op0));
16491       return NULL_RTX;
16492 
16493     case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
16494     case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
16495     case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
16496     case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
16497     case VSX_BUILTIN_VEC_INIT_V2DF:
16498     case VSX_BUILTIN_VEC_INIT_V2DI:
16499     case VSX_BUILTIN_VEC_INIT_V1TI:
16500       return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
16501 
16502     case ALTIVEC_BUILTIN_VEC_SET_V4SI:
16503     case ALTIVEC_BUILTIN_VEC_SET_V8HI:
16504     case ALTIVEC_BUILTIN_VEC_SET_V16QI:
16505     case ALTIVEC_BUILTIN_VEC_SET_V4SF:
16506     case VSX_BUILTIN_VEC_SET_V2DF:
16507     case VSX_BUILTIN_VEC_SET_V2DI:
16508     case VSX_BUILTIN_VEC_SET_V1TI:
16509       return altivec_expand_vec_set_builtin (exp);
16510 
16511     case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
16512     case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
16513     case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
16514     case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
16515     case VSX_BUILTIN_VEC_EXT_V2DF:
16516     case VSX_BUILTIN_VEC_EXT_V2DI:
16517     case VSX_BUILTIN_VEC_EXT_V1TI:
16518       return altivec_expand_vec_ext_builtin (exp, target);
16519 
16520     case P9V_BUILTIN_VEXTRACT4B:
16521     case P9V_BUILTIN_VEC_VEXTRACT4B:
16522       arg1 = CALL_EXPR_ARG (exp, 1);
16523       STRIP_NOPS (arg1);
16524 
16525       /* Generate a normal call if it is invalid.  */
16526       if (arg1 == error_mark_node)
16527 	return expand_call (exp, target, false);
16528 
16529       if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) > 12)
16530 	{
16531 	  error ("second argument to vec_vextract4b must be 0..12");
16532 	  return expand_call (exp, target, false);
16533 	}
16534       break;
16535 
16536     case P9V_BUILTIN_VINSERT4B:
16537     case P9V_BUILTIN_VINSERT4B_DI:
16538     case P9V_BUILTIN_VEC_VINSERT4B:
16539       arg2 = CALL_EXPR_ARG (exp, 2);
16540       STRIP_NOPS (arg2);
16541 
16542       /* Generate a normal call if it is invalid.  */
16543       if (arg2 == error_mark_node)
16544 	return expand_call (exp, target, false);
16545 
16546       if (TREE_CODE (arg2) != INTEGER_CST || TREE_INT_CST_LOW (arg2) > 12)
16547 	{
16548 	  error ("third argument to vec_vinsert4b must be 0..12");
16549 	  return expand_call (exp, target, false);
16550 	}
16551       break;
16552 
16553     default:
16554       break;
16556     }
16557 
16558   /* Expand abs* operations.  */
16559   d = bdesc_abs;
16560   for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
16561     if (d->code == fcode)
16562       return altivec_expand_abs_builtin (d->icode, exp, target);
16563 
16564   /* Expand the AltiVec predicates.  */
16565   d = bdesc_altivec_preds;
16566   for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
16567     if (d->code == fcode)
16568       return altivec_expand_predicate_builtin (d->icode, exp, target);
16569 
16570   /* LV* are funky.  We initialized them differently.  */
16571   switch (fcode)
16572     {
16573     case ALTIVEC_BUILTIN_LVSL:
16574       return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
16575 					exp, target, false);
16576     case ALTIVEC_BUILTIN_LVSR:
16577       return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
16578 					exp, target, false);
16579     case ALTIVEC_BUILTIN_LVEBX:
16580       return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
16581 					exp, target, false);
16582     case ALTIVEC_BUILTIN_LVEHX:
16583       return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
16584 					exp, target, false);
16585     case ALTIVEC_BUILTIN_LVEWX:
16586       return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
16587 					exp, target, false);
16588     case ALTIVEC_BUILTIN_LVXL_V2DF:
16589       return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
16590 					exp, target, false);
16591     case ALTIVEC_BUILTIN_LVXL_V2DI:
16592       return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
16593 					exp, target, false);
16594     case ALTIVEC_BUILTIN_LVXL_V4SF:
16595       return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
16596 					exp, target, false);
16597     case ALTIVEC_BUILTIN_LVXL:
16598     case ALTIVEC_BUILTIN_LVXL_V4SI:
16599       return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
16600 					exp, target, false);
16601     case ALTIVEC_BUILTIN_LVXL_V8HI:
16602       return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
16603 					exp, target, false);
16604     case ALTIVEC_BUILTIN_LVXL_V16QI:
16605       return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
16606 					exp, target, false);
16607     case ALTIVEC_BUILTIN_LVX_V2DF:
16608       return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df_2op,
16609 					exp, target, false);
16610     case ALTIVEC_BUILTIN_LVX_V2DI:
16611       return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op,
16612 					exp, target, false);
16613     case ALTIVEC_BUILTIN_LVX_V4SF:
16614       return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op,
16615 					exp, target, false);
16616     case ALTIVEC_BUILTIN_LVX:
16617     case ALTIVEC_BUILTIN_LVX_V4SI:
16618       return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si_2op,
16619 					exp, target, false);
16620     case ALTIVEC_BUILTIN_LVX_V8HI:
16621       return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi_2op,
16622 					exp, target, false);
16623     case ALTIVEC_BUILTIN_LVX_V16QI:
16624       return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi_2op,
16625 					exp, target, false);
16626     case ALTIVEC_BUILTIN_LVLX:
16627       return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
16628 					exp, target, true);
16629     case ALTIVEC_BUILTIN_LVLXL:
16630       return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
16631 					exp, target, true);
16632     case ALTIVEC_BUILTIN_LVRX:
16633       return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
16634 					exp, target, true);
16635     case ALTIVEC_BUILTIN_LVRXL:
16636       return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
16637 					exp, target, true);
16638     case VSX_BUILTIN_LXVD2X_V1TI:
16639       return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
16640 					exp, target, false);
16641     case VSX_BUILTIN_LXVD2X_V2DF:
16642       return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
16643 					exp, target, false);
16644     case VSX_BUILTIN_LXVD2X_V2DI:
16645       return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
16646 					exp, target, false);
16647     case VSX_BUILTIN_LXVW4X_V4SF:
16648       return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
16649 					exp, target, false);
16650     case VSX_BUILTIN_LXVW4X_V4SI:
16651       return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
16652 					exp, target, false);
16653     case VSX_BUILTIN_LXVW4X_V8HI:
16654       return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
16655 					exp, target, false);
16656     case VSX_BUILTIN_LXVW4X_V16QI:
16657       return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
16658 					exp, target, false);
16659     /* For the following on big endian, it's ok to use any appropriate
16660        unaligned-supporting load, so use a generic expander.  For
16661        little-endian, the exact element-reversing instruction must
16662        be used.  */
16663     case VSX_BUILTIN_LD_ELEMREV_V2DF:
16664       {
16665 	enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
16666 			       : CODE_FOR_vsx_ld_elemrev_v2df);
16667 	return altivec_expand_lv_builtin (code, exp, target, false);
16668       }
16669     case VSX_BUILTIN_LD_ELEMREV_V2DI:
16670       {
16671 	enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
16672 			       : CODE_FOR_vsx_ld_elemrev_v2di);
16673 	return altivec_expand_lv_builtin (code, exp, target, false);
16674       }
16675     case VSX_BUILTIN_LD_ELEMREV_V4SF:
16676       {
16677 	enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
16678 			       : CODE_FOR_vsx_ld_elemrev_v4sf);
16679 	return altivec_expand_lv_builtin (code, exp, target, false);
16680       }
16681     case VSX_BUILTIN_LD_ELEMREV_V4SI:
16682       {
16683 	enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
16684 			       : CODE_FOR_vsx_ld_elemrev_v4si);
16685 	return altivec_expand_lv_builtin (code, exp, target, false);
16686       }
16687     case VSX_BUILTIN_LD_ELEMREV_V8HI:
16688       {
16689 	enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
16690 			       : CODE_FOR_vsx_ld_elemrev_v8hi);
16691 	return altivec_expand_lv_builtin (code, exp, target, false);
16692       }
16693     case VSX_BUILTIN_LD_ELEMREV_V16QI:
16694       {
16695 	enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
16696 			       : CODE_FOR_vsx_ld_elemrev_v16qi);
16697 	return altivec_expand_lv_builtin (code, exp, target, false);
16698       }
16699       break;
16700     default:
16701       break;
16703     }
16704 
16705   *expandedp = false;
16706   return NULL_RTX;
16707 }
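
/* Illustrative note on the dispatch above: the load, store and dst
   tables are tried first, then the explicit switch, then the abs,
   predicate and lv tables; *EXPANDEDP tells the caller whether this
   routine recognized the builtin or whether the next builtin family
   should be consulted.  */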
16708 
16709 /* Expand the builtin in EXP and store the result in TARGET.  Store
16710    true in *EXPANDEDP if we found a builtin to expand.  */
16711 static rtx
16712 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
16713 {
16714   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16715   enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16716   const struct builtin_description *d;
16717   size_t i;
16718 
16719   *expandedp = true;
16720 
16721   switch (fcode)
16722     {
16723     case PAIRED_BUILTIN_STX:
16724       return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
16725     case PAIRED_BUILTIN_LX:
16726       return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
16727     default:
16728       break;
16730     }
16731 
16732   /* Expand the paired predicates.  */
16733   d = bdesc_paired_preds;
16734   for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
16735     if (d->code == fcode)
16736       return paired_expand_predicate_builtin (d->icode, exp, target);
16737 
16738   *expandedp = false;
16739   return NULL_RTX;
16740 }
16741 
16742 /* Binops that need to be initialized manually, but can be expanded
16743    automagically by rs6000_expand_binop_builtin.  */
16744 static const struct builtin_description bdesc_2arg_spe[] =
16745 {
16746   { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
16747   { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
16748   { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
16749   { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
16750   { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
16751   { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
16752   { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
16753   { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
16754   { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
16755   { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
16756   { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
16757   { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
16758   { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
16759   { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
16760   { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
16761   { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
16762   { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
16763   { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
16764   { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
16765   { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
16766   { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
16767   { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
16768 };
16769 
16770 /* Expand the builtin in EXP and store the result in TARGET.  Store
16771    true in *EXPANDEDP if we found a builtin to expand.
16772 
16773    This expands the SPE builtins that are not simple unary and binary
16774    operations.  */
16775 static rtx
16776 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
16777 {
16778   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16779   tree arg1, arg0;
16780   enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16781   enum insn_code icode;
16782   machine_mode tmode, mode0;
16783   rtx pat, op0;
16784   const struct builtin_description *d;
16785   size_t i;
16786 
16787   *expandedp = true;
16788 
16789   /* Syntax check for a 5-bit unsigned immediate.  */
16790   switch (fcode)
16791     {
16792     case SPE_BUILTIN_EVSTDD:
16793     case SPE_BUILTIN_EVSTDH:
16794     case SPE_BUILTIN_EVSTDW:
16795     case SPE_BUILTIN_EVSTWHE:
16796     case SPE_BUILTIN_EVSTWHO:
16797     case SPE_BUILTIN_EVSTWWE:
16798     case SPE_BUILTIN_EVSTWWO:
16799       arg1 = CALL_EXPR_ARG (exp, 2);
16800       if (TREE_CODE (arg1) != INTEGER_CST
16801 	  || TREE_INT_CST_LOW (arg1) & ~0x1f)
16802 	{
16803 	  error ("argument 2 must be a 5-bit unsigned literal");
16804 	  return const0_rtx;
16805 	}
16806       break;
16807     default:
16808       break;
16809     }
16810 
16811   /* The evsplat*i instructions are not quite generic.  */
16812   switch (fcode)
16813     {
16814     case SPE_BUILTIN_EVSPLATFI:
16815       return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
16816 					 exp, target);
16817     case SPE_BUILTIN_EVSPLATI:
16818       return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
16819 					 exp, target);
16820     default:
16821       break;
16822     }
16823 
16824   d = bdesc_2arg_spe;
16825   for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
16826     if (d->code == fcode)
16827       return rs6000_expand_binop_builtin (d->icode, exp, target);
16828 
16829   d = bdesc_spe_predicates;
16830   for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
16831     if (d->code == fcode)
16832       return spe_expand_predicate_builtin (d->icode, exp, target);
16833 
16834   d = bdesc_spe_evsel;
16835   for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
16836     if (d->code == fcode)
16837       return spe_expand_evsel_builtin (d->icode, exp, target);
16838 
16839   switch (fcode)
16840     {
16841     case SPE_BUILTIN_EVSTDDX:
16842       return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
16843     case SPE_BUILTIN_EVSTDHX:
16844       return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
16845     case SPE_BUILTIN_EVSTDWX:
16846       return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
16847     case SPE_BUILTIN_EVSTWHEX:
16848       return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
16849     case SPE_BUILTIN_EVSTWHOX:
16850       return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
16851     case SPE_BUILTIN_EVSTWWEX:
16852       return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
16853     case SPE_BUILTIN_EVSTWWOX:
16854       return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
16855     case SPE_BUILTIN_EVSTDD:
16856       return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
16857     case SPE_BUILTIN_EVSTDH:
16858       return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
16859     case SPE_BUILTIN_EVSTDW:
16860       return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
16861     case SPE_BUILTIN_EVSTWHE:
16862       return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
16863     case SPE_BUILTIN_EVSTWHO:
16864       return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
16865     case SPE_BUILTIN_EVSTWWE:
16866       return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
16867     case SPE_BUILTIN_EVSTWWO:
16868       return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
16869     case SPE_BUILTIN_MFSPEFSCR:
16870       icode = CODE_FOR_spe_mfspefscr;
16871       tmode = insn_data[icode].operand[0].mode;
16872 
16873       if (target == 0
16874 	  || GET_MODE (target) != tmode
16875 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16876 	target = gen_reg_rtx (tmode);
16877 
16878       pat = GEN_FCN (icode) (target);
16879       if (! pat)
16880 	return 0;
16881       emit_insn (pat);
16882       return target;
16883     case SPE_BUILTIN_MTSPEFSCR:
16884       icode = CODE_FOR_spe_mtspefscr;
16885       arg0 = CALL_EXPR_ARG (exp, 0);
16886       op0 = expand_normal (arg0);
16887       mode0 = insn_data[icode].operand[0].mode;
16888 
16889       if (arg0 == error_mark_node)
16890 	return const0_rtx;
16891 
16892       if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16893 	op0 = copy_to_mode_reg (mode0, op0);
16894 
16895       pat = GEN_FCN (icode) (op0);
16896       if (pat)
16897 	emit_insn (pat);
16898       return NULL_RTX;
16899     default:
16900       break;
16901     }
16902 
16903   *expandedp = false;
16904   return NULL_RTX;
16905 }

static rtx
paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat, scratch, tmp;
  tree form = CALL_EXPR_ARG (exp, 0);
  tree arg0 = CALL_EXPR_ARG (exp, 1);
  tree arg1 = CALL_EXPR_ARG (exp, 2);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  machine_mode mode0 = insn_data[icode].operand[1].mode;
  machine_mode mode1 = insn_data[icode].operand[2].mode;
  int form_int;
  enum rtx_code code;

  if (TREE_CODE (form) != INTEGER_CST)
    {
      error ("argument 1 of __builtin_paired_predicate must be a constant");
      return const0_rtx;
    }
  else
    form_int = TREE_INT_CST_LOW (form);

  gcc_assert (mode0 == mode1);

  if (arg0 == error_mark_node || arg1 == error_mark_node)
    return const0_rtx;

  if (target == 0
      || GET_MODE (target) != SImode
      || !(*insn_data[icode].operand[0].predicate) (target, SImode))
    target = gen_reg_rtx (SImode);
  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  scratch = gen_reg_rtx (CCFPmode);

  pat = GEN_FCN (icode) (scratch, op0, op1);
  if (!pat)
    return const0_rtx;

  emit_insn (pat);

  switch (form_int)
    {
      /* LT bit.  */
    case 0:
      code = LT;
      break;
      /* GT bit.  */
    case 1:
      code = GT;
      break;
      /* EQ bit.  */
    case 2:
      code = EQ;
      break;
      /* UN bit.  */
    case 3:
      emit_insn (gen_move_from_CR_ov_bit (target, scratch));
      return target;
    default:
      error ("argument 1 of __builtin_paired_predicate is out of range");
      return const0_rtx;
    }

  tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
  emit_move_insn (target, tmp);
  return target;
}

static rtx
spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat, scratch, tmp;
  tree form = CALL_EXPR_ARG (exp, 0);
  tree arg0 = CALL_EXPR_ARG (exp, 1);
  tree arg1 = CALL_EXPR_ARG (exp, 2);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  machine_mode mode0 = insn_data[icode].operand[1].mode;
  machine_mode mode1 = insn_data[icode].operand[2].mode;
  int form_int;
  enum rtx_code code;

  if (TREE_CODE (form) != INTEGER_CST)
    {
      error ("argument 1 of __builtin_spe_predicate must be a constant");
      return const0_rtx;
    }
  else
    form_int = TREE_INT_CST_LOW (form);

  gcc_assert (mode0 == mode1);

  if (arg0 == error_mark_node || arg1 == error_mark_node)
    return const0_rtx;

  if (target == 0
      || GET_MODE (target) != SImode
      || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
    target = gen_reg_rtx (SImode);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  scratch = gen_reg_rtx (CCmode);

  pat = GEN_FCN (icode) (scratch, op0, op1);
  if (! pat)
    return const0_rtx;
  emit_insn (pat);

  /* There are 4 variants for each predicate: _any_, _all_, _upper_,
     _lower_.  We use one compare, but look in different bits of the
     CR for each variant.

     There are 2 elements in each SPE simd type (upper/lower).  The CR
     bits are set as follows:

     BIT 0 | BIT 1 | BIT 2   | BIT 3
     U     | L     | (U | L) | (U & L)

     So, for an "all" relationship, BIT 3 would be set.
     For an "any" relationship, BIT 2 would be set.  Etc.

     Following traditional nomenclature, these bits map to:

     BIT 0 | BIT 1 | BIT 2   | BIT 3
     LT    | GT    | EQ      | OV

     Later, we generate rtl to read the OV/EQ/LT/GT bits for the
     all/any/upper/lower variants, respectively.  */
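
  /* For example, a hypothetical call __builtin_spe_evcmpgts (1, a, b)
     (the "any" form) emits one compare and then reads the EQ bit,
     which per the table above is (U | L): the result is nonzero if
     a[i] > b[i] held for either element.  */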

  switch (form_int)
    {
      /* All variant.  OV bit.  */
    case 0:
      /* We need to get to the OV bit, which is the ORDERED bit.  We
	 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
	 that's ugly and will make validate_condition_mode die.
	 So let's just use another pattern.  */
      emit_insn (gen_move_from_CR_ov_bit (target, scratch));
      return target;
      /* Any variant.  EQ bit.  */
    case 1:
      code = EQ;
      break;
      /* Upper variant.  LT bit.  */
    case 2:
      code = LT;
      break;
      /* Lower variant.  GT bit.  */
    case 3:
      code = GT;
      break;
    default:
      error ("argument 1 of __builtin_spe_predicate is out of range");
      return const0_rtx;
    }

  tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
  emit_move_insn (target, tmp);

  return target;
}

/* The evsel builtins look like this:

     e = __builtin_spe_evsel_OP (a, b, c, d);

   and work like this:

     e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
     e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];  */
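
/* For example, a hypothetical __builtin_spe_evsel_gts (a, b, c, d)
   (signed "greater than"; the exact OP spellings come from
   bdesc_spe_evsel) compares a and b element-wise and, per element,
   selects from c where a[i] > b[i] and from d otherwise.  */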

static rtx
spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat, scratch;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  machine_mode mode0 = insn_data[icode].operand[1].mode;
  machine_mode mode1 = insn_data[icode].operand[2].mode;

  gcc_assert (mode0 == mode1);

  if (arg0 == error_mark_node || arg1 == error_mark_node
      || arg2 == error_mark_node || arg3 == error_mark_node)
    return const0_rtx;

  if (target == 0
      || GET_MODE (target) != mode0
      || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
    target = gen_reg_rtx (mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode0, op1);
  if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
    op2 = copy_to_mode_reg (mode0, op2);
  if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
    op3 = copy_to_mode_reg (mode0, op3);

  /* Generate the compare.  */
  scratch = gen_reg_rtx (CCmode);
  pat = GEN_FCN (icode) (scratch, op0, op1);
  if (! pat)
    return const0_rtx;
  emit_insn (pat);

  if (mode0 == V2SImode)
    emit_insn (gen_spe_evsel (target, op2, op3, scratch));
  else
    emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));

  return target;
}

/* Raise an error message for a builtin function that is called without the
   appropriate target options being set.  */

static void
rs6000_invalid_builtin (enum rs6000_builtins fncode)
{
  size_t uns_fncode = (size_t) fncode;
  const char *name = rs6000_builtin_info[uns_fncode].name;
  HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;

  gcc_assert (name != NULL);
  if ((fnmask & RS6000_BTM_CELL) != 0)
    error ("Builtin function %s is only valid for the cell processor", name);
  else if ((fnmask & RS6000_BTM_VSX) != 0)
    error ("Builtin function %s requires the -mvsx option", name);
  else if ((fnmask & RS6000_BTM_HTM) != 0)
    error ("Builtin function %s requires the -mhtm option", name);
  else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
    error ("Builtin function %s requires the -maltivec option", name);
  else if ((fnmask & RS6000_BTM_PAIRED) != 0)
    error ("Builtin function %s requires the -mpaired option", name);
  else if ((fnmask & RS6000_BTM_SPE) != 0)
    error ("Builtin function %s requires the -mspe option", name);
  else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
	   == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
    error ("Builtin function %s requires the -mhard-dfp and"
	   " -mpower8-vector options", name);
  else if ((fnmask & RS6000_BTM_DFP) != 0)
    error ("Builtin function %s requires the -mhard-dfp option", name);
  else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
    error ("Builtin function %s requires the -mpower8-vector option", name);
  else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
	   == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
    error ("Builtin function %s requires the -mcpu=power9 and"
	   " -m64 options", name);
  else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
    error ("Builtin function %s requires the -mcpu=power9 option", name);
  else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
	   == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
    error ("Builtin function %s requires the -mcpu=power9 and"
	   " -m64 options", name);
  else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
    error ("Builtin function %s requires the -mcpu=power9 option", name);
  else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
	   == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
    error ("Builtin function %s requires the -mhard-float and"
	   " -mlong-double-128 options", name);
  else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
    error ("Builtin function %s requires the -mhard-float option", name);
  else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
    error ("Builtin function %s requires the -mfloat128 option", name);
  else
    error ("Builtin function %s is not supported with the current options",
	   name);
}

/* Target hook for early folding of built-ins, shamelessly stolen
   from ia64.c.  */
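
/* For example, a call __builtin_nanq ("") whose argument is a constant
   string is folded by this hook into a constant quiet NaN of the
   __float128 type, so no code needs to be emitted for it at all.  */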

static tree
rs6000_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
		     tree *args, bool ignore ATTRIBUTE_UNUSED)
{
  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
    {
      enum rs6000_builtins fn_code
	= (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
      switch (fn_code)
	{
	case RS6000_BUILTIN_NANQ:
	case RS6000_BUILTIN_NANSQ:
	  {
	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
	    const char *str = c_getstr (*args);
	    int quiet = fn_code == RS6000_BUILTIN_NANQ;
	    REAL_VALUE_TYPE real;

	    if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
	      return build_real (type, real);
	    return NULL_TREE;
	  }
	case RS6000_BUILTIN_INFQ:
	case RS6000_BUILTIN_HUGE_VALQ:
	  {
	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
	    REAL_VALUE_TYPE inf;
	    real_inf (&inf);
	    return build_real (type, inf);
	  }
	default:
	  break;
	}
    }
#ifdef SUBTARGET_FOLD_BUILTIN
  return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
#else
  return NULL_TREE;
#endif
}

/* Fold a machine-dependent built-in in GIMPLE.  (For folding into
   a constant, use rs6000_fold_builtin.)  */
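
/* For example, with the AltiVec intrinsics in use, vec_add (a, b) on
   "vector signed int" arrives here as ALTIVEC_BUILTIN_VADDUWM and is
   replaced by a plain GIMPLE assignment "lhs = a + b" (PLUS_EXPR),
   which the rest of the middle end can then optimize like any other
   addition.  */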

bool
rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);
  tree fndecl = gimple_call_fndecl (stmt);
  gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
  enum rs6000_builtins fn_code
    = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
  tree arg0, arg1, lhs;

  switch (fn_code)
    {
    /* Flavors of vec_add.  We deliberately don't expand
       P8V_BUILTIN_VADDUQM as it gets lowered from V1TImode to
       TImode, resulting in much poorer code generation.  */
    case ALTIVEC_BUILTIN_VADDUBM:
    case ALTIVEC_BUILTIN_VADDUHM:
    case ALTIVEC_BUILTIN_VADDUWM:
    case P8V_BUILTIN_VADDUDM:
    case ALTIVEC_BUILTIN_VADDFP:
    case VSX_BUILTIN_XVADDDP:
      {
	arg0 = gimple_call_arg (stmt, 0);
	arg1 = gimple_call_arg (stmt, 1);
	lhs = gimple_call_lhs (stmt);
	gimple *g = gimple_build_assign (lhs, PLUS_EXPR, arg0, arg1);
	gimple_set_location (g, gimple_location (stmt));
	gsi_replace (gsi, g, true);
	return true;
      }
    /* Flavors of vec_sub.  We deliberately don't expand
       P8V_BUILTIN_VSUBUQM.  */
    case ALTIVEC_BUILTIN_VSUBUBM:
    case ALTIVEC_BUILTIN_VSUBUHM:
    case ALTIVEC_BUILTIN_VSUBUWM:
    case P8V_BUILTIN_VSUBUDM:
    case ALTIVEC_BUILTIN_VSUBFP:
    case VSX_BUILTIN_XVSUBDP:
      {
	arg0 = gimple_call_arg (stmt, 0);
	arg1 = gimple_call_arg (stmt, 1);
	lhs = gimple_call_lhs (stmt);
	gimple *g = gimple_build_assign (lhs, MINUS_EXPR, arg0, arg1);
	gimple_set_location (g, gimple_location (stmt));
	gsi_replace (gsi, g, true);
	return true;
      }
    case VSX_BUILTIN_XVMULSP:
    case VSX_BUILTIN_XVMULDP:
      {
	arg0 = gimple_call_arg (stmt, 0);
	arg1 = gimple_call_arg (stmt, 1);
	lhs = gimple_call_lhs (stmt);
	gimple *g = gimple_build_assign (lhs, MULT_EXPR, arg0, arg1);
	gimple_set_location (g, gimple_location (stmt));
	gsi_replace (gsi, g, true);
	return true;
      }
    /* Even element flavors of vec_mul (signed).  */
    case ALTIVEC_BUILTIN_VMULESB:
    case ALTIVEC_BUILTIN_VMULESH:
    /* Even element flavors of vec_mul (unsigned).  */
    case ALTIVEC_BUILTIN_VMULEUB:
    case ALTIVEC_BUILTIN_VMULEUH:
      {
	arg0 = gimple_call_arg (stmt, 0);
	arg1 = gimple_call_arg (stmt, 1);
	lhs = gimple_call_lhs (stmt);
	gimple *g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR,
					 arg0, arg1);
	gimple_set_location (g, gimple_location (stmt));
	gsi_replace (gsi, g, true);
	return true;
      }
    /* Odd element flavors of vec_mul (signed).  */
    case ALTIVEC_BUILTIN_VMULOSB:
    case ALTIVEC_BUILTIN_VMULOSH:
    /* Odd element flavors of vec_mul (unsigned).  */
    case ALTIVEC_BUILTIN_VMULOUB:
    case ALTIVEC_BUILTIN_VMULOUH:
      {
	arg0 = gimple_call_arg (stmt, 0);
	arg1 = gimple_call_arg (stmt, 1);
	lhs = gimple_call_lhs (stmt);
	gimple *g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR,
					 arg0, arg1);
	gimple_set_location (g, gimple_location (stmt));
	gsi_replace (gsi, g, true);
	return true;
      }
    /* Flavors of vec_div (integer).  */
    case VSX_BUILTIN_DIV_V2DI:
    case VSX_BUILTIN_UDIV_V2DI:
      {
	arg0 = gimple_call_arg (stmt, 0);
	arg1 = gimple_call_arg (stmt, 1);
	lhs = gimple_call_lhs (stmt);
	gimple *g = gimple_build_assign (lhs, TRUNC_DIV_EXPR, arg0, arg1);
	gimple_set_location (g, gimple_location (stmt));
	gsi_replace (gsi, g, true);
	return true;
      }
    /* Flavors of vec_div (float).  */
    case VSX_BUILTIN_XVDIVSP:
    case VSX_BUILTIN_XVDIVDP:
      {
	arg0 = gimple_call_arg (stmt, 0);
	arg1 = gimple_call_arg (stmt, 1);
	lhs = gimple_call_lhs (stmt);
	gimple *g = gimple_build_assign (lhs, RDIV_EXPR, arg0, arg1);
	gimple_set_location (g, gimple_location (stmt));
	gsi_replace (gsi, g, true);
	return true;
      }
    /* Flavors of vec_and.  */
    case ALTIVEC_BUILTIN_VAND:
      {
	arg0 = gimple_call_arg (stmt, 0);
	arg1 = gimple_call_arg (stmt, 1);
	lhs = gimple_call_lhs (stmt);
	gimple *g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, arg1);
	gimple_set_location (g, gimple_location (stmt));
	gsi_replace (gsi, g, true);
	return true;
      }
    /* Flavors of vec_andc.  */
    case ALTIVEC_BUILTIN_VANDC:
      {
	arg0 = gimple_call_arg (stmt, 0);
	arg1 = gimple_call_arg (stmt, 1);
	lhs = gimple_call_lhs (stmt);
	tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
	gimple *g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
	gimple_set_location (g, gimple_location (stmt));
	gsi_insert_before (gsi, g, GSI_SAME_STMT);
	g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, temp);
	gimple_set_location (g, gimple_location (stmt));
	gsi_replace (gsi, g, true);
	return true;
      }
    /* Flavors of vec_nand.  */
    case P8V_BUILTIN_VEC_NAND:
    case P8V_BUILTIN_NAND_V16QI:
    case P8V_BUILTIN_NAND_V8HI:
    case P8V_BUILTIN_NAND_V4SI:
    case P8V_BUILTIN_NAND_V4SF:
    case P8V_BUILTIN_NAND_V2DF:
    case P8V_BUILTIN_NAND_V2DI:
      {
	arg0 = gimple_call_arg (stmt, 0);
	arg1 = gimple_call_arg (stmt, 1);
	lhs = gimple_call_lhs (stmt);
	tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
	gimple *g = gimple_build_assign (temp, BIT_AND_EXPR, arg0, arg1);
	gimple_set_location (g, gimple_location (stmt));
	gsi_insert_before (gsi, g, GSI_SAME_STMT);
	g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
	gimple_set_location (g, gimple_location (stmt));
	gsi_replace (gsi, g, true);
	return true;
      }
    /* Flavors of vec_or.  */
    case ALTIVEC_BUILTIN_VOR:
      {
	arg0 = gimple_call_arg (stmt, 0);
	arg1 = gimple_call_arg (stmt, 1);
	lhs = gimple_call_lhs (stmt);
	gimple *g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, arg1);
	gimple_set_location (g, gimple_location (stmt));
	gsi_replace (gsi, g, true);
	return true;
      }
    /* Flavors of vec_orc.  */
    case P8V_BUILTIN_ORC_V16QI:
    case P8V_BUILTIN_ORC_V8HI:
    case P8V_BUILTIN_ORC_V4SI:
    case P8V_BUILTIN_ORC_V4SF:
    case P8V_BUILTIN_ORC_V2DF:
    case P8V_BUILTIN_ORC_V2DI:
      {
	arg0 = gimple_call_arg (stmt, 0);
	arg1 = gimple_call_arg (stmt, 1);
	lhs = gimple_call_lhs (stmt);
	tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
	gimple *g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
	gimple_set_location (g, gimple_location (stmt));
	gsi_insert_before (gsi, g, GSI_SAME_STMT);
	g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, temp);
	gimple_set_location (g, gimple_location (stmt));
	gsi_replace (gsi, g, true);
	return true;
      }
    /* Flavors of vec_xor.  */
    case ALTIVEC_BUILTIN_VXOR:
      {
	arg0 = gimple_call_arg (stmt, 0);
	arg1 = gimple_call_arg (stmt, 1);
	lhs = gimple_call_lhs (stmt);
	gimple *g = gimple_build_assign (lhs, BIT_XOR_EXPR, arg0, arg1);
	gimple_set_location (g, gimple_location (stmt));
	gsi_replace (gsi, g, true);
	return true;
      }
    /* Flavors of vec_nor.  */
    case ALTIVEC_BUILTIN_VNOR:
      {
	arg0 = gimple_call_arg (stmt, 0);
	arg1 = gimple_call_arg (stmt, 1);
	lhs = gimple_call_lhs (stmt);
	tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
	gimple *g = gimple_build_assign (temp, BIT_IOR_EXPR, arg0, arg1);
	gimple_set_location (g, gimple_location (stmt));
	gsi_insert_before (gsi, g, GSI_SAME_STMT);
	g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
	gimple_set_location (g, gimple_location (stmt));
	gsi_replace (gsi, g, true);
	return true;
      }
    default:
      break;
    }

  return false;
}

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		       machine_mode mode ATTRIBUTE_UNUSED,
		       int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  enum rs6000_builtins fcode
    = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
  size_t uns_fcode = (size_t) fcode;
  const struct builtin_description *d;
  size_t i;
  rtx ret;
  bool success;
  HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
  bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);

  if (TARGET_DEBUG_BUILTIN)
    {
      enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
      const char *name1 = rs6000_builtin_info[uns_fcode].name;
      const char *name2 = ((icode != CODE_FOR_nothing)
			   ? get_insn_name ((int) icode)
			   : "nothing");
      const char *name3;

      switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
	{
	default:		   name3 = "unknown";	break;
	case RS6000_BTC_SPECIAL:   name3 = "special";	break;
	case RS6000_BTC_UNARY:	   name3 = "unary";	break;
	case RS6000_BTC_BINARY:	   name3 = "binary";	break;
	case RS6000_BTC_TERNARY:   name3 = "ternary";	break;
	case RS6000_BTC_PREDICATE: name3 = "predicate";	break;
	case RS6000_BTC_ABS:	   name3 = "abs";	break;
	case RS6000_BTC_EVSEL:	   name3 = "evsel";	break;
	case RS6000_BTC_DST:	   name3 = "dst";	break;
	}

      fprintf (stderr,
	       "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
	       (name1) ? name1 : "---", fcode,
	       (name2) ? name2 : "---", (int) icode,
	       name3,
	       func_valid_p ? "" : ", not valid");
    }

  if (!func_valid_p)
    {
      rs6000_invalid_builtin (fcode);

      /* Given it is invalid, just generate a normal call.  */
      return expand_call (exp, target, ignore);
    }

  switch (fcode)
    {
    case RS6000_BUILTIN_RECIP:
      return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);

    case RS6000_BUILTIN_RECIPF:
      return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);

    case RS6000_BUILTIN_RSQRTF:
      return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);

    case RS6000_BUILTIN_RSQRT:
      return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);

    case POWER7_BUILTIN_BPERMD:
      return rs6000_expand_binop_builtin (((TARGET_64BIT)
					   ? CODE_FOR_bpermd_di
					   : CODE_FOR_bpermd_si), exp, target);

    case RS6000_BUILTIN_GET_TB:
      return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
					   target);

    case RS6000_BUILTIN_MFTB:
      return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
					    ? CODE_FOR_rs6000_mftb_di
					    : CODE_FOR_rs6000_mftb_si),
					   target);

    case RS6000_BUILTIN_MFFS:
      return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);

    case RS6000_BUILTIN_MTFSF:
      return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);

    case RS6000_BUILTIN_CPU_INIT:
    case RS6000_BUILTIN_CPU_IS:
    case RS6000_BUILTIN_CPU_SUPPORTS:
      return cpu_expand_builtin (fcode, exp, target);

    case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
    case ALTIVEC_BUILTIN_MASK_FOR_STORE:
      {
	int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
		     : (int) CODE_FOR_altivec_lvsl_direct);
	machine_mode tmode = insn_data[icode].operand[0].mode;
	machine_mode mode = insn_data[icode].operand[1].mode;
	tree arg;
	rtx op, addr, pat;

	gcc_assert (TARGET_ALTIVEC);

	arg = CALL_EXPR_ARG (exp, 0);
	gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
	op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
	addr = memory_address (mode, op);
	if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
	  op = addr;
	else
	  {
	    /* For the load case we need to negate the address.  */
	    op = gen_reg_rtx (GET_MODE (addr));
	    emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
	  }
	op = gen_rtx_MEM (mode, op);

	if (target == 0
	    || GET_MODE (target) != tmode
	    || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	  target = gen_reg_rtx (tmode);

	pat = GEN_FCN (icode) (target, op);
	if (!pat)
	  return 0;
	emit_insn (pat);

	return target;
      }

    case ALTIVEC_BUILTIN_VCFUX:
    case ALTIVEC_BUILTIN_VCFSX:
    case ALTIVEC_BUILTIN_VCTUXS:
    case ALTIVEC_BUILTIN_VCTSXS:
      /* FIXME: There's got to be a nicer way to handle this case than
	 constructing a new CALL_EXPR.  */
      if (call_expr_nargs (exp) == 1)
	{
	  exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
				 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
	}
      break;

    default:
      break;
    }

  if (TARGET_ALTIVEC)
    {
      ret = altivec_expand_builtin (exp, target, &success);

      if (success)
	return ret;
    }
  if (TARGET_SPE)
    {
      ret = spe_expand_builtin (exp, target, &success);

      if (success)
	return ret;
    }
  if (TARGET_PAIRED_FLOAT)
    {
      ret = paired_expand_builtin (exp, target, &success);

      if (success)
	return ret;
    }
  if (TARGET_HTM)
    {
      ret = htm_expand_builtin (exp, target, &success);

      if (success)
	return ret;
    }

  unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
  /* RS6000_BTC_SPECIAL represents no-operand operators.  */
  gcc_assert (attr == RS6000_BTC_UNARY
	      || attr == RS6000_BTC_BINARY
	      || attr == RS6000_BTC_TERNARY
	      || attr == RS6000_BTC_SPECIAL);

  /* Handle simple unary operations.  */
  d = bdesc_1arg;
  for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return rs6000_expand_unop_builtin (d->icode, exp, target);

  /* Handle simple binary operations.  */
  d = bdesc_2arg;
  for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      return rs6000_expand_binop_builtin (d->icode, exp, target);

  /* Handle simple ternary operations.  */
  d = bdesc_3arg;
  for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
    if (d->code == fcode)
      return rs6000_expand_ternop_builtin (d->icode, exp, target);

  /* Handle simple no-argument operations.  */
  d = bdesc_0arg;
  for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
    if (d->code == fcode)
      return rs6000_expand_zeroop_builtin (d->icode, target);

  gcc_unreachable ();
}

/* Create a builtin vector type with a name, taking care not to give
   the canonical type a name.  */

static tree
rs6000_vector_type (const char *name, tree elt_type, unsigned num_elts)
{
  tree result = build_vector_type (elt_type, num_elts);

  /* Copy so we don't give the canonical type a name.  */
  result = build_variant_type_copy (result);

  add_builtin_type (name, result);

  return result;
}

static void
rs6000_init_builtins (void)
{
  tree tdecl;
  tree ftype;
  machine_mode mode;

  if (TARGET_DEBUG_BUILTIN)
    fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
	     (TARGET_PAIRED_FLOAT) ? ", paired"  : "",
	     (TARGET_SPE)          ? ", spe"     : "",
	     (TARGET_ALTIVEC)      ? ", altivec" : "",
	     (TARGET_VSX)          ? ", vsx"     : "");

  V2SI_type_node = build_vector_type (intSI_type_node, 2);
  V2SF_type_node = build_vector_type (float_type_node, 2);
  V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 ? "__vector long"
				       : "__vector long long",
				       intDI_type_node, 2);
  V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2);
  V4HI_type_node = build_vector_type (intHI_type_node, 4);
  V4SI_type_node = rs6000_vector_type ("__vector signed int",
				       intSI_type_node, 4);
  V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4);
  V8HI_type_node = rs6000_vector_type ("__vector signed short",
				       intHI_type_node, 8);
  V16QI_type_node = rs6000_vector_type ("__vector signed char",
					intQI_type_node, 16);

  unsigned_V16QI_type_node = rs6000_vector_type ("__vector unsigned char",
						 unsigned_intQI_type_node, 16);
  unsigned_V8HI_type_node = rs6000_vector_type ("__vector unsigned short",
						 unsigned_intHI_type_node, 8);
  unsigned_V4SI_type_node = rs6000_vector_type ("__vector unsigned int",
						 unsigned_intSI_type_node, 4);
  unsigned_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
						? "__vector unsigned long"
						: "__vector unsigned long long",
						unsigned_intDI_type_node, 2);

  opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
  opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
  opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
  opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);

  const_str_type_node
    = build_pointer_type (build_qualified_type (char_type_node,
						TYPE_QUAL_CONST));

  /* We use V1TI mode as a special container to hold __int128_t items that
     must live in VSX registers.  */
  if (intTI_type_node)
    {
      V1TI_type_node = rs6000_vector_type ("__vector __int128",
					   intTI_type_node, 1);
      unsigned_V1TI_type_node
	= rs6000_vector_type ("__vector unsigned __int128",
			      unsigned_intTI_type_node, 1);
    }

  /* The 'vector bool ...' types must be kept distinct from the
     'vector unsigned ...' types, especially in C++ land.  Similarly,
     'vector pixel' is distinct from 'vector unsigned short'.  */

  bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
  bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
  bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
  bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
  pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);

  long_integer_type_internal_node = long_integer_type_node;
  long_unsigned_type_internal_node = long_unsigned_type_node;
  long_long_integer_type_internal_node = long_long_integer_type_node;
  long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
  intQI_type_internal_node = intQI_type_node;
  uintQI_type_internal_node = unsigned_intQI_type_node;
  intHI_type_internal_node = intHI_type_node;
  uintHI_type_internal_node = unsigned_intHI_type_node;
  intSI_type_internal_node = intSI_type_node;
  uintSI_type_internal_node = unsigned_intSI_type_node;
  intDI_type_internal_node = intDI_type_node;
  uintDI_type_internal_node = unsigned_intDI_type_node;
  intTI_type_internal_node = intTI_type_node;
  uintTI_type_internal_node = unsigned_intTI_type_node;
  float_type_internal_node = float_type_node;
  double_type_internal_node = double_type_node;
  long_double_type_internal_node = long_double_type_node;
  dfloat64_type_internal_node = dfloat64_type_node;
  dfloat128_type_internal_node = dfloat128_type_node;
  void_type_internal_node = void_type_node;

  /* 128-bit floating point support.  KFmode is IEEE 128-bit floating point.
     IFmode is the IBM extended 128-bit format that is a pair of doubles.
     TFmode will be either IEEE 128-bit floating point or the IBM
     double-double format that uses a pair of doubles, depending on the
     switches and defaults.

     We do not enable the actual __float128 keyword unless the user explicitly
     asks for it, because the library support is not yet complete.

     If we don't have support for either 128-bit IBM double-double or IEEE
     128-bit floating point, we need to make sure the type is non-zero, or
     else the self-test fails during bootstrap.

     We don't register a built-in type for __ibm128 if the type is the same as
     long double.  Instead we add a #define in rs6000_cpu_cpp_builtins that
     maps __ibm128 to long double.  */
  if (TARGET_LONG_DOUBLE_128 && FLOAT128_IEEE_P (TFmode))
    {
      ibm128_float_type_node = make_node (REAL_TYPE);
      TYPE_PRECISION (ibm128_float_type_node) = 128;
      SET_TYPE_MODE (ibm128_float_type_node, IFmode);
      layout_type (ibm128_float_type_node);

      lang_hooks.types.register_builtin_type (ibm128_float_type_node,
					      "__ibm128");
    }
  else
    ibm128_float_type_node = long_double_type_node;

  if (TARGET_FLOAT128_KEYWORD)
    {
      ieee128_float_type_node = float128_type_node;
      lang_hooks.types.register_builtin_type (ieee128_float_type_node,
					      "__float128");
    }

  else if (TARGET_FLOAT128_TYPE)
    {
      ieee128_float_type_node = make_node (REAL_TYPE);
      TYPE_PRECISION (ieee128_float_type_node) = 128;
      SET_TYPE_MODE (ieee128_float_type_node, KFmode);
      layout_type (ieee128_float_type_node);

      /* If we are not exporting the __float128/_Float128 keywords, we need a
	 keyword to get the types created.  Use __ieee128 as the dummy
	 keyword.  */
      lang_hooks.types.register_builtin_type (ieee128_float_type_node,
					      "__ieee128");
    }

  else
    ieee128_float_type_node = long_double_type_node;

  /* Initialize the modes for builtin_function_type, mapping a machine mode to
     tree type node.  */
  builtin_mode_to_type[QImode][0] = integer_type_node;
  builtin_mode_to_type[HImode][0] = integer_type_node;
  builtin_mode_to_type[SImode][0] = intSI_type_node;
  builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
  builtin_mode_to_type[DImode][0] = intDI_type_node;
  builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
  builtin_mode_to_type[TImode][0] = intTI_type_node;
  builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
  builtin_mode_to_type[SFmode][0] = float_type_node;
  builtin_mode_to_type[DFmode][0] = double_type_node;
  builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
  builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
  builtin_mode_to_type[TFmode][0] = long_double_type_node;
  builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
  builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
  builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
  builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
  builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
  builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
  builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
  builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
  builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
  builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
  builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
  builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
  builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
  builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
  builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
  builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
  builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;

  tdecl = add_builtin_type ("__bool char", bool_char_type_node);
  TYPE_NAME (bool_char_type_node) = tdecl;

  tdecl = add_builtin_type ("__bool short", bool_short_type_node);
  TYPE_NAME (bool_short_type_node) = tdecl;

  tdecl = add_builtin_type ("__bool int", bool_int_type_node);
  TYPE_NAME (bool_int_type_node) = tdecl;

  tdecl = add_builtin_type ("__pixel", pixel_type_node);
  TYPE_NAME (pixel_type_node) = tdecl;

  bool_V16QI_type_node = rs6000_vector_type ("__vector __bool char",
					     bool_char_type_node, 16);
  bool_V8HI_type_node = rs6000_vector_type ("__vector __bool short",
					    bool_short_type_node, 8);
  bool_V4SI_type_node = rs6000_vector_type ("__vector __bool int",
					    bool_int_type_node, 4);
  bool_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
					    ? "__vector __bool long"
					    : "__vector __bool long long",
					    bool_long_type_node, 2);
  pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel",
					     pixel_type_node, 8);

  /* Paired and SPE builtins are only available if you build a compiler with
     the appropriate options, so only create those builtins with the
     appropriate compiler option.  Create Altivec and VSX builtins on machines
     with at least the general purpose extensions (970 and newer) to allow the
     use of the target attribute.  */
  if (TARGET_PAIRED_FLOAT)
    paired_init_builtins ();
  if (TARGET_SPE)
    spe_init_builtins ();
  if (TARGET_EXTRA_BUILTINS)
    altivec_init_builtins ();
  if (TARGET_HTM)
    htm_init_builtins ();

  if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
    rs6000_common_init_builtins ();

  ftype = build_function_type_list (ieee128_float_type_node,
				    const_str_type_node, NULL_TREE);
  def_builtin ("__builtin_nanq", ftype, RS6000_BUILTIN_NANQ);
  def_builtin ("__builtin_nansq", ftype, RS6000_BUILTIN_NANSQ);

  ftype = build_function_type_list (ieee128_float_type_node, NULL_TREE);
  def_builtin ("__builtin_infq", ftype, RS6000_BUILTIN_INFQ);
  def_builtin ("__builtin_huge_valq", ftype, RS6000_BUILTIN_HUGE_VALQ);

  ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
				 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
  def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);

  ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
				 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
  def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);

  ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
				 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
  def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);

  ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
				 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
  def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);

  mode = (TARGET_64BIT) ? DImode : SImode;
  ftype = builtin_function_type (mode, mode, mode, VOIDmode,
				 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
  def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);

  ftype = build_function_type_list (unsigned_intDI_type_node,
				    NULL_TREE);
  def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);

  if (TARGET_64BIT)
    ftype = build_function_type_list (unsigned_intDI_type_node,
				      NULL_TREE);
  else
    ftype = build_function_type_list (unsigned_intSI_type_node,
				      NULL_TREE);
  def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);

  ftype = build_function_type_list (double_type_node, NULL_TREE);
  def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);

  ftype = build_function_type_list (void_type_node,
				    intSI_type_node, double_type_node,
				    NULL_TREE);
  def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);

  ftype = build_function_type_list (void_type_node, NULL_TREE);
  def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);

  ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
				    NULL_TREE);
  def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
  def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);

  /* AIX libm provides clog as __clog.  */
  if (TARGET_XCOFF
      && (tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
    set_user_assembler_name (tdecl, "__clog");

#ifdef SUBTARGET_INIT_BUILTINS
  SUBTARGET_INIT_BUILTINS;
#endif
}

/* Returns the rs6000 builtin decl for CODE.  */

static tree
rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT fnmask;

  if (code >= RS6000_BUILTIN_COUNT)
    return error_mark_node;

  fnmask = rs6000_builtin_info[code].mask;
  if ((fnmask & rs6000_builtin_mask) != fnmask)
    {
      rs6000_invalid_builtin ((enum rs6000_builtins) code);
      return error_mark_node;
    }

  return rs6000_builtin_decls[code];
}
18005 
18006 static void
spe_init_builtins(void)18007 spe_init_builtins (void)
18008 {
18009   tree puint_type_node = build_pointer_type (unsigned_type_node);
18010   tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
18011   const struct builtin_description *d;
18012   size_t i;
18013   HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18014 
18015   tree v2si_ftype_4_v2si
18016     = build_function_type_list (opaque_V2SI_type_node,
18017                                 opaque_V2SI_type_node,
18018                                 opaque_V2SI_type_node,
18019                                 opaque_V2SI_type_node,
18020                                 opaque_V2SI_type_node,
18021                                 NULL_TREE);
18022 
18023   tree v2sf_ftype_4_v2sf
18024     = build_function_type_list (opaque_V2SF_type_node,
18025                                 opaque_V2SF_type_node,
18026                                 opaque_V2SF_type_node,
18027                                 opaque_V2SF_type_node,
18028                                 opaque_V2SF_type_node,
18029                                 NULL_TREE);
18030 
18031   tree int_ftype_int_v2si_v2si
18032     = build_function_type_list (integer_type_node,
18033                                 integer_type_node,
18034                                 opaque_V2SI_type_node,
18035                                 opaque_V2SI_type_node,
18036                                 NULL_TREE);
18037 
18038   tree int_ftype_int_v2sf_v2sf
18039     = build_function_type_list (integer_type_node,
18040                                 integer_type_node,
18041                                 opaque_V2SF_type_node,
18042                                 opaque_V2SF_type_node,
18043                                 NULL_TREE);
18044 
18045   tree void_ftype_v2si_puint_int
18046     = build_function_type_list (void_type_node,
18047                                 opaque_V2SI_type_node,
18048                                 puint_type_node,
18049                                 integer_type_node,
18050                                 NULL_TREE);
18051 
18052   tree void_ftype_v2si_puint_char
18053     = build_function_type_list (void_type_node,
18054                                 opaque_V2SI_type_node,
18055                                 puint_type_node,
18056                                 char_type_node,
18057                                 NULL_TREE);
18058 
18059   tree void_ftype_v2si_pv2si_int
18060     = build_function_type_list (void_type_node,
18061                                 opaque_V2SI_type_node,
18062                                 opaque_p_V2SI_type_node,
18063                                 integer_type_node,
18064                                 NULL_TREE);
18065 
18066   tree void_ftype_v2si_pv2si_char
18067     = build_function_type_list (void_type_node,
18068                                 opaque_V2SI_type_node,
18069                                 opaque_p_V2SI_type_node,
18070                                 char_type_node,
18071                                 NULL_TREE);
18072 
18073   tree void_ftype_int
18074     = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
18075 
18076   tree int_ftype_void
18077     = build_function_type_list (integer_type_node, NULL_TREE);
18078 
18079   tree v2si_ftype_pv2si_int
18080     = build_function_type_list (opaque_V2SI_type_node,
18081                                 opaque_p_V2SI_type_node,
18082                                 integer_type_node,
18083                                 NULL_TREE);
18084 
18085   tree v2si_ftype_puint_int
18086     = build_function_type_list (opaque_V2SI_type_node,
18087                                 puint_type_node,
18088                                 integer_type_node,
18089                                 NULL_TREE);
18090 
18091   tree v2si_ftype_pushort_int
18092     = build_function_type_list (opaque_V2SI_type_node,
18093                                 pushort_type_node,
18094                                 integer_type_node,
18095                                 NULL_TREE);
18096 
18097   tree v2si_ftype_signed_char
18098     = build_function_type_list (opaque_V2SI_type_node,
18099                                 signed_char_type_node,
18100                                 NULL_TREE);
18101 
18102   add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
18103 
18104   /* Initialize irregular SPE builtins.  */
18105 
18106   def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
18107   def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
18108   def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
18109   def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
18110   def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
18111   def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
18112   def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
18113   def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
18114   def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
18115   def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
18116   def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
18117   def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
18118   def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
18119   def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
18120   def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
18121   def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
18122   def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
18123   def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
18124 
18125   /* Loads.  */
18126   def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
18127   def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
18128   def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
18129   def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
18130   def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
18131   def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
18132   def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
18133   def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
18134   def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
18135   def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
18136   def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
18137   def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
18138   def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
18139   def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
18140   def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
18141   def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
18142   def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
18143   def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
18144   def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
18145   def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
18146   def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
18147   def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
18148 
18149   /* Predicates.  */
18150   d = bdesc_spe_predicates;
18151   for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
18152     {
18153       tree type;
18154       HOST_WIDE_INT mask = d->mask;
18155 
18156       if ((mask & builtin_mask) != mask)
18157 	{
18158 	  if (TARGET_DEBUG_BUILTIN)
18159 	    fprintf (stderr, "spe_init_builtins, skip predicate %s\n",
18160 		     d->name);
18161 	  continue;
18162 	}
18163 
18164       /* Cannot define builtin if the instruction is disabled.  */
18165       gcc_assert (d->icode != CODE_FOR_nothing);
18166       switch (insn_data[d->icode].operand[1].mode)
18167 	{
18168 	case E_V2SImode:
18169 	  type = int_ftype_int_v2si_v2si;
18170 	  break;
18171 	case E_V2SFmode:
18172 	  type = int_ftype_int_v2sf_v2sf;
18173 	  break;
18174 	default:
18175 	  gcc_unreachable ();
18176 	}
18177 
18178       def_builtin (d->name, type, d->code);
18179     }
18180 
18181   /* Evsel predicates.  */
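  /* Each evsel builtin takes four vector operands: the two values to
     compare and the two values to select between, hence the
     v2si_ftype_4_v2si / v2sf_ftype_4_v2sf signatures chosen below.  */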
18182   d = bdesc_spe_evsel;
18183   for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
18184     {
18185       tree type;
18186       HOST_WIDE_INT mask = d->mask;
18187 
18188       if ((mask & builtin_mask) != mask)
18189 	{
18190 	  if (TARGET_DEBUG_BUILTIN)
18191 	    fprintf (stderr, "spe_init_builtins, skip evsel %s\n",
18192 		     d->name);
18193 	  continue;
18194 	}
18195 
18196       /* Cannot define builtin if the instruction is disabled.  */
18197       gcc_assert (d->icode != CODE_FOR_nothing);
18198       switch (insn_data[d->icode].operand[1].mode)
18199 	{
18200 	case E_V2SImode:
18201 	  type = v2si_ftype_4_v2si;
18202 	  break;
18203 	case E_V2SFmode:
18204 	  type = v2sf_ftype_4_v2sf;
18205 	  break;
18206 	default:
18207 	  gcc_unreachable ();
18208 	}
18209 
18210       def_builtin (d->name, type, d->code);
18211     }
18212 }
18213 
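/* Create the builtin functions for the paired single-precision
   floating point support: the lx/stx load and store builtins plus the
   comparison predicates described in bdesc_paired_preds.  */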
18214 static void
18215 paired_init_builtins (void)
18216 {
18217   const struct builtin_description *d;
18218   size_t i;
18219   HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18220 
18221   tree int_ftype_int_v2sf_v2sf
18222     = build_function_type_list (integer_type_node,
18223                                 integer_type_node,
18224                                 V2SF_type_node,
18225                                 V2SF_type_node,
18226                                 NULL_TREE);
18227   tree pcfloat_type_node =
18228     build_pointer_type (build_qualified_type
18229 			(float_type_node, TYPE_QUAL_CONST));
18230 
18231   tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
18232 							   long_integer_type_node,
18233 							   pcfloat_type_node,
18234 							   NULL_TREE);
18235   tree void_ftype_v2sf_long_pcfloat =
18236     build_function_type_list (void_type_node,
18237 			      V2SF_type_node,
18238 			      long_integer_type_node,
18239 			      pcfloat_type_node,
18240 			      NULL_TREE);
18241 
18242 
18243   def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
18244 	       PAIRED_BUILTIN_LX);
18245 
18246 
18247   def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
18248 	       PAIRED_BUILTIN_STX);
18249 
18250   /* Predicates.  */
18251   d = bdesc_paired_preds;
18252   for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
18253     {
18254       tree type;
18255       HOST_WIDE_INT mask = d->mask;
18256 
18257       if ((mask & builtin_mask) != mask)
18258 	{
18259 	  if (TARGET_DEBUG_BUILTIN)
18260 	    fprintf (stderr, "paired_init_builtins, skip predicate %s\n",
18261 		     d->name);
18262 	  continue;
18263 	}
18264 
18265       /* Cannot define builtin if the instruction is disabled.  */
18266       gcc_assert (d->icode != CODE_FOR_nothing);
18267 
18268       if (TARGET_DEBUG_BUILTIN)
18269 	fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
18270 		 (int)i, get_insn_name (d->icode), (int)d->icode,
18271 		 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
18272 
18273       switch (insn_data[d->icode].operand[1].mode)
18274 	{
18275 	case E_V2SFmode:
18276 	  type = int_ftype_int_v2sf_v2sf;
18277 	  break;
18278 	default:
18279 	  gcc_unreachable ();
18280 	}
18281 
18282       def_builtin (d->name, type, d->code);
18283     }
18284 }
18285 
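/* Create the AltiVec and VSX builtin functions: the load and store
   builtins for each vector mode, the overloaded __builtin_vec_* entry
   points, the dst prefetch variants, the predicate and abs* operators,
   and the vec_init/vec_set/vec_ext patterns.  */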
18286 static void
18287 altivec_init_builtins (void)
18288 {
18289   const struct builtin_description *d;
18290   size_t i;
18291   tree ftype;
18292   tree decl;
18293   HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18294 
18295   tree pvoid_type_node = build_pointer_type (void_type_node);
18296 
18297   tree pcvoid_type_node
18298     = build_pointer_type (build_qualified_type (void_type_node,
18299 						TYPE_QUAL_CONST));
18300 
18301   tree int_ftype_opaque
18302     = build_function_type_list (integer_type_node,
18303 				opaque_V4SI_type_node, NULL_TREE);
18304   tree opaque_ftype_opaque
18305     = build_function_type_list (integer_type_node, NULL_TREE);
18306   tree opaque_ftype_opaque_int
18307     = build_function_type_list (opaque_V4SI_type_node,
18308 				opaque_V4SI_type_node, integer_type_node, NULL_TREE);
18309   tree opaque_ftype_opaque_opaque_int
18310     = build_function_type_list (opaque_V4SI_type_node,
18311 				opaque_V4SI_type_node, opaque_V4SI_type_node,
18312 				integer_type_node, NULL_TREE);
18313   tree opaque_ftype_opaque_opaque_opaque
18314     = build_function_type_list (opaque_V4SI_type_node,
18315 				opaque_V4SI_type_node, opaque_V4SI_type_node,
18316 				opaque_V4SI_type_node, NULL_TREE);
18317   tree opaque_ftype_opaque_opaque
18318     = build_function_type_list (opaque_V4SI_type_node,
18319 				opaque_V4SI_type_node, opaque_V4SI_type_node,
18320 				NULL_TREE);
18321   tree int_ftype_int_opaque_opaque
18322     = build_function_type_list (integer_type_node,
18323                                 integer_type_node, opaque_V4SI_type_node,
18324                                 opaque_V4SI_type_node, NULL_TREE);
18325   tree int_ftype_int_v4si_v4si
18326     = build_function_type_list (integer_type_node,
18327 				integer_type_node, V4SI_type_node,
18328 				V4SI_type_node, NULL_TREE);
18329   tree int_ftype_int_v2di_v2di
18330     = build_function_type_list (integer_type_node,
18331 				integer_type_node, V2DI_type_node,
18332 				V2DI_type_node, NULL_TREE);
18333   tree void_ftype_v4si
18334     = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
18335   tree v8hi_ftype_void
18336     = build_function_type_list (V8HI_type_node, NULL_TREE);
18337   tree void_ftype_void
18338     = build_function_type_list (void_type_node, NULL_TREE);
18339   tree void_ftype_int
18340     = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
18341 
18342   tree opaque_ftype_long_pcvoid
18343     = build_function_type_list (opaque_V4SI_type_node,
18344 				long_integer_type_node, pcvoid_type_node,
18345 				NULL_TREE);
18346   tree v16qi_ftype_long_pcvoid
18347     = build_function_type_list (V16QI_type_node,
18348 				long_integer_type_node, pcvoid_type_node,
18349 				NULL_TREE);
18350   tree v8hi_ftype_long_pcvoid
18351     = build_function_type_list (V8HI_type_node,
18352 				long_integer_type_node, pcvoid_type_node,
18353 				NULL_TREE);
18354   tree v4si_ftype_long_pcvoid
18355     = build_function_type_list (V4SI_type_node,
18356 				long_integer_type_node, pcvoid_type_node,
18357 				NULL_TREE);
18358   tree v4sf_ftype_long_pcvoid
18359     = build_function_type_list (V4SF_type_node,
18360 				long_integer_type_node, pcvoid_type_node,
18361 				NULL_TREE);
18362   tree v2df_ftype_long_pcvoid
18363     = build_function_type_list (V2DF_type_node,
18364 				long_integer_type_node, pcvoid_type_node,
18365 				NULL_TREE);
18366   tree v2di_ftype_long_pcvoid
18367     = build_function_type_list (V2DI_type_node,
18368 				long_integer_type_node, pcvoid_type_node,
18369 				NULL_TREE);
18370 
18371   tree void_ftype_opaque_long_pvoid
18372     = build_function_type_list (void_type_node,
18373 				opaque_V4SI_type_node, long_integer_type_node,
18374 				pvoid_type_node, NULL_TREE);
18375   tree void_ftype_v4si_long_pvoid
18376     = build_function_type_list (void_type_node,
18377 				V4SI_type_node, long_integer_type_node,
18378 				pvoid_type_node, NULL_TREE);
18379   tree void_ftype_v16qi_long_pvoid
18380     = build_function_type_list (void_type_node,
18381 				V16QI_type_node, long_integer_type_node,
18382 				pvoid_type_node, NULL_TREE);
18383 
18384   tree void_ftype_v16qi_pvoid_long
18385     = build_function_type_list (void_type_node,
18386 				V16QI_type_node, pvoid_type_node,
18387 				long_integer_type_node, NULL_TREE);
18388 
18389   tree void_ftype_v8hi_long_pvoid
18390     = build_function_type_list (void_type_node,
18391 				V8HI_type_node, long_integer_type_node,
18392 				pvoid_type_node, NULL_TREE);
18393   tree void_ftype_v4sf_long_pvoid
18394     = build_function_type_list (void_type_node,
18395 				V4SF_type_node, long_integer_type_node,
18396 				pvoid_type_node, NULL_TREE);
18397   tree void_ftype_v2df_long_pvoid
18398     = build_function_type_list (void_type_node,
18399 				V2DF_type_node, long_integer_type_node,
18400 				pvoid_type_node, NULL_TREE);
18401   tree void_ftype_v2di_long_pvoid
18402     = build_function_type_list (void_type_node,
18403 				V2DI_type_node, long_integer_type_node,
18404 				pvoid_type_node, NULL_TREE);
18405   tree int_ftype_int_v8hi_v8hi
18406     = build_function_type_list (integer_type_node,
18407 				integer_type_node, V8HI_type_node,
18408 				V8HI_type_node, NULL_TREE);
18409   tree int_ftype_int_v16qi_v16qi
18410     = build_function_type_list (integer_type_node,
18411 				integer_type_node, V16QI_type_node,
18412 				V16QI_type_node, NULL_TREE);
18413   tree int_ftype_int_v4sf_v4sf
18414     = build_function_type_list (integer_type_node,
18415 				integer_type_node, V4SF_type_node,
18416 				V4SF_type_node, NULL_TREE);
18417   tree int_ftype_int_v2df_v2df
18418     = build_function_type_list (integer_type_node,
18419 				integer_type_node, V2DF_type_node,
18420 				V2DF_type_node, NULL_TREE);
18421   tree v2di_ftype_v2di
18422     = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
18423   tree v4si_ftype_v4si
18424     = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
18425   tree v8hi_ftype_v8hi
18426     = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
18427   tree v16qi_ftype_v16qi
18428     = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
18429   tree v4sf_ftype_v4sf
18430     = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
18431   tree v2df_ftype_v2df
18432     = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
18433   tree void_ftype_pcvoid_int_int
18434     = build_function_type_list (void_type_node,
18435 				pcvoid_type_node, integer_type_node,
18436 				integer_type_node, NULL_TREE);
18437 
18438   def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
18439   def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
18440   def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
18441   def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
18442   def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
18443   def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
18444   def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
18445   def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
18446   def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
18447   def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
18448   def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
18449 	       ALTIVEC_BUILTIN_LVXL_V2DF);
18450   def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
18451 	       ALTIVEC_BUILTIN_LVXL_V2DI);
18452   def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
18453 	       ALTIVEC_BUILTIN_LVXL_V4SF);
18454   def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
18455 	       ALTIVEC_BUILTIN_LVXL_V4SI);
18456   def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
18457 	       ALTIVEC_BUILTIN_LVXL_V8HI);
18458   def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
18459 	       ALTIVEC_BUILTIN_LVXL_V16QI);
18460   def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
18461   def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
18462 	       ALTIVEC_BUILTIN_LVX_V2DF);
18463   def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
18464 	       ALTIVEC_BUILTIN_LVX_V2DI);
18465   def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
18466 	       ALTIVEC_BUILTIN_LVX_V4SF);
18467   def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
18468 	       ALTIVEC_BUILTIN_LVX_V4SI);
18469   def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
18470 	       ALTIVEC_BUILTIN_LVX_V8HI);
18471   def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
18472 	       ALTIVEC_BUILTIN_LVX_V16QI);
18473   def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
18474   def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
18475 	       ALTIVEC_BUILTIN_STVX_V2DF);
18476   def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
18477 	       ALTIVEC_BUILTIN_STVX_V2DI);
18478   def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
18479 	       ALTIVEC_BUILTIN_STVX_V4SF);
18480   def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
18481 	       ALTIVEC_BUILTIN_STVX_V4SI);
18482   def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
18483 	       ALTIVEC_BUILTIN_STVX_V8HI);
18484   def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
18485 	       ALTIVEC_BUILTIN_STVX_V16QI);
18486   def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
18487   def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
18488   def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
18489 	       ALTIVEC_BUILTIN_STVXL_V2DF);
18490   def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
18491 	       ALTIVEC_BUILTIN_STVXL_V2DI);
18492   def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
18493 	       ALTIVEC_BUILTIN_STVXL_V4SF);
18494   def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
18495 	       ALTIVEC_BUILTIN_STVXL_V4SI);
18496   def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
18497 	       ALTIVEC_BUILTIN_STVXL_V8HI);
18498   def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
18499 	       ALTIVEC_BUILTIN_STVXL_V16QI);
18500   def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
18501   def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
18502   def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
18503   def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
18504   def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
18505   def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
18506   def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
18507   def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
18508   def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
18509   def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
18510   def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
18511   def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
18512   def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
18513   def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
18514   def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
18515   def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
18516 
18517   def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
18518 	       VSX_BUILTIN_LXVD2X_V2DF);
18519   def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
18520 	       VSX_BUILTIN_LXVD2X_V2DI);
18521   def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
18522 	       VSX_BUILTIN_LXVW4X_V4SF);
18523   def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
18524 	       VSX_BUILTIN_LXVW4X_V4SI);
18525   def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
18526 	       VSX_BUILTIN_LXVW4X_V8HI);
18527   def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
18528 	       VSX_BUILTIN_LXVW4X_V16QI);
18529   def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
18530 	       VSX_BUILTIN_STXVD2X_V2DF);
18531   def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
18532 	       VSX_BUILTIN_STXVD2X_V2DI);
18533   def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
18534 	       VSX_BUILTIN_STXVW4X_V4SF);
18535   def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
18536 	       VSX_BUILTIN_STXVW4X_V4SI);
18537   def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
18538 	       VSX_BUILTIN_STXVW4X_V8HI);
18539   def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
18540 	       VSX_BUILTIN_STXVW4X_V16QI);
18541 
18542   def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
18543 	       VSX_BUILTIN_LD_ELEMREV_V2DF);
18544   def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
18545 	       VSX_BUILTIN_LD_ELEMREV_V2DI);
18546   def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
18547 	       VSX_BUILTIN_LD_ELEMREV_V4SF);
18548   def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
18549 	       VSX_BUILTIN_LD_ELEMREV_V4SI);
18550   def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
18551 	       VSX_BUILTIN_ST_ELEMREV_V2DF);
18552   def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
18553 	       VSX_BUILTIN_ST_ELEMREV_V2DI);
18554   def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
18555 	       VSX_BUILTIN_ST_ELEMREV_V4SF);
18556   def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
18557 	       VSX_BUILTIN_ST_ELEMREV_V4SI);
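  /* The *_elemrev builtins load and store vector elements in
     element-reversed order; they back the vec_xl and vec_xst
     operations on little-endian targets.  */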
18558 
18559   if (TARGET_P9_VECTOR)
18560     {
18561       def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
18562 		   VSX_BUILTIN_LD_ELEMREV_V8HI);
18563       def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
18564 		   VSX_BUILTIN_LD_ELEMREV_V16QI);
18565       def_builtin ("__builtin_vsx_st_elemrev_v8hi",
18566 		   void_ftype_v8hi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V8HI);
18567       def_builtin ("__builtin_vsx_st_elemrev_v16qi",
18568 		   void_ftype_v16qi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V16QI);
18569     }
18570   else
18571     {
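      /* ISA 3.0 element-reversing loads and stores for 8- and 16-bit
	 elements are not available, so point those builtin decls at
	 the corresponding lxvw4x/stxvw4x builtins instead.  */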
18572       rs6000_builtin_decls[(int) VSX_BUILTIN_LD_ELEMREV_V8HI]
18573 	= rs6000_builtin_decls[(int) VSX_BUILTIN_LXVW4X_V8HI];
18574       rs6000_builtin_decls[(int) VSX_BUILTIN_LD_ELEMREV_V16QI]
18575 	= rs6000_builtin_decls[(int) VSX_BUILTIN_LXVW4X_V16QI];
18576       rs6000_builtin_decls[(int) VSX_BUILTIN_ST_ELEMREV_V8HI]
18577 	= rs6000_builtin_decls[(int) VSX_BUILTIN_STXVW4X_V8HI];
18578       rs6000_builtin_decls[(int) VSX_BUILTIN_ST_ELEMREV_V16QI]
18579 	= rs6000_builtin_decls[(int) VSX_BUILTIN_STXVW4X_V16QI];
18580     }
18581 
18582   def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
18583 	       VSX_BUILTIN_VEC_LD);
18584   def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
18585 	       VSX_BUILTIN_VEC_ST);
18586   def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
18587 	       VSX_BUILTIN_VEC_XL);
18588   def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
18589 	       VSX_BUILTIN_VEC_XST);
18590 
18591   def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
18592   def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
18593   def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
18594 
18595   def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
18596   def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
18597   def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
18598   def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
18599   def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
18600   def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
18601   def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
18602   def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
18603   def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
18604   def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
18605   def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
18606   def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
18607 
18608   def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
18609 		ALTIVEC_BUILTIN_VEC_ADDE);
18610   def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
18611 		ALTIVEC_BUILTIN_VEC_ADDEC);
18612   def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
18613 		ALTIVEC_BUILTIN_VEC_CMPNE);
18614   def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
18615 		ALTIVEC_BUILTIN_VEC_MUL);
18616 
18617   /* Cell builtins.  */
18618   def_builtin ("__builtin_altivec_lvlx",  v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
18619   def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
18620   def_builtin ("__builtin_altivec_lvrx",  v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
18621   def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
18622 
18623   def_builtin ("__builtin_vec_lvlx",  v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
18624   def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
18625   def_builtin ("__builtin_vec_lvrx",  v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
18626   def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
18627 
18628   def_builtin ("__builtin_altivec_stvlx",  void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
18629   def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
18630   def_builtin ("__builtin_altivec_stvrx",  void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
18631   def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
18632 
18633   def_builtin ("__builtin_vec_stvlx",  void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
18634   def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
18635   def_builtin ("__builtin_vec_stvrx",  void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
18636   def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
18637 
18638   if (TARGET_P9_VECTOR)
18639     def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long,
18640 		 P9V_BUILTIN_STXVL);
18641 
18642   /* Add the DST variants.  */
18643   d = bdesc_dst;
18644   for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
18645     {
18646       HOST_WIDE_INT mask = d->mask;
18647 
18648       /* It is expected that these dst built-in functions may have
18649 	 d->icode equal to CODE_FOR_nothing.  */
18650       if ((mask & builtin_mask) != mask)
18651 	{
18652 	  if (TARGET_DEBUG_BUILTIN)
18653 	    fprintf (stderr, "altivec_init_builtins, skip dst %s\n",
18654 		     d->name);
18655 	  continue;
18656 	}
18657       def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
18658     }
18659 
18660   /* Initialize the predicates.  */
18661   d = bdesc_altivec_preds;
18662   for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
18663     {
18664       machine_mode mode1;
18665       tree type;
18666       HOST_WIDE_INT mask = d->mask;
18667 
18668       if ((mask & builtin_mask) != mask)
18669 	{
18670 	  if (TARGET_DEBUG_BUILTIN)
18671 	    fprintf (stderr, "altivec_init_builtins, skip predicate %s\n",
18672 		     d->name);
18673 	  continue;
18674 	}
18675 
18676       if (rs6000_overloaded_builtin_p (d->code))
18677 	mode1 = VOIDmode;
18678       else
18679 	{
18680 	  /* Cannot define builtin if the instruction is disabled.  */
18681 	  gcc_assert (d->icode != CODE_FOR_nothing);
18682 	  mode1 = insn_data[d->icode].operand[1].mode;
18683 	}
18684 
18685       switch (mode1)
18686 	{
18687 	case E_VOIDmode:
18688 	  type = int_ftype_int_opaque_opaque;
18689 	  break;
18690 	case E_V2DImode:
18691 	  type = int_ftype_int_v2di_v2di;
18692 	  break;
18693 	case E_V4SImode:
18694 	  type = int_ftype_int_v4si_v4si;
18695 	  break;
18696 	case E_V8HImode:
18697 	  type = int_ftype_int_v8hi_v8hi;
18698 	  break;
18699 	case E_V16QImode:
18700 	  type = int_ftype_int_v16qi_v16qi;
18701 	  break;
18702 	case E_V4SFmode:
18703 	  type = int_ftype_int_v4sf_v4sf;
18704 	  break;
18705 	case E_V2DFmode:
18706 	  type = int_ftype_int_v2df_v2df;
18707 	  break;
18708 	default:
18709 	  gcc_unreachable ();
18710 	}
18711 
18712       def_builtin (d->name, type, d->code);
18713     }
18714 
18715   /* Initialize the abs* operators.  */
18716   d = bdesc_abs;
18717   for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
18718     {
18719       machine_mode mode0;
18720       tree type;
18721       HOST_WIDE_INT mask = d->mask;
18722 
18723       if ((mask & builtin_mask) != mask)
18724 	{
18725 	  if (TARGET_DEBUG_BUILTIN)
18726 	    fprintf (stderr, "altivec_init_builtins, skip abs %s\n",
18727 		     d->name);
18728 	  continue;
18729 	}
18730 
18731       /* Cannot define builtin if the instruction is disabled.  */
18732       gcc_assert (d->icode != CODE_FOR_nothing);
18733       mode0 = insn_data[d->icode].operand[0].mode;
18734 
18735       switch (mode0)
18736 	{
18737 	case E_V2DImode:
18738 	  type = v2di_ftype_v2di;
18739 	  break;
18740 	case E_V4SImode:
18741 	  type = v4si_ftype_v4si;
18742 	  break;
18743 	case E_V8HImode:
18744 	  type = v8hi_ftype_v8hi;
18745 	  break;
18746 	case E_V16QImode:
18747 	  type = v16qi_ftype_v16qi;
18748 	  break;
18749 	case E_V4SFmode:
18750 	  type = v4sf_ftype_v4sf;
18751 	  break;
18752 	case E_V2DFmode:
18753 	  type = v2df_ftype_v2df;
18754 	  break;
18755 	default:
18756 	  gcc_unreachable ();
18757 	}
18758 
18759       def_builtin (d->name, type, d->code);
18760     }
18761 
18762   /* Initialize the target builtin that implements
18763      targetm.vectorize.builtin_mask_for_load.  */
18764 
18765   decl = add_builtin_function ("__builtin_altivec_mask_for_load",
18766 			       v16qi_ftype_long_pcvoid,
18767 			       ALTIVEC_BUILTIN_MASK_FOR_LOAD,
18768 			       BUILT_IN_MD, NULL, NULL_TREE);
18769   TREE_READONLY (decl) = 1;
18770   /* Record the decl; it is used by rs6000_builtin_mask_for_load.  */
18771   altivec_builtin_mask_for_load = decl;
18772 
18773   /* Access to the vec_init patterns.  */
18774   ftype = build_function_type_list (V4SI_type_node, integer_type_node,
18775 				    integer_type_node, integer_type_node,
18776 				    integer_type_node, NULL_TREE);
18777   def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
18778 
18779   ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
18780 				    short_integer_type_node,
18781 				    short_integer_type_node,
18782 				    short_integer_type_node,
18783 				    short_integer_type_node,
18784 				    short_integer_type_node,
18785 				    short_integer_type_node,
18786 				    short_integer_type_node, NULL_TREE);
18787   def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
18788 
18789   ftype = build_function_type_list (V16QI_type_node, char_type_node,
18790 				    char_type_node, char_type_node,
18791 				    char_type_node, char_type_node,
18792 				    char_type_node, char_type_node,
18793 				    char_type_node, char_type_node,
18794 				    char_type_node, char_type_node,
18795 				    char_type_node, char_type_node,
18796 				    char_type_node, char_type_node,
18797 				    char_type_node, NULL_TREE);
18798   def_builtin ("__builtin_vec_init_v16qi", ftype,
18799 	       ALTIVEC_BUILTIN_VEC_INIT_V16QI);
18800 
18801   ftype = build_function_type_list (V4SF_type_node, float_type_node,
18802 				    float_type_node, float_type_node,
18803 				    float_type_node, NULL_TREE);
18804   def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
18805 
18806   /* VSX builtins.  */
18807   ftype = build_function_type_list (V2DF_type_node, double_type_node,
18808 				    double_type_node, NULL_TREE);
18809   def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
18810 
18811   ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
18812 				    intDI_type_node, NULL_TREE);
18813   def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
18814 
18815   /* Access to the vec_set patterns.  */
18816   ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
18817 				    intSI_type_node,
18818 				    integer_type_node, NULL_TREE);
18819   def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
18820 
18821   ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
18822 				    intHI_type_node,
18823 				    integer_type_node, NULL_TREE);
18824   def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
18825 
18826   ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
18827 				    intQI_type_node,
18828 				    integer_type_node, NULL_TREE);
18829   def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
18830 
18831   ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
18832 				    float_type_node,
18833 				    integer_type_node, NULL_TREE);
18834   def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
18835 
18836   ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
18837 				    double_type_node,
18838 				    integer_type_node, NULL_TREE);
18839   def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
18840 
18841   ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
18842 				    intDI_type_node,
18843 				    integer_type_node, NULL_TREE);
18844   def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
18845 
18846   /* Access to the vec_extract patterns.  */
18847   ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
18848 				    integer_type_node, NULL_TREE);
18849   def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
18850 
18851   ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
18852 				    integer_type_node, NULL_TREE);
18853   def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
18854 
18855   ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
18856 				    integer_type_node, NULL_TREE);
18857   def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
18858 
18859   ftype = build_function_type_list (float_type_node, V4SF_type_node,
18860 				    integer_type_node, NULL_TREE);
18861   def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
18862 
18863   ftype = build_function_type_list (double_type_node, V2DF_type_node,
18864 				    integer_type_node, NULL_TREE);
18865   def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
18866 
18867   ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
18868 				    integer_type_node, NULL_TREE);
18869   def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
18870 
18871 
18872   if (V1TI_type_node)
18873     {
18874       tree v1ti_ftype_long_pcvoid
18875 	= build_function_type_list (V1TI_type_node,
18876 				    long_integer_type_node, pcvoid_type_node,
18877 				    NULL_TREE);
18878       tree void_ftype_v1ti_long_pvoid
18879 	= build_function_type_list (void_type_node,
18880 				    V1TI_type_node, long_integer_type_node,
18881 				    pvoid_type_node, NULL_TREE);
18882       def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
18883 		   VSX_BUILTIN_LXVD2X_V1TI);
18884       def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
18885 		   VSX_BUILTIN_STXVD2X_V1TI);
18886       ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
18887 					NULL_TREE);
18888       def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
18889       ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
18890 					intTI_type_node,
18891 					integer_type_node, NULL_TREE);
18892       def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
18893       ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
18894 					integer_type_node, NULL_TREE);
18895       def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
18896     }
18897 
18898 }
18899 
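/* Create the Hardware Transactional Memory (HTM) builtins.  The return
   and argument types are derived from the RS6000_BTC_* attribute flags
   in rs6000_builtin_info rather than from insn operand modes, since
   these builtins may have d->icode equal to CODE_FOR_nothing.  */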
18900 static void
18901 htm_init_builtins (void)
18902 {
18903   HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18904   const struct builtin_description *d;
18905   size_t i;
18906 
18907   d = bdesc_htm;
18908   for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
18909     {
18910       tree op[MAX_HTM_OPERANDS], type;
18911       HOST_WIDE_INT mask = d->mask;
18912       unsigned attr = rs6000_builtin_info[d->code].attr;
18913       bool void_func = (attr & RS6000_BTC_VOID);
18914       int attr_args = (attr & RS6000_BTC_TYPE_MASK);
18915       int nopnds = 0;
18916       tree gpr_type_node;
18917       tree rettype;
18918       tree argtype;
18919 
18920       /* It is expected that these htm built-in functions may have
18921 	 d->icode equal to CODE_FOR_nothing.  */
18922 
18923       if (TARGET_32BIT && TARGET_POWERPC64)
18924 	gpr_type_node = long_long_unsigned_type_node;
18925       else
18926 	gpr_type_node = long_unsigned_type_node;
18927 
18928       if (attr & RS6000_BTC_SPR)
18929 	{
18930 	  rettype = gpr_type_node;
18931 	  argtype = gpr_type_node;
18932 	}
18933       else if (d->code == HTM_BUILTIN_TABORTDC
18934 	       || d->code == HTM_BUILTIN_TABORTDCI)
18935 	{
18936 	  rettype = unsigned_type_node;
18937 	  argtype = gpr_type_node;
18938 	}
18939       else
18940 	{
18941 	  rettype = unsigned_type_node;
18942 	  argtype = unsigned_type_node;
18943 	}
18944 
18945       if ((mask & builtin_mask) != mask)
18946 	{
18947 	  if (TARGET_DEBUG_BUILTIN)
18948 	    fprintf (stderr, "htm_builtin, skip binary %s\n", d->name);
18949 	  continue;
18950 	}
18951 
18952       if (d->name == 0)
18953 	{
18954 	  if (TARGET_DEBUG_BUILTIN)
18955 	    fprintf (stderr, "htm_builtin, bdesc_htm[%lu] no name\n",
18956 		     (long unsigned) i);
18957 	  continue;
18958 	}
18959 
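      /* Assemble the signature: op[0] is the return type (void for
	 RS6000_BTC_VOID builtins), followed by one argtype entry per
	 argument as selected by the RS6000_BTC_{UNARY,BINARY,TERNARY}
	 attribute.  */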
18960       op[nopnds++] = (void_func) ? void_type_node : rettype;
18961 
18962       if (attr_args == RS6000_BTC_UNARY)
18963 	op[nopnds++] = argtype;
18964       else if (attr_args == RS6000_BTC_BINARY)
18965 	{
18966 	  op[nopnds++] = argtype;
18967 	  op[nopnds++] = argtype;
18968 	}
18969       else if (attr_args == RS6000_BTC_TERNARY)
18970 	{
18971 	  op[nopnds++] = argtype;
18972 	  op[nopnds++] = argtype;
18973 	  op[nopnds++] = argtype;
18974 	}
18975 
18976       switch (nopnds)
18977 	{
18978 	case 1:
18979 	  type = build_function_type_list (op[0], NULL_TREE);
18980 	  break;
18981 	case 2:
18982 	  type = build_function_type_list (op[0], op[1], NULL_TREE);
18983 	  break;
18984 	case 3:
18985 	  type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
18986 	  break;
18987 	case 4:
18988 	  type = build_function_type_list (op[0], op[1], op[2], op[3],
18989 					   NULL_TREE);
18990 	  break;
18991 	default:
18992 	  gcc_unreachable ();
18993 	}
18994 
18995       def_builtin (d->name, type, d->code);
18996     }
18997 }
18998 
18999 /* Hash function for builtin functions with up to 3 arguments and a return
19000    type.  */
19001 hashval_t
19002 builtin_hasher::hash (builtin_hash_struct *bh)
19003 {
19004   unsigned ret = 0;
19005   int i;
19006 
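  /* Accumulate each (mode, unsignedness) pair as a mixed-radix digit:
     radix MAX_MACHINE_MODE for the mode and radix 2 for the sign flag,
     so distinct signatures hash to distinct values until the
     accumulator wraps around.  */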
19007   for (i = 0; i < 4; i++)
19008     {
19009       ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
19010       ret = (ret * 2) + bh->uns_p[i];
19011     }
19012 
19013   return ret;
19014 }
19015 
19016 /* Compare builtin hash entries H1 and H2 for equivalence.  */
19017 bool
19018 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
19019 {
19020   return ((p1->mode[0] == p2->mode[0])
19021 	  && (p1->mode[1] == p2->mode[1])
19022 	  && (p1->mode[2] == p2->mode[2])
19023 	  && (p1->mode[3] == p2->mode[3])
19024 	  && (p1->uns_p[0] == p2->uns_p[0])
19025 	  && (p1->uns_p[1] == p2->uns_p[1])
19026 	  && (p1->uns_p[2] == p2->uns_p[2])
19027 	  && (p1->uns_p[3] == p2->uns_p[3]));
19028 }
19029 
19030 /* Map types for builtin functions with an explicit return type and up to 3
19031    arguments.  Functions with fewer than 3 arguments use VOIDmode as the
19032    mode of each unused argument.  */
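/* For example, builtin_function_type (V4SImode, V4SImode, V4SImode,
   VOIDmode, ...) yields the type "v4si f (v4si, v4si)", with the
   signedness of each position adjusted by the special cases below.  */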
19033 static tree
19034 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
19035 		       machine_mode mode_arg1, machine_mode mode_arg2,
19036 		       enum rs6000_builtins builtin, const char *name)
19037 {
19038   struct builtin_hash_struct h;
19039   struct builtin_hash_struct *h2;
19040   int num_args = 3;
19041   int i;
19042   tree ret_type = NULL_TREE;
19043   tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
19044 
19045   /* Create builtin_hash_table.  */
19046   if (builtin_hash_table == NULL)
19047     builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
19048 
19049   h.type = NULL_TREE;
19050   h.mode[0] = mode_ret;
19051   h.mode[1] = mode_arg0;
19052   h.mode[2] = mode_arg1;
19053   h.mode[3] = mode_arg2;
19054   h.uns_p[0] = 0;
19055   h.uns_p[1] = 0;
19056   h.uns_p[2] = 0;
19057   h.uns_p[3] = 0;
19058 
19059   /* If the builtin produces unsigned results or takes unsigned arguments,
19060      and its decl is handed to the vectorizer (such as the widening
19061      multiplies and permutes), make sure the arguments and return value
19062      have the correct signedness.  */
19063   switch (builtin)
19064     {
19065       /* unsigned 1 argument functions.  */
19066     case CRYPTO_BUILTIN_VSBOX:
19067     case P8V_BUILTIN_VGBBD:
19068     case MISC_BUILTIN_CDTBCD:
19069     case MISC_BUILTIN_CBCDTD:
19070       h.uns_p[0] = 1;
19071       h.uns_p[1] = 1;
19072       break;
19073 
19074       /* unsigned 2 argument functions.  */
19075     case ALTIVEC_BUILTIN_VMULEUB:
19076     case ALTIVEC_BUILTIN_VMULEUH:
19077     case ALTIVEC_BUILTIN_VMULOUB:
19078     case ALTIVEC_BUILTIN_VMULOUH:
19079     case CRYPTO_BUILTIN_VCIPHER:
19080     case CRYPTO_BUILTIN_VCIPHERLAST:
19081     case CRYPTO_BUILTIN_VNCIPHER:
19082     case CRYPTO_BUILTIN_VNCIPHERLAST:
19083     case CRYPTO_BUILTIN_VPMSUMB:
19084     case CRYPTO_BUILTIN_VPMSUMH:
19085     case CRYPTO_BUILTIN_VPMSUMW:
19086     case CRYPTO_BUILTIN_VPMSUMD:
19087     case CRYPTO_BUILTIN_VPMSUM:
19088     case MISC_BUILTIN_ADDG6S:
19089     case MISC_BUILTIN_DIVWEU:
19090     case MISC_BUILTIN_DIVWEUO:
19091     case MISC_BUILTIN_DIVDEU:
19092     case MISC_BUILTIN_DIVDEUO:
19093     case VSX_BUILTIN_UDIV_V2DI:
19094       h.uns_p[0] = 1;
19095       h.uns_p[1] = 1;
19096       h.uns_p[2] = 1;
19097       break;
19098 
19099       /* unsigned 3 argument functions.  */
19100     case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
19101     case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
19102     case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
19103     case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
19104     case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
19105     case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
19106     case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
19107     case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
19108     case VSX_BUILTIN_VPERM_16QI_UNS:
19109     case VSX_BUILTIN_VPERM_8HI_UNS:
19110     case VSX_BUILTIN_VPERM_4SI_UNS:
19111     case VSX_BUILTIN_VPERM_2DI_UNS:
19112     case VSX_BUILTIN_XXSEL_16QI_UNS:
19113     case VSX_BUILTIN_XXSEL_8HI_UNS:
19114     case VSX_BUILTIN_XXSEL_4SI_UNS:
19115     case VSX_BUILTIN_XXSEL_2DI_UNS:
19116     case CRYPTO_BUILTIN_VPERMXOR:
19117     case CRYPTO_BUILTIN_VPERMXOR_V2DI:
19118     case CRYPTO_BUILTIN_VPERMXOR_V4SI:
19119     case CRYPTO_BUILTIN_VPERMXOR_V8HI:
19120     case CRYPTO_BUILTIN_VPERMXOR_V16QI:
19121     case CRYPTO_BUILTIN_VSHASIGMAW:
19122     case CRYPTO_BUILTIN_VSHASIGMAD:
19123     case CRYPTO_BUILTIN_VSHASIGMA:
19124       h.uns_p[0] = 1;
19125       h.uns_p[1] = 1;
19126       h.uns_p[2] = 1;
19127       h.uns_p[3] = 1;
19128       break;
19129 
19130       /* signed permute functions with unsigned char mask.  */
19131     case ALTIVEC_BUILTIN_VPERM_16QI:
19132     case ALTIVEC_BUILTIN_VPERM_8HI:
19133     case ALTIVEC_BUILTIN_VPERM_4SI:
19134     case ALTIVEC_BUILTIN_VPERM_4SF:
19135     case ALTIVEC_BUILTIN_VPERM_2DI:
19136     case ALTIVEC_BUILTIN_VPERM_2DF:
19137     case VSX_BUILTIN_VPERM_16QI:
19138     case VSX_BUILTIN_VPERM_8HI:
19139     case VSX_BUILTIN_VPERM_4SI:
19140     case VSX_BUILTIN_VPERM_4SF:
19141     case VSX_BUILTIN_VPERM_2DI:
19142     case VSX_BUILTIN_VPERM_2DF:
19143       h.uns_p[3] = 1;
19144       break;
19145 
19146       /* unsigned args, signed return.  */
19147     case VSX_BUILTIN_XVCVUXDSP:
19148     case VSX_BUILTIN_XVCVUXDDP_UNS:
19149     case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
19150       h.uns_p[1] = 1;
19151       break;
19152 
19153       /* signed args, unsigned return.  */
19154     case VSX_BUILTIN_XVCVDPUXDS_UNS:
19155     case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
19156     case MISC_BUILTIN_UNPACK_TD:
19157     case MISC_BUILTIN_UNPACK_V1TI:
19158       h.uns_p[0] = 1;
19159       break;
19160 
19161       /* unsigned arguments for 128-bit pack instructions.  */
19162     case MISC_BUILTIN_PACK_TD:
19163     case MISC_BUILTIN_PACK_V1TI:
19164       h.uns_p[1] = 1;
19165       h.uns_p[2] = 1;
19166       break;
19167 
19168     default:
19169       break;
19170     }
19171 
19172   /* Figure out how many args are present.  */
19173   while (num_args > 0 && h.mode[num_args] == VOIDmode)
19174     num_args--;
19175 
19176   ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
19177   if (!ret_type && h.uns_p[0])
19178     ret_type = builtin_mode_to_type[h.mode[0]][0];
19179 
19180   if (!ret_type)
19181     fatal_error (input_location,
19182 		 "internal error: builtin function %s had an unexpected "
19183 		 "return type %s", name, GET_MODE_NAME (h.mode[0]));
19184 
19185   for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
19186     arg_type[i] = NULL_TREE;
19187 
19188   for (i = 0; i < num_args; i++)
19189     {
19190       int m = (int) h.mode[i+1];
19191       int uns_p = h.uns_p[i+1];
19192 
19193       arg_type[i] = builtin_mode_to_type[m][uns_p];
19194       if (!arg_type[i] && uns_p)
19195 	arg_type[i] = builtin_mode_to_type[m][0];
19196 
19197       if (!arg_type[i])
19198 	fatal_error (input_location,
19199 		     "internal error: builtin function %s, argument %d "
19200 		     "had unexpected argument type %s", name, i,
19201 		     GET_MODE_NAME (m));
19202     }
19203 
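  /* Look the signature up; on a miss, cache a GC-allocated copy of H
     together with the newly built function type, so later builtins
     with the same signature share a single type node.  */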
19204   builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
19205   if (*found == NULL)
19206     {
19207       h2 = ggc_alloc<builtin_hash_struct> ();
19208       *h2 = h;
19209       *found = h2;
19210 
19211       h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
19212 					   arg_type[2], NULL_TREE);
19213     }
19214 
19215   return (*found)->type;
19216 }
19217 
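/* Create the builtins common to the AltiVec/VSX, paired and SPE
   support: the ternary, binary, unary and no-argument operators
   described in the bdesc_* tables, with types derived by
   builtin_function_type from the insn operand modes.  */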
19218 static void
19219 rs6000_common_init_builtins (void)
19220 {
19221   const struct builtin_description *d;
19222   size_t i;
19223 
19224   tree opaque_ftype_opaque = NULL_TREE;
19225   tree opaque_ftype_opaque_opaque = NULL_TREE;
19226   tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
19227   tree v2si_ftype = NULL_TREE;
19228   tree v2si_ftype_qi = NULL_TREE;
19229   tree v2si_ftype_v2si_qi = NULL_TREE;
19230   tree v2si_ftype_int_qi = NULL_TREE;
19231   HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
19232 
19233   if (!TARGET_PAIRED_FLOAT)
19234     {
19235       builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
19236       builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
19237     }
19238 
19239   /* Paired and SPE builtins are only available if you build a compiler with
19240      the appropriate options, so only create those builtins when the
19241      corresponding option is in effect.  Create the AltiVec and VSX builtins
19242      on machines with at least the general purpose extensions (970 and newer)
19243      to allow the use of the target attribute.  */
19244 
19245   if (TARGET_EXTRA_BUILTINS)
19246     builtin_mask |= RS6000_BTM_COMMON;
19247 
19248   /* Add the ternary operators.  */
19249   d = bdesc_3arg;
19250   for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
19251     {
19252       tree type;
19253       HOST_WIDE_INT mask = d->mask;
19254 
19255       if ((mask & builtin_mask) != mask)
19256 	{
19257 	  if (TARGET_DEBUG_BUILTIN)
19258 	    fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
19259 	  continue;
19260 	}
19261 
19262       if (rs6000_overloaded_builtin_p (d->code))
19263 	{
19264 	  if (! (type = opaque_ftype_opaque_opaque_opaque))
19265 	    type = opaque_ftype_opaque_opaque_opaque
19266 	      = build_function_type_list (opaque_V4SI_type_node,
19267 					  opaque_V4SI_type_node,
19268 					  opaque_V4SI_type_node,
19269 					  opaque_V4SI_type_node,
19270 					  NULL_TREE);
19271 	}
19272       else
19273 	{
19274 	  enum insn_code icode = d->icode;
19275 	  if (d->name == 0)
19276 	    {
19277 	      if (TARGET_DEBUG_BUILTIN)
19278 		fprintf (stderr, "rs6000_builtin, bdesc_3arg[%lu] no name\n",
19279 			 (long unsigned)i);
19280 
19281 	      continue;
19282 	    }
19283 
19284           if (icode == CODE_FOR_nothing)
19285 	    {
19286 	      if (TARGET_DEBUG_BUILTIN)
19287 		fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
19288 			 d->name);
19289 
19290 	      continue;
19291 	    }
19292 
19293 	  type = builtin_function_type (insn_data[icode].operand[0].mode,
19294 					insn_data[icode].operand[1].mode,
19295 					insn_data[icode].operand[2].mode,
19296 					insn_data[icode].operand[3].mode,
19297 					d->code, d->name);
19298 	}
19299 
19300       def_builtin (d->name, type, d->code);
19301     }
19302 
19303   /* Add the binary operators.  */
19304   d = bdesc_2arg;
19305   for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19306     {
19307       machine_mode mode0, mode1, mode2;
19308       tree type;
19309       HOST_WIDE_INT mask = d->mask;
19310 
19311       if ((mask & builtin_mask) != mask)
19312 	{
19313 	  if (TARGET_DEBUG_BUILTIN)
19314 	    fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
19315 	  continue;
19316 	}
19317 
19318       if (rs6000_overloaded_builtin_p (d->code))
19319 	{
19320 	  if (! (type = opaque_ftype_opaque_opaque))
19321 	    type = opaque_ftype_opaque_opaque
19322 	      = build_function_type_list (opaque_V4SI_type_node,
19323 					  opaque_V4SI_type_node,
19324 					  opaque_V4SI_type_node,
19325 					  NULL_TREE);
19326 	}
19327       else
19328 	{
19329 	  enum insn_code icode = d->icode;
19330 	  if (d->name == 0)
19331 	    {
19332 	      if (TARGET_DEBUG_BUILTIN)
19333 		fprintf (stderr, "rs6000_builtin, bdesc_2arg[%lu] no name\n",
19334 			 (long unsigned)i);
19335 
19336 	      continue;
19337 	    }
19338 
19339           if (icode == CODE_FOR_nothing)
19340 	    {
19341 	      if (TARGET_DEBUG_BUILTIN)
19342 		fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
19343 			 d->name);
19344 
19345 	      continue;
19346 	    }
19347 
19348           mode0 = insn_data[icode].operand[0].mode;
19349           mode1 = insn_data[icode].operand[1].mode;
19350           mode2 = insn_data[icode].operand[2].mode;
19351 
19352 	  if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
19353 	    {
19354 	      if (! (type = v2si_ftype_v2si_qi))
19355 		type = v2si_ftype_v2si_qi
19356 		  = build_function_type_list (opaque_V2SI_type_node,
19357 					      opaque_V2SI_type_node,
19358 					      char_type_node,
19359 					      NULL_TREE);
19360 	    }
19361 
19362 	  else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
19363 		   && mode2 == QImode)
19364 	    {
19365 	      if (! (type = v2si_ftype_int_qi))
19366 		type = v2si_ftype_int_qi
19367 		  = build_function_type_list (opaque_V2SI_type_node,
19368 					      integer_type_node,
19369 					      char_type_node,
19370 					      NULL_TREE);
19371 	    }
19372 
19373 	  else
19374 	    type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
19375 					  d->code, d->name);
19376 	}
19377 
19378       def_builtin (d->name, type, d->code);
19379     }
19380 
19381   /* Add the simple unary operators.  */
19382   d = bdesc_1arg;
19383   for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19384     {
19385       machine_mode mode0, mode1;
19386       tree type;
19387       HOST_WIDE_INT mask = d->mask;
19388 
19389       if ((mask & builtin_mask) != mask)
19390 	{
19391 	  if (TARGET_DEBUG_BUILTIN)
19392 	    fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
19393 	  continue;
19394 	}
19395 
19396       if (rs6000_overloaded_builtin_p (d->code))
19397 	{
19398 	  if (! (type = opaque_ftype_opaque))
19399 	    type = opaque_ftype_opaque
19400 	      = build_function_type_list (opaque_V4SI_type_node,
19401 					  opaque_V4SI_type_node,
19402 					  NULL_TREE);
19403 	}
19404       else
19405         {
19406 	  enum insn_code icode = d->icode;
19407 	  if (d->name == 0)
19408 	    {
19409 	      if (TARGET_DEBUG_BUILTIN)
19410 		fprintf (stderr, "rs6000_builtin, bdesc_1arg[%lu] no name\n",
19411 			 (long unsigned)i);
19412 
19413 	      continue;
19414 	    }
19415 
19416           if (icode == CODE_FOR_nothing)
19417 	    {
19418 	      if (TARGET_DEBUG_BUILTIN)
19419 		fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
19420 			 d->name);
19421 
19422 	      continue;
19423 	    }
19424 
19425           mode0 = insn_data[icode].operand[0].mode;
19426           mode1 = insn_data[icode].operand[1].mode;
19427 
19428 	  if (mode0 == V2SImode && mode1 == QImode)
19429 	    {
19430 	      if (! (type = v2si_ftype_qi))
19431 		type = v2si_ftype_qi
19432 		  = build_function_type_list (opaque_V2SI_type_node,
19433 					      char_type_node,
19434 					      NULL_TREE);
19435 	    }
19436 
19437 	  else
19438 	    type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
19439 					  d->code, d->name);
19440 	}
19441 
19442       def_builtin (d->name, type, d->code);
19443     }
19444 
19445   /* Add the simple no-argument operators.  */
19446   d = bdesc_0arg;
19447   for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
19448     {
19449       machine_mode mode0;
19450       tree type;
19451       HOST_WIDE_INT mask = d->mask;
19452 
19453       if ((mask & builtin_mask) != mask)
19454 	{
19455 	  if (TARGET_DEBUG_BUILTIN)
19456 	    fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name);
19457 	  continue;
19458 	}
19459       if (rs6000_overloaded_builtin_p (d->code))
19460 	{
19461 	  if (!opaque_ftype_opaque)
19462 	    opaque_ftype_opaque
19463 	      = build_function_type_list (opaque_V4SI_type_node, NULL_TREE);
19464 	  type = opaque_ftype_opaque;
19465 	}
19466       else
19467 	{
19468 	  enum insn_code icode = d->icode;
19469 	  if (d->name == 0)
19470 	    {
19471 	      if (TARGET_DEBUG_BUILTIN)
19472 		fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
19473 			 (long unsigned) i);
19474 	      continue;
19475 	    }
19476 	  if (icode == CODE_FOR_nothing)
19477 	    {
19478 	      if (TARGET_DEBUG_BUILTIN)
19479 		fprintf (stderr,
19480 			 "rs6000_builtin, skip no-argument %s (no code)\n",
19481 			 d->name);
19482 	      continue;
19483 	    }
19484 	  mode0 = insn_data[icode].operand[0].mode;
19485 	  if (mode0 == V2SImode)
19486 	    {
19487 	      /* V2SImode no-argument builtins come from SPE.  */
19488 	      if (! (type = v2si_ftype))
19489 		{
19490 		  v2si_ftype
19491 		    = build_function_type_list (opaque_V2SI_type_node,
19492 						NULL_TREE);
19493 		  type = v2si_ftype;
19494 		}
19495 	    }
19496 	  else
19497 	    type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode,
19498 					  d->code, d->name);
19499 	}
19500       def_builtin (d->name, type, d->code);
19501     }
19502 }
19503 
19504 /* Set up AIX/Darwin/64-bit Linux quad floating point routines.  */
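/* For instance, with the default (non-XL-compatible) ABI an IBM long
   double addition is emitted as a call to __gcc_qadd, while
   -mxl-compat selects the _xlqadd name instead.  */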
19505 static void
19506 init_float128_ibm (machine_mode mode)
19507 {
19508   if (!TARGET_XL_COMPAT)
19509     {
19510       set_optab_libfunc (add_optab, mode, "__gcc_qadd");
19511       set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
19512       set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
19513       set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
19514 
19515       if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
19516 	{
19517 	  set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
19518 	  set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
19519 	  set_optab_libfunc (ne_optab, mode, "__gcc_qne");
19520 	  set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
19521 	  set_optab_libfunc (ge_optab, mode, "__gcc_qge");
19522 	  set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
19523 	  set_optab_libfunc (le_optab, mode, "__gcc_qle");
19524 
19525 	  set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
19526 	  set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
19527 	  set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
19528 	  set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
19529 	  set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
19530 	  set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
19531 	  set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
19532 	  set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
19533 	}
19534 
19535       if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
19536 	set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
19537     }
19538   else
19539     {
19540       set_optab_libfunc (add_optab, mode, "_xlqadd");
19541       set_optab_libfunc (sub_optab, mode, "_xlqsub");
19542       set_optab_libfunc (smul_optab, mode, "_xlqmul");
19543       set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
19544     }
19545 
19546   /* Add various conversions for IFmode to use the traditional TFmode
19547      names.  */
19548   if (mode == IFmode)
19549     {
19550       set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2");
19551       set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2");
19552       set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2");
19553       set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2");
19554       set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2");
19555       set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2");
19556 
19557       if (TARGET_POWERPC64)
19558 	{
19559 	  set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
19560 	  set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
19561 	  set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
19562 	  set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
19563 	}
19564     }
19565 }
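
/* Editorial illustration (not part of the original sources): with the
   !TARGET_XL_COMPAT mapping above, an IBM double-double operation such as

     long double
     qadd_example (long double a, long double b)
     {
       return a + b;	/* emitted as a call to __gcc_qadd */
     }

   becomes a libcall rather than inline code, assuming long double is the
   128-bit IBM format on the target.  Under -mxl-compat the same addition
   would call _xlqadd instead.  */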
19566 
19567 /* Set up IEEE 128-bit floating point routines.  Use different names if the
19568    arguments can be passed in a vector register.  The historical PowerPC
19569    implementation of IEEE 128-bit floating point used _q_<op> for the names, so
19570    continue to use that if we aren't using vector registers to pass IEEE
19571    128-bit floating point.  */
19572 
19573 static void
19574 init_float128_ieee (machine_mode mode)
19575 {
19576   if (FLOAT128_VECTOR_P (mode))
19577     {
19578       set_optab_libfunc (add_optab, mode, "__addkf3");
19579       set_optab_libfunc (sub_optab, mode, "__subkf3");
19580       set_optab_libfunc (neg_optab, mode, "__negkf2");
19581       set_optab_libfunc (smul_optab, mode, "__mulkf3");
19582       set_optab_libfunc (sdiv_optab, mode, "__divkf3");
19583       set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
19584       set_optab_libfunc (abs_optab, mode, "__abstkf2");
19585 
19586       set_optab_libfunc (eq_optab, mode, "__eqkf2");
19587       set_optab_libfunc (ne_optab, mode, "__nekf2");
19588       set_optab_libfunc (gt_optab, mode, "__gtkf2");
19589       set_optab_libfunc (ge_optab, mode, "__gekf2");
19590       set_optab_libfunc (lt_optab, mode, "__ltkf2");
19591       set_optab_libfunc (le_optab, mode, "__lekf2");
19592       set_optab_libfunc (unord_optab, mode, "__unordkf2");
19593 
19594       set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
19595       set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
19596       set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
19597       set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
19598 
19599       set_conv_libfunc (sext_optab, mode, IFmode, "__extendtfkf2");
19600       if (mode != TFmode && FLOAT128_IBM_P (TFmode))
19601 	set_conv_libfunc (sext_optab, mode, TFmode, "__extendtfkf2");
19602 
19603       set_conv_libfunc (trunc_optab, IFmode, mode, "__trunckftf2");
19604       if (mode != TFmode && FLOAT128_IBM_P (TFmode))
19605 	set_conv_libfunc (trunc_optab, TFmode, mode, "__trunckftf2");
19606 
19607       set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2");
19608       set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2");
19609       set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2");
19610       set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2");
19611       set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2");
19612       set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2");
19613 
19614       set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
19615       set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
19616       set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
19617       set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
19618 
19619       set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
19620       set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
19621       set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
19622       set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
19623 
19624       if (TARGET_POWERPC64)
19625 	{
19626 	  set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
19627 	  set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
19628 	  set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
19629 	  set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
19630 	}
19631     }
19632 
19633   else
19634     {
19635       set_optab_libfunc (add_optab, mode, "_q_add");
19636       set_optab_libfunc (sub_optab, mode, "_q_sub");
19637       set_optab_libfunc (neg_optab, mode, "_q_neg");
19638       set_optab_libfunc (smul_optab, mode, "_q_mul");
19639       set_optab_libfunc (sdiv_optab, mode, "_q_div");
19640       if (TARGET_PPC_GPOPT)
19641 	set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
19642 
19643       set_optab_libfunc (eq_optab, mode, "_q_feq");
19644       set_optab_libfunc (ne_optab, mode, "_q_fne");
19645       set_optab_libfunc (gt_optab, mode, "_q_fgt");
19646       set_optab_libfunc (ge_optab, mode, "_q_fge");
19647       set_optab_libfunc (lt_optab, mode, "_q_flt");
19648       set_optab_libfunc (le_optab, mode, "_q_fle");
19649 
19650       set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
19651       set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
19652       set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
19653       set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
19654       set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
19655       set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
19656       set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
19657       set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
19658     }
19659 }
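
/* Editorial illustration (a sketch, assuming a VSX target where KFmode
   values are passed in vector registers): __float128 arithmetic then maps
   to the __*kf* entry points registered above, e.g.

     __float128
     kfadd_example (__float128 a, __float128 b)
     {
       return a + b;	/* emitted as a call to __addkf3 */
     }

   while the historical 32-bit SVR4 configuration takes the else branch
   and uses the _q_<op> names.  */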
19660 
19661 static void
19662 rs6000_init_libfuncs (void)
19663 {
19664   /* __float128 support.  */
19665   if (TARGET_FLOAT128_TYPE)
19666     {
19667       init_float128_ibm (IFmode);
19668       init_float128_ieee (KFmode);
19669     }
19670 
19671   /* AIX/Darwin/64-bit Linux quad floating point routines.  */
19672   if (TARGET_LONG_DOUBLE_128)
19673     {
19674       if (!TARGET_IEEEQUAD)
19675 	init_float128_ibm (TFmode);
19676 
19677       /* IEEE 128-bit including 32-bit SVR4 quad floating point routines.  */
19678       else
19679 	init_float128_ieee (TFmode);
19680     }
19681 }
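
/* Editorial note: a compact summary of the dispatch implemented above,
   as (mode, configuration) -> naming scheme:

     IFmode                    -> __gcc_q* / _xlq*   (always IBM format)
     KFmode                    -> __*kf* or _q_*     (always IEEE format)
     TFmode, !TARGET_IEEEQUAD  -> __gcc_q* / _xlq*   (long double is IBM)
     TFmode, TARGET_IEEEQUAD   -> __*kf* or _q_*     (long double is IEEE)

   i.e. TFmode simply follows whichever format long double has on the
   target.  */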
19682 
19683 
19684 /* Expand a block clear operation, and return 1 if successful.  Return 0
19685    if we should let the compiler generate normal code.
19686 
19687    operands[0] is the destination
19688    operands[1] is the length
19689    operands[3] is the alignment; operands[2] (the value to store) is not used, since a block clear always stores zero */
19690 
19691 int
19692 expand_block_clear (rtx operands[])
19693 {
19694   rtx orig_dest = operands[0];
19695   rtx bytes_rtx	= operands[1];
19696   rtx align_rtx = operands[3];
19697   bool constp	= (GET_CODE (bytes_rtx) == CONST_INT);
19698   HOST_WIDE_INT align;
19699   HOST_WIDE_INT bytes;
19700   int offset;
19701   int clear_bytes;
19702   int clear_step;
19703 
19704   /* If this is not a fixed size clear, just call memset.  */
19705   if (! constp)
19706     return 0;
19707 
19708   /* This must be a fixed size alignment.  */
19709   gcc_assert (GET_CODE (align_rtx) == CONST_INT);
19710   align = INTVAL (align_rtx) * BITS_PER_UNIT;
19711 
19712   /* Anything to clear? */
19713   bytes = INTVAL (bytes_rtx);
19714   if (bytes <= 0)
19715     return 1;
19716 
19717   /* Use the builtin memset after a point, to avoid huge code bloat.
19718      When optimize_size, avoid any significant code bloat; calling
19719      memset is about 4 instructions, so allow for one instruction to
19720      load zero and three to do clearing.  */
19721   if (TARGET_ALTIVEC && align >= 128)
19722     clear_step = 16;
19723   else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
19724     clear_step = 8;
19725   else if (TARGET_SPE && align >= 64)
19726     clear_step = 8;
19727   else
19728     clear_step = 4;
19729 
19730   if (optimize_size && bytes > 3 * clear_step)
19731     return 0;
19732   if (! optimize_size && bytes > 8 * clear_step)
19733     return 0;
19734 
19735   for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
19736     {
19737       machine_mode mode = BLKmode;
19738       rtx dest;
19739 
19740       if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
19741 	{
19742 	  clear_bytes = 16;
19743 	  mode = V4SImode;
19744 	}
19745       else if (bytes >= 8 && TARGET_SPE && align >= 64)
19746         {
19747           clear_bytes = 8;
19748           mode = V2SImode;
19749         }
19750       else if (bytes >= 8 && TARGET_POWERPC64
19751 	       && (align >= 64 || !STRICT_ALIGNMENT))
19752 	{
19753 	  clear_bytes = 8;
19754 	  mode = DImode;
19755 	  if (offset == 0 && align < 64)
19756 	    {
19757 	      rtx addr;
19758 
19759 	      /* If the address form is reg+offset with offset not a
19760 		 multiple of four, reload into reg indirect form here
19761 		 rather than waiting for reload.  This way we get one
19762 		 reload, not one per store.  */
19763 	      addr = XEXP (orig_dest, 0);
19764 	      if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
19765 		  && GET_CODE (XEXP (addr, 1)) == CONST_INT
19766 		  && (INTVAL (XEXP (addr, 1)) & 3) != 0)
19767 		{
19768 		  addr = copy_addr_to_reg (addr);
19769 		  orig_dest = replace_equiv_address (orig_dest, addr);
19770 		}
19771 	    }
19772 	}
19773       else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
19774 	{			/* move 4 bytes */
19775 	  clear_bytes = 4;
19776 	  mode = SImode;
19777 	}
19778       else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
19779 	{			/* move 2 bytes */
19780 	  clear_bytes = 2;
19781 	  mode = HImode;
19782 	}
19783       else /* move 1 byte at a time */
19784 	{
19785 	  clear_bytes = 1;
19786 	  mode = QImode;
19787 	}
19788 
19789       dest = adjust_address (orig_dest, mode, offset);
19790 
19791       emit_move_insn (dest, CONST0_RTX (mode));
19792     }
19793 
19794   return 1;
19795 }
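
/* Editorial illustration (hypothetical output; register numbers are only
   indicative): for a fixed-size clear such as

     void
     clear16 (char *p)		/* p known to be 8-byte aligned */
     {
       __builtin_memset (p, 0, 16);
     }

   the loop above selects DImode twice on a 64-bit target, so the
   expansion is two doubleword stores of zero instead of a memset call:

     li 9,0
     std 9,0(3)
     std 9,8(3)  */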
19796 
19797 /* Emit a potentially record-form instruction, setting DST from SRC.
19798    If DOT is 0, that is all; otherwise, set CCREG to the result of the
19799    signed comparison of DST with zero.  If DOT is 1, the generated RTL
19800    doesn't care about the DST result; if DOT is 2, it does.  If CCREG
19801    is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
19802    a separate COMPARE.  */
19803 
19804 static void
19805 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
19806 {
19807   if (dot == 0)
19808     {
19809       emit_move_insn (dst, src);
19810       return;
19811     }
19812 
19813   if (cc_reg_not_cr0_operand (ccreg, CCmode))
19814     {
19815       emit_move_insn (dst, src);
19816       emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
19817       return;
19818     }
19819 
19820   rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
19821   if (dot == 1)
19822     {
19823       rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
19824       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
19825     }
19826   else
19827     {
19828       rtx set = gen_rtx_SET (dst, src);
19829       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
19830     }
19831 }
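
/* Editorial note: the three RTL shapes this helper can emit, sketched in
   a non-authoritative form:

     dot == 0:  (set dst src)
     dot == 1:  (parallel [(set ccreg (compare src 0)) (clobber dst)])
     dot == 2:  (parallel [(set ccreg (compare src 0)) (set dst src)])

   plus the fallback of a separate SET and COMPARE whenever CCREG is not
   CR0, since only CR0 can be set by a record-form instruction.  */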
19832 
19833 /* Figure out the correct instructions to generate to load data for
19834    block compare.  MODE is used for the read from memory, and
19835    data is zero extended if REG is wider than MODE.  If LE code
19836    is being generated, bswap loads are used.
19837 
19838    REG is the destination register to move the data into.
19839    MEM is the memory block being read.
19840    MODE is the mode of memory to use for the read.  */
19841 static void
19842 do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
19843 {
19844   switch (GET_MODE (reg))
19845     {
19846     case E_DImode:
19847       switch (mode)
19848 	{
19849 	case E_QImode:
19850 	  emit_insn (gen_zero_extendqidi2 (reg, mem));
19851 	  break;
19852 	case E_HImode:
19853 	  {
19854 	    rtx src = mem;
19855 	    if (!BYTES_BIG_ENDIAN)
19856 	      {
19857 		src = gen_reg_rtx (HImode);
19858 		emit_insn (gen_bswaphi2 (src, mem));
19859 	      }
19860 	    emit_insn (gen_zero_extendhidi2 (reg, src));
19861 	    break;
19862 	  }
19863 	case E_SImode:
19864 	  {
19865 	    rtx src = mem;
19866 	    if (!BYTES_BIG_ENDIAN)
19867 	      {
19868 		src = gen_reg_rtx (SImode);
19869 		emit_insn (gen_bswapsi2 (src, mem));
19870 	      }
19871 	    emit_insn (gen_zero_extendsidi2 (reg, src));
19872 	  }
19873 	  break;
19874 	case E_DImode:
19875 	  if (!BYTES_BIG_ENDIAN)
19876 	    emit_insn (gen_bswapdi2 (reg, mem));
19877 	  else
19878 	    emit_insn (gen_movdi (reg, mem));
19879 	  break;
19880 	default:
19881 	  gcc_unreachable ();
19882 	}
19883       break;
19884 
19885     case E_SImode:
19886       switch (mode)
19887 	{
19888 	case E_QImode:
19889 	  emit_insn (gen_zero_extendqisi2 (reg, mem));
19890 	  break;
19891 	case E_HImode:
19892 	  {
19893 	    rtx src = mem;
19894 	    if (!BYTES_BIG_ENDIAN)
19895 	      {
19896 		src = gen_reg_rtx (HImode);
19897 		emit_insn (gen_bswaphi2 (src, mem));
19898 	      }
19899 	    emit_insn (gen_zero_extendhisi2 (reg, src));
19900 	    break;
19901 	  }
19902 	case E_SImode:
19903 	  if (!BYTES_BIG_ENDIAN)
19904 	    emit_insn (gen_bswapsi2 (reg, mem));
19905 	  else
19906 	    emit_insn (gen_movsi (reg, mem));
19907 	  break;
19908 	case E_DImode:
19909 	  /* DImode is larger than the destination reg so is not expected.  */
19910 	  gcc_unreachable ();
19911 	  break;
19912 	default:
19913 	  gcc_unreachable ();
19914 	}
19915       break;
19916     default:
19917       gcc_unreachable ();
19918       break;
19919     }
19920 }
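
/* Editorial illustration (standalone model, not used by the compiler):
   the bswap loads above exist because memcmp order is lexicographic by
   byte address, while a little-endian load reverses that order.  Loading
   in big-endian byte order makes a plain unsigned compare of the two
   registers agree with memcmp:

     static unsigned long long
     model_load_be64 (const unsigned char *p)
     {
       unsigned long long v = 0;
       for (int i = 0; i < 8; i++)
	 v = (v << 8) | p[i];	/* what ld (BE) or ldbrx (LE) produces */
       return v;
     }  */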
19921 
19922 /* Select the mode to be used for reading the next chunk of bytes
19923    in the compare.
19924 
19925    OFFSET is the current read offset from the beginning of the block.
19926    BYTES is the number of bytes remaining to be read.
19927    ALIGN is the minimum alignment of the memory blocks being compared in bytes.
19928    WORD_MODE_OK indicates using WORD_MODE is allowed, else SImode is
19929    the largest allowable mode.  */
19930 static machine_mode
19931 select_block_compare_mode (unsigned HOST_WIDE_INT offset,
19932 			   unsigned HOST_WIDE_INT bytes,
19933 			   unsigned HOST_WIDE_INT align, bool word_mode_ok)
19934 {
19935   /* First see if we can do a whole load unit
19936      as that will be more efficient than a larger load + shift.  */
19937 
19938   /* If big, use biggest chunk.
19939      If exactly chunk size, use that size.
19940      If remainder can be done in one piece with shifting, do that.
19941      Do largest chunk possible without violating alignment rules.  */
19942 
19943   /* The most we can read without potential page crossing.  */
19944   unsigned HOST_WIDE_INT maxread = ROUND_UP (bytes, align);
19945 
19946   if (word_mode_ok && bytes >= UNITS_PER_WORD)
19947     return word_mode;
19948   else if (bytes == GET_MODE_SIZE (SImode))
19949     return SImode;
19950   else if (bytes == GET_MODE_SIZE (HImode))
19951     return HImode;
19952   else if (bytes == GET_MODE_SIZE (QImode))
19953     return QImode;
19954   else if (bytes < GET_MODE_SIZE (SImode)
19955 	   && offset >= GET_MODE_SIZE (SImode) - bytes)
19956     /* This matches the case where we have SImode and 3 bytes
19957        and offset >= 1 and permits us to move back one and overlap
19958        with the previous read, thus avoiding having to shift
19959        unwanted bytes off of the input.  */
19960     return SImode;
19961   else if (word_mode_ok && bytes < UNITS_PER_WORD
19962 	   && offset >= UNITS_PER_WORD-bytes)
19963     /* Similarly, if we can use DImode it will get matched here and
19964        can do an overlapping read that ends at the end of the block.  */
19965     return word_mode;
19966   else if (word_mode_ok && maxread >= UNITS_PER_WORD)
19967     /* It is safe to do all remaining in one load of largest size,
19968        possibly with a shift to get rid of unwanted bytes.  */
19969     return word_mode;
19970   else if (maxread >= GET_MODE_SIZE (SImode))
19971     /* It is safe to do all remaining in one SImode load,
19972        possibly with a shift to get rid of unwanted bytes.  */
19973     return SImode;
19974   else if (bytes > GET_MODE_SIZE (SImode))
19975     return SImode;
19976   else if (bytes > GET_MODE_SIZE (HImode))
19977     return HImode;
19978 
19979   /* Final fallback is to do one byte at a time.  */
19980   return QImode;
19981 }
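
/* Editorial worked example (values chosen for illustration): comparing 19
   bytes with word_mode DImode leaves bytes == 3 at offset == 16 after two
   doubleword reads.  No exact-size case matches, but offset >= 4 - 3
   does, so SImode is returned; the caller then moves the read back one
   byte to cover [15, 19) and overlaps the previous chunk instead of
   shifting unwanted bytes off.  */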
19982 
19983 /* Compute the alignment of pointer+OFFSET where the original alignment
19984    of pointer was BASE_ALIGN.  */
19985 static unsigned HOST_WIDE_INT
19986 compute_current_alignment (unsigned HOST_WIDE_INT base_align,
19987 			   unsigned HOST_WIDE_INT offset)
19988 {
19989   if (offset == 0)
19990     return base_align;
19991   return min (base_align, offset & -offset);
19992 }
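
/* Editorial worked example: offset & -offset isolates the lowest set bit
   of OFFSET, which is the largest power of two guaranteed to divide it:

     compute_current_alignment (8, 0)  == 8	(offset keeps alignment)
     compute_current_alignment (8, 12) == 4	(12 & -12 == 4)
     compute_current_alignment (8, 5)  == 1	(5 is odd)  */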
19993 
19994 /* Expand a block compare operation, and return true if successful.
19995    Return false if we should let the compiler generate normal code,
19996    probably a memcmp call.
19997 
19998    OPERANDS[0] is the target (result).
19999    OPERANDS[1] is the first source.
20000    OPERANDS[2] is the second source.
20001    OPERANDS[3] is the length.
20002    OPERANDS[4] is the alignment.  */
20003 bool
20004 expand_block_compare (rtx operands[])
20005 {
20006   rtx target = operands[0];
20007   rtx orig_src1 = operands[1];
20008   rtx orig_src2 = operands[2];
20009   rtx bytes_rtx = operands[3];
20010   rtx align_rtx = operands[4];
20011   HOST_WIDE_INT cmp_bytes = 0;
20012   rtx src1 = orig_src1;
20013   rtx src2 = orig_src2;
20014 
20015   /* This case is complicated to handle because the subtract
20016      with carry instructions do not generate the 64-bit
20017      carry and so we must emit code to calculate it ourselves.
20018      We choose not to implement this yet.  */
20019   if (TARGET_32BIT && TARGET_POWERPC64)
20020     return false;
20021 
20022   /* If this is not a fixed size compare, just call memcmp.  */
20023   if (!CONST_INT_P (bytes_rtx))
20024     return false;
20025 
20026   /* This must be a fixed size alignment.  */
20027   if (!CONST_INT_P (align_rtx))
20028     return false;
20029 
20030   unsigned int base_align = UINTVAL (align_rtx) / BITS_PER_UNIT;
20031 
20032   /* rs6000_slow_unaligned_access -- don't do unaligned stuff.  */
20033   if (rs6000_slow_unaligned_access (word_mode, MEM_ALIGN (orig_src1))
20034       || rs6000_slow_unaligned_access (word_mode, MEM_ALIGN (orig_src2)))
20035     return false;
20036 
20037   gcc_assert (GET_MODE (target) == SImode);
20038 
20039   /* Anything to move?  */
20040   unsigned HOST_WIDE_INT bytes = UINTVAL (bytes_rtx);
20041   if (bytes == 0)
20042     return true;
20043 
20044   /* The code generated for p7 and older is not faster than glibc
20045      memcmp if alignment is small and length is not short, so bail
20046      out to avoid those conditions.  */
20047   if (!TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
20048       && ((base_align == 1 && bytes > 16)
20049 	  || (base_align == 2 && bytes > 32)))
20050     return false;
20051 
20052   rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
20053   rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
20054   /* P7/P8 code uses cond for subfc., but P9 uses
20055      it for cmpld, which needs CCUNSmode.  */
20056   rtx cond;
20057   if (TARGET_P9_MISC)
20058     cond = gen_reg_rtx (CCUNSmode);
20059   else
20060     cond = gen_reg_rtx (CCmode);
20061 
20062   /* If we have an LE target without ldbrx and word_mode is DImode,
20063      then we must avoid using word_mode.  */
20064   int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
20065 		       && word_mode == DImode);
20066 
20067   /* Strategy phase.  How many ops will this take and should we expand it?  */
20068 
20069   unsigned HOST_WIDE_INT offset = 0;
20070   machine_mode load_mode =
20071     select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
20072   unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
20073 
20074   /* We don't want to generate too much code.  */
20075   unsigned HOST_WIDE_INT max_bytes =
20076     load_mode_size * (unsigned HOST_WIDE_INT) rs6000_block_compare_inline_limit;
20077   if (!IN_RANGE (bytes, 1, max_bytes))
20078     return false;
20079 
20080   bool generate_6432_conversion = false;
20081   rtx convert_label = NULL;
20082   rtx final_label = NULL;
20083 
20084   /* Example of generated code comparing 18 bytes with 1-byte alignment.
20085      Compiled with -fno-reorder-blocks for clarity.
20086              ldbrx 10,31,8
20087              ldbrx 9,7,8
20088              subfc. 9,9,10
20089              bne 0,.L6487
20090              addi 9,12,8
20091              addi 5,11,8
20092              ldbrx 10,0,9
20093              ldbrx 9,0,5
20094              subfc. 9,9,10
20095              bne 0,.L6487
20096              addi 9,12,16
20097              lhbrx 10,0,9
20098              addi 9,11,16
20099              lhbrx 9,0,9
20100              subf 9,9,10
20101              b .L6488
20102              .p2align 4,,15
20103      .L6487: #convert_label
20104              popcntd 9,9
20105              subfe 10,10,10
20106              or 9,9,10
20107      .L6488: #final_label
20108              extsw 10,9
20109 
20110      We start off with DImode for two blocks that jump to the DI->SI conversion
20111      if the difference is found there, then a final block of HImode that skips
20112      the DI->SI conversion.  */
20113 
20114   while (bytes > 0)
20115     {
20116       unsigned int align = compute_current_alignment (base_align, offset);
20117       if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
20118 	load_mode = select_block_compare_mode (offset, bytes, align,
20119 					       word_mode_ok);
20120       else
20121 	load_mode = select_block_compare_mode (0, bytes, align, word_mode_ok);
20122       load_mode_size = GET_MODE_SIZE (load_mode);
20123       if (bytes >= load_mode_size)
20124 	cmp_bytes = load_mode_size;
20125       else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
20126 	{
20127 	  /* Move this load back so it doesn't go past the end.
20128 	     P8/P9 can do this efficiently.  */
20129 	  unsigned int extra_bytes = load_mode_size - bytes;
20130 	  cmp_bytes = bytes;
20131 	  if (extra_bytes < offset)
20132 	    {
20133 	      offset -= extra_bytes;
20134 	      cmp_bytes = load_mode_size;
20135 	      bytes = cmp_bytes;
20136 	    }
20137 	}
20138       else
20139 	/* P7 and earlier can't do the overlapping load trick fast,
20140 	   so this forces a non-overlapping load and a shift to get
20141 	   rid of the extra bytes.  */
20142 	cmp_bytes = bytes;
20143 
20144       src1 = adjust_address (orig_src1, load_mode, offset);
20145       src2 = adjust_address (orig_src2, load_mode, offset);
20146 
20147       if (!REG_P (XEXP (src1, 0)))
20148 	{
20149 	  rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
20150 	  src1 = replace_equiv_address (src1, src1_reg);
20151 	}
20152       set_mem_size (src1, cmp_bytes);
20153 
20154       if (!REG_P (XEXP (src2, 0)))
20155 	{
20156 	  rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
20157 	  src2 = replace_equiv_address (src2, src2_reg);
20158 	}
20159       set_mem_size (src2, cmp_bytes);
20160 
20161       do_load_for_compare (tmp_reg_src1, src1, load_mode);
20162       do_load_for_compare (tmp_reg_src2, src2, load_mode);
20163 
20164       if (cmp_bytes < load_mode_size)
20165 	{
20166 	  /* Shift unneeded bytes off.  */
20167 	  rtx sh = GEN_INT (BITS_PER_UNIT * (load_mode_size - cmp_bytes));
20168 	  if (word_mode == DImode)
20169 	    {
20170 	      emit_insn (gen_lshrdi3 (tmp_reg_src1, tmp_reg_src1, sh));
20171 	      emit_insn (gen_lshrdi3 (tmp_reg_src2, tmp_reg_src2, sh));
20172 	    }
20173 	  else
20174 	    {
20175 	      emit_insn (gen_lshrsi3 (tmp_reg_src1, tmp_reg_src1, sh));
20176 	      emit_insn (gen_lshrsi3 (tmp_reg_src2, tmp_reg_src2, sh));
20177 	    }
20178 	}
20179 
20180       int remain = bytes - cmp_bytes;
20181       if (GET_MODE_SIZE (GET_MODE (target)) > GET_MODE_SIZE (load_mode))
20182 	{
20183 	  /* Target is larger than load size so we don't need to
20184 	     reduce result size.  */
20185 
20186 	  /* We previously did a block that needed 64->32 conversion but
20187 	     the current block does not, so a label is needed to jump
20188 	     to the end.  */
20189 	  if (generate_6432_conversion && !final_label)
20190 	    final_label = gen_label_rtx ();
20191 
20192 	  if (remain > 0)
20193 	    {
20194 	      /* This is not the last block, branch to the end if the result
20195 		 of this subtract is not zero.  */
20196 	      if (!final_label)
20197 		final_label = gen_label_rtx ();
20198 	      rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
20199 	      rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
20200 	      rtx cr = gen_reg_rtx (CCmode);
20201 	      rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cr);
20202 	      emit_insn (gen_movsi (target,
20203 				    gen_lowpart (SImode, tmp_reg_src2)));
20204 	      rtx ne_rtx = gen_rtx_NE (VOIDmode, cr, const0_rtx);
20205 	      rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
20206 						 fin_ref, pc_rtx);
20207 	      rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
20208 	      JUMP_LABEL (j) = final_label;
20209 	      LABEL_NUSES (final_label) += 1;
20210 	    }
20211 	  else
20212 	    {
20213 	      if (word_mode == DImode)
20214 		{
20215 		  emit_insn (gen_subdi3 (tmp_reg_src2, tmp_reg_src1,
20216 					 tmp_reg_src2));
20217 		  emit_insn (gen_movsi (target,
20218 					gen_lowpart (SImode, tmp_reg_src2)));
20219 		}
20220 	      else
20221 		emit_insn (gen_subsi3 (target, tmp_reg_src1, tmp_reg_src2));
20222 
20223 	      if (final_label)
20224 		{
20225 		  rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
20226 		  rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
20227 		  JUMP_LABEL (j) = final_label;
20228 		  LABEL_NUSES (final_label) += 1;
20229 		  emit_barrier ();
20230 		}
20231 	    }
20232 	}
20233       else
20234 	{
20235 	  /* Do we need a 64->32 conversion block? We need the 64->32
20236 	     conversion even if target size == load_mode size because
20237 	     the subtract generates one extra bit.  */
20238 	  generate_6432_conversion = true;
20239 
20240 	  if (remain > 0)
20241 	    {
20242 	      if (!convert_label)
20243 		convert_label = gen_label_rtx ();
20244 
20245 	      /* Compare to zero and branch to convert_label if not zero.  */
20246 	      rtx cvt_ref = gen_rtx_LABEL_REF (VOIDmode, convert_label);
20247 	      if (TARGET_P9_MISC)
20248 		{
20249 		  /* Generate a compare, and convert with a setb later.  */
20250 		  rtx cmp = gen_rtx_COMPARE (CCUNSmode, tmp_reg_src1,
20251 					     tmp_reg_src2);
20252 		  emit_insn (gen_rtx_SET (cond, cmp));
20253 		}
20254 	      else
20255 		/* Generate a subfc. and use the longer
20256 		   sequence for conversion.  */
20257 		if (TARGET_64BIT)
20258 		  emit_insn (gen_subfdi3_carry_dot2 (tmp_reg_src2, tmp_reg_src2,
20259 						     tmp_reg_src1, cond));
20260 		else
20261 		  emit_insn (gen_subfsi3_carry_dot2 (tmp_reg_src2, tmp_reg_src2,
20262 						     tmp_reg_src1, cond));
20263 	      rtx ne_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20264 	      rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
20265 						 cvt_ref, pc_rtx);
20266 	      rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
20267 	      JUMP_LABEL (j) = convert_label;
20268 	      LABEL_NUSES (convert_label) += 1;
20269 	    }
20270 	  else
20271 	    {
20272 	      /* Just do the subtract/compare.  Since this is the last block
20273 		 the convert code will be generated immediately following.  */
20274 	      if (TARGET_P9_MISC)
20275 		{
20276 		  rtx cmp = gen_rtx_COMPARE (CCUNSmode, tmp_reg_src1,
20277 					     tmp_reg_src2);
20278 		  emit_insn (gen_rtx_SET (cond, cmp));
20279 		}
20280 	      else
20281 		if (TARGET_64BIT)
20282 		  emit_insn (gen_subfdi3_carry (tmp_reg_src2, tmp_reg_src2,
20283 						tmp_reg_src1));
20284 		else
20285 		  emit_insn (gen_subfsi3_carry (tmp_reg_src2, tmp_reg_src2,
20286 						tmp_reg_src1));
20287 	    }
20288 	}
20289 
20290       offset += cmp_bytes;
20291       bytes -= cmp_bytes;
20292     }
20293 
20294   if (generate_6432_conversion)
20295     {
20296       if (convert_label)
20297 	emit_label (convert_label);
20298 
20299       /* We need to produce DI result from sub, then convert to target SI
20300 	 while maintaining <0 / ==0 / >0 properties. This sequence works:
20301 	 subfc L,A,B
20302 	 subfe H,H,H
20303 	 popcntd L,L
20304 	 rldimi L,H,6,0
20305 
20306 	 This is an alternate one Segher cooked up if somebody
20307 	 wants to expand this for something that doesn't have popcntd:
20308 	 subfc L,a,b
20309 	 subfe H,x,x
20310 	 addic t,L,-1
20311 	 subfe v,t,L
20312 	 or z,v,H
20313 
20314 	 And finally, p9 can just do this:
20315 	 cmpld A,B
20316 	 setb r */
20317 
20318       if (TARGET_P9_MISC)
20319 	{
20320 	  emit_insn (gen_setb_unsigned (target, cond));
20321 	}
20322       else
20323 	{
20324 	  if (TARGET_64BIT)
20325 	    {
20326 	      rtx tmp_reg_ca = gen_reg_rtx (DImode);
20327 	      emit_insn (gen_subfdi3_carry_in_xx (tmp_reg_ca));
20328 	      emit_insn (gen_popcntddi2 (tmp_reg_src2, tmp_reg_src2));
20329 	      emit_insn (gen_iordi3 (tmp_reg_src2, tmp_reg_src2, tmp_reg_ca));
20330 	      emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2)));
20331 	    }
20332 	  else
20333 	    {
20334 	      rtx tmp_reg_ca = gen_reg_rtx (SImode);
20335 	      emit_insn (gen_subfsi3_carry_in_xx (tmp_reg_ca));
20336 	      emit_insn (gen_popcntdsi2 (tmp_reg_src2, tmp_reg_src2));
20337 	      emit_insn (gen_iorsi3 (target, tmp_reg_src2, tmp_reg_ca));
20338 	    }
20339 	}
20340     }
20341 
20342   if (final_label)
20343     emit_label (final_label);
20344 
20345   gcc_assert (bytes == 0);
20346   return true;
20347 }
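
/* Editorial illustration (standalone model of the 64->32 conversion
   above, not part of the compiler): after subfc/subfe, L holds the low
   64 bits of A - B and H is 0 or -1 according to the borrow.  popcntd
   maps any nonzero L into 1..64, and OR-ing in H forces the result
   negative exactly when A < B unsigned:

     static int
     model_convert (unsigned long long a, unsigned long long b)
     {
       unsigned long long l = a - b;		/* subfc */
       long long h = (a < b) ? -1LL : 0;	/* subfe H,H,H */
       return (int) (__builtin_popcountll (l) | h);  /* popcntd; or */
     }

   which is <0, ==0 or >0 with the same meaning as memcmp on the chunk.  */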
20348 
20349 /* Generate alignment check and branch code to set up for
20350    strncmp when we don't have DI alignment.
20351    STRNCMP_LABEL is the label to branch if there is a page crossing.
20352    SRC is the string pointer to be examined.
20353    BYTES is the max number of bytes to compare.  */
20354 static void
20355 expand_strncmp_align_check (rtx strncmp_label, rtx src, HOST_WIDE_INT bytes)
20356 {
20357   rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, strncmp_label);
20358   rtx src_check = copy_addr_to_reg (XEXP (src, 0));
20359   if (GET_MODE (src_check) == SImode)
20360     emit_insn (gen_andsi3 (src_check, src_check, GEN_INT (0xfff)));
20361   else
20362     emit_insn (gen_anddi3 (src_check, src_check, GEN_INT (0xfff)));
20363   rtx cond = gen_reg_rtx (CCmode);
20364   emit_move_insn (cond, gen_rtx_COMPARE (CCmode, src_check,
20365 					 GEN_INT (4096 - bytes)));
20366 
20367   rtx cmp_rtx = gen_rtx_LT (VOIDmode, cond, const0_rtx);
20368 
20369   rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
20370 				     pc_rtx, lab_ref);
20371   rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
20372   JUMP_LABEL (j) = strncmp_label;
20373   LABEL_NUSES (strncmp_label) += 1;
20374 }
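
/* Editorial worked example: the test above keeps a BYTES-wide read from
   running into the next 4K page.  With bytes == 16 the inline path is
   used only when (addr & 0xfff) < 4096 - 16 == 0xff0; an address ending
   in 0xff8 fails the test and branches to the library call, since a
   16-byte read from it could fault on an unmapped following page.  */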
20375 
20376 /* Expand a string compare operation with length, and return
20377    true if successful. Return false if we should let the
20378    compiler generate normal code, probably a strncmp call.
20379 
20380    OPERANDS[0] is the target (result).
20381    OPERANDS[1] is the first source.
20382    OPERANDS[2] is the second source.
20383    If NO_LENGTH is zero, then:
20384    OPERANDS[3] is the length.
20385    OPERANDS[4] is the alignment in bytes.
20386    If NO_LENGTH is nonzero, then:
20387    OPERANDS[3] is the alignment in bytes.  */
20388 bool
20389 expand_strn_compare (rtx operands[], int no_length)
20390 {
20391   rtx target = operands[0];
20392   rtx orig_src1 = operands[1];
20393   rtx orig_src2 = operands[2];
20394   rtx bytes_rtx, align_rtx;
20395   if (no_length)
20396     {
20397       bytes_rtx = NULL;
20398       align_rtx = operands[3];
20399     }
20400   else
20401     {
20402       bytes_rtx = operands[3];
20403       align_rtx = operands[4];
20404     }
20405   unsigned HOST_WIDE_INT cmp_bytes = 0;
20406   rtx src1 = orig_src1;
20407   rtx src2 = orig_src2;
20408 
20409   /* If we have a length, it must be constant. This simplifies things
20410      a bit as we don't have to generate code to check if we've exceeded
20411      the length. Later this could be expanded to handle this case.  */
20412   if (!no_length && !CONST_INT_P (bytes_rtx))
20413     return false;
20414 
20415   /* This must be a fixed size alignment.  */
20416   if (!CONST_INT_P (align_rtx))
20417     return false;
20418 
20419   unsigned int base_align = UINTVAL (align_rtx);
20420   int align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT;
20421   int align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT;
20422 
20423   /* rs6000_slow_unaligned_access -- don't do unaligned stuff.  */
20424   if (rs6000_slow_unaligned_access (word_mode, align1)
20425       || rs6000_slow_unaligned_access (word_mode, align2))
20426     return false;
20427 
20428   gcc_assert (GET_MODE (target) == SImode);
20429 
20430   /* If we have an LE target without ldbrx and word_mode is DImode,
20431      then we must avoid using word_mode.  */
20432   int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
20433 		       && word_mode == DImode);
20434 
20435   unsigned int word_mode_size = GET_MODE_SIZE (word_mode);
20436 
20437   unsigned HOST_WIDE_INT offset = 0;
20438   unsigned HOST_WIDE_INT bytes; /* N from the strncmp args if available.  */
20439   unsigned HOST_WIDE_INT compare_length; /* How much to compare inline.  */
20440   if (no_length)
20441     /* Use this as a stand-in to determine the mode to use.  */
20442     bytes = rs6000_string_compare_inline_limit * word_mode_size;
20443   else
20444     bytes = UINTVAL (bytes_rtx);
20445 
20446   machine_mode load_mode =
20447     select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
20448   unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
20449   compare_length = rs6000_string_compare_inline_limit * load_mode_size;
20450 
20451   /* If we have equality at the end of the last compare and we have not
20452      found the end of the string, we need to call strcmp/strncmp to
20453      compare the remainder.  */
20454   bool equality_compare_rest = false;
20455 
20456   if (no_length)
20457     {
20458       bytes = compare_length;
20459       equality_compare_rest = true;
20460     }
20461   else
20462     {
20463       if (bytes <= compare_length)
20464 	compare_length = bytes;
20465       else
20466 	equality_compare_rest = true;
20467     }
20468 
20469   rtx result_reg = gen_reg_rtx (word_mode);
20470   rtx final_move_label = gen_label_rtx ();
20471   rtx final_label = gen_label_rtx ();
20472   rtx begin_compare_label = NULL;
20473 
20474   if (base_align < 8)
20475     {
20476       /* Generate code that checks distance to 4k boundary for this case.  */
20477       begin_compare_label = gen_label_rtx ();
20478       rtx strncmp_label = gen_label_rtx ();
20479       rtx jmp;
20480 
20481       /* Strncmp for power8 in glibc does this:
20482 	 rldicl	r8,r3,0,52
20483 	 cmpldi	cr7,r8,4096-16
20484 	 bgt	cr7,L(pagecross) */
20485 
20486       /* Make sure that the length we use for the alignment test and
20487          the subsequent code generation are in agreement so we do not
20488          go past the length we tested for a 4k boundary crossing.  */
20489       unsigned HOST_WIDE_INT align_test = compare_length;
20490       if (align_test < 8)
20491         {
20492           align_test = HOST_WIDE_INT_1U << ceil_log2 (align_test);
20493           base_align = align_test;
20494         }
20495       else
20496         {
20497           align_test = ROUND_UP (align_test, 8);
20498           base_align = 8;
20499         }
20500 
20501       if (align1 < 8)
20502         expand_strncmp_align_check (strncmp_label, src1, align_test);
20503       if (align2 < 8)
20504         expand_strncmp_align_check (strncmp_label, src2, align_test);
20505 
20506       /* Now generate the following sequence:
20507 	 - branch to begin_compare
20508 	 - strncmp_label
20509 	 - call to strncmp
20510 	 - branch to final_label
20511 	 - begin_compare_label */
20512 
20513       rtx cmp_ref = gen_rtx_LABEL_REF (VOIDmode, begin_compare_label);
20514       jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, cmp_ref));
20515       JUMP_LABEL (jmp) = begin_compare_label;
20516       LABEL_NUSES (begin_compare_label) += 1;
20517       emit_barrier ();
20518 
20519       emit_label (strncmp_label);
20520 
20521       if (!REG_P (XEXP (src1, 0)))
20522 	{
20523 	  rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
20524 	  src1 = replace_equiv_address (src1, src1_reg);
20525 	}
20526 
20527       if (!REG_P (XEXP (src2, 0)))
20528 	{
20529 	  rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
20530 	  src2 = replace_equiv_address (src2, src2_reg);
20531 	}
20532 
20533       if (no_length)
20534 	{
20535 	  tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
20536 	  emit_library_call_value (XEXP (DECL_RTL (fun), 0),
20537 				   target, LCT_NORMAL, GET_MODE (target),
20538 				   force_reg (Pmode, XEXP (src1, 0)), Pmode,
20539 				   force_reg (Pmode, XEXP (src2, 0)), Pmode);
20540 	}
20541       else
20542 	{
20543 	  /* -m32 -mpowerpc64 results in word_mode being DImode even
20544 	     though otherwise it is 32-bit. The length arg to strncmp
20545 	     is a size_t which will be the same size as pointers.  */
20546 	  rtx len_rtx;
20547 	  if (TARGET_64BIT)
20548 	    len_rtx = gen_reg_rtx (DImode);
20549 	  else
20550 	    len_rtx = gen_reg_rtx (SImode);
20551 
20552 	  emit_move_insn (len_rtx, bytes_rtx);
20553 
20554 	  tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
20555 	  emit_library_call_value (XEXP (DECL_RTL (fun), 0),
20556 				   target, LCT_NORMAL, GET_MODE (target),
20557 				   force_reg (Pmode, XEXP (src1, 0)), Pmode,
20558 				   force_reg (Pmode, XEXP (src2, 0)), Pmode,
20559 				   len_rtx, GET_MODE (len_rtx));
20560 	}
20561 
20562       rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
20563       jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
20564       JUMP_LABEL (jmp) = final_label;
20565       LABEL_NUSES (final_label) += 1;
20566       emit_barrier ();
20567       emit_label (begin_compare_label);
20568     }
20569 
20570   rtx cleanup_label = NULL;
20571   rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
20572   rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
20573 
20574   /* Generate sequence of ld/ldbrx, cmpb to compare out
20575      to the length specified.  */
20576   unsigned HOST_WIDE_INT bytes_to_compare = compare_length;
20577   while (bytes_to_compare > 0)
20578     {
20579       /* Compare sequence:
20580          check each 8B with: ld/ld cmpd bne
20581 	 If equal, use rldicr/cmpb to check for zero byte.
20582          cleanup code at end:
20583          cmpb          get byte that differs
20584          cmpb          look for zero byte
20585          orc           combine
20586          cntlzd        get bit of first zero/diff byte
20587          subfic        convert for rldcl use
20588          rldcl rldcl   extract diff/zero byte
20589          subf          subtract for final result
20590 
20591          The last compare can branch around the cleanup code if the
20592          result is zero because the strings are exactly equal.  */
20593       unsigned int align = compute_current_alignment (base_align, offset);
20594       if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
20595 	load_mode = select_block_compare_mode (offset, bytes_to_compare, align,
20596 					       word_mode_ok);
20597       else
20598 	load_mode = select_block_compare_mode (0, bytes_to_compare, align,
20599 					       word_mode_ok);
20600       load_mode_size = GET_MODE_SIZE (load_mode);
20601       if (bytes_to_compare >= load_mode_size)
20602 	cmp_bytes = load_mode_size;
20603       else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
20604 	{
20605 	  /* Move this load back so it doesn't go past the end.
20606 	     P8/P9 can do this efficiently.  */
20607 	  unsigned int extra_bytes = load_mode_size - bytes_to_compare;
20608 	  cmp_bytes = bytes_to_compare;
20609 	  if (extra_bytes < offset)
20610 	    {
20611 	      offset -= extra_bytes;
20612 	      cmp_bytes = load_mode_size;
20613 	      bytes_to_compare = cmp_bytes;
20614 	    }
20615 	}
20616       else
20617 	/* P7 and earlier can't do the overlapping load trick fast,
20618 	   so this forces a non-overlapping load and a shift to get
20619 	   rid of the extra bytes.  */
20620 	cmp_bytes = bytes_to_compare;
20621 
20622       src1 = adjust_address (orig_src1, load_mode, offset);
20623       src2 = adjust_address (orig_src2, load_mode, offset);
20624 
20625       if (!REG_P (XEXP (src1, 0)))
20626 	{
20627 	  rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
20628 	  src1 = replace_equiv_address (src1, src1_reg);
20629 	}
20630       set_mem_size (src1, cmp_bytes);
20631 
20632       if (!REG_P (XEXP (src2, 0)))
20633 	{
20634 	  rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
20635 	  src2 = replace_equiv_address (src2, src2_reg);
20636 	}
20637       set_mem_size (src2, cmp_bytes);
20638 
20639       do_load_for_compare (tmp_reg_src1, src1, load_mode);
20640       do_load_for_compare (tmp_reg_src2, src2, load_mode);
20641 
20642       /* We must always left-align the data we read, and
20643 	 clear any bytes to the right that are beyond the string.
20644 	 Otherwise the cmpb sequence won't produce the correct
20645 	 results.  The beginning of the compare will be done
20646 	 with word_mode so will not have any extra shifts or
20647 	 clear rights.  */
20648 
20649       if (load_mode_size < word_mode_size)
20650 	{
20651 	  /* Rotate left first. */
20652 	  rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
20653 	  if (word_mode == DImode)
20654 	    {
20655 	      emit_insn (gen_rotldi3 (tmp_reg_src1, tmp_reg_src1, sh));
20656 	      emit_insn (gen_rotldi3 (tmp_reg_src2, tmp_reg_src2, sh));
20657 	    }
20658 	  else
20659 	    {
20660 	      emit_insn (gen_rotlsi3 (tmp_reg_src1, tmp_reg_src1, sh));
20661 	      emit_insn (gen_rotlsi3 (tmp_reg_src2, tmp_reg_src2, sh));
20662 	    }
20663 	}
20664 
20665       if (cmp_bytes < word_mode_size)
20666 	{
20667 	  /* Now clear right.  This plus the rotate can be
20668 	     turned into a rldicr instruction. */
20669 	  HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
20670 	  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
20671 	  if (word_mode == DImode)
20672 	    {
20673 	      emit_insn (gen_anddi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
20674 	      emit_insn (gen_anddi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
20675 	    }
20676 	  else
20677 	    {
20678 	      emit_insn (gen_andsi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
20679 	      emit_insn (gen_andsi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
20680 	    }
20681 	}
20682 
20683       /* Cases to handle.  A and B are chunks of the two strings.
20684 	 1: Not end of comparison:
20685 	 A != B: branch to cleanup code to compute result.
20686 	 A == B: check for 0 byte, next block if not found.
20687 	 2: End of the inline comparison:
20688 	 A != B: branch to cleanup code to compute result.
20689 	 A == B: check for 0 byte, call strcmp/strncmp
20690 	 3: Compared the requested N bytes:
20691 	 A == B: branch to result 0.
20692 	 A != B: cleanup code to compute result.  */
20693 
20694       unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
20695 
20696       rtx dst_label;
20697       if (remain > 0 || equality_compare_rest)
20698 	{
20699 	  /* Branch to cleanup code, otherwise fall through to do
20700 	     more compares.  */
20701 	  if (!cleanup_label)
20702 	    cleanup_label = gen_label_rtx ();
20703 	  dst_label = cleanup_label;
20704 	}
20705       else
20706 	/* Branch to end and produce result of 0.  */
20707 	dst_label = final_move_label;
20708 
20709       rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
20710       rtx cond = gen_reg_rtx (CCmode);
20711 
20712       /* Always produce the 0 result, it is needed if
20713 	 cmpb finds a 0 byte in this chunk.  */
20714       rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
20715       rs6000_emit_dot_insn (result_reg, tmp, 1, cond);
20716 
20717       rtx cmp_rtx;
20718       if (remain == 0 && !equality_compare_rest)
20719 	cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
20720       else
20721 	cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20722 
20723       rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
20724 					 lab_ref, pc_rtx);
20725       rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
20726       JUMP_LABEL (j) = dst_label;
20727       LABEL_NUSES (dst_label) += 1;
20728 
20729       if (remain > 0 || equality_compare_rest)
20730 	{
20731 	  /* Generate a cmpb to test for a 0 byte and branch
20732 	     to final result if found.  */
20733 	  rtx cmpb_zero = gen_reg_rtx (word_mode);
20734 	  rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
20735 	  rtx condz = gen_reg_rtx (CCmode);
20736 	  rtx zero_reg = gen_reg_rtx (word_mode);
20737 	  if (word_mode == SImode)
20738 	    {
20739 	      emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
20740 	      emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
20741 	      if (cmp_bytes < word_mode_size)
20742 		{
20743 		  /* Don't want to look at zero bytes past end.  */
20744 		  HOST_WIDE_INT mb =
20745 		    BITS_PER_UNIT * (word_mode_size - cmp_bytes);
20746 		  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
20747 		  emit_insn (gen_andsi3_mask (cmpb_zero, cmpb_zero, mask));
20748 		}
20749 	    }
20750 	  else
20751 	    {
20752 	      emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
20753 	      emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
20754 	      if (cmp_bytes < word_mode_size)
20755 		{
20756 		  /* Don't want to look at zero bytes past end.  */
20757 		  HOST_WIDE_INT mb =
20758 		    BITS_PER_UNIT * (word_mode_size - cmp_bytes);
20759 		  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
20760 		  emit_insn (gen_anddi3_mask (cmpb_zero, cmpb_zero, mask));
20761 		}
20762 	    }
20763 
20764 	  emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
20765 	  rtx cmpnz_rtx = gen_rtx_NE (VOIDmode, condz, const0_rtx);
20766 	  rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmpnz_rtx,
20767 					     lab_ref_fin, pc_rtx);
20768 	  rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
20769 	  JUMP_LABEL (j2) = final_move_label;
20770 	  LABEL_NUSES (final_move_label) += 1;
20771 
20772 	}
20773 
20774       offset += cmp_bytes;
20775       bytes_to_compare -= cmp_bytes;
20776     }
20777 
20778   if (equality_compare_rest)
20779     {
20780       /* Update pointers past what has been compared already.  */
20781       src1 = adjust_address (orig_src1, load_mode, offset);
20782       src2 = adjust_address (orig_src2, load_mode, offset);
20783 
20784       if (!REG_P (XEXP (src1, 0)))
20785 	{
20786 	  rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
20787 	  src1 = replace_equiv_address (src1, src1_reg);
20788 	}
20789       set_mem_size (src1, cmp_bytes);
20790 
20791       if (!REG_P (XEXP (src2, 0)))
20792 	{
20793 	  rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
20794 	  src2 = replace_equiv_address (src2, src2_reg);
20795 	}
20796       set_mem_size (src2, cmp_bytes);
20797 
20798       /* Construct call to strcmp/strncmp to compare the rest of the string.  */
20799       if (no_length)
20800 	{
20801 	  tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
20802 	  emit_library_call_value (XEXP (DECL_RTL (fun), 0),
20803 				   target, LCT_NORMAL, GET_MODE (target),
20804 				   force_reg (Pmode, XEXP (src1, 0)), Pmode,
20805 				   force_reg (Pmode, XEXP (src2, 0)), Pmode);
20806 	}
20807       else
20808 	{
20809 	  rtx len_rtx;
20810 	  if (TARGET_64BIT)
20811 	    len_rtx = gen_reg_rtx (DImode);
20812 	  else
20813 	    len_rtx = gen_reg_rtx (SImode);
20814 
20815 	  emit_move_insn (len_rtx, GEN_INT (bytes - compare_length));
20816 	  tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
20817 	  emit_library_call_value (XEXP (DECL_RTL (fun), 0),
20818 				   target, LCT_NORMAL, GET_MODE (target),
20819 				   force_reg (Pmode, XEXP (src1, 0)), Pmode,
20820 				   force_reg (Pmode, XEXP (src2, 0)), Pmode,
20821 				   len_rtx, GET_MODE (len_rtx));
20822 	}
20823 
20824       rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
20825       rtx jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
20826       JUMP_LABEL (jmp) = final_label;
20827       LABEL_NUSES (final_label) += 1;
20828       emit_barrier ();
20829     }
20830 
20831   if (cleanup_label)
20832     emit_label (cleanup_label);
20833 
20834   /* Generate the final sequence that identifies the differing
20835      byte and generates the final result, taking into account
20836      zero bytes:
20837 
20838      cmpb              cmpb_result1, src1, src2
20839      cmpb              cmpb_result2, src1, zero
20840      orc               cmpb_result1, cmpb_result1, cmpb_result2
20841      cntlzd            get bit of first zero/diff byte
20842      addi              convert for rldcl use
20843      rldcl rldcl       extract diff/zero byte
20844      subf              subtract for final result
20845   */
20846 
20847   rtx cmpb_diff = gen_reg_rtx (word_mode);
20848   rtx cmpb_zero = gen_reg_rtx (word_mode);
20849   rtx rot_amt = gen_reg_rtx (word_mode);
20850   rtx zero_reg = gen_reg_rtx (word_mode);
20851 
20852   rtx rot1_1 = gen_reg_rtx (word_mode);
20853   rtx rot1_2 = gen_reg_rtx (word_mode);
20854   rtx rot2_1 = gen_reg_rtx (word_mode);
20855   rtx rot2_2 = gen_reg_rtx (word_mode);
20856 
20857   if (word_mode == SImode)
20858     {
20859       emit_insn (gen_cmpbsi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
20860       emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
20861       emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
20862       emit_insn (gen_one_cmplsi2 (cmpb_diff, cmpb_diff));
20863       emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero));
20864       emit_insn (gen_clzsi2 (rot_amt, cmpb_diff));
20865       emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8)));
20866       emit_insn (gen_rotlsi3 (rot1_1, tmp_reg_src1,
20867 			      gen_lowpart (SImode, rot_amt)));
20868       emit_insn (gen_andsi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
20869       emit_insn (gen_rotlsi3 (rot2_1, tmp_reg_src2,
20870 			      gen_lowpart (SImode, rot_amt)));
20871       emit_insn (gen_andsi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
20872       emit_insn (gen_subsi3 (result_reg, rot1_2, rot2_2));
20873     }
20874   else
20875     {
20876       emit_insn (gen_cmpbdi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
20877       emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
20878       emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
20879       emit_insn (gen_one_cmpldi2 (cmpb_diff, cmpb_diff));
20880       emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero));
20881       emit_insn (gen_clzdi2 (rot_amt, cmpb_diff));
20882       emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8)));
20883       emit_insn (gen_rotldi3 (rot1_1, tmp_reg_src1,
20884 			      gen_lowpart (SImode, rot_amt)));
20885       emit_insn (gen_anddi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
20886       emit_insn (gen_rotldi3 (rot2_1, tmp_reg_src2,
20887 			      gen_lowpart (SImode, rot_amt)));
20888       emit_insn (gen_anddi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
20889       emit_insn (gen_subdi3 (result_reg, rot1_2, rot2_2));
20890     }
20891 
20892   emit_label (final_move_label);
20893   emit_insn (gen_movsi (target,
20894 			gen_lowpart (SImode, result_reg)));
20895   emit_label (final_label);
20896   return true;
20897 }
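
/* Editorial illustration (standalone model of cmpb, not used by the
   compiler): cmpb writes 0xff into every byte position where its
   operands agree, so ~cmpb (w1, w2) | cmpb (w1, 0) marks the first byte
   that differs or is zero, and cntlzd then locates it:

     static unsigned long long
     model_cmpb (unsigned long long a, unsigned long long b)
     {
       unsigned long long r = 0;
       for (int i = 0; i < 8; i++)
	 {
	   unsigned long long m = 0xffULL << (8 * i);
	   if ((a & m) == (b & m))
	     r |= m;	/* bytes equal -> 0xff, else 0x00 */
	 }
       return r;
     }  */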
20898 
20899 /* Expand a block move operation, and return 1 if successful.  Return 0
20900    if we should let the compiler generate normal code.
20901 
20902    operands[0] is the destination
20903    operands[1] is the source
20904    operands[2] is the length
20905    operands[3] is the alignment */
20906 
20907 #define MAX_MOVE_REG 4
20908 
20909 int
20910 expand_block_move (rtx operands[])
20911 {
20912   rtx orig_dest = operands[0];
20913   rtx orig_src	= operands[1];
20914   rtx bytes_rtx	= operands[2];
20915   rtx align_rtx = operands[3];
20916   int constp	= (GET_CODE (bytes_rtx) == CONST_INT);
20917   int align;
20918   int bytes;
20919   int offset;
20920   int move_bytes;
20921   rtx stores[MAX_MOVE_REG];
20922   int num_reg = 0;
20923 
20924   /* If this is not a fixed size move, just call memcpy.  */
20925   if (! constp)
20926     return 0;
20927 
20928   /* This must be a fixed size alignment.  */
20929   gcc_assert (GET_CODE (align_rtx) == CONST_INT);
20930   align = INTVAL (align_rtx) * BITS_PER_UNIT;
20931 
20932   /* Anything to move? */
20933   bytes = INTVAL (bytes_rtx);
20934   if (bytes <= 0)
20935     return 1;
20936 
20937   if (bytes > rs6000_block_move_inline_limit)
20938     return 0;
20939 
20940   for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
20941     {
20942       union {
20943 	rtx (*movmemsi) (rtx, rtx, rtx, rtx);
20944 	rtx (*mov) (rtx, rtx);
20945       } gen_func;
20946       machine_mode mode = BLKmode;
20947       rtx src, dest;
20948 
20949       /* Altivec first, since it will be faster than a string move
20950 	 when it applies, and usually not significantly larger.  */
20951       if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
20952 	{
20953 	  move_bytes = 16;
20954 	  mode = V4SImode;
20955 	  gen_func.mov = gen_movv4si;
20956 	}
20957       else if (TARGET_SPE && bytes >= 8 && align >= 64)
20958         {
20959           move_bytes = 8;
20960           mode = V2SImode;
20961           gen_func.mov = gen_movv2si;
20962         }
20963       else if (TARGET_STRING
20964 	  && bytes > 24		/* move up to 32 bytes at a time */
20965 	  && ! fixed_regs[5]
20966 	  && ! fixed_regs[6]
20967 	  && ! fixed_regs[7]
20968 	  && ! fixed_regs[8]
20969 	  && ! fixed_regs[9]
20970 	  && ! fixed_regs[10]
20971 	  && ! fixed_regs[11]
20972 	  && ! fixed_regs[12])
20973 	{
20974 	  move_bytes = (bytes > 32) ? 32 : bytes;
20975 	  gen_func.movmemsi = gen_movmemsi_8reg;
20976 	}
20977       else if (TARGET_STRING
20978 	       && bytes > 16	/* move up to 24 bytes at a time */
20979 	       && ! fixed_regs[5]
20980 	       && ! fixed_regs[6]
20981 	       && ! fixed_regs[7]
20982 	       && ! fixed_regs[8]
20983 	       && ! fixed_regs[9]
20984 	       && ! fixed_regs[10])
20985 	{
20986 	  move_bytes = (bytes > 24) ? 24 : bytes;
20987 	  gen_func.movmemsi = gen_movmemsi_6reg;
20988 	}
20989       else if (TARGET_STRING
20990 	       && bytes > 8	/* move up to 16 bytes at a time */
20991 	       && ! fixed_regs[5]
20992 	       && ! fixed_regs[6]
20993 	       && ! fixed_regs[7]
20994 	       && ! fixed_regs[8])
20995 	{
20996 	  move_bytes = (bytes > 16) ? 16 : bytes;
20997 	  gen_func.movmemsi = gen_movmemsi_4reg;
20998 	}
20999       else if (bytes >= 8 && TARGET_POWERPC64
21000 	       && (align >= 64 || !STRICT_ALIGNMENT))
21001 	{
21002 	  move_bytes = 8;
21003 	  mode = DImode;
21004 	  gen_func.mov = gen_movdi;
21005 	  if (offset == 0 && align < 64)
21006 	    {
21007 	      rtx addr;
21008 
21009 	      /* If the address form is reg+offset with offset not a
21010 		 multiple of four, reload into reg indirect form here
21011 		 rather than waiting for reload.  This way we get one
21012 		 reload, not one per load and/or store.  */
21013 	      addr = XEXP (orig_dest, 0);
21014 	      if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
21015 		  && GET_CODE (XEXP (addr, 1)) == CONST_INT
21016 		  && (INTVAL (XEXP (addr, 1)) & 3) != 0)
21017 		{
21018 		  addr = copy_addr_to_reg (addr);
21019 		  orig_dest = replace_equiv_address (orig_dest, addr);
21020 		}
21021 	      addr = XEXP (orig_src, 0);
21022 	      if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
21023 		  && GET_CODE (XEXP (addr, 1)) == CONST_INT
21024 		  && (INTVAL (XEXP (addr, 1)) & 3) != 0)
21025 		{
21026 		  addr = copy_addr_to_reg (addr);
21027 		  orig_src = replace_equiv_address (orig_src, addr);
21028 		}
21029 	    }
21030 	}
21031       else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
21032 	{			/* move up to 8 bytes at a time */
21033 	  move_bytes = (bytes > 8) ? 8 : bytes;
21034 	  gen_func.movmemsi = gen_movmemsi_2reg;
21035 	}
21036       else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
21037 	{			/* move 4 bytes */
21038 	  move_bytes = 4;
21039 	  mode = SImode;
21040 	  gen_func.mov = gen_movsi;
21041 	}
21042       else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
21043 	{			/* move 2 bytes */
21044 	  move_bytes = 2;
21045 	  mode = HImode;
21046 	  gen_func.mov = gen_movhi;
21047 	}
21048       else if (TARGET_STRING && bytes > 1)
21049 	{			/* move up to 4 bytes at a time */
21050 	  move_bytes = (bytes > 4) ? 4 : bytes;
21051 	  gen_func.movmemsi = gen_movmemsi_1reg;
21052 	}
21053       else /* move 1 byte at a time */
21054 	{
21055 	  move_bytes = 1;
21056 	  mode = QImode;
21057 	  gen_func.mov = gen_movqi;
21058 	}
21059 
21060       src = adjust_address (orig_src, mode, offset);
21061       dest = adjust_address (orig_dest, mode, offset);
21062 
21063       if (mode != BLKmode)
21064 	{
21065 	  rtx tmp_reg = gen_reg_rtx (mode);
21066 
21067 	  emit_insn ((*gen_func.mov) (tmp_reg, src));
21068 	  stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
21069 	}
21070 
21071       if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
21072 	{
21073 	  int i;
21074 	  for (i = 0; i < num_reg; i++)
21075 	    emit_insn (stores[i]);
21076 	  num_reg = 0;
21077 	}
21078 
21079       if (mode == BLKmode)
21080 	{
21081 	  /* Move the address into scratch registers.  The movmemsi
21082 	     patterns require zero offset.  */
21083 	  if (!REG_P (XEXP (src, 0)))
21084 	    {
21085 	      rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
21086 	      src = replace_equiv_address (src, src_reg);
21087 	    }
21088 	  set_mem_size (src, move_bytes);
21089 
21090 	  if (!REG_P (XEXP (dest, 0)))
21091 	    {
21092 	      rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
21093 	      dest = replace_equiv_address (dest, dest_reg);
21094 	    }
21095 	  set_mem_size (dest, move_bytes);
21096 
21097 	  emit_insn ((*gen_func.movmemsi) (dest, src,
21098 					   GEN_INT (move_bytes & 31),
21099 					   align_rtx));
21100 	}
21101     }
21102 
21103   return 1;
21104 }
21105 
21106 
21107 /* Return a string to perform a load_multiple operation.
21108    operands[0] is the vector.
21109    operands[1] is the source address.
21110    operands[2] is the first destination register.  */
21111 
21112 const char *
21113 rs6000_output_load_multiple (rtx operands[3])
21114 {
21115   /* We have to handle the case where the pseudo used to contain the address
21116      is assigned to one of the output registers.  */
21117   int i, j;
21118   int words = XVECLEN (operands[0], 0);
21119   rtx xop[10];
21120 
21121   if (XVECLEN (operands[0], 0) == 1)
21122     return "lwz %2,0(%1)";
21123 
21124   for (i = 0; i < words; i++)
21125     if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1]))
21126       {
21127 	if (i == words-1)
21128 	  {
21129 	    xop[0] = GEN_INT (4 * (words-1));
21130 	    xop[1] = operands[1];
21131 	    xop[2] = operands[2];
21132 	    output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
21133 	    return "";
21134 	  }
21135 	else if (i == 0)
21136 	  {
21137 	    xop[0] = GEN_INT (4 * (words-1));
21138 	    xop[1] = operands[1];
21139 	    xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
21140 	    output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
21141 	    return "";
21142 	  }
21143 	else
21144 	  {
21145 	    for (j = 0; j < words; j++)
21146 	      if (j != i)
21147 		{
21148 		  xop[0] = GEN_INT (j * 4);
21149 		  xop[1] = operands[1];
21150 		  xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
21151 		  output_asm_insn ("lwz %2,%0(%1)", xop);
21152 		}
21153 	    xop[0] = GEN_INT (i * 4);
21154 	    xop[1] = operands[1];
21155 	    output_asm_insn ("lwz %1,%0(%1)", xop);
21156 	    return "";
21157 	  }
21158       }
21159 
21160   return "lswi %2,%1,%N0";
21161 }
21162 
21163 
21164 /* A validation routine: say whether CODE, a condition code, and MODE
21165    match.  The other alternatives either don't make sense or should
21166    never be generated.  */
21167 
21168 void
21169 validate_condition_mode (enum rtx_code code, machine_mode mode)
21170 {
21171   gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
21172 	       || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
21173 	      && GET_MODE_CLASS (mode) == MODE_CC);
21174 
21175   /* These don't make sense.  */
21176   gcc_assert ((code != GT && code != LT && code != GE && code != LE)
21177 	      || mode != CCUNSmode);
21178 
21179   gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
21180 	      || mode == CCUNSmode);
21181 
21182   gcc_assert (mode == CCFPmode
21183 	      || (code != ORDERED && code != UNORDERED
21184 		  && code != UNEQ && code != LTGT
21185 		  && code != UNGT && code != UNLT
21186 		  && code != UNGE && code != UNLE));
21187 
21188   /* These should never be generated except for
21189      flag_finite_math_only.  */
21190   gcc_assert (mode != CCFPmode
21191 	      || flag_finite_math_only
21192 	      || (code != LE && code != GE
21193 		  && code != UNEQ && code != LTGT
21194 		  && code != UNGT && code != UNLT));
21195 
21196   /* These are invalid; the information is not there.  */
21197   gcc_assert (mode != CCEQmode || code == EQ || code == NE);
21198 }
21199 
21200 
21201 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
21202    rldicl, rldicr, or rldic instruction in mode MODE.  If so, and if E
21203    is non-null, store there the bit offset (counted from the right)
21204    where the single stretch of 1 bits begins; similarly, if B is
21205    non-null, store the bit offset where the stretch ends.  */
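
/* Worked examples (illustrative only): in SImode, 0x00ffff00 is a single
   stretch of ones running from bit 8 through bit 23, so *E gets 8 and *B
   gets 23.  The SImode value 0xff0000ff (ones in bits 24..31 wrapping
   around to bits 0..7) is also accepted, with *E = 24 and *B = 7; such
   wrap-around masks come out with *E > *B.  */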
21206 
21207 bool
21208 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
21209 {
21210   unsigned HOST_WIDE_INT val = INTVAL (mask);
21211   unsigned HOST_WIDE_INT bit;
21212   int nb, ne;
21213   int n = GET_MODE_PRECISION (mode);
21214 
21215   if (mode != DImode && mode != SImode)
21216     return false;
21217 
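
  /* In each branch below, VAL & -VAL isolates the lowest set bit of VAL
     (two's complement negation), and exact_log2 gives the bit position of
     a power of two, or -1 for anything else; those -1 results are what
     make the final range check reject values with more than one stretch
     of ones.  */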
21218   if (INTVAL (mask) >= 0)
21219     {
21220       bit = val & -val;
21221       ne = exact_log2 (bit);
21222       nb = exact_log2 (val + bit);
21223     }
21224   else if (val + 1 == 0)
21225     {
21226       nb = n;
21227       ne = 0;
21228     }
21229   else if (val & 1)
21230     {
21231       val = ~val;
21232       bit = val & -val;
21233       nb = exact_log2 (bit);
21234       ne = exact_log2 (val + bit);
21235     }
21236   else
21237     {
21238       bit = val & -val;
21239       ne = exact_log2 (bit);
21240       if (val + bit == 0)
21241 	nb = n;
21242       else
21243 	nb = 0;
21244     }
21245 
21246   nb--;
21247 
21248   if (nb < 0 || ne < 0 || nb >= n || ne >= n)
21249     return false;
21250 
21251   if (b)
21252     *b = nb;
21253   if (e)
21254     *e = ne;
21255 
21256   return true;
21257 }
21258 
21259 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
21260    or rldicr instruction, to implement an AND with it in mode MODE.  */
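
/* Examples (illustrative only): in DImode, 0x00000000ffff0000 (nb = 31,
   ne = 16) is accepted, since a single rlwinm can do the AND, while
   0x00ff000000000000 (nb = 55, ne = 48) is rejected: ne != 0 rules out
   rldicl, nb != 63 rules out rldicr, and nb >= 32 rules out rlwinm.  */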
21261 
21262 bool
21263 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
21264 {
21265   int nb, ne;
21266 
21267   if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
21268     return false;
21269 
21270   /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
21271      does not wrap.  */
21272   if (mode == DImode)
21273     return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
21274 
21275   /* For SImode, rlwinm can do everything.  */
21276   if (mode == SImode)
21277     return (nb < 32 && ne < 32);
21278 
21279   return false;
21280 }
21281 
21282 /* Return the instruction template for an AND with mask in mode MODE, with
21283    operands OPERANDS.  If DOT is true, make it a record-form instruction.  */
21284 
21285 const char *
21286 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
21287 {
21288   int nb, ne;
21289 
21290   if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
21291     gcc_unreachable ();
21292 
21293   if (mode == DImode && ne == 0)
21294     {
21295       operands[3] = GEN_INT (63 - nb);
21296       if (dot)
21297 	return "rldicl. %0,%1,0,%3";
21298       return "rldicl %0,%1,0,%3";
21299     }
21300 
21301   if (mode == DImode && nb == 63)
21302     {
21303       operands[3] = GEN_INT (63 - ne);
21304       if (dot)
21305 	return "rldicr. %0,%1,0,%3";
21306       return "rldicr %0,%1,0,%3";
21307     }
21308 
21309   if (nb < 32 && ne < 32)
21310     {
21311       operands[3] = GEN_INT (31 - nb);
21312       operands[4] = GEN_INT (31 - ne);
21313       if (dot)
21314 	return "rlwinm. %0,%1,0,%3,%4";
21315       return "rlwinm %0,%1,0,%3,%4";
21316     }
21317 
21318   gcc_unreachable ();
21319 }
21320 
21321 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
21322    rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
21323    shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE.  */
21324 
21325 bool
21326 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
21327 {
21328   int nb, ne;
21329 
21330   if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
21331     return false;
21332 
21333   int n = GET_MODE_PRECISION (mode);
21334   int sh = -1;
21335 
21336   if (CONST_INT_P (XEXP (shift, 1)))
21337     {
21338       sh = INTVAL (XEXP (shift, 1));
21339       if (sh < 0 || sh >= n)
21340 	return false;
21341     }
21342 
21343   rtx_code code = GET_CODE (shift);
21344 
21345   /* Convert any shift by 0 to a rotate, to simplify the code below.  */
21346   if (sh == 0)
21347     code = ROTATE;
21348 
21349   /* Convert rotate to simple shift if we can, to make analysis simpler.  */
21350   if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
21351     code = ASHIFT;
21352   if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
21353     {
21354       code = LSHIFTRT;
21355       sh = n - sh;
21356     }
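  /* For instance (illustrative): in DImode, a ROTATE by 8 under the mask
     0xff has nb = 7 < sh, so it is really a logical shift right by
     64 - 8 = 56 and is rewritten as such just above.  */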
21357 
21358   /* DImode rotates need rld*.  */
21359   if (mode == DImode && code == ROTATE)
21360     return (nb == 63 || ne == 0 || ne == sh);
21361 
21362   /* SImode rotates need rlw*.  */
21363   if (mode == SImode && code == ROTATE)
21364     return (nb < 32 && ne < 32 && sh < 32);
21365 
21366   /* Wrap-around masks are only okay for rotates.  */
21367   if (ne > nb)
21368     return false;
21369 
21370   /* Variable shifts are only okay for rotates.  */
21371   if (sh < 0)
21372     return false;
21373 
21374   /* Don't allow ASHIFT if the mask is wrong for that.  */
21375   if (code == ASHIFT && ne < sh)
21376     return false;
21377 
21378   /* If we can do it with an rlw*, we can do it.  Don't allow LSHIFTRT
21379      if the mask is wrong for that.  */
21380   if (nb < 32 && ne < 32 && sh < 32
21381       && !(code == LSHIFTRT && nb >= 32 - sh))
21382     return true;
21383 
21384   /* If we can do it with an rld*, we can do it.  Don't allow LSHIFTRT
21385      if the mask is wrong for that.  */
21386   if (code == LSHIFTRT)
21387     sh = 64 - sh;
21388   if (nb == 63 || ne == 0 || ne == sh)
21389     return !(code == LSHIFTRT && nb >= sh);
21390 
21391   return false;
21392 }
21393 
21394 /* Return the instruction template for a shift with mask in mode MODE, with
21395    operands OPERANDS.  If DOT is true, make it a record-form instruction.  */
21396 
21397 const char *
21398 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
21399 {
21400   int nb, ne;
21401 
21402   if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
21403     gcc_unreachable ();
21404 
21405   if (mode == DImode && ne == 0)
21406     {
21407       if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
21408 	operands[2] = GEN_INT (64 - INTVAL (operands[2]));
21409       operands[3] = GEN_INT (63 - nb);
21410       if (dot)
21411 	return "rld%I2cl. %0,%1,%2,%3";
21412       return "rld%I2cl %0,%1,%2,%3";
21413     }
21414 
21415   if (mode == DImode && nb == 63)
21416     {
21417       operands[3] = GEN_INT (63 - ne);
21418       if (dot)
21419 	return "rld%I2cr. %0,%1,%2,%3";
21420       return "rld%I2cr %0,%1,%2,%3";
21421     }
21422 
21423   if (mode == DImode
21424       && GET_CODE (operands[4]) != LSHIFTRT
21425       && CONST_INT_P (operands[2])
21426       && ne == INTVAL (operands[2]))
21427     {
21428       operands[3] = GEN_INT (63 - nb);
21429       if (dot)
21430 	return "rld%I2c. %0,%1,%2,%3";
21431       return "rld%I2c %0,%1,%2,%3";
21432     }
21433 
21434   if (nb < 32 && ne < 32)
21435     {
21436       if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
21437 	operands[2] = GEN_INT (32 - INTVAL (operands[2]));
21438       operands[3] = GEN_INT (31 - nb);
21439       operands[4] = GEN_INT (31 - ne);
21440       /* This insn can also be a 64-bit rotate with mask that really makes
21441 	 it just a shift right (with mask); the %h's below adjust for that
21442 	 situation (the shift count is >= 32 in that case).  */
21443       if (dot)
21444 	return "rlw%I2nm. %0,%1,%h2,%3,%4";
21445       return "rlw%I2nm %0,%1,%h2,%3,%4";
21446     }
21447 
21448   gcc_unreachable ();
21449 }
21450 
21451 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
21452    rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
21453    ASHIFT, or LSHIFTRT) in mode MODE.  */
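
/* For example (illustrative): in DImode, an ASHIFT by 16 under the mask
   0x00000000ffff0000 (nb = 31, ne = 16, so ne == sh) passes the checks
   below via the rlwimi form, while the same mask with an ASHIFT by 24
   fails the "ne < sh" test.  */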
21454 
21455 bool
21456 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
21457 {
21458   int nb, ne;
21459 
21460   if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
21461     return false;
21462 
21463   int n = GET_MODE_PRECISION (mode);
21464 
21465   int sh = INTVAL (XEXP (shift, 1));
21466   if (sh < 0 || sh >= n)
21467     return false;
21468 
21469   rtx_code code = GET_CODE (shift);
21470 
21471   /* Convert any shift by 0 to a rotate, to simplify the code below.  */
21472   if (sh == 0)
21473     code = ROTATE;
21474 
21475   /* Convert rotate to simple shift if we can, to make analysis simpler.  */
21476   if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
21477     code = ASHIFT;
21478   if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
21479     {
21480       code = LSHIFTRT;
21481       sh = n - sh;
21482     }
21483 
21484   /* DImode rotates need rldimi.  */
21485   if (mode == DImode && code == ROTATE)
21486     return (ne == sh);
21487 
21488   /* SImode rotates need rlwimi.  */
21489   if (mode == SImode && code == ROTATE)
21490     return (nb < 32 && ne < 32 && sh < 32);
21491 
21492   /* Wrap-around masks are only okay for rotates.  */
21493   if (ne > nb)
21494     return false;
21495 
21496   /* Don't allow ASHIFT if the mask is wrong for that.  */
21497   if (code == ASHIFT && ne < sh)
21498     return false;
21499 
21500   /* If we can do it with an rlwimi, we can do it.  Don't allow LSHIFTRT
21501      if the mask is wrong for that.  */
21502   if (nb < 32 && ne < 32 && sh < 32
21503       && !(code == LSHIFTRT && nb >= 32 - sh))
21504     return true;
21505 
21506   /* If we can do it with an rldimi, we can do it.  Don't allow LSHIFTRT
21507      if the mask is wrong for that.  */
21508   if (code == LSHIFTRT)
21509     sh = 64 - sh;
21510   if (ne == sh)
21511     return !(code == LSHIFTRT && nb >= sh);
21512 
21513   return false;
21514 }
21515 
21516 /* Return the instruction template for an insert with mask in mode MODE, with
21517    operands OPERANDS.  If DOT is true, make it a record-form instruction.  */
21518 
21519 const char *
21520 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
21521 {
21522   int nb, ne;
21523 
21524   if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
21525     gcc_unreachable ();
21526 
21527   /* Prefer rldimi because rlwimi is cracked.  */
21528   if (TARGET_POWERPC64
21529       && (!dot || mode == DImode)
21530       && GET_CODE (operands[4]) != LSHIFTRT
21531       && ne == INTVAL (operands[2]))
21532     {
21533       operands[3] = GEN_INT (63 - nb);
21534       if (dot)
21535 	return "rldimi. %0,%1,%2,%3";
21536       return "rldimi %0,%1,%2,%3";
21537     }
21538 
21539   if (nb < 32 && ne < 32)
21540     {
21541       if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
21542 	operands[2] = GEN_INT (32 - INTVAL (operands[2]));
21543       operands[3] = GEN_INT (31 - nb);
21544       operands[4] = GEN_INT (31 - ne);
21545       if (dot)
21546 	return "rlwimi. %0,%1,%2,%3,%4";
21547       return "rlwimi %0,%1,%2,%3,%4";
21548     }
21549 
21550   gcc_unreachable ();
21551 }
21552 
21553 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
21554    using two machine instructions.  */
21555 
21556 bool
21557 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
21558 {
21559   /* There are two kinds of AND we can handle with two insns:
21560      1) those we can do with two rl* insns;
21561      2) ori[s];xori[s].
21562 
21563      We do not handle that last case yet.  */
21564 
21565   /* If there is just one stretch of ones, we can do it.  */
21566   if (rs6000_is_valid_mask (c, NULL, NULL, mode))
21567     return true;
21568 
21569   /* Otherwise, fill in the lowest "hole"; if we can do the result with
21570      one insn, we can do the whole thing with two.  */
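  /* Worked example (illustrative only, written as 32-bit values): for
     c = 0xff00ff00 there are two stretches of ones.  Below, bit1 is
     0x00000100 (the lowest set bit), bit2 is 0x00010000 (the bottom of
     the lowest hole), and bit3 is 0x01000000 (the first one bit above
     that hole), so the tested mask is 0xff00ff00 + 0x01000000
     - 0x00010000 = 0xffffff00: one stretch, which a single rlwinm can
     AND in SImode.  */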
21571   unsigned HOST_WIDE_INT val = INTVAL (c);
21572   unsigned HOST_WIDE_INT bit1 = val & -val;
21573   unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
21574   unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
21575   unsigned HOST_WIDE_INT bit3 = val1 & -val1;
21576   return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
21577 }
21578 
21579 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
21580    If EXPAND is true, split rotate-and-mask instructions we generate to
21581    their constituent parts as well (this is used during expand); if DOT
21582    is 1, make the last insn a record-form instruction clobbering the
21583    destination GPR and setting the CC reg (from operands[3]); if 2, set
21584    that GPR as well as the CC reg.  */
21585 
21586 void
21587 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
21588 {
21589   gcc_assert (!(expand && dot));
21590 
21591   unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
21592 
21593   /* If it is one stretch of ones, it is DImode; shift left, mask, then
21594      shift right.  This generates better code than doing the masks without
21595      shifts, or shifting first right and then left.  */
21596   int nb, ne;
21597   if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
21598     {
21599       gcc_assert (mode == DImode);
21600 
21601       int shift = 63 - nb;
21602       if (expand)
21603 	{
21604 	  rtx tmp1 = gen_reg_rtx (DImode);
21605 	  rtx tmp2 = gen_reg_rtx (DImode);
21606 	  emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
21607 	  emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
21608 	  emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
21609 	}
21610       else
21611 	{
21612 	  rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
21613 	  tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
21614 	  emit_move_insn (operands[0], tmp);
21615 	  tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
21616 	  rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
21617 	}
21618       return;
21619     }
21620 
21621   /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
21622      that does the rest.  */
21623   unsigned HOST_WIDE_INT bit1 = val & -val;
21624   unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
21625   unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
21626   unsigned HOST_WIDE_INT bit3 = val1 & -val1;
21627 
21628   unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
21629   unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
21630 
21631   gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
21632 
21633   /* Two "no-rotate"-and-mask instructions, for SImode.  */
21634   if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
21635     {
21636       gcc_assert (mode == SImode);
21637 
21638       rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
21639       rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
21640       emit_move_insn (reg, tmp);
21641       tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
21642       rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
21643       return;
21644     }
21645 
21646   gcc_assert (mode == DImode);
21647 
21648   /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
21649      insns; we have to do the first in SImode, because it wraps.  */
21650   if (mask2 <= 0xffffffff
21651       && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
21652     {
21653       rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
21654       rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
21655 			     GEN_INT (mask1));
21656       rtx reg_low = gen_lowpart (SImode, reg);
21657       emit_move_insn (reg_low, tmp);
21658       tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
21659       rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
21660       return;
21661     }
21662 
21663   /* Two rld* insns: rotate, clear the hole in the middle (which now is
21664      at the top end), rotate back and clear the other hole.  */
21665   int right = exact_log2 (bit3);
21666   int left = 64 - right;
21667 
21668   /* Rotate the mask too.  */
21669   mask1 = (mask1 >> right) | ((bit2 - 1) << left);
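  /* The line above is MASK1 rotated right by RIGHT: the low RIGHT bits
     of MASK1 are exactly BIT2 - 1, and they become the new high bits.  */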
21670 
21671   if (expand)
21672     {
21673       rtx tmp1 = gen_reg_rtx (DImode);
21674       rtx tmp2 = gen_reg_rtx (DImode);
21675       rtx tmp3 = gen_reg_rtx (DImode);
21676       emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
21677       emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
21678       emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
21679       emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
21680     }
21681   else
21682     {
21683       rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
21684       tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
21685       emit_move_insn (operands[0], tmp);
21686       tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
21687       tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
21688       rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
21689     }
21690 }
21691 
21692 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
21693    for lfq and stfq insns iff the registers are hard registers.  */
21694 
21695 int
21696 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
21697 {
21698   /* We might have been passed a SUBREG.  */
21699   if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
21700     return 0;
21701 
21702   /* We might have been passed non floating point registers.  */
21703   if (!FP_REGNO_P (REGNO (reg1))
21704       || !FP_REGNO_P (REGNO (reg2)))
21705     return 0;
21706 
21707   return (REGNO (reg1) == REGNO (reg2) - 1);
21708 }
21709 
21710 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
21711    addr1 and addr2 must be in consecutive memory locations
21712    (addr2 == addr1 + 8).  */
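
/* For illustration: mem1 = (mem:DF (plus (reg r3) (const_int 8))) and
   mem2 = (mem:DF (plus (reg r3) (const_int 16))) are suitable, since
   both use base register r3 and offset2 == offset1 + 8.  */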
21713 
21714 int
21715 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
21716 {
21717   rtx addr1, addr2;
21718   unsigned int reg1, reg2;
21719   int offset1, offset2;
21720 
21721   /* The mems cannot be volatile.  */
21722   if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
21723     return 0;
21724 
21725   addr1 = XEXP (mem1, 0);
21726   addr2 = XEXP (mem2, 0);
21727 
21728   /* Extract an offset (if used) from the first addr.  */
21729   if (GET_CODE (addr1) == PLUS)
21730     {
21731       /* If not a REG, return zero.  */
21732       if (GET_CODE (XEXP (addr1, 0)) != REG)
21733 	return 0;
21734       else
21735 	{
21736 	  reg1 = REGNO (XEXP (addr1, 0));
21737 	  /* The offset must be constant!  */
21738 	  if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
21739 	    return 0;
21740 	  offset1 = INTVAL (XEXP (addr1, 1));
21741 	}
21742     }
21743   else if (GET_CODE (addr1) != REG)
21744     return 0;
21745   else
21746     {
21747       reg1 = REGNO (addr1);
21748       /* This was a simple (mem (reg)) expression.  Offset is 0.  */
21749       offset1 = 0;
21750     }
21751 
21752   /* And now for the second addr.  */
21753   if (GET_CODE (addr2) == PLUS)
21754     {
21755       /* If not a REG, return zero.  */
21756       if (GET_CODE (XEXP (addr2, 0)) != REG)
21757 	return 0;
21758       else
21759 	{
21760 	  reg2 = REGNO (XEXP (addr2, 0));
21761 	  /* The offset must be constant. */
21762 	  if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
21763 	    return 0;
21764 	  offset2 = INTVAL (XEXP (addr2, 1));
21765 	}
21766     }
21767   else if (GET_CODE (addr2) != REG)
21768     return 0;
21769   else
21770     {
21771       reg2 = REGNO (addr2);
21772       /* This was a simple (mem (reg)) expression.  Offset is 0.  */
21773       offset2 = 0;
21774     }
21775 
21776   /* Both of these must have the same base register.  */
21777   if (reg1 != reg2)
21778     return 0;
21779 
21780   /* The offset for the second addr must be 8 more than the first addr.  */
21781   if (offset2 != offset1 + 8)
21782     return 0;
21783 
21784   /* All the tests passed.  addr1 and addr2 are valid for lfq or stfq
21785      instructions.  */
21786   return 1;
21787 }
21788 
21789 
21790 rtx
21791 rs6000_secondary_memory_needed_rtx (machine_mode mode)
21792 {
21793   static bool eliminated = false;
21794   rtx ret;
21795 
21796   if (mode != SDmode || TARGET_NO_SDMODE_STACK)
21797     ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
21798   else
21799     {
21800       rtx mem = cfun->machine->sdmode_stack_slot;
21801       gcc_assert (mem != NULL_RTX);
21802 
21803       if (!eliminated)
21804 	{
21805 	  mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
21806 	  cfun->machine->sdmode_stack_slot = mem;
21807 	  eliminated = true;
21808 	}
21809       ret = mem;
21810     }
21811 
21812   if (TARGET_DEBUG_ADDR)
21813     {
21814       fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
21815 	       GET_MODE_NAME (mode));
21816       if (!ret)
21817 	fprintf (stderr, "\tNULL_RTX\n");
21818       else
21819 	debug_rtx (ret);
21820     }
21821 
21822   return ret;
21823 }
21824 
21825 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.  For SDmode values we
21826    need to use DDmode; in all other cases we can use the same mode.  */
21827 static machine_mode
21828 rs6000_secondary_memory_needed_mode (machine_mode mode)
21829 {
21830   if (lra_in_progress && mode == SDmode)
21831     return DDmode;
21832   return mode;
21833 }
21834 
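/* Tree-walk callback: return *TP if it is a decl, SSA name, constant,
   or memory reference whose type has SDmode, and NULL_TREE otherwise;
   types themselves are not walked into.  */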
21835 static tree
21836 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
21837 {
21838   /* Don't walk into types.  */
21839   if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
21840     {
21841       *walk_subtrees = 0;
21842       return NULL_TREE;
21843     }
21844 
21845   switch (TREE_CODE (*tp))
21846     {
21847     case VAR_DECL:
21848     case PARM_DECL:
21849     case FIELD_DECL:
21850     case RESULT_DECL:
21851     case SSA_NAME:
21852     case REAL_CST:
21853     case MEM_REF:
21854     case VIEW_CONVERT_EXPR:
21855       if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
21856 	return *tp;
21857       break;
21858     default:
21859       break;
21860     }
21861 
21862   return NULL_TREE;
21863 }
21864 
21865 /* Classify a register type.  Because the FMRGOW/FMRGEW instructions only work
21866    on traditional floating point registers, and the VMRGOW/VMRGEW instructions
21867    only work on the traditional altivec registers, note if an altivec register
21868    was chosen.  */
21869 
21870 static enum rs6000_reg_type
21871 register_to_reg_type (rtx reg, bool *is_altivec)
21872 {
21873   HOST_WIDE_INT regno;
21874   enum reg_class rclass;
21875 
21876   if (GET_CODE (reg) == SUBREG)
21877     reg = SUBREG_REG (reg);
21878 
21879   if (!REG_P (reg))
21880     return NO_REG_TYPE;
21881 
21882   regno = REGNO (reg);
21883   if (regno >= FIRST_PSEUDO_REGISTER)
21884     {
21885       if (!lra_in_progress && !reload_in_progress && !reload_completed)
21886 	return PSEUDO_REG_TYPE;
21887 
21888       regno = true_regnum (reg);
21889       if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
21890 	return PSEUDO_REG_TYPE;
21891     }
21892 
21893   gcc_assert (regno >= 0);
21894 
21895   if (is_altivec && ALTIVEC_REGNO_P (regno))
21896     *is_altivec = true;
21897 
21898   rclass = rs6000_regno_regclass[regno];
21899   return reg_class_to_reg_type[(int)rclass];
21900 }
21901 
21902 /* Helper function to return the cost of adding a TOC entry address.  */
21903 
21904 static inline int
21905 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
21906 {
21907   int ret;
21908 
21909   if (TARGET_CMODEL != CMODEL_SMALL)
21910     ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
21911 
21912   else
21913     ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
21914 
21915   return ret;
21916 }
21917 
21918 /* Helper function for rs6000_secondary_reload to determine whether the memory
21919    address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
21920    needs reloading.  Return negative if the address is not handled by the
21921    memory helper functions (so that a different reload method is tried),
21922    0 if no additional instructions are needed, and positive to give the
21923    extra cost of accessing the memory.  */
21924 
21925 static int
21926 rs6000_secondary_reload_memory (rtx addr,
21927 				enum reg_class rclass,
21928 				machine_mode mode)
21929 {
21930   int extra_cost = 0;
21931   rtx reg, and_arg, plus_arg0, plus_arg1;
21932   addr_mask_type addr_mask;
21933   const char *type = NULL;
21934   const char *fail_msg = NULL;
21935 
21936   if (GPR_REG_CLASS_P (rclass))
21937     addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
21938 
21939   else if (rclass == FLOAT_REGS)
21940     addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
21941 
21942   else if (rclass == ALTIVEC_REGS)
21943     addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
21944 
21945   /* For the combined VSX_REGS, turn off Altivec AND -16.  */
21946   else if (rclass == VSX_REGS)
21947     addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
21948 		 & ~RELOAD_REG_AND_M16);
21949 
21950   /* If the register allocator hasn't made up its mind yet on the register
21951      class to use, settle on reasonable defaults.  */
21952   else if (rclass == NO_REGS)
21953     {
21954       addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
21955 		   & ~RELOAD_REG_AND_M16);
21956 
21957       if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
21958 	addr_mask &= ~(RELOAD_REG_INDEXED
21959 		       | RELOAD_REG_PRE_INCDEC
21960 		       | RELOAD_REG_PRE_MODIFY);
21961     }
21962 
21963   else
21964     addr_mask = 0;
21965 
21966   /* If the register isn't valid in this register class, just return now.  */
21967   if ((addr_mask & RELOAD_REG_VALID) == 0)
21968     {
21969       if (TARGET_DEBUG_ADDR)
21970 	{
21971 	  fprintf (stderr,
21972 		   "rs6000_secondary_reload_memory: mode = %s, class = %s, "
21973 		   "not valid in class\n",
21974 		   GET_MODE_NAME (mode), reg_class_names[rclass]);
21975 	  debug_rtx (addr);
21976 	}
21977 
21978       return -1;
21979     }
21980 
21981   switch (GET_CODE (addr))
21982     {
21983       /* Does the register class support auto update forms for this mode?  We
21984 	 don't need a scratch register, since the powerpc only supports
21985 	 PRE_INC, PRE_DEC, and PRE_MODIFY.  */
21986     case PRE_INC:
21987     case PRE_DEC:
21988       reg = XEXP (addr, 0);
21989       if (!base_reg_operand (reg, GET_MODE (reg)))
21990 	{
21991 	  fail_msg = "no base register #1";
21992 	  extra_cost = -1;
21993 	}
21994 
21995       else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
21996 	{
21997 	  extra_cost = 1;
21998 	  type = "update";
21999 	}
22000       break;
22001 
22002     case PRE_MODIFY:
22003       reg = XEXP (addr, 0);
22004       plus_arg1 = XEXP (addr, 1);
22005       if (!base_reg_operand (reg, GET_MODE (reg))
22006 	  || GET_CODE (plus_arg1) != PLUS
22007 	  || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
22008 	{
22009 	  fail_msg = "bad PRE_MODIFY";
22010 	  extra_cost = -1;
22011 	}
22012 
22013       else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
22014 	{
22015 	  extra_cost = 1;
22016 	  type = "update";
22017 	}
22018       break;
22019 
22020       /* Do we need to simulate AND -16 to clear the bottom address bits used
22021 	 in VMX load/stores?  Only allow the AND for vector sizes.  */
22022     case AND:
22023       and_arg = XEXP (addr, 0);
22024       if (GET_MODE_SIZE (mode) != 16
22025 	  || GET_CODE (XEXP (addr, 1)) != CONST_INT
22026 	  || INTVAL (XEXP (addr, 1)) != -16)
22027 	{
22028 	  fail_msg = "bad Altivec AND #1";
22029 	  extra_cost = -1;
22030 	}
22031 
22032       if (rclass != ALTIVEC_REGS)
22033 	{
22034 	  if (legitimate_indirect_address_p (and_arg, false))
22035 	    extra_cost = 1;
22036 
22037 	  else if (legitimate_indexed_address_p (and_arg, false))
22038 	    extra_cost = 2;
22039 
22040 	  else
22041 	    {
22042 	      fail_msg = "bad Altivec AND #2";
22043 	      extra_cost = -1;
22044 	    }
22045 
22046 	  type = "and";
22047 	}
22048       break;
22049 
22050       /* If this is an indirect address, make sure it is a base register.  */
22051     case REG:
22052     case SUBREG:
22053       if (!legitimate_indirect_address_p (addr, false))
22054 	{
22055 	  extra_cost = 1;
22056 	  type = "move";
22057 	}
22058       break;
22059 
22060       /* If this is an indexed address, make sure the register class can handle
22061 	 indexed addresses for this mode.  */
22062     case PLUS:
22063       plus_arg0 = XEXP (addr, 0);
22064       plus_arg1 = XEXP (addr, 1);
22065 
22066       /* (plus (plus (reg) (constant)) (constant)) is generated during
22067 	 push_reload processing, so handle it now.  */
22068       if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
22069 	{
22070 	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22071 	    {
22072 	      extra_cost = 1;
22073 	      type = "offset";
22074 	    }
22075 	}
22076 
22077       /* (plus (plus (reg) (constant)) (reg)) is also generated during
22078 	 push_reload processing, so handle it now.  */
22079       else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
22080 	{
22081 	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
22082 	    {
22083 	      extra_cost = 1;
22084 	      type = "indexed #2";
22085 	    }
22086 	}
22087 
22088       else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
22089 	{
22090 	  fail_msg = "no base register #2";
22091 	  extra_cost = -1;
22092 	}
22093 
22094       else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
22095 	{
22096 	  if ((addr_mask & RELOAD_REG_INDEXED) == 0
22097 	      || !legitimate_indexed_address_p (addr, false))
22098 	    {
22099 	      extra_cost = 1;
22100 	      type = "indexed";
22101 	    }
22102 	}
22103 
22104       else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
22105 	       && CONST_INT_P (plus_arg1))
22106 	{
22107 	  if (!quad_address_offset_p (INTVAL (plus_arg1)))
22108 	    {
22109 	      extra_cost = 1;
22110 	      type = "vector d-form offset";
22111 	    }
22112 	}
22113 
22114       /* Make sure the register class can handle offset addresses.  */
22115       else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
22116 	{
22117 	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22118 	    {
22119 	      extra_cost = 1;
22120 	      type = "offset #2";
22121 	    }
22122 	}
22123 
22124       else
22125 	{
22126 	  fail_msg = "bad PLUS";
22127 	  extra_cost = -1;
22128 	}
22129 
22130       break;
22131 
22132     case LO_SUM:
22133       /* Quad offsets are restricted and can't handle normal addresses.  */
22134       if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
22135 	{
22136 	  extra_cost = -1;
22137 	  type = "vector d-form lo_sum";
22138 	}
22139 
22140       else if (!legitimate_lo_sum_address_p (mode, addr, false))
22141 	{
22142 	  fail_msg = "bad LO_SUM";
22143 	  extra_cost = -1;
22144 	}
22145 
22146       if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22147 	{
22148 	  extra_cost = 1;
22149 	  type = "lo_sum";
22150 	}
22151       break;
22152 
22153       /* Static addresses need to create a TOC entry.  */
22154     case CONST:
22155     case SYMBOL_REF:
22156     case LABEL_REF:
22157       if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
22158 	{
22159 	  extra_cost = -1;
22160 	  type = "vector d-form lo_sum #2";
22161 	}
22162 
22163       else
22164 	{
22165 	  type = "address";
22166 	  extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
22167 	}
22168       break;
22169 
22170       /* TOC references look like offsetable memory.  */
22171     case UNSPEC:
22172       if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
22173 	{
22174 	  fail_msg = "bad UNSPEC";
22175 	  extra_cost = -1;
22176 	}
22177 
22178       else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
22179 	{
22180 	  extra_cost = -1;
22181 	  type = "vector d-form lo_sum #3";
22182 	}
22183 
22184       else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22185 	{
22186 	  extra_cost = 1;
22187 	  type = "toc reference";
22188 	}
22189       break;
22190 
22191     default:
22192 	{
22193 	  fail_msg = "bad address";
22194 	  extra_cost = -1;
22195 	}
22196     }
22197 
22198   if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
22199     {
22200       if (extra_cost < 0)
22201 	fprintf (stderr,
22202 		 "rs6000_secondary_reload_memory error: mode = %s, "
22203 		 "class = %s, addr_mask = '%s', %s\n",
22204 		 GET_MODE_NAME (mode),
22205 		 reg_class_names[rclass],
22206 		 rs6000_debug_addr_mask (addr_mask, false),
22207 		 (fail_msg != NULL) ? fail_msg : "<bad address>");
22208 
22209       else
22210 	fprintf (stderr,
22211 		 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
22212 		 "addr_mask = '%s', extra cost = %d, %s\n",
22213 		 GET_MODE_NAME (mode),
22214 		 reg_class_names[rclass],
22215 		 rs6000_debug_addr_mask (addr_mask, false),
22216 		 extra_cost,
22217 		 (type) ? type : "<none>");
22218 
22219       debug_rtx (addr);
22220     }
22221 
22222   return extra_cost;
22223 }
22224 
22225 /* Helper function for rs6000_secondary_reload to return true if a move to a
22226    different register class is really a simple move.  */
22227 
22228 static bool
22229 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
22230 				     enum rs6000_reg_type from_type,
22231 				     machine_mode mode)
22232 {
22233   int size = GET_MODE_SIZE (mode);
22234 
22235   /* Add support for various direct moves available.  In this function, we only
22236      look at cases where we don't need any extra registers, and one or more
22237      simple move insns are issued.  Originally small integers are not allowed
22238      in FPR/VSX registers.  Single precision binary floating point is not a
22239      simple move because we need to convert to the single precision memory
22240      layout.  The 4-byte SDmode can be moved.  TDmode values are disallowed
22241      since they need special direct move handling, which we do not support yet.  */
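  /* For example: on a 64-bit ISA 2.07 target, a DImode value moves
     between a GPR and a VSX register with a single mtvsrd or mfvsrd,
     so the size == 8 test below returns true for that pairing.  */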
22242   if (TARGET_DIRECT_MOVE
22243       && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
22244 	  || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
22245     {
22246       if (TARGET_POWERPC64)
22247 	{
22248 	  /* ISA 2.07: MTVSRD or MFVSRD.  */
22249 	  if (size == 8)
22250 	    return true;
22251 
22252 	  /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD.  */
22253 	  if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
22254 	    return true;
22255 	}
22256 
22257       /* ISA 2.07: MTVSRWZ or MFVSRWZ.  */
22258       if (TARGET_VSX_SMALL_INTEGER)
22259 	{
22260 	  if (mode == SImode)
22261 	    return true;
22262 
22263 	  if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
22264 	    return true;
22265 	}
22266 
22267       /* ISA 2.07: MTVSRWZ or MFVSRWZ.  */
22268       if (mode == SDmode)
22269 	return true;
22270     }
22271 
22272   /* Power6+: MFTGPR or MFFGPR.  */
22273   else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
22274       && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
22275 	  || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
22276     return true;
22277 
22278   /* Move to/from SPR.  */
22279   else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
22280 	   && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
22281 	       || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
22282     return true;
22283 
22284   return false;
22285 }
22286 
22287 /* Direct move helper function for rs6000_secondary_reload: handle all of the
22288    special direct moves that involve allocating an extra register.  Return
22289    true if there is such a helper, recording its insn code and extra cost in
22290    SRI when SRI is non-null.  */
22291 
22292 static bool
22293 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
22294 				     enum rs6000_reg_type from_type,
22295 				     machine_mode mode,
22296 				     secondary_reload_info *sri,
22297 				     bool altivec_p)
22298 {
22299   bool ret = false;
22300   enum insn_code icode = CODE_FOR_nothing;
22301   int cost = 0;
22302   int size = GET_MODE_SIZE (mode);
22303 
22304   if (TARGET_POWERPC64 && size == 16)
22305     {
22306       /* Handle moving 128-bit values from GPRs to VSX registers on
22307 	 ISA 2.07 (power8, power9) when running in 64-bit mode using
22308 	 XXPERMDI to glue the two 64-bit values back together.  */
22309       if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
22310 	{
22311 	  cost = 3;			/* 2 mtvsrd's, 1 xxpermdi.  */
22312 	  icode = reg_addr[mode].reload_vsx_gpr;
22313 	}
22314 
22315       /* Handle moving 128-bit values from VSX registers to GPRs on
22316 	 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
22317 	 bottom 64-bit value.  */
22318       else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
22319 	{
22320 	  cost = 3;			/* 2 mfvsrd's, 1 xxpermdi.  */
22321 	  icode = reg_addr[mode].reload_gpr_vsx;
22322 	}
22323     }
22324 
22325   else if (TARGET_POWERPC64 && mode == SFmode)
22326     {
22327       if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
22328 	{
22329 	  cost = 3;			/* xscvdpspn, mfvsrd, and.  */
22330 	  icode = reg_addr[mode].reload_gpr_vsx;
22331 	}
22332 
22333       else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
22334 	{
22335 	  cost = 2;			/* mtvsrz, xscvspdpn.  */
22336 	  icode = reg_addr[mode].reload_vsx_gpr;
22337 	}
22338     }
22339 
22340   else if (!TARGET_POWERPC64 && size == 8)
22341     {
22342       /* Handle moving 64-bit values from GPRs to floating point registers on
22343 	 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
22344 	 32-bit values back together.  Altivec register classes must be handled
22345 	 specially since a different instruction is used, and the secondary
22346 	 reload support requires a single instruction class in the scratch
22347 	 register constraint.  However, right now TFmode is not allowed in
22348 	 Altivec registers, so the pattern will never match.  */
22349       if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
22350 	{
22351 	  cost = 3;			/* 2 mtvsrwz's, 1 fmrgow.  */
22352 	  icode = reg_addr[mode].reload_fpr_gpr;
22353 	}
22354     }
22355 
22356   if (icode != CODE_FOR_nothing)
22357     {
22358       ret = true;
22359       if (sri)
22360 	{
22361 	  sri->icode = icode;
22362 	  sri->extra_cost = cost;
22363 	}
22364     }
22365 
22366   return ret;
22367 }
22368 
22369 /* Return whether a move between two register classes can be done either
22370    directly (simple move) or via a pattern that uses a single extra temporary
22371    (using ISA 2.07's direct move in this case).  */
22372 
22373 static bool
22374 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
22375 			      enum rs6000_reg_type from_type,
22376 			      machine_mode mode,
22377 			      secondary_reload_info *sri,
22378 			      bool altivec_p)
22379 {
22380   /* Fall back to load/store reloads if either type is not a register.  */
22381   if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
22382     return false;
22383 
22384   /* If we haven't allocated registers yet, assume the move can be done for the
22385      standard register types.  */
22386   if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
22387       || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
22388       || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
22389     return true;
22390 
22391   /* A move within the same set of registers is a simple move for
22392      non-specialized registers.  */
22393   if (to_type == from_type && IS_STD_REG_TYPE (to_type))
22394     return true;
22395 
22396   /* Check whether a simple move can be done directly.  */
22397   if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
22398     {
22399       if (sri)
22400 	{
22401 	  sri->icode = CODE_FOR_nothing;
22402 	  sri->extra_cost = 0;
22403 	}
22404       return true;
22405     }
22406 
22407   /* Now check if we can do it in a few steps.  */
22408   return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
22409 					      altivec_p);
22410 }
22411 
22412 /* Inform reload about cases where moving X with a mode MODE to a register in
22413    RCLASS requires an extra scratch or immediate register.  Return the class
22414    needed for the immediate register.
22415 
22416    For VSX and Altivec, we may need a register to convert sp+offset into
22417    reg+sp.
22418 
22419    For misaligned 64-bit gpr loads and stores we need a register to
22420    convert an offset address to indirect.  */
22421 
22422 static reg_class_t
22423 rs6000_secondary_reload (bool in_p,
22424 			 rtx x,
22425 			 reg_class_t rclass_i,
22426 			 machine_mode mode,
22427 			 secondary_reload_info *sri)
22428 {
22429   enum reg_class rclass = (enum reg_class) rclass_i;
22430   reg_class_t ret = ALL_REGS;
22431   enum insn_code icode;
22432   bool default_p = false;
22433   bool done_p = false;
22434 
22435   /* Allow subreg of memory before/during reload.  */
22436   bool memory_p = (MEM_P (x)
22437 		   || (!reload_completed && GET_CODE (x) == SUBREG
22438 		       && MEM_P (SUBREG_REG (x))));
22439 
22440   sri->icode = CODE_FOR_nothing;
22441   sri->t_icode = CODE_FOR_nothing;
22442   sri->extra_cost = 0;
22443   icode = ((in_p)
22444 	   ? reg_addr[mode].reload_load
22445 	   : reg_addr[mode].reload_store);
22446 
22447   if (REG_P (x) || register_operand (x, mode))
22448     {
22449       enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
22450       bool altivec_p = (rclass == ALTIVEC_REGS);
22451       enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
22452 
22453       if (!in_p)
22454 	std::swap (to_type, from_type);
22455 
22456       /* Can we do a direct move of some sort?  */
22457       if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
22458 					altivec_p))
22459 	{
22460 	  icode = (enum insn_code)sri->icode;
22461 	  default_p = false;
22462 	  done_p = true;
22463 	  ret = NO_REGS;
22464 	}
22465     }
22466 
22467   /* Make sure 0.0 is not reloaded or forced into memory.  */
22468   if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
22469     {
22470       ret = NO_REGS;
22471       default_p = false;
22472       done_p = true;
22473     }
22474 
22475   /* If this is a scalar floating point value and we want to load it into the
22476      traditional Altivec registers, do it via a move via a traditional floating
22477      point register, unless we have D-form addressing.  Also make sure that
22478      non-zero constants use a FPR.  */
22479   if (!done_p && reg_addr[mode].scalar_in_vmx_p
22480       && !mode_supports_vmx_dform (mode)
22481       && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
22482       && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
22483     {
22484       ret = FLOAT_REGS;
22485       default_p = false;
22486       done_p = true;
22487     }
22488 
22489   /* Handle reload of load/stores if we have reload helper functions.  */
22490   if (!done_p && icode != CODE_FOR_nothing && memory_p)
22491     {
22492       int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
22493 						       mode);
22494 
22495       if (extra_cost >= 0)
22496 	{
22497 	  done_p = true;
22498 	  ret = NO_REGS;
22499 	  if (extra_cost > 0)
22500 	    {
22501 	      sri->extra_cost = extra_cost;
22502 	      sri->icode = icode;
22503 	    }
22504 	}
22505     }
22506 
22507   /* Handle unaligned loads and stores of integer registers.  */
22508   if (!done_p && TARGET_POWERPC64
22509       && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
22510       && memory_p
22511       && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
22512     {
22513       rtx addr = XEXP (x, 0);
22514       rtx off = address_offset (addr);
22515 
22516       if (off != NULL_RTX)
22517 	{
22518 	  unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
22519 	  unsigned HOST_WIDE_INT offset = INTVAL (off);
22520 
22521 	  /* We need a secondary reload when our legitimate_address_p
22522 	     says the address is good (as otherwise the entire address
22523 	     will be reloaded), and the offset is not a multiple of
22524 	     four or we have an address wrap.  Address wrap will only
22525 	     occur for LO_SUMs since legitimate_offset_address_p
22526 	     rejects addresses for 16-byte mems that will wrap.  */
22527 	  if (GET_CODE (addr) == LO_SUM
22528 	      ? (1 /* legitimate_address_p allows any offset for lo_sum */
22529 		 && ((offset & 3) != 0
22530 		     || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
22531 	      : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
22532 		 && (offset & 3) != 0))
22533 	    {
22534 	      /* -m32 -mpowerpc64 needs to use a 32-bit scratch register.  */
22535 	      if (in_p)
22536 		sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
22537 			      : CODE_FOR_reload_di_load);
22538 	      else
22539 		sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
22540 			      : CODE_FOR_reload_di_store);
22541 	      sri->extra_cost = 2;
22542 	      ret = NO_REGS;
22543 	      done_p = true;
22544 	    }
22545 	  else
22546 	    default_p = true;
22547 	}
22548       else
22549 	default_p = true;
22550     }
22551 
22552   if (!done_p && !TARGET_POWERPC64
22553       && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
22554       && memory_p
22555       && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
22556     {
22557       rtx addr = XEXP (x, 0);
22558       rtx off = address_offset (addr);
22559 
22560       if (off != NULL_RTX)
22561 	{
22562 	  unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
22563 	  unsigned HOST_WIDE_INT offset = INTVAL (off);
22564 
22565 	  /* We need a secondary reload when our legitimate_address_p
22566 	     says the address is good (as otherwise the entire address
22567 	     will be reloaded), and we have a wrap.
22568 
22569 	     legitimate_lo_sum_address_p allows LO_SUM addresses to
22570 	     have any offset so test for wrap in the low 16 bits.
22571 
22572 	     legitimate_offset_address_p checks for the range
22573 	     [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
22574 	     for mode size of 16.  We wrap at [0x7ffc,0x7fff] and
22575 	     [0x7ff4,0x7fff] respectively, so test for the
22576 	     intersection of these ranges, [0x7ffc,0x7fff] and
22577 	     [0x7ff4,0x7ff7] respectively.
22578 
22579 	     Note that the address we see here may have been
22580 	     manipulated by legitimize_reload_address.  */
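	  /* A concrete check of the offset form (illustrative): with an
	     8-byte access and UNITS_PER_WORD == 4, EXTRA is 4, so offset
	     0x7ffc gives 0x7ffc - (0x8000 - 4) == 0, which is less than
	     UNITS_PER_WORD and triggers the reload, while offset 0x7ff8
	     wraps the unsigned subtraction and does not.  */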
22581 	  if (GET_CODE (addr) == LO_SUM
22582 	      ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
22583 	      : offset - (0x8000 - extra) < UNITS_PER_WORD)
22584 	    {
22585 	      if (in_p)
22586 		sri->icode = CODE_FOR_reload_si_load;
22587 	      else
22588 		sri->icode = CODE_FOR_reload_si_store;
22589 	      sri->extra_cost = 2;
22590 	      ret = NO_REGS;
22591 	      done_p = true;
22592 	    }
22593 	  else
22594 	    default_p = true;
22595 	}
22596       else
22597 	default_p = true;
22598     }
22599 
22600   if (!done_p)
22601     default_p = true;
22602 
22603   if (default_p)
22604     ret = default_secondary_reload (in_p, x, rclass, mode, sri);
22605 
22606   gcc_assert (ret != ALL_REGS);
22607 
22608   if (TARGET_DEBUG_ADDR)
22609     {
22610       fprintf (stderr,
22611 	       "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
22612 	       "mode = %s",
22613 	       reg_class_names[ret],
22614 	       in_p ? "true" : "false",
22615 	       reg_class_names[rclass],
22616 	       GET_MODE_NAME (mode));
22617 
22618       if (reload_completed)
22619 	fputs (", after reload", stderr);
22620 
22621       if (!done_p)
22622 	fputs (", done_p not set", stderr);
22623 
22624       if (default_p)
22625 	fputs (", default secondary reload", stderr);
22626 
22627       if (sri->icode != CODE_FOR_nothing)
22628 	fprintf (stderr, ", reload func = %s, extra cost = %d",
22629 		 insn_data[sri->icode].name, sri->extra_cost);
22630 
22631       else if (sri->extra_cost > 0)
22632 	fprintf (stderr, ", extra cost = %d", sri->extra_cost);
22633 
22634       fputs ("\n", stderr);
22635       debug_rtx (x);
22636     }
22637 
22638   return ret;
22639 }
22640 
22641 /* Better tracing for rs6000_secondary_reload_inner.  */
22642 
22643 static void
22644 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
22645 			       bool store_p)
22646 {
22647   rtx set, clobber;
22648 
22649   gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
22650 
22651   fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
22652 	   store_p ? "store" : "load");
22653 
22654   if (store_p)
22655     set = gen_rtx_SET (mem, reg);
22656   else
22657     set = gen_rtx_SET (reg, mem);
22658 
22659   clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
22660   debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
22661 }
22662 
22663 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
22664   ATTRIBUTE_NORETURN;
22665 
22666 static void
22667 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
22668 			      bool store_p)
22669 {
22670   rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
22671   gcc_unreachable ();
22672 }
22673 
22674 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
22675    reload helper functions.  These were identified in
22676    rs6000_secondary_reload_memory, and if reload decided to use the secondary
22677    reload, it calls the insns:
22678 	reload_<RELOAD:mode>_<P:mptrsize>_store
22679 	reload_<RELOAD:mode>_<P:mptrsize>_load
22680 
22681    which in turn calls this function, to do whatever is necessary to create
22682    valid addresses.  */
22683 
22684 void
22685 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
22686 {
22687   int regno = true_regnum (reg);
22688   machine_mode mode = GET_MODE (reg);
22689   addr_mask_type addr_mask;
22690   rtx addr;
22691   rtx new_addr;
22692   rtx op_reg, op0, op1;
22693   rtx and_op;
22694   rtx cc_clobber;
22695   rtvec rv;
22696 
22697   if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
22698       || !base_reg_operand (scratch, GET_MODE (scratch)))
22699     rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22700 
22701   if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
22702     addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
22703 
22704   else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
22705     addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
22706 
22707   else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
22708     addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
22709 
22710   else
22711     rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22712 
22713   /* Make sure the mode is valid in this register class.  */
22714   if ((addr_mask & RELOAD_REG_VALID) == 0)
22715     rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22716 
22717   if (TARGET_DEBUG_ADDR)
22718     rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
22719 
22720   new_addr = addr = XEXP (mem, 0);
22721   switch (GET_CODE (addr))
22722     {
22723       /* Does the register class support auto update forms for this mode?  If
22724 	 not, do the update now.  We don't need a scratch register, since the
22725 	 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY.  */
22726     case PRE_INC:
22727     case PRE_DEC:
22728       op_reg = XEXP (addr, 0);
22729       if (!base_reg_operand (op_reg, Pmode))
22730 	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22731 
22732       if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
22733 	{
22734 	  /* A PRE_DEC update subtracts the access size, so negate it.  */
22735 	  int delta = GET_MODE_SIZE (mode);
22736 	  if (GET_CODE (addr) == PRE_DEC)
	    delta = -delta;
	  emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
	  new_addr = op_reg;
	}
22737       break;
22738 
22739     case PRE_MODIFY:
22740       op0 = XEXP (addr, 0);
22741       op1 = XEXP (addr, 1);
22742       if (!base_reg_operand (op0, Pmode)
22743 	  || GET_CODE (op1) != PLUS
22744 	  || !rtx_equal_p (op0, XEXP (op1, 0)))
22745 	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22746 
22747       if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
22748 	{
22749 	  emit_insn (gen_rtx_SET (op0, op1));
22750 	  new_addr = op0;
22751 	}
22752       break;
22753 
22754       /* Do we need to simulate AND -16 to clear the bottom address bits used
22755 	 in VMX load/stores?  */
22756     case AND:
22757       op0 = XEXP (addr, 0);
22758       op1 = XEXP (addr, 1);
22759       if ((addr_mask & RELOAD_REG_AND_M16) == 0)
22760 	{
22761 	  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
22762 	    op_reg = op0;
22763 
22764 	  else if (GET_CODE (op0) == PLUS)
22765 	    {
22766 	      emit_insn (gen_rtx_SET (scratch, op0));
22767 	      op_reg = scratch;
22768 	    }
22769 
22770 	  else
22771 	    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22772 
22773 	  and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
22774 	  cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
22775 	  rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
22776 	  emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
22777 	  new_addr = scratch;
22778 	}
22779       break;
22780 
22781       /* If this is an indirect address, make sure it is a base register.  */
22782     case REG:
22783     case SUBREG:
22784       if (!base_reg_operand (addr, GET_MODE (addr)))
22785 	{
22786 	  emit_insn (gen_rtx_SET (scratch, addr));
22787 	  new_addr = scratch;
22788 	}
22789       break;
22790 
22791       /* If this is an indexed address, make sure the register class can handle
22792 	 indexed addresses for this mode.  */
22793     case PLUS:
22794       op0 = XEXP (addr, 0);
22795       op1 = XEXP (addr, 1);
22796       if (!base_reg_operand (op0, Pmode))
22797 	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22798 
22799       else if (int_reg_operand (op1, Pmode))
22800 	{
22801 	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
22802 	    {
22803 	      emit_insn (gen_rtx_SET (scratch, addr));
22804 	      new_addr = scratch;
22805 	    }
22806 	}
22807 
22808       else if (mode_supports_vsx_dform_quad (mode) && CONST_INT_P (op1))
22809 	{
22810 	  if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
22811 	      || !quad_address_p (addr, mode, false))
22812 	    {
22813 	      emit_insn (gen_rtx_SET (scratch, addr));
22814 	      new_addr = scratch;
22815 	    }
22816 	}
22817 
22818       /* Make sure the register class can handle offset addresses.  */
22819       else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
22820 	{
22821 	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22822 	    {
22823 	      emit_insn (gen_rtx_SET (scratch, addr));
22824 	      new_addr = scratch;
22825 	    }
22826 	}
22827 
22828       else
22829 	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22830 
22831       break;
22832 
22833     case LO_SUM:
22834       op0 = XEXP (addr, 0);
22835       op1 = XEXP (addr, 1);
22836       if (!base_reg_operand (op0, Pmode))
22837 	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22838 
22839       else if (int_reg_operand (op1, Pmode))
22840 	{
22841 	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
22842 	    {
22843 	      emit_insn (gen_rtx_SET (scratch, addr));
22844 	      new_addr = scratch;
22845 	    }
22846 	}
22847 
22848       /* Quad offsets are restricted and can't handle normal addresses.  */
22849       else if (mode_supports_vsx_dform_quad (mode))
22850 	{
22851 	  emit_insn (gen_rtx_SET (scratch, addr));
22852 	  new_addr = scratch;
22853 	}
22854 
22855       /* Make sure the register class can handle offset addresses.  */
22856       else if (legitimate_lo_sum_address_p (mode, addr, false))
22857 	{
22858 	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22859 	    {
22860 	      emit_insn (gen_rtx_SET (scratch, addr));
22861 	      new_addr = scratch;
22862 	    }
22863 	}
22864 
22865       else
22866 	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22867 
22868       break;
22869 
22870     case SYMBOL_REF:
22871     case CONST:
22872     case LABEL_REF:
22873       rs6000_emit_move (scratch, addr, Pmode);
22874       new_addr = scratch;
22875       break;
22876 
22877     default:
22878       rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22879     }
22880 
22881   /* Adjust the address if it changed.  */
22882   if (addr != new_addr)
22883     {
22884       mem = replace_equiv_address_nv (mem, new_addr);
22885       if (TARGET_DEBUG_ADDR)
22886 	fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
22887     }
22888 
22889   /* Now create the move.  */
22890   if (store_p)
22891     emit_insn (gen_rtx_SET (mem, reg));
22892   else
22893     emit_insn (gen_rtx_SET (reg, mem));
22894 
22895   return;
22896 }
22897 
22898 /* Convert reloads involving 64-bit gprs and misaligned offset
22899    addressing, or multiple 32-bit gprs and offsets that are too large,
22900    to use indirect addressing.  */
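/* Illustrative sketch (assumed-typical output): the 64-bit "ld" and "std"
   instructions are DS-form and require the offset to be a multiple of 4,
   so a reload of (mem:DI (plus (reg r9) (const_int 5))) would be turned
   into roughly

	addi 11,9,5		# move the whole address into the scratch
	ld 5,0(11)		# register-indirect form is always valid

   with r11 standing in for the scratch register reload allocated.  */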
22901 
22902 void
22903 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
22904 {
22905   int regno = true_regnum (reg);
22906   enum reg_class rclass;
22907   rtx addr;
22908   rtx scratch_or_premodify = scratch;
22909 
22910   if (TARGET_DEBUG_ADDR)
22911     {
22912       fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
22913 	       store_p ? "store" : "load");
22914       fprintf (stderr, "reg:\n");
22915       debug_rtx (reg);
22916       fprintf (stderr, "mem:\n");
22917       debug_rtx (mem);
22918       fprintf (stderr, "scratch:\n");
22919       debug_rtx (scratch);
22920     }
22921 
22922   gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
22923   gcc_assert (GET_CODE (mem) == MEM);
22924   rclass = REGNO_REG_CLASS (regno);
22925   gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
22926   addr = XEXP (mem, 0);
22927 
22928   if (GET_CODE (addr) == PRE_MODIFY)
22929     {
22930       gcc_assert (REG_P (XEXP (addr, 0))
22931 		  && GET_CODE (XEXP (addr, 1)) == PLUS
22932 		  && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
22933       scratch_or_premodify = XEXP (addr, 0);
22934       if (!HARD_REGISTER_P (scratch_or_premodify))
22935 	/* If we have a pseudo here then reload will have arranged
22936 	   to have it replaced, but only in the original insn.
22937 	   Use the replacement here too.  */
22938 	scratch_or_premodify = find_replacement (&XEXP (addr, 0));
22939 
22940       /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
22941 	 expressions from the original insn, without unsharing them.
22942 	 Any RTL that points into the original insn will of course
22943 	 have register replacements applied.  That is why we don't
22944 	 need to look for replacements under the PLUS.  */
22945       addr = XEXP (addr, 1);
22946     }
22947   gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
22948 
22949   rs6000_emit_move (scratch_or_premodify, addr, Pmode);
22950 
22951   mem = replace_equiv_address_nv (mem, scratch_or_premodify);
22952 
22953   /* Now create the move.  */
22954   if (store_p)
22955     emit_insn (gen_rtx_SET (mem, reg));
22956   else
22957     emit_insn (gen_rtx_SET (reg, mem));
22958 
22959   return;
22960 }
22961 
22962 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
22963    this function has any SDmode references.  If we are on a power7 or later, we
22964    don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
22965    can load/store the value.  */
22966 
22967 static void
22968 rs6000_alloc_sdmode_stack_slot (void)
22969 {
22970   tree t;
22971   basic_block bb;
22972   gimple_stmt_iterator gsi;
22973 
22974   gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
22975   /* We use a different approach for dealing with the secondary
22976      memory in LRA.  */
22977   if (ira_use_lra_p)
22978     return;
22979 
22980   if (TARGET_NO_SDMODE_STACK)
22981     return;
22982 
22983   FOR_EACH_BB_FN (bb, cfun)
22984     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
22985       {
22986 	tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
22987 	if (ret)
22988 	  {
22989 	    rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
22990 	    cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
22991 								  SDmode, 0);
22992 	    return;
22993 	  }
22994       }
22995 
22996   /* Check for any SDmode parameters of the function.  */
22997   for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
22998     {
22999       if (TREE_TYPE (t) == error_mark_node)
23000 	continue;
23001 
23002       if (TYPE_MODE (TREE_TYPE (t)) == SDmode
23003 	  || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
23004 	{
23005 	  rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
23006 	  cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
23007 								SDmode, 0);
23008 	  return;
23009 	}
23010     }
23011 }
23012 
23013 static void
23014 rs6000_instantiate_decls (void)
23015 {
23016   if (cfun->machine->sdmode_stack_slot != NULL_RTX)
23017     instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
23018 }
23019 
23020 /* Given an rtx X being reloaded into a reg required to be
23021    in class CLASS, return the class of reg to actually use.
23022    In general this is just CLASS; but on some machines
23023    in some cases it is preferable to use a more restrictive class.
23024 
23025    On the RS/6000, we have to return NO_REGS when we want to reload a
23026    floating-point CONST_DOUBLE to force it to be copied to memory.
23027 
23028    We also don't want to reload integer values into floating-point
23029    registers if we can at all help it.  In fact, this can
23030    cause reload to die if it tries to generate a reload of CTR
23031    into a FP register and discovers it doesn't have the memory location
23032    required.
23033 
23034    ??? Would it be a good idea to have reload do the converse, that is
23035    try to reload floating modes into FP registers if possible?
23036  */
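/* Two illustrative outcomes (sketches, assuming the obvious target
   flags): reloading (const_double:DF 1.5) with RCLASS == FLOAT_REGS
   returns NO_REGS, forcing the constant into memory; reloading
   (const_int -1) with RCLASS == VSX_REGS returns VSX_REGS when ISA 2.07
   (power8) is available, since XXLORC can generate all ones in any VSX
   register, but ALTIVEC_REGS on ISA 2.06, where VSPLTISW must be used.  */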
23037 
23038 static enum reg_class
23039 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
23040 {
23041   machine_mode mode = GET_MODE (x);
23042   bool is_constant = CONSTANT_P (x);
23043 
23044   /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
23045      reload class for it.  */
23046   if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
23047       && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
23048     return NO_REGS;
23049 
23050   if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
23051       && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
23052     return NO_REGS;
23053 
23054   /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS.  Do not allow
23055      the reloading of address expressions using PLUS into floating point
23056      registers.  */
23057   if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
23058     {
23059       if (is_constant)
23060 	{
23061 	  /* Zero is always allowed in all VSX registers.  */
23062 	  if (x == CONST0_RTX (mode))
23063 	    return rclass;
23064 
23065 	  /* If this is a vector constant that can be formed with a few Altivec
23066 	     instructions, we want altivec registers.  */
23067 	  if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
23068 	    return ALTIVEC_REGS;
23069 
23070 	  /* If this is an integer constant that can easily be loaded into
23071 	     vector registers, allow it.  */
23072 	  if (CONST_INT_P (x))
23073 	    {
23074 	      HOST_WIDE_INT value = INTVAL (x);
23075 
23076 	      /* ISA 2.07 can generate -1 in all registers with XXLORC.  ISA
23077 		 2.06 can generate it in the Altivec registers with
23078 		 VSPLTI<x>.  */
23079 	      if (value == -1)
23080 		{
23081 		  if (TARGET_P8_VECTOR)
23082 		    return rclass;
23083 		  else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
23084 		    return ALTIVEC_REGS;
23085 		  else
23086 		    return NO_REGS;
23087 		}
23088 
23089 	      /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
23090 		 a sign extend in the Altivec registers.  */
23091 	      if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
23092 		  && TARGET_VSX_SMALL_INTEGER
23093 		  && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
23094 		return ALTIVEC_REGS;
23095 	    }
23096 
23097 	  /* Force constant to memory.  */
23098 	  return NO_REGS;
23099 	}
23100 
23101       /* D-form addressing can easily reload the value.  */
23102       if (mode_supports_vmx_dform (mode)
23103 	  || mode_supports_vsx_dform_quad (mode))
23104 	return rclass;
23105 
23106       /* If this is a scalar floating point value and we don't have D-form
23107 	 addressing, prefer the traditional floating point registers so that we
23108 	 can use D-form (register+offset) addressing.  */
23109       if (rclass == VSX_REGS
23110 	  && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
23111 	return FLOAT_REGS;
23112 
23113       /* Prefer the Altivec registers if Altivec is handling the vector
23114 	 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
23115 	 loads.  */
23116       if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
23117 	  || mode == V1TImode)
23118 	return ALTIVEC_REGS;
23119 
23120       return rclass;
23121     }
23122 
23123   if (is_constant || GET_CODE (x) == PLUS)
23124     {
23125       if (reg_class_subset_p (GENERAL_REGS, rclass))
23126 	return GENERAL_REGS;
23127       if (reg_class_subset_p (BASE_REGS, rclass))
23128 	return BASE_REGS;
23129       return NO_REGS;
23130     }
23131 
23132   if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
23133     return GENERAL_REGS;
23134 
23135   return rclass;
23136 }
23137 
23138 /* Debug version of rs6000_preferred_reload_class.  */
23139 static enum reg_class
23140 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
23141 {
23142   enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
23143 
23144   fprintf (stderr,
23145 	   "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
23146 	   "mode = %s, x:\n",
23147 	   reg_class_names[ret], reg_class_names[rclass],
23148 	   GET_MODE_NAME (GET_MODE (x)));
23149   debug_rtx (x);
23150 
23151   return ret;
23152 }
23153 
23154 /* If we are copying between FP or AltiVec registers and anything else, we need
23155    a memory location.  The exception is when we are targeting ppc64 and the
23156    direct moves between FPRs and GPRs are available.  Also, under VSX, you
23157    can copy vector registers from the FP register set to the Altivec register
23158    set and vice versa.  */
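/* For instance (a sketch): without direct moves, a DFmode copy between
   FLOAT_REGS and GENERAL_REGS reports true and travels through a stack
   slot; on power8, where the mfvsrd/mtvsrd direct moves exist,
   rs6000_secondary_reload_move succeeds and no memory is needed.  */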
23159 
23160 static bool
23161 rs6000_secondary_memory_needed (machine_mode mode,
23162 				reg_class_t from_class,
23163 				reg_class_t to_class)
23164 {
23165   enum rs6000_reg_type from_type, to_type;
23166   bool altivec_p = ((from_class == ALTIVEC_REGS)
23167 		    || (to_class == ALTIVEC_REGS));
23168 
23169   /* If a simple/direct move is available, we don't need secondary memory.  */
23170   from_type = reg_class_to_reg_type[(int)from_class];
23171   to_type = reg_class_to_reg_type[(int)to_class];
23172 
23173   if (rs6000_secondary_reload_move (to_type, from_type, mode,
23174 				    (secondary_reload_info *)0, altivec_p))
23175     return false;
23176 
23177   /* If we have a floating point or vector register class, we need to use
23178      memory to transfer the data.  */
23179   if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
23180     return true;
23181 
23182   return false;
23183 }
23184 
23185 /* Debug version of rs6000_secondary_memory_needed.  */
23186 static bool
23187 rs6000_debug_secondary_memory_needed (machine_mode mode,
23188 				      reg_class_t from_class,
23189 				      reg_class_t to_class)
23190 {
23191   bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
23192 
23193   fprintf (stderr,
23194 	   "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
23195 	   "to_class = %s, mode = %s\n",
23196 	   ret ? "true" : "false",
23197 	   reg_class_names[from_class],
23198 	   reg_class_names[to_class],
23199 	   GET_MODE_NAME (mode));
23200 
23201   return ret;
23202 }
23203 
23204 /* Return the register class of a scratch register needed to copy IN into
23205    or out of a register in RCLASS in MODE.  If it can be done directly,
23206    NO_REGS is returned.  */
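/* A couple of illustrative outcomes (sketches): under TARGET_ELF, copying
   (symbol_ref "x") into FLOAT_REGS returns BASE_REGS, because the
   symbolic address must first be formed in a base register; a DFmode copy
   from one FPR into FLOAT_REGS returns NO_REGS, since no scratch is
   needed.  */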
23207 
23208 static enum reg_class
23209 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
23210 			       rtx in)
23211 {
23212   int regno;
23213 
23214   if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
23215 #if TARGET_MACHO
23216 		     && MACHOPIC_INDIRECT
23217 #endif
23218 		     ))
23219     {
23220       /* We cannot copy a symbolic operand directly into anything
23221 	 other than BASE_REGS for TARGET_ELF.  So indicate that a
23222 	 register from BASE_REGS is needed as an intermediate
23223 	 register.
23224 
23225 	 On Darwin, pic addresses require a load from memory, which
23226 	 needs a base register.  */
23227       if (rclass != BASE_REGS
23228 	  && (GET_CODE (in) == SYMBOL_REF
23229 	      || GET_CODE (in) == HIGH
23230 	      || GET_CODE (in) == LABEL_REF
23231 	      || GET_CODE (in) == CONST))
23232 	return BASE_REGS;
23233     }
23234 
23235   if (GET_CODE (in) == REG)
23236     {
23237       regno = REGNO (in);
23238       if (regno >= FIRST_PSEUDO_REGISTER)
23239 	{
23240 	  regno = true_regnum (in);
23241 	  if (regno >= FIRST_PSEUDO_REGISTER)
23242 	    regno = -1;
23243 	}
23244     }
23245   else if (GET_CODE (in) == SUBREG)
23246     {
23247       regno = true_regnum (in);
23248       if (regno >= FIRST_PSEUDO_REGISTER)
23249 	regno = -1;
23250     }
23251   else
23252     regno = -1;
23253 
23254   /* If we have VSX register moves, prefer moving scalar values between
23255      Altivec registers and GPR by going via an FPR (and then via memory)
23256      instead of reloading the secondary memory address for Altivec moves.  */
23257   if (TARGET_VSX
23258       && GET_MODE_SIZE (mode) < 16
23259       && !mode_supports_vmx_dform (mode)
23260       && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
23261            && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
23262           || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
23263               && (regno >= 0 && INT_REGNO_P (regno)))))
23264     return FLOAT_REGS;
23265 
23266   /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
23267      into anything.  */
23268   if (rclass == GENERAL_REGS || rclass == BASE_REGS
23269       || (regno >= 0 && INT_REGNO_P (regno)))
23270     return NO_REGS;
23271 
23272   /* Constants, memory, and VSX registers can go into VSX registers (both the
23273      traditional floating point and the altivec registers).  */
23274   if (rclass == VSX_REGS
23275       && (regno == -1 || VSX_REGNO_P (regno)))
23276     return NO_REGS;
23277 
23278   /* Constants, memory, and FP registers can go into FP registers.  */
23279   if ((regno == -1 || FP_REGNO_P (regno))
23280       && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
23281     return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
23282 
23283   /* Memory and AltiVec registers can go into AltiVec registers.  */
23284   if ((regno == -1 || ALTIVEC_REGNO_P (regno))
23285       && rclass == ALTIVEC_REGS)
23286     return NO_REGS;
23287 
23288   /* We can copy among the CR registers.  */
23289   if ((rclass == CR_REGS || rclass == CR0_REGS)
23290       && regno >= 0 && CR_REGNO_P (regno))
23291     return NO_REGS;
23292 
23293   /* Otherwise, we need GENERAL_REGS.  */
23294   return GENERAL_REGS;
23295 }
23296 
23297 /* Debug version of rs6000_secondary_reload_class.  */
23298 static enum reg_class
23299 rs6000_debug_secondary_reload_class (enum reg_class rclass,
23300 				     machine_mode mode, rtx in)
23301 {
23302   enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
23303   fprintf (stderr,
23304 	   "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
23305 	   "mode = %s, input rtx:\n",
23306 	   reg_class_names[ret], reg_class_names[rclass],
23307 	   GET_MODE_NAME (mode));
23308   debug_rtx (in);
23309 
23310   return ret;
23311 }
23312 
23313 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
23314 
23315 static bool
23316 rs6000_can_change_mode_class (machine_mode from,
23317 			      machine_mode to,
23318 			      reg_class_t rclass)
23319 {
23320   unsigned from_size = GET_MODE_SIZE (from);
23321   unsigned to_size = GET_MODE_SIZE (to);
23322 
23323   if (from_size != to_size)
23324     {
23325       enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
23326 
23327       if (reg_classes_intersect_p (xclass, rclass))
23328 	{
23329 	  unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
23330 	  unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
23331 	  bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
23332 	  bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
23333 
23334 	  /* Don't allow 64-bit types to overlap with 128-bit types that take a
23335 	     single register under VSX because the scalar part of the register
23336 	     is in the upper 64-bits, and not the lower 64-bits.  Types like
23337 	     TFmode/TDmode that take 2 scalar register can overlap.  128-bit
23338 	     IEEE floating point can't overlap, and neither can small
23339 	     values.  */
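	  /* E.g. (a sketch, assuming IBM double-double TFmode): a DImode
	     subreg of a KFmode value is rejected because the KFmode
	     scalar lives in the upper half of a single VSX register,
	     while TFmode spans two FPRs and so may overlap DFmode.  */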
23340 
23341 	  if (to_float128_vector_p && from_float128_vector_p)
23342 	    return true;
23343 
23344 	  else if (to_float128_vector_p || from_float128_vector_p)
23345 	    return false;
23346 
23347 	  /* TDmode in floating-mode registers must always go into a register
23348 	     pair with the most significant word in the even-numbered register
23349 	     to match ISA requirements.  In little-endian mode, this does not
23350 	     match subreg numbering, so we cannot allow subregs.  */
23351 	  if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
23352 	    return false;
23353 
23354 	  if (from_size < 8 || to_size < 8)
23355 	    return false;
23356 
23357 	  if (from_size == 8 && (8 * to_nregs) != to_size)
23358 	    return false;
23359 
23360 	  if (to_size == 8 && (8 * from_nregs) != from_size)
23361 	    return false;
23362 
23363 	  return true;
23364 	}
23365       else
23366 	return true;
23367     }
23368 
23369   if (TARGET_E500_DOUBLE
23370       && ((((to) == DFmode) + ((from) == DFmode)) == 1
23371 	  || (((to) == TFmode) + ((from) == TFmode)) == 1
23372 	  || (((to) == IFmode) + ((from) == IFmode)) == 1
23373 	  || (((to) == KFmode) + ((from) == KFmode)) == 1
23374 	  || (((to) == DDmode) + ((from) == DDmode)) == 1
23375 	  || (((to) == TDmode) + ((from) == TDmode)) == 1
23376 	  || (((to) == DImode) + ((from) == DImode)) == 1))
23377     return false;
23378 
23379   /* Since the VSX register set includes traditional floating point registers
23380      and altivec registers, just check for the size being different instead of
23381      trying to check whether the modes are vector modes.  Otherwise it won't
23382      allow say DF and DI to change classes.  For types like TFmode and TDmode
23383      that take 2 64-bit registers, rather than a single 128-bit register, don't
23384      allow subregs of those types to other 128 bit types.  */
23385   if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
23386     {
23387       unsigned num_regs = (from_size + 15) / 16;
23388       if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
23389 	  || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
23390 	return false;
23391 
23392       return (from_size == 8 || from_size == 16);
23393     }
23394 
23395   if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
23396       && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
23397     return false;
23398 
23399   if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
23400       && reg_classes_intersect_p (GENERAL_REGS, rclass))
23401     return false;
23402 
23403   return true;
23404 }
23405 
23406 /* Debug version of rs6000_can_change_mode_class.  */
23407 static bool
23408 rs6000_debug_can_change_mode_class (machine_mode from,
23409 				    machine_mode to,
23410 				    reg_class_t rclass)
23411 {
23412   bool ret = rs6000_can_change_mode_class (from, to, rclass);
23413 
23414   fprintf (stderr,
23415 	   "rs6000_can_change_mode_class, return %s, from = %s, "
23416 	   "to = %s, rclass = %s\n",
23417 	   ret ? "true" : "false",
23418 	   GET_MODE_NAME (from), GET_MODE_NAME (to),
23419 	   reg_class_names[rclass]);
23420 
23421   return ret;
23422 }
23423 
23424 /* Return a string to do a move operation of 128 bits of data.  */
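/* A quick summary of the templates returned below (illustrative): "#"
   means the move must be split after reload; VSX register-to-register
   moves use "xxlor" and Altivec ones "vor"; ISA 3.0 direct moves between
   GPR pairs and VSX registers use the mfvsrd/mfvsrld and mtvsrdd
   sequences spelled out in the code.  */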
23425 
23426 const char *
23427 rs6000_output_move_128bit (rtx operands[])
23428 {
23429   rtx dest = operands[0];
23430   rtx src = operands[1];
23431   machine_mode mode = GET_MODE (dest);
23432   int dest_regno;
23433   int src_regno;
23434   bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
23435   bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
23436 
23437   if (REG_P (dest))
23438     {
23439       dest_regno = REGNO (dest);
23440       dest_gpr_p = INT_REGNO_P (dest_regno);
23441       dest_fp_p = FP_REGNO_P (dest_regno);
23442       dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
23443       dest_vsx_p = dest_fp_p | dest_vmx_p;
23444     }
23445   else
23446     {
23447       dest_regno = -1;
23448       dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
23449     }
23450 
23451   if (REG_P (src))
23452     {
23453       src_regno = REGNO (src);
23454       src_gpr_p = INT_REGNO_P (src_regno);
23455       src_fp_p = FP_REGNO_P (src_regno);
23456       src_vmx_p = ALTIVEC_REGNO_P (src_regno);
23457       src_vsx_p = src_fp_p | src_vmx_p;
23458     }
23459   else
23460     {
23461       src_regno = -1;
23462       src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
23463     }
23464 
23465   /* Register moves.  */
23466   if (dest_regno >= 0 && src_regno >= 0)
23467     {
23468       if (dest_gpr_p)
23469 	{
23470 	  if (src_gpr_p)
23471 	    return "#";
23472 
23473 	  if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
23474 	    return (WORDS_BIG_ENDIAN
23475 		    ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
23476 		    : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
23477 
23478 	  else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
23479 	    return "#";
23480 	}
23481 
23482       else if (TARGET_VSX && dest_vsx_p)
23483 	{
23484 	  if (src_vsx_p)
23485 	    return "xxlor %x0,%x1,%x1";
23486 
23487 	  else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
23488 	    return (WORDS_BIG_ENDIAN
23489 		    ? "mtvsrdd %x0,%1,%L1"
23490 		    : "mtvsrdd %x0,%L1,%1");
23491 
23492 	  else if (TARGET_DIRECT_MOVE && src_gpr_p)
23493 	    return "#";
23494 	}
23495 
23496       else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
23497 	return "vor %0,%1,%1";
23498 
23499       else if (dest_fp_p && src_fp_p)
23500 	return "#";
23501     }
23502 
23503   /* Loads.  */
23504   else if (dest_regno >= 0 && MEM_P (src))
23505     {
23506       if (dest_gpr_p)
23507 	{
23508 	  if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
23509 	    return "lq %0,%1";
23510 	  else
23511 	    return "#";
23512 	}
23513 
23514       else if (TARGET_ALTIVEC && dest_vmx_p
23515 	       && altivec_indexed_or_indirect_operand (src, mode))
23516 	return "lvx %0,%y1";
23517 
23518       else if (TARGET_VSX && dest_vsx_p)
23519 	{
23520 	  if (mode_supports_vsx_dform_quad (mode)
23521 	      && quad_address_p (XEXP (src, 0), mode, true))
23522 	    return "lxv %x0,%1";
23523 
23524 	  else if (TARGET_P9_VECTOR)
23525 	    return "lxvx %x0,%y1";
23526 
23527 	  else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
23528 	    return "lxvw4x %x0,%y1";
23529 
23530 	  else
23531 	    return "lxvd2x %x0,%y1";
23532 	}
23533 
23534       else if (TARGET_ALTIVEC && dest_vmx_p)
23535 	return "lvx %0,%y1";
23536 
23537       else if (dest_fp_p)
23538 	return "#";
23539     }
23540 
23541   /* Stores.  */
23542   else if (src_regno >= 0 && MEM_P (dest))
23543     {
23544       if (src_gpr_p)
23545 	{
23546 	  if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
23547 	    return "stq %1,%0";
23548 	  else
23549 	    return "#";
23550 	}
23551 
23552       else if (TARGET_ALTIVEC && src_vmx_p
23553 	       && altivec_indexed_or_indirect_operand (dest, mode))
23554 	return "stvx %1,%y0";
23555 
23556       else if (TARGET_VSX && src_vsx_p)
23557 	{
23558 	  if (mode_supports_vsx_dform_quad (mode)
23559 	      && quad_address_p (XEXP (dest, 0), mode, true))
23560 	    return "stxv %x1,%0";
23561 
23562 	  else if (TARGET_P9_VECTOR)
23563 	    return "stxvx %x1,%y0";
23564 
23565 	  else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
23566 	    return "stxvw4x %x1,%y0";
23567 
23568 	  else
23569 	    return "stxvd2x %x1,%y0";
23570 	}
23571 
23572       else if (TARGET_ALTIVEC && src_vmx_p)
23573 	return "stvx %1,%y0";
23574 
23575       else if (src_fp_p)
23576 	return "#";
23577     }
23578 
23579   /* Constants.  */
23580   else if (dest_regno >= 0
23581 	   && (GET_CODE (src) == CONST_INT
23582 	       || GET_CODE (src) == CONST_WIDE_INT
23583 	       || GET_CODE (src) == CONST_DOUBLE
23584 	       || GET_CODE (src) == CONST_VECTOR))
23585     {
23586       if (dest_gpr_p)
23587 	return "#";
23588 
23589       else if ((dest_vmx_p && TARGET_ALTIVEC)
23590 	       || (dest_vsx_p && TARGET_VSX))
23591 	return output_vec_const_move (operands);
23592     }
23593 
23594   fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
23595 }
23596 
23597 /* Validate a 128-bit move.  */
23598 bool
23599 rs6000_move_128bit_ok_p (rtx operands[])
23600 {
23601   machine_mode mode = GET_MODE (operands[0]);
23602   return (gpc_reg_operand (operands[0], mode)
23603 	  || gpc_reg_operand (operands[1], mode));
23604 }
23605 
23606 /* Return true if a 128-bit move needs to be split.  */
23607 bool
23608 rs6000_split_128bit_ok_p (rtx operands[])
23609 {
23610   if (!reload_completed)
23611     return false;
23612 
23613   if (!gpr_or_gpr_p (operands[0], operands[1]))
23614     return false;
23615 
23616   if (quad_load_store_p (operands[0], operands[1]))
23617     return false;
23618 
23619   return true;
23620 }
23621 
23622 
23623 /* Given a comparison operation, return the bit number in CCR to test.  We
23624    know this is a valid comparison.
23625 
23626    SCC_P is 1 if this is for an scc.  That means that %D will have been
23627    used instead of %C, so the bits will be in different places.
23628 
23629    Return -1 if OP isn't a valid comparison for some reason.  */
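/* Worked example: for (gt (reg:CC 70) (const_int 0)) where register 70 is
   cr2 (a sketch; the hard register number depends on the target headers),
   BASE_BIT is 4 * (cr2 - cr0) = 8, so with SCC_P == 0 the GT case below
   returns bit 9 of the CCR.  */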
23630 
23631 int
23632 ccr_bit (rtx op, int scc_p)
23633 {
23634   enum rtx_code code = GET_CODE (op);
23635   machine_mode cc_mode;
23636   int cc_regnum;
23637   int base_bit;
23638   rtx reg;
23639 
23640   if (!COMPARISON_P (op))
23641     return -1;
23642 
23643   reg = XEXP (op, 0);
23644 
23645   gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
23646 
23647   cc_mode = GET_MODE (reg);
23648   cc_regnum = REGNO (reg);
23649   base_bit = 4 * (cc_regnum - CR0_REGNO);
23650 
23651   validate_condition_mode (code, cc_mode);
23652 
23653   /* When generating a sCOND operation, only positive conditions are
23654      allowed.  */
23655   gcc_assert (!scc_p
23656 	      || code == EQ || code == GT || code == LT || code == UNORDERED
23657 	      || code == GTU || code == LTU);
23658 
23659   switch (code)
23660     {
23661     case NE:
23662       return scc_p ? base_bit + 3 : base_bit + 2;
23663     case EQ:
23664       return base_bit + 2;
23665     case GT:  case GTU:  case UNLE:
23666       return base_bit + 1;
23667     case LT:  case LTU:  case UNGE:
23668       return base_bit;
23669     case ORDERED:  case UNORDERED:
23670       return base_bit + 3;
23671 
23672     case GE:  case GEU:
23673       /* If scc, we will have done a cror to put the bit in the
23674 	 unordered position.  So test that bit.  For integer, this is ! LT
23675 	 unless this is an scc insn.  */
23676       return scc_p ? base_bit + 3 : base_bit;
23677 
23678     case LE:  case LEU:
23679       return scc_p ? base_bit + 3 : base_bit + 1;
23680 
23681     default:
23682       gcc_unreachable ();
23683     }
23684 }
23685 
23686 /* Return the GOT register.  */
23687 
23688 rtx
23689 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
23690 {
23691   /* The second flow pass currently (June 1999) can't update
23692      regs_ever_live without disturbing other parts of the compiler, so
23693      update it here to make the prolog/epilogue code happy.  */
23694   if (!can_create_pseudo_p ()
23695       && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
23696     df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
23697 
23698   crtl->uses_pic_offset_table = 1;
23699 
23700   return pic_offset_table_rtx;
23701 }
23702 
23703 static rs6000_stack_t stack_info;
23704 
23705 /* Function to init struct machine_function.
23706    This will be called, via a pointer variable,
23707    from push_function_context.  */
23708 
23709 static struct machine_function *
23710 rs6000_init_machine_status (void)
23711 {
23712   stack_info.reload_completed = 0;
23713   return ggc_cleared_alloc<machine_function> ();
23714 }
23715 
23716 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
23717 
23718 /* Write out a function code label.  */
23719 
23720 void
23721 rs6000_output_function_entry (FILE *file, const char *fname)
23722 {
23723   if (fname[0] != '.')
23724     {
23725       switch (DEFAULT_ABI)
23726 	{
23727 	default:
23728 	  gcc_unreachable ();
23729 
23730 	case ABI_AIX:
23731 	  if (DOT_SYMBOLS)
23732 	    putc ('.', file);
23733 	  else
23734 	    ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
23735 	  break;
23736 
23737 	case ABI_ELFv2:
23738 	case ABI_V4:
23739 	case ABI_DARWIN:
23740 	  break;
23741 	}
23742     }
23743 
23744   RS6000_OUTPUT_BASENAME (file, fname);
23745 }
23746 
23747 /* Print an operand.  Recognize special options, documented below.  */
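/* For example (illustrative): if operand 1 is (reg:DI 5) on a 32-bit
   target, "%1" prints the name of register 5 and "%L1" prints the name of
   register 6, the second word of the value; "%U1" on an auto-increment
   memory operand adds the "u" suffix that selects the update form.  */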
23748 
23749 #if TARGET_ELF
23750 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
23751 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
23752 #else
23753 #define SMALL_DATA_RELOC "sda21"
23754 #define SMALL_DATA_REG 0
23755 #endif
23756 
23757 void
23758 print_operand (FILE *file, rtx x, int code)
23759 {
23760   int i;
23761   unsigned HOST_WIDE_INT uval;
23762 
23763   switch (code)
23764     {
23765       /* %a is output_address.  */
23766 
23767       /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
23768 	 output_operand.  */
23769 
23770     case 'D':
23771       /* Like 'J' but get to the GT bit only.  */
23772       gcc_assert (REG_P (x));
23773 
23774       /* Bit 1 is GT bit.  */
23775       i = 4 * (REGNO (x) - CR0_REGNO) + 1;
23776 
23777       /* Add one for shift count in rlinm for scc.  */
23778       fprintf (file, "%d", i + 1);
23779       return;
23780 
23781     case 'e':
23782       /* If the low 16 bits are 0, but some other bit is set, write 's'.  */
23783       if (! INT_P (x))
23784 	{
23785 	  output_operand_lossage ("invalid %%e value");
23786 	  return;
23787 	}
23788 
23789       uval = INTVAL (x);
23790       if ((uval & 0xffff) == 0 && uval != 0)
23791 	putc ('s', file);
23792       return;
23793 
23794     case 'E':
23795       /* X is a CR register.  Print the number of the EQ bit of the CR.  */
23796       if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
23797 	output_operand_lossage ("invalid %%E value");
23798       else
23799 	fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
23800       return;
23801 
23802     case 'f':
23803       /* X is a CR register.  Print the shift count needed to move it
23804 	 to the high-order four bits.  */
23805       if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
23806 	output_operand_lossage ("invalid %%f value");
23807       else
23808 	fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
23809       return;
23810 
23811     case 'F':
23812       /* Similar, but print the count for the rotate in the opposite
23813 	 direction.  */
23814       if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
23815 	output_operand_lossage ("invalid %%F value");
23816       else
23817 	fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
23818       return;
23819 
23820     case 'G':
23821       /* X is a constant integer.  If it is negative, print "m",
23822 	 otherwise print "z".  This is to make an aze or ame insn.  */
23823       if (GET_CODE (x) != CONST_INT)
23824 	output_operand_lossage ("invalid %%G value");
23825       else if (INTVAL (x) >= 0)
23826 	putc ('z', file);
23827       else
23828 	putc ('m', file);
23829       return;
23830 
23831     case 'h':
23832       /* If constant, output low-order five bits.  Otherwise, write
23833 	 normally.  */
23834       if (INT_P (x))
23835 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
23836       else
23837 	print_operand (file, x, 0);
23838       return;
23839 
23840     case 'H':
23841       /* If constant, output low-order six bits.  Otherwise, write
23842 	 normally.  */
23843       if (INT_P (x))
23844 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
23845       else
23846 	print_operand (file, x, 0);
23847       return;
23848 
23849     case 'I':
23850       /* Print `i' if this is a constant, else nothing.  */
23851       if (INT_P (x))
23852 	putc ('i', file);
23853       return;
23854 
23855     case 'j':
23856       /* Write the bit number in CCR for jump.  */
23857       i = ccr_bit (x, 0);
23858       if (i == -1)
23859 	output_operand_lossage ("invalid %%j code");
23860       else
23861 	fprintf (file, "%d", i);
23862       return;
23863 
23864     case 'J':
23865       /* Similar, but add one for shift count in rlinm for scc and pass
23866 	 scc flag to `ccr_bit'.  */
23867       i = ccr_bit (x, 1);
23868       if (i == -1)
23869 	output_operand_lossage ("invalid %%J code");
23870       else
23871 	/* If we want bit 31, write a shift count of zero, not 32.  */
23872 	fprintf (file, "%d", i == 31 ? 0 : i + 1);
23873       return;
23874 
23875     case 'k':
23876       /* X must be a constant.  Write the 1's complement of the
23877 	 constant.  */
23878       if (! INT_P (x))
23879 	output_operand_lossage ("invalid %%k value");
23880       else
23881 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
23882       return;
23883 
23884     case 'K':
23885       /* X must be a symbolic constant on ELF.  Write an
23886 	 expression suitable for an 'addi' that adds in the low 16
23887 	 bits of the MEM.  */
23888       if (GET_CODE (x) == CONST)
23889 	{
23890 	  if (GET_CODE (XEXP (x, 0)) != PLUS
23891 	      || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
23892 		  && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
23893 	      || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
23894 	    output_operand_lossage ("invalid %%K value");
23895 	}
23896       print_operand_address (file, x);
23897       fputs ("@l", file);
23898       return;
23899 
23900       /* %l is output_asm_label.  */
23901 
23902     case 'L':
23903       /* Write second word of DImode or DFmode reference.  Works on register
23904 	 or non-indexed memory only.  */
23905       if (REG_P (x))
23906 	fputs (reg_names[REGNO (x) + 1], file);
23907       else if (MEM_P (x))
23908 	{
23909 	  machine_mode mode = GET_MODE (x);
23910 	  /* Handle possible auto-increment.  Since it is pre-increment and
23911 	     we have already done it, we can just use an offset of word.  */
23912 	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
23913 	      || GET_CODE (XEXP (x, 0)) == PRE_DEC)
23914 	    output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
23915 						 UNITS_PER_WORD));
23916 	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
23917 	    output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
23918 						 UNITS_PER_WORD));
23919 	  else
23920 	    output_address (mode, XEXP (adjust_address_nv (x, SImode,
23921 							   UNITS_PER_WORD),
23922 				  0));
23923 
23924 	  if (small_data_operand (x, GET_MODE (x)))
23925 	    fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
23926 		     reg_names[SMALL_DATA_REG]);
23927 	}
23928       return;
23929 
23930     case 'N':
23931       /* Write the number of elements in the vector times 4.  */
23932       if (GET_CODE (x) != PARALLEL)
23933 	output_operand_lossage ("invalid %%N value");
23934       else
23935 	fprintf (file, "%d", XVECLEN (x, 0) * 4);
23936       return;
23937 
23938     case 'O':
23939       /* Similar, but subtract 1 first.  */
23940       if (GET_CODE (x) != PARALLEL)
23941 	output_operand_lossage ("invalid %%O value");
23942       else
23943 	fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
23944       return;
23945 
23946     case 'p':
23947       /* X is a CONST_INT that is a power of two.  Output the logarithm.  */
23948       if (! INT_P (x)
23949 	  || INTVAL (x) < 0
23950 	  || (i = exact_log2 (INTVAL (x))) < 0)
23951 	output_operand_lossage ("invalid %%p value");
23952       else
23953 	fprintf (file, "%d", i);
23954       return;
23955 
23956     case 'P':
23957       /* The operand must be an indirect memory reference.  The result
23958 	 is the register name.  */
23959       if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
23960 	  || REGNO (XEXP (x, 0)) >= 32)
23961 	output_operand_lossage ("invalid %%P value");
23962       else
23963 	fputs (reg_names[REGNO (XEXP (x, 0))], file);
23964       return;
23965 
23966     case 'q':
23967       /* This outputs the logical code corresponding to a boolean
23968 	 expression.  The expression may have one or both operands
23969 	 negated (if one, only the first one).  For condition register
23970 	 logical operations, it will also treat the negated
23971 	 CR codes as NOTs, but not handle NOTs of them.  */
23972       {
23973 	const char *const *t = 0;
23974 	const char *s;
23975 	enum rtx_code code = GET_CODE (x);
23976 	static const char * const tbl[3][3] = {
23977 	  { "and", "andc", "nor" },
23978 	  { "or", "orc", "nand" },
23979 	  { "xor", "eqv", "xor" } };
23980 
23981 	if (code == AND)
23982 	  t = tbl[0];
23983 	else if (code == IOR)
23984 	  t = tbl[1];
23985 	else if (code == XOR)
23986 	  t = tbl[2];
23987 	else
23988 	  output_operand_lossage ("invalid %%q value");
23989 
23990 	if (GET_CODE (XEXP (x, 0)) != NOT)
23991 	  s = t[0];
23992 	else
23993 	  {
23994 	    if (GET_CODE (XEXP (x, 1)) == NOT)
23995 	      s = t[2];
23996 	    else
23997 	      s = t[1];
23998 	  }
23999 
24000 	fputs (s, file);
24001       }
24002       return;
24003 
24004     case 'Q':
24005       if (! TARGET_MFCRF)
24006 	return;
24007       fputc (',', file);
24008       /* FALLTHRU */
24009 
24010     case 'R':
24011       /* X is a CR register.  Print the mask for `mtcrf'.  */
24012       if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
24013 	output_operand_lossage ("invalid %%R value");
24014       else
24015 	fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
24016       return;
24017 
24018     case 's':
24019       /* Low 5 bits of 32 - value.  */
24020       if (! INT_P (x))
24021 	output_operand_lossage ("invalid %%s value");
24022       else
24023 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
24024       return;
24025 
24026     case 't':
24027       /* Like 'J' but get to the OVERFLOW/UNORDERED bit.  */
24028       gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
24029 
24030       /* Bit 3 is OV bit.  */
24031       i = 4 * (REGNO (x) - CR0_REGNO) + 3;
24032 
24033       /* If we want bit 31, write a shift count of zero, not 32.  */
24034       fprintf (file, "%d", i == 31 ? 0 : i + 1);
24035       return;
24036 
24037     case 'T':
24038       /* Print the symbolic name of a branch target register.  */
24039       if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
24040 				  && REGNO (x) != CTR_REGNO))
24041 	output_operand_lossage ("invalid %%T value");
24042       else if (REGNO (x) == LR_REGNO)
24043 	fputs ("lr", file);
24044       else
24045 	fputs ("ctr", file);
24046       return;
24047 
24048     case 'u':
24049       /* High-order or low-order 16 bits of constant, whichever is non-zero,
24050 	 for use in unsigned operand.  */
24051       if (! INT_P (x))
24052 	{
24053 	  output_operand_lossage ("invalid %%u value");
24054 	  return;
24055 	}
24056 
24057       uval = INTVAL (x);
24058       if ((uval & 0xffff) == 0)
24059 	uval >>= 16;
24060 
24061       fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
24062       return;
24063 
24064     case 'v':
24065       /* High-order 16 bits of constant for use in signed operand.  */
24066       if (! INT_P (x))
24067 	output_operand_lossage ("invalid %%v value");
24068       else
24069 	fprintf (file, HOST_WIDE_INT_PRINT_HEX,
24070 		 (INTVAL (x) >> 16) & 0xffff);
24071       return;
24072 
24073     case 'U':
24074       /* Print `u' if this has an auto-increment or auto-decrement.  */
24075       if (MEM_P (x)
24076 	  && (GET_CODE (XEXP (x, 0)) == PRE_INC
24077 	      || GET_CODE (XEXP (x, 0)) == PRE_DEC
24078 	      || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
24079 	putc ('u', file);
24080       return;
24081 
24082     case 'V':
24083       /* Print the trap code for this operand.  */
24084       switch (GET_CODE (x))
24085 	{
24086 	case EQ:
24087 	  fputs ("eq", file);   /* 4 */
24088 	  break;
24089 	case NE:
24090 	  fputs ("ne", file);   /* 24 */
24091 	  break;
24092 	case LT:
24093 	  fputs ("lt", file);   /* 16 */
24094 	  break;
24095 	case LE:
24096 	  fputs ("le", file);   /* 20 */
24097 	  break;
24098 	case GT:
24099 	  fputs ("gt", file);   /* 8 */
24100 	  break;
24101 	case GE:
24102 	  fputs ("ge", file);   /* 12 */
24103 	  break;
24104 	case LTU:
24105 	  fputs ("llt", file);  /* 2 */
24106 	  break;
24107 	case LEU:
24108 	  fputs ("lle", file);  /* 6 */
24109 	  break;
24110 	case GTU:
24111 	  fputs ("lgt", file);  /* 1 */
24112 	  break;
24113 	case GEU:
24114 	  fputs ("lge", file);  /* 5 */
24115 	  break;
24116 	default:
24117 	  gcc_unreachable ();
24118 	}
24119       break;
24120 
24121     case 'w':
24122       /* If constant, low-order 16 bits of constant, signed.  Otherwise, write
24123 	 normally.  */
24124       if (INT_P (x))
24125 	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
24126 		 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
24127       else
24128 	print_operand (file, x, 0);
24129       return;
24130 
24131     case 'x':
24132       /* X is a FPR or Altivec register used in a VSX context.  */
24133       if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
24134 	output_operand_lossage ("invalid %%x value");
24135       else
24136 	{
24137 	  int reg = REGNO (x);
24138 	  int vsx_reg = (FP_REGNO_P (reg)
24139 			 ? reg - 32
24140 			 : reg - FIRST_ALTIVEC_REGNO + 32);
24141 
24142 #ifdef TARGET_REGNAMES
24143 	  if (TARGET_REGNAMES)
24144 	    fprintf (file, "%%vs%d", vsx_reg);
24145 	  else
24146 #endif
24147 	    fprintf (file, "%d", vsx_reg);
24148 	}
24149       return;
24150 
24151     case 'X':
24152       if (MEM_P (x)
24153 	  && (legitimate_indexed_address_p (XEXP (x, 0), 0)
24154 	      || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
24155 		  && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
24156 	putc ('x', file);
24157       return;
24158 
24159     case 'Y':
24160       /* Like 'L', for third word of TImode/PTImode.  */
24161       if (REG_P (x))
24162 	fputs (reg_names[REGNO (x) + 2], file);
24163       else if (MEM_P (x))
24164 	{
24165 	  machine_mode mode = GET_MODE (x);
24166 	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
24167 	      || GET_CODE (XEXP (x, 0)) == PRE_DEC)
24168 	    output_address (mode, plus_constant (Pmode,
24169 						 XEXP (XEXP (x, 0), 0), 8));
24170 	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
24171 	    output_address (mode, plus_constant (Pmode,
24172 						 XEXP (XEXP (x, 0), 0), 8));
24173 	  else
24174 	    output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
24175 	  if (small_data_operand (x, GET_MODE (x)))
24176 	    fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
24177 		     reg_names[SMALL_DATA_REG]);
24178 	}
24179       return;
24180 
24181     case 'z':
24182       /* X is a SYMBOL_REF.  Write out the name preceded by a
24183 	 period and without any trailing data in brackets.  Used for function
24184 	 names.  If we are configured for System V (or the embedded ABI) on
24185 	 the PowerPC, do not emit the period, since those systems do not use
24186 	 TOCs and the like.  */
24187       gcc_assert (GET_CODE (x) == SYMBOL_REF);
24188 
24189       /* For macho, check to see if we need a stub.  */
24190       if (TARGET_MACHO)
24191 	{
24192 	  const char *name = XSTR (x, 0);
24193 #if TARGET_MACHO
24194 	  if (darwin_emit_branch_islands
24195 	      && MACHOPIC_INDIRECT
24196 	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
24197 	    name = machopic_indirection_name (x, /*stub_p=*/true);
24198 #endif
24199 	  assemble_name (file, name);
24200 	}
24201       else if (!DOT_SYMBOLS)
24202 	assemble_name (file, XSTR (x, 0));
24203       else
24204 	rs6000_output_function_entry (file, XSTR (x, 0));
24205       return;
24206 
24207     case 'Z':
24208       /* Like 'L', for last word of TImode/PTImode.  */
24209       if (REG_P (x))
24210 	fputs (reg_names[REGNO (x) + 3], file);
24211       else if (MEM_P (x))
24212 	{
24213 	  machine_mode mode = GET_MODE (x);
24214 	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
24215 	      || GET_CODE (XEXP (x, 0)) == PRE_DEC)
24216 	    output_address (mode, plus_constant (Pmode,
24217 						 XEXP (XEXP (x, 0), 0), 12));
24218 	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
24219 	    output_address (mode, plus_constant (Pmode,
24220 						 XEXP (XEXP (x, 0), 0), 12));
24221 	  else
24222 	    output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
24223 	  if (small_data_operand (x, GET_MODE (x)))
24224 	    fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
24225 		     reg_names[SMALL_DATA_REG]);
24226 	}
24227       return;
24228 
24229       /* Print AltiVec or SPE memory operand.  */
24230     case 'y':
24231       {
24232 	rtx tmp;
24233 
24234 	gcc_assert (MEM_P (x));
24235 
24236 	tmp = XEXP (x, 0);
24237 
24238 	/* Ugly hack because %y is overloaded.  */
24239 	if ((TARGET_SPE || TARGET_E500_DOUBLE)
24240 	    && (GET_MODE_SIZE (GET_MODE (x)) == 8
24241 		|| FLOAT128_2REG_P (GET_MODE (x))
24242 		|| GET_MODE (x) == TImode
24243 		|| GET_MODE (x) == PTImode))
24244 	  {
24245 	    /* Handle [reg].  */
24246 	    if (REG_P (tmp))
24247 	      {
24248 		fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
24249 		break;
24250 	      }
24251 	    /* Handle [reg+UIMM].  */
24252 	    else if (GET_CODE (tmp) == PLUS
24253 		     && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
24254 	      {
24255 		int x;
24256 
24257 		gcc_assert (REG_P (XEXP (tmp, 0)));
24258 
24259 		x = INTVAL (XEXP (tmp, 1));
24260 		fprintf (file, "%d(%s)", x, reg_names[REGNO (XEXP (tmp, 0))]);
24261 		break;
24262 	      }
24263 
24264 	    /* Fall through.  Must be [reg+reg].  */
24265 	  }
24266 	if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
24267 	    && GET_CODE (tmp) == AND
24268 	    && GET_CODE (XEXP (tmp, 1)) == CONST_INT
24269 	    && INTVAL (XEXP (tmp, 1)) == -16)
24270 	  tmp = XEXP (tmp, 0);
24271 	else if (VECTOR_MEM_VSX_P (GET_MODE (x))
24272 		 && GET_CODE (tmp) == PRE_MODIFY)
24273 	  tmp = XEXP (tmp, 1);
24274 	if (REG_P (tmp))
24275 	  fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
24276 	else
24277 	  {
24278 	    if (GET_CODE (tmp) != PLUS
24279 		|| !REG_P (XEXP (tmp, 0))
24280 		|| !REG_P (XEXP (tmp, 1)))
24281 	      {
24282 		output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
24283 		break;
24284 	      }
24285 
24286 	    if (REGNO (XEXP (tmp, 0)) == 0)
24287 	      fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
24288 		       reg_names[ REGNO (XEXP (tmp, 0)) ]);
24289 	    else
24290 	      fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
24291 		       reg_names[ REGNO (XEXP (tmp, 1)) ]);
24292 	  }
24293 	break;
24294       }
24295 
24296     case 0:
24297       if (REG_P (x))
24298 	fprintf (file, "%s", reg_names[REGNO (x)]);
24299       else if (MEM_P (x))
24300 	{
24301 	  /* We need to handle PRE_INC and PRE_DEC here, since we need to
24302 	     know the width from the mode.  */
24303 	  if (GET_CODE (XEXP (x, 0)) == PRE_INC)
24304 	    fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
24305 		     reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
24306 	  else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
24307 	    fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
24308 		     reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
24309 	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
24310 	    output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
24311 	  else
24312 	    output_address (GET_MODE (x), XEXP (x, 0));
24313 	}
24314       else
24315 	{
24316 	  if (toc_relative_expr_p (x, false))
24317 	    /* This hack along with a corresponding hack in
24318 	       rs6000_output_addr_const_extra arranges to output addends
24319 	       where the assembler expects to find them.  eg.
24320 	       (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
24321 	       without this hack would be output as "x@toc+4".  We
24322 	       want "x+4@toc".  */
24323 	    output_addr_const (file, CONST_CAST_RTX (tocrel_base));
24324 	  else
24325 	    output_addr_const (file, x);
24326 	}
24327       return;
24328 
24329     case '&':
24330       if (const char *name = get_some_local_dynamic_name ())
24331 	assemble_name (file, name);
24332       else
24333 	output_operand_lossage ("'%%&' used without any "
24334 				"local dynamic TLS references");
24335       return;
24336 
24337     default:
24338       output_operand_lossage ("invalid %%xn code");
24339     }
24340 }
24341 
24342 /* Print the address of an operand.  */
24343 
24344 void
print_operand_address (FILE *file, rtx x)
24346 {
24347   if (REG_P (x))
24348     fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
24349   else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
24350 	   || GET_CODE (x) == LABEL_REF)
24351     {
24352       output_addr_const (file, x);
24353       if (small_data_operand (x, GET_MODE (x)))
24354 	fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
24355 		 reg_names[SMALL_DATA_REG]);
24356       else
24357 	gcc_assert (!TARGET_TOC);
24358     }
24359   else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
24360 	   && REG_P (XEXP (x, 1)))
24361     {
24362       if (REGNO (XEXP (x, 0)) == 0)
24363 	fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
24364 		 reg_names[ REGNO (XEXP (x, 0)) ]);
24365       else
24366 	fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
24367 		 reg_names[ REGNO (XEXP (x, 1)) ]);
24368     }
24369   else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
24370 	   && GET_CODE (XEXP (x, 1)) == CONST_INT)
24371     fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
24372 	     INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
24373 #if TARGET_MACHO
24374   else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
24375 	   && CONSTANT_P (XEXP (x, 1)))
24376     {
24377       fprintf (file, "lo16(");
24378       output_addr_const (file, XEXP (x, 1));
24379       fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
24380     }
24381 #endif
24382 #if TARGET_ELF
24383   else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
24384 	   && CONSTANT_P (XEXP (x, 1)))
24385     {
24386       output_addr_const (file, XEXP (x, 1));
24387       fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
24388     }
24389 #endif
24390   else if (toc_relative_expr_p (x, false))
24391     {
24392       /* This hack along with a corresponding hack in
24393 	 rs6000_output_addr_const_extra arranges to output addends
	 where the assembler expects to find them.  E.g.
24395 	 (lo_sum (reg 9)
24396 	 .       (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
24397 	 without this hack would be output as "x@toc+8@l(9)".  We
24398 	 want "x+8@toc@l(9)".  */
24399       output_addr_const (file, CONST_CAST_RTX (tocrel_base));
24400       if (GET_CODE (x) == LO_SUM)
24401 	fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
24402       else
24403 	fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
24404     }
24405   else
24406     gcc_unreachable ();
24407 }
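
/* Some illustrative input/output pairs for the cases above (a sketch):

     (reg:SI 5)                           ->  "0(5)"
     (plus:SI (reg:SI 9) (reg:SI 10))     ->  "9,10"
     (plus:SI (reg:SI 3) (const_int 8))   ->  "8(3)"

   The r0 special case in the [reg+reg] form swaps the operands because
   RA=0 reads as a literal zero in X-form addressing.  */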
24408 
24409 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA.  */
24410 
24411 static bool
rs6000_output_addr_const_extra (FILE *file, rtx x)
24413 {
24414   if (GET_CODE (x) == UNSPEC)
24415     switch (XINT (x, 1))
24416       {
24417       case UNSPEC_TOCREL:
24418 	gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
24419 			     && REG_P (XVECEXP (x, 0, 1))
24420 			     && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
24421 	output_addr_const (file, XVECEXP (x, 0, 0));
24422 	if (x == tocrel_base && tocrel_offset != const0_rtx)
24423 	  {
24424 	    if (INTVAL (tocrel_offset) >= 0)
24425 	      fprintf (file, "+");
24426 	    output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
24427 	  }
24428 	if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
24429 	  {
24430 	    putc ('-', file);
24431 	    assemble_name (file, toc_label_name);
24432 	    need_toc_init = 1;
24433 	  }
24434 	else if (TARGET_ELF)
24435 	  fputs ("@toc", file);
24436 	return true;
24437 
24438 #if TARGET_MACHO
24439       case UNSPEC_MACHOPIC_OFFSET:
24440 	output_addr_const (file, XVECEXP (x, 0, 0));
24441 	putc ('-', file);
24442 	machopic_output_function_base_name (file);
24443 	return true;
24444 #endif
24445       }
24446   return false;
24447 }
24448 
24449 /* Target hook for assembling integer objects.  The PowerPC version has
24450    to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
24451    is defined.  It also needs to handle DI-mode objects on 64-bit
24452    targets.  */
24453 
24454 static bool
rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
24456 {
24457 #ifdef RELOCATABLE_NEEDS_FIXUP
24458   /* Special handling for SI values.  */
24459   if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
24460     {
24461       static int recurse = 0;
24462 
24463       /* For -mrelocatable, we mark all addresses that need to be fixed up in
24464 	 the .fixup section.  Since the TOC section is already relocated, we
24465 	 don't need to mark it here.  We used to skip the text section, but it
24466 	 should never be valid for relocated addresses to be placed in the text
24467 	 section.  */
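      /* The code below emits, roughly (illustrative):

	     .LCP<n>:
		     .long   (<expr>)@fixup
		     .section ".fixup","aw"
		     .align  2
		     .long   .LCP<n>
		     .previous  */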
24468       if (DEFAULT_ABI == ABI_V4
24469 	  && (TARGET_RELOCATABLE || flag_pic > 1)
24470 	  && in_section != toc_section
24471 	  && !recurse
24472 	  && !CONST_SCALAR_INT_P (x)
24473 	  && CONSTANT_P (x))
24474 	{
24475 	  char buf[256];
24476 
24477 	  recurse = 1;
24478 	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
24479 	  fixuplabelno++;
24480 	  ASM_OUTPUT_LABEL (asm_out_file, buf);
24481 	  fprintf (asm_out_file, "\t.long\t(");
24482 	  output_addr_const (asm_out_file, x);
24483 	  fprintf (asm_out_file, ")@fixup\n");
24484 	  fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
24485 	  ASM_OUTPUT_ALIGN (asm_out_file, 2);
24486 	  fprintf (asm_out_file, "\t.long\t");
24487 	  assemble_name (asm_out_file, buf);
24488 	  fprintf (asm_out_file, "\n\t.previous\n");
24489 	  recurse = 0;
24490 	  return true;
24491 	}
24492       /* Remove initial .'s to turn a -mcall-aixdesc function
24493 	 address into the address of the descriptor, not the function
24494 	 itself.  */
24495       else if (GET_CODE (x) == SYMBOL_REF
24496 	       && XSTR (x, 0)[0] == '.'
24497 	       && DEFAULT_ABI == ABI_AIX)
24498 	{
24499 	  const char *name = XSTR (x, 0);
24500 	  while (*name == '.')
24501 	    name++;
24502 
24503 	  fprintf (asm_out_file, "\t.long\t%s\n", name);
24504 	  return true;
24505 	}
24506     }
24507 #endif /* RELOCATABLE_NEEDS_FIXUP */
24508   return default_assemble_integer (x, size, aligned_p);
24509 }
24510 
24511 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
24512 /* Emit an assembler directive to set symbol visibility for DECL to
24513    VISIBILITY_TYPE.  */
24514 
24515 static void
rs6000_assemble_visibility (tree decl, int vis)
24517 {
24518   if (TARGET_XCOFF)
24519     return;
24520 
24521   /* Functions need to have their entry point symbol visibility set as
24522      well as their descriptor symbol visibility.  */
24523   if (DEFAULT_ABI == ABI_AIX
24524       && DOT_SYMBOLS
24525       && TREE_CODE (decl) == FUNCTION_DECL)
24526     {
24527       static const char * const visibility_types[] = {
24528 	NULL, "protected", "hidden", "internal"
24529       };
24530 
24531       const char *name, *type;
24532 
24533       name = ((* targetm.strip_name_encoding)
24534 	      (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
24535       type = visibility_types[vis];
24536 
24537       fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
24538       fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
24539     }
24540   else
24541     default_assemble_visibility (decl, vis);
24542 }
24543 #endif
24544 
24545 enum rtx_code
rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
24547 {
  /* Reversal of FP compares needs care -- an ordered compare
     becomes an unordered compare and vice versa.  */
24550   if (mode == CCFPmode
24551       && (!flag_finite_math_only
24552 	  || code == UNLT || code == UNLE || code == UNGT || code == UNGE
24553 	  || code == UNEQ || code == LTGT))
24554     return reverse_condition_maybe_unordered (code);
24555   else
24556     return reverse_condition (code);
24557 }
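
/* For example (illustrative): reversing a CCFPmode GE yields UNLT when NaNs
   matter, so an unordered result still takes the reversed branch; under
   flag_finite_math_only the plain LT reversal is used instead.  */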
24558 
24559 /* Generate a compare for CODE.  Return a brand-new rtx that
24560    represents the result of the compare.  */
24561 
24562 static rtx
rs6000_generate_compare (rtx cmp, machine_mode mode)
24564 {
24565   machine_mode comp_mode;
24566   rtx compare_result;
24567   enum rtx_code code = GET_CODE (cmp);
24568   rtx op0 = XEXP (cmp, 0);
24569   rtx op1 = XEXP (cmp, 1);
24570 
24571   if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
24572     comp_mode = CCmode;
24573   else if (FLOAT_MODE_P (mode))
24574     comp_mode = CCFPmode;
24575   else if (code == GTU || code == LTU
24576 	   || code == GEU || code == LEU)
24577     comp_mode = CCUNSmode;
24578   else if ((code == EQ || code == NE)
24579 	   && unsigned_reg_p (op0)
24580 	   && (unsigned_reg_p (op1)
24581 	       || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
24582     /* These are unsigned values, perhaps there will be a later
24583        ordering compare that can be shared with this one.  */
24584     comp_mode = CCUNSmode;
24585   else
24586     comp_mode = CCmode;
24587 
24588   /* If we have an unsigned compare, make sure we don't have a signed value as
24589      an immediate.  */
24590   if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
24591       && INTVAL (op1) < 0)
24592     {
24593       op0 = copy_rtx_if_shared (op0);
24594       op1 = force_reg (GET_MODE (op0), op1);
24595       cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
24596     }
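
  /* E.g. (illustrative): a GTU compare against (const_int -1) cannot use the
     unsigned immediate field of cmplwi/cmpldi, so the constant is forced into
     a register and a register-register compare is generated instead.  */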
24597 
24598   /* First, the compare.  */
24599   compare_result = gen_reg_rtx (comp_mode);
24600 
24601   /* E500 FP compare instructions on the GPRs.  Yuck!  */
24602   if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
24603       && FLOAT_MODE_P (mode))
24604     {
24605       rtx cmp, or_result, compare_result2;
24606       machine_mode op_mode = GET_MODE (op0);
24607       bool reverse_p;
24608 
24609       if (op_mode == VOIDmode)
24610 	op_mode = GET_MODE (op1);
24611 
24612       /* First reverse the condition codes that aren't directly supported.  */
24613       switch (code)
24614 	{
24615 	  case NE:
24616 	  case UNLT:
24617 	  case UNLE:
24618 	  case UNGT:
24619 	  case UNGE:
24620 	    code = reverse_condition_maybe_unordered (code);
24621 	    reverse_p = true;
24622 	    break;
24623 
24624 	  case EQ:
24625 	  case LT:
24626 	  case LE:
24627 	  case GT:
24628 	  case GE:
24629 	    reverse_p = false;
24630 	    break;
24631 
24632 	  default:
24633 	    gcc_unreachable ();
24634 	}
24635 
24636       /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
24637 	 This explains the following mess.  */
24638 
24639       switch (code)
24640 	{
24641 	case EQ:
24642 	  switch (op_mode)
24643 	    {
24644 	    case E_SFmode:
24645 	      cmp = (flag_finite_math_only && !flag_trapping_math)
24646 		? gen_tstsfeq_gpr (compare_result, op0, op1)
24647 		: gen_cmpsfeq_gpr (compare_result, op0, op1);
24648 	      break;
24649 
24650 	    case E_DFmode:
24651 	      cmp = (flag_finite_math_only && !flag_trapping_math)
24652 		? gen_tstdfeq_gpr (compare_result, op0, op1)
24653 		: gen_cmpdfeq_gpr (compare_result, op0, op1);
24654 	      break;
24655 
24656 	    case E_TFmode:
24657 	    case E_IFmode:
24658 	    case E_KFmode:
24659 	      cmp = (flag_finite_math_only && !flag_trapping_math)
24660 		? gen_tsttfeq_gpr (compare_result, op0, op1)
24661 		: gen_cmptfeq_gpr (compare_result, op0, op1);
24662 	      break;
24663 
24664 	    default:
24665 	      gcc_unreachable ();
24666 	    }
24667 	  break;
24668 
24669 	case GT:
24670 	case GE:
24671 	  switch (op_mode)
24672 	    {
24673 	    case E_SFmode:
24674 	      cmp = (flag_finite_math_only && !flag_trapping_math)
24675 		? gen_tstsfgt_gpr (compare_result, op0, op1)
24676 		: gen_cmpsfgt_gpr (compare_result, op0, op1);
24677 	      break;
24678 
24679 	    case E_DFmode:
24680 	      cmp = (flag_finite_math_only && !flag_trapping_math)
24681 		? gen_tstdfgt_gpr (compare_result, op0, op1)
24682 		: gen_cmpdfgt_gpr (compare_result, op0, op1);
24683 	      break;
24684 
24685 	    case E_TFmode:
24686 	    case E_IFmode:
24687 	    case E_KFmode:
24688 	      cmp = (flag_finite_math_only && !flag_trapping_math)
24689 		? gen_tsttfgt_gpr (compare_result, op0, op1)
24690 		: gen_cmptfgt_gpr (compare_result, op0, op1);
24691 	      break;
24692 
24693 	    default:
24694 	      gcc_unreachable ();
24695 	    }
24696 	  break;
24697 
24698 	case LT:
24699 	case LE:
24700 	  switch (op_mode)
24701 	    {
24702 	    case E_SFmode:
24703 	      cmp = (flag_finite_math_only && !flag_trapping_math)
24704 		? gen_tstsflt_gpr (compare_result, op0, op1)
24705 		: gen_cmpsflt_gpr (compare_result, op0, op1);
24706 	      break;
24707 
24708 	    case E_DFmode:
24709 	      cmp = (flag_finite_math_only && !flag_trapping_math)
24710 		? gen_tstdflt_gpr (compare_result, op0, op1)
24711 		: gen_cmpdflt_gpr (compare_result, op0, op1);
24712 	      break;
24713 
24714 	    case E_TFmode:
24715 	    case E_IFmode:
24716 	    case E_KFmode:
24717 	      cmp = (flag_finite_math_only && !flag_trapping_math)
24718 		? gen_tsttflt_gpr (compare_result, op0, op1)
24719 		: gen_cmptflt_gpr (compare_result, op0, op1);
24720 	      break;
24721 
24722 	    default:
24723 	      gcc_unreachable ();
24724 	    }
24725 	  break;
24726 
24727         default:
24728           gcc_unreachable ();
24729 	}
24730 
24731       /* Synthesize LE and GE from LT/GT || EQ.  */
24732       if (code == LE || code == GE)
24733 	{
24734 	  emit_insn (cmp);
24735 
24736 	  compare_result2 = gen_reg_rtx (CCFPmode);
24737 
24738 	  /* Do the EQ.  */
24739 	  switch (op_mode)
24740 	    {
24741 	    case E_SFmode:
24742 	      cmp = (flag_finite_math_only && !flag_trapping_math)
24743 		? gen_tstsfeq_gpr (compare_result2, op0, op1)
24744 		: gen_cmpsfeq_gpr (compare_result2, op0, op1);
24745 	      break;
24746 
24747 	    case E_DFmode:
24748 	      cmp = (flag_finite_math_only && !flag_trapping_math)
24749 		? gen_tstdfeq_gpr (compare_result2, op0, op1)
24750 		: gen_cmpdfeq_gpr (compare_result2, op0, op1);
24751 	      break;
24752 
24753 	    case E_TFmode:
24754 	    case E_IFmode:
24755 	    case E_KFmode:
24756 	      cmp = (flag_finite_math_only && !flag_trapping_math)
24757 		? gen_tsttfeq_gpr (compare_result2, op0, op1)
24758 		: gen_cmptfeq_gpr (compare_result2, op0, op1);
24759 	      break;
24760 
24761 	    default:
24762 	      gcc_unreachable ();
24763 	    }
24764 
24765 	  emit_insn (cmp);
24766 
24767 	  /* OR them together.  */
24768 	  or_result = gen_reg_rtx (CCFPmode);
24769 	  cmp = gen_e500_cr_ior_compare (or_result, compare_result,
24770 					 compare_result2);
24771 	  compare_result = or_result;
24772 	}
24773 
24774       code = reverse_p ? NE : EQ;
24775 
24776       emit_insn (cmp);
24777     }
24778 
24779   /* IEEE 128-bit support in VSX registers when we do not have hardware
24780      support.  */
24781   else if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
24782     {
24783       rtx libfunc = NULL_RTX;
24784       bool check_nan = false;
24785       rtx dest;
24786 
24787       switch (code)
24788 	{
24789 	case EQ:
24790 	case NE:
24791 	  libfunc = optab_libfunc (eq_optab, mode);
24792 	  break;
24793 
24794 	case GT:
24795 	case GE:
24796 	  libfunc = optab_libfunc (ge_optab, mode);
24797 	  break;
24798 
24799 	case LT:
24800 	case LE:
24801 	  libfunc = optab_libfunc (le_optab, mode);
24802 	  break;
24803 
24804 	case UNORDERED:
24805 	case ORDERED:
24806 	  libfunc = optab_libfunc (unord_optab, mode);
24807 	  code = (code == UNORDERED) ? NE : EQ;
24808 	  break;
24809 
24810 	case UNGE:
24811 	case UNGT:
24812 	  check_nan = true;
24813 	  libfunc = optab_libfunc (ge_optab, mode);
24814 	  code = (code == UNGE) ? GE : GT;
24815 	  break;
24816 
24817 	case UNLE:
24818 	case UNLT:
24819 	  check_nan = true;
24820 	  libfunc = optab_libfunc (le_optab, mode);
24821 	  code = (code == UNLE) ? LE : LT;
24822 	  break;
24823 
24824 	case UNEQ:
24825 	case LTGT:
24826 	  check_nan = true;
24827 	  libfunc = optab_libfunc (eq_optab, mode);
	  code = (code == UNEQ) ? EQ : NE;
24829 	  break;
24830 
24831 	default:
24832 	  gcc_unreachable ();
24833 	}
24834 
24835       gcc_assert (libfunc);
24836 
24837       if (!check_nan)
24838 	dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
24839 					SImode, op0, mode, op1, mode);
24840 
24841       /* The library signals an exception for signalling NaNs, so we need to
24842 	 handle isgreater, etc. by first checking isordered.  */
24843       else
24844 	{
24845 	  rtx ne_rtx, normal_dest, unord_dest;
24846 	  rtx unord_func = optab_libfunc (unord_optab, mode);
24847 	  rtx join_label = gen_label_rtx ();
24848 	  rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
24849 	  rtx unord_cmp = gen_reg_rtx (comp_mode);
24850 
24851 
24852 	  /* Test for either value being a NaN.  */
24853 	  gcc_assert (unord_func);
24854 	  unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
24855 						SImode, op0, mode, op1, mode);
24856 
	  /* Set value (1) if either value is a NaN, and jump to the join
	     label.  */
24859 	  dest = gen_reg_rtx (SImode);
24860 	  emit_move_insn (dest, const1_rtx);
24861 	  emit_insn (gen_rtx_SET (unord_cmp,
24862 				  gen_rtx_COMPARE (comp_mode, unord_dest,
24863 						   const0_rtx)));
24864 
24865 	  ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
24866 	  emit_jump_insn (gen_rtx_SET (pc_rtx,
24867 				       gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
24868 							     join_ref,
24869 							     pc_rtx)));
24870 
24871 	  /* Do the normal comparison, knowing that the values are not
24872 	     NaNs.  */
24873 	  normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
24874 						 SImode, op0, mode, op1, mode);
24875 
24876 	  emit_insn (gen_cstoresi4 (dest,
24877 				    gen_rtx_fmt_ee (code, SImode, normal_dest,
24878 						    const0_rtx),
24879 				    normal_dest, const0_rtx));
24880 
	  /* Join NaN and non-NaN paths.  Compare dest against 0.  */
24882 	  emit_label (join_label);
24883 	  code = NE;
24884 	}
24885 
24886       emit_insn (gen_rtx_SET (compare_result,
24887 			      gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
24888     }
24889 
24890   else
24891     {
24892       /* Generate XLC-compatible TFmode compare as PARALLEL with extra
24893 	 CLOBBERs to match cmptf_internal2 pattern.  */
24894       if (comp_mode == CCFPmode && TARGET_XL_COMPAT
24895 	  && FLOAT128_IBM_P (GET_MODE (op0))
24896 	  && TARGET_HARD_FLOAT && TARGET_FPRS)
24897 	emit_insn (gen_rtx_PARALLEL (VOIDmode,
24898 	  gen_rtvec (10,
24899 		     gen_rtx_SET (compare_result,
24900 				  gen_rtx_COMPARE (comp_mode, op0, op1)),
24901 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24902 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24903 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24904 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24905 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24906 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24907 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24908 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24909 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
24910       else if (GET_CODE (op1) == UNSPEC
24911 	       && XINT (op1, 1) == UNSPEC_SP_TEST)
24912 	{
24913 	  rtx op1b = XVECEXP (op1, 0, 0);
24914 	  comp_mode = CCEQmode;
24915 	  compare_result = gen_reg_rtx (CCEQmode);
24916 	  if (TARGET_64BIT)
24917 	    emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
24918 	  else
24919 	    emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
24920 	}
24921       else
24922 	emit_insn (gen_rtx_SET (compare_result,
24923 				gen_rtx_COMPARE (comp_mode, op0, op1)));
24924     }
24925 
24926   /* Some kinds of FP comparisons need an OR operation;
24927      under flag_finite_math_only we don't bother.  */
24928   if (FLOAT_MODE_P (mode)
24929       && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
24930       && !flag_finite_math_only
24931       && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
24932       && (code == LE || code == GE
24933 	  || code == UNEQ || code == LTGT
24934 	  || code == UNGT || code == UNLT))
24935     {
24936       enum rtx_code or1, or2;
24937       rtx or1_rtx, or2_rtx, compare2_rtx;
24938       rtx or_result = gen_reg_rtx (CCEQmode);
24939 
24940       switch (code)
24941 	{
24942 	case LE: or1 = LT;  or2 = EQ;  break;
24943 	case GE: or1 = GT;  or2 = EQ;  break;
24944 	case UNEQ: or1 = UNORDERED;  or2 = EQ;  break;
24945 	case LTGT: or1 = LT;  or2 = GT;  break;
24946 	case UNGT: or1 = UNORDERED;  or2 = GT;  break;
24947 	case UNLT: or1 = UNORDERED;  or2 = LT;  break;
24948 	default:  gcc_unreachable ();
24949 	}
24950       validate_condition_mode (or1, comp_mode);
24951       validate_condition_mode (or2, comp_mode);
24952       or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
24953       or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
24954       compare2_rtx = gen_rtx_COMPARE (CCEQmode,
24955 				      gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
24956 				      const_true_rtx);
24957       emit_insn (gen_rtx_SET (or_result, compare2_rtx));
24958 
24959       compare_result = or_result;
24960       code = EQ;
24961     }
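
  /* E.g. (illustrative): a LE test reaches this point as (LT | EQ) compared
     in CCEQmode, which later matching can implement as a cror of the LT and
     EQ bits followed by a branch on the result.  */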
24962 
24963   validate_condition_mode (code, GET_MODE (compare_result));
24964 
24965   return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
24966 }
24967 
24968 
24969 /* Return the diagnostic message string if the binary operation OP is
24970    not permitted on TYPE1 and TYPE2, NULL otherwise.  */
24971 
24972 static const char*
rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
24974 			  const_tree type1,
24975 			  const_tree type2)
24976 {
24977   machine_mode mode1 = TYPE_MODE (type1);
24978   machine_mode mode2 = TYPE_MODE (type2);
24979 
24980   /* For complex modes, use the inner type.  */
24981   if (COMPLEX_MODE_P (mode1))
24982     mode1 = GET_MODE_INNER (mode1);
24983 
24984   if (COMPLEX_MODE_P (mode2))
24985     mode2 = GET_MODE_INNER (mode2);
24986 
24987   /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
24988      double to intermix unless -mfloat128-convert.  */
24989   if (mode1 == mode2)
24990     return NULL;
24991 
24992   if (!TARGET_FLOAT128_CVT)
24993     {
24994       if ((mode1 == KFmode && mode2 == IFmode)
24995 	  || (mode1 == IFmode && mode2 == KFmode))
24996 	return N_("__float128 and __ibm128 cannot be used in the same "
24997 		  "expression");
24998 
24999       if (TARGET_IEEEQUAD
25000 	  && ((mode1 == IFmode && mode2 == TFmode)
25001 	      || (mode1 == TFmode && mode2 == IFmode)))
25002 	return N_("__ibm128 and long double cannot be used in the same "
25003 		  "expression");
25004 
25005       if (!TARGET_IEEEQUAD
25006 	  && ((mode1 == KFmode && mode2 == TFmode)
25007 	      || (mode1 == TFmode && mode2 == KFmode)))
25008 	return N_("__float128 and long double cannot be used in the same "
25009 		  "expression");
25010     }
25011 
25012   return NULL;
25013 }
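
/* A minimal example of what this rejects (an illustrative sketch):

     __float128 f128;
     __ibm128 i128;
     __float128 sum = f128 + i128;   // "__float128 and __ibm128 cannot be
				     //  used in the same expression"

   Compiling with -mfloat128-convert lifts the restriction by permitting the
   implicit conversion.  */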
25014 
25015 
25016 /* Expand floating point conversion to/from __float128 and __ibm128.  */
25017 
25018 void
rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
25020 {
25021   machine_mode dest_mode = GET_MODE (dest);
25022   machine_mode src_mode = GET_MODE (src);
25023   convert_optab cvt = unknown_optab;
25024   bool do_move = false;
25025   rtx libfunc = NULL_RTX;
25026   rtx dest2;
25027   typedef rtx (*rtx_2func_t) (rtx, rtx);
25028   rtx_2func_t hw_convert = (rtx_2func_t)0;
25029   size_t kf_or_tf;
25030 
25031   struct hw_conv_t {
25032     rtx_2func_t	from_df;
25033     rtx_2func_t from_sf;
25034     rtx_2func_t from_si_sign;
25035     rtx_2func_t from_si_uns;
25036     rtx_2func_t from_di_sign;
25037     rtx_2func_t from_di_uns;
25038     rtx_2func_t to_df;
25039     rtx_2func_t to_sf;
25040     rtx_2func_t to_si_sign;
25041     rtx_2func_t to_si_uns;
25042     rtx_2func_t to_di_sign;
25043     rtx_2func_t to_di_uns;
25044   } hw_conversions[2] = {
    /* conversions to/from KFmode */
25046     {
25047       gen_extenddfkf2_hw,		/* KFmode <- DFmode.  */
25048       gen_extendsfkf2_hw,		/* KFmode <- SFmode.  */
25049       gen_float_kfsi2_hw,		/* KFmode <- SImode (signed).  */
25050       gen_floatuns_kfsi2_hw,		/* KFmode <- SImode (unsigned).  */
25051       gen_float_kfdi2_hw,		/* KFmode <- DImode (signed).  */
25052       gen_floatuns_kfdi2_hw,		/* KFmode <- DImode (unsigned).  */
25053       gen_trunckfdf2_hw,		/* DFmode <- KFmode.  */
25054       gen_trunckfsf2_hw,		/* SFmode <- KFmode.  */
25055       gen_fix_kfsi2_hw,			/* SImode <- KFmode (signed).  */
25056       gen_fixuns_kfsi2_hw,		/* SImode <- KFmode (unsigned).  */
25057       gen_fix_kfdi2_hw,			/* DImode <- KFmode (signed).  */
25058       gen_fixuns_kfdi2_hw,		/* DImode <- KFmode (unsigned).  */
25059     },
25060 
    /* conversions to/from TFmode */
25062     {
25063       gen_extenddftf2_hw,		/* TFmode <- DFmode.  */
25064       gen_extendsftf2_hw,		/* TFmode <- SFmode.  */
25065       gen_float_tfsi2_hw,		/* TFmode <- SImode (signed).  */
25066       gen_floatuns_tfsi2_hw,		/* TFmode <- SImode (unsigned).  */
25067       gen_float_tfdi2_hw,		/* TFmode <- DImode (signed).  */
25068       gen_floatuns_tfdi2_hw,		/* TFmode <- DImode (unsigned).  */
25069       gen_trunctfdf2_hw,		/* DFmode <- TFmode.  */
25070       gen_trunctfsf2_hw,		/* SFmode <- TFmode.  */
25071       gen_fix_tfsi2_hw,			/* SImode <- TFmode (signed).  */
25072       gen_fixuns_tfsi2_hw,		/* SImode <- TFmode (unsigned).  */
25073       gen_fix_tfdi2_hw,			/* DImode <- TFmode (signed).  */
25074       gen_fixuns_tfdi2_hw,		/* DImode <- TFmode (unsigned).  */
25075     },
25076   };
25077 
25078   if (dest_mode == src_mode)
25079     gcc_unreachable ();
25080 
25081   /* Eliminate memory operations.  */
25082   if (MEM_P (src))
25083     src = force_reg (src_mode, src);
25084 
25085   if (MEM_P (dest))
25086     {
25087       rtx tmp = gen_reg_rtx (dest_mode);
25088       rs6000_expand_float128_convert (tmp, src, unsigned_p);
25089       rs6000_emit_move (dest, tmp, dest_mode);
25090       return;
25091     }
25092 
25093   /* Convert to IEEE 128-bit floating point.  */
25094   if (FLOAT128_IEEE_P (dest_mode))
25095     {
25096       if (dest_mode == KFmode)
25097 	kf_or_tf = 0;
25098       else if (dest_mode == TFmode)
25099 	kf_or_tf = 1;
25100       else
25101 	gcc_unreachable ();
25102 
25103       switch (src_mode)
25104 	{
25105 	case E_DFmode:
25106 	  cvt = sext_optab;
25107 	  hw_convert = hw_conversions[kf_or_tf].from_df;
25108 	  break;
25109 
25110 	case E_SFmode:
25111 	  cvt = sext_optab;
25112 	  hw_convert = hw_conversions[kf_or_tf].from_sf;
25113 	  break;
25114 
25115 	case E_KFmode:
25116 	case E_IFmode:
25117 	case E_TFmode:
25118 	  if (FLOAT128_IBM_P (src_mode))
25119 	    cvt = sext_optab;
25120 	  else
25121 	    do_move = true;
25122 	  break;
25123 
25124 	case E_SImode:
25125 	  if (unsigned_p)
25126 	    {
25127 	      cvt = ufloat_optab;
25128 	      hw_convert = hw_conversions[kf_or_tf].from_si_uns;
25129 	    }
25130 	  else
25131 	    {
25132 	      cvt = sfloat_optab;
25133 	      hw_convert = hw_conversions[kf_or_tf].from_si_sign;
25134 	    }
25135 	  break;
25136 
25137 	case E_DImode:
25138 	  if (unsigned_p)
25139 	    {
25140 	      cvt = ufloat_optab;
25141 	      hw_convert = hw_conversions[kf_or_tf].from_di_uns;
25142 	    }
25143 	  else
25144 	    {
25145 	      cvt = sfloat_optab;
25146 	      hw_convert = hw_conversions[kf_or_tf].from_di_sign;
25147 	    }
25148 	  break;
25149 
25150 	default:
25151 	  gcc_unreachable ();
25152 	}
25153     }
25154 
25155   /* Convert from IEEE 128-bit floating point.  */
25156   else if (FLOAT128_IEEE_P (src_mode))
25157     {
25158       if (src_mode == KFmode)
25159 	kf_or_tf = 0;
25160       else if (src_mode == TFmode)
25161 	kf_or_tf = 1;
25162       else
25163 	gcc_unreachable ();
25164 
25165       switch (dest_mode)
25166 	{
25167 	case E_DFmode:
25168 	  cvt = trunc_optab;
25169 	  hw_convert = hw_conversions[kf_or_tf].to_df;
25170 	  break;
25171 
25172 	case E_SFmode:
25173 	  cvt = trunc_optab;
25174 	  hw_convert = hw_conversions[kf_or_tf].to_sf;
25175 	  break;
25176 
25177 	case E_KFmode:
25178 	case E_IFmode:
25179 	case E_TFmode:
25180 	  if (FLOAT128_IBM_P (dest_mode))
25181 	    cvt = trunc_optab;
25182 	  else
25183 	    do_move = true;
25184 	  break;
25185 
25186 	case E_SImode:
25187 	  if (unsigned_p)
25188 	    {
25189 	      cvt = ufix_optab;
25190 	      hw_convert = hw_conversions[kf_or_tf].to_si_uns;
25191 	    }
25192 	  else
25193 	    {
25194 	      cvt = sfix_optab;
25195 	      hw_convert = hw_conversions[kf_or_tf].to_si_sign;
25196 	    }
25197 	  break;
25198 
25199 	case E_DImode:
25200 	  if (unsigned_p)
25201 	    {
25202 	      cvt = ufix_optab;
25203 	      hw_convert = hw_conversions[kf_or_tf].to_di_uns;
25204 	    }
25205 	  else
25206 	    {
25207 	      cvt = sfix_optab;
25208 	      hw_convert = hw_conversions[kf_or_tf].to_di_sign;
25209 	    }
25210 	  break;
25211 
25212 	default:
25213 	  gcc_unreachable ();
25214 	}
25215     }
25216 
25217   /* Both IBM format.  */
25218   else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
25219     do_move = true;
25220 
25221   else
25222     gcc_unreachable ();
25223 
25224   /* Handle conversion between TFmode/KFmode.  */
25225   if (do_move)
25226     emit_move_insn (dest, gen_lowpart (dest_mode, src));
25227 
25228   /* Handle conversion if we have hardware support.  */
25229   else if (TARGET_FLOAT128_HW && hw_convert)
25230     emit_insn ((hw_convert) (dest, src));
25231 
25232   /* Call an external function to do the conversion.  */
25233   else if (cvt != unknown_optab)
25234     {
25235       libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
25236       gcc_assert (libfunc != NULL_RTX);
25237 
25238       dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
25239 				       src, src_mode);
25240 
25241       gcc_assert (dest2 != NULL_RTX);
25242       if (!rtx_equal_p (dest, dest2))
25243 	emit_move_insn (dest, dest2);
25244     }
25245 
25246   else
25247     gcc_unreachable ();
25248 
25249   return;
25250 }
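
/* An illustrative walk-through (a sketch): for a DFmode source and a KFmode
   destination, the code above selects sext_optab and
   hw_conversions[0].from_df, so with hardware IEEE 128-bit support it emits
   gen_extenddfkf2_hw; otherwise it calls the library routine registered for
   that optab.  */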
25251 
25252 
25253 /* Emit the RTL for an sISEL pattern.  */
25254 
25255 void
rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
25257 {
25258   rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
25259 }
25260 
25261 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal.  SCRATCH
25262    can be used as that dest register.  Return the dest register.  */
25263 
25264 rtx
rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
25266 {
25267   if (op2 == const0_rtx)
25268     return op1;
25269 
25270   if (GET_CODE (scratch) == SCRATCH)
25271     scratch = gen_reg_rtx (mode);
25272 
25273   if (logical_operand (op2, mode))
25274     emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
25275   else
25276     emit_insn (gen_rtx_SET (scratch,
25277 			    gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
25278 
25279   return scratch;
25280 }
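
/* For example (illustrative): for r3 == 5, the constant is a logical_operand,
   so SCRATCH becomes r3 ^ 5, which is zero exactly when the operands are
   equal; a constant with bits in both halfwords, such as 0x12345678, takes
   the PLUS-of-negation form instead.  */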
25281 
25282 void
rs6000_emit_sCOND (machine_mode mode, rtx operands[])
25284 {
25285   rtx condition_rtx;
25286   machine_mode op_mode;
25287   enum rtx_code cond_code;
25288   rtx result = operands[0];
25289 
25290   condition_rtx = rs6000_generate_compare (operands[1], mode);
25291   cond_code = GET_CODE (condition_rtx);
25292 
25293   if (FLOAT_MODE_P (mode)
25294       && !TARGET_FPRS && TARGET_HARD_FLOAT)
25295     {
25296       rtx t;
25297 
25298       PUT_MODE (condition_rtx, SImode);
25299       t = XEXP (condition_rtx, 0);
25300 
25301       gcc_assert (cond_code == NE || cond_code == EQ);
25302 
25303       if (cond_code == NE)
25304 	emit_insn (gen_e500_flip_gt_bit (t, t));
25305 
25306       emit_insn (gen_move_from_CR_gt_bit (result, t));
25307       return;
25308     }
25309 
25310   if (cond_code == NE
25311       || cond_code == GE || cond_code == LE
25312       || cond_code == GEU || cond_code == LEU
25313       || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
25314     {
25315       rtx not_result = gen_reg_rtx (CCEQmode);
25316       rtx not_op, rev_cond_rtx;
25317       machine_mode cc_mode;
25318 
25319       cc_mode = GET_MODE (XEXP (condition_rtx, 0));
25320 
25321       rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
25322 				     SImode, XEXP (condition_rtx, 0), const0_rtx);
25323       not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
25324       emit_insn (gen_rtx_SET (not_result, not_op));
25325       condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
25326     }
25327 
25328   op_mode = GET_MODE (XEXP (operands[1], 0));
25329   if (op_mode == VOIDmode)
25330     op_mode = GET_MODE (XEXP (operands[1], 1));
25331 
25332   if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
25333     {
25334       PUT_MODE (condition_rtx, DImode);
25335       convert_move (result, condition_rtx, 0);
25336     }
25337   else
25338     {
25339       PUT_MODE (condition_rtx, SImode);
25340       emit_insn (gen_rtx_SET (result, condition_rtx));
25341     }
25342 }
25343 
/* Emit a conditional branch testing the comparison in OPERANDS[0],
   branching to the label in OPERANDS[3].  */
25345 
25346 void
rs6000_emit_cbranch (machine_mode mode, rtx operands[])
25348 {
25349   rtx condition_rtx, loc_ref;
25350 
25351   condition_rtx = rs6000_generate_compare (operands[0], mode);
25352   loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
25353   emit_jump_insn (gen_rtx_SET (pc_rtx,
25354 			       gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
25355 						     loc_ref, pc_rtx)));
25356 }
25357 
25358 /* Return the string to output a conditional branch to LABEL, which is
25359    the operand template of the label, or NULL if the branch is really a
25360    conditional return.
25361 
25362    OP is the conditional expression.  XEXP (OP, 0) is assumed to be a
25363    condition code register and its mode specifies what kind of
25364    comparison we made.
25365 
25366    REVERSED is nonzero if we should reverse the sense of the comparison.
25367 
25368    INSN is the insn.  */
25369 
25370 char *
output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
25372 {
25373   static char string[64];
25374   enum rtx_code code = GET_CODE (op);
25375   rtx cc_reg = XEXP (op, 0);
25376   machine_mode mode = GET_MODE (cc_reg);
25377   int cc_regno = REGNO (cc_reg) - CR0_REGNO;
25378   int need_longbranch = label != NULL && get_attr_length (insn) == 8;
25379   int really_reversed = reversed ^ need_longbranch;
25380   char *s = string;
25381   const char *ccode;
25382   const char *pred;
25383   rtx note;
25384 
25385   validate_condition_mode (code, mode);
25386 
25387   /* Work out which way this really branches.  We could use
25388      reverse_condition_maybe_unordered here always but this
25389      makes the resulting assembler clearer.  */
25390   if (really_reversed)
25391     {
      /* Reversal of FP compares needs care -- an ordered compare
	 becomes an unordered compare and vice versa.  */
25394       if (mode == CCFPmode)
25395 	code = reverse_condition_maybe_unordered (code);
25396       else
25397 	code = reverse_condition (code);
25398     }
25399 
25400   if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
25401     {
25402       /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
25403 	 to the GT bit.  */
25404       switch (code)
25405 	{
25406 	case EQ:
25407 	  /* Opposite of GT.  */
25408 	  code = GT;
25409 	  break;
25410 
25411 	case NE:
25412 	  code = UNLE;
25413 	  break;
25414 
25415 	default:
25416 	  gcc_unreachable ();
25417 	}
25418     }
25419 
25420   switch (code)
25421     {
25422       /* Not all of these are actually distinct opcodes, but
25423 	 we distinguish them for clarity of the resulting assembler.  */
25424     case NE: case LTGT:
25425       ccode = "ne"; break;
25426     case EQ: case UNEQ:
25427       ccode = "eq"; break;
25428     case GE: case GEU:
25429       ccode = "ge"; break;
25430     case GT: case GTU: case UNGT:
25431       ccode = "gt"; break;
25432     case LE: case LEU:
25433       ccode = "le"; break;
25434     case LT: case LTU: case UNLT:
25435       ccode = "lt"; break;
25436     case UNORDERED: ccode = "un"; break;
25437     case ORDERED: ccode = "nu"; break;
25438     case UNGE: ccode = "nl"; break;
25439     case UNLE: ccode = "ng"; break;
25440     default:
25441       gcc_unreachable ();
25442     }
25443 
25444   /* Maybe we have a guess as to how likely the branch is.  */
25445   pred = "";
25446   note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
25447   if (note != NULL_RTX)
25448     {
25449       /* PROB is the difference from 50%.  */
25450       int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
25451 		   .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
25452 
25453       /* Only hint for highly probable/improbable branches on newer cpus when
25454 	 we have real profile data, as static prediction overrides processor
25455 	 dynamic prediction.  For older cpus we may as well always hint, but
25456 	 assume not taken for branches that are very close to 50% as a
25457 	 mispredicted taken branch is more expensive than a
25458 	 mispredicted not-taken branch.  */
25459       if (rs6000_always_hint
25460 	  || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
25461 	      && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
25462 	      && br_prob_note_reliable_p (note)))
25463 	{
25464 	  if (abs (prob) > REG_BR_PROB_BASE / 20
25465 	      && ((prob > 0) ^ need_longbranch))
25466 	    pred = "+";
25467 	  else
25468 	    pred = "-";
25469 	}
25470     }
25471 
25472   if (label == NULL)
25473     s += sprintf (s, "b%slr%s ", ccode, pred);
25474   else
25475     s += sprintf (s, "b%s%s ", ccode, pred);
25476 
25477   /* We need to escape any '%' characters in the reg_names string.
25478      Assume they'd only be the first character....  */
25479   if (reg_names[cc_regno + CR0_REGNO][0] == '%')
25480     *s++ = '%';
25481   s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
25482 
25483   if (label != NULL)
25484     {
25485       /* If the branch distance was too far, we may have to use an
25486 	 unconditional branch to go the distance.  */
25487       if (need_longbranch)
25488 	s += sprintf (s, ",$+8\n\tb %s", label);
25489       else
25490 	s += sprintf (s, ",%s", label);
25491     }
25492 
25493   return string;
25494 }
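
/* Example outputs (illustrative): a likely-taken EQ branch on cr7 comes out
   as "beq+ 7,.L5", while an out-of-range branch is inverted and emitted as
   "bne 7,$+8" followed by "b .L5".  */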
25495 
25496 /* Return the string to flip the GT bit on a CR.  */
25497 char *
output_e500_flip_gt_bit (rtx dst, rtx src)
25499 {
25500   static char string[64];
25501   int a, b;
25502 
  gcc_assert (REG_P (dst) && CR_REGNO_P (REGNO (dst))
	      && REG_P (src) && CR_REGNO_P (REGNO (src)));
25505 
25506   /* GT bit.  */
25507   a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
25508   b = 4 * (REGNO (src) - CR0_REGNO) + 1;
25509 
25510   sprintf (string, "crnot %d,%d", a, b);
25511   return string;
25512 }
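
/* E.g. (illustrative): flipping the GT bit of cr1 into cr1 itself prints
   "crnot 5,5", since the GT bit of CR field N is CR bit 4*N+1.  */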
25513 
25514 /* Return insn for VSX or Altivec comparisons.  */
25515 
25516 static rtx
rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
25518 {
25519   rtx mask;
25520   machine_mode mode = GET_MODE (op0);
25521 
25522   switch (code)
25523     {
25524     default:
25525       break;
25526 
25527     case GE:
25528       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
25529 	return NULL_RTX;
25530       /* FALLTHRU */
25531 
25532     case EQ:
25533     case GT:
25534     case GTU:
25535     case ORDERED:
25536     case UNORDERED:
25537     case UNEQ:
25538     case LTGT:
25539       mask = gen_reg_rtx (mode);
25540       emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
25541       return mask;
25542     }
25543 
25544   return NULL_RTX;
25545 }
25546 
25547 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
   DMODE is the expected destination mode.  This is a recursive function.  */
25549 
25550 static rtx
rs6000_emit_vector_compare (enum rtx_code rcode,
25552 			    rtx op0, rtx op1,
25553 			    machine_mode dmode)
25554 {
25555   rtx mask;
25556   bool swap_operands = false;
25557   bool try_again = false;
25558 
25559   gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
25560   gcc_assert (GET_MODE (op0) == GET_MODE (op1));
25561 
25562   /* See if the comparison works as is.  */
25563   mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
25564   if (mask)
25565     return mask;
25566 
25567   switch (rcode)
25568     {
25569     case LT:
25570       rcode = GT;
25571       swap_operands = true;
25572       try_again = true;
25573       break;
25574     case LTU:
25575       rcode = GTU;
25576       swap_operands = true;
25577       try_again = true;
25578       break;
25579     case NE:
25580     case UNLE:
25581     case UNLT:
25582     case UNGE:
25583     case UNGT:
25584       /* Invert condition and try again.
25585 	 e.g., A != B becomes ~(A==B).  */
25586       {
25587 	enum rtx_code rev_code;
25588 	enum insn_code nor_code;
25589 	rtx mask2;
25590 
25591 	rev_code = reverse_condition_maybe_unordered (rcode);
25592 	if (rev_code == UNKNOWN)
25593 	  return NULL_RTX;
25594 
25595 	nor_code = optab_handler (one_cmpl_optab, dmode);
25596 	if (nor_code == CODE_FOR_nothing)
25597 	  return NULL_RTX;
25598 
25599 	mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
25600 	if (!mask2)
25601 	  return NULL_RTX;
25602 
25603 	mask = gen_reg_rtx (dmode);
25604 	emit_insn (GEN_FCN (nor_code) (mask, mask2));
25605 	return mask;
25606       }
25607       break;
25608     case GE:
25609     case GEU:
25610     case LE:
25611     case LEU:
25612       /* Try GT/GTU/LT/LTU OR EQ */
25613       {
25614 	rtx c_rtx, eq_rtx;
25615 	enum insn_code ior_code;
25616 	enum rtx_code new_code;
25617 
25618 	switch (rcode)
25619 	  {
25620 	  case  GE:
25621 	    new_code = GT;
25622 	    break;
25623 
25624 	  case GEU:
25625 	    new_code = GTU;
25626 	    break;
25627 
25628 	  case LE:
25629 	    new_code = LT;
25630 	    break;
25631 
25632 	  case LEU:
25633 	    new_code = LTU;
25634 	    break;
25635 
25636 	  default:
25637 	    gcc_unreachable ();
25638 	  }
25639 
25640 	ior_code = optab_handler (ior_optab, dmode);
25641 	if (ior_code == CODE_FOR_nothing)
25642 	  return NULL_RTX;
25643 
25644 	c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
25645 	if (!c_rtx)
25646 	  return NULL_RTX;
25647 
25648 	eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
25649 	if (!eq_rtx)
25650 	  return NULL_RTX;
25651 
25652 	mask = gen_reg_rtx (dmode);
25653 	emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
25654 	return mask;
25655       }
25656       break;
25657     default:
25658       return NULL_RTX;
25659     }
25660 
25661   if (try_again)
25662     {
25663       if (swap_operands)
25664 	std::swap (op0, op1);
25665 
25666       mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
25667       if (mask)
25668 	return mask;
25669     }
25670 
25671   /* You only get two chances.  */
25672   return NULL_RTX;
25673 }
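
/* Illustrative rewrites performed above (a sketch): a V4SI LT becomes GT with
   the operands swapped (one vcmpgtsw); LE becomes that swapped GT OR'd with
   an EQ (vcmpgtsw plus vcmpequw); and NE becomes the complement of EQ.  */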
25674 
25675 /* Emit vector conditional expression.  DEST is destination. OP_TRUE and
25676    OP_FALSE are two VEC_COND_EXPR operands.  CC_OP0 and CC_OP1 are the two
25677    operands for the relation operation COND.  */
25678 
25679 int
rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
25681 			      rtx cond, rtx cc_op0, rtx cc_op1)
25682 {
25683   machine_mode dest_mode = GET_MODE (dest);
25684   machine_mode mask_mode = GET_MODE (cc_op0);
25685   enum rtx_code rcode = GET_CODE (cond);
25686   machine_mode cc_mode = CCmode;
25687   rtx mask;
25688   rtx cond2;
25689   bool invert_move = false;
25690 
25691   if (VECTOR_UNIT_NONE_P (dest_mode))
25692     return 0;
25693 
25694   gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
25695 	      && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
25696 
25697   switch (rcode)
25698     {
25699       /* Swap operands if we can, and fall back to doing the operation as
25700 	 specified, and doing a NOR to invert the test.  */
25701     case NE:
25702     case UNLE:
25703     case UNLT:
25704     case UNGE:
25705     case UNGT:
25706       /* Invert condition and try again.
25707 	 e.g., A  = (B != C) ? D : E becomes A = (B == C) ? E : D.  */
25708       invert_move = true;
25709       rcode = reverse_condition_maybe_unordered (rcode);
25710       if (rcode == UNKNOWN)
25711 	return 0;
25712       break;
25713 
25714     case GE:
25715     case LE:
25716       if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
25717 	{
25718 	  /* Invert condition to avoid compound test.  */
25719 	  invert_move = true;
25720 	  rcode = reverse_condition (rcode);
25721 	}
25722       break;
25723 
25724     case GTU:
25725     case GEU:
25726     case LTU:
25727     case LEU:
25728       /* Mark unsigned tests with CCUNSmode.  */
25729       cc_mode = CCUNSmode;
25730 
25731       /* Invert condition to avoid compound test if necessary.  */
25732       if (rcode == GEU || rcode == LEU)
25733 	{
25734 	  invert_move = true;
25735 	  rcode = reverse_condition (rcode);
25736 	}
25737       break;
25738 
25739     default:
25740       break;
25741     }
25742 
25743   /* Get the vector mask for the given relational operations.  */
25744   mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
25745 
25746   if (!mask)
25747     return 0;
25748 
25749   if (invert_move)
25750     std::swap (op_true, op_false);
25751 
  /* The compare mask is generated as all-ones/all-zeros (-1/0) per element;
     exploit that when the arms are those constant vectors.  */
25753   if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
25754       && (GET_CODE (op_true) == CONST_VECTOR
25755 	  || GET_CODE (op_false) == CONST_VECTOR))
25756     {
25757       rtx constant_0 = CONST0_RTX (dest_mode);
25758       rtx constant_m1 = CONSTM1_RTX (dest_mode);
25759 
25760       if (op_true == constant_m1 && op_false == constant_0)
25761 	{
25762 	  emit_move_insn (dest, mask);
25763 	  return 1;
25764 	}
25765 
25766       else if (op_true == constant_0 && op_false == constant_m1)
25767 	{
25768 	  emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
25769 	  return 1;
25770 	}
25771 
25772       /* If we can't use the vector comparison directly, perhaps we can use
25773 	 the mask for the true or false fields, instead of loading up a
25774 	 constant.  */
25775       if (op_true == constant_m1)
25776 	op_true = mask;
25777 
25778       if (op_false == constant_0)
25779 	op_false = mask;
25780     }
25781 
25782   if (!REG_P (op_true) && !SUBREG_P (op_true))
25783     op_true = force_reg (dest_mode, op_true);
25784 
25785   if (!REG_P (op_false) && !SUBREG_P (op_false))
25786     op_false = force_reg (dest_mode, op_false);
25787 
25788   cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
25789 			  CONST0_RTX (dest_mode));
25790   emit_insn (gen_rtx_SET (dest,
25791 			  gen_rtx_IF_THEN_ELSE (dest_mode,
25792 						cond2,
25793 						op_true,
25794 						op_false)));
25795   return 1;
25796 }
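
/* E.g. (illustrative): for V4SI, (a == b) ? -1 : 0 collapses to the raw
   compare mask itself, and (a == b) ? 0 : -1 to a single NOT of the mask,
   so no vsel/xxsel select is needed at all.  */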
25797 
/* ISA 3.0 (power9) minmax subcase to emit an XSMAXCDP or XSMINCDP instruction
   for SF/DF scalars.  Move TRUE_COND to DEST if OP applied to the operands of
   the last comparison is nonzero/true, FALSE_COND if it is zero/false.
   Return 0 if the hardware has no such operation.  */
25802 
25803 static int
rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
25805 {
25806   enum rtx_code code = GET_CODE (op);
25807   rtx op0 = XEXP (op, 0);
25808   rtx op1 = XEXP (op, 1);
25809   machine_mode compare_mode = GET_MODE (op0);
25810   machine_mode result_mode = GET_MODE (dest);
25811   bool max_p = false;
25812 
25813   if (result_mode != compare_mode)
25814     return 0;
25815 
25816   if (code == GE || code == GT)
25817     max_p = true;
25818   else if (code == LE || code == LT)
25819     max_p = false;
25820   else
25821     return 0;
25822 
25823   if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
25824     ;
25825 
25826   else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
25827     max_p = !max_p;
25828 
25829   else
25830     return 0;
25831 
25832   rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
25833   return 1;
25834 }
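
/* For example (illustrative): (set d (if_then_else (ge:DF a b) a b)) becomes
   a single SMAX, i.e. xsmaxcdp, while swapping the arms selects xsmincdp
   instead.  */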
25835 
/* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
   XXSEL instructions for SF/DF scalars.  Move TRUE_COND to DEST if OP applied
   to the operands of the last comparison is nonzero/true, FALSE_COND if it is
   zero/false.  Return 0 if the hardware has no such operation.  */
25840 
25841 static int
rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
25843 {
25844   enum rtx_code code = GET_CODE (op);
25845   rtx op0 = XEXP (op, 0);
25846   rtx op1 = XEXP (op, 1);
25847   machine_mode result_mode = GET_MODE (dest);
25848   rtx compare_rtx;
25849   rtx cmove_rtx;
25850   rtx clobber_rtx;
25851 
25852   if (!can_create_pseudo_p ())
25853     return 0;
25854 
25855   switch (code)
25856     {
25857     case EQ:
25858     case GE:
25859     case GT:
25860       break;
25861 
25862     case NE:
25863     case LT:
25864     case LE:
25865       code = swap_condition (code);
25866       std::swap (op0, op1);
25867       break;
25868 
25869     default:
25870       return 0;
25871     }
25872 
25873   /* Generate:	[(parallel [(set (dest)
25874 				 (if_then_else (op (cmp1) (cmp2))
25875 					       (true)
25876 					       (false)))
25877 			    (clobber (scratch))])].  */
25878 
25879   compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
25880   cmove_rtx = gen_rtx_SET (dest,
25881 			   gen_rtx_IF_THEN_ELSE (result_mode,
25882 						 compare_rtx,
25883 						 true_cond,
25884 						 false_cond));
25885 
25886   clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
25887   emit_insn (gen_rtx_PARALLEL (VOIDmode,
25888 			       gen_rtvec (2, cmove_rtx, clobber_rtx)));
25889 
25890   return 1;
25891 }
25892 
/* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
   operands of the last comparison is nonzero/true, FALSE_COND if it
   is zero/false.  Return 0 if the hardware has no such operation.  */
25896 
25897 int
rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
25899 {
25900   enum rtx_code code = GET_CODE (op);
25901   rtx op0 = XEXP (op, 0);
25902   rtx op1 = XEXP (op, 1);
25903   machine_mode compare_mode = GET_MODE (op0);
25904   machine_mode result_mode = GET_MODE (dest);
25905   rtx temp;
25906   bool is_against_zero;
25907 
25908   /* These modes should always match.  */
25909   if (GET_MODE (op1) != compare_mode
25910       /* In the isel case however, we can use a compare immediate, so
25911 	 op1 may be a small constant.  */
25912       && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
25913     return 0;
25914   if (GET_MODE (true_cond) != result_mode)
25915     return 0;
25916   if (GET_MODE (false_cond) != result_mode)
25917     return 0;
25918 
25919   /* See if we can use the ISA 3.0 (power9) min/max/compare functions.  */
25920   if (TARGET_P9_MINMAX
25921       && (compare_mode == SFmode || compare_mode == DFmode)
25922       && (result_mode == SFmode || result_mode == DFmode))
25923     {
25924       if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
25925 	return 1;
25926 
25927       if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
25928 	return 1;
25929     }
25930 
25931   /* Don't allow using floating point comparisons for integer results for
25932      now.  */
25933   if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
25934     return 0;
25935 
25936   /* First, work out if the hardware can do this at all, or
25937      if it's too slow....  */
25938   if (!FLOAT_MODE_P (compare_mode))
25939     {
25940       if (TARGET_ISEL)
25941 	return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
25942       return 0;
25943     }
25944   else if (TARGET_HARD_FLOAT && !TARGET_FPRS
25945 	   && SCALAR_FLOAT_MODE_P (compare_mode))
25946     return 0;
25947 
25948   is_against_zero = op1 == CONST0_RTX (compare_mode);
25949 
25950   /* A floating-point subtract might overflow, underflow, or produce
25951      an inexact result, thus changing the floating-point flags, so it
25952      can't be generated if we care about that.  It's safe if one side
25953      of the construct is zero, since then no subtract will be
25954      generated.  */
25955   if (SCALAR_FLOAT_MODE_P (compare_mode)
25956       && flag_trapping_math && ! is_against_zero)
25957     return 0;
25958 
  /* Eliminate half of the comparisons by switching operands; this
     makes the remaining code simpler.  */
25961   if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
25962       || code == LTGT || code == LT || code == UNLE)
25963     {
25964       code = reverse_condition_maybe_unordered (code);
      std::swap (true_cond, false_cond);
25968     }
25969 
  /* UNEQ and LTGT take four instructions for a comparison with zero;
     it'll probably be faster to use a branch here too.  */
25972   if (code == UNEQ && HONOR_NANS (compare_mode))
25973     return 0;
25974 
25975   /* We're going to try to implement comparisons by performing
25976      a subtract, then comparing against zero.  Unfortunately,
25977      Inf - Inf is NaN which is not zero, and so if we don't
25978      know that the operand is finite and the comparison
     would treat EQ differently from UNORDERED, we can't do it.  */
25980   if (HONOR_INFINITIES (compare_mode)
25981       && code != GT && code != UNGE
25982       && (GET_CODE (op1) != CONST_DOUBLE
25983 	  || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
25984       /* Constructs of the form (a OP b ? a : b) are safe.  */
25985       && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
25986 	  || (! rtx_equal_p (op0, true_cond)
25987 	      && ! rtx_equal_p (op1, true_cond))))
25988     return 0;
25989 
25990   /* At this point we know we can use fsel.  */
25991 
25992   /* Reduce the comparison to a comparison against zero.  */
25993   if (! is_against_zero)
25994     {
25995       temp = gen_reg_rtx (compare_mode);
25996       emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
25997       op0 = temp;
25998       op1 = CONST0_RTX (compare_mode);
25999     }
26000 
26001   /* If we don't care about NaNs we can reduce some of the comparisons
26002      down to faster ones.  */
26003   if (! HONOR_NANS (compare_mode))
26004     switch (code)
26005       {
26006       case GT:
26007 	code = LE;
26008 	temp = true_cond;
26009 	true_cond = false_cond;
26010 	false_cond = temp;
26011 	break;
26012       case UNGE:
26013 	code = GE;
26014 	break;
26015       case UNEQ:
26016 	code = EQ;
26017 	break;
26018       default:
26019 	break;
26020       }
26021 
26022   /* Now, reduce everything down to a GE.  */
26023   switch (code)
26024     {
26025     case GE:
26026       break;
26027 
26028     case LE:
26029       temp = gen_reg_rtx (compare_mode);
26030       emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
26031       op0 = temp;
26032       break;
26033 
26034     case ORDERED:
26035       temp = gen_reg_rtx (compare_mode);
26036       emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
26037       op0 = temp;
26038       break;
26039 
26040     case EQ:
26041       temp = gen_reg_rtx (compare_mode);
26042       emit_insn (gen_rtx_SET (temp,
26043 			      gen_rtx_NEG (compare_mode,
26044 					   gen_rtx_ABS (compare_mode, op0))));
26045       op0 = temp;
26046       break;
26047 
26048     case UNGE:
26049       /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
26050       temp = gen_reg_rtx (result_mode);
26051       emit_insn (gen_rtx_SET (temp,
26052 			      gen_rtx_IF_THEN_ELSE (result_mode,
26053 						    gen_rtx_GE (VOIDmode,
26054 								op0, op1),
26055 						    true_cond, false_cond)));
26056       false_cond = true_cond;
26057       true_cond = temp;
26058 
26059       temp = gen_reg_rtx (compare_mode);
26060       emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
26061       op0 = temp;
26062       break;
26063 
26064     case GT:
26065       /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
26066       temp = gen_reg_rtx (result_mode);
26067       emit_insn (gen_rtx_SET (temp,
26068 			      gen_rtx_IF_THEN_ELSE (result_mode,
26069 						    gen_rtx_GE (VOIDmode,
26070 								op0, op1),
26071 						    true_cond, false_cond)));
26072       true_cond = false_cond;
26073       false_cond = temp;
26074 
26075       temp = gen_reg_rtx (compare_mode);
26076       emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
26077       op0 = temp;
26078       break;
26079 
26080     default:
26081       gcc_unreachable ();
26082     }
26083 
26084   emit_insn (gen_rtx_SET (dest,
26085 			  gen_rtx_IF_THEN_ELSE (result_mode,
26086 						gen_rtx_GE (VOIDmode,
26087 							    op0, op1),
26088 						true_cond, false_cond)));
26089   return 1;
26090 }
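
/* Illustrative sketch, not part of the build: for
   double d = (a >= b) ? x : y, the expansion above reduces the
   comparison to one against zero and then emits an fsel, roughly

	fsub	f0,fa,fb	# the compare becomes a - b >= 0
	fsel	fd,f0,fx,fy	# fd = (f0 >= 0.0) ? fx : fy

   fsel selects its second source operand when the first is greater
   than or equal to zero and the third otherwise, which is why the
   switches above rewrite every supported condition into a single
   GE-against-zero test (negating or taking the absolute value of
   the operand where needed).  Register names are placeholders.  */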
26091 
26092 /* Same as above, but for ints (isel).  */
26093 
26094 static int
26095 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
26096 {
26097   rtx condition_rtx, cr;
26098   machine_mode mode = GET_MODE (dest);
26099   enum rtx_code cond_code;
26100   rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
26101   bool signedp;
26102 
26103   if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
26104     return 0;
26105 
26106   /* We still have to do the compare, because isel doesn't do a
26107      compare; it just looks at the CRx bits set by a previous compare
26108      instruction.  */
26109   condition_rtx = rs6000_generate_compare (op, mode);
26110   cond_code = GET_CODE (condition_rtx);
26111   cr = XEXP (condition_rtx, 0);
26112   signedp = GET_MODE (cr) == CCmode;
26113 
26114   isel_func = (mode == SImode
26115 	       ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
26116 	       : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
26117 
26118   switch (cond_code)
26119     {
26120     case LT: case GT: case LTU: case GTU: case EQ:
26121       /* isel handles these directly.  */
26122       break;
26123 
26124     default:
26125       /* We need to swap the sense of the comparison.  */
26126       {
26127 	std::swap (false_cond, true_cond);
26128 	PUT_CODE (condition_rtx, reverse_condition (cond_code));
26129       }
26130       break;
26131     }
26132 
26133   false_cond = force_reg (mode, false_cond);
26134   if (true_cond != const0_rtx)
26135     true_cond = force_reg (mode, true_cond);
26136 
26137   emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
26138 
26139   return 1;
26140 }
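
/* Rough shape of the output, assuming a signed SImode comparison
   (register names are placeholders):

	cmpw	cr0,ra,rb
	isel	rd,rt,rf,0	# bit 0 = LT of cr0; rd = LT ? rt : rf

   isel only inspects a CR bit set by the preceding compare, so the
   conditional move is branch-free.  */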
26141 
26142 const char *
26143 output_isel (rtx *operands)
26144 {
26145   enum rtx_code code;
26146 
26147   code = GET_CODE (operands[1]);
26148 
26149   if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
26150     {
26151       gcc_assert (GET_CODE (operands[2]) == REG
26152 		  && GET_CODE (operands[3]) == REG);
26153       PUT_CODE (operands[1], reverse_condition (code));
26154       return "isel %0,%3,%2,%j1";
26155     }
26156 
26157   return "isel %0,%2,%3,%j1";
26158 }
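
/* Example of the reversal above: isel can only test a CR bit for
   being set, so e.g. a GE condition is printed as an LT test with
   the two value operands swapped ("isel %0,%3,%2,%j1"); the %j
   output modifier prints the CR bit number of the (now reversed)
   comparison.  */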
26159 
26160 void
26161 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
26162 {
26163   machine_mode mode = GET_MODE (op0);
26164   enum rtx_code c;
26165   rtx target;
26166 
26167   /* VSX/altivec have direct min/max insns.  */
26168   if ((code == SMAX || code == SMIN)
26169       && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
26170 	  || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
26171     {
26172       emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
26173       return;
26174     }
26175 
26176   if (code == SMAX || code == SMIN)
26177     c = GE;
26178   else
26179     c = GEU;
26180 
26181   if (code == SMAX || code == UMAX)
26182     target = emit_conditional_move (dest, c, op0, op1, mode,
26183 				    op0, op1, mode, 0);
26184   else
26185     target = emit_conditional_move (dest, c, op0, op1, mode,
26186 				    op1, op0, mode, 0);
26187   gcc_assert (target);
26188   if (target != dest)
26189     emit_move_insn (dest, target);
26190 }
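
/* In effect (sketch): SMAX becomes dest = (op0 >= op1) ? op0 : op1
   and SMIN becomes dest = (op0 >= op1) ? op1 : op0, so both funnel
   through the same GE/GEU conditional move; on VSX targets the
   direct min/max instructions (e.g. xsmaxdp/xsmindp) are used
   instead.  */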
26191 
26192 /* Split a signbit operation on 64-bit machines with direct move.  Also
26193    handle the case where the value comes from memory or is already in a GPR.  */
26194 
26195 void
26196 rs6000_split_signbit (rtx dest, rtx src)
26197 {
26198   machine_mode d_mode = GET_MODE (dest);
26199   machine_mode s_mode = GET_MODE (src);
26200   rtx dest_di = (d_mode == DImode) ? dest : gen_lowpart (DImode, dest);
26201   rtx shift_reg = dest_di;
26202 
26203   gcc_assert (FLOAT128_IEEE_P (s_mode) && TARGET_POWERPC64);
26204 
26205   if (MEM_P (src))
26206     {
26207       rtx mem = (WORDS_BIG_ENDIAN
26208 		 ? adjust_address (src, DImode, 0)
26209 		 : adjust_address (src, DImode, 8));
26210       emit_insn (gen_rtx_SET (dest_di, mem));
26211     }
26212 
26213   else
26214     {
26215       unsigned int r = reg_or_subregno (src);
26216 
26217       if (INT_REGNO_P (r))
26218 	shift_reg = gen_rtx_REG (DImode, r + (BYTES_BIG_ENDIAN == 0));
26219 
26220       else
26221 	{
26222 	  /* Generate the special mfvsrd instruction to get it in a GPR.  */
26223 	  gcc_assert (VSX_REGNO_P (r));
26224 	  if (s_mode == KFmode)
26225 	    emit_insn (gen_signbitkf2_dm2 (dest_di, src));
26226 	  else
26227 	    emit_insn (gen_signbittf2_dm2 (dest_di, src));
26228 	}
26229     }
26230 
26231   emit_insn (gen_lshrdi3 (dest_di, shift_reg, GEN_INT (63)));
26232   return;
26233 }
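
/* Net effect, sketched: once the most significant doubleword of the
   IEEE 128-bit value is in a GPR rh (loaded from memory, taken from
   the GPR pair, or moved over with mfvsrd), the sign bit is a single
   logical shift:

	srdi	rd,rh,63	# rd = 0 or 1

   Register names are placeholders.  */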
26234 
26235 /* A subroutine of the atomic operation splitters.  Jump to LABEL if
26236    COND is true.  Mark the jump as unlikely to be taken.  */
26237 
26238 static void
26239 emit_unlikely_jump (rtx cond, rtx label)
26240 {
26241   rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
26242   rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
26243   add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
26244 }
26245 
26246 /* A subroutine of the atomic operation splitters.  Emit a load-locked
26247    instruction in MODE.  For QI/HImode, possibly use a pattern that includes
26248    the zero_extend operation.  */
26249 
26250 static void
26251 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
26252 {
26253   rtx (*fn) (rtx, rtx) = NULL;
26254 
26255   switch (mode)
26256     {
26257     case E_QImode:
26258       fn = gen_load_lockedqi;
26259       break;
26260     case E_HImode:
26261       fn = gen_load_lockedhi;
26262       break;
26263     case E_SImode:
26264       if (GET_MODE (mem) == QImode)
26265 	fn = gen_load_lockedqi_si;
26266       else if (GET_MODE (mem) == HImode)
26267 	fn = gen_load_lockedhi_si;
26268       else
26269 	fn = gen_load_lockedsi;
26270       break;
26271     case E_DImode:
26272       fn = gen_load_lockeddi;
26273       break;
26274     case E_TImode:
26275       fn = gen_load_lockedti;
26276       break;
26277     default:
26278       gcc_unreachable ();
26279     }
26280   emit_insn (fn (reg, mem));
26281 }
26282 
26283 /* A subroutine of the atomic operation splitters.  Emit a store-conditional
26284    instruction in MODE.  */
26285 
26286 static void
26287 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
26288 {
26289   rtx (*fn) (rtx, rtx, rtx) = NULL;
26290 
26291   switch (mode)
26292     {
26293     case E_QImode:
26294       fn = gen_store_conditionalqi;
26295       break;
26296     case E_HImode:
26297       fn = gen_store_conditionalhi;
26298       break;
26299     case E_SImode:
26300       fn = gen_store_conditionalsi;
26301       break;
26302     case E_DImode:
26303       fn = gen_store_conditionaldi;
26304       break;
26305     case E_TImode:
26306       fn = gen_store_conditionalti;
26307       break;
26308     default:
26309       gcc_unreachable ();
26310     }
26311 
26312   /* Emit sync before stwcx. to address PPC405 Erratum.  */
26313   if (PPC405_ERRATUM77)
26314     emit_insn (gen_hwsync ());
26315 
26316   emit_insn (fn (res, mem, val));
26317 }
26318 
26319 /* Expand barriers before and after a load_locked/store_cond sequence.  */
26320 
26321 static rtx
26322 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
26323 {
26324   rtx addr = XEXP (mem, 0);
26325   int strict_p = (reload_in_progress || reload_completed);
26326 
26327   if (!legitimate_indirect_address_p (addr, strict_p)
26328       && !legitimate_indexed_address_p (addr, strict_p))
26329     {
26330       addr = force_reg (Pmode, addr);
26331       mem = replace_equiv_address_nv (mem, addr);
26332     }
26333 
26334   switch (model)
26335     {
26336     case MEMMODEL_RELAXED:
26337     case MEMMODEL_CONSUME:
26338     case MEMMODEL_ACQUIRE:
26339       break;
26340     case MEMMODEL_RELEASE:
26341     case MEMMODEL_ACQ_REL:
26342       emit_insn (gen_lwsync ());
26343       break;
26344     case MEMMODEL_SEQ_CST:
26345       emit_insn (gen_hwsync ());
26346       break;
26347     default:
26348       gcc_unreachable ();
26349     }
26350   return mem;
26351 }
26352 
26353 static void
26354 rs6000_post_atomic_barrier (enum memmodel model)
26355 {
26356   switch (model)
26357     {
26358     case MEMMODEL_RELAXED:
26359     case MEMMODEL_CONSUME:
26360     case MEMMODEL_RELEASE:
26361       break;
26362     case MEMMODEL_ACQUIRE:
26363     case MEMMODEL_ACQ_REL:
26364     case MEMMODEL_SEQ_CST:
26365       emit_insn (gen_isync ());
26366       break;
26367     default:
26368       gcc_unreachable ();
26369     }
26370 }
26371 
26372 /* A subroutine of the various atomic expanders.  For sub-word operations,
26373    we must adjust things to operate on SImode.  Given the original MEM,
26374    return a new aligned memory.  Also build and return the quantities by
26375    which to shift and mask.  */
26376 
26377 static rtx
26378 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
26379 {
26380   rtx addr, align, shift, mask, mem;
26381   HOST_WIDE_INT shift_mask;
26382   machine_mode mode = GET_MODE (orig_mem);
26383 
26384   /* For smaller modes, we have to implement this via SImode.  */
26385   shift_mask = (mode == QImode ? 0x18 : 0x10);
26386 
26387   addr = XEXP (orig_mem, 0);
26388   addr = force_reg (GET_MODE (addr), addr);
26389 
26390   /* Aligned memory containing subword.  Generate a new memory.  We
26391      do not want any of the existing MEM_ATTR data, as we're now
26392      accessing memory outside the original object.  */
26393   align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
26394 			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
26395   mem = gen_rtx_MEM (SImode, align);
26396   MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
26397   if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
26398     set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
26399 
26400   /* Shift amount for subword relative to aligned word.  */
26401   shift = gen_reg_rtx (SImode);
26402   addr = gen_lowpart (SImode, addr);
26403   rtx tmp = gen_reg_rtx (SImode);
26404   emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
26405   emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
26406   if (BYTES_BIG_ENDIAN)
26407     shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
26408 			         shift, 1, OPTAB_LIB_WIDEN);
26409   *pshift = shift;
26410 
26411   /* Mask for insertion.  */
26412   mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
26413 			      shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
26414   *pmask = mask;
26415 
26416   return mem;
26417 }
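
/* For example (little-endian QImode, sketched in C; variable names
   are illustrative only):

     word  = *(int *) (addr & -4);   // aligned SImode container
     shift = (addr & 3) * 8;         // bit offset of the byte
     mask  = 0xff << shift;          // insertion/extraction mask

   On big-endian targets the shift is XORed with 0x18 (0x10 for
   HImode) so that it still locates the correct subword, and an
   insertion is then (word & ~mask) | (val << shift), as done by
   rs6000_mask_atomic_subword below.  */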
26418 
26419 /* A subroutine of the various atomic expanders.  For sub-word operands,
26420    combine OLDVAL and NEWVAL via MASK.  Returns a new pseudo.  */
26421 
26422 static rtx
26423 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
26424 {
26425   rtx x;
26426 
26427   x = gen_reg_rtx (SImode);
26428   emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
26429 					  gen_rtx_NOT (SImode, mask),
26430 					  oldval)));
26431 
26432   x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
26433 
26434   return x;
26435 }
26436 
26437 /* A subroutine of the various atomic expanders.  For sub-word operands,
26438    extract WIDE to NARROW via SHIFT.  */
26439 
26440 static void
26441 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
26442 {
26443   wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
26444 			      wide, 1, OPTAB_LIB_WIDEN);
26445   emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
26446 }
26447 
26448 /* Expand an atomic compare and swap operation.  */
26449 
26450 void
26451 rs6000_expand_atomic_compare_and_swap (rtx operands[])
26452 {
26453   rtx boolval, retval, mem, oldval, newval, cond;
26454   rtx label1, label2, x, mask, shift;
26455   machine_mode mode, orig_mode;
26456   enum memmodel mod_s, mod_f;
26457   bool is_weak;
26458 
26459   boolval = operands[0];
26460   retval = operands[1];
26461   mem = operands[2];
26462   oldval = operands[3];
26463   newval = operands[4];
26464   is_weak = (INTVAL (operands[5]) != 0);
26465   mod_s = memmodel_base (INTVAL (operands[6]));
26466   mod_f = memmodel_base (INTVAL (operands[7]));
26467   orig_mode = mode = GET_MODE (mem);
26468 
26469   mask = shift = NULL_RTX;
26470   if (mode == QImode || mode == HImode)
26471     {
26472       /* Before power8, we didn't have access to lbarx/lharx, so generate a
26473 	 lwarx plus shift/mask operations.  With power8, we need to do the
26474 	 comparison in SImode, but the store is still done in QI/HImode.  */
26475       oldval = convert_modes (SImode, mode, oldval, 1);
26476 
26477       if (!TARGET_SYNC_HI_QI)
26478 	{
26479 	  mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
26480 
26481 	  /* Shift and mask OLDVAL into position within the word.  */
26482 	  oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
26483 					NULL_RTX, 1, OPTAB_LIB_WIDEN);
26484 
26485 	  /* Shift and mask NEWVAL into position within the word.  */
26486 	  newval = convert_modes (SImode, mode, newval, 1);
26487 	  newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
26488 					NULL_RTX, 1, OPTAB_LIB_WIDEN);
26489 	}
26490 
26491       /* Prepare to adjust the return value.  */
26492       retval = gen_reg_rtx (SImode);
26493       mode = SImode;
26494     }
26495   else if (reg_overlap_mentioned_p (retval, oldval))
26496     oldval = copy_to_reg (oldval);
26497 
26498   if (mode != TImode && !reg_or_short_operand (oldval, mode))
26499     oldval = copy_to_mode_reg (mode, oldval);
26500 
26501   if (reg_overlap_mentioned_p (retval, newval))
26502     newval = copy_to_reg (newval);
26503 
26504   mem = rs6000_pre_atomic_barrier (mem, mod_s);
26505 
26506   label1 = NULL_RTX;
26507   if (!is_weak)
26508     {
26509       label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
26510       emit_label (XEXP (label1, 0));
26511     }
26512   label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
26513 
26514   emit_load_locked (mode, retval, mem);
26515 
26516   x = retval;
26517   if (mask)
26518     x = expand_simple_binop (SImode, AND, retval, mask,
26519 			     NULL_RTX, 1, OPTAB_LIB_WIDEN);
26520 
26521   cond = gen_reg_rtx (CCmode);
26522   /* If we have TImode, synthesize a comparison.  */
26523   if (mode != TImode)
26524     x = gen_rtx_COMPARE (CCmode, x, oldval);
26525   else
26526     {
26527       rtx xor1_result = gen_reg_rtx (DImode);
26528       rtx xor2_result = gen_reg_rtx (DImode);
26529       rtx or_result = gen_reg_rtx (DImode);
26530       rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
26531       rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
26532       rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
26533       rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
26534 
26535       emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
26536       emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
26537       emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
26538       x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
26539     }
26540 
26541   emit_insn (gen_rtx_SET (cond, x));
26542 
26543   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
26544   emit_unlikely_jump (x, label2);
26545 
26546   x = newval;
26547   if (mask)
26548     x = rs6000_mask_atomic_subword (retval, newval, mask);
26549 
26550   emit_store_conditional (orig_mode, cond, mem, x);
26551 
26552   if (!is_weak)
26553     {
26554       x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
26555       emit_unlikely_jump (x, label1);
26556     }
26557 
26558   if (!is_mm_relaxed (mod_f))
26559     emit_label (XEXP (label2, 0));
26560 
26561   rs6000_post_atomic_barrier (mod_s);
26562 
26563   if (is_mm_relaxed (mod_f))
26564     emit_label (XEXP (label2, 0));
26565 
26566   if (shift)
26567     rs6000_finish_atomic_subword (operands[1], retval, shift);
26568   else if (mode != GET_MODE (operands[1]))
26569     convert_move (operands[1], retval, 1);
26570 
26571   /* In all cases, CR0 contains EQ on success, and NE on failure.  */
26572   x = gen_rtx_EQ (SImode, cond, const0_rtx);
26573   emit_insn (gen_rtx_SET (boolval, x));
26574 }
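
/* Shape of the generated sequence for a word-sized, strong, seq_cst
   compare-and-swap (illustrative only; labels and registers are
   placeholders):

	hwsync			# from rs6000_pre_atomic_barrier
     1:	lwarx	r,0,mem		# load-locked
	cmpw	cr0,r,old
	bne-	cr0,2f		# unlikely: value differs, fail
	stwcx.	new,0,mem	# store-conditional
	bne-	cr0,1b		# unlikely: reservation lost, retry
     2:	isync			# from rs6000_post_atomic_barrier

   As the final comment above notes, CR0 holds EQ on success and NE
   on failure.  */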
26575 
26576 /* Expand an atomic exchange operation.  */
26577 
26578 void
26579 rs6000_expand_atomic_exchange (rtx operands[])
26580 {
26581   rtx retval, mem, val, cond;
26582   machine_mode mode;
26583   enum memmodel model;
26584   rtx label, x, mask, shift;
26585 
26586   retval = operands[0];
26587   mem = operands[1];
26588   val = operands[2];
26589   model = memmodel_base (INTVAL (operands[3]));
26590   mode = GET_MODE (mem);
26591 
26592   mask = shift = NULL_RTX;
26593   if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
26594     {
26595       mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
26596 
26597       /* Shift and mask VAL into position within the word.  */
26598       val = convert_modes (SImode, mode, val, 1);
26599       val = expand_simple_binop (SImode, ASHIFT, val, shift,
26600 				 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26601 
26602       /* Prepare to adjust the return value.  */
26603       retval = gen_reg_rtx (SImode);
26604       mode = SImode;
26605     }
26606 
26607   mem = rs6000_pre_atomic_barrier (mem, model);
26608 
26609   label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
26610   emit_label (XEXP (label, 0));
26611 
26612   emit_load_locked (mode, retval, mem);
26613 
26614   x = val;
26615   if (mask)
26616     x = rs6000_mask_atomic_subword (retval, val, mask);
26617 
26618   cond = gen_reg_rtx (CCmode);
26619   emit_store_conditional (mode, cond, mem, x);
26620 
26621   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
26622   emit_unlikely_jump (x, label);
26623 
26624   rs6000_post_atomic_barrier (model);
26625 
26626   if (shift)
26627     rs6000_finish_atomic_subword (operands[0], retval, shift);
26628 }
26629 
26630 /* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
26631    to perform.  MEM is the memory on which to operate.  VAL is the second
26632    operand of the binary operator.  BEFORE and AFTER are optional locations to
26633    return the value of MEM either before or after the operation.  MODEL_RTX
26634    is a CONST_INT containing the memory model to use.  */
26635 
26636 void
26637 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
26638 			 rtx orig_before, rtx orig_after, rtx model_rtx)
26639 {
26640   enum memmodel model = memmodel_base (INTVAL (model_rtx));
26641   machine_mode mode = GET_MODE (mem);
26642   machine_mode store_mode = mode;
26643   rtx label, x, cond, mask, shift;
26644   rtx before = orig_before, after = orig_after;
26645 
26646   mask = shift = NULL_RTX;
26647   /* On power8, we want to use SImode for the operation.  On earlier systems,
26648      do the operation on a full SImode word and shift/mask to get the proper
26649      byte or halfword.  */
26650   if (mode == QImode || mode == HImode)
26651     {
26652       if (TARGET_SYNC_HI_QI)
26653 	{
26654 	  val = convert_modes (SImode, mode, val, 1);
26655 
26656 	  /* Prepare to adjust the return value.  */
26657 	  before = gen_reg_rtx (SImode);
26658 	  if (after)
26659 	    after = gen_reg_rtx (SImode);
26660 	  mode = SImode;
26661 	}
26662       else
26663 	{
26664 	  mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
26665 
26666 	  /* Shift and mask VAL into position within the word.  */
26667 	  val = convert_modes (SImode, mode, val, 1);
26668 	  val = expand_simple_binop (SImode, ASHIFT, val, shift,
26669 				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
26670 
26671 	  switch (code)
26672 	    {
26673 	    case IOR:
26674 	    case XOR:
26675 	      /* We've already zero-extended VAL.  That is sufficient to
26676 		 make certain that it does not affect other bits.  */
26677 	      mask = NULL;
26678 	      break;
26679 
26680 	    case AND:
26681 	      /* If we make certain that all of the other bits in VAL are
26682 		 set, that will be sufficient to not affect other bits.  */
26683 	      x = gen_rtx_NOT (SImode, mask);
26684 	      x = gen_rtx_IOR (SImode, x, val);
26685 	      emit_insn (gen_rtx_SET (val, x));
26686 	      mask = NULL;
26687 	      break;
26688 
26689 	    case NOT:
26690 	    case PLUS:
26691 	    case MINUS:
26692 	      /* These will all affect bits outside the field and need
26693 		 adjustment via MASK within the loop.  */
26694 	      break;
26695 
26696 	    default:
26697 	      gcc_unreachable ();
26698 	    }
26699 
26700 	  /* Prepare to adjust the return value.  */
26701 	  before = gen_reg_rtx (SImode);
26702 	  if (after)
26703 	    after = gen_reg_rtx (SImode);
26704 	  store_mode = mode = SImode;
26705 	}
26706     }
26707 
26708   mem = rs6000_pre_atomic_barrier (mem, model);
26709 
26710   label = gen_label_rtx ();
26711   emit_label (label);
26712   label = gen_rtx_LABEL_REF (VOIDmode, label);
26713 
26714   if (before == NULL_RTX)
26715     before = gen_reg_rtx (mode);
26716 
26717   emit_load_locked (mode, before, mem);
26718 
26719   if (code == NOT)
26720     {
26721       x = expand_simple_binop (mode, AND, before, val,
26722 			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
26723       after = expand_simple_unop (mode, NOT, x, after, 1);
26724     }
26725   else
26726     {
26727       after = expand_simple_binop (mode, code, before, val,
26728 				   after, 1, OPTAB_LIB_WIDEN);
26729     }
26730 
26731   x = after;
26732   if (mask)
26733     {
26734       x = expand_simple_binop (SImode, AND, after, mask,
26735 			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
26736       x = rs6000_mask_atomic_subword (before, x, mask);
26737     }
26738   else if (store_mode != mode)
26739     x = convert_modes (store_mode, mode, x, 1);
26740 
26741   cond = gen_reg_rtx (CCmode);
26742   emit_store_conditional (store_mode, cond, mem, x);
26743 
26744   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
26745   emit_unlikely_jump (x, label);
26746 
26747   rs6000_post_atomic_barrier (model);
26748 
26749   if (shift)
26750     {
26751       /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
26752 	 then do the calculations in a SImode register.  */
26753       if (orig_before)
26754 	rs6000_finish_atomic_subword (orig_before, before, shift);
26755       if (orig_after)
26756 	rs6000_finish_atomic_subword (orig_after, after, shift);
26757     }
26758   else if (store_mode != mode)
26759     {
26760       /* QImode/HImode on machines with lbarx/lharx where we do the native
26761 	 operation and then do the calculations in a SImode register.  */
26762       if (orig_before)
26763 	convert_move (orig_before, before, 1);
26764       if (orig_after)
26765 	convert_move (orig_after, after, 1);
26766     }
26767   else if (orig_after && after != orig_after)
26768     emit_move_insn (orig_after, after);
26769 }
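
/* For instance, a __atomic_fetch_add on a full word expands along
   these lines (sketch; the barriers emitted depend on MODEL_RTX,
   and register names are placeholders):

     1:	lwarx	before,0,mem
	add	after,before,val
	stwcx.	after,0,mem
	bne-	cr0,1b
 */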
26770 
26771 /* Emit instructions to move SRC to DST.  Called by splitters for
26772    multi-register moves.  It will emit at most one instruction for
26773    each register that is accessed; that is, it won't emit li/lis pairs
26774    (or equivalent for 64-bit code).  One of SRC or DST must be a hard
26775    register.  */
26776 
26777 void
26778 rs6000_split_multireg_move (rtx dst, rtx src)
26779 {
26780   /* The register number of the first register being moved.  */
26781   int reg;
26782   /* The mode that is to be moved.  */
26783   machine_mode mode;
26784   /* The mode that the move is being done in, and its size.  */
26785   machine_mode reg_mode;
26786   int reg_mode_size;
26787   /* The number of registers that will be moved.  */
26788   int nregs;
26789 
26790   reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
26791   mode = GET_MODE (dst);
26792   nregs = hard_regno_nregs (reg, mode);
26793   if (FP_REGNO_P (reg))
26794     reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
26795 	((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
26796   else if (ALTIVEC_REGNO_P (reg))
26797     reg_mode = V16QImode;
26798   else if (TARGET_E500_DOUBLE && FLOAT128_2REG_P (mode))
26799     reg_mode = DFmode;
26800   else
26801     reg_mode = word_mode;
26802   reg_mode_size = GET_MODE_SIZE (reg_mode);
26803 
26804   gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
26805 
26806   /* TDmode residing in FP registers is special, since the ISA requires that
26807      the lower-numbered word of a register pair is always the most significant
26808      word, even in little-endian mode.  This does not match the usual subreg
26809      semantics, so we cannot use simplify_gen_subreg in those cases.  Access
26810      the appropriate constituent registers "by hand" in little-endian mode.
26811 
26812      Note we do not need to check for destructive overlap here since TDmode
26813      can only reside in even/odd register pairs.  */
26814   if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
26815     {
26816       rtx p_src, p_dst;
26817       int i;
26818 
26819       for (i = 0; i < nregs; i++)
26820 	{
26821 	  if (REG_P (src) && FP_REGNO_P (REGNO (src)))
26822 	    p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
26823 	  else
26824 	    p_src = simplify_gen_subreg (reg_mode, src, mode,
26825 					 i * reg_mode_size);
26826 
26827 	  if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
26828 	    p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
26829 	  else
26830 	    p_dst = simplify_gen_subreg (reg_mode, dst, mode,
26831 					 i * reg_mode_size);
26832 
26833 	  emit_insn (gen_rtx_SET (p_dst, p_src));
26834 	}
26835 
26836       return;
26837     }
26838 
26839   if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
26840     {
26841       /* Move register range backwards, if we might have destructive
26842 	 overlap.  */
26843       int i;
26844       for (i = nregs - 1; i >= 0; i--)
26845 	emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
26846 						     i * reg_mode_size),
26847 				simplify_gen_subreg (reg_mode, src, mode,
26848 						     i * reg_mode_size)));
26849     }
26850   else
26851     {
26852       int i;
26853       int j = -1;
26854       bool used_update = false;
26855       rtx restore_basereg = NULL_RTX;
26856 
26857       if (MEM_P (src) && INT_REGNO_P (reg))
26858 	{
26859 	  rtx breg;
26860 
26861 	  if (GET_CODE (XEXP (src, 0)) == PRE_INC
26862 	      || GET_CODE (XEXP (src, 0)) == PRE_DEC)
26863 	    {
26864 	      rtx delta_rtx;
26865 	      breg = XEXP (XEXP (src, 0), 0);
26866 	      delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
26867 			   ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
26868 			   : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
26869 	      emit_insn (gen_add3_insn (breg, breg, delta_rtx));
26870 	      src = replace_equiv_address (src, breg);
26871 	    }
26872 	  else if (! rs6000_offsettable_memref_p (src, reg_mode))
26873 	    {
26874 	      if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
26875 		{
26876 		  rtx basereg = XEXP (XEXP (src, 0), 0);
26877 		  if (TARGET_UPDATE)
26878 		    {
26879 		      rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
26880 		      emit_insn (gen_rtx_SET (ndst,
26881 					      gen_rtx_MEM (reg_mode,
26882 							   XEXP (src, 0))));
26883 		      used_update = true;
26884 		    }
26885 		  else
26886 		    emit_insn (gen_rtx_SET (basereg,
26887 					    XEXP (XEXP (src, 0), 1)));
26888 		  src = replace_equiv_address (src, basereg);
26889 		}
26890 	      else
26891 		{
26892 		  rtx basereg = gen_rtx_REG (Pmode, reg);
26893 		  emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
26894 		  src = replace_equiv_address (src, basereg);
26895 		}
26896 	    }
26897 
26898 	  breg = XEXP (src, 0);
26899 	  if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
26900 	    breg = XEXP (breg, 0);
26901 
26902 	  /* If the base register we are using to address memory is
26903 	     also a destination reg, then change that register last.  */
26904 	  if (REG_P (breg)
26905 	      && REGNO (breg) >= REGNO (dst)
26906 	      && REGNO (breg) < REGNO (dst) + nregs)
26907 	    j = REGNO (breg) - REGNO (dst);
26908 	}
26909       else if (MEM_P (dst) && INT_REGNO_P (reg))
26910 	{
26911 	  rtx breg;
26912 
26913 	  if (GET_CODE (XEXP (dst, 0)) == PRE_INC
26914 	      || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
26915 	    {
26916 	      rtx delta_rtx;
26917 	      breg = XEXP (XEXP (dst, 0), 0);
26918 	      delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
26919 			   ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
26920 			   : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
26921 
26922 	      /* We have to update the breg before doing the store.
26923 		 Use store with update, if available.  */
26924 
26925 	      if (TARGET_UPDATE)
26926 		{
26927 		  rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
26928 		  emit_insn (TARGET_32BIT
26929 			     ? (TARGET_POWERPC64
26930 				? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
26931 				: gen_movsi_update (breg, breg, delta_rtx, nsrc))
26932 			     : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
26933 		  used_update = true;
26934 		}
26935 	      else
26936 		emit_insn (gen_add3_insn (breg, breg, delta_rtx));
26937 	      dst = replace_equiv_address (dst, breg);
26938 	    }
26939 	  else if (!rs6000_offsettable_memref_p (dst, reg_mode)
26940 		   && GET_CODE (XEXP (dst, 0)) != LO_SUM)
26941 	    {
26942 	      if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
26943 		{
26944 		  rtx basereg = XEXP (XEXP (dst, 0), 0);
26945 		  if (TARGET_UPDATE)
26946 		    {
26947 		      rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
26948 		      emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
26949 							   XEXP (dst, 0)),
26950 					      nsrc));
26951 		      used_update = true;
26952 		    }
26953 		  else
26954 		    emit_insn (gen_rtx_SET (basereg,
26955 					    XEXP (XEXP (dst, 0), 1)));
26956 		  dst = replace_equiv_address (dst, basereg);
26957 		}
26958 	      else
26959 		{
26960 		  rtx basereg = XEXP (XEXP (dst, 0), 0);
26961 		  rtx offsetreg = XEXP (XEXP (dst, 0), 1);
26962 		  gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
26963 			      && REG_P (basereg)
26964 			      && REG_P (offsetreg)
26965 			      && REGNO (basereg) != REGNO (offsetreg));
26966 		  if (REGNO (basereg) == 0)
26967 		    {
26968 		      rtx tmp = offsetreg;
26969 		      offsetreg = basereg;
26970 		      basereg = tmp;
26971 		    }
26972 		  emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
26973 		  restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
26974 		  dst = replace_equiv_address (dst, basereg);
26975 		}
26976 	    }
26977 	  else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
26978 	    gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
26979 	}
26980 
26981       for (i = 0; i < nregs; i++)
26982 	{
26983 	  /* Calculate index to next subword.  */
26984 	  ++j;
26985 	  if (j == nregs)
26986 	    j = 0;
26987 
26988 	  /* If the compiler already emitted the move of the first word by
26989 	     store with update, there is no need to do anything.  */
26990 	  if (j == 0 && used_update)
26991 	    continue;
26992 
26993 	  emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
26994 						       j * reg_mode_size),
26995 				  simplify_gen_subreg (reg_mode, src, mode,
26996 						       j * reg_mode_size)));
26997 	}
26998       if (restore_basereg != NULL_RTX)
26999 	emit_insn (restore_basereg);
27000     }
27001 }
27002 
27003 
27004 /* This page contains routines that are used to determine what the
27005    function prologue and epilogue code will do and write them out.  */
27006 
27007 static inline bool
27008 save_reg_p (int r)
27009 {
27010   return !call_used_regs[r] && df_regs_ever_live_p (r);
27011 }
27012 
27013 /* Determine whether the gp REG is really used.  */
27014 
27015 static bool
27016 rs6000_reg_live_or_pic_offset_p (int reg)
27017 {
27018   /* We need to mark the PIC offset register live under the same conditions
27019      as it is set up, or it won't be saved before we clobber it.  */
27020 
27021   if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
27022     {
27023       if (TARGET_TOC && TARGET_MINIMAL_TOC
27024 	  && (crtl->calls_eh_return
27025 	      || df_regs_ever_live_p (reg)
27026 	      || !constant_pool_empty_p ()))
27027 	return true;
27028 
27029       if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
27030 	  && flag_pic)
27031 	return true;
27032     }
27033 
27034   /* If the function calls eh_return, treat as used all the registers that
27035      would otherwise be checked for liveness.  */
27036 
27037   return ((crtl->calls_eh_return || df_regs_ever_live_p (reg))
27038 	  && !call_used_regs[reg]);
27039 }
27040 
27041 /* Return the first fixed-point register that is required to be
27042    saved. 32 if none.  */
27043 
27044 int
27045 first_reg_to_save (void)
27046 {
27047   int first_reg;
27048 
27049   /* Find lowest numbered live register.  */
27050   for (first_reg = 13; first_reg <= 31; first_reg++)
27051     if (save_reg_p (first_reg))
27052       break;
27053 
27054   if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
27055       && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
27056 	  || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
27057 	  || (TARGET_TOC && TARGET_MINIMAL_TOC))
27058       && rs6000_reg_live_or_pic_offset_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
27059     first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
27060 
27061 #if TARGET_MACHO
27062   if (flag_pic
27063       && crtl->uses_pic_offset_table
27064       && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
27065     return RS6000_PIC_OFFSET_TABLE_REGNUM;
27066 #endif
27067 
27068   return first_reg;
27069 }
27070 
27071 /* Similar, for FP regs.  */
27072 
27073 int
27074 first_fp_reg_to_save (void)
27075 {
27076   int first_reg;
27077 
27078   /* Find lowest numbered live register.  */
27079   for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
27080     if (save_reg_p (first_reg))
27081       break;
27082 
27083   return first_reg;
27084 }
27085 
27086 /* Similar, for AltiVec regs.  */
27087 
27088 static int
27089 first_altivec_reg_to_save (void)
27090 {
27091   int i;
27092 
27093   /* Stack frame remains as is unless we are in AltiVec ABI.  */
27094   if (! TARGET_ALTIVEC_ABI)
27095     return LAST_ALTIVEC_REGNO + 1;
27096 
27097   /* On Darwin, the unwind routines are compiled without
27098      TARGET_ALTIVEC, and use save_world to save/restore the
27099      altivec registers when necessary.  */
27100   if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
27101       && ! TARGET_ALTIVEC)
27102     return FIRST_ALTIVEC_REGNO + 20;
27103 
27104   /* Find lowest numbered live register.  */
27105   for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
27106     if (save_reg_p (i))
27107       break;
27108 
27109   return i;
27110 }
27111 
27112 /* Return a 32-bit mask of the AltiVec registers we need to set in
27113    VRSAVE.  Bit n of the return value is 1 if Vn is live; the MSB of
27114    the 32-bit word corresponds to V0.  */
27115 
27116 static unsigned int
27117 compute_vrsave_mask (void)
27118 {
27119   unsigned int i, mask = 0;
27120 
27121   /* On Darwin, the unwind routines are compiled without
27122      TARGET_ALTIVEC, and use save_world to save/restore the
27123      call-saved altivec registers when necessary.  */
27124   if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
27125       && ! TARGET_ALTIVEC)
27126     mask |= 0xFFF;
27127 
27128   /* First, find out if we use _any_ altivec registers.  */
27129   for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
27130     if (df_regs_ever_live_p (i))
27131       mask |= ALTIVEC_REG_BIT (i);
27132 
27133   if (mask == 0)
27134     return mask;
27135 
27136   /* Next, remove the argument registers from the set.  These must
27137      be in the VRSAVE mask set by the caller, so we don't need to add
27138      them in again.  More importantly, the mask we compute here is
27139      used to generate CLOBBERs in the set_vrsave insn, and we do not
27140      wish the argument registers to die.  */
27141   for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
27142     mask &= ~ALTIVEC_REG_BIT (i);
27143 
27144   /* Similarly, remove the return value from the set.  */
27145   {
27146     bool yes = false;
27147     diddle_return_value (is_altivec_return_reg, &yes);
27148     if (yes)
27149       mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
27150   }
27151 
27152   return mask;
27153 }
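
/* Sketch of the mask layout: ALTIVEC_REG_BIT puts V0 in the most
   significant bit, so a function touching only the call-saved range
   V20..V31 ends up with a mask of 0x00000FFF, which is exactly the
   0xFFF forced for the Darwin save_world case above.  */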
27154 
27155 /* For a very restricted set of circumstances, we can cut down the
27156    size of prologues/epilogues by calling our own save/restore-the-world
27157    routines.  */
27158 
27159 static void
27160 compute_save_world_info (rs6000_stack_t *info)
27161 {
27162   info->world_save_p = 1;
27163   info->world_save_p
27164     = (WORLD_SAVE_P (info)
27165        && DEFAULT_ABI == ABI_DARWIN
27166        && !cfun->has_nonlocal_label
27167        && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO
27168        && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO
27169        && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
27170        && info->cr_save_p);
27171 
27172   /* This will not work in conjunction with sibcalls.  Make sure there
27173      are none.  (This check is expensive, but seldom executed.) */
27174   if (WORLD_SAVE_P (info))
27175     {
27176       rtx_insn *insn;
27177       for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
27178 	if (CALL_P (insn) && SIBLING_CALL_P (insn))
27179 	  {
27180 	    info->world_save_p = 0;
27181 	    break;
27182 	  }
27183     }
27184 
27185   if (WORLD_SAVE_P (info))
27186     {
27187       /* Even if we're not touching VRsave, make sure there's room on the
27188 	 stack for it, if it looks like we're calling SAVE_WORLD, which
27189 	 will attempt to save it. */
27190       info->vrsave_size  = 4;
27191 
27192       /* If we are going to save the world, we need to save the link register too.  */
27193       info->lr_save_p = 1;
27194 
27195       /* "Save" the VRsave register too if we're saving the world.  */
27196       if (info->vrsave_mask == 0)
27197 	info->vrsave_mask = compute_vrsave_mask ();
27198 
27199       /* Because the Darwin register save/restore routines only handle
27200 	 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
27201 	 check.  */
27202       gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
27203 		  && (info->first_altivec_reg_save
27204 		      >= FIRST_SAVED_ALTIVEC_REGNO));
27205     }
27206 
27207   return;
27208 }
27209 
27210 
27211 static void
27212 is_altivec_return_reg (rtx reg, void *xyes)
27213 {
27214   bool *yes = (bool *) xyes;
27215   if (REGNO (reg) == ALTIVEC_ARG_RETURN)
27216     *yes = true;
27217 }
27218 
27219 
27220 /* Return whether REG is a global user reg or has been specified by
27221    -ffixed-REG.  We should not restore these, and so cannot use
27222    lmw or out-of-line restore functions if there are any.  We also
27223    can't save them (well, emit frame notes for them), because frame
27224    unwinding during exception handling will restore saved registers.  */
27225 
27226 static bool
27227 fixed_reg_p (int reg)
27228 {
27229   /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
27230      backend sets it, overriding anything the user might have given.  */
27231   if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
27232       && ((DEFAULT_ABI == ABI_V4 && flag_pic)
27233 	  || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
27234 	  || (TARGET_TOC && TARGET_MINIMAL_TOC)))
27235     return false;
27236 
27237   return fixed_regs[reg];
27238 }
27239 
27240 /* Determine the strategy for saving/restoring registers.  */
27241 
27242 enum {
27243   SAVE_MULTIPLE = 0x1,
27244   SAVE_INLINE_GPRS = 0x2,
27245   SAVE_INLINE_FPRS = 0x4,
27246   SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,
27247   SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,
27248   SAVE_INLINE_VRS = 0x20,
27249   REST_MULTIPLE = 0x100,
27250   REST_INLINE_GPRS = 0x200,
27251   REST_INLINE_FPRS = 0x400,
27252   REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
27253   REST_INLINE_VRS = 0x1000
27254 };
27255 
27256 static int
27257 rs6000_savres_strategy (rs6000_stack_t *info,
27258 			bool using_static_chain_p)
27259 {
27260   int strategy = 0;
27261 
27262   /* Select between in-line and out-of-line save and restore of regs.
27263      First, all the obvious cases where we don't use out-of-line.  */
27264   if (crtl->calls_eh_return
27265       || cfun->machine->ra_need_lr)
27266     strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
27267 		 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
27268 		 | SAVE_INLINE_VRS | REST_INLINE_VRS);
27269 
27270   if (info->first_gp_reg_save == 32)
27271     strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27272 
27273   if (info->first_fp_reg_save == 64
27274       /* The out-of-line FP routines use double-precision stores;
27275 	 we can't use those routines if we don't have such stores.  */
27276       || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT))
27277     strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
27278 
27279   if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1)
27280     strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
27281 
27282   /* Define cutoff for using out-of-line functions to save registers.  */
27283   if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
27284     {
27285       if (!optimize_size)
27286 	{
27287 	  strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
27288 	  strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27289 	  strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
27290 	}
27291       else
27292 	{
27293 	  /* Prefer out-of-line restore if it will exit.  */
27294 	  if (info->first_fp_reg_save > 61)
27295 	    strategy |= SAVE_INLINE_FPRS;
27296 	  if (info->first_gp_reg_save > 29)
27297 	    {
27298 	      if (info->first_fp_reg_save == 64)
27299 		strategy |= SAVE_INLINE_GPRS;
27300 	      else
27301 		strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27302 	    }
27303 	  if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
27304 	    strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
27305 	}
27306     }
27307   else if (DEFAULT_ABI == ABI_DARWIN)
27308     {
27309       if (info->first_fp_reg_save > 60)
27310 	strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
27311       if (info->first_gp_reg_save > 29)
27312 	strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27313       strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
27314     }
27315   else
27316     {
27317       gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
27318       if ((flag_shrink_wrap_separate && optimize_function_for_speed_p (cfun))
27319 	  || info->first_fp_reg_save > 61)
27320 	strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
27321       strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27322       strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
27323     }
27324 
27325   /* Don't bother to try to save things out-of-line if r11 is occupied
27326      by the static chain.  It would require too much fiddling and the
27327      static chain is rarely used anyway.  FPRs are saved w.r.t. the stack
27328      pointer on Darwin, and AIX uses r1 or r12.  */
27329   if (using_static_chain_p
27330       && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
27331     strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
27332 		 | SAVE_INLINE_GPRS
27333 		 | SAVE_INLINE_VRS);
27334 
27335   /* Saving CR interferes with the exit routines used on the SPE, so
27336      just punt here.  */
27337   if (TARGET_SPE_ABI
27338       && info->spe_64bit_regs_used
27339       && info->cr_save_p)
27340     strategy |= REST_INLINE_GPRS;
27341 
27342   /* We can only use the out-of-line routines to restore fprs if we've
27343      saved all the registers from first_fp_reg_save in the prologue.
27344      Otherwise, we risk loading garbage.  Of course, if we have saved
27345      out-of-line then we know we haven't skipped any fprs.  */
27346   if ((strategy & SAVE_INLINE_FPRS)
27347       && !(strategy & REST_INLINE_FPRS))
27348     {
27349       int i;
27350 
27351       for (i = info->first_fp_reg_save; i < 64; i++)
27352 	if (fixed_regs[i] || !save_reg_p (i))
27353 	  {
27354 	    strategy |= REST_INLINE_FPRS;
27355 	    break;
27356 	  }
27357     }
27358 
27359   /* Similarly, for altivec regs.  */
27360   if ((strategy & SAVE_INLINE_VRS)
27361       && !(strategy & REST_INLINE_VRS))
27362     {
27363       int i;
27364 
27365       for (i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
27366 	if (fixed_regs[i] || !save_reg_p (i))
27367 	  {
27368 	    strategy |= REST_INLINE_VRS;
27369 	    break;
27370 	  }
27371     }
27372 
27373   /* info->lr_save_p isn't yet set if the only reason lr needs to be
27374      saved is an out-of-line save or restore.  Set up the value for
27375      the next test (excluding out-of-line gprs).  */
27376   bool lr_save_p = (info->lr_save_p
27377 		    || !(strategy & SAVE_INLINE_FPRS)
27378 		    || !(strategy & SAVE_INLINE_VRS)
27379 		    || !(strategy & REST_INLINE_FPRS)
27380 		    || !(strategy & REST_INLINE_VRS));
27381 
27382   if (TARGET_MULTIPLE
27383       && !TARGET_POWERPC64
27384       && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
27385       && info->first_gp_reg_save < 31
27386       && !(flag_shrink_wrap
27387 	   && flag_shrink_wrap_separate
27388 	   && optimize_function_for_speed_p (cfun)))
27389     {
27390       /* Prefer store multiple for saves over out-of-line routines,
27391 	 since the store-multiple instruction will always be smaller.  */
27392       strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE;
27393 
27394       /* The situation is more complicated with load multiple.  We'd
27395 	 prefer to use the out-of-line routines for restores, since the
27396 	 "exit" out-of-line routines can handle the restore of LR and the
27397 	 frame teardown.  However, it doesn't make sense to use the
27398 	 out-of-line routine if that is the only reason we'd need to save
27399 	 LR, and we can't use the "exit" out-of-line gpr restore if we
27400 	 have saved some fprs; in those cases it is advantageous to use
27401 	 load multiple when available.  */
27402       if (info->first_fp_reg_save != 64 || !lr_save_p)
27403 	strategy |= REST_INLINE_GPRS | REST_MULTIPLE;
27404     }
27405 
27406   /* Using the "exit" out-of-line routine does not improve code size
27407      if it would require LR to be saved and we are only saving one
27408      or two gprs.  */
27409   else if (!lr_save_p && info->first_gp_reg_save > 29)
27410     strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27411 
27412   /* We can only use load multiple or the out-of-line routines to
27413      restore gprs if we've saved all the registers from
27414      first_gp_reg_save.  Otherwise, we risk loading garbage.
27415      Of course, if we have saved out-of-line or used stmw then we know
27416      we haven't skipped any gprs.  */
27417   if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS
27418       && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
27419     {
27420       int i;
27421 
27422       for (i = info->first_gp_reg_save; i < 32; i++)
27423 	if (fixed_reg_p (i) || !save_reg_p (i))
27424 	  {
27425 	    strategy |= REST_INLINE_GPRS;
27426 	    strategy &= ~REST_MULTIPLE;
27427 	    break;
27428 	  }
27429     }
27430 
27431   if (TARGET_ELF && TARGET_64BIT)
27432     {
27433       if (!(strategy & SAVE_INLINE_FPRS))
27434 	strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
27435       else if (!(strategy & SAVE_INLINE_GPRS)
27436 	       && info->first_fp_reg_save == 64)
27437 	strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
27438     }
27439   else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
27440     strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
27441 
27442   if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
27443     strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
27444 
27445   return strategy;
27446 }
27447 
27448 /* Calculate the stack information for the current function.  This is
27449    complicated by having two separate calling sequences, the AIX calling
27450    sequence and the V.4 calling sequence.
27451 
27452    AIX (and Darwin/Mac OS X) stack frames look like:
27453 							  32-bit  64-bit
27454 	SP---->	+---------------------------------------+
27455 		| back chain to caller			| 0	  0
27456 		+---------------------------------------+
27457 		| saved CR				| 4       8 (8-11)
27458 		+---------------------------------------+
27459 		| saved LR				| 8       16
27460 		+---------------------------------------+
27461 		| reserved for compilers		| 12      24
27462 		+---------------------------------------+
27463 		| reserved for binders			| 16      32
27464 		+---------------------------------------+
27465 		| saved TOC pointer			| 20      40
27466 		+---------------------------------------+
27467 		| Parameter save area (+padding*) (P)	| 24      48
27468 		+---------------------------------------+
27469 		| Alloca space (A)			| 24+P    etc.
27470 		+---------------------------------------+
27471 		| Local variable space (L)		| 24+P+A
27472 		+---------------------------------------+
27473 		| Float/int conversion temporary (X)	| 24+P+A+L
27474 		+---------------------------------------+
27475 		| Save area for AltiVec registers (W)	| 24+P+A+L+X
27476 		+---------------------------------------+
27477 		| AltiVec alignment padding (Y)		| 24+P+A+L+X+W
27478 		+---------------------------------------+
27479 		| Save area for VRSAVE register (Z)	| 24+P+A+L+X+W+Y
27480 		+---------------------------------------+
27481 		| Save area for GP registers (G)	| 24+P+A+L+X+W+Y+Z
27482 		+---------------------------------------+
27483 		| Save area for FP registers (F)	| 24+P+A+L+X+W+Y+Z+G
27484 		+---------------------------------------+
27485 	old SP->| back chain to caller's caller		|
27486 		+---------------------------------------+
27487 
27488      * If the alloca area is present, the parameter save area is
27489        padded so that the former starts 16-byte aligned.
27490 
27491    The required alignment for AIX configurations is two words (i.e., 8
27492    or 16 bytes).
27493 
27494    The ELFv2 ABI is a variant of the AIX ABI.  Stack frames look like:
27495 
27496 	SP---->	+---------------------------------------+
27497 		| Back chain to caller			|  0
27498 		+---------------------------------------+
27499 		| Save area for CR			|  8
27500 		+---------------------------------------+
27501 		| Saved LR				|  16
27502 		+---------------------------------------+
27503 		| Saved TOC pointer			|  24
27504 		+---------------------------------------+
27505 		| Parameter save area (+padding*) (P)	|  32
27506 		+---------------------------------------+
27507 		| Alloca space (A)			|  32+P
27508 		+---------------------------------------+
27509 		| Local variable space (L)		|  32+P+A
27510 		+---------------------------------------+
27511 		| Save area for AltiVec registers (W)	|  32+P+A+L
27512 		+---------------------------------------+
27513 		| AltiVec alignment padding (Y)		|  32+P+A+L+W
27514 		+---------------------------------------+
27515 		| Save area for GP registers (G)	|  32+P+A+L+W+Y
27516 		+---------------------------------------+
27517 		| Save area for FP registers (F)	|  32+P+A+L+W+Y+G
27518 		+---------------------------------------+
27519 	old SP->| back chain to caller's caller		|  32+P+A+L+W+Y+G+F
27520 		+---------------------------------------+
27521 
27522      * If the alloca area is present, the parameter save area is
27523        padded so that the former starts 16-byte aligned.
27524 
27525    V.4 stack frames look like:
27526 
27527 	SP---->	+---------------------------------------+
27528 		| back chain to caller			| 0
27529 		+---------------------------------------+
27530 		| caller's saved LR			| 4
27531 		+---------------------------------------+
27532 		| Parameter save area (+padding*) (P)	| 8
27533 		+---------------------------------------+
27534 		| Alloca space (A)			| 8+P
27535 		+---------------------------------------+
27536 		| Varargs save area (V)			| 8+P+A
27537 		+---------------------------------------+
27538 		| Local variable space (L)		| 8+P+A+V
27539 		+---------------------------------------+
27540 		| Float/int conversion temporary (X)	| 8+P+A+V+L
27541 		+---------------------------------------+
27542 		| Save area for AltiVec registers (W)	| 8+P+A+V+L+X
27543 		+---------------------------------------+
27544 		| AltiVec alignment padding (Y)		| 8+P+A+V+L+X+W
27545 		+---------------------------------------+
27546 		| Save area for VRSAVE register (Z)	| 8+P+A+V+L+X+W+Y
27547 		+---------------------------------------+
27548 		| SPE: area for 64-bit GP registers	|
27549 		+---------------------------------------+
27550 		| SPE alignment padding			|
27551 		+---------------------------------------+
27552 		| saved CR (C)				| 8+P+A+V+L+X+W+Y+Z
27553 		+---------------------------------------+
27554 		| Save area for GP registers (G)	| 8+P+A+V+L+X+W+Y+Z+C
27555 		+---------------------------------------+
27556 		| Save area for FP registers (F)	| 8+P+A+V+L+X+W+Y+Z+C+G
27557 		+---------------------------------------+
27558 	old SP->| back chain to caller's caller		|
27559 		+---------------------------------------+
27560 
27561      * If the alloca area is present and the required alignment is
27562        16 bytes, the parameter save area is padded so that the
27563        alloca area starts 16-byte aligned.
27564 
27565    The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
27566    given.  (But note below and in sysv4.h that we require only 8 and
27567    may round up the size of our stack frame anyway.  The historical
27568    reason is early versions of powerpc-linux which didn't properly
27569    align the stack at program startup.  A happy side-effect is that
27570    -mno-eabi libraries can be used with -meabi programs.)
27571 
27572    The EABI configuration defaults to the V.4 layout.  However,
27573    the stack alignment requirements may differ.  If -mno-eabi is not
27574    given, the required stack alignment is 8 bytes; if -mno-eabi is
27575    given, the required alignment is 16 bytes.  (But see V.4 comment
27576    above.)  */
27577 
27578 #ifndef ABI_STACK_BOUNDARY
27579 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
27580 #endif
27581 
27582 static rs6000_stack_t *
27583 rs6000_stack_info (void)
27584 {
27585   /* We should never be called for thunks, we are not set up for that.  */
27586   gcc_assert (!cfun->is_thunk);
27587 
27588   rs6000_stack_t *info = &stack_info;
27589   int reg_size = TARGET_32BIT ? 4 : 8;
27590   int ehrd_size;
27591   int ehcr_size;
27592   int save_align;
27593   int first_gp;
27594   HOST_WIDE_INT non_fixed_size;
27595   bool using_static_chain_p;
27596 
27597   if (reload_completed && info->reload_completed)
27598     return info;
27599 
27600   memset (info, 0, sizeof (*info));
27601   info->reload_completed = reload_completed;
27602 
27603   if (TARGET_SPE)
27604     {
27605       /* Cache value so we don't rescan instruction chain over and over.  */
27606       if (cfun->machine->spe_insn_chain_scanned_p == 0)
27607 	cfun->machine->spe_insn_chain_scanned_p
27608 	  = spe_func_has_64bit_regs_p () + 1;
27609       info->spe_64bit_regs_used = cfun->machine->spe_insn_chain_scanned_p - 1;
27610     }
27611 
27612   /* Select which calling sequence.  */
27613   info->abi = DEFAULT_ABI;
27614 
27615   /* Calculate which registers need to be saved & save area size.  */
27616   info->first_gp_reg_save = first_reg_to_save ();
27617   /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
27618      even if it currently looks like we won't.  Reload may need it to
27619      get at a constant; if so, it will have already created a constant
27620      pool entry for it.  */
27621   if (((TARGET_TOC && TARGET_MINIMAL_TOC)
27622        || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
27623        || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
27624       && crtl->uses_const_pool
27625       && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
27626     first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
27627   else
27628     first_gp = info->first_gp_reg_save;
27629 
27630   info->gp_size = reg_size * (32 - first_gp);
27631 
27632   /* For the SPE, we have an additional upper 32-bits on each GPR.
27633      Ideally we should save the entire 64-bits only when the upper
27634      half is used in SIMD instructions.  Since we only record
27635      registers live (not the size they are used in), this proves
27636      difficult because we'd have to traverse the instruction chain at
27637      the right time, taking reload into account.  This is a real pain,
27638      so we opt to save all the GPRs in 64-bits if even one register
27639      gets used in 64-bits.  Otherwise, all the registers in the frame
27640      get saved in 32-bits.
27641 
27642      So, when we save all GPRs (except the SP) in 64-bits, the
27643      traditional GP save area will be empty.  */
27644   if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
27645     info->gp_size = 0;
27646 
27647   info->first_fp_reg_save = first_fp_reg_to_save ();
27648   info->fp_size = 8 * (64 - info->first_fp_reg_save);
27649 
27650   info->first_altivec_reg_save = first_altivec_reg_to_save ();
27651   info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
27652 				 - info->first_altivec_reg_save);
27653 
27654   /* Does this function call anything?  */
27655   info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
27656 
27657   /* Determine if we need to save the condition code registers.  */
27658   if (save_reg_p (CR2_REGNO)
27659       || save_reg_p (CR3_REGNO)
27660       || save_reg_p (CR4_REGNO))
27661     {
27662       info->cr_save_p = 1;
27663       if (DEFAULT_ABI == ABI_V4)
27664 	info->cr_size = reg_size;
27665     }
27666 
27667   /* If the current function calls __builtin_eh_return, then we need
27668      to allocate stack space for registers that will hold data for
27669      the exception handler.  */
27670   if (crtl->calls_eh_return)
27671     {
27672       unsigned int i;
27673       for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
27674 	continue;
27675 
27676       /* SPE saves EH registers in 64-bits.  */
27677       ehrd_size = i * (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0
27678 		       ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
27679     }
27680   else
27681     ehrd_size = 0;
27682 
27683   /* In the ELFv2 ABI, we also need to allocate space for separate
27684      CR field save areas if the function calls __builtin_eh_return.  */
27685   if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
27686     {
27687       /* This hard-codes that we have three call-saved CR fields.  */
27688       ehcr_size = 3 * reg_size;
27689       /* We do *not* use the regular CR save mechanism.  */
27690       info->cr_save_p = 0;
27691     }
27692   else
27693     ehcr_size = 0;
27694 
27695   /* Determine various sizes.  */
27696   info->reg_size     = reg_size;
27697   info->fixed_size   = RS6000_SAVE_AREA;
27698   info->vars_size    = RS6000_ALIGN (get_frame_size (), 8);
27699   if (cfun->calls_alloca)
27700     info->parm_size  =
27701       RS6000_ALIGN (crtl->outgoing_args_size + info->fixed_size,
27702 		    STACK_BOUNDARY / BITS_PER_UNIT) - info->fixed_size;
27703   else
27704     info->parm_size  = RS6000_ALIGN (crtl->outgoing_args_size,
27705 				     TARGET_ALTIVEC ? 16 : 8);
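  /* In the alloca case above, e.g. (illustrative): outgoing args of
     40 bytes, a fixed_size of 32 and a 16-byte STACK_BOUNDARY give
     RS6000_ALIGN (72, 16) - 32 = 48, so the alloca space above the
     parameter area starts 16-byte aligned.  */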
27706   if (FRAME_GROWS_DOWNWARD)
27707     info->vars_size
27708       += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size,
27709 		       ABI_STACK_BOUNDARY / BITS_PER_UNIT)
27710 	 - (info->fixed_size + info->vars_size + info->parm_size);
27711 
27712   if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
27713     info->spe_gp_size = 8 * (32 - first_gp);
27714 
27715   if (TARGET_ALTIVEC_ABI)
27716     info->vrsave_mask = compute_vrsave_mask ();
27717 
27718   if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask)
27719     info->vrsave_size = 4;
27720 
27721   compute_save_world_info (info);
27722 
27723   /* Calculate the offsets.  */
27724   switch (DEFAULT_ABI)
27725     {
27726     case ABI_NONE:
27727     default:
27728       gcc_unreachable ();
27729 
27730     case ABI_AIX:
27731     case ABI_ELFv2:
27732     case ABI_DARWIN:
27733       info->fp_save_offset = -info->fp_size;
27734       info->gp_save_offset = info->fp_save_offset - info->gp_size;
27735 
27736       if (TARGET_ALTIVEC_ABI)
27737 	{
27738 	  info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size;
27739 
27740 	  /* Align stack so vector save area is on a quadword boundary.
27741 	     The padding goes above the vectors.  */
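	  /* E.g. (illustrative): a vrsave_save_offset of -20 has low
	     nibble 0xC, giving 12 bytes of padding, so the vector
	     save area below it ends up 16-byte aligned.  */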
27742 	  if (info->altivec_size != 0)
27743 	    info->altivec_padding_size = info->vrsave_save_offset & 0xF;
27744 
27745 	  info->altivec_save_offset = info->vrsave_save_offset
27746 				      - info->altivec_padding_size
27747 				      - info->altivec_size;
27748 	  gcc_assert (info->altivec_size == 0
27749 		      || info->altivec_save_offset % 16 == 0);
27750 
27751 	  /* Adjust for AltiVec case.  */
27752 	  info->ehrd_offset = info->altivec_save_offset - ehrd_size;
27753 	}
27754       else
27755 	info->ehrd_offset = info->gp_save_offset - ehrd_size;
27756 
27757       info->ehcr_offset = info->ehrd_offset - ehcr_size;
27758       info->cr_save_offset = reg_size; /* first word when 64-bit.  */
27759       info->lr_save_offset = 2*reg_size;
27760       break;
27761 
27762     case ABI_V4:
27763       info->fp_save_offset = -info->fp_size;
27764       info->gp_save_offset = info->fp_save_offset - info->gp_size;
27765       info->cr_save_offset = info->gp_save_offset - info->cr_size;
27766 
27767       if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
27768 	{
27769 	  /* Align stack so SPE GPR save area is aligned on a
27770 	     double-word boundary.  */
27771 	  if (info->spe_gp_size != 0 && info->cr_save_offset != 0)
27772 	    info->spe_padding_size = 8 - (-info->cr_save_offset % 8);
27773 	  else
27774 	    info->spe_padding_size = 0;
27775 
27776 	  info->spe_gp_save_offset = info->cr_save_offset
27777 				     - info->spe_padding_size
27778 				     - info->spe_gp_size;
27779 
27780 	  /* Adjust for SPE case.  */
27781 	  info->ehrd_offset = info->spe_gp_save_offset;
27782 	}
27783       else if (TARGET_ALTIVEC_ABI)
27784 	{
27785 	  info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size;
27786 
27787 	  /* Align stack so vector save area is on a quadword boundary.  */
27788 	  if (info->altivec_size != 0)
27789 	    info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16);
27790 
27791 	  info->altivec_save_offset = info->vrsave_save_offset
27792 				      - info->altivec_padding_size
27793 				      - info->altivec_size;
27794 
27795 	  /* Adjust for AltiVec case.  */
27796 	  info->ehrd_offset = info->altivec_save_offset;
27797 	}
27798       else
27799 	info->ehrd_offset = info->cr_save_offset;
27800 
27801       info->ehrd_offset -= ehrd_size;
27802       info->lr_save_offset = reg_size;
27803     }
27804 
27805   save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
27806   info->save_size = RS6000_ALIGN (info->fp_size
27807 				  + info->gp_size
27808 				  + info->altivec_size
27809 				  + info->altivec_padding_size
27810 				  + info->spe_gp_size
27811 				  + info->spe_padding_size
27812 				  + ehrd_size
27813 				  + ehcr_size
27814 				  + info->cr_size
27815 				  + info->vrsave_size,
27816 				  save_align);
27817 
27818   non_fixed_size = info->vars_size + info->parm_size + info->save_size;
27819 
27820   info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size,
27821 				   ABI_STACK_BOUNDARY / BITS_PER_UNIT);
27822 
27823   /* Determine if we need to save the link register.  */
27824   if (info->calls_p
27825       || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27826 	  && crtl->profile
27827 	  && !TARGET_PROFILE_KERNEL)
27828       || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
27829 #ifdef TARGET_RELOCATABLE
27830       || (DEFAULT_ABI == ABI_V4
27831 	  && (TARGET_RELOCATABLE || flag_pic > 1)
27832 	  && !constant_pool_empty_p ())
27833 #endif
27834       || rs6000_ra_ever_killed ())
27835     info->lr_save_p = 1;
27836 
27837   using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
27838 			  && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
27839 			  && call_used_regs[STATIC_CHAIN_REGNUM]);
27840   info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p);
27841 
27842   if (!(info->savres_strategy & SAVE_INLINE_GPRS)
27843       || !(info->savres_strategy & SAVE_INLINE_FPRS)
27844       || !(info->savres_strategy & SAVE_INLINE_VRS)
27845       || !(info->savres_strategy & REST_INLINE_GPRS)
27846       || !(info->savres_strategy & REST_INLINE_FPRS)
27847       || !(info->savres_strategy & REST_INLINE_VRS))
27848     info->lr_save_p = 1;
27849 
27850   if (info->lr_save_p)
27851     df_set_regs_ever_live (LR_REGNO, true);
27852 
27853   /* Determine if we need to allocate any stack frame:
27854 
27855      For AIX we need to push the stack if a frame pointer is needed
27856      (because the stack might be dynamically adjusted), if we are
27857      debugging, if we make calls, or if the sum of fp_save, gp_save,
27858      and local variables are more than the space needed to save all
27859      non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
27860      + 18*8 = 288 (GPR13 reserved).
27861 
27862      For V.4 we don't have the stack cushion that AIX uses, but assume
27863      that the debugger can handle stackless frames.  */
27864 
27865   if (info->calls_p)
27866     info->push_p = 1;
27867 
27868   else if (DEFAULT_ABI == ABI_V4)
27869     info->push_p = non_fixed_size != 0;
27870 
27871   else if (frame_pointer_needed)
27872     info->push_p = 1;
27873 
27874   else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
27875     info->push_p = 1;
27876 
27877   else
27878     info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
27879 
27880   return info;
27881 }
27882 
27883 /* Return true if the current function uses any GPRs in 64-bit SIMD
27884    mode.  */
27885 
27886 static bool
27887 spe_func_has_64bit_regs_p (void)
27888 {
27889   rtx_insn *insns, *insn;
27890 
27891   /* Functions that save and restore all the call-saved registers will
27892      need to save/restore the registers in 64-bits.  */
27893   if (crtl->calls_eh_return
27894       || cfun->calls_setjmp
27895       || crtl->has_nonlocal_goto)
27896     return true;
27897 
27898   insns = get_insns ();
27899 
27900   for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
27901     {
27902       if (INSN_P (insn))
27903 	{
27904 	  rtx i;
27905 
27906 	  /* FIXME: This should be implemented with attributes...
27907 
27908 	         (set_attr "spe64" "true")....then,
27909 	         if (get_spe64(insn)) return true;
27910 
27911 	     It's the only reliable way to do the stuff below.  */
27912 
27913 	  i = PATTERN (insn);
27914 	  if (GET_CODE (i) == SET)
27915 	    {
27916 	      machine_mode mode = GET_MODE (SET_SRC (i));
27917 
27918 	      if (SPE_VECTOR_MODE (mode))
27919 		return true;
27920 	      if (TARGET_E500_DOUBLE
27921 		  && (mode == DFmode || FLOAT128_2REG_P (mode)))
27922 		return true;
27923 	    }
27924 	}
27925     }
27926 
27927   return false;
27928 }
27929 
27930 static void
27931 debug_stack_info (rs6000_stack_t *info)
27932 {
27933   const char *abi_string;
27934 
27935   if (! info)
27936     info = rs6000_stack_info ();
27937 
27938   fprintf (stderr, "\nStack information for function %s:\n",
27939 	   ((current_function_decl && DECL_NAME (current_function_decl))
27940 	    ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
27941 	    : "<unknown>"));
27942 
27943   switch (info->abi)
27944     {
27945     default:		 abi_string = "Unknown";	break;
27946     case ABI_NONE:	 abi_string = "NONE";		break;
27947     case ABI_AIX:	 abi_string = "AIX";		break;
27948     case ABI_ELFv2:	 abi_string = "ELFv2";		break;
27949     case ABI_DARWIN:	 abi_string = "Darwin";		break;
27950     case ABI_V4:	 abi_string = "V.4";		break;
27951     }
27952 
27953   fprintf (stderr, "\tABI                 = %5s\n", abi_string);
27954 
27955   if (TARGET_ALTIVEC_ABI)
27956     fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
27957 
27958   if (TARGET_SPE_ABI)
27959     fprintf (stderr, "\tSPE ABI extensions enabled.\n");
27960 
27961   if (info->first_gp_reg_save != 32)
27962     fprintf (stderr, "\tfirst_gp_reg_save   = %5d\n", info->first_gp_reg_save);
27963 
27964   if (info->first_fp_reg_save != 64)
27965     fprintf (stderr, "\tfirst_fp_reg_save   = %5d\n", info->first_fp_reg_save);
27966 
27967   if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
27968     fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
27969 	     info->first_altivec_reg_save);
27970 
27971   if (info->lr_save_p)
27972     fprintf (stderr, "\tlr_save_p           = %5d\n", info->lr_save_p);
27973 
27974   if (info->cr_save_p)
27975     fprintf (stderr, "\tcr_save_p           = %5d\n", info->cr_save_p);
27976 
27977   if (info->vrsave_mask)
27978     fprintf (stderr, "\tvrsave_mask         = 0x%x\n", info->vrsave_mask);
27979 
27980   if (info->push_p)
27981     fprintf (stderr, "\tpush_p              = %5d\n", info->push_p);
27982 
27983   if (info->calls_p)
27984     fprintf (stderr, "\tcalls_p             = %5d\n", info->calls_p);
27985 
27986   if (info->gp_size)
27987     fprintf (stderr, "\tgp_save_offset      = %5d\n", info->gp_save_offset);
27988 
27989   if (info->fp_size)
27990     fprintf (stderr, "\tfp_save_offset      = %5d\n", info->fp_save_offset);
27991 
27992   if (info->altivec_size)
27993     fprintf (stderr, "\taltivec_save_offset = %5d\n",
27994 	     info->altivec_save_offset);
27995 
27996   if (info->spe_gp_size)
27997     fprintf (stderr, "\tspe_gp_save_offset  = %5d\n",
27998 	     info->spe_gp_save_offset);
27999 
28000   if (info->vrsave_size)
28001     fprintf (stderr, "\tvrsave_save_offset  = %5d\n",
28002 	     info->vrsave_save_offset);
28003 
28004   if (info->lr_save_p)
28005     fprintf (stderr, "\tlr_save_offset      = %5d\n", info->lr_save_offset);
28006 
28007   if (info->cr_save_p)
28008     fprintf (stderr, "\tcr_save_offset      = %5d\n", info->cr_save_offset);
28009 
28010   if (info->varargs_save_offset)
28011     fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
28012 
28013   if (info->total_size)
28014     fprintf (stderr, "\ttotal_size          = " HOST_WIDE_INT_PRINT_DEC"\n",
28015 	     info->total_size);
28016 
28017   if (info->vars_size)
28018     fprintf (stderr, "\tvars_size           = " HOST_WIDE_INT_PRINT_DEC"\n",
28019 	     info->vars_size);
28020 
28021   if (info->parm_size)
28022     fprintf (stderr, "\tparm_size           = %5d\n", info->parm_size);
28023 
28024   if (info->fixed_size)
28025     fprintf (stderr, "\tfixed_size          = %5d\n", info->fixed_size);
28026 
28027   if (info->gp_size)
28028     fprintf (stderr, "\tgp_size             = %5d\n", info->gp_size);
28029 
28030   if (info->spe_gp_size)
28031     fprintf (stderr, "\tspe_gp_size         = %5d\n", info->spe_gp_size);
28032 
28033   if (info->fp_size)
28034     fprintf (stderr, "\tfp_size             = %5d\n", info->fp_size);
28035 
28036   if (info->altivec_size)
28037     fprintf (stderr, "\taltivec_size        = %5d\n", info->altivec_size);
28038 
28039   if (info->vrsave_size)
28040     fprintf (stderr, "\tvrsave_size         = %5d\n", info->vrsave_size);
28041 
28042   if (info->altivec_padding_size)
28043     fprintf (stderr, "\taltivec_padding_size= %5d\n",
28044 	     info->altivec_padding_size);
28045 
28046   if (info->spe_padding_size)
28047     fprintf (stderr, "\tspe_padding_size    = %5d\n",
28048 	     info->spe_padding_size);
28049 
28050   if (info->cr_size)
28051     fprintf (stderr, "\tcr_size             = %5d\n", info->cr_size);
28052 
28053   if (info->save_size)
28054     fprintf (stderr, "\tsave_size           = %5d\n", info->save_size);
28055 
28056   if (info->reg_size != 4)
28057     fprintf (stderr, "\treg_size            = %5d\n", info->reg_size);
28058 
28059   fprintf (stderr, "\tsave-strategy       =  %04x\n", info->savres_strategy);
28060 
28061   fprintf (stderr, "\n");
28062 }
28063 
28064 rtx
28065 rs6000_return_addr (int count, rtx frame)
28066 {
28067   /* Currently we don't optimize very well between prolog and body
28068      code and for PIC code the code can be actually quite bad, so
28069      don't try to be too clever here.  */
28070   if (count != 0
28071       || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
28072     {
28073       cfun->machine->ra_needs_full_frame = 1;
28074 
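      /* In effect this forms *(*FRAME + RETURN_ADDRESS_OFFSET): load
	 the back chain word at FRAME, then reference the LR save slot
	 at RETURN_ADDRESS_OFFSET within that frame.  */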
28075       return
28076 	gen_rtx_MEM
28077 	  (Pmode,
28078 	   memory_address
28079 	   (Pmode,
28080 	    plus_constant (Pmode,
28081 			   copy_to_reg
28082 			   (gen_rtx_MEM (Pmode,
28083 					 memory_address (Pmode, frame))),
28084 			   RETURN_ADDRESS_OFFSET)));
28085     }
28086 
28087   cfun->machine->ra_need_lr = 1;
28088   return get_hard_reg_initial_val (Pmode, LR_REGNO);
28089 }
28090 
28091 /* Say whether a function is a candidate for sibcall handling or not.  */
28092 
28093 static bool
28094 rs6000_function_ok_for_sibcall (tree decl, tree exp)
28095 {
28096   tree fntype;
28097 
28098   if (decl)
28099     fntype = TREE_TYPE (decl);
28100   else
28101     fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
28102 
28103   /* We can't do it if the called function has more vector parameters
28104      than the current function; there's nowhere to put the VRsave code.  */
28105   if (TARGET_ALTIVEC_ABI
28106       && TARGET_ALTIVEC_VRSAVE
28107       && !(decl && decl == current_function_decl))
28108     {
28109       function_args_iterator args_iter;
28110       tree type;
28111       int nvreg = 0;
28112 
28113       /* Functions with vector parameters are required to have a
28114 	 prototype, so the argument type info must be available
28115 	 here.  */
28116       FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
28117 	if (TREE_CODE (type) == VECTOR_TYPE
28118 	    && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
28119 	  nvreg++;
28120 
28121       FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
28122 	if (TREE_CODE (type) == VECTOR_TYPE
28123 	    && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
28124 	  nvreg--;
28125 
28126       if (nvreg > 0)
28127 	return false;
28128     }
28129 
28130   /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
28131    functions, because the callee may have a different TOC pointer from
28132    the caller's, and there's no way to ensure we restore the TOC when
28133      we return.  With the secure-plt SYSV ABI we can't make non-local
28134      calls when -fpic/PIC because the plt call stubs use r30.  */
28135   if (DEFAULT_ABI == ABI_DARWIN
28136       || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28137 	  && decl
28138 	  && !DECL_EXTERNAL (decl)
28139 	  && !DECL_WEAK (decl)
28140 	  && (*targetm.binds_local_p) (decl))
28141       || (DEFAULT_ABI == ABI_V4
28142 	  && (!TARGET_SECURE_PLT
28143 	      || !flag_pic
28144 	      || (decl
28145 		  && (*targetm.binds_local_p) (decl)))))
28146     {
28147       tree attr_list = TYPE_ATTRIBUTES (fntype);
28148 
28149       if (!lookup_attribute ("longcall", attr_list)
28150 	  || lookup_attribute ("shortcall", attr_list))
28151 	return true;
28152     }
28153 
28154   return false;
28155 }
28156 
28157 static int
28158 rs6000_ra_ever_killed (void)
28159 {
28160   rtx_insn *top;
28161   rtx reg;
28162   rtx_insn *insn;
28163 
28164   if (cfun->is_thunk)
28165     return 0;
28166 
28167   if (cfun->machine->lr_save_state)
28168     return cfun->machine->lr_save_state - 1;
28169 
28170   /* regs_ever_live has LR marked as used if any sibcalls are present,
28171      but this should not force saving and restoring in the
28172      pro/epilogue.  Likewise, reg_set_between_p thinks a sibcall
28173      clobbers LR, so that is inappropriate.  */
28174 
28175   /* Also, the prologue can generate a store into LR that
28176      doesn't really count, like this:
28177 
28178         move LR->R0
28179         bcl to set PIC register
28180         move LR->R31
28181         move R0->LR
28182 
28183      When we're called from the epilogue, we need to avoid counting
28184      this as a store.  */
28185 
28186   push_topmost_sequence ();
28187   top = get_insns ();
28188   pop_topmost_sequence ();
28189   reg = gen_rtx_REG (Pmode, LR_REGNO);
28190 
28191   for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
28192     {
28193       if (INSN_P (insn))
28194 	{
28195 	  if (CALL_P (insn))
28196 	    {
28197 	      if (!SIBLING_CALL_P (insn))
28198 		return 1;
28199 	    }
28200 	  else if (find_regno_note (insn, REG_INC, LR_REGNO))
28201 	    return 1;
28202 	  else if (set_of (reg, insn) != NULL_RTX
28203 		   && !prologue_epilogue_contains (insn))
28204 	    return 1;
28205 	}
28206     }
28207   return 0;
28208 }
28209 
28210 /* Emit instructions needed to load the TOC register.
28211    This is only needed when TARGET_TOC, TARGET_MINIMAL_TOC, and there is
28212    a constant pool; or for SVR4 -fpic.  */
28213 
28214 void
28215 rs6000_emit_load_toc_table (int fromprolog)
28216 {
28217   rtx dest;
28218   dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
28219 
28220   if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
28221     {
28222       char buf[30];
28223       rtx lab, tmp1, tmp2, got;
28224 
28225       lab = gen_label_rtx ();
28226       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
28227       lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
28228       if (flag_pic == 2)
28229 	{
28230 	  got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
28231 	  need_toc_init = 1;
28232 	}
28233       else
28234 	got = rs6000_got_sym ();
28235       tmp1 = tmp2 = dest;
28236       if (!fromprolog)
28237 	{
28238 	  tmp1 = gen_reg_rtx (Pmode);
28239 	  tmp2 = gen_reg_rtx (Pmode);
28240 	}
28241       emit_insn (gen_load_toc_v4_PIC_1 (lab));
28242       emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
28243       emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
28244       emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
28245     }
28246   else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
28247     {
28248       emit_insn (gen_load_toc_v4_pic_si ());
28249       emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
28250     }
28251   else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
28252     {
28253       char buf[30];
28254       rtx temp0 = (fromprolog
28255 		   ? gen_rtx_REG (Pmode, 0)
28256 		   : gen_reg_rtx (Pmode));
28257 
28258       if (fromprolog)
28259 	{
28260 	  rtx symF, symL;
28261 
28262 	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
28263 	  symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
28264 
28265 	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
28266 	  symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
28267 
28268 	  emit_insn (gen_load_toc_v4_PIC_1 (symF));
28269 	  emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
28270 	  emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
28271 	}
28272       else
28273 	{
28274 	  rtx tocsym, lab;
28275 
28276 	  tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
28277 	  need_toc_init = 1;
28278 	  lab = gen_label_rtx ();
28279 	  emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
28280 	  emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
28281 	  if (TARGET_LINK_STACK)
28282 	    emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
28283 	  emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
28284 	}
28285       emit_insn (gen_addsi3 (dest, temp0, dest));
28286     }
28287   else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
28288     {
28289       /* This is for AIX code running in non-PIC ELF32.  */
28290       rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
28291 
28292       need_toc_init = 1;
28293       emit_insn (gen_elf_high (dest, realsym));
28294       emit_insn (gen_elf_low (dest, dest, realsym));
28295     }
28296   else
28297     {
28298       gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
28299 
28300       if (TARGET_32BIT)
28301 	emit_insn (gen_load_toc_aix_si (dest));
28302       else
28303 	emit_insn (gen_load_toc_aix_di (dest));
28304     }
28305 }
28306 
28307 /* Emit instructions to restore the link register after determining where
28308    its value has been stored.  */
28309 
28310 void
28311 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
28312 {
28313   rs6000_stack_t *info = rs6000_stack_info ();
28314   rtx operands[2];
28315 
28316   operands[0] = source;
28317   operands[1] = scratch;
28318 
28319   if (info->lr_save_p)
28320     {
28321       rtx frame_rtx = stack_pointer_rtx;
28322       HOST_WIDE_INT sp_offset = 0;
28323       rtx tmp;
28324 
28325       if (frame_pointer_needed
28326 	  || cfun->calls_alloca
28327 	  || info->total_size > 32767)
28328 	{
28329 	  tmp = gen_frame_mem (Pmode, frame_rtx);
28330 	  emit_move_insn (operands[1], tmp);
28331 	  frame_rtx = operands[1];
28332 	}
28333       else if (info->push_p)
28334 	sp_offset = info->total_size;
28335 
28336       tmp = plus_constant (Pmode, frame_rtx,
28337 			   info->lr_save_offset + sp_offset);
28338       tmp = gen_frame_mem (Pmode, tmp);
28339       emit_move_insn (tmp, operands[0]);
28340     }
28341   else
28342     emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
28343 
28344   /* Freeze lr_save_p.  We've just emitted rtl that depends on the
28345      state of lr_save_p so any change from here on would be a bug.  In
28346      particular, stop rs6000_ra_ever_killed from considering the SET
28347      of lr we may have added just above.  */
28348   cfun->machine->lr_save_state = info->lr_save_p + 1;
28349 }
28350 
28351 static GTY(()) alias_set_type set = -1;
28352 
28353 alias_set_type
28354 get_TOC_alias_set (void)
28355 {
28356   if (set == -1)
28357     set = new_alias_set ();
28358   return set;
28359 }
28360 
28361 /* This returns nonzero if the current function uses the TOC.  This is
28362    determined by the presence of (use (unspec ... UNSPEC_TOC)), which
28363    is generated by the ABI_V4 load_toc_* patterns.  */
28364 #if TARGET_ELF
28365 static int
28366 uses_TOC (void)
28367 {
28368   rtx_insn *insn;
28369 
28370   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
28371     if (INSN_P (insn))
28372       {
28373 	rtx pat = PATTERN (insn);
28374 	int i;
28375 
28376 	if (GET_CODE (pat) == PARALLEL)
28377 	  for (i = 0; i < XVECLEN (pat, 0); i++)
28378 	    {
28379 	      rtx sub = XVECEXP (pat, 0, i);
28380 	      if (GET_CODE (sub) == USE)
28381 		{
28382 		  sub = XEXP (sub, 0);
28383 		  if (GET_CODE (sub) == UNSPEC
28384 		      && XINT (sub, 1) == UNSPEC_TOC)
28385 		    return 1;
28386 		}
28387 	    }
28388       }
28389   return 0;
28390 }
28391 #endif
28392 
28393 rtx
28394 create_TOC_reference (rtx symbol, rtx largetoc_reg)
28395 {
28396   rtx tocrel, tocreg, hi;
28397 
28398   if (TARGET_DEBUG_ADDR)
28399     {
28400       if (GET_CODE (symbol) == SYMBOL_REF)
28401 	fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
28402 		 XSTR (symbol, 0));
28403       else
28404 	{
28405 	  fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
28406 		   GET_RTX_NAME (GET_CODE (symbol)));
28407 	  debug_rtx (symbol);
28408 	}
28409     }
28410 
28411   if (!can_create_pseudo_p ())
28412     df_set_regs_ever_live (TOC_REGISTER, true);
28413 
28414   tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
28415   tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
28416   if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
28417     return tocrel;
28418 
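  /* Medium/large code model: split the TOC-relative address into a
     HIGH part and a LO_SUM, which typically becomes an addis plus a
     d-form access in the emitted code.  */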
28419   hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
28420   if (largetoc_reg != NULL)
28421     {
28422       emit_move_insn (largetoc_reg, hi);
28423       hi = largetoc_reg;
28424     }
28425   return gen_rtx_LO_SUM (Pmode, hi, tocrel);
28426 }
28427 
28428 /* Issue assembly directives that create a reference to the given DWARF
28429    FRAME_TABLE_LABEL from the current function section.  */
28430 void
28431 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
28432 {
28433   fprintf (asm_out_file, "\t.ref %s\n",
28434 	   (* targetm.strip_name_encoding) (frame_table_label));
28435 }
28436 
28437 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
28438    and the change to the stack pointer.  */
28439 
28440 static void
28441 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
28442 {
28443   rtvec p;
28444   int i;
28445   rtx regs[3];
28446 
28447   i = 0;
28448   regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
28449   if (hard_frame_needed)
28450     regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
28451   if (!(REGNO (fp) == STACK_POINTER_REGNUM
28452 	|| (hard_frame_needed
28453 	    && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
28454     regs[i++] = fp;
28455 
28456   p = rtvec_alloc (i);
28457   while (--i >= 0)
28458     {
28459       rtx mem = gen_frame_mem (BLKmode, regs[i]);
28460       RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
28461     }
28462 
28463   emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
28464 }
28465 
28466 /* Emit the correct code for allocating stack space, as insns.
28467    If COPY_REG, leave a copy of the old stack pointer (plus COPY_OFF) in it.
28468    The generated code may use hard register 0 as a temporary.  */
28469 
28470 static rtx_insn *
28471 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
28472 {
28473   rtx_insn *insn;
28474   rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
28475   rtx tmp_reg = gen_rtx_REG (Pmode, 0);
28476   rtx todec = gen_int_mode (-size, Pmode);
28477   rtx par, set, mem;
28478 
28479   if (INTVAL (todec) != -size)
28480     {
28481       warning (0, "stack frame too large");
28482       emit_insn (gen_trap ());
28483       return 0;
28484     }
28485 
28486   if (crtl->limit_stack)
28487     {
28488       if (REG_P (stack_limit_rtx)
28489 	  && REGNO (stack_limit_rtx) > 1
28490 	  && REGNO (stack_limit_rtx) <= 31)
28491 	{
28492 	  emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
28493 	  emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
28494 				    const0_rtx));
28495 	}
28496       else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
28497 	       && TARGET_32BIT
28498 	       && DEFAULT_ABI == ABI_V4
28499 	       && !flag_pic)
28500 	{
28501 	  rtx toload = gen_rtx_CONST (VOIDmode,
28502 				      gen_rtx_PLUS (Pmode,
28503 						    stack_limit_rtx,
28504 						    GEN_INT (size)));
28505 
28506 	  emit_insn (gen_elf_high (tmp_reg, toload));
28507 	  emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
28508 	  emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
28509 				    const0_rtx));
28510 	}
28511       else
28512 	warning (0, "stack limit expression is not supported");
28513     }
28514 
28515   if (copy_reg)
28516     {
28517       if (copy_off != 0)
28518 	emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
28519       else
28520 	emit_move_insn (copy_reg, stack_reg);
28521     }
28522 
28523   if (size > 32767)
28524     {
28525       /* Need a note here so that try_split doesn't get confused.  */
28526       if (get_last_insn () == NULL_RTX)
28527 	emit_note (NOTE_INSN_DELETED);
28528       insn = emit_move_insn (tmp_reg, todec);
28529       try_split (PATTERN (insn), insn, 0);
28530       todec = tmp_reg;
28531     }
28532 
28533   insn = emit_insn (TARGET_32BIT
28534 		    ? gen_movsi_update_stack (stack_reg, stack_reg,
28535 					      todec, stack_reg)
28536 		    : gen_movdi_di_update_stack (stack_reg, stack_reg,
28537 						 todec, stack_reg));
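  /* The *_update_stack patterns emit a store-with-update (stwu or
     stdu), decrementing the stack pointer and storing the back chain
     in a single instruction.  */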
28538   /* Since we didn't use gen_frame_mem to generate the MEM, grab
28539      it now and set the alias set/attributes. The above gen_*_update
28540      calls will generate a PARALLEL with the MEM set being the first
28541      operation. */
28542   par = PATTERN (insn);
28543   gcc_assert (GET_CODE (par) == PARALLEL);
28544   set = XVECEXP (par, 0, 0);
28545   gcc_assert (GET_CODE (set) == SET);
28546   mem = SET_DEST (set);
28547   gcc_assert (MEM_P (mem));
28548   MEM_NOTRAP_P (mem) = 1;
28549   set_mem_alias_set (mem, get_frame_alias_set ());
28550 
28551   RTX_FRAME_RELATED_P (insn) = 1;
28552   add_reg_note (insn, REG_FRAME_RELATED_EXPR,
28553 		gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg,
28554 						      GEN_INT (-size))));
28555   return insn;
28556 }
28557 
28558 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
28559 
28560 #if PROBE_INTERVAL > 32768
28561 #error Cannot use indexed addressing mode for stack probing
28562 #endif
28563 
28564 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
28565    inclusive.  These are offsets from the current stack pointer.  */
28566 
28567 static void
28568 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
28569 {
28570   /* See if we have a constant small number of probes to generate.  If so,
28571      that's the easy case.  */
28572   if (first + size <= 32768)
28573     {
28574       HOST_WIDE_INT i;
28575 
28576       /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
28577 	 it exceeds SIZE.  If only one probe is needed, this will not
28578 	 generate any code.  Then probe at FIRST + SIZE.  */
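      /* E.g. (illustrative, assuming the default 4 KB interval):
	 FIRST == 0 and SIZE == 10000 emits probes at sp-4096, sp-8192
	 and finally sp-10000.  */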
28579       for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
28580 	emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
28581 					 -(first + i)));
28582 
28583       emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
28584 				       -(first + size)));
28585     }
28586 
28587   /* Otherwise, do the same as above, but in a loop.  Note that we must be
28588      extra careful with variables wrapping around because we might be at
28589      the very top (or the very bottom) of the address space and we have
28590      to be able to handle this case properly; in particular, we use an
28591      equality test for the loop condition.  */
28592   else
28593     {
28594       HOST_WIDE_INT rounded_size;
28595       rtx r12 = gen_rtx_REG (Pmode, 12);
28596       rtx r0 = gen_rtx_REG (Pmode, 0);
28597 
28598       /* Sanity check for the addressing mode we're going to use.  */
28599       gcc_assert (first <= 32768);
28600 
28601       /* Step 1: round SIZE to the previous multiple of the interval.  */
28602 
28603       rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
28604 
28605 
28606       /* Step 2: compute initial and final value of the loop counter.  */
28607 
28608       /* TEST_ADDR = SP + FIRST.  */
28609       emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
28610 						  -first)));
28611 
28612       /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
28613       if (rounded_size > 32768)
28614 	{
28615 	  emit_move_insn (r0, GEN_INT (-rounded_size));
28616 	  emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
28617 	}
28618       else
28619 	emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
28620 						   -rounded_size)));
28621 
28622 
28623       /* Step 3: the loop
28624 
28625 	 do
28626 	   {
28627 	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
28628 	     probe at TEST_ADDR
28629 	   }
28630 	 while (TEST_ADDR != LAST_ADDR)
28631 
28632 	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
28633 	 until it is equal to ROUNDED_SIZE.  */
28634 
28635       if (TARGET_64BIT)
28636 	emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
28637       else
28638 	emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
28639 
28640 
28641       /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
28642 	 that SIZE is equal to ROUNDED_SIZE.  */
28643 
28644       if (size != rounded_size)
28645 	emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
28646     }
28647 }
28648 
28649 /* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
28650    absolute addresses.  */
28651 
28652 const char *
28653 output_probe_stack_range (rtx reg1, rtx reg2)
28654 {
28655   static int labelno = 0;
28656   char loop_lab[32];
28657   rtx xops[2];
28658 
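  /* The emitted loop is roughly (assuming a 4 KB probe interval and
     the r12/r0 registers used by the 64-bit caller):

	.LPSRL0:
		addi r12,r12,-4096
		stw  r0,0(r12)
		cmpd 0,r12,r0
		bne  0,.LPSRL0  */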
28659   ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
28660 
28661   /* Loop.  */
28662   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
28663 
28664   /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
28665   xops[0] = reg1;
28666   xops[1] = GEN_INT (-PROBE_INTERVAL);
28667   output_asm_insn ("addi %0,%0,%1", xops);
28668 
28669   /* Probe at TEST_ADDR.  */
28670   xops[1] = gen_rtx_REG (Pmode, 0);
28671   output_asm_insn ("stw %1,0(%0)", xops);
28672 
28673   /* Test if TEST_ADDR == LAST_ADDR.  */
28674   xops[1] = reg2;
28675   if (TARGET_64BIT)
28676     output_asm_insn ("cmpd 0,%0,%1", xops);
28677   else
28678     output_asm_insn ("cmpw 0,%0,%1", xops);
28679 
28680   /* Branch.  */
28681   fputs ("\tbne 0,", asm_out_file);
28682   assemble_name_raw (asm_out_file, loop_lab);
28683   fputc ('\n', asm_out_file);
28684 
28685   return "";
28686 }
28687 
28688 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
28689    with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
28690    is not NULL.  It would be nice if dwarf2out_frame_debug_expr could
28691    deduce these equivalences by itself so it wasn't necessary to hold
28692    its hand so much.  Don't be tempted to always supply d2_f_d_e with
28693    the actual CFA register, i.e. r31 when we are using a hard frame
28694    pointer.  That fails when saving regs off r1, and sched moves the
28695    r31 setup past the reg saves.  */
28696 
28697 static rtx_insn *
28698 rs6000_frame_related (rtx_insn *insn, rtx reg, HOST_WIDE_INT val,
28699 		      rtx reg2, rtx repl2)
28700 {
28701   rtx repl;
28702 
28703   if (REGNO (reg) == STACK_POINTER_REGNUM)
28704     {
28705       gcc_checking_assert (val == 0);
28706       repl = NULL_RTX;
28707     }
28708   else
28709     repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
28710 			 GEN_INT (val));
28711 
28712   rtx pat = PATTERN (insn);
28713   if (!repl && !reg2)
28714     {
28715       /* No need for any replacement.  Just set RTX_FRAME_RELATED_P.  */
28716       if (GET_CODE (pat) == PARALLEL)
28717 	for (int i = 0; i < XVECLEN (pat, 0); i++)
28718 	  if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
28719 	    {
28720 	      rtx set = XVECEXP (pat, 0, i);
28721 
28722 	      /* If this PARALLEL has been emitted for out-of-line
28723 		 register save functions, or store multiple, then omit
28724 		 eh_frame info for any user-defined global regs.  If
28725 		 eh_frame info is supplied, frame unwinding will
28726 		 restore a user reg.  */
28727 	      if (!REG_P (SET_SRC (set))
28728 		  || !fixed_reg_p (REGNO (SET_SRC (set))))
28729 		RTX_FRAME_RELATED_P (set) = 1;
28730 	    }
28731       RTX_FRAME_RELATED_P (insn) = 1;
28732       return insn;
28733     }
28734 
28735   /* We expect that 'pat' is either a SET or a PARALLEL containing
28736      SETs (and possibly other stuff).  In a PARALLEL, all the SETs
28737      are important so they all have to be marked RTX_FRAME_RELATED_P.
28738      Call simplify_replace_rtx on the SETs rather than the whole insn
28739      so as to leave the other stuff alone (for example USE of r12).  */
28740 
28741   set_used_flags (pat);
28742   if (GET_CODE (pat) == SET)
28743     {
28744       if (repl)
28745 	pat = simplify_replace_rtx (pat, reg, repl);
28746       if (reg2)
28747 	pat = simplify_replace_rtx (pat, reg2, repl2);
28748     }
28749   else if (GET_CODE (pat) == PARALLEL)
28750     {
28751       pat = shallow_copy_rtx (pat);
28752       XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));
28753 
28754       for (int i = 0; i < XVECLEN (pat, 0); i++)
28755 	if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
28756 	  {
28757 	    rtx set = XVECEXP (pat, 0, i);
28758 
28759 	    if (repl)
28760 	      set = simplify_replace_rtx (set, reg, repl);
28761 	    if (reg2)
28762 	      set = simplify_replace_rtx (set, reg2, repl2);
28763 	    XVECEXP (pat, 0, i) = set;
28764 
28765 	    /* Omit eh_frame info for any user-defined global regs.  */
28766 	    if (!REG_P (SET_SRC (set))
28767 		|| !fixed_reg_p (REGNO (SET_SRC (set))))
28768 	      RTX_FRAME_RELATED_P (set) = 1;
28769 	  }
28770     }
28771   else
28772     gcc_unreachable ();
28773 
28774   RTX_FRAME_RELATED_P (insn) = 1;
28775   add_reg_note (insn, REG_FRAME_RELATED_EXPR, copy_rtx_if_shared (pat));
28776 
28777   return insn;
28778 }
28779 
28780 /* Returns an insn that has a vrsave set operation with the
28781    appropriate CLOBBERs.  */
28782 
28783 static rtx
28784 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
28785 {
28786   int nclobs, i;
28787   rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
28788   rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
28789 
28790   clobs[0]
28791     = gen_rtx_SET (vrsave,
28792 		   gen_rtx_UNSPEC_VOLATILE (SImode,
28793 					    gen_rtvec (2, reg, vrsave),
28794 					    UNSPECV_SET_VRSAVE));
28795 
28796   nclobs = 1;
28797 
28798   /* We need to clobber the registers in the mask so the scheduler
28799      does not move sets to VRSAVE before sets of AltiVec registers.
28800 
28801      However, if the function receives nonlocal gotos, reload will set
28802      all call saved registers live.  We will end up with:
28803 
28804      	(set (reg 999) (mem))
28805 	(parallel [ (set (reg vrsave) (unspec blah))
28806 		    (clobber (reg 999))])
28807 
28808      The clobber will cause the store into reg 999 to be dead, and
28809      flow will attempt to delete an epilogue insn.  In this case, we
28810      need an unspec use/set of the register.  */
28811 
28812   for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
28813     if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
28814       {
28815 	if (!epiloguep || call_used_regs [i])
28816 	  clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
28817 					     gen_rtx_REG (V4SImode, i));
28818 	else
28819 	  {
28820 	    rtx reg = gen_rtx_REG (V4SImode, i);
28821 
28822 	    clobs[nclobs++]
28823 	      = gen_rtx_SET (reg,
28824 			     gen_rtx_UNSPEC (V4SImode,
28825 					     gen_rtvec (1, reg), 27));
28826 	  }
28827       }
28828 
28829   insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
28830 
28831   for (i = 0; i < nclobs; ++i)
28832     XVECEXP (insn, 0, i) = clobs[i];
28833 
28834   return insn;
28835 }
28836 
28837 static rtx
28838 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
28839 {
28840   rtx addr, mem;
28841 
28842   addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
28843   mem = gen_frame_mem (GET_MODE (reg), addr);
28844   return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
28845 }
28846 
28847 static rtx
28848 gen_frame_load (rtx reg, rtx frame_reg, int offset)
28849 {
28850   return gen_frame_set (reg, frame_reg, offset, false);
28851 }
28852 
28853 static rtx
28854 gen_frame_store (rtx reg, rtx frame_reg, int offset)
28855 {
28856   return gen_frame_set (reg, frame_reg, offset, true);
28857 }
28858 
28859 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
28860    Save REGNO into [FRAME_REG + OFFSET] in mode MODE.  */
28861 
28862 static rtx_insn *
28863 emit_frame_save (rtx frame_reg, machine_mode mode,
28864 		 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
28865 {
28866   rtx reg;
28867 
28868   /* Assert we aren't given cases needing register indexed addressing.  */
28869   gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
28870 			 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
28871 			 || (TARGET_E500_DOUBLE && mode == DFmode)
28872 			 || (TARGET_SPE_ABI
28873 			     && SPE_VECTOR_MODE (mode)
28874 			     && !SPE_CONST_OFFSET_OK (offset))));
28875 
28876   reg = gen_rtx_REG (mode, regno);
28877   rtx_insn *insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
28878   return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
28879 			       NULL_RTX, NULL_RTX);
28880 }
28881 
28882 /* Emit an offset memory reference suitable for a frame store, while
28883    converting to a valid addressing mode.  */
28884 
28885 static rtx
28886 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
28887 {
28888   rtx int_rtx, offset_rtx;
28889 
28890   int_rtx = GEN_INT (offset);
28891 
28892   if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
28893       || (TARGET_E500_DOUBLE && mode == DFmode))
28894     {
28895       offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
28896       emit_move_insn (offset_rtx, int_rtx);
28897     }
28898   else
28899     offset_rtx = int_rtx;
28900 
28901   return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
28902 }
28903 
28904 #ifndef TARGET_FIX_AND_CONTINUE
28905 #define TARGET_FIX_AND_CONTINUE 0
28906 #endif
28907 
28908 /* It's really GPR 13 or 14, FPR 14 and VR 20.  We need the smallest.  */
28909 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
28910 #define LAST_SAVRES_REGISTER 31
28911 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
28912 
28913 enum {
28914   SAVRES_LR = 0x1,
28915   SAVRES_SAVE = 0x2,
28916   SAVRES_REG = 0x0c,
28917   SAVRES_GPR = 0,
28918   SAVRES_FPR = 4,
28919   SAVRES_VR  = 8
28920 };
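
/* E.g. (illustrative): (SAVRES_SAVE | SAVRES_FPR) selects an
   out-of-line FPR save routine; OR-ing in SAVRES_LR selects the
   variant that also handles the link register.  */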
28921 
28922 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
28923 
28924 /* Temporary holding space for an out-of-line register save/restore
28925    routine name.  */
28926 static char savres_routine_name[30];
28927 
28928 /* Return the name for an out-of-line register save/restore routine.
28929    SEL encodes the register class and whether we save or restore.  */
28930 
28931 static char *
28932 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
28933 {
28934   const char *prefix = "";
28935   const char *suffix = "";
28936 
28937   /* Different targets are supposed to define
28938      {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
28939      routine name could be defined with:
28940 
28941      sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
28942 
28943      This is a nice idea in theory, but in reality, things are
28944      complicated in several ways:
28945 
28946      - ELF targets have save/restore routines for GPRs.
28947 
28948      - SPE targets use different prefixes for 32/64-bit registers, and
28949        neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
28950 
28951      - PPC64 ELF targets have routines for save/restore of GPRs that
28952        differ in what they do with the link register, so having a set
28953        prefix doesn't work.  (We only use one of the save routines at
28954        the moment, though.)
28955 
28956      - PPC32 elf targets have "exit" versions of the restore routines
28957        that restore the link register and can save some extra space.
28958        These require an extra suffix.  (There are also "tail" versions
28959        of the restore routines and "GOT" versions of the save routines,
28960        but we don't generate those at present.  Same problems apply,
28961        though.)
28962 
28963      We deal with all this by synthesizing our own prefix/suffix and
28964      using that for the simple sprintf call shown above.  */
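
  /* E.g. (illustrative): on 32-bit ELF, saving GPRs from r29 up uses
     "_savegpr_29", while the exit variant that also restores the link
     register is "_restgpr_29_x".  */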
28965   if (TARGET_SPE)
28966     {
28967       /* No floating point saves on the SPE.  */
28968       gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
28969 
28970       if ((sel & SAVRES_SAVE))
28971 	prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
28972       else
28973 	prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
28974 
28975       if ((sel & SAVRES_LR))
28976 	suffix = "_x";
28977     }
28978   else if (DEFAULT_ABI == ABI_V4)
28979     {
28980       if (TARGET_64BIT)
28981 	goto aix_names;
28982 
28983       if ((sel & SAVRES_REG) == SAVRES_GPR)
28984 	prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
28985       else if ((sel & SAVRES_REG) == SAVRES_FPR)
28986 	prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
28987       else if ((sel & SAVRES_REG) == SAVRES_VR)
28988 	prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
28989       else
28990 	abort ();
28991 
28992       if ((sel & SAVRES_LR))
28993 	suffix = "_x";
28994     }
28995   else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28996     {
28997 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
28998       /* No out-of-line save/restore routines for GPRs on AIX.  */
28999       gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
29000 #endif
29001 
29002     aix_names:
29003       if ((sel & SAVRES_REG) == SAVRES_GPR)
29004 	prefix = ((sel & SAVRES_SAVE)
29005 		  ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
29006 		  : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
29007       else if ((sel & SAVRES_REG) == SAVRES_FPR)
29008 	{
29009 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
29010 	  if ((sel & SAVRES_LR))
29011 	    prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
29012 	  else
29013 #endif
29014 	    {
29015 	      prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
29016 	      suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
29017 	    }
29018 	}
29019       else if ((sel & SAVRES_REG) == SAVRES_VR)
29020 	prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
29021       else
29022 	abort ();
29023     }
29024 
29025   if (DEFAULT_ABI == ABI_DARWIN)
29026     {
29027       /* The Darwin approach is (slightly) different, in order to be
29028 	 compatible with code generated by the system toolchain.  There is a
29029 	 single symbol for the start of save sequence, and the code here
29030 	 embeds an offset into that code on the basis of the first register
29031 	 to be saved.  */
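      /* E.g. (illustrative): regno 18 produces
	 "*saveGPR+20 ; save r18-r31", i.e. an offset of
	 (18 - 13) * 4 = 20 bytes into the save sequence.  */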
29032       prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
29033       if ((sel & SAVRES_REG) == SAVRES_GPR)
29034 	sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
29035 		 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
29036 		 (regno - 13) * 4, prefix, regno);
29037       else if ((sel & SAVRES_REG) == SAVRES_FPR)
29038 	sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
29039 		 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
29040       else if ((sel & SAVRES_REG) == SAVRES_VR)
29041 	sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
29042 		 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
29043       else
29044 	abort ();
29045     }
29046   else
29047     sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
29048 
29049   return savres_routine_name;
29050 }
29051 
29052 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
29053    SEL encodes the register class and whether we save or restore.  */
29054 
29055 static rtx
29056 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
29057 {
29058   int regno = ((sel & SAVRES_REG) == SAVRES_GPR
29059 	       ? info->first_gp_reg_save
29060 	       : (sel & SAVRES_REG) == SAVRES_FPR
29061 	       ? info->first_fp_reg_save - 32
29062 	       : (sel & SAVRES_REG) == SAVRES_VR
29063 	       ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
29064 	       : -1);
29065   rtx sym;
29066   int select = sel;
29067 
29068   /* On the SPE, we never have any FPRs, but we do have 32/64-bit
29069      versions of the gpr routines.  */
29070   if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
29071       && info->spe_64bit_regs_used)
29072     select ^= SAVRES_FPR ^ SAVRES_GPR;
29073 
29074   /* Don't generate bogus routine names.  */
29075   gcc_assert (FIRST_SAVRES_REGISTER <= regno
29076 	      && regno <= LAST_SAVRES_REGISTER
29077 	      && select >= 0 && select <= 12);
29078 
29079   sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
29080 
29081   if (sym == NULL)
29082     {
29083       char *name;
29084 
29085       name = rs6000_savres_routine_name (info, regno, sel);
29086 
29087       sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
29088 	= gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
29089       SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
29090     }
29091 
29092   return sym;
29093 }
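
/* Added note: the savres_routine_syms cache above makes repeated
   queries cheap and stable; e.g. (a sketch, not from the original
   sources)

     rtx s1 = rs6000_savres_routine_sym (info, SAVRES_SAVE | SAVRES_GPR);
     rtx s2 = rs6000_savres_routine_sym (info, SAVRES_SAVE | SAVRES_GPR);

   hands back the identical SYMBOL_REF twice, already marked with
   SYMBOL_FLAG_FUNCTION.  */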
29094 
29095 /* Emit a sequence of insns, including a stack tie if needed, for
29096    resetting the stack pointer.  If UPDT_REGNO is not 1, then don't
29097    reset the stack pointer, but move the base of the frame into
29098    reg UPDT_REGNO for use by out-of-line register restore routines.  */
29099 
29100 static rtx
29101 rs6000_emit_stack_reset (rs6000_stack_t *info,
29102 			 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
29103 			 unsigned updt_regno)
29104 {
29105   /* If there is nothing to do, don't do anything.  */
29106   if (frame_off == 0 && REGNO (frame_reg_rtx) == updt_regno)
29107     return NULL_RTX;
29108 
29109   rtx updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
29110 
29111   /* This blockage is needed so that sched doesn't decide to move
29112      the sp change before the register restores.  */
29113   if (DEFAULT_ABI == ABI_V4
29114       || (TARGET_SPE_ABI
29115 	  && info->spe_64bit_regs_used != 0
29116 	  && info->first_gp_reg_save != 32))
29117     return emit_insn (gen_stack_restore_tie (updt_reg_rtx, frame_reg_rtx,
29118 					     GEN_INT (frame_off)));
29119 
29120   /* If we are restoring registers out-of-line, we will be using the
29121      "exit" variants of the restore routines, which will reset the
29122      stack for us.  But we do need to point updt_reg into the
29123      right place for those routines.  */
29124   if (frame_off != 0)
29125     return emit_insn (gen_add3_insn (updt_reg_rtx,
29126 				     frame_reg_rtx, GEN_INT (frame_off)));
29127   else
29128     return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
29131 }
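
/* Added summary of the three paths above: (1) nothing is emitted when
   the frame reg already is the update reg with no offset; (2) on V.4
   (and the SPE case) the add/move is wrapped in a stack tie so the
   scheduler cannot hoist the SP update above the register restores;
   (3) otherwise a bare "addi UPDT,FRAME,off" or "mr UPDT,FRAME" just
   points UPDT_REGNO at the frame base for the out-of-line "exit"
   routines, which reset the stack themselves.  */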
29132 
29133 /* Return the register number used as a pointer by out-of-line
29134    save/restore functions.  */
29135 
29136 static inline unsigned
29137 ptr_regno_for_savres (int sel)
29138 {
29139   if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29140     return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
29141   return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
29142 }
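
/* Added quick reference for the selections above:

     ABI_AIX / ABI_ELFv2:  FPR routines, or any LR variant -> r1, else r12
     ABI_DARWIN:           FPR routines -> r1, else r11
     other ABIs (V.4):     r11  */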
29143 
29144 /* Construct a parallel rtx describing the effect of a call to an
29145    out-of-line register save/restore routine, and emit the insn
29146    or jump_insn as appropriate.  */
29147 
29148 static rtx_insn *
29149 rs6000_emit_savres_rtx (rs6000_stack_t *info,
29150 			rtx frame_reg_rtx, int save_area_offset, int lr_offset,
29151 			machine_mode reg_mode, int sel)
29152 {
29153   int i;
29154   int offset, start_reg, end_reg, n_regs, use_reg;
29155   int reg_size = GET_MODE_SIZE (reg_mode);
29156   rtx sym;
29157   rtvec p;
29158   rtx par;
29159   rtx_insn *insn;
29160 
29161   offset = 0;
29162   start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
29163 	       ? info->first_gp_reg_save
29164 	       : (sel & SAVRES_REG) == SAVRES_FPR
29165 	       ? info->first_fp_reg_save
29166 	       : (sel & SAVRES_REG) == SAVRES_VR
29167 	       ? info->first_altivec_reg_save
29168 	       : -1);
29169   end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
29170 	     ? 32
29171 	     : (sel & SAVRES_REG) == SAVRES_FPR
29172 	     ? 64
29173 	     : (sel & SAVRES_REG) == SAVRES_VR
29174 	     ? LAST_ALTIVEC_REGNO + 1
29175 	     : -1);
29176   n_regs = end_reg - start_reg;
29177   p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
29178 		   + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
29179 		   + n_regs);
29180 
29181   if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
29182     RTVEC_ELT (p, offset++) = ret_rtx;
29183 
29184   RTVEC_ELT (p, offset++)
29185     = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
29186 
29187   sym = rs6000_savres_routine_sym (info, sel);
29188   RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
29189 
29190   use_reg = ptr_regno_for_savres (sel);
29191   if ((sel & SAVRES_REG) == SAVRES_VR)
29192     {
29193       /* Vector regs are saved/restored using [reg+reg] addressing.  */
29194       RTVEC_ELT (p, offset++)
29195 	= gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
29196       RTVEC_ELT (p, offset++)
29197 	= gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
29198     }
29199   else
29200     RTVEC_ELT (p, offset++)
29201       = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
29202 
29203   for (i = 0; i < end_reg - start_reg; i++)
29204     RTVEC_ELT (p, i + offset)
29205       = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
29206 		       frame_reg_rtx, save_area_offset + reg_size * i,
29207 		       (sel & SAVRES_SAVE) != 0);
29208 
29209   if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
29210     RTVEC_ELT (p, i + offset)
29211       = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
29212 
29213   par = gen_rtx_PARALLEL (VOIDmode, p);
29214 
29215   if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
29216     {
29217       insn = emit_jump_insn (par);
29218       JUMP_LABEL (insn) = ret_rtx;
29219     }
29220   else
29221     insn = emit_insn (par);
29222   return insn;
29223 }
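
/* Added illustration of the PARALLEL built above, for a GPR "exit"
   restore (SAVRES_LR set, SAVRES_SAVE clear) starting at r29:

     (parallel [(return)
                (clobber (reg:P LR_REGNO))
                (use (symbol_ref "_restgpr0_29"))
                (use (reg:P 1))          ; see ptr_regno_for_savres
                (set (reg 29) (mem ...))
                (set (reg 30) (mem ...))
                (set (reg 31) (mem ...))])

   Because of the (return), it must go out through emit_jump_insn with
   JUMP_LABEL set to ret_rtx, which is exactly what the code does.  */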
29224 
29225 /* Emit code to store CR fields that need to be saved into REG.  */
29226 
29227 static void
29228 rs6000_emit_move_from_cr (rtx reg)
29229 {
29230   /* Only the ELFv2 ABI allows storing only selected fields.  */
29231   if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
29232     {
29233       int i, cr_reg[8], count = 0;
29234 
29235       /* Collect CR fields that must be saved.  */
29236       for (i = 0; i < 8; i++)
29237 	if (save_reg_p (CR0_REGNO + i))
29238 	  cr_reg[count++] = i;
29239 
29240       /* If it's just a single one, use mfcrf.  */
29241       if (count == 1)
29242 	{
29243 	  rtvec p = rtvec_alloc (1);
29244 	  rtvec r = rtvec_alloc (2);
29245 	  RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
29246 	  RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
29247 	  RTVEC_ELT (p, 0)
29248 	    = gen_rtx_SET (reg,
29249 			   gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
29250 
29251 	  emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
29252 	  return;
29253 	}
29254 
29255       /* ??? It might be better to handle count == 2 / 3 cases here
29256 	 as well, using logical operations to combine the values.  */
29257     }
29258 
29259   emit_insn (gen_movesi_from_cr (reg));
29260 }
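
/* Added worked example: under ELFv2 with only CR2 to be saved,
   cr_reg[0] == 2, the mask is 1 << (7 - 2) == 0x20, and the UNSPEC
   above matches the single-field pattern (mfocrf / mfcr with a field
   mask, "mfocrf rN,0x20"); every other case falls back to a full
   "mfcr rN".  */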
29261 
29262 /* Return whether the split-stack arg pointer (r12) is used.  */
29263 
29264 static bool
29265 split_stack_arg_pointer_used_p (void)
29266 {
29267   /* If the pseudo holding the arg pointer is no longer a pseudo,
29268      then the arg pointer is used.  */
29269   if (cfun->machine->split_stack_arg_pointer != NULL_RTX
29270       && (!REG_P (cfun->machine->split_stack_arg_pointer)
29271 	  || (REGNO (cfun->machine->split_stack_arg_pointer)
29272 	      < FIRST_PSEUDO_REGISTER)))
29273     return true;
29274 
29275   /* Unfortunately we also need to do some code scanning, since
29276      r12 may have been substituted for the pseudo.  */
29277   rtx_insn *insn;
29278   basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
29279   FOR_BB_INSNS (bb, insn)
29280     if (NONDEBUG_INSN_P (insn))
29281       {
29282 	/* A call destroys r12.  */
29283 	if (CALL_P (insn))
29284 	  return false;
29285 
29286 	df_ref use;
29287 	FOR_EACH_INSN_USE (use, insn)
29288 	  {
29289 	    rtx x = DF_REF_REG (use);
29290 	    if (REG_P (x) && REGNO (x) == 12)
29291 	      return true;
29292 	  }
29293 	df_ref def;
29294 	FOR_EACH_INSN_DEF (def, insn)
29295 	  {
29296 	    rtx x = DF_REF_REG (def);
29297 	    if (REG_P (x) && REGNO (x) == 12)
29298 	      return false;
29299 	  }
29300       }
29301   return bitmap_bit_p (DF_LR_OUT (bb), 12);
29302 }
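
/* Added note: scanning only the first real basic block is enough
   because the prologue materializes r12 there; a use of r12 before
   any call or redefinition means the arg pointer escaped, a call or a
   def of r12 means it did not, and otherwise r12's liveness on exit
   from the block decides.  */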
29303 
29304 /* Return whether we need to emit an ELFv2 global entry point prologue.  */
29305 
29306 static bool
29307 rs6000_global_entry_point_needed_p (void)
29308 {
29309   /* Only needed for the ELFv2 ABI.  */
29310   if (DEFAULT_ABI != ABI_ELFv2)
29311     return false;
29312 
29313   /* With -msingle-pic-base, we assume the whole program shares the same
29314      TOC, so no global entry point prologues are needed anywhere.  */
29315   if (TARGET_SINGLE_PIC_BASE)
29316     return false;
29317 
29318   /* Ensure we have a global entry point for thunks.   ??? We could
29319      avoid that if the target routine doesn't need a global entry point,
29320      but we do not know whether this is the case at this point.  */
29321   if (cfun->is_thunk)
29322     return true;
29323 
29324   /* For regular functions, rs6000_emit_prologue sets this flag if the
29325      routine ever uses the TOC pointer.  */
29326   return cfun->machine->r2_setup_needed;
29327 }
29328 
29329 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS.  */
29330 static sbitmap
29331 rs6000_get_separate_components (void)
29332 {
29333   rs6000_stack_t *info = rs6000_stack_info ();
29334 
29335   if (WORLD_SAVE_P (info))
29336     return NULL;
29337 
29338   if (TARGET_SPE_ABI)
29339     return NULL;
29340 
29341   gcc_assert (!(info->savres_strategy & SAVE_MULTIPLE)
29342 	      && !(info->savres_strategy & REST_MULTIPLE));
29343 
29344   /* Component 0 is the save/restore of LR (done via GPR0).
29345      Components 13..31 are the save/restore of GPR13..GPR31.
29346      Components 46..63 are the save/restore of FPR14..FPR31.  */
29347 
29348   cfun->machine->n_components = 64;
29349 
29350   sbitmap components = sbitmap_alloc (cfun->machine->n_components);
29351   bitmap_clear (components);
29352 
29353   int reg_size = TARGET_32BIT ? 4 : 8;
29354   int fp_reg_size = 8;
29355 
29356   /* The GPRs we need saved to the frame.  */
29357   if ((info->savres_strategy & SAVE_INLINE_GPRS)
29358       && (info->savres_strategy & REST_INLINE_GPRS))
29359     {
29360       int offset = info->gp_save_offset;
29361       if (info->push_p)
29362 	offset += info->total_size;
29363 
29364       for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
29365 	{
29366 	  if (IN_RANGE (offset, -0x8000, 0x7fff)
29367 	      && rs6000_reg_live_or_pic_offset_p (regno))
29368 	    bitmap_set_bit (components, regno);
29369 
29370 	  offset += reg_size;
29371 	}
29372     }
29373 
29374   /* Don't mess with the hard frame pointer.  */
29375   if (frame_pointer_needed)
29376     bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
29377 
29378   /* Don't mess with the fixed TOC register.  */
29379   if ((TARGET_TOC && TARGET_MINIMAL_TOC)
29380       || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
29381       || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
29382     bitmap_clear_bit (components, RS6000_PIC_OFFSET_TABLE_REGNUM);
29383 
29384   /* The FPRs we need saved to the frame.  */
29385   if ((info->savres_strategy & SAVE_INLINE_FPRS)
29386       && (info->savres_strategy & REST_INLINE_FPRS))
29387     {
29388       int offset = info->fp_save_offset;
29389       if (info->push_p)
29390 	offset += info->total_size;
29391 
29392       for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
29393 	{
29394 	  if (IN_RANGE (offset, -0x8000, 0x7fff) && save_reg_p (regno))
29395 	    bitmap_set_bit (components, regno);
29396 
29397 	  offset += fp_reg_size;
29398 	}
29399     }
29400 
29401   /* Optimize LR save and restore if we can.  This is component 0.  Any
29402      out-of-line register save/restore routines need LR.  */
29403   if (info->lr_save_p
29404       && !(flag_pic && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
29405       && (info->savres_strategy & SAVE_INLINE_GPRS)
29406       && (info->savres_strategy & REST_INLINE_GPRS)
29407       && (info->savres_strategy & SAVE_INLINE_FPRS)
29408       && (info->savres_strategy & REST_INLINE_FPRS)
29409       && (info->savres_strategy & SAVE_INLINE_VRS)
29410       && (info->savres_strategy & REST_INLINE_VRS))
29411     {
29412       int offset = info->lr_save_offset;
29413       if (info->push_p)
29414 	offset += info->total_size;
29415       if (IN_RANGE (offset, -0x8000, 0x7fff))
29416 	bitmap_set_bit (components, 0);
29417     }
29418 
29419   return components;
29420 }
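
/* Added example of the numbering above: with first_gp_reg_save == 29
   and first_fp_reg_save == 61 (i.e. f29), the candidate components
   are {0, 29, 30, 31, 61, 62, 63} -- GPR/FPR components reuse the
   hard register numbers, and component 0 stands for LR.  */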
29421 
29422 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB.  */
29423 static sbitmap
29424 rs6000_components_for_bb (basic_block bb)
29425 {
29426   rs6000_stack_t *info = rs6000_stack_info ();
29427 
29428   bitmap in = DF_LIVE_IN (bb);
29429   bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
29430   bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
29431 
29432   sbitmap components = sbitmap_alloc (cfun->machine->n_components);
29433   bitmap_clear (components);
29434 
29435   /* A register is used in a bb if it is in the IN, GEN, or KILL sets.  */
29436 
29437   /* GPRs.  */
29438   for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
29439     if (bitmap_bit_p (in, regno)
29440 	|| bitmap_bit_p (gen, regno)
29441 	|| bitmap_bit_p (kill, regno))
29442       bitmap_set_bit (components, regno);
29443 
29444   /* FPRs.  */
29445   for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
29446     if (bitmap_bit_p (in, regno)
29447 	|| bitmap_bit_p (gen, regno)
29448 	|| bitmap_bit_p (kill, regno))
29449       bitmap_set_bit (components, regno);
29450 
29451   /* The link register.  */
29452   if (bitmap_bit_p (in, LR_REGNO)
29453       || bitmap_bit_p (gen, LR_REGNO)
29454       || bitmap_bit_p (kill, LR_REGNO))
29455     bitmap_set_bit (components, 0);
29456 
29457   return components;
29458 }
29459 
29460 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.  */
29461 static void
29462 rs6000_disqualify_components (sbitmap components, edge e,
29463 			      sbitmap edge_components, bool /*is_prologue*/)
29464 {
29465   /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be
29466      live where we want to place that code.  */
29467   if (bitmap_bit_p (edge_components, 0)
29468       && bitmap_bit_p (DF_LIVE_IN (e->dest), 0))
29469     {
29470       if (dump_file)
29471 	fprintf (dump_file, "Disqualifying LR because GPR0 is live "
29472 		 "on entry to bb %d\n", e->dest->index);
29473       bitmap_clear_bit (components, 0);
29474     }
29475 }
29476 
29477 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS.  */
29478 static void
29479 rs6000_emit_prologue_components (sbitmap components)
29480 {
29481   rs6000_stack_t *info = rs6000_stack_info ();
29482   rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
29483 			     ? HARD_FRAME_POINTER_REGNUM
29484 			     : STACK_POINTER_REGNUM);
29485 
29486   machine_mode reg_mode = Pmode;
29487   int reg_size = TARGET_32BIT ? 4 : 8;
29488   machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
29489 			     ? DFmode : SFmode;
29490   int fp_reg_size = 8;
29491 
29492   /* Prologue for LR.  */
29493   if (bitmap_bit_p (components, 0))
29494     {
29495       rtx reg = gen_rtx_REG (reg_mode, 0);
29496       rtx_insn *insn = emit_move_insn (reg, gen_rtx_REG (reg_mode, LR_REGNO));
29497       RTX_FRAME_RELATED_P (insn) = 1;
29498       add_reg_note (insn, REG_CFA_REGISTER, NULL);
29499 
29500       int offset = info->lr_save_offset;
29501       if (info->push_p)
29502 	offset += info->total_size;
29503 
29504       insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
29505       RTX_FRAME_RELATED_P (insn) = 1;
29506       rtx lr = gen_rtx_REG (reg_mode, LR_REGNO);
29507       rtx mem = copy_rtx (SET_DEST (single_set (insn)));
29508       add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, lr));
29509     }
29510 
29511   /* Prologue for the GPRs.  */
29512   int offset = info->gp_save_offset;
29513   if (info->push_p)
29514     offset += info->total_size;
29515 
29516   for (int i = info->first_gp_reg_save; i < 32; i++)
29517     {
29518       if (bitmap_bit_p (components, i))
29519 	{
29520 	  rtx reg = gen_rtx_REG (reg_mode, i);
29521 	  rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
29522 	  RTX_FRAME_RELATED_P (insn) = 1;
29523 	  rtx set = copy_rtx (single_set (insn));
29524 	  add_reg_note (insn, REG_CFA_OFFSET, set);
29525 	}
29526 
29527       offset += reg_size;
29528     }
29529 
29530   /* Prologue for the FPRs.  */
29531   offset = info->fp_save_offset;
29532   if (info->push_p)
29533     offset += info->total_size;
29534 
29535   for (int i = info->first_fp_reg_save; i < 64; i++)
29536     {
29537       if (bitmap_bit_p (components, i))
29538 	{
29539 	  rtx reg = gen_rtx_REG (fp_reg_mode, i);
29540 	  rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
29541 	  RTX_FRAME_RELATED_P (insn) = 1;
29542 	  rtx set = copy_rtx (single_set (insn));
29543 	  add_reg_note (insn, REG_CFA_OFFSET, set);
29544 	}
29545 
29546       offset += fp_reg_size;
29547     }
29548 }
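
/* Added illustration: on a 64-bit ELFv2 target the LR component above
   typically assembles to

     mflr 0
     std 0,16(1)    ; lr_save_offset, plus total_size if pushed

   with the REG_CFA_REGISTER and REG_CFA_OFFSET notes telling the
   unwinder that LR lives first in r0 and then in its stack slot.  */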
29549 
29550 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS.  */
29551 static void
29552 rs6000_emit_epilogue_components (sbitmap components)
29553 {
29554   rs6000_stack_t *info = rs6000_stack_info ();
29555   rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
29556 			     ? HARD_FRAME_POINTER_REGNUM
29557 			     : STACK_POINTER_REGNUM);
29558 
29559   machine_mode reg_mode = Pmode;
29560   int reg_size = TARGET_32BIT ? 4 : 8;
29561 
29562   machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
29563 			     ? DFmode : SFmode;
29564   int fp_reg_size = 8;
29565 
29566   /* Epilogue for the FPRs.  */
29567   int offset = info->fp_save_offset;
29568   if (info->push_p)
29569     offset += info->total_size;
29570 
29571   for (int i = info->first_fp_reg_save; i < 64; i++)
29572     {
29573       if (bitmap_bit_p (components, i))
29574 	{
29575 	  rtx reg = gen_rtx_REG (fp_reg_mode, i);
29576 	  rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
29577 	  RTX_FRAME_RELATED_P (insn) = 1;
29578 	  add_reg_note (insn, REG_CFA_RESTORE, reg);
29579 	}
29580 
29581       offset += fp_reg_size;
29582     }
29583 
29584   /* Epilogue for the GPRs.  */
29585   offset = info->gp_save_offset;
29586   if (info->push_p)
29587     offset += info->total_size;
29588 
29589   for (int i = info->first_gp_reg_save; i < 32; i++)
29590     {
29591       if (bitmap_bit_p (components, i))
29592 	{
29593 	  rtx reg = gen_rtx_REG (reg_mode, i);
29594 	  rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
29595 	  RTX_FRAME_RELATED_P (insn) = 1;
29596 	  add_reg_note (insn, REG_CFA_RESTORE, reg);
29597 	}
29598 
29599       offset += reg_size;
29600     }
29601 
29602   /* Epilogue for LR.  */
29603   if (bitmap_bit_p (components, 0))
29604     {
29605       int offset = info->lr_save_offset;
29606       if (info->push_p)
29607 	offset += info->total_size;
29608 
29609       rtx reg = gen_rtx_REG (reg_mode, 0);
29610       rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
29611 
29612       rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
29613       insn = emit_move_insn (lr, reg);
29614       RTX_FRAME_RELATED_P (insn) = 1;
29615       add_reg_note (insn, REG_CFA_RESTORE, lr);
29616     }
29617 }
29618 
29619 /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS.  */
29620 static void
29621 rs6000_set_handled_components (sbitmap components)
29622 {
29623   rs6000_stack_t *info = rs6000_stack_info ();
29624 
29625   for (int i = info->first_gp_reg_save; i < 32; i++)
29626     if (bitmap_bit_p (components, i))
29627       cfun->machine->gpr_is_wrapped_separately[i] = true;
29628 
29629   for (int i = info->first_fp_reg_save; i < 64; i++)
29630     if (bitmap_bit_p (components, i))
29631       cfun->machine->fpr_is_wrapped_separately[i - 32] = true;
29632 
29633   if (bitmap_bit_p (components, 0))
29634     cfun->machine->lr_is_wrapped_separately = true;
29635 }
29636 
29637 /* Emit function prologue as insns.  */
29638 
29639 void
29640 rs6000_emit_prologue (void)
29641 {
29642   rs6000_stack_t *info = rs6000_stack_info ();
29643   machine_mode reg_mode = Pmode;
29644   int reg_size = TARGET_32BIT ? 4 : 8;
29645   machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
29646 			     ? DFmode : SFmode;
29647   int fp_reg_size = 8;
29648   rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
29649   rtx frame_reg_rtx = sp_reg_rtx;
29650   unsigned int cr_save_regno;
29651   rtx cr_save_rtx = NULL_RTX;
29652   rtx_insn *insn;
29653   int strategy;
29654   int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
29655 			      && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
29656 			      && call_used_regs[STATIC_CHAIN_REGNUM]);
29657   int using_split_stack = (flag_split_stack
29658                            && (lookup_attribute ("no_split_stack",
29659                                                  DECL_ATTRIBUTES (cfun->decl))
29660                                == NULL));
29661 
29662   /* Offset to top of frame for frame_reg and sp respectively.  */
29663   HOST_WIDE_INT frame_off = 0;
29664   HOST_WIDE_INT sp_off = 0;
29665   /* sp_adjust is the stack adjusting instruction, tracked so that the
29666      insn setting up the split-stack arg pointer can be emitted just
29667      prior to it, when r12 is not used here for other purposes.  */
29668   rtx_insn *sp_adjust = 0;
29669 
29670 #if CHECKING_P
29671   /* Track and check usage of r0, r11, r12.  */
29672   int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
29673 #define START_USE(R) do \
29674   {						\
29675     gcc_assert ((reg_inuse & (1 << (R))) == 0);	\
29676     reg_inuse |= 1 << (R);			\
29677   } while (0)
29678 #define END_USE(R) do \
29679   {						\
29680     gcc_assert ((reg_inuse & (1 << (R))) != 0);	\
29681     reg_inuse &= ~(1 << (R));			\
29682   } while (0)
29683 #define NOT_INUSE(R) do \
29684   {						\
29685     gcc_assert ((reg_inuse & (1 << (R))) == 0);	\
29686   } while (0)
29687 #else
29688 #define START_USE(R) do {} while (0)
29689 #define END_USE(R) do {} while (0)
29690 #define NOT_INUSE(R) do {} while (0)
29691 #endif
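
  /* Added usage sketch for the checkers above (active only when GCC
     itself is built with checking): bracketing code with

       START_USE (0);
       ... emit insns that use r0 ...
       END_USE (0);

     makes any overlapping claim on r0/r11/r12 in this prologue trip a
     gcc_assert immediately.  */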
29692 
29693   if (DEFAULT_ABI == ABI_ELFv2
29694       && !TARGET_SINGLE_PIC_BASE)
29695     {
29696       cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
29697 
29698       /* With -mminimal-toc we may generate an extra use of r2 below.  */
29699       if (TARGET_TOC && TARGET_MINIMAL_TOC
29700 	  && !constant_pool_empty_p ())
29701 	cfun->machine->r2_setup_needed = true;
29702     }
29703 
29704 
29705   if (flag_stack_usage_info)
29706     current_function_static_stack_size = info->total_size;
29707 
29708   if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
29709       || flag_stack_clash_protection)
29710     {
29711       HOST_WIDE_INT size = info->total_size;
29712 
29713       if (crtl->is_leaf && !cfun->calls_alloca)
29714 	{
29715 	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
29716 	    rs6000_emit_probe_stack_range (get_stack_check_protect (),
29717 					   size - get_stack_check_protect ());
29718 	}
29719       else if (size > 0)
29720 	rs6000_emit_probe_stack_range (get_stack_check_protect (), size);
29721     }
29722 
29723   if (TARGET_FIX_AND_CONTINUE)
29724     {
29725       /* gdb on darwin arranges to forward a function from the old
29726 	 address by modifying the first 5 instructions of the function
29727 	 to branch to the overriding function.  This is necessary to
29728 	 permit function pointers that point to the old function to
29729 	 actually forward to the new function.  */
29730       emit_insn (gen_nop ());
29731       emit_insn (gen_nop ());
29732       emit_insn (gen_nop ());
29733       emit_insn (gen_nop ());
29734       emit_insn (gen_nop ());
29735     }
29736 
29737   if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
29738     {
29739       reg_mode = V2SImode;
29740       reg_size = 8;
29741     }
29742 
29743   /* Handle world saves specially here.  */
29744   if (WORLD_SAVE_P (info))
29745     {
29746       int i, j, sz;
29747       rtx treg;
29748       rtvec p;
29749       rtx reg0;
29750 
29751       /* save_world expects lr in r0. */
29752       reg0 = gen_rtx_REG (Pmode, 0);
29753       if (info->lr_save_p)
29754 	{
29755 	  insn = emit_move_insn (reg0,
29756 				 gen_rtx_REG (Pmode, LR_REGNO));
29757 	  RTX_FRAME_RELATED_P (insn) = 1;
29758 	}
29759 
29760       /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
29761 	 assumptions about the offsets of various bits of the stack
29762 	 frame.  */
29763       gcc_assert (info->gp_save_offset == -220
29764 		  && info->fp_save_offset == -144
29765 		  && info->lr_save_offset == 8
29766 		  && info->cr_save_offset == 4
29767 		  && info->push_p
29768 		  && info->lr_save_p
29769 		  && (!crtl->calls_eh_return
29770 		      || info->ehrd_offset == -432)
29771 		  && info->vrsave_save_offset == -224
29772 		  && info->altivec_save_offset == -416);
29773 
29774       treg = gen_rtx_REG (SImode, 11);
29775       emit_move_insn (treg, GEN_INT (-info->total_size));
29776 
29777       /* SAVE_WORLD takes the caller's LR in R0 and the frame size
29778 	 in R11.  It also clobbers R12, so beware!  */
29779 
29780       /* Preserve CR2 for save_world prologues.  */
29781       sz = 5;
29782       sz += 32 - info->first_gp_reg_save;
29783       sz += 64 - info->first_fp_reg_save;
29784       sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
29785       p = rtvec_alloc (sz);
29786       j = 0;
29787       RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
29788 					    gen_rtx_REG (SImode,
29789 							 LR_REGNO));
29790       RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
29791 					gen_rtx_SYMBOL_REF (Pmode,
29792 							    "*save_world"));
29793       /* We do floats first so that the instruction pattern matches
29794 	 properly.  */
29795       for (i = 0; i < 64 - info->first_fp_reg_save; i++)
29796 	RTVEC_ELT (p, j++)
29797 	  = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
29798 					  ? DFmode : SFmode,
29799 					  info->first_fp_reg_save + i),
29800 			     frame_reg_rtx,
29801 			     info->fp_save_offset + frame_off + 8 * i);
29802       for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
29803 	RTVEC_ELT (p, j++)
29804 	  = gen_frame_store (gen_rtx_REG (V4SImode,
29805 					  info->first_altivec_reg_save + i),
29806 			     frame_reg_rtx,
29807 			     info->altivec_save_offset + frame_off + 16 * i);
29808       for (i = 0; i < 32 - info->first_gp_reg_save; i++)
29809 	RTVEC_ELT (p, j++)
29810 	  = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
29811 			     frame_reg_rtx,
29812 			     info->gp_save_offset + frame_off + reg_size * i);
29813 
29814       /* CR register traditionally saved as CR2.  */
29815       RTVEC_ELT (p, j++)
29816 	= gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
29817 			   frame_reg_rtx, info->cr_save_offset + frame_off);
29818       /* Explain about use of R0.  */
29819       if (info->lr_save_p)
29820 	RTVEC_ELT (p, j++)
29821 	  = gen_frame_store (reg0,
29822 			     frame_reg_rtx, info->lr_save_offset + frame_off);
29823       /* Explain what happens to the stack pointer.  */
29824       {
29825 	rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
29826 	RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
29827       }
29828 
29829       insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
29830       rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
29831 			    treg, GEN_INT (-info->total_size));
29832       sp_off = frame_off = info->total_size;
29833     }
29834 
29835   strategy = info->savres_strategy;
29836 
29837   /* For V.4, update stack before we do any saving and set back pointer.  */
29838   if (! WORLD_SAVE_P (info)
29839       && info->push_p
29840       && (DEFAULT_ABI == ABI_V4
29841 	  || crtl->calls_eh_return))
29842     {
29843       bool need_r11 = (TARGET_SPE
29844 		       ? (!(strategy & SAVE_INLINE_GPRS)
29845 			  && info->spe_64bit_regs_used == 0)
29846 		       : (!(strategy & SAVE_INLINE_FPRS)
29847 			  || !(strategy & SAVE_INLINE_GPRS)
29848 			  || !(strategy & SAVE_INLINE_VRS)));
29849       int ptr_regno = -1;
29850       rtx ptr_reg = NULL_RTX;
29851       int ptr_off = 0;
29852 
29853       if (info->total_size < 32767)
29854 	frame_off = info->total_size;
29855       else if (need_r11)
29856 	ptr_regno = 11;
29857       else if (info->cr_save_p
29858 	       || info->lr_save_p
29859 	       || info->first_fp_reg_save < 64
29860 	       || info->first_gp_reg_save < 32
29861 	       || info->altivec_size != 0
29862 	       || info->vrsave_size != 0
29863 	       || crtl->calls_eh_return)
29864 	ptr_regno = 12;
29865       else
29866 	{
29867 	  /* The prologue won't be saving any regs so there is no need
29868 	     to set up a frame register to access any frame save area.
29869 	     We also won't be using frame_off anywhere below, but set
29870 	     the correct value anyway to protect against future
29871 	     changes to this function.  */
29872 	  frame_off = info->total_size;
29873 	}
29874       if (ptr_regno != -1)
29875 	{
29876 	  /* Set up the frame offset to that needed by the first
29877 	     out-of-line save function.  */
29878 	  START_USE (ptr_regno);
29879 	  ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
29880 	  frame_reg_rtx = ptr_reg;
29881 	  if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
29882 	    gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
29883 	  else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
29884 	    ptr_off = info->gp_save_offset + info->gp_size;
29885 	  else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
29886 	    ptr_off = info->altivec_save_offset + info->altivec_size;
29887 	  frame_off = -ptr_off;
29888 	}
29889       sp_adjust = rs6000_emit_allocate_stack (info->total_size,
29890 					      ptr_reg, ptr_off);
29891       if (REGNO (frame_reg_rtx) == 12)
29892 	sp_adjust = 0;
29893       sp_off = info->total_size;
29894       if (frame_reg_rtx != sp_reg_rtx)
29895 	rs6000_emit_stack_tie (frame_reg_rtx, false);
29896     }
29897 
29898   /* If we use the link register, get it into r0.  */
29899   if (!WORLD_SAVE_P (info) && info->lr_save_p
29900       && !cfun->machine->lr_is_wrapped_separately)
29901     {
29902       rtx addr, reg, mem;
29903 
29904       reg = gen_rtx_REG (Pmode, 0);
29905       START_USE (0);
29906       insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
29907       RTX_FRAME_RELATED_P (insn) = 1;
29908 
29909       if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
29910 			| SAVE_NOINLINE_FPRS_SAVES_LR)))
29911 	{
29912 	  addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
29913 			       GEN_INT (info->lr_save_offset + frame_off));
29914 	  mem = gen_rtx_MEM (Pmode, addr);
29915 	  /* This should not be of rs6000_sr_alias_set, because of
29916 	     __builtin_return_address.  */
29917 
29918 	  insn = emit_move_insn (mem, reg);
29919 	  rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
29920 				NULL_RTX, NULL_RTX);
29921 	  END_USE (0);
29922 	}
29923     }
29924 
29925   /* If we need to save CR, put it into r12 or r11.  Choose r12 except when
29926      r12 will be needed by out-of-line gpr save.  */
29927   cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29928 		   && !(strategy & (SAVE_INLINE_GPRS
29929 				    | SAVE_NOINLINE_GPRS_SAVES_LR))
29930 		   ? 11 : 12);
29931   if (!WORLD_SAVE_P (info)
29932       && info->cr_save_p
29933       && REGNO (frame_reg_rtx) != cr_save_regno
29934       && !(using_static_chain_p && cr_save_regno == 11)
29935       && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
29936     {
29937       cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
29938       START_USE (cr_save_regno);
29939       rs6000_emit_move_from_cr (cr_save_rtx);
29940     }
29941 
29942   /* Do any required saving of FPRs.  If only one or two to save, do
29943      it ourselves.  Otherwise, call an out-of-line save function.  */
29944   if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
29945     {
29946       int offset = info->fp_save_offset + frame_off;
29947       for (int i = info->first_fp_reg_save; i < 64; i++)
29948 	{
29949 	  if (save_reg_p (i)
29950 	      && !cfun->machine->fpr_is_wrapped_separately[i - 32])
29951 	    emit_frame_save (frame_reg_rtx, fp_reg_mode, i, offset,
29952 			     sp_off - frame_off);
29953 
29954 	  offset += fp_reg_size;
29955 	}
29956     }
29957   else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
29958     {
29959       bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
29960       int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
29961       unsigned ptr_regno = ptr_regno_for_savres (sel);
29962       rtx ptr_reg = frame_reg_rtx;
29963 
29964       if (REGNO (frame_reg_rtx) == ptr_regno)
29965 	gcc_checking_assert (frame_off == 0);
29966       else
29967 	{
29968 	  ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
29969 	  NOT_INUSE (ptr_regno);
29970 	  emit_insn (gen_add3_insn (ptr_reg,
29971 				    frame_reg_rtx, GEN_INT (frame_off)));
29972 	}
29973       insn = rs6000_emit_savres_rtx (info, ptr_reg,
29974 				     info->fp_save_offset,
29975 				     info->lr_save_offset,
29976 				     DFmode, sel);
29977       rs6000_frame_related (insn, ptr_reg, sp_off,
29978 			    NULL_RTX, NULL_RTX);
29979       if (lr)
29980 	END_USE (0);
29981     }
29982 
29983   /* Save GPRs.  This is done as a PARALLEL if we are using
29984      the store-multiple instructions.  */
29985   if (!WORLD_SAVE_P (info)
29986       && TARGET_SPE_ABI
29987       && info->spe_64bit_regs_used != 0
29988       && info->first_gp_reg_save != 32)
29989     {
29990       int i;
29991       rtx spe_save_area_ptr;
29992       HOST_WIDE_INT save_off;
29993       int ool_adjust = 0;
29994 
29995       /* Determine whether we can address all of the registers that need
29996 	 to be saved with an offset from frame_reg_rtx that fits in
29997 	 the small const field for SPE memory instructions.  */
29998       int spe_regs_addressable
29999 	= (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
30000 				+ reg_size * (32 - info->first_gp_reg_save - 1))
30001 	   && (strategy & SAVE_INLINE_GPRS));
30002 
30003       if (spe_regs_addressable)
30004 	{
30005 	  spe_save_area_ptr = frame_reg_rtx;
30006 	  save_off = frame_off;
30007 	}
30008       else
30009 	{
30010 	  /* Make r11 point to the start of the SPE save area.  We need
30011 	     to be careful here if r11 is holding the static chain.  If
30012 	     it is, then temporarily save it in r0.  */
30013 	  HOST_WIDE_INT offset;
30014 
30015 	  if (!(strategy & SAVE_INLINE_GPRS))
30016 	    ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
30017 	  offset = info->spe_gp_save_offset + frame_off - ool_adjust;
30018 	  spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
30019 	  save_off = frame_off - offset;
30020 
30021 	  if (using_static_chain_p)
30022 	    {
30023 	      rtx r0 = gen_rtx_REG (Pmode, 0);
30024 
30025 	      START_USE (0);
30026 	      gcc_assert (info->first_gp_reg_save > 11);
30027 
30028 	      emit_move_insn (r0, spe_save_area_ptr);
30029 	    }
30030 	  else if (REGNO (frame_reg_rtx) != 11)
30031 	    START_USE (11);
30032 
30033 	  emit_insn (gen_addsi3 (spe_save_area_ptr,
30034 				 frame_reg_rtx, GEN_INT (offset)));
30035 	  if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
30036 	    frame_off = -info->spe_gp_save_offset + ool_adjust;
30037 	}
30038 
30039       if ((strategy & SAVE_INLINE_GPRS))
30040 	{
30041 	  for (i = 0; i < 32 - info->first_gp_reg_save; i++)
30042 	    if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
30043 	      emit_frame_save (spe_save_area_ptr, reg_mode,
30044 			       info->first_gp_reg_save + i,
30045 			       (info->spe_gp_save_offset + save_off
30046 				+ reg_size * i),
30047 			       sp_off - save_off);
30048 	}
30049       else
30050 	{
30051 	  insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
30052 					 info->spe_gp_save_offset + save_off,
30053 					 0, reg_mode,
30054 					 SAVRES_SAVE | SAVRES_GPR);
30055 
30056 	  rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
30057 				NULL_RTX, NULL_RTX);
30058 	}
30059 
30060       /* Move the static chain pointer back.  */
30061       if (!spe_regs_addressable)
30062 	{
30063 	  if (using_static_chain_p)
30064 	    {
30065 	      emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
30066 	      END_USE (0);
30067 	    }
30068 	  else if (REGNO (frame_reg_rtx) != 11)
30069 	    END_USE (11);
30070 	}
30071     }
30072   else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
30073     {
30074       bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
30075       int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
30076       unsigned ptr_regno = ptr_regno_for_savres (sel);
30077       rtx ptr_reg = frame_reg_rtx;
30078       bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
30079       int end_save = info->gp_save_offset + info->gp_size;
30080       int ptr_off;
30081 
30082       if (ptr_regno == 12)
30083 	sp_adjust = 0;
30084       if (!ptr_set_up)
30085 	ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
30086 
30087       /* Need to adjust r11 (r12) if we saved any FPRs.  */
30088       if (end_save + frame_off != 0)
30089 	{
30090 	  rtx offset = GEN_INT (end_save + frame_off);
30091 
30092 	  if (ptr_set_up)
30093 	    frame_off = -end_save;
30094 	  else
30095 	    NOT_INUSE (ptr_regno);
30096 	  emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
30097 	}
30098       else if (!ptr_set_up)
30099 	{
30100 	  NOT_INUSE (ptr_regno);
30101 	  emit_move_insn (ptr_reg, frame_reg_rtx);
30102 	}
30103       ptr_off = -end_save;
30104       insn = rs6000_emit_savres_rtx (info, ptr_reg,
30105 				     info->gp_save_offset + ptr_off,
30106 				     info->lr_save_offset + ptr_off,
30107 				     reg_mode, sel);
30108       rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
30109 			    NULL_RTX, NULL_RTX);
30110       if (lr)
30111 	END_USE (0);
30112     }
30113   else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE))
30114     {
30115       rtvec p;
30116       int i;
30117       p = rtvec_alloc (32 - info->first_gp_reg_save);
30118       for (i = 0; i < 32 - info->first_gp_reg_save; i++)
30119 	RTVEC_ELT (p, i)
30120 	  = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
30121 			     frame_reg_rtx,
30122 			     info->gp_save_offset + frame_off + reg_size * i);
30123       insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
30124       rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
30125 			    NULL_RTX, NULL_RTX);
30126     }
30127   else if (!WORLD_SAVE_P (info))
30128     {
30129       int offset = info->gp_save_offset + frame_off;
30130       for (int i = info->first_gp_reg_save; i < 32; i++)
30131 	{
30132 	  if (rs6000_reg_live_or_pic_offset_p (i)
30133 	      && !cfun->machine->gpr_is_wrapped_separately[i])
30134 	    emit_frame_save (frame_reg_rtx, reg_mode, i, offset,
30135 			     sp_off - frame_off);
30136 
30137 	  offset += reg_size;
30138 	}
30139     }
30140 
30141   if (crtl->calls_eh_return)
30142     {
30143       unsigned int i;
30144       rtvec p;
30145 
30146       for (i = 0; ; ++i)
30147 	{
30148 	  unsigned int regno = EH_RETURN_DATA_REGNO (i);
30149 	  if (regno == INVALID_REGNUM)
30150 	    break;
30151 	}
30152 
30153       p = rtvec_alloc (i);
30154 
30155       for (i = 0; ; ++i)
30156 	{
30157 	  unsigned int regno = EH_RETURN_DATA_REGNO (i);
30158 	  if (regno == INVALID_REGNUM)
30159 	    break;
30160 
30161 	  rtx set
30162 	    = gen_frame_store (gen_rtx_REG (reg_mode, regno),
30163 			       sp_reg_rtx,
30164 			       info->ehrd_offset + sp_off + reg_size * (int) i);
30165 	  RTVEC_ELT (p, i) = set;
30166 	  RTX_FRAME_RELATED_P (set) = 1;
30167 	}
30168 
30169       insn = emit_insn (gen_blockage ());
30170       RTX_FRAME_RELATED_P (insn) = 1;
30171       add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
30172     }
30173 
30174   /* In the AIX ABI we need to make sure r2 is really saved.  */
30175   if (TARGET_AIX && crtl->calls_eh_return)
30176     {
30177       rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
30178       rtx join_insn, note;
30179       rtx_insn *save_insn;
30180       long toc_restore_insn;
30181 
30182       tmp_reg = gen_rtx_REG (Pmode, 11);
30183       tmp_reg_si = gen_rtx_REG (SImode, 11);
30184       if (using_static_chain_p)
30185 	{
30186 	  START_USE (0);
30187 	  emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
30188 	}
30189       else
30190 	START_USE (11);
30191       emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
30192       /* Peek at instruction to which this function returns.  If it's
30193 	 restoring r2, then we know we've already saved r2.  We can't
30194 	 unconditionally save r2 because the value we have will already
30195 	 be updated if we arrived at this function via a plt call or
30196 	 toc adjusting stub.  */
30197       emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
30198       toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
30199 			  + RS6000_TOC_SAVE_SLOT);
30200       hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
30201       emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
30202       compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
30203       validate_condition_mode (EQ, CCUNSmode);
30204       lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
30205       emit_insn (gen_rtx_SET (compare_result,
30206 			      gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
30207       toc_save_done = gen_label_rtx ();
30208       jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
30209 				   gen_rtx_EQ (VOIDmode, compare_result,
30210 					       const0_rtx),
30211 				   gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
30212 				   pc_rtx);
30213       jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
30214       JUMP_LABEL (jump) = toc_save_done;
30215       LABEL_NUSES (toc_save_done) += 1;
30216 
30217       save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
30218 				   TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
30219 				   sp_off - frame_off);
30220 
30221       emit_label (toc_save_done);
30222 
30223       /* ??? If we leave SAVE_INSN marked as saving R2, then we'll
30224 	 have a CFG that has different saves along different paths.
30225 	 Move the note to a dummy blockage insn, which describes that
30226 	 R2 is unconditionally saved after the label.  */
30227       /* ??? An alternate representation might be a special insn pattern
30228 	 containing both the branch and the store.  That might give the
30229 	 code that minimizes the number of DW_CFA_advance opcodes more
30230 	 freedom in placing the annotations.  */
30231       note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
30232       if (note)
30233 	remove_note (save_insn, note);
30234       else
30235 	note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
30236 			       copy_rtx (PATTERN (save_insn)), NULL_RTX);
30237       RTX_FRAME_RELATED_P (save_insn) = 0;
30238 
30239       join_insn = emit_insn (gen_blockage ());
30240       REG_NOTES (join_insn) = note;
30241       RTX_FRAME_RELATED_P (join_insn) = 1;
30242 
30243       if (using_static_chain_p)
30244 	{
30245 	  emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
30246 	  END_USE (0);
30247 	}
30248       else
30249 	END_USE (11);
30250     }
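
  /* Added decoding of the magic constants above: 0xE8410000 is the
     encoding of the 64-bit "ld r2,0(r1)" and 0x80410000 that of the
     32-bit "lwz r2,0(r1)", so adding RS6000_TOC_SAVE_SLOT gives the
     exact "reload r2 from its slot" instruction that a TOC-adjusting
     stub returns through; the xor/compare above merely tests for it.  */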
30251 
30252   /* Save CR if we use any that must be preserved.  */
30253   if (!WORLD_SAVE_P (info) && info->cr_save_p)
30254     {
30255       rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
30256 			       GEN_INT (info->cr_save_offset + frame_off));
30257       rtx mem = gen_frame_mem (SImode, addr);
30258 
30259       /* If we didn't copy cr before, do so now using r0.  */
30260       if (cr_save_rtx == NULL_RTX)
30261 	{
30262 	  START_USE (0);
30263 	  cr_save_rtx = gen_rtx_REG (SImode, 0);
30264 	  rs6000_emit_move_from_cr (cr_save_rtx);
30265 	}
30266 
30267       /* Saving CR requires a two-instruction sequence: one instruction
30268 	 to move the CR to a general-purpose register, and a second
30269 	 instruction that stores the GPR to memory.
30270 
30271 	 We do not emit any DWARF CFI records for the first of these,
30272 	 because we cannot properly represent the fact that CR is saved in
30273 	 a register.  One reason is that we cannot express that multiple
30274 	 CR fields are saved; another reason is that on 64-bit, the size
30275 	 of the CR register in DWARF (4 bytes) differs from the size of
30276 	 a general-purpose register.
30277 
30278 	 This means if any intervening instruction were to clobber one of
30279 	 the call-saved CR fields, we'd have incorrect CFI.  To prevent
30280 	 this from happening, we mark the store to memory as a use of
30281 	 those CR fields, which prevents any such instruction from being
30282 	 scheduled in between the two instructions.  */
30283       rtx crsave_v[9];
30284       int n_crsave = 0;
30285       int i;
30286 
30287       crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
30288       for (i = 0; i < 8; i++)
30289 	if (save_reg_p (CR0_REGNO + i))
30290 	  crsave_v[n_crsave++]
30291 	    = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
30292 
30293       insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
30294 					  gen_rtvec_v (n_crsave, crsave_v)));
30295       END_USE (REGNO (cr_save_rtx));
30296 
30297       /* Now, there's no way that dwarf2out_frame_debug_expr is going to
30298 	 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
30299 	 so we need to construct a frame expression manually.  */
30300       RTX_FRAME_RELATED_P (insn) = 1;
30301 
30302       /* Update address to be stack-pointer relative, like
30303 	 rs6000_frame_related would do.  */
30304       addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
30305 			   GEN_INT (info->cr_save_offset + sp_off));
30306       mem = gen_frame_mem (SImode, addr);
30307 
30308       if (DEFAULT_ABI == ABI_ELFv2)
30309 	{
30310 	  /* In the ELFv2 ABI we generate separate CFI records for each
30311 	     CR field that was actually saved.  They all point to the
30312 	     same 32-bit stack slot.  */
30313 	  rtx crframe[8];
30314 	  int n_crframe = 0;
30315 
30316 	  for (i = 0; i < 8; i++)
30317 	    if (save_reg_p (CR0_REGNO + i))
30318 	      {
30319 		crframe[n_crframe]
30320 		  = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
30321 
30322 		RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
30323 		n_crframe++;
30324 	      }
30325 
30326 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
30327 			gen_rtx_PARALLEL (VOIDmode,
30328 					  gen_rtvec_v (n_crframe, crframe)));
30329 	}
30330       else
30331 	{
30332 	  /* In other ABIs, by convention, we use a single CR regnum to
30333 	     represent the fact that all call-saved CR fields are saved.
30334 	     We use CR2_REGNO to be compatible with gcc-2.95 on Linux.  */
30335 	  rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
30336 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
30337 	}
30338     }
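
  /* Added illustration of the PARALLEL built above (CR0_REGNO is the
     hard register number of the first CR field):

       (parallel [(set (mem:SI ...) (reg:SI cr_save_regno))
                  (use (reg:CC <CR0_REGNO+2>))   ; CR2, if saved
                  ...])

     The dummy USEs are what keep the scheduler from clobbering a
     saved CR field between the mfcr and the store.  */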
30339 
30340   /* In the ELFv2 ABI we need to save all call-saved CR fields into
30341      *separate* slots if the routine calls __builtin_eh_return, so
30342      that they can be independently restored by the unwinder.  */
30343   if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
30344     {
30345       int i, cr_off = info->ehcr_offset;
30346       rtx crsave;
30347 
30348       /* ??? We might get better performance by using multiple mfocrf
30349 	 instructions.  */
30350       crsave = gen_rtx_REG (SImode, 0);
30351       emit_insn (gen_movesi_from_cr (crsave));
30352 
30353       for (i = 0; i < 8; i++)
30354 	if (!call_used_regs[CR0_REGNO + i])
30355 	  {
30356 	    rtvec p = rtvec_alloc (2);
30357 	    RTVEC_ELT (p, 0)
30358 	      = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
30359 	    RTVEC_ELT (p, 1)
30360 	      = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
30361 
30362 	    insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
30363 
30364 	    RTX_FRAME_RELATED_P (insn) = 1;
30365 	    add_reg_note (insn, REG_FRAME_RELATED_EXPR,
30366 			  gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
30367 					   sp_reg_rtx, cr_off + sp_off));
30368 
30369 	    cr_off += reg_size;
30370 	  }
30371     }
30372 
30373   /* Update stack and set back pointer unless this is V.4,
30374      for which it was done previously.  */
30375   if (!WORLD_SAVE_P (info) && info->push_p
30376       && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
30377     {
30378       rtx ptr_reg = NULL;
30379       int ptr_off = 0;
30380 
30381       /* If saving altivec regs we need to be able to address all save
30382 	 locations using a 16-bit offset.  */
30383       if ((strategy & SAVE_INLINE_VRS) == 0
30384 	  || (info->altivec_size != 0
30385 	      && (info->altivec_save_offset + info->altivec_size - 16
30386 		  + info->total_size - frame_off) > 32767)
30387 	  || (info->vrsave_size != 0
30388 	      && (info->vrsave_save_offset
30389 		  + info->total_size - frame_off) > 32767))
30390 	{
30391 	  int sel = SAVRES_SAVE | SAVRES_VR;
30392 	  unsigned ptr_regno = ptr_regno_for_savres (sel);
30393 
30394 	  if (using_static_chain_p
30395 	      && ptr_regno == STATIC_CHAIN_REGNUM)
30396 	    ptr_regno = 12;
30397 	  if (REGNO (frame_reg_rtx) != ptr_regno)
30398 	    START_USE (ptr_regno);
30399 	  ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
30400 	  frame_reg_rtx = ptr_reg;
30401 	  ptr_off = info->altivec_save_offset + info->altivec_size;
30402 	  frame_off = -ptr_off;
30403 	}
30404       else if (REGNO (frame_reg_rtx) == 1)
30405 	frame_off = info->total_size;
30406       sp_adjust = rs6000_emit_allocate_stack (info->total_size,
30407 					      ptr_reg, ptr_off);
30408       if (REGNO (frame_reg_rtx) == 12)
30409 	sp_adjust = 0;
30410       sp_off = info->total_size;
30411       if (frame_reg_rtx != sp_reg_rtx)
30412 	rs6000_emit_stack_tie (frame_reg_rtx, false);
30413     }
30414 
30415   /* Set frame pointer, if needed.  */
30416   if (frame_pointer_needed)
30417     {
30418       insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
30419 			     sp_reg_rtx);
30420       RTX_FRAME_RELATED_P (insn) = 1;
30421     }
30422 
30423   /* Save AltiVec registers if needed.  Save here because the red zone does
30424      not always include AltiVec registers.  */
30425   if (!WORLD_SAVE_P (info)
30426       && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
30427     {
30428       int end_save = info->altivec_save_offset + info->altivec_size;
30429       int ptr_off;
30430       /* Oddly, the vector save/restore functions point r0 at the end
30431 	 of the save area, then use r11 or r12 to load offsets for
30432 	 [reg+reg] addressing.  */
30433       rtx ptr_reg = gen_rtx_REG (Pmode, 0);
30434       int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
30435       rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
30436 
30437       gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
30438       NOT_INUSE (0);
30439       if (scratch_regno == 12)
30440 	sp_adjust = 0;
30441       if (end_save + frame_off != 0)
30442 	{
30443 	  rtx offset = GEN_INT (end_save + frame_off);
30444 
30445 	  emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
30446 	}
30447       else
30448 	emit_move_insn (ptr_reg, frame_reg_rtx);
30449 
30450       ptr_off = -end_save;
30451       insn = rs6000_emit_savres_rtx (info, scratch_reg,
30452 				     info->altivec_save_offset + ptr_off,
30453 				     0, V4SImode, SAVRES_SAVE | SAVRES_VR);
30454       rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
30455 			    NULL_RTX, NULL_RTX);
30456       if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
30457 	{
30458 	  /* The oddity mentioned above clobbered our frame reg.  */
30459 	  emit_move_insn (frame_reg_rtx, ptr_reg);
30460 	  frame_off = ptr_off;
30461 	}
30462     }
30463   else if (!WORLD_SAVE_P (info)
30464 	   && info->altivec_size != 0)
30465     {
30466       int i;
30467 
30468       for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
30469 	if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
30470 	  {
30471 	    rtx areg, savereg, mem;
30472 	    HOST_WIDE_INT offset;
30473 
30474 	    offset = (info->altivec_save_offset + frame_off
30475 		      + 16 * (i - info->first_altivec_reg_save));
30476 
30477 	    savereg = gen_rtx_REG (V4SImode, i);
30478 
30479 	    if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
30480 	      {
30481 		mem = gen_frame_mem (V4SImode,
30482 				     gen_rtx_PLUS (Pmode, frame_reg_rtx,
30483 						   GEN_INT (offset)));
30484 		insn = emit_insn (gen_rtx_SET (mem, savereg));
30485 		areg = NULL_RTX;
30486 	      }
30487 	    else
30488 	      {
30489 		NOT_INUSE (0);
30490 		areg = gen_rtx_REG (Pmode, 0);
30491 		emit_move_insn (areg, GEN_INT (offset));
30492 
30493 		/* AltiVec addressing mode is [reg+reg].  */
30494 		mem = gen_frame_mem (V4SImode,
30495 				     gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
30496 
30497 		/* Rather than emitting a generic move, force use of the stvx
30498 		   instruction, which we always want on ISA 2.07 (power8) systems.
30499 		   In particular we don't want xxpermdi/stxvd2x for little
30500 		   endian.  */
30501 		insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
30502 	      }
30503 
30504 	    rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
30505 				  areg, GEN_INT (offset));
30506 	  }
30507     }
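
  /* Added example of the [reg+reg] path above, for v20 at offset -224
     with r31 as frame_reg_rtx (illustrative values):

       li 0,-224
       stvx 20,31,0

     while the TARGET_P9_DFORM_VECTOR path instead folds the offset
     into a single d-form vector store.  */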
30508 
30509   /* VRSAVE is a bit vector representing which AltiVec registers
30510      are used.  The OS uses this to determine which vector
30511      registers to save on a context switch.  We need to save
30512      VRSAVE on the stack frame, add whatever AltiVec registers we
30513      used in this function, and do the corresponding magic in the
30514      epilogue.  */
30515 
30516   if (!WORLD_SAVE_P (info)
30517       && info->vrsave_size != 0)
30518     {
30519       rtx reg, vrsave;
30520       int offset;
30521       int save_regno;
30522 
30523       /* Get VRSAVE onto a GPR.  Note that ABI_V4 and ABI_DARWIN might
30524 	 be using r12 as frame_reg_rtx and r11 as the static chain
30525 	 pointer for nested functions.  */
30526       save_regno = 12;
30527       if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
30528 	  && !using_static_chain_p)
30529 	save_regno = 11;
30530       else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
30531 	{
30532 	  save_regno = 11;
30533 	  if (using_static_chain_p)
30534 	    save_regno = 0;
30535 	}
30536 
30537       NOT_INUSE (save_regno);
30538       reg = gen_rtx_REG (SImode, save_regno);
30539       vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
30540       if (TARGET_MACHO)
30541 	emit_insn (gen_get_vrsave_internal (reg));
30542       else
30543 	emit_insn (gen_rtx_SET (reg, vrsave));
30544 
30545       /* Save VRSAVE.  */
30546       offset = info->vrsave_save_offset + frame_off;
30547       insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
30548 
30549       /* Include the registers in the mask.  */
30550       emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
30551 
30552       insn = emit_insn (generate_set_vrsave (reg, info, 0));
30553     }
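
  /* Added sketch of the sequence built above (VRSAVE is SPR 256):

       mfspr rN,256        read VRSAVE (or the Darwin variant)
       stw rN,off(frame)   save the caller's mask
       ori/oris rN,rN,...  or in this function's vrsave_mask
       mtspr 256,rN        generate_set_vrsave

     so the OS knows at context switch which vector regs are live.  */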
30554 
30555   /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up.  */
30556   if (!TARGET_SINGLE_PIC_BASE
30557       && ((TARGET_TOC && TARGET_MINIMAL_TOC
30558 	   && !constant_pool_empty_p ())
30559 	  || (DEFAULT_ABI == ABI_V4
30560 	      && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
30561 	      && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
30562     {
30563       /* If emit_load_toc_table will use the link register, we need to save
30564 	 it.  We use R12 for this purpose because emit_load_toc_table
30565 	 can use register 0.  This allows us to use a plain 'blr' to return
30566 	 from the procedure more often.  */
30567       int save_LR_around_toc_setup = (TARGET_ELF
30568 				      && DEFAULT_ABI == ABI_V4
30569 				      && flag_pic
30570 				      && ! info->lr_save_p
30571 				      && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
30572       if (save_LR_around_toc_setup)
30573 	{
30574 	  rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
30575 	  rtx tmp = gen_rtx_REG (Pmode, 12);
30576 
30577 	  sp_adjust = 0;
30578 	  insn = emit_move_insn (tmp, lr);
30579 	  RTX_FRAME_RELATED_P (insn) = 1;
30580 
30581 	  rs6000_emit_load_toc_table (TRUE);
30582 
30583 	  insn = emit_move_insn (lr, tmp);
30584 	  add_reg_note (insn, REG_CFA_RESTORE, lr);
30585 	  RTX_FRAME_RELATED_P (insn) = 1;
30586 	}
30587       else
30588 	rs6000_emit_load_toc_table (TRUE);
30589     }
30590 
30591 #if TARGET_MACHO
30592   if (!TARGET_SINGLE_PIC_BASE
30593       && DEFAULT_ABI == ABI_DARWIN
30594       && flag_pic && crtl->uses_pic_offset_table)
30595     {
30596       rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
30597       rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
30598 
30599       /* Save and restore LR locally around this call (in R0).  */
30600       if (!info->lr_save_p)
30601 	emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
30602 
30603       emit_insn (gen_load_macho_picbase (src));
30604 
30605       emit_move_insn (gen_rtx_REG (Pmode,
30606 				   RS6000_PIC_OFFSET_TABLE_REGNUM),
30607 		      lr);
30608 
30609       if (!info->lr_save_p)
30610 	emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
30611     }
30612 #endif
30613 
30614   /* If we need to, save the TOC register after doing the stack setup.
30615      Do not emit eh frame info for this save.  The unwinder wants info,
30616      conceptually attached to instructions in this function, about
30617      register values in the caller of this function.  This R2 may have
30618      already been changed from the value in the caller.
30619      We don't attempt to write accurate DWARF EH frame info for R2
30620      because code emitted by gcc for a (non-pointer) function call
30621      doesn't save and restore R2.  Instead, R2 is managed out-of-line
30622      by a linker generated plt call stub when the function resides in
30623      a shared library.  This behavior is costly to describe in DWARF,
30624      both in terms of the size of DWARF info and the time taken in the
30625      unwinder to interpret it.  R2 changes, apart from the
30626      calls_eh_return case earlier in this function, are handled by
30627      linux-unwind.h frob_update_context.  */
30628   if (rs6000_save_toc_in_prologue_p ())
30629     {
30630       rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
30631       emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
30632     }
30633 
30634   if (using_split_stack && split_stack_arg_pointer_used_p ())
30635     {
30636       /* Set up the arg pointer (r12) for -fsplit-stack code.  If
30637 	 __morestack was called, it left the arg pointer to the old
30638 	 stack in r29.  Otherwise, the arg pointer is the top of the
30639 	 current frame.  */
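      /* As a sketch, the code below amounts to: set r12 to the top of
	 the current frame, then, if the cr7 result left by the
	 split-stack prologue shows __morestack was called, overwrite
	 r12 with r29.  */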
30640       cfun->machine->split_stack_argp_used = true;
30641       if (sp_adjust)
30642 	{
30643 	  rtx r12 = gen_rtx_REG (Pmode, 12);
30644 	  rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
30645 	  emit_insn_before (set_r12, sp_adjust);
30646 	}
30647       else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
30648 	{
30649 	  rtx r12 = gen_rtx_REG (Pmode, 12);
30650 	  if (frame_off == 0)
30651 	    emit_move_insn (r12, frame_reg_rtx);
30652 	  else
30653 	    emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
30654 	}
30655       if (info->push_p)
30656 	{
30657 	  rtx r12 = gen_rtx_REG (Pmode, 12);
30658 	  rtx r29 = gen_rtx_REG (Pmode, 29);
30659 	  rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
30660 	  rtx not_more = gen_label_rtx ();
30661 	  rtx jump;
30662 
30663 	  jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
30664 				       gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
30665 				       gen_rtx_LABEL_REF (VOIDmode, not_more),
30666 				       pc_rtx);
30667 	  jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
30668 	  JUMP_LABEL (jump) = not_more;
30669 	  LABEL_NUSES (not_more) += 1;
30670 	  emit_move_insn (r12, r29);
30671 	  emit_label (not_more);
30672 	}
30673     }
30674 }
30675 
30676 /* Output .extern statements for the save/restore routines we use.  */
30677 
30678 static void
30679 rs6000_output_savres_externs (FILE *file)
30680 {
30681   rs6000_stack_t *info = rs6000_stack_info ();
30682 
30683   if (TARGET_DEBUG_STACK)
30684     debug_stack_info (info);
30685 
30686   /* Write .extern for any function we will call to save and restore
30687      fp values.  */
30688   if (info->first_fp_reg_save < 64
30689       && !TARGET_MACHO
30690       && !TARGET_ELF)
30691     {
30692       char *name;
30693       int regno = info->first_fp_reg_save - 32;
30694 
30695       if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
30696 	{
30697 	  bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
30698 	  int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
30699 	  name = rs6000_savres_routine_name (info, regno, sel);
30700 	  fprintf (file, "\t.extern %s\n", name);
30701 	}
30702       if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
30703 	{
30704 	  bool lr = (info->savres_strategy
30705 		     & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
30706 	  int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
30707 	  name = rs6000_savres_routine_name (info, regno, sel);
30708 	  fprintf (file, "\t.extern %s\n", name);
30709 	}
30710     }
30711 }
30712 
30713 /* Write function prologue.  */
30714 
30715 static void
30716 rs6000_output_function_prologue (FILE *file)
30717 {
30718   if (!cfun->is_thunk)
30719     rs6000_output_savres_externs (file);
30720 
30721   /* ELFv2 ABI r2 setup code and local entry point.  This must follow
30722      immediately after the global entry point label.  */
30723   if (rs6000_global_entry_point_needed_p ())
30724     {
30725       const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
30726 
30727       (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);
30728 
30729       if (TARGET_CMODEL != CMODEL_LARGE)
30730 	{
30731 	  /* In the small and medium code models, we assume the TOC is less
30732 	     than 2 GB away from the text section, so it can be computed via
30733 	     the following two-instruction sequence.  */
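	  /* For illustration (with the LCF label rendered as .LCF0),
	     the sequence printed below is:

	       0:	addis 2,12,.TOC.-.LCF0@ha
			addi 2,2,.TOC.-.LCF0@l

	     where r12 holds the global entry point address on entry.  */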
30734 	  char buf[256];
30735 
30736 	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
30737 	  fprintf (file, "0:\taddis 2,12,.TOC.-");
30738 	  assemble_name (file, buf);
30739 	  fprintf (file, "@ha\n");
30740 	  fprintf (file, "\taddi 2,2,.TOC.-");
30741 	  assemble_name (file, buf);
30742 	  fprintf (file, "@l\n");
30743 	}
30744       else
30745 	{
30746 	  /* In the large code model, we allow arbitrary offsets between the
30747 	     TOC and the text section, so we have to load the offset from
30748 	     memory.  The data field is emitted directly before the global
30749 	     entry point in rs6000_elf_declare_function_name.  */
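	  /* For illustration (labels rendered as .LCL0/.LCF0), the
	     sequence printed below is:

	       ld 2,.LCL0-.LCF0(12)
	       add 2,2,12

	     with .LCL0 being the data word that holds the TOC offset.  */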
30750 	  char buf[256];
30751 
30752 #ifdef HAVE_AS_ENTRY_MARKERS
30753 	  /* If supported by the linker, emit a marker relocation.  If the
30754 	     total code size of the final executable or shared library
30755 	     happens to fit into 2 GB after all, the linker will replace
30756 	     this code sequence with the sequence for the small or medium
30757 	     code model.  */
30758 	  fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
30759 #endif
30760 	  fprintf (file, "\tld 2,");
30761 	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
30762 	  assemble_name (file, buf);
30763 	  fprintf (file, "-");
30764 	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
30765 	  assemble_name (file, buf);
30766 	  fprintf (file, "(12)\n");
30767 	  fprintf (file, "\tadd 2,2,12\n");
30768 	}
30769 
30770       fputs ("\t.localentry\t", file);
30771       assemble_name (file, name);
30772       fputs (",.-", file);
30773       assemble_name (file, name);
30774       fputs ("\n", file);
30775     }
30776 
30777   /* Output -mprofile-kernel code.  This needs to be done here instead of
30778      in output_function_profile since it must go after the ELFv2 ABI
30779      local entry point.  */
30780   if (TARGET_PROFILE_KERNEL && crtl->profile)
30781     {
30782       gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
30783       gcc_assert (!TARGET_32BIT);
30784 
30785       asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
30786 
30787       /* In the ELFv2 ABI we have no compiler stack word.  It must be
30788 	 the responsibility of _mcount to preserve the static chain
30789 	 register if required.  */
30790       if (DEFAULT_ABI != ABI_ELFv2
30791 	  && cfun->static_chain_decl != NULL)
30792 	{
30793 	  asm_fprintf (file, "\tstd %s,24(%s)\n",
30794 		       reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
30795 	  fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
30796 	  asm_fprintf (file, "\tld %s,24(%s)\n",
30797 		       reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
30798 	}
30799       else
30800 	fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
30801     }
30802 
30803   rs6000_pic_labelno++;
30804 }
30805 
30806 /* -mprofile-kernel code calls mcount before the function prologue,
30807    so a profiled leaf function should stay a leaf function.  */
30808 static bool
30809 rs6000_keep_leaf_when_profiled ()
30810 {
30811   return TARGET_PROFILE_KERNEL;
30812 }
30813 
30814 /* Non-zero if vmx regs are restored before the frame pop, zero if
30815    we restore after the pop when possible.  */
30816 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
30817 
30818 /* Restoring cr is a two-step process: loading a reg from the frame
30819    save, then moving the reg to cr.  For ABI_V4 we must let the
30820    unwinder know that the stack location is no longer valid at or
30821    before the stack deallocation, but we can't emit a cfa_restore for
30822    cr at the stack deallocation like we do for other registers.
30823    The trouble is that it is possible for the move to cr to be
30824    scheduled after the stack deallocation.  So say exactly where cr
30825    is located on each of the two insns.  */
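/* A sketch of that two-insn pair (operands illustrative only):

     lwz rN,cr_save_offset(frame)   ... REG_CFA_REGISTER: cr lives in rN
     mtcrf mask,rN                  ... REG_CFA_RESTORE: cr restored here

   so the unwinder can find cr at every point in between.  */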
30826 
30827 static rtx
30828 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
30829 {
30830   rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
30831   rtx reg = gen_rtx_REG (SImode, regno);
30832   rtx_insn *insn = emit_move_insn (reg, mem);
30833 
30834   if (!exit_func && DEFAULT_ABI == ABI_V4)
30835     {
30836       rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
30837       rtx set = gen_rtx_SET (reg, cr);
30838 
30839       add_reg_note (insn, REG_CFA_REGISTER, set);
30840       RTX_FRAME_RELATED_P (insn) = 1;
30841     }
30842   return reg;
30843 }
30844 
30845 /* Reload CR from REG.  */
30846 
30847 static void
30848 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
30849 {
30850   int count = 0;
30851   int i;
30852 
30853   if (using_mfcr_multiple)
30854     {
30855       for (i = 0; i < 8; i++)
30856 	if (save_reg_p (CR0_REGNO + i))
30857 	  count++;
30858       gcc_assert (count);
30859     }
30860 
30861   if (using_mfcr_multiple && count > 1)
30862     {
30863       rtx_insn *insn;
30864       rtvec p;
30865       int ndx;
30866 
30867       p = rtvec_alloc (count);
30868 
30869       ndx = 0;
30870       for (i = 0; i < 8; i++)
30871 	if (save_reg_p (CR0_REGNO + i))
30872 	  {
30873 	    rtvec r = rtvec_alloc (2);
30874 	    RTVEC_ELT (r, 0) = reg;
30875 	    RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
30876 	    RTVEC_ELT (p, ndx) =
30877 	      gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
30878 			   gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
30879 	    ndx++;
30880 	  }
30881       insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
30882       gcc_assert (ndx == count);
30883 
30884       /* For the ELFv2 ABI we generate a CFA_RESTORE for each
30885 	 CR field separately.  */
30886       if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
30887 	{
30888 	  for (i = 0; i < 8; i++)
30889 	    if (save_reg_p (CR0_REGNO + i))
30890 	      add_reg_note (insn, REG_CFA_RESTORE,
30891 			    gen_rtx_REG (SImode, CR0_REGNO + i));
30892 
30893 	  RTX_FRAME_RELATED_P (insn) = 1;
30894 	}
30895     }
30896   else
30897     for (i = 0; i < 8; i++)
30898       if (save_reg_p (CR0_REGNO + i))
30899 	{
30900 	  rtx insn = emit_insn (gen_movsi_to_cr_one
30901 				 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
30902 
30903 	  /* For the ELFv2 ABI we generate a CFA_RESTORE for each
30904 	     CR field separately, attached to the insn that in fact
30905 	     restores this particular CR field.  */
30906 	  if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
30907 	    {
30908 	      add_reg_note (insn, REG_CFA_RESTORE,
30909 			    gen_rtx_REG (SImode, CR0_REGNO + i));
30910 
30911 	      RTX_FRAME_RELATED_P (insn) = 1;
30912 	    }
30913 	}
30914 
30915   /* For other ABIs, we just generate a single CFA_RESTORE for CR2.  */
30916   if (!exit_func && DEFAULT_ABI != ABI_ELFv2
30917       && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
30918     {
30919       rtx_insn *insn = get_last_insn ();
30920       rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
30921 
30922       add_reg_note (insn, REG_CFA_RESTORE, cr);
30923       RTX_FRAME_RELATED_P (insn) = 1;
30924     }
30925 }
30926 
30927 /* Like cr, the move to lr instruction can be scheduled after the
30928    stack deallocation, but unlike cr, its stack frame save is still
30929    valid.  So we only need to emit the cfa_restore on the correct
30930    instruction.  */
30931 
30932 static void
30933 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
30934 {
30935   rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
30936   rtx reg = gen_rtx_REG (Pmode, regno);
30937 
30938   emit_move_insn (reg, mem);
30939 }
30940 
30941 static void
30942 restore_saved_lr (int regno, bool exit_func)
30943 {
30944   rtx reg = gen_rtx_REG (Pmode, regno);
30945   rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
30946   rtx_insn *insn = emit_move_insn (lr, reg);
30947 
30948   if (!exit_func && flag_shrink_wrap)
30949     {
30950       add_reg_note (insn, REG_CFA_RESTORE, lr);
30951       RTX_FRAME_RELATED_P (insn) = 1;
30952     }
30953 }
30954 
30955 static rtx
30956 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
30957 {
30958   if (DEFAULT_ABI == ABI_ELFv2)
30959     {
30960       int i;
30961       for (i = 0; i < 8; i++)
30962 	if (save_reg_p (CR0_REGNO + i))
30963 	  {
30964 	    rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
30965 	    cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
30966 					   cfa_restores);
30967 	  }
30968     }
30969   else if (info->cr_save_p)
30970     cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
30971 				   gen_rtx_REG (SImode, CR2_REGNO),
30972 				   cfa_restores);
30973 
30974   if (info->lr_save_p)
30975     cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
30976 				   gen_rtx_REG (Pmode, LR_REGNO),
30977 				   cfa_restores);
30978   return cfa_restores;
30979 }
30980 
30981 /* Return true if OFFSET from the stack pointer can be clobbered by
30982    signals.  V.4 doesn't have any stack cushion; the AIX ABIs have 220
30983    or 288 bytes below the stack pointer that are not clobbered by signals.  */
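/* For example, with the 64-bit cushion an offset of -288 is still
   protected (this returns false), while -289 can be clobbered (this
   returns true).  */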
30984 
30985 static inline bool
30986 offset_below_red_zone_p (HOST_WIDE_INT offset)
30987 {
30988   return offset < (DEFAULT_ABI == ABI_V4
30989 		   ? 0
30990 		   : TARGET_32BIT ? -220 : -288);
30991 }
30992 
30993 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn.  */
30994 
30995 static void
30996 emit_cfa_restores (rtx cfa_restores)
30997 {
30998   rtx_insn *insn = get_last_insn ();
30999   rtx *loc = &REG_NOTES (insn);
31000 
31001   while (*loc)
31002     loc = &XEXP (*loc, 1);
31003   *loc = cfa_restores;
31004   RTX_FRAME_RELATED_P (insn) = 1;
31005 }
31006 
31007 /* Emit function epilogue as insns.  */
31008 
31009 void
31010 rs6000_emit_epilogue (int sibcall)
31011 {
31012   rs6000_stack_t *info;
31013   int restoring_GPRs_inline;
31014   int restoring_FPRs_inline;
31015   int using_load_multiple;
31016   int using_mtcr_multiple;
31017   int use_backchain_to_restore_sp;
31018   int restore_lr;
31019   int strategy;
31020   HOST_WIDE_INT frame_off = 0;
31021   rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
31022   rtx frame_reg_rtx = sp_reg_rtx;
31023   rtx cfa_restores = NULL_RTX;
31024   rtx insn;
31025   rtx cr_save_reg = NULL_RTX;
31026   machine_mode reg_mode = Pmode;
31027   int reg_size = TARGET_32BIT ? 4 : 8;
31028   machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
31029 			     ? DFmode : SFmode;
31030   int fp_reg_size = 8;
31031   int i;
31032   bool exit_func;
31033   unsigned ptr_regno;
31034 
31035   info = rs6000_stack_info ();
31036 
31037   if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
31038     {
31039       reg_mode = V2SImode;
31040       reg_size = 8;
31041     }
31042 
31043   strategy = info->savres_strategy;
31044   using_load_multiple = strategy & REST_MULTIPLE;
31045   restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
31046   restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
31047   using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
31048 			 || rs6000_cpu == PROCESSOR_PPC603
31049 			 || rs6000_cpu == PROCESSOR_PPC750
31050 			 || optimize_size);
31051   /* Restore via the backchain when we have a large frame, since this
31052      is more efficient than an addis, addi pair.  The second condition
31053      here will not trigger at the moment; we don't actually need a
31054      frame pointer for alloca, but the generic parts of the compiler
31055      give us one anyway.  */
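  /* Concretely, a sketch: the backchain restore below is a single load
     of the back chain word at 0(r1), whereas rebuilding sp from an
     offset larger than 32767 would first need an addis/addi pair to
     form the constant.  */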
31056   use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
31057 						     ? info->lr_save_offset
31058 						     : 0) > 32767
31059 				 || (cfun->calls_alloca
31060 				     && !frame_pointer_needed));
31061   restore_lr = (info->lr_save_p
31062 		&& (restoring_FPRs_inline
31063 		    || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
31064 		&& (restoring_GPRs_inline
31065 		    || info->first_fp_reg_save < 64)
31066 		&& !cfun->machine->lr_is_wrapped_separately);
31067 
31068 
31069   if (WORLD_SAVE_P (info))
31070     {
31071       int i, j;
31072       char rname[30];
31073       const char *alloc_rname;
31074       rtvec p;
31075 
31076       /* eh_rest_world_r10 will return to the location saved in the LR
31077 	 stack slot (which is not likely to be our caller).
31078 	 Input: R10 -- stack adjustment.  Clobbers R0, R11, R12, R7, R8.
31079 	 rest_world is similar, except any R10 parameter is ignored.
31080 	 The exception-handling stuff that was here in 2.95 is no
31081 	 longer necessary.  */
31082 
31083       p = rtvec_alloc (9
31084 		       + 32 - info->first_gp_reg_save
31085 		       + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
31086 		       + 63 + 1 - info->first_fp_reg_save);
31087 
31088       strcpy (rname, ((crtl->calls_eh_return) ?
31089 		      "*eh_rest_world_r10" : "*rest_world"));
31090       alloc_rname = ggc_strdup (rname);
31091 
31092       j = 0;
31093       RTVEC_ELT (p, j++) = ret_rtx;
31094       RTVEC_ELT (p, j++)
31095 	= gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
31096       /* The instruction pattern requires a clobber here;
31097 	 it is shared with the restVEC helper. */
31098       RTVEC_ELT (p, j++)
31099 	= gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
31100 
31101       {
31102 	/* CR register traditionally saved as CR2.  */
31103 	rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
31104 	RTVEC_ELT (p, j++)
31105 	  = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
31106 	if (flag_shrink_wrap)
31107 	  {
31108 	    cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
31109 					   gen_rtx_REG (Pmode, LR_REGNO),
31110 					   cfa_restores);
31111 	    cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31112 	  }
31113       }
31114 
31115       for (i = 0; i < 32 - info->first_gp_reg_save; i++)
31116 	{
31117 	  rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
31118 	  RTVEC_ELT (p, j++)
31119 	    = gen_frame_load (reg,
31120 			      frame_reg_rtx, info->gp_save_offset + reg_size * i);
31121 	  if (flag_shrink_wrap)
31122 	    cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31123 	}
31124       for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
31125 	{
31126 	  rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
31127 	  RTVEC_ELT (p, j++)
31128 	    = gen_frame_load (reg,
31129 			      frame_reg_rtx, info->altivec_save_offset + 16 * i);
31130 	  if (flag_shrink_wrap)
31131 	    cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31132 	}
31133       for (i = 0; info->first_fp_reg_save + i <= 63; i++)
31134 	{
31135 	  rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
31136 				  ? DFmode : SFmode),
31137 				 info->first_fp_reg_save + i);
31138 	  RTVEC_ELT (p, j++)
31139 	    = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
31140 	  if (flag_shrink_wrap)
31141 	    cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31142 	}
31143       RTVEC_ELT (p, j++)
31144 	= gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
31145       RTVEC_ELT (p, j++)
31146 	= gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
31147       RTVEC_ELT (p, j++)
31148 	= gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
31149       RTVEC_ELT (p, j++)
31150 	= gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
31151       RTVEC_ELT (p, j++)
31152 	= gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
31153       insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
31154 
31155       if (flag_shrink_wrap)
31156 	{
31157 	  REG_NOTES (insn) = cfa_restores;
31158 	  add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
31159 	  RTX_FRAME_RELATED_P (insn) = 1;
31160 	}
31161       return;
31162     }
31163 
31164   /* frame_reg_rtx + frame_off points to the top of this stack frame.  */
31165   if (info->push_p)
31166     frame_off = info->total_size;
31167 
31168   /* Restore AltiVec registers if we must do so before adjusting the
31169      stack.  */
31170   if (info->altivec_size != 0
31171       && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31172 	  || (DEFAULT_ABI != ABI_V4
31173 	      && offset_below_red_zone_p (info->altivec_save_offset))))
31174     {
31175       int i;
31176       int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
31177 
31178       gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
31179       if (use_backchain_to_restore_sp)
31180 	{
31181 	  int frame_regno = 11;
31182 
31183 	  if ((strategy & REST_INLINE_VRS) == 0)
31184 	    {
31185 	      /* Of r11 and r12, select the one not clobbered by an
31186 		 out-of-line restore function for the frame register.  */
31187 	      frame_regno = 11 + 12 - scratch_regno;
31188 	    }
31189 	  frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
31190 	  emit_move_insn (frame_reg_rtx,
31191 			  gen_rtx_MEM (Pmode, sp_reg_rtx));
31192 	  frame_off = 0;
31193 	}
31194       else if (frame_pointer_needed)
31195 	frame_reg_rtx = hard_frame_pointer_rtx;
31196 
31197       if ((strategy & REST_INLINE_VRS) == 0)
31198 	{
31199 	  int end_save = info->altivec_save_offset + info->altivec_size;
31200 	  int ptr_off;
31201 	  rtx ptr_reg = gen_rtx_REG (Pmode, 0);
31202 	  rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
31203 
31204 	  if (end_save + frame_off != 0)
31205 	    {
31206 	      rtx offset = GEN_INT (end_save + frame_off);
31207 
31208 	      emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
31209 	    }
31210 	  else
31211 	    emit_move_insn (ptr_reg, frame_reg_rtx);
31212 
31213 	  ptr_off = -end_save;
31214 	  insn = rs6000_emit_savres_rtx (info, scratch_reg,
31215 					 info->altivec_save_offset + ptr_off,
31216 					 0, V4SImode, SAVRES_VR);
31217 	}
31218       else
31219 	{
31220 	  for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
31221 	    if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
31222 	      {
31223 		rtx addr, areg, mem, insn;
31224 		rtx reg = gen_rtx_REG (V4SImode, i);
31225 		HOST_WIDE_INT offset
31226 		  = (info->altivec_save_offset + frame_off
31227 		     + 16 * (i - info->first_altivec_reg_save));
31228 
31229 		if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
31230 		  {
31231 		    mem = gen_frame_mem (V4SImode,
31232 					 gen_rtx_PLUS (Pmode, frame_reg_rtx,
31233 						       GEN_INT (offset)));
31234 		    insn = gen_rtx_SET (reg, mem);
31235 		  }
31236 		else
31237 		  {
31238 		    areg = gen_rtx_REG (Pmode, 0);
31239 		    emit_move_insn (areg, GEN_INT (offset));
31240 
31241 		    /* AltiVec addressing mode is [reg+reg].  */
31242 		    addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
31243 		    mem = gen_frame_mem (V4SImode, addr);
31244 
31245 		    /* Rather than emitting a generic move, force use of the
31246 		       lvx instruction, which we always want.  In particular we
31247 		       don't want lxvd2x/xxpermdi for little endian.  */
31248 		    insn = gen_altivec_lvx_v4si_internal (reg, mem);
31249 		  }
31250 
31251 		(void) emit_insn (insn);
31252 	      }
31253 	}
31254 
31255       for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
31256 	if (((strategy & REST_INLINE_VRS) == 0
31257 	     || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
31258 	    && (flag_shrink_wrap
31259 		|| (offset_below_red_zone_p
31260 		    (info->altivec_save_offset
31261 		     + 16 * (i - info->first_altivec_reg_save)))))
31262 	  {
31263 	    rtx reg = gen_rtx_REG (V4SImode, i);
31264 	    cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31265 	  }
31266     }
31267 
31268   /* Restore VRSAVE if we must do so before adjusting the stack.  */
31269   if (info->vrsave_size != 0
31270       && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31271 	  || (DEFAULT_ABI != ABI_V4
31272 	      && offset_below_red_zone_p (info->vrsave_save_offset))))
31273     {
31274       rtx reg;
31275 
31276       if (frame_reg_rtx == sp_reg_rtx)
31277 	{
31278 	  if (use_backchain_to_restore_sp)
31279 	    {
31280 	      frame_reg_rtx = gen_rtx_REG (Pmode, 11);
31281 	      emit_move_insn (frame_reg_rtx,
31282 			      gen_rtx_MEM (Pmode, sp_reg_rtx));
31283 	      frame_off = 0;
31284 	    }
31285 	  else if (frame_pointer_needed)
31286 	    frame_reg_rtx = hard_frame_pointer_rtx;
31287 	}
31288 
31289       reg = gen_rtx_REG (SImode, 12);
31290       emit_insn (gen_frame_load (reg, frame_reg_rtx,
31291 				 info->vrsave_save_offset + frame_off));
31292 
31293       emit_insn (generate_set_vrsave (reg, info, 1));
31294     }
31295 
31296   insn = NULL_RTX;
31297   /* If we have a large stack frame, restore the old stack pointer
31298      using the backchain.  */
31299   if (use_backchain_to_restore_sp)
31300     {
31301       if (frame_reg_rtx == sp_reg_rtx)
31302 	{
31303 	  /* Under V.4, don't reset the stack pointer until after we're done
31304 	     loading the saved registers.  */
31305 	  if (DEFAULT_ABI == ABI_V4)
31306 	    frame_reg_rtx = gen_rtx_REG (Pmode, 11);
31307 
31308 	  insn = emit_move_insn (frame_reg_rtx,
31309 				 gen_rtx_MEM (Pmode, sp_reg_rtx));
31310 	  frame_off = 0;
31311 	}
31312       else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31313 	       && DEFAULT_ABI == ABI_V4)
31314 	/* frame_reg_rtx has been set up by the altivec restore.  */
31315 	;
31316       else
31317 	{
31318 	  insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
31319 	  frame_reg_rtx = sp_reg_rtx;
31320 	}
31321     }
31322   /* If we have a frame pointer, we can restore the old stack pointer
31323      from it.  */
31324   else if (frame_pointer_needed)
31325     {
31326       frame_reg_rtx = sp_reg_rtx;
31327       if (DEFAULT_ABI == ABI_V4)
31328 	frame_reg_rtx = gen_rtx_REG (Pmode, 11);
31329       /* Prevent reordering memory accesses against stack pointer restore.  */
31330       else if (cfun->calls_alloca
31331 	       || offset_below_red_zone_p (-info->total_size))
31332 	rs6000_emit_stack_tie (frame_reg_rtx, true);
31333 
31334       insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
31335 				       GEN_INT (info->total_size)));
31336       frame_off = 0;
31337     }
31338   else if (info->push_p
31339 	   && DEFAULT_ABI != ABI_V4
31340 	   && !crtl->calls_eh_return)
31341     {
31342       /* Prevent reordering memory accesses against stack pointer restore.  */
31343       if (cfun->calls_alloca
31344 	  || offset_below_red_zone_p (-info->total_size))
31345 	rs6000_emit_stack_tie (frame_reg_rtx, false);
31346       insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
31347 				       GEN_INT (info->total_size)));
31348       frame_off = 0;
31349     }
31350   if (insn && frame_reg_rtx == sp_reg_rtx)
31351     {
31352       if (cfa_restores)
31353 	{
31354 	  REG_NOTES (insn) = cfa_restores;
31355 	  cfa_restores = NULL_RTX;
31356 	}
31357       add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
31358       RTX_FRAME_RELATED_P (insn) = 1;
31359     }
31360 
31361   /* Restore AltiVec registers if we have not done so already.  */
31362   if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31363       && info->altivec_size != 0
31364       && (DEFAULT_ABI == ABI_V4
31365 	  || !offset_below_red_zone_p (info->altivec_save_offset)))
31366     {
31367       int i;
31368 
31369       if ((strategy & REST_INLINE_VRS) == 0)
31370 	{
31371 	  int end_save = info->altivec_save_offset + info->altivec_size;
31372 	  int ptr_off;
31373 	  rtx ptr_reg = gen_rtx_REG (Pmode, 0);
31374 	  int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
31375 	  rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
31376 
31377 	  if (end_save + frame_off != 0)
31378 	    {
31379 	      rtx offset = GEN_INT (end_save + frame_off);
31380 
31381 	      emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
31382 	    }
31383 	  else
31384 	    emit_move_insn (ptr_reg, frame_reg_rtx);
31385 
31386 	  ptr_off = -end_save;
31387 	  insn = rs6000_emit_savres_rtx (info, scratch_reg,
31388 					 info->altivec_save_offset + ptr_off,
31389 					 0, V4SImode, SAVRES_VR);
31390 	  if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
31391 	    {
31392 	      /* Frame reg was clobbered by out-of-line save.  Restore it
31393 		 from ptr_reg, and if we are calling out-of-line gpr or
31394 		 fpr restore set up the correct pointer and offset.  */
31395 	      unsigned newptr_regno = 1;
31396 	      if (!restoring_GPRs_inline)
31397 		{
31398 		  bool lr = info->gp_save_offset + info->gp_size == 0;
31399 		  int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
31400 		  newptr_regno = ptr_regno_for_savres (sel);
31401 		  end_save = info->gp_save_offset + info->gp_size;
31402 		}
31403 	      else if (!restoring_FPRs_inline)
31404 		{
31405 		  bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
31406 		  int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
31407 		  newptr_regno = ptr_regno_for_savres (sel);
31408 		  end_save = info->fp_save_offset + info->fp_size;
31409 		}
31410 
31411 	      if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
31412 		frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
31413 
31414 	      if (end_save + ptr_off != 0)
31415 		{
31416 		  rtx offset = GEN_INT (end_save + ptr_off);
31417 
31418 		  frame_off = -end_save;
31419 		  if (TARGET_32BIT)
31420 		    emit_insn (gen_addsi3_carry (frame_reg_rtx,
31421 						 ptr_reg, offset));
31422 		  else
31423 		    emit_insn (gen_adddi3_carry (frame_reg_rtx,
31424 						 ptr_reg, offset));
31425 		}
31426 	      else
31427 		{
31428 		  frame_off = ptr_off;
31429 		  emit_move_insn (frame_reg_rtx, ptr_reg);
31430 		}
31431 	    }
31432 	}
31433       else
31434 	{
31435 	  for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
31436 	    if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
31437 	      {
31438 		rtx addr, areg, mem, insn;
31439 		rtx reg = gen_rtx_REG (V4SImode, i);
31440 		HOST_WIDE_INT offset
31441 		  = (info->altivec_save_offset + frame_off
31442 		     + 16 * (i - info->first_altivec_reg_save));
31443 
31444 		if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
31445 		  {
31446 		    mem = gen_frame_mem (V4SImode,
31447 					 gen_rtx_PLUS (Pmode, frame_reg_rtx,
31448 						       GEN_INT (offset)));
31449 		    insn = gen_rtx_SET (reg, mem);
31450 		  }
31451 		else
31452 		  {
31453 		    areg = gen_rtx_REG (Pmode, 0);
31454 		    emit_move_insn (areg, GEN_INT (offset));
31455 
31456 		    /* AltiVec addressing mode is [reg+reg].  */
31457 		    addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
31458 		    mem = gen_frame_mem (V4SImode, addr);
31459 
31460 		    /* Rather than emitting a generic move, force use of the
31461 		       lvx instruction, which we always want.  In particular we
31462 		       don't want lxvd2x/xxpermdi for little endian.  */
31463 		    insn = gen_altivec_lvx_v4si_internal (reg, mem);
31464 		  }
31465 
31466 		(void) emit_insn (insn);
31467 	      }
31468 	}
31469 
31470       for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
31471 	if (((strategy & REST_INLINE_VRS) == 0
31472 	     || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
31473 	    && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
31474 	  {
31475 	    rtx reg = gen_rtx_REG (V4SImode, i);
31476 	    cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31477 	  }
31478     }
31479 
31480   /* Restore VRSAVE if we have not done so already.  */
31481   if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31482       && info->vrsave_size != 0
31483       && (DEFAULT_ABI == ABI_V4
31484 	  || !offset_below_red_zone_p (info->vrsave_save_offset)))
31485     {
31486       rtx reg;
31487 
31488       reg = gen_rtx_REG (SImode, 12);
31489       emit_insn (gen_frame_load (reg, frame_reg_rtx,
31490 				 info->vrsave_save_offset + frame_off));
31491 
31492       emit_insn (generate_set_vrsave (reg, info, 1));
31493     }
31494 
31495   /* If we exit by an out-of-line restore function on ABI_V4 then that
31496      function will deallocate the stack, so we don't need to worry
31497      about the unwinder restoring cr from an invalid stack frame
31498      location.  */
31499   exit_func = (!restoring_FPRs_inline
31500 	       || (!restoring_GPRs_inline
31501 		   && info->first_fp_reg_save == 64));
31502 
31503   /* In the ELFv2 ABI we need to restore all call-saved CR fields from
31504      *separate* slots if the routine calls __builtin_eh_return, so
31505      that they can be independently restored by the unwinder.  */
31506   if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
31507     {
31508       int i, cr_off = info->ehcr_offset;
31509 
31510       for (i = 0; i < 8; i++)
31511 	if (!call_used_regs[CR0_REGNO + i])
31512 	  {
31513 	    rtx reg = gen_rtx_REG (SImode, 0);
31514 	    emit_insn (gen_frame_load (reg, frame_reg_rtx,
31515 				       cr_off + frame_off));
31516 
31517 	    insn = emit_insn (gen_movsi_to_cr_one
31518 				(gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
31519 
31520 	    if (!exit_func && flag_shrink_wrap)
31521 	      {
31522 		add_reg_note (insn, REG_CFA_RESTORE,
31523 			      gen_rtx_REG (SImode, CR0_REGNO + i));
31524 
31525 		RTX_FRAME_RELATED_P (insn) = 1;
31526 	      }
31527 
31528 	    cr_off += reg_size;
31529 	  }
31530     }
31531 
31532   /* Get the old lr if we saved it.  If we are restoring registers
31533      out-of-line, then the out-of-line routines can do this for us.  */
31534   if (restore_lr && restoring_GPRs_inline)
31535     load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
31536 
31537   /* Get the old cr if we saved it.  */
31538   if (info->cr_save_p)
31539     {
31540       unsigned cr_save_regno = 12;
31541 
31542       if (!restoring_GPRs_inline)
31543 	{
31544 	  /* Ensure we don't use the register used by the out-of-line
31545 	     gpr restore below.  */
31546 	  bool lr = info->gp_save_offset + info->gp_size == 0;
31547 	  int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
31548 	  int gpr_ptr_regno = ptr_regno_for_savres (sel);
31549 
31550 	  if (gpr_ptr_regno == 12)
31551 	    cr_save_regno = 11;
31552 	  gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
31553 	}
31554       else if (REGNO (frame_reg_rtx) == 12)
31555 	cr_save_regno = 11;
31556 
31557       cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
31558 				  info->cr_save_offset + frame_off,
31559 				  exit_func);
31560     }
31561 
31562   /* Set LR here to try to overlap restores below.  */
31563   if (restore_lr && restoring_GPRs_inline)
31564     restore_saved_lr (0, exit_func);
31565 
31566   /* Load exception handler data registers, if needed.  */
31567   if (crtl->calls_eh_return)
31568     {
31569       unsigned int i, regno;
31570 
31571       if (TARGET_AIX)
31572 	{
31573 	  rtx reg = gen_rtx_REG (reg_mode, 2);
31574 	  emit_insn (gen_frame_load (reg, frame_reg_rtx,
31575 				     frame_off + RS6000_TOC_SAVE_SLOT));
31576 	}
31577 
31578       for (i = 0; ; ++i)
31579 	{
31580 	  rtx mem;
31581 
31582 	  regno = EH_RETURN_DATA_REGNO (i);
31583 	  if (regno == INVALID_REGNUM)
31584 	    break;
31585 
31586 	  /* Note: possible use of r0 here to address SPE regs.  */
31587 	  mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
31588 				      info->ehrd_offset + frame_off
31589 				      + reg_size * (int) i);
31590 
31591 	  emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
31592 	}
31593     }
31594 
31595   /* Restore GPRs.  This is done as a PARALLEL if we are using
31596      the load-multiple instructions.  */
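  /* (Illustrative: on 32-bit targets the load-multiple PARALLEL below
     can become a single lmw, e.g. "lmw 14,offset(1)" reloading
     r14..r31 in one instruction; the operands here are made up.)  */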
31597   if (TARGET_SPE_ABI
31598       && info->spe_64bit_regs_used
31599       && info->first_gp_reg_save != 32)
31600     {
31601       /* Determine whether we can address all of the registers that need
31602 	 to be saved with an offset from frame_reg_rtx that fits in
31603 	 the small const field for SPE memory instructions.  */
31604       int spe_regs_addressable
31605 	= (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
31606 				+ reg_size * (32 - info->first_gp_reg_save - 1))
31607 	   && restoring_GPRs_inline);
31608 
31609       if (!spe_regs_addressable)
31610 	{
31611 	  int ool_adjust = 0;
31612 	  rtx old_frame_reg_rtx = frame_reg_rtx;
31613 	  /* Make r11 point to the start of the SPE save area.  We worried about
31614 	     not clobbering it when we were saving registers in the prologue.
31615 	     There's no need to worry here because the static chain is passed
31616 	     anew to every function.  */
31617 
31618 	  if (!restoring_GPRs_inline)
31619 	    ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
31620 	  frame_reg_rtx = gen_rtx_REG (Pmode, 11);
31621 	  emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
31622 				 GEN_INT (info->spe_gp_save_offset
31623 					  + frame_off
31624 					  - ool_adjust)));
31625 	  /* Keep the invariant that frame_reg_rtx + frame_off points
31626 	     at the top of the stack frame.  */
31627 	  frame_off = -info->spe_gp_save_offset + ool_adjust;
31628 	}
31629 
31630       if (restoring_GPRs_inline)
31631 	{
31632 	  HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
31633 
31634 	  for (i = 0; i < 32 - info->first_gp_reg_save; i++)
31635 	    if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
31636 	      {
31637 		rtx offset, addr, mem, reg;
31638 
31639 		/* We're doing all this to ensure that the immediate offset
31640 		   fits into the immediate field of 'evldd'.  */
31641 		gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
31642 
31643 		offset = GEN_INT (spe_offset + reg_size * i);
31644 		addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
31645 		mem = gen_rtx_MEM (V2SImode, addr);
31646 		reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
31647 
31648 		emit_move_insn (reg, mem);
31649 	      }
31650 	}
31651       else
31652 	rs6000_emit_savres_rtx (info, frame_reg_rtx,
31653 				info->spe_gp_save_offset + frame_off,
31654 				info->lr_save_offset + frame_off,
31655 				reg_mode,
31656 				SAVRES_GPR | SAVRES_LR);
31657     }
31658   else if (!restoring_GPRs_inline)
31659     {
31660       /* We are jumping to an out-of-line function.  */
31661       rtx ptr_reg;
31662       int end_save = info->gp_save_offset + info->gp_size;
31663       bool can_use_exit = end_save == 0;
31664       int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
31665       int ptr_off;
31666 
31667       /* Emit stack reset code if we need it.  */
31668       ptr_regno = ptr_regno_for_savres (sel);
31669       ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
31670       if (can_use_exit)
31671 	rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
31672       else if (end_save + frame_off != 0)
31673 	emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
31674 				  GEN_INT (end_save + frame_off)));
31675       else if (REGNO (frame_reg_rtx) != ptr_regno)
31676 	emit_move_insn (ptr_reg, frame_reg_rtx);
31677       if (REGNO (frame_reg_rtx) == ptr_regno)
31678 	frame_off = -end_save;
31679 
31680       if (can_use_exit && info->cr_save_p)
31681 	restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
31682 
31683       ptr_off = -end_save;
31684       rs6000_emit_savres_rtx (info, ptr_reg,
31685 			      info->gp_save_offset + ptr_off,
31686 			      info->lr_save_offset + ptr_off,
31687 			      reg_mode, sel);
31688     }
31689   else if (using_load_multiple)
31690     {
31691       rtvec p;
31692       p = rtvec_alloc (32 - info->first_gp_reg_save);
31693       for (i = 0; i < 32 - info->first_gp_reg_save; i++)
31694 	RTVEC_ELT (p, i)
31695 	  = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
31696 			    frame_reg_rtx,
31697 			    info->gp_save_offset + frame_off + reg_size * i);
31698       emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
31699     }
31700   else
31701     {
31702       int offset = info->gp_save_offset + frame_off;
31703       for (i = info->first_gp_reg_save; i < 32; i++)
31704 	{
31705 	  if (rs6000_reg_live_or_pic_offset_p (i)
31706 	      && !cfun->machine->gpr_is_wrapped_separately[i])
31707 	    {
31708 	      rtx reg = gen_rtx_REG (reg_mode, i);
31709 	      emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
31710 	    }
31711 
31712 	  offset += reg_size;
31713 	}
31714     }
31715 
31716   if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
31717     {
31718       /* If the frame pointer was used then we can't delay emitting
31719 	 a REG_CFA_DEF_CFA note.  This must happen on the insn that
31720 	 restores the frame pointer, r31.  We may have already emitted
31721 	 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
31722 	 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
31723 	 be harmless if emitted.  */
31724       if (frame_pointer_needed)
31725 	{
31726 	  insn = get_last_insn ();
31727 	  add_reg_note (insn, REG_CFA_DEF_CFA,
31728 			plus_constant (Pmode, frame_reg_rtx, frame_off));
31729 	  RTX_FRAME_RELATED_P (insn) = 1;
31730 	}
31731 
31732       /* Set up cfa_restores.  We always need these when
31733 	 shrink-wrapping.  If not shrink-wrapping then we only need
31734 	 the cfa_restore when the stack location is no longer valid.
31735 	 The cfa_restores must be emitted on or before the insn that
31736 	 invalidates the stack, and of course must not be emitted
31737 	 before the insn that actually does the restore.  The latter
31738 	 is why it is a bad idea to emit the cfa_restores as a group
31739 	 on the last instruction here that actually does a restore:
31740 	 That insn may be reordered with respect to others doing
31741 	 restores.  */
31742       if (flag_shrink_wrap
31743 	  && !restoring_GPRs_inline
31744 	  && info->first_fp_reg_save == 64)
31745 	cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
31746 
31747       for (i = info->first_gp_reg_save; i < 32; i++)
31748 	if (!restoring_GPRs_inline
31749 	    || using_load_multiple
31750 	    || rs6000_reg_live_or_pic_offset_p (i))
31751 	  {
31752 	    if (cfun->machine->gpr_is_wrapped_separately[i])
31753 	      continue;
31754 
31755 	    rtx reg = gen_rtx_REG (reg_mode, i);
31756 	    cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31757 	  }
31758     }
31759 
31760   if (!restoring_GPRs_inline
31761       && info->first_fp_reg_save == 64)
31762     {
31763       /* We are jumping to an out-of-line function.  */
31764       if (cfa_restores)
31765 	emit_cfa_restores (cfa_restores);
31766       return;
31767     }
31768 
31769   if (restore_lr && !restoring_GPRs_inline)
31770     {
31771       load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
31772       restore_saved_lr (0, exit_func);
31773     }
31774 
31775   /* Restore fpr's if we need to do it without calling a function.  */
31776   if (restoring_FPRs_inline)
31777     {
31778       int offset = info->fp_save_offset + frame_off;
31779       for (i = info->first_fp_reg_save; i < 64; i++)
31780 	{
31781 	  if (save_reg_p (i)
31782 	      && !cfun->machine->fpr_is_wrapped_separately[i - 32])
31783 	    {
31784 	      rtx reg = gen_rtx_REG (fp_reg_mode, i);
31785 	      emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
31786 	      if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
31787 		cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
31788 					       cfa_restores);
31789 	    }
31790 
31791 	  offset += fp_reg_size;
31792 	}
31793     }
31794 
31795   /* If we saved cr, restore it here.  Just those that were used.  */
31796   if (info->cr_save_p)
31797     restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
31798 
31799   /* If this is V.4, unwind the stack pointer after all of the loads
31800      have been done, or set up r11 if we are restoring fp out of line.  */
31801   ptr_regno = 1;
31802   if (!restoring_FPRs_inline)
31803     {
31804       bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
31805       int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
31806       ptr_regno = ptr_regno_for_savres (sel);
31807     }
31808 
31809   insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
31810   if (REGNO (frame_reg_rtx) == ptr_regno)
31811     frame_off = 0;
31812 
31813   if (insn && restoring_FPRs_inline)
31814     {
31815       if (cfa_restores)
31816 	{
31817 	  REG_NOTES (insn) = cfa_restores;
31818 	  cfa_restores = NULL_RTX;
31819 	}
31820       add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
31821       RTX_FRAME_RELATED_P (insn) = 1;
31822     }
31823 
31824   if (crtl->calls_eh_return)
31825     {
31826       rtx sa = EH_RETURN_STACKADJ_RTX;
31827       emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
31828     }
31829 
31830   if (!sibcall && restoring_FPRs_inline)
31831     {
31832       if (cfa_restores)
31833 	{
31834 	  /* We can't hang the cfa_restores off a simple return,
31835 	     since the shrink-wrap code sometimes uses an existing
31836 	     return.  This means there might be a path from
31837 	     pre-prologue code to this return, and dwarf2cfi code
31838 	     wants the eh_frame unwinder state to be the same on
31839 	     all paths to any point.  So we need to emit the
31840 	     cfa_restores before the return.  For -m64 we really
31841 	     don't need epilogue cfa_restores at all, except for
31842 	     this irritating dwarf2cfi with shrink-wrap
31843 	     requirement; the stack red zone means eh_frame info
31844 	     from the prologue telling the unwinder to restore
31845 	     from the stack is perfectly good right to the end of
31846 	     the function.  */
31847 	  emit_insn (gen_blockage ());
31848 	  emit_cfa_restores (cfa_restores);
31849 	  cfa_restores = NULL_RTX;
31850 	}
31851 
31852       emit_jump_insn (targetm.gen_simple_return ());
31853     }
31854 
31855   if (!sibcall && !restoring_FPRs_inline)
31856     {
31857       bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
31858       rtvec p = rtvec_alloc (3 + !!lr + 64 - info->first_fp_reg_save);
31859       int elt = 0;
31860       RTVEC_ELT (p, elt++) = ret_rtx;
31861       if (lr)
31862 	RTVEC_ELT (p, elt++)
31863 	  = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
31864 
31865       /* We have to restore more than two FP registers, so branch to the
31866 	 restore function.  It will return to our caller.  */
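      /* (The callee is one of the out-of-line restore helpers named by
	 rs6000_savres_routine_name, e.g. a _restfpr_* routine; the
	 exact name depends on the first saved FPR and on whether the
	 helper also restores LR.)  */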
31867       int i;
31868       int reg;
31869       rtx sym;
31870 
31871       if (flag_shrink_wrap)
31872 	cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
31873 
31874       sym = rs6000_savres_routine_sym (info, SAVRES_FPR | (lr ? SAVRES_LR : 0));
31875       RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, sym);
31876       reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
31877       RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
31878 
31879       for (i = 0; i < 64 - info->first_fp_reg_save; i++)
31880 	{
31881 	  rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
31882 
31883 	  RTVEC_ELT (p, elt++)
31884 	    = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
31885 	  if (flag_shrink_wrap)
31886 	    cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31887 	}
31888 
31889       emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
31890     }
31891 
31892   if (cfa_restores)
31893     {
31894       if (sibcall)
31895 	/* Ensure the cfa_restores are hung off an insn that won't
31896 	   be reordered above other restores.  */
31897 	emit_insn (gen_blockage ());
31898 
31899       emit_cfa_restores (cfa_restores);
31900     }
31901 }
31902 
31903 /* Write function epilogue.  */
31904 
31905 static void
31906 rs6000_output_function_epilogue (FILE *file)
31907 {
31908 #if TARGET_MACHO
31909   macho_branch_islands ();
31910 
31911   {
31912     rtx_insn *insn = get_last_insn ();
31913     rtx_insn *deleted_debug_label = NULL;
31914 
31915     /* Mach-O doesn't support labels at the end of objects, so if
31916        it looks like we might want one, take special action.
31917 
31918        First, collect any sequence of deleted debug labels.  */
31919     while (insn
31920 	   && NOTE_P (insn)
31921 	   && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
31922       {
31923 	/* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
31924 	   notes only, instead set their CODE_LABEL_NUMBER to -1,
31925 	   otherwise there would be code generation differences
31926 	   in between -g and -g0.  */
31927 	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
31928 	  deleted_debug_label = insn;
31929 	insn = PREV_INSN (insn);
31930       }
31931 
31932     /* Second, if we have:
31933        label:
31934 	 barrier
31935        then this needs to be detected, so skip past the barrier.  */
31936 
31937     if (insn && BARRIER_P (insn))
31938       insn = PREV_INSN (insn);
31939 
31940     /* Up to now we've only seen notes or barriers.  */
31941     if (insn)
31942       {
31943 	if (LABEL_P (insn)
31944 	    || (NOTE_P (insn)
31945 		&& NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
31946 	  /* Trailing label: <barrier>.  */
31947 	  fputs ("\tnop\n", file);
31948 	else
31949 	  {
31950 	    /* Lastly, see if we have a completely empty function body.  */
31951 	    while (insn && ! INSN_P (insn))
31952 	      insn = PREV_INSN (insn);
31953 	    /* If we don't find any insns, we've got an empty function body;
31954 	       i.e. completely empty, without a return or branch.  This is
31955 	       taken as the case where a function body has been removed
31956 	       because it contains an inline __builtin_unreachable().  GCC
31957 	       states that reaching __builtin_unreachable() means UB so we're
31958 	       not obliged to do anything special; however, we want
31959 	       non-zero-sized function bodies.  To meet this, and help the
31960 	       user out, let's trap the case.  */
31961 	    if (insn == NULL)
31962 	      fputs ("\ttrap\n", file);
31963 	  }
31964       }
31965     else if (deleted_debug_label)
31966       for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
31967 	if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
31968 	  CODE_LABEL_NUMBER (insn) = -1;
31969   }
31970 #endif
31971 
31972   /* Output a traceback table here.  See /usr/include/sys/debug.h for info
31973      on its format.
31974 
31975      We don't output a traceback table if -finhibit-size-directive was
31976      used.  The documentation for -finhibit-size-directive reads
31977      ``don't output a @code{.size} assembler directive, or anything
31978      else that would cause trouble if the function is split in the
31979      middle, and the two halves are placed at locations far apart in
31980      memory.''  The traceback table has this property, since it
31981      includes the offset from the start of the function to the
31982      traceback table itself.
31983 
31984      System V.4 PowerPC (and the embedded ABI derived from it) uses a
31985      different traceback table.  */
31986   if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
31987       && ! flag_inhibit_size_directive
31988       && rs6000_traceback != traceback_none && !cfun->is_thunk)
31989     {
31990       const char *fname = NULL;
31991       const char *language_string = lang_hooks.name;
31992       int fixed_parms = 0, float_parms = 0, parm_info = 0;
31993       int i;
31994       int optional_tbtab;
31995       rs6000_stack_t *info = rs6000_stack_info ();
31996 
31997       if (rs6000_traceback == traceback_full)
31998 	optional_tbtab = 1;
31999       else if (rs6000_traceback == traceback_part)
32000 	optional_tbtab = 0;
32001       else
32002 	optional_tbtab = !optimize_size && !TARGET_ELF;
32003 
32004       if (optional_tbtab)
32005 	{
32006 	  fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
32007 	  while (*fname == '.')	/* V.4 encodes . in the name */
32008 	    fname++;
32009 
32010 	  /* Need label immediately before tbtab, so we can compute
32011 	     its offset from the function start.  */
32012 	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
32013 	  ASM_OUTPUT_LABEL (file, fname);
32014 	}
32015 
32016       /* The .tbtab pseudo-op can only be used for the first eight
32017 	 expressions, since it can't handle the possibly variable
32018 	 length fields that follow.  However, if you omit the optional
32019 	 fields, the assembler outputs zeros for all optional fields
32020 	 anyway, giving each variable-length field its minimum length
32021 	 (as defined in sys/debug.h).  Thus we cannot use the .tbtab
32022 	 pseudo-op at all.  */
32023 
32024       /* An all-zero word flags the start of the tbtab, for debuggers
32025 	 that have to find it by searching forward from the entry
32026 	 point or from the current pc.  */
32027       fputs ("\t.long 0\n", file);
32028 
32029       /* Tbtab format type.  Use format type 0.  */
32030       fputs ("\t.byte 0,", file);
32031 
32032       /* Language type.  Unfortunately, there does not seem to be any
32033 	 official way to discover the language being compiled, so we
32034 	 use language_string.
32035 	 C is 0.  Fortran is 1.  Pascal is 2.  Ada is 3.  C++ is 9.
32036 	 Java is 13.  Objective-C is 14.  Objective-C++ isn't assigned
32037 	 a number, so for now use 9.  LTO, Go and JIT aren't assigned numbers
32038 	 either, so for now use 0.  */
32039       if (lang_GNU_C ()
32040 	  || ! strcmp (language_string, "GNU GIMPLE")
32041 	  || ! strcmp (language_string, "GNU Go")
32042 	  || ! strcmp (language_string, "libgccjit"))
32043 	i = 0;
32044       else if (! strcmp (language_string, "GNU F77")
32045 	       || lang_GNU_Fortran ())
32046 	i = 1;
32047       else if (! strcmp (language_string, "GNU Pascal"))
32048 	i = 2;
32049       else if (! strcmp (language_string, "GNU Ada"))
32050 	i = 3;
32051       else if (lang_GNU_CXX ()
32052 	       || ! strcmp (language_string, "GNU Objective-C++"))
32053 	i = 9;
32054       else if (! strcmp (language_string, "GNU Java"))
32055 	i = 13;
32056       else if (! strcmp (language_string, "GNU Objective-C"))
32057 	i = 14;
32058       else
32059 	gcc_unreachable ();
32060       fprintf (file, "%d,", i);
32061 
32062       /* 8 single bit fields: global linkage (not set for C extern linkage,
32063 	 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
32064 	 from start of procedure stored in tbtab, internal function, function
32065 	 has controlled storage, function has no toc, function uses fp,
32066 	 function logs/aborts fp operations.  */
32067       /* Assume that fp operations are used if any fp reg must be saved.  */
32068       fprintf (file, "%d,",
32069 	       (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
32070 
32071       /* 6 bitfields: function is interrupt handler, name present in
32072 	 proc table, function calls alloca, on condition directives
32073 	 (controls stack walks, 3 bits), saves condition reg, saves
32074 	 link reg.  */
32075       /* The `function calls alloca' bit seems to be set whenever reg 31 is
32076 	 set up as a frame pointer, even when there is no alloca call.  */
32077       fprintf (file, "%d,",
32078 	       ((optional_tbtab << 6)
32079 		| ((optional_tbtab & frame_pointer_needed) << 5)
32080 		| (info->cr_save_p << 1)
32081 		| (info->lr_save_p)));
32082 
32083       /* 3 bitfields: saves backchain, fixup code, number of fpr saved
32084 	 (6 bits).  */
32085       fprintf (file, "%d,",
32086 	       (info->push_p << 7) | (64 - info->first_fp_reg_save));
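
      /* A worked illustration (hypothetical values): with a full
	 traceback table (optional_tbtab == 1), a frame pointer in use,
	 CR not saved, LR saved, a pushed frame and FPRs saved from f50,
	 the three bytes above come out as
	     (1 << 5) | ((50 != 64) << 1)              = 0x22
	     (1 << 6) | ((1 & 1) << 5) | (0 << 1) | 1  = 0x61
	     (1 << 7) | (64 - 50)                      = 0x8e
	 i.e. each byte is simply the OR of its bit fields.  */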
32087 
32088       /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits).  */
32089       fprintf (file, "%d,", (32 - first_reg_to_save ()));
32090 
32091       if (optional_tbtab)
32092 	{
32093 	  /* Compute the parameter info from the function decl argument
32094 	     list.  */
32095 	  tree decl;
32096 	  int next_parm_info_bit = 31;
32097 
32098 	  for (decl = DECL_ARGUMENTS (current_function_decl);
32099 	       decl; decl = DECL_CHAIN (decl))
32100 	    {
32101 	      rtx parameter = DECL_INCOMING_RTL (decl);
32102 	      machine_mode mode = GET_MODE (parameter);
32103 
32104 	      if (GET_CODE (parameter) == REG)
32105 		{
32106 		  if (SCALAR_FLOAT_MODE_P (mode))
32107 		    {
32108 		      int bits;
32109 
32110 		      float_parms++;
32111 
32112 		      switch (mode)
32113 			{
32114 			case E_SFmode:
32115 			case E_SDmode:
32116 			  bits = 0x2;
32117 			  break;
32118 
32119 			case E_DFmode:
32120 			case E_DDmode:
32121 			case E_TFmode:
32122 			case E_TDmode:
32123 			case E_IFmode:
32124 			case E_KFmode:
32125 			  bits = 0x3;
32126 			  break;
32127 
32128 			default:
32129 			  gcc_unreachable ();
32130 			}
32131 
32132 		      /* If only one bit will fit, don't OR in this entry.  */
32133 		      if (next_parm_info_bit > 0)
32134 			parm_info |= (bits << (next_parm_info_bit - 1));
32135 		      next_parm_info_bit -= 2;
32136 		    }
32137 		  else
32138 		    {
32139 		      fixed_parms += ((GET_MODE_SIZE (mode)
32140 				       + (UNITS_PER_WORD - 1))
32141 				      / UNITS_PER_WORD);
32142 		      next_parm_info_bit -= 1;
32143 		    }
32144 		}
32145 	    }
32146 	}
32147 
32148       /* Number of fixed point parameters.  */
32149       /* This is actually the number of words of fixed point parameters;
32150 	 an 8-byte struct counts as 2, so the maximum value is 8.  */
32151       fprintf (file, "%d,", fixed_parms);
32152 
32153       /* 2 bitfields: number of floating point parameters (7 bits), parameters
32154 	 all on stack.  */
32155       /* This is actually the number of fp registers that hold parameters;
32156 	 and thus the maximum value is 13.  */
32157       /* Set parameters on stack bit if parameters are not in their original
32158 	 registers, regardless of whether they are on the stack?  Xlc
32159 	 seems to set the bit when not optimizing.  */
32160       fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
32161 
32162       if (optional_tbtab)
32163 	{
32164 	  /* Optional fields follow.  Some are variable length.  */
32165 
32166 	  /* Parameter types, left adjusted bit fields: 0 fixed, 10 single
32167 	     float, 11 double float.  */
32168 	  /* There is an entry for each parameter in a register, in the order
32169 	     that they occur in the parameter list.  Any intervening arguments
32170 	     on the stack are ignored.  If the list overflows a long (max
32171 	     possible length 34 bits) then completely leave off all elements
32172 	     that don't fit.  */
32173 	  /* Only emit this long if there was at least one parameter.  */
32174 	  if (fixed_parms || float_parms)
32175 	    fprintf (file, "\t.long %d\n", parm_info);
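
	  /* For illustration (hypothetical signature): a function taking
	     (int, double, float) leaves bit 31 clear for the int and sets
	     bits 30-29 to 0b11 for the double and bits 28-27 to 0b10 for
	     the float, so the word emitted above would be 0x70000000.  */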
32176 
32177 	  /* Offset from start of code to tb table.  */
32178 	  fputs ("\t.long ", file);
32179 	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
32180 	  RS6000_OUTPUT_BASENAME (file, fname);
32181 	  putc ('-', file);
32182 	  rs6000_output_function_entry (file, fname);
32183 	  putc ('\n', file);
32184 
32185 	  /* Interrupt handler mask.  */
32186 	  /* Omit this long, since we never set the interrupt handler bit
32187 	     above.  */
32188 
32189 	  /* Number of CTL (controlled storage) anchors.  */
32190 	  /* Omit this long, since the has_ctl bit is never set above.  */
32191 
32192 	  /* Displacement into stack of each CTL anchor.  */
32193 	  /* Omit this list of longs, because there are no CTL anchors.  */
32194 
32195 	  /* Length of function name.  */
32196 	  if (*fname == '*')
32197 	    ++fname;
32198 	  fprintf (file, "\t.short %d\n", (int) strlen (fname));
32199 
32200 	  /* Function name.  */
32201 	  assemble_string (fname, strlen (fname));
32202 
32203 	  /* Register for alloca automatic storage; this is always reg 31.
32204 	     Only emit this if the alloca bit was set above.  */
32205 	  if (frame_pointer_needed)
32206 	    fputs ("\t.byte 31\n", file);
32207 
32208 	  fputs ("\t.align 2\n", file);
32209 	}
32210     }
32211 
32212   /* Arrange to define .LCTOC1 label, if not already done.  */
32213   if (need_toc_init)
32214     {
32215       need_toc_init = 0;
32216       if (!toc_initialized)
32217 	{
32218 	  switch_to_section (toc_section);
32219 	  switch_to_section (current_function_section ());
32220 	}
32221     }
32222 }
32223 
32224 /* -fsplit-stack support.  */
32225 
32226 /* A SYMBOL_REF for __morestack.  */
32227 static GTY(()) rtx morestack_ref;
32228 
32229 static rtx
32230 gen_add3_const (rtx rt, rtx ra, long c)
32231 {
32232   if (TARGET_64BIT)
32233     return gen_adddi3 (rt, ra, GEN_INT (c));
32234   else
32235     return gen_addsi3 (rt, ra, GEN_INT (c));
32236 }
32237 
32238 /* Emit -fsplit-stack prologue, which goes before the regular function
32239    prologue (at local entry point in the case of ELFv2).  */
32240 
32241 void
32242 rs6000_expand_split_stack_prologue (void)
32243 {
32244   rs6000_stack_t *info = rs6000_stack_info ();
32245   unsigned HOST_WIDE_INT allocate;
32246   long alloc_hi, alloc_lo;
32247   rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
32248   rtx_insn *insn;
32249 
32250   gcc_assert (flag_split_stack && reload_completed);
32251 
32252   if (!info->push_p)
32253     return;
32254 
32255   if (global_regs[29])
32256     {
32257       error ("-fsplit-stack uses register r29");
32258       inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
32259 	      "conflicts with %qD", global_regs_decl[29]);
32260     }
32261 
32262   allocate = info->total_size;
32263   if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
32264     {
32265       sorry ("stack frame larger than 2G is not supported for -fsplit-stack");
32266       return;
32267     }
32268   if (morestack_ref == NULL_RTX)
32269     {
32270       morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
32271       SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
32272 					   | SYMBOL_FLAG_FUNCTION);
32273     }
32274 
32275   r0 = gen_rtx_REG (Pmode, 0);
32276   r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
32277   r12 = gen_rtx_REG (Pmode, 12);
32278   emit_insn (gen_load_split_stack_limit (r0));
32279   /* Always emit two insns here to calculate the requested stack,
32280      so that the linker can edit them when adjusting size for calling
32281      non-split-stack code.  */
32282   alloc_hi = (-allocate + 0x8000) & ~0xffffL;
32283   alloc_lo = -allocate - alloc_hi;
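  /* A worked example (illustrative): for allocate == 74565 (0x12345),
     -allocate is -74565, so alloc_hi == -0x10000 and alloc_lo == -9029;
     the addis/addi pair emitted below then computes
     r12 = r1 - 0x10000 - 9029 == r1 - 74565, and each half fits the
     16-bit immediate field of its instruction.  */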
32284   if (alloc_hi != 0)
32285     {
32286       emit_insn (gen_add3_const (r12, r1, alloc_hi));
32287       if (alloc_lo != 0)
32288 	emit_insn (gen_add3_const (r12, r12, alloc_lo));
32289       else
32290 	emit_insn (gen_nop ());
32291     }
32292   else
32293     {
32294       emit_insn (gen_add3_const (r12, r1, alloc_lo));
32295       emit_insn (gen_nop ());
32296     }
32297 
32298   compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
32299   emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
32300   ok_label = gen_label_rtx ();
32301   jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
32302 			       gen_rtx_GEU (VOIDmode, compare, const0_rtx),
32303 			       gen_rtx_LABEL_REF (VOIDmode, ok_label),
32304 			       pc_rtx);
32305   insn = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
32306   JUMP_LABEL (insn) = ok_label;
32307   /* Mark the jump as very likely to be taken.  */
32308   add_reg_br_prob_note (insn, profile_probability::very_likely ());
32309 
32310   lr = gen_rtx_REG (Pmode, LR_REGNO);
32311   insn = emit_move_insn (r0, lr);
32312   RTX_FRAME_RELATED_P (insn) = 1;
32313   insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
32314   RTX_FRAME_RELATED_P (insn) = 1;
32315 
32316   insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
32317 				   const0_rtx, const0_rtx));
32318   call_fusage = NULL_RTX;
32319   use_reg (&call_fusage, r12);
32320   /* Say the call uses r0, even though it doesn't, to stop regrename
32321      from twiddling with the insns saving lr, trashing args for cfun.
32322      The insns restoring lr are similarly protected by making
32323      split_stack_return use r0.  */
32324   use_reg (&call_fusage, r0);
32325   add_function_usage_to (insn, call_fusage);
32326   /* Indicate that this function can't jump to non-local gotos.  */
32327   make_reg_eh_region_note_nothrow_nononlocal (insn);
32328   emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
32329   insn = emit_move_insn (lr, r0);
32330   add_reg_note (insn, REG_CFA_RESTORE, lr);
32331   RTX_FRAME_RELATED_P (insn) = 1;
32332   emit_insn (gen_split_stack_return ());
32333 
32334   emit_label (ok_label);
32335   LABEL_NUSES (ok_label) = 1;
32336 }
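
/* For reference, the sequence emitted above amounts to something like
   the following sketch (registers, offsets and the exact limit load are
   illustrative and depend on the target and frame size):

	ld    r0,<limit>(r13)	# load split-stack limit (thread pointer)
	addis r12,r1,-FRAME@ha	# two linker-editable insns computing
	addi  r12,r12,-FRAME@l	#   r12 = sp - frame size
	cmpld cr7,r12,r0
	bge   cr7,0f		# enough stack: skip the call
	mflr  r0
	std   r0,16(r1)		# save LR in its regular slot
	bl    __morestack
	ld    r0,16(r1)		# restore LR
	mtlr  r0
	blr			# split_stack_return
     0:
*/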
32337 
32338 /* Return the internal arg pointer used for function incoming
32339    arguments.  When -fsplit-stack, the arg pointer is r12 so we need
32340    to copy it to a pseudo in order for it to be preserved over calls
32341    and suchlike.  We'd really like to use a pseudo here for the
32342    internal arg pointer but data-flow analysis is not prepared to
32343    accept pseudos as live at the beginning of a function.  */
32344 
32345 static rtx
32346 rs6000_internal_arg_pointer (void)
32347 {
32348   if (flag_split_stack
32349       && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
32350 	  == NULL))
32352     {
32353       if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
32354 	{
32355 	  rtx pat;
32356 
32357 	  cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
32358 	  REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
32359 
32360 	  /* Put the pseudo initialization right after the note at the
32361 	     beginning of the function.  */
32362 	  pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
32363 			     gen_rtx_REG (Pmode, 12));
32364 	  push_topmost_sequence ();
32365 	  emit_insn_after (pat, get_insns ());
32366 	  pop_topmost_sequence ();
32367 	}
32368       return plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
32369 			    FIRST_PARM_OFFSET (current_function_decl));
32370     }
32371   return virtual_incoming_args_rtx;
32372 }
32373 
32374 /* We may have to tell the dataflow pass that the split stack prologue
32375    is initializing a register.  */
32376 
32377 static void
32378 rs6000_live_on_entry (bitmap regs)
32379 {
32380   if (flag_split_stack)
32381     bitmap_set_bit (regs, 12);
32382 }
32383 
32384 /* Emit -fsplit-stack dynamic stack allocation space check.  */
32385 
32386 void
32387 rs6000_split_stack_space_check (rtx size, rtx label)
32388 {
32389   rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
32390   rtx limit = gen_reg_rtx (Pmode);
32391   rtx requested = gen_reg_rtx (Pmode);
32392   rtx cmp = gen_reg_rtx (CCUNSmode);
32393   rtx jump;
32394 
32395   emit_insn (gen_load_split_stack_limit (limit));
32396   if (CONST_INT_P (size))
32397     emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
32398   else
32399     {
32400       size = force_reg (Pmode, size);
32401       emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
32402     }
32403   emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
32404   jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
32405 			       gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
32406 			       gen_rtx_LABEL_REF (VOIDmode, label),
32407 			       pc_rtx);
32408   jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
32409   JUMP_LABEL (jump) = label;
32410 }
32411 
32412 /* A C compound statement that outputs the assembler code for a thunk
32413    function, used to implement C++ virtual function calls with
32414    multiple inheritance.  The thunk acts as a wrapper around a virtual
32415    function, adjusting the implicit object parameter before handing
32416    control off to the real function.
32417 
32418    First, emit code to add the integer DELTA to the location that
32419    contains the incoming first argument.  Assume that this argument
32420    contains a pointer, and is the one used to pass the `this' pointer
32421    in C++.  This is the incoming argument *before* the function
32422    prologue, e.g. `%o0' on a sparc.  The addition must preserve the
32423    values of all other incoming arguments.
32424 
32425    After the addition, emit code to jump to FUNCTION, which is a
32426    `FUNCTION_DECL'.  This is a direct pure jump, not a call, and does
32427    not touch the return address.  Hence returning from FUNCTION will
32428    return to whoever called the current `thunk'.
32429 
32430    The effect must be as if FUNCTION had been called directly with the
32431    adjusted first argument.  This macro is responsible for emitting
32432    all of the code for a thunk function; output_function_prologue()
32433    and output_function_epilogue() are not invoked.
32434 
32435    The THUNK_FNDECL is redundant.  (DELTA and FUNCTION have already
32436    been extracted from it.)  It might possibly be useful on some
32437    targets, but probably not.
32438 
32439    If you do not define this macro, the target-independent code in the
32440    C++ frontend will generate a less efficient heavyweight thunk that
32441    calls FUNCTION instead of jumping to it.  The generic approach does
32442    not support varargs.  */
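/* For the simplest case (a non-zero DELTA, no VCALL_OFFSET), the code
   emitted below boils down to a sketch like

	addi 3,3,16		# this += DELTA (16 is illustrative)
	b    <function>		# tail call; LR is left untouched

   where r3 holds `this' unless a structure-return pointer occupies it,
   in which case r4 is used instead.  */
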
32443 
32444 static void
32445 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
32446 			HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
32447 			tree function)
32448 {
32449   rtx this_rtx, funexp;
32450   rtx_insn *insn;
32451 
32452   reload_completed = 1;
32453   epilogue_completed = 1;
32454 
32455   /* Mark the end of the (empty) prologue.  */
32456   emit_note (NOTE_INSN_PROLOGUE_END);
32457 
32458   /* Find the "this" pointer.  If the function returns a structure,
32459      the structure return pointer is in r3.  */
32460   if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
32461     this_rtx = gen_rtx_REG (Pmode, 4);
32462   else
32463     this_rtx = gen_rtx_REG (Pmode, 3);
32464 
32465   /* Apply the constant offset, if required.  */
32466   if (delta)
32467     emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
32468 
32469   /* Apply the offset from the vtable, if required.  */
32470   if (vcall_offset)
32471     {
32472       rtx vcall_offset_rtx = GEN_INT (vcall_offset);
32473       rtx tmp = gen_rtx_REG (Pmode, 12);
32474 
32475       emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
32476       if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
32477 	{
32478 	  emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
32479 	  emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
32480 	}
32481       else
32482 	{
32483 	  rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
32484 
32485 	  emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
32486 	}
32487       emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
32488     }
32489 
32490   /* Generate a tail call to the target function.  */
32491   if (!TREE_USED (function))
32492     {
32493       assemble_external (function);
32494       TREE_USED (function) = 1;
32495     }
32496   funexp = XEXP (DECL_RTL (function), 0);
32497   funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
32498 
32499 #if TARGET_MACHO
32500   if (MACHOPIC_INDIRECT)
32501     funexp = machopic_indirect_call_target (funexp);
32502 #endif
32503 
32504   /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
32505      generate sibcall RTL explicitly.  */
32506   insn = emit_call_insn (
32507 	   gen_rtx_PARALLEL (VOIDmode,
32508 	     gen_rtvec (3,
32509 			gen_rtx_CALL (VOIDmode,
32510 				      funexp, const0_rtx),
32511 			gen_rtx_USE (VOIDmode, const0_rtx),
32512 			simple_return_rtx)));
32513   SIBLING_CALL_P (insn) = 1;
32514   emit_barrier ();
32515 
32516   /* Run just enough of rest_of_compilation to get the insns emitted.
32517      There's not really enough bulk here to make other passes such as
32518      instruction scheduling worth while.  Note that use_thunk calls
32519      assemble_start_function and assemble_end_function.  */
32520   insn = get_insns ();
32521   shorten_branches (insn);
32522   final_start_function (insn, file, 1);
32523   final (insn, file, 1);
32524   final_end_function ();
32525 
32526   reload_completed = 0;
32527   epilogue_completed = 0;
32528 }
32529 
32530 /* A quick summary of the various types of 'constant-pool tables'
32531    under PowerPC:
32532 
32533    Target	Flags		Name		One table per
32534    AIX		(none)		AIX TOC		object file
32535    AIX		-mfull-toc	AIX TOC		object file
32536    AIX		-mminimal-toc	AIX minimal TOC	translation unit
32537    SVR4/EABI	(none)		SVR4 SDATA	object file
32538    SVR4/EABI	-fpic		SVR4 pic	object file
32539    SVR4/EABI	-fPIC		SVR4 PIC	translation unit
32540    SVR4/EABI	-mrelocatable	EABI TOC	function
32541    SVR4/EABI	-maix		AIX TOC		object file
32542    SVR4/EABI	-maix -mminimal-toc
32543 				AIX minimal TOC	translation unit
32544 
32545    Name			Reg.	Set by	entries	      contains:
32546 					made by	 addrs?	fp?	sum?
32547 
32548    AIX TOC		2	crt0	as	 Y	option	option
32549    AIX minimal TOC	30	prolog	gcc	 Y	Y	option
32550    SVR4 SDATA		13	crt0	gcc	 N	Y	N
32551    SVR4 pic		30	prolog	ld	 Y	not yet	N
32552    SVR4 PIC		30	prolog	gcc	 Y	option	option
32553    EABI TOC		30	prolog	gcc	 Y	option	option
32554 
32555 */
32556 
32557 /* Hash functions for the hash table.  */
32558 
32559 static unsigned
32560 rs6000_hash_constant (rtx k)
32561 {
32562   enum rtx_code code = GET_CODE (k);
32563   machine_mode mode = GET_MODE (k);
32564   unsigned result = (code << 3) ^ mode;
32565   const char *format;
32566   int flen, fidx;
32567 
32568   format = GET_RTX_FORMAT (code);
32569   flen = strlen (format);
32570   fidx = 0;
32571 
32572   switch (code)
32573     {
32574     case LABEL_REF:
32575       return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
32576 
32577     case CONST_WIDE_INT:
32578       {
32579 	int i;
32580 	flen = CONST_WIDE_INT_NUNITS (k);
32581 	for (i = 0; i < flen; i++)
32582 	  result = result * 613 + CONST_WIDE_INT_ELT (k, i);
32583 	return result;
32584       }
32585 
32586     case CONST_DOUBLE:
32587       if (mode != VOIDmode)
32588 	return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
32589       flen = 2;
32590       break;
32591 
32592     case CODE_LABEL:
32593       fidx = 3;
32594       break;
32595 
32596     default:
32597       break;
32598     }
32599 
32600   for (; fidx < flen; fidx++)
32601     switch (format[fidx])
32602       {
32603       case 's':
32604 	{
32605 	  unsigned i, len;
32606 	  const char *str = XSTR (k, fidx);
32607 	  len = strlen (str);
32608 	  result = result * 613 + len;
32609 	  for (i = 0; i < len; i++)
32610 	    result = result * 613 + (unsigned) str[i];
32611 	  break;
32612 	}
32613       case 'u':
32614       case 'e':
32615 	result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
32616 	break;
32617       case 'i':
32618       case 'n':
32619 	result = result * 613 + (unsigned) XINT (k, fidx);
32620 	break;
32621       case 'w':
32622 	if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
32623 	  result = result * 613 + (unsigned) XWINT (k, fidx);
32624 	else
32625 	  {
32626 	    size_t i;
32627 	    for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
32628 	      result = result * 613 + (unsigned) (XWINT (k, fidx)
32629 						  >> CHAR_BIT * i);
32630 	  }
32631 	break;
32632       case '0':
32633 	break;
32634       default:
32635 	gcc_unreachable ();
32636       }
32637 
32638   return result;
32639 }
32640 
32641 hashval_t
32642 toc_hasher::hash (toc_hash_struct *thc)
32643 {
32644   return rs6000_hash_constant (thc->key) ^ thc->key_mode;
32645 }
32646 
32647 /* Compare H1 and H2 for equivalence.  */
32648 
32649 bool
32650 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
32651 {
32652   rtx r1 = h1->key;
32653   rtx r2 = h2->key;
32654 
32655   if (h1->key_mode != h2->key_mode)
32656     return 0;
32657 
32658   return rtx_equal_p (r1, r2);
32659 }
32660 
32661 /* These are the names given by the C++ front-end to vtables, and
32662    vtable-like objects.  Ideally, this logic should not be here;
32663    instead, there should be some programmatic way of inquiring as
32664    to whether or not an object is a vtable.  */
32665 
32666 #define VTABLE_NAME_P(NAME)				\
32667   (strncmp ("_vt.", name, strlen ("_vt.")) == 0		\
32668   || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0	\
32669   || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0	\
32670   || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0	\
32671   || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
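
/* For example, under the Itanium C++ ABI mangling used by GCC, a class
   Foo yields "_ZTV3Foo" (vtable), "_ZTT3Foo" (VTT), "_ZTI3Foo"
   (typeinfo) and "_ZTC..." entries (construction vtables); "_vt." is
   the old GNU v2 mangling for vtables.  */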
32672 
32673 #ifdef NO_DOLLAR_IN_LABEL
32674 /* Return a GGC-allocated character string translating dollar signs in
32675    input NAME to underscores.  Used by XCOFF ASM_OUTPUT_LABELREF.  */
32676 
32677 const char *
32678 rs6000_xcoff_strip_dollar (const char *name)
32679 {
32680   char *strip, *p;
32681   const char *q;
32682   size_t len;
32683 
32684   q = (const char *) strchr (name, '$');
32685 
32686   if (q == 0 || q == name)
32687     return name;
32688 
32689   len = strlen (name);
32690   strip = XALLOCAVEC (char, len + 1);
32691   strcpy (strip, name);
32692   p = strip + (q - name);
32693   while (p)
32694     {
32695       *p = '_';
32696       p = strchr (p + 1, '$');
32697     }
32698 
32699   return ggc_alloc_string (strip, len);
32700 }
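
/* Example: rs6000_xcoff_strip_dollar ("get$time$ns") returns
   "get_time_ns"; a NAME containing no '$', or whose first '$' is the
   leading character, is returned unchanged.  */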
32701 #endif
32702 
32703 void
32704 rs6000_output_symbol_ref (FILE *file, rtx x)
32705 {
32706   const char *name = XSTR (x, 0);
32707 
32708   /* Currently C++ toc references to vtables can be emitted before it
32709      is decided whether the vtable is public or private.  If this is
32710      the case, then the linker will eventually complain that there is
32711      a reference to an unknown section.  Thus, for vtables only,
32712      we emit the TOC reference to reference the identifier and not the
32713      symbol.  */
32714   if (VTABLE_NAME_P (name))
32715     {
32716       RS6000_OUTPUT_BASENAME (file, name);
32717     }
32718   else
32719     assemble_name (file, name);
32720 }
32721 
32722 /* Output a TOC entry.  We derive the entry name from what is being
32723    written.  */
32724 
32725 void
32726 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
32727 {
32728   char buf[256];
32729   const char *name = buf;
32730   rtx base = x;
32731   HOST_WIDE_INT offset = 0;
32732 
32733   gcc_assert (!TARGET_NO_TOC);
32734 
32735   /* When the linker won't eliminate them, don't output duplicate
32736      TOC entries (this happens on AIX if there is any kind of TOC,
32737      and on SVR4 under -fPIC or -mrelocatable).  Don't do this for
32738      CODE_LABELs.  */
32739   if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
32740     {
32741       struct toc_hash_struct *h;
32742 
32743       /* Create toc_hash_table.  This can't be done at TARGET_OPTION_OVERRIDE
32744 	 time because GGC is not initialized at that point.  */
32745       if (toc_hash_table == NULL)
32746 	toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
32747 
32748       h = ggc_alloc<toc_hash_struct> ();
32749       h->key = x;
32750       h->key_mode = mode;
32751       h->labelno = labelno;
32752 
32753       toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
32754       if (*found == NULL)
32755 	*found = h;
32756       else  /* This is indeed a duplicate.
32757 	       Set this label equal to that label.  */
32758 	{
32759 	  fputs ("\t.set ", file);
32760 	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
32761 	  fprintf (file, "%d,", labelno);
32762 	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
32763 	  fprintf (file, "%d\n", ((*found)->labelno));
32764 
32765 #ifdef HAVE_AS_TLS
32766 	  if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
32767 	      && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
32768 		  || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
32769 	    {
32770 	      fputs ("\t.set ", file);
32771 	      ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
32772 	      fprintf (file, "%d,", labelno);
32773 	      ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
32774 	      fprintf (file, "%d\n", ((*found)->labelno));
32775 	    }
32776 #endif
32777 	  return;
32778 	}
32779     }
32780 
32781   /* If we're going to put a double constant in the TOC, make sure it's
32782      aligned properly when strict alignment is on.  */
32783   if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
32784       && STRICT_ALIGNMENT
32785       && GET_MODE_BITSIZE (mode) >= 64
32786       && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
32787     ASM_OUTPUT_ALIGN (file, 3);
32789 
32790   (*targetm.asm_out.internal_label) (file, "LC", labelno);
32791 
32792   /* Handle FP constants specially.  Note that if we have a minimal
32793      TOC, things we put here aren't actually in the TOC, so we can allow
32794      FP constants.  */
32795   if (GET_CODE (x) == CONST_DOUBLE
32796       && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
32797 	  || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
32798     {
32799       long k[4];
32800 
32801       if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
32802 	REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
32803       else
32804 	REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
32805 
32806       if (TARGET_64BIT)
32807 	{
32808 	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
32809 	    fputs (DOUBLE_INT_ASM_OP, file);
32810 	  else
32811 	    fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
32812 		     k[0] & 0xffffffff, k[1] & 0xffffffff,
32813 		     k[2] & 0xffffffff, k[3] & 0xffffffff);
32814 	  fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
32815 		   k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
32816 		   k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
32817 		   k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
32818 		   k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
32819 	  return;
32820 	}
32821       else
32822 	{
32823 	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
32824 	    fputs ("\t.long ", file);
32825 	  else
32826 	    fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
32827 		     k[0] & 0xffffffff, k[1] & 0xffffffff,
32828 		     k[2] & 0xffffffff, k[3] & 0xffffffff);
32829 	  fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
32830 		   k[0] & 0xffffffff, k[1] & 0xffffffff,
32831 		   k[2] & 0xffffffff, k[3] & 0xffffffff);
32832 	  return;
32833 	}
32834     }
32835   else if (GET_CODE (x) == CONST_DOUBLE
32836 	   && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
32837     {
32838       long k[2];
32839 
32840       if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
32841 	REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
32842       else
32843 	REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
32844 
32845       if (TARGET_64BIT)
32846 	{
32847 	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
32848 	    fputs (DOUBLE_INT_ASM_OP, file);
32849 	  else
32850 	    fprintf (file, "\t.tc FD_%lx_%lx[TC],",
32851 		     k[0] & 0xffffffff, k[1] & 0xffffffff);
32852 	  fprintf (file, "0x%lx%08lx\n",
32853 		   k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
32854 		   k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
32855 	  return;
32856 	}
32857       else
32858 	{
32859 	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
32860 	    fputs ("\t.long ", file);
32861 	  else
32862 	    fprintf (file, "\t.tc FD_%lx_%lx[TC],",
32863 		     k[0] & 0xffffffff, k[1] & 0xffffffff);
32864 	  fprintf (file, "0x%lx,0x%lx\n",
32865 		   k[0] & 0xffffffff, k[1] & 0xffffffff);
32866 	  return;
32867 	}
32868     }
32869   else if (GET_CODE (x) == CONST_DOUBLE
32870 	   && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
32871     {
32872       long l;
32873 
32874       if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
32875 	REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
32876       else
32877 	REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
32878 
32879       if (TARGET_64BIT)
32880 	{
32881 	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
32882 	    fputs (DOUBLE_INT_ASM_OP, file);
32883 	  else
32884 	    fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
32885 	  if (WORDS_BIG_ENDIAN)
32886 	    fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
32887 	  else
32888 	    fprintf (file, "0x%lx\n", l & 0xffffffff);
32889 	  return;
32890 	}
32891       else
32892 	{
32893 	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
32894 	    fputs ("\t.long ", file);
32895 	  else
32896 	    fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
32897 	  fprintf (file, "0x%lx\n", l & 0xffffffff);
32898 	  return;
32899 	}
32900     }
32901   else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
32902     {
32903       unsigned HOST_WIDE_INT low;
32904       HOST_WIDE_INT high;
32905 
32906       low = INTVAL (x) & 0xffffffff;
32907       high = (HOST_WIDE_INT) INTVAL (x) >> 32;
32908 
32909       /* TOC entries are always Pmode-sized, so on big-endian targets
32910 	 smaller integer constants in the TOC need to be padded.
32911 	 (This is still a win over putting the constants in
32912 	 a separate constant pool, because then we'd have
32913 	 to have both a TOC entry _and_ the actual constant.)
32914 
32915 	 For a 32-bit target, CONST_INT values are loaded and shifted
32916 	 entirely within `low' and can be stored in one TOC entry.  */
32917 
32918       /* It would be easy to make this work, but it doesn't now.  */
32919       gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
32920 
32921       if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
32922 	{
32923 	  low |= high << 32;
32924 	  low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
32925 	  high = (HOST_WIDE_INT) low >> 32;
32926 	  low &= 0xffffffff;
32927 	}
32928 
32929       if (TARGET_64BIT)
32930 	{
32931 	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
32932 	    fputs (DOUBLE_INT_ASM_OP, file);
32933 	  else
32934 	    fprintf (file, "\t.tc ID_%lx_%lx[TC],",
32935 		     (long) high & 0xffffffff, (long) low & 0xffffffff);
32936 	  fprintf (file, "0x%lx%08lx\n",
32937 		   (long) high & 0xffffffff, (long) low & 0xffffffff);
32938 	  return;
32939 	}
32940       else
32941 	{
32942 	  if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
32943 	    {
32944 	      if (TARGET_ELF || TARGET_MINIMAL_TOC)
32945 		fputs ("\t.long ", file);
32946 	      else
32947 		fprintf (file, "\t.tc ID_%lx_%lx[TC],",
32948 			 (long) high & 0xffffffff, (long) low & 0xffffffff);
32949 	      fprintf (file, "0x%lx,0x%lx\n",
32950 		       (long) high & 0xffffffff, (long) low & 0xffffffff);
32951 	    }
32952 	  else
32953 	    {
32954 	      if (TARGET_ELF || TARGET_MINIMAL_TOC)
32955 		fputs ("\t.long ", file);
32956 	      else
32957 		fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
32958 	      fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
32959 	    }
32960 	  return;
32961 	}
32962     }
32963 
32964   if (GET_CODE (x) == CONST)
32965     {
32966       gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
32967 		  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
32968 
32969       base = XEXP (XEXP (x, 0), 0);
32970       offset = INTVAL (XEXP (XEXP (x, 0), 1));
32971     }
32972 
32973   switch (GET_CODE (base))
32974     {
32975     case SYMBOL_REF:
32976       name = XSTR (base, 0);
32977       break;
32978 
32979     case LABEL_REF:
32980       ASM_GENERATE_INTERNAL_LABEL (buf, "L",
32981 				   CODE_LABEL_NUMBER (XEXP (base, 0)));
32982       break;
32983 
32984     case CODE_LABEL:
32985       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
32986       break;
32987 
32988     default:
32989       gcc_unreachable ();
32990     }
32991 
32992   if (TARGET_ELF || TARGET_MINIMAL_TOC)
32993     fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
32994   else
32995     {
32996       fputs ("\t.tc ", file);
32997       RS6000_OUTPUT_BASENAME (file, name);
32998 
32999       if (offset < 0)
33000 	fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
33001       else if (offset)
33002 	fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
33003 
33004       /* Mark large TOC symbols on AIX with [TE] so they are mapped
33005 	 after other TOC symbols, reducing overflow of small TOC access
33006 	 to [TC] symbols.  */
33007       fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
33008 	     ? "[TE]," : "[TC],", file);
33009     }
33010 
33011   /* Currently C++ toc references to vtables can be emitted before it
33012      is decided whether the vtable is public or private.  If this is
33013      the case, then the linker will eventually complain that there is
33014      a TOC reference to an unknown section.  Thus, for vtables only,
33015      we emit the TOC reference to reference the symbol and not the
33016      section.  */
33017   if (VTABLE_NAME_P (name))
33018     {
33019       RS6000_OUTPUT_BASENAME (file, name);
33020       if (offset < 0)
33021 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
33022       else if (offset > 0)
33023 	fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
33024     }
33025   else
33026     output_addr_const (file, x);
33027 
33028 #if HAVE_AS_TLS
33029   if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF)
33030     {
33031       switch (SYMBOL_REF_TLS_MODEL (base))
33032 	{
33033 	case 0:
33034 	  break;
33035 	case TLS_MODEL_LOCAL_EXEC:
33036 	  fputs ("@le", file);
33037 	  break;
33038 	case TLS_MODEL_INITIAL_EXEC:
33039 	  fputs ("@ie", file);
33040 	  break;
33041 	/* Use global-dynamic for local-dynamic.  */
33042 	case TLS_MODEL_GLOBAL_DYNAMIC:
33043 	case TLS_MODEL_LOCAL_DYNAMIC:
33044 	  putc ('\n', file);
33045 	  (*targetm.asm_out.internal_label) (file, "LCM", labelno);
33046 	  fputs ("\t.tc .", file);
33047 	  RS6000_OUTPUT_BASENAME (file, name);
33048 	  fputs ("[TC],", file);
33049 	  output_addr_const (file, x);
33050 	  fputs ("@m", file);
33051 	  break;
33052 	default:
33053 	  gcc_unreachable ();
33054 	}
33055     }
33056 #endif
33057 
33058   putc ('\n', file);
33059 }
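
/* As a concrete illustration (assuming 64-bit AIX, where neither
   TARGET_ELF nor TARGET_MINIMAL_TOC is set), the DFmode constant 1.0
   takes the CONST_DOUBLE arm above and comes out as

	.tc FD_3ff00000_0[TC],0x3ff0000000000000

   whereas the ELF/minimal-TOC path would emit just the 64-bit integer
   directive with the same value and no .tc name.  */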
33060 
33061 /* Output an assembler pseudo-op to write an ASCII string of N characters
33062    starting at P to FILE.
33063 
33064    On the RS/6000, we have to do this using the .byte operation and
33065    write out special characters outside the quoted string.
33066    Also, the assembler is broken; very long strings are truncated,
33067    so we must artificially break them up early.  */
33068 
33069 void
33070 output_ascii (FILE *file, const char *p, int n)
33071 {
33072   char c;
33073   int i, count_string;
33074   const char *for_string = "\t.byte \"";
33075   const char *for_decimal = "\t.byte ";
33076   const char *to_close = NULL;
33077 
33078   count_string = 0;
33079   for (i = 0; i < n; i++)
33080     {
33081       c = *p++;
33082       if (c >= ' ' && c < 0177)
33083 	{
33084 	  if (for_string)
33085 	    fputs (for_string, file);
33086 	  putc (c, file);
33087 
33088 	  /* Write two quotes to get one.  */
33089 	  if (c == '"')
33090 	    {
33091 	      putc (c, file);
33092 	      ++count_string;
33093 	    }
33094 
33095 	  for_string = NULL;
33096 	  for_decimal = "\"\n\t.byte ";
33097 	  to_close = "\"\n";
33098 	  ++count_string;
33099 
33100 	  if (count_string >= 512)
33101 	    {
33102 	      fputs (to_close, file);
33103 
33104 	      for_string = "\t.byte \"";
33105 	      for_decimal = "\t.byte ";
33106 	      to_close = NULL;
33107 	      count_string = 0;
33108 	    }
33109 	}
33110       else
33111 	{
33112 	  if (for_decimal)
33113 	    fputs (for_decimal, file);
33114 	  fprintf (file, "%d", c);
33115 
33116 	  for_string = "\n\t.byte \"";
33117 	  for_decimal = ", ";
33118 	  to_close = "\n";
33119 	  count_string = 0;
33120 	}
33121     }
33122 
33123   /* Now close the string if we have written one.  Then end the line.  */
33124   if (to_close)
33125     fputs (to_close, file);
33126 }
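
/* Example: output_ascii (file, "Hi\n", 3) emits

	.byte "Hi"
	.byte 10

   printable characters are collected into quoted runs (closed every 512
   characters to stay within the assembler's limits) and everything else
   is written as a decimal .byte value.  */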
33127 
33128 /* Generate a unique section name for FILENAME for a section type
33129    represented by SECTION_DESC.  Output goes into BUF.
33130 
33131    SECTION_DESC can be any string, as long as it is different for each
33132    possible section type.
33133 
33134    We name the section in the same manner as xlc.  The name begins with an
33135    underscore followed by the filename (after stripping any leading directory
33136    names and dropping non-alphanumeric characters) with the last period
33137    and everything after it replaced by SECTION_DESC.  If FILENAME has no
33138    period, SECTION_DESC is appended to the end of the name.  */
33139 
33140 void
33141 rs6000_gen_section_name (char **buf, const char *filename,
33142 			 const char *section_desc)
33143 {
33144   const char *q, *after_last_slash, *last_period = 0;
33145   char *p;
33146   int len;
33147 
33148   after_last_slash = filename;
33149   for (q = filename; *q; q++)
33150     {
33151       if (*q == '/')
33152 	after_last_slash = q + 1;
33153       else if (*q == '.')
33154 	last_period = q;
33155     }
33156 
33157   len = strlen (after_last_slash) + strlen (section_desc) + 2;
33158   *buf = (char *) xmalloc (len);
33159 
33160   p = *buf;
33161   *p++ = '_';
33162 
33163   for (q = after_last_slash; *q; q++)
33164     {
33165       if (q == last_period)
33166 	{
33167 	  strcpy (p, section_desc);
33168 	  p += strlen (section_desc);
33169 	  break;
33170 	}
33171 
33172       else if (ISALNUM (*q))
33173 	*p++ = *q;
33174     }
33175 
33176   if (last_period == 0)
33177     strcpy (p, section_desc);
33178   else
33179     *p = '\0';
33180 }
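
/* Example: rs6000_gen_section_name (&buf, "/src/hello-world.c", "_bss_")
   stores "_helloworld_bss_" in *buf: the directory prefix is stripped,
   the non-alphanumeric '-' is dropped, and ".c" is replaced by the
   descriptor.  */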
33181 
33182 /* Emit profile function.  */
33183 
33184 void
33185 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
33186 {
33187   /* Non-standard profiling for kernels, which just saves LR then calls
33188      _mcount without worrying about arg saves.  The idea is to change
33189      the function prologue as little as possible as it isn't easy to
33190      account for arg save/restore code added just for _mcount.  */
33191   if (TARGET_PROFILE_KERNEL)
33192     return;
33193 
33194   if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
33195     {
33196 #ifndef NO_PROFILE_COUNTERS
33197 # define NO_PROFILE_COUNTERS 0
33198 #endif
33199       if (NO_PROFILE_COUNTERS)
33200 	emit_library_call (init_one_libfunc (RS6000_MCOUNT),
33201 			   LCT_NORMAL, VOIDmode);
33202       else
33203 	{
33204 	  char buf[30];
33205 	  const char *label_name;
33206 	  rtx fun;
33207 
33208 	  ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
33209 	  label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
33210 	  fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
33211 
33212 	  emit_library_call (init_one_libfunc (RS6000_MCOUNT),
33213 			     LCT_NORMAL, VOIDmode, fun, Pmode);
33214 	}
33215     }
33216   else if (DEFAULT_ABI == ABI_DARWIN)
33217     {
33218       const char *mcount_name = RS6000_MCOUNT;
33219       int caller_addr_regno = LR_REGNO;
33220 
33221       /* Be conservative and always set this, at least for now.  */
33222       crtl->uses_pic_offset_table = 1;
33223 
33224 #if TARGET_MACHO
33225       /* For PIC code, set up a stub and collect the caller's address
33226 	 from r0, which is where the prologue puts it.  */
33227       if (MACHOPIC_INDIRECT
33228 	  && crtl->uses_pic_offset_table)
33229 	caller_addr_regno = 0;
33230 #endif
33231       emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
33232 			 LCT_NORMAL, VOIDmode,
33233 			 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
33234     }
33235 }
33236 
33237 /* Write function profiler code.  */
33238 
33239 void
33240 output_function_profiler (FILE *file, int labelno)
33241 {
33242   char buf[100];
33243 
33244   switch (DEFAULT_ABI)
33245     {
33246     default:
33247       gcc_unreachable ();
33248 
33249     case ABI_V4:
33250       if (!TARGET_32BIT)
33251 	{
33252 	  warning (0, "no profiling of 64-bit code for this ABI");
33253 	  return;
33254 	}
33255       ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
33256       fprintf (file, "\tmflr %s\n", reg_names[0]);
33257       if (NO_PROFILE_COUNTERS)
33258 	{
33259 	  asm_fprintf (file, "\tstw %s,4(%s)\n",
33260 		       reg_names[0], reg_names[1]);
33261 	}
33262       else if (TARGET_SECURE_PLT && flag_pic)
33263 	{
33264 	  if (TARGET_LINK_STACK)
33265 	    {
33266 	      char name[32];
33267 	      get_ppc476_thunk_name (name);
33268 	      asm_fprintf (file, "\tbl %s\n", name);
33269 	    }
33270 	  else
33271 	    asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
33272 	  asm_fprintf (file, "\tstw %s,4(%s)\n",
33273 		       reg_names[0], reg_names[1]);
33274 	  asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
33275 	  asm_fprintf (file, "\taddis %s,%s,",
33276 		       reg_names[12], reg_names[12]);
33277 	  assemble_name (file, buf);
33278 	  asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
33279 	  assemble_name (file, buf);
33280 	  asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
33281 	}
33282       else if (flag_pic == 1)
33283 	{
33284 	  fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
33285 	  asm_fprintf (file, "\tstw %s,4(%s)\n",
33286 		       reg_names[0], reg_names[1]);
33287 	  asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
33288 	  asm_fprintf (file, "\tlwz %s,", reg_names[0]);
33289 	  assemble_name (file, buf);
33290 	  asm_fprintf (file, "@got(%s)\n", reg_names[12]);
33291 	}
33292       else if (flag_pic > 1)
33293 	{
33294 	  asm_fprintf (file, "\tstw %s,4(%s)\n",
33295 		       reg_names[0], reg_names[1]);
33296 	  /* Now, we need to get the address of the label.  */
33297 	  if (TARGET_LINK_STACK)
33298 	    {
33299 	      char name[32];
33300 	      get_ppc476_thunk_name (name);
33301 	      asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
33302 	      assemble_name (file, buf);
33303 	      fputs ("-.\n1:", file);
33304 	      asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
33305 	      asm_fprintf (file, "\taddi %s,%s,4\n",
33306 			   reg_names[11], reg_names[11]);
33307 	    }
33308 	  else
33309 	    {
33310 	      fputs ("\tbcl 20,31,1f\n\t.long ", file);
33311 	      assemble_name (file, buf);
33312 	      fputs ("-.\n1:", file);
33313 	      asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
33314 	    }
33315 	  asm_fprintf (file, "\tlwz %s,0(%s)\n",
33316 		       reg_names[0], reg_names[11]);
33317 	  asm_fprintf (file, "\tadd %s,%s,%s\n",
33318 		       reg_names[0], reg_names[0], reg_names[11]);
33319 	}
33320       else
33321 	{
33322 	  asm_fprintf (file, "\tlis %s,", reg_names[12]);
33323 	  assemble_name (file, buf);
33324 	  fputs ("@ha\n", file);
33325 	  asm_fprintf (file, "\tstw %s,4(%s)\n",
33326 		       reg_names[0], reg_names[1]);
33327 	  asm_fprintf (file, "\tla %s,", reg_names[0]);
33328 	  assemble_name (file, buf);
33329 	  asm_fprintf (file, "@l(%s)\n", reg_names[12]);
33330 	}
33331 
33332       /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH.  */
33333       fprintf (file, "\tbl %s%s\n",
33334 	       RS6000_MCOUNT, flag_pic ? "@plt" : "");
33335       break;
33336 
33337     case ABI_AIX:
33338     case ABI_ELFv2:
33339     case ABI_DARWIN:
33340       /* Don't do anything, done in output_profile_hook ().  */
33341       break;
33342     }
33343 }
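
/* For the ABI_V4 -fPIC case, for instance, the code printed above is
   roughly (sketch; <n> is the profile label number):

	mflr 0
	stw  0,4(1)
	bcl  20,31,1f
	.long .LP<n>-.
     1:	mflr 11
	lwz  0,0(11)		# r0 = offset of .LP<n> from the .long
	add  0,0,11		# r0 = &.LP<n>
	bl   _mcount@plt
*/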
33344 
33345 
33346 
33347 /* The following variable value is the last issued insn.  */
33348 
33349 static rtx_insn *last_scheduled_insn;
33350 
33351 /* The following variable helps to balance issuing of load and
33352    store instructions.  */
33353 
33354 static int load_store_pendulum;
33355 
33356 /* The following variable helps pair divide insns during scheduling.  */
33357 static int divide_cnt;
33358 /* The following variable helps pair and alternate vector and vector load
33359    insns during scheduling.  */
33360 static int vec_pairing;
33361 
33362 
33363 /* Power4 load update and store update instructions are cracked into a
33364    load or store and an integer insn which are executed in the same cycle.
33365    Branches have their own dispatch slot which does not count against the
33366    GCC issue rate, but it changes the program flow so there are no other
33367    instructions to issue in this cycle.  */
33368 
33369 static int
33370 rs6000_variable_issue_1 (rtx_insn *insn, int more)
33371 {
33372   last_scheduled_insn = insn;
33373   if (GET_CODE (PATTERN (insn)) == USE
33374       || GET_CODE (PATTERN (insn)) == CLOBBER)
33375     {
33376       cached_can_issue_more = more;
33377       return cached_can_issue_more;
33378     }
33379 
33380   if (insn_terminates_group_p (insn, current_group))
33381     {
33382       cached_can_issue_more = 0;
33383       return cached_can_issue_more;
33384     }
33385 
33386   /* If the insn has no reservation, leave the issue count unchanged.  */
33387   if (recog_memoized (insn) < 0)
33388     return more;
33389 
33390   if (rs6000_sched_groups)
33391     {
33392       if (is_microcoded_insn (insn))
33393         cached_can_issue_more = 0;
33394       else if (is_cracked_insn (insn))
33395         cached_can_issue_more = more > 2 ? more - 2 : 0;
33396       else
33397         cached_can_issue_more = more - 1;
33398 
33399       return cached_can_issue_more;
33400     }
33401 
33402   if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
33403     return 0;
33404 
33405   cached_can_issue_more = more - 1;
33406   return cached_can_issue_more;
33407 }
33408 
33409 static int
33410 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
33411 {
33412   int r = rs6000_variable_issue_1 (insn, more);
33413   if (verbose)
33414     fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
33415   return r;
33416 }
33417 
33418 /* Adjust the cost of a scheduling dependency.  Return the new cost of
33419    a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
33420 
33421 static int
33422 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
33423 		    unsigned int)
33424 {
33425   enum attr_type attr_type;
33426 
33427   if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
33428     return cost;
33429 
33430   switch (dep_type)
33431     {
33432     case REG_DEP_TRUE:
33433       {
33434         /* Data dependency; DEP_INSN writes a register that INSN reads
33435 	   some cycles later.  */
33436 
33437 	/* Separate a load from a narrower, dependent store.  */
33438 	if ((rs6000_sched_groups || rs6000_cpu_attr == CPU_POWER9)
33439 	    && GET_CODE (PATTERN (insn)) == SET
33440 	    && GET_CODE (PATTERN (dep_insn)) == SET
33441 	    && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
33442 	    && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
33443 	    && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
33444 		> GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
33445 	  return cost + 14;
33446 
33447         attr_type = get_attr_type (insn);
33448 
33449         switch (attr_type)
33450           {
33451           case TYPE_JMPREG:
33452             /* Tell the first scheduling pass about the latency between
33453                a mtctr and bctr (and mtlr and br/blr).  The first
33454                scheduling pass will not know about this latency since
33455                the mtctr instruction, which has the latency associated
33456                to it, will be generated by reload.  */
33457             return 4;
33458           case TYPE_BRANCH:
33459             /* Leave some extra cycles between a compare and its
33460                dependent branch, to inhibit expensive mispredicts.  */
33461             if ((rs6000_cpu_attr == CPU_PPC603
33462                  || rs6000_cpu_attr == CPU_PPC604
33463                  || rs6000_cpu_attr == CPU_PPC604E
33464                  || rs6000_cpu_attr == CPU_PPC620
33465                  || rs6000_cpu_attr == CPU_PPC630
33466                  || rs6000_cpu_attr == CPU_PPC750
33467                  || rs6000_cpu_attr == CPU_PPC7400
33468                  || rs6000_cpu_attr == CPU_PPC7450
33469                  || rs6000_cpu_attr == CPU_PPCE5500
33470                  || rs6000_cpu_attr == CPU_PPCE6500
33471                  || rs6000_cpu_attr == CPU_POWER4
33472                  || rs6000_cpu_attr == CPU_POWER5
33473 		 || rs6000_cpu_attr == CPU_POWER7
33474 		 || rs6000_cpu_attr == CPU_POWER8
33475 		 || rs6000_cpu_attr == CPU_POWER9
33476                  || rs6000_cpu_attr == CPU_CELL)
33477                 && recog_memoized (dep_insn)
33478                 && (INSN_CODE (dep_insn) >= 0))
33479 
33480               switch (get_attr_type (dep_insn))
33481                 {
33482                 case TYPE_CMP:
33483                 case TYPE_FPCOMPARE:
33484                 case TYPE_CR_LOGICAL:
33485                 case TYPE_DELAYED_CR:
33486 		  return cost + 2;
33487                 case TYPE_EXTS:
33488                 case TYPE_MUL:
33489 		  if (get_attr_dot (dep_insn) == DOT_YES)
33490 		    return cost + 2;
33491 		  else
33492 		    break;
33493                 case TYPE_SHIFT:
33494 		  if (get_attr_dot (dep_insn) == DOT_YES
33495 		      && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
33496 		    return cost + 2;
33497 		  else
33498 		    break;
33499 		default:
33500 		  break;
33501 		}
33502             break;
33503 
33504           case TYPE_STORE:
33505           case TYPE_FPSTORE:
33506             if ((rs6000_cpu == PROCESSOR_POWER6)
33507                 && recog_memoized (dep_insn)
33508                 && (INSN_CODE (dep_insn) >= 0))
33509               {
33510 
33511                 if (GET_CODE (PATTERN (insn)) != SET)
33512                   /* If this happens, we have to extend this to schedule
33513                      optimally.  Return default for now.  */
33514                   return cost;
33515 
33516                 /* Adjust the cost for the case where the value written
33517                    by a fixed point operation is used as the address
33518                    gen value on a store. */
33519                 switch (get_attr_type (dep_insn))
33520                   {
33521                   case TYPE_LOAD:
33522                   case TYPE_CNTLZ:
33523                     {
33524                       if (! rs6000_store_data_bypass_p (dep_insn, insn))
33525                         return get_attr_sign_extend (dep_insn)
33526                                == SIGN_EXTEND_YES ? 6 : 4;
33527                       break;
33528                     }
33529                   case TYPE_SHIFT:
33530                     {
33531                       if (! rs6000_store_data_bypass_p (dep_insn, insn))
33532                         return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
33533                                6 : 3;
33534                       break;
33535 		    }
33536                   case TYPE_INTEGER:
33537                   case TYPE_ADD:
33538                   case TYPE_LOGICAL:
33539                   case TYPE_EXTS:
33540                   case TYPE_INSERT:
33541                     {
33542                       if (! rs6000_store_data_bypass_p (dep_insn, insn))
33543                         return 3;
33544                       break;
33545                     }
33546                   case TYPE_STORE:
33547                   case TYPE_FPLOAD:
33548                   case TYPE_FPSTORE:
33549                     {
33550                       if (get_attr_update (dep_insn) == UPDATE_YES
33551                           && ! rs6000_store_data_bypass_p (dep_insn, insn))
33552                         return 3;
33553                       break;
33554                     }
33555                   case TYPE_MUL:
33556                     {
33557                       if (! rs6000_store_data_bypass_p (dep_insn, insn))
33558                         return 17;
33559                       break;
33560                     }
33561                   case TYPE_DIV:
33562                     {
33563                       if (! rs6000_store_data_bypass_p (dep_insn, insn))
33564                         return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
33565                       break;
33566                     }
33567                   default:
33568                     break;
33569                   }
33570               }
33571 	    break;
33572 
33573           case TYPE_LOAD:
33574             if ((rs6000_cpu == PROCESSOR_POWER6)
33575                 && recog_memoized (dep_insn)
33576                 && (INSN_CODE (dep_insn) >= 0))
33577               {
33578 
33579                 /* Adjust the cost for the case where the value written
33580                    by a fixed point instruction is used within the address
33581                    gen portion of a subsequent load(u)(x) */
33582                 switch (get_attr_type (dep_insn))
33583                   {
33584                   case TYPE_LOAD:
33585                   case TYPE_CNTLZ:
33586                     {
33587                       if (set_to_load_agen (dep_insn, insn))
33588                         return get_attr_sign_extend (dep_insn)
33589                                == SIGN_EXTEND_YES ? 6 : 4;
33590                       break;
33591                     }
33592                   case TYPE_SHIFT:
33593                     {
33594                       if (set_to_load_agen (dep_insn, insn))
33595                         return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
33596                                6 : 3;
33597                       break;
33598 		    }
33599                   case TYPE_INTEGER:
33600                   case TYPE_ADD:
33601                   case TYPE_LOGICAL:
33602                   case TYPE_EXTS:
33603                   case TYPE_INSERT:
33604                     {
33605                       if (set_to_load_agen (dep_insn, insn))
33606                         return 3;
33607                       break;
33608                     }
33609                   case TYPE_STORE:
33610                   case TYPE_FPLOAD:
33611                   case TYPE_FPSTORE:
33612                     {
33613                       if (get_attr_update (dep_insn) == UPDATE_YES
33614                           && set_to_load_agen (dep_insn, insn))
33615                         return 3;
33616                       break;
33617                     }
33618                   case TYPE_MUL:
33619                     {
33620                       if (set_to_load_agen (dep_insn, insn))
33621                         return 17;
33622                       break;
33623                     }
33624                   case TYPE_DIV:
33625                     {
33626                       if (set_to_load_agen (dep_insn, insn))
33627                         return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
33628                       break;
33629                     }
33630                   default:
33631                     break;
33632                   }
33633               }
33634             break;
33635 
33636           case TYPE_FPLOAD:
33637             if ((rs6000_cpu == PROCESSOR_POWER6)
33638                 && get_attr_update (insn) == UPDATE_NO
33639                 && recog_memoized (dep_insn)
33640                 && (INSN_CODE (dep_insn) >= 0)
33641                 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
33642               return 2;
33643 
33644           default:
33645             break;
33646           }
33647 
33648 	/* Fall out to return default cost.  */
33649       }
33650       break;
33651 
33652     case REG_DEP_OUTPUT:
33653       /* Output dependency; DEP_INSN writes a register that INSN writes some
33654 	 cycles later.  */
33655       if ((rs6000_cpu == PROCESSOR_POWER6)
33656           && recog_memoized (dep_insn)
33657           && (INSN_CODE (dep_insn) >= 0))
33658         {
33659           attr_type = get_attr_type (insn);
33660 
33661           switch (attr_type)
33662             {
33663             case TYPE_FP:
33664             case TYPE_FPSIMPLE:
33665               if (get_attr_type (dep_insn) == TYPE_FP
33666 		  || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
33667                 return 1;
33668               break;
33669             case TYPE_FPLOAD:
33670               if (get_attr_update (insn) == UPDATE_NO
33671                   && get_attr_type (dep_insn) == TYPE_MFFGPR)
33672                 return 2;
33673               break;
33674             default:
33675               break;
33676             }
33677         }
33678       /* Fall through, no cost for output dependency.  */
33679       /* FALLTHRU */
33680 
33681     case REG_DEP_ANTI:
33682       /* Anti dependency; DEP_INSN reads a register that INSN writes some
33683 	 cycles later.  */
33684       return 0;
33685 
33686     default:
33687       gcc_unreachable ();
33688     }
33689 
33690   return cost;
33691 }
33692 
33693 /* Debug version of rs6000_adjust_cost.  */
33694 
33695 static int
33696 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
33697 			  int cost, unsigned int dw)
33698 {
33699   int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
33700 
33701   if (ret != cost)
33702     {
33703       const char *dep;
33704 
33705       switch (dep_type)
33706 	{
33707 	default:	     dep = "unknown dependency"; break;
33708 	case REG_DEP_TRUE:   dep = "data dependency";	 break;
33709 	case REG_DEP_OUTPUT: dep = "output dependency";  break;
33710 	case REG_DEP_ANTI:   dep = "anti dependency";	 break;
33711 	}
33712 
33713       fprintf (stderr,
33714 	       "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
33715 	       "%s, insn:\n", ret, cost, dep);
33716 
33717       debug_rtx (insn);
33718     }
33719 
33720   return ret;
33721 }
33722 
33723 /* Return true if INSN is microcoded.
33724    Return false otherwise.  */
33725 
33726 static bool
33727 is_microcoded_insn (rtx_insn *insn)
33728 {
33729   if (!insn || !NONDEBUG_INSN_P (insn)
33730       || GET_CODE (PATTERN (insn)) == USE
33731       || GET_CODE (PATTERN (insn)) == CLOBBER)
33732     return false;
33733 
33734   if (rs6000_cpu_attr == CPU_CELL)
33735     return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
33736 
33737   if (rs6000_sched_groups
33738       && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
33739     {
33740       enum attr_type type = get_attr_type (insn);
33741       if ((type == TYPE_LOAD
33742 	   && get_attr_update (insn) == UPDATE_YES
33743 	   && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
33744 	  || ((type == TYPE_LOAD || type == TYPE_STORE)
33745 	      && get_attr_update (insn) == UPDATE_YES
33746 	      && get_attr_indexed (insn) == INDEXED_YES)
33747 	  || type == TYPE_MFCR)
33748 	return true;
33749     }
33750 
33751   return false;
33752 }
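
/* Illustrative example (the mnemonic is chosen for illustration, not
   taken from this file): on POWER4/POWER5 an update-form indexed load
   such as "lwzux r3,r4,r5" has UPDATE_YES and INDEXED_YES, so the
   checks above classify it as microcoded.  */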
33753 
33754 /* The function returns true if INSN is cracked into 2 instructions
33755    by the processor (and therefore occupies 2 issue slots).  */
33756 
33757 static bool
33758 is_cracked_insn (rtx_insn *insn)
33759 {
33760   if (!insn || !NONDEBUG_INSN_P (insn)
33761       || GET_CODE (PATTERN (insn)) == USE
33762       || GET_CODE (PATTERN (insn)) == CLOBBER)
33763     return false;
33764 
33765   if (rs6000_sched_groups
33766       && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
33767     {
33768       enum attr_type type = get_attr_type (insn);
33769       if ((type == TYPE_LOAD
33770 	   && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
33771 	   && get_attr_update (insn) == UPDATE_NO)
33772 	  || (type == TYPE_LOAD
33773 	      && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
33774 	      && get_attr_update (insn) == UPDATE_YES
33775 	      && get_attr_indexed (insn) == INDEXED_NO)
33776 	  || (type == TYPE_STORE
33777 	      && get_attr_update (insn) == UPDATE_YES
33778 	      && get_attr_indexed (insn) == INDEXED_NO)
33779 	  || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
33780 	      && get_attr_update (insn) == UPDATE_YES)
33781 	  || type == TYPE_DELAYED_CR
33782 	  || (type == TYPE_EXTS
33783 	      && get_attr_dot (insn) == DOT_YES)
33784 	  || (type == TYPE_SHIFT
33785 	      && get_attr_dot (insn) == DOT_YES
33786 	      && get_attr_var_shift (insn) == VAR_SHIFT_NO)
33787 	  || (type == TYPE_MUL
33788 	      && get_attr_dot (insn) == DOT_YES)
33789 	  || type == TYPE_DIV
33790 	  || (type == TYPE_INSERT
33791 	      && get_attr_size (insn) == SIZE_32))
33792 	return true;
33793     }
33794 
33795   return false;
33796 }
33797 
33798 /* The function returns true if INSN can be issued only from
33799    the branch slot.  */
33800 
33801 static bool
33802 is_branch_slot_insn (rtx_insn *insn)
33803 {
33804   if (!insn || !NONDEBUG_INSN_P (insn)
33805       || GET_CODE (PATTERN (insn)) == USE
33806       || GET_CODE (PATTERN (insn)) == CLOBBER)
33807     return false;
33808 
33809   if (rs6000_sched_groups)
33810     {
33811       enum attr_type type = get_attr_type (insn);
33812       if (type == TYPE_BRANCH || type == TYPE_JMPREG)
33813 	return true;
33814       return false;
33815     }
33816 
33817   return false;
33818 }
33819 
33820 /* The function returns true if OUT_INSN sets a value that is
33821    used in the address generation computation of IN_INSN.  */
33822 static bool
33823 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
33824 {
33825   rtx out_set, in_set;
33826 
33827   /* For performance reasons, only handle the simple case where
33828      both insns are a single_set.  */
33829   out_set = single_set (out_insn);
33830   if (out_set)
33831     {
33832       in_set = single_set (in_insn);
33833       if (in_set)
33834         return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
33835     }
33836 
33837   return false;
33838 }
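
/* Illustrative dependence that set_to_load_agen detects (the assembly
   is a hypothetical sketch):

       addi r9,r9,8      # out_insn: SET_DEST is r9
       lwz  r3,0(r9)     # in_insn: r9 feeds the load's address

   reg_mentioned_p finds r9 inside the load's SET_SRC, so the function
   returns true and the POWER6 address-generation cost adjustment
   above is applied.  */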
33839 
33840 /* Try to determine base/offset/size parts of the given MEM.
33841    Return true if successful, false if any of the values couldn't
33842    be determined.
33843 
33844    This function only looks for REG or REG+CONST address forms.
33845    A REG+REG address form will return false.  */
33846 
33847 static bool
33848 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
33849 		  HOST_WIDE_INT *size)
33850 {
33851   rtx addr_rtx;
33852   if (MEM_SIZE_KNOWN_P (mem))
33853     *size = MEM_SIZE (mem);
33854   else
33855     return false;
33856 
33857   addr_rtx = XEXP (mem, 0);
33858   if (GET_CODE (addr_rtx) == PRE_MODIFY)
33859     addr_rtx = XEXP (addr_rtx, 1);
33860 
33861   *offset = 0;
33862   while (GET_CODE (addr_rtx) == PLUS
33863 	 && CONST_INT_P (XEXP (addr_rtx, 1)))
33864     {
33865       *offset += INTVAL (XEXP (addr_rtx, 1));
33866       addr_rtx = XEXP (addr_rtx, 0);
33867     }
33868   if (!REG_P (addr_rtx))
33869     return false;
33870 
33871   *base = addr_rtx;
33872   return true;
33873 }
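
/* For example, a MEM like (mem:DI (plus:DI (reg:DI 9) (const_int 16)))
   with a known 8-byte size yields *base = (reg:DI 9), *offset = 16 and
   *size = 8, whereas an indexed (REG+REG) address fails the REG_P test
   and the function returns false.  */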
33874 
33875 /* Return true if the target storage location of MEM1 is adjacent
33876    to the target storage location of MEM2.  */
33878 
33879 static bool
33880 adjacent_mem_locations (rtx mem1, rtx mem2)
33881 {
33882   rtx reg1, reg2;
33883   HOST_WIDE_INT off1, size1, off2, size2;
33884 
33885   if (get_memref_parts (mem1, &reg1, &off1, &size1)
33886       && get_memref_parts (mem2, &reg2, &off2, &size2))
33887     return ((REGNO (reg1) == REGNO (reg2))
33888 	    && ((off1 + size1 == off2)
33889 		|| (off2 + size2 == off1)));
33890 
33891   return false;
33892 }
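
/* Worked example: two memrefs off the same base register with
   (off1 = 0, size1 = 8) and (off2 = 8, size2 = 8) satisfy
   off1 + size1 == off2 and are therefore adjacent; with off2 = 12 a
   4-byte gap remains and the function returns false.  */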
33893 
33894 /* This function returns true if it can be determined that the two MEM
33895    locations overlap by at least 1 byte based on base reg/offset/size. */
33896 
33897 static bool
33898 mem_locations_overlap (rtx mem1, rtx mem2)
33899 {
33900   rtx reg1, reg2;
33901   HOST_WIDE_INT off1, size1, off2, size2;
33902 
33903   if (get_memref_parts (mem1, &reg1, &off1, &size1)
33904       && get_memref_parts (mem2, &reg2, &off2, &size2))
33905     return ((REGNO (reg1) == REGNO (reg2))
33906 	    && (((off1 <= off2) && (off1 + size1 > off2))
33907 		|| ((off2 <= off1) && (off2 + size2 > off1))));
33908 
33909   return false;
33910 }
33911 
33912 /* A C statement (sans semicolon) to update the integer scheduling
33913    priority INSN_PRIORITY (INSN).  Increase the priority to execute
33914    INSN earlier; reduce the priority to execute INSN later.  Do not
33915    define this macro if you do not need to adjust the scheduling
33916    priorities of insns.  */
33917 
33918 static int
33919 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
33920 {
33921   rtx load_mem, str_mem;
33922   /* On machines (like the 750) which have asymmetric integer units,
33923      where one integer unit can do multiply and divides and the other
33924      can't, reduce the priority of multiply/divide so it is scheduled
33925      before other integer operations.  */
33926 
33927 #if 0
33928   if (! INSN_P (insn))
33929     return priority;
33930 
33931   if (GET_CODE (PATTERN (insn)) == USE)
33932     return priority;
33933 
33934   switch (rs6000_cpu_attr) {
33935   case CPU_PPC750:
33936     switch (get_attr_type (insn))
33937       {
33938       default:
33939 	break;
33940 
33941       case TYPE_MUL:
33942       case TYPE_DIV:
33943 	fprintf (stderr, "priority was %#x (%d) before adjustment\n",
33944 		 priority, priority);
33945 	if (priority >= 0 && priority < 0x01000000)
33946 	  priority >>= 3;
33947 	break;
33948       }
33949   }
33950 #endif
33951 
33952   if (insn_must_be_first_in_group (insn)
33953       && reload_completed
33954       && current_sched_info->sched_max_insns_priority
33955       && rs6000_sched_restricted_insns_priority)
33956     {
33957 
33958       /* Prioritize insns that can be dispatched only in the first
33959 	 dispatch slot.  */
33960       if (rs6000_sched_restricted_insns_priority == 1)
33961 	/* Attach highest priority to insn. This means that in
33962 	   haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
33963 	   precede 'priority' (critical path) considerations.  */
33964 	return current_sched_info->sched_max_insns_priority;
33965       else if (rs6000_sched_restricted_insns_priority == 2)
33966 	/* Increase priority of insn by a minimal amount. This means that in
33967 	   haifa-sched.c:ready_sort(), only 'priority' (critical path)
33968 	   considerations precede dispatch-slot restriction considerations.  */
33969 	return (priority + 1);
33970     }
33971 
33972   if (rs6000_cpu == PROCESSOR_POWER6
33973       && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
33974           || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
33975     /* Attach highest priority to insn if the scheduler has just issued two
33976        stores and this instruction is a load, or two loads and this instruction
33977        is a store. Power6 wants loads and stores scheduled alternately
33978        when possible */
33979     return current_sched_info->sched_max_insns_priority;
33980 
33981   return priority;
33982 }
33983 
33984 /* Return true if the instruction is nonpipelined on the Cell. */
33985 static bool
33986 is_nonpipeline_insn (rtx_insn *insn)
33987 {
33988   enum attr_type type;
33989   if (!insn || !NONDEBUG_INSN_P (insn)
33990       || GET_CODE (PATTERN (insn)) == USE
33991       || GET_CODE (PATTERN (insn)) == CLOBBER)
33992     return false;
33993 
33994   type = get_attr_type (insn);
33995   if (type == TYPE_MUL
33996       || type == TYPE_DIV
33997       || type == TYPE_SDIV
33998       || type == TYPE_DDIV
33999       || type == TYPE_SSQRT
34000       || type == TYPE_DSQRT
34001       || type == TYPE_MFCR
34002       || type == TYPE_MFCRF
34003       || type == TYPE_MFJMPR)
34004     {
34005       return true;
34006     }
34007   return false;
34008 }
34009 
34010 
34011 /* Return how many instructions the machine can issue per cycle.  */
34012 
34013 static int
34014 rs6000_issue_rate (void)
34015 {
34016   /* Unless scheduling for register pressure, use issue rate of 1 for
34017      first scheduling pass to decrease degradation.  */
34018   if (!reload_completed && !flag_sched_pressure)
34019     return 1;
34020 
34021   switch (rs6000_cpu_attr) {
34022   case CPU_RS64A:
34023   case CPU_PPC601: /* ? */
34024   case CPU_PPC7450:
34025     return 3;
34026   case CPU_PPC440:
34027   case CPU_PPC603:
34028   case CPU_PPC750:
34029   case CPU_PPC7400:
34030   case CPU_PPC8540:
34031   case CPU_PPC8548:
34032   case CPU_CELL:
34033   case CPU_PPCE300C2:
34034   case CPU_PPCE300C3:
34035   case CPU_PPCE500MC:
34036   case CPU_PPCE500MC64:
34037   case CPU_PPCE5500:
34038   case CPU_PPCE6500:
34039   case CPU_TITAN:
34040     return 2;
34041   case CPU_PPC476:
34042   case CPU_PPC604:
34043   case CPU_PPC604E:
34044   case CPU_PPC620:
34045   case CPU_PPC630:
34046     return 4;
34047   case CPU_POWER4:
34048   case CPU_POWER5:
34049   case CPU_POWER6:
34050   case CPU_POWER7:
34051     return 5;
34052   case CPU_POWER8:
34053     return 7;
34054   case CPU_POWER9:
34055     return 6;
34056   default:
34057     return 1;
34058   }
34059 }
34060 
34061 /* Return how many instructions to look ahead for better insn
34062    scheduling.  */
34063 
34064 static int
34065 rs6000_use_sched_lookahead (void)
34066 {
34067   switch (rs6000_cpu_attr)
34068     {
34069     case CPU_PPC8540:
34070     case CPU_PPC8548:
34071       return 4;
34072 
34073     case CPU_CELL:
34074       return (reload_completed ? 8 : 0);
34075 
34076     default:
34077       return 0;
34078     }
34079 }
34080 
34081 /* We are choosing insn from the ready queue.  Return zero if INSN can be
34082    chosen.  */
34083 static int
34084 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
34085 {
34086   if (ready_index == 0)
34087     return 0;
34088 
34089   if (rs6000_cpu_attr != CPU_CELL)
34090     return 0;
34091 
34092   gcc_assert (insn != NULL_RTX && INSN_P (insn));
34093 
34094   if (!reload_completed
34095       || is_nonpipeline_insn (insn)
34096       || is_microcoded_insn (insn))
34097     return 1;
34098 
34099   return 0;
34100 }
34101 
34102 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
34103    and return true.  */
34104 
34105 static bool
34106 find_mem_ref (rtx pat, rtx *mem_ref)
34107 {
34108   const char * fmt;
34109   int i, j;
34110 
34111   /* stack_tie does not produce any real memory traffic.  */
34112   if (tie_operand (pat, VOIDmode))
34113     return false;
34114 
34115   if (GET_CODE (pat) == MEM)
34116     {
34117       *mem_ref = pat;
34118       return true;
34119     }
34120 
34121   /* Recursively process the pattern.  */
34122   fmt = GET_RTX_FORMAT (GET_CODE (pat));
34123 
34124   for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
34125     {
34126       if (fmt[i] == 'e')
34127 	{
34128 	  if (find_mem_ref (XEXP (pat, i), mem_ref))
34129 	    return true;
34130 	}
34131       else if (fmt[i] == 'E')
34132 	for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
34133 	  {
34134 	    if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
34135 	      return true;
34136 	  }
34137     }
34138 
34139   return false;
34140 }
34141 
34142 /* Determine if PAT is a PATTERN of a load insn.  */
34143 
34144 static bool
34145 is_load_insn1 (rtx pat, rtx *load_mem)
34146 {
34147   if (!pat)
34148     return false;
34149 
34150   if (GET_CODE (pat) == SET)
34151     return find_mem_ref (SET_SRC (pat), load_mem);
34152 
34153   if (GET_CODE (pat) == PARALLEL)
34154     {
34155       int i;
34156 
34157       for (i = 0; i < XVECLEN (pat, 0); i++)
34158 	if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
34159 	  return true;
34160     }
34161 
34162   return false;
34163 }
34164 
34165 /* Determine if INSN loads from memory.  */
34166 
34167 static bool
34168 is_load_insn (rtx insn, rtx *load_mem)
34169 {
34170   if (!insn || !INSN_P (insn))
34171     return false;
34172 
34173   if (CALL_P (insn))
34174     return false;
34175 
34176   return is_load_insn1 (PATTERN (insn), load_mem);
34177 }
34178 
34179 /* Determine if PAT is a PATTERN of a store insn.  */
34180 
34181 static bool
34182 is_store_insn1 (rtx pat, rtx *str_mem)
34183 {
34184   if (!pat)
34185     return false;
34186 
34187   if (GET_CODE (pat) == SET)
34188     return find_mem_ref (SET_DEST (pat), str_mem);
34189 
34190   if (GET_CODE (pat) == PARALLEL)
34191     {
34192       int i;
34193 
34194       for (i = 0; i < XVECLEN (pat, 0); i++)
34195 	if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
34196 	  return true;
34197     }
34198 
34199   return false;
34200 }
34201 
34202 /* Determine if INSN stores to memory.  */
34203 
34204 static bool
34205 is_store_insn (rtx insn, rtx *str_mem)
34206 {
34207   if (!insn || !INSN_P (insn))
34208     return false;
34209 
34210   return is_store_insn1 (PATTERN (insn), str_mem);
34211 }
34212 
34213 /* Return whether TYPE is a Power9 pairable vector instruction type.  */
34214 
34215 static bool
34216 is_power9_pairable_vec_type (enum attr_type type)
34217 {
34218   switch (type)
34219     {
34220       case TYPE_VECSIMPLE:
34221       case TYPE_VECCOMPLEX:
34222       case TYPE_VECDIV:
34223       case TYPE_VECCMP:
34224       case TYPE_VECPERM:
34225       case TYPE_VECFLOAT:
34226       case TYPE_VECFDIV:
34227       case TYPE_VECDOUBLE:
34228 	return true;
34229       default:
34230 	break;
34231     }
34232   return false;
34233 }
34234 
34235 /* Returns whether the dependence between INSN and NEXT is considered
34236    costly by the given target.  */
34237 
34238 static bool
34239 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
34240 {
34241   rtx insn;
34242   rtx next;
34243   rtx load_mem, str_mem;
34244 
34245   /* If the flag is not enabled - no dependence is considered costly;
34246      allow all dependent insns in the same group.
34247      This is the most aggressive option.  */
34248   if (rs6000_sched_costly_dep == no_dep_costly)
34249     return false;
34250 
34251   /* If the flag is set to 1 - a dependence is always considered costly;
34252      do not allow dependent instructions in the same group.
34253      This is the most conservative option.  */
34254   if (rs6000_sched_costly_dep == all_deps_costly)
34255     return true;
34256 
34257   insn = DEP_PRO (dep);
34258   next = DEP_CON (dep);
34259 
34260   if (rs6000_sched_costly_dep == store_to_load_dep_costly
34261       && is_load_insn (next, &load_mem)
34262       && is_store_insn (insn, &str_mem))
34263     /* Prevent load after store in the same group.  */
34264     return true;
34265 
34266   if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
34267       && is_load_insn (next, &load_mem)
34268       && is_store_insn (insn, &str_mem)
34269       && DEP_TYPE (dep) == REG_DEP_TRUE
34270       && mem_locations_overlap (str_mem, load_mem))
34271      /* Prevent load after store in the same group if it is a true
34272 	dependence.  */
34273      return true;
34274 
34275   /* The flag is set to X; dependences with latency >= X are considered costly,
34276      and will not be scheduled in the same group.  */
34277   if (rs6000_sched_costly_dep <= max_dep_latency
34278       && ((cost - distance) >= (int)rs6000_sched_costly_dep))
34279     return true;
34280 
34281   return false;
34282 }
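
/* Worked example for the latency threshold above: with
   rs6000_sched_costly_dep set to 3 (e.g. via the -msched-costly-dep
   option), a dependence of cost 4 at distance 1 gives
   cost - distance = 3 >= 3, so the two insns are kept in separate
   dispatch groups.  */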
34283 
34284 /* Return the next insn after INSN that is found before TAIL is reached,
34285    skipping any "non-active" insns - insns that will not actually occupy
34286    an issue slot.  Return NULL_RTX if such an insn is not found.  */
34287 
34288 static rtx_insn *
34289 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
34290 {
34291   if (insn == NULL_RTX || insn == tail)
34292     return NULL;
34293 
34294   while (1)
34295     {
34296       insn = NEXT_INSN (insn);
34297       if (insn == NULL_RTX || insn == tail)
34298 	return NULL;
34299 
34300       if (CALL_P (insn)
34301 	  || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
34302 	  || (NONJUMP_INSN_P (insn)
34303 	      && GET_CODE (PATTERN (insn)) != USE
34304 	      && GET_CODE (PATTERN (insn)) != CLOBBER
34305 	      && INSN_CODE (insn) != CODE_FOR_stack_tie))
34306 	break;
34307     }
34308   return insn;
34309 }
34310 
34311 /* Do Power9-specific sched_reorder2 reordering of the ready list.  */
34312 
34313 static int
34314 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
34315 {
34316   int pos;
34317   int i;
34318   rtx_insn *tmp;
34319   enum attr_type type, type2;
34320 
34321   type = get_attr_type (last_scheduled_insn);
34322 
34323   /* Try to issue fixed point divides back-to-back in pairs so they will be
34324      routed to separate execution units and execute in parallel.  */
34325   if (type == TYPE_DIV && divide_cnt == 0)
34326     {
34327       /* First divide has been scheduled.  */
34328       divide_cnt = 1;
34329 
34330       /* Scan the ready list looking for another divide, if found move it
34331 	 to the end of the list so it is chosen next.  */
34332       pos = lastpos;
34333       while (pos >= 0)
34334 	{
34335 	  if (recog_memoized (ready[pos]) >= 0
34336 	      && get_attr_type (ready[pos]) == TYPE_DIV)
34337 	    {
34338 	      tmp = ready[pos];
34339 	      for (i = pos; i < lastpos; i++)
34340 		ready[i] = ready[i + 1];
34341 	      ready[lastpos] = tmp;
34342 	      break;
34343 	    }
34344 	  pos--;
34345 	}
34346     }
34347   else
34348     {
34349       /* Last insn was the 2nd divide or not a divide, reset the counter.  */
34350       divide_cnt = 0;
34351 
34352       /* The best dispatch throughput for vector and vector load insns can be
34353 	 achieved by interleaving a vector and vector load such that they'll
34354 	 dispatch to the same superslice. If this pairing cannot be achieved
34355 	 then it is best to pair vector insns together and vector load insns
34356 	 together.
34357 
34358 	 To aid in this pairing, vec_pairing maintains the current state with
34359 	 the following values:
34360 
34361 	     0  : Initial state, no vecload/vector pairing has been started.
34362 
34363 	     1  : A vecload or vector insn has been issued and a candidate for
34364 		  pairing has been found and moved to the end of the ready
34365 		  list.  */
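      /* For example, given a hypothetical ready list { store, vecadd,
	 load } (highest priority last) right after a single vecload has
	 issued, the scan below finds the vecadd (a pairable vector
	 type), moves it to the end of the list so it issues next, and
	 sets vec_pairing = 1.  */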
34366       if (type == TYPE_VECLOAD)
34367 	{
34368 	  /* Issued a vecload.  */
34369 	  if (vec_pairing == 0)
34370 	    {
34371 	      int vecload_pos = -1;
34372 	      /* We issued a single vecload, look for a vector insn to pair it
34373 		 with.  If one isn't found, try to pair another vecload.  */
34374 	      pos = lastpos;
34375 	      while (pos >= 0)
34376 		{
34377 		  if (recog_memoized (ready[pos]) >= 0)
34378 		    {
34379 		      type2 = get_attr_type (ready[pos]);
34380 		      if (is_power9_pairable_vec_type (type2))
34381 			{
34382 			  /* Found a vector insn to pair with, move it to the
34383 			     end of the ready list so it is scheduled next.  */
34384 			  tmp = ready[pos];
34385 			  for (i = pos; i < lastpos; i++)
34386 			    ready[i] = ready[i + 1];
34387 			  ready[lastpos] = tmp;
34388 			  vec_pairing = 1;
34389 			  return cached_can_issue_more;
34390 			}
34391 		      else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
34392 			/* Remember position of first vecload seen.  */
34393 			vecload_pos = pos;
34394 		    }
34395 		  pos--;
34396 		}
34397 	      if (vecload_pos >= 0)
34398 		{
34399 		  /* Didn't find a vector to pair with but did find a vecload,
34400 		     move it to the end of the ready list.  */
34401 		  tmp = ready[vecload_pos];
34402 		  for (i = vecload_pos; i < lastpos; i++)
34403 		    ready[i] = ready[i + 1];
34404 		  ready[lastpos] = tmp;
34405 		  vec_pairing = 1;
34406 		  return cached_can_issue_more;
34407 		}
34408 	    }
34409 	}
34410       else if (is_power9_pairable_vec_type (type))
34411 	{
34412 	  /* Issued a vector operation.  */
34413 	  if (vec_pairing == 0)
34414 	    {
34415 	      int vec_pos = -1;
34416 	      /* We issued a single vector insn, look for a vecload to pair it
34417 		 with.  If one isn't found, try to pair another vector.  */
34418 	      pos = lastpos;
34419 	      while (pos >= 0)
34420 		{
34421 		  if (recog_memoized (ready[pos]) >= 0)
34422 		    {
34423 		      type2 = get_attr_type (ready[pos]);
34424 		      if (type2 == TYPE_VECLOAD)
34425 			{
34426 			  /* Found a vecload insn to pair with, move it to the
34427 			     end of the ready list so it is scheduled next.  */
34428 			  tmp = ready[pos];
34429 			  for (i = pos; i < lastpos; i++)
34430 			    ready[i] = ready[i + 1];
34431 			  ready[lastpos] = tmp;
34432 			  vec_pairing = 1;
34433 			  return cached_can_issue_more;
34434 			}
34435 		      else if (is_power9_pairable_vec_type (type2)
34436 			       && vec_pos == -1)
34437 			/* Remember position of first vector insn seen.  */
34438 			vec_pos = pos;
34439 		    }
34440 		  pos--;
34441 		}
34442 	      if (vec_pos >= 0)
34443 		{
34444 		  /* Didn't find a vecload to pair with but did find a vector
34445 		     insn, move it to the end of the ready list.  */
34446 		  tmp = ready[vec_pos];
34447 		  for (i = vec_pos; i < lastpos; i++)
34448 		    ready[i] = ready[i + 1];
34449 		  ready[lastpos] = tmp;
34450 		  vec_pairing = 1;
34451 		  return cached_can_issue_more;
34452 		}
34453 	    }
34454 	}
34455 
34456       /* We've either finished a vec/vecload pair, couldn't find an insn to
34457 	 continue the current pair, or the last insn had nothing to do
34458 	 with pairing.  In any case, reset the state.  */
34459       vec_pairing = 0;
34460     }
34461 
34462   return cached_can_issue_more;
34463 }
34464 
34465 /* We are about to begin issuing insns for this clock cycle. */
34466 
34467 static int
34468 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
34469                         rtx_insn **ready ATTRIBUTE_UNUSED,
34470                         int *pn_ready ATTRIBUTE_UNUSED,
34471 		        int clock_var ATTRIBUTE_UNUSED)
34472 {
34473   int n_ready = *pn_ready;
34474 
34475   if (sched_verbose)
34476     fprintf (dump, "// rs6000_sched_reorder :\n");
34477 
34478   /* Reorder the ready list, if the second to last ready insn
34479      is a nonpipeline insn.  */
34480   if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
34481   {
34482     if (is_nonpipeline_insn (ready[n_ready - 1])
34483         && (recog_memoized (ready[n_ready - 2]) > 0))
34484       /* Simply swap first two insns.  */
34485       std::swap (ready[n_ready - 1], ready[n_ready - 2]);
34486   }
34487 
34488   if (rs6000_cpu == PROCESSOR_POWER6)
34489     load_store_pendulum = 0;
34490 
34491   return rs6000_issue_rate ();
34492 }
34493 
34494 /* Like rs6000_sched_reorder, but called after issuing each insn.  */
34495 
34496 static int
34497 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
34498 		         int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
34499 {
34500   if (sched_verbose)
34501     fprintf (dump, "// rs6000_sched_reorder2 :\n");
34502 
34503   /* For Power6, we need to handle some special cases to try and keep the
34504      store queue from overflowing and triggering expensive flushes.
34505 
34506      This code monitors how load and store instructions are being issued
34507      and skews the ready list one way or the other to increase the likelihood
34508      that a desired instruction is issued at the proper time.
34509 
34510      A couple of things are done.  First, we maintain a "load_store_pendulum"
34511      to track the current state of load/store issue.
34512 
34513        - If the pendulum is at zero, then no loads or stores have been
34514          issued in the current cycle so we do nothing.
34515 
34516        - If the pendulum is 1, then a single load has been issued in this
34517          cycle and we attempt to locate another load in the ready list to
34518          issue with it.
34519 
34520        - If the pendulum is -2, then two stores have already been
34521          issued in this cycle, so we increase the priority of the first load
34522          in the ready list to increase its likelihood of being chosen first
34523          in the next cycle.
34524 
34525        - If the pendulum is -1, then a single store has been issued in this
34526          cycle and we attempt to locate another store in the ready list to
34527          issue with it, preferring a store to an adjacent memory location to
34528          facilitate store pairing in the store queue.
34529 
34530        - If the pendulum is 2, then two loads have already been
34531          issued in this cycle, so we increase the priority of the first store
34532          in the ready list to increase its likelihood of being chosen first
34533          in the next cycle.
34534 
34535        - If the pendulum < -2 or > 2, then do nothing.
34536 
34537        Note: This code covers the most common scenarios.  There exist
34538              non-load/store instructions which make use of the LSU and which
34539              would need to be accounted for to strictly model the behavior
34540              of the machine.  Those instructions are currently unaccounted
34541              for to help minimize compile time overhead of this code.
34542    */
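  /* Hypothetical trace of the pendulum: issuing a store swings it to
     -1; a second store in the same cycle swings it to -2, at which
     point the first load found on the ready list gets a priority boost
     so the next cycle is more likely to begin with a load.  */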
34543   if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
34544     {
34545       int pos;
34546       int i;
34547       rtx_insn *tmp;
34548       rtx load_mem, str_mem;
34549 
34550       if (is_store_insn (last_scheduled_insn, &str_mem))
34551         /* Issuing a store, swing the load_store_pendulum to the left */
34552         load_store_pendulum--;
34553       else if (is_load_insn (last_scheduled_insn, &load_mem))
34554         /* Issuing a load, swing the load_store_pendulum to the right */
34555         load_store_pendulum++;
34556       else
34557         return cached_can_issue_more;
34558 
34559       /* If the pendulum is balanced, or there is only one instruction on
34560          the ready list, then all is well, so return. */
34561       if ((load_store_pendulum == 0) || (*pn_ready <= 1))
34562         return cached_can_issue_more;
34563 
34564       if (load_store_pendulum == 1)
34565         {
34566           /* A load has been issued in this cycle.  Scan the ready list
34567              for another load to issue with it */
34568           pos = *pn_ready-1;
34569 
34570           while (pos >= 0)
34571             {
34572               if (is_load_insn (ready[pos], &load_mem))
34573                 {
34574                   /* Found a load.  Move it to the head of the ready list,
34575                      and adjust its priority so that it is more likely to
34576                      stay there */
34577                   tmp = ready[pos];
34578                   for (i=pos; i<*pn_ready-1; i++)
34579                     ready[i] = ready[i + 1];
34580                   ready[*pn_ready-1] = tmp;
34581 
34582                   if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
34583                     INSN_PRIORITY (tmp)++;
34584                   break;
34585                 }
34586               pos--;
34587             }
34588         }
34589       else if (load_store_pendulum == -2)
34590         {
34591           /* Two stores have been issued in this cycle.  Increase the
34592              priority of the first load in the ready list to favor it for
34593              issuing in the next cycle. */
34594           pos = *pn_ready-1;
34595 
34596           while (pos >= 0)
34597             {
34598               if (is_load_insn (ready[pos], &load_mem)
34599                   && !sel_sched_p ()
34600 		  && INSN_PRIORITY_KNOWN (ready[pos]))
34601                 {
34602                   INSN_PRIORITY (ready[pos])++;
34603 
34604                   /* Adjust the pendulum to account for the fact that a load
34605                      was found and increased in priority.  This is to prevent
34606                      increasing the priority of multiple loads */
34607                   load_store_pendulum--;
34608 
34609                   break;
34610                 }
34611               pos--;
34612             }
34613         }
34614       else if (load_store_pendulum == -1)
34615         {
34616           /* A store has been issued in this cycle.  Scan the ready list for
34617              another store to issue with it, preferring a store to an adjacent
34618              memory location */
34619           int first_store_pos = -1;
34620 
34621           pos = *pn_ready-1;
34622 
34623           while (pos >= 0)
34624             {
34625               if (is_store_insn (ready[pos], &str_mem))
34626                 {
34627 		  rtx str_mem2;
34628                   /* Maintain the index of the first store found on the
34629                      list */
34630                   if (first_store_pos == -1)
34631                     first_store_pos = pos;
34632 
34633                   if (is_store_insn (last_scheduled_insn, &str_mem2)
34634                       && adjacent_mem_locations (str_mem, str_mem2))
34635                     {
34636                       /* Found an adjacent store.  Move it to the head of the
34637                          ready list, and adjust its priority so that it is
34638                          more likely to stay there */
34639                       tmp = ready[pos];
34640                       for (i=pos; i<*pn_ready-1; i++)
34641                         ready[i] = ready[i + 1];
34642                       ready[*pn_ready-1] = tmp;
34643 
34644                       if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
34645                         INSN_PRIORITY (tmp)++;
34646 
34647                       first_store_pos = -1;
34648 
34649                       break;
34650                     }
34651                 }
34652               pos--;
34653             }
34654 
34655           if (first_store_pos >= 0)
34656             {
34657               /* An adjacent store wasn't found, but a non-adjacent store was,
34658                  so move the non-adjacent store to the front of the ready
34659                  list, and adjust its priority so that it is more likely to
34660                  stay there. */
34661               tmp = ready[first_store_pos];
34662               for (i=first_store_pos; i<*pn_ready-1; i++)
34663                 ready[i] = ready[i + 1];
34664               ready[*pn_ready-1] = tmp;
34665               if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
34666                 INSN_PRIORITY (tmp)++;
34667             }
34668         }
34669       else if (load_store_pendulum == 2)
34670         {
34671           /* Two loads have been issued in this cycle.  Increase the priority
34672              of the first store in the ready list to favor it for issuing in
34673              the next cycle.  */
34674           pos = *pn_ready-1;
34675 
34676           while (pos >= 0)
34677             {
34678               if (is_store_insn (ready[pos], &str_mem)
34679                   && !sel_sched_p ()
34680 		  && INSN_PRIORITY_KNOWN (ready[pos]))
34681                 {
34682                   INSN_PRIORITY (ready[pos])++;
34683 
34684                   /* Adjust the pendulum to account for the fact that a store
34685                      was found and increased in priority.  This is to prevent
34686                      increasing the priority of multiple stores */
34687                   load_store_pendulum++;
34688 
34689                   break;
34690                 }
34691               pos--;
34692             }
34693         }
34694     }
34695 
34696   /* Do Power9 dependent reordering if necessary.  */
34697   if (rs6000_cpu == PROCESSOR_POWER9 && last_scheduled_insn
34698       && recog_memoized (last_scheduled_insn) >= 0)
34699     return power9_sched_reorder2 (ready, *pn_ready - 1);
34700 
34701   return cached_can_issue_more;
34702 }
34703 
34704 /* Return whether the presence of INSN causes a dispatch group termination
34705    of group WHICH_GROUP.
34706 
34707    If WHICH_GROUP == current_group, this function will return true if INSN
34708    causes the termination of the current group (i.e., the dispatch group to
34709    which INSN belongs). This means that INSN will be the last insn in the
34710    group it belongs to.
34711 
34712    If WHICH_GROUP == previous_group, this function will return true if INSN
34713    causes the termination of the previous group (i.e., the dispatch group that
34714    precedes the group to which INSN belongs).  This means that INSN will be
34715    the first insn in the group it belongs to.  */
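
/* Note that an insn for which both insn_must_be_first_in_group and
   insn_must_be_last_in_group hold effectively forms a dispatch group by
   itself, so the first && last check below returns true for either
   value of WHICH_GROUP.  */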
34716 
34717 static bool
34718 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
34719 {
34720   bool first, last;
34721 
34722   if (! insn)
34723     return false;
34724 
34725   first = insn_must_be_first_in_group (insn);
34726   last = insn_must_be_last_in_group (insn);
34727 
34728   if (first && last)
34729     return true;
34730 
34731   if (which_group == current_group)
34732     return last;
34733   else if (which_group == previous_group)
34734     return first;
34735 
34736   return false;
34737 }
34738 
34739 
34740 static bool
34741 insn_must_be_first_in_group (rtx_insn *insn)
34742 {
34743   enum attr_type type;
34744 
34745   if (!insn
34746       || NOTE_P (insn)
34747       || DEBUG_INSN_P (insn)
34748       || GET_CODE (PATTERN (insn)) == USE
34749       || GET_CODE (PATTERN (insn)) == CLOBBER)
34750     return false;
34751 
34752   switch (rs6000_cpu)
34753     {
34754     case PROCESSOR_POWER5:
34755       if (is_cracked_insn (insn))
34756         return true;
34757       /* FALLTHRU */
34758     case PROCESSOR_POWER4:
34759       if (is_microcoded_insn (insn))
34760         return true;
34761 
34762       if (!rs6000_sched_groups)
34763         return false;
34764 
34765       type = get_attr_type (insn);
34766 
34767       switch (type)
34768         {
34769         case TYPE_MFCR:
34770         case TYPE_MFCRF:
34771         case TYPE_MTCR:
34772         case TYPE_DELAYED_CR:
34773         case TYPE_CR_LOGICAL:
34774         case TYPE_MTJMPR:
34775         case TYPE_MFJMPR:
34776         case TYPE_DIV:
34777         case TYPE_LOAD_L:
34778         case TYPE_STORE_C:
34779         case TYPE_ISYNC:
34780         case TYPE_SYNC:
34781           return true;
34782         default:
34783           break;
34784         }
34785       break;
34786     case PROCESSOR_POWER6:
34787       type = get_attr_type (insn);
34788 
34789       switch (type)
34790         {
34791         case TYPE_EXTS:
34792         case TYPE_CNTLZ:
34793         case TYPE_TRAP:
34794         case TYPE_MUL:
34795         case TYPE_INSERT:
34796         case TYPE_FPCOMPARE:
34797         case TYPE_MFCR:
34798         case TYPE_MTCR:
34799         case TYPE_MFJMPR:
34800         case TYPE_MTJMPR:
34801         case TYPE_ISYNC:
34802         case TYPE_SYNC:
34803         case TYPE_LOAD_L:
34804         case TYPE_STORE_C:
34805           return true;
34806         case TYPE_SHIFT:
34807           if (get_attr_dot (insn) == DOT_NO
34808               || get_attr_var_shift (insn) == VAR_SHIFT_NO)
34809             return true;
34810           else
34811             break;
34812         case TYPE_DIV:
34813           if (get_attr_size (insn) == SIZE_32)
34814             return true;
34815           else
34816             break;
34817         case TYPE_LOAD:
34818         case TYPE_STORE:
34819         case TYPE_FPLOAD:
34820         case TYPE_FPSTORE:
34821           if (get_attr_update (insn) == UPDATE_YES)
34822             return true;
34823           else
34824             break;
34825         default:
34826           break;
34827         }
34828       break;
34829     case PROCESSOR_POWER7:
34830       type = get_attr_type (insn);
34831 
34832       switch (type)
34833         {
34834         case TYPE_CR_LOGICAL:
34835         case TYPE_MFCR:
34836         case TYPE_MFCRF:
34837         case TYPE_MTCR:
34838         case TYPE_DIV:
34839         case TYPE_ISYNC:
34840         case TYPE_LOAD_L:
34841         case TYPE_STORE_C:
34842         case TYPE_MFJMPR:
34843         case TYPE_MTJMPR:
34844           return true;
34845         case TYPE_MUL:
34846         case TYPE_SHIFT:
34847         case TYPE_EXTS:
34848           if (get_attr_dot (insn) == DOT_YES)
34849             return true;
34850           else
34851             break;
34852         case TYPE_LOAD:
34853           if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
34854               || get_attr_update (insn) == UPDATE_YES)
34855             return true;
34856           else
34857             break;
34858         case TYPE_STORE:
34859         case TYPE_FPLOAD:
34860         case TYPE_FPSTORE:
34861           if (get_attr_update (insn) == UPDATE_YES)
34862             return true;
34863           else
34864             break;
34865         default:
34866           break;
34867         }
34868       break;
34869     case PROCESSOR_POWER8:
34870       type = get_attr_type (insn);
34871 
34872       switch (type)
34873         {
34874         case TYPE_CR_LOGICAL:
34875         case TYPE_DELAYED_CR:
34876         case TYPE_MFCR:
34877         case TYPE_MFCRF:
34878         case TYPE_MTCR:
34879         case TYPE_SYNC:
34880         case TYPE_ISYNC:
34881         case TYPE_LOAD_L:
34882         case TYPE_STORE_C:
34883         case TYPE_VECSTORE:
34884         case TYPE_MFJMPR:
34885         case TYPE_MTJMPR:
34886           return true;
34887         case TYPE_SHIFT:
34888         case TYPE_EXTS:
34889         case TYPE_MUL:
34890           if (get_attr_dot (insn) == DOT_YES)
34891             return true;
34892           else
34893             break;
34894         case TYPE_LOAD:
34895           if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
34896               || get_attr_update (insn) == UPDATE_YES)
34897             return true;
34898           else
34899             break;
34900         case TYPE_STORE:
34901           if (get_attr_update (insn) == UPDATE_YES
34902               && get_attr_indexed (insn) == INDEXED_YES)
34903             return true;
34904           else
34905             break;
34906         default:
34907           break;
34908         }
34909       break;
34910     default:
34911       break;
34912     }
34913 
34914   return false;
34915 }
34916 
34917 static bool
34918 insn_must_be_last_in_group (rtx_insn *insn)
34919 {
34920   enum attr_type type;
34921 
34922   if (!insn
34923       || NOTE_P (insn)
34924       || DEBUG_INSN_P (insn)
34925       || GET_CODE (PATTERN (insn)) == USE
34926       || GET_CODE (PATTERN (insn)) == CLOBBER)
34927     return false;
34928 
34929   switch (rs6000_cpu) {
34930   case PROCESSOR_POWER4:
34931   case PROCESSOR_POWER5:
34932     if (is_microcoded_insn (insn))
34933       return true;
34934 
34935     if (is_branch_slot_insn (insn))
34936       return true;
34937 
34938     break;
34939   case PROCESSOR_POWER6:
34940     type = get_attr_type (insn);
34941 
34942     switch (type)
34943       {
34944       case TYPE_EXTS:
34945       case TYPE_CNTLZ:
34946       case TYPE_TRAP:
34947       case TYPE_MUL:
34948       case TYPE_FPCOMPARE:
34949       case TYPE_MFCR:
34950       case TYPE_MTCR:
34951       case TYPE_MFJMPR:
34952       case TYPE_MTJMPR:
34953       case TYPE_ISYNC:
34954       case TYPE_SYNC:
34955       case TYPE_LOAD_L:
34956       case TYPE_STORE_C:
34957         return true;
34958       case TYPE_SHIFT:
34959         if (get_attr_dot (insn) == DOT_NO
34960             || get_attr_var_shift (insn) == VAR_SHIFT_NO)
34961           return true;
34962         else
34963           break;
34964       case TYPE_DIV:
34965         if (get_attr_size (insn) == SIZE_32)
34966           return true;
34967         else
34968           break;
34969       default:
34970         break;
34971     }
34972     break;
34973   case PROCESSOR_POWER7:
34974     type = get_attr_type (insn);
34975 
34976     switch (type)
34977       {
34978       case TYPE_ISYNC:
34979       case TYPE_SYNC:
34980       case TYPE_LOAD_L:
34981       case TYPE_STORE_C:
34982         return true;
34983       case TYPE_LOAD:
34984         if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
34985             && get_attr_update (insn) == UPDATE_YES)
34986           return true;
34987         else
34988           break;
34989       case TYPE_STORE:
34990         if (get_attr_update (insn) == UPDATE_YES
34991             && get_attr_indexed (insn) == INDEXED_YES)
34992           return true;
34993         else
34994           break;
34995       default:
34996         break;
34997     }
34998     break;
34999   case PROCESSOR_POWER8:
35000     type = get_attr_type (insn);
35001 
35002     switch (type)
35003       {
35004       case TYPE_MFCR:
35005       case TYPE_MTCR:
35006       case TYPE_ISYNC:
35007       case TYPE_SYNC:
35008       case TYPE_LOAD_L:
35009       case TYPE_STORE_C:
35010         return true;
35011       case TYPE_LOAD:
35012         if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
35013             && get_attr_update (insn) == UPDATE_YES)
35014           return true;
35015         else
35016           break;
35017       case TYPE_STORE:
35018         if (get_attr_update (insn) == UPDATE_YES
35019             && get_attr_indexed (insn) == INDEXED_YES)
35020           return true;
35021         else
35022           break;
35023       default:
35024         break;
35025     }
35026     break;
35027   default:
35028     break;
35029   }
35030 
35031   return false;
35032 }
35033 
35034 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
35035    dispatch group) from the insns in GROUP_INSNS.  Return false otherwise.  */
35036 
35037 static bool
35038 is_costly_group (rtx *group_insns, rtx next_insn)
35039 {
35040   int i;
35041   int issue_rate = rs6000_issue_rate ();
35042 
35043   for (i = 0; i < issue_rate; i++)
35044     {
35045       sd_iterator_def sd_it;
35046       dep_t dep;
35047       rtx insn = group_insns[i];
35048 
35049       if (!insn)
35050 	continue;
35051 
35052       FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
35053 	{
35054 	  rtx next = DEP_CON (dep);
35055 
35056 	  if (next == next_insn
35057 	      && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
35058 	    return true;
35059 	}
35060     }
35061 
35062   return false;
35063 }
35064 
35065 /* Utility function used by redefine_groups.
35066    Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
35067    in the same dispatch group.  If so, insert nops before NEXT_INSN, in order
35068    to keep it "far" (in a separate group) from GROUP_INSNS, following
35069    one of the following schemes, depending on the value of the flag
35070    -minsert-sched-nops = X:
35071    (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
35072        in order to force NEXT_INSN into a separate group.
35073    (2) X < sched_finish_regroup_exact: insert exactly X nops.
35074    GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
35075    insertion (has a group just ended, how many vacant issue slots remain in the
35076    last group, and how many dispatch groups were encountered so far).  */
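
/* Worked example for scheme (1), assuming issue_rate == 5 and a CPU
   without a special group-ending nop: if three insns occupy the current
   group and NEXT_INSN is not a branch, can_issue_more is 2; one slot is
   reserved for the branch-only slot, so a single nop fills the
   remaining slot and NEXT_INSN is forced into a new group.  */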
35077 
35078 static int
35079 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
35080 		 rtx_insn *next_insn, bool *group_end, int can_issue_more,
35081 		 int *group_count)
35082 {
35083   rtx nop;
35084   bool force;
35085   int issue_rate = rs6000_issue_rate ();
35086   bool end = *group_end;
35087   int i;
35088 
35089   if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
35090     return can_issue_more;
35091 
35092   if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
35093     return can_issue_more;
35094 
35095   force = is_costly_group (group_insns, next_insn);
35096   if (!force)
35097     return can_issue_more;
35098 
35099   if (sched_verbose > 6)
35100     fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
35101 	     *group_count, can_issue_more);
35102 
35103   if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
35104     {
35105       if (*group_end)
35106 	can_issue_more = 0;
35107 
35108       /* Since only a branch can be issued in the last issue_slot, it is
35109 	 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
35110 	 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
35111 	 in this case the last nop will start a new group and the branch
35112 	 will be forced to the new group.  */
35113       if (can_issue_more && !is_branch_slot_insn (next_insn))
35114 	can_issue_more--;
35115 
35116       /* Do we have a special group ending nop? */
35117       if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
35118 	  || rs6000_cpu_attr == CPU_POWER8)
35119 	{
35120 	  nop = gen_group_ending_nop ();
35121 	  emit_insn_before (nop, next_insn);
35122 	  can_issue_more = 0;
35123 	}
35124       else
35125 	while (can_issue_more > 0)
35126 	  {
35127 	    nop = gen_nop ();
35128 	    emit_insn_before (nop, next_insn);
35129 	    can_issue_more--;
35130 	  }
35131 
35132       *group_end = true;
35133       return 0;
35134     }
35135 
35136   if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
35137     {
35138       int n_nops = rs6000_sched_insert_nops;
35139 
35140       /* Nops can't be issued from the branch slot, so the effective
35141 	 issue_rate for nops is 'issue_rate - 1'.  */
35142       if (can_issue_more == 0)
35143 	can_issue_more = issue_rate;
35144       can_issue_more--;
35145       if (can_issue_more == 0)
35146 	{
35147 	  can_issue_more = issue_rate - 1;
35148 	  (*group_count)++;
35149 	  end = true;
35150 	  for (i = 0; i < issue_rate; i++)
35151 	    {
35152 	      group_insns[i] = 0;
35153 	    }
35154 	}
35155 
35156       while (n_nops > 0)
35157 	{
35158 	  nop = gen_nop ();
35159 	  emit_insn_before (nop, next_insn);
35160 	  if (can_issue_more == issue_rate - 1) /* new group begins */
35161 	    end = false;
35162 	  can_issue_more--;
35163 	  if (can_issue_more == 0)
35164 	    {
35165 	      can_issue_more = issue_rate - 1;
35166 	      (*group_count)++;
35167 	      end = true;
35168 	      for (i = 0; i < issue_rate; i++)
35169 		{
35170 		  group_insns[i] = 0;
35171 		}
35172 	    }
35173 	  n_nops--;
35174 	}
35175 
35176       /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1').  */
35177       can_issue_more++;
35178 
35179       /* Is next_insn going to start a new group?  */
35180       *group_end
35181 	= (end
35182 	   || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
35183 	   || (can_issue_more <= 2 && is_cracked_insn (next_insn))
35184 	   || (can_issue_more < issue_rate &&
35185 	       insn_terminates_group_p (next_insn, previous_group)));
35186       if (*group_end && end)
35187 	(*group_count)--;
35188 
35189       if (sched_verbose > 6)
35190 	fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
35191 		 *group_count, can_issue_more);
35192       return can_issue_more;
35193     }
35194 
35195   return can_issue_more;
35196 }
35197 
35198 /* This function tries to sync the dispatch groups that the compiler "sees"
35199    with the dispatch groups that the processor dispatcher is expected to
35200    form in practice.  It tries to achieve this synchronization by forcing the
35201    estimated processor grouping on the compiler (as opposed to the function
35202    'pad_groups' which tries to force the scheduler's grouping on the processor).
35203 
35204    The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
35205    examines the (estimated) dispatch groups that will be formed by the processor
35206    dispatcher.  It marks these group boundaries to reflect the estimated
35207    processor grouping, overriding the grouping that the scheduler had marked.
35208    Depending on the value of the flag '-minsert-sched-nops' this function can
35209    force certain insns into separate groups or force a certain distance between
35210    them by inserting nops, for example, if there exists a "costly dependence"
35211    between the insns.
35212 
35213    The function estimates the group boundaries that the processor will form as
35214    follows:  It keeps track of how many vacant issue slots are available after
35215    each insn.  A subsequent insn will start a new group if one of the following
35216    4 cases applies:
35217    - no more vacant issue slots remain in the current dispatch group.
35218    - only the last issue slot, which is the branch slot, is vacant, but the next
35219      insn is not a branch.
35220    - only the last 2 or fewer issue slots, including the branch slot, are vacant,
35221      which means that a cracked insn (which occupies two issue slots) can't be
35222      issued in this group.
35223    - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
35224      start a new group.  */
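
/* For instance, with issue_rate == 5 and four insns already placed in
   the current group, a cracked next insn (needing two slots) triggers
   the third case above, so it is estimated to start a new group.  */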
35225 
35226 static int
35227 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
35228 		 rtx_insn *tail)
35229 {
35230   rtx_insn *insn, *next_insn;
35231   int issue_rate;
35232   int can_issue_more;
35233   int slot, i;
35234   bool group_end;
35235   int group_count = 0;
35236   rtx *group_insns;
35237 
35238   /* Initialize.  */
35239   issue_rate = rs6000_issue_rate ();
35240   group_insns = XALLOCAVEC (rtx, issue_rate);
35241   for (i = 0; i < issue_rate; i++)
35242     {
35243       group_insns[i] = 0;
35244     }
35245   can_issue_more = issue_rate;
35246   slot = 0;
35247   insn = get_next_active_insn (prev_head_insn, tail);
35248   group_end = false;
35249 
35250   while (insn != NULL_RTX)
35251     {
35252       slot = (issue_rate - can_issue_more);
35253       group_insns[slot] = insn;
35254       can_issue_more =
35255 	rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
35256       if (insn_terminates_group_p (insn, current_group))
35257 	can_issue_more = 0;
35258 
35259       next_insn = get_next_active_insn (insn, tail);
35260       if (next_insn == NULL_RTX)
35261 	return group_count + 1;
35262 
35263       /* Is next_insn going to start a new group?  */
35264       group_end
35265 	= (can_issue_more == 0
35266 	   || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
35267 	   || (can_issue_more <= 2 && is_cracked_insn (next_insn))
35268 	   || (can_issue_more < issue_rate &&
35269 	       insn_terminates_group_p (next_insn, previous_group)));
35270 
35271       can_issue_more = force_new_group (sched_verbose, dump, group_insns,
35272 					next_insn, &group_end, can_issue_more,
35273 					&group_count);
35274 
35275       if (group_end)
35276 	{
35277 	  group_count++;
35278 	  can_issue_more = 0;
35279 	  for (i = 0; i < issue_rate; i++)
35280 	    {
35281 	      group_insns[i] = 0;
35282 	    }
35283 	}
35284 
35285       if (GET_MODE (next_insn) == TImode && can_issue_more)
35286 	PUT_MODE (next_insn, VOIDmode);
35287       else if (!can_issue_more && GET_MODE (next_insn) != TImode)
35288 	PUT_MODE (next_insn, TImode);
35289 
35290       insn = next_insn;
35291       if (can_issue_more == 0)
35292 	can_issue_more = issue_rate;
35293     } /* while */
35294 
35295   return group_count;
35296 }
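
/* Illustrative note, not from the original sources: the loop above
   records its group decisions in the insn stream by giving the first
   insn of each estimated dispatch group TImode and clearing the mode
   of insns that do not start a group (the PUT_MODE calls).  E.g.,
   assuming issue_rate == 4 and no insn that forces an early break,
   the sequence I1 I2 I3 I4 I5 would be marked

     I1/TImode  I2  I3  I4  I5/TImode

   i.e. two groups, {I1 I2 I3 I4} and {I5}.  */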

/* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
   dispatch group boundaries that the scheduler had marked.  Pad with nops
   any dispatch groups which have vacant issue slots, in order to force the
   scheduler's grouping on the processor dispatcher.  The function
   returns the number of dispatch groups found.  */

static int
pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
	    rtx_insn *tail)
{
  rtx_insn *insn, *next_insn;
  rtx nop;
  int issue_rate;
  int can_issue_more;
  int group_end;
  int group_count = 0;

  /* Initialize issue_rate.  */
  issue_rate = rs6000_issue_rate ();
  can_issue_more = issue_rate;

  insn = get_next_active_insn (prev_head_insn, tail);
  next_insn = get_next_active_insn (insn, tail);

  while (insn != NULL_RTX)
    {
      can_issue_more =
	rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);

      group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);

      if (next_insn == NULL_RTX)
	break;

      if (group_end)
	{
	  /* If the scheduler had marked group termination at this location
	     (between insn and next_insn), and neither insn nor next_insn will
	     force group termination, pad the group with nops to force group
	     termination.  */
	  if (can_issue_more
	      && (rs6000_sched_insert_nops == sched_finish_pad_groups)
	      && !insn_terminates_group_p (insn, current_group)
	      && !insn_terminates_group_p (next_insn, previous_group))
	    {
	      if (!is_branch_slot_insn (next_insn))
		can_issue_more--;

	      while (can_issue_more)
		{
		  nop = gen_nop ();
		  emit_insn_before (nop, next_insn);
		  can_issue_more--;
		}
	    }

	  can_issue_more = issue_rate;
	  group_count++;
	}

      insn = next_insn;
      next_insn = get_next_active_insn (insn, tail);
    }

  return group_count;
}
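
/* Worked example, illustrative only: assume issue_rate == 5 (four
   issue slots plus the branch slot) and that the scheduler marked a
   group boundary with can_issue_more == 3 remaining.  If next_insn is
   not a branch, the branch slot is given up first (the single
   can_issue_more-- above, since a non-branch cannot occupy it) and
   the two remaining vacant slots are filled with nops, so the
   processor dispatcher is forced to end the group exactly where the
   scheduler did.  */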

/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
		   int sched_verbose ATTRIBUTE_UNUSED,
		   int max_ready ATTRIBUTE_UNUSED)
{
  last_scheduled_insn = NULL;
  load_store_pendulum = 0;
  divide_cnt = 0;
  vec_pairing = 0;
}

/* The following function is called at the end of scheduling BB.
   After reload, it inserts nops to enforce insn group bundling.  */

static void
rs6000_sched_finish (FILE *dump, int sched_verbose)
{
  int n_groups;

  if (sched_verbose)
    fprintf (dump, "=== Finishing schedule.\n");

  if (reload_completed && rs6000_sched_groups)
    {
      /* Do not run sched_finish hook when selective scheduling enabled.  */
      if (sel_sched_p ())
	return;

      if (rs6000_sched_insert_nops == sched_finish_none)
	return;

      if (rs6000_sched_insert_nops == sched_finish_pad_groups)
	n_groups = pad_groups (dump, sched_verbose,
			       current_sched_info->prev_head,
			       current_sched_info->next_tail);
      else
	n_groups = redefine_groups (dump, sched_verbose,
				    current_sched_info->prev_head,
				    current_sched_info->next_tail);

      if (sched_verbose >= 6)
	{
	  fprintf (dump, "ngroups = %d\n", n_groups);
	  print_rtl (dump, current_sched_info->prev_head);
	  fprintf (dump, "Done finish_sched\n");
	}
    }
}

struct rs6000_sched_context
{
  short cached_can_issue_more;
  rtx_insn *last_scheduled_insn;
  int load_store_pendulum;
  int divide_cnt;
  int vec_pairing;
};

typedef struct rs6000_sched_context rs6000_sched_context_def;
typedef rs6000_sched_context_def *rs6000_sched_context_t;

/* Allocate store for new scheduling context.  */
static void *
rs6000_alloc_sched_context (void)
{
  return xmalloc (sizeof (rs6000_sched_context_def));
}

/* If CLEAN_P is true, initialize _SC with clean data; otherwise
   initialize it from the global context.  */
static void
rs6000_init_sched_context (void *_sc, bool clean_p)
{
  rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;

  if (clean_p)
    {
      sc->cached_can_issue_more = 0;
      sc->last_scheduled_insn = NULL;
      sc->load_store_pendulum = 0;
      sc->divide_cnt = 0;
      sc->vec_pairing = 0;
    }
  else
    {
      sc->cached_can_issue_more = cached_can_issue_more;
      sc->last_scheduled_insn = last_scheduled_insn;
      sc->load_store_pendulum = load_store_pendulum;
      sc->divide_cnt = divide_cnt;
      sc->vec_pairing = vec_pairing;
    }
}

/* Sets the global scheduling context to the one pointed to by _SC.  */
static void
rs6000_set_sched_context (void *_sc)
{
  rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;

  gcc_assert (sc != NULL);

  cached_can_issue_more = sc->cached_can_issue_more;
  last_scheduled_insn = sc->last_scheduled_insn;
  load_store_pendulum = sc->load_store_pendulum;
  divide_cnt = sc->divide_cnt;
  vec_pairing = sc->vec_pairing;
}

/* Free _SC.  */
static void
rs6000_free_sched_context (void *_sc)
{
  gcc_assert (_sc != NULL);

  free (_sc);
}

static bool
rs6000_sched_can_speculate_insn (rtx_insn *insn)
{
  switch (get_attr_type (insn))
    {
    case TYPE_DIV:
    case TYPE_SDIV:
    case TYPE_DDIV:
    case TYPE_VECDIV:
    case TYPE_SSQRT:
    case TYPE_DSQRT:
      return false;

    default:
      return true;
    }
}

/* Length in units of the trampoline for entering a nested function.  */

int
rs6000_trampoline_size (void)
{
  int ret = 0;

  switch (DEFAULT_ABI)
    {
    default:
      gcc_unreachable ();

    case ABI_AIX:
      ret = (TARGET_32BIT) ? 12 : 24;
      break;

    case ABI_ELFv2:
      gcc_assert (!TARGET_32BIT);
      ret = 32;
      break;

    case ABI_DARWIN:
    case ABI_V4:
      ret = (TARGET_32BIT) ? 40 : 48;
      break;
    }

  return ret;
}

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */

static void
rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
{
  int regsize = (TARGET_32BIT) ? 4 : 8;
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx ctx_reg = force_reg (Pmode, cxt);
  rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));

  switch (DEFAULT_ABI)
    {
    default:
      gcc_unreachable ();

    /* Under AIX, just build the 3-word function descriptor.  */
    case ABI_AIX:
      {
	rtx fnmem, fn_reg, toc_reg;

	if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
	  error ("You cannot take the address of a nested function if you use "
		 "the -mno-pointers-to-nested-functions option.");

	fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
	fn_reg = gen_reg_rtx (Pmode);
	toc_reg = gen_reg_rtx (Pmode);

  /* Macro to shorten the code expansions below.  */
# define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)

	m_tramp = replace_equiv_address (m_tramp, addr);

	emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
	emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
	emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
	emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
	emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);

# undef MEM_PLUS
      }
      break;

    /* Under V.4/eabi/darwin, __trampoline_setup does the real work.  */
    case ABI_ELFv2:
    case ABI_DARWIN:
    case ABI_V4:
      emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
			 LCT_NORMAL, VOIDmode,
			 addr, Pmode,
			 GEN_INT (rs6000_trampoline_size ()), SImode,
			 fnaddr, Pmode,
			 ctx_reg, Pmode);
      break;
    }
}
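
/* For the ABI_AIX case above, the initialized trampoline is itself a
   three-word function descriptor (regsize bytes per word):

     m_tramp + 0*regsize: entry address, loaded from FNADDR's descriptor
     m_tramp + 1*regsize: TOC pointer, loaded likewise
     m_tramp + 2*regsize: static chain value (CXT)

   This is a restatement of the emit_move_insn sequence above, added
   here for illustration.  */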

/* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
   identifier as an argument, so the front end shouldn't look it up.  */

static bool
rs6000_attribute_takes_identifier_p (const_tree attr_id)
{
  return is_attribute_p ("altivec", attr_id);
}

/* Handle the "altivec" attribute.  The attribute may have
   arguments as follows:

	__attribute__((altivec(vector__)))
	__attribute__((altivec(pixel__)))	(always followed by 'unsigned short')
	__attribute__((altivec(bool__)))	(always followed by 'unsigned')

  and may appear more than once (e.g., 'vector bool char') in a
  given declaration.  */

static tree
rs6000_handle_altivec_attribute (tree *node,
				 tree name ATTRIBUTE_UNUSED,
				 tree args,
				 int flags ATTRIBUTE_UNUSED,
				 bool *no_add_attrs)
{
  tree type = *node, result = NULL_TREE;
  machine_mode mode;
  int unsigned_p;
  char altivec_type
    = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
	&& TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
       ? *IDENTIFIER_POINTER (TREE_VALUE (args))
       : '?');

  while (POINTER_TYPE_P (type)
	 || TREE_CODE (type) == FUNCTION_TYPE
	 || TREE_CODE (type) == METHOD_TYPE
	 || TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  mode = TYPE_MODE (type);

  /* Check for invalid AltiVec type qualifiers.  */
  if (type == long_double_type_node)
    error ("use of %<long double%> in AltiVec types is invalid");
  else if (type == boolean_type_node)
    error ("use of boolean types in AltiVec types is invalid");
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    error ("use of %<complex%> in AltiVec types is invalid");
  else if (DECIMAL_FLOAT_MODE_P (mode))
    error ("use of decimal floating point types in AltiVec types is invalid");
  else if (!TARGET_VSX)
    {
      if (type == long_unsigned_type_node || type == long_integer_type_node)
	{
	  if (TARGET_64BIT)
	    error ("use of %<long%> in AltiVec types is invalid for "
		   "64-bit code without -mvsx");
	  else if (rs6000_warn_altivec_long)
	    warning (0, "use of %<long%> in AltiVec types is deprecated; "
		     "use %<int%>");
	}
      else if (type == long_long_unsigned_type_node
	       || type == long_long_integer_type_node)
	error ("use of %<long long%> in AltiVec types is invalid without "
	       "-mvsx");
      else if (type == double_type_node)
	error ("use of %<double%> in AltiVec types is invalid without -mvsx");
    }

  switch (altivec_type)
    {
    case 'v':
      unsigned_p = TYPE_UNSIGNED (type);
      switch (mode)
	{
	case E_TImode:
	  result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
	  break;
	case E_DImode:
	  result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
	  break;
	case E_SImode:
	  result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
	  break;
	case E_HImode:
	  result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
	  break;
	case E_QImode:
	  result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
	  break;
	case E_SFmode: result = V4SF_type_node; break;
	case E_DFmode: result = V2DF_type_node; break;
	  /* If the user says 'vector int bool', we may be handed the 'bool'
	     attribute _before_ the 'vector' attribute, and so select the
	     proper type in the 'b' case below.  */
	case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
	case E_V2DImode: case E_V2DFmode:
	  result = type;
	default: break;
	}
      break;
    case 'b':
      switch (mode)
	{
	case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
	case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
	case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
	case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
	default: break;
	}
      break;
    case 'p':
      switch (mode)
	{
	case E_V8HImode: result = pixel_V8HI_type_node;
	default: break;
	}
    default: break;
    }

  /* Propagate qualifiers attached to the element type
     onto the vector type.  */
  if (result && result != type && TYPE_QUALS (type))
    result = build_qualified_type (result, TYPE_QUALS (type));

  *no_add_attrs = true;  /* No need to hang on to the attribute.  */

  if (result)
    *node = lang_hooks.types.reconstruct_complex_type (*node, result);

  return NULL_TREE;
}
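
/* Illustrative example, not part of the original sources: the AltiVec
   type keywords are implemented in terms of this attribute, so

     typedef int vsi __attribute__ ((altivec (vector__)));

   takes the 'v' case above with SImode, yielding V4SI_type_node --
   the same type the 'vector int' keyword syntax denotes.  */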

/* AltiVec defines four built-in scalar types that serve as vector
   elements; we must teach the compiler how to mangle them.  */

static const char *
rs6000_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  if (type == bool_char_type_node) return "U6__boolc";
  if (type == bool_short_type_node) return "U6__bools";
  if (type == pixel_type_node) return "u7__pixel";
  if (type == bool_int_type_node) return "U6__booli";
  if (type == bool_long_type_node) return "U6__booll";

  /* Use a unique name for __float128 rather than trying to use "e" or "g". Use
     "g" for IBM extended double, no matter whether it is long double (using
     -mabi=ibmlongdouble) or the distinct __ibm128 type.  */
  if (TARGET_FLOAT128_TYPE)
    {
      if (type == ieee128_float_type_node)
	return "U10__float128";

      if (type == ibm128_float_type_node)
	return "g";

      if (type == long_double_type_node && TARGET_LONG_DOUBLE_128)
	return (TARGET_IEEEQUAD) ? "U10__float128" : "g";
    }

  /* Mangle IBM extended float long double as `g' (__float128) on
     powerpc*-linux where long-double-64 previously was the default.  */
  if (TYPE_MAIN_VARIANT (type) == long_double_type_node
      && TARGET_ELF
      && TARGET_LONG_DOUBLE_128
      && !TARGET_IEEEQUAD)
    return "g";

  /* For all other types, use normal C++ mangling.  */
  return NULL;
}
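
/* E.g., per the table above, the element type of 'vector bool int'
   (bool_int_type_node) mangles as "U6__booli" and the element type of
   'vector pixel' (pixel_type_node) as "u7__pixel"; every type not
   listed falls through to the default C++ mangling.  */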

/* Handle a "longcall" or "shortcall" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
rs6000_handle_longcall_attribute (tree *node, tree name,
				  tree args ATTRIBUTE_UNUSED,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Set longcall attributes on all functions declared when
   rs6000_default_long_calls is true.  */
static void
rs6000_set_default_type_attributes (tree type)
{
  if (rs6000_default_long_calls
      && (TREE_CODE (type) == FUNCTION_TYPE
	  || TREE_CODE (type) == METHOD_TYPE))
    TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
					NULL_TREE,
					TYPE_ATTRIBUTES (type));

#if TARGET_MACHO
  darwin_set_default_type_attributes (type);
#endif
}

/* Return a reference suitable for calling a function with the
   longcall attribute.  */

rtx
rs6000_longcall_ref (rtx call_ref)
{
  const char *call_name;
  tree node;

  if (GET_CODE (call_ref) != SYMBOL_REF)
    return call_ref;

  /* System V adds '.' to the internal name, so skip them.  */
  call_name = XSTR (call_ref, 0);
  if (*call_name == '.')
    {
      while (*call_name == '.')
	call_name++;

      node = get_identifier (call_name);
      call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
    }

  return force_reg (Pmode, call_ref);
}

#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
#define TARGET_USE_MS_BITFIELD_LAYOUT 0
#endif

/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
rs6000_handle_struct_attribute (tree *node, tree name,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
        type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
                 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }

  else if ((is_attribute_p ("ms_struct", name)
            && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
           || ((is_attribute_p ("gcc_struct", name)
                && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning (OPT_Wattributes, "%qE incompatible attribute ignored",
               name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

static bool
rs6000_ms_bitfield_layout_p (const_tree record_type)
{
  return (TARGET_USE_MS_BITFIELD_LAYOUT
	  && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
    || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
}

#ifdef USING_ELFOS_H

/* A get_unnamed_section callback, used for switching to toc_section.  */

static void
rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
{
  if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
      && TARGET_MINIMAL_TOC)
    {
      if (!toc_initialized)
	{
	  fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
	  ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
	  (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
	  fprintf (asm_out_file, "\t.tc ");
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
	  fprintf (asm_out_file, "\n");

	  fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
	  ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
	  fprintf (asm_out_file, " = .+32768\n");
	  toc_initialized = 1;
	}
      else
	fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
    }
  else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    {
      fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
      if (!toc_initialized)
	{
	  ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
	  toc_initialized = 1;
	}
    }
  else
    {
      fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
      if (!toc_initialized)
	{
	  ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
	  fprintf (asm_out_file, " = .+32768\n");
	  toc_initialized = 1;
	}
    }
}

/* Implement TARGET_ASM_INIT_SECTIONS.  */

static void
rs6000_elf_asm_init_sections (void)
{
  toc_section
    = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);

  sdata2_section
    = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
			   SDATA2_SECTION_ASM_OP);
}

/* Implement TARGET_SELECT_RTX_SECTION.  */

static section *
rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
			       unsigned HOST_WIDE_INT align)
{
  if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
    return toc_section;
  else
    return default_elf_select_rtx_section (mode, x, align);
}

/* For a SYMBOL_REF, set generic flags and then perform some
   target-specific processing.

   When the AIX ABI is requested on a non-AIX system, replace the
   function name with the real name (with a leading .) rather than the
   function descriptor name.  This saves a lot of overriding code to
   read the prefixes.  */

static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
static void
rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (first
      && TREE_CODE (decl) == FUNCTION_DECL
      && !TARGET_AIX
      && DEFAULT_ABI == ABI_AIX)
    {
      rtx sym_ref = XEXP (rtl, 0);
      size_t len = strlen (XSTR (sym_ref, 0));
      char *str = XALLOCAVEC (char, len + 2);
      str[0] = '.';
      memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
      XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
    }
}

static inline bool
compare_section_name (const char *section, const char *templ)
{
  int len;

  len = strlen (templ);
  return (strncmp (section, templ, len) == 0
	  && (section[len] == 0 || section[len] == '.'));
}
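
/* E.g.: compare_section_name (".sdata.foo", ".sdata") is true, while
   compare_section_name (".sdata2", ".sdata") is false, since the
   character after the matched prefix must be '\0' or '.'.  */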

bool
rs6000_elf_in_small_data_p (const_tree decl)
{
  if (rs6000_sdata == SDATA_NONE)
    return false;

  /* We want to merge strings, so we never consider them small data.  */
  if (TREE_CODE (decl) == STRING_CST)
    return false;

  /* Functions are never in the small data area.  */
  if (TREE_CODE (decl) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
    {
      const char *section = DECL_SECTION_NAME (decl);
      if (compare_section_name (section, ".sdata")
	  || compare_section_name (section, ".sdata2")
	  || compare_section_name (section, ".gnu.linkonce.s")
	  || compare_section_name (section, ".sbss")
	  || compare_section_name (section, ".sbss2")
	  || compare_section_name (section, ".gnu.linkonce.sb")
	  || strcmp (section, ".PPC.EMB.sdata0") == 0
	  || strcmp (section, ".PPC.EMB.sbss0") == 0)
	return true;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));

      if (size > 0
	  && size <= g_switch_value
	  /* If it's not public, and we're not going to reference it there,
	     there's no need to put it in the small data section.  */
	  && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
	return true;
    }

  return false;
}

#endif /* USING_ELFOS_H */

/* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P.  */

static bool
rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
{
  return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
}

/* Do not place thread-local symbol refs in the object blocks.  */

static bool
rs6000_use_blocks_for_decl_p (const_tree decl)
{
  return !DECL_THREAD_LOCAL_P (decl);
}

/* Return a REG that occurs in ADDR with coefficient 1.
   ADDR can be effectively incremented by incrementing REG.

   r0 is special and we must not select it as an address
   register by this routine since our caller will try to
   increment the returned register via an "la" instruction.  */

rtx
find_addr_reg (rtx addr)
{
  while (GET_CODE (addr) == PLUS)
    {
      if (GET_CODE (XEXP (addr, 0)) == REG
	  && REGNO (XEXP (addr, 0)) != 0)
	addr = XEXP (addr, 0);
      else if (GET_CODE (XEXP (addr, 1)) == REG
	       && REGNO (XEXP (addr, 1)) != 0)
	addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 0)))
	addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 1)))
	addr = XEXP (addr, 0);
      else
	gcc_unreachable ();
    }
  gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
  return addr;
}
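
/* E.g.: for ADDR (plus (reg 9) (const_int 8)) this returns (reg 9);
   r0 is skipped as described above, so for (plus (reg 0) (reg 9)) it
   also returns (reg 9).  */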

void
rs6000_fatal_bad_address (rtx op)
{
  fatal_insn ("bad address", op);
}

#if TARGET_MACHO

typedef struct branch_island_d {
  tree function_name;
  tree label_name;
  int line_number;
} branch_island;


static vec<branch_island, va_gc> *branch_islands;

/* Remember to generate a branch island for far calls to the given
   function.  */

static void
add_compiler_branch_island (tree label_name, tree function_name,
			    int line_number)
{
  branch_island bi = {function_name, label_name, line_number};
  vec_safe_push (branch_islands, bi);
}

/* Generate far-jump branch islands for everything recorded in
   branch_islands.  Invoked immediately after the last instruction of
   the epilogue has been emitted; the branch islands must be appended
   to, and contiguous with, the function body.  Mach-O stubs are
   generated in machopic_output_stub().  */

static void
macho_branch_islands (void)
{
  char tmp_buf[512];

  while (!vec_safe_is_empty (branch_islands))
    {
      branch_island *bi = &branch_islands->last ();
      const char *label = IDENTIFIER_POINTER (bi->label_name);
      const char *name = IDENTIFIER_POINTER (bi->function_name);
      char name_buf[512];
      /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF().  */
      if (name[0] == '*' || name[0] == '&')
	strcpy (name_buf, name+1);
      else
	{
	  name_buf[0] = '_';
	  strcpy (name_buf+1, name);
	}
      strcpy (tmp_buf, "\n");
      strcat (tmp_buf, label);
#if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
      if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
	dbxout_stabd (N_SLINE, bi->line_number);
#endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
      if (flag_pic)
	{
	  if (TARGET_LINK_STACK)
	    {
	      char name[32];
	      get_ppc476_thunk_name (name);
	      strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
	      strcat (tmp_buf, name);
	      strcat (tmp_buf, "\n");
	      strcat (tmp_buf, label);
	      strcat (tmp_buf, "_pic:\n\tmflr r11\n");
	    }
	  else
	    {
	      strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
	      strcat (tmp_buf, label);
	      strcat (tmp_buf, "_pic\n");
	      strcat (tmp_buf, label);
	      strcat (tmp_buf, "_pic:\n\tmflr r11\n");
	    }

	  strcat (tmp_buf, "\taddis r11,r11,ha16(");
	  strcat (tmp_buf, name_buf);
	  strcat (tmp_buf, " - ");
	  strcat (tmp_buf, label);
	  strcat (tmp_buf, "_pic)\n");

	  strcat (tmp_buf, "\tmtlr r0\n");

	  strcat (tmp_buf, "\taddi r12,r11,lo16(");
	  strcat (tmp_buf, name_buf);
	  strcat (tmp_buf, " - ");
	  strcat (tmp_buf, label);
	  strcat (tmp_buf, "_pic)\n");

	  strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
	}
      else
	{
	  strcat (tmp_buf, ":\nlis r12,hi16(");
	  strcat (tmp_buf, name_buf);
	  strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
	  strcat (tmp_buf, name_buf);
	  strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
	}
      output_asm_insn (tmp_buf, 0);
#if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
      if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
	dbxout_stabd (N_SLINE, bi->line_number);
#endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
      branch_islands->pop ();
    }
}

/* NO_PREVIOUS_DEF checks whether the given function name already has a
   branch island recorded in the list.  */

static int
no_previous_def (tree function_name)
{
  branch_island *bi;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
    if (function_name == bi->function_name)
      return 0;
  return 1;
}

/* GET_PREV_LABEL gets the label name from the previous definition of
   the function.  */

static tree
get_prev_label (tree function_name)
{
  branch_island *bi;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
    if (function_name == bi->function_name)
      return bi->label_name;
  return NULL_TREE;
}

/* INSN is either a function call or a millicode call.  It may have an
   unconditional jump in its delay slot.

   CALL_DEST is the routine we are calling.  */

char *
output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
	     int cookie_operand_number)
{
  static char buf[256];
  if (darwin_emit_branch_islands
      && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
      && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
    {
      tree labelname;
      tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));

      if (no_previous_def (funname))
	{
	  rtx label_rtx = gen_label_rtx ();
	  char *label_buf, temp_buf[256];
	  ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
				       CODE_LABEL_NUMBER (label_rtx));
	  label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
	  labelname = get_identifier (label_buf);
	  add_compiler_branch_island (labelname, funname, insn_line (insn));
	}
      else
	labelname = get_prev_label (funname);

      /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
	 instruction will reach 'foo', otherwise link as 'bl L42'".
	 "L42" should be a 'branch island', that will do a far jump to
	 'foo'.  Branch islands are generated in
	 macho_branch_islands().  */
      sprintf (buf, "jbsr %%z%d,%.246s",
	       dest_operand_number, IDENTIFIER_POINTER (labelname));
    }
  else
    sprintf (buf, "bl %%z%d", dest_operand_number);
  return buf;
}
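
/* Illustrative summary: a CALL_LONG call to 'foo' therefore assembles
   as "jbsr foo,L42", where L42 labels the far-jump code that
   macho_branch_islands() will emit after the epilogue, while an
   ordinary call is simply "bl foo".  */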

/* Generate PIC and indirect symbol stubs.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *symbol_name, *lazy_ptr_name;
  char *local_label_0;
  static int label = 0;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  lazy_ptr_name = XALLOCAVEC (char, length + 32);
  GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);

  if (flag_pic == 2)
    switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub1_section]);

  if (flag_pic == 2)
    {
      fprintf (file, "\t.align 5\n");

      fprintf (file, "%s:\n", stub);
      fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

      label++;
      local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
      sprintf (local_label_0, "\"L%011d$spb\"", label);

      fprintf (file, "\tmflr r0\n");
      if (TARGET_LINK_STACK)
	{
	  char name[32];
	  get_ppc476_thunk_name (name);
	  fprintf (file, "\tbl %s\n", name);
	  fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
	}
      else
	{
	  fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
	  fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
	}
      fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
	       lazy_ptr_name, local_label_0);
      fprintf (file, "\tmtlr r0\n");
      fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
	       (TARGET_64BIT ? "ldu" : "lwzu"),
	       lazy_ptr_name, local_label_0);
      fprintf (file, "\tmtctr r12\n");
      fprintf (file, "\tbctr\n");
    }
  else
    {
      fprintf (file, "\t.align 4\n");

      fprintf (file, "%s:\n", stub);
      fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

      fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
      fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
	       (TARGET_64BIT ? "ldu" : "lwzu"),
	       lazy_ptr_name);
      fprintf (file, "\tmtctr r12\n");
      fprintf (file, "\tbctr\n");
    }

  switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "%sdyld_stub_binding_helper\n",
	   (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
}

/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go into a reg.  This is REG if
   nonzero, otherwise we allocate register(s) as necessary.  */

#define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
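
/* SMALL_INT (X) is true iff INTVAL (X) fits in a signed 16-bit
   immediate, i.e. -0x8000 <= INTVAL (X) <= 0x7fff -- the range that
   can be added to a register with a single addi/la instruction, and
   hence folded into the address below via plus_constant.  */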

rtx
rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
					rtx reg)
{
  rtx base, offset;

  if (reg == NULL && ! reload_in_progress && ! reload_completed)
    reg = gen_reg_rtx (Pmode);

  if (GET_CODE (orig) == CONST)
    {
      rtx reg_temp;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      /* Use a different reg for the intermediate value, as
	 it will be marked UNCHANGING.  */
      reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
      base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
						     Pmode, reg_temp);
      offset =
	rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
						Pmode, reg);

      if (GET_CODE (offset) == CONST_INT)
	{
	  if (SMALL_INT (offset))
	    return plus_constant (Pmode, base, INTVAL (offset));
	  else if (! reload_in_progress && ! reload_completed)
	    offset = force_reg (Pmode, offset);
	  else
	    {
	      rtx mem = force_const_mem (Pmode, orig);
	      return machopic_legitimize_pic_address (mem, Pmode, reg);
	    }
	}
      return gen_rtx_PLUS (Pmode, base, offset);
    }

  /* Fall back on generic machopic code.  */
  return machopic_legitimize_pic_address (orig, mode, reg);
}

/* Output a .machine directive for the Darwin assembler, and call
   the generic start_file routine.  */

static void
rs6000_darwin_file_start (void)
{
  static const struct
  {
    const char *arg;
    const char *name;
    HOST_WIDE_INT if_set;
  } mapping[] = {
    { "ppc64", "ppc64", MASK_64BIT },
    { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
    { "power4", "ppc970", 0 },
    { "G5", "ppc970", 0 },
    { "7450", "ppc7450", 0 },
    { "7400", "ppc7400", MASK_ALTIVEC },
    { "G4", "ppc7400", 0 },
    { "750", "ppc750", 0 },
    { "740", "ppc750", 0 },
    { "G3", "ppc750", 0 },
    { "604e", "ppc604e", 0 },
    { "604", "ppc604", 0 },
    { "603e", "ppc603", 0 },
    { "603", "ppc603", 0 },
    { "601", "ppc601", 0 },
    { NULL, "ppc", 0 } };
  const char *cpu_id = "";
  size_t i;

  rs6000_file_start ();
  darwin_file_start ();

  /* Determine the argument to -mcpu=.  Default to G3 if not specified.  */

  if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
    cpu_id = rs6000_default_cpu;

  if (global_options_set.x_rs6000_cpu_index)
    cpu_id = processor_target_table[rs6000_cpu_index].name;

  /* Look through the mapping array.  Pick the first name that either
     matches the argument, has a bit set in IF_SET that is also set
     in the target flags, or has a NULL name.  */

  i = 0;
  while (mapping[i].arg != NULL
	 && strcmp (mapping[i].arg, cpu_id) != 0
	 && (mapping[i].if_set & rs6000_isa_flags) == 0)
    i++;

  fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
}

#endif /* TARGET_MACHO */

#if TARGET_ELF
static int
rs6000_elf_reloc_rw_mask (void)
{
  if (flag_pic)
    return 3;
  else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    return 2;
  else
    return 0;
}

/* Record an element in the table of global constructors.  SYMBOL is
   a SYMBOL_REF of the function to be called; PRIORITY is a number
   between 0 and MAX_INIT_PRIORITY.

   This differs from default_named_section_asm_out_constructor in
   that we have special handling for -mrelocatable.  */

static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void
rs6000_elf_asm_out_constructor (rtx symbol, int priority)
{
  const char *section = ".ctors";
  char buf[18];

  if (priority != DEFAULT_INIT_PRIORITY)
    {
      sprintf (buf, ".ctors.%.5u",
	       /* Invert the numbering so the linker puts us in the proper
		  order; constructors are run from right to left, and the
		  linker sorts in increasing order.  */
	       MAX_INIT_PRIORITY - priority);
      section = buf;
    }

  switch_to_section (get_section (section, SECTION_WRITE, NULL));
  assemble_align (POINTER_SIZE);

  if (DEFAULT_ABI == ABI_V4
      && (TARGET_RELOCATABLE || flag_pic > 1))
    {
      fputs ("\t.long (", asm_out_file);
      output_addr_const (asm_out_file, symbol);
      fputs (")@fixup\n", asm_out_file);
    }
  else
    assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
}
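
/* E.g. (illustrative, assuming the usual MAX_INIT_PRIORITY of 65535):
   a constructor with priority 65535 lands in ".ctors.00000" and one
   with priority 1 in ".ctors.65534", so the linker's increasing sort
   produces the required right-to-left execution order.  */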

static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void
rs6000_elf_asm_out_destructor (rtx symbol, int priority)
{
  const char *section = ".dtors";
  char buf[18];

  if (priority != DEFAULT_INIT_PRIORITY)
    {
      sprintf (buf, ".dtors.%.5u",
	       /* Invert the numbering so the linker puts us in the proper
		  order; constructors are run from right to left, and the
		  linker sorts in increasing order.  */
	       MAX_INIT_PRIORITY - priority);
      section = buf;
    }

  switch_to_section (get_section (section, SECTION_WRITE, NULL));
  assemble_align (POINTER_SIZE);

  if (DEFAULT_ABI == ABI_V4
      && (TARGET_RELOCATABLE || flag_pic > 1))
    {
      fputs ("\t.long (", asm_out_file);
      output_addr_const (asm_out_file, symbol);
      fputs (")@fixup\n", asm_out_file);
    }
  else
    assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
}

void
rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
{
  if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
    {
      fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
      ASM_OUTPUT_LABEL (file, name);
      fputs (DOUBLE_INT_ASM_OP, file);
      rs6000_output_function_entry (file, name);
      fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
      if (DOT_SYMBOLS)
	{
	  fputs ("\t.size\t", file);
	  assemble_name (file, name);
	  fputs (",24\n\t.type\t.", file);
	  assemble_name (file, name);
	  fputs (",@function\n", file);
	  if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
	    {
	      fputs ("\t.globl\t.", file);
	      assemble_name (file, name);
	      putc ('\n', file);
	    }
	}
      else
	ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
      ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
      rs6000_output_function_entry (file, name);
      fputs (":\n", file);
      return;
    }

  if (DEFAULT_ABI == ABI_V4
      && (TARGET_RELOCATABLE || flag_pic > 1)
      && !TARGET_SECURE_PLT
      && (!constant_pool_empty_p () || crtl->profile)
      && uses_TOC ())
    {
      char buf[256];

      (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);

      fprintf (file, "\t.long ");
      assemble_name (file, toc_label_name);
      need_toc_init = 1;
      putc ('-', file);
      ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
      assemble_name (file, buf);
      putc ('\n', file);
    }

  ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
  ASM_DECLARE_RESULT (file, DECL_RESULT (decl));

  if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ())
    {
      char buf[256];

      (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);

      fprintf (file, "\t.quad .TOC.-");
      ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
      assemble_name (file, buf);
      putc ('\n', file);
    }

  if (DEFAULT_ABI == ABI_AIX)
    {
      const char *desc_name, *orig_name;

      orig_name = (*targetm.strip_name_encoding) (name);
      desc_name = orig_name;
      while (*desc_name == '.')
	desc_name++;

      if (TREE_PUBLIC (decl))
	fprintf (file, "\t.globl %s\n", desc_name);

      fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
      fprintf (file, "%s:\n", desc_name);
      fprintf (file, "\t.long %s\n", orig_name);
      fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
      fputs ("\t.long 0\n", file);
      fprintf (file, "\t.previous\n");
    }
  ASM_OUTPUT_LABEL (file, name);
}

static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
static void
rs6000_elf_file_end (void)
{
#ifdef HAVE_AS_GNU_ATTRIBUTE
  /* ??? The value emitted depends on options active at file end.
     Assume anyone using #pragma or attributes that might change
     options knows what they are doing.  */
  if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
      && rs6000_passes_float)
    {
      int fp;

      if (TARGET_DF_FPR | TARGET_DF_SPE)
	fp = 1;
      else if (TARGET_SF_FPR | TARGET_SF_SPE)
	fp = 3;
      else
	fp = 2;
      if (rs6000_passes_long_double)
	{
	  if (!TARGET_LONG_DOUBLE_128)
	    fp |= 2 * 4;
	  else if (TARGET_IEEEQUAD)
	    fp |= 3 * 4;
	  else
	    fp |= 1 * 4;
	}
      fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
    }
  if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
    {
      if (rs6000_passes_vector)
	fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
		 (TARGET_ALTIVEC_ABI ? 2
		  : TARGET_SPE_ABI ? 3
		  : 1));
      if (rs6000_returns_struct)
	fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
		 aix_struct_return ? 2 : 1);
    }
#endif
#if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
  if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
    file_end_indicate_exec_stack ();
#endif

  if (flag_split_stack)
    file_end_indicate_split_stack ();

  if (cpu_builtin_p)
    {
      /* We have expanded a CPU builtin, so we need to emit a reference to
	 the special symbol that LIBC uses to declare it supports the
	 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature.  */
      switch_to_section (data_section);
      fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
      fprintf (asm_out_file, "\t%s %s\n",
	       TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
    }
}
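
/* Worked example for the FP attribute above (illustrative): a
   compilation that uses double-precision FP registers (fp = 1) and
   passes a 128-bit IBM long double (fp |= 1 * 4) ends up emitting
   "\t.gnu_attribute 4, 5".  */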
#endif

#if TARGET_XCOFF

#ifndef HAVE_XCOFF_DWARF_EXTRAS
#define HAVE_XCOFF_DWARF_EXTRAS 0
#endif

static enum unwind_info_type
rs6000_xcoff_debug_unwind_info (void)
{
  return UI_NONE;
}

static void
rs6000_xcoff_asm_output_anchor (rtx symbol)
{
  char buffer[100];

  sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
	   SYMBOL_REF_BLOCK_OFFSET (symbol));
  fprintf (asm_out_file, "%s", SET_ASM_OP);
  RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
  fprintf (asm_out_file, ",");
  RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
  fprintf (asm_out_file, "\n");
}

static void
rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
{
  fputs (GLOBAL_ASM_OP, stream);
  RS6000_OUTPUT_BASENAME (stream, name);
  putc ('\n', stream);
}

/* A get_unnamed_decl callback, used for read-only sections.  PTR
   points to the section string variable.  */

static void
rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
{
  fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
	   *(const char *const *) directive,
	   XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
}

/* Likewise for read-write sections.  */

static void
rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
{
  fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
	   *(const char *const *) directive,
	   XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
}

static void
rs6000_xcoff_output_tls_section_asm_op (const void *directive)
{
  fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
	   *(const char *const *) directive,
	   XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
}

/* A get_unnamed_section callback, used for switching to toc_section.  */

static void
rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
{
  if (TARGET_MINIMAL_TOC)
    {
      /* toc_section is always selected at least once from
	 rs6000_xcoff_file_start, so this is guaranteed to
	 always be defined once and only once in each file.  */
      if (!toc_initialized)
	{
	  fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
	  fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
	  toc_initialized = 1;
	}
      fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
	       (TARGET_32BIT ? "" : ",3"));
    }
  else
    fputs ("\t.toc\n", asm_out_file);
}

/* Implement TARGET_ASM_INIT_SECTIONS.  */

static void
rs6000_xcoff_asm_init_sections (void)
{
  read_only_data_section
    = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
			   &xcoff_read_only_section_name);

  private_data_section
    = get_unnamed_section (SECTION_WRITE,
			   rs6000_xcoff_output_readwrite_section_asm_op,
			   &xcoff_private_data_section_name);

  tls_data_section
    = get_unnamed_section (SECTION_TLS,
			   rs6000_xcoff_output_tls_section_asm_op,
			   &xcoff_tls_data_section_name);

  tls_private_data_section
    = get_unnamed_section (SECTION_TLS,
			   rs6000_xcoff_output_tls_section_asm_op,
			   &xcoff_private_data_section_name);

  read_only_private_data_section
    = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
			   &xcoff_private_data_section_name);

  toc_section
    = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);

  readonly_data_section = read_only_data_section;
}

static int
rs6000_xcoff_reloc_rw_mask (void)
{
  return 3;
}

static void
rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
				tree decl ATTRIBUTE_UNUSED)
{
  int smclass;
  static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };

  if (flags & SECTION_EXCLUDE)
    smclass = 4;
  else if (flags & SECTION_DEBUG)
    {
      fprintf (asm_out_file, "\t.dwsect %s\n", name);
      return;
    }
  else if (flags & SECTION_CODE)
    smclass = 0;
  else if (flags & SECTION_TLS)
    smclass = 3;
  else if (flags & SECTION_WRITE)
    smclass = 2;
  else
    smclass = 1;

  fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
	   (flags & SECTION_CODE) ? "." : "",
	   name, suffix[smclass], flags & SECTION_ENTSIZE);
}
36850 
36851 #define IN_NAMED_SECTION(DECL) \
36852   ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
36853    && DECL_SECTION_NAME (DECL) != NULL)
36854 
36855 static section *
rs6000_xcoff_select_section(tree decl,int reloc,unsigned HOST_WIDE_INT align)36856 rs6000_xcoff_select_section (tree decl, int reloc,
36857 			     unsigned HOST_WIDE_INT align)
36858 {
36859   /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
36860      named section.  */
36861   if (align > BIGGEST_ALIGNMENT)
36862     {
36863       resolve_unique_section (decl, reloc, true);
36864       if (IN_NAMED_SECTION (decl))
36865 	return get_named_section (decl, NULL, reloc);
36866     }
36867 
36868   if (decl_readonly_section (decl, reloc))
36869     {
36870       if (TREE_PUBLIC (decl))
36871 	return read_only_data_section;
36872       else
36873 	return read_only_private_data_section;
36874     }
36875   else
36876     {
36877 #if HAVE_AS_TLS
36878       if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
36879 	{
36880 	  if (TREE_PUBLIC (decl))
36881 	    return tls_data_section;
36882 	  else if (bss_initializer_p (decl))
36883 	    {
36884 	      /* Convert to COMMON to emit in BSS.  */
36885 	      DECL_COMMON (decl) = 1;
36886 	      return tls_comm_section;
36887 	    }
36888 	  else
36889 	    return tls_private_data_section;
36890 	}
36891       else
36892 #endif
36893 	if (TREE_PUBLIC (decl))
36894 	return data_section;
36895       else
36896 	return private_data_section;
36897     }
36898 }
36899 
36900 static void
rs6000_xcoff_unique_section(tree decl,int reloc ATTRIBUTE_UNUSED)36901 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
36902 {
36903   const char *name;
36904 
36905   /* Use select_section for private data and uninitialized data with
36906      alignment <= BIGGEST_ALIGNMENT.  */
36907   if (!TREE_PUBLIC (decl)
36908       || DECL_COMMON (decl)
36909       || (DECL_INITIAL (decl) == NULL_TREE
36910 	  && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
36911       || DECL_INITIAL (decl) == error_mark_node
36912       || (flag_zero_initialized_in_bss
36913 	  && initializer_zerop (DECL_INITIAL (decl))))
36914     return;
36915 
36916   name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
36917   name = (*targetm.strip_name_encoding) (name);
36918   set_decl_section_name (decl, name);
36919 }
36920 
36921 /* Select section for constant in constant pool.
36922 
36923    On RS/6000, all constants are in the private read-only data area.
36924    However, if this is being placed in the TOC it must be output as a
36925    toc entry.  */
36926 
36927 static section *
rs6000_xcoff_select_rtx_section(machine_mode mode,rtx x,unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)36928 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
36929 				 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
36930 {
36931   if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
36932     return toc_section;
36933   else
36934     return read_only_private_data_section;
36935 }
36936 
/* Remove any trailing [DS] or the like from the symbol name; for
   example, "foo[DS]" becomes "foo".  */

static const char *
rs6000_xcoff_strip_name_encoding (const char *name)
{
  size_t len;
  if (*name == '*')
    name++;
  len = strlen (name);
  if (name[len - 1] == ']')
    return ggc_alloc_string (name, len - 4);
  else
    return name;
}

/* Section attributes.  AIX is always PIC.  */

static unsigned int
rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int align;
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  /* Align to at least UNIT size.  */
  if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
    align = MIN_UNITS_PER_WORD;
  else
    /* Increase alignment of large objects if not already stricter.  */
    align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
		 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
		 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);

  return flags | (exact_log2 (align) & SECTION_ENTSIZE);
}

/* Output at beginning of assembler file.

   Initialize the section names for the RS/6000 at this point.

   Specify filename, including full path, to assembler.

   We want to go into the TOC section so at least one .toc will be emitted.
   Also, in order to output proper .bs/.es pairs, we need at least one static
   [RW] section emitted.

   Finally, declare mcount when profiling to make the assembler happy.  */

static void
rs6000_xcoff_file_start (void)
{
  rs6000_gen_section_name (&xcoff_bss_section_name,
			   main_input_filename, ".bss_");
  rs6000_gen_section_name (&xcoff_private_data_section_name,
			   main_input_filename, ".rw_");
  rs6000_gen_section_name (&xcoff_read_only_section_name,
			   main_input_filename, ".ro_");
  rs6000_gen_section_name (&xcoff_tls_data_section_name,
			   main_input_filename, ".tls_");
  rs6000_gen_section_name (&xcoff_tbss_section_name,
			   main_input_filename, ".tbss_[UL]");

  fputs ("\t.file\t", asm_out_file);
  output_quoted_string (asm_out_file, main_input_filename);
  fputc ('\n', asm_out_file);
  if (write_symbols != NO_DEBUG)
    switch_to_section (private_data_section);
  switch_to_section (toc_section);
  switch_to_section (text_section);
  if (profile_flag)
    fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
  rs6000_file_start ();
}

/* Output at end of assembler file.
   On the RS/6000, referencing data should automatically pull in text.  */

static void
rs6000_xcoff_file_end (void)
{
  switch_to_section (text_section);
  fputs ("_section_.text:\n", asm_out_file);
  switch_to_section (data_section);
  fputs (TARGET_32BIT
	 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
	 asm_out_file);
}

struct declare_alias_data
{
  FILE *file;
  bool function_descriptor;
};

/* Declare alias N.  A helper function for call_for_symbol_and_aliases.  */

static bool
rs6000_declare_alias (struct symtab_node *n, void *d)
{
  struct declare_alias_data *data = (struct declare_alias_data *)d;
  /* Main symbol is output specially, because varasm machinery does part of
     the job for us - we do not need to declare .globl/lglobs and such.  */
  if (!n->alias || n->weakref)
    return false;

  if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
    return false;

  /* Prevent assemble_alias from trying to use .set pseudo operation
     that does not behave as expected by the middle-end.  */
  TREE_ASM_WRITTEN (n->decl) = true;

  const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
  char *buffer = (char *) alloca (strlen (name) + 2);
  char *p;
  int dollar_inside = 0;

  strcpy (buffer, name);
  p = strchr (buffer, '$');
  while (p) {
    *p = '_';
    dollar_inside++;
    p = strchr (p + 1, '$');
  }
  if (TREE_PUBLIC (n->decl))
    {
      if (!RS6000_WEAK || !DECL_WEAK (n->decl))
	{
          if (dollar_inside) {
	      if (data->function_descriptor)
                fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
	      fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
	    }
	  if (data->function_descriptor)
	    {
	      fputs ("\t.globl .", data->file);
	      RS6000_OUTPUT_BASENAME (data->file, buffer);
	      putc ('\n', data->file);
	    }
	  fputs ("\t.globl ", data->file);
	  RS6000_OUTPUT_BASENAME (data->file, buffer);
	  putc ('\n', data->file);
	}
#ifdef ASM_WEAKEN_DECL
      else if (DECL_WEAK (n->decl) && !data->function_descriptor)
	ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
#endif
    }
  else
    {
      if (dollar_inside)
	{
	  if (data->function_descriptor)
            fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
	  fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
	}
      if (data->function_descriptor)
	{
	  fputs ("\t.lglobl .", data->file);
	  RS6000_OUTPUT_BASENAME (data->file, buffer);
	  putc ('\n', data->file);
	}
      fputs ("\t.lglobl ", data->file);
      RS6000_OUTPUT_BASENAME (data->file, buffer);
      putc ('\n', data->file);
    }
  if (data->function_descriptor)
    fputs (".", data->file);
  RS6000_OUTPUT_BASENAME (data->file, buffer);
  fputs (":\n", data->file);
  return false;
}


#ifdef HAVE_GAS_HIDDEN
/* Helper function to calculate visibility of a DECL
   and return the value as a const string.  */

static const char *
rs6000_xcoff_visibility (tree decl)
{
  static const char * const visibility_types[] = {
    "", ",protected", ",hidden", ",internal"
  };

  enum symbol_visibility vis = DECL_VISIBILITY (decl);

  if (TREE_CODE (decl) == FUNCTION_DECL
      && cgraph_node::get (decl)
      && cgraph_node::get (decl)->instrumentation_clone
      && cgraph_node::get (decl)->instrumented_version)
    vis = DECL_VISIBILITY (cgraph_node::get (decl)->instrumented_version->decl);

  return visibility_types[vis];
}
#endif


/* This macro produces the initial definition of a function name.
   On the RS/6000, we need to place an extra '.' in the function name and
   output the function descriptor.
   Dollar signs are converted to underscores.

   The csect for the function will have already been created when
   text_section was selected.  We do have to go back to that csect, however.

   The third and fourth parameters to the .function pseudo-op (16 and 044)
   are placeholders which no longer have any use.

   Because AIX assembler's .set command has unexpected semantics, we output
   all aliases as alternative labels in front of the definition.  */

void
rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
{
  char *buffer = (char *) alloca (strlen (name) + 1);
  char *p;
  int dollar_inside = 0;
  struct declare_alias_data data = {file, false};

  strcpy (buffer, name);
  p = strchr (buffer, '$');
  while (p) {
    *p = '_';
    dollar_inside++;
    p = strchr (p + 1, '$');
  }
  if (TREE_PUBLIC (decl))
    {
      if (!RS6000_WEAK || !DECL_WEAK (decl))
	{
          if (dollar_inside) {
              fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
              fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
	    }
	  fputs ("\t.globl .", file);
	  RS6000_OUTPUT_BASENAME (file, buffer);
#ifdef HAVE_GAS_HIDDEN
	  fputs (rs6000_xcoff_visibility (decl), file);
#endif
	  putc ('\n', file);
	}
    }
  else
    {
      if (dollar_inside) {
          fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
          fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
	}
      fputs ("\t.lglobl .", file);
      RS6000_OUTPUT_BASENAME (file, buffer);
      putc ('\n', file);
    }
  fputs ("\t.csect ", file);
  RS6000_OUTPUT_BASENAME (file, buffer);
  fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
  RS6000_OUTPUT_BASENAME (file, buffer);
  fputs (":\n", file);
  symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
							&data, true);
  fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
  RS6000_OUTPUT_BASENAME (file, buffer);
  fputs (", TOC[tc0], 0\n", file);
  in_section = NULL;
  switch_to_section (function_section (decl));
  putc ('.', file);
  RS6000_OUTPUT_BASENAME (file, buffer);
  fputs (":\n", file);
  data.function_descriptor = true;
  symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
							&data, true);
  if (!DECL_IGNORED_P (decl))
    {
      if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
	xcoffout_declare_function (file, decl, buffer);
      else if (write_symbols == DWARF2_DEBUG)
	{
	  name = (*targetm.strip_name_encoding) (name);
	  fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
	}
    }
  return;
}


/* Output assembly language to globalize a symbol from a DECL,
   possibly with visibility.  */

void
rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
{
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
  fputs (GLOBAL_ASM_OP, stream);
  RS6000_OUTPUT_BASENAME (stream, name);
#ifdef HAVE_GAS_HIDDEN
  fputs (rs6000_xcoff_visibility (decl), stream);
#endif
  putc ('\n', stream);
}

/* Output assembly language to define a symbol as COMMON from a DECL,
   possibly with visibility.  */

void
rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
					     tree decl ATTRIBUTE_UNUSED,
					     const char *name,
					     unsigned HOST_WIDE_INT size,
					     unsigned HOST_WIDE_INT align)
{
  unsigned HOST_WIDE_INT align2 = 2;

  if (align > 32)
    align2 = floor_log2 (align / BITS_PER_UNIT);
  else if (size > 4)
    align2 = 3;

  fputs (COMMON_ASM_OP, stream);
  RS6000_OUTPUT_BASENAME (stream, name);

  fprintf (stream,
	   "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
	   size, align2);

#ifdef HAVE_GAS_HIDDEN
  fputs (rs6000_xcoff_visibility (decl), stream);
#endif
  putc ('\n', stream);
}
/* This macro produces the initial definition of an object (variable) name.
   Because AIX assembler's .set command has unexpected semantics, we output
   all aliases as alternative labels in front of the definition.  */

void
rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
{
  struct declare_alias_data data = {file, false};
  RS6000_OUTPUT_BASENAME (file, name);
  fputs (":\n", file);
  symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
							       &data, true);
}

/* Override the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'.  */

void
rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
{
  fputs (integer_asm_op (size, FALSE), file);
  assemble_name (file, label);
  fputs ("-$", file);
}

/* Output a symbol offset relative to the dbase for the current object.
   We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
   signed offsets.

   __gcc_unwind_dbase is embedded in all executables/libraries through
   libgcc/config/rs6000/crtdbase.S.  */

void
rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
{
  fputs (integer_asm_op (size, FALSE), file);
  assemble_name (file, label);
  fputs("-__gcc_unwind_dbase", file);
}

#ifdef HAVE_AS_TLS
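/* Implement TARGET_ENCODE_SECTION_INFO for XCOFF: clear the block-info
   flag on thread-local symbols, and append the XCOFF mapping class
   ([DS] for functions, [UA] for data) to the names of public extern
   decls.  */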
static void
rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
{
  rtx symbol;
  int flags;
  const char *symname;

  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (!MEM_P (rtl))
    return;
  symbol = XEXP (rtl, 0);
  if (GET_CODE (symbol) != SYMBOL_REF)
    return;

  flags = SYMBOL_REF_FLAGS (symbol);

  if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
    flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;

  SYMBOL_REF_FLAGS (symbol) = flags;

  /* Append mapping class to extern decls.  */
  symname = XSTR (symbol, 0);
  if (decl /* sync condition with assemble_external () */
      && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
      && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
	  || TREE_CODE (decl) == FUNCTION_DECL)
      && symname[strlen (symname) - 1] != ']')
    {
      char *newname = (char *) alloca (strlen (symname) + 5);
      strcpy (newname, symname);
      strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
			? "[DS]" : "[UA]"));
      XSTR (symbol, 0) = ggc_strdup (newname);
    }
}
#endif /* HAVE_AS_TLS */
#endif /* TARGET_XCOFF */

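/* Emit a .weak directive for NAME (and, for an AIX function decl, its
   dot-symbol), optionally defining it as VAL.  */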
void
rs6000_asm_weaken_decl (FILE *stream, tree decl,
			const char *name, const char *val)
{
  fputs ("\t.weak\t", stream);
  RS6000_OUTPUT_BASENAME (stream, name);
  if (decl && TREE_CODE (decl) == FUNCTION_DECL
      && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
    {
      if (TARGET_XCOFF)
	fputs ("[DS]", stream);
#if TARGET_XCOFF && HAVE_GAS_HIDDEN
      if (TARGET_XCOFF)
	fputs (rs6000_xcoff_visibility (decl), stream);
#endif
      fputs ("\n\t.weak\t.", stream);
      RS6000_OUTPUT_BASENAME (stream, name);
    }
#if TARGET_XCOFF && HAVE_GAS_HIDDEN
  if (TARGET_XCOFF)
    fputs (rs6000_xcoff_visibility (decl), stream);
#endif
  fputc ('\n', stream);
  if (val)
    {
#ifdef ASM_OUTPUT_DEF
      ASM_OUTPUT_DEF (stream, name, val);
#endif
      if (decl && TREE_CODE (decl) == FUNCTION_DECL
	  && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
	{
	  fputs ("\t.set\t.", stream);
	  RS6000_OUTPUT_BASENAME (stream, name);
	  fputs (",.", stream);
	  RS6000_OUTPUT_BASENAME (stream, val);
	  fputc ('\n', stream);
	}
    }
}


/* Return true if INSN should not be copied.  */

static bool
rs6000_cannot_copy_insn_p (rtx_insn *insn)
{
  return recog_memoized (insn) >= 0
	 && get_attr_cannot_copy (insn);
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
		  int opno ATTRIBUTE_UNUSED, int *total, bool speed)
{
  int code = GET_CODE (x);

  switch (code)
    {
      /* On the RS/6000, if it is valid in the insn, it is free.  */
    case CONST_INT:
      if (((outer_code == SET
	    || outer_code == PLUS
	    || outer_code == MINUS)
	   && (satisfies_constraint_I (x)
	       || satisfies_constraint_L (x)))
	  || (outer_code == AND
	      && (satisfies_constraint_K (x)
		  || (mode == SImode
		      ? satisfies_constraint_L (x)
		      : satisfies_constraint_J (x))))
	  || ((outer_code == IOR || outer_code == XOR)
	      && (satisfies_constraint_K (x)
		  || (mode == SImode
		      ? satisfies_constraint_L (x)
		      : satisfies_constraint_J (x))))
	  || outer_code == ASHIFT
	  || outer_code == ASHIFTRT
	  || outer_code == LSHIFTRT
	  || outer_code == ROTATE
	  || outer_code == ROTATERT
	  || outer_code == ZERO_EXTRACT
	  || (outer_code == MULT
	      && satisfies_constraint_I (x))
	  || ((outer_code == DIV || outer_code == UDIV
	       || outer_code == MOD || outer_code == UMOD)
	      && exact_log2 (INTVAL (x)) >= 0)
	  || (outer_code == COMPARE
	      && (satisfies_constraint_I (x)
		  || satisfies_constraint_K (x)))
	  || ((outer_code == EQ || outer_code == NE)
	      && (satisfies_constraint_I (x)
		  || satisfies_constraint_K (x)
		  || (mode == SImode
		      ? satisfies_constraint_L (x)
		      : satisfies_constraint_J (x))))
	  || (outer_code == GTU
	      && satisfies_constraint_I (x))
	  || (outer_code == LTU
	      && satisfies_constraint_P (x)))
	{
	  *total = 0;
	  return true;
	}
      else if ((outer_code == PLUS
		&& reg_or_add_cint_operand (x, VOIDmode))
	       || (outer_code == MINUS
		   && reg_or_sub_cint_operand (x, VOIDmode))
	       || ((outer_code == SET
		    || outer_code == IOR
		    || outer_code == XOR)
		   && (INTVAL (x)
		       & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      /* FALLTHRU */

    case CONST_DOUBLE:
    case CONST_WIDE_INT:
    case CONST:
    case HIGH:
    case SYMBOL_REF:
      *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
      return true;

    case MEM:
      /* When optimizing for size, MEM should be slightly more expensive
	 than generating address, e.g., (plus (reg) (const)).
	 L1 cache latency is about two instructions.  */
      *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
      if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
	*total += COSTS_N_INSNS (100);
      return true;

    case LABEL_REF:
      *total = 0;
      return true;

    case PLUS:
    case MINUS:
      if (FLOAT_MODE_P (mode))
	*total = rs6000_cost->fp;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case MULT:
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
	  && satisfies_constraint_I (XEXP (x, 1)))
	{
	  if (INTVAL (XEXP (x, 1)) >= -256
	      && INTVAL (XEXP (x, 1)) <= 255)
	    *total = rs6000_cost->mulsi_const9;
	  else
	    *total = rs6000_cost->mulsi_const;
	}
      else if (mode == SFmode)
	*total = rs6000_cost->fp;
      else if (FLOAT_MODE_P (mode))
	*total = rs6000_cost->dmul;
      else if (mode == DImode)
	*total = rs6000_cost->muldi;
      else
	*total = rs6000_cost->mulsi;
      return false;

    case FMA:
      if (mode == SFmode)
	*total = rs6000_cost->fp;
      else
	*total = rs6000_cost->dmul;
      break;

    case DIV:
    case MOD:
      if (FLOAT_MODE_P (mode))
	{
	  *total = mode == DFmode ? rs6000_cost->ddiv
				  : rs6000_cost->sdiv;
	  return false;
	}
      /* FALLTHRU */

    case UDIV:
    case UMOD:
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
	  && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
	{
	  if (code == DIV || code == MOD)
	    /* Shift, addze */
	    *total = COSTS_N_INSNS (2);
	  else
	    /* Shift */
	    *total = COSTS_N_INSNS (1);
	}
      else
	{
	  if (GET_MODE (XEXP (x, 1)) == DImode)
	    *total = rs6000_cost->divdi;
	  else
	    *total = rs6000_cost->divsi;
	}
      /* Add in shift and subtract for MOD unless we have a mod
	 instruction.  */
      if (!TARGET_MODULO && (code == MOD || code == UMOD))
	*total += COSTS_N_INSNS (2);
      return false;

    case CTZ:
      *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
      return false;

    case FFS:
      *total = COSTS_N_INSNS (4);
      return false;

    case POPCOUNT:
      *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
      return false;

    case PARITY:
      *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
      return false;

    case NOT:
      if (outer_code == AND || outer_code == IOR || outer_code == XOR)
	*total = 0;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case AND:
      if (CONST_INT_P (XEXP (x, 1)))
	{
	  rtx left = XEXP (x, 0);
	  rtx_code left_code = GET_CODE (left);

	  /* rotate-and-mask: 1 insn.  */
	  if ((left_code == ROTATE
	       || left_code == ASHIFT
	       || left_code == LSHIFTRT)
	      && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
	    {
	      *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
	      if (!CONST_INT_P (XEXP (left, 1)))
		*total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
	      *total += COSTS_N_INSNS (1);
	      return true;
	    }

	  /* rotate-and-mask (no rotate), andi., andis.: 1 insn.  */
	  HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
	  if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
	      || (val & 0xffff) == val
	      || (val & 0xffff0000) == val
	      || ((val & 0xffff) == 0 && mode == SImode))
	    {
	      *total = rtx_cost (left, mode, AND, 0, speed);
	      *total += COSTS_N_INSNS (1);
	      return true;
	    }

	  /* 2 insns.  */
	  if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
	    {
	      *total = rtx_cost (left, mode, AND, 0, speed);
	      *total += COSTS_N_INSNS (2);
	      return true;
	    }
	}

      *total = COSTS_N_INSNS (1);
      return false;

    case IOR:
      /* FIXME */
      *total = COSTS_N_INSNS (1);
      return true;

    case CLZ:
    case XOR:
    case ZERO_EXTRACT:
      *total = COSTS_N_INSNS (1);
      return false;

    case ASHIFT:
      /* The EXTSWSLI instruction is a combined instruction.  Don't count both
	 the sign extend and shift separately within the insn.  */
      if (TARGET_EXTSWSLI && mode == DImode
	  && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	  && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
	{
	  *total = 0;
	  return false;
	}
      /* fall through */

    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATE:
    case ROTATERT:
      /* Handle mul_highpart.  */
      if (outer_code == TRUNCATE
	  && GET_CODE (XEXP (x, 0)) == MULT)
	{
	  if (mode == DImode)
	    *total = rs6000_cost->muldi;
	  else
	    *total = rs6000_cost->mulsi;
	  return true;
	}
      else if (outer_code == AND)
	*total = 0;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      if (GET_CODE (XEXP (x, 0)) == MEM)
	*total = 0;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case COMPARE:
    case NEG:
    case ABS:
      if (!FLOAT_MODE_P (mode))
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}
      /* FALLTHRU */

    case FLOAT:
    case UNSIGNED_FLOAT:
    case FIX:
    case UNSIGNED_FIX:
    case FLOAT_TRUNCATE:
      *total = rs6000_cost->fp;
      return false;

    case FLOAT_EXTEND:
      if (mode == DFmode)
	*total = rs6000_cost->sfdf_convert;
      else
	*total = rs6000_cost->fp;
      return false;

    case UNSPEC:
      switch (XINT (x, 1))
	{
	case UNSPEC_FRSP:
	  *total = rs6000_cost->fp;
	  return true;

	default:
	  break;
	}
      break;

    case CALL:
    case IF_THEN_ELSE:
      if (!speed)
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      else if (FLOAT_MODE_P (mode)
	       && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
	{
	  *total = rs6000_cost->fp;
	  return false;
	}
      break;

    case NE:
    case EQ:
    case GTU:
    case LTU:
      /* Carry bit requires mode == Pmode.
	 NEG or PLUS already counted so only add one.  */
      if (mode == Pmode
	  && (outer_code == NEG || outer_code == PLUS))
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      if (outer_code == SET)
	{
	  if (XEXP (x, 1) == const0_rtx)
	    {
	      if (TARGET_ISEL && !TARGET_MFCRF)
		*total = COSTS_N_INSNS (8);
	      else
		*total = COSTS_N_INSNS (2);
	      return true;
	    }
	  else
	    {
	      *total = COSTS_N_INSNS (3);
	      return false;
	    }
	}
      /* FALLTHRU */

    case GT:
    case LT:
    case UNORDERED:
      if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
	{
	  if (TARGET_ISEL && !TARGET_MFCRF)
	    *total = COSTS_N_INSNS (8);
	  else
	    *total = COSTS_N_INSNS (2);
	  return true;
	}
      /* CC COMPARE.  */
      if (outer_code == COMPARE)
	{
	  *total = 0;
	  return true;
	}
      break;

    default:
      break;
    }

  return false;
}
/* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost.  */

static bool
rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
			int opno, int *total, bool speed)
{
  bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);

  fprintf (stderr,
	   "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
	   "opno = %d, total = %d, speed = %s, x:\n",
	   ret ? "complete" : "scan inner",
	   GET_MODE_NAME (mode),
	   GET_RTX_NAME (outer_code),
	   opno,
	   *total,
	   speed ? "true" : "false");

  debug_rtx (x);

  return ret;
}

/* Debug form of ADDRESS_COST that is selected if -mdebug=cost.  */

static int
rs6000_debug_address_cost (rtx x, machine_mode mode,
			   addr_space_t as, bool speed)
{
  int ret = TARGET_ADDRESS_COST (x, mode, as, speed);

  fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
	   ret, speed ? "true" : "false");
  debug_rtx (x);

  return ret;
}


/* A C expression returning the cost of moving data from a register of class
   CLASS1 to one of CLASS2.  */

static int
rs6000_register_move_cost (machine_mode mode,
			   reg_class_t from, reg_class_t to)
{
  int ret;

  if (TARGET_DEBUG_COST)
    dbg_cost_ctrl++;

  /*  Moves from/to GENERAL_REGS.  */
  if (reg_classes_intersect_p (to, GENERAL_REGS)
      || reg_classes_intersect_p (from, GENERAL_REGS))
    {
      reg_class_t rclass = from;

      if (! reg_classes_intersect_p (to, GENERAL_REGS))
	rclass = to;

      if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
	ret = (rs6000_memory_move_cost (mode, rclass, false)
	       + rs6000_memory_move_cost (mode, GENERAL_REGS, false));

      /* It's more expensive to move CR_REGS than CR0_REGS because of the
	 shift.  */
      else if (rclass == CR_REGS)
	ret = 4;

      /* For those processors that have slow LR/CTR moves, make them more
	 expensive than memory in order to bias spills to memory.  */
      else if ((rs6000_cpu == PROCESSOR_POWER6
		|| rs6000_cpu == PROCESSOR_POWER7
		|| rs6000_cpu == PROCESSOR_POWER8
		|| rs6000_cpu == PROCESSOR_POWER9)
	       && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
        ret = 6 * hard_regno_nregs (0, mode);

      else
	/* A move will cost one instruction per GPR moved.  */
	ret = 2 * hard_regno_nregs (0, mode);
    }

  /* If we have VSX, we can easily move between FPR or Altivec registers.  */
  else if (VECTOR_MEM_VSX_P (mode)
	   && reg_classes_intersect_p (to, VSX_REGS)
	   && reg_classes_intersect_p (from, VSX_REGS))
    ret = 2 * hard_regno_nregs (FIRST_FPR_REGNO, mode);

  /* Moving between two similar registers is just one instruction.  */
  else if (reg_classes_intersect_p (to, from))
    ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;

  /* Everything else has to go through GENERAL_REGS.  */
  else
    ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
	   + rs6000_register_move_cost (mode, from, GENERAL_REGS));

  if (TARGET_DEBUG_COST)
    {
      if (dbg_cost_ctrl == 1)
	fprintf (stderr,
		 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
		 ret, GET_MODE_NAME (mode), reg_class_names[from],
		 reg_class_names[to]);
      dbg_cost_ctrl--;
    }

  return ret;
}

/* A C expression returning the cost of moving data of MODE from a register to
   or from memory.  */

static int
rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
			 bool in ATTRIBUTE_UNUSED)
{
  int ret;

  if (TARGET_DEBUG_COST)
    dbg_cost_ctrl++;

  if (reg_classes_intersect_p (rclass, GENERAL_REGS))
    ret = 4 * hard_regno_nregs (0, mode);
  else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
	    || reg_classes_intersect_p (rclass, VSX_REGS)))
    ret = 4 * hard_regno_nregs (32, mode);
  else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
    ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
  else
    ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);

  if (TARGET_DEBUG_COST)
    {
      if (dbg_cost_ctrl == 1)
	fprintf (stderr,
		 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
		 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
      dbg_cost_ctrl--;
    }

  return ret;
}

/* Returns a code for a target-specific builtin that implements
   reciprocal of the function, or NULL_TREE if not available.  */

static tree
rs6000_builtin_reciprocal (tree fndecl)
{
  switch (DECL_FUNCTION_CODE (fndecl))
    {
    case VSX_BUILTIN_XVSQRTDP:
      if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
	return NULL_TREE;

      return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];

    case VSX_BUILTIN_XVSQRTSP:
      if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
	return NULL_TREE;

      return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];

    default:
      return NULL_TREE;
    }
}

/* Load up a constant.  If the mode is a vector mode, splat the value across
   all of the vector elements.  */

static rtx
rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
{
  rtx reg;

  if (mode == SFmode || mode == DFmode)
    {
      rtx d = const_double_from_real_value (dconst, mode);
      reg = force_reg (mode, d);
    }
  else if (mode == V4SFmode)
    {
      rtx d = const_double_from_real_value (dconst, SFmode);
      rtvec v = gen_rtvec (4, d, d, d, d);
      reg = gen_reg_rtx (mode);
      rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
    }
  else if (mode == V2DFmode)
    {
      rtx d = const_double_from_real_value (dconst, DFmode);
      rtvec v = gen_rtvec (2, d, d);
      reg = gen_reg_rtx (mode);
      rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
    }
  else
    gcc_unreachable ();

  return reg;
}

/* Generate an FMA instruction.  */

static void
rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
{
  machine_mode mode = GET_MODE (target);
  rtx dst;

  dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
  gcc_assert (dst != NULL);

  if (dst != target)
    emit_move_insn (target, dst);
}

/* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a).  */

static void
rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
{
  machine_mode mode = GET_MODE (dst);
  rtx r;

  /* This is a tad more complicated, since the fnma_optab is for
     a different expression: fma(-m1, m2, a), which is the same
     thing except in the case of signed zeros.

     Fortunately we know that if FMA is supported that FNMSUB is
     also supported in the ISA.  Just expand it directly.  */

  gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);

  r = gen_rtx_NEG (mode, a);
  r = gen_rtx_FMA (mode, m1, m2, r);
  r = gen_rtx_NEG (mode, r);
  emit_insn (gen_rtx_SET (dst, r));
}

/* Newton-Raphson approximation of floating point divide DST = N/D.  If NOTE_P,
   add a reg_note saying that this was a division.  Support both scalar and
   vector divide.  Assumes no trapping math and finite arguments.  */

void
rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
{
  machine_mode mode = GET_MODE (dst);
  rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
  int i;

  /* Low precision estimates guarantee 5 bits of accuracy.  High
     precision estimates guarantee 14 bits of accuracy.  SFmode
     requires 23 bits of accuracy.  DFmode requires 52 bits of
     accuracy.  Each pass at least doubles the accuracy, leading
     to the following.  */
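  /* Concretely: each refinement step e = 1 - d*x, x' = x + e*x squares
     the relative error of x as an estimate of 1/d, so a 5-bit estimate
     reaches roughly 10, 20, 40, 80 accurate bits after 1, 2, 3, 4
     passes, and a 14-bit estimate needs only 1 pass for SFmode and 2
     for DFmode.  */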
  int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
  if (mode == DFmode || mode == V2DFmode)
    passes++;

  enum insn_code code = optab_handler (smul_optab, mode);
  insn_gen_fn gen_mul = GEN_FCN (code);

  gcc_assert (code != CODE_FOR_nothing);

  one = rs6000_load_constant_and_splat (mode, dconst1);

  /* x0 = 1./d estimate */
  x0 = gen_reg_rtx (mode);
  emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
					      UNSPEC_FRES)));

  /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i).  */
  if (passes > 1) {

    /* e0 = 1. - d * x0  */
    e0 = gen_reg_rtx (mode);
    rs6000_emit_nmsub (e0, d, x0, one);

    /* x1 = x0 + e0 * x0  */
    x1 = gen_reg_rtx (mode);
    rs6000_emit_madd (x1, e0, x0, x0);

    for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
	 ++i, xprev = xnext, eprev = enext) {

      /* enext = eprev * eprev  */
      enext = gen_reg_rtx (mode);
      emit_insn (gen_mul (enext, eprev, eprev));

      /* xnext = xprev + enext * xprev  */
      xnext = gen_reg_rtx (mode);
      rs6000_emit_madd (xnext, enext, xprev, xprev);
    }

  } else
    xprev = x0;

  /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i).  */

  /* u = n * xprev  */
  u = gen_reg_rtx (mode);
  emit_insn (gen_mul (u, n, xprev));

  /* v = n - (d * u)  */
  v = gen_reg_rtx (mode);
  rs6000_emit_nmsub (v, d, u, n);

  /* dst = (v * xprev) + u  */
  rs6000_emit_madd (dst, v, xprev, u);

  if (note_p)
    add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
}

/* Goldschmidt's Algorithm for single/double-precision floating point
   sqrt and rsqrt.  Assumes no trapping math and finite arguments.  */

void
rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
{
  machine_mode mode = GET_MODE (src);
  rtx e = gen_reg_rtx (mode);
  rtx g = gen_reg_rtx (mode);
  rtx h = gen_reg_rtx (mode);

  /* Low precision estimates guarantee 5 bits of accuracy.  High
     precision estimates guarantee 14 bits of accuracy.  SFmode
     requires 23 bits of accuracy.  DFmode requires 52 bits of
     accuracy.  Each pass at least doubles the accuracy, leading
     to the following.  */
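  /* Starting from g0 = src*e and h0 = e/2, where e is an rsqrt
     estimate, each Goldschmidt step computes t = 1/2 - g*h and then
     refines g' = g + g*t toward sqrt(src) and h' = h + h*t toward
     1/(2*sqrt(src)), again roughly doubling the number of accurate
     bits per step.  */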
  int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
  if (mode == DFmode || mode == V2DFmode)
    passes++;

  int i;
  rtx mhalf;
  enum insn_code code = optab_handler (smul_optab, mode);
  insn_gen_fn gen_mul = GEN_FCN (code);

  gcc_assert (code != CODE_FOR_nothing);

  mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);

  /* e = rsqrt estimate */
  emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
					     UNSPEC_RSQRT)));

  /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0).  */
  if (!recip)
    {
      rtx zero = force_reg (mode, CONST0_RTX (mode));

      if (mode == SFmode)
	{
	  rtx target = emit_conditional_move (e, GT, src, zero, mode,
					      e, zero, mode, 0);
	  if (target != e)
	    emit_move_insn (e, target);
	}
      else
	{
	  rtx cond = gen_rtx_GT (VOIDmode, e, zero);
	  rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
	}
    }

  /* g = sqrt estimate.  */
  emit_insn (gen_mul (g, e, src));
  /* h = 1/(2*sqrt) estimate.  */
  emit_insn (gen_mul (h, e, mhalf));

  if (recip)
    {
      if (passes == 1)
	{
	  rtx t = gen_reg_rtx (mode);
	  rs6000_emit_nmsub (t, g, h, mhalf);
	  /* Apply correction directly to 1/rsqrt estimate.  */
	  rs6000_emit_madd (dst, e, t, e);
	}
      else
	{
	  for (i = 0; i < passes; i++)
	    {
	      rtx t1 = gen_reg_rtx (mode);
	      rtx g1 = gen_reg_rtx (mode);
	      rtx h1 = gen_reg_rtx (mode);

	      rs6000_emit_nmsub (t1, g, h, mhalf);
	      rs6000_emit_madd (g1, g, t1, g);
	      rs6000_emit_madd (h1, h, t1, h);

	      g = g1;
	      h = h1;
	    }
	  /* Multiply by 2 for 1/rsqrt.  */
	  emit_insn (gen_add3_insn (dst, h, h));
	}
    }
  else
    {
      rtx t = gen_reg_rtx (mode);
      rs6000_emit_nmsub (t, g, h, mhalf);
      rs6000_emit_madd (dst, g, t, g);
    }

  return;
}

/* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
   (Power7) targets.  DST is the target, and SRC is the argument operand.  */

void
rs6000_emit_popcount (rtx dst, rtx src)
{
  machine_mode mode = GET_MODE (dst);
  rtx tmp1, tmp2;

  /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can.  */
  if (TARGET_POPCNTD)
    {
      if (mode == SImode)
	emit_insn (gen_popcntdsi2 (dst, src));
      else
	emit_insn (gen_popcntddi2 (dst, src));
      return;
    }

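  /* Otherwise fall back on popcntb, which leaves the number of one
     bits of each byte in that byte.  Multiplying by 0x01010101 (or its
     64-bit analogue) accumulates the sum of all the byte counts into
     the most significant byte, which the final shift extracts.  */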
  tmp1 = gen_reg_rtx (mode);

  if (mode == SImode)
    {
      emit_insn (gen_popcntbsi2 (tmp1, src));
      tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
			   NULL_RTX, 0);
      tmp2 = force_reg (SImode, tmp2);
      emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
    }
  else
    {
      emit_insn (gen_popcntbdi2 (tmp1, src));
      tmp2 = expand_mult (DImode, tmp1,
			  GEN_INT ((HOST_WIDE_INT)
				   0x01010101 << 32 | 0x01010101),
			  NULL_RTX, 0);
      tmp2 = force_reg (DImode, tmp2);
      emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
    }
}


/* Emit parity intrinsic on TARGET_POPCNTB targets.  DST is the
   target, and SRC is the argument operand.  */

void
rs6000_emit_parity (rtx dst, rtx src)
{
  machine_mode mode = GET_MODE (dst);
  rtx tmp;

  tmp = gen_reg_rtx (mode);

  /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can.  */
  if (TARGET_CMPB)
    {
      if (mode == SImode)
	{
	  emit_insn (gen_popcntbsi2 (tmp, src));
	  emit_insn (gen_paritysi2_cmpb (dst, tmp));
	}
      else
	{
	  emit_insn (gen_popcntbdi2 (tmp, src));
	  emit_insn (gen_paritydi2_cmpb (dst, tmp));
	}
      return;
    }

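  /* Without prtyw/prtyd, either compute a full popcount and take its
     low bit, or xor-fold the popcntb result: xoring the upper half
     into the lower half (repeatedly, down to a single byte) preserves
     the overall parity, which the final AND with 1 extracts.  */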
  if (mode == SImode)
    {
      /* Is mult+shift >= shift+xor+shift+xor?  */
      if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
	{
	  rtx tmp1, tmp2, tmp3, tmp4;

	  tmp1 = gen_reg_rtx (SImode);
	  emit_insn (gen_popcntbsi2 (tmp1, src));

	  tmp2 = gen_reg_rtx (SImode);
	  emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
	  tmp3 = gen_reg_rtx (SImode);
	  emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));

	  tmp4 = gen_reg_rtx (SImode);
	  emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
	  emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
	}
      else
	rs6000_emit_popcount (tmp, src);
      emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
    }
  else
    {
      /* Is mult+shift >= shift+xor+shift+xor+shift+xor?  */
      if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
	{
	  rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;

	  tmp1 = gen_reg_rtx (DImode);
	  emit_insn (gen_popcntbdi2 (tmp1, src));

	  tmp2 = gen_reg_rtx (DImode);
	  emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
	  tmp3 = gen_reg_rtx (DImode);
	  emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));

	  tmp4 = gen_reg_rtx (DImode);
	  emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
	  tmp5 = gen_reg_rtx (DImode);
	  emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));

	  tmp6 = gen_reg_rtx (DImode);
	  emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
	  emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
	}
      else
        rs6000_emit_popcount (tmp, src);
      emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
    }
}

/* Expand an Altivec constant permutation for little endian mode.
   OP0 and OP1 are the input vectors and TARGET is the output vector.
   SEL specifies the constant permutation vector.

   There are two issues: First, the two input operands must be
   swapped so that together they form a double-wide array in LE
   order.  Second, the vperm instruction has surprising behavior
   in LE mode:  it interprets the elements of the source vectors
   in BE mode ("left to right") and interprets the elements of
   the destination vector in LE mode ("right to left").  To
   correct for this, we must subtract each element of the permute
   control vector from 31.

   For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
   with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
   We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
   serve as the permute control vector.  Then, in BE mode,

     vperm 9,10,11,12

   places the desired result in vr9.  However, in LE mode the
   vector contents will be

     vr10 = 00000003 00000002 00000001 00000000
     vr11 = 00000007 00000006 00000005 00000004

   The result of the vperm using the same permute control vector is

     vr9  = 05000000 07000000 01000000 03000000

   That is, the leftmost 4 bytes of vr10 are interpreted as the
   source for the rightmost 4 bytes of vr9, and so on.

   If we change the permute control vector to
     vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}

   and issue

     vperm 9,11,10,12

   we get the desired

   vr9  = 00000006 00000004 00000002 00000000.  */

static void
altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
				  const vec_perm_indices &sel)
{
  unsigned int i;
  rtx perm[16];
  rtx constv, unspec;

  /* Unpack and adjust the constant selector.  */
  for (i = 0; i < 16; ++i)
    {
      unsigned int elt = 31 - (sel[i] & 31);
      perm[i] = GEN_INT (elt);
    }

  /* Expand to a permute, swapping the inputs and using the
     adjusted selector.  */
  if (!REG_P (op0))
    op0 = force_reg (V16QImode, op0);
  if (!REG_P (op1))
    op1 = force_reg (V16QImode, op1);

  constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
  constv = force_reg (V16QImode, constv);
  unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
			   UNSPEC_VPERM);
  if (!REG_P (target))
    {
      rtx tmp = gen_reg_rtx (V16QImode);
      emit_move_insn (tmp, unspec);
      unspec = tmp;
    }

  emit_move_insn (target, unspec);
}

/* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
   permute control vector.  But here it's not a constant, so we must
   generate a vector NAND or NOR to do the adjustment.  */

void
altivec_expand_vec_perm_le (rtx operands[4])
{
  rtx notx, iorx, unspec;
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx sel = operands[3];
  rtx tmp = target;
  rtx norreg = gen_reg_rtx (V16QImode);
  machine_mode mode = GET_MODE (target);

  /* Get everything in regs so the pattern matches.  */
  if (!REG_P (op0))
    op0 = force_reg (mode, op0);
  if (!REG_P (op1))
    op1 = force_reg (mode, op1);
  if (!REG_P (sel))
    sel = force_reg (V16QImode, sel);
  if (!REG_P (target))
    tmp = gen_reg_rtx (mode);

  if (TARGET_P9_VECTOR)
    {
      unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op0, op1, sel),
			       UNSPEC_VPERMR);
    }
  else
    {
      /* Invert the selector with a VNAND if available, else a VNOR.
	 The VNAND is preferred for future fusion opportunities.  */
      notx = gen_rtx_NOT (V16QImode, sel);
      iorx = (TARGET_P8_VECTOR
	      ? gen_rtx_IOR (V16QImode, notx, notx)
	      : gen_rtx_AND (V16QImode, notx, notx));
      emit_insn (gen_rtx_SET (norreg, iorx));

      /* Permute with operands reversed and adjusted selector.  */
      unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
			       UNSPEC_VPERM);
    }

  /* Copy into target, possibly by way of a register.  */
  if (!REG_P (target))
    {
      emit_move_insn (tmp, unspec);
      unspec = tmp;
    }

  emit_move_insn (target, unspec);
}

/* Expand an Altivec constant permutation.  Return true if we match
   an efficient implementation; false to fall back to VPERM.

   OP0 and OP1 are the input vectors and TARGET is the output vector.
   SEL specifies the constant permutation vector.  */

static bool
altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
			       const vec_perm_indices &sel)
{
  struct altivec_perm_insn {
    HOST_WIDE_INT mask;
    enum insn_code impl;
    unsigned char perm[16];
  };
  static const struct altivec_perm_insn patterns[] = {
    { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
      {  1,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
    { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
      {  2,  3,  6,  7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
       : CODE_FOR_altivec_vmrglb_direct),
      {  0, 16,  1, 17,  2, 18,  3, 19,  4, 20,  5, 21,  6, 22,  7, 23 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
       : CODE_FOR_altivec_vmrglh_direct),
      {  0,  1, 16, 17,  2,  3, 18, 19,  4,  5, 20, 21,  6,  7, 22, 23 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
       : CODE_FOR_altivec_vmrglw_direct),
      {  0,  1,  2,  3, 16, 17, 18, 19,  4,  5,  6,  7, 20, 21, 22, 23 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
       : CODE_FOR_altivec_vmrghb_direct),
      {  8, 24,  9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
       : CODE_FOR_altivec_vmrghh_direct),
      {  8,  9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
       : CODE_FOR_altivec_vmrghw_direct),
      {  8,  9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
    { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
      {  0,  1,  2,  3, 16, 17, 18, 19,  8,  9, 10, 11, 24, 25, 26, 27 } },
    { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
      {  4,  5,  6,  7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
  };

  unsigned int i, j, elt, which;
  unsigned char perm[16];
  rtx x;
  bool one_vec;

  /* Unpack the constant selector.  */
  for (i = which = 0; i < 16; ++i)
    {
      elt = sel[i] & 31;
      which |= (elt < 16 ? 1 : 2);
      perm[i] = elt;
    }

  /* Simplify the constant selector based on operands.  */
  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      one_vec = false;
      if (!rtx_equal_p (op0, op1))
	break;
      /* FALLTHRU */

    case 2:
      for (i = 0; i < 16; ++i)
	perm[i] &= 15;
      op0 = op1;
      one_vec = true;
      break;

    case 1:
      op1 = op0;
      one_vec = true;
      break;
    }

  /* Look for splat patterns.  */
  if (one_vec)
    {
      elt = perm[0];

      for (i = 0; i < 16; ++i)
	if (perm[i] != elt)
	  break;
      if (i == 16)
	{
          if (!BYTES_BIG_ENDIAN)
            elt = 15 - elt;
	  emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
	  return true;
	}

38558       if (elt % 2 == 0)
38559 	{
38560 	  for (i = 0; i < 16; i += 2)
38561 	    if (perm[i] != elt || perm[i + 1] != elt + 1)
38562 	      break;
38563 	  if (i == 16)
38564 	    {
38565 	      int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
38566 	      x = gen_reg_rtx (V8HImode);
38567 	      emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
38568 						    GEN_INT (field)));
38569 	      emit_move_insn (target, gen_lowpart (V16QImode, x));
38570 	      return true;
38571 	    }
38572 	}
38573 
38574       if (elt % 4 == 0)
38575 	{
38576 	  for (i = 0; i < 16; i += 4)
38577 	    if (perm[i] != elt
38578 		|| perm[i + 1] != elt + 1
38579 		|| perm[i + 2] != elt + 2
38580 		|| perm[i + 3] != elt + 3)
38581 	      break;
38582 	  if (i == 16)
38583 	    {
38584 	      int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
38585 	      x = gen_reg_rtx (V4SImode);
38586 	      emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
38587 						    GEN_INT (field)));
38588 	      emit_move_insn (target, gen_lowpart (V16QImode, x));
38589 	      return true;
38590 	    }
38591 	}
38592     }
38593 
38594   /* Look for merge and pack patterns.  */
38595   for (j = 0; j < ARRAY_SIZE (patterns); ++j)
38596     {
38597       bool swapped;
38598 
38599       if ((patterns[j].mask & rs6000_isa_flags) == 0)
38600 	continue;
38601 
38602       elt = patterns[j].perm[0];
38603       if (perm[0] == elt)
38604 	swapped = false;
38605       else if (perm[0] == elt + 16)
38606 	swapped = true;
38607       else
38608 	continue;
38609       for (i = 1; i < 16; ++i)
38610 	{
38611 	  elt = patterns[j].perm[i];
38612 	  if (swapped)
38613 	    elt = (elt >= 16 ? elt - 16 : elt + 16);
38614 	  else if (one_vec && elt >= 16)
38615 	    elt -= 16;
38616 	  if (perm[i] != elt)
38617 	    break;
38618 	}
38619       if (i == 16)
38620 	{
38621 	  enum insn_code icode = patterns[j].impl;
38622 	  machine_mode omode = insn_data[icode].operand[0].mode;
38623 	  machine_mode imode = insn_data[icode].operand[1].mode;
38624 
38625 	  /* For little-endian, don't use vpkuwum and vpkuhum if the
38626 	     underlying vector type is not V4SI or V8HI, respectively.
38627 	     For example, using vpkuwum with a V8HI picks up the even
38628 	     halfwords (BE numbering) when the even halfwords (LE
38629 	     numbering) are what we need.  */
38630 	  if (!BYTES_BIG_ENDIAN
38631 	      && icode == CODE_FOR_altivec_vpkuwum_direct
38632 	      && ((GET_CODE (op0) == REG
38633 		   && GET_MODE (op0) != V4SImode)
38634 		  || (GET_CODE (op0) == SUBREG
38635 		      && GET_MODE (XEXP (op0, 0)) != V4SImode)))
38636 	    continue;
38637 	  if (!BYTES_BIG_ENDIAN
38638 	      && icode == CODE_FOR_altivec_vpkuhum_direct
38639 	      && ((GET_CODE (op0) == REG
38640 		   && GET_MODE (op0) != V8HImode)
38641 		  || (GET_CODE (op0) == SUBREG
38642 		      && GET_MODE (XEXP (op0, 0)) != V8HImode)))
38643 	    continue;
38644 
38645           /* For little-endian, the two input operands must be swapped
38646              (or swapped back) to ensure proper right-to-left numbering
38647              from 0 to 2N-1.  */
38648 	  if (swapped ^ !BYTES_BIG_ENDIAN)
38649 	    std::swap (op0, op1);
38650 	  if (imode != V16QImode)
38651 	    {
38652 	      op0 = gen_lowpart (imode, op0);
38653 	      op1 = gen_lowpart (imode, op1);
38654 	    }
38655 	  if (omode == V16QImode)
38656 	    x = target;
38657 	  else
38658 	    x = gen_reg_rtx (omode);
38659 	  emit_insn (GEN_FCN (icode) (x, op0, op1));
38660 	  if (omode != V16QImode)
38661 	    emit_move_insn (target, gen_lowpart (V16QImode, x));
38662 	  return true;
38663 	}
38664     }
38665 
38666   if (!BYTES_BIG_ENDIAN)
38667     {
38668       altivec_expand_vec_perm_const_le (target, op0, op1, sel);
38669       return true;
38670     }
38671 
38672   return false;
38673 }
38674 
38675 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
38676    Return true if we match an efficient implementation.  */
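/* Illustrative examples: PERM0/PERM1 index the four elements of the
   concatenation of OP0 and OP1, so (0, 2) picks the first element of
   each input, (1, 3) the second of each, and (0, 3) or (1, 2) the
   mixed forms; on VSX these correspond to the xxpermdi variants.  */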
38677 
38678 static bool
38679 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
38680 				unsigned char perm0, unsigned char perm1)
38681 {
38682   rtx x;
38683 
38684   /* If both selectors come from the same operand, fold to single op.  */
38685   if ((perm0 & 2) == (perm1 & 2))
38686     {
38687       if (perm0 & 2)
38688 	op0 = op1;
38689       else
38690 	op1 = op0;
38691     }
38692   /* If both operands are equal, fold to simpler permutation.  */
38693   if (rtx_equal_p (op0, op1))
38694     {
38695       perm0 = perm0 & 1;
38696       perm1 = (perm1 & 1) + 2;
38697     }
38698   /* If the first selector comes from the second operand, swap.  */
38699   else if (perm0 & 2)
38700     {
38701       if (perm1 & 2)
38702 	return false;
38703       perm0 -= 2;
38704       perm1 += 2;
38705       std::swap (op0, op1);
38706     }
38707   /* If the second selector does not come from the second operand, fail.  */
38708   else if ((perm1 & 2) == 0)
38709     return false;
38710 
38711   /* Success! */
38712   if (target != NULL)
38713     {
38714       machine_mode vmode, dmode;
38715       rtvec v;
38716 
38717       vmode = GET_MODE (target);
38718       gcc_assert (GET_MODE_NUNITS (vmode) == 2);
38719       dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
38720       x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
38721       v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
38722       x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
38723       emit_insn (gen_rtx_SET (target, x));
38724     }
38725   return true;
38726 }
38727 
38728 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */
38729 
38730 static bool
38731 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
38732 				 rtx op1, const vec_perm_indices &sel)
38733 {
38734   bool testing_p = !target;
38735 
38736   /* AltiVec (and thus VSX) can handle arbitrary permutations.  */
38737   if (TARGET_ALTIVEC && testing_p)
38738     return true;
38739 
38740   /* Check for ps_merge*, evmerge* or xxperm* insns.  */
38741   if ((vmode == V2SFmode && TARGET_PAIRED_FLOAT)
38742       || (vmode == V2SImode && TARGET_SPE)
38743       || ((vmode == V2DFmode || vmode == V2DImode)
38744 	  && VECTOR_MEM_VSX_P (vmode)))
38745     {
38746       if (testing_p)
38747 	{
38748 	  op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
38749 	  op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
38750 	}
38751       if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
38752 	return true;
38753     }
38754 
38755   if (TARGET_ALTIVEC)
38756     {
38757       /* Force the target-independent code to lower to V16QImode.  */
38758       if (vmode != V16QImode)
38759 	return false;
38760       if (altivec_expand_vec_perm_const (target, op0, op1, sel))
38761 	return true;
38762     }
38763 
38764   return false;
38765 }
38766 
38767 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
38768    OP0 and OP1 are the input vectors and TARGET is the output vector.
38769    PERM specifies the constant permutation vector.  */
38770 
38771 static void
38772 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
38773 			   machine_mode vmode, const vec_perm_builder &perm)
38774 {
38775   rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
38776   if (x != target)
38777     emit_move_insn (target, x);
38778 }
38779 
38780 /* Expand an extract even operation.  */
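/* For example, for V8HI the selector built below is
   { 0, 2, 4, 6, 8, 10, 12, 14 }: the even elements of the double-wide
   concatenation of OP0 and OP1.  */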
38781 
38782 void
38783 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
38784 {
38785   machine_mode vmode = GET_MODE (target);
38786   unsigned i, nelt = GET_MODE_NUNITS (vmode);
38787   vec_perm_builder perm (nelt, nelt, 1);
38788 
38789   for (i = 0; i < nelt; i++)
38790     perm.quick_push (i * 2);
38791 
38792   rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
38793 }
38794 
38795 /* Expand a vector interleave operation.  */
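/* For example, for V4SI with HIGHP true the selector built below is
   { 0, 4, 1, 5 }; with HIGHP false it is { 2, 6, 3, 7 }.  */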
38796 
38797 void
38798 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
38799 {
38800   machine_mode vmode = GET_MODE (target);
38801   unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
38802   vec_perm_builder perm (nelt, nelt, 1);
38803 
38804   high = (highp ? 0 : nelt / 2);
38805   for (i = 0; i < nelt / 2; i++)
38806     {
38807       perm.quick_push (i + high);
38808       perm.quick_push (i + nelt + high);
38809     }
38810 
38811   rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
38812 }
38813 
38814 /* Scale a V2DF vector SRC by 2**SCALE and place the result in TGT.  */
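/* E.g. rs6000_scale_v2df (tgt, src, 3) multiplies both lanes of SRC by
   8.0 using one vector multiply against a splat of 2.0**3.  */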
38815 void
38816 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
38817 {
38818   HOST_WIDE_INT hwi_scale (scale);
38819   REAL_VALUE_TYPE r_pow;
38820   rtvec v = rtvec_alloc (2);
38821   rtx elt;
38822   rtx scale_vec = gen_reg_rtx (V2DFmode);
38823   (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
38824   elt = const_double_from_real_value (r_pow, DFmode);
38825   RTVEC_ELT (v, 0) = elt;
38826   RTVEC_ELT (v, 1) = elt;
38827   rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
38828   emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
38829 }
38830 
38831 /* Return an RTX representing where to find the function value of a
38832    function returning MODE.  */
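/* Illustrative example: a complex float (SCmode) return with hard float
   has 4-byte parts, so the PARALLEL built below places the real and
   imaginary parts in consecutive FP registers starting at
   FP_ARG_RETURN; a complex double (DCmode) has 8-byte parts and is
   returned as a single REG instead.  */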
38833 static rtx
38834 rs6000_complex_function_value (machine_mode mode)
38835 {
38836   unsigned int regno;
38837   rtx r1, r2;
38838   machine_mode inner = GET_MODE_INNER (mode);
38839   unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
38840 
38841   if (TARGET_FLOAT128_TYPE
38842       && (mode == KCmode
38843 	  || (mode == TCmode && TARGET_IEEEQUAD)))
38844     regno = ALTIVEC_ARG_RETURN;
38845 
38846   else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
38847     regno = FP_ARG_RETURN;
38848 
38849   else
38850     {
38851       regno = GP_ARG_RETURN;
38852 
38853       /* 32-bit is OK since it'll go in r3/r4.  */
38854       if (TARGET_32BIT && inner_bytes >= 4)
38855 	return gen_rtx_REG (mode, regno);
38856     }
38857 
38858   if (inner_bytes >= 8)
38859     return gen_rtx_REG (mode, regno);
38860 
38861   r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
38862 			  const0_rtx);
38863   r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
38864 			  GEN_INT (inner_bytes));
38865   return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
38866 }
38867 
38868 /* Return an rtx describing a return value of MODE as a PARALLEL
38869    in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
38870    stride REG_STRIDE.  */
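/* E.g. rs6000_parallel_return (DImode, 2, SImode, GP_ARG_RETURN, 1),
   as used for libcalls below, describes a 64-bit value split across two
   consecutive GP registers, with byte offsets 0 and 4 recorded in the
   EXPR_LISTs.  */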
38871 
38872 static rtx
38873 rs6000_parallel_return (machine_mode mode,
38874 			int n_elts, machine_mode elt_mode,
38875 			unsigned int regno, unsigned int reg_stride)
38876 {
38877   rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
38878 
38879   int i;
38880   for (i = 0; i < n_elts; i++)
38881     {
38882       rtx r = gen_rtx_REG (elt_mode, regno);
38883       rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
38884       XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
38885       regno += reg_stride;
38886     }
38887 
38888   return par;
38889 }
38890 
38891 /* Target hook for TARGET_FUNCTION_VALUE.
38892 
38893    On the SPE, both FPs and vectors are returned in r3.
38894 
38895    On RS/6000 an integer value is in r3 and a floating-point value is in
38896    fp1, unless -msoft-float.  */
38897 
38898 static rtx
38899 rs6000_function_value (const_tree valtype,
38900 		       const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
38901 		       bool outgoing ATTRIBUTE_UNUSED)
38902 {
38903   machine_mode mode;
38904   unsigned int regno;
38905   machine_mode elt_mode;
38906   int n_elts;
38907 
38908   /* Special handling for structs in darwin64.  */
38909   if (TARGET_MACHO
38910       && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
38911     {
38912       CUMULATIVE_ARGS valcum;
38913       rtx valret;
38914 
38915       valcum.words = 0;
38916       valcum.fregno = FP_ARG_MIN_REG;
38917       valcum.vregno = ALTIVEC_ARG_MIN_REG;
38918       /* Do a trial code generation as if this were going to be passed as
38919 	 an argument; if any part goes in memory, we return NULL.  */
38920       valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
38921       if (valret)
38922 	return valret;
38923       /* Otherwise fall through to standard ABI rules.  */
38924     }
38925 
38926   mode = TYPE_MODE (valtype);
38927 
38928   /* The ELFv2 ABI returns homogeneous floating-point and vector aggregates
38928      in registers.  */
38929   if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
38930     {
38931       int first_reg, n_regs;
38932 
38933       if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
38934 	{
38935 	  /* _Decimal128 must use even/odd register pairs.  */
38936 	  first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
38937 	  n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
38938 	}
38939       else
38940 	{
38941 	  first_reg = ALTIVEC_ARG_RETURN;
38942 	  n_regs = 1;
38943 	}
38944 
38945       return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
38946     }
38947 
38948   /* Some return value types need to be split under the 32-bit ABI with -mpowerpc64.  */
38949   if (TARGET_32BIT && TARGET_POWERPC64)
38950     switch (mode)
38951       {
38952       default:
38953 	break;
38954       case E_DImode:
38955       case E_SCmode:
38956       case E_DCmode:
38957       case E_TCmode:
38958 	int count = GET_MODE_SIZE (mode) / 4;
38959 	return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
38960       }
38961 
38962   if ((INTEGRAL_TYPE_P (valtype)
38963        && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
38964       || POINTER_TYPE_P (valtype))
38965     mode = TARGET_32BIT ? SImode : DImode;
38966 
38967   if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
38968     /* _Decimal128 must use an even/odd register pair.  */
38969     regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
38970   else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS
38971 	   && !FLOAT128_VECTOR_P (mode)
38972 	   && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
38973     regno = FP_ARG_RETURN;
38974   else if (TREE_CODE (valtype) == COMPLEX_TYPE
38975 	   && targetm.calls.split_complex_arg)
38976     return rs6000_complex_function_value (mode);
38977   /* VSX is a superset of Altivec and adds V2DImode/V2DFmode.  Since the same
38978      return register is used in both cases, and we won't see V2DImode/V2DFmode
38979      for pure altivec, combine the two cases.  */
38980   else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
38981 	   && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
38982 	   && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
38983     regno = ALTIVEC_ARG_RETURN;
38984   else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
38985 	   && (mode == DFmode || mode == DCmode
38986 	       || FLOAT128_IBM_P (mode) || mode == TCmode))
38987     return spe_build_register_parallel (mode, GP_ARG_RETURN);
38988   else
38989     regno = GP_ARG_RETURN;
38990 
38991   return gen_rtx_REG (mode, regno);
38992 }
38993 
38994 /* Define how to find the value returned by a library function
38995    assuming the value has mode MODE.  */
38996 rtx
38997 rs6000_libcall_value (machine_mode mode)
38998 {
38999   unsigned int regno;
39000 
39001   /* Long long return values need to be split under the 32-bit ABI with -mpowerpc64.  */
39002   if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
39003     return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
39004 
39005   if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
39006     /* _Decimal128 must use an even/odd register pair.  */
39007     regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
39008   else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode)
39009 	   && TARGET_HARD_FLOAT && TARGET_FPRS
39010            && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
39011     regno = FP_ARG_RETURN;
39012   /* VSX is a superset of Altivec and adds V2DImode/V2DFmode.  Since the same
39013      return register is used in both cases, and we won't see V2DImode/V2DFmode
39014      for pure altivec, combine the two cases.  */
39015   else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
39016 	   && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
39017     regno = ALTIVEC_ARG_RETURN;
39018   else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
39019     return rs6000_complex_function_value (mode);
39020   else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
39021 	   && (mode == DFmode || mode == DCmode
39022 	       || FLOAT128_IBM_P (mode) || mode == TCmode))
39023     return spe_build_register_parallel (mode, GP_ARG_RETURN);
39024   else
39025     regno = GP_ARG_RETURN;
39026 
39027   return gen_rtx_REG (mode, regno);
39028 }
39029 
39030 
39031 /* Return true if we use LRA instead of the reload pass.  */
39032 static bool
39033 rs6000_lra_p (void)
39034 {
39035   return TARGET_LRA;
39036 }
39037 
39038 /* Compute register pressure classes.  We implement the target hook to avoid
39039    IRA picking something like NON_SPECIAL_REGS as a pressure class, which can
39040    lead to incorrect estimates of the number of available registers and
39041    therefore to increased register pressure and spilling.  */
39042 static int
39043 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
39044 {
39045   int n;
39046 
39047   n = 0;
39048   pressure_classes[n++] = GENERAL_REGS;
39049   if (TARGET_VSX)
39050     pressure_classes[n++] = VSX_REGS;
39051   else
39052     {
39053       if (TARGET_ALTIVEC)
39054 	pressure_classes[n++] = ALTIVEC_REGS;
39055       if (TARGET_HARD_FLOAT && TARGET_FPRS)
39056 	pressure_classes[n++] = FLOAT_REGS;
39057     }
39058   pressure_classes[n++] = CR_REGS;
39059   pressure_classes[n++] = SPECIAL_REGS;
39060 
39061   return n;
39062 }
39063 
39064 /* Given FROM and TO register numbers, say whether this elimination is allowed.
39065    Frame pointer elimination is automatically handled.
39066 
39067    For the RS/6000, if frame pointer elimination is being done, we would like
39068    to convert ap into fp, not sp.
39069 
39070    We need r30 if -mminimal-toc was specified, and there are constant pool
39071    references.  */
39072 
39073 static bool
39074 rs6000_can_eliminate (const int from, const int to)
39075 {
39076   return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
39077 	  ? ! frame_pointer_needed
39078 	  : from == RS6000_PIC_OFFSET_TABLE_REGNUM
39079 	    ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC
39080 		|| constant_pool_empty_p ()
39081 	    : true);
39082 }
39083 
39084 /* Define the offset between two registers, FROM to be eliminated and its
39085    replacement TO, at the start of a routine.  */
39086 HOST_WIDE_INT
39087 rs6000_initial_elimination_offset (int from, int to)
39088 {
39089   rs6000_stack_t *info = rs6000_stack_info ();
39090   HOST_WIDE_INT offset;
39091 
39092   if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
39093     offset = info->push_p ? 0 : -info->total_size;
39094   else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
39095     {
39096       offset = info->push_p ? 0 : -info->total_size;
39097       if (FRAME_GROWS_DOWNWARD)
39098 	offset += info->fixed_size + info->vars_size + info->parm_size;
39099     }
39100   else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
39101     offset = FRAME_GROWS_DOWNWARD
39102 	     ? info->fixed_size + info->vars_size + info->parm_size
39103 	     : 0;
39104   else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
39105     offset = info->total_size;
39106   else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
39107     offset = info->push_p ? info->total_size : 0;
39108   else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
39109     offset = 0;
39110   else
39111     gcc_unreachable ();
39112 
39113   return offset;
39114 }
39115 
39116 static rtx
39117 rs6000_dwarf_register_span (rtx reg)
39118 {
39119   rtx parts[8];
39120   int i, words;
39121   unsigned regno = REGNO (reg);
39122   machine_mode mode = GET_MODE (reg);
39123 
39124   if (TARGET_SPE
39125       && regno < 32
39126       && (SPE_VECTOR_MODE (GET_MODE (reg))
39127 	  || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
39128 	      && mode != SFmode && mode != SDmode && mode != SCmode)))
39129     ;
39130   else
39131     return NULL_RTX;
39132 
39133   regno = REGNO (reg);
39134 
39135   /* The duality of the SPE register size wreaks all kinds of havoc.
39136      This is a way of distinguishing r0 in 32-bits from r0 in
39137      64-bits.  */
39138   words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
39139   gcc_assert (words <= 4);
39140   for (i = 0; i < words; i++, regno++)
39141     {
39142       if (BYTES_BIG_ENDIAN)
39143 	{
39144 	  parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
39145 	  parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
39146 	}
39147       else
39148 	{
39149 	  parts[2 * i] = gen_rtx_REG (SImode, regno);
39150 	  parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
39151 	}
39152     }
39153 
39154   return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
39155 }
39156 
39157 /* Fill in sizes for SPE register high parts in table used by unwinder.  */
39158 
39159 static void
39160 rs6000_init_dwarf_reg_sizes_extra (tree address)
39161 {
39162   if (TARGET_SPE)
39163     {
39164       int i;
39165       machine_mode mode = TYPE_MODE (char_type_node);
39166       rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
39167       rtx mem = gen_rtx_MEM (BLKmode, addr);
39168       rtx value = gen_int_mode (4, mode);
39169 
39170       for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
39171 	{
39172 	  int column = DWARF_REG_TO_UNWIND_COLUMN
39173 		(DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
39174 	  HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
39175 
39176 	  emit_move_insn (adjust_address (mem, mode, offset), value);
39177 	}
39178     }
39179 
39180   if (TARGET_MACHO && ! TARGET_ALTIVEC)
39181     {
39182       int i;
39183       machine_mode mode = TYPE_MODE (char_type_node);
39184       rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
39185       rtx mem = gen_rtx_MEM (BLKmode, addr);
39186       rtx value = gen_int_mode (16, mode);
39187 
39188       /* On Darwin, libgcc may be built to run on both G3 and G4/5.
39189 	 The unwinder still needs to know the size of Altivec registers.  */
39190 
39191       for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
39192 	{
39193 	  int column = DWARF_REG_TO_UNWIND_COLUMN
39194 		(DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
39195 	  HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
39196 
39197 	  emit_move_insn (adjust_address (mem, mode, offset), value);
39198 	}
39199     }
39200 }
39201 
39202 /* Map internal gcc register numbers to debug format register numbers.
39203    FORMAT specifies the type of debug register number to use:
39204      0 -- debug information, except for frame-related sections
39205      1 -- DWARF .debug_frame section
39206      2 -- DWARF .eh_frame section  */
39207 
39208 unsigned int
39209 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
39210 {
39211   /* We never use the GCC internal number for SPE high registers.
39212      Those are mapped to the 1200..1231 range for all debug formats.  */
39213   if (SPE_HIGH_REGNO_P (regno))
39214     return regno - FIRST_SPE_HIGH_REGNO + 1200;
39215 
39216   /* Except for the above, we use the internal number for non-DWARF
39217      debug information, and also for .eh_frame.  */
39218   if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
39219     return regno;
39220 
39221   /* On some platforms, we use the standard DWARF register
39222      numbering for .debug_info and .debug_frame.  */
39223 #ifdef RS6000_USE_DWARF_NUMBERING
39224   if (regno <= 63)
39225     return regno;
39226   if (regno == LR_REGNO)
39227     return 108;
39228   if (regno == CTR_REGNO)
39229     return 109;
39230   /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
39231      translated any combination of CR2, CR3, CR4 saves to a save of CR2.
39232      The actual code emitted saves the whole of CR, so we map CR2_REGNO
39233      to the DWARF reg for CR.  */
39234   if (format == 1 && regno == CR2_REGNO)
39235     return 64;
39236   if (CR_REGNO_P (regno))
39237     return regno - CR0_REGNO + 86;
39238   if (regno == CA_REGNO)
39239     return 101;  /* XER */
39240   if (ALTIVEC_REGNO_P (regno))
39241     return regno - FIRST_ALTIVEC_REGNO + 1124;
39242   if (regno == VRSAVE_REGNO)
39243     return 356;
39244   if (regno == VSCR_REGNO)
39245     return 67;
39246   if (regno == SPE_ACC_REGNO)
39247     return 99;
39248   if (regno == SPEFSCR_REGNO)
39249     return 612;
39250 #endif
39251   return regno;
39252 }
39253 
39254 /* target hook eh_return_filter_mode */
39255 static scalar_int_mode
39256 rs6000_eh_return_filter_mode (void)
39257 {
39258   return TARGET_32BIT ? SImode : word_mode;
39259 }
39260 
39261 /* Target hook for scalar_mode_supported_p.  */
39262 static bool
39263 rs6000_scalar_mode_supported_p (scalar_mode mode)
39264 {
39265   /* -m32 does not support TImode.  This is the default, from
39266      default_scalar_mode_supported_p.  For -m32 -mpowerpc64 we want the
39267      same ABI as for -m32.  But default_scalar_mode_supported_p allows
39268      integer modes of precision 2 * BITS_PER_WORD, which matches TImode
39269      for -mpowerpc64.  */
39270   if (TARGET_32BIT && mode == TImode)
39271     return false;
39272 
39273   if (DECIMAL_FLOAT_MODE_P (mode))
39274     return default_decimal_float_supported_p ();
39275   else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
39276     return true;
39277   else
39278     return default_scalar_mode_supported_p (mode);
39279 }
39280 
39281 /* Target hook for vector_mode_supported_p.  */
39282 static bool
39283 rs6000_vector_mode_supported_p (machine_mode mode)
39284 {
39285 
39286   if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
39287     return true;
39288 
39289   if (TARGET_SPE && SPE_VECTOR_MODE (mode))
39290     return true;
39291 
39292   /* There is no vector form for IEEE 128-bit.  If we return true for IEEE
39293      128-bit, the compiler might try to widen IEEE 128-bit to IBM
39294      double-double.  */
39295   else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
39296     return true;
39297 
39298   else
39299     return false;
39300 }
39301 
39302 /* Target hook for floatn_mode.  */
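/* E.g. _Float32 maps to SFmode and _Float64 to DFmode below, while
   _Float128 and _Float64x map to the IEEE 128-bit mode (TFmode or
   KFmode) only when the __float128 keyword support is enabled.  */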
39303 static opt_scalar_float_mode
39304 rs6000_floatn_mode (int n, bool extended)
39305 {
39306   if (extended)
39307     {
39308       switch (n)
39309 	{
39310 	case 32:
39311 	  return DFmode;
39312 
39313 	case 64:
39314 	  if (TARGET_FLOAT128_KEYWORD)
39315 	    return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
39316 	  else
39317 	    return opt_scalar_float_mode ();
39318 
39319 	case 128:
39320 	  return opt_scalar_float_mode ();
39321 
39322 	default:
39323 	  /* Those are the only valid _FloatNx types.  */
39324 	  gcc_unreachable ();
39325 	}
39326     }
39327   else
39328     {
39329       switch (n)
39330 	{
39331 	case 32:
39332 	  return SFmode;
39333 
39334 	case 64:
39335 	  return DFmode;
39336 
39337 	case 128:
39338 	  if (TARGET_FLOAT128_KEYWORD)
39339 	    return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
39340 	  else
39341 	    return opt_scalar_float_mode ();
39342 
39343 	default:
39344 	  return opt_scalar_float_mode ();
39345 	}
39346     }
39347 
39348 }
39349 
39350 /* Target hook for c_mode_for_suffix.  */
39351 static machine_mode
39352 rs6000_c_mode_for_suffix (char suffix)
39353 {
39354   if (TARGET_FLOAT128_TYPE)
39355     {
39356       if (suffix == 'q' || suffix == 'Q')
39357 	return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
39358 
39359       /* At the moment, we are not defining a suffix for IBM extended double.
39360 	 If/when the default for -mabi=ieeelongdouble is changed, and we want
39361 	 to support __ibm128 constants in legacy library code, we may need to
39362 	 re-evaluate this decision.  Currently, c-lex.c only supports 'w' and
39363 	 'q' as machine dependent suffixes.  The x86_64 port uses 'w' for
39364 	 __float80 constants.  */
39365     }
39366 
39367   return VOIDmode;
39368 }
39369 
39370 /* Target hook for invalid_arg_for_unprototyped_fn. */
39371 static const char *
39372 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
39373 {
39374   return (!rs6000_darwin64_abi
39375 	  && typelist == 0
39376           && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
39377           && (funcdecl == NULL_TREE
39378               || (TREE_CODE (funcdecl) == FUNCTION_DECL
39379                   && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
39380 	  ? N_("AltiVec argument passed to unprototyped function")
39381 	  : NULL;
39382 }
39383 
39384 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
39385    setup by using __stack_chk_fail_local hidden function instead of
39386    calling __stack_chk_fail directly.  Otherwise it is better to call
39387    __stack_chk_fail directly.  */
39388 
39389 static tree ATTRIBUTE_UNUSED
39390 rs6000_stack_protect_fail (void)
39391 {
39392   return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
39393 	 ? default_hidden_stack_protect_fail ()
39394 	 : default_external_stack_protect_fail ();
39395 }
39396 
39397 void
39398 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
39399 			   int num_operands ATTRIBUTE_UNUSED)
39400 {
39401   if (rs6000_warn_cell_microcode)
39402     {
39403       const char *temp;
39404       int insn_code_number = recog_memoized (insn);
39405       location_t location = INSN_LOCATION (insn);
39406 
39407       /* Punt on insns we cannot recognize.  */
39408       if (insn_code_number < 0)
39409 	return;
39410 
39411       /* get_insn_template can modify recog_data, so save and restore it.  */
39412       struct recog_data_d recog_data_save = recog_data;
39413       for (int i = 0; i < recog_data.n_operands; i++)
39414 	recog_data.operand[i] = copy_rtx (recog_data.operand[i]);
39415       temp = get_insn_template (insn_code_number, insn);
39416       recog_data = recog_data_save;
39417 
39418       if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
39419 	warning_at (location, OPT_mwarn_cell_microcode,
39420 		    "emitting microcode insn %s\t[%s] #%d",
39421 		    temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
39422       else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
39423 	warning_at (location, OPT_mwarn_cell_microcode,
39424 		    "emitting conditional microcode insn %s\t[%s] #%d",
39425 		    temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
39426     }
39427 }
39428 
39429 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
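/* ASan forms a shadow address as (addr >> 3) + offset, so the values
   below put the shadow region at 1 << 41 for 64-bit and 1 << 29 for
   32-bit code.  (Explanatory note; libsanitizer holds the
   authoritative mapping.)  */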
39430 
39431 #if TARGET_ELF
39432 static unsigned HOST_WIDE_INT
39433 rs6000_asan_shadow_offset (void)
39434 {
39435   return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
39436 }
39437 #endif
39438 
39439 /* Mask options that we want to support inside of attribute((target)) and
39440    #pragma GCC target operations.  Note, we do not include things like
39441    64/32-bit, endianness, hard/soft floating point, etc. that would have
39442    different calling sequences.  */
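/* For example (illustrative), the names below allow
   __attribute__((__target__("vsx,no-multiple"))) on a function or
   #pragma GCC target ("power9-vector"); a "no-" prefix inverts the
   option, as handled in rs6000_inner_target_options further down.  */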
39443 
39444 struct rs6000_opt_mask {
39445   const char *name;		/* option name */
39446   HOST_WIDE_INT mask;		/* mask to set */
39447   bool invert;			/* invert sense of mask */
39448   bool valid_target;		/* option is a target option */
39449 };
39450 
39451 static struct rs6000_opt_mask const rs6000_opt_masks[] =
39452 {
39453   { "altivec",			OPTION_MASK_ALTIVEC,		false, true  },
39454   { "cmpb",			OPTION_MASK_CMPB,		false, true  },
39455   { "crypto",			OPTION_MASK_CRYPTO,		false, true  },
39456   { "direct-move",		OPTION_MASK_DIRECT_MOVE,	false, true  },
39457   { "dlmzb",			OPTION_MASK_DLMZB,		false, true  },
39458   { "efficient-unaligned-vsx",	OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
39459 								false, true  },
39460   { "float128",			OPTION_MASK_FLOAT128_KEYWORD,	false, false },
39461   { "float128-type",		OPTION_MASK_FLOAT128_TYPE,	false, false },
39462   { "float128-hardware",	OPTION_MASK_FLOAT128_HW,	false, false },
39463   { "fprnd",			OPTION_MASK_FPRND,		false, true  },
39464   { "hard-dfp",			OPTION_MASK_DFP,		false, true  },
39465   { "htm",			OPTION_MASK_HTM,		false, true  },
39466   { "isel",			OPTION_MASK_ISEL,		false, true  },
39467   { "mfcrf",			OPTION_MASK_MFCRF,		false, true  },
39468   { "mfpgpr",			OPTION_MASK_MFPGPR,		false, true  },
39469   { "modulo",			OPTION_MASK_MODULO,		false, true  },
39470   { "mulhw",			OPTION_MASK_MULHW,		false, true  },
39471   { "multiple",			OPTION_MASK_MULTIPLE,		false, true  },
39472   { "popcntb",			OPTION_MASK_POPCNTB,		false, true  },
39473   { "popcntd",			OPTION_MASK_POPCNTD,		false, true  },
39474   { "power8-fusion",		OPTION_MASK_P8_FUSION,		false, true  },
39475   { "power8-fusion-sign",	OPTION_MASK_P8_FUSION_SIGN,	false, true  },
39476   { "power8-vector",		OPTION_MASK_P8_VECTOR,		false, true  },
39477   { "power9-dform-scalar",	OPTION_MASK_P9_DFORM_SCALAR,	false, true  },
39478   { "power9-dform-vector",	OPTION_MASK_P9_DFORM_VECTOR,	false, true  },
39479   { "power9-fusion",		OPTION_MASK_P9_FUSION,		false, true  },
39480   { "power9-minmax",		OPTION_MASK_P9_MINMAX,		false, true  },
39481   { "power9-misc",		OPTION_MASK_P9_MISC,		false, true  },
39482   { "power9-vector",		OPTION_MASK_P9_VECTOR,		false, true  },
39483   { "powerpc-gfxopt",		OPTION_MASK_PPC_GFXOPT,		false, true  },
39484   { "powerpc-gpopt",		OPTION_MASK_PPC_GPOPT,		false, true  },
39485   { "quad-memory",		OPTION_MASK_QUAD_MEMORY,	false, true  },
39486   { "quad-memory-atomic",	OPTION_MASK_QUAD_MEMORY_ATOMIC,	false, true  },
39487   { "recip-precision",		OPTION_MASK_RECIP_PRECISION,	false, true  },
39488   { "save-toc-indirect",	OPTION_MASK_SAVE_TOC_INDIRECT,	false, true  },
39489   { "string",			OPTION_MASK_STRING,		false, true  },
39490   { "toc-fusion",		OPTION_MASK_TOC_FUSION,		false, true  },
39491   { "update",			OPTION_MASK_NO_UPDATE,		true , true  },
39492   { "upper-regs-di",		OPTION_MASK_UPPER_REGS_DI,	false, true  },
39493   { "upper-regs-df",		OPTION_MASK_UPPER_REGS_DF,	false, true  },
39494   { "upper-regs-sf",		OPTION_MASK_UPPER_REGS_SF,	false, true  },
39495   { "vsx",			OPTION_MASK_VSX,		false, true  },
39496   { "vsx-small-integer",	OPTION_MASK_VSX_SMALL_INTEGER,	false, true  },
39497   { "vsx-timode",		OPTION_MASK_VSX_TIMODE,		false, true  },
39498 #ifdef OPTION_MASK_64BIT
39499 #if TARGET_AIX_OS
39500   { "aix64",			OPTION_MASK_64BIT,		false, false },
39501   { "aix32",			OPTION_MASK_64BIT,		true,  false },
39502 #else
39503   { "64",			OPTION_MASK_64BIT,		false, false },
39504   { "32",			OPTION_MASK_64BIT,		true,  false },
39505 #endif
39506 #endif
39507 #ifdef OPTION_MASK_EABI
39508   { "eabi",			OPTION_MASK_EABI,		false, false },
39509 #endif
39510 #ifdef OPTION_MASK_LITTLE_ENDIAN
39511   { "little",			OPTION_MASK_LITTLE_ENDIAN,	false, false },
39512   { "big",			OPTION_MASK_LITTLE_ENDIAN,	true,  false },
39513 #endif
39514 #ifdef OPTION_MASK_RELOCATABLE
39515   { "relocatable",		OPTION_MASK_RELOCATABLE,	false, false },
39516 #endif
39517 #ifdef OPTION_MASK_STRICT_ALIGN
39518   { "strict-align",		OPTION_MASK_STRICT_ALIGN,	false, false },
39519 #endif
39520   { "soft-float",		OPTION_MASK_SOFT_FLOAT,		false, false },
39521   { "string",			OPTION_MASK_STRING,		false, false },
39522 };
39523 
39524 /* Builtin mask mapping for printing the flags.  */
39525 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
39526 {
39527   { "altivec",		 RS6000_BTM_ALTIVEC,	false, false },
39528   { "vsx",		 RS6000_BTM_VSX,	false, false },
39529   { "spe",		 RS6000_BTM_SPE,	false, false },
39530   { "paired",		 RS6000_BTM_PAIRED,	false, false },
39531   { "fre",		 RS6000_BTM_FRE,	false, false },
39532   { "fres",		 RS6000_BTM_FRES,	false, false },
39533   { "frsqrte",		 RS6000_BTM_FRSQRTE,	false, false },
39534   { "frsqrtes",		 RS6000_BTM_FRSQRTES,	false, false },
39535   { "popcntd",		 RS6000_BTM_POPCNTD,	false, false },
39536   { "cell",		 RS6000_BTM_CELL,	false, false },
39537   { "power8-vector",	 RS6000_BTM_P8_VECTOR,	false, false },
39538   { "power9-vector",	 RS6000_BTM_P9_VECTOR,	false, false },
39539   { "power9-misc",	 RS6000_BTM_P9_MISC,	false, false },
39540   { "crypto",		 RS6000_BTM_CRYPTO,	false, false },
39541   { "htm",		 RS6000_BTM_HTM,	false, false },
39542   { "hard-dfp",		 RS6000_BTM_DFP,	false, false },
39543   { "hard-float",	 RS6000_BTM_HARD_FLOAT,	false, false },
39544   { "long-double-128",	 RS6000_BTM_LDBL128,	false, false },
39545   { "float128",		 RS6000_BTM_FLOAT128,   false, false },
39546 };
39547 
39548 /* Option variables that we want to support inside attribute((target)) and
39549    #pragma GCC target operations.  */
39550 
39551 struct rs6000_opt_var {
39552   const char *name;		/* option name */
39553   size_t global_offset;		/* offset of the option in global_options.  */
39554   size_t target_offset;		/* offset of the option in target options.  */
39555 };
39556 
39557 static struct rs6000_opt_var const rs6000_opt_vars[] =
39558 {
39559   { "friz",
39560     offsetof (struct gcc_options, x_TARGET_FRIZ),
39561     offsetof (struct cl_target_option, x_TARGET_FRIZ), },
39562   { "avoid-indexed-addresses",
39563     offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
39564     offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
39565   { "paired",
39566     offsetof (struct gcc_options, x_rs6000_paired_float),
39567     offsetof (struct cl_target_option, x_rs6000_paired_float), },
39568   { "longcall",
39569     offsetof (struct gcc_options, x_rs6000_default_long_calls),
39570     offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
39571   { "optimize-swaps",
39572     offsetof (struct gcc_options, x_rs6000_optimize_swaps),
39573     offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
39574   { "allow-movmisalign",
39575     offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
39576     offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
39577   { "allow-df-permute",
39578     offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE),
39579     offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), },
39580   { "sched-groups",
39581     offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
39582     offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
39583   { "always-hint",
39584     offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
39585     offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
39586   { "align-branch-targets",
39587     offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
39588     offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
39589   { "vectorize-builtins",
39590     offsetof (struct gcc_options, x_TARGET_VECTORIZE_BUILTINS),
39591     offsetof (struct cl_target_option, x_TARGET_VECTORIZE_BUILTINS), },
39592   { "tls-markers",
39593     offsetof (struct gcc_options, x_tls_markers),
39594     offsetof (struct cl_target_option, x_tls_markers), },
39595   { "sched-prolog",
39596     offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
39597     offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
39598   { "sched-epilog",
39599     offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
39600     offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
39601   { "gen-cell-microcode",
39602     offsetof (struct gcc_options, x_rs6000_gen_cell_microcode),
39603     offsetof (struct cl_target_option, x_rs6000_gen_cell_microcode), },
39604   { "warn-cell-microcode",
39605     offsetof (struct gcc_options, x_rs6000_warn_cell_microcode),
39606     offsetof (struct cl_target_option, x_rs6000_warn_cell_microcode), },
39607 };
39608 
39609 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
39610    parsing.  Return true if there were no errors.  */
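/* E.g. the string "cpu=power9,htm,no-vsx" sets rs6000_cpu_index from
   the cpu= prefix, turns on OPTION_MASK_HTM and turns off
   OPTION_MASK_VSX (an illustrative combination; the names come from
   the tables above).  */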
39611 
39612 static bool
39613 rs6000_inner_target_options (tree args, bool attr_p)
39614 {
39615   bool ret = true;
39616 
39617   if (args == NULL_TREE)
39618     ;
39619 
39620   else if (TREE_CODE (args) == STRING_CST)
39621     {
39622       char *p = ASTRDUP (TREE_STRING_POINTER (args));
39623       char *q;
39624 
39625       while ((q = strtok (p, ",")) != NULL)
39626 	{
39627 	  bool error_p = false;
39628 	  bool not_valid_p = false;
39629 	  const char *cpu_opt = NULL;
39630 
39631 	  p = NULL;
39632 	  if (strncmp (q, "cpu=", 4) == 0)
39633 	    {
39634 	      int cpu_index = rs6000_cpu_name_lookup (q+4);
39635 	      if (cpu_index >= 0)
39636 		rs6000_cpu_index = cpu_index;
39637 	      else
39638 		{
39639 		  error_p = true;
39640 		  cpu_opt = q+4;
39641 		}
39642 	    }
39643 	  else if (strncmp (q, "tune=", 5) == 0)
39644 	    {
39645 	      int tune_index = rs6000_cpu_name_lookup (q+5);
39646 	      if (tune_index >= 0)
39647 		rs6000_tune_index = tune_index;
39648 	      else
39649 		{
39650 		  error_p = true;
39651 		  cpu_opt = q+5;
39652 		}
39653 	    }
39654 	  else
39655 	    {
39656 	      size_t i;
39657 	      bool invert = false;
39658 	      char *r = q;
39659 
39660 	      error_p = true;
39661 	      if (strncmp (r, "no-", 3) == 0)
39662 		{
39663 		  invert = true;
39664 		  r += 3;
39665 		}
39666 
39667 	      for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
39668 		if (strcmp (r, rs6000_opt_masks[i].name) == 0)
39669 		  {
39670 		    HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
39671 
39672 		    if (!rs6000_opt_masks[i].valid_target)
39673 		      not_valid_p = true;
39674 		    else
39675 		      {
39676 			error_p = false;
39677 			rs6000_isa_flags_explicit |= mask;
39678 
39679 			/* VSX needs altivec, so -mvsx automagically sets
39680 			   altivec and disables -mavoid-indexed-addresses.  */
39681 			if (!invert)
39682 			  {
39683 			    if (mask == OPTION_MASK_VSX)
39684 			      {
39685 				mask |= OPTION_MASK_ALTIVEC;
39686 				TARGET_AVOID_XFORM = 0;
39687 			      }
39688 			  }
39689 
39690 			if (rs6000_opt_masks[i].invert)
39691 			  invert = !invert;
39692 
39693 			if (invert)
39694 			  rs6000_isa_flags &= ~mask;
39695 			else
39696 			  rs6000_isa_flags |= mask;
39697 		      }
39698 		    break;
39699 		  }
39700 
39701 	      if (error_p && !not_valid_p)
39702 		{
39703 		  for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
39704 		    if (strcmp (r, rs6000_opt_vars[i].name) == 0)
39705 		      {
39706 			size_t j = rs6000_opt_vars[i].global_offset;
39707 			*((int *) ((char *)&global_options + j)) = !invert;
39708 			error_p = false;
39709 			not_valid_p = false;
39710 			break;
39711 		      }
39712 		}
39713 	    }
39714 
39715 	  if (error_p)
39716 	    {
39717 	      const char *eprefix, *esuffix;
39718 
39719 	      ret = false;
39720 	      if (attr_p)
39721 		{
39722 		  eprefix = "__attribute__((__target__(";
39723 		  esuffix = ")))";
39724 		}
39725 	      else
39726 		{
39727 		  eprefix = "#pragma GCC target ";
39728 		  esuffix = "";
39729 		}
39730 
39731 	      if (cpu_opt)
39732 		error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
39733 		       q, esuffix);
39734 	      else if (not_valid_p)
39735 		error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
39736 	      else
39737 		error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
39738 	    }
39739 	}
39740     }
39741 
39742   else if (TREE_CODE (args) == TREE_LIST)
39743     {
39744       do
39745 	{
39746 	  tree value = TREE_VALUE (args);
39747 	  if (value)
39748 	    {
39749 	      bool ret2 = rs6000_inner_target_options (value, attr_p);
39750 	      if (!ret2)
39751 		ret = false;
39752 	    }
39753 	  args = TREE_CHAIN (args);
39754 	}
39755       while (args != NULL_TREE);
39756     }
39757 
39758   else
39759     {
39760       error ("attribute %<target%> argument not a string");
39761       return false;
39762     }
39763 
39764   return ret;
39765 }
39766 
39767 /* Print out the target options as a list for -mdebug=target.  */
39768 
39769 static void
39770 rs6000_debug_target_options (tree args, const char *prefix)
39771 {
39772   if (args == NULL_TREE)
39773     fprintf (stderr, "%s<NULL>", prefix);
39774 
39775   else if (TREE_CODE (args) == STRING_CST)
39776     {
39777       char *p = ASTRDUP (TREE_STRING_POINTER (args));
39778       char *q;
39779 
39780       while ((q = strtok (p, ",")) != NULL)
39781 	{
39782 	  p = NULL;
39783 	  fprintf (stderr, "%s\"%s\"", prefix, q);
39784 	  prefix = ", ";
39785 	}
39786     }
39787 
39788   else if (TREE_CODE (args) == TREE_LIST)
39789     {
39790       do
39791 	{
39792 	  tree value = TREE_VALUE (args);
39793 	  if (value)
39794 	    {
39795 	      rs6000_debug_target_options (value, prefix);
39796 	      prefix = ", ";
39797 	    }
39798 	  args = TREE_CHAIN (args);
39799 	}
39800       while (args != NULL_TREE);
39801     }
39802 
39803   else
39804     gcc_unreachable ();
39805 
39806   return;
39807 }
39808 
39809 
39810 /* Hook to validate attribute((target("..."))).  */
39811 
39812 static bool
39813 rs6000_valid_attribute_p (tree fndecl,
39814 			  tree ARG_UNUSED (name),
39815 			  tree args,
39816 			  int flags)
39817 {
39818   struct cl_target_option cur_target;
39819   bool ret;
39820   tree old_optimize = build_optimization_node (&global_options);
39821   tree new_target, new_optimize;
39822   tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
39823 
39824   gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
39825 
39826   if (TARGET_DEBUG_TARGET)
39827     {
39828       tree tname = DECL_NAME (fndecl);
39829       fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
39830       if (tname)
39831 	fprintf (stderr, "function: %.*s\n",
39832 		 (int) IDENTIFIER_LENGTH (tname),
39833 		 IDENTIFIER_POINTER (tname));
39834       else
39835 	fprintf (stderr, "function: unknown\n");
39836 
39837       fprintf (stderr, "args:");
39838       rs6000_debug_target_options (args, " ");
39839       fprintf (stderr, "\n");
39840 
39841       if (flags)
39842 	fprintf (stderr, "flags: 0x%x\n", flags);
39843 
39844       fprintf (stderr, "--------------------\n");
39845     }
39846 
39847   old_optimize = build_optimization_node (&global_options);
39848   func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
39849 
39850   /* If the function changed the optimization levels as well as setting target
39851      options, start with the optimizations specified.  */
39852   if (func_optimize && func_optimize != old_optimize)
39853     cl_optimization_restore (&global_options,
39854 			     TREE_OPTIMIZATION (func_optimize));
39855 
39856   /* The target attributes may also change some optimization flags, so update
39857      the optimization options if necessary.  */
39858   cl_target_option_save (&cur_target, &global_options);
39859   rs6000_cpu_index = rs6000_tune_index = -1;
39860   ret = rs6000_inner_target_options (args, true);
39861 
39862   /* Set up any additional state.  */
39863   if (ret)
39864     {
39865       ret = rs6000_option_override_internal (false);
39866       new_target = build_target_option_node (&global_options);
39867     }
39868   else
39869     new_target = NULL;
39870 
39871   new_optimize = build_optimization_node (&global_options);
39872 
39873   if (!new_target)
39874     ret = false;
39875 
39876   else if (fndecl)
39877     {
39878       DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
39879 
39880       if (old_optimize != new_optimize)
39881 	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
39882     }
39883 
39884   cl_target_option_restore (&global_options, &cur_target);
39885 
39886   if (old_optimize != new_optimize)
39887     cl_optimization_restore (&global_options,
39888 			     TREE_OPTIMIZATION (old_optimize));
39889 
39890   return ret;
39891 }
39892 
39893 
39894 /* Hook to validate the current #pragma GCC target and set the state, and
39895    update the macros based on what was changed.  If ARGS is NULL, then
39896    POP_TARGET is used to reset the options.  */
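/* Typical use (illustrative):

     #pragma GCC push_options
     #pragma GCC target ("power8-vector")
     ... code compiled with Power8 vector support ...
     #pragma GCC pop_options

   The pop path reaches this function with ARGS == NULL and the saved
   options in POP_TARGET.  */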
39897 
39898 bool
39899 rs6000_pragma_target_parse (tree args, tree pop_target)
39900 {
39901   tree prev_tree = build_target_option_node (&global_options);
39902   tree cur_tree;
39903   struct cl_target_option *prev_opt, *cur_opt;
39904   HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
39905   HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
39906 
39907   if (TARGET_DEBUG_TARGET)
39908     {
39909       fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
39910       fprintf (stderr, "args:");
39911       rs6000_debug_target_options (args, " ");
39912       fprintf (stderr, "\n");
39913 
39914       if (pop_target)
39915 	{
39916 	  fprintf (stderr, "pop_target:\n");
39917 	  debug_tree (pop_target);
39918 	}
39919       else
39920 	fprintf (stderr, "pop_target: <NULL>\n");
39921 
39922       fprintf (stderr, "--------------------\n");
39923     }
39924 
39925   if (! args)
39926     {
39927       cur_tree = ((pop_target)
39928 		  ? pop_target
39929 		  : target_option_default_node);
39930       cl_target_option_restore (&global_options,
39931 				TREE_TARGET_OPTION (cur_tree));
39932     }
39933   else
39934     {
39935       rs6000_cpu_index = rs6000_tune_index = -1;
39936       if (!rs6000_inner_target_options (args, false)
39937 	  || !rs6000_option_override_internal (false)
39938 	  || (cur_tree = build_target_option_node (&global_options))
39939 	     == NULL_TREE)
39940 	{
39941 	  if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
39942 	    fprintf (stderr, "invalid pragma\n");
39943 
39944 	  return false;
39945 	}
39946     }
39947 
39948   target_option_current_node = cur_tree;
39949 
39950   /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
39951      change the macros that are defined.  */
39952   if (rs6000_target_modify_macros_ptr)
39953     {
39954       prev_opt    = TREE_TARGET_OPTION (prev_tree);
39955       prev_bumask = prev_opt->x_rs6000_builtin_mask;
39956       prev_flags  = prev_opt->x_rs6000_isa_flags;
39957 
39958       cur_opt     = TREE_TARGET_OPTION (cur_tree);
39959       cur_flags   = cur_opt->x_rs6000_isa_flags;
39960       cur_bumask  = cur_opt->x_rs6000_builtin_mask;
39961 
39962       diff_bumask = (prev_bumask ^ cur_bumask);
39963       diff_flags  = (prev_flags ^ cur_flags);
39964 
39965       if ((diff_flags != 0) || (diff_bumask != 0))
39966 	{
39967 	  /* Delete old macros.  */
39968 	  rs6000_target_modify_macros_ptr (false,
39969 					   prev_flags & diff_flags,
39970 					   prev_bumask & diff_bumask);
39971 
39972 	  /* Define new macros.  */
39973 	  rs6000_target_modify_macros_ptr (true,
39974 					   cur_flags & diff_flags,
39975 					   cur_bumask & diff_bumask);
39976 	}
39977     }
39978 
39979   return true;
39980 }
39981 
39982 
39983 /* Remember the last target of rs6000_set_current_function.  */
39984 static GTY(()) tree rs6000_previous_fndecl;
39985 
39986 /* Establish appropriate back-end context for processing the function
39987    FNDECL.  The argument might be NULL to indicate processing at top
39988    level, outside of any function scope.  */
39989 static void
39990 rs6000_set_current_function (tree fndecl)
39991 {
39992   tree old_tree = (rs6000_previous_fndecl
39993 		   ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
39994 		   : NULL_TREE);
39995 
39996   tree new_tree = (fndecl
39997 		   ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
39998 		   : NULL_TREE);
39999 
40000   if (TARGET_DEBUG_TARGET)
40001     {
40002       bool print_final = false;
40003       fprintf (stderr, "\n==================== rs6000_set_current_function");
40004 
40005       if (fndecl)
40006 	fprintf (stderr, ", fndecl %s (%p)",
40007 		 (DECL_NAME (fndecl)
40008 		  ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
40009 		  : "<unknown>"), (void *)fndecl);
40010 
40011       if (rs6000_previous_fndecl)
40012 	fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
40013 
40014       fprintf (stderr, "\n");
40015       if (new_tree)
40016 	{
40017 	  fprintf (stderr, "\nnew fndecl target specific options:\n");
40018 	  debug_tree (new_tree);
40019 	  print_final = true;
40020 	}
40021 
40022       if (old_tree)
40023 	{
40024 	  fprintf (stderr, "\nold fndecl target specific options:\n");
40025 	  debug_tree (old_tree);
40026 	  print_final = true;
40027 	}
40028 
40029       if (print_final)
40030 	fprintf (stderr, "--------------------\n");
40031     }
40032 
40033   /* Only change the context if the function changes.  This hook is called
40034      several times in the course of compiling a function, and we don't want to
40035      slow things down too much or call target_reinit when it isn't safe.  */
40036   if (fndecl && fndecl != rs6000_previous_fndecl)
40037     {
40038       rs6000_previous_fndecl = fndecl;
40039       if (old_tree == new_tree)
40040 	;
40041 
40042       else if (new_tree && new_tree != target_option_default_node)
40043 	{
40044 	  cl_target_option_restore (&global_options,
40045 				    TREE_TARGET_OPTION (new_tree));
40046 	  if (TREE_TARGET_GLOBALS (new_tree))
40047 	    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
40048 	  else
40049 	    TREE_TARGET_GLOBALS (new_tree)
40050 	      = save_target_globals_default_opts ();
40051 	}
40052 
40053       else if (old_tree && old_tree != target_option_default_node)
40054 	{
40055 	  new_tree = target_option_current_node;
40056 	  cl_target_option_restore (&global_options,
40057 				    TREE_TARGET_OPTION (new_tree));
40058 	  if (TREE_TARGET_GLOBALS (new_tree))
40059 	    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
40060 	  else if (new_tree == target_option_default_node)
40061 	    restore_target_globals (&default_target_globals);
40062 	  else
40063 	    TREE_TARGET_GLOBALS (new_tree)
40064 	      = save_target_globals_default_opts ();
40065 	}
40066     }
40067 }
40068 
40069 
40070 /* Save the current options */
40071 
40072 static void
40073 rs6000_function_specific_save (struct cl_target_option *ptr,
40074 			       struct gcc_options *opts)
40075 {
40076   ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
40077   ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
40078 }
40079 
40080 /* Restore the current options */
40081 
40082 static void
40083 rs6000_function_specific_restore (struct gcc_options *opts,
40084 				  struct cl_target_option *ptr)
40085 
40086 {
40087   opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
40088   opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
40089   (void) rs6000_option_override_internal (false);
40090 }
40091 
40092 /* Print the current options */
40093 
40094 static void
40095 rs6000_function_specific_print (FILE *file, int indent,
40096 				struct cl_target_option *ptr)
40097 {
40098   rs6000_print_isa_options (file, indent, "Isa options set",
40099 			    ptr->x_rs6000_isa_flags);
40100 
40101   rs6000_print_isa_options (file, indent, "Isa options explicit",
40102 			    ptr->x_rs6000_isa_flags_explicit);
40103 }
40104 
40105 /* Helper function to print the current isa or misc options on a line.  */
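/* The output is the STRING label, the raw FLAGS value, and then one
   PREFIX-prefixed name per known mask bit, with "no-" inserted for bits in
   the opposite state, e.g. roughly:

	Isa options set = 0x... -maltivec, -mvsx, -mno-htm, ...  */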
40106 
40107 static void
40108 rs6000_print_options_internal (FILE *file,
40109 			       int indent,
40110 			       const char *string,
40111 			       HOST_WIDE_INT flags,
40112 			       const char *prefix,
40113 			       const struct rs6000_opt_mask *opts,
40114 			       size_t num_elements)
40115 {
40116   size_t i;
40117   size_t start_column = 0;
40118   size_t cur_column;
40119   size_t max_column = 120;
40120   size_t prefix_len = strlen (prefix);
40121   size_t comma_len = 0;
40122   const char *comma = "";
40123 
40124   if (indent)
40125     start_column += fprintf (file, "%*s", indent, "");
40126 
40127   if (!flags)
40128     {
40129       fprintf (file, DEBUG_FMT_S, string, "<none>");
40130       return;
40131     }
40132 
40133   start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
40134 
40135   /* Print the various mask options.  */
40136   cur_column = start_column;
40137   for (i = 0; i < num_elements; i++)
40138     {
40139       bool invert = opts[i].invert;
40140       const char *name = opts[i].name;
40141       const char *no_str = "";
40142       HOST_WIDE_INT mask = opts[i].mask;
40143       size_t len = comma_len + prefix_len + strlen (name);
40144 
40145       if (!invert)
40146 	{
40147 	  if ((flags & mask) == 0)
40148 	    {
40149 	      no_str = "no-";
40150 	      len += sizeof ("no-") - 1;
40151 	    }
40152 
40153 	  flags &= ~mask;
40154 	}
40155 
40156       else
40157 	{
40158 	  if ((flags & mask) != 0)
40159 	    {
40160 	      no_str = "no-";
40161 	      len += sizeof ("no-") - 1;
40162 	    }
40163 
40164 	  flags |= mask;
40165 	}
40166 
40167       cur_column += len;
40168       if (cur_column > max_column)
40169 	{
40170 	  fprintf (file, ", \\\n%*s", (int)start_column, "");
40171 	  cur_column = start_column + len;
40172 	  comma = "";
40173 	}
40174 
40175       fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
40176       comma = ", ";
40177       comma_len = sizeof (", ") - 1;
40178     }
40179 
40180   fputs ("\n", file);
40181 }
40182 
40183 /* Helper function to print the current isa options on a line.  */
40184 
40185 static void
40186 rs6000_print_isa_options (FILE *file, int indent, const char *string,
40187 			  HOST_WIDE_INT flags)
40188 {
40189   rs6000_print_options_internal (file, indent, string, flags, "-m",
40190 				 &rs6000_opt_masks[0],
40191 				 ARRAY_SIZE (rs6000_opt_masks));
40192 }
40193 
40194 static void
40195 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
40196 			      HOST_WIDE_INT flags)
40197 {
40198   rs6000_print_options_internal (file, indent, string, flags, "",
40199 				 &rs6000_builtin_mask_names[0],
40200 				 ARRAY_SIZE (rs6000_builtin_mask_names));
40201 }
40202 
40203 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
40204    2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
40205    -mvsx-timode, -mupper-regs-df).
40206 
40207    If the user used -mno-power8-vector, we need to turn off all of the implicit
40208    ISA 2.07 and 3.0 options that relate to the vector unit.
40209 
40210    If the user used -mno-power9-vector, we need to turn off all of the implicit
40211    ISA 3.0 options that relate to the vector unit.
40212 
40213    This function does not handle explicit options such as the user specifying
40214    -mdirect-move.  These are handled in rs6000_option_override_internal, and
40215    the appropriate error is given if needed.
40216 
40217    We return a mask of all of the implicit options that should not be enabled
40218    by default.  */
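/* For example, compiling with "-mdirect-move -mno-vsx" produces

	error: -mno-vsx turns off -mdirect-move

   via the error call below, since -mdirect-move depends on VSX.  */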
40219 
40220 static HOST_WIDE_INT
40221 rs6000_disable_incompatible_switches (void)
40222 {
40223   HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
40224   size_t i, j;
40225 
40226   static const struct {
40227     const HOST_WIDE_INT no_flag;	/* flag explicitly turned off.  */
40228     const HOST_WIDE_INT dep_flags;	/* flags that depend on this option.  */
40229     const char *const name;		/* name of the switch.  */
40230   } flags[] = {
40231     { OPTION_MASK_P9_VECTOR,	OTHER_P9_VECTOR_MASKS,	"power9-vector"	},
40232     { OPTION_MASK_P8_VECTOR,	OTHER_P8_VECTOR_MASKS,	"power8-vector"	},
40233     { OPTION_MASK_VSX,		OTHER_VSX_VECTOR_MASKS,	"vsx"		},
40234   };
40235 
40236   for (i = 0; i < ARRAY_SIZE (flags); i++)
40237     {
40238       HOST_WIDE_INT no_flag = flags[i].no_flag;
40239 
40240       if ((rs6000_isa_flags & no_flag) == 0
40241 	  && (rs6000_isa_flags_explicit & no_flag) != 0)
40242 	{
40243 	  HOST_WIDE_INT dep_flags = flags[i].dep_flags;
40244 	  HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
40245 				     & rs6000_isa_flags
40246 				     & dep_flags);
40247 
40248 	  if (set_flags)
40249 	    {
40250 	      for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
40251 		if ((set_flags & rs6000_opt_masks[j].mask) != 0)
40252 		  {
40253 		    set_flags &= ~rs6000_opt_masks[j].mask;
40254 		    error ("-mno-%s turns off -m%s",
40255 			   flags[i].name,
40256 			   rs6000_opt_masks[j].name);
40257 		  }
40258 
40259 	      gcc_assert (!set_flags);
40260 	    }
40261 
40262 	  rs6000_isa_flags &= ~dep_flags;
40263 	  ignore_masks |= no_flag | dep_flags;
40264 	}
40265     }
40266 
40267   if (!TARGET_P9_VECTOR
40268       && (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) != 0
40269       && TARGET_P9_DFORM_BOTH > 0)
40270     {
40271       error ("-mno-power9-vector turns off -mpower9-dform");
40272       TARGET_P9_DFORM_BOTH = 0;
40273     }
40274 
40275   return ignore_masks;
40276 }
40277 
40278 
40279 /* Hook to determine if one function can safely inline another.  */
40280 
40281 static bool
40282 rs6000_can_inline_p (tree caller, tree callee)
40283 {
40284   bool ret = false;
40285   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
40286   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
40287 
40288   /* If callee has no option attributes, then it is ok to inline.  */
40289   if (!callee_tree)
40290     ret = true;
40291 
40292   /* If the caller has no option attributes but the callee does, then it is
40293      not ok to inline.  */
40294   else if (!caller_tree)
40295     ret = false;
40296 
40297   else
40298     {
40299       struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
40300       struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
40301 
40302       /* Callee's options should be a subset of the caller's, i.e. a vsx function
40303 	 can inline an altivec function but a non-vsx function can't inline a
40304 	 vsx function.  */
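      /* In mask terms: every ISA bit set for the callee must also be set for
	 the caller, i.e. (caller & callee) == callee.  */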
40305       if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
40306 	  == callee_opts->x_rs6000_isa_flags)
40307 	ret = true;
40308     }
40309 
40310   if (TARGET_DEBUG_TARGET)
40311     fprintf (stderr, "rs6000_can_inline_p: caller %s, callee %s, %s inline\n",
40312 	     (DECL_NAME (caller)
40313 	      ? IDENTIFIER_POINTER (DECL_NAME (caller))
40314 	      : "<unknown>"),
40315 	     (DECL_NAME (callee)
40316 	      ? IDENTIFIER_POINTER (DECL_NAME (callee))
40317 	      : "<unknown>"),
40318 	     (ret ? "can" : "cannot"));
40319 
40320   return ret;
40321 }
40322 
40323 /* Allocate a stack temp and fixup the address so it meets the particular
40324    memory requirements (either offsettable or REG+REG addressing).  */
40325 
40326 rtx
40327 rs6000_allocate_stack_temp (machine_mode mode,
40328 			    bool offsettable_p,
40329 			    bool reg_reg_p)
40330 {
40331   rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40332   rtx addr = XEXP (stack, 0);
40333   int strict_p = (reload_in_progress || reload_completed);
40334 
40335   if (!legitimate_indirect_address_p (addr, strict_p))
40336     {
40337       if (offsettable_p
40338 	  && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
40339 	stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
40340 
40341       else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
40342 	stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
40343     }
40344 
40345   return stack;
40346 }
40347 
40348 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
40349    to such a form to deal with memory reference instructions like STFIWX that
40350    only take reg+reg addressing.  */
40351 
40352 rtx
40353 rs6000_address_for_fpconvert (rtx x)
40354 {
40355   int strict_p = (reload_in_progress || reload_completed);
40356   rtx addr;
40357 
40358   gcc_assert (MEM_P (x));
40359   addr = XEXP (x, 0);
40360   if (! legitimate_indirect_address_p (addr, strict_p)
40361       && ! legitimate_indexed_address_p (addr, strict_p))
40362     {
40363       if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
40364 	{
40365 	  rtx reg = XEXP (addr, 0);
40366 	  HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
40367 	  rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
40368 	  gcc_assert (REG_P (reg));
40369 	  emit_insn (gen_add3_insn (reg, reg, size_rtx));
40370 	  addr = reg;
40371 	}
40372       else if (GET_CODE (addr) == PRE_MODIFY)
40373 	{
40374 	  rtx reg = XEXP (addr, 0);
40375 	  rtx expr = XEXP (addr, 1);
40376 	  gcc_assert (REG_P (reg));
40377 	  gcc_assert (GET_CODE (expr) == PLUS);
40378 	  emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
40379 	  addr = reg;
40380 	}
40381 
40382       x = replace_equiv_address (x, copy_addr_to_reg (addr));
40383     }
40384 
40385   return x;
40386 }
40387 
40388 /* Given a memory reference, if it is not in the form for altivec memory
40389    reference instructions (i.e. reg or reg+reg addressing with AND of -16),
40390    convert to the altivec format.  */
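/* E.g. an address held in r9 becomes (and r9 -16); the AltiVec lvx/stvx
   instructions ignore the low four bits of the effective address, so the
   AND simply makes that truncation explicit in the RTL.  */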
40391 
40392 rtx
40393 rs6000_address_for_altivec (rtx x)
40394 {
40395   gcc_assert (MEM_P (x));
40396   if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
40397     {
40398       rtx addr = XEXP (x, 0);
40399       int strict_p = (reload_in_progress || reload_completed);
40400 
40401       if (!legitimate_indexed_address_p (addr, strict_p)
40402 	  && !legitimate_indirect_address_p (addr, strict_p))
40403 	addr = copy_to_mode_reg (Pmode, addr);
40404 
40405       addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
40406       x = change_address (x, GET_MODE (x), addr);
40407     }
40408 
40409   return x;
40410 }
40411 
40412 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
40413 
40414    On the RS/6000, all integer constants are acceptable, though most won't
40415    be valid for particular insns.  Only easy FP constants are acceptable.  */
40416 
40417 static bool
40418 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
40419 {
40420   if (TARGET_ELF && tls_referenced_p (x))
40421     return false;
40422 
40423   return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
40424 	  || GET_MODE (x) == VOIDmode
40425 	  || (TARGET_POWERPC64 && mode == DImode)
40426 	  || easy_fp_constant (x, mode)
40427 	  || easy_vector_constant (x, mode));
40428 }
40429 
40430 
40431 /* Return TRUE iff the sequence ending in LAST sets the static chain.  */
40432 
40433 static bool
40434 chain_already_loaded (rtx_insn *last)
40435 {
40436   for (; last != NULL; last = PREV_INSN (last))
40437     {
40438       if (NONJUMP_INSN_P (last))
40439 	{
40440 	  rtx patt = PATTERN (last);
40441 
40442 	  if (GET_CODE (patt) == SET)
40443 	    {
40444 	      rtx lhs = XEXP (patt, 0);
40445 
40446 	      if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
40447 		return true;
40448 	    }
40449 	}
40450     }
40451   return false;
40452 }
40453 
40454 /* Expand code to perform a call under the AIX or ELFv2 ABI.  */
40455 
40456 void
40457 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
40458 {
40459   const bool direct_call_p
40460     = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
40461   rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
40462   rtx toc_load = NULL_RTX;
40463   rtx toc_restore = NULL_RTX;
40464   rtx func_addr;
40465   rtx abi_reg = NULL_RTX;
40466   rtx call[4];
40467   int n_call;
40468   rtx insn;
40469 
40470   /* Handle longcall attributes.  */
40471   if (INTVAL (cookie) & CALL_LONG)
40472     func_desc = rs6000_longcall_ref (func_desc);
40473 
40474   /* Handle indirect calls.  */
40475   if (GET_CODE (func_desc) != SYMBOL_REF
40476       || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
40477     {
40478       /* Save the TOC into its reserved slot before the call,
40479 	 and prepare to restore it after the call.  */
40480       rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
40481       rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
40482       rtx stack_toc_mem = gen_frame_mem (Pmode,
40483 					 gen_rtx_PLUS (Pmode, stack_ptr,
40484 						       stack_toc_offset));
40485       rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
40486 					     gen_rtvec (1, stack_toc_offset),
40487 					     UNSPEC_TOCSLOT);
40488       toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
40489 
40490       /* Can we optimize saving the TOC in the prologue or
40491 	 do we need to do it at every call?  */
40492       if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
40493 	cfun->machine->save_toc_in_prologue = true;
40494       else
40495 	{
40496 	  MEM_VOLATILE_P (stack_toc_mem) = 1;
40497 	  emit_move_insn (stack_toc_mem, toc_reg);
40498 	}
40499 
40500       if (DEFAULT_ABI == ABI_ELFv2)
40501 	{
40502 	  /* A function pointer in the ELFv2 ABI is just a plain address, but
40503 	     the ABI requires it to be loaded into r12 before the call.  */
40504 	  func_addr = gen_rtx_REG (Pmode, 12);
40505 	  emit_move_insn (func_addr, func_desc);
40506 	  abi_reg = func_addr;
40507 	}
40508       else
40509 	{
40510 	  /* A function pointer under AIX is a pointer to a data area whose
40511 	     first word contains the actual address of the function, whose
40512 	     second word contains a pointer to its TOC, and whose third word
40513 	     contains a value to place in the static chain register (r11).
40514 	     Note that if we load the static chain, our "trampoline" need
40515 	     not have any executable code.  */
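	  /* The code below reads the descriptor at offsets 0 (entry address),
	     GET_MODE_SIZE (Pmode) (TOC pointer) and 2 * GET_MODE_SIZE (Pmode)
	     (static chain), i.e. bytes 0/8/16 in 64-bit mode.  */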
40516 
40517 	  /* Load up address of the actual function.  */
40518 	  func_desc = force_reg (Pmode, func_desc);
40519 	  func_addr = gen_reg_rtx (Pmode);
40520 	  emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
40521 
40522 	  /* Prepare to load the TOC of the called function.  Note that the
40523 	     TOC load must happen immediately before the actual call so
40524 	     that unwinding the TOC registers works correctly.  See the
40525 	     comment in frob_update_context.  */
40526 	  rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
40527 	  rtx func_toc_mem = gen_rtx_MEM (Pmode,
40528 					  gen_rtx_PLUS (Pmode, func_desc,
40529 							func_toc_offset));
40530 	  toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
40531 
40532 	  /* If we have a static chain, load it up.  But, if the call was
40533 	     originally direct, the 3rd word has not been written since no
40534 	     trampoline has been built, so we ought not to load it, lest we
40535 	     overwrite a static chain value.  */
40536 	  if (!direct_call_p
40537 	      && TARGET_POINTERS_TO_NESTED_FUNCTIONS
40538 	      && !chain_already_loaded (get_current_sequence ()->next->last))
40539 	    {
40540 	      rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
40541 	      rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
40542 	      rtx func_sc_mem = gen_rtx_MEM (Pmode,
40543 					     gen_rtx_PLUS (Pmode, func_desc,
40544 							   func_sc_offset));
40545 	      emit_move_insn (sc_reg, func_sc_mem);
40546 	      abi_reg = sc_reg;
40547 	    }
40548 	}
40549     }
40550   else
40551     {
40552       /* Direct calls use the TOC: for local calls, the callee will
40553 	 assume the TOC register is set; for non-local calls, the
40554 	 PLT stub needs the TOC register.  */
40555       abi_reg = toc_reg;
40556       func_addr = func_desc;
40557     }
40558 
40559   /* Create the call.  */
40560   call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
40561   if (value != NULL_RTX)
40562     call[0] = gen_rtx_SET (value, call[0]);
40563   n_call = 1;
40564 
40565   if (toc_load)
40566     call[n_call++] = toc_load;
40567   if (toc_restore)
40568     call[n_call++] = toc_restore;
40569 
40570   call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
40571 
40572   insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
40573   insn = emit_call_insn (insn);
40574 
40575   /* Mention all registers defined by the ABI to hold information
40576      as uses in CALL_INSN_FUNCTION_USAGE.  */
40577   if (abi_reg)
40578     use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
40579 }
40580 
40581 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI.  */
40582 
40583 void
40584 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
40585 {
40586   rtx call[2];
40587   rtx insn;
40588 
40589   gcc_assert (INTVAL (cookie) == 0);
40590 
40591   /* Create the call.  */
40592   call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
40593   if (value != NULL_RTX)
40594     call[0] = gen_rtx_SET (value, call[0]);
40595 
40596   call[1] = simple_return_rtx;
40597 
40598   insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
40599   insn = emit_call_insn (insn);
40600 
40601   /* Note use of the TOC register.  */
40602   use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
40603 }
40604 
40605 /* Return whether we need to always update the saved TOC pointer when we update
40606    the stack pointer.  */
40607 
40608 static bool
40609 rs6000_save_toc_in_prologue_p (void)
40610 {
40611   return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
40612 }
40613 
40614 #ifdef HAVE_GAS_HIDDEN
40615 # define USE_HIDDEN_LINKONCE 1
40616 #else
40617 # define USE_HIDDEN_LINKONCE 0
40618 #endif
40619 
40620 /* Fills in the label name that should be used for a 476 link stack thunk.  */
40621 
40622 void
40623 get_ppc476_thunk_name (char name[32])
40624 {
40625   gcc_assert (TARGET_LINK_STACK);
40626 
40627   if (USE_HIDDEN_LINKONCE)
40628     sprintf (name, "__ppc476.get_thunk");
40629   else
40630     ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
40631 }
40632 
40633 /* This function emits the simple thunk routine that is used to preserve
40634    the link stack on the 476 cpu.  */
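/* The thunk body is a single blr.  Position-independent code materializes
   its address register by doing "bl __ppc476.get_thunk" followed by mflr;
   because that bl is paired with the thunk's blr, the 476's branch link
   stack stays balanced, where a bare "bcl 20,31,$+4" sequence would
   unbalance it.  */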
40635 
40636 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
40637 static void
40638 rs6000_code_end (void)
40639 {
40640   char name[32];
40641   tree decl;
40642 
40643   if (!TARGET_LINK_STACK)
40644     return;
40645 
40646   get_ppc476_thunk_name (name);
40647 
40648   decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
40649 		     build_function_type_list (void_type_node, NULL_TREE));
40650   DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
40651 				   NULL_TREE, void_type_node);
40652   TREE_PUBLIC (decl) = 1;
40653   TREE_STATIC (decl) = 1;
40654 
40655 #if RS6000_WEAK
40656   if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
40657     {
40658       cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
40659       targetm.asm_out.unique_section (decl, 0);
40660       switch_to_section (get_named_section (decl, NULL, 0));
40661       DECL_WEAK (decl) = 1;
40662       ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
40663       targetm.asm_out.globalize_label (asm_out_file, name);
40664       targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
40665       ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
40666     }
40667   else
40668 #endif
40669     {
40670       switch_to_section (text_section);
40671       ASM_OUTPUT_LABEL (asm_out_file, name);
40672     }
40673 
40674   DECL_INITIAL (decl) = make_node (BLOCK);
40675   current_function_decl = decl;
40676   allocate_struct_function (decl, false);
40677   init_function_start (decl);
40678   first_function_block_is_cold = false;
40679   /* Make sure unwind info is emitted for the thunk if needed.  */
40680   final_start_function (emit_barrier (), asm_out_file, 1);
40681 
40682   fputs ("\tblr\n", asm_out_file);
40683 
40684   final_end_function ();
40685   init_insn_lengths ();
40686   free_after_compilation (cfun);
40687   set_cfun (NULL);
40688   current_function_decl = NULL;
40689 }
40690 
40691 /* Add r30 to hard reg set if the prologue sets it up and it is not
40692    pic_offset_table_rtx.  */
40693 
40694 static void
40695 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
40696 {
40697   if (!TARGET_SINGLE_PIC_BASE
40698       && TARGET_TOC
40699       && TARGET_MINIMAL_TOC
40700       && !constant_pool_empty_p ())
40701     add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
40702   if (cfun->machine->split_stack_argp_used)
40703     add_to_hard_reg_set (&set->set, Pmode, 12);
40704 }
40705 
40706 
40707 /* Helper function for rs6000_split_logical to emit a logical instruction after
40708    splitting the operation into single GPR registers.
40709 
40710    DEST is the destination register.
40711    OP1 and OP2 are the input source registers.
40712    CODE is the base operation (AND, IOR, XOR, NOT).
40713    MODE is the machine mode.
40714    If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
40715    If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
40716    If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */
40717 
40718 static void
40719 rs6000_split_logical_inner (rtx dest,
40720 			    rtx op1,
40721 			    rtx op2,
40722 			    enum rtx_code code,
40723 			    machine_mode mode,
40724 			    bool complement_final_p,
40725 			    bool complement_op1_p,
40726 			    bool complement_op2_p)
40727 {
40728   rtx bool_rtx;
40729 
40730   /* Optimize AND of 0/0xffffffff and IOR/XOR of 0.  */
40731   if (op2 && GET_CODE (op2) == CONST_INT
40732       && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
40733       && !complement_final_p && !complement_op1_p && !complement_op2_p)
40734     {
40735       HOST_WIDE_INT mask = GET_MODE_MASK (mode);
40736       HOST_WIDE_INT value = INTVAL (op2) & mask;
40737 
40738       /* Optimize AND of 0 to just set 0.  Optimize AND of -1 to be a move.  */
40739       if (code == AND)
40740 	{
40741 	  if (value == 0)
40742 	    {
40743 	      emit_insn (gen_rtx_SET (dest, const0_rtx));
40744 	      return;
40745 	    }
40746 
40747 	  else if (value == mask)
40748 	    {
40749 	      if (!rtx_equal_p (dest, op1))
40750 		emit_insn (gen_rtx_SET (dest, op1));
40751 	      return;
40752 	    }
40753 	}
40754 
40755       /* Optimize IOR/XOR of 0 to be a simple move.  Split large operations
40756 	 into separate ORI/ORIS or XORI/XORIS instructions.  */
40757       else if (code == IOR || code == XOR)
40758 	{
40759 	  if (value == 0)
40760 	    {
40761 	      if (!rtx_equal_p (dest, op1))
40762 		emit_insn (gen_rtx_SET (dest, op1));
40763 	      return;
40764 	    }
40765 	}
40766     }
40767 
40768   if (code == AND && mode == SImode
40769       && !complement_final_p && !complement_op1_p && !complement_op2_p)
40770     {
40771       emit_insn (gen_andsi3 (dest, op1, op2));
40772       return;
40773     }
40774 
40775   if (complement_op1_p)
40776     op1 = gen_rtx_NOT (mode, op1);
40777 
40778   if (complement_op2_p)
40779     op2 = gen_rtx_NOT (mode, op2);
40780 
40781   /* For canonical RTL, if only one arm is inverted it is the first.  */
40782   if (!complement_op1_p && complement_op2_p)
40783     std::swap (op1, op2);
40784 
40785   bool_rtx = ((code == NOT)
40786 	      ? gen_rtx_NOT (mode, op1)
40787 	      : gen_rtx_fmt_ee (code, mode, op1, op2));
40788 
40789   if (complement_final_p)
40790     bool_rtx = gen_rtx_NOT (mode, bool_rtx);
40791 
40792   emit_insn (gen_rtx_SET (dest, bool_rtx));
40793 }
40794 
40795 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system.  These
40796    operations are split immediately during RTL generation to allow for more
40797    optimizations of the AND/IOR/XOR.
40798 
40799    OPERANDS is an array containing the destination and two input operands.
40800    CODE is the base operation (AND, IOR, XOR, NOT).
40802    If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
40803    If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
40804    If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */
40807 
40808 static void
40809 rs6000_split_logical_di (rtx operands[3],
40810 			 enum rtx_code code,
40811 			 bool complement_final_p,
40812 			 bool complement_op1_p,
40813 			 bool complement_op2_p)
40814 {
40815   const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
40816   const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
40817   const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
40818   enum hi_lo { hi = 0, lo = 1 };
40819   rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
40820   size_t i;
40821 
40822   op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
40823   op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
40824   op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
40825   op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
40826 
40827   if (code == NOT)
40828     op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
40829   else
40830     {
40831       if (GET_CODE (operands[2]) != CONST_INT)
40832 	{
40833 	  op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
40834 	  op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
40835 	}
40836       else
40837 	{
40838 	  HOST_WIDE_INT value = INTVAL (operands[2]);
40839 	  HOST_WIDE_INT value_hi_lo[2];
40840 
40841 	  gcc_assert (!complement_final_p);
40842 	  gcc_assert (!complement_op1_p);
40843 	  gcc_assert (!complement_op2_p);
40844 
40845 	  value_hi_lo[hi] = value >> 32;
40846 	  value_hi_lo[lo] = value & lower_32bits;
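	  /* E.g. the constant 0x123456789abcdef0 splits into a high half of
	     0x12345678 and a low half of 0x9abcdef0; the loop below then sign
	     extends the low half to 0xffffffff9abcdef0 because its bit 31 is
	     set, matching a canonical SImode CONST_INT.  */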
40847 
40848 	  for (i = 0; i < 2; i++)
40849 	    {
40850 	      HOST_WIDE_INT sub_value = value_hi_lo[i];
40851 
40852 	      if (sub_value & sign_bit)
40853 		sub_value |= upper_32bits;
40854 
40855 	      op2_hi_lo[i] = GEN_INT (sub_value);
40856 
40857 	      /* If this is an AND instruction, check to see if we need to load
40858 		 the value in a register.  */
40859 	      if (code == AND && sub_value != -1 && sub_value != 0
40860 		  && !and_operand (op2_hi_lo[i], SImode))
40861 		op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
40862 	    }
40863 	}
40864     }
40865 
40866   for (i = 0; i < 2; i++)
40867     {
40868       /* Split large IOR/XOR operations.  */
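      /* E.g. on a 32-bit target "x |= 0x12345678" cannot be done with a
	 single ori/oris, so it is emitted as an IOR of the high 16 bits into
	 a temporary followed by an IOR of the low 16 bits, i.e. roughly
	 "oris tmp,x,0x1234; ori dest,tmp,0x5678".  */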
40869       if ((code == IOR || code == XOR)
40870 	  && GET_CODE (op2_hi_lo[i]) == CONST_INT
40871 	  && !complement_final_p
40872 	  && !complement_op1_p
40873 	  && !complement_op2_p
40874 	  && !logical_const_operand (op2_hi_lo[i], SImode))
40875 	{
40876 	  HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
40877 	  HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
40878 	  HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
40879 	  rtx tmp = gen_reg_rtx (SImode);
40880 
40881 	  /* Make sure the constant is sign extended.  */
40882 	  if ((hi_16bits & sign_bit) != 0)
40883 	    hi_16bits |= upper_32bits;
40884 
40885 	  rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
40886 				      code, SImode, false, false, false);
40887 
40888 	  rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
40889 				      code, SImode, false, false, false);
40890 	}
40891       else
40892 	rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
40893 				    code, SImode, complement_final_p,
40894 				    complement_op1_p, complement_op2_p);
40895     }
40896 
40897   return;
40898 }
40899 
40900 /* Split the insns that make up boolean operations operating on multiple GPR
40901    registers.  The boolean MD patterns ensure that the inputs either are
40902    exactly the same as the output registers, or there is no overlap.
40903 
40904    OPERANDS is an array containing the destination and two input operands.
40905    CODE is the base operation (AND, IOR, XOR, NOT).
40906    If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
40907    If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
40908    If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */
40909 
40910 void
40911 rs6000_split_logical (rtx operands[3],
40912 		      enum rtx_code code,
40913 		      bool complement_final_p,
40914 		      bool complement_op1_p,
40915 		      bool complement_op2_p)
40916 {
40917   machine_mode mode = GET_MODE (operands[0]);
40918   machine_mode sub_mode;
40919   rtx op0, op1, op2;
40920   int sub_size, regno0, regno1, nregs, i;
40921 
40922   /* If this is DImode, use the specialized version that can run before
40923      register allocation.  */
40924   if (mode == DImode && !TARGET_POWERPC64)
40925     {
40926       rs6000_split_logical_di (operands, code, complement_final_p,
40927 			       complement_op1_p, complement_op2_p);
40928       return;
40929     }
40930 
40931   op0 = operands[0];
40932   op1 = operands[1];
40933   op2 = (code == NOT) ? NULL_RTX : operands[2];
40934   sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
40935   sub_size = GET_MODE_SIZE (sub_mode);
40936   regno0 = REGNO (op0);
40937   regno1 = REGNO (op1);
40938 
40939   gcc_assert (reload_completed);
40940   gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
40941   gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
40942 
40943   nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
40944   gcc_assert (nregs > 1);
40945 
40946   if (op2 && REG_P (op2))
40947     gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
40948 
40949   for (i = 0; i < nregs; i++)
40950     {
40951       int offset = i * sub_size;
40952       rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
40953       rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
40954       rtx sub_op2 = ((code == NOT)
40955 		     ? NULL_RTX
40956 		     : simplify_subreg (sub_mode, op2, mode, offset));
40957 
40958       rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
40959 				  complement_final_p, complement_op1_p,
40960 				  complement_op2_p);
40961     }
40962 
40963   return;
40964 }
40965 
40966 
40967 /* Return true if the peephole2 can combine a load involving a combination of
40968    an addis instruction and a load with an offset that can be fused together on
40969    a power8.  */
40970 
40971 bool
40972 fusion_gpr_load_p (rtx addis_reg,	/* register set via addis.  */
40973 		   rtx addis_value,	/* addis value.  */
40974 		   rtx target,		/* target register that is loaded.  */
40975 		   rtx mem)		/* bottom part of the memory addr. */
40976 {
40977   rtx addr;
40978   rtx base_reg;
40979 
40980   /* Validate arguments.  */
40981   if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
40982     return false;
40983 
40984   if (!base_reg_operand (target, GET_MODE (target)))
40985     return false;
40986 
40987   if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
40988     return false;
40989 
40990   /* Allow sign/zero extension.  */
40991   if (GET_CODE (mem) == ZERO_EXTEND
40992       || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
40993     mem = XEXP (mem, 0);
40994 
40995   if (!MEM_P (mem))
40996     return false;
40997 
40998   if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
40999     return false;
41000 
41001   addr = XEXP (mem, 0);			/* either PLUS or LO_SUM.  */
41002   if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
41003     return false;
41004 
41005   /* Validate that the register used to load the high value is either the
41006      register being loaded, or we can safely replace its use.
41007 
41008    This function is only called from the peephole2 pass, and we assume that
41009    there are 2 instructions in the peephole (addis and load), so we check
41010    that the target register is not used in the memory address and that the
41011    register holding the addis result is dead after the peephole.  */
41012   if (REGNO (addis_reg) != REGNO (target))
41013     {
41014       if (reg_mentioned_p (target, mem))
41015 	return false;
41016 
41017       if (!peep2_reg_dead_p (2, addis_reg))
41018 	return false;
41019 
41020       /* If the target register being loaded is the stack pointer, we must
41021          avoid loading any other value into it, even temporarily.  */
41022       if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
41023 	return false;
41024     }
41025 
41026   base_reg = XEXP (addr, 0);
41027   return REGNO (addis_reg) == REGNO (base_reg);
41028 }
41029 
41030 /* During the peephole2 pass, adjust and expand the insns for a load fusion
41031    sequence.  We adjust the addis register to use the target register.  If the
41032    load sign extends, we change it to a zero-extending load followed by an
41033    explicit sign extension, since the fusion only covers zero-extending
41034    loads.
41035 
41036    The operands are:
41037 	operands[0]	register set with addis (to be replaced with target)
41038 	operands[1]	value set via addis
41039 	operands[2]	target register being loaded
41040 	operands[3]	D-form memory reference using operands[0].  */
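/* E.g. a sign-extending halfword load (lha) is emitted as a zero-extending
   lhz followed by an explicit extsh of the loaded value.  */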
41041 
41042 void
41043 expand_fusion_gpr_load (rtx *operands)
41044 {
41045   rtx addis_value = operands[1];
41046   rtx target = operands[2];
41047   rtx orig_mem = operands[3];
41048   rtx  new_addr, new_mem, orig_addr, offset;
41049   enum rtx_code plus_or_lo_sum;
41050   machine_mode target_mode = GET_MODE (target);
41051   machine_mode extend_mode = target_mode;
41052   machine_mode ptr_mode = Pmode;
41053   enum rtx_code extend = UNKNOWN;
41054 
41055   if (GET_CODE (orig_mem) == ZERO_EXTEND
41056       || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
41057     {
41058       extend = GET_CODE (orig_mem);
41059       orig_mem = XEXP (orig_mem, 0);
41060       target_mode = GET_MODE (orig_mem);
41061     }
41062 
41063   gcc_assert (MEM_P (orig_mem));
41064 
41065   orig_addr = XEXP (orig_mem, 0);
41066   plus_or_lo_sum = GET_CODE (orig_addr);
41067   gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
41068 
41069   offset = XEXP (orig_addr, 1);
41070   new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
41071   new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
41072 
41073   if (extend != UNKNOWN)
41074     new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
41075 
41076   new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
41077 			    UNSPEC_FUSION_GPR);
41078   emit_insn (gen_rtx_SET (target, new_mem));
41079 
41080   if (extend == SIGN_EXTEND)
41081     {
41082       int sub_off = ((BYTES_BIG_ENDIAN)
41083 		     ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
41084 		     : 0);
41085       rtx sign_reg
41086 	= simplify_subreg (target_mode, target, extend_mode, sub_off);
41087 
41088       emit_insn (gen_rtx_SET (target,
41089 			      gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
41090     }
41091 
41092   return;
41093 }
41094 
41095 /* Emit the addis instruction that will be part of a fused instruction
41096    sequence.  */
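/* For example, on ELF the fused pair for a TOC-relative GPR load is
   roughly:

	addis r10,r2,sym@toc@ha
	lwz r10,sym@toc@l(r10)

   with this function emitting the addis half and emit_fusion_load_store
   emitting the load.  */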
41097 
41098 void
41099 emit_fusion_addis (rtx target, rtx addis_value, const char *comment,
41100 		   const char *mode_name)
41101 {
41102   rtx fuse_ops[10];
41103   char insn_template[80];
41104   const char *addis_str = NULL;
41105   const char *comment_str = ASM_COMMENT_START;
41106 
41107   if (*comment_str == ' ')
41108     comment_str++;
41109 
41110   /* Emit the addis instruction.  */
41111   fuse_ops[0] = target;
41112   if (satisfies_constraint_L (addis_value))
41113     {
41114       fuse_ops[1] = addis_value;
41115       addis_str = "lis %0,%v1";
41116     }
41117 
41118   else if (GET_CODE (addis_value) == PLUS)
41119     {
41120       rtx op0 = XEXP (addis_value, 0);
41121       rtx op1 = XEXP (addis_value, 1);
41122 
41123       if (REG_P (op0) && CONST_INT_P (op1)
41124 	  && satisfies_constraint_L (op1))
41125 	{
41126 	  fuse_ops[1] = op0;
41127 	  fuse_ops[2] = op1;
41128 	  addis_str = "addis %0,%1,%v2";
41129 	}
41130     }
41131 
41132   else if (GET_CODE (addis_value) == HIGH)
41133     {
41134       rtx value = XEXP (addis_value, 0);
41135       if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
41136 	{
41137 	  fuse_ops[1] = XVECEXP (value, 0, 0);		/* symbol ref.  */
41138 	  fuse_ops[2] = XVECEXP (value, 0, 1);		/* TOC register.  */
41139 	  if (TARGET_ELF)
41140 	    addis_str = "addis %0,%2,%1@toc@ha";
41141 
41142 	  else if (TARGET_XCOFF)
41143 	    addis_str = "addis %0,%1@u(%2)";
41144 
41145 	  else
41146 	    gcc_unreachable ();
41147 	}
41148 
41149       else if (GET_CODE (value) == PLUS)
41150 	{
41151 	  rtx op0 = XEXP (value, 0);
41152 	  rtx op1 = XEXP (value, 1);
41153 
41154 	  if (GET_CODE (op0) == UNSPEC
41155 	      && XINT (op0, 1) == UNSPEC_TOCREL
41156 	      && CONST_INT_P (op1))
41157 	    {
41158 	      fuse_ops[1] = XVECEXP (op0, 0, 0);	/* symbol ref.  */
41159 	      fuse_ops[2] = XVECEXP (op0, 0, 1);	/* TOC register.  */
41160 	      fuse_ops[3] = op1;
41161 	      if (TARGET_ELF)
41162 		addis_str = "addis %0,%2,%1+%3@toc@ha";
41163 
41164 	      else if (TARGET_XCOFF)
41165 		addis_str = "addis %0,%1+%3@u(%2)";
41166 
41167 	      else
41168 		gcc_unreachable ();
41169 	    }
41170 	}
41171 
41172       else if (satisfies_constraint_L (value))
41173 	{
41174 	  fuse_ops[1] = value;
41175 	  addis_str = "lis %0,%v1";
41176 	}
41177 
41178       else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
41179 	{
41180 	  fuse_ops[1] = value;
41181 	  addis_str = "lis %0,%1@ha";
41182 	}
41183     }
41184 
41185   if (!addis_str)
41186     fatal_insn ("Could not generate addis value for fusion", addis_value);
41187 
41188   sprintf (insn_template, "%s\t\t%s %s, type %s", addis_str, comment_str,
41189 	   comment, mode_name);
41190   output_asm_insn (insn_template, fuse_ops);
41191 }
41192 
41193 /* Emit a D-form load or store instruction that is the second instruction
41194    of a fusion sequence.  */
41195 
41196 void
41197 emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset,
41198 			const char *insn_str)
41199 {
41200   rtx fuse_ops[10];
41201   char insn_template[80];
41202 
41203   fuse_ops[0] = load_store_reg;
41204   fuse_ops[1] = addis_reg;
41205 
41206   if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
41207     {
41208       sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
41209       fuse_ops[2] = offset;
41210       output_asm_insn (insn_template, fuse_ops);
41211     }
41212 
41213   else if (GET_CODE (offset) == UNSPEC
41214 	   && XINT (offset, 1) == UNSPEC_TOCREL)
41215     {
41216       if (TARGET_ELF)
41217 	sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
41218 
41219       else if (TARGET_XCOFF)
41220 	sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
41221 
41222       else
41223 	gcc_unreachable ();
41224 
41225       fuse_ops[2] = XVECEXP (offset, 0, 0);
41226       output_asm_insn (insn_template, fuse_ops);
41227     }
41228 
41229   else if (GET_CODE (offset) == PLUS
41230 	   && GET_CODE (XEXP (offset, 0)) == UNSPEC
41231 	   && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
41232 	   && CONST_INT_P (XEXP (offset, 1)))
41233     {
41234       rtx tocrel_unspec = XEXP (offset, 0);
41235       if (TARGET_ELF)
41236 	sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
41237 
41238       else if (TARGET_XCOFF)
41239 	sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
41240 
41241       else
41242 	gcc_unreachable ();
41243 
41244       fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
41245       fuse_ops[3] = XEXP (offset, 1);
41246       output_asm_insn (insn_template, fuse_ops);
41247     }
41248 
41249   else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
41250     {
41251       sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
41252 
41253       fuse_ops[2] = offset;
41254       output_asm_insn (insn_template, fuse_ops);
41255     }
41256 
41257   else
41258     fatal_insn ("Unable to generate load/store offset for fusion", offset);
41259 
41260   return;
41261 }
41262 
41263 /* Wrap a TOC address that can be fused to indicate that special fusion
41264    processing is needed.  */
41265 
41266 rtx
41267 fusion_wrap_memory_address (rtx old_mem)
41268 {
41269   rtx old_addr = XEXP (old_mem, 0);
41270   rtvec v = gen_rtvec (1, old_addr);
41271   rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS);
41272   return replace_equiv_address_nv (old_mem, new_addr, false);
41273 }
41274 
41275 /* Given an address, convert it into the addis and load offset parts.  Addresses
41276    created during the peephole2 process look like:
41277 	(lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
41278 		(unspec [(...)] UNSPEC_TOCREL))
41279 
41280    Addresses created via toc fusion look like:
41281 	(unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS))  */
41282 
41283 static void
41284 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
41285 {
41286   rtx hi, lo;
41287 
41288   if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS)
41289     {
41290       lo = XVECEXP (addr, 0, 0);
41291       hi = gen_rtx_HIGH (Pmode, lo);
41292     }
41293   else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
41294     {
41295       hi = XEXP (addr, 0);
41296       lo = XEXP (addr, 1);
41297     }
41298   else
41299     gcc_unreachable ();
41300 
41301   *p_hi = hi;
41302   *p_lo = lo;
41303 }
41304 
41305 /* Return a string to fuse an addis instruction with a GPR load into the
41306    same register that the addis instruction set.  The address that is used
41307    is the logical address that was formed during peephole2:
41308 	(lo_sum (high) (low-part))
41309 
41310    Or the address is the TOC address that is wrapped before register allocation:
41311 	(unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS)
41312 
41313    The code is complicated, so we call output_asm_insn directly, and just
41314    return "".  */
41315 
41316 const char *
41317 emit_fusion_gpr_load (rtx target, rtx mem)
41318 {
41319   rtx addis_value;
41320   rtx addr;
41321   rtx load_offset;
41322   const char *load_str = NULL;
41323   const char *mode_name = NULL;
41324   machine_mode mode;
41325 
41326   if (GET_CODE (mem) == ZERO_EXTEND)
41327     mem = XEXP (mem, 0);
41328 
41329   gcc_assert (REG_P (target) && MEM_P (mem));
41330 
41331   addr = XEXP (mem, 0);
41332   fusion_split_address (addr, &addis_value, &load_offset);
41333 
41334   /* Now emit the load instruction to the same register.  */
41335   mode = GET_MODE (mem);
41336   switch (mode)
41337     {
41338     case E_QImode:
41339       mode_name = "char";
41340       load_str = "lbz";
41341       break;
41342 
41343     case E_HImode:
41344       mode_name = "short";
41345       load_str = "lhz";
41346       break;
41347 
41348     case E_SImode:
41349     case E_SFmode:
41350       mode_name = (mode == SFmode) ? "float" : "int";
41351       load_str = "lwz";
41352       break;
41353 
41354     case E_DImode:
41355     case E_DFmode:
41356       gcc_assert (TARGET_POWERPC64);
41357       mode_name = (mode == DFmode) ? "double" : "long";
41358       load_str = "ld";
41359       break;
41360 
41361     default:
41362       fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
41363     }
41364 
41365   /* Emit the addis instruction.  */
41366   emit_fusion_addis (target, addis_value, "gpr load fusion", mode_name);
41367 
41368   /* Emit the D-form load instruction.  */
41369   emit_fusion_load_store (target, target, load_offset, load_str);
41370 
41371   return "";
41372 }
41373 
41374 
41375 /* Return true if the peephole2 can combine a load/store involving a
41376    combination of an addis instruction and the memory operation.  This was
41377    added with the ISA 3.0 (power9) hardware.  */
41378 
41379 bool
41380 fusion_p9_p (rtx addis_reg,		/* register set via addis.  */
41381 	     rtx addis_value,		/* addis value.  */
41382 	     rtx dest,			/* destination (memory or register). */
41383 	     rtx src)			/* source (register or memory).  */
41384 {
41385   rtx addr, mem, offset;
41386   machine_mode mode = GET_MODE (src);
41387 
41388   /* Validate arguments.  */
41389   if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
41390     return false;
41391 
41392   if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
41393     return false;
41394 
41395   /* Ignore extend operations that are part of the load.  */
41396   if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND)
41397     src = XEXP (src, 0);
41398 
41399   /* Test for memory<-register or register<-memory.  */
41400   if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode))
41401     {
41402       if (!MEM_P (dest))
41403 	return false;
41404 
41405       mem = dest;
41406     }
41407 
41408   else if (MEM_P (src))
41409     {
41410       if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode))
41411 	return false;
41412 
41413       mem = src;
41414     }
41415 
41416   else
41417     return false;
41418 
41419   addr = XEXP (mem, 0);			/* either PLUS or LO_SUM.  */
41420   if (GET_CODE (addr) == PLUS)
41421     {
41422       if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
41423 	return false;
41424 
41425       return satisfies_constraint_I (XEXP (addr, 1));
41426     }
41427 
41428   else if (GET_CODE (addr) == LO_SUM)
41429     {
41430       if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
41431 	return false;
41432 
41433       offset = XEXP (addr, 1);
41434       if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
41435 	return small_toc_ref (offset, GET_MODE (offset));
41436 
41437       else if (TARGET_ELF && !TARGET_POWERPC64)
41438 	return CONSTANT_P (offset);
41439     }
41440 
41441   return false;
41442 }
41443 
41444 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
41445    load sequence.
41446 
41447    The operands are:
41448 	operands[0]	register set with addis
41449 	operands[1]	value set via addis
41450 	operands[2]	target register being loaded
41451 	operands[3]	D-form memory reference using operands[0].
41452 
41453   This is similar to the fusion introduced with power8, except it extends to
41454   both loads and stores and does not require the result register to be the
41455   same as the base register.  At the moment, we only do this if the register
41456   set by the addis is dead.  */
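/* E.g. after the peephole the pair is emitted as roughly

	addis tmp,r2,sym@toc@ha
	lfd f1,sym@toc@l(tmp)

   where TMP (operands[0]) must be dead after the sequence.  */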
41457 
41458 void
41459 expand_fusion_p9_load (rtx *operands)
41460 {
41461   rtx tmp_reg = operands[0];
41462   rtx addis_value = operands[1];
41463   rtx target = operands[2];
41464   rtx orig_mem = operands[3];
41465   rtx  new_addr, new_mem, orig_addr, offset, set, clobber, insn;
41466   enum rtx_code plus_or_lo_sum;
41467   machine_mode target_mode = GET_MODE (target);
41468   machine_mode extend_mode = target_mode;
41469   machine_mode ptr_mode = Pmode;
41470   enum rtx_code extend = UNKNOWN;
41471 
41472   if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND)
41473     {
41474       extend = GET_CODE (orig_mem);
41475       orig_mem = XEXP (orig_mem, 0);
41476       target_mode = GET_MODE (orig_mem);
41477     }
41478 
41479   gcc_assert (MEM_P (orig_mem));
41480 
41481   orig_addr = XEXP (orig_mem, 0);
41482   plus_or_lo_sum = GET_CODE (orig_addr);
41483   gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
41484 
41485   offset = XEXP (orig_addr, 1);
41486   new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
41487   new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
41488 
41489   if (extend != UNKNOWN)
41490     new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem);
41491 
41492   new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
41493 			    UNSPEC_FUSION_P9);
41494 
41495   set = gen_rtx_SET (target, new_mem);
41496   clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
41497   insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
41498   emit_insn (insn);
41499 
41500   return;
41501 }
41502 
41503 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
41504    store sequence.
41505 
41506    The operands are:
41507 	operands[0]	register set with addis
41508 	operands[1]	value set via addis
41509 	operands[2]	target D-form memory being stored to
41510 	operands[3]	register being stored
41511 
41512   This is similar to the fusion introduced with power8, except it extends to
41513   both loads and stores and does not require the result register to be the
41514   same as the base register.  At the moment, we only do this if the register
41515   set by the addis is dead.  */
41516 
41517 void
41518 expand_fusion_p9_store (rtx *operands)
41519 {
41520   rtx tmp_reg = operands[0];
41521   rtx addis_value = operands[1];
41522   rtx orig_mem = operands[2];
41523   rtx src = operands[3];
41524   rtx  new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src;
41525   enum rtx_code plus_or_lo_sum;
41526   machine_mode target_mode = GET_MODE (orig_mem);
41527   machine_mode ptr_mode = Pmode;
41528 
41529   gcc_assert (MEM_P (orig_mem));
41530 
41531   orig_addr = XEXP (orig_mem, 0);
41532   plus_or_lo_sum = GET_CODE (orig_addr);
41533   gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
41534 
41535   offset = XEXP (orig_addr, 1);
41536   new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
41537   new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
41538 
41539   new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src),
41540 			    UNSPEC_FUSION_P9);
41541 
41542   set = gen_rtx_SET (new_mem, new_src);
41543   clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
41544   insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
41545   emit_insn (insn);
41546 
41547   return;
41548 }
41549 
41550 /* Return a string to fuse an addis instruction with a load using extended
41551    fusion.  The address that is used is the logical address that was formed
41552    during peephole2: (lo_sum (high) (low-part))
41553 
41554    The code is complicated, so we call output_asm_insn directly, and just
41555    return "".  */
41556 
41557 const char *
41558 emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg)
41559 {
41560   machine_mode mode = GET_MODE (reg);
41561   rtx hi;
41562   rtx lo;
41563   rtx addr;
41564   const char *load_string;
41565   int r;
41566 
41567   if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND)
41568     {
41569       mem = XEXP (mem, 0);
41570       mode = GET_MODE (mem);
41571     }
41572 
41573   if (GET_CODE (reg) == SUBREG)
41574     {
41575       gcc_assert (SUBREG_BYTE (reg) == 0);
41576       reg = SUBREG_REG (reg);
41577     }
41578 
41579   if (!REG_P (reg))
41580     fatal_insn ("emit_fusion_p9_load, bad reg #1", reg);
41581 
41582   r = REGNO (reg);
41583   if (FP_REGNO_P (r))
41584     {
41585       if (mode == SFmode)
41586 	load_string = "lfs";
41587       else if (mode == DFmode || mode == DImode)
41588 	load_string = "lfd";
41589       else
41590 	gcc_unreachable ();
41591     }
41592   else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR)
41593     {
41594       if (mode == SFmode)
41595 	load_string = "lxssp";
41596       else if (mode == DFmode || mode == DImode)
41597 	load_string = "lxsd";
41598       else
41599 	gcc_unreachable ();
41600     }
41601   else if (INT_REGNO_P (r))
41602     {
41603       switch (mode)
41604 	{
41605 	case E_QImode:
41606 	  load_string = "lbz";
41607 	  break;
41608 	case E_HImode:
41609 	  load_string = "lhz";
41610 	  break;
41611 	case E_SImode:
41612 	case E_SFmode:
41613 	  load_string = "lwz";
41614 	  break;
41615 	case E_DImode:
41616 	case E_DFmode:
41617 	  if (!TARGET_POWERPC64)
41618 	    gcc_unreachable ();
41619 	  load_string = "ld";
41620 	  break;
41621 	default:
41622 	  gcc_unreachable ();
41623 	}
41624     }
41625   else
41626     fatal_insn ("emit_fusion_p9_load, bad reg #2", reg);
41627 
41628   if (!MEM_P (mem))
41629     fatal_insn ("emit_fusion_p9_load not MEM", mem);
41630 
41631   addr = XEXP (mem, 0);
41632   fusion_split_address (addr, &hi, &lo);
41633 
41634   /* Emit the addis instruction.  */
41635   emit_fusion_addis (tmp_reg, hi, "power9 load fusion", GET_MODE_NAME (mode));
41636 
41637   /* Emit the D-form load instruction.  */
41638   emit_fusion_load_store (reg, tmp_reg, lo, load_string);
41639 
41640   return "";
41641 }
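
/* As a hedged example (register numbers and the .LC0 symbol are purely
   illustrative), the two helpers above print a fused GPR load of an
   SImode value as:

	addis 9,2,.LC0@toc@ha
	lwz 3,.LC0@toc@l(9)

   which a power9 core can fuse into a single operation.  */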
41642 
41643 /* Return a string to fuse an addis instruction with a store using extended
41644    fusion.  The address that is used is the logical address that was formed
41645    during peephole2: (lo_sum (high) (low-part))
41646 
41647    The code is complicated, so we call output_asm_insn directly, and just
41648    return "".  */
41649 
41650 const char *
41651 emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg)
41652 {
41653   machine_mode mode = GET_MODE (reg);
41654   rtx hi;
41655   rtx lo;
41656   rtx addr;
41657   const char *store_string;
41658   int r;
41659 
41660   if (GET_CODE (reg) == SUBREG)
41661     {
41662       gcc_assert (SUBREG_BYTE (reg) == 0);
41663       reg = SUBREG_REG (reg);
41664     }
41665 
41666   if (!REG_P (reg))
41667     fatal_insn ("emit_fusion_p9_store, bad reg #1", reg);
41668 
41669   r = REGNO (reg);
41670   if (FP_REGNO_P (r))
41671     {
41672       if (mode == SFmode)
41673 	store_string = "stfs";
41674       else if (mode == DFmode)
41675 	store_string = "stfd";
41676       else
41677 	gcc_unreachable ();
41678     }
41679   else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR)
41680     {
41681       if (mode == SFmode)
41682 	store_string = "stxssp";
41683       else if (mode == DFmode || mode == DImode)
41684 	store_string = "stxsd";
41685       else
41686 	gcc_unreachable ();
41687     }
41688   else if (INT_REGNO_P (r))
41689     {
41690       switch (mode)
41691 	{
41692 	case E_QImode:
41693 	  store_string = "stb";
41694 	  break;
41695 	case E_HImode:
41696 	  store_string = "sth";
41697 	  break;
41698 	case E_SImode:
41699 	case E_SFmode:
41700 	  store_string = "stw";
41701 	  break;
41702 	case E_DImode:
41703 	case E_DFmode:
41704 	  if (!TARGET_POWERPC64)
41705 	    gcc_unreachable ();
41706 	  store_string = "std";
41707 	  break;
41708 	default:
41709 	  gcc_unreachable ();
41710 	}
41711     }
41712   else
41713     fatal_insn ("emit_fusion_p9_store, bad reg #2", reg);
41714 
41715   if (!MEM_P (mem))
41716     fatal_insn ("emit_fusion_p9_store not MEM", mem);
41717 
41718   addr = XEXP (mem, 0);
41719   fusion_split_address (addr, &hi, &lo);
41720 
41721   /* Emit the addis instruction.  */
41722   emit_fusion_addis (tmp_reg, hi, "power9 store fusion", GET_MODE_NAME (mode));
41723 
41724   /* Emit the D-form store instruction.  */
41725   emit_fusion_load_store (reg, tmp_reg, lo, store_string);
41726 
41727   return "";
41728 }
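
/* Similarly (illustrative registers and symbol only), a fused GPR store
   prints as:

	addis 9,2,var@toc@ha
	stw 3,var@toc@l(9)  */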
41729 
41730 
41731 /* Analyze vector computations and remove unnecessary doubleword
41732    swaps (xxswapdi instructions).  This pass is performed only
41733    for little-endian VSX code generation.
41734 
41735    For this specific case, loads and stores of 4x32 and 2x64 vectors
41736    are inefficient.  These are implemented using the lxvd2x and
41737    stxvd2x instructions, which invert the order of doublewords in
41738    a vector register.  Thus the code generation inserts an xxswapdi
41739    after each such load, and prior to each such store.  (For spill
41740    code after register assignment, an additional xxswapdi is inserted
41741    following each store in order to return a hard register to its
41742    unpermuted value.)
41743 
41744    The extra xxswapdi instructions reduce performance.  This can be
41745    particularly bad for vectorized code.  The purpose of this pass
41746    is to reduce the number of xxswapdi instructions required for
41747    correctness.
41748 
41749    The primary insight is that much code that operates on vectors
41750    does not care about the relative order of elements in a register,
41751    so long as the correct memory order is preserved.  If we have
41752    a computation where all input values are provided by lxvd2x/xxswapdi
41753    sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
41754    and all intermediate computations are pure SIMD (independent of
41755    element order), then all the xxswapdi's associated with the loads
41756    and stores may be removed.
41757 
41758    This pass uses some of the infrastructure and logical ideas from
41759    the "web" pass in web.c.  We create maximal webs of computations
41760    fitting the description above using union-find.  Each such web is
41761    then optimized by removing its unnecessary xxswapdi instructions.
41762 
41763    The pass is placed prior to global optimization so that we can
41764    perform the optimization in the safest and simplest way possible;
41765    that is, by replacing each xxswapdi insn with a register copy insn.
41766    Subsequent forward propagation will remove copies where possible.
41767 
41768    There are some operations sensitive to element order for which we
41769    can still allow the operation, provided we modify those operations.
41770    These include CONST_VECTORs, for which we must swap the first and
41771    second halves of the constant vector; and SUBREGs, for which we
41772    must adjust the byte offset to account for the swapped doublewords.
41773    A remaining opportunity would be non-immediate-form splats, for
41774    which we should adjust the selected lane of the input.  We should
41775    also make code generation adjustments for sum-across operations,
41776    since this is a common vectorizer reduction.
41777 
41778    Because we run prior to the first split, we can see loads and stores
41779    here that match *vsx_le_perm_{load,store}_<mode>.  These are vanilla
41780    vector loads and stores that have not yet been split into a permuting
41781    load/store and a swap.  (One way this can happen is with a builtin
41782    call to vec_vsx_{ld,st}.)  We can handle these as well, but rather
41783    than deleting a swap, we convert the load/store into a permuting
41784    load/store (which effectively removes the swap).  */
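
/* Illustrative little-endian example (hypothetical registers): before this
   pass, a vectorized copy loop body contains

	lxvd2x 0,0,9		# permuting load
	xxpermdi 0,0,0,2	# doubleword swap after the load
	xxpermdi 0,0,0,2	# doubleword swap before the store
	stxvd2x 0,0,10		# permuting store

   and once the swaps are turned into copies and propagated away, only the
   lxvd2x and stxvd2x remain.  */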
41785 
41786 /* Notes on Permutes
41787 
41788    We do not currently handle computations that contain permutes.  There
41789    is a general transformation that can be performed correctly, but it
41790    may introduce more expensive code than it replaces.  To handle these
41791    would require a cost model to determine when to perform the optimization.
41792    This commentary records how this could be done if desired.
41793 
41794    The most general permute is something like this (example for V16QI):
41795 
41796    (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
41797                      (parallel [(const_int a0) (const_int a1)
41798                                  ...
41799                                 (const_int a14) (const_int a15)]))
41800 
41801    where a0,...,a15 are in [0,31] and select which elements of op1 and
41802    op2 appear in the result.
41803 
41804    Regardless of mode, we can convert the PARALLEL to a mask of 16
41805    byte-element selectors.  Let's call this M, with M[i] representing
41806    the ith byte-element selector value.  Then if we swap doublewords
41807    throughout the computation, we can get correct behavior by replacing
41808    M with M' as follows:
41809 
41810     M'[i] = { (M[i]+8)%16      : M[i] in [0,15]
41811             { ((M[i]+8)%16)+16 : M[i] in [16,31]
41812 
41813    This seems promising at first, since we are just replacing one mask
41814    with another.  But certain masks are preferable to others.  If M
41815    is a mask that matches a vmrghh pattern, for example, M' certainly
41816    will not.  Instead of a single vmrghh, we would generate a load of
41817    M' and a vperm.  So we would need to know how many xxswapd's we can
41818    remove as a result of this transformation to determine if it's
41819    profitable; and preferably the logic would need to be aware of all
41820    the special preferable masks.
41821 
41822    Another form of permute is an UNSPEC_VPERM, in which the mask is
41823    already in a register.  In some cases, this mask may be a constant
41824    that we can discover with ud-chains, in which case the above
41825    transformation is ok.  However, the common usage here is for the
41826    mask to be produced by an UNSPEC_LVSL, in which case the mask
41827    cannot be known at compile time.  In such a case we would have to
41828    generate several instructions to compute M' as above at run time,
41829    and a cost model is needed again.
41830 
41831    However, when the mask M for an UNSPEC_VPERM is loaded from the
41832    constant pool, we can replace M with M' as above at no cost
41833    beyond adding a constant pool entry.  */
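
/* Worked example (for illustration): the identity selector
   M = {0,1,...,15} maps to M' = {8,9,...,15,0,1,...,7}, while an element
   selecting from op2 such as M[i] = 19 maps to
   M'[i] = ((19+8)%16)+16 = 27.  */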
41834 
41835 /* This is based on the union-find logic in web.c.  web_entry_base is
41836    defined in df.h.  */
41837 class swap_web_entry : public web_entry_base
41838 {
41839  public:
41840   /* Pointer to the insn.  */
41841   rtx_insn *insn;
41842   /* Set if insn contains a mention of a vector register.  All other
41843      fields are undefined if this field is unset.  */
41844   unsigned int is_relevant : 1;
41845   /* Set if insn is a load.  */
41846   unsigned int is_load : 1;
41847   /* Set if insn is a store.  */
41848   unsigned int is_store : 1;
41849   /* Set if insn is a doubleword swap.  This can either be a register swap
41850      or a permuting load or store (test is_load and is_store for this).  */
41851   unsigned int is_swap : 1;
41852   /* Set if the insn has a live-in use of a parameter register.  */
41853   unsigned int is_live_in : 1;
41854   /* Set if the insn has a live-out def of a return register.  */
41855   unsigned int is_live_out : 1;
41856   /* Set if the insn contains a subreg reference of a vector register.  */
41857   unsigned int contains_subreg : 1;
41858   /* Set if the insn contains a 128-bit integer operand.  */
41859   unsigned int is_128_int : 1;
41860   /* Set if this is a call-insn.  */
41861   unsigned int is_call : 1;
41862   /* Set if this insn does not perform a vector operation for which
41863      element order matters, or if we know how to fix it up if it does.
41864      Undefined if is_swap is set.  */
41865   unsigned int is_swappable : 1;
41866   /* A nonzero value indicates what kind of special handling for this
41867      insn is required if doublewords are swapped.  Undefined if
41868      is_swappable is not set.  */
41869   unsigned int special_handling : 4;
41870   /* Set if the web represented by this entry cannot be optimized.  */
41871   unsigned int web_not_optimizable : 1;
41872   /* Set if this insn should be deleted.  */
41873   unsigned int will_delete : 1;
41874 };
41875 
41876 enum special_handling_values {
41877   SH_NONE = 0,
41878   SH_CONST_VECTOR,
41879   SH_SUBREG,
41880   SH_NOSWAP_LD,
41881   SH_NOSWAP_ST,
41882   SH_EXTRACT,
41883   SH_SPLAT,
41884   SH_XXPERMDI,
41885   SH_CONCAT,
41886   SH_VPERM
41887 };
41888 
41889 /* Union INSN with all insns containing definitions that reach USE.
41890    Detect whether USE is live-in to the current function.  */
41891 static void
41892 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
41893 {
41894   struct df_link *link = DF_REF_CHAIN (use);
41895 
41896   if (!link)
41897     insn_entry[INSN_UID (insn)].is_live_in = 1;
41898 
41899   while (link)
41900     {
41901       if (DF_REF_IS_ARTIFICIAL (link->ref))
41902 	insn_entry[INSN_UID (insn)].is_live_in = 1;
41903 
41904       if (DF_REF_INSN_INFO (link->ref))
41905 	{
41906 	  rtx def_insn = DF_REF_INSN (link->ref);
41907 	  (void)unionfind_union (insn_entry + INSN_UID (insn),
41908 				 insn_entry + INSN_UID (def_insn));
41909 	}
41910 
41911       link = link->next;
41912     }
41913 }
41914 
41915 /* Union INSN with all insns containing uses reached from DEF.
41916    Detect whether DEF is live-out from the current function.  */
41917 static void
41918 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
41919 {
41920   struct df_link *link = DF_REF_CHAIN (def);
41921 
41922   if (!link)
41923     insn_entry[INSN_UID (insn)].is_live_out = 1;
41924 
41925   while (link)
41926     {
41927       /* This could be an eh use or some other artificial use;
41928 	 we treat these all the same (killing the optimization).  */
41929       if (DF_REF_IS_ARTIFICIAL (link->ref))
41930 	insn_entry[INSN_UID (insn)].is_live_out = 1;
41931 
41932       if (DF_REF_INSN_INFO (link->ref))
41933 	{
41934 	  rtx use_insn = DF_REF_INSN (link->ref);
41935 	  (void)unionfind_union (insn_entry + INSN_UID (insn),
41936 				 insn_entry + INSN_UID (use_insn));
41937 	}
41938 
41939       link = link->next;
41940     }
41941 }
41942 
41943 /* Return 1 iff INSN is a load insn, including permuting loads that
41944    represent an lxvd2x instruction; else return 0.  */
41945 static unsigned int
41946 insn_is_load_p (rtx insn)
41947 {
41948   rtx body = PATTERN (insn);
41949 
41950   if (GET_CODE (body) == SET)
41951     {
41952       if (GET_CODE (SET_SRC (body)) == MEM)
41953 	return 1;
41954 
41955       if (GET_CODE (SET_SRC (body)) == VEC_SELECT
41956 	  && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
41957 	return 1;
41958 
41959       return 0;
41960     }
41961 
41962   if (GET_CODE (body) != PARALLEL)
41963     return 0;
41964 
41965   rtx set = XVECEXP (body, 0, 0);
41966 
41967   if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
41968     return 1;
41969 
41970   return 0;
41971 }
41972 
41973 /* Return 1 iff INSN is a store insn, including permuting stores that
41974    represent a stxvd2x instruction; else return 0.  */
41975 static unsigned int
41976 insn_is_store_p (rtx insn)
41977 {
41978   rtx body = PATTERN (insn);
41979   if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
41980     return 1;
41981   if (GET_CODE (body) != PARALLEL)
41982     return 0;
41983   rtx set = XVECEXP (body, 0, 0);
41984   if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
41985     return 1;
41986   return 0;
41987 }
41988 
41989 /* Return 1 iff INSN swaps doublewords.  This may be a reg-reg swap,
41990    a permuting load, or a permuting store.  */
41991 static unsigned int
41992 insn_is_swap_p (rtx insn)
41993 {
41994   rtx body = PATTERN (insn);
41995   if (GET_CODE (body) != SET)
41996     return 0;
41997   rtx rhs = SET_SRC (body);
41998   if (GET_CODE (rhs) != VEC_SELECT)
41999     return 0;
42000   rtx parallel = XEXP (rhs, 1);
42001   if (GET_CODE (parallel) != PARALLEL)
42002     return 0;
42003   unsigned int len = XVECLEN (parallel, 0);
42004   if (len != 2 && len != 4 && len != 8 && len != 16)
42005     return 0;
42006   for (unsigned int i = 0; i < len / 2; ++i)
42007     {
42008       rtx op = XVECEXP (parallel, 0, i);
42009       if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
42010 	return 0;
42011     }
42012   for (unsigned int i = len / 2; i < len; ++i)
42013     {
42014       rtx op = XVECEXP (parallel, 0, i);
42015       if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
42016 	return 0;
42017     }
42018   return 1;
42019 }
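
/* For example (sketch only), a V4SI doubleword swap accepted above has
   the form

	(set (reg:V4SI x)
	     (vec_select:V4SI (reg:V4SI y)
			      (parallel [(const_int 2) (const_int 3)
					 (const_int 0) (const_int 1)])))

   i.e. the selector lists the second half of the lanes followed by the
   first half.  */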
42020 
42021 /* Return TRUE if INSN is a swap fed by a load from the constant pool.  */
42022 static bool
42023 const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
42024 {
42025   unsigned uid = INSN_UID (insn);
42026   if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
42027     return false;
42028 
42029   /* Find the unique use in the swap and locate its def.  If the def
42030      isn't unique, punt.  */
42031   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42032   df_ref use;
42033   FOR_EACH_INSN_INFO_USE (use, insn_info)
42034     {
42035       struct df_link *def_link = DF_REF_CHAIN (use);
42036       if (!def_link || def_link->next)
42037 	return false;
42038 
42039       rtx def_insn = DF_REF_INSN (def_link->ref);
42040       unsigned uid2 = INSN_UID (def_insn);
42041       if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
42042 	return false;
42043 
42044       rtx body = PATTERN (def_insn);
42045       if (GET_CODE (body) != SET
42046 	  || GET_CODE (SET_SRC (body)) != VEC_SELECT
42047 	  || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
42048 	return false;
42049 
42050       rtx mem = XEXP (SET_SRC (body), 0);
42051       rtx base_reg = XEXP (mem, 0);
42052 
42053       df_ref base_use;
42054       insn_info = DF_INSN_INFO_GET (def_insn);
42055       FOR_EACH_INSN_INFO_USE (base_use, insn_info)
42056 	{
42057 	  if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
42058 	    continue;
42059 
42060 	  struct df_link *base_def_link = DF_REF_CHAIN (base_use);
42061 	  if (!base_def_link || base_def_link->next)
42062 	    return false;
42063 
42064 	  rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
42065 	  rtx tocrel_body = PATTERN (tocrel_insn);
42066 	  rtx base, offset;
42067 	  if (GET_CODE (tocrel_body) != SET)
42068 	    return false;
42069 	  /* There is an extra level of indirection for small/large
42070 	     code models.  */
42071 	  rtx tocrel_expr = SET_SRC (tocrel_body);
42072 	  if (GET_CODE (tocrel_expr) == MEM)
42073 	    tocrel_expr = XEXP (tocrel_expr, 0);
42074 	  if (!toc_relative_expr_p (tocrel_expr, false))
42075 	    return false;
42076 	  split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
42077 	  if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base))
42078 	    return false;
42079 	}
42080     }
42081   return true;
42082 }
42083 
42084 /* Return TRUE iff OP matches a V2DF reduction pattern.  See the
42085    definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md.  */
42086 static bool
42087 v2df_reduction_p (rtx op)
42088 {
42089   if (GET_MODE (op) != V2DFmode)
42090     return false;
42091 
42092   enum rtx_code code = GET_CODE (op);
42093   if (code != PLUS && code != SMIN && code != SMAX)
42094     return false;
42095 
42096   rtx concat = XEXP (op, 0);
42097   if (GET_CODE (concat) != VEC_CONCAT)
42098     return false;
42099 
42100   rtx select0 = XEXP (concat, 0);
42101   rtx select1 = XEXP (concat, 1);
42102   if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
42103     return false;
42104 
42105   rtx reg0 = XEXP (select0, 0);
42106   rtx reg1 = XEXP (select1, 0);
42107   if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
42108     return false;
42109 
42110   rtx parallel0 = XEXP (select0, 1);
42111   rtx parallel1 = XEXP (select1, 1);
42112   if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
42113     return false;
42114 
42115   if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
42116       || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
42117     return false;
42118 
42119   return true;
42120 }
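
/* A sketch of the shape accepted above (see the vsx_reduc_* patterns in
   vsx.md for the authoritative form; the second operand of the PLUS is
   not checked here):

	(plus:V2DF
	  (vec_concat:V2DF
	    (vec_select:DF (reg:V2DF x) (parallel [(const_int 1)]))
	    (vec_select:DF (reg:V2DF x) (parallel [(const_int 0)])))
	  ...)  */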
42121 
42122 /* Return 1 iff OP is an operand that will not be affected by having
42123    vector doublewords swapped in memory.  */
42124 static unsigned int
42125 rtx_is_swappable_p (rtx op, unsigned int *special)
42126 {
42127   enum rtx_code code = GET_CODE (op);
42128   int i, j;
42129   rtx parallel;
42130 
42131   switch (code)
42132     {
42133     case LABEL_REF:
42134     case SYMBOL_REF:
42135     case CLOBBER:
42136     case REG:
42137       return 1;
42138 
42139     case VEC_CONCAT:
42140     case ASM_INPUT:
42141     case ASM_OPERANDS:
42142       return 0;
42143 
42144     case CONST_VECTOR:
42145       {
42146 	*special = SH_CONST_VECTOR;
42147 	return 1;
42148       }
42149 
42150     case VEC_DUPLICATE:
42151       /* Opportunity: If XEXP (op, 0) has the same mode as the result,
42152 	 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
42153 	 it represents a vector splat for which we can do special
42154 	 handling.  */
42155       if (GET_CODE (XEXP (op, 0)) == CONST_INT)
42156 	return 1;
42157       else if (REG_P (XEXP (op, 0))
42158 	       && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
42159 	/* This catches V2DF and V2DI splat, at a minimum.  */
42160 	return 1;
42161       else if (GET_CODE (XEXP (op, 0)) == TRUNCATE
42162 	       && REG_P (XEXP (XEXP (op, 0), 0))
42163 	       && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
42164 	/* This catches splat of a truncated value.  */
42165 	return 1;
42166       else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
42167 	/* If the duplicated item is from a select, defer to the select
42168 	   processing to see if we can change the lane for the splat.  */
42169 	return rtx_is_swappable_p (XEXP (op, 0), special);
42170       else
42171 	return 0;
42172 
42173     case VEC_SELECT:
42174       /* A vec_extract operation is ok if we change the lane.  */
42175       if (GET_CODE (XEXP (op, 0)) == REG
42176 	  && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
42177 	  && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
42178 	  && XVECLEN (parallel, 0) == 1
42179 	  && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
42180 	{
42181 	  *special = SH_EXTRACT;
42182 	  return 1;
42183 	}
42184       /* An XXPERMDI is ok if we adjust the lanes.  Note that if the
42185 	 XXPERMDI is a swap operation, it will be identified by
42186 	 insn_is_swap_p and therefore we won't get here.  */
42187       else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
42188 	       && (GET_MODE (XEXP (op, 0)) == V4DFmode
42189 		   || GET_MODE (XEXP (op, 0)) == V4DImode)
42190 	       && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
42191 	       && XVECLEN (parallel, 0) == 2
42192 	       && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
42193 	       && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
42194 	{
42195 	  *special = SH_XXPERMDI;
42196 	  return 1;
42197 	}
42198       else if (v2df_reduction_p (op))
42199 	return 1;
42200       else
42201 	return 0;
42202 
42203     case UNSPEC:
42204       {
42205 	/* Various operations are unsafe for this optimization, at least
42206 	   without significant additional work.  Permutes are obviously
42207 	   problematic, as both the permute control vector and the ordering
42208 	   of the target values are invalidated by doubleword swapping.
42209 	   Vector pack and unpack modify the number of vector lanes.
42210 	   Merge-high/low will not operate correctly on swapped operands.
42211 	   Vector shifts across element boundaries are clearly uncool,
42212 	   as are vector select and concatenate operations.  Vector
42213 	   sum-across instructions define one operand with a specific
42214 	   order-dependent element, so additional fixup code would be
42215 	   needed to make those work.  Vector set and non-immediate-form
42216 	   vector splat are element-order sensitive.  A few of these
42217 	   cases might be workable with special handling if required.
42218 	   Adding cost modeling would be appropriate in some cases.  */
42219 	int val = XINT (op, 1);
42220 	switch (val)
42221 	  {
42222 	  default:
42223 	    break;
42224 	  case UNSPEC_VMRGH_DIRECT:
42225 	  case UNSPEC_VMRGL_DIRECT:
42226 	  case UNSPEC_VPACK_SIGN_SIGN_SAT:
42227 	  case UNSPEC_VPACK_SIGN_UNS_SAT:
42228 	  case UNSPEC_VPACK_UNS_UNS_MOD:
42229 	  case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
42230 	  case UNSPEC_VPACK_UNS_UNS_SAT:
42231 	  case UNSPEC_VPERM:
42232 	  case UNSPEC_VPERM_UNS:
42233 	  case UNSPEC_VPERMHI:
42234 	  case UNSPEC_VPERMSI:
42235 	  case UNSPEC_VPKPX:
42236 	  case UNSPEC_VSLDOI:
42237 	  case UNSPEC_VSLO:
42238 	  case UNSPEC_VSRO:
42239 	  case UNSPEC_VSUM2SWS:
42240 	  case UNSPEC_VSUM4S:
42241 	  case UNSPEC_VSUM4UBS:
42242 	  case UNSPEC_VSUMSWS:
42243 	  case UNSPEC_VSUMSWS_DIRECT:
42244 	  case UNSPEC_VSX_CONCAT:
42245 	  case UNSPEC_VSX_SET:
42246 	  case UNSPEC_VSX_SLDWI:
42247 	  case UNSPEC_VUNPACK_HI_SIGN:
42248 	  case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
42249 	  case UNSPEC_VUNPACK_LO_SIGN:
42250 	  case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
42251 	  case UNSPEC_VUPKHPX:
42252 	  case UNSPEC_VUPKHS_V4SF:
42253 	  case UNSPEC_VUPKHU_V4SF:
42254 	  case UNSPEC_VUPKLPX:
42255 	  case UNSPEC_VUPKLS_V4SF:
42256 	  case UNSPEC_VUPKLU_V4SF:
42257 	  case UNSPEC_VSX_CVDPSPN:
42258 	  case UNSPEC_VSX_CVSPDP:
42259 	  case UNSPEC_VSX_CVSPDPN:
42260 	  case UNSPEC_VSX_EXTRACT:
42261 	  case UNSPEC_VSX_VSLO:
42262 	  case UNSPEC_VSX_VEC_INIT:
42263 	    return 0;
42264 	  case UNSPEC_VSPLT_DIRECT:
42265 	  case UNSPEC_VSX_XXSPLTD:
42266 	    *special = SH_SPLAT;
42267 	    return 1;
42268 	  case UNSPEC_REDUC_PLUS:
42269 	  case UNSPEC_REDUC:
42270 	    return 1;
42271 	  }
42272       }
42273 
42274     default:
42275       break;
42276     }
42277 
42278   const char *fmt = GET_RTX_FORMAT (code);
42279   int ok = 1;
42280 
42281   for (i = 0; i < GET_RTX_LENGTH (code); ++i)
42282     if (fmt[i] == 'e' || fmt[i] == 'u')
42283       {
42284 	unsigned int special_op = SH_NONE;
42285 	ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
42286 	if (special_op == SH_NONE)
42287 	  continue;
42288 	/* Ensure we never have two kinds of special handling
42289 	   for the same insn.  */
42290 	if (*special != SH_NONE && *special != special_op)
42291 	  return 0;
42292 	*special = special_op;
42293       }
42294     else if (fmt[i] == 'E')
42295       for (j = 0; j < XVECLEN (op, i); ++j)
42296 	{
42297 	  unsigned int special_op = SH_NONE;
42298 	  ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
42299 	  if (special_op == SH_NONE)
42300 	    continue;
42301 	  /* Ensure we never have two kinds of special handling
42302 	     for the same insn.  */
42303 	  if (*special != SH_NONE && *special != special_op)
42304 	    return 0;
42305 	  *special = special_op;
42306 	}
42307 
42308   return ok;
42309 }
42310 
42311 /* Return 1 iff INSN will not be affected by having vector doublewords
42312    swapped in memory (in which case *SPECIAL is unchanged), or can be
42313    modified to be correct if vector doublewords are swapped in memory
42314    (in which case *SPECIAL is changed to a value indicating how).  */
42316 static unsigned int
42317 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
42318 		     unsigned int *special)
42319 {
42320   /* Calls are always bad.  */
42321   if (GET_CODE (insn) == CALL_INSN)
42322     return 0;
42323 
42324   /* Loads and stores seen here are not permuting, but we can still
42325      fix them up by converting them to permuting ones.  Exceptions:
42326      UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
42327      body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
42328      for the SET source.  Also we must now make an exception for lvx
42329      and stvx when they are not in the UNSPEC_LVX/STVX form (with the
42330      explicit "& -16") since this leads to unrecognizable insns.  */
42331   rtx body = PATTERN (insn);
42332   int i = INSN_UID (insn);
42333 
42334   if (insn_entry[i].is_load)
42335     {
42336       if (GET_CODE (body) == SET)
42337 	{
42338 	  rtx rhs = SET_SRC (body);
42339 	  /* Even without a swap, the RHS might be a vec_select for, say,
42340 	     a byte-reversing load.  */
42341 	  if (GET_CODE (rhs) != MEM)
42342 	    return 0;
42343 	  if (GET_CODE (XEXP (rhs, 0)) == AND)
42344 	    return 0;
42345 
42346 	  *special = SH_NOSWAP_LD;
42347 	  return 1;
42348 	}
42349       else
42350 	return 0;
42351     }
42352 
42353   if (insn_entry[i].is_store)
42354     {
42355       if (GET_CODE (body) == SET
42356 	  && GET_CODE (SET_SRC (body)) != UNSPEC)
42357 	{
42358 	  rtx lhs = SET_DEST (body);
42359 	  /* Even without a swap, the LHS might be a vec_select for, say,
42360 	     a byte-reversing store.  */
42361 	  if (GET_CODE (lhs) != MEM)
42362 	    return 0;
42363 	  if (GET_CODE (XEXP (lhs, 0)) == AND)
42364 	    return 0;
42365 
42366 	  *special = SH_NOSWAP_ST;
42367 	  return 1;
42368 	}
42369       else
42370 	return 0;
42371     }
42372 
42373   /* A convert to single precision can be left as is provided that
42374      all of its uses are in xxspltw instructions that splat BE element
42375      zero.  */
42376   if (GET_CODE (body) == SET
42377       && GET_CODE (SET_SRC (body)) == UNSPEC
42378       && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
42379     {
42380       df_ref def;
42381       struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42382 
42383       FOR_EACH_INSN_INFO_DEF (def, insn_info)
42384 	{
42385 	  struct df_link *link = DF_REF_CHAIN (def);
42386 	  if (!link)
42387 	    return 0;
42388 
42389 	  for (; link; link = link->next) {
42390 	    rtx use_insn = DF_REF_INSN (link->ref);
42391 	    rtx use_body = PATTERN (use_insn);
42392 	    if (GET_CODE (use_body) != SET
42393 		|| GET_CODE (SET_SRC (use_body)) != UNSPEC
42394 		|| XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
42395 		|| XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx)
42396 	      return 0;
42397 	  }
42398 	}
42399 
42400       return 1;
42401     }
42402 
42403   /* A concatenation of two doublewords is ok if we reverse the
42404      order of the inputs.  */
42405   if (GET_CODE (body) == SET
42406       && GET_CODE (SET_SRC (body)) == VEC_CONCAT
42407       && (GET_MODE (SET_SRC (body)) == V2DFmode
42408 	  || GET_MODE (SET_SRC (body)) == V2DImode))
42409     {
42410       *special = SH_CONCAT;
42411       return 1;
42412     }
42413 
42414   /* V2DF reductions are always swappable.  */
42415   if (GET_CODE (body) == PARALLEL)
42416     {
42417       rtx expr = XVECEXP (body, 0, 0);
42418       if (GET_CODE (expr) == SET
42419 	  && v2df_reduction_p (SET_SRC (expr)))
42420 	return 1;
42421     }
42422 
42423   /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
42424      constant pool.  */
42425   if (GET_CODE (body) == SET
42426       && GET_CODE (SET_SRC (body)) == UNSPEC
42427       && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
42428       && XVECLEN (SET_SRC (body), 0) == 3
42429       && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG)
42430     {
42431       rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
42432       struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42433       df_ref use;
42434       FOR_EACH_INSN_INFO_USE (use, insn_info)
42435 	if (rtx_equal_p (DF_REF_REG (use), mask_reg))
42436 	  {
42437 	    struct df_link *def_link = DF_REF_CHAIN (use);
42438 	    /* Punt if multiple definitions for this reg.  */
42439 	    if (def_link && !def_link->next
42440 		&& const_load_sequence_p (insn_entry,
42441 					  DF_REF_INSN (def_link->ref)))
42442 	      {
42443 		*special = SH_VPERM;
42444 		return 1;
42445 	      }
42446 	  }
42447     }
42448 
42449   /* Otherwise check the operands for vector lane violations.  */
42450   return rtx_is_swappable_p (body, special);
42451 }
42452 
42453 enum chain_purpose { FOR_LOADS, FOR_STORES };
42454 
42455 /* Return true if the UD or DU chain headed by LINK is non-empty,
42456    and every entry on the chain references an insn that is a
42457    register swap.  Furthermore, if PURPOSE is FOR_LOADS, each such
42458    register swap must have only permuting loads as reaching defs.
42459    If PURPOSE is FOR_STORES, each such register swap must have only
42460    register swaps or permuting stores as reached uses.  */
42461 static bool
42462 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
42463 			   enum chain_purpose purpose)
42464 {
42465   if (!link)
42466     return false;
42467 
42468   for (; link; link = link->next)
42469     {
42470       if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
42471 	continue;
42472 
42473       if (DF_REF_IS_ARTIFICIAL (link->ref))
42474 	return false;
42475 
42476       rtx reached_insn = DF_REF_INSN (link->ref);
42477       unsigned uid = INSN_UID (reached_insn);
42478       struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
42479 
42480       if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
42481 	  || insn_entry[uid].is_store)
42482 	return false;
42483 
42484       if (purpose == FOR_LOADS)
42485 	{
42486 	  df_ref use;
42487 	  FOR_EACH_INSN_INFO_USE (use, insn_info)
42488 	    {
42489 	      struct df_link *swap_link = DF_REF_CHAIN (use);
42490 
42491 	      while (swap_link)
42492 		{
42493 		  if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
42494 		    return false;
42495 
42496 		  rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
42497 		  unsigned uid2 = INSN_UID (swap_def_insn);
42498 
42499 		  /* Only permuting loads are allowed.  */
42500 		  if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
42501 		    return false;
42502 
42503 		  swap_link = swap_link->next;
42504 		}
42505 	    }
42506 	}
42507       else if (purpose == FOR_STORES)
42508 	{
42509 	  df_ref def;
42510 	  FOR_EACH_INSN_INFO_DEF (def, insn_info)
42511 	    {
42512 	      struct df_link *swap_link = DF_REF_CHAIN (def);
42513 
42514 	      while (swap_link)
42515 		{
42516 		  if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
42517 		    return false;
42518 
42519 		  rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
42520 		  unsigned uid2 = INSN_UID (swap_use_insn);
42521 
42522 		  /* Permuting stores or register swaps are allowed.  */
42523 		  if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
42524 		    return false;
42525 
42526 		  swap_link = swap_link->next;
42527 		}
42528 	    }
42529 	}
42530     }
42531 
42532   return true;
42533 }
42534 
42535 /* Mark the xxswapdi instructions associated with permuting loads and
42536    stores for removal.  Note that we only flag them for deletion here,
42537    as there is a possibility of a swap being reached from multiple
42538    loads, etc.  */
42539 static void
42540 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
42541 {
42542   rtx insn = insn_entry[i].insn;
42543   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42544 
42545   if (insn_entry[i].is_load)
42546     {
42547       df_ref def;
42548       FOR_EACH_INSN_INFO_DEF (def, insn_info)
42549 	{
42550 	  struct df_link *link = DF_REF_CHAIN (def);
42551 
42552 	  /* We know by now that these are swaps, so we can delete
42553 	     them confidently.  */
42554 	  while (link)
42555 	    {
42556 	      rtx use_insn = DF_REF_INSN (link->ref);
42557 	      insn_entry[INSN_UID (use_insn)].will_delete = 1;
42558 	      link = link->next;
42559 	    }
42560 	}
42561     }
42562   else if (insn_entry[i].is_store)
42563     {
42564       df_ref use;
42565       FOR_EACH_INSN_INFO_USE (use, insn_info)
42566 	{
42567 	  /* Ignore uses for addressability.  */
42568 	  machine_mode mode = GET_MODE (DF_REF_REG (use));
42569 	  if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
42570 	    continue;
42571 
42572 	  struct df_link *link = DF_REF_CHAIN (use);
42573 
42574 	  /* We know by now that these are swaps, so we can delete
42575 	     them confidently.  */
42576 	  while (link)
42577 	    {
42578 	      rtx def_insn = DF_REF_INSN (link->ref);
42579 	      insn_entry[INSN_UID (def_insn)].will_delete = 1;
42580 	      link = link->next;
42581 	    }
42582 	}
42583     }
42584 }
42585 
42586 /* *OP_PTR is either a CONST_VECTOR or an expression containing one.
42587    Swap the first half of the vector with the second in the first
42588    case.  Recurse to find it in the second.  */
42589 static void
42590 swap_const_vector_halves (rtx *op_ptr)
42591 {
42592   int i;
42593   rtx op = *op_ptr;
42594   enum rtx_code code = GET_CODE (op);
42595   if (GET_CODE (op) == CONST_VECTOR)
42596     {
42597       int units = GET_MODE_NUNITS (GET_MODE (op));
42598       rtx_vector_builder builder (GET_MODE (op), units, 1);
42599       for (i = 0; i < units / 2; ++i)
42600 	builder.quick_push (CONST_VECTOR_ELT (op, i + units / 2));
42601       for (i = 0; i < units / 2; ++i)
42602 	builder.quick_push (CONST_VECTOR_ELT (op, i));
42603       *op_ptr = builder.build ();
42604     }
42605   else
42606     {
42607       int j;
42608       const char *fmt = GET_RTX_FORMAT (code);
42609       for (i = 0; i < GET_RTX_LENGTH (code); ++i)
42610 	if (fmt[i] == 'e' || fmt[i] == 'u')
42611 	  swap_const_vector_halves (&XEXP (op, i));
42612 	else if (fmt[i] == 'E')
42613 	  for (j = 0; j < XVECLEN (op, i); ++j)
42614 	    swap_const_vector_halves (&XVECEXP (op, i, j));
42615     }
42616 }
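
/* For instance, the V4SI constant {1, 2, 3, 4} is rebuilt above as
   {3, 4, 1, 2}: the second half of the elements is pushed first,
   followed by the first half.  */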
42617 
42618 /* Find all subregs of a vector expression that perform a narrowing,
42619    and adjust the subreg index to account for doubleword swapping.  */
42620 static void
42621 adjust_subreg_index (rtx op)
42622 {
42623   enum rtx_code code = GET_CODE (op);
42624   if (code == SUBREG
42625       && (GET_MODE_SIZE (GET_MODE (op))
42626 	  < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
42627     {
42628       unsigned int index = SUBREG_BYTE (op);
42629       if (index < 8)
42630 	index += 8;
42631       else
42632 	index -= 8;
42633       SUBREG_BYTE (op) = index;
42634     }
42635 
42636   const char *fmt = GET_RTX_FORMAT (code);
42637   int i,j;
42638   for (i = 0; i < GET_RTX_LENGTH (code); ++i)
42639     if (fmt[i] == 'e' || fmt[i] == 'u')
42640       adjust_subreg_index (XEXP (op, i));
42641     else if (fmt[i] == 'E')
42642       for (j = 0; j < XVECLEN (op, i); ++j)
42643 	adjust_subreg_index (XVECEXP (op, i, j));
42644 }
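
/* Example (illustrative): once doublewords are swapped, the narrowing
   subreg (subreg:DF (reg:V2DF x) 0) must become
   (subreg:DF (reg:V2DF x) 8), and vice versa.  */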
42645 
42646 /* Convert the non-permuting load INSN to a permuting one.  */
42647 static void
42648 permute_load (rtx_insn *insn)
42649 {
42650   rtx body = PATTERN (insn);
42651   rtx mem_op = SET_SRC (body);
42652   rtx tgt_reg = SET_DEST (body);
42653   machine_mode mode = GET_MODE (tgt_reg);
42654   int n_elts = GET_MODE_NUNITS (mode);
42655   int half_elts = n_elts / 2;
42656   rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
42657   int i, j;
42658   for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
42659     XVECEXP (par, 0, i) = GEN_INT (j);
42660   for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
42661     XVECEXP (par, 0, i) = GEN_INT (j);
42662   rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
42663   SET_SRC (body) = sel;
42664   INSN_CODE (insn) = -1; /* Force re-recognition.  */
42665   df_insn_rescan (insn);
42666 
42667   if (dump_file)
42668     fprintf (dump_file, "Replacing load %d with permuted load\n",
42669 	     INSN_UID (insn));
42670 }
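
/* As a sketch, for a V4SI load the selector built above is [2 3 0 1], so

	(set (reg:V4SI x) (mem:V4SI addr))

   becomes

	(set (reg:V4SI x)
	     (vec_select:V4SI (mem:V4SI addr)
			      (parallel [(const_int 2) (const_int 3)
					 (const_int 0) (const_int 1)])))

   permute_store below performs the mirror-image rewrite on the source.  */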
42671 
42672 /* Convert the non-permuting store INSN to a permuting one.  */
42673 static void
42674 permute_store (rtx_insn *insn)
42675 {
42676   rtx body = PATTERN (insn);
42677   rtx src_reg = SET_SRC (body);
42678   machine_mode mode = GET_MODE (src_reg);
42679   int n_elts = GET_MODE_NUNITS (mode);
42680   int half_elts = n_elts / 2;
42681   rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
42682   int i, j;
42683   for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
42684     XVECEXP (par, 0, i) = GEN_INT (j);
42685   for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
42686     XVECEXP (par, 0, i) = GEN_INT (j);
42687   rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
42688   SET_SRC (body) = sel;
42689   INSN_CODE (insn) = -1; /* Force re-recognition.  */
42690   df_insn_rescan (insn);
42691 
42692   if (dump_file)
42693     fprintf (dump_file, "Replacing store %d with permuted store\n",
42694 	     INSN_UID (insn));
42695 }
42696 
42697 /* Given INSN whose pattern contains a vector extract operation, adjust
42698    the index of the extracted lane to account for the doubleword swap.  */
42699 static void
42700 adjust_extract (rtx_insn *insn)
42701 {
42702   rtx pattern = PATTERN (insn);
42703   if (GET_CODE (pattern) == PARALLEL)
42704     pattern = XVECEXP (pattern, 0, 0);
42705   rtx src = SET_SRC (pattern);
42706   /* The vec_select may be wrapped in a vec_duplicate for a splat, so
42707      account for that.  */
42708   rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
42709   rtx par = XEXP (sel, 1);
42710   int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
42711   int lane = INTVAL (XVECEXP (par, 0, 0));
42712   lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
42713   XVECEXP (par, 0, 0) = GEN_INT (lane);
42714   INSN_CODE (insn) = -1; /* Force re-recognition.  */
42715   df_insn_rescan (insn);
42716 
42717   if (dump_file)
42718     fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
42719 }
42720 
42721 /* Given INSN whose pattern contains a vector direct-splat operation,
42722    adjust the index of the source lane to account for the doubleword swap.  */
42723 static void
42724 adjust_splat (rtx_insn *insn)
42725 {
42726   rtx body = PATTERN (insn);
42727   rtx unspec = XEXP (body, 1);
42728   int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
42729   int lane = INTVAL (XVECEXP (unspec, 0, 1));
42730   lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
42731   XVECEXP (unspec, 0, 1) = GEN_INT (lane);
42732   INSN_CODE (insn) = -1; /* Force re-recognition.  */
42733   df_insn_rescan (insn);
42734 
42735   if (dump_file)
42736     fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
42737 }
42738 
42739 /* Given INSN whose pattern contains an XXPERMDI operation (that is not
42740    a doubleword swap), reverse the order of the source operands and adjust
42741    the indices of the source lanes to account for doubleword reversal.  */
42742 static void
42743 adjust_xxpermdi (rtx_insn *insn)
42744 {
42745   rtx set = PATTERN (insn);
42746   rtx select = XEXP (set, 1);
42747   rtx concat = XEXP (select, 0);
42748   rtx src0 = XEXP (concat, 0);
42749   XEXP (concat, 0) = XEXP (concat, 1);
42750   XEXP (concat, 1) = src0;
42751   rtx parallel = XEXP (select, 1);
42752   int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
42753   int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
42754   int new_lane0 = 3 - lane1;
42755   int new_lane1 = 3 - lane0;
42756   XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
42757   XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
42758   INSN_CODE (insn) = -1; /* Force re-recognition.  */
42759   df_insn_rescan (insn);
42760 
42761   if (dump_file)
42762     fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
42763 }
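
/* For example (sketch): an xxpermdi selecting lanes {0, 2} (doubleword 0
   of each input) has its inputs exchanged and its selector rewritten to
   {3-2, 3-0} = {1, 3}, which picks the same data once both inputs have
   their doublewords swapped.  */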
42764 
42765 /* Given INSN whose pattern contains a VEC_CONCAT operation of two
42766    doublewords, reverse the order of those inputs.  */
42767 static void
42768 adjust_concat (rtx_insn *insn)
42769 {
42770   rtx set = PATTERN (insn);
42771   rtx concat = XEXP (set, 1);
42772   rtx src0 = XEXP (concat, 0);
42773   XEXP (concat, 0) = XEXP (concat, 1);
42774   XEXP (concat, 1) = src0;
42775   INSN_CODE (insn) = -1; /* Force re-recognition.  */
42776   df_insn_rescan (insn);
42777 
42778   if (dump_file)
42779     fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
42780 }
42781 
42782 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
42783    constant pool to reflect swapped doublewords.  */
42784 static void
42785 adjust_vperm (rtx_insn *insn)
42786 {
42787   /* We previously determined that the UNSPEC_VPERM was fed by a
42788      swap of a swapping load of a TOC-relative constant pool symbol.
42789      Find the MEM in the swapping load and replace it with a MEM for
42790      the adjusted mask constant.  */
42791   rtx set = PATTERN (insn);
42792   rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);
42793 
42794   /* Find the swap.  */
42795   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42796   df_ref use;
42797   rtx_insn *swap_insn = 0;
42798   FOR_EACH_INSN_INFO_USE (use, insn_info)
42799     if (rtx_equal_p (DF_REF_REG (use), mask_reg))
42800       {
42801 	struct df_link *def_link = DF_REF_CHAIN (use);
42802 	gcc_assert (def_link && !def_link->next);
42803 	swap_insn = DF_REF_INSN (def_link->ref);
42804 	break;
42805       }
42806   gcc_assert (swap_insn);
42807 
42808   /* Find the load.  */
42809   insn_info = DF_INSN_INFO_GET (swap_insn);
42810   rtx_insn *load_insn = 0;
42811   FOR_EACH_INSN_INFO_USE (use, insn_info)
42812     {
42813       struct df_link *def_link = DF_REF_CHAIN (use);
42814       gcc_assert (def_link && !def_link->next);
42815       load_insn = DF_REF_INSN (def_link->ref);
42816       break;
42817     }
42818   gcc_assert (load_insn);
42819 
42820   /* Find the TOC-relative symbol access.  */
42821   insn_info = DF_INSN_INFO_GET (load_insn);
42822   rtx_insn *tocrel_insn = 0;
42823   FOR_EACH_INSN_INFO_USE (use, insn_info)
42824     {
42825       struct df_link *def_link = DF_REF_CHAIN (use);
42826       gcc_assert (def_link && !def_link->next);
42827       tocrel_insn = DF_REF_INSN (def_link->ref);
42828       break;
42829     }
42830   gcc_assert (tocrel_insn);
42831 
42832   /* Find the embedded CONST_VECTOR.  We have to call toc_relative_expr_p
42833      to set tocrel_base; otherwise it would be unnecessary as we've
42834      already established it will return true.  */
42835   rtx base, offset;
42836   rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
42837   /* There is an extra level of indirection for small/large code models.  */
42838   if (GET_CODE (tocrel_expr) == MEM)
42839     tocrel_expr = XEXP (tocrel_expr, 0);
42840   if (!toc_relative_expr_p (tocrel_expr, false))
42841     gcc_unreachable ();
42842   split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
42843   rtx const_vector = get_pool_constant (base);
42844   /* With the extra indirection, get_pool_constant will produce the
42845      real constant from the reg_equal expression, so get the real
42846      constant.  */
42847   if (GET_CODE (const_vector) == SYMBOL_REF)
42848     const_vector = get_pool_constant (const_vector);
42849   gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
42850 
42851   /* Create an adjusted mask from the initial mask.  */
42852   unsigned int new_mask[16], i, val;
42853   for (i = 0; i < 16; ++i) {
42854     val = INTVAL (XVECEXP (const_vector, 0, i));
42855     if (val < 16)
42856       new_mask[i] = (val + 8) % 16;
42857     else
42858       new_mask[i] = ((val + 8) % 16) + 16;
42859   }
42860 
42861   /* Create a new CONST_VECTOR and a MEM that references it.  */
42862   rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
42863   for (i = 0; i < 16; ++i)
42864     XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
42865   rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
42866   rtx new_mem = force_const_mem (V16QImode, new_const_vector);
42867   /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
42868      can't recognize.  Force the SYMBOL_REF into a register.  */
42869   if (!REG_P (XEXP (new_mem, 0))) {
42870     rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
42871     XEXP (new_mem, 0) = base_reg;
42872     /* Move the newly created insn ahead of the load insn.  */
42873     rtx_insn *force_insn = get_last_insn ();
42874     remove_insn (force_insn);
42875     rtx_insn *before_load_insn = PREV_INSN (load_insn);
42876     add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
42877     df_insn_rescan (before_load_insn);
42878     df_insn_rescan (force_insn);
42879   }
42880 
42881   /* Replace the MEM in the load instruction and rescan it.  */
42882   XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
42883   INSN_CODE (load_insn) = -1; /* Force re-recognition.  */
42884   df_insn_rescan (load_insn);
42885 
42886   if (dump_file)
42887     fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
42888 }
42889 
42890 /* The insn described by INSN_ENTRY[I] can be swapped, but only
42891    with special handling.  Take care of that here.  */
42892 static void
42893 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
42894 {
42895   rtx_insn *insn = insn_entry[i].insn;
42896   rtx body = PATTERN (insn);
42897 
42898   switch (insn_entry[i].special_handling)
42899     {
42900     default:
42901       gcc_unreachable ();
42902     case SH_CONST_VECTOR:
42903       {
42904 	/* A CONST_VECTOR will only show up somewhere in the RHS of a SET.  */
42905 	gcc_assert (GET_CODE (body) == SET);
42906 	swap_const_vector_halves (&SET_SRC (body));
42907 	if (dump_file)
42908 	  fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
42909 	break;
42910       }
42911     case SH_SUBREG:
42912       /* A subreg of the same size is already safe.  For subregs that
42913 	 select a smaller portion of a reg, adjust the index for
42914 	 swapped doublewords.  */
42915       adjust_subreg_index (body);
42916       if (dump_file)
42917 	fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
42918       break;
42919     case SH_NOSWAP_LD:
42920       /* Convert a non-permuting load to a permuting one.  */
42921       permute_load (insn);
42922       break;
42923     case SH_NOSWAP_ST:
42924       /* Convert a non-permuting store to a permuting one.  */
42925       permute_store (insn);
42926       break;
42927     case SH_EXTRACT:
42928       /* Change the lane on an extract operation.  */
42929       adjust_extract (insn);
42930       break;
42931     case SH_SPLAT:
42932       /* Change the lane on a direct-splat operation.  */
42933       adjust_splat (insn);
42934       break;
42935     case SH_XXPERMDI:
42936       /* Change the lanes on an XXPERMDI operation.  */
42937       adjust_xxpermdi (insn);
42938       break;
42939     case SH_CONCAT:
42940       /* Reverse the order of a concatenation operation.  */
42941       adjust_concat (insn);
42942       break;
42943     case SH_VPERM:
42944       /* Change the mask loaded from the constant pool for a VPERM.  */
42945       adjust_vperm (insn);
42946       break;
42947     }
42948 }
42949 
42950 /* Find the insn from the Ith table entry, which is known to be a
42951    register swap Y = SWAP(X).  Replace it with a copy Y = X.  */
42952 static void
42953 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
42954 {
42955   rtx_insn *insn = insn_entry[i].insn;
42956   rtx body = PATTERN (insn);
42957   rtx src_reg = XEXP (SET_SRC (body), 0);
42958   rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
42959   rtx_insn *new_insn = emit_insn_before (copy, insn);
42960   set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
42961   df_insn_rescan (new_insn);
42962 
42963   if (dump_file)
42964     {
42965       unsigned int new_uid = INSN_UID (new_insn);
42966       fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
42967     }
42968 
42969   df_insn_delete (insn);
42970   remove_insn (insn);
42971   insn->set_deleted ();
42972 }
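
/* Concretely (illustrative mode and registers), this turns

	(set (reg:V2DI y)
	     (vec_select:V2DI (reg:V2DI x)
			      (parallel [(const_int 1) (const_int 0)])))

   into the plain copy (set (reg:V2DI y) (reg:V2DI x)), which forward
   propagation can usually eliminate.  */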
42973 
42974 /* Dump the swap table to DUMP_FILE.  */
42975 static void
42976 dump_swap_insn_table (swap_web_entry *insn_entry)
42977 {
42978   int e = get_max_uid ();
42979   fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
42980 
42981   for (int i = 0; i < e; ++i)
42982     if (insn_entry[i].is_relevant)
42983       {
42984 	swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
42985 	fprintf (dump_file, "%6d %6d  ", i,
42986 		 pred_entry && pred_entry->insn
42987 		 ? INSN_UID (pred_entry->insn) : 0);
42988 	if (insn_entry[i].is_load)
42989 	  fputs ("load ", dump_file);
42990 	if (insn_entry[i].is_store)
42991 	  fputs ("store ", dump_file);
42992 	if (insn_entry[i].is_swap)
42993 	  fputs ("swap ", dump_file);
42994 	if (insn_entry[i].is_live_in)
42995 	  fputs ("live-in ", dump_file);
42996 	if (insn_entry[i].is_live_out)
42997 	  fputs ("live-out ", dump_file);
42998 	if (insn_entry[i].contains_subreg)
42999 	  fputs ("subreg ", dump_file);
43000 	if (insn_entry[i].is_128_int)
43001 	  fputs ("int128 ", dump_file);
43002 	if (insn_entry[i].is_call)
43003 	  fputs ("call ", dump_file);
43004 	if (insn_entry[i].is_swappable)
43005 	  {
43006 	    fputs ("swappable ", dump_file);
43007 	    if (insn_entry[i].special_handling == SH_CONST_VECTOR)
43008 	      fputs ("special:constvec ", dump_file);
43009 	    else if (insn_entry[i].special_handling == SH_SUBREG)
43010 	      fputs ("special:subreg ", dump_file);
43011 	    else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
43012 	      fputs ("special:load ", dump_file);
43013 	    else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
43014 	      fputs ("special:store ", dump_file);
43015 	    else if (insn_entry[i].special_handling == SH_EXTRACT)
43016 	      fputs ("special:extract ", dump_file);
43017 	    else if (insn_entry[i].special_handling == SH_SPLAT)
43018 	      fputs ("special:splat ", dump_file);
43019 	    else if (insn_entry[i].special_handling == SH_XXPERMDI)
43020 	      fputs ("special:xxpermdi ", dump_file);
43021 	    else if (insn_entry[i].special_handling == SH_CONCAT)
43022 	      fputs ("special:concat ", dump_file);
43023 	    else if (insn_entry[i].special_handling == SH_VPERM)
43024 	      fputs ("special:vperm ", dump_file);
43025 	  }
43026 	if (insn_entry[i].web_not_optimizable)
43027 	  fputs ("unoptimizable ", dump_file);
43028 	if (insn_entry[i].will_delete)
43029 	  fputs ("delete ", dump_file);
43030 	fputs ("\n", dump_file);
43031       }
43032   fputs ("\n", dump_file);
43033 }
43034 
43035 /* Return ALIGN with its address canonicalized to (reg) or (plus reg reg).
43036    Here ALIGN is an (and addr (const_int -16)).  Always return a new copy
43037    to avoid problems with combine.  */
43038 static rtx
43039 alignment_with_canonical_addr (rtx align)
43040 {
43041   rtx canon;
43042   rtx addr = XEXP (align, 0);
43043 
43044   if (REG_P (addr))
43045     canon = addr;
43046 
43047   else if (GET_CODE (addr) == PLUS)
43048     {
43049       rtx addrop0 = XEXP (addr, 0);
43050       rtx addrop1 = XEXP (addr, 1);
43051 
43052       if (!REG_P (addrop0))
43053 	addrop0 = force_reg (GET_MODE (addrop0), addrop0);
43054 
43055       if (!REG_P (addrop1))
43056 	addrop1 = force_reg (GET_MODE (addrop1), addrop1);
43057 
43058       canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
43059     }
43060 
43061   else
43062     canon = force_reg (GET_MODE (addr), addr);
43063 
43064   return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
43065 }
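
/* For instance (a sketch): given (and:DI (plus:DI (reg 3) (const_int 16))
   (const_int -16)), the constant 16 is forced into a fresh register R and
   a new rtx (and:DI (plus:DI (reg 3) (reg R)) (const_int -16)) is
   returned.  */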
43066 
43067 /* Check whether an rtx is an alignment mask, and if so, return
43068    a fully-expanded rtx for the masking operation.  */
43069 static rtx
43070 alignment_mask (rtx_insn *insn)
43071 {
43072   rtx body = PATTERN (insn);
43073 
43074   if (GET_CODE (body) != SET
43075       || GET_CODE (SET_SRC (body)) != AND
43076       || !REG_P (XEXP (SET_SRC (body), 0)))
43077     return 0;
43078 
43079   rtx mask = XEXP (SET_SRC (body), 1);
43080 
43081   if (GET_CODE (mask) == CONST_INT)
43082     {
43083       if (INTVAL (mask) == -16)
43084 	return alignment_with_canonical_addr (SET_SRC (body));
43085       else
43086 	return 0;
43087     }
43088 
43089   if (!REG_P (mask))
43090     return 0;
43091 
43092   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
43093   df_ref use;
43094   rtx real_mask = 0;
43095 
43096   FOR_EACH_INSN_INFO_USE (use, insn_info)
43097     {
43098       if (!rtx_equal_p (DF_REF_REG (use), mask))
43099 	continue;
43100 
43101       struct df_link *def_link = DF_REF_CHAIN (use);
43102       if (!def_link || def_link->next)
43103 	return 0;
43104 
43105       rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
43106       rtx const_body = PATTERN (const_insn);
43107       if (GET_CODE (const_body) != SET)
43108 	return 0;
43109 
43110       real_mask = SET_SRC (const_body);
43111 
43112       if (GET_CODE (real_mask) != CONST_INT
43113 	  || INTVAL (real_mask) != -16)
43114 	return 0;
43115     }
43116 
43117   if (real_mask == 0)
43118     return 0;
43119 
43120   return alignment_with_canonical_addr (SET_SRC (body));
43121 }

/* Given INSN that's a load or store based at BASE_REG, look for a
   feeding computation that aligns its address on a 16-byte boundary.  */
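/* For instance (illustrative register numbers), if INSN accesses
   (mem (reg 5)) and the only definition of (reg 5) reaching INSN is

     (set (reg 5) (and (reg 3) (const_int -16)))

   then the canonicalized AND operation is returned; otherwise the
   result is 0.  */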
static rtx
find_alignment_op (rtx_insn *insn, rtx base_reg)
{
  df_ref base_use;
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  rtx and_operation = 0;

  FOR_EACH_INSN_INFO_USE (base_use, insn_info)
    {
      if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
	continue;

      struct df_link *base_def_link = DF_REF_CHAIN (base_use);
      if (!base_def_link || base_def_link->next)
	break;

      /* With stack-protector code enabled, and possibly in other
	 circumstances, there may not be an associated insn for
	 the def.  */
      if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
	break;

      rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
      and_operation = alignment_mask (and_insn);
      if (and_operation != 0)
	break;
    }

  return and_operation;
}

struct del_info { bool replace; rtx_insn *replace_insn; };

/* If INSN is the load for an lvx pattern, put it in canonical form.  */
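/* Roughly, and with illustrative registers and modes, the expansion
   of an lvx built-in for little endian arrives here as a permuting
   load feeding a doubleword swap:

     (set (reg:V2DI 0) (vec_select:V2DI (mem (reg 3)) (parallel [1 0])))
     (set (reg:V2DI 1) (vec_select:V2DI (reg:V2DI 0) (parallel [1 0])))

   When the base address is fed by an AND with -16, the pair is
   rewritten as a plain load from the masked address plus a copy:

     (set (reg:V2DI 0) (mem (and (reg 3) (const_int -16))))
     (set (reg:V2DI 1) (reg:V2DI 0))  */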
static void
recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
{
  rtx body = PATTERN (insn);
  gcc_assert (GET_CODE (body) == SET
	      && GET_CODE (SET_SRC (body)) == VEC_SELECT
	      && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM);

  rtx mem = XEXP (SET_SRC (body), 0);
  rtx base_reg = XEXP (mem, 0);

  rtx and_operation = find_alignment_op (insn, base_reg);

  if (and_operation != 0)
    {
      df_ref def;
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      FOR_EACH_INSN_INFO_DEF (def, insn_info)
	{
	  struct df_link *link = DF_REF_CHAIN (def);
	  if (!link || link->next)
	    break;

	  rtx_insn *swap_insn = DF_REF_INSN (link->ref);
	  if (!insn_is_swap_p (swap_insn)
	      || insn_is_load_p (swap_insn)
	      || insn_is_store_p (swap_insn))
	    break;

	  /* Expected lvx pattern found.  Change the swap to
	     a copy, and propagate the AND operation into the
	     load.  */
	  to_delete[INSN_UID (swap_insn)].replace = true;
	  to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;

	  XEXP (mem, 0) = and_operation;
	  SET_SRC (body) = mem;
	  INSN_CODE (insn) = -1; /* Force re-recognition.  */
	  df_insn_rescan (insn);

	  if (dump_file)
	    fprintf (dump_file, "lvx opportunity found at %d\n",
		     INSN_UID (insn));
	}
    }
}

/* If INSN is the store for an stvx pattern, put it in canonical form.  */
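/* The mirror image of the lvx case (again with illustrative
   registers and modes): a doubleword swap feeding a permuting store,

     (set (reg:V2DI 0) (vec_select:V2DI (reg:V2DI 1) (parallel [1 0])))
     (set (mem (reg 3)) (vec_select:V2DI (reg:V2DI 0) (parallel [1 0])))

   becomes a copy plus a plain store through the masked address:

     (set (reg:V2DI 0) (reg:V2DI 1))
     (set (mem (and (reg 3) (const_int -16))) (reg:V2DI 0))  */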
static void
recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
{
  rtx body = PATTERN (insn);
  gcc_assert (GET_CODE (body) == SET
	      && GET_CODE (SET_DEST (body)) == MEM
	      && GET_CODE (SET_SRC (body)) == VEC_SELECT);
  rtx mem = SET_DEST (body);
  rtx base_reg = XEXP (mem, 0);

  rtx and_operation = find_alignment_op (insn, base_reg);

  if (and_operation != 0)
    {
      rtx src_reg = XEXP (SET_SRC (body), 0);
      df_ref src_use;
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      FOR_EACH_INSN_INFO_USE (src_use, insn_info)
	{
	  if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
	    continue;

	  struct df_link *link = DF_REF_CHAIN (src_use);
	  if (!link || link->next)
	    break;

	  rtx_insn *swap_insn = DF_REF_INSN (link->ref);
	  if (!insn_is_swap_p (swap_insn)
	      || insn_is_load_p (swap_insn)
	      || insn_is_store_p (swap_insn))
	    break;

	  /* Expected stvx pattern found.  Change the swap to
	     a copy, and propagate the AND operation into the
	     store.  */
	  to_delete[INSN_UID (swap_insn)].replace = true;
	  to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;

	  XEXP (mem, 0) = and_operation;
	  SET_SRC (body) = src_reg;
	  INSN_CODE (insn) = -1; /* Force re-recognition.  */
	  df_insn_rescan (insn);

	  if (dump_file)
	    fprintf (dump_file, "stvx opportunity found at %d\n",
		     INSN_UID (insn));
	}
    }
}

/* Look for patterns created from builtin lvx and stvx calls, and
   canonicalize them to be properly recognized as such.  */
static void
recombine_lvx_stvx_patterns (function *fun)
{
  int i;
  basic_block bb;
  rtx_insn *insn;

  int num_insns = get_max_uid ();
  del_info *to_delete = XCNEWVEC (del_info, num_insns);

  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS (bb, insn)
    {
      if (!NONDEBUG_INSN_P (insn))
	continue;

      if (insn_is_load_p (insn) && insn_is_swap_p (insn))
	recombine_lvx_pattern (insn, to_delete);
      else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
	recombine_stvx_pattern (insn, to_delete);
    }

  /* Turning swaps into copies is delayed until now, to avoid problems
     with deleting instructions during the insn walk.  */
  for (i = 0; i < num_insns; i++)
    if (to_delete[i].replace)
      {
	rtx swap_body = PATTERN (to_delete[i].replace_insn);
	rtx src_reg = XEXP (SET_SRC (swap_body), 0);
	rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
	rtx_insn *new_insn = emit_insn_before (copy,
					       to_delete[i].replace_insn);
	set_block_for_insn (new_insn,
			    BLOCK_FOR_INSN (to_delete[i].replace_insn));
	df_insn_rescan (new_insn);
	df_insn_delete (to_delete[i].replace_insn);
	remove_insn (to_delete[i].replace_insn);
	to_delete[i].replace_insn->set_deleted ();
      }

  free (to_delete);
}

/* Main entry point for this pass.  */
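/* In outline, summarizing the code below: (1) build use-def/def-use
   chains and canonicalize lvx/stvx patterns; (2) walk all insns,
   recording loads, stores, swaps, and swappability, and union insns
   that share vector values into webs; (3) mark webs that cannot be
   optimized; (4) for each optimizable web, mark the register swaps
   for removal and perform any required special handling; (5) replace
   the marked swaps with copies.  */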
unsigned int
rs6000_analyze_swaps (function *fun)
{
  swap_web_entry *insn_entry;
  basic_block bb;
  rtx_insn *insn, *curr_insn = 0;

  /* Dataflow analysis for use-def chains.  */
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
  df_analyze ();
  df_set_flags (DF_DEFER_INSN_RESCAN);

  /* Pre-pass to recombine lvx and stvx patterns so we don't lose info.  */
  recombine_lvx_stvx_patterns (fun);

  /* Allocate structure to represent webs of insns.  */
  insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());

  /* Walk the insns to gather basic data.  */
  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
    {
      unsigned int uid = INSN_UID (insn);
      if (NONDEBUG_INSN_P (insn))
	{
	  insn_entry[uid].insn = insn;

	  if (GET_CODE (insn) == CALL_INSN)
	    insn_entry[uid].is_call = 1;

	  /* Walk the uses and defs to see if we mention vector regs.
	     Record any constraints on optimization of such mentions.  */
	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	  df_ref mention;
	  FOR_EACH_INSN_INFO_USE (mention, insn_info)
	    {
	      /* We use DF_REF_REAL_REG here to get inside any subregs.  */
	      machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));

	      /* If a use gets its value from a call insn, it will be
		 a hard register and will look like (reg:V4SI 3 3).
		 The df analysis creates two mentions for GPR3 and GPR4,
		 both DImode.  We must recognize this and treat it as a
		 vector mention to ensure the call is unioned with this
		 use.  */
	      if (mode == DImode && DF_REF_INSN_INFO (mention))
		{
		  rtx feeder = DF_REF_INSN (mention);
		  /* FIXME:  It is pretty hard to get from the df mention
		     to the mode of the use in the insn.  We arbitrarily
		     pick a vector mode here, even though the use might
		     be a real DImode.  We can be too conservative
		     (create a web larger than necessary) because of
		     this, so consider eventually fixing this.  */
		  if (GET_CODE (feeder) == CALL_INSN)
		    mode = V4SImode;
		}

	      if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
		{
		  insn_entry[uid].is_relevant = 1;
		  if (mode == TImode || mode == V1TImode
		      || FLOAT128_VECTOR_P (mode))
		    insn_entry[uid].is_128_int = 1;
		  if (DF_REF_INSN_INFO (mention))
		    insn_entry[uid].contains_subreg
		      = !rtx_equal_p (DF_REF_REG (mention),
				      DF_REF_REAL_REG (mention));
		  union_defs (insn_entry, insn, mention);
		}
	    }
	  FOR_EACH_INSN_INFO_DEF (mention, insn_info)
	    {
	      /* We use DF_REF_REAL_REG here to get inside any subregs.  */
	      machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));

	      /* If we're loading up a hard vector register for a call,
		 it looks like (set (reg:V4SI 9 9) (...)).  The df
		 analysis creates two mentions for GPR9 and GPR10, both
		 DImode.  So relying on the mode from the mentions
		 isn't sufficient to ensure we union the call into the
		 web with the parameter setup code.  */
	      /* Note that we must inspect the insn's pattern here;
		 GET_CODE of an insn itself is never SET.  */
	      rtx body = PATTERN (insn);
	      if (mode == DImode && GET_CODE (body) == SET
		  && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (body))))
		mode = GET_MODE (SET_DEST (body));

	      if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
		{
		  insn_entry[uid].is_relevant = 1;
		  if (mode == TImode || mode == V1TImode
		      || FLOAT128_VECTOR_P (mode))
		    insn_entry[uid].is_128_int = 1;
		  if (DF_REF_INSN_INFO (mention))
		    insn_entry[uid].contains_subreg
		      = !rtx_equal_p (DF_REF_REG (mention),
				      DF_REF_REAL_REG (mention));
		  /* REG_FUNCTION_VALUE_P is not valid for subregs.  */
		  else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
		    insn_entry[uid].is_live_out = 1;
		  union_uses (insn_entry, insn, mention);
		}
	    }

	  if (insn_entry[uid].is_relevant)
	    {
	      /* Determine if this is a load or store.  */
	      insn_entry[uid].is_load = insn_is_load_p (insn);
	      insn_entry[uid].is_store = insn_is_store_p (insn);

	      /* Determine if this is a doubleword swap.  If not,
		 determine whether it can legally be swapped.  */
	      if (insn_is_swap_p (insn))
		insn_entry[uid].is_swap = 1;
	      else
		{
		  unsigned int special = SH_NONE;
		  insn_entry[uid].is_swappable
		    = insn_is_swappable_p (insn_entry, insn, &special);
		  if (special != SH_NONE && insn_entry[uid].contains_subreg)
		    insn_entry[uid].is_swappable = 0;
		  else if (special != SH_NONE)
		    insn_entry[uid].special_handling = special;
		  else if (insn_entry[uid].contains_subreg)
		    insn_entry[uid].special_handling = SH_SUBREG;
		}
	    }
	}
    }

  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table when first built\n");
      dump_swap_insn_table (insn_entry);
    }

  /* Record unoptimizable webs.  */
  unsigned e = get_max_uid (), i;
  for (i = 0; i < e; ++i)
    {
      if (!insn_entry[i].is_relevant)
	continue;

      swap_web_entry *root
	= (swap_web_entry*)(&insn_entry[i])->unionfind_root ();

      if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
	  || (insn_entry[i].contains_subreg
	      && insn_entry[i].special_handling != SH_SUBREG)
	  || insn_entry[i].is_128_int || insn_entry[i].is_call
	  || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
	root->web_not_optimizable = 1;

      /* If we have loads or stores that aren't permuting then the
	 optimization isn't appropriate.  */
      else if ((insn_entry[i].is_load || insn_entry[i].is_store)
	       && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
	root->web_not_optimizable = 1;

      /* If we have permuting loads or stores that are not accompanied
	 by a register swap, the optimization isn't appropriate.  */
      else if (insn_entry[i].is_load && insn_entry[i].is_swap)
	{
	  rtx insn = insn_entry[i].insn;
	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	  df_ref def;

	  FOR_EACH_INSN_INFO_DEF (def, insn_info)
	    {
	      struct df_link *link = DF_REF_CHAIN (def);

	      if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
		{
		  root->web_not_optimizable = 1;
		  break;
		}
	    }
	}
      else if (insn_entry[i].is_store && insn_entry[i].is_swap)
	{
	  rtx insn = insn_entry[i].insn;
	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	  df_ref use;

	  FOR_EACH_INSN_INFO_USE (use, insn_info)
	    {
	      struct df_link *link = DF_REF_CHAIN (use);

	      if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
		{
		  root->web_not_optimizable = 1;
		  break;
		}
	    }
	}
    }

  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
      dump_swap_insn_table (insn_entry);
    }

  /* For each load and store in an optimizable web (which implies
     the loads and stores are permuting), find the associated
     register swaps and mark them for removal.  Due to various
     optimizations we may mark the same swap more than once.  Also
     perform special handling for swappable insns that require it.  */
  for (i = 0; i < e; ++i)
    if ((insn_entry[i].is_load || insn_entry[i].is_store)
	&& insn_entry[i].is_swap)
      {
	swap_web_entry* root_entry
	  = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
	if (!root_entry->web_not_optimizable)
	  mark_swaps_for_removal (insn_entry, i);
      }
    else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
      {
	swap_web_entry* root_entry
	  = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
	if (!root_entry->web_not_optimizable)
	  handle_special_swappables (insn_entry, i);
      }

  /* Now delete the swaps marked for removal.  */
  for (i = 0; i < e; ++i)
    if (insn_entry[i].will_delete)
      replace_swap_with_copy (insn_entry, i);

  /* Clean up.  */
  free (insn_entry);
  return 0;
}

const pass_data pass_data_analyze_swaps =
{
  RTL_PASS, /* type */
  "swaps", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_analyze_swaps : public rtl_opt_pass
{
public:
  pass_analyze_swaps (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_analyze_swaps, ctxt)
  {}

  /* opt_pass methods: */
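  /* The swaps optimization is only relevant when generating VSX code
     for little endian, and appears to be unneeded from ISA 3.0
     (TARGET_P9_VECTOR) on, where lxvx/stxvx access vectors in element
     order without a doubleword swap.  */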
  virtual bool gate (function *)
    {
      return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
	      && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
    }

  virtual unsigned int execute (function *fun)
    {
      return rs6000_analyze_swaps (fun);
    }

  opt_pass *clone ()
    {
      return new pass_analyze_swaps (m_ctxt);
    }

}; // class pass_analyze_swaps

rtl_opt_pass *
make_pass_analyze_swaps (gcc::context *ctxt)
{
  return new pass_analyze_swaps (ctxt);
}

#ifdef RS6000_GLIBC_ATOMIC_FENV
/* Function declarations for rs6000_atomic_assign_expand_fenv.  */
static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
#endif

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

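/* As a usage note: the three sequences built here bracket the
   compare-and-exchange loop generated for a C11 atomic compound
   assignment on a floating-point object.  Roughly, HOLD is emitted
   before the loop, CLEAR before each retry, and UPDATE after a
   successful store, so that only the exceptions raised by the
   successfully stored computation remain visible.  */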
static void
rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
    {
#ifdef RS6000_GLIBC_ATOMIC_FENV
      if (atomic_hold_decl == NULL_TREE)
	{
	  atomic_hold_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feholdexcept"),
			  build_function_type_list (void_type_node,
						    double_ptr_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_hold_decl) = 1;
	  DECL_EXTERNAL (atomic_hold_decl) = 1;
	}

      if (atomic_clear_decl == NULL_TREE)
	{
	  atomic_clear_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feclearexcept"),
			  build_function_type_list (void_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_clear_decl) = 1;
	  DECL_EXTERNAL (atomic_clear_decl) = 1;
	}

      tree const_double = build_qualified_type (double_type_node,
						TYPE_QUAL_CONST);
      tree const_double_ptr = build_pointer_type (const_double);
      if (atomic_update_decl == NULL_TREE)
	{
	  atomic_update_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feupdateenv"),
			  build_function_type_list (void_type_node,
						    const_double_ptr,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_update_decl) = 1;
	  DECL_EXTERNAL (atomic_update_decl) = 1;
	}

      tree fenv_var = create_tmp_var_raw (double_type_node);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);

      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
      *clear = build_call_expr (atomic_clear_decl, 0);
      *update = build_call_expr (atomic_update_decl, 1,
				 fold_convert (const_double_ptr, fenv_addr));
#endif
      return;
    }

  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
  tree call_mffs = build_call_expr (mffs, 0);

  /* Generates the equivalent of feholdexcept (&fenv_var):

     *fenv_var = __builtin_mffs ();
     double fenv_hold;
     *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
     __builtin_mtfsf (0xff, fenv_hold);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT hold_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000007);

  tree fenv_var = create_tmp_var_raw (double_type_node);

  tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);

  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			      build_int_cst (uint64_type_node,
					     hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
				     build_int_cst (unsigned_type_node, 0xff),
				     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

     double fenv_clear = __builtin_mffs ();
     *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
     __builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask to clear the entire FPSCR image (the low word), including
     the exception flags and enables, the rounding modes, and the
     non-IEEE arithmetic flag.  */
  const unsigned HOST_WIDE_INT clear_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000000);

  tree fenv_clear = create_tmp_var_raw (double_type_node);

  tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);

  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
				    fenv_clean_llu,
				    build_int_cst (uint64_type_node,
						   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
				      build_int_cst (unsigned_type_node, 0xff),
				      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);

  /* Generates the equivalent of feupdateenv (&fenv_var):

     double old_fenv = __builtin_mffs ();
     double fenv_update;
     *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
                                (*(uint64_t*)fenv_var & 0x1ff80fff);
     __builtin_mtfsf (0xff, fenv_update);  */

  const unsigned HOST_WIDE_INT update_exception_mask =
    HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask =
    HOST_WIDE_INT_C (0x1ff80fff);

  tree old_fenv = create_tmp_var_raw (double_type_node);
  tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
			     build_int_cst (uint64_type_node,
					    update_exception_mask));

  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			     build_int_cst (uint64_type_node,
					    new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
			      old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
				       build_int_cst (unsigned_type_node, 0xff),
				       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}

/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */

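/* A note on the one non-default case below: expanding rsqrt via the
   hardware reciprocal square-root estimate (plus refinement steps)
   favors throughput over code size, so it is reported as supported
   only when optimizing for speed and the per-mode heuristic
   RS6000_RECIP_AUTO_RSQRTE_P allows it.  */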
static bool
rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
			  optimization_type opt_type)
{
  switch (op)
    {
    case rsqrt_optab:
      return (opt_type == OPTIMIZE_FOR_SPEED
	      && RS6000_RECIP_AUTO_RSQRTE_P (mode1));

    default:
      return true;
    }
}

/* Implement TARGET_CONSTANT_ALIGNMENT.  */

static HOST_WIDE_INT
rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  if (TREE_CODE (exp) == STRING_CST
      && (STRICT_ALIGNMENT || !optimize_size))
    return MAX (align, BITS_PER_WORD);
  return align;
}

/* Implement TARGET_STARTING_FRAME_OFFSET.  */

static HOST_WIDE_INT
rs6000_starting_frame_offset (void)
{
  if (FRAME_GROWS_DOWNWARD)
    return 0;
  return RS6000_STARTING_FRAME_OFFSET;
}

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-powerpcspe.h"