1 /* Definitions of target machine for GNU compiler.
2    Copyright (C) 1999-2016 Free Software Foundation, Inc.
3    Contributed by James E. Wilson <wilson@cygnus.com> and
4 		  David Mosberger <davidm@hpl.hp.com>.
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12 
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "cfghooks.h"
30 #include "df.h"
31 #include "tm_p.h"
32 #include "stringpool.h"
33 #include "optabs.h"
34 #include "regs.h"
35 #include "emit-rtl.h"
36 #include "recog.h"
37 #include "diagnostic-core.h"
38 #include "alias.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "calls.h"
42 #include "varasm.h"
43 #include "output.h"
44 #include "insn-attr.h"
45 #include "flags.h"
46 #include "explow.h"
47 #include "expr.h"
48 #include "cfgrtl.h"
49 #include "libfuncs.h"
50 #include "sched-int.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "gimplify.h"
54 #include "intl.h"
55 #include "debug.h"
56 #include "params.h"
57 #include "dbgcnt.h"
58 #include "tm-constrs.h"
59 #include "sel-sched.h"
60 #include "reload.h"
61 #include "opts.h"
62 #include "dumpfile.h"
63 #include "builtins.h"
64 
65 /* This file should be included last.  */
66 #include "target-def.h"
67 
68 /* This is used for communication between ASM_OUTPUT_LABEL and
69    ASM_OUTPUT_LABELREF.  */
70 int ia64_asm_output_label = 0;
71 
72 /* Register names for ia64_expand_prologue.  */
73 static const char * const ia64_reg_numbers[96] =
74 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
75   "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
76   "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
77   "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
78   "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
79   "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
80   "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
81   "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
82   "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
83   "r104","r105","r106","r107","r108","r109","r110","r111",
84   "r112","r113","r114","r115","r116","r117","r118","r119",
85   "r120","r121","r122","r123","r124","r125","r126","r127"};
86 
87 /* ??? These strings could be shared with REGISTER_NAMES.  */
88 static const char * const ia64_input_reg_names[8] =
89 { "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };
90 
91 /* ??? These strings could be shared with REGISTER_NAMES.  */
92 static const char * const ia64_local_reg_names[80] =
93 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
94   "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
95   "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
96   "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
97   "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
98   "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
99   "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
100   "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
101   "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
102   "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
103 
104 /* ??? These strings could be shared with REGISTER_NAMES.  */
105 static const char * const ia64_output_reg_names[8] =
106 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
107 
108 /* Variables which are this size or smaller are put in the sdata/sbss
109    sections.  */
110 
111 unsigned int ia64_section_threshold;
112 
113 /* The following variable is used by the DFA insn scheduler.  The value is
114    TRUE if we do insn bundling instead of insn scheduling.  */
115 int bundling_p = 0;
116 
117 enum ia64_frame_regs
118 {
119    reg_fp,
120    reg_save_b0,
121    reg_save_pr,
122    reg_save_ar_pfs,
123    reg_save_ar_unat,
124    reg_save_ar_lc,
125    reg_save_gp,
126    number_of_ia64_frame_regs
127 };
128 
129 /* Structure to be filled in by ia64_compute_frame_size with register
130    save masks and offsets for the current function.  */
131 
132 struct ia64_frame_info
133 {
134   HOST_WIDE_INT total_size;	/* size of the stack frame, not including
135 				   the caller's scratch area.  */
136   HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
137   HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
138   HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
139   HARD_REG_SET mask;		/* mask of saved registers.  */
140   unsigned int gr_used_mask;	/* mask of registers in use as gr spill
141 				   registers or long-term scratches.  */
142   int n_spilled;		/* number of spilled registers.  */
143   int r[number_of_ia64_frame_regs];  /* Frame related registers.  */
144   int n_input_regs;		/* number of input registers used.  */
145   int n_local_regs;		/* number of local registers used.  */
146   int n_output_regs;		/* number of output registers used.  */
147   int n_rotate_regs;		/* number of rotating registers used.  */
148 
149   char need_regstk;		/* true if a .regstk directive is needed.  */
150   char initialized;		/* true if the data is finalized.  */
151 };
152 
153 /* Current frame information calculated by ia64_compute_frame_size.  */
154 static struct ia64_frame_info current_frame_info;
155 /* The actual registers that are emitted.  */
156 static int emitted_frame_related_regs[number_of_ia64_frame_regs];
157 
158 static int ia64_first_cycle_multipass_dfa_lookahead (void);
159 static void ia64_dependencies_evaluation_hook (rtx_insn *, rtx_insn *);
160 static void ia64_init_dfa_pre_cycle_insn (void);
161 static rtx ia64_dfa_pre_cycle_insn (void);
162 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
163 static int ia64_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *);
164 static void ia64_h_i_d_extended (void);
165 static void * ia64_alloc_sched_context (void);
166 static void ia64_init_sched_context (void *, bool);
167 static void ia64_set_sched_context (void *);
168 static void ia64_clear_sched_context (void *);
169 static void ia64_free_sched_context (void *);
170 static int ia64_mode_to_int (machine_mode);
171 static void ia64_set_sched_flags (spec_info_t);
172 static ds_t ia64_get_insn_spec_ds (rtx_insn *);
173 static ds_t ia64_get_insn_checked_ds (rtx_insn *);
174 static bool ia64_skip_rtx_p (const_rtx);
175 static int ia64_speculate_insn (rtx_insn *, ds_t, rtx *);
176 static bool ia64_needs_block_p (ds_t);
177 static rtx ia64_gen_spec_check (rtx_insn *, rtx_insn *, ds_t);
178 static int ia64_spec_check_p (rtx);
179 static int ia64_spec_check_src_p (rtx);
180 static rtx gen_tls_get_addr (void);
181 static rtx gen_thread_pointer (void);
182 static int find_gr_spill (enum ia64_frame_regs, int);
183 static int next_scratch_gr_reg (void);
184 static void mark_reg_gr_used_mask (rtx, void *);
185 static void ia64_compute_frame_size (HOST_WIDE_INT);
186 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
187 static void finish_spill_pointers (void);
188 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
189 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
190 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
191 static rtx gen_movdi_x (rtx, rtx, rtx);
192 static rtx gen_fr_spill_x (rtx, rtx, rtx);
193 static rtx gen_fr_restore_x (rtx, rtx, rtx);
194 
195 static void ia64_option_override (void);
196 static bool ia64_can_eliminate (const int, const int);
197 static machine_mode hfa_element_mode (const_tree, bool);
198 static void ia64_setup_incoming_varargs (cumulative_args_t, machine_mode,
199 					 tree, int *, int);
200 static int ia64_arg_partial_bytes (cumulative_args_t, machine_mode,
201 				   tree, bool);
202 static rtx ia64_function_arg_1 (cumulative_args_t, machine_mode,
203 				const_tree, bool, bool);
204 static rtx ia64_function_arg (cumulative_args_t, machine_mode,
205 			      const_tree, bool);
206 static rtx ia64_function_incoming_arg (cumulative_args_t,
207 				       machine_mode, const_tree, bool);
208 static void ia64_function_arg_advance (cumulative_args_t, machine_mode,
209 				       const_tree, bool);
210 static unsigned int ia64_function_arg_boundary (machine_mode,
211 						const_tree);
212 static bool ia64_function_ok_for_sibcall (tree, tree);
213 static bool ia64_return_in_memory (const_tree, const_tree);
214 static rtx ia64_function_value (const_tree, const_tree, bool);
215 static rtx ia64_libcall_value (machine_mode, const_rtx);
216 static bool ia64_function_value_regno_p (const unsigned int);
217 static int ia64_register_move_cost (machine_mode, reg_class_t,
218                                     reg_class_t);
219 static int ia64_memory_move_cost (machine_mode mode, reg_class_t,
220 				  bool);
221 static bool ia64_rtx_costs (rtx, machine_mode, int, int, int *, bool);
222 static int ia64_unspec_may_trap_p (const_rtx, unsigned);
223 static void fix_range (const char *);
224 static struct machine_function * ia64_init_machine_status (void);
225 static void emit_insn_group_barriers (FILE *);
226 static void emit_all_insn_group_barriers (FILE *);
227 static void final_emit_insn_group_barriers (FILE *);
228 static void emit_predicate_relation_info (void);
229 static void ia64_reorg (void);
230 static bool ia64_in_small_data_p (const_tree);
231 static void process_epilogue (FILE *, rtx, bool, bool);
232 
233 static bool ia64_assemble_integer (rtx, unsigned int, int);
234 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
235 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
236 static void ia64_output_function_end_prologue (FILE *);
237 
238 static void ia64_print_operand (FILE *, rtx, int);
239 static void ia64_print_operand_address (FILE *, machine_mode, rtx);
240 static bool ia64_print_operand_punct_valid_p (unsigned char code);
241 
242 static int ia64_issue_rate (void);
243 static int ia64_adjust_cost_2 (rtx_insn *, int, rtx_insn *, int, dw_t);
244 static void ia64_sched_init (FILE *, int, int);
245 static void ia64_sched_init_global (FILE *, int, int);
246 static void ia64_sched_finish_global (FILE *, int);
247 static void ia64_sched_finish (FILE *, int);
248 static int ia64_dfa_sched_reorder (FILE *, int, rtx_insn **, int *, int, int);
249 static int ia64_sched_reorder (FILE *, int, rtx_insn **, int *, int);
250 static int ia64_sched_reorder2 (FILE *, int, rtx_insn **, int *, int);
251 static int ia64_variable_issue (FILE *, int, rtx_insn *, int);
252 
253 static void ia64_asm_unwind_emit (FILE *, rtx_insn *);
254 static void ia64_asm_emit_except_personality (rtx);
255 static void ia64_asm_init_sections (void);
256 
257 static enum unwind_info_type ia64_debug_unwind_info (void);
258 
259 static struct bundle_state *get_free_bundle_state (void);
260 static void free_bundle_state (struct bundle_state *);
261 static void initiate_bundle_states (void);
262 static void finish_bundle_states (void);
263 static int insert_bundle_state (struct bundle_state *);
264 static void initiate_bundle_state_table (void);
265 static void finish_bundle_state_table (void);
266 static int try_issue_nops (struct bundle_state *, int);
267 static int try_issue_insn (struct bundle_state *, rtx);
268 static void issue_nops_and_insn (struct bundle_state *, int, rtx_insn *,
269 				 int, int);
270 static int get_max_pos (state_t);
271 static int get_template (state_t, int);
272 
273 static rtx_insn *get_next_important_insn (rtx_insn *, rtx_insn *);
274 static bool important_for_bundling_p (rtx_insn *);
275 static bool unknown_for_bundling_p (rtx_insn *);
276 static void bundling (FILE *, int, rtx_insn *, rtx_insn *);
277 
278 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
279 				  HOST_WIDE_INT, tree);
280 static void ia64_file_start (void);
281 static void ia64_globalize_decl_name (FILE *, tree);
282 
283 static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
284 static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
285 static section *ia64_select_rtx_section (machine_mode, rtx,
286 					 unsigned HOST_WIDE_INT);
287 static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
288      ATTRIBUTE_UNUSED;
289 static unsigned int ia64_section_type_flags (tree, const char *, int);
290 static void ia64_init_libfuncs (void)
291      ATTRIBUTE_UNUSED;
292 static void ia64_hpux_init_libfuncs (void)
293      ATTRIBUTE_UNUSED;
294 static void ia64_sysv4_init_libfuncs (void)
295      ATTRIBUTE_UNUSED;
296 static void ia64_vms_init_libfuncs (void)
297      ATTRIBUTE_UNUSED;
298 static void ia64_soft_fp_init_libfuncs (void)
299      ATTRIBUTE_UNUSED;
300 static bool ia64_vms_valid_pointer_mode (machine_mode mode)
301      ATTRIBUTE_UNUSED;
302 static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
303      ATTRIBUTE_UNUSED;
304 
305 static bool ia64_attribute_takes_identifier_p (const_tree);
306 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
307 static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
308 static void ia64_encode_section_info (tree, rtx, int);
309 static rtx ia64_struct_value_rtx (tree, int);
310 static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
311 static bool ia64_scalar_mode_supported_p (machine_mode mode);
312 static bool ia64_vector_mode_supported_p (machine_mode mode);
313 static bool ia64_libgcc_floating_mode_supported_p (machine_mode mode);
314 static bool ia64_legitimate_constant_p (machine_mode, rtx);
315 static bool ia64_legitimate_address_p (machine_mode, rtx, bool);
316 static bool ia64_cannot_force_const_mem (machine_mode, rtx);
317 static const char *ia64_mangle_type (const_tree);
318 static const char *ia64_invalid_conversion (const_tree, const_tree);
319 static const char *ia64_invalid_unary_op (int, const_tree);
320 static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
321 static machine_mode ia64_c_mode_for_suffix (char);
322 static void ia64_trampoline_init (rtx, tree, rtx);
323 static void ia64_override_options_after_change (void);
324 static bool ia64_member_type_forces_blk (const_tree, machine_mode);
325 
326 static tree ia64_builtin_decl (unsigned, bool);
327 
328 static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
329 static machine_mode ia64_get_reg_raw_mode (int regno);
330 static section * ia64_hpux_function_section (tree, enum node_frequency,
331 					     bool, bool);
332 
333 static bool ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
334 					      const unsigned char *sel);
335 
336 #define MAX_VECT_LEN	8
337 
338 struct expand_vec_perm_d
339 {
340   rtx target, op0, op1;
341   unsigned char perm[MAX_VECT_LEN];
342   machine_mode vmode;
343   unsigned char nelt;
344   bool one_operand_p;
345   bool testing_p;
346 };
347 
348 static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
349 
350 
351 /* Table of valid machine attributes.  */
352 static const struct attribute_spec ia64_attribute_table[] =
353 {
354   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
355        affects_type_identity } */
356   { "syscall_linkage", 0, 0, false, true,  true,  NULL, false },
357   { "model",	       1, 1, true, false, false, ia64_handle_model_attribute,
358     false },
359 #if TARGET_ABI_OPEN_VMS
360   { "common_object",   1, 1, true, false, false,
361     ia64_vms_common_object_attribute, false },
362 #endif
363   { "version_id",      1, 1, true, false, false,
364     ia64_handle_version_id_attribute, false },
365   { NULL,	       0, 0, false, false, false, NULL, false }
366 };
367 
368 /* Initialize the GCC target structure.  */
369 #undef TARGET_ATTRIBUTE_TABLE
370 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
371 
372 #undef TARGET_INIT_BUILTINS
373 #define TARGET_INIT_BUILTINS ia64_init_builtins
374 
375 #undef TARGET_EXPAND_BUILTIN
376 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
377 
378 #undef TARGET_BUILTIN_DECL
379 #define TARGET_BUILTIN_DECL ia64_builtin_decl
380 
381 #undef TARGET_ASM_BYTE_OP
382 #define TARGET_ASM_BYTE_OP "\tdata1\t"
383 #undef TARGET_ASM_ALIGNED_HI_OP
384 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
385 #undef TARGET_ASM_ALIGNED_SI_OP
386 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
387 #undef TARGET_ASM_ALIGNED_DI_OP
388 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
389 #undef TARGET_ASM_UNALIGNED_HI_OP
390 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
391 #undef TARGET_ASM_UNALIGNED_SI_OP
392 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
393 #undef TARGET_ASM_UNALIGNED_DI_OP
394 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
395 #undef TARGET_ASM_INTEGER
396 #define TARGET_ASM_INTEGER ia64_assemble_integer
397 
398 #undef TARGET_OPTION_OVERRIDE
399 #define TARGET_OPTION_OVERRIDE ia64_option_override
400 
401 #undef TARGET_ASM_FUNCTION_PROLOGUE
402 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
403 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
404 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
405 #undef TARGET_ASM_FUNCTION_EPILOGUE
406 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
407 
408 #undef TARGET_PRINT_OPERAND
409 #define TARGET_PRINT_OPERAND ia64_print_operand
410 #undef TARGET_PRINT_OPERAND_ADDRESS
411 #define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
412 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
413 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p
414 
415 #undef TARGET_IN_SMALL_DATA_P
416 #define TARGET_IN_SMALL_DATA_P  ia64_in_small_data_p
417 
418 #undef TARGET_SCHED_ADJUST_COST_2
419 #define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
420 #undef TARGET_SCHED_ISSUE_RATE
421 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
422 #undef TARGET_SCHED_VARIABLE_ISSUE
423 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
424 #undef TARGET_SCHED_INIT
425 #define TARGET_SCHED_INIT ia64_sched_init
426 #undef TARGET_SCHED_FINISH
427 #define TARGET_SCHED_FINISH ia64_sched_finish
428 #undef TARGET_SCHED_INIT_GLOBAL
429 #define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
430 #undef TARGET_SCHED_FINISH_GLOBAL
431 #define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
432 #undef TARGET_SCHED_REORDER
433 #define TARGET_SCHED_REORDER ia64_sched_reorder
434 #undef TARGET_SCHED_REORDER2
435 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
436 
437 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
438 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
439 
440 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
441 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
442 
443 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
444 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
445 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
446 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
447 
448 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
449 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
450   ia64_first_cycle_multipass_dfa_lookahead_guard
451 
452 #undef TARGET_SCHED_DFA_NEW_CYCLE
453 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
454 
455 #undef TARGET_SCHED_H_I_D_EXTENDED
456 #define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
457 
458 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
459 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
460 
461 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
462 #define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
463 
464 #undef TARGET_SCHED_SET_SCHED_CONTEXT
465 #define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
466 
467 #undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
468 #define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
469 
470 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
471 #define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
472 
473 #undef TARGET_SCHED_SET_SCHED_FLAGS
474 #define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
475 
476 #undef TARGET_SCHED_GET_INSN_SPEC_DS
477 #define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
478 
479 #undef TARGET_SCHED_GET_INSN_CHECKED_DS
480 #define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
481 
482 #undef TARGET_SCHED_SPECULATE_INSN
483 #define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
484 
485 #undef TARGET_SCHED_NEEDS_BLOCK_P
486 #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
487 
488 #undef TARGET_SCHED_GEN_SPEC_CHECK
489 #define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
490 
491 #undef TARGET_SCHED_SKIP_RTX_P
492 #define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
493 
494 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
495 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
496 #undef TARGET_ARG_PARTIAL_BYTES
497 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
498 #undef TARGET_FUNCTION_ARG
499 #define TARGET_FUNCTION_ARG ia64_function_arg
500 #undef TARGET_FUNCTION_INCOMING_ARG
501 #define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
502 #undef TARGET_FUNCTION_ARG_ADVANCE
503 #define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
504 #undef TARGET_FUNCTION_ARG_BOUNDARY
505 #define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary
506 
507 #undef TARGET_ASM_OUTPUT_MI_THUNK
508 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
509 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
510 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
511 
512 #undef TARGET_ASM_FILE_START
513 #define TARGET_ASM_FILE_START ia64_file_start
514 
515 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
516 #define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
517 
518 #undef TARGET_REGISTER_MOVE_COST
519 #define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
520 #undef TARGET_MEMORY_MOVE_COST
521 #define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
522 #undef TARGET_RTX_COSTS
523 #define TARGET_RTX_COSTS ia64_rtx_costs
524 #undef TARGET_ADDRESS_COST
525 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
526 
527 #undef TARGET_UNSPEC_MAY_TRAP_P
528 #define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
529 
530 #undef TARGET_MACHINE_DEPENDENT_REORG
531 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
532 
533 #undef TARGET_ENCODE_SECTION_INFO
534 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
535 
536 #undef  TARGET_SECTION_TYPE_FLAGS
537 #define TARGET_SECTION_TYPE_FLAGS  ia64_section_type_flags
538 
539 #ifdef HAVE_AS_TLS
540 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
541 #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
542 #endif
543 
544 /* ??? Investigate.  */
545 #if 0
546 #undef TARGET_PROMOTE_PROTOTYPES
547 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
548 #endif
549 
550 #undef TARGET_FUNCTION_VALUE
551 #define TARGET_FUNCTION_VALUE ia64_function_value
552 #undef TARGET_LIBCALL_VALUE
553 #define TARGET_LIBCALL_VALUE ia64_libcall_value
554 #undef TARGET_FUNCTION_VALUE_REGNO_P
555 #define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p
556 
557 #undef TARGET_STRUCT_VALUE_RTX
558 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
559 #undef TARGET_RETURN_IN_MEMORY
560 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
561 #undef TARGET_SETUP_INCOMING_VARARGS
562 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
563 #undef TARGET_STRICT_ARGUMENT_NAMING
564 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
565 #undef TARGET_MUST_PASS_IN_STACK
566 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
567 #undef TARGET_GET_RAW_RESULT_MODE
568 #define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
569 #undef TARGET_GET_RAW_ARG_MODE
570 #define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode
571 
572 #undef TARGET_MEMBER_TYPE_FORCES_BLK
573 #define TARGET_MEMBER_TYPE_FORCES_BLK ia64_member_type_forces_blk
574 
575 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
576 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
577 
578 #undef TARGET_ASM_UNWIND_EMIT
579 #define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
580 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
581 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY  ia64_asm_emit_except_personality
582 #undef TARGET_ASM_INIT_SECTIONS
583 #define TARGET_ASM_INIT_SECTIONS  ia64_asm_init_sections
584 
585 #undef TARGET_DEBUG_UNWIND_INFO
586 #define TARGET_DEBUG_UNWIND_INFO  ia64_debug_unwind_info
587 
588 #undef TARGET_SCALAR_MODE_SUPPORTED_P
589 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
590 #undef TARGET_VECTOR_MODE_SUPPORTED_P
591 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
592 
593 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
594 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
595   ia64_libgcc_floating_mode_supported_p
596 
597 #undef TARGET_LEGITIMATE_CONSTANT_P
598 #define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
599 #undef TARGET_LEGITIMATE_ADDRESS_P
600 #define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p
601 
602 #undef TARGET_CANNOT_FORCE_CONST_MEM
603 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
604 
605 #undef TARGET_MANGLE_TYPE
606 #define TARGET_MANGLE_TYPE ia64_mangle_type
607 
608 #undef TARGET_INVALID_CONVERSION
609 #define TARGET_INVALID_CONVERSION ia64_invalid_conversion
610 #undef TARGET_INVALID_UNARY_OP
611 #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
612 #undef TARGET_INVALID_BINARY_OP
613 #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
614 
615 #undef TARGET_C_MODE_FOR_SUFFIX
616 #define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
617 
618 #undef TARGET_CAN_ELIMINATE
619 #define TARGET_CAN_ELIMINATE ia64_can_eliminate
620 
621 #undef TARGET_TRAMPOLINE_INIT
622 #define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
623 
624 #undef TARGET_CAN_USE_DOLOOP_P
625 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
626 #undef TARGET_INVALID_WITHIN_DOLOOP
627 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
628 
629 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
630 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
631 
632 #undef TARGET_PREFERRED_RELOAD_CLASS
633 #define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class
634 
635 #undef TARGET_DELAY_SCHED2
636 #define TARGET_DELAY_SCHED2 true
637 
638 /* Variable tracking should be run after all optimizations which
639    change order of insns.  It also needs a valid CFG.  */
640 #undef TARGET_DELAY_VARTRACK
641 #define TARGET_DELAY_VARTRACK true
642 
643 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
644 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok
645 
646 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
647 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p
648 
649 struct gcc_target targetm = TARGET_INITIALIZER;
650 
651 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
652    identifier as an argument, so the front end shouldn't look it up.  */
653 
654 static bool
655 ia64_attribute_takes_identifier_p (const_tree attr_id)
656 {
657   if (is_attribute_p ("model", attr_id))
658     return true;
659 #if TARGET_ABI_OPEN_VMS
660   if (is_attribute_p ("common_object", attr_id))
661     return true;
662 #endif
663   return false;
664 }
665 
666 typedef enum
667   {
668     ADDR_AREA_NORMAL,	/* normal address area */
669     ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
670   }
671 ia64_addr_area;
672 
673 static GTY(()) tree small_ident1;
674 static GTY(()) tree small_ident2;
675 
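/* Lazily create the identifiers used to recognize the argument of the
   "model" attribute.  */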
676 static void
677 init_idents (void)
678 {
679   if (small_ident1 == 0)
680     {
681       small_ident1 = get_identifier ("small");
682       small_ident2 = get_identifier ("__small__");
683     }
684 }
685 
686 /* Retrieve the address area that has been chosen for the given decl.  */
687 
688 static ia64_addr_area
689 ia64_get_addr_area (tree decl)
690 {
691   tree model_attr;
692 
693   model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
694   if (model_attr)
695     {
696       tree id;
697 
698       init_idents ();
699       id = TREE_VALUE (TREE_VALUE (model_attr));
700       if (id == small_ident1 || id == small_ident2)
701 	return ADDR_AREA_SMALL;
702     }
703   return ADDR_AREA_NORMAL;
704 }
705 
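/* Handle a "model" attribute, e.g. "int v __attribute__ ((model (small)));"
   (illustrative declaration): validate the "small"/"__small__" argument and
   reject uses on functions and on non-static local variables.  */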
706 static tree
707 ia64_handle_model_attribute (tree *node, tree name, tree args,
708 			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
709 {
710   ia64_addr_area addr_area = ADDR_AREA_NORMAL;
711   ia64_addr_area area;
712   tree arg, decl = *node;
713 
714   init_idents ();
715   arg = TREE_VALUE (args);
716   if (arg == small_ident1 || arg == small_ident2)
717     {
718       addr_area = ADDR_AREA_SMALL;
719     }
720   else
721     {
722       warning (OPT_Wattributes, "invalid argument of %qE attribute",
723 	       name);
724       *no_add_attrs = true;
725     }
726 
727   switch (TREE_CODE (decl))
728     {
729     case VAR_DECL:
730       if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
731 	   == FUNCTION_DECL)
732 	  && !TREE_STATIC (decl))
733 	{
734 	  error_at (DECL_SOURCE_LOCATION (decl),
735 		    "an address area attribute cannot be specified for "
736 		    "local variables");
737 	  *no_add_attrs = true;
738 	}
739       area = ia64_get_addr_area (decl);
740       if (area != ADDR_AREA_NORMAL && addr_area != area)
741 	{
742 	  error ("address area of %q+D conflicts with previous "
743 		 "declaration", decl);
744 	  *no_add_attrs = true;
745 	}
746       break;
747 
748     case FUNCTION_DECL:
749       error_at (DECL_SOURCE_LOCATION (decl),
750 		"address area attribute cannot be specified for "
751 		"functions");
752       *no_add_attrs = true;
753       break;
754 
755     default:
756       warning (OPT_Wattributes, "%qE attribute ignored",
757 	       name);
758       *no_add_attrs = true;
759       break;
760     }
761 
762   return NULL_TREE;
763 }
764 
765 /* Part of the low level implementation of DEC Ada pragma Common_Object, which
766    enables the shared use of variables stored in overlaid linker areas
767    corresponding to the use of Fortran COMMON.  */
768 
769 static tree
770 ia64_vms_common_object_attribute (tree *node, tree name, tree args,
771 				  int flags ATTRIBUTE_UNUSED,
772 				  bool *no_add_attrs)
773 {
774     tree decl = *node;
775     tree id;
776 
777     gcc_assert (DECL_P (decl));
778 
779     DECL_COMMON (decl) = 1;
780     id = TREE_VALUE (args);
781     if (TREE_CODE (id) != IDENTIFIER_NODE && TREE_CODE (id) != STRING_CST)
782       {
783 	error ("%qE attribute requires a string constant argument", name);
784 	*no_add_attrs = true;
785 	return NULL_TREE;
786       }
787     return NULL_TREE;
788 }
789 
790 /* Part of the low level implementation of DEC Ada pragma Common_Object.  */
791 
792 void
793 ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
794 				     unsigned HOST_WIDE_INT size,
795 				     unsigned int align)
796 {
797   tree attr = DECL_ATTRIBUTES (decl);
798 
799   if (attr)
800     attr = lookup_attribute ("common_object", attr);
801   if (attr)
802     {
803       tree id = TREE_VALUE (TREE_VALUE (attr));
804       const char *name;
805 
806       if (TREE_CODE (id) == IDENTIFIER_NODE)
807         name = IDENTIFIER_POINTER (id);
808       else if (TREE_CODE (id) == STRING_CST)
809         name = TREE_STRING_POINTER (id);
810       else
811         abort ();
812 
813       fprintf (file, "\t.vms_common\t\"%s\",", name);
814     }
815   else
816     fprintf (file, "%s", COMMON_ASM_OP);
817 
818   /*  Code from elfos.h.  */
819   assemble_name (file, name);
820   fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED",%u",
821            size, align / BITS_PER_UNIT);
822 
823   fputc ('\n', file);
824 }
825 
826 static void
827 ia64_encode_addr_area (tree decl, rtx symbol)
828 {
829   int flags;
830 
831   flags = SYMBOL_REF_FLAGS (symbol);
832   switch (ia64_get_addr_area (decl))
833     {
834     case ADDR_AREA_NORMAL: break;
835     case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
836     default: gcc_unreachable ();
837     }
838   SYMBOL_REF_FLAGS (symbol) = flags;
839 }
840 
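/* Implement TARGET_ENCODE_SECTION_INFO: record the chosen address area in
   the SYMBOL_REF flags of static and external variables.  */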
841 static void
842 ia64_encode_section_info (tree decl, rtx rtl, int first)
843 {
844   default_encode_section_info (decl, rtl, first);
845 
846   /* Careful not to prod global register variables.  */
847   if (TREE_CODE (decl) == VAR_DECL
848       && GET_CODE (DECL_RTL (decl)) == MEM
849       && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
850       && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
851     ia64_encode_addr_area (decl, XEXP (rtl, 0));
852 }
853 
854 /* Return 1 if the operands of a move are ok.  */
855 
856 int
857 ia64_move_ok (rtx dst, rtx src)
858 {
859   /* If we're under init_recog_no_volatile, we'll not be able to use
860      memory_operand.  So check the code directly and don't worry about
861      the validity of the underlying address, which should have been
862      checked elsewhere anyway.  */
863   if (GET_CODE (dst) != MEM)
864     return 1;
865   if (GET_CODE (src) == MEM)
866     return 0;
867   if (register_operand (src, VOIDmode))
868     return 1;
869 
870   /* Otherwise, this must be a constant, and it must be 0, 0.0, or 1.0.  */
871   if (INTEGRAL_MODE_P (GET_MODE (dst)))
872     return src == const0_rtx;
873   else
874     return satisfies_constraint_G (src);
875 }
876 
877 /* Return 1 if the operands are ok for a floating point load pair.  */
878 
879 int
880 ia64_load_pair_ok (rtx dst, rtx src)
881 {
882   /* ??? There is a thinko in the implementation of the "x" constraint and the
883      FP_REGS class.  The constraint will also reject (reg f30:TI) so we must
884      also return false for it.  */
885   if (GET_CODE (dst) != REG
886       || !(FP_REGNO_P (REGNO (dst)) && FP_REGNO_P (REGNO (dst) + 1)))
887     return 0;
888   if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
889     return 0;
890   switch (GET_CODE (XEXP (src, 0)))
891     {
892     case REG:
893     case POST_INC:
894       break;
895     case POST_DEC:
896       return 0;
897     case POST_MODIFY:
898       {
899 	rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
900 
901 	if (GET_CODE (adjust) != CONST_INT
902 	    || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
903 	  return 0;
904       }
905       break;
906     default:
907       abort ();
908     }
909   return 1;
910 }
911 
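/* Return nonzero if exactly one of OP1 and OP2 is a base register.  */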
912 int
913 addp4_optimize_ok (rtx op1, rtx op2)
914 {
915   return (basereg_operand (op1, GET_MODE(op1)) !=
916 	  basereg_operand (op2, GET_MODE(op2)));
917 }
918 
919 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
920    Return the length of the field, or <= 0 on failure.  */
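/* For example (illustrative values only): a mask of 0xff0 with a shift count
   of 4 leaves 0xff after the shift, a solid block of eight 1's, so the field
   length returned is 8.  A mask of 0xf0f0 shifted by 4 leaves 0xf0f, which is
   not of the form 2**n - 1, so exact_log2 yields -1.  */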
921 
922 int
923 ia64_depz_field_mask (rtx rop, rtx rshift)
924 {
925   unsigned HOST_WIDE_INT op = INTVAL (rop);
926   unsigned HOST_WIDE_INT shift = INTVAL (rshift);
927 
928   /* Get rid of the zero bits we're shifting in.  */
929   op >>= shift;
930 
931   /* We must now have a solid block of 1's at bit 0.  */
932   return exact_log2 (op + 1);
933 }
934 
935 /* Return the TLS model to use for ADDR.  */
936 
937 static enum tls_model
938 tls_symbolic_operand_type (rtx addr)
939 {
940   enum tls_model tls_kind = TLS_MODEL_NONE;
941 
942   if (GET_CODE (addr) == CONST)
943     {
944       if (GET_CODE (XEXP (addr, 0)) == PLUS
945 	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
946         tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
947     }
948   else if (GET_CODE (addr) == SYMBOL_REF)
949     tls_kind = SYMBOL_REF_TLS_MODEL (addr);
950 
951   return tls_kind;
952 }
953 
954 /* Returns true if REG (assumed to be a `reg' RTX) is valid for use
955    as a base register.  */
956 
957 static inline bool
958 ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
959 {
960   if (strict
961       && REGNO_OK_FOR_BASE_P (REGNO (reg)))
962     return true;
963   else if (!strict
964 	   && (GENERAL_REGNO_P (REGNO (reg))
965 	       || !HARD_REGISTER_P (reg)))
966     return true;
967   else
968     return false;
969 }
970 
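/* Return true if REG is a register, or a SUBREG of a register, that is
   valid for use as a base register.  */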
971 static bool
972 ia64_legitimate_address_reg (const_rtx reg, bool strict)
973 {
974   if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict))
975       || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0))
976 	  && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict)))
977     return true;
978 
979   return false;
980 }
981 
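/* Return true if DISP is a valid POST_MODIFY update expression for base
   register REG: REG plus either another base register or a constant in the
   signed 9-bit range [-256, 255].  */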
982 static bool
983 ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
984 {
985   if (GET_CODE (disp) == PLUS
986       && rtx_equal_p (reg, XEXP (disp, 0))
987       && (ia64_legitimate_address_reg (XEXP (disp, 1), strict)
988 	  || (CONST_INT_P (XEXP (disp, 1))
989 	      && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255))))
990     return true;
991 
992   return false;
993 }
994 
995 /* Implement TARGET_LEGITIMATE_ADDRESS_P.  */
996 
997 static bool
998 ia64_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
999 			   rtx x, bool strict)
1000 {
1001   if (ia64_legitimate_address_reg (x, strict))
1002     return true;
1003   else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
1004 	   && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1005 	   && XEXP (x, 0) != arg_pointer_rtx)
1006     return true;
1007   else if (GET_CODE (x) == POST_MODIFY
1008 	   && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1009 	   && XEXP (x, 0) != arg_pointer_rtx
1010 	   && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict))
1011     return true;
1012   else
1013     return false;
1014 }
1015 
1016 /* Return true if X is a constant that is valid for some immediate
1017    field in an instruction.  */
1018 
1019 static bool
1020 ia64_legitimate_constant_p (machine_mode mode, rtx x)
1021 {
1022   switch (GET_CODE (x))
1023     {
1024     case CONST_INT:
1025     case LABEL_REF:
1026       return true;
1027 
1028     case CONST_DOUBLE:
1029       if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
1030 	return true;
1031       return satisfies_constraint_G (x);
1032 
1033     case CONST:
1034     case SYMBOL_REF:
1035       /* ??? Short term workaround for PR 28490.  We must make the code here
1036 	 match the code in ia64_expand_move and move_operand, even though they
1037 	 are both technically wrong.  */
1038       if (tls_symbolic_operand_type (x) == 0)
1039 	{
1040 	  HOST_WIDE_INT addend = 0;
1041 	  rtx op = x;
1042 
1043 	  if (GET_CODE (op) == CONST
1044 	      && GET_CODE (XEXP (op, 0)) == PLUS
1045 	      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1046 	    {
1047 	      addend = INTVAL (XEXP (XEXP (op, 0), 1));
1048 	      op = XEXP (XEXP (op, 0), 0);
1049 	    }
1050 
1051           if (any_offset_symbol_operand (op, mode)
1052               || function_operand (op, mode))
1053             return true;
1054 	  if (aligned_offset_symbol_operand (op, mode))
1055 	    return (addend & 0x3fff) == 0;
1056 	  return false;
1057 	}
1058       return false;
1059 
1060     case CONST_VECTOR:
1061       if (mode == V2SFmode)
1062 	return satisfies_constraint_Y (x);
1063 
1064       return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1065 	      && GET_MODE_SIZE (mode) <= 8);
1066 
1067     default:
1068       return false;
1069     }
1070 }
1071 
1072 /* Don't allow TLS addresses to get spilled to memory.  */
1073 
1074 static bool
1075 ia64_cannot_force_const_mem (machine_mode mode, rtx x)
1076 {
1077   if (mode == RFmode)
1078     return true;
1079   return tls_symbolic_operand_type (x) != 0;
1080 }
1081 
1082 /* Expand a symbolic constant load.  */
1083 
1084 bool
1085 ia64_expand_load_address (rtx dest, rtx src)
1086 {
1087   gcc_assert (GET_CODE (dest) == REG);
1088 
1089   /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
1090      having to pointer-extend the value afterward.  Other forms of address
1091      computation below are also more natural to compute as 64-bit quantities.
1092      If we've been given an SImode destination register, change it.  */
1093   if (GET_MODE (dest) != Pmode)
1094     dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
1095 			       byte_lowpart_offset (Pmode, GET_MODE (dest)));
1096 
1097   if (TARGET_NO_PIC)
1098     return false;
1099   if (small_addr_symbolic_operand (src, VOIDmode))
1100     return false;
1101 
1102   if (TARGET_AUTO_PIC)
1103     emit_insn (gen_load_gprel64 (dest, src));
1104   else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
1105     emit_insn (gen_load_fptr (dest, src));
1106   else if (sdata_symbolic_operand (src, VOIDmode))
1107     emit_insn (gen_load_gprel (dest, src));
1108   else if (local_symbolic_operand64 (src, VOIDmode))
1109     {
1110       /* We want to use @gprel rather than @ltoff relocations for local
1111 	 symbols:
1112 	  - @gprel does not require the dynamic linker
1113 	  - and does not use the .sdata section
1114 	 https://gcc.gnu.org/bugzilla/60465 */
1115       emit_insn (gen_load_gprel64 (dest, src));
1116     }
1117   else
1118     {
1119       HOST_WIDE_INT addend = 0;
1120       rtx tmp;
1121 
1122       /* We did split constant offsets in ia64_expand_move, and we did try
1123 	 to keep them split in move_operand, but we also allowed reload to
1124 	 rematerialize arbitrary constants rather than spill the value to
1125 	 the stack and reload it.  So we have to be prepared here to split
1126 	 them apart again.  */
1127       if (GET_CODE (src) == CONST)
1128 	{
1129 	  HOST_WIDE_INT hi, lo;
1130 
1131 	  hi = INTVAL (XEXP (XEXP (src, 0), 1));
1132 	  lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
1133 	  hi = hi - lo;
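	  /* A worked example (illustrative numbers only): an offset of
	     0x2100 sign-extends its low 14 bits to lo = -0x1f00, which
	     fits a 14-bit signed addend, leaving hi = 0x4000, a multiple
	     of 0x4000.  */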
1134 
1135 	  if (lo != 0)
1136 	    {
1137 	      addend = lo;
1138 	      src = plus_constant (Pmode, XEXP (XEXP (src, 0), 0), hi);
1139 	    }
1140 	}
1141 
1142       tmp = gen_rtx_HIGH (Pmode, src);
1143       tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
1144       emit_insn (gen_rtx_SET (dest, tmp));
1145 
1146       tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
1147       emit_insn (gen_rtx_SET (dest, tmp));
1148 
1149       if (addend)
1150 	{
1151 	  tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
1152 	  emit_insn (gen_rtx_SET (dest, tmp));
1153 	}
1154     }
1155 
1156   return true;
1157 }
1158 
1159 static GTY(()) rtx gen_tls_tga;
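/* Return the call target for __tls_get_addr, creating it on first use.  */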
1160 static rtx
1161 gen_tls_get_addr (void)
1162 {
1163   if (!gen_tls_tga)
1164     gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1165   return gen_tls_tga;
1166 }
1167 
1168 static GTY(()) rtx thread_pointer_rtx;
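/* Return an RTX for the thread pointer, which lives in r13 on IA-64.  */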
1169 static rtx
1170 gen_thread_pointer (void)
1171 {
1172   if (!thread_pointer_rtx)
1173     thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1174   return thread_pointer_rtx;
1175 }
1176 
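/* Expand a TLS reference of kind TLS_KIND to symbol OP1 plus ADDEND into
   OP0.  ORIG_OP1 is the original, unsplit source operand.  Return the
   register holding the result, or NULL_RTX if it was stored directly
   into OP0.  */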
1177 static rtx
1178 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
1179 			 rtx orig_op1, HOST_WIDE_INT addend)
1180 {
1181   rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp;
1182   rtx_insn *insns;
1183   rtx orig_op0 = op0;
1184   HOST_WIDE_INT addend_lo, addend_hi;
1185 
1186   switch (tls_kind)
1187     {
1188     case TLS_MODEL_GLOBAL_DYNAMIC:
1189       start_sequence ();
1190 
1191       tga_op1 = gen_reg_rtx (Pmode);
1192       emit_insn (gen_load_dtpmod (tga_op1, op1));
1193 
1194       tga_op2 = gen_reg_rtx (Pmode);
1195       emit_insn (gen_load_dtprel (tga_op2, op1));
1196 
1197       tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1198 					 LCT_CONST, Pmode, 2, tga_op1,
1199 					 Pmode, tga_op2, Pmode);
1200 
1201       insns = get_insns ();
1202       end_sequence ();
1203 
1204       if (GET_MODE (op0) != Pmode)
1205 	op0 = tga_ret;
1206       emit_libcall_block (insns, op0, tga_ret, op1);
1207       break;
1208 
1209     case TLS_MODEL_LOCAL_DYNAMIC:
1210       /* ??? This isn't the completely proper way to do local-dynamic.
1211 	 If the call to __tls_get_addr is used only by a single symbol,
1212 	 then we should (somehow) move the dtprel to the second arg
1213 	 to avoid the extra add.  */
1214       start_sequence ();
1215 
1216       tga_op1 = gen_reg_rtx (Pmode);
1217       emit_insn (gen_load_dtpmod (tga_op1, op1));
1218 
1219       tga_op2 = const0_rtx;
1220 
1221       tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1222 					 LCT_CONST, Pmode, 2, tga_op1,
1223 					 Pmode, tga_op2, Pmode);
1224 
1225       insns = get_insns ();
1226       end_sequence ();
1227 
1228       tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1229 				UNSPEC_LD_BASE);
1230       tmp = gen_reg_rtx (Pmode);
1231       emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1232 
1233       if (!register_operand (op0, Pmode))
1234 	op0 = gen_reg_rtx (Pmode);
1235       if (TARGET_TLS64)
1236 	{
1237 	  emit_insn (gen_load_dtprel (op0, op1));
1238 	  emit_insn (gen_adddi3 (op0, tmp, op0));
1239 	}
1240       else
1241 	emit_insn (gen_add_dtprel (op0, op1, tmp));
1242       break;
1243 
1244     case TLS_MODEL_INITIAL_EXEC:
1245       addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1246       addend_hi = addend - addend_lo;
1247 
1248       op1 = plus_constant (Pmode, op1, addend_hi);
1249       addend = addend_lo;
1250 
1251       tmp = gen_reg_rtx (Pmode);
1252       emit_insn (gen_load_tprel (tmp, op1));
1253 
1254       if (!register_operand (op0, Pmode))
1255 	op0 = gen_reg_rtx (Pmode);
1256       emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1257       break;
1258 
1259     case TLS_MODEL_LOCAL_EXEC:
1260       if (!register_operand (op0, Pmode))
1261 	op0 = gen_reg_rtx (Pmode);
1262 
1263       op1 = orig_op1;
1264       addend = 0;
1265       if (TARGET_TLS64)
1266 	{
1267 	  emit_insn (gen_load_tprel (op0, op1));
1268 	  emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
1269 	}
1270       else
1271 	emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
1272       break;
1273 
1274     default:
1275       gcc_unreachable ();
1276     }
1277 
1278   if (addend)
1279     op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1280 			       orig_op0, 1, OPTAB_DIRECT);
1281   if (orig_op0 == op0)
1282     return NULL_RTX;
1283   if (GET_MODE (orig_op0) == Pmode)
1284     return op0;
1285   return gen_lowpart (GET_MODE (orig_op0), op0);
1286 }
1287 
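/* Prepare a move from OP1 into OP0, expanding symbolic and TLS addresses
   as needed.  Return the (possibly rewritten) source operand, or NULL_RTX
   if the move has already been emitted in full.  */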
1288 rtx
1289 ia64_expand_move (rtx op0, rtx op1)
1290 {
1291   machine_mode mode = GET_MODE (op0);
1292 
1293   if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1294     op1 = force_reg (mode, op1);
1295 
1296   if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1297     {
1298       HOST_WIDE_INT addend = 0;
1299       enum tls_model tls_kind;
1300       rtx sym = op1;
1301 
1302       if (GET_CODE (op1) == CONST
1303 	  && GET_CODE (XEXP (op1, 0)) == PLUS
1304 	  && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1305 	{
1306 	  addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1307 	  sym = XEXP (XEXP (op1, 0), 0);
1308 	}
1309 
1310       tls_kind = tls_symbolic_operand_type (sym);
1311       if (tls_kind)
1312 	return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1313 
1314       if (any_offset_symbol_operand (sym, mode))
1315 	addend = 0;
1316       else if (aligned_offset_symbol_operand (sym, mode))
1317 	{
1318 	  HOST_WIDE_INT addend_lo, addend_hi;
1319 
1320 	  addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1321 	  addend_hi = addend - addend_lo;
1322 
1323 	  if (addend_lo != 0)
1324 	    {
1325 	      op1 = plus_constant (mode, sym, addend_hi);
1326 	      addend = addend_lo;
1327 	    }
1328 	  else
1329 	    addend = 0;
1330 	}
1331       else
1332 	op1 = sym;
1333 
1334       if (reload_completed)
1335 	{
1336 	  /* We really should have taken care of this offset earlier.  */
1337 	  gcc_assert (addend == 0);
1338 	  if (ia64_expand_load_address (op0, op1))
1339 	    return NULL_RTX;
1340 	}
1341 
1342       if (addend)
1343 	{
1344 	  rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);
1345 
1346 	  emit_insn (gen_rtx_SET (subtarget, op1));
1347 
1348 	  op1 = expand_simple_binop (mode, PLUS, subtarget,
1349 				     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1350 	  if (op0 == op1)
1351 	    return NULL_RTX;
1352 	}
1353     }
1354 
1355   return op1;
1356 }
1357 
1358 /* Split a move from OP1 to OP0 conditional on COND.  */
1359 
1360 void
1361 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1362 {
1363   rtx_insn *insn, *first = get_last_insn ();
1364 
1365   emit_move_insn (op0, op1);
1366 
1367   for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1368     if (INSN_P (insn))
1369       PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1370 					  PATTERN (insn));
1371 }
1372 
1373 /* Split a post-reload TImode or TFmode reference into two DImode
1374    components.  This is made extra difficult by the fact that we do
1375    not get any scratch registers to work with, because reload cannot
1376    be prevented from giving us a scratch that overlaps the register
1377    pair involved.  So instead, when addressing memory, we tweak the
1378    pointer register up and back down with POST_INCs.  Or up and not
1379    back down when we can get away with it.
1380 
1381    REVERSED is true when the loads must be done in reversed order
1382    (high word first) for correctness.  DEAD is true when the pointer
1383    dies with the second insn we generate and therefore the second
1384    address must not carry a postmodify.
1385 
1386    May return an insn which is to be emitted after the moves.  */
1387 
1388 static rtx
1389 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1390 {
1391   rtx fixup = 0;
1392 
1393   switch (GET_CODE (in))
1394     {
1395     case REG:
1396       out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1397       out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1398       break;
1399 
1400     case CONST_INT:
1401     case CONST_DOUBLE:
1402       /* Cannot occur reversed.  */
1403       gcc_assert (!reversed);
1404 
1405       if (GET_MODE (in) != TFmode)
1406 	split_double (in, &out[0], &out[1]);
1407       else
1408 	/* split_double does not understand how to split a TFmode
1409 	   quantity into a pair of DImode constants.  */
1410 	{
1411 	  unsigned HOST_WIDE_INT p[2];
1412 	  long l[4];  /* TFmode is 128 bits */
1413 
1414 	  real_to_target (l, CONST_DOUBLE_REAL_VALUE (in), TFmode);
1415 
1416 	  if (FLOAT_WORDS_BIG_ENDIAN)
1417 	    {
1418 	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1419 	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1420 	    }
1421 	  else
1422 	    {
1423 	      p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1424 	      p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1425 	    }
1426 	  out[0] = GEN_INT (p[0]);
1427 	  out[1] = GEN_INT (p[1]);
1428 	}
1429       break;
1430 
1431     case MEM:
1432       {
1433 	rtx base = XEXP (in, 0);
1434 	rtx offset;
1435 
1436 	switch (GET_CODE (base))
1437 	  {
1438 	  case REG:
1439 	    if (!reversed)
1440 	      {
1441 		out[0] = adjust_automodify_address
1442 		  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1443 		out[1] = adjust_automodify_address
1444 		  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1445 	      }
1446 	    else
1447 	      {
1448 		/* Reversal requires a pre-increment, which can only
1449 		   be done as a separate insn.  */
1450 		emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1451 		out[0] = adjust_automodify_address
1452 		  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1453 		out[1] = adjust_address (in, DImode, 0);
1454 	      }
1455 	    break;
1456 
1457 	  case POST_INC:
1458 	    gcc_assert (!reversed && !dead);
1459 
1460 	    /* Just do the increment in two steps.  */
1461 	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
1462 	    out[1] = adjust_automodify_address (in, DImode, 0, 8);
1463 	    break;
1464 
1465 	  case POST_DEC:
1466 	    gcc_assert (!reversed && !dead);
1467 
1468 	    /* Add 8, subtract 24.  */
1469 	    base = XEXP (base, 0);
1470 	    out[0] = adjust_automodify_address
1471 	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1472 	    out[1] = adjust_automodify_address
1473 	      (in, DImode,
1474 	       gen_rtx_POST_MODIFY (Pmode, base,
1475 				    plus_constant (Pmode, base, -24)),
1476 	       8);
1477 	    break;
1478 
1479 	  case POST_MODIFY:
1480 	    gcc_assert (!reversed && !dead);
1481 
1482 	    /* Extract and adjust the modification.  This case is
1483 	       trickier than the others, because we might have an
1484 	       index register, or we might have a combined offset that
1485 	       doesn't fit a signed 9-bit displacement field.  We can
1486 	       assume the incoming expression is already legitimate.  */
1487 	    offset = XEXP (base, 1);
1488 	    base = XEXP (base, 0);
1489 
1490 	    out[0] = adjust_automodify_address
1491 	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1492 
1493 	    if (GET_CODE (XEXP (offset, 1)) == REG)
1494 	      {
1495 		/* Can't adjust the postmodify to match.  Emit the
1496 		   original, then a separate addition insn.  */
1497 		out[1] = adjust_automodify_address (in, DImode, 0, 8);
1498 		fixup = gen_adddi3 (base, base, GEN_INT (-8));
1499 	      }
1500 	    else
1501 	      {
1502 		gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1503 		if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1504 		  {
1505 		    /* Again the postmodify cannot be made to match,
1506 		       but in this case it's more efficient to get rid
1507 		       of the postmodify entirely and fix up with an
1508 		       add insn.  */
1509 		    out[1] = adjust_automodify_address (in, DImode, base, 8);
1510 		    fixup = gen_adddi3
1511 		      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1512 		  }
1513 		else
1514 		  {
1515 		    /* Combined offset still fits in the displacement field.
1516 		       (We cannot overflow it at the high end.)  */
1517 		    out[1] = adjust_automodify_address
1518 		      (in, DImode, gen_rtx_POST_MODIFY
1519 		       (Pmode, base, gen_rtx_PLUS
1520 			(Pmode, base,
1521 			 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1522 		       8);
1523 		  }
1524 	      }
1525 	    break;
1526 
1527 	  default:
1528 	    gcc_unreachable ();
1529 	  }
1530 	break;
1531       }
1532 
1533     default:
1534       gcc_unreachable ();
1535     }
1536 
1537   return fixup;
1538 }
1539 
1540 /* Split a TImode or TFmode move instruction after reload.
1541    This is used by *movtf_internal and *movti_internal.  */
1542 void
1543 ia64_split_tmode_move (rtx operands[])
1544 {
1545   rtx in[2], out[2], insn;
1546   rtx fixup[2];
1547   bool dead = false;
1548   bool reversed = false;
1549 
1550   /* It is possible for reload to decide to overwrite a pointer with
1551      the value it points to.  In that case we have to do the loads in
1552      the appropriate order so that the pointer is not destroyed too
1553      early.  Also we must not generate a postmodify for that second
1554      load, or rws_access_regno will die.  And we must not generate a
1555      postmodify for the second load if the destination register
1556      overlaps with the base register.  */
1557   if (GET_CODE (operands[1]) == MEM
1558       && reg_overlap_mentioned_p (operands[0], operands[1]))
1559     {
1560       rtx base = XEXP (operands[1], 0);
1561       while (GET_CODE (base) != REG)
1562 	base = XEXP (base, 0);
1563 
1564       if (REGNO (base) == REGNO (operands[0]))
1565 	reversed = true;
1566 
1567       if (refers_to_regno_p (REGNO (operands[0]),
1568 			     REGNO (operands[0])+2,
1569 			     base, 0))
1570 	dead = true;
1571     }
1572   /* Another reason to do the moves in reversed order is if the first
1573      element of the target register pair is also the second element of
1574      the source register pair.  */
1575   if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1576       && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1577     reversed = true;
1578 
1579   fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1580   fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1581 
1582 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
1583   if (GET_CODE (EXP) == MEM						\
1584       && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
1585 	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
1586 	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
1587     add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
1588 
1589   insn = emit_insn (gen_rtx_SET (out[0], in[0]));
1590   MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1591   MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1592 
1593   insn = emit_insn (gen_rtx_SET (out[1], in[1]));
1594   MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1595   MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1596 
1597   if (fixup[0])
1598     emit_insn (fixup[0]);
1599   if (fixup[1])
1600     emit_insn (fixup[1]);
1601 
1602 #undef MAYBE_ADD_REG_INC_NOTE
1603 }
1604 
1605 /* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
1606    through memory plus an extra GR scratch register.  Except that you can
1607    either get the first from SECONDARY_MEMORY_NEEDED or the second from
1608    SECONDARY_RELOAD_CLASS, but not both.
1609 
1610    We got into problems in the first place by allowing a construct like
1611    (subreg:XF (reg:TI)), which we got from a union containing a long double.
1612    This solution attempts to prevent this situation from occurring.  When
1613    we see something like the above, we spill the inner register to memory.  */
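/* For example, given (subreg:XF (reg:TI r)), the code below stores the
   TImode register to a fresh 16-byte stack slot and returns an XFmode view
   of that slot, so no direct GR<->FR XFmode copy is ever required.  */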
1614 
1615 static rtx
1616 spill_xfmode_rfmode_operand (rtx in, int force, machine_mode mode)
1617 {
1618   if (GET_CODE (in) == SUBREG
1619       && GET_MODE (SUBREG_REG (in)) == TImode
1620       && GET_CODE (SUBREG_REG (in)) == REG)
1621     {
1622       rtx memt = assign_stack_temp (TImode, 16);
1623       emit_move_insn (memt, SUBREG_REG (in));
1624       return adjust_address (memt, mode, 0);
1625     }
1626   else if (force && GET_CODE (in) == REG)
1627     {
1628       rtx memx = assign_stack_temp (mode, 16);
1629       emit_move_insn (memx, in);
1630       return memx;
1631     }
1632   else
1633     return in;
1634 }
1635 
1636 /* Expand the movxf or movrf pattern (MODE says which) with the given
1637    OPERANDS, returning true if the pattern should then invoke
1638    DONE.  */
1639 
1640 bool
1641 ia64_expand_movxf_movrf (machine_mode mode, rtx operands[])
1642 {
1643   rtx op0 = operands[0];
1644 
1645   if (GET_CODE (op0) == SUBREG)
1646     op0 = SUBREG_REG (op0);
1647 
1648   /* We must support XFmode loads into general registers for stdarg/vararg,
1649      unprototyped calls, and a rare case where a long double is passed as
1650      an argument after a float HFA fills the FP registers.  We split them into
1651      DImode loads for convenience.  We also need to support XFmode stores
1652      for the last case.  This case does not happen for stdarg/vararg routines,
1653      because we do a block store to memory of unnamed arguments.  */
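  /* As an illustration, a long double argument reaching a stdarg or
     unprototyped callee arrives in a pair of general registers, so the
     XFmode access is rewritten below as two DImode moves at offsets 0
     and 8.  */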
1654 
1655   if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1656     {
1657       rtx out[2];
1658 
1659       /* We're hoping to transform everything that deals with XFmode
1660 	 quantities and GR registers early in the compiler.  */
1661       gcc_assert (can_create_pseudo_p ());
1662 
1663       /* Struct to register can just use TImode instead.  */
1664       if ((GET_CODE (operands[1]) == SUBREG
1665 	   && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1666 	  || (GET_CODE (operands[1]) == REG
1667 	      && GR_REGNO_P (REGNO (operands[1]))))
1668 	{
1669 	  rtx op1 = operands[1];
1670 
1671 	  if (GET_CODE (op1) == SUBREG)
1672 	    op1 = SUBREG_REG (op1);
1673 	  else
1674 	    op1 = gen_rtx_REG (TImode, REGNO (op1));
1675 
1676 	  emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1677 	  return true;
1678 	}
1679 
1680       if (GET_CODE (operands[1]) == CONST_DOUBLE)
1681 	{
1682 	  /* Don't word-swap when reading in the constant.  */
1683 	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1684 			  operand_subword (operands[1], WORDS_BIG_ENDIAN,
1685 					   0, mode));
1686 	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1687 			  operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1688 					   0, mode));
1689 	  return true;
1690 	}
1691 
1692       /* If the quantity is in a register not known to be GR, spill it.  */
1693       if (register_operand (operands[1], mode))
1694 	operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1695 
1696       gcc_assert (GET_CODE (operands[1]) == MEM);
1697 
1698       /* Don't word-swap when reading in the value.  */
1699       out[0] = gen_rtx_REG (DImode, REGNO (op0));
1700       out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1701 
1702       emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1703       emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1704       return true;
1705     }
1706 
1707   if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1708     {
1709       /* We're hoping to transform everything that deals with XFmode
1710 	 quantities and GR registers early in the compiler.  */
1711       gcc_assert (can_create_pseudo_p ());
1712 
1713       /* Op0 can't be a GR_REG here, as that case is handled above.
1714 	 If op0 is a register, then we spill op1, so that we now have a
1715 	 MEM operand.  This requires creating an XFmode subreg of a TImode reg
1716 	 to force the spill.  */
1717       if (register_operand (operands[0], mode))
1718 	{
1719 	  rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1720 	  op1 = gen_rtx_SUBREG (mode, op1, 0);
1721 	  operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1722 	}
1723 
1724       else
1725 	{
1726 	  rtx in[2];
1727 
1728 	  gcc_assert (GET_CODE (operands[0]) == MEM);
1729 
1730 	  /* Don't word-swap when writing out the value.  */
1731 	  in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1732 	  in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1733 
1734 	  emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1735 	  emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1736 	  return true;
1737 	}
1738     }
1739 
1740   if (!reload_in_progress && !reload_completed)
1741     {
1742       operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1743 
1744       if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1745 	{
1746 	  rtx memt, memx, in = operands[1];
1747 	  if (CONSTANT_P (in))
1748 	    in = validize_mem (force_const_mem (mode, in));
1749 	  if (GET_CODE (in) == MEM)
1750 	    memt = adjust_address (in, TImode, 0);
1751 	  else
1752 	    {
1753 	      memt = assign_stack_temp (TImode, 16);
1754 	      memx = adjust_address (memt, mode, 0);
1755 	      emit_move_insn (memx, in);
1756 	    }
1757 	  emit_move_insn (op0, memt);
1758 	  return true;
1759 	}
1760 
1761       if (!ia64_move_ok (operands[0], operands[1]))
1762 	operands[1] = force_reg (mode, operands[1]);
1763     }
1764 
1765   return false;
1766 }
1767 
1768 /* Emit a comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1769    with the expression that holds the compare result (in VOIDmode).  */
1770 
1771 static GTY(()) rtx cmptf_libfunc;
1772 
1773 void
1774 ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
1775 {
1776   enum rtx_code code = GET_CODE (*expr);
1777   rtx cmp;
1778 
1779   /* If we have a BImode input, then we already have a compare result, and
1780      do not need to emit another comparison.  */
1781   if (GET_MODE (*op0) == BImode)
1782     {
1783       gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1784       cmp = *op0;
1785     }
1786   /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1787      magic number as its third argument, that indicates what to do.
1788      The return value is an integer to be compared against zero.  */
1789   else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
1790     {
1791       enum qfcmp_magic {
1792 	QCMP_INV = 1,	/* Raise FP_INVALID on NaNs as a side effect.  */
1793 	QCMP_UNORD = 2,
1794 	QCMP_EQ = 4,
1795 	QCMP_LT = 8,
1796 	QCMP_GT = 16
1797       };
1798       int magic;
1799       enum rtx_code ncode;
1800       rtx ret, insns;
1801 
1802       gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
1803       switch (code)
1804 	{
1805 	  /* 1 = equal, 0 = not equal.  Equality operators do
1806 	     not raise FP_INVALID when given a NaN operand.  */
1807 	case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
1808 	case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
1809 	  /* isunordered() from C99.  */
1810 	case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
1811 	case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
1812 	  /* Relational operators raise FP_INVALID when given
1813 	     a NaN operand.  */
1814 	case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
1815 	case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1816 	case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
1817 	case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1818           /* Unordered relational operators do not raise FP_INVALID
1819 	     when given a NaN operand.  */
1820 	case UNLT:    magic = QCMP_LT        |QCMP_UNORD; ncode = NE; break;
1821 	case UNLE:    magic = QCMP_LT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1822 	case UNGT:    magic = QCMP_GT        |QCMP_UNORD; ncode = NE; break;
1823 	case UNGE:    magic = QCMP_GT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1824 	  /* Not supported.  */
1825 	case UNEQ:
1826 	case LTGT:
1827 	default: gcc_unreachable ();
1828 	}
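      /* For instance, code == LE gives MAGIC == (QCMP_LT | QCMP_EQ | QCMP_INV),
	 i.e. 13, and NCODE == NE, so the sequence built below amounts to
	 testing _U_Qfcmp (*op0, *op1, 13) != 0.  */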
1829 
1830       start_sequence ();
1831 
1832       ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1833 				     *op0, TFmode, *op1, TFmode,
1834 				     GEN_INT (magic), DImode);
1835       cmp = gen_reg_rtx (BImode);
1836       emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (ncode, BImode,
1837 						   ret, const0_rtx)));
1838 
1839       insns = get_insns ();
1840       end_sequence ();
1841 
1842       emit_libcall_block (insns, cmp, cmp,
1843 			  gen_rtx_fmt_ee (code, BImode, *op0, *op1));
1844       code = NE;
1845     }
1846   else
1847     {
1848       cmp = gen_reg_rtx (BImode);
1849       emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
1850       code = NE;
1851     }
1852 
1853   *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
1854   *op0 = cmp;
1855   *op1 = const0_rtx;
1856 }
1857 
1858 /* Generate an integral vector comparison.  Return true if the condition has
1859    been reversed, and so the sense of the comparison should be inverted.  */
1860 
1861 static bool
1862 ia64_expand_vecint_compare (enum rtx_code code, machine_mode mode,
1863 			    rtx dest, rtx op0, rtx op1)
1864 {
1865   bool negate = false;
1866   rtx x;
1867 
1868   /* Canonicalize the comparison to EQ, GT, GTU.  */
1869   switch (code)
1870     {
1871     case EQ:
1872     case GT:
1873     case GTU:
1874       break;
1875 
1876     case NE:
1877     case LE:
1878     case LEU:
1879       code = reverse_condition (code);
1880       negate = true;
1881       break;
1882 
1883     case GE:
1884     case GEU:
1885       code = reverse_condition (code);
1886       negate = true;
1887       /* FALLTHRU */
1888 
1889     case LT:
1890     case LTU:
1891       code = swap_condition (code);
1892       x = op0, op0 = op1, op1 = x;
1893       break;
1894 
1895     default:
1896       gcc_unreachable ();
1897     }
1898 
1899   /* Unsigned parallel compare is not supported by the hardware.  Play some
1900      tricks to turn this into a signed comparison against 0.  */
1901   if (code == GTU)
1902     {
1903       switch (mode)
1904 	{
1905 	case V2SImode:
1906 	  {
1907 	    rtx t1, t2, mask;
1908 
1909 	    /* Subtract (-(INT MAX) - 1) from both operands to make
1910 	       them signed.  */
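	    /* Subtracting 0x80000000 simply flips each element's sign bit;
	       x >u y is equivalent to (x ^ 0x80000000) >s (y ^ 0x80000000),
	       so a signed GT on the adjusted operands gives the GTU result.  */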
1911 	    mask = gen_int_mode (0x80000000, SImode);
1912 	    mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
1913 	    mask = force_reg (mode, mask);
1914 	    t1 = gen_reg_rtx (mode);
1915 	    emit_insn (gen_subv2si3 (t1, op0, mask));
1916 	    t2 = gen_reg_rtx (mode);
1917 	    emit_insn (gen_subv2si3 (t2, op1, mask));
1918 	    op0 = t1;
1919 	    op1 = t2;
1920 	    code = GT;
1921 	  }
1922 	  break;
1923 
1924 	case V8QImode:
1925 	case V4HImode:
1926 	  /* Perform a parallel unsigned saturating subtraction.  */
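	  /* US_MINUS (op0, op1) is zero exactly when op0 <= op1 as unsigned
	     values, so testing the result for equality with zero (and
	     flipping NEGATE) yields the GTU result.  */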
1927 	  x = gen_reg_rtx (mode);
1928 	  emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, op0, op1)));
1929 
1930 	  code = EQ;
1931 	  op0 = x;
1932 	  op1 = CONST0_RTX (mode);
1933 	  negate = !negate;
1934 	  break;
1935 
1936 	default:
1937 	  gcc_unreachable ();
1938 	}
1939     }
1940 
1941   x = gen_rtx_fmt_ee (code, mode, op0, op1);
1942   emit_insn (gen_rtx_SET (dest, x));
1943 
1944   return negate;
1945 }
1946 
1947 /* Emit an integral vector conditional move.  */
1948 
1949 void
1950 ia64_expand_vecint_cmov (rtx operands[])
1951 {
1952   machine_mode mode = GET_MODE (operands[0]);
1953   enum rtx_code code = GET_CODE (operands[3]);
1954   bool negate;
1955   rtx cmp, x, ot, of;
1956 
1957   cmp = gen_reg_rtx (mode);
1958   negate = ia64_expand_vecint_compare (code, mode, cmp,
1959 				       operands[4], operands[5]);
1960 
1961   ot = operands[1+negate];
1962   of = operands[2-negate];
1963 
1964   if (ot == CONST0_RTX (mode))
1965     {
1966       if (of == CONST0_RTX (mode))
1967 	{
1968 	  emit_move_insn (operands[0], ot);
1969 	  return;
1970 	}
1971 
1972       x = gen_rtx_NOT (mode, cmp);
1973       x = gen_rtx_AND (mode, x, of);
1974       emit_insn (gen_rtx_SET (operands[0], x));
1975     }
1976   else if (of == CONST0_RTX (mode))
1977     {
1978       x = gen_rtx_AND (mode, cmp, ot);
1979       emit_insn (gen_rtx_SET (operands[0], x));
1980     }
1981   else
1982     {
1983       rtx t, f;
1984 
1985       t = gen_reg_rtx (mode);
1986       x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1987       emit_insn (gen_rtx_SET (t, x));
1988 
1989       f = gen_reg_rtx (mode);
1990       x = gen_rtx_NOT (mode, cmp);
1991       x = gen_rtx_AND (mode, x, operands[2-negate]);
1992       emit_insn (gen_rtx_SET (f, x));
1993 
1994       x = gen_rtx_IOR (mode, t, f);
1995       emit_insn (gen_rtx_SET (operands[0], x));
1996     }
1997 }
1998 
1999 /* Emit an integral vector min or max operation.  Return true if all done.  */
2000 
2001 bool
2002 ia64_expand_vecint_minmax (enum rtx_code code, machine_mode mode,
2003 			   rtx operands[])
2004 {
2005   rtx xops[6];
2006 
2007   /* These four combinations are supported directly.  */
2008   if (mode == V8QImode && (code == UMIN || code == UMAX))
2009     return false;
2010   if (mode == V4HImode && (code == SMIN || code == SMAX))
2011     return false;
2012 
2013   /* This combination can be implemented with only saturating subtraction.  */
2014   if (mode == V4HImode && code == UMAX)
2015     {
2016       rtx x, tmp = gen_reg_rtx (mode);
2017 
2018       x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
2019       emit_insn (gen_rtx_SET (tmp, x));
2020 
2021       emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
2022       return true;
2023     }
2024 
2025   /* Everything else implemented via vector comparisons.  */
2026   xops[0] = operands[0];
2027   xops[4] = xops[1] = operands[1];
2028   xops[5] = xops[2] = operands[2];
2029 
2030   switch (code)
2031     {
2032     case UMIN:
2033       code = LTU;
2034       break;
2035     case UMAX:
2036       code = GTU;
2037       break;
2038     case SMIN:
2039       code = LT;
2040       break;
2041     case SMAX:
2042       code = GT;
2043       break;
2044     default:
2045       gcc_unreachable ();
2046     }
2047   xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
2048 
2049   ia64_expand_vecint_cmov (xops);
2050   return true;
2051 }
2052 
2053 /* The vectors LO and HI each contain N halves of a double-wide vector.
2054    Reassemble either the first N/2 or the second N/2 elements.  */
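/* For example, for V8QImode (NELT == 8) on a little-endian target the loop
   below produces the permutation {0, 8, 1, 9, 2, 10, 3, 11} when HIGHP is
   false and {4, 12, 5, 13, 6, 14, 7, 15} when HIGHP is true, interleaving
   the selected halves of the two inputs.  */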
2055 
2056 void
2057 ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
2058 {
2059   machine_mode vmode = GET_MODE (lo);
2060   unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
2061   struct expand_vec_perm_d d;
2062   bool ok;
2063 
2064   d.target = gen_lowpart (vmode, out);
2065   d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
2066   d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
2067   d.vmode = vmode;
2068   d.nelt = nelt;
2069   d.one_operand_p = false;
2070   d.testing_p = false;
2071 
2072   high = (highp ? nelt / 2 : 0);
2073   for (i = 0; i < nelt / 2; ++i)
2074     {
2075       d.perm[i * 2] = i + high;
2076       d.perm[i * 2 + 1] = i + high + nelt;
2077     }
2078 
2079   ok = ia64_expand_vec_perm_const_1 (&d);
2080   gcc_assert (ok);
2081 }
2082 
2083 /* Return the zero vector if UNSIGNEDP, else a per-element sign mask of VEC.  */
2084 
2085 static rtx
2086 ia64_unpack_sign (rtx vec, bool unsignedp)
2087 {
2088   machine_mode mode = GET_MODE (vec);
2089   rtx zero = CONST0_RTX (mode);
2090 
2091   if (unsignedp)
2092     return zero;
2093   else
2094     {
2095       rtx sign = gen_reg_rtx (mode);
2096       bool neg;
2097 
2098       neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
2099       gcc_assert (!neg);
2100 
2101       return sign;
2102     }
2103 }
2104 
2105 /* Emit an integral vector unpack operation.  */
2106 
2107 void
2108 ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
2109 {
2110   rtx sign = ia64_unpack_sign (operands[1], unsignedp);
2111   ia64_unpack_assemble (operands[0], operands[1], sign, highp);
2112 }
2113 
2114 /* Emit an integral vector widening sum operation.  */
2115 
2116 void
2117 ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
2118 {
2119   machine_mode wmode;
2120   rtx l, h, t, sign;
2121 
2122   sign = ia64_unpack_sign (operands[1], unsignedp);
2123 
2124   wmode = GET_MODE (operands[0]);
2125   l = gen_reg_rtx (wmode);
2126   h = gen_reg_rtx (wmode);
2127 
2128   ia64_unpack_assemble (l, operands[1], sign, false);
2129   ia64_unpack_assemble (h, operands[1], sign, true);
2130 
2131   t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
2132   t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
2133   if (t != operands[0])
2134     emit_move_insn (operands[0], t);
2135 }
2136 
2137 /* Emit the appropriate sequence for a call.  */
2138 
2139 void
2140 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
2141 		  int sibcall_p)
2142 {
2143   rtx insn, b0;
2144 
2145   addr = XEXP (addr, 0);
2146   addr = convert_memory_address (DImode, addr);
2147   b0 = gen_rtx_REG (DImode, R_BR (0));
2148 
2149   /* ??? Should do this for functions known to bind local too.  */
2150   if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2151     {
2152       if (sibcall_p)
2153 	insn = gen_sibcall_nogp (addr);
2154       else if (! retval)
2155 	insn = gen_call_nogp (addr, b0);
2156       else
2157 	insn = gen_call_value_nogp (retval, addr, b0);
2158       insn = emit_call_insn (insn);
2159     }
2160   else
2161     {
2162       if (sibcall_p)
2163 	insn = gen_sibcall_gp (addr);
2164       else if (! retval)
2165 	insn = gen_call_gp (addr, b0);
2166       else
2167 	insn = gen_call_value_gp (retval, addr, b0);
2168       insn = emit_call_insn (insn);
2169 
2170       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2171     }
2172 
2173   if (sibcall_p)
2174     use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
2175 
2176   if (TARGET_ABI_OPEN_VMS)
2177     use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2178 	     gen_rtx_REG (DImode, GR_REG (25)));
2179 }
2180 
2181 static void
2182 reg_emitted (enum ia64_frame_regs r)
2183 {
2184   if (emitted_frame_related_regs[r] == 0)
2185     emitted_frame_related_regs[r] = current_frame_info.r[r];
2186   else
2187     gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2188 }
2189 
2190 static int
2191 get_reg (enum ia64_frame_regs r)
2192 {
2193   reg_emitted (r);
2194   return current_frame_info.r[r];
2195 }
2196 
2197 static bool
2198 is_emitted (int regno)
2199 {
2200   unsigned int r;
2201 
2202   for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2203     if (emitted_frame_related_regs[r] == regno)
2204       return true;
2205   return false;
2206 }
2207 
2208 void
2209 ia64_reload_gp (void)
2210 {
2211   rtx tmp;
2212 
2213   if (current_frame_info.r[reg_save_gp])
2214     {
2215       tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2216     }
2217   else
2218     {
2219       HOST_WIDE_INT offset;
2220       rtx offset_r;
2221 
2222       offset = (current_frame_info.spill_cfa_off
2223 	        + current_frame_info.spill_size);
2224       if (frame_pointer_needed)
2225         {
2226           tmp = hard_frame_pointer_rtx;
2227           offset = -offset;
2228         }
2229       else
2230         {
2231           tmp = stack_pointer_rtx;
2232           offset = current_frame_info.total_size - offset;
2233         }
2234 
2235       offset_r = GEN_INT (offset);
2236       if (satisfies_constraint_I (offset_r))
2237         emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
2238       else
2239         {
2240           emit_move_insn (pic_offset_table_rtx, offset_r);
2241           emit_insn (gen_adddi3 (pic_offset_table_rtx,
2242 			         pic_offset_table_rtx, tmp));
2243         }
2244 
2245       tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2246     }
2247 
2248   emit_move_insn (pic_offset_table_rtx, tmp);
2249 }
2250 
2251 void
2252 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2253 		 rtx scratch_b, int noreturn_p, int sibcall_p)
2254 {
2255   rtx insn;
2256   bool is_desc = false;
2257 
2258   /* If we find we're calling through a register, then we're actually
2259      calling through a descriptor, so load up the values.  */
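  /* On IA-64 such a target is a two-word function descriptor: the first
     word is the code entry point (loaded into SCRATCH_B below) and the
     second is the callee's gp value (loaded into r1 via
     pic_offset_table_rtx).  */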
2260   if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2261     {
2262       rtx tmp;
2263       bool addr_dead_p;
2264 
2265       /* ??? We are currently constrained to *not* use peep2, because
2266 	 we can legitimately change the global lifetime of the GP
2267 	 (in the form of killing where previously live).  This is
2268 	 because a call through a descriptor doesn't use the previous
2269 	 value of the GP, while a direct call does, and we do not
2270 	 commit to either form until the split here.
2271 
2272 	 That said, this means that we lack precise life info for
2273 	 whether ADDR is dead after this call.  This is not terribly
2274 	 important, since we can fix things up essentially for free
2275 	 with the POST_DEC below, but it's nice to not use it when we
2276 	 can immediately tell it's not necessary.  */
2277       addr_dead_p = ((noreturn_p || sibcall_p
2278 		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2279 					    REGNO (addr)))
2280 		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2281 
2282       /* Load the code address into scratch_b.  */
2283       tmp = gen_rtx_POST_INC (Pmode, addr);
2284       tmp = gen_rtx_MEM (Pmode, tmp);
2285       emit_move_insn (scratch_r, tmp);
2286       emit_move_insn (scratch_b, scratch_r);
2287 
2288       /* Load the GP address.  If ADDR is not dead here, then we must
2289 	 revert the change made above via the POST_INCREMENT.  */
2290       if (!addr_dead_p)
2291 	tmp = gen_rtx_POST_DEC (Pmode, addr);
2292       else
2293 	tmp = addr;
2294       tmp = gen_rtx_MEM (Pmode, tmp);
2295       emit_move_insn (pic_offset_table_rtx, tmp);
2296 
2297       is_desc = true;
2298       addr = scratch_b;
2299     }
2300 
2301   if (sibcall_p)
2302     insn = gen_sibcall_nogp (addr);
2303   else if (retval)
2304     insn = gen_call_value_nogp (retval, addr, retaddr);
2305   else
2306     insn = gen_call_nogp (addr, retaddr);
2307   emit_call_insn (insn);
2308 
2309   if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2310     ia64_reload_gp ();
2311 }
2312 
2313 /* Expand an atomic operation.  We want to perform MEM <CODE>= VAL atomically.
2314 
2315    This differs from the generic code in that we know about the zero-extending
2316    properties of cmpxchg, and the zero-extending requirements of ar.ccv.  We
2317    also know that ld.acq+cmpxchg.rel equals a full barrier.
2318 
2319    The loop we want to generate looks like
2320 
2321 	cmp_reg = mem;
2322       label:
2323         old_reg = cmp_reg;
2324 	new_reg = cmp_reg op val;
2325 	cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2326 	if (cmp_reg != old_reg)
2327 	  goto label;
2328 
2329    Note that we only do the plain load from memory once.  Subsequent
2330    iterations use the value loaded by the compare-and-swap pattern.  */
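/* For example, an atomic add of the constant 1 to an SImode location
   satisfies fetchadd_operand and is handled by the fetchadd path below with
   no loop at all, whereas an addend such as 5, which fetchadd cannot encode,
   falls back to the ld.acq/cmpxchg.rel loop sketched above.  */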
2331 
2332 void
2333 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2334 		       rtx old_dst, rtx new_dst, enum memmodel model)
2335 {
2336   machine_mode mode = GET_MODE (mem);
2337   rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2338   enum insn_code icode;
2339 
2340   /* Special case for using fetchadd.  */
2341   if ((mode == SImode || mode == DImode)
2342       && (code == PLUS || code == MINUS)
2343       && fetchadd_operand (val, mode))
2344     {
2345       if (code == MINUS)
2346 	val = GEN_INT (-INTVAL (val));
2347 
2348       if (!old_dst)
2349         old_dst = gen_reg_rtx (mode);
2350 
2351       switch (model)
2352 	{
2353 	case MEMMODEL_ACQ_REL:
2354 	case MEMMODEL_SEQ_CST:
2355 	case MEMMODEL_SYNC_SEQ_CST:
2356 	  emit_insn (gen_memory_barrier ());
2357 	  /* FALLTHRU */
2358 	case MEMMODEL_RELAXED:
2359 	case MEMMODEL_ACQUIRE:
2360 	case MEMMODEL_SYNC_ACQUIRE:
2361 	case MEMMODEL_CONSUME:
2362 	  if (mode == SImode)
2363 	    icode = CODE_FOR_fetchadd_acq_si;
2364 	  else
2365 	    icode = CODE_FOR_fetchadd_acq_di;
2366 	  break;
2367 	case MEMMODEL_RELEASE:
2368 	case MEMMODEL_SYNC_RELEASE:
2369 	  if (mode == SImode)
2370 	    icode = CODE_FOR_fetchadd_rel_si;
2371 	  else
2372 	    icode = CODE_FOR_fetchadd_rel_di;
2373 	  break;
2374 
2375 	default:
2376 	  gcc_unreachable ();
2377 	}
2378 
2379       emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2380 
2381       if (new_dst)
2382 	{
2383 	  new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2384 					 true, OPTAB_WIDEN);
2385 	  if (new_reg != new_dst)
2386 	    emit_move_insn (new_dst, new_reg);
2387 	}
2388       return;
2389     }
2390 
2391   /* Because of the volatile mem read, we get an ld.acq, which is the
2392      front half of the full barrier.  The end half is the cmpxchg.rel.
2393      For relaxed and release memory models, we don't need this.  But we
2394      also don't bother trying to prevent it either.  */
2395   gcc_assert (is_mm_relaxed (model) || is_mm_release (model)
2396 	      || MEM_VOLATILE_P (mem));
2397 
2398   old_reg = gen_reg_rtx (DImode);
2399   cmp_reg = gen_reg_rtx (DImode);
2400   label = gen_label_rtx ();
2401 
2402   if (mode != DImode)
2403     {
2404       val = simplify_gen_subreg (DImode, val, mode, 0);
2405       emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2406     }
2407   else
2408     emit_move_insn (cmp_reg, mem);
2409 
2410   emit_label (label);
2411 
2412   ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2413   emit_move_insn (old_reg, cmp_reg);
2414   emit_move_insn (ar_ccv, cmp_reg);
2415 
2416   if (old_dst)
2417     emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2418 
2419   new_reg = cmp_reg;
2420   if (code == NOT)
2421     {
2422       new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2423 				     true, OPTAB_DIRECT);
2424       new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
2425     }
2426   else
2427     new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2428 				   true, OPTAB_DIRECT);
2429 
2430   if (mode != DImode)
2431     new_reg = gen_lowpart (mode, new_reg);
2432   if (new_dst)
2433     emit_move_insn (new_dst, new_reg);
2434 
2435   switch (model)
2436     {
2437     case MEMMODEL_RELAXED:
2438     case MEMMODEL_ACQUIRE:
2439     case MEMMODEL_SYNC_ACQUIRE:
2440     case MEMMODEL_CONSUME:
2441       switch (mode)
2442 	{
2443 	case QImode: icode = CODE_FOR_cmpxchg_acq_qi;  break;
2444 	case HImode: icode = CODE_FOR_cmpxchg_acq_hi;  break;
2445 	case SImode: icode = CODE_FOR_cmpxchg_acq_si;  break;
2446 	case DImode: icode = CODE_FOR_cmpxchg_acq_di;  break;
2447 	default:
2448 	  gcc_unreachable ();
2449 	}
2450       break;
2451 
2452     case MEMMODEL_RELEASE:
2453     case MEMMODEL_SYNC_RELEASE:
2454     case MEMMODEL_ACQ_REL:
2455     case MEMMODEL_SEQ_CST:
2456     case MEMMODEL_SYNC_SEQ_CST:
2457       switch (mode)
2458 	{
2459 	case QImode: icode = CODE_FOR_cmpxchg_rel_qi;  break;
2460 	case HImode: icode = CODE_FOR_cmpxchg_rel_hi;  break;
2461 	case SImode: icode = CODE_FOR_cmpxchg_rel_si;  break;
2462 	case DImode: icode = CODE_FOR_cmpxchg_rel_di;  break;
2463 	default:
2464 	  gcc_unreachable ();
2465 	}
2466       break;
2467 
2468     default:
2469       gcc_unreachable ();
2470     }
2471 
2472   emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2473 
2474   emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
2475 }
2476 
2477 /* Begin the assembly file.  */
2478 
2479 static void
2480 ia64_file_start (void)
2481 {
2482   default_file_start ();
2483   emit_safe_across_calls ();
2484 }
2485 
2486 void
2487 emit_safe_across_calls (void)
2488 {
2489   unsigned int rs, re;
2490   int out_state;
2491 
2492   rs = 1;
2493   out_state = 0;
2494   while (1)
2495     {
2496       while (rs < 64 && call_used_regs[PR_REG (rs)])
2497 	rs++;
2498       if (rs >= 64)
2499 	break;
2500       for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2501 	continue;
2502       if (out_state == 0)
2503 	{
2504 	  fputs ("\t.pred.safe_across_calls ", asm_out_file);
2505 	  out_state = 1;
2506 	}
2507       else
2508 	fputc (',', asm_out_file);
2509       if (re == rs + 1)
2510 	fprintf (asm_out_file, "p%u", rs);
2511       else
2512 	fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2513       rs = re + 1;
2514     }
2515   if (out_state)
2516     fputc ('\n', asm_out_file);
2517 }
2518 
2519 /* Globalize a declaration.  */
2520 
2521 static void
2522 ia64_globalize_decl_name (FILE * stream, tree decl)
2523 {
2524   const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2525   tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2526   if (version_attr)
2527     {
2528       tree v = TREE_VALUE (TREE_VALUE (version_attr));
2529       const char *p = TREE_STRING_POINTER (v);
2530       fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2531     }
2532   targetm.asm_out.globalize_label (stream, name);
2533   if (TREE_CODE (decl) == FUNCTION_DECL)
2534     ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
2535 }
2536 
2537 /* Helper function for ia64_compute_frame_size: find an appropriate general
2538    register to spill some special register to.  SPECIAL_SPILL_MASK contains
2539    bits in GR0 to GR31 that have already been allocated by this routine.
2540    TRY_LOCALS is true if we should attempt to locate a local regnum.  */
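/* For example, in a leaf function an otherwise unused call-clobbered GR
   such as r14 can be handed out directly; failing that, a fresh local
   (locN) register is used when TRY_LOCALS is set, and a return value of 0
   tells the caller that the value must be spilled to the memory stack.  */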
2541 
2542 static int
2543 find_gr_spill (enum ia64_frame_regs r, int try_locals)
2544 {
2545   int regno;
2546 
2547   if (emitted_frame_related_regs[r] != 0)
2548     {
2549       regno = emitted_frame_related_regs[r];
2550       if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2551 	  && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
2552         current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2553       else if (crtl->is_leaf
2554                && regno >= GR_REG (1) && regno <= GR_REG (31))
2555         current_frame_info.gr_used_mask |= 1 << regno;
2556 
2557       return regno;
2558     }
2559 
2560   /* If this is a leaf function, first try an otherwise unused
2561      call-clobbered register.  */
2562   if (crtl->is_leaf)
2563     {
2564       for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2565 	if (! df_regs_ever_live_p (regno)
2566 	    && call_used_regs[regno]
2567 	    && ! fixed_regs[regno]
2568 	    && ! global_regs[regno]
2569 	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2570             && ! is_emitted (regno))
2571 	  {
2572 	    current_frame_info.gr_used_mask |= 1 << regno;
2573 	    return regno;
2574 	  }
2575     }
2576 
2577   if (try_locals)
2578     {
2579       regno = current_frame_info.n_local_regs;
2580       /* If there is a frame pointer, then we can't use loc79, because
2581 	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
2582 	 reg_name switching code in ia64_expand_prologue.  */
2583       while (regno < (80 - frame_pointer_needed))
2584 	if (! is_emitted (LOC_REG (regno++)))
2585 	  {
2586 	    current_frame_info.n_local_regs = regno;
2587 	    return LOC_REG (regno - 1);
2588 	  }
2589     }
2590 
2591   /* Failed to find a general register to spill to.  Must use stack.  */
2592   return 0;
2593 }
2594 
2595 /* In order to make for nice schedules, we try to allocate every temporary
2596    to a different register.  We must of course stay away from call-saved,
2597    fixed, and global registers.  We must also stay away from registers
2598    allocated in current_frame_info.gr_used_mask, since those include regs
2599    used all through the prologue.
2600 
2601    Any register allocated here must be used immediately.  The idea is to
2602    aid scheduling, not to solve data flow problems.  */
2603 
2604 static int last_scratch_gr_reg;
2605 
2606 static int
2607 next_scratch_gr_reg (void)
2608 {
2609   int i, regno;
2610 
2611   for (i = 0; i < 32; ++i)
2612     {
2613       regno = (last_scratch_gr_reg + i + 1) & 31;
2614       if (call_used_regs[regno]
2615 	  && ! fixed_regs[regno]
2616 	  && ! global_regs[regno]
2617 	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2618 	{
2619 	  last_scratch_gr_reg = regno;
2620 	  return regno;
2621 	}
2622     }
2623 
2624   /* There must be _something_ available.  */
2625   gcc_unreachable ();
2626 }
2627 
2628 /* Helper function for ia64_compute_frame_size, called through
2629    diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */
2630 
2631 static void
2632 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2633 {
2634   unsigned int regno = REGNO (reg);
2635   if (regno < 32)
2636     {
2637       unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2638       for (i = 0; i < n; ++i)
2639 	current_frame_info.gr_used_mask |= 1 << (regno + i);
2640     }
2641 }
2642 
2643 
2644 /* Compute the layout of the stack frame for the current function and record
2645    it in current_frame_info.  SIZE is the number of bytes of space needed for
2646    local variables.  */
2647 
2648 static void
2649 ia64_compute_frame_size (HOST_WIDE_INT size)
2650 {
2651   HOST_WIDE_INT total_size;
2652   HOST_WIDE_INT spill_size = 0;
2653   HOST_WIDE_INT extra_spill_size = 0;
2654   HOST_WIDE_INT pretend_args_size;
2655   HARD_REG_SET mask;
2656   int n_spilled = 0;
2657   int spilled_gr_p = 0;
2658   int spilled_fr_p = 0;
2659   unsigned int regno;
2660   int min_regno;
2661   int max_regno;
2662   int i;
2663 
2664   if (current_frame_info.initialized)
2665     return;
2666 
2667   memset (&current_frame_info, 0, sizeof current_frame_info);
2668   CLEAR_HARD_REG_SET (mask);
2669 
2670   /* Don't allocate scratches to the return register.  */
2671   diddle_return_value (mark_reg_gr_used_mask, NULL);
2672 
2673   /* Don't allocate scratches to the EH scratch registers.  */
2674   if (cfun->machine->ia64_eh_epilogue_sp)
2675     mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2676   if (cfun->machine->ia64_eh_epilogue_bsp)
2677     mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2678 
2679   /* Static stack checking uses r2 and r3.  */
2680   if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
2681     current_frame_info.gr_used_mask |= 0xc;
2682 
2683   /* Find the size of the register stack frame.  We have only 80 local
2684      registers, because we reserve 8 for the inputs and 8 for the
2685      outputs.  */
2686 
2687   /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2688      since we'll be adjusting that down later.  */
2689   regno = LOC_REG (78) + ! frame_pointer_needed;
2690   for (; regno >= LOC_REG (0); regno--)
2691     if (df_regs_ever_live_p (regno) && !is_emitted (regno))
2692       break;
2693   current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2694 
2695   /* For functions marked with the syscall_linkage attribute, we must mark
2696      all eight input registers as in use, so that locals aren't visible to
2697      the caller.  */
2698 
2699   if (cfun->machine->n_varargs > 0
2700       || lookup_attribute ("syscall_linkage",
2701 			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2702     current_frame_info.n_input_regs = 8;
2703   else
2704     {
2705       for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2706 	if (df_regs_ever_live_p (regno))
2707 	  break;
2708       current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2709     }
2710 
2711   for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2712     if (df_regs_ever_live_p (regno))
2713       break;
2714   i = regno - OUT_REG (0) + 1;
2715 
2716 #ifndef PROFILE_HOOK
2717   /* When -p profiling, we need one output register for the mcount argument.
2718      Likewise for -a profiling for the bb_init_func argument.  For -ax
2719      profiling, we need two output registers for the two bb_init_trace_func
2720      arguments.  */
2721   if (crtl->profile)
2722     i = MAX (i, 1);
2723 #endif
2724   current_frame_info.n_output_regs = i;
2725 
2726   /* ??? No rotating register support yet.  */
2727   current_frame_info.n_rotate_regs = 0;
2728 
2729   /* Discover which registers need spilling, and how much room that
2730      will take.  Begin with floating point and general registers,
2731      which will always wind up on the stack.  */
2732 
2733   for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2734     if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2735       {
2736 	SET_HARD_REG_BIT (mask, regno);
2737 	spill_size += 16;
2738 	n_spilled += 1;
2739 	spilled_fr_p = 1;
2740       }
2741 
2742   for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2743     if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2744       {
2745 	SET_HARD_REG_BIT (mask, regno);
2746 	spill_size += 8;
2747 	n_spilled += 1;
2748 	spilled_gr_p = 1;
2749       }
2750 
2751   for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2752     if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2753       {
2754 	SET_HARD_REG_BIT (mask, regno);
2755 	spill_size += 8;
2756 	n_spilled += 1;
2757       }
2758 
2759   /* Now come all special registers that might get saved in other
2760      general registers.  */
2761 
2762   if (frame_pointer_needed)
2763     {
2764       current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
2765       /* If we did not get a register, then we take LOC79.  This is guaranteed
2766 	 to be free, even if regs_ever_live is already set, because this is
2767 	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
2768 	 as we don't count loc79 above.  */
2769       if (current_frame_info.r[reg_fp] == 0)
2770 	{
2771 	  current_frame_info.r[reg_fp] = LOC_REG (79);
2772 	  current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
2773 	}
2774     }
2775 
2776   if (! crtl->is_leaf)
2777     {
2778       /* Emit a save of BR0 if we call other functions.  Do this even
2779 	 if this function doesn't return, as EH depends on this to be
2780 	 able to unwind the stack.  */
2781       SET_HARD_REG_BIT (mask, BR_REG (0));
2782 
2783       current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2784       if (current_frame_info.r[reg_save_b0] == 0)
2785 	{
2786 	  extra_spill_size += 8;
2787 	  n_spilled += 1;
2788 	}
2789 
2790       /* Similarly for ar.pfs.  */
2791       SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2792       current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2793       if (current_frame_info.r[reg_save_ar_pfs] == 0)
2794 	{
2795 	  extra_spill_size += 8;
2796 	  n_spilled += 1;
2797 	}
2798 
2799       /* Similarly for gp.  Note that if we're calling setjmp, the stacked
2800 	 registers are clobbered, so we fall back to the stack.  */
2801       current_frame_info.r[reg_save_gp]
2802 	= (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
2803       if (current_frame_info.r[reg_save_gp] == 0)
2804 	{
2805 	  SET_HARD_REG_BIT (mask, GR_REG (1));
2806 	  spill_size += 8;
2807 	  n_spilled += 1;
2808 	}
2809     }
2810   else
2811     {
2812       if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
2813 	{
2814 	  SET_HARD_REG_BIT (mask, BR_REG (0));
2815 	  extra_spill_size += 8;
2816 	  n_spilled += 1;
2817 	}
2818 
2819       if (df_regs_ever_live_p (AR_PFS_REGNUM))
2820 	{
2821 	  SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2822  	  current_frame_info.r[reg_save_ar_pfs]
2823             = find_gr_spill (reg_save_ar_pfs, 1);
2824 	  if (current_frame_info.r[reg_save_ar_pfs] == 0)
2825 	    {
2826 	      extra_spill_size += 8;
2827 	      n_spilled += 1;
2828 	    }
2829 	}
2830     }
2831 
2832   /* Unwind descriptor hackery: things are most efficient if we allocate
2833      consecutive GR save registers for RP, PFS, FP in that order. However,
2834      it is absolutely critical that FP get the only hard register that's
2835      guaranteed to be free, so we allocated it first.  If all three did
2836      happen to be allocated hard regs, and are consecutive, rearrange them
2837      into the preferred order now.
2838 
2839      If we have already emitted code for any of those registers,
2840      then it's already too late to change.  */
2841   min_regno = MIN (current_frame_info.r[reg_fp],
2842 		   MIN (current_frame_info.r[reg_save_b0],
2843 			current_frame_info.r[reg_save_ar_pfs]));
2844   max_regno = MAX (current_frame_info.r[reg_fp],
2845 		   MAX (current_frame_info.r[reg_save_b0],
2846 			current_frame_info.r[reg_save_ar_pfs]));
2847   if (min_regno > 0
2848       && min_regno + 2 == max_regno
2849       && (current_frame_info.r[reg_fp] == min_regno + 1
2850 	  || current_frame_info.r[reg_save_b0] == min_regno + 1
2851 	  || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2852       && (emitted_frame_related_regs[reg_save_b0] == 0
2853 	  || emitted_frame_related_regs[reg_save_b0] == min_regno)
2854       && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2855 	  || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2856       && (emitted_frame_related_regs[reg_fp] == 0
2857 	  || emitted_frame_related_regs[reg_fp] == min_regno + 2))
2858     {
2859       current_frame_info.r[reg_save_b0] = min_regno;
2860       current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2861       current_frame_info.r[reg_fp] = min_regno + 2;
2862     }
2863 
2864   /* See if we need to store the predicate register block.  */
2865   for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2866     if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2867       break;
2868   if (regno <= PR_REG (63))
2869     {
2870       SET_HARD_REG_BIT (mask, PR_REG (0));
2871       current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2872       if (current_frame_info.r[reg_save_pr] == 0)
2873 	{
2874 	  extra_spill_size += 8;
2875 	  n_spilled += 1;
2876 	}
2877 
2878       /* ??? Mark them all as used so that register renaming and such
2879 	 are free to use them.  */
2880       for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2881 	df_set_regs_ever_live (regno, true);
2882     }
2883 
2884   /* If we're forced to use st8.spill, we're forced to save and restore
2885      ar.unat as well.  The check for existing liveness allows inline asm
2886      to touch ar.unat.  */
2887   if (spilled_gr_p || cfun->machine->n_varargs
2888       || df_regs_ever_live_p (AR_UNAT_REGNUM))
2889     {
2890       df_set_regs_ever_live (AR_UNAT_REGNUM, true);
2891       SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2892       current_frame_info.r[reg_save_ar_unat]
2893         = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2894       if (current_frame_info.r[reg_save_ar_unat] == 0)
2895 	{
2896 	  extra_spill_size += 8;
2897 	  n_spilled += 1;
2898 	}
2899     }
2900 
2901   if (df_regs_ever_live_p (AR_LC_REGNUM))
2902     {
2903       SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2904       current_frame_info.r[reg_save_ar_lc]
2905         = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2906       if (current_frame_info.r[reg_save_ar_lc] == 0)
2907 	{
2908 	  extra_spill_size += 8;
2909 	  n_spilled += 1;
2910 	}
2911     }
2912 
2913   /* If we have an odd number of words of pretend arguments written to
2914      the stack, then the FR save area will be unaligned.  We round the
2915      size of this area up to keep things 16 byte aligned.  */
2916   if (spilled_fr_p)
2917     pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
2918   else
2919     pretend_args_size = crtl->args.pretend_args_size;
2920 
2921   total_size = (spill_size + extra_spill_size + size + pretend_args_size
2922 		+ crtl->outgoing_args_size);
2923   total_size = IA64_STACK_ALIGN (total_size);
2924 
2925   /* We always use the 16-byte scratch area provided by the caller, but
2926      if we are a leaf function, there's no one to which we need to provide
2927      a scratch area.  However, if the function allocates dynamic stack space,
2928      the dynamic offset is computed early and contains STACK_POINTER_OFFSET,
2929      so we need to cope.  */
2930   if (crtl->is_leaf && !cfun->calls_alloca)
2931     total_size = MAX (0, total_size - 16);
2932 
2933   current_frame_info.total_size = total_size;
2934   current_frame_info.spill_cfa_off = pretend_args_size - 16;
2935   current_frame_info.spill_size = spill_size;
2936   current_frame_info.extra_spill_size = extra_spill_size;
2937   COPY_HARD_REG_SET (current_frame_info.mask, mask);
2938   current_frame_info.n_spilled = n_spilled;
2939   current_frame_info.initialized = reload_completed;
2940 }
2941 
2942 /* Worker function for TARGET_CAN_ELIMINATE.  */
2943 
2944 bool
2945 ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2946 {
2947   return (to == BR_REG (0) ? crtl->is_leaf : true);
2948 }
2949 
2950 /* Compute the initial difference between the specified pair of registers.  */
2951 
2952 HOST_WIDE_INT
2953 ia64_initial_elimination_offset (int from, int to)
2954 {
2955   HOST_WIDE_INT offset;
2956 
2957   ia64_compute_frame_size (get_frame_size ());
2958   switch (from)
2959     {
2960     case FRAME_POINTER_REGNUM:
2961       switch (to)
2962 	{
2963 	case HARD_FRAME_POINTER_REGNUM:
2964 	  offset = -current_frame_info.total_size;
2965 	  if (!crtl->is_leaf || cfun->calls_alloca)
2966 	    offset += 16 + crtl->outgoing_args_size;
2967 	  break;
2968 
2969 	case STACK_POINTER_REGNUM:
2970 	  offset = 0;
2971 	  if (!crtl->is_leaf || cfun->calls_alloca)
2972 	    offset += 16 + crtl->outgoing_args_size;
2973 	  break;
2974 
2975 	default:
2976 	  gcc_unreachable ();
2977 	}
2978       break;
2979 
2980     case ARG_POINTER_REGNUM:
2981       /* Arguments start above the 16 byte save area, unless stdarg,
2982 	 in which case we store through the 16 byte save area.  */
2983       switch (to)
2984 	{
2985 	case HARD_FRAME_POINTER_REGNUM:
2986 	  offset = 16 - crtl->args.pretend_args_size;
2987 	  break;
2988 
2989 	case STACK_POINTER_REGNUM:
2990 	  offset = (current_frame_info.total_size
2991 		    + 16 - crtl->args.pretend_args_size);
2992 	  break;
2993 
2994 	default:
2995 	  gcc_unreachable ();
2996 	}
2997       break;
2998 
2999     default:
3000       gcc_unreachable ();
3001     }
3002 
3003   return offset;
3004 }
3005 
3006 /* If there are more than a trivial number of register spills, we use
3007    two interleaved iterators so that we can get two memory references
3008    per insn group.
3009 
3010    In order to simplify things in the prologue and epilogue expanders,
3011    we use helper functions to fix up the memory references after the
3012    fact with the appropriate offsets to a POST_MODIFY memory mode.
3013    The following data structure tracks the state of the two iterators
3014    while insns are being emitted.  */
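/* For example, with four registers to save there are two iterators: saves
   1 and 3 go through iter_reg[0], saves 2 and 4 through iter_reg[1].  Once
   the next offset for a given iterator is known, spill_restore_mem patches
   that iterator's previous address into a POST_MODIFY (when the displacement
   fits), so the two memory accesses of a pair can issue in one insn group.  */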
3015 
3016 struct spill_fill_data
3017 {
3018   rtx_insn *init_after;		/* point at which to emit initializations */
3019   rtx init_reg[2];		/* initial base register */
3020   rtx iter_reg[2];		/* the iterator registers */
3021   rtx *prev_addr[2];		/* address of last memory use */
3022   rtx_insn *prev_insn[2];	/* the insn corresponding to prev_addr */
3023   HOST_WIDE_INT prev_off[2];	/* last offset */
3024   int n_iter;			/* number of iterators in use */
3025   int next_iter;		/* next iterator to use */
3026   unsigned int save_gr_used_mask;
3027 };
3028 
3029 static struct spill_fill_data spill_fill_data;
3030 
3031 static void
3032 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
3033 {
3034   int i;
3035 
3036   spill_fill_data.init_after = get_last_insn ();
3037   spill_fill_data.init_reg[0] = init_reg;
3038   spill_fill_data.init_reg[1] = init_reg;
3039   spill_fill_data.prev_addr[0] = NULL;
3040   spill_fill_data.prev_addr[1] = NULL;
3041   spill_fill_data.prev_insn[0] = NULL;
3042   spill_fill_data.prev_insn[1] = NULL;
3043   spill_fill_data.prev_off[0] = cfa_off;
3044   spill_fill_data.prev_off[1] = cfa_off;
3045   spill_fill_data.next_iter = 0;
3046   spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
3047 
3048   spill_fill_data.n_iter = 1 + (n_spills > 2);
3049   for (i = 0; i < spill_fill_data.n_iter; ++i)
3050     {
3051       int regno = next_scratch_gr_reg ();
3052       spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
3053       current_frame_info.gr_used_mask |= 1 << regno;
3054     }
3055 }
3056 
3057 static void
3058 finish_spill_pointers (void)
3059 {
3060   current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
3061 }
3062 
3063 static rtx
3064 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
3065 {
3066   int iter = spill_fill_data.next_iter;
3067   HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
3068   rtx disp_rtx = GEN_INT (disp);
3069   rtx mem;
3070 
3071   if (spill_fill_data.prev_addr[iter])
3072     {
3073       if (satisfies_constraint_N (disp_rtx))
3074 	{
3075 	  *spill_fill_data.prev_addr[iter]
3076 	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
3077 				   gen_rtx_PLUS (DImode,
3078 						 spill_fill_data.iter_reg[iter],
3079 						 disp_rtx));
3080 	  add_reg_note (spill_fill_data.prev_insn[iter],
3081 			REG_INC, spill_fill_data.iter_reg[iter]);
3082 	}
3083       else
3084 	{
3085 	  /* ??? Could use register post_modify for loads.  */
3086 	  if (!satisfies_constraint_I (disp_rtx))
3087 	    {
3088 	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3089 	      emit_move_insn (tmp, disp_rtx);
3090 	      disp_rtx = tmp;
3091 	    }
3092 	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3093 				 spill_fill_data.iter_reg[iter], disp_rtx));
3094 	}
3095     }
3096   /* Micro-optimization: if we've created a frame pointer, it's at
3097      CFA 0, which may allow the real iterator to be initialized lower,
3098      slightly increasing parallelism.  Also, if there are few saves
3099      it may eliminate the iterator entirely.  */
3100   else if (disp == 0
3101 	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
3102 	   && frame_pointer_needed)
3103     {
3104       mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
3105       set_mem_alias_set (mem, get_varargs_alias_set ());
3106       return mem;
3107     }
3108   else
3109     {
3110       rtx seq;
3111       rtx_insn *insn;
3112 
3113       if (disp == 0)
3114 	seq = gen_movdi (spill_fill_data.iter_reg[iter],
3115 			 spill_fill_data.init_reg[iter]);
3116       else
3117 	{
3118 	  start_sequence ();
3119 
3120 	  if (!satisfies_constraint_I (disp_rtx))
3121 	    {
3122 	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3123 	      emit_move_insn (tmp, disp_rtx);
3124 	      disp_rtx = tmp;
3125 	    }
3126 
3127 	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3128 				 spill_fill_data.init_reg[iter],
3129 				 disp_rtx));
3130 
3131 	  seq = get_insns ();
3132 	  end_sequence ();
3133 	}
3134 
3135       /* Careful for being the first insn in a sequence.  */
3136       if (spill_fill_data.init_after)
3137 	insn = emit_insn_after (seq, spill_fill_data.init_after);
3138       else
3139 	{
3140 	  rtx_insn *first = get_insns ();
3141 	  if (first)
3142 	    insn = emit_insn_before (seq, first);
3143 	  else
3144 	    insn = emit_insn (seq);
3145 	}
3146       spill_fill_data.init_after = insn;
3147     }
3148 
3149   mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
3150 
3151   /* ??? Not all of the spills are for varargs, but some of them are.
3152      The rest of the spills belong in an alias set of their own.  But
3153      it doesn't actually hurt to include them here.  */
3154   set_mem_alias_set (mem, get_varargs_alias_set ());
3155 
3156   spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
3157   spill_fill_data.prev_off[iter] = cfa_off;
3158 
3159   if (++iter >= spill_fill_data.n_iter)
3160     iter = 0;
3161   spill_fill_data.next_iter = iter;
3162 
3163   return mem;
3164 }
3165 
3166 static void
3167 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
3168 	  rtx frame_reg)
3169 {
3170   int iter = spill_fill_data.next_iter;
3171   rtx mem;
3172   rtx_insn *insn;
3173 
3174   mem = spill_restore_mem (reg, cfa_off);
3175   insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
3176   spill_fill_data.prev_insn[iter] = insn;
3177 
3178   if (frame_reg)
3179     {
3180       rtx base;
3181       HOST_WIDE_INT off;
3182 
3183       RTX_FRAME_RELATED_P (insn) = 1;
3184 
3185       /* Don't even pretend that the unwind code can intuit its way
3186 	 through a pair of interleaved post_modify iterators.  Just
3187 	 provide the correct answer.  */
3188 
3189       if (frame_pointer_needed)
3190 	{
3191 	  base = hard_frame_pointer_rtx;
3192 	  off = - cfa_off;
3193 	}
3194       else
3195 	{
3196 	  base = stack_pointer_rtx;
3197 	  off = current_frame_info.total_size - cfa_off;
3198 	}
3199 
3200       add_reg_note (insn, REG_CFA_OFFSET,
3201 		    gen_rtx_SET (gen_rtx_MEM (GET_MODE (reg),
3202 					      plus_constant (Pmode,
3203 							     base, off)),
3204 				 frame_reg));
3205     }
3206 }
3207 
3208 static void
3209 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
3210 {
3211   int iter = spill_fill_data.next_iter;
3212   rtx_insn *insn;
3213 
3214   insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3215 				GEN_INT (cfa_off)));
3216   spill_fill_data.prev_insn[iter] = insn;
3217 }
3218 
3219 /* Wrapper functions that discard the CONST_INT spill offset.  These
3220    exist so that we can give gr_spill/gr_fill the offset they need and
3221    use a consistent function interface.  */
3222 
3223 static rtx
3224 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3225 {
3226   return gen_movdi (dest, src);
3227 }
3228 
3229 static rtx
3230 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3231 {
3232   return gen_fr_spill (dest, src);
3233 }
3234 
3235 static rtx
3236 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3237 {
3238   return gen_fr_restore (dest, src);
3239 }
3240 
3241 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
3242 
3243 /* See Table 6.2 of the IA-64 Software Developer Manual, Volume 2.  */
3244 #define BACKING_STORE_SIZE(N) ((N) > 0 ? ((N) + (N)/63 + 1) * 8 : 0)
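
/* For instance, with the 96 possible stacked registers the macro gives
   BACKING_STORE_SIZE (96) = (96 + 96/63 + 1) * 8 = 784 bytes; the extra
   terms leave room for the RSE's NaT collection words that are stored
   alongside the spilled registers.  */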
3245 
3246 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
3247    inclusive.  These are offsets from the current stack pointer.  BS_SIZE
3248    is the size of the backing store.  ??? This clobbers r2 and r3.  */
3249 
3250 static void
3251 ia64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
3252 			     int bs_size)
3253 {
3254   rtx r2 = gen_rtx_REG (Pmode, GR_REG (2));
3255   rtx r3 = gen_rtx_REG (Pmode, GR_REG (3));
3256   rtx p6 = gen_rtx_REG (BImode, PR_REG (6));
3257 
3258   /* On the IA-64 there is a second stack in memory, namely the Backing Store
3259      of the Register Stack Engine.  We also need to probe it after checking
3260      that the 2 stacks don't overlap.  */
3261   emit_insn (gen_bsp_value (r3));
3262   emit_move_insn (r2, GEN_INT (-(first + size)));
3263 
3264   /* Compare current value of BSP and SP registers.  */
3265   emit_insn (gen_rtx_SET (p6, gen_rtx_fmt_ee (LTU, BImode,
3266 					      r3, stack_pointer_rtx)));
3267 
3268   /* Compute the address of the probe for the Backing Store (which grows
3269      towards higher addresses).  We probe only at the first offset of
3270      the next page because some OSes (e.g. Linux/ia64) only extend the
3271      backing store when this specific address is hit (but generate a SEGV
3272      on other addresses).  Page size is the worst case (4KB).  The reserve
3273      size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough.
3274      Also compute the address of the last probe for the memory stack
3275      (which grows towards lower addresses).  */
3276   emit_insn (gen_rtx_SET (r3, plus_constant (Pmode, r3, 4095)));
3277   emit_insn (gen_rtx_SET (r2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3278 
3279   /* Compare them and raise SEGV if the former has topped the latter.  */
3280   emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3281 				gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
3282 				gen_rtx_SET (p6, gen_rtx_fmt_ee (GEU, BImode,
3283 								 r3, r2))));
3284   emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, r3, GEN_INT (12),
3285 						const0_rtx),
3286 			  const0_rtx));
3287   emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3288 				gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
3289 				gen_rtx_TRAP_IF (VOIDmode, const1_rtx,
3290 						 GEN_INT (11))));
3291 
3292   /* Probe the Backing Store if necessary.  */
3293   if (bs_size > 0)
3294     emit_stack_probe (r3);
3295 
3296   /* Probe the memory stack if necessary.  */
3297   if (size == 0)
3298     ;
3299 
3300   /* See if we have a constant small number of probes to generate.  If so,
3301      that's the easy case.  */
3302   else if (size <= PROBE_INTERVAL)
3303     emit_stack_probe (r2);
3304 
3305   /* The run-time loop is made up of 9 insns in the generic case while this
3306      compile-time loop is made up of 5+2*(n-2) insns, where n is the number of intervals.  */
3307   else if (size <= 4 * PROBE_INTERVAL)
3308     {
3309       HOST_WIDE_INT i;
3310 
3311       emit_move_insn (r2, GEN_INT (-(first + PROBE_INTERVAL)));
3312       emit_insn (gen_rtx_SET (r2,
3313 			      gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3314       emit_stack_probe (r2);
3315 
3316       /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
3317 	 it exceeds SIZE.  If only two probes are needed, this will not
3318 	 generate any code.  Then probe at FIRST + SIZE.  */
3319       for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
3320 	{
3321 	  emit_insn (gen_rtx_SET (r2,
3322 				  plus_constant (Pmode, r2, -PROBE_INTERVAL)));
3323 	  emit_stack_probe (r2);
3324 	}
3325 
3326       emit_insn (gen_rtx_SET (r2,
3327 			      plus_constant (Pmode, r2,
3328 					     (i - PROBE_INTERVAL) - size)));
3329       emit_stack_probe (r2);
3330     }
3331 
3332   /* Otherwise, do the same as above, but in a loop.  Note that we must be
3333      extra careful with variables wrapping around because we might be at
3334      the very top (or the very bottom) of the address space and we have
3335      to be able to handle this case properly; in particular, we use an
3336      equality test for the loop condition.  */
3337   else
3338     {
3339       HOST_WIDE_INT rounded_size;
3340 
3341       emit_move_insn (r2, GEN_INT (-first));
3342 
3343 
3344       /* Step 1: round SIZE to the previous multiple of the interval.  */
3345 
3346       rounded_size = size & -PROBE_INTERVAL;
3347 
3348 
3349       /* Step 2: compute initial and final value of the loop counter.  */
3350 
3351       /* TEST_ADDR = SP + FIRST.  */
3352       emit_insn (gen_rtx_SET (r2,
3353 			      gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3354 
3355       /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
3356       if (rounded_size > (1 << 21))
3357 	{
3358 	  emit_move_insn (r3, GEN_INT (-rounded_size));
3359 	  emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2, r3)));
3360 	}
3361       else
3362         emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2,
3363 						  GEN_INT (-rounded_size))));
3364 
3365 
3366       /* Step 3: the loop
3367 
3368 	 do
3369 	   {
3370 	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
3371 	     probe at TEST_ADDR
3372 	   }
3373 	 while (TEST_ADDR != LAST_ADDR)
3374 
3375 	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
3376 	 until it is equal to ROUNDED_SIZE.  */
3377 
3378       emit_insn (gen_probe_stack_range (r2, r2, r3));
3379 
3380 
3381       /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
3382 	 that SIZE is equal to ROUNDED_SIZE.  */
3383 
3384       /* TEMP = SIZE - ROUNDED_SIZE.  */
3385       if (size != rounded_size)
3386 	{
3387 	  emit_insn (gen_rtx_SET (r2, plus_constant (Pmode, r2,
3388 						     rounded_size - size)));
3389 	  emit_stack_probe (r2);
3390 	}
3391     }
3392 
3393   /* Make sure nothing is scheduled before we are done.  */
3394   emit_insn (gen_blockage ());
3395 }
3396 
3397 /* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
3398    absolute addresses.  */
3399 
3400 const char *
3401 output_probe_stack_range (rtx reg1, rtx reg2)
3402 {
3403   static int labelno = 0;
3404   char loop_lab[32];
3405   rtx xops[3];
3406 
3407   ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
3408 
3409   /* Loop.  */
3410   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
3411 
3412   /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
3413   xops[0] = reg1;
3414   xops[1] = GEN_INT (-PROBE_INTERVAL);
3415   output_asm_insn ("addl %0 = %1, %0", xops);
3416   fputs ("\t;;\n", asm_out_file);
3417 
3418   /* Probe at TEST_ADDR.  */
3419   output_asm_insn ("probe.w.fault %0, 0", xops);
3420 
3421   /* Test if TEST_ADDR == LAST_ADDR.  */
3422   xops[1] = reg2;
3423   xops[2] = gen_rtx_REG (BImode, PR_REG (6));
3424   output_asm_insn ("cmp.eq %2, %I2 = %0, %1", xops);
3425 
3426   /* Branch.  */
3427   fprintf (asm_out_file, "\t(%s) br.cond.dpnt ", reg_names [PR_REG (7)]);
3428   assemble_name_raw (asm_out_file, loop_lab);
3429   fputc ('\n', asm_out_file);
3430 
3431   return "";
3432 }
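
/* For reference, with the usual 4KB probe interval and the r2/r3 operands
   set up by ia64_emit_probe_stack_range, the loop above comes out roughly
   as:

	.LPSRL0:
		addl r2 = -4096, r2
		;;
		probe.w.fault r2, 0
		cmp.eq p6, p7 = r2, r3
		(p7) br.cond.dpnt .LPSRL0

   (the label number and predicate pair vary with the function being
   compiled).  */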
3433 
3434 /* Called after register allocation to add any instructions needed for the
3435    prologue.  Using a prologue insn is favored compared to putting all of the
3436    instructions in output_function_prologue(), since it allows the scheduler
3437    to intermix instructions with the saves of the caller saved registers.  In
3438    some cases, it might be necessary to emit a barrier instruction as the last
3439    insn to prevent such scheduling.
3440 
3441    Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3442    so that the debug info generation code can handle them properly.
3443 
3444    The register save area is laid out like so:
3445    cfa+16
3446 	[ varargs spill area ]
3447 	[ fr register spill area ]
3448 	[ br register spill area ]
3449 	[ ar register spill area ]
3450 	[ pr register spill area ]
3451 	[ gr register spill area ] */
3452 
3453 /* ??? We get inefficient code when the frame size is larger than can fit in
3454    an adds instruction.  */
3455 
3456 void
3457 ia64_expand_prologue (void)
3458 {
3459   rtx_insn *insn;
3460   rtx ar_pfs_save_reg, ar_unat_save_reg;
3461   int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3462   rtx reg, alt_reg;
3463 
3464   ia64_compute_frame_size (get_frame_size ());
3465   last_scratch_gr_reg = 15;
3466 
3467   if (flag_stack_usage_info)
3468     current_function_static_stack_size = current_frame_info.total_size;
3469 
3470   if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
3471     {
3472       HOST_WIDE_INT size = current_frame_info.total_size;
3473       int bs_size = BACKING_STORE_SIZE (current_frame_info.n_input_regs
3474 					  + current_frame_info.n_local_regs);
3475 
3476       if (crtl->is_leaf && !cfun->calls_alloca)
3477 	{
3478 	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
3479 	    ia64_emit_probe_stack_range (STACK_CHECK_PROTECT,
3480 					 size - STACK_CHECK_PROTECT,
3481 					 bs_size);
3482 	  else if (size + bs_size > STACK_CHECK_PROTECT)
3483 	    ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, 0, bs_size);
3484 	}
3485       else if (size + bs_size > 0)
3486 	ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, size, bs_size);
3487     }
3488 
3489   if (dump_file)
3490     {
3491       fprintf (dump_file, "ia64 frame related registers "
3492                "recorded in current_frame_info.r[]:\n");
3493 #define PRINTREG(a) if (current_frame_info.r[a]) \
3494         fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3495       PRINTREG(reg_fp);
3496       PRINTREG(reg_save_b0);
3497       PRINTREG(reg_save_pr);
3498       PRINTREG(reg_save_ar_pfs);
3499       PRINTREG(reg_save_ar_unat);
3500       PRINTREG(reg_save_ar_lc);
3501       PRINTREG(reg_save_gp);
3502 #undef PRINTREG
3503     }
3504 
3505   /* If there is no epilogue, then we don't need some prologue insns.
3506      We need to avoid emitting the dead prologue insns, because flow
3507      will complain about them.  */
3508   if (optimize)
3509     {
3510       edge e;
3511       edge_iterator ei;
3512 
3513       FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
3514 	if ((e->flags & EDGE_FAKE) == 0
3515 	    && (e->flags & EDGE_FALLTHRU) != 0)
3516 	  break;
3517       epilogue_p = (e != NULL);
3518     }
3519   else
3520     epilogue_p = 1;
3521 
3522   /* Set the local, input, and output register names.  We need to do this
3523      for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3524      half.  If we use in/loc/out register names, then we get assembler errors
3525      in crtn.S because there is no alloc insn or regstk directive in there.  */
3526   if (! TARGET_REG_NAMES)
3527     {
3528       int inputs = current_frame_info.n_input_regs;
3529       int locals = current_frame_info.n_local_regs;
3530       int outputs = current_frame_info.n_output_regs;
3531 
3532       for (i = 0; i < inputs; i++)
3533 	reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3534       for (i = 0; i < locals; i++)
3535 	reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3536       for (i = 0; i < outputs; i++)
3537 	reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3538     }
3539 
3540   /* Set the frame pointer register name.  The regnum is logically loc79,
3541      but of course we'll not have allocated that many locals.  Rather than
3542      worrying about renumbering the existing rtxs, we adjust the name.  */
3543   /* ??? This code means that we can never use one local register when
3544      there is a frame pointer.  loc79 gets wasted in this case, as it is
3545      renamed to a register that will never be used.  See also the try_locals
3546      code in find_gr_spill.  */
3547   if (current_frame_info.r[reg_fp])
3548     {
3549       const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3550       reg_names[HARD_FRAME_POINTER_REGNUM]
3551 	= reg_names[current_frame_info.r[reg_fp]];
3552       reg_names[current_frame_info.r[reg_fp]] = tmp;
3553     }
3554 
3555   /* We don't need an alloc instruction if we've used no outputs or locals.  */
3556   if (current_frame_info.n_local_regs == 0
3557       && current_frame_info.n_output_regs == 0
3558       && current_frame_info.n_input_regs <= crtl->args.info.int_regs
3559       && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3560     {
3561       /* If there is no alloc, but there are input registers used, then we
3562 	 need a .regstk directive.  */
3563       current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3564       ar_pfs_save_reg = NULL_RTX;
3565     }
3566   else
3567     {
3568       current_frame_info.need_regstk = 0;
3569 
3570       if (current_frame_info.r[reg_save_ar_pfs])
3571         {
3572 	  regno = current_frame_info.r[reg_save_ar_pfs];
3573 	  reg_emitted (reg_save_ar_pfs);
3574 	}
3575       else
3576 	regno = next_scratch_gr_reg ();
3577       ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3578 
3579       insn = emit_insn (gen_alloc (ar_pfs_save_reg,
3580 				   GEN_INT (current_frame_info.n_input_regs),
3581 				   GEN_INT (current_frame_info.n_local_regs),
3582 				   GEN_INT (current_frame_info.n_output_regs),
3583 				   GEN_INT (current_frame_info.n_rotate_regs)));
3584       if (current_frame_info.r[reg_save_ar_pfs])
3585 	{
3586 	  RTX_FRAME_RELATED_P (insn) = 1;
3587 	  add_reg_note (insn, REG_CFA_REGISTER,
3588 			gen_rtx_SET (ar_pfs_save_reg,
3589 				     gen_rtx_REG (DImode, AR_PFS_REGNUM)));
3590 	}
3591     }
3592 
3593   /* Set up frame pointer, stack pointer, and spill iterators.  */
3594 
3595   n_varargs = cfun->machine->n_varargs;
3596   setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3597 			stack_pointer_rtx, 0);
3598 
3599   if (frame_pointer_needed)
3600     {
3601       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3602       RTX_FRAME_RELATED_P (insn) = 1;
3603 
3604       /* Force the unwind info to recognize this as defining a new CFA,
3605 	 rather than some temp register setup.  */
3606       add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
3607     }
3608 
3609   if (current_frame_info.total_size != 0)
3610     {
3611       rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3612       rtx offset;
3613 
3614       if (satisfies_constraint_I (frame_size_rtx))
3615 	offset = frame_size_rtx;
3616       else
3617 	{
3618 	  regno = next_scratch_gr_reg ();
3619 	  offset = gen_rtx_REG (DImode, regno);
3620 	  emit_move_insn (offset, frame_size_rtx);
3621 	}
3622 
3623       insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3624 				    stack_pointer_rtx, offset));
3625 
3626       if (! frame_pointer_needed)
3627 	{
3628 	  RTX_FRAME_RELATED_P (insn) = 1;
3629 	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
3630 			gen_rtx_SET (stack_pointer_rtx,
3631 				     gen_rtx_PLUS (DImode,
3632 						   stack_pointer_rtx,
3633 						   frame_size_rtx)));
3634 	}
3635 
3636       /* ??? At this point we must generate a magic insn that appears to
3637 	 modify the stack pointer, the frame pointer, and all spill
3638 	 iterators.  This would allow the most scheduling freedom.  For
3639 	 now, just hard stop.  */
3640       emit_insn (gen_blockage ());
3641     }
3642 
3643   /* Must copy out ar.unat before doing any integer spills.  */
3644   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3645     {
3646       if (current_frame_info.r[reg_save_ar_unat])
3647         {
3648 	  ar_unat_save_reg
3649 	    = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3650 	  reg_emitted (reg_save_ar_unat);
3651 	}
3652       else
3653 	{
3654 	  alt_regno = next_scratch_gr_reg ();
3655 	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3656 	  current_frame_info.gr_used_mask |= 1 << alt_regno;
3657 	}
3658 
3659       reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3660       insn = emit_move_insn (ar_unat_save_reg, reg);
3661       if (current_frame_info.r[reg_save_ar_unat])
3662 	{
3663 	  RTX_FRAME_RELATED_P (insn) = 1;
3664 	  add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3665 	}
3666 
3667       /* Even if we're not going to generate an epilogue, we still
3668 	 need to save the register so that EH works.  */
3669       if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
3670 	emit_insn (gen_prologue_use (ar_unat_save_reg));
3671     }
3672   else
3673     ar_unat_save_reg = NULL_RTX;
3674 
3675   /* Spill all varargs registers.  Do this before spilling any GR registers,
3676      since we want the UNAT bits for the GR registers to override the UNAT
3677      bits from varargs, which we don't care about.  */
3678 
3679   cfa_off = -16;
3680   for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3681     {
3682       reg = gen_rtx_REG (DImode, regno);
3683       do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3684     }
3685 
3686   /* Locate the bottom of the register save area.  */
3687   cfa_off = (current_frame_info.spill_cfa_off
3688 	     + current_frame_info.spill_size
3689 	     + current_frame_info.extra_spill_size);
3690 
3691   /* Save the predicate register block either in a register or in memory.  */
3692   if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3693     {
3694       reg = gen_rtx_REG (DImode, PR_REG (0));
3695       if (current_frame_info.r[reg_save_pr] != 0)
3696 	{
3697 	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3698 	  reg_emitted (reg_save_pr);
3699 	  insn = emit_move_insn (alt_reg, reg);
3700 
3701 	  /* ??? Denote pr spill/fill by a DImode move that modifies all
3702 	     64 hard registers.  */
3703 	  RTX_FRAME_RELATED_P (insn) = 1;
3704 	  add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3705 
3706 	  /* Even if we're not going to generate an epilogue, we still
3707 	     need to save the register so that EH works.  */
3708 	  if (! epilogue_p)
3709 	    emit_insn (gen_prologue_use (alt_reg));
3710 	}
3711       else
3712 	{
3713 	  alt_regno = next_scratch_gr_reg ();
3714 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3715 	  insn = emit_move_insn (alt_reg, reg);
3716 	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3717 	  cfa_off -= 8;
3718 	}
3719     }
3720 
3721   /* Handle AR regs in numerical order.  All of them get special handling.  */
3722   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3723       && current_frame_info.r[reg_save_ar_unat] == 0)
3724     {
3725       reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3726       do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3727       cfa_off -= 8;
3728     }
3729 
3730   /* The alloc insn already copied ar.pfs into a general register.  The
3731      only thing we have to do now is copy that register to a stack slot
3732      if we'd not allocated a local register for the job.  */
3733   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3734       && current_frame_info.r[reg_save_ar_pfs] == 0)
3735     {
3736       reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3737       do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3738       cfa_off -= 8;
3739     }
3740 
3741   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3742     {
3743       reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3744       if (current_frame_info.r[reg_save_ar_lc] != 0)
3745 	{
3746 	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3747 	  reg_emitted (reg_save_ar_lc);
3748 	  insn = emit_move_insn (alt_reg, reg);
3749 	  RTX_FRAME_RELATED_P (insn) = 1;
3750 	  add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3751 
3752 	  /* Even if we're not going to generate an epilogue, we still
3753 	     need to save the register so that EH works.  */
3754 	  if (! epilogue_p)
3755 	    emit_insn (gen_prologue_use (alt_reg));
3756 	}
3757       else
3758 	{
3759 	  alt_regno = next_scratch_gr_reg ();
3760 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3761 	  emit_move_insn (alt_reg, reg);
3762 	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3763 	  cfa_off -= 8;
3764 	}
3765     }
3766 
3767   /* Save the return pointer.  */
3768   if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3769     {
3770       reg = gen_rtx_REG (DImode, BR_REG (0));
3771       if (current_frame_info.r[reg_save_b0] != 0)
3772 	{
3773           alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3774           reg_emitted (reg_save_b0);
3775 	  insn = emit_move_insn (alt_reg, reg);
3776 	  RTX_FRAME_RELATED_P (insn) = 1;
3777 	  add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (alt_reg, pc_rtx));
3778 
3779 	  /* Even if we're not going to generate an epilogue, we still
3780 	     need to save the register so that EH works.  */
3781 	  if (! epilogue_p)
3782 	    emit_insn (gen_prologue_use (alt_reg));
3783 	}
3784       else
3785 	{
3786 	  alt_regno = next_scratch_gr_reg ();
3787 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3788 	  emit_move_insn (alt_reg, reg);
3789 	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3790 	  cfa_off -= 8;
3791 	}
3792     }
3793 
3794   if (current_frame_info.r[reg_save_gp])
3795     {
3796       reg_emitted (reg_save_gp);
3797       insn = emit_move_insn (gen_rtx_REG (DImode,
3798 					  current_frame_info.r[reg_save_gp]),
3799 			     pic_offset_table_rtx);
3800     }
3801 
3802   /* We should now be at the base of the gr/br/fr spill area.  */
3803   gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3804 			  + current_frame_info.spill_size));
3805 
3806   /* Spill all general registers.  */
3807   for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3808     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3809       {
3810 	reg = gen_rtx_REG (DImode, regno);
3811 	do_spill (gen_gr_spill, reg, cfa_off, reg);
3812 	cfa_off -= 8;
3813       }
3814 
3815   /* Spill the rest of the BR registers.  */
3816   for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3817     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3818       {
3819 	alt_regno = next_scratch_gr_reg ();
3820 	alt_reg = gen_rtx_REG (DImode, alt_regno);
3821 	reg = gen_rtx_REG (DImode, regno);
3822 	emit_move_insn (alt_reg, reg);
3823 	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3824 	cfa_off -= 8;
3825       }
3826 
3827   /* Align the frame and spill all FR registers.  */
3828   for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3829     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3830       {
3831         gcc_assert (!(cfa_off & 15));
3832 	reg = gen_rtx_REG (XFmode, regno);
3833 	do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3834 	cfa_off -= 16;
3835       }
3836 
3837   gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3838 
3839   finish_spill_pointers ();
3840 }
3841 
3842 /* Output the textual info surrounding the prologue.  */
3843 
3844 void
3845 ia64_start_function (FILE *file, const char *fnname,
3846 		     tree decl ATTRIBUTE_UNUSED)
3847 {
3848 #if TARGET_ABI_OPEN_VMS
3849   vms_start_function (fnname);
3850 #endif
3851 
3852   fputs ("\t.proc ", file);
3853   assemble_name (file, fnname);
3854   fputc ('\n', file);
3855   ASM_OUTPUT_LABEL (file, fnname);
3856 }
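
/* For a function named "foo" this emits, roughly:

	.proc foo
   foo:

   plus the VMS-specific preamble when TARGET_ABI_OPEN_VMS is set.  */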
3857 
3858 /* Called after register allocation to add any instructions needed for the
3859    epilogue.  Using an epilogue insn is favored compared to putting all of the
3860    instructions in output_function_epilogue(), since it allows the scheduler
3861    to intermix instructions with the restores of the caller saved registers.
3862    In some cases, it might be necessary to emit a barrier instruction as the
3863    last insn to prevent such scheduling.  */
3864 
3865 void
3866 ia64_expand_epilogue (int sibcall_p)
3867 {
3868   rtx_insn *insn;
3869   rtx reg, alt_reg, ar_unat_save_reg;
3870   int regno, alt_regno, cfa_off;
3871 
3872   ia64_compute_frame_size (get_frame_size ());
3873 
3874   /* If there is a frame pointer, then we use it instead of the stack
3875      pointer, so that the stack pointer does not need to be valid when
3876      the epilogue starts.  See EXIT_IGNORE_STACK.  */
3877   if (frame_pointer_needed)
3878     setup_spill_pointers (current_frame_info.n_spilled,
3879 			  hard_frame_pointer_rtx, 0);
3880   else
3881     setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3882 			  current_frame_info.total_size);
3883 
3884   if (current_frame_info.total_size != 0)
3885     {
3886       /* ??? At this point we must generate a magic insn that appears to
3887          modify the spill iterators and the frame pointer.  This would
3888 	 allow the most scheduling freedom.  For now, just hard stop.  */
3889       emit_insn (gen_blockage ());
3890     }
3891 
3892   /* Locate the bottom of the register save area.  */
3893   cfa_off = (current_frame_info.spill_cfa_off
3894 	     + current_frame_info.spill_size
3895 	     + current_frame_info.extra_spill_size);
3896 
3897   /* Restore the predicate registers.  */
3898   if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3899     {
3900       if (current_frame_info.r[reg_save_pr] != 0)
3901         {
3902 	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3903 	  reg_emitted (reg_save_pr);
3904 	}
3905       else
3906 	{
3907 	  alt_regno = next_scratch_gr_reg ();
3908 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3909 	  do_restore (gen_movdi_x, alt_reg, cfa_off);
3910 	  cfa_off -= 8;
3911 	}
3912       reg = gen_rtx_REG (DImode, PR_REG (0));
3913       emit_move_insn (reg, alt_reg);
3914     }
3915 
3916   /* Restore the application registers.  */
3917 
3918   /* Load the saved unat from the stack, but do not restore it until
3919      after the GRs have been restored.  */
3920   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3921     {
3922       if (current_frame_info.r[reg_save_ar_unat] != 0)
3923         {
3924           ar_unat_save_reg
3925 	    = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3926 	  reg_emitted (reg_save_ar_unat);
3927 	}
3928       else
3929 	{
3930 	  alt_regno = next_scratch_gr_reg ();
3931 	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3932 	  current_frame_info.gr_used_mask |= 1 << alt_regno;
3933 	  do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3934 	  cfa_off -= 8;
3935 	}
3936     }
3937   else
3938     ar_unat_save_reg = NULL_RTX;
3939 
3940   if (current_frame_info.r[reg_save_ar_pfs] != 0)
3941     {
3942       reg_emitted (reg_save_ar_pfs);
3943       alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
3944       reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3945       emit_move_insn (reg, alt_reg);
3946     }
3947   else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3948     {
3949       alt_regno = next_scratch_gr_reg ();
3950       alt_reg = gen_rtx_REG (DImode, alt_regno);
3951       do_restore (gen_movdi_x, alt_reg, cfa_off);
3952       cfa_off -= 8;
3953       reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3954       emit_move_insn (reg, alt_reg);
3955     }
3956 
3957   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3958     {
3959       if (current_frame_info.r[reg_save_ar_lc] != 0)
3960         {
3961 	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3962           reg_emitted (reg_save_ar_lc);
3963 	}
3964       else
3965 	{
3966 	  alt_regno = next_scratch_gr_reg ();
3967 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3968 	  do_restore (gen_movdi_x, alt_reg, cfa_off);
3969 	  cfa_off -= 8;
3970 	}
3971       reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3972       emit_move_insn (reg, alt_reg);
3973     }
3974 
3975   /* Restore the return pointer.  */
3976   if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3977     {
3978       if (current_frame_info.r[reg_save_b0] != 0)
3979         {
3980          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3981          reg_emitted (reg_save_b0);
3982         }
3983       else
3984 	{
3985 	  alt_regno = next_scratch_gr_reg ();
3986 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3987 	  do_restore (gen_movdi_x, alt_reg, cfa_off);
3988 	  cfa_off -= 8;
3989 	}
3990       reg = gen_rtx_REG (DImode, BR_REG (0));
3991       emit_move_insn (reg, alt_reg);
3992     }
3993 
3994   /* We should now be at the base of the gr/br/fr spill area.  */
3995   gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3996 			  + current_frame_info.spill_size));
3997 
3998   /* The GP may be stored on the stack in the prologue, but it's
3999      never restored in the epilogue.  Skip the stack slot.  */
4000   if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
4001     cfa_off -= 8;
4002 
4003   /* Restore all general registers.  */
4004   for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
4005     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4006       {
4007 	reg = gen_rtx_REG (DImode, regno);
4008 	do_restore (gen_gr_restore, reg, cfa_off);
4009 	cfa_off -= 8;
4010       }
4011 
4012   /* Restore the branch registers.  */
4013   for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
4014     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4015       {
4016 	alt_regno = next_scratch_gr_reg ();
4017 	alt_reg = gen_rtx_REG (DImode, alt_regno);
4018 	do_restore (gen_movdi_x, alt_reg, cfa_off);
4019 	cfa_off -= 8;
4020 	reg = gen_rtx_REG (DImode, regno);
4021 	emit_move_insn (reg, alt_reg);
4022       }
4023 
4024   /* Restore floating point registers.  */
4025   for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
4026     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4027       {
4028         gcc_assert (!(cfa_off & 15));
4029 	reg = gen_rtx_REG (XFmode, regno);
4030 	do_restore (gen_fr_restore_x, reg, cfa_off);
4031 	cfa_off -= 16;
4032       }
4033 
4034   /* Restore ar.unat for real.  */
4035   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
4036     {
4037       reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
4038       emit_move_insn (reg, ar_unat_save_reg);
4039     }
4040 
4041   gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
4042 
4043   finish_spill_pointers ();
4044 
4045   if (current_frame_info.total_size
4046       || cfun->machine->ia64_eh_epilogue_sp
4047       || frame_pointer_needed)
4048     {
4049       /* ??? At this point we must generate a magic insn that appears to
4050          modify the spill iterators, the stack pointer, and the frame
4051 	 pointer.  This would allow the most scheduling freedom.  For now,
4052 	 just hard stop.  */
4053       emit_insn (gen_blockage ());
4054     }
4055 
4056   if (cfun->machine->ia64_eh_epilogue_sp)
4057     emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
4058   else if (frame_pointer_needed)
4059     {
4060       insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
4061       RTX_FRAME_RELATED_P (insn) = 1;
4062       add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
4063     }
4064   else if (current_frame_info.total_size)
4065     {
4066       rtx offset, frame_size_rtx;
4067 
4068       frame_size_rtx = GEN_INT (current_frame_info.total_size);
4069       if (satisfies_constraint_I (frame_size_rtx))
4070 	offset = frame_size_rtx;
4071       else
4072 	{
4073 	  regno = next_scratch_gr_reg ();
4074 	  offset = gen_rtx_REG (DImode, regno);
4075 	  emit_move_insn (offset, frame_size_rtx);
4076 	}
4077 
4078       insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
4079 				    offset));
4080 
4081       RTX_FRAME_RELATED_P (insn) = 1;
4082       add_reg_note (insn, REG_CFA_ADJUST_CFA,
4083 		    gen_rtx_SET (stack_pointer_rtx,
4084 				 gen_rtx_PLUS (DImode,
4085 					       stack_pointer_rtx,
4086 					       frame_size_rtx)));
4087     }
4088 
4089   if (cfun->machine->ia64_eh_epilogue_bsp)
4090     emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
4091 
4092   if (! sibcall_p)
4093     emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
4094   else
4095     {
4096       int fp = GR_REG (2);
4097       /* We need a throw away register here, r0 and r1 are reserved,
4098 	 so r2 is the first available call clobbered register.  If
4099 	 there was a frame_pointer register, we may have swapped the
4100 	 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
4101 	 sure we're using the string "r2" when emitting the register
4102 	 name for the assembler.  */
4103       if (current_frame_info.r[reg_fp]
4104           && current_frame_info.r[reg_fp] == GR_REG (2))
4105 	fp = HARD_FRAME_POINTER_REGNUM;
4106 
4107       /* We must emit an alloc to force the input registers to become output
4108 	 registers.  Otherwise, if the callee tries to pass its parameters
4109 	 through to another call without an intervening alloc, then these
4110 	 values get lost.  */
4111       /* ??? We don't need to preserve all input registers.  We only need to
4112 	 preserve those input registers used as arguments to the sibling call.
4113 	 It is unclear how to compute that number here.  */
4114       if (current_frame_info.n_input_regs != 0)
4115 	{
4116 	  rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
4117 
4118 	  insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
4119 				const0_rtx, const0_rtx,
4120 				n_inputs, const0_rtx));
4121 	  RTX_FRAME_RELATED_P (insn) = 1;
4122 
4123 	  /* ??? We need to mark the alloc as frame-related so that it gets
4124 	     passed into ia64_asm_unwind_emit for ia64-specific unwinding.
4125 	     But there's nothing dwarf2 related to be done wrt the register
4126 	     windows.  If we do nothing, dwarf2out will abort on the UNSPEC;
4127 	     the empty parallel means dwarf2out will not see anything.  */
4128 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4129 			gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0)));
4130 	}
4131     }
4132 }
4133 
4134 /* Return 1 if br.ret can do all the work required to return from a
4135    function.  */
4136 
4137 int
4138 ia64_direct_return (void)
4139 {
4140   if (reload_completed && ! frame_pointer_needed)
4141     {
4142       ia64_compute_frame_size (get_frame_size ());
4143 
4144       return (current_frame_info.total_size == 0
4145 	      && current_frame_info.n_spilled == 0
4146 	      && current_frame_info.r[reg_save_b0] == 0
4147 	      && current_frame_info.r[reg_save_pr] == 0
4148 	      && current_frame_info.r[reg_save_ar_pfs] == 0
4149 	      && current_frame_info.r[reg_save_ar_unat] == 0
4150 	      && current_frame_info.r[reg_save_ar_lc] == 0);
4151     }
4152   return 0;
4153 }
4154 
4155 /* Return the magic cookie that we use to hold the return address
4156    during early compilation.  */
4157 
4158 rtx
4159 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
4160 {
4161   if (count != 0)
4162     return NULL;
4163   return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
4164 }
4165 
4166 /* Split this value after reload, now that we know where the return
4167    address is saved.  */
4168 
4169 void
4170 ia64_split_return_addr_rtx (rtx dest)
4171 {
4172   rtx src;
4173 
4174   if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4175     {
4176       if (current_frame_info.r[reg_save_b0] != 0)
4177         {
4178 	  src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4179 	  reg_emitted (reg_save_b0);
4180 	}
4181       else
4182 	{
4183 	  HOST_WIDE_INT off;
4184 	  unsigned int regno;
4185 	  rtx off_r;
4186 
4187 	  /* Compute offset from CFA for BR0.  */
4188 	  /* ??? Must be kept in sync with ia64_expand_prologue.  */
4189 	  off = (current_frame_info.spill_cfa_off
4190 		 + current_frame_info.spill_size);
4191 	  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
4192 	    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4193 	      off -= 8;
4194 
4195 	  /* Convert CFA offset to a register based offset.  */
4196 	  if (frame_pointer_needed)
4197 	    src = hard_frame_pointer_rtx;
4198 	  else
4199 	    {
4200 	      src = stack_pointer_rtx;
4201 	      off += current_frame_info.total_size;
4202 	    }
4203 
4204 	  /* Load address into scratch register.  */
4205 	  off_r = GEN_INT (off);
4206 	  if (satisfies_constraint_I (off_r))
4207 	    emit_insn (gen_adddi3 (dest, src, off_r));
4208 	  else
4209 	    {
4210 	      emit_move_insn (dest, off_r);
4211 	      emit_insn (gen_adddi3 (dest, src, dest));
4212 	    }
4213 
4214 	  src = gen_rtx_MEM (Pmode, dest);
4215 	}
4216     }
4217   else
4218     src = gen_rtx_REG (DImode, BR_REG (0));
4219 
4220   emit_move_insn (dest, src);
4221 }
4222 
4223 int
4224 ia64_hard_regno_rename_ok (int from, int to)
4225 {
4226   /* Don't clobber any of the registers we reserved for the prologue.  */
4227   unsigned int r;
4228 
4229   for (r = reg_fp; r <= reg_save_ar_lc; r++)
4230     if (to == current_frame_info.r[r]
4231         || from == current_frame_info.r[r]
4232         || to == emitted_frame_related_regs[r]
4233         || from == emitted_frame_related_regs[r])
4234       return 0;
4235 
4236   /* Don't use output registers outside the register frame.  */
4237   if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
4238     return 0;
4239 
4240   /* Retain even/oddness on predicate register pairs.  */
4241   if (PR_REGNO_P (from) && PR_REGNO_P (to))
4242     return (from & 1) == (to & 1);
4243 
4244   return 1;
4245 }
4246 
4247 /* Target hook for assembling integer objects.  Handle word-sized
4248    aligned objects and detect the cases when @fptr is needed.  */
4249 
4250 static bool
4251 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
4252 {
4253   if (size == POINTER_SIZE / BITS_PER_UNIT
4254       && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
4255       && GET_CODE (x) == SYMBOL_REF
4256       && SYMBOL_REF_FUNCTION_P (x))
4257     {
4258       static const char * const directive[2][2] = {
4259 	  /* 64-bit pointer */  /* 32-bit pointer */
4260 	{ "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("},	/* unaligned */
4261 	{ "\tdata8\t@fptr(",    "\tdata4\t@fptr("}	/* aligned */
4262       };
4263       fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
4264       output_addr_const (asm_out_file, x);
4265       fputs (")\n", asm_out_file);
4266       return true;
4267     }
4268   return default_assemble_integer (x, size, aligned_p);
4269 }
4270 
4271 /* Emit the function prologue.  */
4272 
4273 static void
4274 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4275 {
4276   int mask, grsave, grsave_prev;
4277 
4278   if (current_frame_info.need_regstk)
4279     fprintf (file, "\t.regstk %d, %d, %d, %d\n",
4280 	     current_frame_info.n_input_regs,
4281 	     current_frame_info.n_local_regs,
4282 	     current_frame_info.n_output_regs,
4283 	     current_frame_info.n_rotate_regs);
4284 
4285   if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4286     return;
4287 
4288   /* Emit the .prologue directive.  */
4289 
4290   mask = 0;
4291   grsave = grsave_prev = 0;
4292   if (current_frame_info.r[reg_save_b0] != 0)
4293     {
4294       mask |= 8;
4295       grsave = grsave_prev = current_frame_info.r[reg_save_b0];
4296     }
4297   if (current_frame_info.r[reg_save_ar_pfs] != 0
4298       && (grsave_prev == 0
4299 	  || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
4300     {
4301       mask |= 4;
4302       if (grsave_prev == 0)
4303 	grsave = current_frame_info.r[reg_save_ar_pfs];
4304       grsave_prev = current_frame_info.r[reg_save_ar_pfs];
4305     }
4306   if (current_frame_info.r[reg_fp] != 0
4307       && (grsave_prev == 0
4308 	  || current_frame_info.r[reg_fp] == grsave_prev + 1))
4309     {
4310       mask |= 2;
4311       if (grsave_prev == 0)
4312 	grsave = HARD_FRAME_POINTER_REGNUM;
4313       grsave_prev = current_frame_info.r[reg_fp];
4314     }
4315   if (current_frame_info.r[reg_save_pr] != 0
4316       && (grsave_prev == 0
4317 	  || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
4318     {
4319       mask |= 1;
4320       if (grsave_prev == 0)
4321 	grsave = current_frame_info.r[reg_save_pr];
4322     }
4323 
4324   if (mask && TARGET_GNU_AS)
4325     fprintf (file, "\t.prologue %d, %d\n", mask,
4326 	     ia64_dbx_register_number (grsave));
4327   else
4328     fputs ("\t.prologue\n", file);
4329 
4330   /* Emit a .spill directive, if necessary, to relocate the base of
4331      the register spill area.  */
4332   if (current_frame_info.spill_cfa_off != -16)
4333     fprintf (file, "\t.spill %ld\n",
4334 	     (long) (current_frame_info.spill_cfa_off
4335 		     + current_frame_info.spill_size));
4336 }
4337 
4338 /* Emit the .body directive at the scheduled end of the prologue.  */
4339 
4340 static void
4341 ia64_output_function_end_prologue (FILE *file)
4342 {
4343   if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4344     return;
4345 
4346   fputs ("\t.body\n", file);
4347 }
4348 
4349 /* Emit the function epilogue.  */
4350 
4351 static void
4352 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4353 			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4354 {
4355   int i;
4356 
4357   if (current_frame_info.r[reg_fp])
4358     {
4359       const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
4360       reg_names[HARD_FRAME_POINTER_REGNUM]
4361 	= reg_names[current_frame_info.r[reg_fp]];
4362       reg_names[current_frame_info.r[reg_fp]] = tmp;
4363       reg_emitted (reg_fp);
4364     }
4365   if (! TARGET_REG_NAMES)
4366     {
4367       for (i = 0; i < current_frame_info.n_input_regs; i++)
4368 	reg_names[IN_REG (i)] = ia64_input_reg_names[i];
4369       for (i = 0; i < current_frame_info.n_local_regs; i++)
4370 	reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
4371       for (i = 0; i < current_frame_info.n_output_regs; i++)
4372 	reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
4373     }
4374 
4375   current_frame_info.initialized = 0;
4376 }
4377 
4378 int
4379 ia64_dbx_register_number (int regno)
4380 {
4381   /* In ia64_expand_prologue we quite literally renamed the frame pointer
4382      from its home at loc79 to something inside the register frame.  We
4383      must perform the same renumbering here for the debug info.  */
4384   if (current_frame_info.r[reg_fp])
4385     {
4386       if (regno == HARD_FRAME_POINTER_REGNUM)
4387 	regno = current_frame_info.r[reg_fp];
4388       else if (regno == current_frame_info.r[reg_fp])
4389 	regno = HARD_FRAME_POINTER_REGNUM;
4390     }
4391 
4392   if (IN_REGNO_P (regno))
4393     return 32 + regno - IN_REG (0);
4394   else if (LOC_REGNO_P (regno))
4395     return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
4396   else if (OUT_REGNO_P (regno))
4397     return (32 + current_frame_info.n_input_regs
4398 	    + current_frame_info.n_local_regs + regno - OUT_REG (0));
4399   else
4400     return regno;
4401 }
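
/* For example, in a function with 2 input and 3 local registers this maps
   in0/in1 to debug register numbers 32-33, loc0-loc2 to 34-36 and out0 to
   37, mirroring the order in which the alloc instruction lays out the
   register frame.  */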
4402 
4403 /* Implement TARGET_TRAMPOLINE_INIT.
4404 
4405    The trampoline should set the static chain pointer to value placed
4406    into the trampoline and should branch to the specified routine.
4407    To make the normal indirect-subroutine calling convention work,
4408    the trampoline must look like a function descriptor; the first
4409    word being the target address and the second being the target's
4410    global pointer.
4411 
4412    We abuse the concept of a global pointer by arranging for it
4413    to point to the data we need to load.  The complete trampoline
4414    has the following form:
4415 
4416 		+-------------------+ \
4417 	TRAMP:	| __ia64_trampoline | |
4418 		+-------------------+  > fake function descriptor
4419 		| TRAMP+16          | |
4420 		+-------------------+ /
4421 		| target descriptor |
4422 		+-------------------+
4423 		| static link	    |
4424 		+-------------------+
4425 */
4426 
4427 static void
4428 ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
4429 {
4430   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
4431   rtx addr, addr_reg, tramp, eight = GEN_INT (8);
4432 
4433   /* The Intel assembler requires that the global __ia64_trampoline symbol
4434      be declared explicitly.  */
4435   if (!TARGET_GNU_AS)
4436     {
4437       static bool declared_ia64_trampoline = false;
4438 
4439       if (!declared_ia64_trampoline)
4440 	{
4441 	  declared_ia64_trampoline = true;
4442 	  (*targetm.asm_out.globalize_label) (asm_out_file,
4443 					      "__ia64_trampoline");
4444 	}
4445     }
4446 
4447   /* Make sure addresses are Pmode even if we are in ILP32 mode. */
4448   addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
4449   fnaddr = convert_memory_address (Pmode, fnaddr);
4450   static_chain = convert_memory_address (Pmode, static_chain);
4451 
4452   /* Load up our iterator.  */
4453   addr_reg = copy_to_reg (addr);
4454   m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
4455 
4456   /* The first two words are the fake descriptor:
4457      __ia64_trampoline, ADDR+16.  */
4458   tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4459   if (TARGET_ABI_OPEN_VMS)
4460     {
4461       /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4462 	 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4463 	 relocation against function symbols to make it identical to the
4464 	 LTOFF_FPTR22 relocation.  Emit the latter directly to stay within
4465 	 strict ELF and dereference to get the bare code address.  */
4466       rtx reg = gen_reg_rtx (Pmode);
4467       SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4468       emit_move_insn (reg, tramp);
4469       emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4470       tramp = reg;
4471    }
4472   emit_move_insn (m_tramp, tramp);
4473   emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4474   m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4475 
4476   emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (Pmode, addr, 16)));
4477   emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4478   m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4479 
4480   /* The third word is the target descriptor.  */
4481   emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
4482   emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4483   m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4484 
4485   /* The fourth word is the static chain.  */
4486   emit_move_insn (m_tramp, static_chain);
4487 }
4488 
4489 /* Do any needed setup for a variadic function.  CUM has not been updated
4490    for the last named argument which has type TYPE and mode MODE.
4491 
4492    We generate the actual spill instructions during prologue generation.  */
4493 
4494 static void
4495 ia64_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
4496 			     tree type, int * pretend_size,
4497 			     int second_time ATTRIBUTE_UNUSED)
4498 {
4499   CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum);
4500 
4501   /* Skip the current argument.  */
4502   ia64_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, 1);
4503 
4504   if (next_cum.words < MAX_ARGUMENT_SLOTS)
4505     {
4506       int n = MAX_ARGUMENT_SLOTS - next_cum.words;
4507       *pretend_size = n * UNITS_PER_WORD;
4508       cfun->machine->n_varargs = n;
4509     }
4510 }
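
/* Rough example, assuming the usual MAX_ARGUMENT_SLOTS of 8: for
   "int f (int a, int b, ...)" the two named arguments occupy two slots,
   so next_cum.words is 2, *pretend_size becomes 6 * 8 = 48 bytes and the
   prologue later spills the remaining 6 argument registers into the
   varargs save area.  */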
4511 
4512 /* Check whether TYPE is a homogeneous floating point aggregate.  If
4513    it is, return the mode of the floating point type that appears
4514    in all leafs.  If it is not, return VOIDmode.
4515 
4516    An aggregate is a homogeneous floating point aggregate if all
4517    fields/elements in it have the same floating point type (e.g.,
4518    SFmode).  128-bit quad-precision floats are excluded.
4519 
4520    Variable sized aggregates should never arrive here, since we should
4521    have already decided to pass them by reference.  Top-level zero-sized
4522    aggregates are excluded because our parallels crash the middle-end.  */
4523 
4524 static machine_mode
4525 hfa_element_mode (const_tree type, bool nested)
4526 {
4527   machine_mode element_mode = VOIDmode;
4528   machine_mode mode;
4529   enum tree_code code = TREE_CODE (type);
4530   int know_element_mode = 0;
4531   tree t;
4532 
4533   if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4534     return VOIDmode;
4535 
4536   switch (code)
4537     {
4538     case VOID_TYPE:	case INTEGER_TYPE:	case ENUMERAL_TYPE:
4539     case BOOLEAN_TYPE:	case POINTER_TYPE:
4540     case OFFSET_TYPE:	case REFERENCE_TYPE:	case METHOD_TYPE:
4541     case LANG_TYPE:		case FUNCTION_TYPE:
4542       return VOIDmode;
4543 
4544       /* Fortran complex types are supposed to be HFAs, so we need to handle
4545 	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
4546 	 types though.  */
4547     case COMPLEX_TYPE:
4548       if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
4549 	  && TYPE_MODE (type) != TCmode)
4550 	return GET_MODE_INNER (TYPE_MODE (type));
4551       else
4552 	return VOIDmode;
4553 
4554     case REAL_TYPE:
4555       /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4556 	 mode if this is contained within an aggregate.  */
4557       if (nested && TYPE_MODE (type) != TFmode)
4558 	return TYPE_MODE (type);
4559       else
4560 	return VOIDmode;
4561 
4562     case ARRAY_TYPE:
4563       return hfa_element_mode (TREE_TYPE (type), 1);
4564 
4565     case RECORD_TYPE:
4566     case UNION_TYPE:
4567     case QUAL_UNION_TYPE:
4568       for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
4569 	{
4570 	  if (TREE_CODE (t) != FIELD_DECL)
4571 	    continue;
4572 
4573 	  mode = hfa_element_mode (TREE_TYPE (t), 1);
4574 	  if (know_element_mode)
4575 	    {
4576 	      if (mode != element_mode)
4577 		return VOIDmode;
4578 	    }
4579 	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4580 	    return VOIDmode;
4581 	  else
4582 	    {
4583 	      know_element_mode = 1;
4584 	      element_mode = mode;
4585 	    }
4586 	}
4587       return element_mode;
4588 
4589     default:
4590       /* If we reach here, we probably have some front-end specific type
4591 	 that the backend doesn't know about.  This can happen via the
4592 	 aggregate_value_p call in init_function_start.  All we can do is
4593 	 ignore unknown tree types.  */
4594       return VOIDmode;
4595     }
4596 
4597   return VOIDmode;
4598 }
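
/* A few illustrative classifications:

     struct { float x, y, z; }      -> SFmode   (HFA of three floats)
     struct { double d[4]; }        -> DFmode   (HFA of four doubles)
     struct { float f; double d; }  -> VOIDmode (mixed element modes)
     struct { float f; int i; }     -> VOIDmode (non-FP field)
     _Complex double                -> DFmode
     any TFmode (quad) member       -> VOIDmode (explicitly excluded)  */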
4599 
4600 /* Return the number of words required to hold a quantity of TYPE and MODE
4601    when passed as an argument.  */
4602 static int
4603 ia64_function_arg_words (const_tree type, machine_mode mode)
4604 {
4605   int words;
4606 
4607   if (mode == BLKmode)
4608     words = int_size_in_bytes (type);
4609   else
4610     words = GET_MODE_SIZE (mode);
4611 
4612   return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;  /* round up */
4613 }
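
/* For example, a 12-byte BLKmode aggregate occupies (12 + 7) / 8 = 2
   argument words, while any 8-byte scalar occupies exactly 1.  */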
4614 
4615 /* Return the number of registers that should be skipped so the current
4616    argument (described by TYPE and WORDS) will be properly aligned.
4617 
4618    Integer and float arguments larger than 8 bytes start at the next
4619    even boundary.  Aggregates larger than 8 bytes start at the next
4620    even boundary if the aggregate has 16 byte alignment.  Note that
4621    in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4622    but are still to be aligned in registers.
4623 
4624    ??? The ABI does not specify how to handle aggregates with
4625    alignment from 9 to 15 bytes, or greater than 16.  We handle them
4626    all as if they had 16 byte alignment.  Such aggregates can occur
4627    only if gcc extensions are used.  */
4628 static int
4629 ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
4630 			  const_tree type, int words)
4631 {
4632   /* No registers are skipped on VMS.  */
4633   if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
4634     return 0;
4635 
4636   if (type
4637       && TREE_CODE (type) != INTEGER_TYPE
4638       && TREE_CODE (type) != REAL_TYPE)
4639     return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4640   else
4641     return words > 1;
4642 }
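
/* For example, if three 8-byte integer arguments have already used slots
   0-2 (so cum->words is odd), a following 16-byte-aligned aggregate skips
   slot 3 and starts in slot 4, whereas another 8-byte integer would go
   straight into slot 3.  */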
4643 
4644 /* Return rtx for register where argument is passed, or zero if it is passed
4645    on the stack.  */
4646 /* ??? 128-bit quad-precision floats are always passed in general
4647    registers.  */
4648 
4649 static rtx
4650 ia64_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
4651 		     const_tree type, bool named, bool incoming)
4652 {
4653   const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4654 
4655   int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
4656   int words = ia64_function_arg_words (type, mode);
4657   int offset = ia64_function_arg_offset (cum, type, words);
4658   machine_mode hfa_mode = VOIDmode;
4659 
4660   /* For OPEN VMS, emit the instruction setting up the argument register here,
4661      when we know this will be together with the other arguments setup related
4662      insns.  This is not the conceptually best place to do this, but this is
4663      the easiest as we have convenient access to cumulative args info.  */
4664 
4665   if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
4666       && named == 1)
4667     {
4668       unsigned HOST_WIDE_INT regval = cum->words;
4669       int i;
4670 
4671       for (i = 0; i < 8; i++)
4672 	regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4673 
4674       emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4675 		      GEN_INT (regval));
4676     }
4677 
4678   /* If all argument slots are used, then it must go on the stack.  */
4679   if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4680     return 0;
4681 
4682   /* On OpenVMS an argument is either in Rn or Fn.  */
4683   if (TARGET_ABI_OPEN_VMS)
4684     {
4685       if (FLOAT_MODE_P (mode))
4686 	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
4687       else
4688 	return gen_rtx_REG (mode, basereg + cum->words);
4689     }
4690 
4691   /* Check for and handle homogeneous FP aggregates.  */
4692   if (type)
4693     hfa_mode = hfa_element_mode (type, 0);
4694 
4695   /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
4696      and unprototyped hfas are passed specially.  */
4697   if (hfa_mode != VOIDmode && (! cum->prototype || named))
4698     {
4699       rtx loc[16];
4700       int i = 0;
4701       int fp_regs = cum->fp_regs;
4702       int int_regs = cum->words + offset;
4703       int hfa_size = GET_MODE_SIZE (hfa_mode);
4704       int byte_size;
4705       int args_byte_size;
4706 
4707       /* If prototyped, pass it in FR regs then GR regs.
4708 	 If not prototyped, pass it in both FR and GR regs.
4709 
4710 	 If this is an SFmode aggregate, then it is possible to run out of
4711 	 FR regs while GR regs are still left.  In that case, we pass the
4712 	 remaining part in the GR regs.  */
4713 
4714       /* Fill the FP regs.  We do this always.  We stop if we reach the end
4715 	 of the argument, the last FP register, or the last argument slot.  */
4716 
4717       byte_size = ((mode == BLKmode)
4718 		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4719       args_byte_size = int_regs * UNITS_PER_WORD;
4720       offset = 0;
4721       for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4722 	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4723 	{
4724 	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4725 				      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4726 							      + fp_regs)),
4727 				      GEN_INT (offset));
4728 	  offset += hfa_size;
4729 	  args_byte_size += hfa_size;
4730 	  fp_regs++;
4731 	}
4732 
4733       /* If no prototype, then the whole thing must go in GR regs.  */
4734       if (! cum->prototype)
4735 	offset = 0;
4736       /* If this is an SFmode aggregate, then we might have some left over
4737 	 that needs to go in GR regs.  */
4738       else if (byte_size != offset)
4739 	int_regs += offset / UNITS_PER_WORD;
4740 
4741       /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */
4742 
4743       for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4744 	{
4745 	  machine_mode gr_mode = DImode;
4746 	  unsigned int gr_size;
4747 
4748 	  /* If we have an odd 4 byte hunk because we ran out of FR regs,
4749 	     then this goes in a GR reg left adjusted/little endian, right
4750 	     adjusted/big endian.  */
4751 	  /* ??? Currently this is handled wrong, because 4-byte hunks are
4752 	     always right adjusted/little endian.  */
4753 	  if (offset & 0x4)
4754 	    gr_mode = SImode;
4755 	  /* If we have an even 4 byte hunk because the aggregate is a
4756 	     multiple of 4 bytes in size, then this goes in a GR reg right
4757 	     adjusted/little endian.  */
4758 	  else if (byte_size - offset == 4)
4759 	    gr_mode = SImode;
4760 
4761 	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4762 				      gen_rtx_REG (gr_mode, (basereg
4763 							     + int_regs)),
4764 				      GEN_INT (offset));
4765 
4766 	  gr_size = GET_MODE_SIZE (gr_mode);
4767 	  offset += gr_size;
4768 	  if (gr_size == UNITS_PER_WORD
4769 	      || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4770 	    int_regs++;
4771 	  else if (gr_size > UNITS_PER_WORD)
4772 	    int_regs += gr_size / UNITS_PER_WORD;
4773 	}
4774       return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4775     }
4776 
4777   /* Integral and aggregates go in general registers.  If we have run out of
4778      FR registers, then FP values must also go in general registers.  This can
4779      happen when we have a SFmode HFA.  */
4780   else if (mode == TFmode || mode == TCmode
4781 	   || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4782     {
4783       int byte_size = ((mode == BLKmode)
4784                        ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4785       if (BYTES_BIG_ENDIAN
4786 	&& (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4787 	&& byte_size < UNITS_PER_WORD
4788 	&& byte_size > 0)
4789 	{
4790 	  rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4791 					  gen_rtx_REG (DImode,
4792 						       (basereg + cum->words
4793 							+ offset)),
4794 					  const0_rtx);
4795 	  return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4796 	}
4797       else
4798 	return gen_rtx_REG (mode, basereg + cum->words + offset);
4799 
4800     }
4801 
4802   /* If there is a prototype, then FP values go in a FR register when
4803      named, and in a GR register when unnamed.  */
4804   else if (cum->prototype)
4805     {
4806       if (named)
4807 	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4808       /* In big-endian mode, an anonymous SFmode value must be represented
4809          as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4810 	 the value into the high half of the general register.  */
4811       else if (BYTES_BIG_ENDIAN && mode == SFmode)
4812 	return gen_rtx_PARALLEL (mode,
4813 		 gen_rtvec (1,
4814                    gen_rtx_EXPR_LIST (VOIDmode,
4815 		     gen_rtx_REG (DImode, basereg + cum->words + offset),
4816 				      const0_rtx)));
4817       else
4818 	return gen_rtx_REG (mode, basereg + cum->words + offset);
4819     }
4820   /* If there is no prototype, then FP values go in both FR and GR
4821      registers.  */
4822   else
4823     {
4824       /* See comment above.  */
4825       machine_mode inner_mode =
4826 	(BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4827 
4828       rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4829 				      gen_rtx_REG (mode, (FR_ARG_FIRST
4830 							  + cum->fp_regs)),
4831 				      const0_rtx);
4832       rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4833 				      gen_rtx_REG (inner_mode,
4834 						   (basereg + cum->words
4835 						    + offset)),
4836 				      const0_rtx);
4837 
4838       return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4839     }
4840 }
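
/* Worked example (illustrative): for a named, prototyped HFA such as
   struct { double a, b, c; } with no slots used yet, the HFA branch above
   builds roughly

     (parallel [(expr_list (reg:DF f8) (const_int 0))
                (expr_list (reg:DF f9) (const_int 8))
                (expr_list (reg:DF f10) (const_int 16))])

   i.e. one FR argument register per element at 8-byte offsets.  */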
4841 
4842 /* Implement TARGET_FUNCTION_ARG target hook.  */
4843 
4844 static rtx
4845 ia64_function_arg (cumulative_args_t cum, machine_mode mode,
4846 		   const_tree type, bool named)
4847 {
4848   return ia64_function_arg_1 (cum, mode, type, named, false);
4849 }
4850 
4851 /* Implement TARGET_FUNCTION_INCOMING_ARG target hook.  */
4852 
4853 static rtx
4854 ia64_function_incoming_arg (cumulative_args_t cum,
4855 			    machine_mode mode,
4856 			    const_tree type, bool named)
4857 {
4858   return ia64_function_arg_1 (cum, mode, type, named, true);
4859 }
4860 
4861 /* Return the number of bytes, at the beginning of the argument, that must be
4862    put in registers.  0 means the argument is entirely in registers or entirely
4863    in memory.  */
4864 
4865 static int
4866 ia64_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
4867 			tree type, bool named ATTRIBUTE_UNUSED)
4868 {
4869   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4870 
4871   int words = ia64_function_arg_words (type, mode);
4872   int offset = ia64_function_arg_offset (cum, type, words);
4873 
4874   /* If all argument slots are used, then it must go on the stack.  */
4875   if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4876     return 0;
4877 
4878   /* It doesn't matter whether the argument goes in FR or GR regs.  If
4879      it fits within the 8 argument slots, then it goes entirely in
4880      registers.  If it extends past the last argument slot, then the rest
4881      goes on the stack.  */
4882 
4883   if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4884     return 0;
4885 
4886   return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
4887 }
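
/* Worked example (illustrative): with MAX_ARGUMENT_SLOTS == 8, cum->words
   == 6, no alignment skip, and a 4-word argument, only 8 - 6 = 2 words fit
   in registers, so 16 bytes are returned here and the remaining 2 words go
   on the stack.  */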
4888 
4889 /* Return ivms_arg_type based on machine_mode.  */
4890 
4891 static enum ivms_arg_type
4892 ia64_arg_type (machine_mode mode)
4893 {
4894   switch (mode)
4895     {
4896     case SFmode:
4897       return FS;
4898     case DFmode:
4899       return FT;
4900     default:
4901       return I64;
4902     }
4903 }
4904 
4905 /* Update CUM to point after this argument.  This is patterned after
4906    ia64_function_arg.  */
4907 
4908 static void
4909 ia64_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
4910 			   const_tree type, bool named)
4911 {
4912   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4913   int words = ia64_function_arg_words (type, mode);
4914   int offset = ia64_function_arg_offset (cum, type, words);
4915   machine_mode hfa_mode = VOIDmode;
4916 
4917   /* If all arg slots are already full, then there is nothing to do.  */
4918   if (cum->words >= MAX_ARGUMENT_SLOTS)
4919     {
4920       cum->words += words + offset;
4921       return;
4922     }
4923 
4924   cum->atypes[cum->words] = ia64_arg_type (mode);
4925   cum->words += words + offset;
4926 
4927   /* On OpenVMS an argument is either in Rn or Fn.  */
4928   if (TARGET_ABI_OPEN_VMS)
4929     {
4930       cum->int_regs = cum->words;
4931       cum->fp_regs = cum->words;
4932       return;
4933     }
4934 
4935   /* Check for and handle homogeneous FP aggregates.  */
4936   if (type)
4937     hfa_mode = hfa_element_mode (type, 0);
4938 
4939   /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
4940      and unprototyped hfas are passed specially.  */
4941   if (hfa_mode != VOIDmode && (! cum->prototype || named))
4942     {
4943       int fp_regs = cum->fp_regs;
4944       /* This is the original value of cum->words + offset.  */
4945       int int_regs = cum->words - words;
4946       int hfa_size = GET_MODE_SIZE (hfa_mode);
4947       int byte_size;
4948       int args_byte_size;
4949 
4950       /* If prototyped, pass it in FR regs then GR regs.
4951 	 If not prototyped, pass it in both FR and GR regs.
4952 
4953 	 If this is an SFmode aggregate, then it is possible to run out of
4954 	 FR regs while GR regs are still left.  In that case, we pass the
4955 	 remaining part in the GR regs.  */
4956 
4957       /* Fill the FP regs.  We do this always.  We stop if we reach the end
4958 	 of the argument, the last FP register, or the last argument slot.  */
4959 
4960       byte_size = ((mode == BLKmode)
4961 		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4962       args_byte_size = int_regs * UNITS_PER_WORD;
4963       offset = 0;
4964       for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4965 	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4966 	{
4967 	  offset += hfa_size;
4968 	  args_byte_size += hfa_size;
4969 	  fp_regs++;
4970 	}
4971 
4972       cum->fp_regs = fp_regs;
4973     }
4974 
4975   /* Integral and aggregates go in general registers.  So do TFmode FP values.
4976      If we have run out of FR registers, then other FP values must also go in
4977      general registers.  This can happen when we have a SFmode HFA.  */
4978   else if (mode == TFmode || mode == TCmode
4979            || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4980     cum->int_regs = cum->words;
4981 
4982   /* If there is a prototype, then FP values go in a FR register when
4983      named, and in a GR register when unnamed.  */
4984   else if (cum->prototype)
4985     {
4986       if (! named)
4987 	cum->int_regs = cum->words;
4988       else
4989 	/* ??? Complex types should not reach here.  */
4990 	cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4991     }
4992   /* If there is no prototype, then FP values go in both FR and GR
4993      registers.  */
4994   else
4995     {
4996       /* ??? Complex types should not reach here.  */
4997       cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4998       cum->int_regs = cum->words;
4999     }
5000 }
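
/* Worked example (illustrative): advancing past an unprototyped DFmode
   argument sitting in slot 2 sets cum->words to 3, bumps cum->fp_regs by
   one, and records cum->int_regs = 3, mirroring the FR/GR PARALLEL that
   ia64_function_arg_1 builds for the unprototyped case.  */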
5001 
5002 /* Arguments with alignment larger than 8 bytes start at the next even
5003    boundary.  On ILP32 HPUX, TFmode arguments start on the next even boundary
5004    even though their normal alignment is 8 bytes.  See ia64_function_arg.  */
5005 
5006 static unsigned int
5007 ia64_function_arg_boundary (machine_mode mode, const_tree type)
5008 {
5009   if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
5010     return PARM_BOUNDARY * 2;
5011 
5012   if (type)
5013     {
5014       if (TYPE_ALIGN (type) > PARM_BOUNDARY)
5015         return PARM_BOUNDARY * 2;
5016       else
5017         return PARM_BOUNDARY;
5018     }
5019 
5020   if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
5021     return PARM_BOUNDARY * 2;
5022   else
5023     return PARM_BOUNDARY;
5024 }
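
/* Worked example (illustrative): a TFmode long double under ILP32 HP-UX
   reports a boundary of 2 * PARM_BOUNDARY = 128 bits even though its
   natural alignment is 64, so it starts in an even slot; a plain int
   reports the usual 64-bit PARM_BOUNDARY.  */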
5025 
5026 /* True if it is OK to do sibling call optimization for the specified
5027    call expression EXP.  DECL will be the called function, or NULL if
5028    this is an indirect call.  */
5029 static bool
5030 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5031 {
5032   /* We can't perform a sibcall if the current function has the syscall_linkage
5033      attribute.  */
5034   if (lookup_attribute ("syscall_linkage",
5035 			TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
5036     return false;
5037 
5038   /* We must always return with our current GP.  This means we can
5039      only sibcall to functions defined in the current module unless
5040      TARGET_CONST_GP is set to true.  */
5041   return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
5042 }
5043 
5044 
5045 /* Implement va_arg.  */
5046 
5047 static tree
5048 ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
5049 		      gimple_seq *post_p)
5050 {
5051   /* Variable sized types are passed by reference.  */
5052   if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
5053     {
5054       tree ptrtype = build_pointer_type (type);
5055       tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
5056       return build_va_arg_indirect_ref (addr);
5057     }
5058 
5059   /* Aggregate arguments with alignment larger than 8 bytes start at
5060      the next even boundary.  Integer and floating point arguments
5061      do so if they are larger than 8 bytes, whether or not they are
5062      also aligned larger than 8 bytes.  */
5063   if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
5064       ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
5065     {
5066       tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1);
5067       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5068 		  build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD));
5069       gimplify_assign (unshare_expr (valist), t, pre_p);
5070     }
5071 
5072   return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5073 }
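
/* Worked example (illustrative): for a 16-byte-aligned record type the
   code above emits the gimple equivalent of

     valist = (valist + 15) & -16;

   before deferring to std_gimplify_va_arg_expr, i.e. the ap pointer is
   rounded up to the next even 8-byte slot.  */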
5074 
5075 /* Return 1 if the function return value is returned in memory.  Return 0 if
5076    it is in a register.  */
5077 
5078 static bool
5079 ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
5080 {
5081   machine_mode mode;
5082   machine_mode hfa_mode;
5083   HOST_WIDE_INT byte_size;
5084 
5085   mode = TYPE_MODE (valtype);
5086   byte_size = GET_MODE_SIZE (mode);
5087   if (mode == BLKmode)
5088     {
5089       byte_size = int_size_in_bytes (valtype);
5090       if (byte_size < 0)
5091 	return true;
5092     }
5093 
5094   /* HFAs with up to 8 elements are returned in the FP argument registers.  */
5095 
5096   hfa_mode = hfa_element_mode (valtype, 0);
5097   if (hfa_mode != VOIDmode)
5098     {
5099       int hfa_size = GET_MODE_SIZE (hfa_mode);
5100 
5101       if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
5102 	return true;
5103       else
5104 	return false;
5105     }
5106   else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
5107     return true;
5108   else
5109     return false;
5110 }
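
/* Worked example (illustrative): an HFA of eight floats (32 bytes with a
   4-byte element) gives byte_size / hfa_size == 8, which does not exceed
   MAX_ARGUMENT_SLOTS, so it comes back in FP registers; an HFA of nine
   floats, or a plain aggregate larger than the integer return slots, is
   returned in memory.  */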
5111 
5112 /* Return rtx for register that holds the function return value.  */
5113 
5114 static rtx
5115 ia64_function_value (const_tree valtype,
5116 		     const_tree fn_decl_or_type,
5117 		     bool outgoing ATTRIBUTE_UNUSED)
5118 {
5119   machine_mode mode;
5120   machine_mode hfa_mode;
5121   int unsignedp;
5122   const_tree func = fn_decl_or_type;
5123 
5124   if (fn_decl_or_type
5125       && !DECL_P (fn_decl_or_type))
5126     func = NULL;
5127 
5128   mode = TYPE_MODE (valtype);
5129   hfa_mode = hfa_element_mode (valtype, 0);
5130 
5131   if (hfa_mode != VOIDmode)
5132     {
5133       rtx loc[8];
5134       int i;
5135       int hfa_size;
5136       int byte_size;
5137       int offset;
5138 
5139       hfa_size = GET_MODE_SIZE (hfa_mode);
5140       byte_size = ((mode == BLKmode)
5141 		   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
5142       offset = 0;
5143       for (i = 0; offset < byte_size; i++)
5144 	{
5145 	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5146 				      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
5147 				      GEN_INT (offset));
5148 	  offset += hfa_size;
5149 	}
5150       return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5151     }
5152   else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
5153     return gen_rtx_REG (mode, FR_ARG_FIRST);
5154   else
5155     {
5156       bool need_parallel = false;
5157 
5158       /* In big-endian mode, we need to manage the layout of aggregates
5159 	 in the registers so that we get the bits properly aligned in
5160 	 the highpart of the registers.  */
5161       if (BYTES_BIG_ENDIAN
5162 	  && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
5163 	need_parallel = true;
5164 
5165       /* Something like struct S { long double x; char a[0] } is not an
5166 	 HFA structure, and therefore doesn't go in fp registers.  But
5167 	 the middle-end will give it XFmode anyway, and XFmode values
5168 	 don't normally fit in integer registers.  So we need to smuggle
5169 	 the value inside a parallel.  */
5170       else if (mode == XFmode || mode == XCmode || mode == RFmode)
5171 	need_parallel = true;
5172 
5173       if (need_parallel)
5174 	{
5175 	  rtx loc[8];
5176 	  int offset;
5177 	  int bytesize;
5178 	  int i;
5179 
5180 	  offset = 0;
5181 	  bytesize = int_size_in_bytes (valtype);
5182 	  /* An empty PARALLEL is invalid here, but the return value
5183 	     doesn't matter for empty structs.  */
5184 	  if (bytesize == 0)
5185 	    return gen_rtx_REG (mode, GR_RET_FIRST);
5186 	  for (i = 0; offset < bytesize; i++)
5187 	    {
5188 	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5189 					  gen_rtx_REG (DImode,
5190 						       GR_RET_FIRST + i),
5191 					  GEN_INT (offset));
5192 	      offset += UNITS_PER_WORD;
5193 	    }
5194 	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5195 	}
5196 
5197       mode = promote_function_mode (valtype, mode, &unsignedp,
5198                                     func ? TREE_TYPE (func) : NULL_TREE,
5199                                     true);
5200 
5201       return gen_rtx_REG (mode, GR_RET_FIRST);
5202     }
5203 }
5204 
5205 /* Worker function for TARGET_LIBCALL_VALUE.  */
5206 
5207 static rtx
5208 ia64_libcall_value (machine_mode mode,
5209 		    const_rtx fun ATTRIBUTE_UNUSED)
5210 {
5211   return gen_rtx_REG (mode,
5212 		      (((GET_MODE_CLASS (mode) == MODE_FLOAT
5213 			 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5214 			&& (mode) != TFmode)
5215 		       ? FR_RET_FIRST : GR_RET_FIRST));
5216 }
5217 
5218 /* Worker function for FUNCTION_VALUE_REGNO_P.  */
5219 
5220 static bool
5221 ia64_function_value_regno_p (const unsigned int regno)
5222 {
5223   return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
5224           || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
5225 }
5226 
5227 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
5228    We need to emit DTP-relative relocations.  */
5229 
5230 static void
5231 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
5232 {
5233   gcc_assert (size == 4 || size == 8);
5234   if (size == 4)
5235     fputs ("\tdata4.ua\t@dtprel(", file);
5236   else
5237     fputs ("\tdata8.ua\t@dtprel(", file);
5238   output_addr_const (file, x);
5239   fputs (")", file);
5240 }
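
/* For example, a size-8 request against symbol x produces the directive
   text "\tdata8.ua\t@dtprel(x)"; no trailing newline is printed here.  */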
5241 
5242 /* Print a memory address as an operand to reference that memory location.  */
5243 
5244 /* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
5245    also call this from ia64_print_operand for memory addresses.  */
5246 
5247 static void
5248 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
5249 			    machine_mode /*mode*/,
5250 			    rtx address ATTRIBUTE_UNUSED)
5251 {
5252 }
5253 
5254 /* Print an operand to an assembler instruction.
5255    C	Swap and print a comparison operator.
5256    D	Print an FP comparison operator.
5257    E    Print 32 - constant, for SImode shifts as extract.
5258    e    Print 64 - constant, for DImode rotates.
5259    F	A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
5260         a floating point register emitted normally.
5261    G	A floating point constant.
5262    I	Invert a predicate register by adding 1.
5263    J    Select the proper predicate register for a condition.
5264    j    Select the inverse predicate register for a condition.
5265    O	Append .acq for volatile load.
5266    P	Postincrement of a MEM.
5267    Q	Append .rel for volatile store.
5268    R	Print .s .d or nothing for a single, double or no truncation.
5269    S	Shift amount for shladd instruction.
5270    T	Print an 8-bit sign extended number (K) as a 32-bit unsigned number
5271 	for Intel assembler.
5272    U	Print an 8-bit sign extended number (K) as a 64-bit unsigned number
5273 	for Intel assembler.
5274    X	A pair of floating point registers.
5275    r	Print register name, or constant 0 as r0.  HP compatibility for
5276 	Linux kernel.
5277    v    Print vector constant value as an 8-byte integer value.  */
5278 
5279 static void
5280 ia64_print_operand (FILE * file, rtx x, int code)
5281 {
5282   const char *str;
5283 
5284   switch (code)
5285     {
5286     case 0:
5287       /* Handled below.  */
5288       break;
5289 
5290     case 'C':
5291       {
5292 	enum rtx_code c = swap_condition (GET_CODE (x));
5293 	fputs (GET_RTX_NAME (c), file);
5294 	return;
5295       }
5296 
5297     case 'D':
5298       switch (GET_CODE (x))
5299 	{
5300 	case NE:
5301 	  str = "neq";
5302 	  break;
5303 	case UNORDERED:
5304 	  str = "unord";
5305 	  break;
5306 	case ORDERED:
5307 	  str = "ord";
5308 	  break;
5309 	case UNLT:
5310 	  str = "nge";
5311 	  break;
5312 	case UNLE:
5313 	  str = "ngt";
5314 	  break;
5315 	case UNGT:
5316 	  str = "nle";
5317 	  break;
5318 	case UNGE:
5319 	  str = "nlt";
5320 	  break;
5321 	case UNEQ:
5322 	case LTGT:
5323 	  gcc_unreachable ();
5324 	default:
5325 	  str = GET_RTX_NAME (GET_CODE (x));
5326 	  break;
5327 	}
5328       fputs (str, file);
5329       return;
5330 
5331     case 'E':
5332       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
5333       return;
5334 
5335     case 'e':
5336       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
5337       return;
5338 
5339     case 'F':
5340       if (x == CONST0_RTX (GET_MODE (x)))
5341 	str = reg_names [FR_REG (0)];
5342       else if (x == CONST1_RTX (GET_MODE (x)))
5343 	str = reg_names [FR_REG (1)];
5344       else
5345 	{
5346 	  gcc_assert (GET_CODE (x) == REG);
5347 	  str = reg_names [REGNO (x)];
5348 	}
5349       fputs (str, file);
5350       return;
5351 
5352     case 'G':
5353       {
5354 	long val[4];
5355 	real_to_target (val, CONST_DOUBLE_REAL_VALUE (x), GET_MODE (x));
5356 	if (GET_MODE (x) == SFmode)
5357 	  fprintf (file, "0x%08lx", val[0] & 0xffffffff);
5358 	else if (GET_MODE (x) == DFmode)
5359 	  fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
5360 					  & 0xffffffff,
5361 					 (WORDS_BIG_ENDIAN ? val[1] : val[0])
5362 					  & 0xffffffff);
5363 	else
5364 	  output_operand_lossage ("invalid %%G mode");
5365       }
5366       return;
5367 
5368     case 'I':
5369       fputs (reg_names [REGNO (x) + 1], file);
5370       return;
5371 
5372     case 'J':
5373     case 'j':
5374       {
5375 	unsigned int regno = REGNO (XEXP (x, 0));
5376 	if (GET_CODE (x) == EQ)
5377 	  regno += 1;
5378 	if (code == 'j')
5379 	  regno ^= 1;
5380         fputs (reg_names [regno], file);
5381       }
5382       return;
5383 
5384     case 'O':
5385       if (MEM_VOLATILE_P (x))
5386 	fputs(".acq", file);
5387       return;
5388 
5389     case 'P':
5390       {
5391 	HOST_WIDE_INT value;
5392 
5393 	switch (GET_CODE (XEXP (x, 0)))
5394 	  {
5395 	  default:
5396 	    return;
5397 
5398 	  case POST_MODIFY:
5399 	    x = XEXP (XEXP (XEXP (x, 0), 1), 1);
5400 	    if (GET_CODE (x) == CONST_INT)
5401 	      value = INTVAL (x);
5402 	    else
5403 	      {
5404 		gcc_assert (GET_CODE (x) == REG);
5405 		fprintf (file, ", %s", reg_names[REGNO (x)]);
5406 		return;
5407 	      }
5408 	    break;
5409 
5410 	  case POST_INC:
5411 	    value = GET_MODE_SIZE (GET_MODE (x));
5412 	    break;
5413 
5414 	  case POST_DEC:
5415 	    value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
5416 	    break;
5417 	  }
5418 
5419 	fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
5420 	return;
5421       }
5422 
5423     case 'Q':
5424       if (MEM_VOLATILE_P (x))
5425 	fputs(".rel", file);
5426       return;
5427 
5428     case 'R':
5429       if (x == CONST0_RTX (GET_MODE (x)))
5430 	fputs(".s", file);
5431       else if (x == CONST1_RTX (GET_MODE (x)))
5432 	fputs(".d", file);
5433       else if (x == CONST2_RTX (GET_MODE (x)))
5434 	;
5435       else
5436 	output_operand_lossage ("invalid %%R value");
5437       return;
5438 
5439     case 'S':
5440       fprintf (file, "%d", exact_log2 (INTVAL (x)));
5441       return;
5442 
5443     case 'T':
5444       if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5445 	{
5446 	  fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
5447 	  return;
5448 	}
5449       break;
5450 
5451     case 'U':
5452       if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5453 	{
5454 	  const char *prefix = "0x";
5455 	  if (INTVAL (x) & 0x80000000)
5456 	    {
5457 	      fprintf (file, "0xffffffff");
5458 	      prefix = "";
5459 	    }
5460 	  fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
5461 	  return;
5462 	}
5463       break;
5464 
5465     case 'X':
5466       {
5467 	unsigned int regno = REGNO (x);
5468 	fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
5469       }
5470       return;
5471 
5472     case 'r':
5473       /* If this operand is the constant zero, write it as register zero.
5474 	 Any register, zero, or CONST_INT value is OK here.  */
5475       if (GET_CODE (x) == REG)
5476 	fputs (reg_names[REGNO (x)], file);
5477       else if (x == CONST0_RTX (GET_MODE (x)))
5478 	fputs ("r0", file);
5479       else if (GET_CODE (x) == CONST_INT)
5480 	output_addr_const (file, x);
5481       else
5482 	output_operand_lossage ("invalid %%r value");
5483       return;
5484 
5485     case 'v':
5486       gcc_assert (GET_CODE (x) == CONST_VECTOR);
5487       x = simplify_subreg (DImode, x, GET_MODE (x), 0);
5488       break;
5489 
5490     case '+':
5491       {
5492 	const char *which;
5493 
5494 	/* For conditional branches, returns or calls, substitute
5495 	   sptk, dptk, dpnt, or spnt for %s.  */
5496 	x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5497 	if (x)
5498 	  {
5499 	    int pred_val = XINT (x, 0);
5500 
5501 	    /* Guess top and bottom 10% statically predicted.  */
5502 	    if (pred_val < REG_BR_PROB_BASE / 50
5503 		&& br_prob_note_reliable_p (x))
5504 	      which = ".spnt";
5505 	    else if (pred_val < REG_BR_PROB_BASE / 2)
5506 	      which = ".dpnt";
5507 	    else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5508 		     || !br_prob_note_reliable_p (x))
5509 	      which = ".dptk";
5510 	    else
5511 	      which = ".sptk";
5512 	  }
5513 	else if (CALL_P (current_output_insn))
5514 	  which = ".sptk";
5515 	else
5516 	  which = ".dptk";
5517 
5518 	fputs (which, file);
5519 	return;
5520       }
5521 
5522     case ',':
5523       x = current_insn_predicate;
5524       if (x)
5525 	{
5526 	  unsigned int regno = REGNO (XEXP (x, 0));
5527 	  if (GET_CODE (x) == EQ)
5528 	    regno += 1;
5529           fprintf (file, "(%s) ", reg_names [regno]);
5530 	}
5531       return;
5532 
5533     default:
5534       output_operand_lossage ("ia64_print_operand: unknown code");
5535       return;
5536     }
5537 
5538   switch (GET_CODE (x))
5539     {
5540       /* This happens for the spill/restore instructions.  */
5541     case POST_INC:
5542     case POST_DEC:
5543     case POST_MODIFY:
5544       x = XEXP (x, 0);
5545       /* ... fall through ...  */
5546 
5547     case REG:
5548       fputs (reg_names [REGNO (x)], file);
5549       break;
5550 
5551     case MEM:
5552       {
5553 	rtx addr = XEXP (x, 0);
5554 	if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
5555 	  addr = XEXP (addr, 0);
5556 	fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5557 	break;
5558       }
5559 
5560     default:
5561       output_addr_const (file, x);
5562       break;
5563     }
5564 
5565   return;
5566 }
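
/* Worked example (illustrative): given operand 0 = (mem:DI (post_inc
   (reg r14))), plain "%0" prints "[r14]", "%P0" appends ", 8" (the mode
   size of the post-increment), and "%O0"/"%Q0" print ".acq"/".rel" only
   when the MEM is volatile.  */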
5567 
5568 /* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P.  */
5569 
5570 static bool
5571 ia64_print_operand_punct_valid_p (unsigned char code)
5572 {
5573   return (code == '+' || code == ',');
5574 }
5575 
5576 /* Compute a (partial) cost for rtx X.  Return true if the complete
5577    cost has been computed, and false if subexpressions should be
5578    scanned.  In either case, *TOTAL contains the cost result.  */
5579 /* ??? This is incomplete.  */
5580 
5581 static bool
5582 ia64_rtx_costs (rtx x, machine_mode mode, int outer_code,
5583 		int opno ATTRIBUTE_UNUSED,
5584 		int *total, bool speed ATTRIBUTE_UNUSED)
5585 {
5586   int code = GET_CODE (x);
5587 
5588   switch (code)
5589     {
5590     case CONST_INT:
5591       switch (outer_code)
5592         {
5593         case SET:
5594 	  *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
5595 	  return true;
5596         case PLUS:
5597 	  if (satisfies_constraint_I (x))
5598 	    *total = 0;
5599 	  else if (satisfies_constraint_J (x))
5600 	    *total = 1;
5601 	  else
5602 	    *total = COSTS_N_INSNS (1);
5603 	  return true;
5604         default:
5605 	  if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
5606 	    *total = 0;
5607 	  else
5608 	    *total = COSTS_N_INSNS (1);
5609 	  return true;
5610 	}
5611 
5612     case CONST_DOUBLE:
5613       *total = COSTS_N_INSNS (1);
5614       return true;
5615 
5616     case CONST:
5617     case SYMBOL_REF:
5618     case LABEL_REF:
5619       *total = COSTS_N_INSNS (3);
5620       return true;
5621 
5622     case FMA:
5623       *total = COSTS_N_INSNS (4);
5624       return true;
5625 
5626     case MULT:
5627       /* For multiplies wider than HImode, we have to go to the FPU,
5628          which normally involves copies.  Plus there's the latency
5629          of the multiply itself, and the latency of the instructions to
5630          transfer integer regs to FP regs.  */
5631       if (FLOAT_MODE_P (mode))
5632 	*total = COSTS_N_INSNS (4);
5633       else if (GET_MODE_SIZE (mode) > 2)
5634         *total = COSTS_N_INSNS (10);
5635       else
5636 	*total = COSTS_N_INSNS (2);
5637       return true;
5638 
5639     case PLUS:
5640     case MINUS:
5641       if (FLOAT_MODE_P (mode))
5642 	{
5643 	  *total = COSTS_N_INSNS (4);
5644 	  return true;
5645 	}
5646       /* FALLTHRU */
5647 
5648     case ASHIFT:
5649     case ASHIFTRT:
5650     case LSHIFTRT:
5651       *total = COSTS_N_INSNS (1);
5652       return true;
5653 
5654     case DIV:
5655     case UDIV:
5656     case MOD:
5657     case UMOD:
5658       /* We make divide expensive, so that divide-by-constant will be
5659          optimized to a multiply.  */
5660       *total = COSTS_N_INSNS (60);
5661       return true;
5662 
5663     default:
5664       return false;
5665     }
5666 }
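
/* Worked example (illustrative): a CONST_INT used as the source of a SET
   is free when it satisfies the "J" constraint (a 22-bit signed add
   immediate) and costs one insn otherwise; an integer DImode multiply is
   charged ten insns because it has to round-trip through the FP unit.  */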
5667 
5668 /* Calculate the cost of moving data from a register in class FROM to
5669    one in class TO, using MODE.  */
5670 
5671 static int
5672 ia64_register_move_cost (machine_mode mode, reg_class_t from,
5673 			 reg_class_t to)
5674 {
5675   /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
5676   if (to == ADDL_REGS)
5677     to = GR_REGS;
5678   if (from == ADDL_REGS)
5679     from = GR_REGS;
5680 
5681   /* All costs are symmetric, so reduce cases by putting the
5682      lower number class as the destination.  */
5683   if (from < to)
5684     {
5685       reg_class_t tmp = to;
5686       to = from, from = tmp;
5687     }
5688 
5689   /* Moving from FR<->GR in XFmode must be more expensive than 2,
5690      so that we get secondary memory reloads.  Between FR_REGS,
5691      we have to make this at least as expensive as memory_move_cost
5692      to avoid spectacularly poor register class preferencing.  */
5693   if (mode == XFmode || mode == RFmode)
5694     {
5695       if (to != GR_REGS || from != GR_REGS)
5696         return memory_move_cost (mode, to, false);
5697       else
5698 	return 3;
5699     }
5700 
5701   switch (to)
5702     {
5703     case PR_REGS:
5704       /* Moving between PR registers takes two insns.  */
5705       if (from == PR_REGS)
5706 	return 3;
5707       /* Moving between PR and anything but GR is impossible.  */
5708       if (from != GR_REGS)
5709 	return memory_move_cost (mode, to, false);
5710       break;
5711 
5712     case BR_REGS:
5713       /* Moving between BR and anything but GR is impossible.  */
5714       if (from != GR_REGS && from != GR_AND_BR_REGS)
5715 	return memory_move_cost (mode, to, false);
5716       break;
5717 
5718     case AR_I_REGS:
5719     case AR_M_REGS:
5720       /* Moving between AR and anything but GR is impossible.  */
5721       if (from != GR_REGS)
5722 	return memory_move_cost (mode, to, false);
5723       break;
5724 
5725     case GR_REGS:
5726     case FR_REGS:
5727     case FP_REGS:
5728     case GR_AND_FR_REGS:
5729     case GR_AND_BR_REGS:
5730     case ALL_REGS:
5731       break;
5732 
5733     default:
5734       gcc_unreachable ();
5735     }
5736 
5737   return 2;
5738 }
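
/* Worked example (illustrative): an XFmode copy between FR_REGS and
   GR_REGS is priced at the memory move cost so that reload goes through a
   secondary memory location, whereas a DImode GR<->FR copy falls through
   to the default cost of 2.  */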
5739 
5740 /* Calculate the cost of moving data of MODE from a register to or from
5741    memory.  */
5742 
5743 static int
5744 ia64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
5745 		       reg_class_t rclass,
5746 		       bool in ATTRIBUTE_UNUSED)
5747 {
5748   if (rclass == GENERAL_REGS
5749       || rclass == FR_REGS
5750       || rclass == FP_REGS
5751       || rclass == GR_AND_FR_REGS)
5752     return 4;
5753   else
5754     return 10;
5755 }
5756 
5757 /* Implement TARGET_PREFERRED_RELOAD_CLASS.  Place additional restrictions
5758    on RCLASS to use when copying X into that class.  */
5759 
5760 static reg_class_t
5761 ia64_preferred_reload_class (rtx x, reg_class_t rclass)
5762 {
5763   switch (rclass)
5764     {
5765     case FR_REGS:
5766     case FP_REGS:
5767       /* Don't allow volatile mem reloads into floating point registers.
5768 	 This is defined to force reload to choose the r/m case instead
5769 	 of the f/f case when reloading (set (reg fX) (mem/v)).  */
5770       if (MEM_P (x) && MEM_VOLATILE_P (x))
5771 	return NO_REGS;
5772 
5773       /* Force all unrecognized constants into the constant pool.  */
5774       if (CONSTANT_P (x))
5775 	return NO_REGS;
5776       break;
5777 
5778     case AR_M_REGS:
5779     case AR_I_REGS:
5780       if (!OBJECT_P (x))
5781 	return NO_REGS;
5782       break;
5783 
5784     default:
5785       break;
5786     }
5787 
5788   return rclass;
5789 }
5790 
5791 /* This function returns the register class required for a secondary
5792    register when copying between one of the registers in RCLASS, and X,
5793    using MODE.  A return value of NO_REGS means that no secondary register
5794    is required.  */
5795 
5796 enum reg_class
5797 ia64_secondary_reload_class (enum reg_class rclass,
5798 			     machine_mode mode ATTRIBUTE_UNUSED, rtx x)
5799 {
5800   int regno = -1;
5801 
5802   if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5803     regno = true_regnum (x);
5804 
5805   switch (rclass)
5806     {
5807     case BR_REGS:
5808     case AR_M_REGS:
5809     case AR_I_REGS:
5810       /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5811 	 interaction.  We end up with two pseudos with overlapping lifetimes
5812 	 both of which are equiv to the same constant, and both which need
5813 	 to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
5814 	 changes depending on the path length, which means the qty_first_reg
5815 	 check in make_regs_eqv can give different answers at different times.
5816 	 At some point I'll probably need a reload_indi pattern to handle
5817 	 this.
5818 
5819 	 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5820 	 wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
5821 	 non-general registers for good measure.  */
5822       if (regno >= 0 && ! GENERAL_REGNO_P (regno))
5823 	return GR_REGS;
5824 
5825       /* This is needed if a pseudo used as a call_operand gets spilled to a
5826 	 stack slot.  */
5827       if (GET_CODE (x) == MEM)
5828 	return GR_REGS;
5829       break;
5830 
5831     case FR_REGS:
5832     case FP_REGS:
5833       /* Need to go through general registers to get to other class regs.  */
5834       if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5835 	return GR_REGS;
5836 
5837       /* This can happen when a paradoxical subreg is an operand to the
5838 	 muldi3 pattern.  */
5839       /* ??? This shouldn't be necessary after instruction scheduling is
5840 	 enabled, because paradoxical subregs are not accepted by
5841 	 register_operand when INSN_SCHEDULING is defined.  Or alternatively,
5842 	 stop the paradoxical subreg stupidity in the *_operand functions
5843 	 in recog.c.  */
5844       if (GET_CODE (x) == MEM
5845 	  && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5846 	      || GET_MODE (x) == QImode))
5847 	return GR_REGS;
5848 
5849       /* This can happen because of the ior/and/etc patterns that accept FP
5850 	 registers as operands.  If the third operand is a constant, then it
5851 	 needs to be reloaded into a FP register.  */
5852       if (GET_CODE (x) == CONST_INT)
5853 	return GR_REGS;
5854 
5855       /* This can happen because of register elimination in a muldi3 insn.
5856 	 E.g. `26107 * (unsigned long)&u'.  */
5857       if (GET_CODE (x) == PLUS)
5858 	return GR_REGS;
5859       break;
5860 
5861     case PR_REGS:
5862       /* ??? This happens if we cse/gcse a BImode value across a call,
5863 	 and the function has a nonlocal goto.  This is because global
5864 	 does not allocate call crossing pseudos to hard registers when
5865 	 crtl->has_nonlocal_goto is true.  This is relatively
5866 	 common for C++ programs that use exceptions.  To reproduce,
5867 	 return NO_REGS and compile libstdc++.  */
5868       if (GET_CODE (x) == MEM)
5869 	return GR_REGS;
5870 
5871       /* This can happen when we take a BImode subreg of a DImode value,
5872 	 and that DImode value winds up in some non-GR register.  */
5873       if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5874 	return GR_REGS;
5875       break;
5876 
5877     default:
5878       break;
5879     }
5880 
5881   return NO_REGS;
5882 }
5883 
5884 
5885 /* Implement targetm.unspec_may_trap_p hook.  */
5886 static int
5887 ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5888 {
5889   switch (XINT (x, 1))
5890     {
5891     case UNSPEC_LDA:
5892     case UNSPEC_LDS:
5893     case UNSPEC_LDSA:
5894     case UNSPEC_LDCCLR:
5895     case UNSPEC_CHKACLR:
5896     case UNSPEC_CHKS:
5897       /* These unspecs are just wrappers.  */
5898       return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5899     }
5900 
5901   return default_unspec_may_trap_p (x, flags);
5902 }
5903 
5904 
5905 /* Parse the -mfixed-range= option string.  */
5906 
5907 static void
5908 fix_range (const char *const_str)
5909 {
5910   int i, first, last;
5911   char *str, *dash, *comma;
5912 
5913   /* str must be of the form REG1'-'REG2{,REG1'-'REG2}, where REG1 and
5914      REG2 are either register names or register numbers.  The effect
5915      of this option is to mark the registers in the range from REG1 to
5916      REG2 as ``fixed'' so they won't be used by the compiler.  This is
5917      used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */
5918 
5919   i = strlen (const_str);
5920   str = (char *) alloca (i + 1);
5921   memcpy (str, const_str, i + 1);
5922 
5923   while (1)
5924     {
5925       dash = strchr (str, '-');
5926       if (!dash)
5927 	{
5928 	  warning (0, "value of -mfixed-range must have form REG1-REG2");
5929 	  return;
5930 	}
5931       *dash = '\0';
5932 
5933       comma = strchr (dash + 1, ',');
5934       if (comma)
5935 	*comma = '\0';
5936 
5937       first = decode_reg_name (str);
5938       if (first < 0)
5939 	{
5940 	  warning (0, "unknown register name: %s", str);
5941 	  return;
5942 	}
5943 
5944       last = decode_reg_name (dash + 1);
5945       if (last < 0)
5946 	{
5947 	  warning (0, "unknown register name: %s", dash + 1);
5948 	  return;
5949 	}
5950 
5951       *dash = '-';
5952 
5953       if (first > last)
5954 	{
5955 	  warning (0, "%s-%s is an empty range", str, dash + 1);
5956 	  return;
5957 	}
5958 
5959       for (i = first; i <= last; ++i)
5960 	fixed_regs[i] = call_used_regs[i] = 1;
5961 
5962       if (!comma)
5963 	break;
5964 
5965       *comma = ',';
5966       str = comma + 1;
5967     }
5968 }
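
/* For example, "-mfixed-range=f32-f127" marks the upper floating-point
   registers as fixed so the compiler never allocates them (useful for
   kernel code); several ranges may be given, separated by commas.  */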
5969 
5970 /* Implement TARGET_OPTION_OVERRIDE.  */
5971 
5972 static void
5973 ia64_option_override (void)
5974 {
5975   unsigned int i;
5976   cl_deferred_option *opt;
5977   vec<cl_deferred_option> *v
5978     = (vec<cl_deferred_option> *) ia64_deferred_options;
5979 
5980   if (v)
5981     FOR_EACH_VEC_ELT (*v, i, opt)
5982       {
5983 	switch (opt->opt_index)
5984 	  {
5985 	  case OPT_mfixed_range_:
5986 	    fix_range (opt->arg);
5987 	    break;
5988 
5989 	  default:
5990 	    gcc_unreachable ();
5991 	  }
5992       }
5993 
5994   if (TARGET_AUTO_PIC)
5995     target_flags |= MASK_CONST_GP;
5996 
5997   /* Numerous experiments show that IRA-based loop pressure
5998      calculation works better for RTL loop invariant motion on targets
5999      with enough (>= 32) registers.  It is an expensive optimization,
6000      so it is enabled only when optimizing for peak performance.  */
6001   if (optimize >= 3)
6002     flag_ira_loop_pressure = 1;
6003 
6004 
6005   ia64_section_threshold = (global_options_set.x_g_switch_value
6006 			    ? g_switch_value
6007 			    : IA64_DEFAULT_GVALUE);
6008 
6009   init_machine_status = ia64_init_machine_status;
6010 
6011   if (align_functions <= 0)
6012     align_functions = 64;
6013   if (align_loops <= 0)
6014     align_loops = 32;
6015   if (TARGET_ABI_OPEN_VMS)
6016     flag_no_common = 1;
6017 
6018   ia64_override_options_after_change();
6019 }
6020 
6021 /* Implement targetm.override_options_after_change.  */
6022 
6023 static void
6024 ia64_override_options_after_change (void)
6025 {
6026   if (optimize >= 3
6027       && !global_options_set.x_flag_selective_scheduling
6028       && !global_options_set.x_flag_selective_scheduling2)
6029     {
6030       flag_selective_scheduling2 = 1;
6031       flag_sel_sched_pipelining = 1;
6032     }
6033   if (mflag_sched_control_spec == 2)
6034     {
6035       /* Control speculation is on by default for the selective scheduler,
6036          but not for the Haifa scheduler.  */
6037       mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
6038     }
6039   if (flag_sel_sched_pipelining && flag_auto_inc_dec)
6040     {
6041       /* FIXME: remove this once breaking auto-inc insns is implemented
6042          as a transformation.  */
6043       flag_auto_inc_dec = 0;
6044     }
6045 }
6046 
6047 /* Initialize the record of emitted frame related registers.  */
6048 
6049 void ia64_init_expanders (void)
6050 {
6051   memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
6052 }
6053 
6054 static struct machine_function *
6055 ia64_init_machine_status (void)
6056 {
6057   return ggc_cleared_alloc<machine_function> ();
6058 }
6059 
6060 static enum attr_itanium_class ia64_safe_itanium_class (rtx_insn *);
6061 static enum attr_type ia64_safe_type (rtx_insn *);
6062 
6063 static enum attr_itanium_class
6064 ia64_safe_itanium_class (rtx_insn *insn)
6065 {
6066   if (recog_memoized (insn) >= 0)
6067     return get_attr_itanium_class (insn);
6068   else if (DEBUG_INSN_P (insn))
6069     return ITANIUM_CLASS_IGNORE;
6070   else
6071     return ITANIUM_CLASS_UNKNOWN;
6072 }
6073 
6074 static enum attr_type
6075 ia64_safe_type (rtx_insn *insn)
6076 {
6077   if (recog_memoized (insn) >= 0)
6078     return get_attr_type (insn);
6079   else
6080     return TYPE_UNKNOWN;
6081 }
6082 
6083 /* The following collection of routines emit instruction group stop bits as
6084    necessary to avoid dependencies.  */
6085 
6086 /* Need to track some additional registers as far as serialization is
6087    concerned so we can properly handle br.call and br.ret.  We could
6088    make these registers visible to gcc, but since these registers are
6089    never explicitly used in gcc generated code, it seems wasteful to
6090    do so (plus it would make the call and return patterns needlessly
6091    complex).  */
6092 #define REG_RP		(BR_REG (0))
6093 #define REG_AR_CFM	(FIRST_PSEUDO_REGISTER + 1)
6094 /* This is used for volatile asms which may require a stop bit immediately
6095    before and after them.  */
6096 #define REG_VOLATILE	(FIRST_PSEUDO_REGISTER + 2)
6097 #define AR_UNAT_BIT_0	(FIRST_PSEUDO_REGISTER + 3)
6098 #define NUM_REGS	(AR_UNAT_BIT_0 + 64)
6099 
6100 /* For each register, we keep track of how it has been written in the
6101    current instruction group.
6102 
6103    If a register is written unconditionally (no qualifying predicate),
6104    WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
6105 
6106    If a register is written if its qualifying predicate P is true, we
6107    set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
6108    may be written again by the complement of P (P^1) and when this happens,
6109    WRITE_COUNT gets set to 2.
6110 
6111    The result of this is that whenever an insn attempts to write a register
6112    whose WRITE_COUNT is two, we need to issue an insn group barrier first.
6113 
6114    If a predicate register is written by a floating-point insn, we set
6115    WRITTEN_BY_FP to true.
6116 
6117    If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
6118    to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
6119 
6120 #if GCC_VERSION >= 4000
6121 #define RWS_FIELD_TYPE __extension__ unsigned short
6122 #else
6123 #define RWS_FIELD_TYPE unsigned int
6124 #endif
6125 struct reg_write_state
6126 {
6127   RWS_FIELD_TYPE write_count : 2;
6128   RWS_FIELD_TYPE first_pred : 10;
6129   RWS_FIELD_TYPE written_by_fp : 1;
6130   RWS_FIELD_TYPE written_by_and : 1;
6131   RWS_FIELD_TYPE written_by_or : 1;
6132 };
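
/* Worked example (illustrative): an unconditional "mov r8 = 1" records
   write_count == 2 for r8, so any further write to r8 in the same group
   forces a stop bit; "(p6) mov r8 = 1" instead records write_count == 1
   with first_pred == p6, as described above.  */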
6133 
6134 /* Cumulative info for the current instruction group.  */
6135 struct reg_write_state rws_sum[NUM_REGS];
6136 #if CHECKING_P
6137 /* Bitmap whether a register has been written in the current insn.  */
6138 HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
6139 			   / HOST_BITS_PER_WIDEST_FAST_INT];
6140 
6141 static inline void
6142 rws_insn_set (int regno)
6143 {
6144   gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
6145   SET_HARD_REG_BIT (rws_insn, regno);
6146 }
6147 
6148 static inline int
6149 rws_insn_test (int regno)
6150 {
6151   return TEST_HARD_REG_BIT (rws_insn, regno);
6152 }
6153 #else
6154 /* When not checking, track just REG_AR_CFM and REG_VOLATILE.  */
6155 unsigned char rws_insn[2];
6156 
6157 static inline void
6158 rws_insn_set (int regno)
6159 {
6160   if (regno == REG_AR_CFM)
6161     rws_insn[0] = 1;
6162   else if (regno == REG_VOLATILE)
6163     rws_insn[1] = 1;
6164 }
6165 
6166 static inline int
6167 rws_insn_test (int regno)
6168 {
6169   if (regno == REG_AR_CFM)
6170     return rws_insn[0];
6171   if (regno == REG_VOLATILE)
6172     return rws_insn[1];
6173   return 0;
6174 }
6175 #endif
6176 
6177 /* Indicates whether this is the first instruction after a stop bit,
6178    in which case we don't need another stop bit.  Without this,
6179    ia64_variable_issue will die when scheduling an alloc.  */
6180 static int first_instruction;
6181 
6182 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
6183    RTL for one instruction.  */
6184 struct reg_flags
6185 {
6186   unsigned int is_write : 1;	/* Is register being written?  */
6187   unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
6188   unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
6189   unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
6190   unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
6191   unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
6192 };
6193 
6194 static void rws_update (int, struct reg_flags, int);
6195 static int rws_access_regno (int, struct reg_flags, int);
6196 static int rws_access_reg (rtx, struct reg_flags, int);
6197 static void update_set_flags (rtx, struct reg_flags *);
6198 static int set_src_needs_barrier (rtx, struct reg_flags, int);
6199 static int rtx_needs_barrier (rtx, struct reg_flags, int);
6200 static void init_insn_group_barriers (void);
6201 static int group_barrier_needed (rtx_insn *);
6202 static int safe_group_barrier_needed (rtx_insn *);
6203 static int in_safe_group_barrier;
6204 
6205 /* Update *RWS for REGNO, which is being written by the current instruction,
6206    with predicate PRED, and associated register flags in FLAGS.  */
6207 
6208 static void
6209 rws_update (int regno, struct reg_flags flags, int pred)
6210 {
6211   if (pred)
6212     rws_sum[regno].write_count++;
6213   else
6214     rws_sum[regno].write_count = 2;
6215   rws_sum[regno].written_by_fp |= flags.is_fp;
6216   /* ??? Not tracking and/or across differing predicates.  */
6217   rws_sum[regno].written_by_and = flags.is_and;
6218   rws_sum[regno].written_by_or = flags.is_or;
6219   rws_sum[regno].first_pred = pred;
6220 }
6221 
6222 /* Handle an access to register REGNO of type FLAGS using predicate register
6223    PRED.  Update rws_sum array.  Return 1 if this access creates
6224    a dependency with an earlier instruction in the same group.  */
6225 
6226 static int
6227 rws_access_regno (int regno, struct reg_flags flags, int pred)
6228 {
6229   int need_barrier = 0;
6230 
6231   gcc_assert (regno < NUM_REGS);
6232 
6233   if (! PR_REGNO_P (regno))
6234     flags.is_and = flags.is_or = 0;
6235 
6236   if (flags.is_write)
6237     {
6238       int write_count;
6239 
6240       rws_insn_set (regno);
6241       write_count = rws_sum[regno].write_count;
6242 
6243       switch (write_count)
6244 	{
6245 	case 0:
6246 	  /* The register has not been written yet.  */
6247 	  if (!in_safe_group_barrier)
6248 	    rws_update (regno, flags, pred);
6249 	  break;
6250 
6251 	case 1:
6252 	  /* The register has been written via a predicate.  Treat
6253 	     it like an unconditional write and do not try to check
6254 	     for a complementary pred reg in the earlier write.  */
6255 	  if (flags.is_and && rws_sum[regno].written_by_and)
6256 	    ;
6257 	  else if (flags.is_or && rws_sum[regno].written_by_or)
6258 	    ;
6259 	  else
6260 	    need_barrier = 1;
6261 	  if (!in_safe_group_barrier)
6262 	    rws_update (regno, flags, pred);
6263 	  break;
6264 
6265 	case 2:
6266 	  /* The register has been unconditionally written already.  We
6267 	     need a barrier.  */
6268 	  if (flags.is_and && rws_sum[regno].written_by_and)
6269 	    ;
6270 	  else if (flags.is_or && rws_sum[regno].written_by_or)
6271 	    ;
6272 	  else
6273 	    need_barrier = 1;
6274 	  if (!in_safe_group_barrier)
6275 	    {
6276 	      rws_sum[regno].written_by_and = flags.is_and;
6277 	      rws_sum[regno].written_by_or = flags.is_or;
6278 	    }
6279 	  break;
6280 
6281 	default:
6282 	  gcc_unreachable ();
6283 	}
6284     }
6285   else
6286     {
6287       if (flags.is_branch)
6288 	{
6289 	  /* Branches have several RAW exceptions that allow us to avoid
6290 	     barriers.  */
6291 
6292 	  if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
6293 	    /* RAW dependencies on branch regs are permissible as long
6294 	       as the writer is a non-branch instruction.  Since we
6295 	       never generate code that uses a branch register written
6296 	       by a branch instruction, handling this case is
6297 	       easy.  */
6298 	    return 0;
6299 
6300 	  if (REGNO_REG_CLASS (regno) == PR_REGS
6301 	      && ! rws_sum[regno].written_by_fp)
6302 	    /* The predicates of a branch are available within the
6303 	       same insn group as long as the predicate was written by
6304 	       something other than a floating-point instruction.  */
6305 	    return 0;
6306 	}
6307 
6308       if (flags.is_and && rws_sum[regno].written_by_and)
6309 	return 0;
6310       if (flags.is_or && rws_sum[regno].written_by_or)
6311 	return 0;
6312 
6313       switch (rws_sum[regno].write_count)
6314 	{
6315 	case 0:
6316 	  /* The register has not been written yet.  */
6317 	  break;
6318 
6319 	case 1:
6320 	  /* The register has been written via a predicate, assume we
6321 	     need a barrier (don't check for complementary regs).  */
6322 	  need_barrier = 1;
6323 	  break;
6324 
6325 	case 2:
6326 	  /* The register has been unconditionally written already.  We
6327 	     need a barrier.  */
6328 	  need_barrier = 1;
6329 	  break;
6330 
6331 	default:
6332 	  gcc_unreachable ();
6333 	}
6334     }
6335 
6336   return need_barrier;
6337 }
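
/* Worked example (illustrative): a read of a register whose write_count
   is already nonzero reports a needed barrier, except, when the reader is
   a branch, for branch registers, ar.pfs, and predicates not written by
   FP compares, and for matching and.orcm / or.andcm parallel compares.  */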
6338 
6339 static int
6340 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
6341 {
6342   int regno = REGNO (reg);
6343   int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
6344 
6345   if (n == 1)
6346     return rws_access_regno (regno, flags, pred);
6347   else
6348     {
6349       int need_barrier = 0;
6350       while (--n >= 0)
6351 	need_barrier |= rws_access_regno (regno + n, flags, pred);
6352       return need_barrier;
6353     }
6354 }
6355 
6356 /* Examine X, which is a SET rtx, and update the flags describing its
6357    source, stored in *PFLAGS.  */
6358 
6359 static void
6360 update_set_flags (rtx x, struct reg_flags *pflags)
6361 {
6362   rtx src = SET_SRC (x);
6363 
6364   switch (GET_CODE (src))
6365     {
6366     case CALL:
6367       return;
6368 
6369     case IF_THEN_ELSE:
6370       /* There are four cases here:
6371 	 (1) The destination is (pc), in which case this is a branch,
6372 	 nothing here applies.
6373 	 (2) The destination is ar.lc, in which case this is a
6374 	 doloop_end_internal.
6375 	 (3) The destination is an fp register, in which case this is
6376 	 an fselect instruction.
6377 	 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
6378 	 this is a check load.
6379 	 In all cases, nothing we do in this function applies.  */
6380       return;
6381 
6382     default:
6383       if (COMPARISON_P (src)
6384 	  && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
6385 	/* Set pflags->is_fp to 1 so that we know we're dealing
6386 	   with a floating point comparison when processing the
6387 	   destination of the SET.  */
6388 	pflags->is_fp = 1;
6389 
6390       /* Discover if this is a parallel comparison.  We only handle
6391 	 and.orcm and or.andcm at present, since we must retain a
6392 	 strict inverse on the predicate pair.  */
6393       else if (GET_CODE (src) == AND)
6394 	pflags->is_and = 1;
6395       else if (GET_CODE (src) == IOR)
6396 	pflags->is_or = 1;
6397 
6398       break;
6399     }
6400 }
6401 
6402 /* Subroutine of rtx_needs_barrier; this function determines whether the
6403    source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
6404    are as in rtx_needs_barrier; any predication has already been handled
6405    by the caller.  */
6406 
6407 static int
6408 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
6409 {
6410   int need_barrier = 0;
6411   rtx dst;
6412   rtx src = SET_SRC (x);
6413 
6414   if (GET_CODE (src) == CALL)
6415     /* We don't need to worry about the result registers that
6416        get written by a subroutine call.  */
6417     return rtx_needs_barrier (src, flags, pred);
6418   else if (SET_DEST (x) == pc_rtx)
6419     {
6420       /* X is a conditional branch.  */
6421       /* ??? This seems redundant, as the caller sets this bit for
6422 	 all JUMP_INSNs.  */
6423       if (!ia64_spec_check_src_p (src))
6424 	flags.is_branch = 1;
6425       return rtx_needs_barrier (src, flags, pred);
6426     }
6427 
6428   if (ia64_spec_check_src_p (src))
6429     /* Avoid checking one register twice (in condition
6430        and in 'then' section) for ldc pattern.  */
6431     {
6432       gcc_assert (REG_P (XEXP (src, 2)));
6433       need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
6434 
6435       /* We process MEM below.  */
6436       src = XEXP (src, 1);
6437     }
6438 
6439   need_barrier |= rtx_needs_barrier (src, flags, pred);
6440 
6441   dst = SET_DEST (x);
6442   if (GET_CODE (dst) == ZERO_EXTRACT)
6443     {
6444       need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
6445       need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
6446     }
6447   return need_barrier;
6448 }
6449 
6450 /* Handle an access to rtx X of type FLAGS using predicate register
6451    PRED.  Return 1 if this access creates a dependency with an earlier
6452    instruction in the same group.  */
6453 
6454 static int
6455 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
6456 {
6457   int i, j;
6458   int is_complemented = 0;
6459   int need_barrier = 0;
6460   const char *format_ptr;
6461   struct reg_flags new_flags;
6462   rtx cond;
6463 
6464   if (! x)
6465     return 0;
6466 
6467   new_flags = flags;
6468 
6469   switch (GET_CODE (x))
6470     {
6471     case SET:
6472       update_set_flags (x, &new_flags);
6473       need_barrier = set_src_needs_barrier (x, new_flags, pred);
6474       if (GET_CODE (SET_SRC (x)) != CALL)
6475 	{
6476 	  new_flags.is_write = 1;
6477 	  need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
6478 	}
6479       break;
6480 
6481     case CALL:
6482       new_flags.is_write = 0;
6483       need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6484 
6485       /* Avoid multiple register writes, in case this is a pattern with
6486 	 multiple CALL rtx.  This avoids a failure in rws_access_reg.  */
6487       if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
6488 	{
6489 	  new_flags.is_write = 1;
6490 	  need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
6491 	  need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
6492 	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6493 	}
6494       break;
6495 
6496     case COND_EXEC:
6497       /* X is a predicated instruction.  */
6498 
6499       cond = COND_EXEC_TEST (x);
6500       gcc_assert (!pred);
6501       need_barrier = rtx_needs_barrier (cond, flags, 0);
6502 
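      /* An EQ test means the insn is predicated on the complement; this
	 port appears to model the complement of a predicate register as
	 the next register number, hence the ++pred below.  */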
6503       if (GET_CODE (cond) == EQ)
6504 	is_complemented = 1;
6505       cond = XEXP (cond, 0);
6506       gcc_assert (GET_CODE (cond) == REG
6507 		  && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
6508       pred = REGNO (cond);
6509       if (is_complemented)
6510 	++pred;
6511 
6512       need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6513       return need_barrier;
6514 
6515     case CLOBBER:
6516     case USE:
6517       /* Clobber & use are for earlier compiler-phases only.  */
6518       break;
6519 
6520     case ASM_OPERANDS:
6521     case ASM_INPUT:
6522       /* We always emit stop bits for traditional asms.  We emit stop bits
6523 	 for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
6524       if (GET_CODE (x) != ASM_OPERANDS
6525 	  || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6526 	{
6527 	  /* Avoid writing the register multiple times if we have multiple
6528 	     asm outputs.  This avoids a failure in rws_access_reg.  */
6529 	  if (! rws_insn_test (REG_VOLATILE))
6530 	    {
6531 	      new_flags.is_write = 1;
6532 	      rws_access_regno (REG_VOLATILE, new_flags, pred);
6533 	    }
6534 	  return 1;
6535 	}
6536 
6537       /* For all ASM_OPERANDS, we must traverse the vector of input operands.
6538 	 We cannot just fall through here since then we would be confused
6539 	 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
6540 	 a traditional asm, unlike its normal usage.  */
6541 
6542       for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6543 	if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6544 	  need_barrier = 1;
6545       break;
6546 
6547     case PARALLEL:
6548       for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6549 	{
6550 	  rtx pat = XVECEXP (x, 0, i);
6551 	  switch (GET_CODE (pat))
6552 	    {
6553 	    case SET:
6554 	      update_set_flags (pat, &new_flags);
6555 	      need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
6556 	      break;
6557 
6558 	    case USE:
6559 	    case CALL:
6560 	    case ASM_OPERANDS:
6561 	      need_barrier |= rtx_needs_barrier (pat, flags, pred);
6562 	      break;
6563 
6564 	    case CLOBBER:
6565 	      if (REG_P (XEXP (pat, 0))
6566 		  && extract_asm_operands (x) != NULL_RTX
6567 		  && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM)
6568 		{
6569 		  new_flags.is_write = 1;
6570 		  need_barrier |= rtx_needs_barrier (XEXP (pat, 0),
6571 						     new_flags, pred);
6572 		  new_flags = flags;
6573 		}
6574 	      break;
6575 
6576 	    case RETURN:
6577 	      break;
6578 
6579 	    default:
6580 	      gcc_unreachable ();
6581 	    }
6582 	}
6583       for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6584 	{
6585 	  rtx pat = XVECEXP (x, 0, i);
6586 	  if (GET_CODE (pat) == SET)
6587 	    {
6588 	      if (GET_CODE (SET_SRC (pat)) != CALL)
6589 		{
6590 		  new_flags.is_write = 1;
6591 		  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6592 						     pred);
6593 		}
6594 	    }
6595 	  else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
6596 	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
6597 	}
6598       break;
6599 
6600     case SUBREG:
6601       need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6602       break;
6603     case REG:
6604       if (REGNO (x) == AR_UNAT_REGNUM)
6605 	{
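	  /* Accesses to ar.unat are tracked bit by bit, since spills and
	     restores (see the UNSPEC_GR_SPILL handling below) touch
	     individual UNAT bits rather than the whole register.  */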
6606 	  for (i = 0; i < 64; ++i)
6607 	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6608 	}
6609       else
6610 	need_barrier = rws_access_reg (x, flags, pred);
6611       break;
6612 
6613     case MEM:
6614       /* Find the regs used in memory address computation.  */
6615       new_flags.is_write = 0;
6616       need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6617       break;
6618 
6619     case CONST_INT:   case CONST_DOUBLE:  case CONST_VECTOR:
6620     case SYMBOL_REF:  case LABEL_REF:     case CONST:
6621       break;
6622 
6623       /* Operators with side-effects.  */
6624     case POST_INC:    case POST_DEC:
6625       gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6626 
6627       new_flags.is_write = 0;
6628       need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
6629       new_flags.is_write = 1;
6630       need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6631       break;
6632 
6633     case POST_MODIFY:
6634       gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6635 
6636       new_flags.is_write = 0;
6637       need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
6638       need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6639       new_flags.is_write = 1;
6640       need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6641       break;
6642 
6643       /* Handle common unary and binary ops for efficiency.  */
6644     case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
6645     case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
6646     case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
6647     case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
6648     case NE:       case EQ:      case GE:      case GT:        case LE:
6649     case LT:       case GEU:     case GTU:     case LEU:       case LTU:
6650       need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6651       need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6652       break;
6653 
6654     case NEG:      case NOT:	        case SIGN_EXTEND:     case ZERO_EXTEND:
6655     case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
6656     case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
6657     case SQRT:     case FFS:		case POPCOUNT:
6658       need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6659       break;
6660 
6661     case VEC_SELECT:
6662       /* VEC_SELECT's second argument is a PARALLEL with integers that
6663 	 describe the elements selected.  On ia64, those integers are
6664 	 always constants.  Avoid walking the PARALLEL so that we don't
6665 	 get confused with "normal" parallels and then die.  */
6666       need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6667       break;
6668 
6669     case UNSPEC:
6670       switch (XINT (x, 1))
6671 	{
6672 	case UNSPEC_LTOFF_DTPMOD:
6673 	case UNSPEC_LTOFF_DTPREL:
6674 	case UNSPEC_DTPREL:
6675 	case UNSPEC_LTOFF_TPREL:
6676 	case UNSPEC_TPREL:
6677 	case UNSPEC_PRED_REL_MUTEX:
6678 	case UNSPEC_PIC_CALL:
6679         case UNSPEC_MF:
6680         case UNSPEC_FETCHADD_ACQ:
6681         case UNSPEC_FETCHADD_REL:
6682 	case UNSPEC_BSP_VALUE:
6683 	case UNSPEC_FLUSHRS:
6684 	case UNSPEC_BUNDLE_SELECTOR:
6685           break;
6686 
6687 	case UNSPEC_GR_SPILL:
6688 	case UNSPEC_GR_RESTORE:
6689 	  {
6690 	    HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6691 	    HOST_WIDE_INT bit = (offset >> 3) & 63;
6692 
6693 	    need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6694 	    new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
6695 	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6696 					      new_flags, pred);
6697 	    break;
6698 	  }
6699 
6700 	case UNSPEC_FR_SPILL:
6701 	case UNSPEC_FR_RESTORE:
6702 	case UNSPEC_GETF_EXP:
6703 	case UNSPEC_SETF_EXP:
6704         case UNSPEC_ADDP4:
6705 	case UNSPEC_FR_SQRT_RECIP_APPROX:
6706 	case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
6707 	case UNSPEC_LDA:
6708 	case UNSPEC_LDS:
6709 	case UNSPEC_LDS_A:
6710 	case UNSPEC_LDSA:
6711 	case UNSPEC_CHKACLR:
6712         case UNSPEC_CHKS:
6713 	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6714 	  break;
6715 
6716 	case UNSPEC_FR_RECIP_APPROX:
6717 	case UNSPEC_SHRP:
6718 	case UNSPEC_COPYSIGN:
6719 	case UNSPEC_FR_RECIP_APPROX_RES:
6720 	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6721 	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6722 	  break;
6723 
6724         case UNSPEC_CMPXCHG_ACQ:
6725         case UNSPEC_CMPXCHG_REL:
6726 	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6727 	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6728 	  break;
6729 
6730 	default:
6731 	  gcc_unreachable ();
6732 	}
6733       break;
6734 
6735     case UNSPEC_VOLATILE:
6736       switch (XINT (x, 1))
6737 	{
6738 	case UNSPECV_ALLOC:
6739 	  /* Alloc must always be the first instruction of a group.
6740 	     We force this by always returning true.  */
6741 	  /* ??? We might get better scheduling if we explicitly check for
6742 	     input/local/output register dependencies, and modify the
6743 	     scheduler so that alloc is always reordered to the start of
6744 	     the current group.  We could then eliminate all of the
6745 	     first_instruction code.  */
6746 	  rws_access_regno (AR_PFS_REGNUM, flags, pred);
6747 
6748 	  new_flags.is_write = 1;
6749 	  rws_access_regno (REG_AR_CFM, new_flags, pred);
6750 	  return 1;
6751 
6752 	case UNSPECV_SET_BSP:
6753 	case UNSPECV_PROBE_STACK_RANGE:
6754 	  need_barrier = 1;
6755           break;
6756 
6757 	case UNSPECV_BLOCKAGE:
6758 	case UNSPECV_INSN_GROUP_BARRIER:
6759 	case UNSPECV_BREAK:
6760 	case UNSPECV_PSAC_ALL:
6761 	case UNSPECV_PSAC_NORMAL:
6762 	  return 0;
6763 
6764 	case UNSPECV_PROBE_STACK_ADDRESS:
6765 	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6766 	  break;
6767 
6768 	default:
6769 	  gcc_unreachable ();
6770 	}
6771       break;
6772 
6773     case RETURN:
6774       new_flags.is_write = 0;
6775       need_barrier  = rws_access_regno (REG_RP, flags, pred);
6776       need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
6777 
6778       new_flags.is_write = 1;
6779       need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6780       need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6781       break;
6782 
6783     default:
6784       format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6785       for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6786 	switch (format_ptr[i])
6787 	  {
6788 	  case '0':	/* unused field */
6789 	  case 'i':	/* integer */
6790 	  case 'n':	/* note */
6791 	  case 'w':	/* wide integer */
6792 	  case 's':	/* pointer to string */
6793 	  case 'S':	/* optional pointer to string */
6794 	    break;
6795 
6796 	  case 'e':
6797 	    if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6798 	      need_barrier = 1;
6799 	    break;
6800 
6801 	  case 'E':
6802 	    for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6803 	      if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6804 		need_barrier = 1;
6805 	    break;
6806 
6807 	  default:
6808 	    gcc_unreachable ();
6809 	  }
6810       break;
6811     }
6812   return need_barrier;
6813 }
6814 
6815 /* Clear out the state for group_barrier_needed at the start of a
6816    sequence of insns.  */
6817 
6818 static void
6819 init_insn_group_barriers (void)
6820 {
6821   memset (rws_sum, 0, sizeof (rws_sum));
6822   first_instruction = 1;
6823 }
6824 
6825 /* Given the current state, determine whether a group barrier (a stop bit) is
6826    necessary before INSN.  Return nonzero if so.  This modifies the state to
6827    include the effects of INSN as a side-effect.  */
6828 
6829 static int
6830 group_barrier_needed (rtx_insn *insn)
6831 {
6832   rtx pat;
6833   int need_barrier = 0;
6834   struct reg_flags flags;
6835 
6836   memset (&flags, 0, sizeof (flags));
6837   switch (GET_CODE (insn))
6838     {
6839     case NOTE:
6840     case DEBUG_INSN:
6841       break;
6842 
6843     case BARRIER:
6844       /* A barrier doesn't imply an instruction group boundary.  */
6845       break;
6846 
6847     case CODE_LABEL:
6848       memset (rws_insn, 0, sizeof (rws_insn));
6849       return 1;
6850 
6851     case CALL_INSN:
6852       flags.is_branch = 1;
6853       flags.is_sibcall = SIBLING_CALL_P (insn);
6854       memset (rws_insn, 0, sizeof (rws_insn));
6855 
6856       /* Don't bundle a call following another call.  */
6857       if ((pat = prev_active_insn (insn)) && CALL_P (pat))
6858 	{
6859 	  need_barrier = 1;
6860 	  break;
6861 	}
6862 
6863       need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6864       break;
6865 
6866     case JUMP_INSN:
6867       if (!ia64_spec_check_p (insn))
6868 	flags.is_branch = 1;
6869 
6870       /* Don't bundle a jump following a call.  */
6871       if ((pat = prev_active_insn (insn)) && CALL_P (pat))
6872 	{
6873 	  need_barrier = 1;
6874 	  break;
6875 	}
6876       /* FALLTHRU */
6877 
6878     case INSN:
6879       if (GET_CODE (PATTERN (insn)) == USE
6880 	  || GET_CODE (PATTERN (insn)) == CLOBBER)
6881 	/* Don't care about USE and CLOBBER "insns"---those are used to
6882 	   indicate to the optimizer that it shouldn't get rid of
6883 	   certain operations.  */
6884 	break;
6885 
6886       pat = PATTERN (insn);
6887 
6888       /* Ug.  Hack hacks hacked elsewhere.  */
6889       switch (recog_memoized (insn))
6890 	{
6891 	  /* We play dependency tricks with the epilogue in order
6892 	     to get proper schedules.  Undo this for dv analysis.  */
6893 	case CODE_FOR_epilogue_deallocate_stack:
6894 	case CODE_FOR_prologue_allocate_stack:
6895 	  pat = XVECEXP (pat, 0, 0);
6896 	  break;
6897 
6898 	  /* The pattern we use for br.cloop confuses the code above.
6899 	     The second element of the vector is representative.  */
6900 	case CODE_FOR_doloop_end_internal:
6901 	  pat = XVECEXP (pat, 0, 1);
6902 	  break;
6903 
6904 	  /* Doesn't generate code.  */
6905 	case CODE_FOR_pred_rel_mutex:
6906 	case CODE_FOR_prologue_use:
6907 	  return 0;
6908 
6909 	default:
6910 	  break;
6911 	}
6912 
6913       memset (rws_insn, 0, sizeof (rws_insn));
6914       need_barrier = rtx_needs_barrier (pat, flags, 0);
6915 
6916       /* Check to see if the previous instruction was a volatile
6917 	 asm.  */
6918       if (! need_barrier)
6919 	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
6920 
6921       break;
6922 
6923     default:
6924       gcc_unreachable ();
6925     }
6926 
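  /* The first bundling-relevant insn of a sequence never needs a stop bit
     in front of it, so suppress any barrier computed above.  */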
6927   if (first_instruction && important_for_bundling_p (insn))
6928     {
6929       need_barrier = 0;
6930       first_instruction = 0;
6931     }
6932 
6933   return need_barrier;
6934 }
6935 
6936 /* Like group_barrier_needed, but do not clobber the current state.  */
6937 
6938 static int
6939 safe_group_barrier_needed (rtx_insn *insn)
6940 {
6941   int saved_first_instruction;
6942   int t;
6943 
6944   saved_first_instruction = first_instruction;
6945   in_safe_group_barrier = 1;
6946 
6947   t = group_barrier_needed (insn);
6948 
6949   first_instruction = saved_first_instruction;
6950   in_safe_group_barrier = 0;
6951 
6952   return t;
6953 }
6954 
6955 /* Scan the current function and insert stop bits as necessary to
6956    eliminate dependencies.  This function assumes that a final
6957    instruction scheduling pass has been run which has already
6958    inserted most of the necessary stop bits.  This function only
6959    inserts new ones at basic block boundaries, since these are
6960    invisible to the scheduler.  */
6961 
6962 static void
6963 emit_insn_group_barriers (FILE *dump)
6964 {
6965   rtx_insn *insn;
6966   rtx_insn *last_label = 0;
6967   int insns_since_last_label = 0;
6968 
6969   init_insn_group_barriers ();
6970 
6971   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6972     {
6973       if (LABEL_P (insn))
6974 	{
6975 	  if (insns_since_last_label)
6976 	    last_label = insn;
6977 	  insns_since_last_label = 0;
6978 	}
6979       else if (NOTE_P (insn)
6980 	       && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
6981 	{
6982 	  if (insns_since_last_label)
6983 	    last_label = insn;
6984 	  insns_since_last_label = 0;
6985 	}
6986       else if (NONJUMP_INSN_P (insn)
6987 	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6988 	       && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
6989 	{
6990 	  init_insn_group_barriers ();
6991 	  last_label = 0;
6992 	}
6993       else if (NONDEBUG_INSN_P (insn))
6994 	{
6995 	  insns_since_last_label = 1;
6996 
6997 	  if (group_barrier_needed (insn))
6998 	    {
6999 	      if (last_label)
7000 		{
7001 		  if (dump)
7002 		    fprintf (dump, "Emitting stop before label %d\n",
7003 			     INSN_UID (last_label));
7004 		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
7005 		  insn = last_label;
7006 
7007 		  init_insn_group_barriers ();
7008 		  last_label = 0;
7009 		}
7010 	    }
7011 	}
7012     }
7013 }
7014 
7015 /* Like emit_insn_group_barriers, but used when no final scheduling pass
7016    has been run.  This function has to emit all necessary group barriers.  */
7017 
7018 static void
7019 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
7020 {
7021   rtx_insn *insn;
7022 
7023   init_insn_group_barriers ();
7024 
7025   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7026     {
7027       if (BARRIER_P (insn))
7028 	{
7029 	  rtx_insn *last = prev_active_insn (insn);
7030 
7031 	  if (! last)
7032 	    continue;
7033 	  if (JUMP_TABLE_DATA_P (last))
7034 	    last = prev_active_insn (last);
7035 	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7036 	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7037 
7038 	  init_insn_group_barriers ();
7039 	}
7040       else if (NONDEBUG_INSN_P (insn))
7041 	{
7042 	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7043 	    init_insn_group_barriers ();
7044 	  else if (group_barrier_needed (insn))
7045 	    {
7046 	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
7047 	      init_insn_group_barriers ();
7048 	      group_barrier_needed (insn);
7049 	    }
7050 	}
7051     }
7052 }
7053 
7054 
7055 
7056 /* Instruction scheduling support.  */
7057 
7058 #define NR_BUNDLES 10
7059 
7060 /* A list of names of all available bundles.  */
7061 
7062 static const char *bundle_name [NR_BUNDLES] =
7063 {
7064   ".mii",
7065   ".mmi",
7066   ".mfi",
7067   ".mmf",
7068 #if NR_BUNDLES == 10
7069   ".bbb",
7070   ".mbb",
7071 #endif
7072   ".mib",
7073   ".mmb",
7074   ".mfb",
7075   ".mlx"
7076 };
7077 
7078 /* Nonzero if we should insert stop bits into the schedule.  */
7079 
7080 int ia64_final_schedule = 0;
7081 
7082 /* Codes of the corresponding queried units: */
7083 
7084 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
7085 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
7086 
7087 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
7088 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
7089 
7090 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
7091 
7092 /* The following variable value is an insn group barrier.  */
7093 
7094 static rtx_insn *dfa_stop_insn;
7095 
7096 /* The following variable value is the last issued insn.  */
7097 
7098 static rtx_insn *last_scheduled_insn;
7099 
7100 /* The following variable value is a pointer to a DFA state used as a
7101    temporary variable.  */
7102 
7103 static state_t temp_dfa_state = NULL;
7104 
7105 /* The following variable value is the DFA state after issuing the last
7106    insn.  */
7107 
7108 static state_t prev_cycle_state = NULL;
7109 
7110 /* The following array element values are TRUE if the corresponding
7111    insn requires a stop bit to be added before it.  */
7112 
7113 static char *stops_p = NULL;
7114 
7115 /* The following variable is used to set up the array mentioned above.  */
7116 
7117 static int stop_before_p = 0;
7118 
7119 /* The following variable value is the allocated length of the
7120    `stops_p' array.  */
7121 
7122 static int clocks_length;
7123 
7124 /* The following variable value is the number of data speculations in progress.  */
7125 static int pending_data_specs = 0;
7126 
7127 /* Number of memory references on the current and the three following processor cycles.  */
7128 static char mem_ops_in_group[4];
7129 
7130 /* Number of the current processor cycle (from the scheduler's point of view).  */
7131 static int current_cycle;
7132 
7133 static rtx ia64_single_set (rtx_insn *);
7134 static void ia64_emit_insn_before (rtx, rtx);
7135 
7136 /* Map a bundle number to its pseudo-op.  */
7137 
7138 const char *
7139 get_bundle_name (int b)
7140 {
7141   return bundle_name[b];
7142 }
7143 
7144 
7145 /* Return the maximum number of instructions a cpu can issue.  */
7146 
7147 static int
7148 ia64_issue_rate (void)
7149 {
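  /* Itanium can issue up to two three-instruction bundles per clock.  */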
7150   return 6;
7151 }
7152 
7153 /* Helper function - like single_set, but look inside COND_EXEC.  */
7154 
7155 static rtx
7156 ia64_single_set (rtx_insn *insn)
7157 {
7158   rtx x = PATTERN (insn), ret;
7159   if (GET_CODE (x) == COND_EXEC)
7160     x = COND_EXEC_CODE (x);
7161   if (GET_CODE (x) == SET)
7162     return x;
7163 
7164   /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here.
7165      Although they are not a classical single set, the second set is there
7166      just to protect it from moving past FP-relative stack accesses.  */
7167   switch (recog_memoized (insn))
7168     {
7169     case CODE_FOR_prologue_allocate_stack:
7170     case CODE_FOR_prologue_allocate_stack_pr:
7171     case CODE_FOR_epilogue_deallocate_stack:
7172     case CODE_FOR_epilogue_deallocate_stack_pr:
7173       ret = XVECEXP (x, 0, 0);
7174       break;
7175 
7176     default:
7177       ret = single_set_2 (insn, x);
7178       break;
7179     }
7180 
7181   return ret;
7182 }
7183 
7184 /* Adjust the cost of a scheduling dependency.
7185    Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN.
7186    COST is the current cost, DW is dependency weakness.  */
7187 static int
7188 ia64_adjust_cost_2 (rtx_insn *insn, int dep_type1, rtx_insn *dep_insn,
7189 		    int cost, dw_t dw)
7190 {
7191   enum reg_note dep_type = (enum reg_note) dep_type1;
7192   enum attr_itanium_class dep_class;
7193   enum attr_itanium_class insn_class;
7194 
7195   insn_class = ia64_safe_itanium_class (insn);
7196   dep_class = ia64_safe_itanium_class (dep_insn);
7197 
7198   /* Treat true memory dependencies separately.  Ignore apparent true
7199      dependence between store and call (call has a MEM inside a SYMBOL_REF).  */
7200   if (dep_type == REG_DEP_TRUE
7201       && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
7202       && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
7203     return 0;
7204 
7205   if (dw == MIN_DEP_WEAK)
7206     /* Store and load are likely to alias, use higher cost to avoid stall.  */
7207     return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
7208   else if (dw > MIN_DEP_WEAK)
7209     {
7210       /* Store and load are less likely to alias.  */
7211       if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
7212 	/* Assume there will be no cache conflict for floating-point data.
7213 	   For integer data, L1 conflict penalty is huge (17 cycles), so we
7214 	   never assume it will not cause a conflict.  */
7215 	return 0;
7216       else
7217 	return cost;
7218     }
7219 
7220   if (dep_type != REG_DEP_OUTPUT)
7221     return cost;
7222 
7223   if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
7224       || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
7225     return 0;
7226 
7227   return cost;
7228 }
7229 
7230 /* Like emit_insn_before, but skip cycle_display notes.
7231    ??? When cycle display notes are implemented, update this.  */
7232 
7233 static void
7234 ia64_emit_insn_before (rtx insn, rtx before)
7235 {
7236   emit_insn_before (insn, before);
7237 }
7238 
7239 /* The following function marks insns that produce addresses for load
7240    and store insns.  Such insns will be placed into M slots because that
7241    decreases latency on Itanium 1 (see function
7242    `ia64_produce_address_p' and the DFA descriptions).  */
7243 
7244 static void
7245 ia64_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
7246 {
7247   rtx_insn *insn, *next, *next_tail;
7248 
7249   /* Before reload, which_alternative is not set, which means that
7250      ia64_safe_itanium_class will produce wrong results for (at least)
7251      move instructions.  */
7252   if (!reload_completed)
7253     return;
7254 
7255   next_tail = NEXT_INSN (tail);
7256   for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7257     if (INSN_P (insn))
7258       insn->call = 0;
7259   for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7260     if (INSN_P (insn)
7261 	&& ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
7262       {
7263 	sd_iterator_def sd_it;
7264 	dep_t dep;
7265 	bool has_mem_op_consumer_p = false;
7266 
7267 	FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
7268 	  {
7269 	    enum attr_itanium_class c;
7270 
7271 	    if (DEP_TYPE (dep) != REG_DEP_TRUE)
7272 	      continue;
7273 
7274 	    next = DEP_CON (dep);
7275 	    c = ia64_safe_itanium_class (next);
7276 	    if ((c == ITANIUM_CLASS_ST
7277 		 || c == ITANIUM_CLASS_STF)
7278 		&& ia64_st_address_bypass_p (insn, next))
7279 	      {
7280 		has_mem_op_consumer_p = true;
7281 		break;
7282 	      }
7283 	    else if ((c == ITANIUM_CLASS_LD
7284 		      || c == ITANIUM_CLASS_FLD
7285 		      || c == ITANIUM_CLASS_FLDP)
7286 		     && ia64_ld_address_bypass_p (insn, next))
7287 	      {
7288 		has_mem_op_consumer_p = true;
7289 		break;
7290 	      }
7291 	  }
7292 
7293 	insn->call = has_mem_op_consumer_p;
7294       }
7295 }
7296 
7297 /* We're beginning a new block.  Initialize data structures as necessary.  */
7298 
7299 static void
7300 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
7301 		 int sched_verbose ATTRIBUTE_UNUSED,
7302 		 int max_ready ATTRIBUTE_UNUSED)
7303 {
7304   if (flag_checking && !sel_sched_p () && reload_completed)
7305     {
7306       for (rtx_insn *insn = NEXT_INSN (current_sched_info->prev_head);
7307 	   insn != current_sched_info->next_tail;
7308 	   insn = NEXT_INSN (insn))
7309 	gcc_assert (!SCHED_GROUP_P (insn));
7310     }
7311   last_scheduled_insn = NULL;
7312   init_insn_group_barriers ();
7313 
7314   current_cycle = 0;
7315   memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7316 }
7317 
7318 /* We're beginning a scheduling pass.  Check assertion.  */
7319 
7320 static void
7321 ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
7322                         int sched_verbose ATTRIBUTE_UNUSED,
7323                         int max_ready ATTRIBUTE_UNUSED)
7324 {
7325   gcc_assert (pending_data_specs == 0);
7326 }
7327 
7328 /* Scheduling pass is now finished.  Free/reset static variable.  */
7329 static void
7330 ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
7331 			  int sched_verbose ATTRIBUTE_UNUSED)
7332 {
7333   gcc_assert (pending_data_specs == 0);
7334 }
7335 
7336 /* Return TRUE if INSN is a load (either normal or speculative, but not a
7337    speculation check), FALSE otherwise.  */
7338 static bool
7339 is_load_p (rtx_insn *insn)
7340 {
7341   enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7342 
7343   return
7344    ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
7345     && get_attr_check_load (insn) == CHECK_LOAD_NO);
7346 }
7347 
7348 /* If INSN is a memory reference, record it in the MEM_OPS_IN_GROUP global
7349    array (taking into account the 3-cycle cache reference postponing for
7350    stores: Intel Itanium 2 Reference Manual for Software Development and
7351    Optimization, 6.7.3.1).  */
7352 static void
7353 record_memory_reference (rtx_insn *insn)
7354 {
7355   enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7356 
7357   switch (insn_class) {
7358     case ITANIUM_CLASS_FLD:
7359     case ITANIUM_CLASS_LD:
7360       mem_ops_in_group[current_cycle % 4]++;
7361       break;
7362     case ITANIUM_CLASS_STF:
7363     case ITANIUM_CLASS_ST:
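      /* Stores are charged to the cycle three clocks ahead, matching the
	 cache reference postponing described in the comment above.  */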
7364       mem_ops_in_group[(current_cycle + 3) % 4]++;
7365       break;
7366     default:;
7367   }
7368 }
7369 
7370 /* We are about to begin issuing insns for this clock cycle.
7371    Override the default sort algorithm to better slot instructions.  */
7372 
7373 static int
7374 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
7375 			int *pn_ready, int clock_var,
7376 			int reorder_type)
7377 {
7378   int n_asms;
7379   int n_ready = *pn_ready;
7380   rtx_insn **e_ready = ready + n_ready;
7381   rtx_insn **insnp;
7382 
7383   if (sched_verbose)
7384     fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
7385 
7386   if (reorder_type == 0)
7387     {
7388       /* First, move all USEs, CLOBBERs and other crud out of the way.  */
7389       n_asms = 0;
7390       for (insnp = ready; insnp < e_ready; insnp++)
7391 	if (insnp < e_ready)
7392 	  {
7393 	    rtx_insn *insn = *insnp;
7394 	    enum attr_type t = ia64_safe_type (insn);
7395 	    if (t == TYPE_UNKNOWN)
7396 	      {
7397 		if (GET_CODE (PATTERN (insn)) == ASM_INPUT
7398 		    || asm_noperands (PATTERN (insn)) >= 0)
7399 		  {
7400 		    rtx_insn *lowest = ready[n_asms];
7401 		    ready[n_asms] = insn;
7402 		    *insnp = lowest;
7403 		    n_asms++;
7404 		  }
7405 		else
7406 		  {
7407 		    rtx_insn *highest = ready[n_ready - 1];
7408 		    ready[n_ready - 1] = insn;
7409 		    *insnp = highest;
7410 		    return 1;
7411 		  }
7412 	      }
7413 	  }
7414 
7415       if (n_asms < n_ready)
7416 	{
7417 	  /* Some normal insns to process.  Skip the asms.  */
7418 	  ready += n_asms;
7419 	  n_ready -= n_asms;
7420 	}
7421       else if (n_ready > 0)
7422 	return 1;
7423     }
7424 
7425   if (ia64_final_schedule)
7426     {
7427       int deleted = 0;
7428       int nr_need_stop = 0;
7429 
7430       for (insnp = ready; insnp < e_ready; insnp++)
7431 	if (safe_group_barrier_needed (*insnp))
7432 	  nr_need_stop++;
7433 
7434       if (reorder_type == 1 && n_ready == nr_need_stop)
7435 	return 0;
7436       if (reorder_type == 0)
7437 	return 1;
7438       insnp = e_ready;
7439       /* Move down everything that needs a stop bit, preserving
7440 	 relative order.  */
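      /* Each such insn is rotated down to ready[0]; the scheduler picks
	 insns from the end of the ready array, so this effectively
	 deprioritizes insns that would require a stop bit.  */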
7441       while (insnp-- > ready + deleted)
7442 	while (insnp >= ready + deleted)
7443 	  {
7444 	    rtx_insn *insn = *insnp;
7445 	    if (! safe_group_barrier_needed (insn))
7446 	      break;
7447 	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7448 	    *ready = insn;
7449 	    deleted++;
7450 	  }
7451       n_ready -= deleted;
7452       ready += deleted;
7453     }
7454 
7455   current_cycle = clock_var;
7456   if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
7457     {
7458       int moved = 0;
7459 
7460       insnp = e_ready;
7461       /* Move down loads/stores, preserving relative order.  */
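      /* The per-cycle memory-op budget is already exhausted, so push the
	 remaining loads toward ready[0] (issued last) in the hope that
	 they land on a later cycle.  */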
7462       while (insnp-- > ready + moved)
7463 	while (insnp >= ready + moved)
7464 	  {
7465 	    rtx_insn *insn = *insnp;
7466 	    if (! is_load_p (insn))
7467 	      break;
7468 	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7469 	    *ready = insn;
7470 	    moved++;
7471 	  }
7472       n_ready -= moved;
7473       ready += moved;
7474     }
7475 
7476   return 1;
7477 }
7478 
7479 /* We are about to begin issuing insns for this clock cycle.  Override
7480    the default sort algorithm to better slot instructions.  */
7481 
7482 static int
7483 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
7484 		    int *pn_ready, int clock_var)
7485 {
7486   return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
7487 				 pn_ready, clock_var, 0);
7488 }
7489 
7490 /* Like ia64_sched_reorder, but called after issuing each insn.
7491    Override the default sort algorithm to better slot instructions.  */
7492 
7493 static int
7494 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
7495 		     int sched_verbose ATTRIBUTE_UNUSED, rtx_insn **ready,
7496 		     int *pn_ready, int clock_var)
7497 {
7498   return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
7499 				 clock_var, 1);
7500 }
7501 
7502 /* We are about to issue INSN.  Return the number of insns left on the
7503    ready queue that can be issued this cycle.  */
7504 
7505 static int
7506 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
7507 		     int sched_verbose ATTRIBUTE_UNUSED,
7508 		     rtx_insn *insn,
7509 		     int can_issue_more ATTRIBUTE_UNUSED)
7510 {
7511   if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
7512     /* Modulo scheduling does not extend h_i_d when emitting
7513        new instructions.  Don't use h_i_d if we don't have to.  */
7514     {
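      /* Track how many data-speculative loads are in flight so that the
	 lookahead guard can keep the ALAT from filling up.  */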
7515       if (DONE_SPEC (insn) & BEGIN_DATA)
7516 	pending_data_specs++;
7517       if (CHECK_SPEC (insn) & BEGIN_DATA)
7518 	pending_data_specs--;
7519     }
7520 
7521   if (DEBUG_INSN_P (insn))
7522     return 1;
7523 
7524   last_scheduled_insn = insn;
7525   memcpy (prev_cycle_state, curr_state, dfa_state_size);
7526   if (reload_completed)
7527     {
7528       int needed = group_barrier_needed (insn);
7529 
7530       gcc_assert (!needed);
7531       if (CALL_P (insn))
7532 	init_insn_group_barriers ();
7533       stops_p [INSN_UID (insn)] = stop_before_p;
7534       stop_before_p = 0;
7535 
7536       record_memory_reference (insn);
7537     }
7538   return 1;
7539 }
7540 
7541 /* We are choosing an insn from the ready queue.  Return zero if INSN
7542    can be chosen.  */
7543 
7544 static int
7545 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
7546 {
7547   gcc_assert (insn && INSN_P (insn));
7548 
7549   /* The size of the ALAT is 32.  Since we perform conservative
7550      data speculation, we keep the ALAT half empty.  */
7551   if (pending_data_specs >= 16 && (TODO_SPEC (insn) & BEGIN_DATA))
7552     return ready_index == 0 ? -1 : 1;
7553 
7554   if (ready_index == 0)
7555     return 0;
7556 
7557   if ((!reload_completed
7558        || !safe_group_barrier_needed (insn))
7559       && (!mflag_sched_mem_insns_hard_limit
7560 	  || !is_load_p (insn)
7561 	  || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns))
7562     return 0;
7563 
7564   return 1;
7565 }
7566 
7567 /* The following variable value is a pseudo-insn used by the DFA insn
7568    scheduler to change the DFA state when the simulated clock is
7569    increased.  */
7570 
7571 static rtx_insn *dfa_pre_cycle_insn;
7572 
7573 /* Returns 1 when a meaningful insn was scheduled between the last group
7574    barrier and LAST.  */
7575 static int
7576 scheduled_good_insn (rtx_insn *last)
7577 {
7578   if (last && recog_memoized (last) >= 0)
7579     return 1;
7580 
7581   for ( ;
7582        last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7583        && !stops_p[INSN_UID (last)];
7584        last = PREV_INSN (last))
7585     /* We could hit a NOTE_INSN_DELETED here which is actually outside
7586        the ebb we're scheduling.  */
7587     if (INSN_P (last) && recog_memoized (last) >= 0)
7588       return 1;
7589 
7590   return 0;
7591 }
7592 
7593 /* We are about to begin issuing INSN.  Return nonzero if we cannot
7594    issue it on the given cycle CLOCK, and clear *SORT_P if the ready
7595    queue should not be sorted on the next clock start.  */
7596 
7597 static int
7598 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx_insn *insn, int last_clock,
7599 		    int clock, int *sort_p)
7600 {
7601   gcc_assert (insn && INSN_P (insn));
7602 
7603   if (DEBUG_INSN_P (insn))
7604     return 0;
7605 
7606   /* When a group barrier is needed for insn, last_scheduled_insn
7607      should be set.  */
7608   gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7609               || last_scheduled_insn);
7610 
7611   if ((reload_completed
7612        && (safe_group_barrier_needed (insn)
7613 	   || (mflag_sched_stop_bits_after_every_cycle
7614 	       && last_clock != clock
7615 	       && last_scheduled_insn
7616 	       && scheduled_good_insn (last_scheduled_insn))))
7617       || (last_scheduled_insn
7618 	  && (CALL_P (last_scheduled_insn)
7619 	      || unknown_for_bundling_p (last_scheduled_insn))))
7620     {
7621       init_insn_group_barriers ();
7622 
7623       if (verbose && dump)
7624 	fprintf (dump, "//    Stop should be before %d%s\n", INSN_UID (insn),
7625 		 last_clock == clock ? " + cycle advance" : "");
7626 
7627       stop_before_p = 1;
7628       current_cycle = clock;
7629       mem_ops_in_group[current_cycle % 4] = 0;
7630 
7631       if (last_clock == clock)
7632 	{
7633 	  state_transition (curr_state, dfa_stop_insn);
7634 	  if (TARGET_EARLY_STOP_BITS)
7635 	    *sort_p = (last_scheduled_insn == NULL_RTX
7636 		       || ! CALL_P (last_scheduled_insn));
7637 	  else
7638 	    *sort_p = 0;
7639 	  return 1;
7640 	}
7641 
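      /* A new cycle has started: rebuild the DFA state as it would look
	 after the stop bit and the cycle advance, starting from the state
	 recorded at the end of the previous cycle (or reset it entirely
	 after an asm-like insn).  */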
7642       if (last_scheduled_insn)
7643 	{
7644 	  if (unknown_for_bundling_p (last_scheduled_insn))
7645 	    state_reset (curr_state);
7646 	  else
7647 	    {
7648 	      memcpy (curr_state, prev_cycle_state, dfa_state_size);
7649 	      state_transition (curr_state, dfa_stop_insn);
7650 	      state_transition (curr_state, dfa_pre_cycle_insn);
7651 	      state_transition (curr_state, NULL);
7652 	    }
7653 	}
7654     }
7655   return 0;
7656 }
7657 
7658 /* Implement targetm.sched.h_i_d_extended hook.
7659    Extend internal data structures.  */
7660 static void
7661 ia64_h_i_d_extended (void)
7662 {
7663   if (stops_p != NULL)
7664     {
7665       int new_clocks_length = get_max_uid () * 3 / 2;
7666       stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
7667       clocks_length = new_clocks_length;
7668     }
7669 }
7670 
7671 
7672 /* This structure describes the data used by the backend to guide scheduling.
7673    When the current scheduling point is switched, this data should be saved
7674    and restored later, if the scheduler returns to this point.  */
7675 struct _ia64_sched_context
7676 {
7677   state_t prev_cycle_state;
7678   rtx_insn *last_scheduled_insn;
7679   struct reg_write_state rws_sum[NUM_REGS];
7680   struct reg_write_state rws_insn[NUM_REGS];
7681   int first_instruction;
7682   int pending_data_specs;
7683   int current_cycle;
7684   char mem_ops_in_group[4];
7685 };
7686 typedef struct _ia64_sched_context *ia64_sched_context_t;
7687 
7688 /* Allocates a scheduling context.  */
7689 static void *
7690 ia64_alloc_sched_context (void)
7691 {
7692   return xmalloc (sizeof (struct _ia64_sched_context));
7693 }
7694 
7695 /* Initializes the _SC context with clean data, if CLEAN_P, and from
7696    the global context otherwise.  */
7697 static void
7698 ia64_init_sched_context (void *_sc, bool clean_p)
7699 {
7700   ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7701 
7702   sc->prev_cycle_state = xmalloc (dfa_state_size);
7703   if (clean_p)
7704     {
7705       state_reset (sc->prev_cycle_state);
7706       sc->last_scheduled_insn = NULL;
7707       memset (sc->rws_sum, 0, sizeof (rws_sum));
7708       memset (sc->rws_insn, 0, sizeof (rws_insn));
7709       sc->first_instruction = 1;
7710       sc->pending_data_specs = 0;
7711       sc->current_cycle = 0;
7712       memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7713     }
7714   else
7715     {
7716       memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7717       sc->last_scheduled_insn = last_scheduled_insn;
7718       memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7719       memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7720       sc->first_instruction = first_instruction;
7721       sc->pending_data_specs = pending_data_specs;
7722       sc->current_cycle = current_cycle;
7723       memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7724     }
7725 }
7726 
7727 /* Sets the global scheduling context to the one pointed to by _SC.  */
7728 static void
7729 ia64_set_sched_context (void *_sc)
7730 {
7731   ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7732 
7733   gcc_assert (sc != NULL);
7734 
7735   memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7736   last_scheduled_insn = sc->last_scheduled_insn;
7737   memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7738   memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7739   first_instruction = sc->first_instruction;
7740   pending_data_specs = sc->pending_data_specs;
7741   current_cycle = sc->current_cycle;
7742   memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7743 }
7744 
7745 /* Clears the data in the _SC scheduling context.  */
7746 static void
7747 ia64_clear_sched_context (void *_sc)
7748 {
7749   ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7750 
7751   free (sc->prev_cycle_state);
7752   sc->prev_cycle_state = NULL;
7753 }
7754 
7755 /* Frees the _SC scheduling context.  */
7756 static void
7757 ia64_free_sched_context (void *_sc)
7758 {
7759   gcc_assert (_sc != NULL);
7760 
7761   free (_sc);
7762 }
7763 
7764 typedef rtx (* gen_func_t) (rtx, rtx);
7765 
7766 /* Return a function that will generate a load of mode MODE_NO
7767    with speculation types TS.  */
7768 static gen_func_t
7769 get_spec_load_gen_function (ds_t ts, int mode_no)
7770 {
7771   static gen_func_t gen_ld_[] = {
7772     gen_movbi,
7773     gen_movqi_internal,
7774     gen_movhi_internal,
7775     gen_movsi_internal,
7776     gen_movdi_internal,
7777     gen_movsf_internal,
7778     gen_movdf_internal,
7779     gen_movxf_internal,
7780     gen_movti_internal,
7781     gen_zero_extendqidi2,
7782     gen_zero_extendhidi2,
7783     gen_zero_extendsidi2,
7784   };
7785 
7786   static gen_func_t gen_ld_a[] = {
7787     gen_movbi_advanced,
7788     gen_movqi_advanced,
7789     gen_movhi_advanced,
7790     gen_movsi_advanced,
7791     gen_movdi_advanced,
7792     gen_movsf_advanced,
7793     gen_movdf_advanced,
7794     gen_movxf_advanced,
7795     gen_movti_advanced,
7796     gen_zero_extendqidi2_advanced,
7797     gen_zero_extendhidi2_advanced,
7798     gen_zero_extendsidi2_advanced,
7799   };
7800   static gen_func_t gen_ld_s[] = {
7801     gen_movbi_speculative,
7802     gen_movqi_speculative,
7803     gen_movhi_speculative,
7804     gen_movsi_speculative,
7805     gen_movdi_speculative,
7806     gen_movsf_speculative,
7807     gen_movdf_speculative,
7808     gen_movxf_speculative,
7809     gen_movti_speculative,
7810     gen_zero_extendqidi2_speculative,
7811     gen_zero_extendhidi2_speculative,
7812     gen_zero_extendsidi2_speculative,
7813   };
7814   static gen_func_t gen_ld_sa[] = {
7815     gen_movbi_speculative_advanced,
7816     gen_movqi_speculative_advanced,
7817     gen_movhi_speculative_advanced,
7818     gen_movsi_speculative_advanced,
7819     gen_movdi_speculative_advanced,
7820     gen_movsf_speculative_advanced,
7821     gen_movdf_speculative_advanced,
7822     gen_movxf_speculative_advanced,
7823     gen_movti_speculative_advanced,
7824     gen_zero_extendqidi2_speculative_advanced,
7825     gen_zero_extendhidi2_speculative_advanced,
7826     gen_zero_extendsidi2_speculative_advanced,
7827   };
7828   static gen_func_t gen_ld_s_a[] = {
7829     gen_movbi_speculative_a,
7830     gen_movqi_speculative_a,
7831     gen_movhi_speculative_a,
7832     gen_movsi_speculative_a,
7833     gen_movdi_speculative_a,
7834     gen_movsf_speculative_a,
7835     gen_movdf_speculative_a,
7836     gen_movxf_speculative_a,
7837     gen_movti_speculative_a,
7838     gen_zero_extendqidi2_speculative_a,
7839     gen_zero_extendhidi2_speculative_a,
7840     gen_zero_extendsidi2_speculative_a,
7841   };
7842 
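  /* Each table above has twelve entries laid out to match ia64_mode_to_int:
     indices 0-8 cover the plain modes (BImode through TImode) and 9-11 the
     ZERO_EXTEND variants reached via SPEC_GEN_EXTEND_OFFSET.  */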
7843   gen_func_t *gen_ld;
7844 
7845   if (ts & BEGIN_DATA)
7846     {
7847       if (ts & BEGIN_CONTROL)
7848 	gen_ld = gen_ld_sa;
7849       else
7850 	gen_ld = gen_ld_a;
7851     }
7852   else if (ts & BEGIN_CONTROL)
7853     {
7854       if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7855 	  || ia64_needs_block_p (ts))
7856 	gen_ld = gen_ld_s;
7857       else
7858 	gen_ld = gen_ld_s_a;
7859     }
7860   else if (ts == 0)
7861     gen_ld = gen_ld_;
7862   else
7863     gcc_unreachable ();
7864 
7865   return gen_ld[mode_no];
7866 }
7867 
7868 /* Constants that help map 'machine_mode' to int.  */
7869 enum SPEC_MODES
7870   {
7871     SPEC_MODE_INVALID = -1,
7872     SPEC_MODE_FIRST = 0,
7873     SPEC_MODE_FOR_EXTEND_FIRST = 1,
7874     SPEC_MODE_FOR_EXTEND_LAST = 3,
7875     SPEC_MODE_LAST = 8
7876   };
7877 
7878 enum
7879   {
7880     /* Offset to reach ZERO_EXTEND patterns.  */
7881     SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
7882   };
7883 
7884 /* Return the index of MODE.  */
7885 static int
7886 ia64_mode_to_int (machine_mode mode)
7887 {
7888   switch (mode)
7889     {
7890     case BImode: return 0; /* SPEC_MODE_FIRST  */
7891     case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST  */
7892     case HImode: return 2;
7893     case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST  */
7894     case DImode: return 4;
7895     case SFmode: return 5;
7896     case DFmode: return 6;
7897     case XFmode: return 7;
7898     case TImode:
7899       /* ??? This mode needs testing.  Bypasses for ldfp8 instruction are not
7900 	 mentioned in itanium[12].md.  Predicate fp_register_operand also
7901 	 needs to be defined.  Bottom line: better disable for now.  */
7902       return SPEC_MODE_INVALID;
7903     default:     return SPEC_MODE_INVALID;
7904     }
7905 }
7906 
7907 /* Provide information about speculation capabilities.  */
7908 static void
7909 ia64_set_sched_flags (spec_info_t spec_info)
7910 {
7911   unsigned int *flags = &(current_sched_info->flags);
7912 
7913   if (*flags & SCHED_RGN
7914       || *flags & SCHED_EBB
7915       || *flags & SEL_SCHED)
7916     {
7917       int mask = 0;
7918 
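      /* Build the mask of permitted speculation kinds from the -msched-*
	 options; note that data speculation is controlled separately for
	 the passes before and after reload, as the reload_completed tests
	 below show.  */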
7919       if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
7920           || (mflag_sched_ar_data_spec && reload_completed))
7921 	{
7922 	  mask |= BEGIN_DATA;
7923 
7924 	  if (!sel_sched_p ()
7925 	      && ((mflag_sched_br_in_data_spec && !reload_completed)
7926 		  || (mflag_sched_ar_in_data_spec && reload_completed)))
7927 	    mask |= BE_IN_DATA;
7928 	}
7929 
7930       if (mflag_sched_control_spec
7931           && (!sel_sched_p ()
7932 	      || reload_completed))
7933 	{
7934 	  mask |= BEGIN_CONTROL;
7935 
7936 	  if (!sel_sched_p () && mflag_sched_in_control_spec)
7937 	    mask |= BE_IN_CONTROL;
7938 	}
7939 
7940       spec_info->mask = mask;
7941 
7942       if (mask)
7943 	{
7944 	  *flags |= USE_DEPS_LIST | DO_SPECULATION;
7945 
7946 	  if (mask & BE_IN_SPEC)
7947 	    *flags |= NEW_BBS;
7948 
7949 	  spec_info->flags = 0;
7950 
7951 	  if ((mask & CONTROL_SPEC)
7952 	      && sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
7953 	    spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
7954 
7955 	  if (sched_verbose >= 1)
7956 	    spec_info->dump = sched_dump;
7957 	  else
7958 	    spec_info->dump = 0;
7959 
7960 	  if (mflag_sched_count_spec_in_critical_path)
7961 	    spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
7962 	}
7963     }
7964   else
7965     spec_info->mask = 0;
7966 }
7967 
7968 /* If INSN is an appropriate load return its mode.
7969    Return -1 otherwise.  */
7970 static int
7971 get_mode_no_for_insn (rtx_insn *insn)
7972 {
7973   rtx reg, mem, mode_rtx;
7974   int mode_no;
7975   bool extend_p;
7976 
7977   extract_insn_cached (insn);
7978 
7979   /* We use WHICH_ALTERNATIVE only after reload.  This will
7980      guarantee that reload won't touch a speculative insn.  */
7981 
7982   if (recog_data.n_operands != 2)
7983     return -1;
7984 
7985   reg = recog_data.operand[0];
7986   mem = recog_data.operand[1];
7987 
7988   /* We should use MEM's mode since REG's mode in presence of
7989      ZERO_EXTEND will always be DImode.  */
7990   if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
7991     /* Process non-speculative ld.  */
7992     {
7993       if (!reload_completed)
7994 	{
7995 	  /* Do not speculate into regs like ar.lc.  */
7996 	  if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
7997 	    return -1;
7998 
7999 	  if (!MEM_P (mem))
8000 	    return -1;
8001 
8002 	  {
8003 	    rtx mem_reg = XEXP (mem, 0);
8004 
8005 	    if (!REG_P (mem_reg))
8006 	      return -1;
8007 	  }
8008 
8009 	  mode_rtx = mem;
8010 	}
8011       else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
8012 	{
8013 	  gcc_assert (REG_P (reg) && MEM_P (mem));
8014 	  mode_rtx = mem;
8015 	}
8016       else
8017 	return -1;
8018     }
8019   else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
8020 	   || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
8021 	   || get_attr_check_load (insn) == CHECK_LOAD_YES)
8022     /* Process speculative ld or ld.c.  */
8023     {
8024       gcc_assert (REG_P (reg) && MEM_P (mem));
8025       mode_rtx = mem;
8026     }
8027   else
8028     {
8029       enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
8030 
8031       if (attr_class == ITANIUM_CLASS_CHK_A
8032 	  || attr_class == ITANIUM_CLASS_CHK_S_I
8033 	  || attr_class == ITANIUM_CLASS_CHK_S_F)
8034 	/* Process chk.  */
8035 	mode_rtx = reg;
8036       else
8037 	return -1;
8038     }
8039 
8040   mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
8041 
8042   if (mode_no == SPEC_MODE_INVALID)
8043     return -1;
8044 
8045   extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
8046 
8047   if (extend_p)
8048     {
8049       if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
8050 	    && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
8051 	return -1;
8052 
8053       mode_no += SPEC_GEN_EXTEND_OFFSET;
8054     }
8055 
8056   return mode_no;
8057 }
8058 
8059 /* If X is an unspec part of a speculative load, return its code.
8060    Return -1 otherwise.  */
8061 static int
8062 get_spec_unspec_code (const_rtx x)
8063 {
8064   if (GET_CODE (x) != UNSPEC)
8065     return -1;
8066 
8067   {
8068     int code;
8069 
8070     code = XINT (x, 1);
8071 
8072     switch (code)
8073       {
8074       case UNSPEC_LDA:
8075       case UNSPEC_LDS:
8076       case UNSPEC_LDS_A:
8077       case UNSPEC_LDSA:
8078 	return code;
8079 
8080       default:
8081 	return -1;
8082       }
8083   }
8084 }
8085 
8086 /* Implement skip_rtx_p hook.  */
8087 static bool
8088 ia64_skip_rtx_p (const_rtx x)
8089 {
8090   return get_spec_unspec_code (x) != -1;
8091 }
8092 
8093 /* If INSN is a speculative load, return its UNSPEC code.
8094    Return -1 otherwise.  */
8095 static int
8096 get_insn_spec_code (const_rtx insn)
8097 {
8098   rtx pat, reg, mem;
8099 
8100   pat = PATTERN (insn);
8101 
8102   if (GET_CODE (pat) == COND_EXEC)
8103     pat = COND_EXEC_CODE (pat);
8104 
8105   if (GET_CODE (pat) != SET)
8106     return -1;
8107 
8108   reg = SET_DEST (pat);
8109   if (!REG_P (reg))
8110     return -1;
8111 
8112   mem = SET_SRC (pat);
8113   if (GET_CODE (mem) == ZERO_EXTEND)
8114     mem = XEXP (mem, 0);
8115 
8116   return get_spec_unspec_code (mem);
8117 }
8118 
8119 /* If INSN is a speculative load, return a ds with the speculation types.
8120    Otherwise [if INSN is a normal instruction] return 0.  */
8121 static ds_t
8122 ia64_get_insn_spec_ds (rtx_insn *insn)
8123 {
8124   int code = get_insn_spec_code (insn);
8125 
8126   switch (code)
8127     {
8128     case UNSPEC_LDA:
8129       return BEGIN_DATA;
8130 
8131     case UNSPEC_LDS:
8132     case UNSPEC_LDS_A:
8133       return BEGIN_CONTROL;
8134 
8135     case UNSPEC_LDSA:
8136       return BEGIN_DATA | BEGIN_CONTROL;
8137 
8138     default:
8139       return 0;
8140     }
8141 }
8142 
8143 /* If INSN is a speculative load return a ds with the speculation types that
8144    will be checked.
8145    Otherwise [if INSN is a normal instruction] return 0.  */
8146 static ds_t
8147 ia64_get_insn_checked_ds (rtx_insn *insn)
8148 {
8149   int code = get_insn_spec_code (insn);
8150 
8151   switch (code)
8152     {
8153     case UNSPEC_LDA:
8154       return BEGIN_DATA | BEGIN_CONTROL;
8155 
8156     case UNSPEC_LDS:
8157       return BEGIN_CONTROL;
8158 
8159     case UNSPEC_LDS_A:
8160     case UNSPEC_LDSA:
8161       return BEGIN_DATA | BEGIN_CONTROL;
8162 
8163     default:
8164       return 0;
8165     }
8166 }
8167 
8168 /* Return the speculative pattern for INSN with speculation type TS
8169    and machine mode number MODE_NO.  The new pattern is built from the
8170    cached recog operands and preserves a COND_EXEC predicate if the
8171    original pattern has one.  */
8172 static rtx
8173 ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
8174 {
8175   rtx pat, new_pat;
8176   gen_func_t gen_load;
8177 
8178   gen_load = get_spec_load_gen_function (ts, mode_no);
8179 
8180   new_pat = gen_load (copy_rtx (recog_data.operand[0]),
8181 		      copy_rtx (recog_data.operand[1]));
8182 
8183   pat = PATTERN (insn);
8184   if (GET_CODE (pat) == COND_EXEC)
8185     new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8186 				 new_pat);
8187 
8188   return new_pat;
8189 }
8190 
8191 static bool
8192 insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
8193 			      ds_t ds ATTRIBUTE_UNUSED)
8194 {
8195   return false;
8196 }
8197 
8198 /* Implement targetm.sched.speculate_insn hook.
8199    Check whether INSN can be made TS speculative.
8200    If not, return -1.
8201    If it can, generate the speculative pattern in NEW_PAT and return 1.
8202    If the current pattern of INSN already provides TS speculation,
8203    return 0.  */
8204 static int
8205 ia64_speculate_insn (rtx_insn *insn, ds_t ts, rtx *new_pat)
8206 {
8207   int mode_no;
8208   int res;
8209 
8210   gcc_assert (!(ts & ~SPECULATIVE));
8211 
8212   if (ia64_spec_check_p (insn))
8213     return -1;
8214 
8215   if ((ts & BE_IN_SPEC)
8216       && !insn_can_be_in_speculative_p (insn, ts))
8217     return -1;
8218 
8219   mode_no = get_mode_no_for_insn (insn);
8220 
8221   if (mode_no != SPEC_MODE_INVALID)
8222     {
8223       if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
8224 	res = 0;
8225       else
8226 	{
8227 	  res = 1;
8228 	  *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
8229 	}
8230     }
8231   else
8232     res = -1;
8233 
8234   return res;
8235 }
8236 
8237 /* Return a function that will generate a check for speculation TS with
8238    mode MODE_NO.
8239    If a simple check is needed, pass true for SIMPLE_CHECK_P.
8240    If a clearing check is needed, pass true for CLEARING_CHECK_P.  */
8241 static gen_func_t
8242 get_spec_check_gen_function (ds_t ts, int mode_no,
8243 			     bool simple_check_p, bool clearing_check_p)
8244 {
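  /* The tables below are indexed by MODE_NO; judging by the generator
     names, the order is BI, QI, HI, SI, DI, SF, DF, XF, TI, followed by
     the zero-extending QI/HI/SI loads (the SPEC_GEN_EXTEND_OFFSET
     variants).  */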
8245   static gen_func_t gen_ld_c_clr[] = {
8246     gen_movbi_clr,
8247     gen_movqi_clr,
8248     gen_movhi_clr,
8249     gen_movsi_clr,
8250     gen_movdi_clr,
8251     gen_movsf_clr,
8252     gen_movdf_clr,
8253     gen_movxf_clr,
8254     gen_movti_clr,
8255     gen_zero_extendqidi2_clr,
8256     gen_zero_extendhidi2_clr,
8257     gen_zero_extendsidi2_clr,
8258   };
8259   static gen_func_t gen_ld_c_nc[] = {
8260     gen_movbi_nc,
8261     gen_movqi_nc,
8262     gen_movhi_nc,
8263     gen_movsi_nc,
8264     gen_movdi_nc,
8265     gen_movsf_nc,
8266     gen_movdf_nc,
8267     gen_movxf_nc,
8268     gen_movti_nc,
8269     gen_zero_extendqidi2_nc,
8270     gen_zero_extendhidi2_nc,
8271     gen_zero_extendsidi2_nc,
8272   };
8273   static gen_func_t gen_chk_a_clr[] = {
8274     gen_advanced_load_check_clr_bi,
8275     gen_advanced_load_check_clr_qi,
8276     gen_advanced_load_check_clr_hi,
8277     gen_advanced_load_check_clr_si,
8278     gen_advanced_load_check_clr_di,
8279     gen_advanced_load_check_clr_sf,
8280     gen_advanced_load_check_clr_df,
8281     gen_advanced_load_check_clr_xf,
8282     gen_advanced_load_check_clr_ti,
8283     gen_advanced_load_check_clr_di,
8284     gen_advanced_load_check_clr_di,
8285     gen_advanced_load_check_clr_di,
8286   };
8287   static gen_func_t gen_chk_a_nc[] = {
8288     gen_advanced_load_check_nc_bi,
8289     gen_advanced_load_check_nc_qi,
8290     gen_advanced_load_check_nc_hi,
8291     gen_advanced_load_check_nc_si,
8292     gen_advanced_load_check_nc_di,
8293     gen_advanced_load_check_nc_sf,
8294     gen_advanced_load_check_nc_df,
8295     gen_advanced_load_check_nc_xf,
8296     gen_advanced_load_check_nc_ti,
8297     gen_advanced_load_check_nc_di,
8298     gen_advanced_load_check_nc_di,
8299     gen_advanced_load_check_nc_di,
8300   };
8301   static gen_func_t gen_chk_s[] = {
8302     gen_speculation_check_bi,
8303     gen_speculation_check_qi,
8304     gen_speculation_check_hi,
8305     gen_speculation_check_si,
8306     gen_speculation_check_di,
8307     gen_speculation_check_sf,
8308     gen_speculation_check_df,
8309     gen_speculation_check_xf,
8310     gen_speculation_check_ti,
8311     gen_speculation_check_di,
8312     gen_speculation_check_di,
8313     gen_speculation_check_di,
8314   };
8315 
8316   gen_func_t *gen_check;
8317 
8318   if (ts & BEGIN_DATA)
8319     {
8320       /* We don't need recovery because even if this is ld.sa,
8321 	 an ALAT entry will be allocated only if the NAT bit is set to
8322 	 zero.  So it is enough to use ld.c here.  */
8323 
8324       if (simple_check_p)
8325 	{
8326 	  gcc_assert (mflag_sched_spec_ldc);
8327 
8328 	  if (clearing_check_p)
8329 	    gen_check = gen_ld_c_clr;
8330 	  else
8331 	    gen_check = gen_ld_c_nc;
8332 	}
8333       else
8334 	{
8335 	  if (clearing_check_p)
8336 	    gen_check = gen_chk_a_clr;
8337 	  else
8338 	    gen_check = gen_chk_a_nc;
8339 	}
8340     }
8341   else if (ts & BEGIN_CONTROL)
8342     {
8343       if (simple_check_p)
8344 	/* We might want to use ld.sa -> ld.c instead of
8345 	   ld.s -> chk.s.  */
8346 	{
8347 	  gcc_assert (!ia64_needs_block_p (ts));
8348 
8349 	  if (clearing_check_p)
8350 	    gen_check = gen_ld_c_clr;
8351 	  else
8352 	    gen_check = gen_ld_c_nc;
8353 	}
8354       else
8355 	{
8356 	  gen_check = gen_chk_s;
8357 	}
8358     }
8359   else
8360     gcc_unreachable ();
8361 
8362   gcc_assert (mode_no >= 0);
8363   return gen_check[mode_no];
8364 }
8365 
8366 /* Return true if a check of speculation type TS needs a branchy recovery check.  */
8367 static bool
8368 ia64_needs_block_p (ds_t ts)
8369 {
8370   if (ts & BEGIN_DATA)
8371     return !mflag_sched_spec_ldc;
8372 
8373   gcc_assert ((ts & BEGIN_CONTROL) != 0);
8374 
8375   return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
8376 }
8377 
8378 /* Generate (or regenerate) a recovery check for INSN.  */
8379 static rtx
8380 ia64_gen_spec_check (rtx_insn *insn, rtx_insn *label, ds_t ds)
8381 {
8382   rtx op1, pat, check_pat;
8383   gen_func_t gen_check;
8384   int mode_no;
8385 
8386   mode_no = get_mode_no_for_insn (insn);
8387   gcc_assert (mode_no >= 0);
8388 
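  /* For a branchy check OP1 becomes the recovery label; for a simple
     ld.c-style check it is the original memory operand of the load.  */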
8389   if (label)
8390     op1 = label;
8391   else
8392     {
8393       gcc_assert (!ia64_needs_block_p (ds));
8394       op1 = copy_rtx (recog_data.operand[1]);
8395     }
8396 
8397   gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
8398 					   true);
8399 
8400   check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
8401 
8402   pat = PATTERN (insn);
8403   if (GET_CODE (pat) == COND_EXEC)
8404     check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8405 				   check_pat);
8406 
8407   return check_pat;
8408 }
8409 
8410 /* Return nonzero if X is a branchy recovery check.  */
8411 static int
8412 ia64_spec_check_p (rtx x)
8413 {
8414   x = PATTERN (x);
8415   if (GET_CODE (x) == COND_EXEC)
8416     x = COND_EXEC_CODE (x);
8417   if (GET_CODE (x) == SET)
8418     return ia64_spec_check_src_p (SET_SRC (x));
8419   return 0;
8420 }
8421 
8422 /* Return nonzero if SRC belongs to a recovery check.  */
8423 static int
8424 ia64_spec_check_src_p (rtx src)
8425 {
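  /* A recovery check's SET_SRC is an IF_THEN_ELSE whose condition is an
     NE test of one of the check UNSPECs; that UNSPEC code is returned
     as a nonzero value.  */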
8426   if (GET_CODE (src) == IF_THEN_ELSE)
8427     {
8428       rtx t;
8429 
8430       t = XEXP (src, 0);
8431       if (GET_CODE (t) == NE)
8432 	{
8433 	  t = XEXP (t, 0);
8434 
8435 	  if (GET_CODE (t) == UNSPEC)
8436 	    {
8437 	      int code;
8438 
8439 	      code = XINT (t, 1);
8440 
8441 	      if (code == UNSPEC_LDCCLR
8442 		  || code == UNSPEC_LDCNC
8443 		  || code == UNSPEC_CHKACLR
8444 		  || code == UNSPEC_CHKANC
8445 		  || code == UNSPEC_CHKS)
8446 		{
8447 		  gcc_assert (code != 0);
8448 		  return code;
8449 		}
8450 	    }
8451 	}
8452     }
8453   return 0;
8454 }
8455 
8456 
8457 /* The following page contains abstract data `bundle states' which are
8458    used for bundling insns (inserting nops and template generation).  */
8459 
8460 /* The following describes the state of insn bundling.  */
8461 
8462 struct bundle_state
8463 {
8464   /* Unique bundle state number to identify the state in the debugging
8465      output  */
8466   int unique_num;
8467   rtx_insn *insn; /* corresponding insn, NULL for the 1st and the last state  */
8468   /* number of nops before and after the insn  */
8469   short before_nops_num, after_nops_num;
8470   int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
8471                    insn)  */
8472   int cost;     /* cost of the state in cycles */
8473   int accumulated_insns_num; /* number of all previous insns including
8474 				nops.  L is considered as 2 insns */
8475   int branch_deviation; /* deviation of previous branches from 3rd slots  */
8476   int middle_bundle_stops; /* number of stop bits in the middle of bundles */
8477   struct bundle_state *next;  /* next state with the same insn_num  */
8478   struct bundle_state *originator; /* originator (previous insn state)  */
8479   /* All bundle states are in the following chain.  */
8480   struct bundle_state *allocated_states_chain;
8481   /* The DFA State after issuing the insn and the nops.  */
8482   state_t dfa_state;
8483 };
8484 
8485 /* The following maps an insn number to the corresponding bundle state.  */
8486 
8487 static struct bundle_state **index_to_bundle_states;
8488 
8489 /* The unique number of the next bundle state.  */
8490 
8491 static int bundle_states_num;
8492 
8493 /* All allocated bundle states are in the following chain.  */
8494 
8495 static struct bundle_state *allocated_bundle_states_chain;
8496 
8497 /* All allocated but not used bundle states are in the following
8498    chain.  */
8499 
8500 static struct bundle_state *free_bundle_state_chain;
8501 
8502 
8503 /* The following function returns a free bundle state.  */
8504 
8505 static struct bundle_state *
8506 get_free_bundle_state (void)
8507 {
8508   struct bundle_state *result;
8509 
8510   if (free_bundle_state_chain != NULL)
8511     {
8512       result = free_bundle_state_chain;
8513       free_bundle_state_chain = result->next;
8514     }
8515   else
8516     {
8517       result = XNEW (struct bundle_state);
8518       result->dfa_state = xmalloc (dfa_state_size);
8519       result->allocated_states_chain = allocated_bundle_states_chain;
8520       allocated_bundle_states_chain = result;
8521     }
8522   result->unique_num = bundle_states_num++;
8523   return result;
8524 
8525 }
8526 
8527 /* The following function frees the given bundle state.  */
8528 
8529 static void
8530 free_bundle_state (struct bundle_state *state)
8531 {
8532   state->next = free_bundle_state_chain;
8533   free_bundle_state_chain = state;
8534 }
8535 
8536 /* Start work with abstract data `bundle states'.  */
8537 
8538 static void
8539 initiate_bundle_states (void)
8540 {
8541   bundle_states_num = 0;
8542   free_bundle_state_chain = NULL;
8543   allocated_bundle_states_chain = NULL;
8544 }
8545 
8546 /* Finish work with abstract data `bundle states'.  */
8547 
8548 static void
8549 finish_bundle_states (void)
8550 {
8551   struct bundle_state *curr_state, *next_state;
8552 
8553   for (curr_state = allocated_bundle_states_chain;
8554        curr_state != NULL;
8555        curr_state = next_state)
8556     {
8557       next_state = curr_state->allocated_states_chain;
8558       free (curr_state->dfa_state);
8559       free (curr_state);
8560     }
8561 }
8562 
8563 /* Hashtable helpers.  */
8564 
8565 struct bundle_state_hasher : nofree_ptr_hash <bundle_state>
8566 {
8567   static inline hashval_t hash (const bundle_state *);
8568   static inline bool equal (const bundle_state *, const bundle_state *);
8569 };
8570 
8571 /* The function returns hash of BUNDLE_STATE.  */
8572 
8573 inline hashval_t
8574 bundle_state_hasher::hash (const bundle_state *state)
8575 {
8576   unsigned result, i;
8577 
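  /* Mix every byte of the DFA state into the hash at a distinct bit
     position: with CHAR_BIT == 8 the shift amounts cycle through
     8, 11, 14, ..., 29 and then repeat for later bytes.  */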
8578   for (result = i = 0; i < dfa_state_size; i++)
8579     result += (((unsigned char *) state->dfa_state) [i]
8580 	       << ((i % CHAR_BIT) * 3 + CHAR_BIT));
8581   return result + state->insn_num;
8582 }
8583 
8584 /* The function returns nonzero if the bundle state keys are equal.  */
8585 
8586 inline bool
8587 bundle_state_hasher::equal (const bundle_state *state1,
8588 			    const bundle_state *state2)
8589 {
8590   return (state1->insn_num == state2->insn_num
8591 	  && memcmp (state1->dfa_state, state2->dfa_state,
8592 		     dfa_state_size) == 0);
8593 }
8594 
8595 /* Hash table of the bundle states.  The key is dfa_state and insn_num
8596    of the bundle states.  */
8597 
8598 static hash_table<bundle_state_hasher> *bundle_state_table;
8599 
8600 /* The function inserts BUNDLE_STATE into the hash table.  The
8601    function returns nonzero if the bundle state has been inserted into
8602    the table.  The table contains the best bundle state with a given key.  */
8603 
8604 static int
8605 insert_bundle_state (struct bundle_state *bundle_state)
8606 {
8607   struct bundle_state **entry_ptr;
8608 
8609   entry_ptr = bundle_state_table->find_slot (bundle_state, INSERT);
8610   if (*entry_ptr == NULL)
8611     {
8612       bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8613       index_to_bundle_states [bundle_state->insn_num] = bundle_state;
8614       *entry_ptr = bundle_state;
8615       return TRUE;
8616     }
8617   else if (bundle_state->cost < (*entry_ptr)->cost
8618 	   || (bundle_state->cost == (*entry_ptr)->cost
8619 	       && ((*entry_ptr)->accumulated_insns_num
8620 		   > bundle_state->accumulated_insns_num
8621 		   || ((*entry_ptr)->accumulated_insns_num
8622 		       == bundle_state->accumulated_insns_num
8623 		       && ((*entry_ptr)->branch_deviation
8624 			   > bundle_state->branch_deviation
8625 			   || ((*entry_ptr)->branch_deviation
8626 			       == bundle_state->branch_deviation
8627 			       && (*entry_ptr)->middle_bundle_stops
8628 			       > bundle_state->middle_bundle_stops))))))
8629 
8630     {
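      /* The new state is better; swap its contents with the table
	 entry so the table keeps the better state, while the entry's
	 link into index_to_bundle_states (the saved next pointer) is
	 preserved.  */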
8631       struct bundle_state temp;
8632 
8633       temp = **entry_ptr;
8634       **entry_ptr = *bundle_state;
8635       (*entry_ptr)->next = temp.next;
8636       *bundle_state = temp;
8637     }
8638   return FALSE;
8639 }
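
/* A minimal illustrative restatement of the ordering used by
   insert_bundle_state above; it is not part of the compiler and is
   kept under #if 0 so it does not affect the build.  */
#if 0
static bool
bundle_state_better_p (const struct bundle_state *a,
		       const struct bundle_state *b)
{
  /* Lexicographic comparison: cost first, then total insns including
     nops, then branch deviation, then stops in the middle of bundles.  */
  if (a->cost != b->cost)
    return a->cost < b->cost;
  if (a->accumulated_insns_num != b->accumulated_insns_num)
    return a->accumulated_insns_num < b->accumulated_insns_num;
  if (a->branch_deviation != b->branch_deviation)
    return a->branch_deviation < b->branch_deviation;
  return a->middle_bundle_stops < b->middle_bundle_stops;
}
#endif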
8640 
8641 /* Start work with the hash table.  */
8642 
8643 static void
8644 initiate_bundle_state_table (void)
8645 {
8646   bundle_state_table = new hash_table<bundle_state_hasher> (50);
8647 }
8648 
8649 /* Finish work with the hash table.  */
8650 
8651 static void
8652 finish_bundle_state_table (void)
8653 {
8654   delete bundle_state_table;
8655   bundle_state_table = NULL;
8656 }
8657 
8658 
8659 
8660 /* The following variable is an insn `nop' used to check bundle states
8661    with different numbers of inserted nops.  */
8662 
8663 static rtx_insn *ia64_nop;
8664 
8665 /* The following function tries to issue NOPS_NUM nops for the current
8666    state without advancing the processor cycle.  If it fails, the
8667    function returns FALSE and frees the current state.  */
8668 
8669 static int
8670 try_issue_nops (struct bundle_state *curr_state, int nops_num)
8671 {
8672   int i;
8673 
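  /* A negative state_transition result means the nop fits in the
     current cycle; a non-negative result means it would need a new
     cycle, so the whole state is abandoned.  */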
8674   for (i = 0; i < nops_num; i++)
8675     if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8676       {
8677 	free_bundle_state (curr_state);
8678 	return FALSE;
8679       }
8680   return TRUE;
8681 }
8682 
8683 /* The following function tries to issue INSN for the current
8684    state without advancing the processor cycle.  If it fails, the
8685    function returns FALSE and frees the current state.  */
8686 
8687 static int
8688 try_issue_insn (struct bundle_state *curr_state, rtx insn)
8689 {
8690   if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8691     {
8692       free_bundle_state (curr_state);
8693       return FALSE;
8694     }
8695   return TRUE;
8696 }
8697 
8698 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
8699    starting with ORIGINATOR without advancing the processor cycle.  If
8700    TRY_BUNDLE_END_P is TRUE, the function also/only (if
8701    ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
8702    If it is successful, the function creates a new bundle state and
8703    inserts it into the hash table and into `index_to_bundle_states'.  */
8704 
8705 static void
8706 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
8707 		     rtx_insn *insn, int try_bundle_end_p,
8708 		     int only_bundle_end_p)
8709 {
8710   struct bundle_state *curr_state;
8711 
8712   curr_state = get_free_bundle_state ();
8713   memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8714   curr_state->insn = insn;
8715   curr_state->insn_num = originator->insn_num + 1;
8716   curr_state->cost = originator->cost;
8717   curr_state->originator = originator;
8718   curr_state->before_nops_num = before_nops_num;
8719   curr_state->after_nops_num = 0;
8720   curr_state->accumulated_insns_num
8721     = originator->accumulated_insns_num + before_nops_num;
8722   curr_state->branch_deviation = originator->branch_deviation;
8723   curr_state->middle_bundle_stops = originator->middle_bundle_stops;
8724   gcc_assert (insn);
8725   if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
8726     {
8727       gcc_assert (GET_MODE (insn) != TImode);
8728       if (!try_issue_nops (curr_state, before_nops_num))
8729 	return;
8730       if (!try_issue_insn (curr_state, insn))
8731 	return;
8732       memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
8733       if (curr_state->accumulated_insns_num % 3 != 0)
8734 	curr_state->middle_bundle_stops++;
8735       if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8736 	  && curr_state->accumulated_insns_num % 3 != 0)
8737 	{
8738 	  free_bundle_state (curr_state);
8739 	  return;
8740 	}
8741     }
8742   else if (GET_MODE (insn) != TImode)
8743     {
8744       if (!try_issue_nops (curr_state, before_nops_num))
8745 	return;
8746       if (!try_issue_insn (curr_state, insn))
8747 	return;
8748       curr_state->accumulated_insns_num++;
8749       gcc_assert (!unknown_for_bundling_p (insn));
8750 
8751       if (ia64_safe_type (insn) == TYPE_L)
8752 	curr_state->accumulated_insns_num++;
8753     }
8754   else
8755     {
8756       /* If this is an insn that must be first in a group, then don't allow
8757 	 nops to be emitted before it.  Currently, alloc is the only such
8758 	 supported instruction.  */
8759       /* ??? The bundling automatons should handle this for us, but they do
8760 	 not yet have support for the first_insn attribute.  */
8761       if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8762 	{
8763 	  free_bundle_state (curr_state);
8764 	  return;
8765 	}
8766 
8767       state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8768       state_transition (curr_state->dfa_state, NULL);
8769       curr_state->cost++;
8770       if (!try_issue_nops (curr_state, before_nops_num))
8771 	return;
8772       if (!try_issue_insn (curr_state, insn))
8773 	return;
8774       curr_state->accumulated_insns_num++;
8775       if (unknown_for_bundling_p (insn))
8776 	{
8777 	  /* Finish bundle containing asm insn.  */
8778 	  curr_state->after_nops_num
8779 	    = 3 - curr_state->accumulated_insns_num % 3;
8780 	  curr_state->accumulated_insns_num
8781 	    += 3 - curr_state->accumulated_insns_num % 3;
8782 	}
8783       else if (ia64_safe_type (insn) == TYPE_L)
8784 	curr_state->accumulated_insns_num++;
8785     }
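  /* (accumulated_insns_num - 1) % 3 is the slot this insn ended up in,
     so a branch gets a penalty of 2, 1 or 0 for slots 0, 1 and 2: the
     3rd slot is the preferred place for branches.  */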
8786   if (ia64_safe_type (insn) == TYPE_B)
8787     curr_state->branch_deviation
8788       += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8789   if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8790     {
8791       if (!only_bundle_end_p && insert_bundle_state (curr_state))
8792 	{
8793 	  state_t dfa_state;
8794 	  struct bundle_state *curr_state1;
8795 	  struct bundle_state *allocated_states_chain;
8796 
8797 	  curr_state1 = get_free_bundle_state ();
8798 	  dfa_state = curr_state1->dfa_state;
8799 	  allocated_states_chain = curr_state1->allocated_states_chain;
8800 	  *curr_state1 = *curr_state;
8801 	  curr_state1->dfa_state = dfa_state;
8802 	  curr_state1->allocated_states_chain = allocated_states_chain;
8803 	  memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8804 		  dfa_state_size);
8805 	  curr_state = curr_state1;
8806 	}
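      /* Pad the rest of the bundle with nops, e.g. one insn already in
	 the bundle needs two trailing nops.  */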
8807       if (!try_issue_nops (curr_state,
8808 			   3 - curr_state->accumulated_insns_num % 3))
8809 	return;
8810       curr_state->after_nops_num
8811 	= 3 - curr_state->accumulated_insns_num % 3;
8812       curr_state->accumulated_insns_num
8813 	+= 3 - curr_state->accumulated_insns_num % 3;
8814     }
8815   if (!insert_bundle_state (curr_state))
8816     free_bundle_state (curr_state);
8817   return;
8818 }
8819 
8820 /* The following function returns the position in the two-bundle
8821    window for the given STATE.  */
8822 
8823 static int
8824 get_max_pos (state_t state)
8825 {
8826   if (cpu_unit_reservation_p (state, pos_6))
8827     return 6;
8828   else if (cpu_unit_reservation_p (state, pos_5))
8829     return 5;
8830   else if (cpu_unit_reservation_p (state, pos_4))
8831     return 4;
8832   else if (cpu_unit_reservation_p (state, pos_3))
8833     return 3;
8834   else if (cpu_unit_reservation_p (state, pos_2))
8835     return 2;
8836   else if (cpu_unit_reservation_p (state, pos_1))
8837     return 1;
8838   else
8839     return 0;
8840 }
8841 
8842 /* The function returns the code of a possible template for the given
8843    position and state.  The function should be called only with POS
8844    equal to 3 or 6.  We avoid generating F NOPs by putting templates
8845    containing F insns at the end of the template search, because of an
8846    undocumented anomaly in McKinley-derived cores which can cause
8847    stalls if an F-unit insn (including a NOP) is issued within a
8848    six-cycle window after reading certain application registers (such
8849    as ar.bsp).  Furthermore, power considerations also argue against
8850    the use of F-unit instructions unless they're really needed.  */
8851 
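/* The template numbers returned below are the bundle selector operand;
   reading them off the reservation tests: 0 .mii, 1 .mmi, 2 .mfi,
   3 .mmf, 4 .bbb, 5 .mbb, 6 .mib, 7 .mmb, 8 .mfb, 9 .mlx.  */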
8852 static int
8853 get_template (state_t state, int pos)
8854 {
8855   switch (pos)
8856     {
8857     case 3:
8858       if (cpu_unit_reservation_p (state, _0mmi_))
8859 	return 1;
8860       else if (cpu_unit_reservation_p (state, _0mii_))
8861 	return 0;
8862       else if (cpu_unit_reservation_p (state, _0mmb_))
8863 	return 7;
8864       else if (cpu_unit_reservation_p (state, _0mib_))
8865 	return 6;
8866       else if (cpu_unit_reservation_p (state, _0mbb_))
8867 	return 5;
8868       else if (cpu_unit_reservation_p (state, _0bbb_))
8869 	return 4;
8870       else if (cpu_unit_reservation_p (state, _0mmf_))
8871 	return 3;
8872       else if (cpu_unit_reservation_p (state, _0mfi_))
8873 	return 2;
8874       else if (cpu_unit_reservation_p (state, _0mfb_))
8875 	return 8;
8876       else if (cpu_unit_reservation_p (state, _0mlx_))
8877 	return 9;
8878       else
8879 	gcc_unreachable ();
8880     case 6:
8881       if (cpu_unit_reservation_p (state, _1mmi_))
8882 	return 1;
8883       else if (cpu_unit_reservation_p (state, _1mii_))
8884 	return 0;
8885       else if (cpu_unit_reservation_p (state, _1mmb_))
8886 	return 7;
8887       else if (cpu_unit_reservation_p (state, _1mib_))
8888 	return 6;
8889       else if (cpu_unit_reservation_p (state, _1mbb_))
8890 	return 5;
8891       else if (cpu_unit_reservation_p (state, _1bbb_))
8892 	return 4;
8893       else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8894 	return 3;
8895       else if (cpu_unit_reservation_p (state, _1mfi_))
8896 	return 2;
8897       else if (cpu_unit_reservation_p (state, _1mfb_))
8898 	return 8;
8899       else if (cpu_unit_reservation_p (state, _1mlx_))
8900 	return 9;
8901       else
8902 	gcc_unreachable ();
8903     default:
8904       gcc_unreachable ();
8905     }
8906 }
8907 
8908 /* True when INSN is important for bundling.  */
8909 
8910 static bool
8911 important_for_bundling_p (rtx_insn *insn)
8912 {
8913   return (INSN_P (insn)
8914 	  && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8915 	  && GET_CODE (PATTERN (insn)) != USE
8916 	  && GET_CODE (PATTERN (insn)) != CLOBBER);
8917 }
8918 
8919 /* The following function returns the first insn important for insn
8920    bundling starting at INSN and stopping before TAIL.  */
8921 
8922 static rtx_insn *
8923 get_next_important_insn (rtx_insn *insn, rtx_insn *tail)
8924 {
8925   for (; insn && insn != tail; insn = NEXT_INSN (insn))
8926     if (important_for_bundling_p (insn))
8927       return insn;
8928   return NULL;
8929 }
8930 
8931 /* True when INSN is unknown, but important, for bundling.  */
8932 
8933 static bool
8934 unknown_for_bundling_p (rtx_insn *insn)
8935 {
8936   return (INSN_P (insn)
8937 	  && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_UNKNOWN
8938 	  && GET_CODE (PATTERN (insn)) != USE
8939 	  && GET_CODE (PATTERN (insn)) != CLOBBER);
8940 }
8941 
8942 /* Add a bundle selector TEMPLATE0 before INSN.  */
8943 
8944 static void
8945 ia64_add_bundle_selector_before (int template0, rtx_insn *insn)
8946 {
8947   rtx b = gen_bundle_selector (GEN_INT (template0));
8948 
8949   ia64_emit_insn_before (b, insn);
8950 #if NR_BUNDLES == 10
8951   if ((template0 == 4 || template0 == 5)
8952       && ia64_except_unwind_info (&global_options) == UI_TARGET)
8953     {
8954       int i;
8955       rtx note = NULL_RTX;
8956 
8957       /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
8958 	 first or second slot.  If it is and has REG_EH_NOTE set, copy it
8959 	 to following nops, as br.call sets rp to the address of following
8960 	 bundle and therefore an EH region end must be on a bundle
8961 	 boundary.  */
8962       insn = PREV_INSN (insn);
8963       for (i = 0; i < 3; i++)
8964 	{
8965 	  do
8966 	    insn = next_active_insn (insn);
8967 	  while (NONJUMP_INSN_P (insn)
8968 		 && get_attr_empty (insn) == EMPTY_YES);
8969 	  if (CALL_P (insn))
8970 	    note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
8971 	  else if (note)
8972 	    {
8973 	      int code;
8974 
8975 	      gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
8976 			  || code == CODE_FOR_nop_b);
8977 	      if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
8978 		note = NULL_RTX;
8979 	      else
8980 		add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
8981 	    }
8982 	}
8983     }
8984 #endif
8985 }
8986 
8987 /* The following function does insn bundling.  Bundling means
8988    inserting templates and nop insns to fit insn groups into permitted
8989    templates.  Instruction scheduling uses an NDFA (non-deterministic
8990    finite automaton) encoding information about the templates and the
8991    inserted nops.  Nondeterminism of the automaton permits following
8992    all possible insn sequences very fast.
8993 
8994    Unfortunately it is not possible to get information about inserting
8995    nop insns and used templates from the automaton states.  The
8996    automaton only says that we can issue an insn possibly inserting
8997    some nops before it and using some template.  Therefore insn
8998    bundling in this function is implemented by using a DFA
8999    (deterministic finite automaton).  We follow all possible insn
9000    sequences by inserting 0-2 nops (that is what the NDFA describes for
9001    insn scheduling) before/after each insn being bundled.  We know the
9002    start of the simulated processor cycle from insn scheduling (an insn
9003    starting a new cycle has TImode).
9004 
9005    A simple implementation of insn bundling would create an enormous
9006    number of possible insn sequences satisfying information about new
9007    cycle ticks taken from the insn scheduling.  To make the algorithm
9008    practical we use dynamic programming.  Each decision (about
9009    inserting nops and implicitly about previous decisions) is described
9010    by structure bundle_state (see above).  If we generate the same
9011    bundle state (the key is the automaton state after issuing the insns
9012    and nops for it), we reuse the already generated one.  As a
9013    consequence we reject some decisions which cannot improve the
9014    solution and reduce the memory used by the algorithm.
9015 
9016    When we reach the end of EBB (extended basic block), we choose the
9017    best sequence and then, moving back in EBB, insert templates for
9018    the best alternative.  The templates are taken from querying
9019    automaton state for each insn in chosen bundle states.
9020 
9021    So the algorithm makes two (forward and backward) passes through
9022    EBB.  */
9023 
9024 static void
9025 bundling (FILE *dump, int verbose, rtx_insn *prev_head_insn, rtx_insn *tail)
9026 {
9027   struct bundle_state *curr_state, *next_state, *best_state;
9028   rtx_insn *insn, *next_insn;
9029   int insn_num;
9030   int i, bundle_end_p, only_bundle_end_p, asm_p;
9031   int pos = 0, max_pos, template0, template1;
9032   rtx_insn *b;
9033   enum attr_type type;
9034 
9035   insn_num = 0;
9036   /* Count insns in the EBB.  */
9037   for (insn = NEXT_INSN (prev_head_insn);
9038        insn && insn != tail;
9039        insn = NEXT_INSN (insn))
9040     if (INSN_P (insn))
9041       insn_num++;
9042   if (insn_num == 0)
9043     return;
9044   bundling_p = 1;
9045   dfa_clean_insn_cache ();
9046   initiate_bundle_state_table ();
9047   index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
9048   /* First (forward) pass -- generation of bundle states.  */
9049   curr_state = get_free_bundle_state ();
9050   curr_state->insn = NULL;
9051   curr_state->before_nops_num = 0;
9052   curr_state->after_nops_num = 0;
9053   curr_state->insn_num = 0;
9054   curr_state->cost = 0;
9055   curr_state->accumulated_insns_num = 0;
9056   curr_state->branch_deviation = 0;
9057   curr_state->middle_bundle_stops = 0;
9058   curr_state->next = NULL;
9059   curr_state->originator = NULL;
9060   state_reset (curr_state->dfa_state);
9061   index_to_bundle_states [0] = curr_state;
9062   insn_num = 0;
9063   /* Shift the cycle mark if it is put on an insn which could be ignored.  */
9064   for (insn = NEXT_INSN (prev_head_insn);
9065        insn != tail;
9066        insn = NEXT_INSN (insn))
9067     if (INSN_P (insn)
9068 	&& !important_for_bundling_p (insn)
9069 	&& GET_MODE (insn) == TImode)
9070       {
9071 	PUT_MODE (insn, VOIDmode);
9072 	for (next_insn = NEXT_INSN (insn);
9073 	     next_insn != tail;
9074 	     next_insn = NEXT_INSN (next_insn))
9075 	  if (important_for_bundling_p (next_insn)
9076 	      && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
9077 	    {
9078 	      PUT_MODE (next_insn, TImode);
9079 	      break;
9080 	    }
9081       }
9082   /* Forward pass: generation of bundle states.  */
9083   for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
9084        insn != NULL_RTX;
9085        insn = next_insn)
9086     {
9087       gcc_assert (important_for_bundling_p (insn));
9088       type = ia64_safe_type (insn);
9089       next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
9090       insn_num++;
9091       index_to_bundle_states [insn_num] = NULL;
9092       for (curr_state = index_to_bundle_states [insn_num - 1];
9093 	   curr_state != NULL;
9094 	   curr_state = next_state)
9095 	{
9096 	  pos = curr_state->accumulated_insns_num % 3;
9097 	  next_state = curr_state->next;
9098 	  /* We must fill up the current bundle in order to start a
9099 	     subsequent asm insn in a new bundle.  Asm insn is always
9100 	     placed in a separate bundle.  */
9101 	  only_bundle_end_p
9102 	    = (next_insn != NULL_RTX
9103 	       && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
9104 	       && unknown_for_bundling_p (next_insn));
9105 	  /* We may fill up the current bundle if it is the cycle end
9106 	     without a group barrier.  */
9107 	  bundle_end_p
9108 	    = (only_bundle_end_p || next_insn == NULL_RTX
9109 	       || (GET_MODE (next_insn) == TImode
9110 		   && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
9111 	  if (type == TYPE_F || type == TYPE_B || type == TYPE_L
9112 	      || type == TYPE_S)
9113 	    issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
9114 				 only_bundle_end_p);
9115 	  issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
9116 			       only_bundle_end_p);
9117 	  issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
9118 			       only_bundle_end_p);
9119 	}
9120       gcc_assert (index_to_bundle_states [insn_num]);
9121       for (curr_state = index_to_bundle_states [insn_num];
9122 	   curr_state != NULL;
9123 	   curr_state = curr_state->next)
9124 	if (verbose >= 2 && dump)
9125 	  {
9126 	    /* This structure is taken from generated code of the
9127 	       pipeline hazard recognizer (see file insn-attrtab.c).
9128 	       Please don't forget to change the structure if a new
9129 	       automaton is added to .md file.  */
9130 	    struct DFA_chip
9131 	    {
9132 	      unsigned short one_automaton_state;
9133 	      unsigned short oneb_automaton_state;
9134 	      unsigned short two_automaton_state;
9135 	      unsigned short twob_automaton_state;
9136 	    };
9137 
9138 	    fprintf
9139 	      (dump,
9140 	       "//    Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
9141 	       curr_state->unique_num,
9142 	       (curr_state->originator == NULL
9143 		? -1 : curr_state->originator->unique_num),
9144 	       curr_state->cost,
9145 	       curr_state->before_nops_num, curr_state->after_nops_num,
9146 	       curr_state->accumulated_insns_num, curr_state->branch_deviation,
9147 	       curr_state->middle_bundle_stops,
9148 	       ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
9149 	       INSN_UID (insn));
9150 	  }
9151     }
9152 
9153   /* We should find a solution because the 2nd insn scheduling has
9154      found one.  */
9155   gcc_assert (index_to_bundle_states [insn_num]);
9156   /* Find a state corresponding to the best insn sequence.  */
9157   best_state = NULL;
9158   for (curr_state = index_to_bundle_states [insn_num];
9159        curr_state != NULL;
9160        curr_state = curr_state->next)
9161     /* We are only looking at states whose last bundle is fully
9162        filled up.  First we prefer insn sequences with minimal cost,
9163        then with minimal inserted nops, and finally with branch insns
9164        placed in the 3rd slots.  */
9165     if (curr_state->accumulated_insns_num % 3 == 0
9166 	&& (best_state == NULL || best_state->cost > curr_state->cost
9167 	    || (best_state->cost == curr_state->cost
9168 		&& (curr_state->accumulated_insns_num
9169 		    < best_state->accumulated_insns_num
9170 		    || (curr_state->accumulated_insns_num
9171 			== best_state->accumulated_insns_num
9172 			&& (curr_state->branch_deviation
9173 			    < best_state->branch_deviation
9174 			    || (curr_state->branch_deviation
9175 				== best_state->branch_deviation
9176 				&& curr_state->middle_bundle_stops
9177 				< best_state->middle_bundle_stops)))))))
9178       best_state = curr_state;
9179   /* Second (backward) pass: adding nops and templates.  */
9180   gcc_assert (best_state);
9181   insn_num = best_state->before_nops_num;
9182   template0 = template1 = -1;
9183   for (curr_state = best_state;
9184        curr_state->originator != NULL;
9185        curr_state = curr_state->originator)
9186     {
9187       insn = curr_state->insn;
9188       asm_p = unknown_for_bundling_p (insn);
9189       insn_num++;
9190       if (verbose >= 2 && dump)
9191 	{
9192 	  struct DFA_chip
9193 	  {
9194 	    unsigned short one_automaton_state;
9195 	    unsigned short oneb_automaton_state;
9196 	    unsigned short two_automaton_state;
9197 	    unsigned short twob_automaton_state;
9198 	  };
9199 
9200 	  fprintf
9201 	    (dump,
9202 	     "//    Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
9203 	     curr_state->unique_num,
9204 	     (curr_state->originator == NULL
9205 	      ? -1 : curr_state->originator->unique_num),
9206 	     curr_state->cost,
9207 	     curr_state->before_nops_num, curr_state->after_nops_num,
9208 	     curr_state->accumulated_insns_num, curr_state->branch_deviation,
9209 	     curr_state->middle_bundle_stops,
9210 	     ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
9211 	     INSN_UID (insn));
9212 	}
9213       /* Find the position in the current bundle window.  The window can
9214 	 contain at most two bundles.  A two-bundle window means that
9215 	 the processor will make two bundle rotations.  */
9216       max_pos = get_max_pos (curr_state->dfa_state);
9217       if (max_pos == 6
9218 	  /* The following (negative template number) means that the
9219 	     processor did one bundle rotation.  */
9220 	  || (max_pos == 3 && template0 < 0))
9221 	{
9222 	  /* We are at the end of the window -- find template(s) for
9223 	     its bundle(s).  */
9224 	  pos = max_pos;
9225 	  if (max_pos == 3)
9226 	    template0 = get_template (curr_state->dfa_state, 3);
9227 	  else
9228 	    {
9229 	      template1 = get_template (curr_state->dfa_state, 3);
9230 	      template0 = get_template (curr_state->dfa_state, 6);
9231 	    }
9232 	}
9233       if (max_pos > 3 && template1 < 0)
9234 	/* It may happen when we have the stop inside a bundle.  */
9235 	{
9236 	  gcc_assert (pos <= 3);
9237 	  template1 = get_template (curr_state->dfa_state, 3);
9238 	  pos += 3;
9239 	}
9240       if (!asm_p)
9241 	/* Emit nops after the current insn.  */
9242 	for (i = 0; i < curr_state->after_nops_num; i++)
9243 	  {
9244 	    rtx nop_pat = gen_nop ();
9245 	    rtx_insn *nop = emit_insn_after (nop_pat, insn);
9246 	    pos--;
9247 	    gcc_assert (pos >= 0);
9248 	    if (pos % 3 == 0)
9249 	      {
9250 		/* We are at the start of a bundle: emit the template
9251 		   (it should be defined).  */
9252 		gcc_assert (template0 >= 0);
9253 		ia64_add_bundle_selector_before (template0, nop);
9254 		/* If we have two bundle window, we make one bundle
9255 		   rotation.  Otherwise template0 will be undefined
9256 		   (negative value).  */
9257 		template0 = template1;
9258 		template1 = -1;
9259 	      }
9260 	  }
9261       /* Move the position backward in the window.  A group barrier has
9262 	 no slot.  An asm insn takes a whole bundle.  */
9263       if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9264 	  && !unknown_for_bundling_p (insn))
9265 	pos--;
9266       /* Long insn takes 2 slots.  */
9267       if (ia64_safe_type (insn) == TYPE_L)
9268 	pos--;
9269       gcc_assert (pos >= 0);
9270       if (pos % 3 == 0
9271 	  && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9272 	  && !unknown_for_bundling_p (insn))
9273 	{
9274 	  /* The current insn is at the bundle start: emit the
9275 	     template.  */
9276 	  gcc_assert (template0 >= 0);
9277 	  ia64_add_bundle_selector_before (template0, insn);
9278 	  b = PREV_INSN (insn);
9279 	  insn = b;
9280 	  /* See comment above in analogous place for emitting nops
9281 	     after the insn.  */
9282 	  template0 = template1;
9283 	  template1 = -1;
9284 	}
9285       /* Emit nops before the current insn.  */
9286       for (i = 0; i < curr_state->before_nops_num; i++)
9287 	{
9288 	  rtx nop_pat = gen_nop ();
9289 	  ia64_emit_insn_before (nop_pat, insn);
9290 	  rtx_insn *nop = PREV_INSN (insn);
9291 	  insn = nop;
9292 	  pos--;
9293 	  gcc_assert (pos >= 0);
9294 	  if (pos % 3 == 0)
9295 	    {
9296 	      /* See comment above in analogous place for emitting nops
9297 		 after the insn.  */
9298 	      gcc_assert (template0 >= 0);
9299 	      ia64_add_bundle_selector_before (template0, insn);
9300 	      b = PREV_INSN (insn);
9301 	      insn = b;
9302 	      template0 = template1;
9303 	      template1 = -1;
9304 	    }
9305 	}
9306     }
9307 
9308   if (flag_checking)
9309     {
9310       /* Assert right calculation of middle_bundle_stops.  */
9311       int num = best_state->middle_bundle_stops;
9312       bool start_bundle = true, end_bundle = false;
9313 
9314       for (insn = NEXT_INSN (prev_head_insn);
9315 	   insn && insn != tail;
9316 	   insn = NEXT_INSN (insn))
9317 	{
9318 	  if (!INSN_P (insn))
9319 	    continue;
9320 	  if (recog_memoized (insn) == CODE_FOR_bundle_selector)
9321 	    start_bundle = true;
9322 	  else
9323 	    {
9324 	      rtx_insn *next_insn;
9325 
9326 	      for (next_insn = NEXT_INSN (insn);
9327 		   next_insn && next_insn != tail;
9328 		   next_insn = NEXT_INSN (next_insn))
9329 		if (INSN_P (next_insn)
9330 		    && (ia64_safe_itanium_class (next_insn)
9331 			!= ITANIUM_CLASS_IGNORE
9332 			|| recog_memoized (next_insn)
9333 			== CODE_FOR_bundle_selector)
9334 		    && GET_CODE (PATTERN (next_insn)) != USE
9335 		    && GET_CODE (PATTERN (next_insn)) != CLOBBER)
9336 		  break;
9337 
9338 	      end_bundle = next_insn == NULL_RTX
9339 		|| next_insn == tail
9340 		|| (INSN_P (next_insn)
9341 		    && recog_memoized (next_insn) == CODE_FOR_bundle_selector);
9342 	      if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
9343 		  && !start_bundle && !end_bundle
9344 		  && next_insn
9345 		  && !unknown_for_bundling_p (next_insn))
9346 		num--;
9347 
9348 	      start_bundle = false;
9349 	    }
9350 	}
9351 
9352       gcc_assert (num == 0);
9353     }
9354 
9355   free (index_to_bundle_states);
9356   finish_bundle_state_table ();
9357   bundling_p = 0;
9358   dfa_clean_insn_cache ();
9359 }
9360 
9361 /* The following function is called at the end of scheduling BB or
9362    EBB.  After reload, it inserts stop bits and does insn bundling.  */
9363 
9364 static void
9365 ia64_sched_finish (FILE *dump, int sched_verbose)
9366 {
9367   if (sched_verbose)
9368     fprintf (dump, "// Finishing schedule.\n");
9369   if (!reload_completed)
9370     return;
9371   if (reload_completed)
9372     {
9373       final_emit_insn_group_barriers (dump);
9374       bundling (dump, sched_verbose, current_sched_info->prev_head,
9375 		current_sched_info->next_tail);
9376       if (sched_verbose && dump)
9377 	fprintf (dump, "//    finishing %d-%d\n",
9378 		 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
9379 		 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
9380 
9381       return;
9382     }
9383 }
9384 
9385 /* The following function inserts stop bits in scheduled BB or EBB.  */
9386 
9387 static void
9388 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
9389 {
9390   rtx_insn *insn;
9391   int need_barrier_p = 0;
9392   int seen_good_insn = 0;
9393 
9394   init_insn_group_barriers ();
9395 
9396   for (insn = NEXT_INSN (current_sched_info->prev_head);
9397        insn != current_sched_info->next_tail;
9398        insn = NEXT_INSN (insn))
9399     {
9400       if (BARRIER_P (insn))
9401 	{
9402 	  rtx_insn *last = prev_active_insn (insn);
9403 
9404 	  if (! last)
9405 	    continue;
9406 	  if (JUMP_TABLE_DATA_P (last))
9407 	    last = prev_active_insn (last);
9408 	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
9409 	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
9410 
9411 	  init_insn_group_barriers ();
9412 	  seen_good_insn = 0;
9413 	  need_barrier_p = 0;
9414 	}
9415       else if (NONDEBUG_INSN_P (insn))
9416 	{
9417 	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
9418 	    {
9419 	      init_insn_group_barriers ();
9420 	      seen_good_insn = 0;
9421 	      need_barrier_p = 0;
9422 	    }
9423 	  else if (need_barrier_p || group_barrier_needed (insn)
9424 		   || (mflag_sched_stop_bits_after_every_cycle
9425 		       && GET_MODE (insn) == TImode
9426 		       && seen_good_insn))
9427 	    {
9428 	      if (TARGET_EARLY_STOP_BITS)
9429 		{
9430 		  rtx_insn *last;
9431 
9432 		  for (last = insn;
9433 		       last != current_sched_info->prev_head;
9434 		       last = PREV_INSN (last))
9435 		    if (INSN_P (last) && GET_MODE (last) == TImode
9436 			&& stops_p [INSN_UID (last)])
9437 		      break;
9438 		  if (last == current_sched_info->prev_head)
9439 		    last = insn;
9440 		  last = prev_active_insn (last);
9441 		  if (last
9442 		      && recog_memoized (last) != CODE_FOR_insn_group_barrier)
9443 		    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
9444 				     last);
9445 		  init_insn_group_barriers ();
9446 		  for (last = NEXT_INSN (last);
9447 		       last != insn;
9448 		       last = NEXT_INSN (last))
9449 		    if (INSN_P (last))
9450 		      {
9451 			group_barrier_needed (last);
9452 			if (recog_memoized (last) >= 0
9453 			    && important_for_bundling_p (last))
9454 			  seen_good_insn = 1;
9455 		      }
9456 		}
9457 	      else
9458 		{
9459 		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
9460 				    insn);
9461 		  init_insn_group_barriers ();
9462 		  seen_good_insn = 0;
9463 		}
9464 	      group_barrier_needed (insn);
9465 	      if (recog_memoized (insn) >= 0
9466 		  && important_for_bundling_p (insn))
9467 		seen_good_insn = 1;
9468 	    }
9469 	  else if (recog_memoized (insn) >= 0
9470 		   && important_for_bundling_p (insn))
9471 	    seen_good_insn = 1;
9472 	  need_barrier_p = (CALL_P (insn) || unknown_for_bundling_p (insn));
9473 	}
9474     }
9475 }
9476 
9477 
9478 
9479 /* The following function returns the lookahead depth used by the
9480    first-cycle multipass DFA insn scheduler.  */
9481 
9482 static int
9483 ia64_first_cycle_multipass_dfa_lookahead (void)
9484 {
9485   return (reload_completed ? 6 : 4);
9486 }
9487 
9488 /* The following function initiates variable `dfa_pre_cycle_insn'.  */
9489 
9490 static void
9491 ia64_init_dfa_pre_cycle_insn (void)
9492 {
9493   if (temp_dfa_state == NULL)
9494     {
9495       dfa_state_size = state_size ();
9496       temp_dfa_state = xmalloc (dfa_state_size);
9497       prev_cycle_state = xmalloc (dfa_state_size);
9498     }
9499   dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
9500   SET_PREV_INSN (dfa_pre_cycle_insn) = SET_NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
9501   recog_memoized (dfa_pre_cycle_insn);
9502   dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
9503   SET_PREV_INSN (dfa_stop_insn) = SET_NEXT_INSN (dfa_stop_insn) = NULL_RTX;
9504   recog_memoized (dfa_stop_insn);
9505 }
9506 
9507 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9508    used by the DFA insn scheduler.  */
9509 
9510 static rtx
9511 ia64_dfa_pre_cycle_insn (void)
9512 {
9513   return dfa_pre_cycle_insn;
9514 }
9515 
9516 /* The following function returns TRUE if PRODUCER (of type ilog or
9517    ld) produces an address for CONSUMER (of type st or stf).  */
9518 
9519 int
9520 ia64_st_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
9521 {
9522   rtx dest, reg, mem;
9523 
9524   gcc_assert (producer && consumer);
9525   dest = ia64_single_set (producer);
9526   gcc_assert (dest);
9527   reg = SET_DEST (dest);
9528   gcc_assert (reg);
9529   if (GET_CODE (reg) == SUBREG)
9530     reg = SUBREG_REG (reg);
9531   gcc_assert (GET_CODE (reg) == REG);
9532 
9533   dest = ia64_single_set (consumer);
9534   gcc_assert (dest);
9535   mem = SET_DEST (dest);
9536   gcc_assert (mem && GET_CODE (mem) == MEM);
9537   return reg_mentioned_p (reg, mem);
9538 }
9539 
9540 /* The following function returns TRUE if PRODUCER (of type ilog or
9541    ld) produces an address for CONSUMER (of type ld or fld).  */
9542 
9543 int
9544 ia64_ld_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
9545 {
9546   rtx dest, src, reg, mem;
9547 
9548   gcc_assert (producer && consumer);
9549   dest = ia64_single_set (producer);
9550   gcc_assert (dest);
9551   reg = SET_DEST (dest);
9552   gcc_assert (reg);
9553   if (GET_CODE (reg) == SUBREG)
9554     reg = SUBREG_REG (reg);
9555   gcc_assert (GET_CODE (reg) == REG);
9556 
9557   src = ia64_single_set (consumer);
9558   gcc_assert (src);
9559   mem = SET_SRC (src);
9560   gcc_assert (mem);
9561 
9562   if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9563     mem = XVECEXP (mem, 0, 0);
9564   else if (GET_CODE (mem) == IF_THEN_ELSE)
9565     /* ??? Is this bypass necessary for ld.c?  */
9566     {
9567       gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9568       mem = XEXP (mem, 1);
9569     }
9570 
9571   while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9572     mem = XEXP (mem, 0);
9573 
9574   if (GET_CODE (mem) == UNSPEC)
9575     {
9576       int c = XINT (mem, 1);
9577 
9578       gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9579 		  || c == UNSPEC_LDSA);
9580       mem = XVECEXP (mem, 0, 0);
9581     }
9582 
9583   /* Note that LO_SUM is used for GOT loads.  */
9584   gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
9585 
9586   return reg_mentioned_p (reg, mem);
9587 }
9588 
9589 /* The following function returns TRUE if INSN produces an address for
9590    a load/store insn.  We will place such insns into an M slot because
9591    that decreases their latency.  */
9592 
9593 int
9594 ia64_produce_address_p (rtx insn)
9595 {
9596   return insn->call;
9597 }
9598 
9599 
9600 /* Emit pseudo-ops for the assembler to describe predicate relations.
9601    At present this assumes that we only consider predicate pairs to
9602    be mutex, and that the assembler can deduce proper values from
9603    straight-line code.  */
9604 
9605 static void
9606 emit_predicate_relation_info (void)
9607 {
9608   basic_block bb;
9609 
9610   FOR_EACH_BB_REVERSE_FN (bb, cfun)
9611     {
9612       int r;
9613       rtx_insn *head = BB_HEAD (bb);
9614 
9615       /* We only need such notes at code labels.  */
9616       if (! LABEL_P (head))
9617 	continue;
9618       if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
9619 	head = NEXT_INSN (head);
9620 
9621       /* Skip p0, which may be thought to be live due to (reg:DI p0)
9622 	 grabbing the entire block of predicate registers.  */
9623       for (r = PR_REG (2); r < PR_REG (64); r += 2)
9624 	if (REGNO_REG_SET_P (df_get_live_in (bb), r))
9625 	  {
9626 	    rtx p = gen_rtx_REG (BImode, r);
9627 	    rtx_insn *n = emit_insn_after (gen_pred_rel_mutex (p), head);
9628 	    if (head == BB_END (bb))
9629 	      BB_END (bb) = n;
9630 	    head = n;
9631 	  }
9632     }
9633 
9634   /* Look for conditional calls that do not return, and protect predicate
9635      relations around them.  Otherwise the assembler will assume the call
9636      returns, and complain about uses of call-clobbered predicates after
9637      the call.  */
9638   FOR_EACH_BB_REVERSE_FN (bb, cfun)
9639     {
9640       rtx_insn *insn = BB_HEAD (bb);
9641 
9642       while (1)
9643 	{
9644 	  if (CALL_P (insn)
9645 	      && GET_CODE (PATTERN (insn)) == COND_EXEC
9646 	      && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9647 	    {
9648 	      rtx_insn *b =
9649 		emit_insn_before (gen_safe_across_calls_all (), insn);
9650 	      rtx_insn *a = emit_insn_after (gen_safe_across_calls_normal (), insn);
9651 	      if (BB_HEAD (bb) == insn)
9652 		BB_HEAD (bb) = b;
9653 	      if (BB_END (bb) == insn)
9654 		BB_END (bb) = a;
9655 	    }
9656 
9657 	  if (insn == BB_END (bb))
9658 	    break;
9659 	  insn = NEXT_INSN (insn);
9660 	}
9661     }
9662 }
9663 
9664 /* Perform machine dependent operations on the rtl chain INSNS.  */
9665 
9666 static void
9667 ia64_reorg (void)
9668 {
9669   /* We are freeing block_for_insn in the toplev to keep compatibility
9670      with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
9671   compute_bb_for_insn ();
9672 
9673   /* If optimizing, we'll have split before scheduling.  */
9674   if (optimize == 0)
9675     split_all_insns ();
9676 
9677   if (optimize && flag_schedule_insns_after_reload
9678       && dbg_cnt (ia64_sched2))
9679     {
9680       basic_block bb;
9681       timevar_push (TV_SCHED2);
9682       ia64_final_schedule = 1;
9683 
9684       /* We can't let modulo-sched prevent us from scheduling any bbs,
9685 	 since we need the final schedule to produce bundle information.  */
9686       FOR_EACH_BB_FN (bb, cfun)
9687 	bb->flags &= ~BB_DISABLE_SCHEDULE;
9688 
9689       initiate_bundle_states ();
9690       ia64_nop = make_insn_raw (gen_nop ());
9691       SET_PREV_INSN (ia64_nop) = SET_NEXT_INSN (ia64_nop) = NULL_RTX;
9692       recog_memoized (ia64_nop);
9693       clocks_length = get_max_uid () + 1;
9694       stops_p = XCNEWVEC (char, clocks_length);
9695 
9696       if (ia64_tune == PROCESSOR_ITANIUM2)
9697 	{
9698 	  pos_1 = get_cpu_unit_code ("2_1");
9699 	  pos_2 = get_cpu_unit_code ("2_2");
9700 	  pos_3 = get_cpu_unit_code ("2_3");
9701 	  pos_4 = get_cpu_unit_code ("2_4");
9702 	  pos_5 = get_cpu_unit_code ("2_5");
9703 	  pos_6 = get_cpu_unit_code ("2_6");
9704 	  _0mii_ = get_cpu_unit_code ("2b_0mii.");
9705 	  _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9706 	  _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9707 	  _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9708 	  _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9709 	  _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9710 	  _0mib_ = get_cpu_unit_code ("2b_0mib.");
9711 	  _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9712 	  _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9713 	  _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9714 	  _1mii_ = get_cpu_unit_code ("2b_1mii.");
9715 	  _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9716 	  _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9717 	  _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9718 	  _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9719 	  _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9720 	  _1mib_ = get_cpu_unit_code ("2b_1mib.");
9721 	  _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9722 	  _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9723 	  _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9724 	}
9725       else
9726 	{
9727 	  pos_1 = get_cpu_unit_code ("1_1");
9728 	  pos_2 = get_cpu_unit_code ("1_2");
9729 	  pos_3 = get_cpu_unit_code ("1_3");
9730 	  pos_4 = get_cpu_unit_code ("1_4");
9731 	  pos_5 = get_cpu_unit_code ("1_5");
9732 	  pos_6 = get_cpu_unit_code ("1_6");
9733 	  _0mii_ = get_cpu_unit_code ("1b_0mii.");
9734 	  _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9735 	  _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9736 	  _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9737 	  _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9738 	  _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9739 	  _0mib_ = get_cpu_unit_code ("1b_0mib.");
9740 	  _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9741 	  _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9742 	  _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9743 	  _1mii_ = get_cpu_unit_code ("1b_1mii.");
9744 	  _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9745 	  _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9746 	  _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9747 	  _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9748 	  _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9749 	  _1mib_ = get_cpu_unit_code ("1b_1mib.");
9750 	  _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9751 	  _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9752 	  _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9753 	}
9754 
9755       if (flag_selective_scheduling2
9756 	  && !maybe_skip_selective_scheduling ())
9757         run_selective_scheduling ();
9758       else
9759 	schedule_ebbs ();
9760 
9761       /* Redo alignment computation, as it might have gone wrong.  */
9762       compute_alignments ();
9763 
9764       /* We cannot reuse this one because it has been corrupted by the
9765 	 evil glat.  */
9766       finish_bundle_states ();
9767       free (stops_p);
9768       stops_p = NULL;
9769       emit_insn_group_barriers (dump_file);
9770 
9771       ia64_final_schedule = 0;
9772       timevar_pop (TV_SCHED2);
9773     }
9774   else
9775     emit_all_insn_group_barriers (dump_file);
9776 
9777   df_analyze ();
9778 
9779   /* A call must not be the last instruction in a function, so that the
9780      return address stays within the function and unwinding works
9781      properly.  Note that IA-64 differs from dwarf2 on this point.  */
9782   if (ia64_except_unwind_info (&global_options) == UI_TARGET)
9783     {
9784       rtx_insn *insn;
9785       int saw_stop = 0;
9786 
9787       insn = get_last_insn ();
9788       if (! INSN_P (insn))
9789         insn = prev_active_insn (insn);
9790       if (insn)
9791 	{
9792 	  /* Skip over insns that expand to nothing.  */
9793 	  while (NONJUMP_INSN_P (insn)
9794 		 && get_attr_empty (insn) == EMPTY_YES)
9795 	    {
9796 	      if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9797 		  && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9798 		saw_stop = 1;
9799 	      insn = prev_active_insn (insn);
9800 	    }
9801 	  if (CALL_P (insn))
9802 	    {
9803 	      if (! saw_stop)
9804 		emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9805 	      emit_insn (gen_break_f ());
9806 	      emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9807 	    }
9808 	}
9809     }
9810 
9811   emit_predicate_relation_info ();
9812 
9813   if (flag_var_tracking)
9814     {
9815       timevar_push (TV_VAR_TRACKING);
9816       variable_tracking_main ();
9817       timevar_pop (TV_VAR_TRACKING);
9818     }
9819   df_finish_pass (false);
9820 }
9821 
9822 /* Return true if REGNO is used by the epilogue.  */
9823 
9824 int
9825 ia64_epilogue_uses (int regno)
9826 {
9827   switch (regno)
9828     {
9829     case R_GR (1):
9830       /* With a call to a function in another module, we will write a new
9831 	 value to "gp".  After returning from such a call, we need to make
9832 	 sure the function restores the original gp-value, even if the
9833 	 function itself does not use the gp anymore.  */
9834       return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
9835 
9836     case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9837     case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9838       /* For functions defined with the syscall_linkage attribute, all
9839 	 input registers are marked as live at all function exits.  This
9840 	 prevents the register allocator from using the input registers,
9841 	 which in turn makes it possible to restart a system call after
9842 	 an interrupt without having to save/restore the input registers.
9843 	 This also prevents kernel data from leaking to application code.  */
9844       return lookup_attribute ("syscall_linkage",
9845 	   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9846 
9847     case R_BR (0):
9848       /* Conditional return patterns can't represent the use of `b0' as
9849          the return address, so we force the value live this way.  */
9850       return 1;
9851 
9852     case AR_PFS_REGNUM:
9853       /* Likewise for ar.pfs, which is used by br.ret.  */
9854       return 1;
9855 
9856     default:
9857       return 0;
9858     }
9859 }
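
/* Illustrative sketch only, not part of the port: the syscall_linkage case
   above is what makes declarations like the following work, keeping all
   eight input registers live at every exit so a system call can be
   restarted after an interrupt.  The function name and signature here are
   hypothetical.  */
#if 0
extern long hypothetical_sys_call (long a0, long a1, long a2)
  __attribute__ ((syscall_linkage));
#endif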
9860 
9861 /* Return true if REGNO is used by the frame unwinder.  */
9862 
9863 int
9864 ia64_eh_uses (int regno)
9865 {
9866   unsigned int r;
9867 
9868   if (! reload_completed)
9869     return 0;
9870 
9871   if (regno == 0)
9872     return 0;
9873 
9874   for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9875     if (regno == current_frame_info.r[r]
9876        || regno == emitted_frame_related_regs[r])
9877       return 1;
9878 
9879   return 0;
9880 }
9881 
9882 /* Return true if this goes in small data/bss.  */
9883 
9884 /* ??? We could also support our own long data here, generating movl/add/ld8
9885    instead of addl,ld8/ld8.  This makes the code bigger, but should make it
9886    faster because there is one less load.  This would also cover incomplete
9887    types, which can't go in sdata/sbss.  */
9888 
9889 static bool
9890 ia64_in_small_data_p (const_tree exp)
9891 {
9892   if (TARGET_NO_SDATA)
9893     return false;
9894 
9895   /* We want to merge strings, so we never consider them small data.  */
9896   if (TREE_CODE (exp) == STRING_CST)
9897     return false;
9898 
9899   /* Functions are never small data.  */
9900   if (TREE_CODE (exp) == FUNCTION_DECL)
9901     return false;
9902 
9903   if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
9904     {
9905       const char *section = DECL_SECTION_NAME (exp);
9906 
9907       if (strcmp (section, ".sdata") == 0
9908 	  || strncmp (section, ".sdata.", 7) == 0
9909 	  || strncmp (section, ".gnu.linkonce.s.", 16) == 0
9910 	  || strcmp (section, ".sbss") == 0
9911 	  || strncmp (section, ".sbss.", 6) == 0
9912 	  || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
9913 	return true;
9914     }
9915   else
9916     {
9917       HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
9918 
9919       /* If this is an incomplete type with size 0, then we can't put it
9920 	 in sdata because it might be too big when completed.  */
9921       if (size > 0 && size <= ia64_section_threshold)
9922 	return true;
9923     }
9924 
9925   return false;
9926 }
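
/* An informal sketch of what the predicate above accepts; the variable
   names are made up, and whether the unnamed-section objects qualify
   depends on ia64_section_threshold.  */
#if 0
static int small_counter;		/* small complete object: may be placed in sbss */
static char big_buffer[65536];		/* larger than the threshold: ordinary bss */
int in_sdata __attribute__ ((section (".sdata.example")));  /* explicit small section */
#endif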
9927 
9928 /* Output assembly directives for prologue regions.  */
9929 
9930 /* The current basic block number.  */
9931 
9932 static bool last_block;
9933 
9934 /* True if we need a copy_state command at the start of the next block.  */
9935 
9936 static bool need_copy_state;
9937 
9938 #ifndef MAX_ARTIFICIAL_LABEL_BYTES
9939 # define MAX_ARTIFICIAL_LABEL_BYTES 30
9940 #endif
9941 
9942 /* The function emits unwind directives for the start of an epilogue.  */
9943 
9944 static void
9945 process_epilogue (FILE *asm_out_file, rtx insn ATTRIBUTE_UNUSED,
9946 		  bool unwind, bool frame ATTRIBUTE_UNUSED)
9947 {
9948   /* If this isn't the last block of the function, then we need to label the
9949      current state, and copy it back in at the start of the next block.  */
9950 
9951   if (!last_block)
9952     {
9953       if (unwind)
9954 	fprintf (asm_out_file, "\t.label_state %d\n",
9955 		 ++cfun->machine->state_num);
9956       need_copy_state = true;
9957     }
9958 
9959   if (unwind)
9960     fprintf (asm_out_file, "\t.restore sp\n");
9961 }
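
/* For reference, a hedged sketch of what the function above emits for an
   epilogue that is not in the last block (the state number is arbitrary):

	.label_state 1
	.restore sp

   with a matching ".body" / ".copy_state 1" emitted at the start of the
   next block via the need_copy_state handling in ia64_asm_unwind_emit.  */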
9962 
9963 /* This function processes a SET pattern for REG_CFA_ADJUST_CFA.  */
9964 
9965 static void
9966 process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn,
9967 			bool unwind, bool frame)
9968 {
9969   rtx dest = SET_DEST (pat);
9970   rtx src = SET_SRC (pat);
9971 
9972   if (dest == stack_pointer_rtx)
9973     {
9974       if (GET_CODE (src) == PLUS)
9975 	{
9976 	  rtx op0 = XEXP (src, 0);
9977 	  rtx op1 = XEXP (src, 1);
9978 
9979 	  gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
9980 
9981 	  if (INTVAL (op1) < 0)
9982 	    {
9983 	      gcc_assert (!frame_pointer_needed);
9984 	      if (unwind)
9985 		fprintf (asm_out_file,
9986 			 "\t.fframe " HOST_WIDE_INT_PRINT_DEC"\n",
9987 			 -INTVAL (op1));
9988 	    }
9989 	  else
9990 	    process_epilogue (asm_out_file, insn, unwind, frame);
9991 	}
9992       else
9993 	{
9994 	  gcc_assert (src == hard_frame_pointer_rtx);
9995 	  process_epilogue (asm_out_file, insn, unwind, frame);
9996 	}
9997     }
9998   else if (dest == hard_frame_pointer_rtx)
9999     {
10000       gcc_assert (src == stack_pointer_rtx);
10001       gcc_assert (frame_pointer_needed);
10002 
10003       if (unwind)
10004 	fprintf (asm_out_file, "\t.vframe r%d\n",
10005 		 ia64_dbx_register_number (REGNO (dest)));
10006     }
10007   else
10008     gcc_unreachable ();
10009 }
10010 
10011 /* This function processes a SET pattern for REG_CFA_REGISTER.  */
10012 
10013 static void
10014 process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind)
10015 {
10016   rtx dest = SET_DEST (pat);
10017   rtx src = SET_SRC (pat);
10018   int dest_regno = REGNO (dest);
10019   int src_regno;
10020 
10021   if (src == pc_rtx)
10022     {
10023       /* Saving return address pointer.  */
10024       if (unwind)
10025 	fprintf (asm_out_file, "\t.save rp, r%d\n",
10026 		 ia64_dbx_register_number (dest_regno));
10027       return;
10028     }
10029 
10030   src_regno = REGNO (src);
10031 
10032   switch (src_regno)
10033     {
10034     case PR_REG (0):
10035       gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
10036       if (unwind)
10037 	fprintf (asm_out_file, "\t.save pr, r%d\n",
10038 		 ia64_dbx_register_number (dest_regno));
10039       break;
10040 
10041     case AR_UNAT_REGNUM:
10042       gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
10043       if (unwind)
10044 	fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
10045 		 ia64_dbx_register_number (dest_regno));
10046       break;
10047 
10048     case AR_LC_REGNUM:
10049       gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
10050       if (unwind)
10051 	fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
10052 		 ia64_dbx_register_number (dest_regno));
10053       break;
10054 
10055     default:
10056       /* Everything else should indicate being stored to memory.  */
10057       gcc_unreachable ();
10058     }
10059 }
10060 
10061 /* This function processes a SET pattern for REG_CFA_OFFSET.  */
10062 
10063 static void
10064 process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind)
10065 {
10066   rtx dest = SET_DEST (pat);
10067   rtx src = SET_SRC (pat);
10068   int src_regno = REGNO (src);
10069   const char *saveop;
10070   HOST_WIDE_INT off;
10071   rtx base;
10072 
10073   gcc_assert (MEM_P (dest));
10074   if (GET_CODE (XEXP (dest, 0)) == REG)
10075     {
10076       base = XEXP (dest, 0);
10077       off = 0;
10078     }
10079   else
10080     {
10081       gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
10082 		  && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
10083       base = XEXP (XEXP (dest, 0), 0);
10084       off = INTVAL (XEXP (XEXP (dest, 0), 1));
10085     }
10086 
10087   if (base == hard_frame_pointer_rtx)
10088     {
10089       saveop = ".savepsp";
10090       off = - off;
10091     }
10092   else
10093     {
10094       gcc_assert (base == stack_pointer_rtx);
10095       saveop = ".savesp";
10096     }
10097 
10098   src_regno = REGNO (src);
10099   switch (src_regno)
10100     {
10101     case BR_REG (0):
10102       gcc_assert (!current_frame_info.r[reg_save_b0]);
10103       if (unwind)
10104 	fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
10105 		 saveop, off);
10106       break;
10107 
10108     case PR_REG (0):
10109       gcc_assert (!current_frame_info.r[reg_save_pr]);
10110       if (unwind)
10111 	fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
10112 		 saveop, off);
10113       break;
10114 
10115     case AR_LC_REGNUM:
10116       gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
10117       if (unwind)
10118 	fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
10119 		 saveop, off);
10120       break;
10121 
10122     case AR_PFS_REGNUM:
10123       gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
10124       if (unwind)
10125 	fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
10126 		 saveop, off);
10127       break;
10128 
10129     case AR_UNAT_REGNUM:
10130       gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
10131       if (unwind)
10132 	fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
10133 		 saveop, off);
10134       break;
10135 
10136     case GR_REG (4):
10137     case GR_REG (5):
10138     case GR_REG (6):
10139     case GR_REG (7):
10140       if (unwind)
10141 	fprintf (asm_out_file, "\t.save.g 0x%x\n",
10142 		 1 << (src_regno - GR_REG (4)));
10143       break;
10144 
10145     case BR_REG (1):
10146     case BR_REG (2):
10147     case BR_REG (3):
10148     case BR_REG (4):
10149     case BR_REG (5):
10150       if (unwind)
10151 	fprintf (asm_out_file, "\t.save.b 0x%x\n",
10152 		 1 << (src_regno - BR_REG (1)));
10153       break;
10154 
10155     case FR_REG (2):
10156     case FR_REG (3):
10157     case FR_REG (4):
10158     case FR_REG (5):
10159       if (unwind)
10160 	fprintf (asm_out_file, "\t.save.f 0x%x\n",
10161 		 1 << (src_regno - FR_REG (2)));
10162       break;
10163 
10164     case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
10165     case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
10166     case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
10167     case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
10168       if (unwind)
10169 	fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
10170 		 1 << (src_regno - FR_REG (12)));
10171       break;
10172 
10173     default:
10174       /* ??? For some reason we mark other general registers, even those
10175 	 we can't represent in the unwind info.  Ignore them.  */
10176       break;
10177     }
10178 }
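
/* A hedged sketch, not authoritative, of the directives the two handlers
   above can produce for a typical prologue; the GR numbers and offsets are
   hypothetical, while the spellings come from the fprintf calls:

	.save rp, r34		(process_cfa_register: b0 copied to a GR)
	.save pr, r35		(process_cfa_register: predicates copied to a GR)
	.savesp ar.unat, 16	(process_cfa_offset: stored at sp + 16)
	.savepsp ar.lc, 8	(process_cfa_offset: stored relative to psp)  */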
10179 
10180 /* This function looks at a single insn and emits any directives
10181    required to unwind this insn.  */
10182 
10183 static void
10184 ia64_asm_unwind_emit (FILE *asm_out_file, rtx_insn *insn)
10185 {
10186   bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET;
10187   bool frame = dwarf2out_do_frame ();
10188   rtx note, pat;
10189   bool handled_one;
10190 
10191   if (!unwind && !frame)
10192     return;
10193 
10194   if (NOTE_INSN_BASIC_BLOCK_P (insn))
10195     {
10196       last_block = NOTE_BASIC_BLOCK (insn)->next_bb
10197 		   == EXIT_BLOCK_PTR_FOR_FN (cfun);
10198 
10199       /* Restore unwind state from immediately before the epilogue.  */
10200       if (need_copy_state)
10201 	{
10202 	  if (unwind)
10203 	    {
10204 	      fprintf (asm_out_file, "\t.body\n");
10205 	      fprintf (asm_out_file, "\t.copy_state %d\n",
10206 		       cfun->machine->state_num);
10207 	    }
10208 	  need_copy_state = false;
10209 	}
10210     }
10211 
10212   if (NOTE_P (insn) || ! RTX_FRAME_RELATED_P (insn))
10213     return;
10214 
10215   /* Look for the ALLOC insn.  */
10216   if (INSN_CODE (insn) == CODE_FOR_alloc)
10217     {
10218       rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
10219       int dest_regno = REGNO (dest);
10220 
10221       /* If this is the final destination for ar.pfs, then this must
10222 	 be the alloc in the prologue.  */
10223       if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
10224 	{
10225 	  if (unwind)
10226 	    fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
10227 		     ia64_dbx_register_number (dest_regno));
10228 	}
10229       else
10230 	{
10231 	  /* This must be an alloc before a sibcall.  We must drop the
10232 	     old frame info.  The easiest way to drop the old frame
10233 	     info is to ensure we had a ".restore sp" directive
10234 	     followed by a new prologue.  If the procedure doesn't
10235 	     have a memory-stack frame, we'll issue a dummy ".restore
10236 	     sp" now.  */
10237 	  if (current_frame_info.total_size == 0 && !frame_pointer_needed)
10238 	    /* If we haven't done process_epilogue () yet, do it now.  */
10239 	    process_epilogue (asm_out_file, insn, unwind, frame);
10240 	  if (unwind)
10241 	    fprintf (asm_out_file, "\t.prologue\n");
10242 	}
10243       return;
10244     }
10245 
10246   handled_one = false;
10247   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
10248     switch (REG_NOTE_KIND (note))
10249       {
10250       case REG_CFA_ADJUST_CFA:
10251 	pat = XEXP (note, 0);
10252 	if (pat == NULL)
10253 	  pat = PATTERN (insn);
10254 	process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame);
10255 	handled_one = true;
10256 	break;
10257 
10258       case REG_CFA_OFFSET:
10259 	pat = XEXP (note, 0);
10260 	if (pat == NULL)
10261 	  pat = PATTERN (insn);
10262 	process_cfa_offset (asm_out_file, pat, unwind);
10263 	handled_one = true;
10264 	break;
10265 
10266       case REG_CFA_REGISTER:
10267 	pat = XEXP (note, 0);
10268 	if (pat == NULL)
10269 	  pat = PATTERN (insn);
10270 	process_cfa_register (asm_out_file, pat, unwind);
10271 	handled_one = true;
10272 	break;
10273 
10274       case REG_FRAME_RELATED_EXPR:
10275       case REG_CFA_DEF_CFA:
10276       case REG_CFA_EXPRESSION:
10277       case REG_CFA_RESTORE:
10278       case REG_CFA_SET_VDRAP:
10279 	/* Not used in the ia64 port.  */
10280 	gcc_unreachable ();
10281 
10282       default:
10283 	/* Not a frame-related note.  */
10284 	break;
10285       }
10286 
10287   /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
10288      explicit action to take.  No guessing required.  */
10289   gcc_assert (handled_one);
10290 }
10291 
10292 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */
10293 
10294 static void
10295 ia64_asm_emit_except_personality (rtx personality)
10296 {
10297   fputs ("\t.personality\t", asm_out_file);
10298   output_addr_const (asm_out_file, personality);
10299   fputc ('\n', asm_out_file);
10300 }
10301 
10302 /* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */
10303 
10304 static void
10305 ia64_asm_init_sections (void)
10306 {
10307   exception_section = get_unnamed_section (0, output_section_asm_op,
10308 					   "\t.handlerdata");
10309 }
10310 
10311 /* Implement TARGET_DEBUG_UNWIND_INFO.  */
10312 
10313 static enum unwind_info_type
10314 ia64_debug_unwind_info (void)
10315 {
10316   return UI_TARGET;
10317 }
10318 
10319 enum ia64_builtins
10320 {
10321   IA64_BUILTIN_BSP,
10322   IA64_BUILTIN_COPYSIGNQ,
10323   IA64_BUILTIN_FABSQ,
10324   IA64_BUILTIN_FLUSHRS,
10325   IA64_BUILTIN_INFQ,
10326   IA64_BUILTIN_HUGE_VALQ,
10327   IA64_BUILTIN_max
10328 };
10329 
10330 static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max];
10331 
10332 void
10333 ia64_init_builtins (void)
10334 {
10335   tree fpreg_type;
10336   tree float80_type;
10337   tree decl;
10338 
10339   /* The __fpreg type.  */
10340   fpreg_type = make_node (REAL_TYPE);
10341   TYPE_PRECISION (fpreg_type) = 82;
10342   layout_type (fpreg_type);
10343   (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
10344 
10345   /* The __float80 type.  */
10346   float80_type = make_node (REAL_TYPE);
10347   TYPE_PRECISION (float80_type) = 80;
10348   layout_type (float80_type);
10349   (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
10350 
10351   /* The __float128 type.  */
10352   if (!TARGET_HPUX)
10353     {
10354       tree ftype;
10355       tree float128_type = make_node (REAL_TYPE);
10356 
10357       TYPE_PRECISION (float128_type) = 128;
10358       layout_type (float128_type);
10359       (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
10360 
10361       /* TFmode support builtins.  */
10362       ftype = build_function_type_list (float128_type, NULL_TREE);
10363       decl = add_builtin_function ("__builtin_infq", ftype,
10364 				   IA64_BUILTIN_INFQ, BUILT_IN_MD,
10365 				   NULL, NULL_TREE);
10366       ia64_builtins[IA64_BUILTIN_INFQ] = decl;
10367 
10368       decl = add_builtin_function ("__builtin_huge_valq", ftype,
10369 				   IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
10370 				   NULL, NULL_TREE);
10371       ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;
10372 
10373       ftype = build_function_type_list (float128_type,
10374 					float128_type,
10375 					NULL_TREE);
10376       decl = add_builtin_function ("__builtin_fabsq", ftype,
10377 				   IA64_BUILTIN_FABSQ, BUILT_IN_MD,
10378 				   "__fabstf2", NULL_TREE);
10379       TREE_READONLY (decl) = 1;
10380       ia64_builtins[IA64_BUILTIN_FABSQ] = decl;
10381 
10382       ftype = build_function_type_list (float128_type,
10383 					float128_type,
10384 					float128_type,
10385 					NULL_TREE);
10386       decl = add_builtin_function ("__builtin_copysignq", ftype,
10387 				   IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
10388 				   "__copysigntf3", NULL_TREE);
10389       TREE_READONLY (decl) = 1;
10390       ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
10391     }
10392   else
10393     /* Under HPUX, this is a synonym for "long double".  */
10394     (*lang_hooks.types.register_builtin_type) (long_double_type_node,
10395 					       "__float128");
10396 
10397   /* Fwrite on VMS is non-standard.  */
10398 #if TARGET_ABI_OPEN_VMS
10399   vms_patch_builtins ();
10400 #endif
10401 
10402 #define def_builtin(name, type, code)					\
10403   add_builtin_function ((name), (type), (code), BUILT_IN_MD,	\
10404 		       NULL, NULL_TREE)
10405 
10406   decl = def_builtin ("__builtin_ia64_bsp",
10407 		      build_function_type_list (ptr_type_node, NULL_TREE),
10408 		      IA64_BUILTIN_BSP);
10409   ia64_builtins[IA64_BUILTIN_BSP] = decl;
10410 
10411   decl = def_builtin ("__builtin_ia64_flushrs",
10412 		      build_function_type_list (void_type_node, NULL_TREE),
10413 		      IA64_BUILTIN_FLUSHRS);
10414   ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;
10415 
10416 #undef def_builtin
10417 
10418   if (TARGET_HPUX)
10419     {
10420       if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
10421 	set_user_assembler_name (decl, "_Isfinite");
10422       if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
10423 	set_user_assembler_name (decl, "_Isfinitef");
10424       if ((decl = builtin_decl_explicit (BUILT_IN_FINITEL)) != NULL_TREE)
10425 	set_user_assembler_name (decl, "_Isfinitef128");
10426     }
10427 }
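
/* A minimal user-level sketch of the types and builtins registered above.
   Illustrative only; it assumes a non-HP-UX target, where __float128 is a
   distinct type and the quad builtins exist.  */
#if 0
static void
example_float128_builtins (void)
{
  __float80  e   = 1.0;				/* XFmode */
  __float128 inf = __builtin_infq ();		/* TFmode infinity */
  __float128 mag = __builtin_fabsq (-inf);	/* registered with __fabstf2 */
  __float128 neg = __builtin_copysignq (mag, -1.0); /* via __copysigntf3 */
  (void) e; (void) neg;
}
#endif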
10428 
10429 rtx
10430 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10431 		     machine_mode mode ATTRIBUTE_UNUSED,
10432 		     int ignore ATTRIBUTE_UNUSED)
10433 {
10434   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10435   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10436 
10437   switch (fcode)
10438     {
10439     case IA64_BUILTIN_BSP:
10440       if (! target || ! register_operand (target, DImode))
10441 	target = gen_reg_rtx (DImode);
10442       emit_insn (gen_bsp_value (target));
10443 #ifdef POINTERS_EXTEND_UNSIGNED
10444       target = convert_memory_address (ptr_mode, target);
10445 #endif
10446       return target;
10447 
10448     case IA64_BUILTIN_FLUSHRS:
10449       emit_insn (gen_flushrs ());
10450       return const0_rtx;
10451 
10452     case IA64_BUILTIN_INFQ:
10453     case IA64_BUILTIN_HUGE_VALQ:
10454       {
10455         machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
10456 	REAL_VALUE_TYPE inf;
10457 	rtx tmp;
10458 
10459 	real_inf (&inf);
10460 	tmp = const_double_from_real_value (inf, target_mode);
10461 
10462 	tmp = validize_mem (force_const_mem (target_mode, tmp));
10463 
10464 	if (target == 0)
10465 	  target = gen_reg_rtx (target_mode);
10466 
10467 	emit_move_insn (target, tmp);
10468 	return target;
10469       }
10470 
10471     case IA64_BUILTIN_FABSQ:
10472     case IA64_BUILTIN_COPYSIGNQ:
10473       return expand_call (exp, target, ignore);
10474 
10475     default:
10476       gcc_unreachable ();
10477     }
10478 
10479   return NULL_RTX;
10480 }
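
/* A hedged sketch of the register-stack builtins expanded above, seen from
   user code; the wrapper function is hypothetical.  */
#if 0
static void *
example_read_backing_store_pointer (void)
{
  /* Flush the dirty stacked registers to the backing store, then read
     the current backing store pointer (ar.bsp).  */
  __builtin_ia64_flushrs ();
  return __builtin_ia64_bsp ();
}
#endif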
10481 
10482 /* Return the ia64 builtin for CODE.  */
10483 
10484 static tree
10485 ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10486 {
10487   if (code >= IA64_BUILTIN_max)
10488     return error_mark_node;
10489 
10490   return ia64_builtins[code];
10491 }
10492 
10493 /* On HP-UX IA64, aggregate parameters are passed in the most
10494    significant bits of the stack slot.  */
10495 
10496 enum direction
10497 ia64_hpux_function_arg_padding (machine_mode mode, const_tree type)
10498 {
10499    /* Exception to normal case for structures/unions/etc.  */
10500 
10501    if (type && AGGREGATE_TYPE_P (type)
10502        && int_size_in_bytes (type) < UNITS_PER_WORD)
10503      return upward;
10504 
10505    /* Fall back to the default.  */
10506    return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
10507 }
10508 
10509 /* Emit text to declare externally defined variables and functions, because
10510    the Intel assembler does not support undefined externals.  */
10511 
10512 void
10513 ia64_asm_output_external (FILE *file, tree decl, const char *name)
10514 {
10515   /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10516      set in order to avoid putting out names that are never really
10517      used. */
10518   if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
10519     {
10520       /* maybe_assemble_visibility will return 1 if the assembler
10521 	 visibility directive is output.  */
10522       int need_visibility = ((*targetm.binds_local_p) (decl)
10523 			     && maybe_assemble_visibility (decl));
10524 
10525       /* GNU as does not need anything here, but the HP linker does
10526 	 need something for external functions.  */
10527       if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
10528 	  && TREE_CODE (decl) == FUNCTION_DECL)
10529 	  (*targetm.asm_out.globalize_decl_name) (file, decl);
10530       else if (need_visibility && !TARGET_GNU_AS)
10531 	(*targetm.asm_out.globalize_label) (file, name);
10532     }
10533 }
10534 
10535 /* Set SImode div/mod functions, since init_integral_libfuncs only
10536    initializes modes of word_mode and larger.  Rename the TFmode libfuncs
10537    using the HPUX conventions.  __divtf3 is used for XFmode; we need to
10538    keep it for backward compatibility.  */
10539 
10540 static void
10541 ia64_init_libfuncs (void)
10542 {
10543   set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
10544   set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
10545   set_optab_libfunc (smod_optab, SImode, "__modsi3");
10546   set_optab_libfunc (umod_optab, SImode, "__umodsi3");
10547 
10548   set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
10549   set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
10550   set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
10551   set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
10552   set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
10553 
10554   set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
10555   set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
10556   set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
10557   set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
10558   set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
10559   set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
10560 
10561   set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
10562   set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
10563   set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
10564   set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
10565   set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
10566 
10567   set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
10568   set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
10569   set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
10570   /* HP-UX 11.23 libc does not have a function for unsigned
10571      SImode-to-TFmode conversion.  */
10572   set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
10573 }
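
/* Informal illustration of the renaming above (a sketch, not a guarantee):
   with these optab entries, a TFmode addition such as

     __float128 add (__float128 a, __float128 b) { return a + b; }

   is expected to be emitted as a call to _U_Qfadd rather than libgcc's
   default __addtf3 name.  */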
10574 
10575 /* Rename all the TFmode libfuncs using the HPUX conventions.  */
10576 
10577 static void
10578 ia64_hpux_init_libfuncs (void)
10579 {
10580   ia64_init_libfuncs ();
10581 
10582   /* The HP SI millicode division and mod functions expect DI arguments.
10583      By turning them off completely we avoid using both libgcc and the
10584      non-standard millicode routines and use the HP DI millicode routines
10585      instead.  */
10586 
10587   set_optab_libfunc (sdiv_optab, SImode, 0);
10588   set_optab_libfunc (udiv_optab, SImode, 0);
10589   set_optab_libfunc (smod_optab, SImode, 0);
10590   set_optab_libfunc (umod_optab, SImode, 0);
10591 
10592   set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10593   set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10594   set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10595   set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10596 
10597   /* HP-UX libc has TF min/max/abs routines in it.  */
10598   set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10599   set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10600   set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
10601 
10602   /* ia64_expand_compare uses this.  */
10603   cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10604 
10605   /* These should never be used.  */
10606   set_optab_libfunc (eq_optab, TFmode, 0);
10607   set_optab_libfunc (ne_optab, TFmode, 0);
10608   set_optab_libfunc (gt_optab, TFmode, 0);
10609   set_optab_libfunc (ge_optab, TFmode, 0);
10610   set_optab_libfunc (lt_optab, TFmode, 0);
10611   set_optab_libfunc (le_optab, TFmode, 0);
10612 }
10613 
10614 /* Rename the division and modulus functions in VMS.  */
10615 
10616 static void
10617 ia64_vms_init_libfuncs (void)
10618 {
10619   set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10620   set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10621   set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10622   set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10623   set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10624   set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10625   set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10626   set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
10627   abort_libfunc = init_one_libfunc ("decc$abort");
10628   memcmp_libfunc = init_one_libfunc ("decc$memcmp");
10629 #ifdef MEM_LIBFUNCS_INIT
10630   MEM_LIBFUNCS_INIT;
10631 #endif
10632 }
10633 
10634 /* Rename the TFmode libfuncs available from soft-fp in glibc using
10635    the HPUX conventions.  */
10636 
10637 static void
10638 ia64_sysv4_init_libfuncs (void)
10639 {
10640   ia64_init_libfuncs ();
10641 
10642   /* These functions are not part of the HPUX TFmode interface.  We
10643      use them instead of _U_Qfcmp, which doesn't work the way we
10644      expect.  */
10645   set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10646   set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10647   set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10648   set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10649   set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10650   set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10651 
10652   /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10653      glibc doesn't have them.  */
10654 }
10655 
10656 /* Use soft-fp.  */
10657 
10658 static void
10659 ia64_soft_fp_init_libfuncs (void)
10660 {
10661 }
10662 
10663 static bool
10664 ia64_vms_valid_pointer_mode (machine_mode mode)
10665 {
10666   return (mode == SImode || mode == DImode);
10667 }
10668 
10669 /* For HPUX, it is illegal to have relocations in shared segments.  */
10670 
10671 static int
10672 ia64_hpux_reloc_rw_mask (void)
10673 {
10674   return 3;
10675 }
10676 
10677 /* For others, relax this so that relocations to local data go in
10678    read-only segments, but we still cannot allow global relocations
10679    in read-only segments.  */
10680 
10681 static int
10682 ia64_reloc_rw_mask (void)
10683 {
10684   return flag_pic ? 3 : 2;
10685 }
10686 
10687 /* Return the section to use for X.  The only special thing we do here
10688    is to honor small data.  */
10689 
10690 static section *
10691 ia64_select_rtx_section (machine_mode mode, rtx x,
10692 			 unsigned HOST_WIDE_INT align)
10693 {
10694   if (GET_MODE_SIZE (mode) > 0
10695       && GET_MODE_SIZE (mode) <= ia64_section_threshold
10696       && !TARGET_NO_SDATA)
10697     return sdata_section;
10698   else
10699     return default_elf_select_rtx_section (mode, x, align);
10700 }
10701 
10702 static unsigned int
10703 ia64_section_type_flags (tree decl, const char *name, int reloc)
10704 {
10705   unsigned int flags = 0;
10706 
10707   if (strcmp (name, ".sdata") == 0
10708       || strncmp (name, ".sdata.", 7) == 0
10709       || strncmp (name, ".gnu.linkonce.s.", 16) == 0
10710       || strncmp (name, ".sdata2.", 8) == 0
10711       || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
10712       || strcmp (name, ".sbss") == 0
10713       || strncmp (name, ".sbss.", 6) == 0
10714       || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
10715     flags = SECTION_SMALL;
10716 
10717   flags |= default_section_type_flags (decl, name, reloc);
10718   return flags;
10719 }
10720 
10721 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10722    structure type and that the address of that type should be passed
10723    in out0, rather than in r8.  */
10724 
10725 static bool
10726 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10727 {
10728   tree ret_type = TREE_TYPE (fntype);
10729 
10730   /* The Itanium C++ ABI requires that out0, rather than r8, be used
10731      as the structure return address parameter, if the return value
10732      type has a non-trivial copy constructor or destructor.  It is not
10733      clear if this same convention should be used for other
10734      programming languages.  Until G++ 3.4, we incorrectly used r8 for
10735      these return values.  */
10736   return (abi_version_at_least (2)
10737 	  && ret_type
10738 	  && TYPE_MODE (ret_type) == BLKmode
10739 	  && TREE_ADDRESSABLE (ret_type)
10740 	  && lang_GNU_CXX ());
10741 }
10742 
10743 /* Output the assembler code for a thunk function.  THUNK_DECL is the
10744    declaration for the thunk function itself, FUNCTION is the decl for
10745    the target function.  DELTA is an immediate constant offset to be
10746    added to THIS.  If VCALL_OFFSET is nonzero, the word at
10747    *(*this + vcall_offset) should be added to THIS.  */
10748 
10749 static void
10750 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10751 		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10752 		      tree function)
10753 {
10754   rtx this_rtx, funexp;
10755   rtx_insn *insn;
10756   unsigned int this_parmno;
10757   unsigned int this_regno;
10758   rtx delta_rtx;
10759 
10760   reload_completed = 1;
10761   epilogue_completed = 1;
10762 
10763   /* Set things up as ia64_expand_prologue might.  */
10764   last_scratch_gr_reg = 15;
10765 
10766   memset (&current_frame_info, 0, sizeof (current_frame_info));
10767   current_frame_info.spill_cfa_off = -16;
10768   current_frame_info.n_input_regs = 1;
10769   current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10770 
10771   /* Mark the end of the (empty) prologue.  */
10772   emit_note (NOTE_INSN_PROLOGUE_END);
10773 
10774   /* Figure out whether "this" will be the first parameter (the
10775      typical case) or the second parameter (as happens when the
10776      virtual function returns certain class objects).  */
10777   this_parmno
10778     = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10779        ? 1 : 0);
10780   this_regno = IN_REG (this_parmno);
10781   if (!TARGET_REG_NAMES)
10782     reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10783 
10784   this_rtx = gen_rtx_REG (Pmode, this_regno);
10785 
10786   /* Apply the constant offset, if required.  */
10787   delta_rtx = GEN_INT (delta);
10788   if (TARGET_ILP32)
10789     {
10790       rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
10791       REG_POINTER (tmp) = 1;
10792       if (delta && satisfies_constraint_I (delta_rtx))
10793 	{
10794 	  emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
10795 	  delta = 0;
10796 	}
10797       else
10798 	emit_insn (gen_ptr_extend (this_rtx, tmp));
10799     }
10800   if (delta)
10801     {
10802       if (!satisfies_constraint_I (delta_rtx))
10803 	{
10804 	  rtx tmp = gen_rtx_REG (Pmode, 2);
10805 	  emit_move_insn (tmp, delta_rtx);
10806 	  delta_rtx = tmp;
10807 	}
10808       emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
10809     }
10810 
10811   /* Apply the offset from the vtable, if required.  */
10812   if (vcall_offset)
10813     {
10814       rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10815       rtx tmp = gen_rtx_REG (Pmode, 2);
10816 
10817       if (TARGET_ILP32)
10818 	{
10819 	  rtx t = gen_rtx_REG (ptr_mode, 2);
10820 	  REG_POINTER (t) = 1;
10821 	  emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
10822 	  if (satisfies_constraint_I (vcall_offset_rtx))
10823 	    {
10824 	      emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
10825 	      vcall_offset = 0;
10826 	    }
10827 	  else
10828 	    emit_insn (gen_ptr_extend (tmp, t));
10829 	}
10830       else
10831 	emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
10832 
10833       if (vcall_offset)
10834 	{
10835 	  if (!satisfies_constraint_J (vcall_offset_rtx))
10836 	    {
10837 	      rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
10838 	      emit_move_insn (tmp2, vcall_offset_rtx);
10839 	      vcall_offset_rtx = tmp2;
10840 	    }
10841 	  emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
10842 	}
10843 
10844       if (TARGET_ILP32)
10845 	emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
10846       else
10847 	emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
10848 
10849       emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
10850     }
10851 
10852   /* Generate a tail call to the target function.  */
10853   if (! TREE_USED (function))
10854     {
10855       assemble_external (function);
10856       TREE_USED (function) = 1;
10857     }
10858   funexp = XEXP (DECL_RTL (function), 0);
10859   funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
10860   ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
10861   insn = get_last_insn ();
10862   SIBLING_CALL_P (insn) = 1;
10863 
10864   /* Code generation for calls relies on splitting.  */
10865   reload_completed = 1;
10866   epilogue_completed = 1;
10867   try_split (PATTERN (insn), insn, 0);
10868 
10869   emit_barrier ();
10870 
10871   /* Run just enough of rest_of_compilation to get the insns emitted.
10872      There's not really enough bulk here to make other passes such as
10873      instruction scheduling worth while.  Note that use_thunk calls
10874      assemble_start_function and assemble_end_function.  */
10875 
10876   emit_all_insn_group_barriers (NULL);
10877   insn = get_insns ();
10878   shorten_branches (insn);
10879   final_start_function (insn, file, 1);
10880   final (insn, file, 1);
10881   final_end_function ();
10882 
10883   reload_completed = 0;
10884   epilogue_completed = 0;
10885 }
10886 
10887 /* Worker function for TARGET_STRUCT_VALUE_RTX.  */
10888 
10889 static rtx
10890 ia64_struct_value_rtx (tree fntype,
10891 		       int incoming ATTRIBUTE_UNUSED)
10892 {
10893   if (TARGET_ABI_OPEN_VMS ||
10894       (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
10895     return NULL_RTX;
10896   return gen_rtx_REG (Pmode, GR_REG (8));
10897 }
10898 
10899 static bool
10900 ia64_scalar_mode_supported_p (machine_mode mode)
10901 {
10902   switch (mode)
10903     {
10904     case QImode:
10905     case HImode:
10906     case SImode:
10907     case DImode:
10908     case TImode:
10909       return true;
10910 
10911     case SFmode:
10912     case DFmode:
10913     case XFmode:
10914     case RFmode:
10915       return true;
10916 
10917     case TFmode:
10918       return true;
10919 
10920     default:
10921       return false;
10922     }
10923 }
10924 
10925 static bool
10926 ia64_vector_mode_supported_p (machine_mode mode)
10927 {
10928   switch (mode)
10929     {
10930     case V8QImode:
10931     case V4HImode:
10932     case V2SImode:
10933       return true;
10934 
10935     case V2SFmode:
10936       return true;
10937 
10938     default:
10939       return false;
10940     }
10941 }
10942 
10943 /* Implement TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P.  */
10944 
10945 static bool
10946 ia64_libgcc_floating_mode_supported_p (machine_mode mode)
10947 {
10948   switch (mode)
10949     {
10950     case SFmode:
10951     case DFmode:
10952       return true;
10953 
10954     case XFmode:
10955 #ifdef IA64_NO_LIBGCC_XFMODE
10956       return false;
10957 #else
10958       return true;
10959 #endif
10960 
10961     case TFmode:
10962 #ifdef IA64_NO_LIBGCC_TFMODE
10963       return false;
10964 #else
10965       return true;
10966 #endif
10967 
10968     default:
10969       return false;
10970     }
10971 }
10972 
10973 /* Implement the FUNCTION_PROFILER macro.  */
10974 
10975 void
10976 ia64_output_function_profiler (FILE *file, int labelno)
10977 {
10978   bool indirect_call;
10979 
10980   /* If the function needs a static chain and the static chain
10981      register is r15, we use an indirect call so as to bypass
10982      the PLT stub in case the executable is dynamically linked,
10983      because the stub clobbers r15 as per 5.3.6 of the psABI.
10984      We don't need to do that in non-canonical PIC mode.  */
10985 
10986   if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
10987     {
10988       gcc_assert (STATIC_CHAIN_REGNUM == 15);
10989       indirect_call = true;
10990     }
10991   else
10992     indirect_call = false;
10993 
10994   if (TARGET_GNU_AS)
10995     fputs ("\t.prologue 4, r40\n", file);
10996   else
10997     fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
10998   fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
10999 
11000   if (NO_PROFILE_COUNTERS)
11001     fputs ("\tmov out3 = r0\n", file);
11002   else
11003     {
11004       char buf[20];
11005       ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
11006 
11007       if (TARGET_AUTO_PIC)
11008 	fputs ("\tmovl out3 = @gprel(", file);
11009       else
11010 	fputs ("\taddl out3 = @ltoff(", file);
11011       assemble_name (file, buf);
11012       if (TARGET_AUTO_PIC)
11013 	fputs (")\n", file);
11014       else
11015 	fputs ("), r1\n", file);
11016     }
11017 
11018   if (indirect_call)
11019     fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
11020   fputs ("\t;;\n", file);
11021 
11022   fputs ("\t.save rp, r42\n", file);
11023   fputs ("\tmov out2 = b0\n", file);
11024   if (indirect_call)
11025     fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
11026   fputs ("\t.body\n", file);
11027   fputs ("\tmov out1 = r1\n", file);
11028   if (indirect_call)
11029     {
11030       fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
11031       fputs ("\tmov b6 = r16\n", file);
11032       fputs ("\tld8 r1 = [r14]\n", file);
11033       fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
11034     }
11035   else
11036     fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
11037 }
11038 
11039 static GTY(()) rtx mcount_func_rtx;
11040 static rtx
11041 gen_mcount_func_rtx (void)
11042 {
11043   if (!mcount_func_rtx)
11044     mcount_func_rtx = init_one_libfunc ("_mcount");
11045   return mcount_func_rtx;
11046 }
11047 
11048 void
11049 ia64_profile_hook (int labelno)
11050 {
11051   rtx label, ip;
11052 
11053   if (NO_PROFILE_COUNTERS)
11054     label = const0_rtx;
11055   else
11056     {
11057       char buf[30];
11058       const char *label_name;
11059       ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
11060       label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
11061       label = gen_rtx_SYMBOL_REF (Pmode, label_name);
11062       SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
11063     }
11064   ip = gen_reg_rtx (Pmode);
11065   emit_insn (gen_ip_value (ip));
11066   emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
11067                      VOIDmode, 3,
11068 		     gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
11069 		     ip, Pmode,
11070 		     label, Pmode);
11071 }
11072 
11073 /* Return the mangling of TYPE if it is an extended fundamental type.  */
11074 
11075 static const char *
11076 ia64_mangle_type (const_tree type)
11077 {
11078   type = TYPE_MAIN_VARIANT (type);
11079 
11080   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
11081       && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
11082     return NULL;
11083 
11084   /* On HP-UX, "long double" is mangled as "e" so __float128 is
11085      mangled as "e".  */
11086   if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
11087     return "g";
11088   /* On HP-UX, "e" is not available as a mangling of __float80 so use
11089      an extended mangling.  Elsewhere, "e" is available since long
11090      double is 80 bits.  */
11091   if (TYPE_MODE (type) == XFmode)
11092     return TARGET_HPUX ? "u9__float80" : "e";
11093   if (TYPE_MODE (type) == RFmode)
11094     return "u7__fpreg";
11095   return NULL;
11096 }
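
/* A hedged summary of the mangling strings returned above, for reference:

     __float128	-> "g"		(non-HP-UX; on HP-UX it is long double, "e")
     __float80	-> "e"		(non-HP-UX) or "u9__float80" (HP-UX)
     __fpreg	-> "u7__fpreg"  */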
11097 
11098 /* Return the diagnostic message string if conversion from FROMTYPE to
11099    TOTYPE is not allowed, NULL otherwise.  */
11100 static const char *
11101 ia64_invalid_conversion (const_tree fromtype, const_tree totype)
11102 {
11103   /* Reject nontrivial conversion to or from __fpreg.  */
11104   if (TYPE_MODE (fromtype) == RFmode
11105       && TYPE_MODE (totype) != RFmode
11106       && TYPE_MODE (totype) != VOIDmode)
11107     return N_("invalid conversion from %<__fpreg%>");
11108   if (TYPE_MODE (totype) == RFmode
11109       && TYPE_MODE (fromtype) != RFmode)
11110     return N_("invalid conversion to %<__fpreg%>");
11111   return NULL;
11112 }
11113 
11114 /* Return the diagnostic message string if the unary operation OP is
11115    not permitted on TYPE, NULL otherwise.  */
11116 static const char *
11117 ia64_invalid_unary_op (int op, const_tree type)
11118 {
11119   /* Reject operations on __fpreg other than unary + or &.  */
11120   if (TYPE_MODE (type) == RFmode
11121       && op != CONVERT_EXPR
11122       && op != ADDR_EXPR)
11123     return N_("invalid operation on %<__fpreg%>");
11124   return NULL;
11125 }
11126 
11127 /* Return the diagnostic message string if the binary operation OP is
11128    not permitted on TYPE1 and TYPE2, NULL otherwise.  */
11129 static const char *
11130 ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
11131 {
11132   /* Reject operations on __fpreg.  */
11133   if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
11134     return N_("invalid operation on %<__fpreg%>");
11135   return NULL;
11136 }
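
/* Illustrative only: the three hooks above make __fpreg essentially opaque
   to C arithmetic.  A sketch of what is accepted and rejected; the variable
   names are made up.  */
#if 0
__fpreg r;
__fpreg *p = &r;		/* OK: unary & is allowed */
__fpreg s = +r;			/* OK: unary + is allowed */
double  d = (double) r;		/* error: invalid conversion from '__fpreg' */
__fpreg t = r + r;		/* error: invalid operation on '__fpreg' */
#endif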
11137 
11138 /* HP-UX version_id attribute.
11139    For object foo, if the version_id is set to 1234, put out an alias
11140    of '.alias foo "foo{1234}"'.  We can't use "foo{1234}" in anything
11141    other than an alias statement because it is an illegal symbol name.  */
11142 
11143 static tree
11144 ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
11145                                  tree name ATTRIBUTE_UNUSED,
11146                                  tree args,
11147                                  int flags ATTRIBUTE_UNUSED,
11148                                  bool *no_add_attrs)
11149 {
11150   tree arg = TREE_VALUE (args);
11151 
11152   if (TREE_CODE (arg) != STRING_CST)
11153     {
11154       error ("version attribute is not a string");
11155       *no_add_attrs = true;
11156       return NULL_TREE;
11157     }
11158   return NULL_TREE;
11159 }
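
/* A minimal usage sketch for the attribute handled above; the function name
   is hypothetical.  Per the comment before the handler, the intended effect
   (on HP-UX) is an '.alias foo "foo{1234}"' style directive.  */
#if 0
extern int foo (void) __attribute__ ((version_id ("1234")));
#endif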
11160 
11161 /* Target hook for c_mode_for_suffix.  */
11162 
11163 static machine_mode
11164 ia64_c_mode_for_suffix (char suffix)
11165 {
11166   if (suffix == 'q')
11167     return TFmode;
11168   if (suffix == 'w')
11169     return XFmode;
11170 
11171   return VOIDmode;
11172 }
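
/* Illustrative constants using the suffixes mapped above, assuming a
   non-HP-UX target where __float128 is distinct from long double.  */
#if 0
static const __float128 q_const = 1.5q;		/* 'q' -> TFmode */
static const __float80  w_const = 2.5w;		/* 'w' -> XFmode */
#endif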
11173 
11174 static GTY(()) rtx ia64_dconst_0_5_rtx;
11175 
11176 rtx
11177 ia64_dconst_0_5 (void)
11178 {
11179   if (! ia64_dconst_0_5_rtx)
11180     {
11181       REAL_VALUE_TYPE rv;
11182       real_from_string (&rv, "0.5");
11183       ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
11184     }
11185   return ia64_dconst_0_5_rtx;
11186 }
11187 
11188 static GTY(()) rtx ia64_dconst_0_375_rtx;
11189 
11190 rtx
11191 ia64_dconst_0_375 (void)
11192 {
11193   if (! ia64_dconst_0_375_rtx)
11194     {
11195       REAL_VALUE_TYPE rv;
11196       real_from_string (&rv, "0.375");
11197       ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
11198     }
11199   return ia64_dconst_0_375_rtx;
11200 }
11201 
11202 static machine_mode
11203 ia64_get_reg_raw_mode (int regno)
11204 {
11205   if (FR_REGNO_P (regno))
11206     return XFmode;
11207   return default_get_reg_raw_mode(regno);
11208 }
11209 
11210 /* Implement TARGET_MEMBER_TYPE_FORCES_BLK.  ??? Might not be needed
11211    anymore.  */
11212 
11213 bool
11214 ia64_member_type_forces_blk (const_tree, machine_mode mode)
11215 {
11216   return TARGET_HPUX && mode == TFmode;
11217 }
11218 
11219 /* Always default to .text section until HP-UX linker is fixed.  */
11220 
11221 ATTRIBUTE_UNUSED static section *
11222 ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
11223 			    enum node_frequency freq ATTRIBUTE_UNUSED,
11224 			    bool startup ATTRIBUTE_UNUSED,
11225 			    bool exit ATTRIBUTE_UNUSED)
11226 {
11227   return NULL;
11228 }
11229 
11230 /* Construct (set target (vec_select op0 (parallel perm))) and
11231    return true if that's a valid instruction in the active ISA.  */
11232 
11233 static bool
11234 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
11235 {
11236   rtx rperm[MAX_VECT_LEN], x;
11237   unsigned i;
11238 
11239   for (i = 0; i < nelt; ++i)
11240     rperm[i] = GEN_INT (perm[i]);
11241 
11242   x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
11243   x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
11244   x = gen_rtx_SET (target, x);
11245 
11246   rtx_insn *insn = emit_insn (x);
11247   if (recog_memoized (insn) < 0)
11248     {
11249       remove_insn (insn);
11250       return false;
11251     }
11252   return true;
11253 }
11254 
11255 /* Similar, but generate a vec_concat from op0 and op1 as well.  */
11256 
11257 static bool
11258 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
11259 			const unsigned char *perm, unsigned nelt)
11260 {
11261   machine_mode v2mode;
11262   rtx x;
11263 
11264   v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
11265   x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
11266   return expand_vselect (target, x, perm, nelt);
11267 }
11268 
11269 /* Try to expand a no-op permutation.  */
11270 
11271 static bool
11272 expand_vec_perm_identity (struct expand_vec_perm_d *d)
11273 {
11274   unsigned i, nelt = d->nelt;
11275 
11276   for (i = 0; i < nelt; ++i)
11277     if (d->perm[i] != i)
11278       return false;
11279 
11280   if (!d->testing_p)
11281     emit_move_insn (d->target, d->op0);
11282 
11283   return true;
11284 }
11285 
11286 /* Try to expand D via a shrp instruction.  */
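/* A usable permutation selects NELT consecutive elements (modulo the
   total element count), e.g. for little-endian V8QImode the permutation
   { 2, 3, 4, 5, 6, 7, 8, 9 } becomes a single shrp extracting bytes
   2..9 of the op1:op0 register pair.  */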

static bool
expand_vec_perm_shrp (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt, shift, mask;
  rtx tmp, hi, lo;

  /* ??? Don't force V2SFmode into the integer registers.  */
  if (d->vmode == V2SFmode)
    return false;

  mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1);

  shift = d->perm[0];
  if (BYTES_BIG_ENDIAN && shift > nelt)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != ((shift + i) & mask))
      return false;

  if (d->testing_p)
    return true;

  hi = shift < nelt ? d->op1 : d->op0;
  lo = shift < nelt ? d->op0 : d->op1;

  shift %= nelt;

  shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT;

  /* We've eliminated the shift 0 case via expand_vec_perm_identity.  */
  gcc_assert (IN_RANGE (shift, 1, 63));

  /* Recall that big-endian elements are numbered starting at the top of
     the register.  Ideally we'd have a shift-left-pair.  But since we
     don't, convert to a shift the other direction.  */
  if (BYTES_BIG_ENDIAN)
    shift = 64 - shift;

  tmp = gen_reg_rtx (DImode);
  hi = gen_lowpart (DImode, hi);
  lo = gen_lowpart (DImode, lo);
  emit_insn (gen_shrp (tmp, hi, lo, GEN_INT (shift)));

  emit_move_insn (d->target, gen_lowpart (d->vmode, tmp));
  return true;
}

/* Try to instantiate D in a single instruction.  */
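/* The single-instruction forms tried here are a plain vec_select for
   one-operand permutations (including the identity), a vec_select of
   the vec_concat of the two operands (in either operand order), and
   finally a shrp-style rotation.  */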

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Try single-operand selections.  */
  if (d->one_operand_p)
    {
      if (expand_vec_perm_identity (d))
	return true;
      if (expand_vselect (d->target, d->op0, d->perm, nelt))
	return true;
    }

  /* Try two operand selections.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (!d->one_operand_p)
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned e = d->perm[i];
	  if (e >= nelt)
	    e -= nelt;
	  else
	    e += nelt;
	  perm2[i] = e;
	}

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
	return true;
    }

  if (expand_vec_perm_shrp (d))
    return true;

  /* ??? Look for deposit-like permutations where most of the result
     comes from one vector unchanged and the rest comes from a
     sequential hunk of the other vector.  */

  return false;
}

/* Pattern match broadcast permutations.  */
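/* For example, a V8QImode permutation whose indices are all 5 is
   expanded by extracting byte 5 of the source into a scalar and then
   replicating it with mux1.  */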

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;
  unsigned char perm2[2];
  rtx temp;
  bool ok;

  if (!d->one_operand_p)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  switch (d->vmode)
    {
    case V2SImode:
    case V2SFmode:
      /* Implementable by interleave.  */
      perm2[0] = elt;
      perm2[1] = elt + 2;
      ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
      gcc_assert (ok);
      break;

    case V8QImode:
      /* Implementable by extract + broadcast.  */
      if (BYTES_BIG_ENDIAN)
	elt = 7 - elt;
      elt *= BITS_PER_UNIT;
      temp = gen_reg_rtx (DImode);
      emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
			    GEN_INT (8), GEN_INT (elt)));
      emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
      break;

    case V4HImode:
      /* Should have been matched directly by vec_select.  */
    default:
      gcc_unreachable ();
    }

  return true;
}

/* A subroutine of ia64_expand_vec_perm_const_1.  Try to simplify a
   two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */
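/* This applies only when the set of indices used by D matches one of
   the merge shapes below (the punpck half merges, the even/odd element
   mixes, or a window of at most NELT consecutive positions handled via
   shrp); the merge is emitted first and the leftover rearrangement then
   becomes a one-operand permutation of its result.  */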

static bool
expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned char remap[2 * MAX_VECT_LEN];
  unsigned contents, i, nelt, nelt2;
  unsigned h0, h1, h2, h3;
  rtx_insn *seq;
  bool ok;

  if (d->one_operand_p)
    return false;

  nelt = d->nelt;
  nelt2 = nelt / 2;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= 1u << d->perm[i];

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  h0 = (1u << nelt2) - 1;
  h1 = h0 << nelt2;
  h2 = h0 << nelt;
  h3 = h0 << (nelt + nelt2);

  if ((contents & (h0 | h2)) == contents)	/* punpck even halves */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = i / 2 + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if ((contents & (h1 | h3)) == contents)	/* punpck odd halves */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if ((contents & 0x5555) == contents)	/* mix even elements */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if ((contents & 0xaaaa) == contents)	/* mix odd elements */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = (i | 1) + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if (floor_log2 (contents) - ctz_hwi (contents) < (int)nelt) /* shrp */
    {
      unsigned shift = ctz_hwi (contents);
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = (i + shift) & (2 * nelt - 1);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else
    return false;

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      dfinal.perm[i] = e;
    }
  if (d->testing_p)
    dfinal.op0 = gen_raw_REG (dfinal.vmode, LAST_VIRTUAL_REGISTER + 1);
  else
    dfinal.op0 = gen_reg_rtx (dfinal.vmode);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  dremap.target = dfinal.op0;

  /* Test if the final remap can be done with a single insn.  For V4HImode
     this *will* succeed.  For V8QImode or V2SImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();
  if (!ok)
    return false;
  if (d->testing_p)
    return true;

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}

/* A subroutine of ia64_expand_vec_perm_const_1.  Emit a full V4HImode
   constant permutation via two mux2 and a merge.  */
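/* Both operands are put through the same four-element selection
   (PERM[i] & 3); the two results are then combined under a constant
   mask that keeps, for each element, the copy coming from the operand
   that PERM[i] actually names.  */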

static bool
expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
{
  unsigned char perm2[4];
  rtx rmask[4];
  unsigned i;
  rtx t0, t1, mask, x;
  bool ok;

  if (d->vmode != V4HImode || d->one_operand_p)
    return false;
  if (d->testing_p)
    return true;

  for (i = 0; i < 4; ++i)
    {
      perm2[i] = d->perm[i] & 3;
      rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
    }
  mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
  mask = force_reg (V4HImode, mask);

  t0 = gen_reg_rtx (V4HImode);
  t1 = gen_reg_rtx (V4HImode);

  ok = expand_vselect (t0, d->op0, perm2, 4);
  gcc_assert (ok);
  ok = expand_vselect (t1, d->op1, perm2, 4);
  gcc_assert (ok);

  x = gen_rtx_AND (V4HImode, mask, t0);
  emit_insn (gen_rtx_SET (t0, x));

  x = gen_rtx_NOT (V4HImode, mask);
  x = gen_rtx_AND (V4HImode, x, t1);
  emit_insn (gen_rtx_SET (t1, x));

  x = gen_rtx_IOR (V4HImode, t0, t1);
  emit_insn (gen_rtx_SET (d->target, x));

  return true;
}

/* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  if (expand_vec_perm_1 (d))
    return true;
  if (expand_vec_perm_broadcast (d))
    return true;
  if (expand_vec_perm_interleave_2 (d))
    return true;
  if (expand_vec_perm_v4hi_5 (d))
    return true;
  return false;
}

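/* Expand a constant vector permutation.  OPERANDS[0] is the target,
   OPERANDS[1] and OPERANDS[2] the input vectors, and OPERANDS[3] a
   CONST_VECTOR of indices into their concatenation.  Return true if
   the permutation was successfully expanded.  */
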
bool
ia64_expand_vec_perm_const (rtx operands[4])
{
  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  int i, nelt, which;
  rtx sel;

  d.target = operands[0];
  d.op0 = operands[1];
  d.op1 = operands[2];
  sel = operands[3];

  d.vmode = GET_MODE (d.target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
  gcc_assert (XVECLEN (sel, 0) == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);

      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
      perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      if (!rtx_equal_p (d.op0, d.op1))
	{
	  d.one_operand_p = false;
	  break;
	}

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      for (i = 0; i < nelt; ++i)
	if (d.perm[i] >= nelt)
	  d.perm[i] -= nelt;
      /* FALLTHRU */

    case 1:
      d.op1 = d.op0;
      d.one_operand_p = true;
      break;

    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] -= nelt;
      d.op0 = d.op1;
      d.one_operand_p = true;
      break;
    }

  if (ia64_expand_vec_perm_const_1 (&d))
    return true;

  /* If the mask says both arguments are needed, but they are the same,
     the above tried to expand with one_operand_p true.  If that didn't
     work, retry with one_operand_p false, as that's what we used in _ok.  */
  if (which == 3 && d.one_operand_p)
    {
      memcpy (d.perm, perm, sizeof (perm));
      d.one_operand_p = false;
      return ia64_expand_vec_perm_const_1 (&d);
    }

  return false;
}

/* Implement targetm.vectorize.vec_perm_const_ok.  */

static bool
ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
				  const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Extract the values from the vector CST into the permutation
     array in D.  */
  memcpy (d.perm, sel, nelt);
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_operand_p = (which != 3);

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_operand_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ia64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}

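/* Expand a V2SFmode vec_set: insert the SFmode value OPERANDS[1] into
   element INTVAL (OPERANDS[2]) of the vector OPERANDS[0].  The scalar is
   packed into a fresh V2SF register with fpack and then combined with
   the original vector by a constant permutation.  */
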
void
ia64_expand_vec_setv2sf (rtx operands[3])
{
  struct expand_vec_perm_d d;
  unsigned int which;
  bool ok;

  d.target = operands[0];
  d.op0 = operands[0];
  d.op1 = gen_reg_rtx (V2SFmode);
  d.vmode = V2SFmode;
  d.nelt = 2;
  d.one_operand_p = false;
  d.testing_p = false;

  which = INTVAL (operands[2]);
  gcc_assert (which <= 1);
  d.perm[0] = 1 - which;
  d.perm[1] = which + 2;

  emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}

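/* Fill TARGET with the even-numbered (ODD == 0) or odd-numbered
   (ODD == 1) elements of the concatenation of OP0 and OP1.  */
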
void
ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
{
  struct expand_vec_perm_d d;
  machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool ok;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = vmode;
  d.nelt = nelt;
  d.one_operand_p = false;
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}

#include "gt-ia64.h"