1 /* Definitions of target machine for GNU compiler.
2    Copyright (C) 1999-2020 Free Software Foundation, Inc.
3    Contributed by James E. Wilson <wilson@cygnus.com> and
4 		  David Mosberger <davidm@hpl.hp.com>.
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12 
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #define IN_TARGET_CODE 1
23 
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "memmodel.h"
32 #include "cfghooks.h"
33 #include "df.h"
34 #include "tm_p.h"
35 #include "stringpool.h"
36 #include "attribs.h"
37 #include "optabs.h"
38 #include "regs.h"
39 #include "emit-rtl.h"
40 #include "recog.h"
41 #include "diagnostic-core.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
45 #include "calls.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "explow.h"
51 #include "expr.h"
52 #include "cfgrtl.h"
53 #include "libfuncs.h"
54 #include "sched-int.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "gimplify.h"
58 #include "intl.h"
59 #include "debug.h"
60 #include "dbgcnt.h"
61 #include "tm-constrs.h"
62 #include "sel-sched.h"
63 #include "reload.h"
64 #include "opts.h"
65 #include "dumpfile.h"
66 #include "builtins.h"
67 
68 /* This file should be included last.  */
69 #include "target-def.h"
70 
71 /* This is used for communication between ASM_OUTPUT_LABEL and
72    ASM_OUTPUT_LABELREF.  */
73 int ia64_asm_output_label = 0;
74 
75 /* Register names for ia64_expand_prologue.  */
76 static const char * const ia64_reg_numbers[96] =
77 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
78   "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
79   "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
80   "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
81   "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
82   "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
83   "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
84   "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
85   "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
86   "r104","r105","r106","r107","r108","r109","r110","r111",
87   "r112","r113","r114","r115","r116","r117","r118","r119",
88   "r120","r121","r122","r123","r124","r125","r126","r127"};
89 
90 /* ??? These strings could be shared with REGISTER_NAMES.  */
91 static const char * const ia64_input_reg_names[8] =
92 { "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };
93 
94 /* ??? These strings could be shared with REGISTER_NAMES.  */
95 static const char * const ia64_local_reg_names[80] =
96 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
97   "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
98   "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
99   "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
100   "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
101   "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
102   "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
103   "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
104   "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
105   "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
106 
107 /* ??? These strings could be shared with REGISTER_NAMES.  */
108 static const char * const ia64_output_reg_names[8] =
109 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
110 
111 /* Variables which are this size or smaller are put in the sdata/sbss
112    sections.  */
113 
114 unsigned int ia64_section_threshold;
115 
116 /* The following variable is used by the DFA insn scheduler.  The value is
117    TRUE if we do insn bundling instead of insn scheduling.  */
118 int bundling_p = 0;
119 
120 enum ia64_frame_regs
121 {
122    reg_fp,
123    reg_save_b0,
124    reg_save_pr,
125    reg_save_ar_pfs,
126    reg_save_ar_unat,
127    reg_save_ar_lc,
128    reg_save_gp,
129    number_of_ia64_frame_regs
130 };
131 
132 /* Structure to be filled in by ia64_compute_frame_size with register
133    save masks and offsets for the current function.  */
134 
135 struct ia64_frame_info
136 {
137   HOST_WIDE_INT total_size;	/* size of the stack frame, not including
138 				   the caller's scratch area.  */
139   HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
140   HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
141   HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
142   HARD_REG_SET mask;		/* mask of saved registers.  */
143   unsigned int gr_used_mask;	/* mask of registers in use as gr spill
144 				   registers or long-term scratches.  */
145   int n_spilled;		/* number of spilled registers.  */
146   int r[number_of_ia64_frame_regs];  /* Frame related registers.  */
147   int n_input_regs;		/* number of input registers used.  */
148   int n_local_regs;		/* number of local registers used.  */
149   int n_output_regs;		/* number of output registers used.  */
150   int n_rotate_regs;		/* number of rotating registers used.  */
151 
152   char need_regstk;		/* true if a .regstk directive needed.  */
153   char initialized;		/* true if the data is finalized.  */
154 };
155 
156 /* Current frame information calculated by ia64_compute_frame_size.  */
157 static struct ia64_frame_info current_frame_info;
158 /* The actual registers that are emitted.  */
159 static int emitted_frame_related_regs[number_of_ia64_frame_regs];
160 
161 static int ia64_first_cycle_multipass_dfa_lookahead (void);
162 static void ia64_dependencies_evaluation_hook (rtx_insn *, rtx_insn *);
163 static void ia64_init_dfa_pre_cycle_insn (void);
164 static rtx ia64_dfa_pre_cycle_insn (void);
165 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
166 static int ia64_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *);
167 static void ia64_h_i_d_extended (void);
168 static void * ia64_alloc_sched_context (void);
169 static void ia64_init_sched_context (void *, bool);
170 static void ia64_set_sched_context (void *);
171 static void ia64_clear_sched_context (void *);
172 static void ia64_free_sched_context (void *);
173 static int ia64_mode_to_int (machine_mode);
174 static void ia64_set_sched_flags (spec_info_t);
175 static ds_t ia64_get_insn_spec_ds (rtx_insn *);
176 static ds_t ia64_get_insn_checked_ds (rtx_insn *);
177 static bool ia64_skip_rtx_p (const_rtx);
178 static int ia64_speculate_insn (rtx_insn *, ds_t, rtx *);
179 static bool ia64_needs_block_p (ds_t);
180 static rtx ia64_gen_spec_check (rtx_insn *, rtx_insn *, ds_t);
181 static int ia64_spec_check_p (rtx);
182 static int ia64_spec_check_src_p (rtx);
183 static rtx gen_tls_get_addr (void);
184 static rtx gen_thread_pointer (void);
185 static int find_gr_spill (enum ia64_frame_regs, int);
186 static int next_scratch_gr_reg (void);
187 static void mark_reg_gr_used_mask (rtx, void *);
188 static void ia64_compute_frame_size (HOST_WIDE_INT);
189 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
190 static void finish_spill_pointers (void);
191 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
192 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
193 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
194 static rtx gen_movdi_x (rtx, rtx, rtx);
195 static rtx gen_fr_spill_x (rtx, rtx, rtx);
196 static rtx gen_fr_restore_x (rtx, rtx, rtx);
197 
198 static void ia64_option_override (void);
199 static bool ia64_can_eliminate (const int, const int);
200 static machine_mode hfa_element_mode (const_tree, bool);
201 static void ia64_setup_incoming_varargs (cumulative_args_t,
202 					 const function_arg_info &,
203 					 int *, int);
204 static int ia64_arg_partial_bytes (cumulative_args_t,
205 				   const function_arg_info &);
206 static rtx ia64_function_arg (cumulative_args_t, const function_arg_info &);
207 static rtx ia64_function_incoming_arg (cumulative_args_t,
208 				       const function_arg_info &);
209 static void ia64_function_arg_advance (cumulative_args_t,
210 				       const function_arg_info &);
211 static pad_direction ia64_function_arg_padding (machine_mode, const_tree);
212 static unsigned int ia64_function_arg_boundary (machine_mode,
213 						const_tree);
214 static bool ia64_function_ok_for_sibcall (tree, tree);
215 static bool ia64_return_in_memory (const_tree, const_tree);
216 static rtx ia64_function_value (const_tree, const_tree, bool);
217 static rtx ia64_libcall_value (machine_mode, const_rtx);
218 static bool ia64_function_value_regno_p (const unsigned int);
219 static int ia64_register_move_cost (machine_mode, reg_class_t,
220                                     reg_class_t);
221 static int ia64_memory_move_cost (machine_mode mode, reg_class_t,
222 				  bool);
223 static bool ia64_rtx_costs (rtx, machine_mode, int, int, int *, bool);
224 static int ia64_unspec_may_trap_p (const_rtx, unsigned);
225 static void fix_range (const char *);
226 static struct machine_function * ia64_init_machine_status (void);
227 static void emit_insn_group_barriers (FILE *);
228 static void emit_all_insn_group_barriers (FILE *);
229 static void final_emit_insn_group_barriers (FILE *);
230 static void emit_predicate_relation_info (void);
231 static void ia64_reorg (void);
232 static bool ia64_in_small_data_p (const_tree);
233 static void process_epilogue (FILE *, rtx, bool, bool);
234 
235 static bool ia64_assemble_integer (rtx, unsigned int, int);
236 static void ia64_output_function_prologue (FILE *);
237 static void ia64_output_function_epilogue (FILE *);
238 static void ia64_output_function_end_prologue (FILE *);
239 
240 static void ia64_print_operand (FILE *, rtx, int);
241 static void ia64_print_operand_address (FILE *, machine_mode, rtx);
242 static bool ia64_print_operand_punct_valid_p (unsigned char code);
243 
244 static int ia64_issue_rate (void);
245 static int ia64_adjust_cost (rtx_insn *, int, rtx_insn *, int, dw_t);
246 static void ia64_sched_init (FILE *, int, int);
247 static void ia64_sched_init_global (FILE *, int, int);
248 static void ia64_sched_finish_global (FILE *, int);
249 static void ia64_sched_finish (FILE *, int);
250 static int ia64_dfa_sched_reorder (FILE *, int, rtx_insn **, int *, int, int);
251 static int ia64_sched_reorder (FILE *, int, rtx_insn **, int *, int);
252 static int ia64_sched_reorder2 (FILE *, int, rtx_insn **, int *, int);
253 static int ia64_variable_issue (FILE *, int, rtx_insn *, int);
254 
255 static void ia64_asm_unwind_emit (FILE *, rtx_insn *);
256 static void ia64_asm_emit_except_personality (rtx);
257 static void ia64_asm_init_sections (void);
258 
259 static enum unwind_info_type ia64_debug_unwind_info (void);
260 
261 static struct bundle_state *get_free_bundle_state (void);
262 static void free_bundle_state (struct bundle_state *);
263 static void initiate_bundle_states (void);
264 static void finish_bundle_states (void);
265 static int insert_bundle_state (struct bundle_state *);
266 static void initiate_bundle_state_table (void);
267 static void finish_bundle_state_table (void);
268 static int try_issue_nops (struct bundle_state *, int);
269 static int try_issue_insn (struct bundle_state *, rtx);
270 static void issue_nops_and_insn (struct bundle_state *, int, rtx_insn *,
271 				 int, int);
272 static int get_max_pos (state_t);
273 static int get_template (state_t, int);
274 
275 static rtx_insn *get_next_important_insn (rtx_insn *, rtx_insn *);
276 static bool important_for_bundling_p (rtx_insn *);
277 static bool unknown_for_bundling_p (rtx_insn *);
278 static void bundling (FILE *, int, rtx_insn *, rtx_insn *);
279 
280 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
281 				  HOST_WIDE_INT, tree);
282 static void ia64_file_start (void);
283 static void ia64_globalize_decl_name (FILE *, tree);
284 
285 static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
286 static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
287 static section *ia64_select_rtx_section (machine_mode, rtx,
288 					 unsigned HOST_WIDE_INT);
289 static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
290      ATTRIBUTE_UNUSED;
291 static unsigned int ia64_section_type_flags (tree, const char *, int);
292 static void ia64_init_libfuncs (void)
293      ATTRIBUTE_UNUSED;
294 static void ia64_hpux_init_libfuncs (void)
295      ATTRIBUTE_UNUSED;
296 static void ia64_sysv4_init_libfuncs (void)
297      ATTRIBUTE_UNUSED;
298 static void ia64_vms_init_libfuncs (void)
299      ATTRIBUTE_UNUSED;
300 static void ia64_soft_fp_init_libfuncs (void)
301      ATTRIBUTE_UNUSED;
302 static bool ia64_vms_valid_pointer_mode (scalar_int_mode mode)
303      ATTRIBUTE_UNUSED;
304 static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
305      ATTRIBUTE_UNUSED;
306 
307 static bool ia64_attribute_takes_identifier_p (const_tree);
308 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
309 static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
310 static void ia64_encode_section_info (tree, rtx, int);
311 static rtx ia64_struct_value_rtx (tree, int);
312 static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
313 static bool ia64_scalar_mode_supported_p (scalar_mode mode);
314 static bool ia64_vector_mode_supported_p (machine_mode mode);
315 static bool ia64_legitimate_constant_p (machine_mode, rtx);
316 static bool ia64_legitimate_address_p (machine_mode, rtx, bool);
317 static bool ia64_cannot_force_const_mem (machine_mode, rtx);
318 static const char *ia64_mangle_type (const_tree);
319 static const char *ia64_invalid_conversion (const_tree, const_tree);
320 static const char *ia64_invalid_unary_op (int, const_tree);
321 static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
322 static machine_mode ia64_c_mode_for_suffix (char);
323 static void ia64_trampoline_init (rtx, tree, rtx);
324 static void ia64_override_options_after_change (void);
325 static bool ia64_member_type_forces_blk (const_tree, machine_mode);
326 
327 static tree ia64_fold_builtin (tree, int, tree *, bool);
328 static tree ia64_builtin_decl (unsigned, bool);
329 
330 static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
331 static fixed_size_mode ia64_get_reg_raw_mode (int regno);
332 static section * ia64_hpux_function_section (tree, enum node_frequency,
333 					     bool, bool);
334 
335 static bool ia64_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
336 					   const vec_perm_indices &);
337 
338 static unsigned int ia64_hard_regno_nregs (unsigned int, machine_mode);
339 static bool ia64_hard_regno_mode_ok (unsigned int, machine_mode);
340 static bool ia64_modes_tieable_p (machine_mode, machine_mode);
341 static bool ia64_can_change_mode_class (machine_mode, machine_mode,
342 					reg_class_t);
343 
344 #define MAX_VECT_LEN	8
345 
346 struct expand_vec_perm_d
347 {
348   rtx target, op0, op1;
349   unsigned char perm[MAX_VECT_LEN];
350   machine_mode vmode;
351   unsigned char nelt;
352   bool one_operand_p;
353   bool testing_p;
354 };
355 
356 static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
357 
358 
359 /* Table of valid machine attributes.  */
360 static const struct attribute_spec ia64_attribute_table[] =
361 {
362   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
363        affects_type_identity, handler, exclude } */
364   { "syscall_linkage", 0, 0, false, true,  true,  false, NULL, NULL },
365   { "model",	       1, 1, true, false, false,  false,
366     ia64_handle_model_attribute, NULL },
367 #if TARGET_ABI_OPEN_VMS
368   { "common_object",   1, 1, true, false, false, false,
369     ia64_vms_common_object_attribute, NULL },
370 #endif
371   { "version_id",      1, 1, true, false, false, false,
372     ia64_handle_version_id_attribute, NULL },
373   { NULL,	       0, 0, false, false, false, false, NULL, NULL }
374 };
375 
376 /* Initialize the GCC target structure.  */
377 #undef TARGET_ATTRIBUTE_TABLE
378 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
379 
380 #undef TARGET_INIT_BUILTINS
381 #define TARGET_INIT_BUILTINS ia64_init_builtins
382 
383 #undef TARGET_FOLD_BUILTIN
384 #define TARGET_FOLD_BUILTIN ia64_fold_builtin
385 
386 #undef TARGET_EXPAND_BUILTIN
387 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
388 
389 #undef TARGET_BUILTIN_DECL
390 #define TARGET_BUILTIN_DECL ia64_builtin_decl
391 
392 #undef TARGET_ASM_BYTE_OP
393 #define TARGET_ASM_BYTE_OP "\tdata1\t"
394 #undef TARGET_ASM_ALIGNED_HI_OP
395 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
396 #undef TARGET_ASM_ALIGNED_SI_OP
397 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
398 #undef TARGET_ASM_ALIGNED_DI_OP
399 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
400 #undef TARGET_ASM_UNALIGNED_HI_OP
401 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
402 #undef TARGET_ASM_UNALIGNED_SI_OP
403 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
404 #undef TARGET_ASM_UNALIGNED_DI_OP
405 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
406 #undef TARGET_ASM_INTEGER
407 #define TARGET_ASM_INTEGER ia64_assemble_integer
408 
409 #undef TARGET_OPTION_OVERRIDE
410 #define TARGET_OPTION_OVERRIDE ia64_option_override
411 
412 #undef TARGET_ASM_FUNCTION_PROLOGUE
413 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
414 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
415 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
416 #undef TARGET_ASM_FUNCTION_EPILOGUE
417 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
418 
419 #undef TARGET_PRINT_OPERAND
420 #define TARGET_PRINT_OPERAND ia64_print_operand
421 #undef TARGET_PRINT_OPERAND_ADDRESS
422 #define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
423 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
424 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p
425 
426 #undef TARGET_IN_SMALL_DATA_P
427 #define TARGET_IN_SMALL_DATA_P  ia64_in_small_data_p
428 
429 #undef TARGET_SCHED_ADJUST_COST
430 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
431 #undef TARGET_SCHED_ISSUE_RATE
432 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
433 #undef TARGET_SCHED_VARIABLE_ISSUE
434 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
435 #undef TARGET_SCHED_INIT
436 #define TARGET_SCHED_INIT ia64_sched_init
437 #undef TARGET_SCHED_FINISH
438 #define TARGET_SCHED_FINISH ia64_sched_finish
439 #undef TARGET_SCHED_INIT_GLOBAL
440 #define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
441 #undef TARGET_SCHED_FINISH_GLOBAL
442 #define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
443 #undef TARGET_SCHED_REORDER
444 #define TARGET_SCHED_REORDER ia64_sched_reorder
445 #undef TARGET_SCHED_REORDER2
446 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
447 
448 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
449 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
450 
451 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
452 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
453 
454 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
455 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
456 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
457 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
458 
459 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
460 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
461   ia64_first_cycle_multipass_dfa_lookahead_guard
462 
463 #undef TARGET_SCHED_DFA_NEW_CYCLE
464 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
465 
466 #undef TARGET_SCHED_H_I_D_EXTENDED
467 #define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
468 
469 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
470 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
471 
472 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
473 #define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
474 
475 #undef TARGET_SCHED_SET_SCHED_CONTEXT
476 #define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
477 
478 #undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
479 #define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
480 
481 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
482 #define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
483 
484 #undef TARGET_SCHED_SET_SCHED_FLAGS
485 #define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
486 
487 #undef TARGET_SCHED_GET_INSN_SPEC_DS
488 #define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
489 
490 #undef TARGET_SCHED_GET_INSN_CHECKED_DS
491 #define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
492 
493 #undef TARGET_SCHED_SPECULATE_INSN
494 #define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
495 
496 #undef TARGET_SCHED_NEEDS_BLOCK_P
497 #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
498 
499 #undef TARGET_SCHED_GEN_SPEC_CHECK
500 #define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
501 
502 #undef TARGET_SCHED_SKIP_RTX_P
503 #define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
504 
505 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
506 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
507 #undef TARGET_ARG_PARTIAL_BYTES
508 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
509 #undef TARGET_FUNCTION_ARG
510 #define TARGET_FUNCTION_ARG ia64_function_arg
511 #undef TARGET_FUNCTION_INCOMING_ARG
512 #define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
513 #undef TARGET_FUNCTION_ARG_ADVANCE
514 #define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
515 #undef TARGET_FUNCTION_ARG_PADDING
516 #define TARGET_FUNCTION_ARG_PADDING ia64_function_arg_padding
517 #undef TARGET_FUNCTION_ARG_BOUNDARY
518 #define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary
519 
520 #undef TARGET_ASM_OUTPUT_MI_THUNK
521 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
522 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
523 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
524 
525 #undef TARGET_ASM_FILE_START
526 #define TARGET_ASM_FILE_START ia64_file_start
527 
528 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
529 #define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
530 
531 #undef TARGET_REGISTER_MOVE_COST
532 #define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
533 #undef TARGET_MEMORY_MOVE_COST
534 #define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
535 #undef TARGET_RTX_COSTS
536 #define TARGET_RTX_COSTS ia64_rtx_costs
537 #undef TARGET_ADDRESS_COST
538 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
539 
540 #undef TARGET_UNSPEC_MAY_TRAP_P
541 #define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
542 
543 #undef TARGET_MACHINE_DEPENDENT_REORG
544 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
545 
546 #undef TARGET_ENCODE_SECTION_INFO
547 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
548 
549 #undef  TARGET_SECTION_TYPE_FLAGS
550 #define TARGET_SECTION_TYPE_FLAGS  ia64_section_type_flags
551 
552 #ifdef HAVE_AS_TLS
553 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
554 #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
555 #endif
556 
557 /* ??? Investigate.  */
558 #if 0
559 #undef TARGET_PROMOTE_PROTOTYPES
560 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
561 #endif
562 
563 #undef TARGET_FUNCTION_VALUE
564 #define TARGET_FUNCTION_VALUE ia64_function_value
565 #undef TARGET_LIBCALL_VALUE
566 #define TARGET_LIBCALL_VALUE ia64_libcall_value
567 #undef TARGET_FUNCTION_VALUE_REGNO_P
568 #define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p
569 
570 #undef TARGET_STRUCT_VALUE_RTX
571 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
572 #undef TARGET_RETURN_IN_MEMORY
573 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
574 #undef TARGET_SETUP_INCOMING_VARARGS
575 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
576 #undef TARGET_STRICT_ARGUMENT_NAMING
577 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
578 #undef TARGET_MUST_PASS_IN_STACK
579 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
580 #undef TARGET_GET_RAW_RESULT_MODE
581 #define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
582 #undef TARGET_GET_RAW_ARG_MODE
583 #define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode
584 
585 #undef TARGET_MEMBER_TYPE_FORCES_BLK
586 #define TARGET_MEMBER_TYPE_FORCES_BLK ia64_member_type_forces_blk
587 
588 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
589 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
590 
591 #undef TARGET_ASM_UNWIND_EMIT
592 #define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
593 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
594 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY  ia64_asm_emit_except_personality
595 #undef TARGET_ASM_INIT_SECTIONS
596 #define TARGET_ASM_INIT_SECTIONS  ia64_asm_init_sections
597 
598 #undef TARGET_DEBUG_UNWIND_INFO
599 #define TARGET_DEBUG_UNWIND_INFO  ia64_debug_unwind_info
600 
601 #undef TARGET_SCALAR_MODE_SUPPORTED_P
602 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
603 #undef TARGET_VECTOR_MODE_SUPPORTED_P
604 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
605 
606 #undef TARGET_LEGITIMATE_CONSTANT_P
607 #define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
608 #undef TARGET_LEGITIMATE_ADDRESS_P
609 #define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p
610 
611 #undef TARGET_LRA_P
612 #define TARGET_LRA_P hook_bool_void_false
613 
614 #undef TARGET_CANNOT_FORCE_CONST_MEM
615 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
616 
617 #undef TARGET_MANGLE_TYPE
618 #define TARGET_MANGLE_TYPE ia64_mangle_type
619 
620 #undef TARGET_INVALID_CONVERSION
621 #define TARGET_INVALID_CONVERSION ia64_invalid_conversion
622 #undef TARGET_INVALID_UNARY_OP
623 #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
624 #undef TARGET_INVALID_BINARY_OP
625 #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
626 
627 #undef TARGET_C_MODE_FOR_SUFFIX
628 #define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
629 
630 #undef TARGET_CAN_ELIMINATE
631 #define TARGET_CAN_ELIMINATE ia64_can_eliminate
632 
633 #undef TARGET_TRAMPOLINE_INIT
634 #define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
635 
636 #undef TARGET_CAN_USE_DOLOOP_P
637 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
638 #undef TARGET_INVALID_WITHIN_DOLOOP
639 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
640 
641 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
642 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
643 
644 #undef TARGET_PREFERRED_RELOAD_CLASS
645 #define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class
646 
647 #undef TARGET_DELAY_SCHED2
648 #define TARGET_DELAY_SCHED2 true
649 
650 /* Variable tracking should be run after all optimizations which
651    change order of insns.  It also needs a valid CFG.  */
652 #undef TARGET_DELAY_VARTRACK
653 #define TARGET_DELAY_VARTRACK true
654 
655 #undef TARGET_VECTORIZE_VEC_PERM_CONST
656 #define TARGET_VECTORIZE_VEC_PERM_CONST ia64_vectorize_vec_perm_const
657 
658 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
659 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p
660 
661 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
662 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 0
663 
664 #undef TARGET_HARD_REGNO_NREGS
665 #define TARGET_HARD_REGNO_NREGS ia64_hard_regno_nregs
666 #undef TARGET_HARD_REGNO_MODE_OK
667 #define TARGET_HARD_REGNO_MODE_OK ia64_hard_regno_mode_ok
668 
669 #undef TARGET_MODES_TIEABLE_P
670 #define TARGET_MODES_TIEABLE_P ia64_modes_tieable_p
671 
672 #undef TARGET_CAN_CHANGE_MODE_CLASS
673 #define TARGET_CAN_CHANGE_MODE_CLASS ia64_can_change_mode_class
674 
675 #undef TARGET_CONSTANT_ALIGNMENT
676 #define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings
677 
678 struct gcc_target targetm = TARGET_INITIALIZER;
679 
680 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
681    identifier as an argument, so the front end shouldn't look it up.  */
682 
683 static bool
684 ia64_attribute_takes_identifier_p (const_tree attr_id)
685 {
686   if (is_attribute_p ("model", attr_id))
687     return true;
688 #if TARGET_ABI_OPEN_VMS
689   if (is_attribute_p ("common_object", attr_id))
690     return true;
691 #endif
692   return false;
693 }
694 
695 typedef enum
696   {
697     ADDR_AREA_NORMAL,	/* normal address area */
698     ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
699   }
700 ia64_addr_area;
701 
702 static GTY(()) tree small_ident1;
703 static GTY(()) tree small_ident2;
704 
705 static void
706 init_idents (void)
707 {
708   if (small_ident1 == 0)
709     {
710       small_ident1 = get_identifier ("small");
711       small_ident2 = get_identifier ("__small__");
712     }
713 }
714 
715 /* Retrieve the address area that has been chosen for the given decl.  */
716 
717 static ia64_addr_area
718 ia64_get_addr_area (tree decl)
719 {
720   tree model_attr;
721 
722   model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
723   if (model_attr)
724     {
725       tree id;
726 
727       init_idents ();
728       id = TREE_VALUE (TREE_VALUE (model_attr));
729       if (id == small_ident1 || id == small_ident2)
730 	return ADDR_AREA_SMALL;
731     }
732   return ADDR_AREA_NORMAL;
733 }
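/* A usage sketch (illustrative only, not part of this file's logic):

     static int counter __attribute__ ((model (small)));

   asks for COUNTER to live in the small address area; ia64_get_addr_area
   then returns ADDR_AREA_SMALL for it, so its address can later be formed
   with a single "addl" against gp.  */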
734 
735 static tree
736 ia64_handle_model_attribute (tree *node, tree name, tree args,
737 			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
738 {
739   ia64_addr_area addr_area = ADDR_AREA_NORMAL;
740   ia64_addr_area area;
741   tree arg, decl = *node;
742 
743   init_idents ();
744   arg = TREE_VALUE (args);
745   if (arg == small_ident1 || arg == small_ident2)
746     {
747       addr_area = ADDR_AREA_SMALL;
748     }
749   else
750     {
751       warning (OPT_Wattributes, "invalid argument of %qE attribute",
752 	       name);
753       *no_add_attrs = true;
754     }
755 
756   switch (TREE_CODE (decl))
757     {
758     case VAR_DECL:
759       if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
760 	   == FUNCTION_DECL)
761 	  && !TREE_STATIC (decl))
762 	{
763 	  error_at (DECL_SOURCE_LOCATION (decl),
764 		    "an address area attribute cannot be specified for "
765 		    "local variables");
766 	  *no_add_attrs = true;
767 	}
768       area = ia64_get_addr_area (decl);
769       if (area != ADDR_AREA_NORMAL && addr_area != area)
770 	{
771 	  error ("address area of %q+D conflicts with previous "
772 		 "declaration", decl);
773 	  *no_add_attrs = true;
774 	}
775       break;
776 
777     case FUNCTION_DECL:
778       error_at (DECL_SOURCE_LOCATION (decl),
779 		"address area attribute cannot be specified for "
780 		"functions");
781       *no_add_attrs = true;
782       break;
783 
784     default:
785       warning (OPT_Wattributes, "%qE attribute ignored",
786 	       name);
787       *no_add_attrs = true;
788       break;
789     }
790 
791   return NULL_TREE;
792 }
793 
794 /* Part of the low level implementation of DEC Ada pragma Common_Object which
795    enables the shared use of variables stored in overlaid linker areas
796    corresponding to the use of Fortran COMMON.  */
797 
798 static tree
799 ia64_vms_common_object_attribute (tree *node, tree name, tree args,
800 				  int flags ATTRIBUTE_UNUSED,
801 				  bool *no_add_attrs)
802 {
803     tree decl = *node;
804     tree id;
805 
806     gcc_assert (DECL_P (decl));
807 
808     DECL_COMMON (decl) = 1;
809     id = TREE_VALUE (args);
810     if (TREE_CODE (id) != IDENTIFIER_NODE && TREE_CODE (id) != STRING_CST)
811       {
812 	error ("%qE attribute requires a string constant argument", name);
813 	*no_add_attrs = true;
814 	return NULL_TREE;
815       }
816     return NULL_TREE;
817 }
818 
819 /* Part of the low level implementation of DEC Ada pragma Common_Object.  */
820 
821 void
822 ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
823 				     unsigned HOST_WIDE_INT size,
824 				     unsigned int align)
825 {
826   tree attr = DECL_ATTRIBUTES (decl);
827 
828   if (attr)
829     attr = lookup_attribute ("common_object", attr);
830   if (attr)
831     {
832       tree id = TREE_VALUE (TREE_VALUE (attr));
833       const char *name;
834 
835       if (TREE_CODE (id) == IDENTIFIER_NODE)
836         name = IDENTIFIER_POINTER (id);
837       else if (TREE_CODE (id) == STRING_CST)
838         name = TREE_STRING_POINTER (id);
839       else
840         abort ();
841 
842       fprintf (file, "\t.vms_common\t\"%s\",", name);
843     }
844   else
845     fprintf (file, "%s", COMMON_ASM_OP);
846 
847   /*  Code from elfos.h.  */
848   assemble_name (file, name);
849   fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED",%u",
850            size, align / BITS_PER_UNIT);
851 
852   fputc ('\n', file);
853 }
854 
855 static void
856 ia64_encode_addr_area (tree decl, rtx symbol)
857 {
858   int flags;
859 
860   flags = SYMBOL_REF_FLAGS (symbol);
861   switch (ia64_get_addr_area (decl))
862     {
863     case ADDR_AREA_NORMAL: break;
864     case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
865     default: gcc_unreachable ();
866     }
867   SYMBOL_REF_FLAGS (symbol) = flags;
868 }
869 
870 static void
871 ia64_encode_section_info (tree decl, rtx rtl, int first)
872 {
873   default_encode_section_info (decl, rtl, first);
874 
875   /* Careful not to prod global register variables.  */
876   if (TREE_CODE (decl) == VAR_DECL
877       && GET_CODE (DECL_RTL (decl)) == MEM
878       && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
879       && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
880     ia64_encode_addr_area (decl, XEXP (rtl, 0));
881 }
882 
883 /* Return 1 if the operands of a move are ok.  */
884 
885 int
886 ia64_move_ok (rtx dst, rtx src)
887 {
888   /* If we're under init_recog_no_volatile, we'll not be able to use
889      memory_operand.  So check the code directly and don't worry about
890      the validity of the underlying address, which should have been
891      checked elsewhere anyway.  */
892   if (GET_CODE (dst) != MEM)
893     return 1;
894   if (GET_CODE (src) == MEM)
895     return 0;
896   if (register_operand (src, VOIDmode))
897     return 1;
898 
899   /* Otherwise, this must be a constant, and then either 0, 0.0 or 1.0.  */
900   if (INTEGRAL_MODE_P (GET_MODE (dst)))
901     return src == const0_rtx;
902   else
903     return satisfies_constraint_G (src);
904 }
905 
906 /* Return 1 if the operands are ok for a floating point load pair.  */
907 
908 int
909 ia64_load_pair_ok (rtx dst, rtx src)
910 {
911   /* ??? There is a thinko in the implementation of the "x" constraint and the
912      FP_REGS class.  The constraint will also reject (reg f30:TI) so we must
913      also return false for it.  */
914   if (GET_CODE (dst) != REG
915       || !(FP_REGNO_P (REGNO (dst)) && FP_REGNO_P (REGNO (dst) + 1)))
916     return 0;
917   if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
918     return 0;
919   switch (GET_CODE (XEXP (src, 0)))
920     {
921     case REG:
922     case POST_INC:
923       break;
924     case POST_DEC:
925       return 0;
926     case POST_MODIFY:
927       {
928 	rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
929 
930 	if (GET_CODE (adjust) != CONST_INT
931 	    || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
932 	  return 0;
933       }
934       break;
935     default:
936       abort ();
937     }
938   return 1;
939 }
940 
941 int
942 addp4_optimize_ok (rtx op1, rtx op2)
943 {
944   return (basereg_operand (op1, GET_MODE(op1)) !=
945 	  basereg_operand (op2, GET_MODE(op2)));
946 }
947 
948 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
949    Return the length of the field, or <= 0 on failure.  */
950 
951 int
952 ia64_depz_field_mask (rtx rop, rtx rshift)
953 {
954   unsigned HOST_WIDE_INT op = INTVAL (rop);
955   unsigned HOST_WIDE_INT shift = INTVAL (rshift);
956 
957   /* Get rid of the zero bits we're shifting in.  */
958   op >>= shift;
959 
960   /* We must now have a solid block of 1's at bit 0.  */
961   return exact_log2 (op + 1);
962 }
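/* Informal example of the check above (not compiled): with ROP == 0xff0 and
   RSHIFT == 4, the shift leaves op == 0xff and exact_log2 (0x100) == 8, i.e.
   an 8-bit deposit field starting at bit 4.  A mask such as 0xf0f fails,
   since op + 1 is then not a power of two and exact_log2 returns -1.  */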
963 
964 /* Return the TLS model to use for ADDR.  */
965 
966 static enum tls_model
967 tls_symbolic_operand_type (rtx addr)
968 {
969   enum tls_model tls_kind = TLS_MODEL_NONE;
970 
971   if (GET_CODE (addr) == CONST)
972     {
973       if (GET_CODE (XEXP (addr, 0)) == PLUS
974 	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
975         tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
976     }
977   else if (GET_CODE (addr) == SYMBOL_REF)
978     tls_kind = SYMBOL_REF_TLS_MODEL (addr);
979 
980   return tls_kind;
981 }
982 
983 /* Returns true if REG (assumed to be a `reg' RTX) is valid for use
984    as a base register.  */
985 
986 static inline bool
987 ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
988 {
989   if (strict
990       && REGNO_OK_FOR_BASE_P (REGNO (reg)))
991     return true;
992   else if (!strict
993 	   && (GENERAL_REGNO_P (REGNO (reg))
994 	       || !HARD_REGISTER_P (reg)))
995     return true;
996   else
997     return false;
998 }
999 
1000 static bool
1001 ia64_legitimate_address_reg (const_rtx reg, bool strict)
1002 {
1003   if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict))
1004       || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0))
1005 	  && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict)))
1006     return true;
1007 
1008   return false;
1009 }
1010 
1011 static bool
1012 ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
1013 {
1014   if (GET_CODE (disp) == PLUS
1015       && rtx_equal_p (reg, XEXP (disp, 0))
1016       && (ia64_legitimate_address_reg (XEXP (disp, 1), strict)
1017 	  || (CONST_INT_P (XEXP (disp, 1))
1018 	      && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255))))
1019     return true;
1020 
1021   return false;
1022 }
1023 
1024 /* Implement TARGET_LEGITIMATE_ADDRESS_P.  */
1025 
1026 static bool
1027 ia64_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
1028 			   rtx x, bool strict)
1029 {
1030   if (ia64_legitimate_address_reg (x, strict))
1031     return true;
1032   else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
1033 	   && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1034 	   && XEXP (x, 0) != arg_pointer_rtx)
1035     return true;
1036   else if (GET_CODE (x) == POST_MODIFY
1037 	   && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1038 	   && XEXP (x, 0) != arg_pointer_rtx
1039 	   && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict))
1040     return true;
1041   else
1042     return false;
1043 }
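/* For reference, a sketch of what the predicate above accepts: plain
   register-indirect addresses such as (reg r14), post-increment forms such
   as (post_inc (reg r14)), and (post_modify (reg r14) (plus (reg r14) X))
   where X is another base register or a constant in [-256, 255].  A bare
   (plus (reg r14) (const_int 8)) is rejected; IA-64 has no reg+offset
   addressing mode, so such addresses must be formed with a separate add.  */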
1044 
1045 /* Return true if X is a constant that is valid for some immediate
1046    field in an instruction.  */
1047 
1048 static bool
1049 ia64_legitimate_constant_p (machine_mode mode, rtx x)
1050 {
1051   switch (GET_CODE (x))
1052     {
1053     case CONST_INT:
1054     case LABEL_REF:
1055       return true;
1056 
1057     case CONST_DOUBLE:
1058       if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
1059 	return true;
1060       return satisfies_constraint_G (x);
1061 
1062     case CONST:
1063     case SYMBOL_REF:
1064       /* ??? Short term workaround for PR 28490.  We must make the code here
1065 	 match the code in ia64_expand_move and move_operand, even though they
1066 	 are both technically wrong.  */
1067       if (tls_symbolic_operand_type (x) == 0)
1068 	{
1069 	  HOST_WIDE_INT addend = 0;
1070 	  rtx op = x;
1071 
1072 	  if (GET_CODE (op) == CONST
1073 	      && GET_CODE (XEXP (op, 0)) == PLUS
1074 	      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1075 	    {
1076 	      addend = INTVAL (XEXP (XEXP (op, 0), 1));
1077 	      op = XEXP (XEXP (op, 0), 0);
1078 	    }
1079 
1080           if (any_offset_symbol_operand (op, mode)
1081               || function_operand (op, mode))
1082             return true;
1083 	  if (aligned_offset_symbol_operand (op, mode))
1084 	    return (addend & 0x3fff) == 0;
1085 	  return false;
1086 	}
1087       return false;
1088 
1089     case CONST_VECTOR:
1090       if (mode == V2SFmode)
1091 	return satisfies_constraint_Y (x);
1092 
1093       return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1094 	      && GET_MODE_SIZE (mode) <= 8);
1095 
1096     default:
1097       return false;
1098     }
1099 }
1100 
1101 /* Don't allow TLS addresses to get spilled to memory.  */
1102 
1103 static bool
1104 ia64_cannot_force_const_mem (machine_mode mode, rtx x)
1105 {
1106   if (mode == RFmode)
1107     return true;
1108   return tls_symbolic_operand_type (x) != 0;
1109 }
1110 
1111 /* Expand a symbolic constant load.  */
1112 
1113 bool
1114 ia64_expand_load_address (rtx dest, rtx src)
1115 {
1116   gcc_assert (GET_CODE (dest) == REG);
1117 
1118   /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
1119      having to pointer-extend the value afterward.  Other forms of address
1120      computation below are also more natural to compute as 64-bit quantities.
1121      If we've been given an SImode destination register, change it.  */
1122   if (GET_MODE (dest) != Pmode)
1123     dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
1124 			       byte_lowpart_offset (Pmode, GET_MODE (dest)));
1125 
1126   if (TARGET_NO_PIC)
1127     return false;
1128   if (small_addr_symbolic_operand (src, VOIDmode))
1129     return false;
1130 
1131   if (TARGET_AUTO_PIC)
1132     emit_insn (gen_load_gprel64 (dest, src));
1133   else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
1134     emit_insn (gen_load_fptr (dest, src));
1135   else if (sdata_symbolic_operand (src, VOIDmode))
1136     emit_insn (gen_load_gprel (dest, src));
1137   else if (local_symbolic_operand64 (src, VOIDmode))
1138     {
1139       /* We want to use @gprel rather than @ltoff relocations for local
1140 	 symbols:
1141 	  - @gprel does not require dynamic linker
1142 	  - and does not use .sdata section
1143 	 https://gcc.gnu.org/bugzilla/60465 */
1144       emit_insn (gen_load_gprel64 (dest, src));
1145     }
1146   else
1147     {
1148       HOST_WIDE_INT addend = 0;
1149       rtx tmp;
1150 
1151       /* We did split constant offsets in ia64_expand_move, and we did try
1152 	 to keep them split in move_operand, but we also allowed reload to
1153 	 rematerialize arbitrary constants rather than spill the value to
1154 	 the stack and reload it.  So we have to be prepared here to split
1155 	 them apart again.  */
1156       if (GET_CODE (src) == CONST)
1157 	{
1158 	  HOST_WIDE_INT hi, lo;
1159 
1160 	  hi = INTVAL (XEXP (XEXP (src, 0), 1));
1161 	  lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
1162 	  hi = hi - lo;
1163 
1164 	  if (lo != 0)
1165 	    {
1166 	      addend = lo;
1167 	      src = plus_constant (Pmode, XEXP (XEXP (src, 0), 0), hi);
1168 	    }
1169 	}
1170 
1171       tmp = gen_rtx_HIGH (Pmode, src);
1172       tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
1173       emit_insn (gen_rtx_SET (dest, tmp));
1174 
1175       tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
1176       emit_insn (gen_rtx_SET (dest, tmp));
1177 
1178       if (addend)
1179 	{
1180 	  tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
1181 	  emit_insn (gen_rtx_SET (dest, tmp));
1182 	}
1183     }
1184 
1185   return true;
1186 }
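/* Worked example of the 14-bit addend split used above (informal): for an
   addend of 0x12345, lo = ((0x12345 & 0x3fff) ^ 0x2000) - 0x2000 = -0x1cbb
   and hi = 0x12345 - lo = 0x14000.  HI is a multiple of 0x4000 and stays
   attached to the symbol for the address load, while LO fits in a signed
   14-bit immediate and is added back separately.  */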
1187 
1188 static GTY(()) rtx gen_tls_tga;
1189 static rtx
1190 gen_tls_get_addr (void)
1191 {
1192   if (!gen_tls_tga)
1193     gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1194   return gen_tls_tga;
1195 }
1196 
1197 static GTY(()) rtx thread_pointer_rtx;
1198 static rtx
1199 gen_thread_pointer (void)
1200 {
1201   if (!thread_pointer_rtx)
1202     thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1203   return thread_pointer_rtx;
1204 }
1205 
1206 static rtx
1207 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
1208 			 rtx orig_op1, HOST_WIDE_INT addend)
1209 {
1210   rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp;
1211   rtx_insn *insns;
1212   rtx orig_op0 = op0;
1213   HOST_WIDE_INT addend_lo, addend_hi;
1214 
1215   switch (tls_kind)
1216     {
1217     case TLS_MODEL_GLOBAL_DYNAMIC:
1218       start_sequence ();
1219 
1220       tga_op1 = gen_reg_rtx (Pmode);
1221       emit_insn (gen_load_dtpmod (tga_op1, op1));
1222 
1223       tga_op2 = gen_reg_rtx (Pmode);
1224       emit_insn (gen_load_dtprel (tga_op2, op1));
1225 
1226       tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1227 					 LCT_CONST, Pmode,
1228 					 tga_op1, Pmode, tga_op2, Pmode);
1229 
1230       insns = get_insns ();
1231       end_sequence ();
1232 
1233       if (GET_MODE (op0) != Pmode)
1234 	op0 = tga_ret;
1235       emit_libcall_block (insns, op0, tga_ret, op1);
1236       break;
1237 
1238     case TLS_MODEL_LOCAL_DYNAMIC:
1239       /* ??? This isn't the completely proper way to do local-dynamic.
1240 	 If the call to __tls_get_addr is used only by a single symbol,
1241 	 then we should (somehow) move the dtprel to the second arg
1242 	 to avoid the extra add.  */
1243       start_sequence ();
1244 
1245       tga_op1 = gen_reg_rtx (Pmode);
1246       emit_insn (gen_load_dtpmod (tga_op1, op1));
1247 
1248       tga_op2 = const0_rtx;
1249 
1250       tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1251 					 LCT_CONST, Pmode,
1252 					 tga_op1, Pmode, tga_op2, Pmode);
1253 
1254       insns = get_insns ();
1255       end_sequence ();
1256 
1257       tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1258 				UNSPEC_LD_BASE);
1259       tmp = gen_reg_rtx (Pmode);
1260       emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1261 
1262       if (!register_operand (op0, Pmode))
1263 	op0 = gen_reg_rtx (Pmode);
1264       if (TARGET_TLS64)
1265 	{
1266 	  emit_insn (gen_load_dtprel (op0, op1));
1267 	  emit_insn (gen_adddi3 (op0, tmp, op0));
1268 	}
1269       else
1270 	emit_insn (gen_add_dtprel (op0, op1, tmp));
1271       break;
1272 
1273     case TLS_MODEL_INITIAL_EXEC:
1274       addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1275       addend_hi = addend - addend_lo;
1276 
1277       op1 = plus_constant (Pmode, op1, addend_hi);
1278       addend = addend_lo;
1279 
1280       tmp = gen_reg_rtx (Pmode);
1281       emit_insn (gen_load_tprel (tmp, op1));
1282 
1283       if (!register_operand (op0, Pmode))
1284 	op0 = gen_reg_rtx (Pmode);
1285       emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1286       break;
1287 
1288     case TLS_MODEL_LOCAL_EXEC:
1289       if (!register_operand (op0, Pmode))
1290 	op0 = gen_reg_rtx (Pmode);
1291 
1292       op1 = orig_op1;
1293       addend = 0;
1294       if (TARGET_TLS64)
1295 	{
1296 	  emit_insn (gen_load_tprel (op0, op1));
1297 	  emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
1298 	}
1299       else
1300 	emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
1301       break;
1302 
1303     default:
1304       gcc_unreachable ();
1305     }
1306 
1307   if (addend)
1308     op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1309 			       orig_op0, 1, OPTAB_DIRECT);
1310   if (orig_op0 == op0)
1311     return NULL_RTX;
1312   if (GET_MODE (orig_op0) == Pmode)
1313     return op0;
1314   return gen_lowpart (GET_MODE (orig_op0), op0);
1315 }
1316 
1317 rtx
1318 ia64_expand_move (rtx op0, rtx op1)
1319 {
1320   machine_mode mode = GET_MODE (op0);
1321 
1322   if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1323     op1 = force_reg (mode, op1);
1324 
1325   if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1326     {
1327       HOST_WIDE_INT addend = 0;
1328       enum tls_model tls_kind;
1329       rtx sym = op1;
1330 
1331       if (GET_CODE (op1) == CONST
1332 	  && GET_CODE (XEXP (op1, 0)) == PLUS
1333 	  && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1334 	{
1335 	  addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1336 	  sym = XEXP (XEXP (op1, 0), 0);
1337 	}
1338 
1339       tls_kind = tls_symbolic_operand_type (sym);
1340       if (tls_kind)
1341 	return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1342 
1343       if (any_offset_symbol_operand (sym, mode))
1344 	addend = 0;
1345       else if (aligned_offset_symbol_operand (sym, mode))
1346 	{
1347 	  HOST_WIDE_INT addend_lo, addend_hi;
1348 
1349 	  addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1350 	  addend_hi = addend - addend_lo;
1351 
1352 	  if (addend_lo != 0)
1353 	    {
1354 	      op1 = plus_constant (mode, sym, addend_hi);
1355 	      addend = addend_lo;
1356 	    }
1357 	  else
1358 	    addend = 0;
1359 	}
1360       else
1361 	op1 = sym;
1362 
1363       if (reload_completed)
1364 	{
1365 	  /* We really should have taken care of this offset earlier.  */
1366 	  gcc_assert (addend == 0);
1367 	  if (ia64_expand_load_address (op0, op1))
1368 	    return NULL_RTX;
1369 	}
1370 
1371       if (addend)
1372 	{
1373 	  rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);
1374 
1375 	  emit_insn (gen_rtx_SET (subtarget, op1));
1376 
1377 	  op1 = expand_simple_binop (mode, PLUS, subtarget,
1378 				     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1379 	  if (op0 == op1)
1380 	    return NULL_RTX;
1381 	}
1382     }
1383 
1384   return op1;
1385 }
1386 
1387 /* Split a move from OP1 to OP0 conditional on COND.  */
1388 
1389 void
1390 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1391 {
1392   rtx_insn *insn, *first = get_last_insn ();
1393 
1394   emit_move_insn (op0, op1);
1395 
1396   for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1397     if (INSN_P (insn))
1398       PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1399 					  PATTERN (insn));
1400 }
1401 
1402 /* Split a post-reload TImode or TFmode reference into two DImode
1403    components.  This is made extra difficult by the fact that we do
1404    not get any scratch registers to work with, because reload cannot
1405    be prevented from giving us a scratch that overlaps the register
1406    pair involved.  So instead, when addressing memory, we tweak the
1407    pointer register up and back down with POST_INCs.  Or up and not
1408    back down when we can get away with it.
1409 
1410    REVERSED is true when the loads must be done in reversed order
1411    (high word first) for correctness.  DEAD is true when the pointer
1412    dies with the second insn we generate and therefore the second
1413    address must not carry a postmodify.
1414 
1415    May return an insn which is to be emitted after the moves.  */
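/* Illustrative case (not exhaustive): for IN == (mem:TI (reg r14)) with
   REVERSED and DEAD both false, the two halves come out roughly as
   (mem:DI (post_inc (reg r14))) and (mem:DI (post_dec (reg r14))), so r14
   is bumped to reach the high word and then restored.  With DEAD true the
   second half is a plain (mem:DI (reg r14)): the post-increment from the
   first half already points at the high word and r14 need not be restored.  */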
1416 
1417 static rtx
1418 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1419 {
1420   rtx fixup = 0;
1421 
1422   switch (GET_CODE (in))
1423     {
1424     case REG:
1425       out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1426       out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1427       break;
1428 
1429     case CONST_INT:
1430     case CONST_DOUBLE:
1431       /* Cannot occur reversed.  */
1432       gcc_assert (!reversed);
1433 
1434       if (GET_MODE (in) != TFmode)
1435 	split_double (in, &out[0], &out[1]);
1436       else
1437 	/* split_double does not understand how to split a TFmode
1438 	   quantity into a pair of DImode constants.  */
1439 	{
1440 	  unsigned HOST_WIDE_INT p[2];
1441 	  long l[4];  /* TFmode is 128 bits */
1442 
1443 	  real_to_target (l, CONST_DOUBLE_REAL_VALUE (in), TFmode);
1444 
1445 	  if (FLOAT_WORDS_BIG_ENDIAN)
1446 	    {
1447 	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1448 	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1449 	    }
1450 	  else
1451 	    {
1452 	      p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1453 	      p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1454 	    }
1455 	  out[0] = GEN_INT (p[0]);
1456 	  out[1] = GEN_INT (p[1]);
1457 	}
1458       break;
1459 
1460     case MEM:
1461       {
1462 	rtx base = XEXP (in, 0);
1463 	rtx offset;
1464 
1465 	switch (GET_CODE (base))
1466 	  {
1467 	  case REG:
1468 	    if (!reversed)
1469 	      {
1470 		out[0] = adjust_automodify_address
1471 		  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1472 		out[1] = adjust_automodify_address
1473 		  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1474 	      }
1475 	    else
1476 	      {
1477 		/* Reversal requires a pre-increment, which can only
1478 		   be done as a separate insn.  */
1479 		emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1480 		out[0] = adjust_automodify_address
1481 		  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1482 		out[1] = adjust_address (in, DImode, 0);
1483 	      }
1484 	    break;
1485 
1486 	  case POST_INC:
1487 	    gcc_assert (!reversed && !dead);
1488 
1489 	    /* Just do the increment in two steps.  */
1490 	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
1491 	    out[1] = adjust_automodify_address (in, DImode, 0, 8);
1492 	    break;
1493 
1494 	  case POST_DEC:
1495 	    gcc_assert (!reversed && !dead);
1496 
1497 	    /* Add 8, subtract 24.  */
1498 	    base = XEXP (base, 0);
1499 	    out[0] = adjust_automodify_address
1500 	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1501 	    out[1] = adjust_automodify_address
1502 	      (in, DImode,
1503 	       gen_rtx_POST_MODIFY (Pmode, base,
1504 				    plus_constant (Pmode, base, -24)),
1505 	       8);
1506 	    break;
1507 
1508 	  case POST_MODIFY:
1509 	    gcc_assert (!reversed && !dead);
1510 
1511 	    /* Extract and adjust the modification.  This case is
1512 	       trickier than the others, because we might have an
1513 	       index register, or we might have a combined offset that
1514 	       doesn't fit a signed 9-bit displacement field.  We can
1515 	       assume the incoming expression is already legitimate.  */
1516 	    offset = XEXP (base, 1);
1517 	    base = XEXP (base, 0);
1518 
1519 	    out[0] = adjust_automodify_address
1520 	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1521 
1522 	    if (GET_CODE (XEXP (offset, 1)) == REG)
1523 	      {
1524 		/* Can't adjust the postmodify to match.  Emit the
1525 		   original, then a separate addition insn.  */
1526 		out[1] = adjust_automodify_address (in, DImode, 0, 8);
1527 		fixup = gen_adddi3 (base, base, GEN_INT (-8));
1528 	      }
1529 	    else
1530 	      {
1531 		gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1532 		if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1533 		  {
1534 		    /* Again the postmodify cannot be made to match,
1535 		       but in this case it's more efficient to get rid
1536 		       of the postmodify entirely and fix up with an
1537 		       add insn.  */
1538 		    out[1] = adjust_automodify_address (in, DImode, base, 8);
1539 		    fixup = gen_adddi3
1540 		      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1541 		  }
1542 		else
1543 		  {
1544 		    /* Combined offset still fits in the displacement field.
1545 		       (We cannot overflow it at the high end.)  */
1546 		    out[1] = adjust_automodify_address
1547 		      (in, DImode, gen_rtx_POST_MODIFY
1548 		       (Pmode, base, gen_rtx_PLUS
1549 			(Pmode, base,
1550 			 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1551 		       8);
1552 		  }
1553 	      }
1554 	    break;
1555 
1556 	  default:
1557 	    gcc_unreachable ();
1558 	  }
1559 	break;
1560       }
1561 
1562     default:
1563       gcc_unreachable ();
1564     }
1565 
1566   return fixup;
1567 }
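
/* Illustrative sketch only (not part of the build): when the address is a
   plain base register and the destination does not overlap it, the REG case
   above produces two DImode accesses whose auto-increments cancel, roughly

	ld8 out0 = [base], 8		(post-increment by 8)
	ld8 out1 = [base], -8		(post-decrement restores base;
					 dropped when the base is dead)

   Any base-register correction that cannot be folded into a post-modify is
   returned as the FIXUP insn for the caller to emit afterwards.  */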
1568 
1569 /* Split a TImode or TFmode move instruction after reload.
1570    This is used by *movtf_internal and *movti_internal.  */
1571 void
1572 ia64_split_tmode_move (rtx operands[])
1573 {
1574   rtx in[2], out[2], insn;
1575   rtx fixup[2];
1576   bool dead = false;
1577   bool reversed = false;
1578 
1579   /* It is possible for reload to decide to overwrite a pointer with
1580      the value it points to.  In that case we have to do the loads in
1581      the appropriate order so that the pointer is not destroyed too
1582      early.  Also we must not generate a postmodify for that second
1583      load, or rws_access_regno will die.  And we must not generate a
1584      postmodify for the second load if the destination register
1585      overlaps with the base register.  */
1586   if (GET_CODE (operands[1]) == MEM
1587       && reg_overlap_mentioned_p (operands[0], operands[1]))
1588     {
1589       rtx base = XEXP (operands[1], 0);
1590       while (GET_CODE (base) != REG)
1591 	base = XEXP (base, 0);
1592 
1593       if (REGNO (base) == REGNO (operands[0]))
1594 	reversed = true;
1595 
1596       if (refers_to_regno_p (REGNO (operands[0]),
1597 			     REGNO (operands[0])+2,
1598 			     base, 0))
1599 	dead = true;
1600     }
1601   /* Another reason to do the moves in reversed order is if the first
1602      element of the target register pair is also the second element of
1603      the source register pair.  */
1604   if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1605       && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1606     reversed = true;
1607 
1608   fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1609   fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1610 
1611 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
1612   if (GET_CODE (EXP) == MEM						\
1613       && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
1614 	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
1615 	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
1616     add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
1617 
1618   insn = emit_insn (gen_rtx_SET (out[0], in[0]));
1619   MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1620   MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1621 
1622   insn = emit_insn (gen_rtx_SET (out[1], in[1]));
1623   MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1624   MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1625 
1626   if (fixup[0])
1627     emit_insn (fixup[0]);
1628   if (fixup[1])
1629     emit_insn (fixup[1]);
1630 
1631 #undef MAYBE_ADD_REG_INC_NOTE
1632 }
1633 
1634 /* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
1635    through memory plus an extra GR scratch register.  Except that you can
1636    either get the first from TARGET_SECONDARY_MEMORY_NEEDED or the second
1637    from SECONDARY_RELOAD_CLASS, but not both.
1638 
1639    We got into problems in the first place by allowing a construct like
1640    (subreg:XF (reg:TI)), which we got from a union containing a long double.
1641    This solution attempts to prevent this situation from occurring.  When
1642    we see something like the above, we spill the inner register to memory.  */
1643 
1644 static rtx
1645 spill_xfmode_rfmode_operand (rtx in, int force, machine_mode mode)
1646 {
1647   if (GET_CODE (in) == SUBREG
1648       && GET_MODE (SUBREG_REG (in)) == TImode
1649       && GET_CODE (SUBREG_REG (in)) == REG)
1650     {
1651       rtx memt = assign_stack_temp (TImode, 16);
1652       emit_move_insn (memt, SUBREG_REG (in));
1653       return adjust_address (memt, mode, 0);
1654     }
1655   else if (force && GET_CODE (in) == REG)
1656     {
1657       rtx memx = assign_stack_temp (mode, 16);
1658       emit_move_insn (memx, in);
1659       return memx;
1660     }
1661   else
1662     return in;
1663 }
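
/* For instance (illustration only): given (subreg:XF (reg:TI r)), the helper
   above stores the TImode register into a fresh 16-byte-aligned stack slot
   and returns an XFmode view of that slot, so the troublesome subreg never
   survives to reload.  */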
1664 
1665 /* Expand the movxf or movrf pattern (MODE says which) with the given
1666    OPERANDS, returning true if the pattern should then invoke
1667    DONE.  */
1668 
1669 bool
1670 ia64_expand_movxf_movrf (machine_mode mode, rtx operands[])
1671 {
1672   rtx op0 = operands[0];
1673 
1674   if (GET_CODE (op0) == SUBREG)
1675     op0 = SUBREG_REG (op0);
1676 
1677   /* We must support XFmode loads into general registers for stdarg/vararg,
1678      unprototyped calls, and a rare case where a long double is passed as
1679      an argument after a float HFA fills the FP registers.  We split them into
1680      DImode loads for convenience.  We also need to support XFmode stores
1681      for the last case.  This case does not happen for stdarg/vararg routines,
1682      because we do a block store to memory of unnamed arguments.  */
1683 
1684   if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1685     {
1686       rtx out[2];
1687 
1688       /* We're hoping to transform everything that deals with XFmode
1689 	 quantities and GR registers early in the compiler.  */
1690       gcc_assert (can_create_pseudo_p ());
1691 
1692       /* Struct to register can just use TImode instead.  */
1693       if ((GET_CODE (operands[1]) == SUBREG
1694 	   && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1695 	  || (GET_CODE (operands[1]) == REG
1696 	      && GR_REGNO_P (REGNO (operands[1]))))
1697 	{
1698 	  rtx op1 = operands[1];
1699 
1700 	  if (GET_CODE (op1) == SUBREG)
1701 	    op1 = SUBREG_REG (op1);
1702 	  else
1703 	    op1 = gen_rtx_REG (TImode, REGNO (op1));
1704 
1705 	  emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1706 	  return true;
1707 	}
1708 
1709       if (GET_CODE (operands[1]) == CONST_DOUBLE)
1710 	{
1711 	  /* Don't word-swap when reading in the constant.  */
1712 	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1713 			  operand_subword (operands[1], WORDS_BIG_ENDIAN,
1714 					   0, mode));
1715 	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1716 			  operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1717 					   0, mode));
1718 	  return true;
1719 	}
1720 
1721       /* If the quantity is in a register not known to be GR, spill it.  */
1722       if (register_operand (operands[1], mode))
1723 	operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1724 
1725       gcc_assert (GET_CODE (operands[1]) == MEM);
1726 
1727       /* Don't word-swap when reading in the value.  */
1728       out[0] = gen_rtx_REG (DImode, REGNO (op0));
1729       out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1730 
1731       emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1732       emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1733       return true;
1734     }
1735 
1736   if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1737     {
1738       /* We're hoping to transform everything that deals with XFmode
1739 	 quantities and GR registers early in the compiler.  */
1740       gcc_assert (can_create_pseudo_p ());
1741 
1742       /* Op0 can't be a GR_REG here, as that case is handled above.
1743 	 If op0 is a register, then we spill op1, so that we now have a
1744 	 MEM operand.  This requires creating an XFmode subreg of a TImode reg
1745 	 to force the spill.  */
1746       if (register_operand (operands[0], mode))
1747 	{
1748 	  rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1749 	  op1 = gen_rtx_SUBREG (mode, op1, 0);
1750 	  operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1751 	}
1752 
1753       else
1754 	{
1755 	  rtx in[2];
1756 
1757 	  gcc_assert (GET_CODE (operands[0]) == MEM);
1758 
1759 	  /* Don't word-swap when writing out the value.  */
1760 	  in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1761 	  in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1762 
1763 	  emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1764 	  emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1765 	  return true;
1766 	}
1767     }
1768 
1769   if (!reload_in_progress && !reload_completed)
1770     {
1771       operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1772 
1773       if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1774 	{
1775 	  rtx memt, memx, in = operands[1];
1776 	  if (CONSTANT_P (in))
1777 	    in = validize_mem (force_const_mem (mode, in));
1778 	  if (GET_CODE (in) == MEM)
1779 	    memt = adjust_address (in, TImode, 0);
1780 	  else
1781 	    {
1782 	      memt = assign_stack_temp (TImode, 16);
1783 	      memx = adjust_address (memt, mode, 0);
1784 	      emit_move_insn (memx, in);
1785 	    }
1786 	  emit_move_insn (op0, memt);
1787 	  return true;
1788 	}
1789 
1790       if (!ia64_move_ok (operands[0], operands[1]))
1791 	operands[1] = force_reg (mode, operands[1]);
1792     }
1793 
1794   return false;
1795 }
1796 
1797 /* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1798    with the expression that holds the compare result (in VOIDmode).  */
1799 
1800 static GTY(()) rtx cmptf_libfunc;
1801 
1802 void
1803 ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
1804 {
1805   enum rtx_code code = GET_CODE (*expr);
1806   rtx cmp;
1807 
1808   /* If we have a BImode input, then we already have a compare result, and
1809      do not need to emit another comparison.  */
1810   if (GET_MODE (*op0) == BImode)
1811     {
1812       gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1813       cmp = *op0;
1814     }
1815   /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1816      magic number as its third argument indicating what to do.  The return
1817      value is an integer to be compared against zero.  */
1818   else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
1819     {
1820       enum qfcmp_magic {
1821 	QCMP_INV = 1,	/* Raise FP_INVALID on NaNs as a side effect.  */
1822 	QCMP_UNORD = 2,
1823 	QCMP_EQ = 4,
1824 	QCMP_LT = 8,
1825 	QCMP_GT = 16
1826       };
1827       int magic;
1828       enum rtx_code ncode;
1829       rtx ret;
1830 
1831       gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
1832       switch (code)
1833 	{
1834 	  /* 1 = equal, 0 = not equal.  Equality operators do
1835 	     not raise FP_INVALID when given a NaN operand.  */
1836 	case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
1837 	case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
1838 	  /* isunordered() from C99.  */
1839 	case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
1840 	case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
1841 	  /* Relational operators raise FP_INVALID when given
1842 	     a NaN operand.  */
1843 	case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
1844 	case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1845 	case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
1846 	case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1847           /* Unordered relational operators do not raise FP_INVALID
1848 	     when given a NaN operand.  */
1849 	case UNLT:    magic = QCMP_LT        |QCMP_UNORD; ncode = NE; break;
1850 	case UNLE:    magic = QCMP_LT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1851 	case UNGT:    magic = QCMP_GT        |QCMP_UNORD; ncode = NE; break;
1852 	case UNGE:    magic = QCMP_GT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1853 	  /* Not supported.  */
1854 	case UNEQ:
1855 	case LTGT:
1856 	default: gcc_unreachable ();
1857 	}
1858 
1859       start_sequence ();
1860 
1861       ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode,
1862 				     *op0, TFmode, *op1, TFmode,
1863 				     GEN_INT (magic), DImode);
1864       cmp = gen_reg_rtx (BImode);
1865       emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (ncode, BImode,
1866 						   ret, const0_rtx)));
1867 
1868       rtx_insn *insns = get_insns ();
1869       end_sequence ();
1870 
1871       emit_libcall_block (insns, cmp, cmp,
1872 			  gen_rtx_fmt_ee (code, BImode, *op0, *op1));
1873       code = NE;
1874     }
1875   else
1876     {
1877       cmp = gen_reg_rtx (BImode);
1878       emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
1879       code = NE;
1880     }
1881 
1882   *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
1883   *op0 = cmp;
1884   *op1 = const0_rtx;
1885 }
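
/* Illustrative example of the HP-UX path above: a TFmode "a < b" is lowered,
   in effect, to

	t = _U_Qfcmp (a, b, QCMP_LT | QCMP_INV);
	cmp = (t != 0);

   i.e. one library call carrying the magic mask, followed by an ordinary
   BImode test of the DImode result against zero.  */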
1886 
1887 /* Generate an integral vector comparison.  Return true if the condition has
1888    been reversed, and so the sense of the comparison should be inverted.  */
1889 
1890 static bool
1891 ia64_expand_vecint_compare (enum rtx_code code, machine_mode mode,
1892 			    rtx dest, rtx op0, rtx op1)
1893 {
1894   bool negate = false;
1895   rtx x;
1896 
1897   /* Canonicalize the comparison to EQ, GT, GTU.  */
1898   switch (code)
1899     {
1900     case EQ:
1901     case GT:
1902     case GTU:
1903       break;
1904 
1905     case NE:
1906     case LE:
1907     case LEU:
1908       code = reverse_condition (code);
1909       negate = true;
1910       break;
1911 
1912     case GE:
1913     case GEU:
1914       code = reverse_condition (code);
1915       negate = true;
1916       /* FALLTHRU */
1917 
1918     case LT:
1919     case LTU:
1920       code = swap_condition (code);
1921       x = op0, op0 = op1, op1 = x;
1922       break;
1923 
1924     default:
1925       gcc_unreachable ();
1926     }
1927 
1928   /* Unsigned parallel compare is not supported by the hardware.  Play some
1929      tricks to turn this into a signed comparison against 0.  */
1930   if (code == GTU)
1931     {
1932       switch (mode)
1933 	{
1934 	case E_V2SImode:
1935 	  {
1936 	    rtx t1, t2, mask;
1937 
1938 	    /* Subtract (-(INT MAX) - 1) from both operands to flip the sign
1939 	       bit, turning the unsigned compare into a signed one.  */
1940 	    mask = gen_int_mode (0x80000000, SImode);
1941 	    mask = gen_const_vec_duplicate (V2SImode, mask);
1942 	    mask = force_reg (mode, mask);
1943 	    t1 = gen_reg_rtx (mode);
1944 	    emit_insn (gen_subv2si3 (t1, op0, mask));
1945 	    t2 = gen_reg_rtx (mode);
1946 	    emit_insn (gen_subv2si3 (t2, op1, mask));
1947 	    op0 = t1;
1948 	    op1 = t2;
1949 	    code = GT;
1950 	  }
1951 	  break;
1952 
1953 	case E_V8QImode:
1954 	case E_V4HImode:
1955 	  /* Perform a parallel unsigned saturating subtraction.  */
1956 	  x = gen_reg_rtx (mode);
1957 	  emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, op0, op1)));
1958 
1959 	  code = EQ;
1960 	  op0 = x;
1961 	  op1 = CONST0_RTX (mode);
1962 	  negate = !negate;
1963 	  break;
1964 
1965 	default:
1966 	  gcc_unreachable ();
1967 	}
1968     }
1969 
1970   x = gen_rtx_fmt_ee (code, mode, op0, op1);
1971   emit_insn (gen_rtx_SET (dest, x));
1972 
1973   return negate;
1974 }
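
/* Worked example (illustration): a GEU comparison is first reversed to LTU
   with NEGATE set, then swapped to GTU with the operands exchanged; for
   V8QImode and V4HImode the GTU trick above turns that into an EQ test of a
   saturating subtraction against zero and flips NEGATE again, so in this
   case the function returns false and the caller performs no final
   inversion.  */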
1975 
1976 /* Emit an integral vector conditional move.  */
1977 
1978 void
1979 ia64_expand_vecint_cmov (rtx operands[])
1980 {
1981   machine_mode mode = GET_MODE (operands[0]);
1982   enum rtx_code code = GET_CODE (operands[3]);
1983   bool negate;
1984   rtx cmp, x, ot, of;
1985 
1986   cmp = gen_reg_rtx (mode);
1987   negate = ia64_expand_vecint_compare (code, mode, cmp,
1988 				       operands[4], operands[5]);
1989 
1990   ot = operands[1+negate];
1991   of = operands[2-negate];
1992 
1993   if (ot == CONST0_RTX (mode))
1994     {
1995       if (of == CONST0_RTX (mode))
1996 	{
1997 	  emit_move_insn (operands[0], ot);
1998 	  return;
1999 	}
2000 
2001       x = gen_rtx_NOT (mode, cmp);
2002       x = gen_rtx_AND (mode, x, of);
2003       emit_insn (gen_rtx_SET (operands[0], x));
2004     }
2005   else if (of == CONST0_RTX (mode))
2006     {
2007       x = gen_rtx_AND (mode, cmp, ot);
2008       emit_insn (gen_rtx_SET (operands[0], x));
2009     }
2010   else
2011     {
2012       rtx t, f;
2013 
2014       t = gen_reg_rtx (mode);
2015       x = gen_rtx_AND (mode, cmp, operands[1+negate]);
2016       emit_insn (gen_rtx_SET (t, x));
2017 
2018       f = gen_reg_rtx (mode);
2019       x = gen_rtx_NOT (mode, cmp);
2020       x = gen_rtx_AND (mode, x, operands[2-negate]);
2021       emit_insn (gen_rtx_SET (f, x));
2022 
2023       x = gen_rtx_IOR (mode, t, f);
2024       emit_insn (gen_rtx_SET (operands[0], x));
2025     }
2026 }
2027 
2028 /* Emit an integral vector min or max operation.  Return true if all done.  */
2029 
2030 bool
2031 ia64_expand_vecint_minmax (enum rtx_code code, machine_mode mode,
2032 			   rtx operands[])
2033 {
2034   rtx xops[6];
2035 
2036   /* These four combinations are supported directly.  */
2037   if (mode == V8QImode && (code == UMIN || code == UMAX))
2038     return false;
2039   if (mode == V4HImode && (code == SMIN || code == SMAX))
2040     return false;
2041 
2042   /* This combination can be implemented with only saturating subtraction.  */
2043   if (mode == V4HImode && code == UMAX)
2044     {
2045       rtx x, tmp = gen_reg_rtx (mode);
2046 
2047       x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
2048       emit_insn (gen_rtx_SET (tmp, x));
2049 
2050       emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
2051       return true;
2052     }
2053 
2054   /* Everything else implemented via vector comparisons.  */
2055   xops[0] = operands[0];
2056   xops[4] = xops[1] = operands[1];
2057   xops[5] = xops[2] = operands[2];
2058 
2059   switch (code)
2060     {
2061     case UMIN:
2062       code = LTU;
2063       break;
2064     case UMAX:
2065       code = GTU;
2066       break;
2067     case SMIN:
2068       code = LT;
2069       break;
2070     case SMAX:
2071       code = GT;
2072       break;
2073     default:
2074       gcc_unreachable ();
2075     }
2076   xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
2077 
2078   ia64_expand_vecint_cmov (xops);
2079   return true;
2080 }
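
/* The V4HImode UMAX shortcut above rests on the identity (sketch):
   umax (a, b) == us_minus (a, b) + b, since the saturating subtraction is
   a - b when a > b and 0 otherwise.  All remaining cases are funnelled
   through ia64_expand_vecint_cmov with the synthesized comparison in
   XOPS[3].  */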
2081 
2082 /* The vectors LO and HI each contain N halves of a double-wide vector.
2083    Reassemble either the first N/2 or the second N/2 elements.  */
2084 
2085 void
2086 ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
2087 {
2088   machine_mode vmode = GET_MODE (lo);
2089   unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
2090   struct expand_vec_perm_d d;
2091   bool ok;
2092 
2093   d.target = gen_lowpart (vmode, out);
2094   d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
2095   d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
2096   d.vmode = vmode;
2097   d.nelt = nelt;
2098   d.one_operand_p = false;
2099   d.testing_p = false;
2100 
2101   high = (highp ? nelt / 2 : 0);
2102   for (i = 0; i < nelt / 2; ++i)
2103     {
2104       d.perm[i * 2] = i + high;
2105       d.perm[i * 2 + 1] = i + high + nelt;
2106     }
2107 
2108   ok = ia64_expand_vec_perm_const_1 (&d);
2109   gcc_assert (ok);
2110 }
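
/* Example permutation (illustration): for V8QImode with HIGHP false the loop
   above builds perm = { 0, 8, 1, 9, 2, 10, 3, 11 }, interleaving the low
   halves of the two operands; with HIGHP true it starts at NELT/2 and yields
   { 4, 12, 5, 13, 6, 14, 7, 15 }.  */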
2111 
2112 /* Return a vector of the sign-extension of VEC.  */
2113 
2114 static rtx
2115 ia64_unpack_sign (rtx vec, bool unsignedp)
2116 {
2117   machine_mode mode = GET_MODE (vec);
2118   rtx zero = CONST0_RTX (mode);
2119 
2120   if (unsignedp)
2121     return zero;
2122   else
2123     {
2124       rtx sign = gen_reg_rtx (mode);
2125       bool neg;
2126 
2127       neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
2128       gcc_assert (!neg);
2129 
2130       return sign;
2131     }
2132 }
2133 
2134 /* Emit an integral vector unpack operation.  */
2135 
2136 void
2137 ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
2138 {
2139   rtx sign = ia64_unpack_sign (operands[1], unsignedp);
2140   ia64_unpack_assemble (operands[0], operands[1], sign, highp);
2141 }
2142 
2143 /* Emit an integral vector widening sum operation.  */
2144 
2145 void
2146 ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
2147 {
2148   machine_mode wmode;
2149   rtx l, h, t, sign;
2150 
2151   sign = ia64_unpack_sign (operands[1], unsignedp);
2152 
2153   wmode = GET_MODE (operands[0]);
2154   l = gen_reg_rtx (wmode);
2155   h = gen_reg_rtx (wmode);
2156 
2157   ia64_unpack_assemble (l, operands[1], sign, false);
2158   ia64_unpack_assemble (h, operands[1], sign, true);
2159 
2160   t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
2161   t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
2162   if (t != operands[0])
2163     emit_move_insn (operands[0], t);
2164 }
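
/* In other words (sketch): the narrow input is widened into low and high
   halves L and H via the sign vector from ia64_unpack_sign, and the result
   is computed as H + (L + operands[2]), letting expand_binop reuse
   operands[0] for the final addition when it can.  */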
2165 
2166 /* Emit the appropriate sequence for a call.  */
2167 
2168 void
2169 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
2170 		  int sibcall_p)
2171 {
2172   rtx insn, b0;
2173 
2174   addr = XEXP (addr, 0);
2175   addr = convert_memory_address (DImode, addr);
2176   b0 = gen_rtx_REG (DImode, R_BR (0));
2177 
2178   /* ??? Should do this for functions known to bind local too.  */
2179   if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2180     {
2181       if (sibcall_p)
2182 	insn = gen_sibcall_nogp (addr);
2183       else if (! retval)
2184 	insn = gen_call_nogp (addr, b0);
2185       else
2186 	insn = gen_call_value_nogp (retval, addr, b0);
2187       insn = emit_call_insn (insn);
2188     }
2189   else
2190     {
2191       if (sibcall_p)
2192 	insn = gen_sibcall_gp (addr);
2193       else if (! retval)
2194 	insn = gen_call_gp (addr, b0);
2195       else
2196 	insn = gen_call_value_gp (retval, addr, b0);
2197       insn = emit_call_insn (insn);
2198 
2199       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2200     }
2201 
2202   if (sibcall_p)
2203     use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
2204 
2205   if (TARGET_ABI_OPEN_VMS)
2206     use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2207 	     gen_rtx_REG (DImode, GR_REG (25)));
2208 }
2209 
2210 static void
2211 reg_emitted (enum ia64_frame_regs r)
2212 {
2213   if (emitted_frame_related_regs[r] == 0)
2214     emitted_frame_related_regs[r] = current_frame_info.r[r];
2215   else
2216     gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2217 }
2218 
2219 static int
2220 get_reg (enum ia64_frame_regs r)
2221 {
2222   reg_emitted (r);
2223   return current_frame_info.r[r];
2224 }
2225 
2226 static bool
2227 is_emitted (int regno)
2228 {
2229   unsigned int r;
2230 
2231   for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2232     if (emitted_frame_related_regs[r] == regno)
2233       return true;
2234   return false;
2235 }
2236 
2237 void
2238 ia64_reload_gp (void)
2239 {
2240   rtx tmp;
2241 
2242   if (current_frame_info.r[reg_save_gp])
2243     {
2244       tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2245     }
2246   else
2247     {
2248       HOST_WIDE_INT offset;
2249       rtx offset_r;
2250 
2251       offset = (current_frame_info.spill_cfa_off
2252 	        + current_frame_info.spill_size);
2253       if (frame_pointer_needed)
2254         {
2255           tmp = hard_frame_pointer_rtx;
2256           offset = -offset;
2257         }
2258       else
2259         {
2260           tmp = stack_pointer_rtx;
2261           offset = current_frame_info.total_size - offset;
2262         }
2263 
2264       offset_r = GEN_INT (offset);
2265       if (satisfies_constraint_I (offset_r))
2266         emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
2267       else
2268         {
2269           emit_move_insn (pic_offset_table_rtx, offset_r);
2270           emit_insn (gen_adddi3 (pic_offset_table_rtx,
2271 			         pic_offset_table_rtx, tmp));
2272         }
2273 
2274       tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2275     }
2276 
2277   emit_move_insn (pic_offset_table_rtx, tmp);
2278 }
2279 
2280 void
2281 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2282 		 rtx scratch_b, int noreturn_p, int sibcall_p)
2283 {
2284   rtx insn;
2285   bool is_desc = false;
2286 
2287   /* If we find we're calling through a register, then we're actually
2288      calling through a descriptor, so load up the values.  */
2289   if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2290     {
2291       rtx tmp;
2292       bool addr_dead_p;
2293 
2294       /* ??? We are currently constrained to *not* use peep2, because
2295 	 we can legitimately change the global lifetime of the GP
2296 	 (in the form of killing where previously live).  This is
2297 	 because a call through a descriptor doesn't use the previous
2298 	 value of the GP, while a direct call does, and we do not
2299 	 commit to either form until the split here.
2300 
2301 	 That said, this means that we lack precise life info for
2302 	 whether ADDR is dead after this call.  This is not terribly
2303 	 important, since we can fix things up essentially for free
2304 	 with the POST_DEC below, but it's nice to not use it when we
2305 	 can immediately tell it's not necessary.  */
2306       addr_dead_p = ((noreturn_p || sibcall_p
2307 		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2308 					    REGNO (addr)))
2309 		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2310 
2311       /* Load the code address into scratch_b.  */
2312       tmp = gen_rtx_POST_INC (Pmode, addr);
2313       tmp = gen_rtx_MEM (Pmode, tmp);
2314       emit_move_insn (scratch_r, tmp);
2315       emit_move_insn (scratch_b, scratch_r);
2316 
2317       /* Load the GP address.  If ADDR is not dead here, then we must
2318 	 revert the change made above via the POST_INCREMENT.  */
2319       if (!addr_dead_p)
2320 	tmp = gen_rtx_POST_DEC (Pmode, addr);
2321       else
2322 	tmp = addr;
2323       tmp = gen_rtx_MEM (Pmode, tmp);
2324       emit_move_insn (pic_offset_table_rtx, tmp);
2325 
2326       is_desc = true;
2327       addr = scratch_b;
2328     }
2329 
2330   if (sibcall_p)
2331     insn = gen_sibcall_nogp (addr);
2332   else if (retval)
2333     insn = gen_call_value_nogp (retval, addr, retaddr);
2334   else
2335     insn = gen_call_nogp (addr, retaddr);
2336   emit_call_insn (insn);
2337 
2338   if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2339     ia64_reload_gp ();
2340 }
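
/* Sketch of the descriptor path above (illustrative mnemonics only; the
   actual register choices depend on the operands passed in):

	ld8 scratch_r = [addr], 8	(code address, post-increment)
	mov scratch_b = scratch_r
	ld8 gp = [addr]			(gp word; post-decrement instead
					 when ADDR must be preserved)
	br.call b0 = scratch_b

   with ia64_reload_gp emitted afterwards when the gp may have changed.  */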
2341 
2342 /* Expand an atomic operation.  We want to perform MEM <CODE>= VAL atomically.
2343 
2344    This differs from the generic code in that we know about the zero-extending
2345    properties of cmpxchg, and the zero-extending requirements of ar.ccv.  We
2346    also know that ld.acq+cmpxchg.rel equals a full barrier.
2347 
2348    The loop we want to generate looks like
2349 
2350 	cmp_reg = mem;
2351       label:
2352         old_reg = cmp_reg;
2353 	new_reg = cmp_reg op val;
2354 	cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2355 	if (cmp_reg != old_reg)
2356 	  goto label;
2357 
2358    Note that we only do the plain load from memory once.  Subsequent
2359    iterations use the value loaded by the compare-and-swap pattern.  */
2360 
2361 void
2362 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2363 		       rtx old_dst, rtx new_dst, enum memmodel model)
2364 {
2365   machine_mode mode = GET_MODE (mem);
2366   rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2367   enum insn_code icode;
2368 
2369   /* Special case for using fetchadd.  */
2370   if ((mode == SImode || mode == DImode)
2371       && (code == PLUS || code == MINUS)
2372       && fetchadd_operand (val, mode))
2373     {
2374       if (code == MINUS)
2375 	val = GEN_INT (-INTVAL (val));
2376 
2377       if (!old_dst)
2378         old_dst = gen_reg_rtx (mode);
2379 
2380       switch (model)
2381 	{
2382 	case MEMMODEL_ACQ_REL:
2383 	case MEMMODEL_SEQ_CST:
2384 	case MEMMODEL_SYNC_SEQ_CST:
2385 	  emit_insn (gen_memory_barrier ());
2386 	  /* FALLTHRU */
2387 	case MEMMODEL_RELAXED:
2388 	case MEMMODEL_ACQUIRE:
2389 	case MEMMODEL_SYNC_ACQUIRE:
2390 	case MEMMODEL_CONSUME:
2391 	  if (mode == SImode)
2392 	    icode = CODE_FOR_fetchadd_acq_si;
2393 	  else
2394 	    icode = CODE_FOR_fetchadd_acq_di;
2395 	  break;
2396 	case MEMMODEL_RELEASE:
2397 	case MEMMODEL_SYNC_RELEASE:
2398 	  if (mode == SImode)
2399 	    icode = CODE_FOR_fetchadd_rel_si;
2400 	  else
2401 	    icode = CODE_FOR_fetchadd_rel_di;
2402 	  break;
2403 
2404 	default:
2405 	  gcc_unreachable ();
2406 	}
2407 
2408       emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2409 
2410       if (new_dst)
2411 	{
2412 	  new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2413 					 true, OPTAB_WIDEN);
2414 	  if (new_reg != new_dst)
2415 	    emit_move_insn (new_dst, new_reg);
2416 	}
2417       return;
2418     }
2419 
2420   /* Because of the volatile mem read, we get an ld.acq, which is the
2421      front half of the full barrier.  The end half is the cmpxchg.rel.
2422      For relaxed and release memory models, we don't need this.  But we
2423      also don't bother trying to prevent it either.  */
2424   gcc_assert (is_mm_relaxed (model) || is_mm_release (model)
2425 	      || MEM_VOLATILE_P (mem));
2426 
2427   old_reg = gen_reg_rtx (DImode);
2428   cmp_reg = gen_reg_rtx (DImode);
2429   label = gen_label_rtx ();
2430 
2431   if (mode != DImode)
2432     {
2433       val = simplify_gen_subreg (DImode, val, mode, 0);
2434       emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2435     }
2436   else
2437     emit_move_insn (cmp_reg, mem);
2438 
2439   emit_label (label);
2440 
2441   ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2442   emit_move_insn (old_reg, cmp_reg);
2443   emit_move_insn (ar_ccv, cmp_reg);
2444 
2445   if (old_dst)
2446     emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2447 
2448   new_reg = cmp_reg;
2449   if (code == NOT)
2450     {
2451       new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2452 				     true, OPTAB_DIRECT);
2453       new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
2454     }
2455   else
2456     new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2457 				   true, OPTAB_DIRECT);
2458 
2459   if (mode != DImode)
2460     new_reg = gen_lowpart (mode, new_reg);
2461   if (new_dst)
2462     emit_move_insn (new_dst, new_reg);
2463 
2464   switch (model)
2465     {
2466     case MEMMODEL_RELAXED:
2467     case MEMMODEL_ACQUIRE:
2468     case MEMMODEL_SYNC_ACQUIRE:
2469     case MEMMODEL_CONSUME:
2470       switch (mode)
2471 	{
2472 	case E_QImode: icode = CODE_FOR_cmpxchg_acq_qi;  break;
2473 	case E_HImode: icode = CODE_FOR_cmpxchg_acq_hi;  break;
2474 	case E_SImode: icode = CODE_FOR_cmpxchg_acq_si;  break;
2475 	case E_DImode: icode = CODE_FOR_cmpxchg_acq_di;  break;
2476 	default:
2477 	  gcc_unreachable ();
2478 	}
2479       break;
2480 
2481     case MEMMODEL_RELEASE:
2482     case MEMMODEL_SYNC_RELEASE:
2483     case MEMMODEL_ACQ_REL:
2484     case MEMMODEL_SEQ_CST:
2485     case MEMMODEL_SYNC_SEQ_CST:
2486       switch (mode)
2487 	{
2488 	case E_QImode: icode = CODE_FOR_cmpxchg_rel_qi;  break;
2489 	case E_HImode: icode = CODE_FOR_cmpxchg_rel_hi;  break;
2490 	case E_SImode: icode = CODE_FOR_cmpxchg_rel_si;  break;
2491 	case E_DImode: icode = CODE_FOR_cmpxchg_rel_di;  break;
2492 	default:
2493 	  gcc_unreachable ();
2494 	}
2495       break;
2496 
2497     default:
2498       gcc_unreachable ();
2499     }
2500 
2501   emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2502 
2503   emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
2504 }
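
/* Rough summary of the two paths above (illustration, not emitted text): an
   atomic PLUS/MINUS whose constant passes fetchadd_operand becomes a single
   fetchadd (preceded by a memory fence for the seq-cst and acq-rel models),
   while every other operation goes through the ld.acq / compare-and-swap
   retry loop described before the function, selecting the .acq cmpxchg
   variants for acquire-or-weaker models and the .rel variants otherwise.  */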
2505 
2506 /* Begin the assembly file.  */
2507 
2508 static void
2509 ia64_file_start (void)
2510 {
2511   default_file_start ();
2512   emit_safe_across_calls ();
2513 }
2514 
2515 void
2516 emit_safe_across_calls (void)
2517 {
2518   unsigned int rs, re;
2519   int out_state;
2520 
2521   rs = 1;
2522   out_state = 0;
2523   while (1)
2524     {
2525       while (rs < 64 && call_used_or_fixed_reg_p (PR_REG (rs)))
2526 	rs++;
2527       if (rs >= 64)
2528 	break;
2529       for (re = rs + 1;
2530 	   re < 64 && ! call_used_or_fixed_reg_p (PR_REG (re)); re++)
2531 	continue;
2532       if (out_state == 0)
2533 	{
2534 	  fputs ("\t.pred.safe_across_calls ", asm_out_file);
2535 	  out_state = 1;
2536 	}
2537       else
2538 	fputc (',', asm_out_file);
2539       if (re == rs + 1)
2540 	fprintf (asm_out_file, "p%u", rs);
2541       else
2542 	fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2543       rs = re + 1;
2544     }
2545   if (out_state)
2546     fputc ('\n', asm_out_file);
2547 }
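
/* The directive emitted above looks like, e.g.,

	.pred.safe_across_calls p1-p5,p16-p63

   (the exact ranges depend on which predicate registers are call-used),
   naming the predicate registers that are preserved across calls.  */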
2548 
2549 /* Globalize a declaration.  */
2550 
2551 static void
2552 ia64_globalize_decl_name (FILE * stream, tree decl)
2553 {
2554   const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2555   tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2556   if (version_attr)
2557     {
2558       tree v = TREE_VALUE (TREE_VALUE (version_attr));
2559       const char *p = TREE_STRING_POINTER (v);
2560       fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2561     }
2562   targetm.asm_out.globalize_label (stream, name);
2563   if (TREE_CODE (decl) == FUNCTION_DECL)
2564     ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
2565 }
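
/* For instance (hypothetical attribute value): a declaration of a symbol foo
   carrying __attribute__((version_id ("12345"))) makes the code above print

	.alias foo#, "foo{12345}"

   before the usual globalization directive for the symbol.  */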
2566 
2567 /* Helper function for ia64_compute_frame_size: find an appropriate general
2568    register to spill some special register to.  SPECIAL_SPILL_MASK contains
2569    bits in GR0 to GR31 that have already been allocated by this routine.
2570    TRY_LOCALS is true if we should attempt to locate a local regnum.  */
2571 
2572 static int
2573 find_gr_spill (enum ia64_frame_regs r, int try_locals)
2574 {
2575   int regno;
2576 
2577   if (emitted_frame_related_regs[r] != 0)
2578     {
2579       regno = emitted_frame_related_regs[r];
2580       if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2581 	  && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
2582         current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2583       else if (crtl->is_leaf
2584                && regno >= GR_REG (1) && regno <= GR_REG (31))
2585         current_frame_info.gr_used_mask |= 1 << regno;
2586 
2587       return regno;
2588     }
2589 
2590   /* If this is a leaf function, first try an otherwise unused
2591      call-clobbered register.  */
2592   if (crtl->is_leaf)
2593     {
2594       for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2595 	if (! df_regs_ever_live_p (regno)
2596 	    && call_used_or_fixed_reg_p (regno)
2597 	    && ! fixed_regs[regno]
2598 	    && ! global_regs[regno]
2599 	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2600             && ! is_emitted (regno))
2601 	  {
2602 	    current_frame_info.gr_used_mask |= 1 << regno;
2603 	    return regno;
2604 	  }
2605     }
2606 
2607   if (try_locals)
2608     {
2609       regno = current_frame_info.n_local_regs;
2610       /* If there is a frame pointer, then we can't use loc79, because
2611 	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
2612 	 reg_name switching code in ia64_expand_prologue.  */
2613       while (regno < (80 - frame_pointer_needed))
2614 	if (! is_emitted (LOC_REG (regno++)))
2615 	  {
2616 	    current_frame_info.n_local_regs = regno;
2617 	    return LOC_REG (regno - 1);
2618 	  }
2619     }
2620 
2621   /* Failed to find a general register to spill to.  Must use stack.  */
2622   return 0;
2623 }
2624 
2625 /* In order to make for nice schedules, we try to allocate every temporary
2626    to a different register.  We must of course stay away from call-saved,
2627    fixed, and global registers.  We must also stay away from registers
2628    allocated in current_frame_info.gr_used_mask, since those include regs
2629    used all through the prologue.
2630 
2631    Any register allocated here must be used immediately.  The idea is to
2632    aid scheduling, not to solve data flow problems.  */
2633 
2634 static int last_scratch_gr_reg;
2635 
2636 static int
2637 next_scratch_gr_reg (void)
2638 {
2639   int i, regno;
2640 
2641   for (i = 0; i < 32; ++i)
2642     {
2643       regno = (last_scratch_gr_reg + i + 1) & 31;
2644       if (call_used_or_fixed_reg_p (regno)
2645 	  && ! fixed_regs[regno]
2646 	  && ! global_regs[regno]
2647 	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2648 	{
2649 	  last_scratch_gr_reg = regno;
2650 	  return regno;
2651 	}
2652     }
2653 
2654   /* There must be _something_ available.  */
2655   gcc_unreachable ();
2656 }
2657 
2658 /* Helper function for ia64_compute_frame_size, called through
2659    diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */
2660 
2661 static void
2662 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2663 {
2664   unsigned int regno = REGNO (reg);
2665   if (regno < 32)
2666     {
2667       unsigned int i, n = REG_NREGS (reg);
2668       for (i = 0; i < n; ++i)
2669 	current_frame_info.gr_used_mask |= 1 << (regno + i);
2670     }
2671 }
2672 
2673 
2674 /* Compute the frame layout for the current function and record it in
2675    current_frame_info.  SIZE is the number of bytes of space needed for
2676    local variables.  */
2677 
2678 static void
2679 ia64_compute_frame_size (HOST_WIDE_INT size)
2680 {
2681   HOST_WIDE_INT total_size;
2682   HOST_WIDE_INT spill_size = 0;
2683   HOST_WIDE_INT extra_spill_size = 0;
2684   HOST_WIDE_INT pretend_args_size;
2685   HARD_REG_SET mask;
2686   int n_spilled = 0;
2687   int spilled_gr_p = 0;
2688   int spilled_fr_p = 0;
2689   unsigned int regno;
2690   int min_regno;
2691   int max_regno;
2692   int i;
2693 
2694   if (current_frame_info.initialized)
2695     return;
2696 
2697   memset (&current_frame_info, 0, sizeof current_frame_info);
2698   CLEAR_HARD_REG_SET (mask);
2699 
2700   /* Don't allocate scratches to the return register.  */
2701   diddle_return_value (mark_reg_gr_used_mask, NULL);
2702 
2703   /* Don't allocate scratches to the EH scratch registers.  */
2704   if (cfun->machine->ia64_eh_epilogue_sp)
2705     mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2706   if (cfun->machine->ia64_eh_epilogue_bsp)
2707     mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2708 
2709   /* Static stack checking uses r2 and r3.  */
2710   if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
2711       || flag_stack_clash_protection)
2712     current_frame_info.gr_used_mask |= 0xc;
2713 
2714   /* Find the size of the register stack frame.  We have only 80 local
2715      registers, because we reserve 8 for the inputs and 8 for the
2716      outputs.  */
2717 
2718   /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2719      since we'll be adjusting that down later.  */
2720   regno = LOC_REG (78) + ! frame_pointer_needed;
2721   for (; regno >= LOC_REG (0); regno--)
2722     if (df_regs_ever_live_p (regno) && !is_emitted (regno))
2723       break;
2724   current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2725 
2726   /* For functions marked with the syscall_linkage attribute, we must mark
2727      all eight input registers as in use, so that locals aren't visible to
2728      the caller.  */
2729 
2730   if (cfun->machine->n_varargs > 0
2731       || lookup_attribute ("syscall_linkage",
2732 			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2733     current_frame_info.n_input_regs = 8;
2734   else
2735     {
2736       for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2737 	if (df_regs_ever_live_p (regno))
2738 	  break;
2739       current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2740     }
2741 
2742   for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2743     if (df_regs_ever_live_p (regno))
2744       break;
2745   i = regno - OUT_REG (0) + 1;
2746 
2747 #ifndef PROFILE_HOOK
2748   /* When -p profiling, we need one output register for the mcount argument.
2749      Likewise for -a profiling for the bb_init_func argument.  For -ax
2750      profiling, we need two output registers for the two bb_init_trace_func
2751      arguments.  */
2752   if (crtl->profile)
2753     i = MAX (i, 1);
2754 #endif
2755   current_frame_info.n_output_regs = i;
2756 
2757   /* ??? No rotating register support yet.  */
2758   current_frame_info.n_rotate_regs = 0;
2759 
2760   /* Discover which registers need spilling, and how much room that
2761      will take.  Begin with floating point and general registers,
2762      which will always wind up on the stack.  */
2763 
2764   for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2765     if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno))
2766       {
2767 	SET_HARD_REG_BIT (mask, regno);
2768 	spill_size += 16;
2769 	n_spilled += 1;
2770 	spilled_fr_p = 1;
2771       }
2772 
2773   for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2774     if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno))
2775       {
2776 	SET_HARD_REG_BIT (mask, regno);
2777 	spill_size += 8;
2778 	n_spilled += 1;
2779 	spilled_gr_p = 1;
2780       }
2781 
2782   for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2783     if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno))
2784       {
2785 	SET_HARD_REG_BIT (mask, regno);
2786 	spill_size += 8;
2787 	n_spilled += 1;
2788       }
2789 
2790   /* Now come all special registers that might get saved in other
2791      general registers.  */
2792 
2793   if (frame_pointer_needed)
2794     {
2795       current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
2796       /* If we did not get a register, then we take LOC79.  This is guaranteed
2797 	 to be free, even if regs_ever_live is already set, because this is
2798 	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
2799 	 as we don't count loc79 above.  */
2800       if (current_frame_info.r[reg_fp] == 0)
2801 	{
2802 	  current_frame_info.r[reg_fp] = LOC_REG (79);
2803 	  current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
2804 	}
2805     }
2806 
2807   if (! crtl->is_leaf)
2808     {
2809       /* Emit a save of BR0 if we call other functions.  Do this even
2810 	 if this function doesn't return, as EH depends on this to be
2811 	 able to unwind the stack.  */
2812       SET_HARD_REG_BIT (mask, BR_REG (0));
2813 
2814       current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2815       if (current_frame_info.r[reg_save_b0] == 0)
2816 	{
2817 	  extra_spill_size += 8;
2818 	  n_spilled += 1;
2819 	}
2820 
2821       /* Similarly for ar.pfs.  */
2822       SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2823       current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2824       if (current_frame_info.r[reg_save_ar_pfs] == 0)
2825 	{
2826 	  extra_spill_size += 8;
2827 	  n_spilled += 1;
2828 	}
2829 
2830       /* Similarly for gp.  Note that if we're calling setjmp, the stacked
2831 	 registers are clobbered, so we fall back to the stack.  */
2832       current_frame_info.r[reg_save_gp]
2833 	= (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
2834       if (current_frame_info.r[reg_save_gp] == 0)
2835 	{
2836 	  SET_HARD_REG_BIT (mask, GR_REG (1));
2837 	  spill_size += 8;
2838 	  n_spilled += 1;
2839 	}
2840     }
2841   else
2842     {
2843       if (df_regs_ever_live_p (BR_REG (0))
2844 	  && ! call_used_or_fixed_reg_p (BR_REG (0)))
2845 	{
2846 	  SET_HARD_REG_BIT (mask, BR_REG (0));
2847 	  extra_spill_size += 8;
2848 	  n_spilled += 1;
2849 	}
2850 
2851       if (df_regs_ever_live_p (AR_PFS_REGNUM))
2852 	{
2853 	  SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2854  	  current_frame_info.r[reg_save_ar_pfs]
2855             = find_gr_spill (reg_save_ar_pfs, 1);
2856 	  if (current_frame_info.r[reg_save_ar_pfs] == 0)
2857 	    {
2858 	      extra_spill_size += 8;
2859 	      n_spilled += 1;
2860 	    }
2861 	}
2862     }
2863 
2864   /* Unwind descriptor hackery: things are most efficient if we allocate
2865      consecutive GR save registers for RP, PFS, FP in that order. However,
2866      it is absolutely critical that FP get the only hard register that's
2867      guaranteed to be free, so we allocated it first.  If all three did
2868      happen to be allocated hard regs, and are consecutive, rearrange them
2869      into the preferred order now.
2870 
2871      If we have already emitted code for any of those registers,
2872      then it's already too late to change.  */
2873   min_regno = MIN (current_frame_info.r[reg_fp],
2874 		   MIN (current_frame_info.r[reg_save_b0],
2875 			current_frame_info.r[reg_save_ar_pfs]));
2876   max_regno = MAX (current_frame_info.r[reg_fp],
2877 		   MAX (current_frame_info.r[reg_save_b0],
2878 			current_frame_info.r[reg_save_ar_pfs]));
2879   if (min_regno > 0
2880       && min_regno + 2 == max_regno
2881       && (current_frame_info.r[reg_fp] == min_regno + 1
2882 	  || current_frame_info.r[reg_save_b0] == min_regno + 1
2883 	  || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2884       && (emitted_frame_related_regs[reg_save_b0] == 0
2885 	  || emitted_frame_related_regs[reg_save_b0] == min_regno)
2886       && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2887 	  || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2888       && (emitted_frame_related_regs[reg_fp] == 0
2889 	  || emitted_frame_related_regs[reg_fp] == min_regno + 2))
2890     {
2891       current_frame_info.r[reg_save_b0] = min_regno;
2892       current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2893       current_frame_info.r[reg_fp] = min_regno + 2;
2894     }
2895 
2896   /* See if we need to store the predicate register block.  */
2897   for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2898     if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno))
2899       break;
2900   if (regno <= PR_REG (63))
2901     {
2902       SET_HARD_REG_BIT (mask, PR_REG (0));
2903       current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2904       if (current_frame_info.r[reg_save_pr] == 0)
2905 	{
2906 	  extra_spill_size += 8;
2907 	  n_spilled += 1;
2908 	}
2909 
2910       /* ??? Mark them all as used so that register renaming and such
2911 	 are free to use them.  */
2912       for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2913 	df_set_regs_ever_live (regno, true);
2914     }
2915 
2916   /* If we're forced to use st8.spill, we're forced to save and restore
2917      ar.unat as well.  The check for existing liveness allows inline asm
2918      to touch ar.unat.  */
2919   if (spilled_gr_p || cfun->machine->n_varargs
2920       || df_regs_ever_live_p (AR_UNAT_REGNUM))
2921     {
2922       df_set_regs_ever_live (AR_UNAT_REGNUM, true);
2923       SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2924       current_frame_info.r[reg_save_ar_unat]
2925         = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2926       if (current_frame_info.r[reg_save_ar_unat] == 0)
2927 	{
2928 	  extra_spill_size += 8;
2929 	  n_spilled += 1;
2930 	}
2931     }
2932 
2933   if (df_regs_ever_live_p (AR_LC_REGNUM))
2934     {
2935       SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2936       current_frame_info.r[reg_save_ar_lc]
2937         = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2938       if (current_frame_info.r[reg_save_ar_lc] == 0)
2939 	{
2940 	  extra_spill_size += 8;
2941 	  n_spilled += 1;
2942 	}
2943     }
2944 
2945   /* If we have an odd number of words of pretend arguments written to
2946      the stack, then the FR save area will be unaligned.  We round the
2947      size of this area up to keep things 16 byte aligned.  */
2948   if (spilled_fr_p)
2949     pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
2950   else
2951     pretend_args_size = crtl->args.pretend_args_size;
2952 
2953   total_size = (spill_size + extra_spill_size + size + pretend_args_size
2954 		+ crtl->outgoing_args_size);
2955   total_size = IA64_STACK_ALIGN (total_size);
2956 
2957   /* We always use the 16-byte scratch area provided by the caller, but
2958      if we are a leaf function, there's no one to which we need to provide
2959      a scratch area.  However, if the function allocates dynamic stack space,
2960      the dynamic offset is computed early and contains STACK_POINTER_OFFSET,
2961      so we need to cope.  */
2962   if (crtl->is_leaf && !cfun->calls_alloca)
2963     total_size = MAX (0, total_size - 16);
2964 
2965   current_frame_info.total_size = total_size;
2966   current_frame_info.spill_cfa_off = pretend_args_size - 16;
2967   current_frame_info.spill_size = spill_size;
2968   current_frame_info.extra_spill_size = extra_spill_size;
2969   current_frame_info.mask = mask;
2970   current_frame_info.n_spilled = n_spilled;
2971   current_frame_info.initialized = reload_completed;
2972 }
2973 
2974 /* Worker function for TARGET_CAN_ELIMINATE.  */
2975 
2976 bool
2977 ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2978 {
2979   return (to == BR_REG (0) ? crtl->is_leaf : true);
2980 }
2981 
2982 /* Compute the initial difference between the specified pair of registers.  */
2983 
2984 HOST_WIDE_INT
2985 ia64_initial_elimination_offset (int from, int to)
2986 {
2987   HOST_WIDE_INT offset;
2988 
2989   ia64_compute_frame_size (get_frame_size ());
2990   switch (from)
2991     {
2992     case FRAME_POINTER_REGNUM:
2993       switch (to)
2994 	{
2995 	case HARD_FRAME_POINTER_REGNUM:
2996 	  offset = -current_frame_info.total_size;
2997 	  if (!crtl->is_leaf || cfun->calls_alloca)
2998 	    offset += 16 + crtl->outgoing_args_size;
2999 	  break;
3000 
3001 	case STACK_POINTER_REGNUM:
3002 	  offset = 0;
3003 	  if (!crtl->is_leaf || cfun->calls_alloca)
3004 	    offset += 16 + crtl->outgoing_args_size;
3005 	  break;
3006 
3007 	default:
3008 	  gcc_unreachable ();
3009 	}
3010       break;
3011 
3012     case ARG_POINTER_REGNUM:
3013       /* Arguments start above the 16 byte save area, unless stdarg
3014 	 in which case we store through the 16 byte save area.  */
3015       switch (to)
3016 	{
3017 	case HARD_FRAME_POINTER_REGNUM:
3018 	  offset = 16 - crtl->args.pretend_args_size;
3019 	  break;
3020 
3021 	case STACK_POINTER_REGNUM:
3022 	  offset = (current_frame_info.total_size
3023 		    + 16 - crtl->args.pretend_args_size);
3024 	  break;
3025 
3026 	default:
3027 	  gcc_unreachable ();
3028 	}
3029       break;
3030 
3031     default:
3032       gcc_unreachable ();
3033     }
3034 
3035   return offset;
3036 }
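
/* Worked example with made-up numbers: in a non-leaf function with
   total_size == 160, outgoing_args_size == 32 and no pretend arguments, the
   code above yields an offset of 16 + 32 == 48 for FRAME_POINTER_REGNUM to
   STACK_POINTER_REGNUM and 160 + 16 == 176 for ARG_POINTER_REGNUM to
   STACK_POINTER_REGNUM.  */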
3037 
3038 /* If there are more than a trivial number of register spills, we use
3039    two interleaved iterators so that we can get two memory references
3040    per insn group.
3041 
3042    In order to simplify things in the prologue and epilogue expanders,
3043    we use helper functions to fix up the memory references after the
3044    fact with the appropriate offsets to a POST_MODIFY memory mode.
3045    The following data structure tracks the state of the two iterators
3046    while insns are being emitted.  */
3047 
3048 struct spill_fill_data
3049 {
3050   rtx_insn *init_after;		/* point at which to emit initializations */
3051   rtx init_reg[2];		/* initial base register */
3052   rtx iter_reg[2];		/* the iterator registers */
3053   rtx *prev_addr[2];		/* address of last memory use */
3054   rtx_insn *prev_insn[2];	/* the insn corresponding to prev_addr */
3055   HOST_WIDE_INT prev_off[2];	/* last offset */
3056   int n_iter;			/* number of iterators in use */
3057   int next_iter;		/* next iterator to use */
3058   unsigned int save_gr_used_mask;
3059 };
3060 
3061 static struct spill_fill_data spill_fill_data;
3062 
3063 static void
3064 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
3065 {
3066   int i;
3067 
3068   spill_fill_data.init_after = get_last_insn ();
3069   spill_fill_data.init_reg[0] = init_reg;
3070   spill_fill_data.init_reg[1] = init_reg;
3071   spill_fill_data.prev_addr[0] = NULL;
3072   spill_fill_data.prev_addr[1] = NULL;
3073   spill_fill_data.prev_insn[0] = NULL;
3074   spill_fill_data.prev_insn[1] = NULL;
3075   spill_fill_data.prev_off[0] = cfa_off;
3076   spill_fill_data.prev_off[1] = cfa_off;
3077   spill_fill_data.next_iter = 0;
3078   spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
3079 
3080   spill_fill_data.n_iter = 1 + (n_spills > 2);
3081   for (i = 0; i < spill_fill_data.n_iter; ++i)
3082     {
3083       int regno = next_scratch_gr_reg ();
3084       spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
3085       current_frame_info.gr_used_mask |= 1 << regno;
3086     }
3087 }
3088 
3089 static void
3090 finish_spill_pointers (void)
3091 {
3092   current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
3093 }
3094 
3095 static rtx
3096 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
3097 {
3098   int iter = spill_fill_data.next_iter;
3099   HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
3100   rtx disp_rtx = GEN_INT (disp);
3101   rtx mem;
3102 
3103   if (spill_fill_data.prev_addr[iter])
3104     {
3105       if (satisfies_constraint_N (disp_rtx))
3106 	{
3107 	  *spill_fill_data.prev_addr[iter]
3108 	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
3109 				   gen_rtx_PLUS (DImode,
3110 						 spill_fill_data.iter_reg[iter],
3111 						 disp_rtx));
3112 	  add_reg_note (spill_fill_data.prev_insn[iter],
3113 			REG_INC, spill_fill_data.iter_reg[iter]);
3114 	}
3115       else
3116 	{
3117 	  /* ??? Could use register post_modify for loads.  */
3118 	  if (!satisfies_constraint_I (disp_rtx))
3119 	    {
3120 	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3121 	      emit_move_insn (tmp, disp_rtx);
3122 	      disp_rtx = tmp;
3123 	    }
3124 	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3125 				 spill_fill_data.iter_reg[iter], disp_rtx));
3126 	}
3127     }
3128   /* Micro-optimization: if we've created a frame pointer, it's at
3129      CFA 0, which may allow the real iterator to be initialized lower,
3130      slightly increasing parallelism.  Also, if there are few saves
3131      it may eliminate the iterator entirely.  */
3132   else if (disp == 0
3133 	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
3134 	   && frame_pointer_needed)
3135     {
3136       mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
3137       set_mem_alias_set (mem, get_varargs_alias_set ());
3138       return mem;
3139     }
3140   else
3141     {
3142       rtx seq;
3143       rtx_insn *insn;
3144 
3145       if (disp == 0)
3146 	seq = gen_movdi (spill_fill_data.iter_reg[iter],
3147 			 spill_fill_data.init_reg[iter]);
3148       else
3149 	{
3150 	  start_sequence ();
3151 
3152 	  if (!satisfies_constraint_I (disp_rtx))
3153 	    {
3154 	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3155 	      emit_move_insn (tmp, disp_rtx);
3156 	      disp_rtx = tmp;
3157 	    }
3158 
3159 	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3160 				 spill_fill_data.init_reg[iter],
3161 				 disp_rtx));
3162 
3163 	  seq = get_insns ();
3164 	  end_sequence ();
3165 	}
3166 
      /* Be careful in case this is the first insn in a sequence.  */
3168       if (spill_fill_data.init_after)
3169 	insn = emit_insn_after (seq, spill_fill_data.init_after);
3170       else
3171 	{
3172 	  rtx_insn *first = get_insns ();
3173 	  if (first)
3174 	    insn = emit_insn_before (seq, first);
3175 	  else
3176 	    insn = emit_insn (seq);
3177 	}
3178       spill_fill_data.init_after = insn;
3179     }
3180 
3181   mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
3182 
3183   /* ??? Not all of the spills are for varargs, but some of them are.
3184      The rest of the spills belong in an alias set of their own.  But
3185      it doesn't actually hurt to include them here.  */
3186   set_mem_alias_set (mem, get_varargs_alias_set ());
3187 
3188   spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
3189   spill_fill_data.prev_off[iter] = cfa_off;
3190 
3191   if (++iter >= spill_fill_data.n_iter)
3192     iter = 0;
3193   spill_fill_data.next_iter = iter;
3194 
3195   return mem;
3196 }
3197 
3198 static void
do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
	  rtx frame_reg)
3201 {
3202   int iter = spill_fill_data.next_iter;
3203   rtx mem;
3204   rtx_insn *insn;
3205 
3206   mem = spill_restore_mem (reg, cfa_off);
3207   insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
3208   spill_fill_data.prev_insn[iter] = insn;
3209 
3210   if (frame_reg)
3211     {
3212       rtx base;
3213       HOST_WIDE_INT off;
3214 
3215       RTX_FRAME_RELATED_P (insn) = 1;
3216 
3217       /* Don't even pretend that the unwind code can intuit its way
3218 	 through a pair of interleaved post_modify iterators.  Just
3219 	 provide the correct answer.  */
3220 
3221       if (frame_pointer_needed)
3222 	{
3223 	  base = hard_frame_pointer_rtx;
3224 	  off = - cfa_off;
3225 	}
3226       else
3227 	{
3228 	  base = stack_pointer_rtx;
3229 	  off = current_frame_info.total_size - cfa_off;
3230 	}
3231 
3232       add_reg_note (insn, REG_CFA_OFFSET,
3233 		    gen_rtx_SET (gen_rtx_MEM (GET_MODE (reg),
3234 					      plus_constant (Pmode,
3235 							     base, off)),
3236 				 frame_reg));
3237     }
3238 }
3239 
3240 static void
do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
3242 {
3243   int iter = spill_fill_data.next_iter;
3244   rtx_insn *insn;
3245 
3246   insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3247 				GEN_INT (cfa_off)));
3248   spill_fill_data.prev_insn[iter] = insn;
3249 }
3250 
/* Wrapper functions that discard the CONST_INT spill offset.  These
   exist so that we can give gr_spill/gr_fill the offset they need and
   use a consistent function interface.  */
3254 
3255 static rtx
gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3257 {
3258   return gen_movdi (dest, src);
3259 }
3260 
3261 static rtx
gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3263 {
3264   return gen_fr_spill (dest, src);
3265 }
3266 
3267 static rtx
gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3269 {
3270   return gen_fr_restore (dest, src);
3271 }
3272 
3273 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
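/* PROBE_INTERVAL is normally 1 << 12 == 4096 bytes, assuming the generic
   default value of STACK_CHECK_PROBE_INTERVAL_EXP (12) is in effect.  */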
3274 
3275 /* See Table 6.2 of the IA-64 Software Developer Manual, Volume 2.  */
3276 #define BACKING_STORE_SIZE(N) ((N) > 0 ? ((N) + (N)/63 + 1) * 8 : 0)
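/* A worked example of the formula above, on my reading of the table: the
   RSE interleaves one NaT collection word for (at most) every 63 saved
   registers, and the "+ 1" covers a partial group.  For N == 96 stacked
   registers this gives (96 + 96/63 + 1) * 8 == (96 + 1 + 1) * 8 == 784
   bytes of backing store.  */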
3277 
3278 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
3279    inclusive.  These are offsets from the current stack pointer.  BS_SIZE
3280    is the size of the backing store.  ??? This clobbers r2 and r3.  */
3281 
3282 static void
ia64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
			     int bs_size)
3285 {
3286   rtx r2 = gen_rtx_REG (Pmode, GR_REG (2));
3287   rtx r3 = gen_rtx_REG (Pmode, GR_REG (3));
3288   rtx p6 = gen_rtx_REG (BImode, PR_REG (6));
3289 
3290   /* On the IA-64 there is a second stack in memory, namely the Backing Store
3291      of the Register Stack Engine.  We also need to probe it after checking
3292      that the 2 stacks don't overlap.  */
3293   emit_insn (gen_bsp_value (r3));
3294   emit_move_insn (r2, GEN_INT (-(first + size)));
3295 
3296   /* Compare current value of BSP and SP registers.  */
3297   emit_insn (gen_rtx_SET (p6, gen_rtx_fmt_ee (LTU, BImode,
3298 					      r3, stack_pointer_rtx)));
3299 
  /* Compute the address of the probe for the Backing Store (which grows
     towards higher addresses).  We probe only at the first offset of
     the next page because some OSes (e.g. Linux/ia64) only extend the
     backing store when this specific address is hit (but generate a SEGV
     at other addresses).  Page size is the worst case (4KB).  The reserve
     size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough.
     Also compute the address of the last probe for the memory stack
     (which grows towards lower addresses).  */
3308   emit_insn (gen_rtx_SET (r3, plus_constant (Pmode, r3, 4095)));
3309   emit_insn (gen_rtx_SET (r2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3310 
3311   /* Compare them and raise SEGV if the former has topped the latter.  */
3312   emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3313 				gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
3314 				gen_rtx_SET (p6, gen_rtx_fmt_ee (GEU, BImode,
3315 								 r3, r2))));
3316   emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, r3, GEN_INT (12),
3317 						const0_rtx),
3318 			  const0_rtx));
3319   emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3320 				gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
3321 				gen_rtx_TRAP_IF (VOIDmode, const1_rtx,
3322 						 GEN_INT (11))));
3323 
3324   /* Probe the Backing Store if necessary.  */
3325   if (bs_size > 0)
3326     emit_stack_probe (r3);
3327 
3328   /* Probe the memory stack if necessary.  */
3329   if (size == 0)
3330     ;
3331 
3332   /* See if we have a constant small number of probes to generate.  If so,
3333      that's the easy case.  */
3334   else if (size <= PROBE_INTERVAL)
3335     emit_stack_probe (r2);
3336 
  /* The run-time loop is made up of 9 insns in the generic case while this
     compile-time loop is made up of 5+2*(n-2) insns for n intervals.  */
3339   else if (size <= 4 * PROBE_INTERVAL)
3340     {
3341       HOST_WIDE_INT i;
3342 
3343       emit_move_insn (r2, GEN_INT (-(first + PROBE_INTERVAL)));
3344       emit_insn (gen_rtx_SET (r2,
3345 			      gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3346       emit_stack_probe (r2);
3347 
3348       /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
3349 	 it exceeds SIZE.  If only two probes are needed, this will not
3350 	 generate any code.  Then probe at FIRST + SIZE.  */
3351       for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
3352 	{
3353 	  emit_insn (gen_rtx_SET (r2,
3354 				  plus_constant (Pmode, r2, -PROBE_INTERVAL)));
3355 	  emit_stack_probe (r2);
3356 	}
3357 
3358       emit_insn (gen_rtx_SET (r2,
3359 			      plus_constant (Pmode, r2,
3360 					     (i - PROBE_INTERVAL) - size)));
3361       emit_stack_probe (r2);
3362     }
3363 
3364   /* Otherwise, do the same as above, but in a loop.  Note that we must be
3365      extra careful with variables wrapping around because we might be at
3366      the very top (or the very bottom) of the address space and we have
3367      to be able to handle this case properly; in particular, we use an
3368      equality test for the loop condition.  */
3369   else
3370     {
3371       HOST_WIDE_INT rounded_size;
3372 
3373       emit_move_insn (r2, GEN_INT (-first));
3374 
3375 
3376       /* Step 1: round SIZE to the previous multiple of the interval.  */
3377 
3378       rounded_size = size & -PROBE_INTERVAL;
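      /* For example, with PROBE_INTERVAL == 4096 and SIZE == 10000,
	 ROUNDED_SIZE == 8192: the loop below probes at offsets FIRST + 4096
	 and FIRST + 8192 below the incoming stack pointer, and step 4 adds
	 one more probe at FIRST + 10000 for the 1808-byte remainder.  */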
3379 
3380 
3381       /* Step 2: compute initial and final value of the loop counter.  */
3382 
3383       /* TEST_ADDR = SP + FIRST.  */
3384       emit_insn (gen_rtx_SET (r2,
3385 			      gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3386 
3387       /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
3388       if (rounded_size > (1 << 21))
3389 	{
3390 	  emit_move_insn (r3, GEN_INT (-rounded_size));
3391 	  emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2, r3)));
3392 	}
3393       else
3394         emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2,
3395 						  GEN_INT (-rounded_size))));
3396 
3397 
3398       /* Step 3: the loop
3399 
3400 	 do
3401 	   {
3402 	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
3403 	     probe at TEST_ADDR
3404 	   }
3405 	 while (TEST_ADDR != LAST_ADDR)
3406 
3407 	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
3408 	 until it is equal to ROUNDED_SIZE.  */
3409 
3410       emit_insn (gen_probe_stack_range (r2, r2, r3));
3411 
3412 
3413       /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
3414 	 that SIZE is equal to ROUNDED_SIZE.  */
3415 
3416       /* TEMP = SIZE - ROUNDED_SIZE.  */
3417       if (size != rounded_size)
3418 	{
3419 	  emit_insn (gen_rtx_SET (r2, plus_constant (Pmode, r2,
3420 						     rounded_size - size)));
3421 	  emit_stack_probe (r2);
3422 	}
3423     }
3424 
3425   /* Make sure nothing is scheduled before we are done.  */
3426   emit_insn (gen_blockage ());
3427 }
3428 
3429 /* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
3430    absolute addresses.  */
3431 
3432 const char *
output_probe_stack_range (rtx reg1, rtx reg2)
3434 {
3435   static int labelno = 0;
3436   char loop_lab[32];
3437   rtx xops[3];
3438 
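  /* A sketch of the loop this prints, assuming PROBE_INTERVAL == 4096 and
     that the operands happen to be r2 (test address) and r3 (last address)
     with predicate pair p6/p7:

	.LPSRL0:
		addl r2 = -4096, r2
		;;
		probe.w.fault r2, 0
		cmp.eq p6, p7 = r2, r3
		(p7) br.cond.dpnt .LPSRL0  */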
3439   ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
3440 
3441   /* Loop.  */
3442   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
3443 
3444   /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
3445   xops[0] = reg1;
3446   xops[1] = GEN_INT (-PROBE_INTERVAL);
3447   output_asm_insn ("addl %0 = %1, %0", xops);
3448   fputs ("\t;;\n", asm_out_file);
3449 
3450   /* Probe at TEST_ADDR.  */
3451   output_asm_insn ("probe.w.fault %0, 0", xops);
3452 
3453   /* Test if TEST_ADDR == LAST_ADDR.  */
3454   xops[1] = reg2;
3455   xops[2] = gen_rtx_REG (BImode, PR_REG (6));
3456   output_asm_insn ("cmp.eq %2, %I2 = %0, %1", xops);
3457 
3458   /* Branch.  */
3459   fprintf (asm_out_file, "\t(%s) br.cond.dpnt ", reg_names [PR_REG (7)]);
3460   assemble_name_raw (asm_out_file, loop_lab);
3461   fputc ('\n', asm_out_file);
3462 
3463   return "";
3464 }
3465 
3466 /* Called after register allocation to add any instructions needed for the
3467    prologue.  Using a prologue insn is favored compared to putting all of the
3468    instructions in output_function_prologue(), since it allows the scheduler
3469    to intermix instructions with the saves of the caller saved registers.  In
3470    some cases, it might be necessary to emit a barrier instruction as the last
3471    insn to prevent such scheduling.
3472 
3473    Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3474    so that the debug info generation code can handle them properly.
3475 
3476    The register save area is laid out like so:
3477    cfa+16
3478 	[ varargs spill area ]
3479 	[ fr register spill area ]
3480 	[ br register spill area ]
3481 	[ ar register spill area ]
3482 	[ pr register spill area ]
3483 	[ gr register spill area ] */
3484 
/* ??? We get inefficient code when the frame size is larger than can fit
   in an adds instruction.  */
3487 
3488 void
ia64_expand_prologue (void)
3490 {
3491   rtx_insn *insn;
3492   rtx ar_pfs_save_reg, ar_unat_save_reg;
3493   int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3494   rtx reg, alt_reg;
3495 
3496   ia64_compute_frame_size (get_frame_size ());
3497   last_scratch_gr_reg = 15;
3498 
3499   if (flag_stack_usage_info)
3500     current_function_static_stack_size = current_frame_info.total_size;
3501 
3502   if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
3503       || flag_stack_clash_protection)
3504     {
3505       HOST_WIDE_INT size = current_frame_info.total_size;
3506       int bs_size = BACKING_STORE_SIZE (current_frame_info.n_input_regs
3507 					  + current_frame_info.n_local_regs);
3508 
3509       if (crtl->is_leaf && !cfun->calls_alloca)
3510 	{
3511 	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
3512 	    ia64_emit_probe_stack_range (get_stack_check_protect (),
3513 					 size - get_stack_check_protect (),
3514 					 bs_size);
3515 	  else if (size + bs_size > get_stack_check_protect ())
3516 	    ia64_emit_probe_stack_range (get_stack_check_protect (),
3517 					 0, bs_size);
3518 	}
3519       else if (size + bs_size > 0)
3520 	ia64_emit_probe_stack_range (get_stack_check_protect (), size, bs_size);
3521     }
3522 
3523   if (dump_file)
3524     {
3525       fprintf (dump_file, "ia64 frame related registers "
3526                "recorded in current_frame_info.r[]:\n");
3527 #define PRINTREG(a) if (current_frame_info.r[a]) \
3528         fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3529       PRINTREG(reg_fp);
3530       PRINTREG(reg_save_b0);
3531       PRINTREG(reg_save_pr);
3532       PRINTREG(reg_save_ar_pfs);
3533       PRINTREG(reg_save_ar_unat);
3534       PRINTREG(reg_save_ar_lc);
3535       PRINTREG(reg_save_gp);
3536 #undef PRINTREG
3537     }
3538 
3539   /* If there is no epilogue, then we don't need some prologue insns.
3540      We need to avoid emitting the dead prologue insns, because flow
3541      will complain about them.  */
3542   if (optimize)
3543     {
3544       edge e;
3545       edge_iterator ei;
3546 
3547       FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
3548 	if ((e->flags & EDGE_FAKE) == 0
3549 	    && (e->flags & EDGE_FALLTHRU) != 0)
3550 	  break;
3551       epilogue_p = (e != NULL);
3552     }
3553   else
3554     epilogue_p = 1;
3555 
3556   /* Set the local, input, and output register names.  We need to do this
3557      for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3558      half.  If we use in/loc/out register names, then we get assembler errors
3559      in crtn.S because there is no alloc insn or regstk directive in there.  */
3560   if (! TARGET_REG_NAMES)
3561     {
3562       int inputs = current_frame_info.n_input_regs;
3563       int locals = current_frame_info.n_local_regs;
3564       int outputs = current_frame_info.n_output_regs;
3565 
3566       for (i = 0; i < inputs; i++)
3567 	reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3568       for (i = 0; i < locals; i++)
3569 	reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3570       for (i = 0; i < outputs; i++)
3571 	reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3572     }
3573 
3574   /* Set the frame pointer register name.  The regnum is logically loc79,
3575      but of course we'll not have allocated that many locals.  Rather than
3576      worrying about renumbering the existing rtxs, we adjust the name.  */
3577   /* ??? This code means that we can never use one local register when
3578      there is a frame pointer.  loc79 gets wasted in this case, as it is
3579      renamed to a register that will never be used.  See also the try_locals
3580      code in find_gr_spill.  */
3581   if (current_frame_info.r[reg_fp])
3582     {
3583       const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3584       reg_names[HARD_FRAME_POINTER_REGNUM]
3585 	= reg_names[current_frame_info.r[reg_fp]];
3586       reg_names[current_frame_info.r[reg_fp]] = tmp;
3587     }
3588 
3589   /* We don't need an alloc instruction if we've used no outputs or locals.  */
3590   if (current_frame_info.n_local_regs == 0
3591       && current_frame_info.n_output_regs == 0
3592       && current_frame_info.n_input_regs <= crtl->args.info.int_regs
3593       && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3594     {
3595       /* If there is no alloc, but there are input registers used, then we
3596 	 need a .regstk directive.  */
3597       current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3598       ar_pfs_save_reg = NULL_RTX;
3599     }
3600   else
3601     {
3602       current_frame_info.need_regstk = 0;
3603 
3604       if (current_frame_info.r[reg_save_ar_pfs])
3605         {
3606 	  regno = current_frame_info.r[reg_save_ar_pfs];
3607 	  reg_emitted (reg_save_ar_pfs);
3608 	}
3609       else
3610 	regno = next_scratch_gr_reg ();
3611       ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3612 
3613       insn = emit_insn (gen_alloc (ar_pfs_save_reg,
3614 				   GEN_INT (current_frame_info.n_input_regs),
3615 				   GEN_INT (current_frame_info.n_local_regs),
3616 				   GEN_INT (current_frame_info.n_output_regs),
3617 				   GEN_INT (current_frame_info.n_rotate_regs)));
3618       if (current_frame_info.r[reg_save_ar_pfs])
3619 	{
3620 	  RTX_FRAME_RELATED_P (insn) = 1;
3621 	  add_reg_note (insn, REG_CFA_REGISTER,
3622 			gen_rtx_SET (ar_pfs_save_reg,
3623 				     gen_rtx_REG (DImode, AR_PFS_REGNUM)));
3624 	}
3625     }
3626 
3627   /* Set up frame pointer, stack pointer, and spill iterators.  */
3628 
3629   n_varargs = cfun->machine->n_varargs;
3630   setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3631 			stack_pointer_rtx, 0);
3632 
3633   if (frame_pointer_needed)
3634     {
3635       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3636       RTX_FRAME_RELATED_P (insn) = 1;
3637 
3638       /* Force the unwind info to recognize this as defining a new CFA,
3639 	 rather than some temp register setup.  */
3640       add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
3641     }
3642 
3643   if (current_frame_info.total_size != 0)
3644     {
3645       rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3646       rtx offset;
3647 
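      /* Constraint "I" is believed to be the 14-bit signed immediate range
	 of the adds instruction; frames bigger than that need the negated
	 size loaded into a scratch register first.  */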
3648       if (satisfies_constraint_I (frame_size_rtx))
3649 	offset = frame_size_rtx;
3650       else
3651 	{
3652 	  regno = next_scratch_gr_reg ();
3653 	  offset = gen_rtx_REG (DImode, regno);
3654 	  emit_move_insn (offset, frame_size_rtx);
3655 	}
3656 
3657       insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3658 				    stack_pointer_rtx, offset));
3659 
3660       if (! frame_pointer_needed)
3661 	{
3662 	  RTX_FRAME_RELATED_P (insn) = 1;
3663 	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
3664 			gen_rtx_SET (stack_pointer_rtx,
3665 				     gen_rtx_PLUS (DImode,
3666 						   stack_pointer_rtx,
3667 						   frame_size_rtx)));
3668 	}
3669 
3670       /* ??? At this point we must generate a magic insn that appears to
3671 	 modify the stack pointer, the frame pointer, and all spill
3672 	 iterators.  This would allow the most scheduling freedom.  For
3673 	 now, just hard stop.  */
3674       emit_insn (gen_blockage ());
3675     }
3676 
3677   /* Must copy out ar.unat before doing any integer spills.  */
3678   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3679     {
3680       if (current_frame_info.r[reg_save_ar_unat])
3681         {
3682 	  ar_unat_save_reg
3683 	    = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3684 	  reg_emitted (reg_save_ar_unat);
3685 	}
3686       else
3687 	{
3688 	  alt_regno = next_scratch_gr_reg ();
3689 	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3690 	  current_frame_info.gr_used_mask |= 1 << alt_regno;
3691 	}
3692 
3693       reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3694       insn = emit_move_insn (ar_unat_save_reg, reg);
3695       if (current_frame_info.r[reg_save_ar_unat])
3696 	{
3697 	  RTX_FRAME_RELATED_P (insn) = 1;
3698 	  add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3699 	}
3700 
3701       /* Even if we're not going to generate an epilogue, we still
3702 	 need to save the register so that EH works.  */
3703       if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
3704 	emit_insn (gen_prologue_use (ar_unat_save_reg));
3705     }
3706   else
3707     ar_unat_save_reg = NULL_RTX;
3708 
3709   /* Spill all varargs registers.  Do this before spilling any GR registers,
3710      since we want the UNAT bits for the GR registers to override the UNAT
3711      bits from varargs, which we don't care about.  */
3712 
3713   cfa_off = -16;
3714   for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3715     {
3716       reg = gen_rtx_REG (DImode, regno);
3717       do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3718     }
3719 
3720   /* Locate the bottom of the register save area.  */
3721   cfa_off = (current_frame_info.spill_cfa_off
3722 	     + current_frame_info.spill_size
3723 	     + current_frame_info.extra_spill_size);
3724 
3725   /* Save the predicate register block either in a register or in memory.  */
3726   if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3727     {
3728       reg = gen_rtx_REG (DImode, PR_REG (0));
3729       if (current_frame_info.r[reg_save_pr] != 0)
3730 	{
3731 	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3732 	  reg_emitted (reg_save_pr);
3733 	  insn = emit_move_insn (alt_reg, reg);
3734 
3735 	  /* ??? Denote pr spill/fill by a DImode move that modifies all
3736 	     64 hard registers.  */
3737 	  RTX_FRAME_RELATED_P (insn) = 1;
3738 	  add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3739 
3740 	  /* Even if we're not going to generate an epilogue, we still
3741 	     need to save the register so that EH works.  */
3742 	  if (! epilogue_p)
3743 	    emit_insn (gen_prologue_use (alt_reg));
3744 	}
3745       else
3746 	{
3747 	  alt_regno = next_scratch_gr_reg ();
3748 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3749 	  insn = emit_move_insn (alt_reg, reg);
3750 	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3751 	  cfa_off -= 8;
3752 	}
3753     }
3754 
3755   /* Handle AR regs in numerical order.  All of them get special handling.  */
3756   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3757       && current_frame_info.r[reg_save_ar_unat] == 0)
3758     {
3759       reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3760       do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3761       cfa_off -= 8;
3762     }
3763 
3764   /* The alloc insn already copied ar.pfs into a general register.  The
3765      only thing we have to do now is copy that register to a stack slot
3766      if we'd not allocated a local register for the job.  */
3767   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3768       && current_frame_info.r[reg_save_ar_pfs] == 0)
3769     {
3770       reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3771       do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3772       cfa_off -= 8;
3773     }
3774 
3775   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3776     {
3777       reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3778       if (current_frame_info.r[reg_save_ar_lc] != 0)
3779 	{
3780 	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3781 	  reg_emitted (reg_save_ar_lc);
3782 	  insn = emit_move_insn (alt_reg, reg);
3783 	  RTX_FRAME_RELATED_P (insn) = 1;
3784 	  add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3785 
3786 	  /* Even if we're not going to generate an epilogue, we still
3787 	     need to save the register so that EH works.  */
3788 	  if (! epilogue_p)
3789 	    emit_insn (gen_prologue_use (alt_reg));
3790 	}
3791       else
3792 	{
3793 	  alt_regno = next_scratch_gr_reg ();
3794 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3795 	  emit_move_insn (alt_reg, reg);
3796 	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3797 	  cfa_off -= 8;
3798 	}
3799     }
3800 
3801   /* Save the return pointer.  */
3802   if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3803     {
3804       reg = gen_rtx_REG (DImode, BR_REG (0));
3805       if (current_frame_info.r[reg_save_b0] != 0)
3806 	{
3807           alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3808           reg_emitted (reg_save_b0);
3809 	  insn = emit_move_insn (alt_reg, reg);
3810 	  RTX_FRAME_RELATED_P (insn) = 1;
3811 	  add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (alt_reg, pc_rtx));
3812 
3813 	  /* Even if we're not going to generate an epilogue, we still
3814 	     need to save the register so that EH works.  */
3815 	  if (! epilogue_p)
3816 	    emit_insn (gen_prologue_use (alt_reg));
3817 	}
3818       else
3819 	{
3820 	  alt_regno = next_scratch_gr_reg ();
3821 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3822 	  emit_move_insn (alt_reg, reg);
3823 	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3824 	  cfa_off -= 8;
3825 	}
3826     }
3827 
3828   if (current_frame_info.r[reg_save_gp])
3829     {
3830       reg_emitted (reg_save_gp);
3831       insn = emit_move_insn (gen_rtx_REG (DImode,
3832 					  current_frame_info.r[reg_save_gp]),
3833 			     pic_offset_table_rtx);
3834     }
3835 
3836   /* We should now be at the base of the gr/br/fr spill area.  */
3837   gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3838 			  + current_frame_info.spill_size));
3839 
3840   /* Spill all general registers.  */
3841   for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3842     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3843       {
3844 	reg = gen_rtx_REG (DImode, regno);
3845 	do_spill (gen_gr_spill, reg, cfa_off, reg);
3846 	cfa_off -= 8;
3847       }
3848 
3849   /* Spill the rest of the BR registers.  */
3850   for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3851     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3852       {
3853 	alt_regno = next_scratch_gr_reg ();
3854 	alt_reg = gen_rtx_REG (DImode, alt_regno);
3855 	reg = gen_rtx_REG (DImode, regno);
3856 	emit_move_insn (alt_reg, reg);
3857 	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3858 	cfa_off -= 8;
3859       }
3860 
3861   /* Align the frame and spill all FR registers.  */
3862   for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3863     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3864       {
3865         gcc_assert (!(cfa_off & 15));
3866 	reg = gen_rtx_REG (XFmode, regno);
3867 	do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3868 	cfa_off -= 16;
3869       }
3870 
3871   gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3872 
3873   finish_spill_pointers ();
3874 }
3875 
3876 /* Output the textual info surrounding the prologue.  */
3877 
3878 void
ia64_start_function (FILE *file, const char *fnname,
		     tree decl ATTRIBUTE_UNUSED)
3881 {
3882 #if TARGET_ABI_OPEN_VMS
3883   vms_start_function (fnname);
3884 #endif
3885 
3886   fputs ("\t.proc ", file);
3887   assemble_name (file, fnname);
3888   fputc ('\n', file);
3889   ASM_OUTPUT_LABEL (file, fnname);
3890 }
3891 
/* Called after register allocation to add any instructions needed for the
   epilogue.  Using an epilogue insn is favored compared to putting all of the
   instructions in output_function_epilogue(), since it allows the scheduler
   to intermix instructions with the restores of the caller saved registers.
   In some cases, it might be necessary to emit a barrier instruction as the
   last insn to prevent such scheduling.  */
3898 
3899 void
ia64_expand_epilogue (int sibcall_p)
3901 {
3902   rtx_insn *insn;
3903   rtx reg, alt_reg, ar_unat_save_reg;
3904   int regno, alt_regno, cfa_off;
3905 
3906   ia64_compute_frame_size (get_frame_size ());
3907 
3908   /* If there is a frame pointer, then we use it instead of the stack
3909      pointer, so that the stack pointer does not need to be valid when
3910      the epilogue starts.  See EXIT_IGNORE_STACK.  */
3911   if (frame_pointer_needed)
3912     setup_spill_pointers (current_frame_info.n_spilled,
3913 			  hard_frame_pointer_rtx, 0);
3914   else
3915     setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3916 			  current_frame_info.total_size);
3917 
3918   if (current_frame_info.total_size != 0)
3919     {
3920       /* ??? At this point we must generate a magic insn that appears to
3921          modify the spill iterators and the frame pointer.  This would
3922 	 allow the most scheduling freedom.  For now, just hard stop.  */
3923       emit_insn (gen_blockage ());
3924     }
3925 
3926   /* Locate the bottom of the register save area.  */
3927   cfa_off = (current_frame_info.spill_cfa_off
3928 	     + current_frame_info.spill_size
3929 	     + current_frame_info.extra_spill_size);
3930 
3931   /* Restore the predicate registers.  */
3932   if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3933     {
3934       if (current_frame_info.r[reg_save_pr] != 0)
3935         {
3936 	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3937 	  reg_emitted (reg_save_pr);
3938 	}
3939       else
3940 	{
3941 	  alt_regno = next_scratch_gr_reg ();
3942 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3943 	  do_restore (gen_movdi_x, alt_reg, cfa_off);
3944 	  cfa_off -= 8;
3945 	}
3946       reg = gen_rtx_REG (DImode, PR_REG (0));
3947       emit_move_insn (reg, alt_reg);
3948     }
3949 
3950   /* Restore the application registers.  */
3951 
3952   /* Load the saved unat from the stack, but do not restore it until
3953      after the GRs have been restored.  */
3954   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3955     {
3956       if (current_frame_info.r[reg_save_ar_unat] != 0)
3957         {
3958           ar_unat_save_reg
3959 	    = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3960 	  reg_emitted (reg_save_ar_unat);
3961 	}
3962       else
3963 	{
3964 	  alt_regno = next_scratch_gr_reg ();
3965 	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3966 	  current_frame_info.gr_used_mask |= 1 << alt_regno;
3967 	  do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3968 	  cfa_off -= 8;
3969 	}
3970     }
3971   else
3972     ar_unat_save_reg = NULL_RTX;
3973 
3974   if (current_frame_info.r[reg_save_ar_pfs] != 0)
3975     {
3976       reg_emitted (reg_save_ar_pfs);
3977       alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
3978       reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3979       emit_move_insn (reg, alt_reg);
3980     }
3981   else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3982     {
3983       alt_regno = next_scratch_gr_reg ();
3984       alt_reg = gen_rtx_REG (DImode, alt_regno);
3985       do_restore (gen_movdi_x, alt_reg, cfa_off);
3986       cfa_off -= 8;
3987       reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3988       emit_move_insn (reg, alt_reg);
3989     }
3990 
3991   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3992     {
3993       if (current_frame_info.r[reg_save_ar_lc] != 0)
3994         {
3995 	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3996           reg_emitted (reg_save_ar_lc);
3997 	}
3998       else
3999 	{
4000 	  alt_regno = next_scratch_gr_reg ();
4001 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
4002 	  do_restore (gen_movdi_x, alt_reg, cfa_off);
4003 	  cfa_off -= 8;
4004 	}
4005       reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
4006       emit_move_insn (reg, alt_reg);
4007     }
4008 
4009   /* Restore the return pointer.  */
4010   if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4011     {
4012       if (current_frame_info.r[reg_save_b0] != 0)
4013         {
4014          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4015          reg_emitted (reg_save_b0);
4016         }
4017       else
4018 	{
4019 	  alt_regno = next_scratch_gr_reg ();
4020 	  alt_reg = gen_rtx_REG (DImode, alt_regno);
4021 	  do_restore (gen_movdi_x, alt_reg, cfa_off);
4022 	  cfa_off -= 8;
4023 	}
4024       reg = gen_rtx_REG (DImode, BR_REG (0));
4025       emit_move_insn (reg, alt_reg);
4026     }
4027 
4028   /* We should now be at the base of the gr/br/fr spill area.  */
4029   gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
4030 			  + current_frame_info.spill_size));
4031 
4032   /* The GP may be stored on the stack in the prologue, but it's
4033      never restored in the epilogue.  Skip the stack slot.  */
4034   if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
4035     cfa_off -= 8;
4036 
4037   /* Restore all general registers.  */
4038   for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
4039     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4040       {
4041 	reg = gen_rtx_REG (DImode, regno);
4042 	do_restore (gen_gr_restore, reg, cfa_off);
4043 	cfa_off -= 8;
4044       }
4045 
4046   /* Restore the branch registers.  */
4047   for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
4048     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4049       {
4050 	alt_regno = next_scratch_gr_reg ();
4051 	alt_reg = gen_rtx_REG (DImode, alt_regno);
4052 	do_restore (gen_movdi_x, alt_reg, cfa_off);
4053 	cfa_off -= 8;
4054 	reg = gen_rtx_REG (DImode, regno);
4055 	emit_move_insn (reg, alt_reg);
4056       }
4057 
4058   /* Restore floating point registers.  */
4059   for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
4060     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4061       {
4062         gcc_assert (!(cfa_off & 15));
4063 	reg = gen_rtx_REG (XFmode, regno);
4064 	do_restore (gen_fr_restore_x, reg, cfa_off);
4065 	cfa_off -= 16;
4066       }
4067 
4068   /* Restore ar.unat for real.  */
4069   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
4070     {
4071       reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
4072       emit_move_insn (reg, ar_unat_save_reg);
4073     }
4074 
4075   gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
4076 
4077   finish_spill_pointers ();
4078 
4079   if (current_frame_info.total_size
4080       || cfun->machine->ia64_eh_epilogue_sp
4081       || frame_pointer_needed)
4082     {
4083       /* ??? At this point we must generate a magic insn that appears to
4084          modify the spill iterators, the stack pointer, and the frame
4085 	 pointer.  This would allow the most scheduling freedom.  For now,
4086 	 just hard stop.  */
4087       emit_insn (gen_blockage ());
4088     }
4089 
4090   if (cfun->machine->ia64_eh_epilogue_sp)
4091     emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
4092   else if (frame_pointer_needed)
4093     {
4094       insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
4095       RTX_FRAME_RELATED_P (insn) = 1;
4096       add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
4097     }
4098   else if (current_frame_info.total_size)
4099     {
4100       rtx offset, frame_size_rtx;
4101 
4102       frame_size_rtx = GEN_INT (current_frame_info.total_size);
4103       if (satisfies_constraint_I (frame_size_rtx))
4104 	offset = frame_size_rtx;
4105       else
4106 	{
4107 	  regno = next_scratch_gr_reg ();
4108 	  offset = gen_rtx_REG (DImode, regno);
4109 	  emit_move_insn (offset, frame_size_rtx);
4110 	}
4111 
4112       insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
4113 				    offset));
4114 
4115       RTX_FRAME_RELATED_P (insn) = 1;
4116       add_reg_note (insn, REG_CFA_ADJUST_CFA,
4117 		    gen_rtx_SET (stack_pointer_rtx,
4118 				 gen_rtx_PLUS (DImode,
4119 					       stack_pointer_rtx,
4120 					       frame_size_rtx)));
4121     }
4122 
4123   if (cfun->machine->ia64_eh_epilogue_bsp)
4124     emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
4125 
4126   if (! sibcall_p)
4127     emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
4128   else
4129     {
4130       int fp = GR_REG (2);
      /* We need a throwaway register here; r0 and r1 are reserved, so
	 r2 is the first available call-clobbered register.  If there
	 was a frame_pointer register, we may have swapped the names of
	 r2 and HARD_FRAME_POINTER_REGNUM, so we have to make sure we're
	 using the string "r2" when emitting the register name for the
	 assembler.  */
4137       if (current_frame_info.r[reg_fp]
4138           && current_frame_info.r[reg_fp] == GR_REG (2))
4139 	fp = HARD_FRAME_POINTER_REGNUM;
4140 
4141       /* We must emit an alloc to force the input registers to become output
4142 	 registers.  Otherwise, if the callee tries to pass its parameters
4143 	 through to another call without an intervening alloc, then these
4144 	 values get lost.  */
4145       /* ??? We don't need to preserve all input registers.  We only need to
4146 	 preserve those input registers used as arguments to the sibling call.
4147 	 It is unclear how to compute that number here.  */
4148       if (current_frame_info.n_input_regs != 0)
4149 	{
4150 	  rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
4151 
4152 	  insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
4153 				const0_rtx, const0_rtx,
4154 				n_inputs, const0_rtx));
4155 	  RTX_FRAME_RELATED_P (insn) = 1;
4156 
4157 	  /* ??? We need to mark the alloc as frame-related so that it gets
4158 	     passed into ia64_asm_unwind_emit for ia64-specific unwinding.
4159 	     But there's nothing dwarf2 related to be done wrt the register
4160 	     windows.  If we do nothing, dwarf2out will abort on the UNSPEC;
4161 	     the empty parallel means dwarf2out will not see anything.  */
4162 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4163 			gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0)));
4164 	}
4165     }
4166 }
4167 
4168 /* Return 1 if br.ret can do all the work required to return from a
4169    function.  */
4170 
4171 int
ia64_direct_return (void)
4173 {
4174   if (reload_completed && ! frame_pointer_needed)
4175     {
4176       ia64_compute_frame_size (get_frame_size ());
4177 
4178       return (current_frame_info.total_size == 0
4179 	      && current_frame_info.n_spilled == 0
4180 	      && current_frame_info.r[reg_save_b0] == 0
4181 	      && current_frame_info.r[reg_save_pr] == 0
4182 	      && current_frame_info.r[reg_save_ar_pfs] == 0
4183 	      && current_frame_info.r[reg_save_ar_unat] == 0
4184 	      && current_frame_info.r[reg_save_ar_lc] == 0);
4185     }
4186   return 0;
4187 }
4188 
4189 /* Return the magic cookie that we use to hold the return address
4190    during early compilation.  */
4191 
4192 rtx
ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
4194 {
4195   if (count != 0)
4196     return NULL;
4197   return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
4198 }
4199 
4200 /* Split this value after reload, now that we know where the return
4201    address is saved.  */
4202 
4203 void
ia64_split_return_addr_rtx (rtx dest)
4205 {
4206   rtx src;
4207 
4208   if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4209     {
4210       if (current_frame_info.r[reg_save_b0] != 0)
4211         {
4212 	  src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4213 	  reg_emitted (reg_save_b0);
4214 	}
4215       else
4216 	{
4217 	  HOST_WIDE_INT off;
4218 	  unsigned int regno;
4219 	  rtx off_r;
4220 
4221 	  /* Compute offset from CFA for BR0.  */
4222 	  /* ??? Must be kept in sync with ia64_expand_prologue.  */
4223 	  off = (current_frame_info.spill_cfa_off
4224 		 + current_frame_info.spill_size);
4225 	  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
4226 	    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4227 	      off -= 8;
4228 
4229 	  /* Convert CFA offset to a register based offset.  */
4230 	  if (frame_pointer_needed)
4231 	    src = hard_frame_pointer_rtx;
4232 	  else
4233 	    {
4234 	      src = stack_pointer_rtx;
4235 	      off += current_frame_info.total_size;
4236 	    }
4237 
4238 	  /* Load address into scratch register.  */
4239 	  off_r = GEN_INT (off);
4240 	  if (satisfies_constraint_I (off_r))
4241 	    emit_insn (gen_adddi3 (dest, src, off_r));
4242 	  else
4243 	    {
4244 	      emit_move_insn (dest, off_r);
4245 	      emit_insn (gen_adddi3 (dest, src, dest));
4246 	    }
4247 
4248 	  src = gen_rtx_MEM (Pmode, dest);
4249 	}
4250     }
4251   else
4252     src = gen_rtx_REG (DImode, BR_REG (0));
4253 
4254   emit_move_insn (dest, src);
4255 }
4256 
4257 int
ia64_hard_regno_rename_ok (int from, int to)
4259 {
4260   /* Don't clobber any of the registers we reserved for the prologue.  */
4261   unsigned int r;
4262 
4263   for (r = reg_fp; r <= reg_save_ar_lc; r++)
4264     if (to == current_frame_info.r[r]
4265         || from == current_frame_info.r[r]
4266         || to == emitted_frame_related_regs[r]
4267         || from == emitted_frame_related_regs[r])
4268       return 0;
4269 
4270   /* Don't use output registers outside the register frame.  */
4271   if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
4272     return 0;
4273 
4274   /* Retain even/oddness on predicate register pairs.  */
4275   if (PR_REGNO_P (from) && PR_REGNO_P (to))
4276     return (from & 1) == (to & 1);
4277 
4278   return 1;
4279 }
4280 
4281 /* Implement TARGET_HARD_REGNO_NREGS.
4282 
4283    ??? We say that BImode PR values require two registers.  This allows us to
4284    easily store the normal and inverted values.  We use CCImode to indicate
4285    a single predicate register.  */
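
/* For example: TImode in a general register takes CEIL (16, 8) == 2
   registers, while XFmode and RFmode each fit in a single FR register
   despite being wider than a word.  */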
4286 
4287 static unsigned int
ia64_hard_regno_nregs (unsigned int regno, machine_mode mode)
4289 {
4290   if (regno == PR_REG (0) && mode == DImode)
4291     return 64;
4292   if (PR_REGNO_P (regno) && (mode) == BImode)
4293     return 2;
4294   if ((PR_REGNO_P (regno) || GR_REGNO_P (regno)) && mode == CCImode)
4295     return 1;
4296   if (FR_REGNO_P (regno) && mode == XFmode)
4297     return 1;
4298   if (FR_REGNO_P (regno) && mode == RFmode)
4299     return 1;
4300   if (FR_REGNO_P (regno) && mode == XCmode)
4301     return 2;
4302   return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
4303 }
4304 
4305 /* Implement TARGET_HARD_REGNO_MODE_OK.  */
4306 
4307 static bool
ia64_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
4309 {
4310   if (FR_REGNO_P (regno))
4311     return (GET_MODE_CLASS (mode) != MODE_CC
4312 	    && mode != BImode
4313 	    && mode != TFmode);
4314 
4315   if (PR_REGNO_P (regno))
4316     return mode == BImode || GET_MODE_CLASS (mode) == MODE_CC;
4317 
4318   if (GR_REGNO_P (regno))
4319     return mode != XFmode && mode != XCmode && mode != RFmode;
4320 
4321   if (AR_REGNO_P (regno))
4322     return mode == DImode;
4323 
4324   if (BR_REGNO_P (regno))
4325     return mode == DImode;
4326 
4327   return false;
4328 }
4329 
4330 /* Implement TARGET_MODES_TIEABLE_P.
4331 
4332    Don't tie integer and FP modes, as that causes us to get integer registers
4333    allocated for FP instructions.  XFmode only supported in FP registers so
4334    we can't tie it with any other modes.  */
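
/* Concretely: SImode and DImode can tie, SFmode and DFmode can tie, but
   SFmode and XFmode cannot (only the latter is in the XF/XC/RF group),
   and BImode ties only with itself.  */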
4335 
4336 static bool
ia64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
4338 {
4339   return (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2)
4340 	  && ((mode1 == XFmode || mode1 == XCmode || mode1 == RFmode)
4341 	      == (mode2 == XFmode || mode2 == XCmode || mode2 == RFmode))
4342 	  && (mode1 == BImode) == (mode2 == BImode));
4343 }
4344 
4345 /* Target hook for assembling integer objects.  Handle word-sized
4346    aligned objects and detect the cases when @fptr is needed.  */
4347 
4348 static bool
ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
4350 {
4351   if (size == POINTER_SIZE / BITS_PER_UNIT
4352       && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
4353       && GET_CODE (x) == SYMBOL_REF
4354       && SYMBOL_REF_FUNCTION_P (x))
4355     {
4356       static const char * const directive[2][2] = {
4357 	  /* 64-bit pointer */  /* 32-bit pointer */
4358 	{ "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("},	/* unaligned */
4359 	{ "\tdata8\t@fptr(",    "\tdata4\t@fptr("}	/* aligned */
4360       };
4361       fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
4362       output_addr_const (asm_out_file, x);
4363       fputs (")\n", asm_out_file);
4364       return true;
4365     }
4366   return default_assemble_integer (x, size, aligned_p);
4367 }
4368 
4369 /* Emit the function prologue.  */
4370 
4371 static void
ia64_output_function_prologue (FILE *file)
4373 {
4374   int mask, grsave, grsave_prev;
4375 
4376   if (current_frame_info.need_regstk)
4377     fprintf (file, "\t.regstk %d, %d, %d, %d\n",
4378 	     current_frame_info.n_input_regs,
4379 	     current_frame_info.n_local_regs,
4380 	     current_frame_info.n_output_regs,
4381 	     current_frame_info.n_rotate_regs);
4382 
4383   if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4384     return;
4385 
4386   /* Emit the .prologue directive.  */
4387 
4388   mask = 0;
4389   grsave = grsave_prev = 0;
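  /* My understanding of the mask bits for the ".prologue mask, grsave"
     directive: 8 = rp (b0), 4 = ar.pfs, 2 = psp (the stack pointer copy
     held in the frame pointer register), 1 = pr, with GRSAVE naming the
     first GR of the consecutive block that holds them.  */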
4390   if (current_frame_info.r[reg_save_b0] != 0)
4391     {
4392       mask |= 8;
4393       grsave = grsave_prev = current_frame_info.r[reg_save_b0];
4394     }
4395   if (current_frame_info.r[reg_save_ar_pfs] != 0
4396       && (grsave_prev == 0
4397 	  || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
4398     {
4399       mask |= 4;
4400       if (grsave_prev == 0)
4401 	grsave = current_frame_info.r[reg_save_ar_pfs];
4402       grsave_prev = current_frame_info.r[reg_save_ar_pfs];
4403     }
4404   if (current_frame_info.r[reg_fp] != 0
4405       && (grsave_prev == 0
4406 	  || current_frame_info.r[reg_fp] == grsave_prev + 1))
4407     {
4408       mask |= 2;
4409       if (grsave_prev == 0)
4410 	grsave = HARD_FRAME_POINTER_REGNUM;
4411       grsave_prev = current_frame_info.r[reg_fp];
4412     }
4413   if (current_frame_info.r[reg_save_pr] != 0
4414       && (grsave_prev == 0
4415 	  || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
4416     {
4417       mask |= 1;
4418       if (grsave_prev == 0)
4419 	grsave = current_frame_info.r[reg_save_pr];
4420     }
4421 
4422   if (mask && TARGET_GNU_AS)
4423     fprintf (file, "\t.prologue %d, %d\n", mask,
4424 	     ia64_dbx_register_number (grsave));
4425   else
4426     fputs ("\t.prologue\n", file);
4427 
4428   /* Emit a .spill directive, if necessary, to relocate the base of
4429      the register spill area.  */
4430   if (current_frame_info.spill_cfa_off != -16)
4431     fprintf (file, "\t.spill %ld\n",
4432 	     (long) (current_frame_info.spill_cfa_off
4433 		     + current_frame_info.spill_size));
4434 }
4435 
4436 /* Emit the .body directive at the scheduled end of the prologue.  */
4437 
4438 static void
ia64_output_function_end_prologue (FILE *file)
4440 {
4441   if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4442     return;
4443 
4444   fputs ("\t.body\n", file);
4445 }
4446 
4447 /* Emit the function epilogue.  */
4448 
4449 static void
ia64_output_function_epilogue (FILE *)
4451 {
4452   int i;
4453 
4454   if (current_frame_info.r[reg_fp])
4455     {
4456       const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
4457       reg_names[HARD_FRAME_POINTER_REGNUM]
4458 	= reg_names[current_frame_info.r[reg_fp]];
4459       reg_names[current_frame_info.r[reg_fp]] = tmp;
4460       reg_emitted (reg_fp);
4461     }
4462   if (! TARGET_REG_NAMES)
4463     {
4464       for (i = 0; i < current_frame_info.n_input_regs; i++)
4465 	reg_names[IN_REG (i)] = ia64_input_reg_names[i];
4466       for (i = 0; i < current_frame_info.n_local_regs; i++)
4467 	reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
4468       for (i = 0; i < current_frame_info.n_output_regs; i++)
4469 	reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
4470     }
4471 
4472   current_frame_info.initialized = 0;
4473 }
4474 
4475 int
ia64_dbx_register_number (int regno)
4477 {
4478   /* In ia64_expand_prologue we quite literally renamed the frame pointer
4479      from its home at loc79 to something inside the register frame.  We
4480      must perform the same renumbering here for the debug info.  */
4481   if (current_frame_info.r[reg_fp])
4482     {
4483       if (regno == HARD_FRAME_POINTER_REGNUM)
4484 	regno = current_frame_info.r[reg_fp];
4485       else if (regno == current_frame_info.r[reg_fp])
4486 	regno = HARD_FRAME_POINTER_REGNUM;
4487     }
4488 
4489   if (IN_REGNO_P (regno))
4490     return 32 + regno - IN_REG (0);
4491   else if (LOC_REGNO_P (regno))
4492     return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
4493   else if (OUT_REGNO_P (regno))
4494     return (32 + current_frame_info.n_input_regs
4495 	    + current_frame_info.n_local_regs + regno - OUT_REG (0));
4496   else
4497     return regno;
4498 }
4499 
4500 /* Implement TARGET_TRAMPOLINE_INIT.
4501 
4502    The trampoline should set the static chain pointer to value placed
4503    into the trampoline and should branch to the specified routine.
4504    To make the normal indirect-subroutine calling convention work,
4505    the trampoline must look like a function descriptor; the first
4506    word being the target address and the second being the target's
4507    global pointer.
4508 
4509    We abuse the concept of a global pointer by arranging for it
4510    to point to the data we need to load.  The complete trampoline
4511    has the following form:
4512 
4513 		+-------------------+ \
4514 	TRAMP:	| __ia64_trampoline | |
4515 		+-------------------+  > fake function descriptor
4516 		| TRAMP+16          | |
4517 		+-------------------+ /
4518 		| target descriptor |
4519 		+-------------------+
4520 		| static link	    |
4521 		+-------------------+
4522 */
4523 
4524 static void
ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
4526 {
4527   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
4528   rtx addr, addr_reg, tramp, eight = GEN_INT (8);
4529 
  /* The Intel assembler requires that the global __ia64_trampoline symbol
     be declared explicitly.  */
4532   if (!TARGET_GNU_AS)
4533     {
4534       static bool declared_ia64_trampoline = false;
4535 
4536       if (!declared_ia64_trampoline)
4537 	{
4538 	  declared_ia64_trampoline = true;
4539 	  (*targetm.asm_out.globalize_label) (asm_out_file,
4540 					      "__ia64_trampoline");
4541 	}
4542     }
4543 
4544   /* Make sure addresses are Pmode even if we are in ILP32 mode. */
4545   addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
4546   fnaddr = convert_memory_address (Pmode, fnaddr);
4547   static_chain = convert_memory_address (Pmode, static_chain);
4548 
4549   /* Load up our iterator.  */
4550   addr_reg = copy_to_reg (addr);
4551   m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
4552 
4553   /* The first two words are the fake descriptor:
4554      __ia64_trampoline, ADDR+16.  */
4555   tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4556   if (TARGET_ABI_OPEN_VMS)
4557     {
4558       /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4559 	 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4560 	 relocation against function symbols to make it identical to the
4561 	 LTOFF_FPTR22 relocation.  Emit the latter directly to stay within
4562 	 strict ELF and dereference to get the bare code address.  */
4563       rtx reg = gen_reg_rtx (Pmode);
4564       SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4565       emit_move_insn (reg, tramp);
4566       emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4567       tramp = reg;
4568    }
4569   emit_move_insn (m_tramp, tramp);
4570   emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4571   m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4572 
4573   emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (Pmode, addr, 16)));
4574   emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4575   m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4576 
4577   /* The third word is the target descriptor.  */
4578   emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
4579   emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4580   m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4581 
4582   /* The fourth word is the static chain.  */
4583   emit_move_insn (m_tramp, static_chain);
4584 }
4585 
4586 /* Do any needed setup for a variadic function.  CUM has not been updated
4587    for the last named argument, which is given by ARG.
4588 
4589    We generate the actual spill instructions during prologue generation.  */
4590 
4591 static void
ia64_setup_incoming_varargs (cumulative_args_t cum,
			     const function_arg_info &arg,
			     int *pretend_size,
			     int second_time ATTRIBUTE_UNUSED)
4596 {
4597   CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum);
4598 
4599   /* Skip the current argument.  */
4600   ia64_function_arg_advance (pack_cumulative_args (&next_cum), arg);
4601 
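  /* For example, assuming MAX_ARGUMENT_SLOTS is 8 and UNITS_PER_WORD is 8:
     a varargs function with two named integer arguments leaves
     next_cum.words == 2, so n == 6, *pretend_size == 48, and the prologue
     later spills the last six argument registers.  */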
4602   if (next_cum.words < MAX_ARGUMENT_SLOTS)
4603     {
4604       int n = MAX_ARGUMENT_SLOTS - next_cum.words;
4605       *pretend_size = n * UNITS_PER_WORD;
4606       cfun->machine->n_varargs = n;
4607     }
4608 }
4609 
/* Check whether TYPE is a homogeneous floating point aggregate.  If
   it is, return the mode of the floating point type that appears
   in all leaves.  If it is not, return VOIDmode.

   An aggregate is a homogeneous floating point aggregate if all
   fields/elements in it have the same floating point type (e.g.,
   SFmode).  128-bit quad-precision floats are excluded.

   Variable sized aggregates should never arrive here, since we should
   have already decided to pass them by reference.  Top-level zero-sized
   aggregates are excluded because our parallels crash the middle-end.  */
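
/* For instance, struct { float x; float y; float z; } would yield SFmode
   and struct { double d[4]; } would yield DFmode, while struct { float f;
   double d; } mixes element types and therefore yields VOIDmode.  */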
4621 
4622 static machine_mode
hfa_element_mode (const_tree type, bool nested)
4624 {
4625   machine_mode element_mode = VOIDmode;
4626   machine_mode mode;
4627   enum tree_code code = TREE_CODE (type);
4628   int know_element_mode = 0;
4629   tree t;
4630 
4631   if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4632     return VOIDmode;
4633 
4634   switch (code)
4635     {
4636     case VOID_TYPE:	case INTEGER_TYPE:	case ENUMERAL_TYPE:
4637     case BOOLEAN_TYPE:	case POINTER_TYPE:
4638     case OFFSET_TYPE:	case REFERENCE_TYPE:	case METHOD_TYPE:
4639     case LANG_TYPE:		case FUNCTION_TYPE:
4640       return VOIDmode;
4641 
4642       /* Fortran complex types are supposed to be HFAs, so we need to handle
4643 	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
4644 	 types though.  */
4645     case COMPLEX_TYPE:
4646       if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
4647 	  && TYPE_MODE (type) != TCmode)
4648 	return GET_MODE_INNER (TYPE_MODE (type));
4649       else
4650 	return VOIDmode;
4651 
4652     case REAL_TYPE:
4653       /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4654 	 mode if this is contained within an aggregate.  */
4655       if (nested && TYPE_MODE (type) != TFmode)
4656 	return TYPE_MODE (type);
4657       else
4658 	return VOIDmode;
4659 
4660     case ARRAY_TYPE:
4661       return hfa_element_mode (TREE_TYPE (type), 1);
4662 
4663     case RECORD_TYPE:
4664     case UNION_TYPE:
4665     case QUAL_UNION_TYPE:
4666       for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
4667 	{
4668 	  if (TREE_CODE (t) != FIELD_DECL || DECL_FIELD_ABI_IGNORED (t))
4669 	    continue;
4670 
4671 	  mode = hfa_element_mode (TREE_TYPE (t), 1);
4672 	  if (know_element_mode)
4673 	    {
4674 	      if (mode != element_mode)
4675 		return VOIDmode;
4676 	    }
4677 	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4678 	    return VOIDmode;
4679 	  else
4680 	    {
4681 	      know_element_mode = 1;
4682 	      element_mode = mode;
4683 	    }
4684 	}
4685       return element_mode;
4686 
4687     default:
4688       /* If we reach here, we probably have some front-end specific type
4689 	 that the backend doesn't know about.  This can happen via the
4690 	 aggregate_value_p call in init_function_start.  All we can do is
4691 	 ignore unknown tree types.  */
4692       return VOIDmode;
4693     }
4694 
4695   return VOIDmode;
4696 }
4697 
4698 /* Return the number of words required to hold a quantity of TYPE and MODE
4699    when passed as an argument.  */
4700 static int
4701 ia64_function_arg_words (const_tree type, machine_mode mode)
4702 {
4703   int words;
4704 
4705   if (mode == BLKmode)
4706     words = int_size_in_bytes (type);
4707   else
4708     words = GET_MODE_SIZE (mode);
4709 
4710   return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;  /* round up */
4711 }
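
/* Example (added note, assuming UNITS_PER_WORD == 8): a 12-byte BLKmode
   aggregate needs (12 + 7) / 8 = 2 argument slots, while a DImode value
   needs exactly 1.  */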
4712 
4713 /* Return the number of registers that should be skipped so the current
4714    argument (described by TYPE and WORDS) will be properly aligned.
4715 
4716    Integer and float arguments larger than 8 bytes start at the next
4717    even boundary.  Aggregates larger than 8 bytes start at the next
4718    even boundary if the aggregate has 16 byte alignment.  Note that
4719    in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4720    but are still to be aligned in registers.
4721 
4722    ??? The ABI does not specify how to handle aggregates with
4723    alignment from 9 to 15 bytes, or greater than 16.  We handle them
4724    all as if they had 16 byte alignment.  Such aggregates can occur
4725    only if gcc extensions are used.  */
4726 static int
4727 ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
4728 			  const_tree type, int words)
4729 {
4730   /* No registers are skipped on VMS.  */
4731   if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
4732     return 0;
4733 
4734   if (type
4735       && TREE_CODE (type) != INTEGER_TYPE
4736       && TREE_CODE (type) != REAL_TYPE)
4737     return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4738   else
4739     return words > 1;
4740 }
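
/* Example (added note): with cum->words == 1 (an odd slot) and a 16-byte
   aligned aggregate, TYPE_ALIGN exceeds 64 bits, so one slot is skipped and
   the argument starts at the even slot 2.  A single DImode integer in the
   same position is not realigned, since it occupies only one word.  */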
4741 
4742 /* Return rtx for register where argument is passed, or zero if it is passed
4743    on the stack.  */
4744 /* ??? 128-bit quad-precision floats are always passed in general
4745    registers.  */
4746 
4747 static rtx
4748 ia64_function_arg_1 (cumulative_args_t cum_v, const function_arg_info &arg,
4749 		     bool incoming)
4750 {
4751   const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4752 
4753   int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
4754   int words = ia64_function_arg_words (arg.type, arg.mode);
4755   int offset = ia64_function_arg_offset (cum, arg.type, words);
4756   machine_mode hfa_mode = VOIDmode;
4757 
4758   /* For OPEN VMS, emit the instruction setting up the argument register here,
4759      when we know this will be together with the other arguments setup related
4760      insns.  This is not the conceptually best place to do this, but this is
4761      the easiest as we have convenient access to cumulative args info.  */
4762 
4763   if (TARGET_ABI_OPEN_VMS && arg.end_marker_p ())
4764     {
4765       unsigned HOST_WIDE_INT regval = cum->words;
4766       int i;
4767 
4768       for (i = 0; i < 8; i++)
4769 	regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4770 
4771       emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4772 		      GEN_INT (regval));
4773     }
4774 
4775   /* If all argument slots are used, then it must go on the stack.  */
4776   if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4777     return 0;
4778 
4779   /* On OpenVMS the argument is either in Rn or Fn.  */
4780   if (TARGET_ABI_OPEN_VMS)
4781     {
4782       if (FLOAT_MODE_P (arg.mode))
4783 	return gen_rtx_REG (arg.mode, FR_ARG_FIRST + cum->words);
4784       else
4785 	return gen_rtx_REG (arg.mode, basereg + cum->words);
4786     }
4787 
4788   /* Check for and handle homogeneous FP aggregates.  */
4789   if (arg.type)
4790     hfa_mode = hfa_element_mode (arg.type, 0);
4791 
4792   /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
4793      and unprototyped hfas are passed specially.  */
4794   if (hfa_mode != VOIDmode && (! cum->prototype || arg.named))
4795     {
4796       rtx loc[16];
4797       int i = 0;
4798       int fp_regs = cum->fp_regs;
4799       int int_regs = cum->words + offset;
4800       int hfa_size = GET_MODE_SIZE (hfa_mode);
4801       int byte_size;
4802       int args_byte_size;
4803 
4804       /* If prototyped, pass it in FR regs then GR regs.
4805 	 If not prototyped, pass it in both FR and GR regs.
4806 
4807 	 If this is an SFmode aggregate, then it is possible to run out of
4808 	 FR regs while GR regs are still left.  In that case, we pass the
4809 	 remaining part in the GR regs.  */
4810 
4811       /* Fill the FP regs.  We do this always.  We stop if we reach the end
4812 	 of the argument, the last FP register, or the last argument slot.  */
4813 
4814       byte_size = arg.promoted_size_in_bytes ();
4815       args_byte_size = int_regs * UNITS_PER_WORD;
4816       offset = 0;
4817       for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4818 	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4819 	{
4820 	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4821 				      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4822 							      + fp_regs)),
4823 				      GEN_INT (offset));
4824 	  offset += hfa_size;
4825 	  args_byte_size += hfa_size;
4826 	  fp_regs++;
4827 	}
4828 
4829       /* If no prototype, then the whole thing must go in GR regs.  */
4830       if (! cum->prototype)
4831 	offset = 0;
4832       /* If this is an SFmode aggregate, then we might have some left over
4833 	 that needs to go in GR regs.  */
4834       else if (byte_size != offset)
4835 	int_regs += offset / UNITS_PER_WORD;
4836 
4837       /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */
4838 
4839       for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4840 	{
4841 	  machine_mode gr_mode = DImode;
4842 	  unsigned int gr_size;
4843 
4844 	  /* If we have an odd 4 byte hunk because we ran out of FR regs,
4845 	     then this goes in a GR reg left adjusted/little endian, right
4846 	     adjusted/big endian.  */
4847 	  /* ??? Currently this is handled wrong, because 4-byte hunks are
4848 	     always right adjusted/little endian.  */
4849 	  if (offset & 0x4)
4850 	    gr_mode = SImode;
4851 	  /* If we have an even 4 byte hunk because the aggregate is a
4852 	     multiple of 4 bytes in size, then this goes in a GR reg right
4853 	     adjusted/little endian.  */
4854 	  else if (byte_size - offset == 4)
4855 	    gr_mode = SImode;
4856 
4857 	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4858 				      gen_rtx_REG (gr_mode, (basereg
4859 							     + int_regs)),
4860 				      GEN_INT (offset));
4861 
4862 	  gr_size = GET_MODE_SIZE (gr_mode);
4863 	  offset += gr_size;
4864 	  if (gr_size == UNITS_PER_WORD
4865 	      || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4866 	    int_regs++;
4867 	  else if (gr_size > UNITS_PER_WORD)
4868 	    int_regs += gr_size / UNITS_PER_WORD;
4869 	}
4870       return gen_rtx_PARALLEL (arg.mode, gen_rtvec_v (i, loc));
4871     }
4872 
4873   /* Integral and aggregates go in general registers.  If we have run out of
4874      FR registers, then FP values must also go in general registers.  This can
4875      happen when we have a SFmode HFA.  */
4876   else if (arg.mode == TFmode || arg.mode == TCmode
4877 	   || !FLOAT_MODE_P (arg.mode)
4878 	   || cum->fp_regs == MAX_ARGUMENT_SLOTS)
4879     {
4880       int byte_size = arg.promoted_size_in_bytes ();
4881       if (BYTES_BIG_ENDIAN
4882 	  && (arg.mode == BLKmode || arg.aggregate_type_p ())
4883 	  && byte_size < UNITS_PER_WORD
4884 	  && byte_size > 0)
4885 	{
4886 	  rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4887 					  gen_rtx_REG (DImode,
4888 						       (basereg + cum->words
4889 							+ offset)),
4890 					  const0_rtx);
4891 	  return gen_rtx_PARALLEL (arg.mode, gen_rtvec (1, gr_reg));
4892 	}
4893       else
4894 	return gen_rtx_REG (arg.mode, basereg + cum->words + offset);
4895 
4896     }
4897 
4898   /* If there is a prototype, then FP values go in a FR register when
4899      named, and in a GR register when unnamed.  */
4900   else if (cum->prototype)
4901     {
4902       if (arg.named)
4903 	return gen_rtx_REG (arg.mode, FR_ARG_FIRST + cum->fp_regs);
4904       /* In big-endian mode, an anonymous SFmode value must be represented
4905          as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4906 	 the value into the high half of the general register.  */
4907       else if (BYTES_BIG_ENDIAN && arg.mode == SFmode)
4908 	return gen_rtx_PARALLEL (arg.mode,
4909 		 gen_rtvec (1,
4910                    gen_rtx_EXPR_LIST (VOIDmode,
4911 		     gen_rtx_REG (DImode, basereg + cum->words + offset),
4912 				      const0_rtx)));
4913       else
4914 	return gen_rtx_REG (arg.mode, basereg + cum->words + offset);
4915     }
4916   /* If there is no prototype, then FP values go in both FR and GR
4917      registers.  */
4918   else
4919     {
4920       /* See comment above.  */
4921       machine_mode inner_mode =
4922 	(BYTES_BIG_ENDIAN && arg.mode == SFmode) ? DImode : arg.mode;
4923 
4924       rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4925 				      gen_rtx_REG (arg.mode, (FR_ARG_FIRST
4926 							  + cum->fp_regs)),
4927 				      const0_rtx);
4928       rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4929 				      gen_rtx_REG (inner_mode,
4930 						   (basereg + cum->words
4931 						    + offset)),
4932 				      const0_rtx);
4933 
4934       return gen_rtx_PARALLEL (arg.mode, gen_rtvec (2, fp_reg, gr_reg));
4935     }
4936 }
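
/* Worked example (added note, assuming the first FP argument register is f8
   and the argument area is empty): a named, prototyped struct { double a, b,
   c; } is an HFA with hfa_mode == DFmode, so the code above returns
     (parallel [(expr_list (reg:DF f8)  (const_int 0))
		(expr_list (reg:DF f9)  (const_int 8))
		(expr_list (reg:DF f10) (const_int 16))])
   i.e. each element lands in its own FP argument register.  */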
4937 
4938 /* Implement TARGET_FUNCTION_ARG target hook.  */
4939 
4940 static rtx
4941 ia64_function_arg (cumulative_args_t cum, const function_arg_info &arg)
4942 {
4943   return ia64_function_arg_1 (cum, arg, false);
4944 }
4945 
4946 /* Implement TARGET_FUNCTION_INCOMING_ARG target hook.  */
4947 
4948 static rtx
4949 ia64_function_incoming_arg (cumulative_args_t cum,
4950 			    const function_arg_info &arg)
4951 {
4952   return ia64_function_arg_1 (cum, arg, true);
4953 }
4954 
4955 /* Return the number of bytes, at the beginning of the argument, that must be
4956    put in registers.  0 if the argument is entirely in registers or entirely
4957    in memory.  */
4958 
4959 static int
4960 ia64_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
4961 {
4962   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4963 
4964   int words = ia64_function_arg_words (arg.type, arg.mode);
4965   int offset = ia64_function_arg_offset (cum, arg.type, words);
4966 
4967   /* If all argument slots are used, then it must go on the stack.  */
4968   if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4969     return 0;
4970 
4971   /* It doesn't matter whether the argument goes in FR or GR regs.  If
4972      it fits within the 8 argument slots, then it goes entirely in
4973      registers.  If it extends past the last argument slot, then the rest
4974      goes on the stack.  */
4975 
4976   if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4977     return 0;
4978 
4979   return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
4980 }
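
/* Example (added note): if six of the eight slots are already in use and the
   next argument needs four words, the first (8 - 6) * 8 = 16 bytes go in
   registers and the remaining 16 bytes go on the stack.  */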
4981 
4982 /* Return ivms_arg_type based on machine_mode.  */
4983 
4984 static enum ivms_arg_type
4985 ia64_arg_type (machine_mode mode)
4986 {
4987   switch (mode)
4988     {
4989     case E_SFmode:
4990       return FS;
4991     case E_DFmode:
4992       return FT;
4993     default:
4994       return I64;
4995     }
4996 }
4997 
4998 /* Update CUM to point after this argument.  This is patterned after
4999    ia64_function_arg.  */
5000 
5001 static void
5002 ia64_function_arg_advance (cumulative_args_t cum_v,
5003 			   const function_arg_info &arg)
5004 {
5005   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5006   int words = ia64_function_arg_words (arg.type, arg.mode);
5007   int offset = ia64_function_arg_offset (cum, arg.type, words);
5008   machine_mode hfa_mode = VOIDmode;
5009 
5010   /* If all arg slots are already full, then there is nothing to do.  */
5011   if (cum->words >= MAX_ARGUMENT_SLOTS)
5012     {
5013       cum->words += words + offset;
5014       return;
5015     }
5016 
5017   cum->atypes[cum->words] = ia64_arg_type (arg.mode);
5018   cum->words += words + offset;
5019 
5020   /* On OpenVMS the argument is either in Rn or Fn.  */
5021   if (TARGET_ABI_OPEN_VMS)
5022     {
5023       cum->int_regs = cum->words;
5024       cum->fp_regs = cum->words;
5025       return;
5026     }
5027 
5028   /* Check for and handle homogeneous FP aggregates.  */
5029   if (arg.type)
5030     hfa_mode = hfa_element_mode (arg.type, 0);
5031 
5032   /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
5033      and unprototyped hfas are passed specially.  */
5034   if (hfa_mode != VOIDmode && (! cum->prototype || arg.named))
5035     {
5036       int fp_regs = cum->fp_regs;
5037       /* This is the original value of cum->words + offset.  */
5038       int int_regs = cum->words - words;
5039       int hfa_size = GET_MODE_SIZE (hfa_mode);
5040       int byte_size;
5041       int args_byte_size;
5042 
5043       /* If prototyped, pass it in FR regs then GR regs.
5044 	 If not prototyped, pass it in both FR and GR regs.
5045 
5046 	 If this is an SFmode aggregate, then it is possible to run out of
5047 	 FR regs while GR regs are still left.  In that case, we pass the
5048 	 remaining part in the GR regs.  */
5049 
5050       /* Fill the FP regs.  We do this always.  We stop if we reach the end
5051 	 of the argument, the last FP register, or the last argument slot.  */
5052 
5053       byte_size = arg.promoted_size_in_bytes ();
5054       args_byte_size = int_regs * UNITS_PER_WORD;
5055       offset = 0;
5056       for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
5057 	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
5058 	{
5059 	  offset += hfa_size;
5060 	  args_byte_size += hfa_size;
5061 	  fp_regs++;
5062 	}
5063 
5064       cum->fp_regs = fp_regs;
5065     }
5066 
5067   /* Integral and aggregates go in general registers.  So do TFmode FP values.
5068      If we have run out of FR registers, then other FP values must also go in
5069      general registers.  This can happen when we have a SFmode HFA.  */
5070   else if (arg.mode == TFmode || arg.mode == TCmode
5071            || !FLOAT_MODE_P (arg.mode)
5072 	   || cum->fp_regs == MAX_ARGUMENT_SLOTS)
5073     cum->int_regs = cum->words;
5074 
5075   /* If there is a prototype, then FP values go in a FR register when
5076      named, and in a GR register when unnamed.  */
5077   else if (cum->prototype)
5078     {
5079       if (! arg.named)
5080 	cum->int_regs = cum->words;
5081       else
5082 	/* ??? Complex types should not reach here.  */
5083 	cum->fp_regs
5084 	  += (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5085     }
5086   /* If there is no prototype, then FP values go in both FR and GR
5087      registers.  */
5088   else
5089     {
5090       /* ??? Complex types should not reach here.  */
5091       cum->fp_regs
5092 	+= (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5093       cum->int_regs = cum->words;
5094     }
5095 }
5096 
5097 /* Arguments with alignment larger than 8 bytes start at the next even
5098    boundary.  On ILP32 HPUX, TFmode arguments start on the next even boundary
5099    even though their normal alignment is 8 bytes.  See ia64_function_arg.  */
5100 
5101 static unsigned int
5102 ia64_function_arg_boundary (machine_mode mode, const_tree type)
5103 {
5104   if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
5105     return PARM_BOUNDARY * 2;
5106 
5107   if (type)
5108     {
5109       if (TYPE_ALIGN (type) > PARM_BOUNDARY)
5110         return PARM_BOUNDARY * 2;
5111       else
5112         return PARM_BOUNDARY;
5113     }
5114 
5115   if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
5116     return PARM_BOUNDARY * 2;
5117   else
5118     return PARM_BOUNDARY;
5119 }
5120 
5121 /* True if it is OK to do sibling call optimization for the specified
5122    call expression EXP.  DECL will be the called function, or NULL if
5123    this is an indirect call.  */
5124 static bool
5125 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5126 {
5127   /* We can't perform a sibcall if the current function has the syscall_linkage
5128      attribute.  */
5129   if (lookup_attribute ("syscall_linkage",
5130 			TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
5131     return false;
5132 
5133   /* We must always return with our current GP.  This means we can
5134      only sibcall to functions defined in the current module unless
5135      TARGET_CONST_GP is set to true.  */
5136   return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
5137 }
5138 
5139 
5140 /* Implement va_arg.  */
5141 
5142 static tree
5143 ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
5144 		      gimple_seq *post_p)
5145 {
5146   /* Variable sized types are passed by reference.  */
5147   if (pass_va_arg_by_reference (type))
5148     {
5149       tree ptrtype = build_pointer_type (type);
5150       tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
5151       return build_va_arg_indirect_ref (addr);
5152     }
5153 
5154   /* Aggregate arguments with alignment larger than 8 bytes start at
5155      the next even boundary.  Integer and floating point arguments
5156      do so if they are larger than 8 bytes, whether or not they are
5157      also aligned larger than 8 bytes.  */
5158   if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
5159       ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
5160     {
5161       tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1);
5162       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5163 		  build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD));
5164       gimplify_assign (unshare_expr (valist), t, pre_p);
5165     }
5166 
5167   return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5168 }
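
/* Example (added note): for a 16-byte aligned aggregate the code above
   effectively emits valist = (valist + 15) & -16 before the standard va_arg
   expansion, so the value is fetched starting at an even slot.  */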
5169 
5170 /* Return true if the function return value is returned in memory.  Return
5171    false if it is in a register.  */
5172 
5173 static bool
5174 ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
5175 {
5176   machine_mode mode;
5177   machine_mode hfa_mode;
5178   HOST_WIDE_INT byte_size;
5179 
5180   mode = TYPE_MODE (valtype);
5181   byte_size = GET_MODE_SIZE (mode);
5182   if (mode == BLKmode)
5183     {
5184       byte_size = int_size_in_bytes (valtype);
5185       if (byte_size < 0)
5186 	return true;
5187     }
5188 
5189   /* Hfa's with up to 8 elements are returned in the FP argument registers.  */
5190 
5191   hfa_mode = hfa_element_mode (valtype, 0);
5192   if (hfa_mode != VOIDmode)
5193     {
5194       int hfa_size = GET_MODE_SIZE (hfa_mode);
5195 
5196       if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
5197 	return true;
5198       else
5199 	return false;
5200     }
5201   else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
5202     return true;
5203   else
5204     return false;
5205 }
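
/* Examples (added note): struct { float f[8]; } is an HFA of 8 elements and
   is returned in FP registers, while struct { float f[9]; } has more than
   MAX_ARGUMENT_SLOTS elements and is returned in memory.  A plain 40-byte
   aggregate exceeds UNITS_PER_WORD * MAX_INT_RETURN_SLOTS (32 bytes if
   MAX_INT_RETURN_SLOTS is 4) and also goes to memory.  */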
5206 
5207 /* Return rtx for register that holds the function return value.  */
5208 
5209 static rtx
5210 ia64_function_value (const_tree valtype,
5211 		     const_tree fn_decl_or_type,
5212 		     bool outgoing ATTRIBUTE_UNUSED)
5213 {
5214   machine_mode mode;
5215   machine_mode hfa_mode;
5216   int unsignedp;
5217   const_tree func = fn_decl_or_type;
5218 
5219   if (fn_decl_or_type
5220       && !DECL_P (fn_decl_or_type))
5221     func = NULL;
5222 
5223   mode = TYPE_MODE (valtype);
5224   hfa_mode = hfa_element_mode (valtype, 0);
5225 
5226   if (hfa_mode != VOIDmode)
5227     {
5228       rtx loc[8];
5229       int i;
5230       int hfa_size;
5231       int byte_size;
5232       int offset;
5233 
5234       hfa_size = GET_MODE_SIZE (hfa_mode);
5235       byte_size = ((mode == BLKmode)
5236 		   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
5237       offset = 0;
5238       for (i = 0; offset < byte_size; i++)
5239 	{
5240 	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5241 				      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
5242 				      GEN_INT (offset));
5243 	  offset += hfa_size;
5244 	}
5245       return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5246     }
5247   else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
5248     return gen_rtx_REG (mode, FR_ARG_FIRST);
5249   else
5250     {
5251       bool need_parallel = false;
5252 
5253       /* In big-endian mode, we need to manage the layout of aggregates
5254 	 in the registers so that we get the bits properly aligned in
5255 	 the highpart of the registers.  */
5256       if (BYTES_BIG_ENDIAN
5257 	  && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
5258 	need_parallel = true;
5259 
5260       /* Something like struct S { long double x; char a[0] } is not an
5261 	 HFA structure, and therefore doesn't go in fp registers.  But
5262 	 the middle-end will give it XFmode anyway, and XFmode values
5263 	 don't normally fit in integer registers.  So we need to smuggle
5264 	 the value inside a parallel.  */
5265       else if (mode == XFmode || mode == XCmode || mode == RFmode)
5266 	need_parallel = true;
5267 
5268       if (need_parallel)
5269 	{
5270 	  rtx loc[8];
5271 	  int offset;
5272 	  int bytesize;
5273 	  int i;
5274 
5275 	  offset = 0;
5276 	  bytesize = int_size_in_bytes (valtype);
5277 	  /* An empty PARALLEL is invalid here, but the return value
5278 	     doesn't matter for empty structs.  */
5279 	  if (bytesize == 0)
5280 	    return gen_rtx_REG (mode, GR_RET_FIRST);
5281 	  for (i = 0; offset < bytesize; i++)
5282 	    {
5283 	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5284 					  gen_rtx_REG (DImode,
5285 						       GR_RET_FIRST + i),
5286 					  GEN_INT (offset));
5287 	      offset += UNITS_PER_WORD;
5288 	    }
5289 	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5290 	}
5291 
5292       mode = promote_function_mode (valtype, mode, &unsignedp,
5293                                     func ? TREE_TYPE (func) : NULL_TREE,
5294                                     true);
5295 
5296       return gen_rtx_REG (mode, GR_RET_FIRST);
5297     }
5298 }
5299 
5300 /* Worker function for TARGET_LIBCALL_VALUE.  */
5301 
5302 static rtx
5303 ia64_libcall_value (machine_mode mode,
5304 		    const_rtx fun ATTRIBUTE_UNUSED)
5305 {
5306   return gen_rtx_REG (mode,
5307 		      (((GET_MODE_CLASS (mode) == MODE_FLOAT
5308 			 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5309 			&& (mode) != TFmode)
5310 		       ? FR_RET_FIRST : GR_RET_FIRST));
5311 }
5312 
5313 /* Worker function for FUNCTION_VALUE_REGNO_P.  */
5314 
5315 static bool
5316 ia64_function_value_regno_p (const unsigned int regno)
5317 {
5318   return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
5319           || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
5320 }
5321 
5322 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
5323    We need to emit DTP-relative relocations.  */
5324 
5325 static void
5326 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
5327 {
5328   gcc_assert (size == 4 || size == 8);
5329   if (size == 4)
5330     fputs ("\tdata4.ua\t@dtprel(", file);
5331   else
5332     fputs ("\tdata8.ua\t@dtprel(", file);
5333   output_addr_const (file, x);
5334   fputs (")", file);
5335 }
5336 
5337 /* Print a memory address as an operand to reference that memory location.  */
5338 
5339 /* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
5340    also call this from ia64_print_operand for memory addresses.  */
5341 
5342 static void
5343 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
5344 			    machine_mode /*mode*/,
5345 			    rtx address ATTRIBUTE_UNUSED)
5346 {
5347 }
5348 
5349 /* Print an operand to an assembler instruction.
5350    C	Swap and print a comparison operator.
5351    D	Print an FP comparison operator.
5352    E    Print 32 - constant, for SImode shifts as extract.
5353    e    Print 64 - constant, for DImode rotates.
5354    F	A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
5355         a floating point register emitted normally.
5356    G	A floating point constant.
5357    I	Invert a predicate register by adding 1.
5358    J    Select the proper predicate register for a condition.
5359    j    Select the inverse predicate register for a condition.
5360    O	Append .acq for volatile load.
5361    P	Postincrement of a MEM.
5362    Q	Append .rel for volatile store.
5363    R	Print .s .d or nothing for a single, double or no truncation.
5364    S	Shift amount for shladd instruction.
5365    T	Print an 8-bit sign extended number (K) as a 32-bit unsigned number
5366 	for Intel assembler.
5367    U	Print an 8-bit sign extended number (K) as a 64-bit unsigned number
5368 	for Intel assembler.
5369    X	A pair of floating point registers.
5370    r	Print register name, or constant 0 as r0.  HP compatibility for
5371 	Linux kernel.
5372    v    Print vector constant value as an 8-byte integer value.  */
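
/* Examples of the codes above (added note): %C on (lt ...) prints "gt", the
   swapped comparison; %E on (const_int 8) prints 24; %e on (const_int 8)
   prints 56; %r on (const_int 0) prints "r0".  */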
5373 
5374 static void
5375 ia64_print_operand (FILE * file, rtx x, int code)
5376 {
5377   const char *str;
5378 
5379   switch (code)
5380     {
5381     case 0:
5382       /* Handled below.  */
5383       break;
5384 
5385     case 'C':
5386       {
5387 	enum rtx_code c = swap_condition (GET_CODE (x));
5388 	fputs (GET_RTX_NAME (c), file);
5389 	return;
5390       }
5391 
5392     case 'D':
5393       switch (GET_CODE (x))
5394 	{
5395 	case NE:
5396 	  str = "neq";
5397 	  break;
5398 	case UNORDERED:
5399 	  str = "unord";
5400 	  break;
5401 	case ORDERED:
5402 	  str = "ord";
5403 	  break;
5404 	case UNLT:
5405 	  str = "nge";
5406 	  break;
5407 	case UNLE:
5408 	  str = "ngt";
5409 	  break;
5410 	case UNGT:
5411 	  str = "nle";
5412 	  break;
5413 	case UNGE:
5414 	  str = "nlt";
5415 	  break;
5416 	case UNEQ:
5417 	case LTGT:
5418 	  gcc_unreachable ();
5419 	default:
5420 	  str = GET_RTX_NAME (GET_CODE (x));
5421 	  break;
5422 	}
5423       fputs (str, file);
5424       return;
5425 
5426     case 'E':
5427       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
5428       return;
5429 
5430     case 'e':
5431       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
5432       return;
5433 
5434     case 'F':
5435       if (x == CONST0_RTX (GET_MODE (x)))
5436 	str = reg_names [FR_REG (0)];
5437       else if (x == CONST1_RTX (GET_MODE (x)))
5438 	str = reg_names [FR_REG (1)];
5439       else
5440 	{
5441 	  gcc_assert (GET_CODE (x) == REG);
5442 	  str = reg_names [REGNO (x)];
5443 	}
5444       fputs (str, file);
5445       return;
5446 
5447     case 'G':
5448       {
5449 	long val[4];
5450 	real_to_target (val, CONST_DOUBLE_REAL_VALUE (x), GET_MODE (x));
5451 	if (GET_MODE (x) == SFmode)
5452 	  fprintf (file, "0x%08lx", val[0] & 0xffffffff);
5453 	else if (GET_MODE (x) == DFmode)
5454 	  fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
5455 					  & 0xffffffff,
5456 					 (WORDS_BIG_ENDIAN ? val[1] : val[0])
5457 					  & 0xffffffff);
5458 	else
5459 	  output_operand_lossage ("invalid %%G mode");
5460       }
5461       return;
5462 
5463     case 'I':
5464       fputs (reg_names [REGNO (x) + 1], file);
5465       return;
5466 
5467     case 'J':
5468     case 'j':
5469       {
5470 	unsigned int regno = REGNO (XEXP (x, 0));
5471 	if (GET_CODE (x) == EQ)
5472 	  regno += 1;
5473 	if (code == 'j')
5474 	  regno ^= 1;
5475         fputs (reg_names [regno], file);
5476       }
5477       return;
5478 
5479     case 'O':
5480       if (MEM_VOLATILE_P (x))
5481 	fputs(".acq", file);
5482       return;
5483 
5484     case 'P':
5485       {
5486 	HOST_WIDE_INT value;
5487 
5488 	switch (GET_CODE (XEXP (x, 0)))
5489 	  {
5490 	  default:
5491 	    return;
5492 
5493 	  case POST_MODIFY:
5494 	    x = XEXP (XEXP (XEXP (x, 0), 1), 1);
5495 	    if (GET_CODE (x) == CONST_INT)
5496 	      value = INTVAL (x);
5497 	    else
5498 	      {
5499 		gcc_assert (GET_CODE (x) == REG);
5500 		fprintf (file, ", %s", reg_names[REGNO (x)]);
5501 		return;
5502 	      }
5503 	    break;
5504 
5505 	  case POST_INC:
5506 	    value = GET_MODE_SIZE (GET_MODE (x));
5507 	    break;
5508 
5509 	  case POST_DEC:
5510 	    value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
5511 	    break;
5512 	  }
5513 
5514 	fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
5515 	return;
5516       }
5517 
5518     case 'Q':
5519       if (MEM_VOLATILE_P (x))
5520 	fputs(".rel", file);
5521       return;
5522 
5523     case 'R':
5524       if (x == CONST0_RTX (GET_MODE (x)))
5525 	fputs(".s", file);
5526       else if (x == CONST1_RTX (GET_MODE (x)))
5527 	fputs(".d", file);
5528       else if (x == CONST2_RTX (GET_MODE (x)))
5529 	;
5530       else
5531 	output_operand_lossage ("invalid %%R value");
5532       return;
5533 
5534     case 'S':
5535       fprintf (file, "%d", exact_log2 (INTVAL (x)));
5536       return;
5537 
5538     case 'T':
5539       if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5540 	{
5541 	  fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
5542 	  return;
5543 	}
5544       break;
5545 
5546     case 'U':
5547       if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5548 	{
5549 	  const char *prefix = "0x";
5550 	  if (INTVAL (x) & 0x80000000)
5551 	    {
5552 	      fprintf (file, "0xffffffff");
5553 	      prefix = "";
5554 	    }
5555 	  fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
5556 	  return;
5557 	}
5558       break;
5559 
5560     case 'X':
5561       {
5562 	unsigned int regno = REGNO (x);
5563 	fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
5564       }
5565       return;
5566 
5567     case 'r':
5568       /* If this operand is the constant zero, write it as register zero.
5569 	 Any register, zero, or CONST_INT value is OK here.  */
5570       if (GET_CODE (x) == REG)
5571 	fputs (reg_names[REGNO (x)], file);
5572       else if (x == CONST0_RTX (GET_MODE (x)))
5573 	fputs ("r0", file);
5574       else if (GET_CODE (x) == CONST_INT)
5575 	output_addr_const (file, x);
5576       else
5577 	output_operand_lossage ("invalid %%r value");
5578       return;
5579 
5580     case 'v':
5581       gcc_assert (GET_CODE (x) == CONST_VECTOR);
5582       x = simplify_subreg (DImode, x, GET_MODE (x), 0);
5583       break;
5584 
5585     case '+':
5586       {
5587 	const char *which;
5588 
5589 	/* For conditional branches, returns or calls, substitute
5590 	   sptk, dptk, dpnt, or spnt for %s.  */
5591 	x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5592 	if (x)
5593 	  {
5594 	    int pred_val = profile_probability::from_reg_br_prob_note
5595 				 (XINT (x, 0)).to_reg_br_prob_base ();
5596 
5597 	    /* Guess top and bottom 10% statically predicted.  */
5598 	    if (pred_val < REG_BR_PROB_BASE / 50
5599 		&& br_prob_note_reliable_p (x))
5600 	      which = ".spnt";
5601 	    else if (pred_val < REG_BR_PROB_BASE / 2)
5602 	      which = ".dpnt";
5603 	    else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5604 		     || !br_prob_note_reliable_p (x))
5605 	      which = ".dptk";
5606 	    else
5607 	      which = ".sptk";
5608 	  }
5609 	else if (CALL_P (current_output_insn))
5610 	  which = ".sptk";
5611 	else
5612 	  which = ".dptk";
5613 
5614 	fputs (which, file);
5615 	return;
5616       }
5617 
5618     case ',':
5619       x = current_insn_predicate;
5620       if (x)
5621 	{
5622 	  unsigned int regno = REGNO (XEXP (x, 0));
5623 	  if (GET_CODE (x) == EQ)
5624 	    regno += 1;
5625           fprintf (file, "(%s) ", reg_names [regno]);
5626 	}
5627       return;
5628 
5629     default:
5630       output_operand_lossage ("ia64_print_operand: unknown code");
5631       return;
5632     }
5633 
5634   switch (GET_CODE (x))
5635     {
5636       /* This happens for the spill/restore instructions.  */
5637     case POST_INC:
5638     case POST_DEC:
5639     case POST_MODIFY:
5640       x = XEXP (x, 0);
5641       /* fall through */
5642 
5643     case REG:
5644       fputs (reg_names [REGNO (x)], file);
5645       break;
5646 
5647     case MEM:
5648       {
5649 	rtx addr = XEXP (x, 0);
5650 	if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
5651 	  addr = XEXP (addr, 0);
5652 	fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5653 	break;
5654       }
5655 
5656     default:
5657       output_addr_const (file, x);
5658       break;
5659     }
5660 
5661   return;
5662 }
5663 
5664 /* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P.  */
5665 
5666 static bool
5667 ia64_print_operand_punct_valid_p (unsigned char code)
5668 {
5669   return (code == '+' || code == ',');
5670 }
5671 
5672 /* Compute a (partial) cost for rtx X.  Return true if the complete
5673    cost has been computed, and false if subexpressions should be
5674    scanned.  In either case, *TOTAL contains the cost result.  */
5675 /* ??? This is incomplete.  */
5676 
5677 static bool
5678 ia64_rtx_costs (rtx x, machine_mode mode, int outer_code,
5679 		int opno ATTRIBUTE_UNUSED,
5680 		int *total, bool speed ATTRIBUTE_UNUSED)
5681 {
5682   int code = GET_CODE (x);
5683 
5684   switch (code)
5685     {
5686     case CONST_INT:
5687       switch (outer_code)
5688         {
5689         case SET:
5690 	  *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
5691 	  return true;
5692         case PLUS:
5693 	  if (satisfies_constraint_I (x))
5694 	    *total = 0;
5695 	  else if (satisfies_constraint_J (x))
5696 	    *total = 1;
5697 	  else
5698 	    *total = COSTS_N_INSNS (1);
5699 	  return true;
5700         default:
5701 	  if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
5702 	    *total = 0;
5703 	  else
5704 	    *total = COSTS_N_INSNS (1);
5705 	  return true;
5706 	}
5707 
5708     case CONST_DOUBLE:
5709       *total = COSTS_N_INSNS (1);
5710       return true;
5711 
5712     case CONST:
5713     case SYMBOL_REF:
5714     case LABEL_REF:
5715       *total = COSTS_N_INSNS (3);
5716       return true;
5717 
5718     case FMA:
5719       *total = COSTS_N_INSNS (4);
5720       return true;
5721 
5722     case MULT:
5723       /* For multiplies wider than HImode, we have to go to the FPU,
5724          which normally involves copies.  Plus there's the latency
5725          of the multiply itself, and the latency of the instructions to
5726          transfer integer regs to FP regs.  */
5727       if (FLOAT_MODE_P (mode))
5728 	*total = COSTS_N_INSNS (4);
5729       else if (GET_MODE_SIZE (mode) > 2)
5730         *total = COSTS_N_INSNS (10);
5731       else
5732 	*total = COSTS_N_INSNS (2);
5733       return true;
5734 
5735     case PLUS:
5736     case MINUS:
5737       if (FLOAT_MODE_P (mode))
5738 	{
5739 	  *total = COSTS_N_INSNS (4);
5740 	  return true;
5741 	}
5742       /* FALLTHRU */
5743 
5744     case ASHIFT:
5745     case ASHIFTRT:
5746     case LSHIFTRT:
5747       *total = COSTS_N_INSNS (1);
5748       return true;
5749 
5750     case DIV:
5751     case UDIV:
5752     case MOD:
5753     case UMOD:
5754       /* We make divide expensive, so that divide-by-constant will be
5755          optimized to a multiply.  */
5756       *total = COSTS_N_INSNS (60);
5757       return true;
5758 
5759     default:
5760       return false;
5761     }
5762 }
5763 
5764 /* Calculate the cost of moving data from a register in class FROM to
5765    one in class TO, using MODE.  */
5766 
5767 static int
5768 ia64_register_move_cost (machine_mode mode, reg_class_t from,
5769 			 reg_class_t to)
5770 {
5771   /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
5772   if (to == ADDL_REGS)
5773     to = GR_REGS;
5774   if (from == ADDL_REGS)
5775     from = GR_REGS;
5776 
5777   /* All costs are symmetric, so reduce cases by putting the
5778      lower number class as the destination.  */
5779   if (from < to)
5780     {
5781       reg_class_t tmp = to;
5782       to = from, from = tmp;
5783     }
5784 
5785   /* Moving from FR<->GR in XFmode must be more expensive than 2,
5786      so that we get secondary memory reloads.  Between FR_REGS,
5787      we have to make this at least as expensive as memory_move_cost
5788      to avoid spectacularly poor register class preferencing.  */
5789   if (mode == XFmode || mode == RFmode)
5790     {
5791       if (to != GR_REGS || from != GR_REGS)
5792         return memory_move_cost (mode, to, false);
5793       else
5794 	return 3;
5795     }
5796 
5797   switch (to)
5798     {
5799     case PR_REGS:
5800       /* Moving between PR registers takes two insns.  */
5801       if (from == PR_REGS)
5802 	return 3;
5803       /* Moving between PR and anything but GR is impossible.  */
5804       if (from != GR_REGS)
5805 	return memory_move_cost (mode, to, false);
5806       break;
5807 
5808     case BR_REGS:
5809       /* Moving between BR and anything but GR is impossible.  */
5810       if (from != GR_REGS && from != GR_AND_BR_REGS)
5811 	return memory_move_cost (mode, to, false);
5812       break;
5813 
5814     case AR_I_REGS:
5815     case AR_M_REGS:
5816       /* Moving between AR and anything but GR is impossible.  */
5817       if (from != GR_REGS)
5818 	return memory_move_cost (mode, to, false);
5819       break;
5820 
5821     case GR_REGS:
5822     case FR_REGS:
5823     case FP_REGS:
5824     case GR_AND_FR_REGS:
5825     case GR_AND_BR_REGS:
5826     case ALL_REGS:
5827       break;
5828 
5829     default:
5830       gcc_unreachable ();
5831     }
5832 
5833   return 2;
5834 }
5835 
5836 /* Calculate the cost of moving data of MODE from a register to or from
5837    memory.  */
5838 
5839 static int
5840 ia64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
5841 		       reg_class_t rclass,
5842 		       bool in ATTRIBUTE_UNUSED)
5843 {
5844   if (rclass == GENERAL_REGS
5845       || rclass == FR_REGS
5846       || rclass == FP_REGS
5847       || rclass == GR_AND_FR_REGS)
5848     return 4;
5849   else
5850     return 10;
5851 }
5852 
5853 /* Implement TARGET_PREFERRED_RELOAD_CLASS.  Place additional restrictions
5854    on RCLASS to use when copying X into that class.  */
5855 
5856 static reg_class_t
5857 ia64_preferred_reload_class (rtx x, reg_class_t rclass)
5858 {
5859   switch (rclass)
5860     {
5861     case FR_REGS:
5862     case FP_REGS:
5863       /* Don't allow volatile mem reloads into floating point registers.
5864 	 This is defined to force reload to choose the r/m case instead
5865 	 of the f/f case when reloading (set (reg fX) (mem/v)).  */
5866       if (MEM_P (x) && MEM_VOLATILE_P (x))
5867 	return NO_REGS;
5868 
5869       /* Force all unrecognized constants into the constant pool.  */
5870       if (CONSTANT_P (x))
5871 	return NO_REGS;
5872       break;
5873 
5874     case AR_M_REGS:
5875     case AR_I_REGS:
5876       if (!OBJECT_P (x))
5877 	return NO_REGS;
5878       break;
5879 
5880     default:
5881       break;
5882     }
5883 
5884   return rclass;
5885 }
5886 
5887 /* This function returns the register class required for a secondary
5888    register when copying between one of the registers in RCLASS, and X,
5889    using MODE.  A return value of NO_REGS means that no secondary register
5890    is required.  */
5891 
5892 enum reg_class
5893 ia64_secondary_reload_class (enum reg_class rclass,
5894 			     machine_mode mode ATTRIBUTE_UNUSED, rtx x)
5895 {
5896   int regno = -1;
5897 
5898   if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5899     regno = true_regnum (x);
5900 
5901   switch (rclass)
5902     {
5903     case BR_REGS:
5904     case AR_M_REGS:
5905     case AR_I_REGS:
5906       /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5907 	 interaction.  We end up with two pseudos with overlapping lifetimes
5908 	 both of which are equiv to the same constant, and both which need
5909 	 to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
5910 	 changes depending on the path length, which means the qty_first_reg
5911 	 check in make_regs_eqv can give different answers at different times.
5912 	 At some point I'll probably need a reload_indi pattern to handle
5913 	 this.
5914 
5915 	 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5916 	 wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
5917 	 non-general registers for good measure.  */
5918       if (regno >= 0 && ! GENERAL_REGNO_P (regno))
5919 	return GR_REGS;
5920 
5921       /* This is needed if a pseudo used as a call_operand gets spilled to a
5922 	 stack slot.  */
5923       if (GET_CODE (x) == MEM)
5924 	return GR_REGS;
5925       break;
5926 
5927     case FR_REGS:
5928     case FP_REGS:
5929       /* Need to go through general registers to get to other class regs.  */
5930       if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5931 	return GR_REGS;
5932 
5933       /* This can happen when a paradoxical subreg is an operand to the
5934 	 muldi3 pattern.  */
5935       /* ??? This shouldn't be necessary after instruction scheduling is
5936 	 enabled, because paradoxical subregs are not accepted by
5937 	 register_operand when INSN_SCHEDULING is defined.  Or alternatively,
5938 	 stop the paradoxical subreg stupidity in the *_operand functions
5939 	 in recog.c.  */
5940       if (GET_CODE (x) == MEM
5941 	  && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5942 	      || GET_MODE (x) == QImode))
5943 	return GR_REGS;
5944 
5945       /* This can happen because of the ior/and/etc patterns that accept FP
5946 	 registers as operands.  If the third operand is a constant, then it
5947 	 needs to be reloaded into a FP register.  */
5948       if (GET_CODE (x) == CONST_INT)
5949 	return GR_REGS;
5950 
5951       /* This can happen because of register elimination in a muldi3 insn.
5952 	 E.g. `26107 * (unsigned long)&u'.  */
5953       if (GET_CODE (x) == PLUS)
5954 	return GR_REGS;
5955       break;
5956 
5957     case PR_REGS:
5958       /* ??? This happens if we cse/gcse a BImode value across a call,
5959 	 and the function has a nonlocal goto.  This is because global
5960 	 does not allocate call crossing pseudos to hard registers when
5961 	 crtl->has_nonlocal_goto is true.  This is relatively
5962 	 common for C++ programs that use exceptions.  To reproduce,
5963 	 return NO_REGS and compile libstdc++.  */
5964       if (GET_CODE (x) == MEM)
5965 	return GR_REGS;
5966 
5967       /* This can happen when we take a BImode subreg of a DImode value,
5968 	 and that DImode value winds up in some non-GR register.  */
5969       if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5970 	return GR_REGS;
5971       break;
5972 
5973     default:
5974       break;
5975     }
5976 
5977   return NO_REGS;
5978 }
5979 
5980 
5981 /* Implement targetm.unspec_may_trap_p hook.  */
5982 static int
5983 ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5984 {
5985   switch (XINT (x, 1))
5986     {
5987     case UNSPEC_LDA:
5988     case UNSPEC_LDS:
5989     case UNSPEC_LDSA:
5990     case UNSPEC_LDCCLR:
5991     case UNSPEC_CHKACLR:
5992     case UNSPEC_CHKS:
5993       /* These unspecs are just wrappers.  */
5994       return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5995     }
5996 
5997   return default_unspec_may_trap_p (x, flags);
5998 }
5999 
6000 
6001 /* Parse the -mfixed-range= option string.  */
6002 
6003 static void
6004 fix_range (const char *const_str)
6005 {
6006   int i, first, last;
6007   char *str, *dash, *comma;
6008 
6009   /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
6010      REG2 are either register names or register numbers.  The effect
6011      of this option is to mark the registers in the range from REG1 to
6012      REG2 as ``fixed'' so they won't be used by the compiler.  This is
6013      used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */
6014 
6015   i = strlen (const_str);
6016   str = (char *) alloca (i + 1);
6017   memcpy (str, const_str, i + 1);
6018 
6019   while (1)
6020     {
6021       dash = strchr (str, '-');
6022       if (!dash)
6023 	{
6024 	  warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
6025 	  return;
6026 	}
6027       *dash = '\0';
6028 
6029       comma = strchr (dash + 1, ',');
6030       if (comma)
6031 	*comma = '\0';
6032 
6033       first = decode_reg_name (str);
6034       if (first < 0)
6035 	{
6036 	  warning (0, "unknown register name: %s", str);
6037 	  return;
6038 	}
6039 
6040       last = decode_reg_name (dash + 1);
6041       if (last < 0)
6042 	{
6043 	  warning (0, "unknown register name: %s", dash + 1);
6044 	  return;
6045 	}
6046 
6047       *dash = '-';
6048 
6049       if (first > last)
6050 	{
6051 	  warning (0, "%s-%s is an empty range", str, dash + 1);
6052 	  return;
6053 	}
6054 
6055       for (i = first; i <= last; ++i)
6056 	fixed_regs[i] = 1;
6057 
6058       if (!comma)
6059 	break;
6060 
6061       *comma = ',';
6062       str = comma + 1;
6063     }
6064 }
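
/* Usage example (added note): -mfixed-range=f32-f127 marks f32 through f127
   as fixed; several ranges may be given separated by commas, e.g.
   -mfixed-range=f32-f63,f96-f127.  */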
6065 
6066 /* Implement TARGET_OPTION_OVERRIDE.  */
6067 
6068 static void
6069 ia64_option_override (void)
6070 {
6071   unsigned int i;
6072   cl_deferred_option *opt;
6073   vec<cl_deferred_option> *v
6074     = (vec<cl_deferred_option> *) ia64_deferred_options;
6075 
6076   if (v)
6077     FOR_EACH_VEC_ELT (*v, i, opt)
6078       {
6079 	switch (opt->opt_index)
6080 	  {
6081 	  case OPT_mfixed_range_:
6082 	    fix_range (opt->arg);
6083 	    break;
6084 
6085 	  default:
6086 	    gcc_unreachable ();
6087 	  }
6088       }
6089 
6090   if (TARGET_AUTO_PIC)
6091     target_flags |= MASK_CONST_GP;
6092 
6093   /* Numerous experiments show that IRA based loop pressure
6094      calculation works better for RTL loop invariant motion on targets
6095      with enough (>= 32) registers.  It is an expensive optimization.
6096      So it is on only for peak performance.  */
6097   if (optimize >= 3)
6098     flag_ira_loop_pressure = 1;
6099 
6100 
6101   ia64_section_threshold = (global_options_set.x_g_switch_value
6102 			    ? g_switch_value
6103 			    : IA64_DEFAULT_GVALUE);
6104 
6105   init_machine_status = ia64_init_machine_status;
6106 
6107   if (flag_align_functions && !str_align_functions)
6108     str_align_functions = "64";
6109   if (flag_align_loops && !str_align_loops)
6110     str_align_loops = "32";
6111   if (TARGET_ABI_OPEN_VMS)
6112     flag_no_common = 1;
6113 
6114   ia64_override_options_after_change();
6115 }
6116 
6117 /* Implement targetm.override_options_after_change.  */
6118 
6119 static void
6120 ia64_override_options_after_change (void)
6121 {
6122   if (optimize >= 3
6123       && !global_options_set.x_flag_selective_scheduling
6124       && !global_options_set.x_flag_selective_scheduling2)
6125     {
6126       flag_selective_scheduling2 = 1;
6127       flag_sel_sched_pipelining = 1;
6128     }
6129   if (mflag_sched_control_spec == 2)
6130     {
6131       /* Control speculation is on by default for the selective scheduler,
6132          but not for the Haifa scheduler.  */
6133       mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
6134     }
6135   if (flag_sel_sched_pipelining && flag_auto_inc_dec)
6136     {
6137       /* FIXME: remove this once breaking auto-inc insns apart is
6138          implemented as a transformation.  */
6139       flag_auto_inc_dec = 0;
6140     }
6141 }
6142 
6143 /* Initialize the record of emitted frame related registers.  */
6144 
6145 void ia64_init_expanders (void)
6146 {
6147   memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
6148 }
6149 
6150 static struct machine_function *
6151 ia64_init_machine_status (void)
6152 {
6153   return ggc_cleared_alloc<machine_function> ();
6154 }
6155 
6156 static enum attr_itanium_class ia64_safe_itanium_class (rtx_insn *);
6157 static enum attr_type ia64_safe_type (rtx_insn *);
6158 
6159 static enum attr_itanium_class
6160 ia64_safe_itanium_class (rtx_insn *insn)
6161 {
6162   if (recog_memoized (insn) >= 0)
6163     return get_attr_itanium_class (insn);
6164   else if (DEBUG_INSN_P (insn))
6165     return ITANIUM_CLASS_IGNORE;
6166   else
6167     return ITANIUM_CLASS_UNKNOWN;
6168 }
6169 
6170 static enum attr_type
6171 ia64_safe_type (rtx_insn *insn)
6172 {
6173   if (recog_memoized (insn) >= 0)
6174     return get_attr_type (insn);
6175   else
6176     return TYPE_UNKNOWN;
6177 }
6178 
6179 /* The following collection of routines emit instruction group stop bits as
6180    necessary to avoid dependencies.  */
6181 
6182 /* Need to track some additional registers as far as serialization is
6183    concerned so we can properly handle br.call and br.ret.  We could
6184    make these registers visible to gcc, but since these registers are
6185    never explicitly used in gcc generated code, it seems wasteful to
6186    do so (plus it would make the call and return patterns needlessly
6187    complex).  */
6188 #define REG_RP		(BR_REG (0))
6189 #define REG_AR_CFM	(FIRST_PSEUDO_REGISTER + 1)
6190 /* This is used for volatile asms which may require a stop bit immediately
6191    before and after them.  */
6192 #define REG_VOLATILE	(FIRST_PSEUDO_REGISTER + 2)
6193 #define AR_UNAT_BIT_0	(FIRST_PSEUDO_REGISTER + 3)
6194 #define NUM_REGS	(AR_UNAT_BIT_0 + 64)
6195 
6196 /* For each register, we keep track of how it has been written in the
6197    current instruction group.
6198 
6199    If a register is written unconditionally (no qualifying predicate),
6200    WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
6201 
6202    If a register is written if its qualifying predicate P is true, we
6203    set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
6204    may be written again by the complement of P (P^1) and when this happens,
6205    WRITE_COUNT gets set to 2.
6206 
6207    The result of this is that whenever an insn attempts to write a register
6208    whose WRITE_COUNT is two, we need to issue an insn group barrier first.
6209 
6210    If a predicate register is written by a floating-point insn, we set
6211    WRITTEN_BY_FP to true.
6212 
6213    If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
6214    to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
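
/* Worked example (added note): within one instruction group an unconditional
   "mov r14 = r15" records write_count == 2 for r14; any later use or
   redefinition of r14 in the same group then makes rws_access_regno return 1,
   and the group-barrier machinery inserts a stop bit before that insn.  */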
6215 
6216 #if GCC_VERSION >= 4000
6217 #define RWS_FIELD_TYPE __extension__ unsigned short
6218 #else
6219 #define RWS_FIELD_TYPE unsigned int
6220 #endif
6221 struct reg_write_state
6222 {
6223   RWS_FIELD_TYPE write_count : 2;
6224   RWS_FIELD_TYPE first_pred : 10;
6225   RWS_FIELD_TYPE written_by_fp : 1;
6226   RWS_FIELD_TYPE written_by_and : 1;
6227   RWS_FIELD_TYPE written_by_or : 1;
6228 };
6229 
6230 /* Cumulative info for the current instruction group.  */
6231 struct reg_write_state rws_sum[NUM_REGS];
6232 #if CHECKING_P
6233 /* Bitmap whether a register has been written in the current insn.  */
6234 unsigned HOST_WIDEST_FAST_INT rws_insn
6235   [(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
6236    / HOST_BITS_PER_WIDEST_FAST_INT];
6237 
6238 static inline void
6239 rws_insn_set (unsigned int regno)
6240 {
6241   unsigned int elt = regno / HOST_BITS_PER_WIDEST_FAST_INT;
6242   unsigned int bit = regno % HOST_BITS_PER_WIDEST_FAST_INT;
6243   gcc_assert (!((rws_insn[elt] >> bit) & 1));
6244   rws_insn[elt] |= (unsigned HOST_WIDEST_FAST_INT) 1 << bit;
6245 }
6246 
6247 static inline int
6248 rws_insn_test (unsigned int regno)
6249 {
6250   unsigned int elt = regno / HOST_BITS_PER_WIDEST_FAST_INT;
6251   unsigned int bit = regno % HOST_BITS_PER_WIDEST_FAST_INT;
6252   return (rws_insn[elt] >> bit) & 1;
6253 }
6254 #else
6255 /* When not checking, track just REG_AR_CFM and REG_VOLATILE.  */
6256 unsigned char rws_insn[2];
6257 
6258 static inline void
6259 rws_insn_set (int regno)
6260 {
6261   if (regno == REG_AR_CFM)
6262     rws_insn[0] = 1;
6263   else if (regno == REG_VOLATILE)
6264     rws_insn[1] = 1;
6265 }
6266 
6267 static inline int
6268 rws_insn_test (int regno)
6269 {
6270   if (regno == REG_AR_CFM)
6271     return rws_insn[0];
6272   if (regno == REG_VOLATILE)
6273     return rws_insn[1];
6274   return 0;
6275 }
6276 #endif
6277 
6278 /* Indicates whether this is the first instruction after a stop bit,
6279    in which case we don't need another stop bit.  Without this,
6280    ia64_variable_issue will die when scheduling an alloc.  */
6281 static int first_instruction;
6282 
6283 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
6284    RTL for one instruction.  */
6285 struct reg_flags
6286 {
6287   unsigned int is_write : 1;	/* Is register being written?  */
6288   unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
6289   unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
6290   unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
6291   unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
6292   unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
6293 };
6294 
6295 static void rws_update (int, struct reg_flags, int);
6296 static int rws_access_regno (int, struct reg_flags, int);
6297 static int rws_access_reg (rtx, struct reg_flags, int);
6298 static void update_set_flags (rtx, struct reg_flags *);
6299 static int set_src_needs_barrier (rtx, struct reg_flags, int);
6300 static int rtx_needs_barrier (rtx, struct reg_flags, int);
6301 static void init_insn_group_barriers (void);
6302 static int group_barrier_needed (rtx_insn *);
6303 static int safe_group_barrier_needed (rtx_insn *);
6304 static int in_safe_group_barrier;
6305 
6306 /* Update *RWS for REGNO, which is being written by the current instruction,
6307    with predicate PRED, and associated register flags in FLAGS.  */
6308 
6309 static void
6310 rws_update (int regno, struct reg_flags flags, int pred)
6311 {
6312   if (pred)
6313     rws_sum[regno].write_count++;
6314   else
6315     rws_sum[regno].write_count = 2;
6316   rws_sum[regno].written_by_fp |= flags.is_fp;
6317   /* ??? Not tracking and/or across differing predicates.  */
6318   rws_sum[regno].written_by_and = flags.is_and;
6319   rws_sum[regno].written_by_or = flags.is_or;
6320   rws_sum[regno].first_pred = pred;
6321 }
6322 
6323 /* Handle an access to register REGNO of type FLAGS using predicate register
6324    PRED.  Update rws_sum array.  Return 1 if this access creates
6325    a dependency with an earlier instruction in the same group.  */
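
/* In brief, the write_count encoding used below is: 0 -- the register has
   not been written in the current insn group; 1 -- it has been written
   under a predicate; 2 -- it has been written unconditionally (or enough
   times that it must be treated as unconditional).  */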
6326 
6327 static int
6328 rws_access_regno (int regno, struct reg_flags flags, int pred)
6329 {
6330   int need_barrier = 0;
6331 
6332   gcc_assert (regno < NUM_REGS);
6333 
6334   if (! PR_REGNO_P (regno))
6335     flags.is_and = flags.is_or = 0;
6336 
6337   if (flags.is_write)
6338     {
6339       int write_count;
6340 
6341       rws_insn_set (regno);
6342       write_count = rws_sum[regno].write_count;
6343 
6344       switch (write_count)
6345 	{
6346 	case 0:
6347 	  /* The register has not been written yet.  */
6348 	  if (!in_safe_group_barrier)
6349 	    rws_update (regno, flags, pred);
6350 	  break;
6351 
6352 	case 1:
6353 	  /* The register has been written via a predicate.  Treat
6354 	     it like an unconditional write and do not try to check
6355 	     for a complementary predicate register in the earlier write.  */
6356 	  if (flags.is_and && rws_sum[regno].written_by_and)
6357 	    ;
6358 	  else if (flags.is_or && rws_sum[regno].written_by_or)
6359 	    ;
6360 	  else
6361 	    need_barrier = 1;
6362 	  if (!in_safe_group_barrier)
6363 	    rws_update (regno, flags, pred);
6364 	  break;
6365 
6366 	case 2:
6367 	  /* The register has been unconditionally written already.  We
6368 	     need a barrier.  */
6369 	  if (flags.is_and && rws_sum[regno].written_by_and)
6370 	    ;
6371 	  else if (flags.is_or && rws_sum[regno].written_by_or)
6372 	    ;
6373 	  else
6374 	    need_barrier = 1;
6375 	  if (!in_safe_group_barrier)
6376 	    {
6377 	      rws_sum[regno].written_by_and = flags.is_and;
6378 	      rws_sum[regno].written_by_or = flags.is_or;
6379 	    }
6380 	  break;
6381 
6382 	default:
6383 	  gcc_unreachable ();
6384 	}
6385     }
6386   else
6387     {
6388       if (flags.is_branch)
6389 	{
6390 	  /* Branches have several RAW exceptions that allow us to avoid
6391 	     barriers.  */
6392 
6393 	  if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
6394 	    /* RAW dependencies on branch regs are permissible as long
6395 	       as the writer is a non-branch instruction.  Since we
6396 	       never generate code that uses a branch register written
6397 	       by a branch instruction, handling this case is
6398 	       easy.  */
6399 	    return 0;
6400 
6401 	  if (REGNO_REG_CLASS (regno) == PR_REGS
6402 	      && ! rws_sum[regno].written_by_fp)
6403 	    /* The predicates of a branch are available within the
6404 	       same insn group as long as the predicate was written by
6405 	       something other than a floating-point instruction.  */
6406 	    return 0;
6407 	}
6408 
6409       if (flags.is_and && rws_sum[regno].written_by_and)
6410 	return 0;
6411       if (flags.is_or && rws_sum[regno].written_by_or)
6412 	return 0;
6413 
6414       switch (rws_sum[regno].write_count)
6415 	{
6416 	case 0:
6417 	  /* The register has not been written yet.  */
6418 	  break;
6419 
6420 	case 1:
6421 	  /* The register has been written via a predicate, assume we
6422 	     need a barrier (don't check for complementary regs).  */
6423 	  need_barrier = 1;
6424 	  break;
6425 
6426 	case 2:
6427 	  /* The register has been unconditionally written already.  We
6428 	     need a barrier.  */
6429 	  need_barrier = 1;
6430 	  break;
6431 
6432 	default:
6433 	  gcc_unreachable ();
6434 	}
6435     }
6436 
6437   return need_barrier;
6438 }
6439 
6440 static int
6441 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
6442 {
6443   int regno = REGNO (reg);
6444   int n = REG_NREGS (reg);
6445 
6446   if (n == 1)
6447     return rws_access_regno (regno, flags, pred);
6448   else
6449     {
6450       int need_barrier = 0;
6451       while (--n >= 0)
6452 	need_barrier |= rws_access_regno (regno + n, flags, pred);
6453       return need_barrier;
6454     }
6455 }
6456 
6457 /* Examine X, which is a SET rtx, and update the register flags it implies,
6458    stored in *PFLAGS.  */
6459 
6460 static void
6461 update_set_flags (rtx x, struct reg_flags *pflags)
6462 {
6463   rtx src = SET_SRC (x);
6464 
6465   switch (GET_CODE (src))
6466     {
6467     case CALL:
6468       return;
6469 
6470     case IF_THEN_ELSE:
6471       /* There are four cases here:
6472 	 (1) The destination is (pc), in which case this is a branch,
6473 	 nothing here applies.
6474 	 (2) The destination is ar.lc, in which case this is a
6475 	 doloop_end_internal,
6476 	 (3) The destination is an fp register, in which case this is
6477 	 an fselect instruction.
6478 	 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
6479 	 this is a check load.
6480 	 In all cases, nothing we do in this function applies.  */
6481       return;
6482 
6483     default:
6484       if (COMPARISON_P (src)
6485 	  && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
6486 	/* Set pflags->is_fp to 1 so that we know we're dealing
6487 	   with a floating point comparison when processing the
6488 	   destination of the SET.  */
6489 	pflags->is_fp = 1;
6490 
6491       /* Discover if this is a parallel comparison.  We only handle
6492 	 and.orcm and or.andcm at present, since we must retain a
6493 	 strict inverse on the predicate pair.  */
6494       else if (GET_CODE (src) == AND)
6495 	pflags->is_and = 1;
6496       else if (GET_CODE (src) == IOR)
6497 	pflags->is_or = 1;
6498 
6499       break;
6500     }
6501 }
6502 
6503 /* Subroutine of rtx_needs_barrier; this function determines whether the
6504    source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
6505    are the register flags and the predicate register for this insn, as in
6506    rtx_needs_barrier.  */
6507 
6508 static int
6509 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
6510 {
6511   int need_barrier = 0;
6512   rtx dst;
6513   rtx src = SET_SRC (x);
6514 
6515   if (GET_CODE (src) == CALL)
6516     /* We don't need to worry about the result registers that
6517        get written by a subroutine call.  */
6518     return rtx_needs_barrier (src, flags, pred);
6519   else if (SET_DEST (x) == pc_rtx)
6520     {
6521       /* X is a conditional branch.  */
6522       /* ??? This seems redundant, as the caller sets this bit for
6523 	 all JUMP_INSNs.  */
6524       if (!ia64_spec_check_src_p (src))
6525 	flags.is_branch = 1;
6526       return rtx_needs_barrier (src, flags, pred);
6527     }
6528 
6529   if (ia64_spec_check_src_p (src))
6530     /* Avoid checking one register twice (in the condition
6531        and in the 'then' section) for the ldc pattern.  */
6532     {
6533       gcc_assert (REG_P (XEXP (src, 2)));
6534       need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
6535 
6536       /* We process MEM below.  */
6537       src = XEXP (src, 1);
6538     }
6539 
6540   need_barrier |= rtx_needs_barrier (src, flags, pred);
6541 
6542   dst = SET_DEST (x);
6543   if (GET_CODE (dst) == ZERO_EXTRACT)
6544     {
6545       need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
6546       need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
6547     }
6548   return need_barrier;
6549 }
6550 
6551 /* Handle an access to rtx X of type FLAGS using predicate register
6552    PRED.  Return 1 if this access creates a dependency with an earlier
6553    instruction in the same group.  */
6554 
6555 static int
6556 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
6557 {
6558   int i, j;
6559   int is_complemented = 0;
6560   int need_barrier = 0;
6561   const char *format_ptr;
6562   struct reg_flags new_flags;
6563   rtx cond;
6564 
6565   if (! x)
6566     return 0;
6567 
6568   new_flags = flags;
6569 
6570   switch (GET_CODE (x))
6571     {
6572     case SET:
6573       update_set_flags (x, &new_flags);
6574       need_barrier = set_src_needs_barrier (x, new_flags, pred);
6575       if (GET_CODE (SET_SRC (x)) != CALL)
6576 	{
6577 	  new_flags.is_write = 1;
6578 	  need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
6579 	}
6580       break;
6581 
6582     case CALL:
6583       new_flags.is_write = 0;
6584       need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6585 
6586       /* Avoid multiple register writes, in case this is a pattern with
6587 	 multiple CALL rtx.  This avoids a failure in rws_access_reg.  */
6588       if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
6589 	{
6590 	  new_flags.is_write = 1;
6591 	  need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
6592 	  need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
6593 	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6594 	}
6595       break;
6596 
6597     case COND_EXEC:
6598       /* X is a predicated instruction.  */
6599 
6600       cond = COND_EXEC_TEST (x);
6601       gcc_assert (!pred);
6602       need_barrier = rtx_needs_barrier (cond, flags, 0);
6603 
6604       if (GET_CODE (cond) == EQ)
6605 	is_complemented = 1;
6606       cond = XEXP (cond, 0);
6607       gcc_assert (GET_CODE (cond) == REG
6608 		  && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
6609       pred = REGNO (cond);
6610       if (is_complemented)
6611 	++pred;
6612 
6613       need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6614       return need_barrier;
6615 
6616     case CLOBBER:
6617     case USE:
6618       /* CLOBBER and USE are for earlier compiler phases only.  */
6619       break;
6620 
6621     case ASM_OPERANDS:
6622     case ASM_INPUT:
6623       /* We always emit stop bits for traditional asms.  We emit stop bits
6624 	 for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
6625       if (GET_CODE (x) != ASM_OPERANDS
6626 	  || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6627 	{
6628 	  /* Avoid writing the register multiple times if we have multiple
6629 	     asm outputs.  This avoids a failure in rws_access_reg.  */
6630 	  if (! rws_insn_test (REG_VOLATILE))
6631 	    {
6632 	      new_flags.is_write = 1;
6633 	      rws_access_regno (REG_VOLATILE, new_flags, pred);
6634 	    }
6635 	  return 1;
6636 	}
6637 
6638       /* For all ASM_OPERANDS, we must traverse the vector of input operands.
6639 	 We cannot just fall through here since then we would be confused
6640 	 by the ASM_INPUT rtx inside ASM_OPERANDS, which, unlike its normal
6641 	 usage, does not indicate a traditional asm.  */
6642 
6643       for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6644 	if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6645 	  need_barrier = 1;
6646       break;
6647 
6648     case PARALLEL:
6649       for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6650 	{
6651 	  rtx pat = XVECEXP (x, 0, i);
6652 	  switch (GET_CODE (pat))
6653 	    {
6654 	    case SET:
6655 	      update_set_flags (pat, &new_flags);
6656 	      need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
6657 	      break;
6658 
6659 	    case USE:
6660 	    case CALL:
6661 	    case ASM_OPERANDS:
6662 	    case ASM_INPUT:
6663 	      need_barrier |= rtx_needs_barrier (pat, flags, pred);
6664 	      break;
6665 
6666 	    case CLOBBER:
6667 	      if (REG_P (XEXP (pat, 0))
6668 		  && extract_asm_operands (x) != NULL_RTX
6669 		  && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM)
6670 		{
6671 		  new_flags.is_write = 1;
6672 		  need_barrier |= rtx_needs_barrier (XEXP (pat, 0),
6673 						     new_flags, pred);
6674 		  new_flags = flags;
6675 		}
6676 	      break;
6677 
6678 	    case RETURN:
6679 	      break;
6680 
6681 	    default:
6682 	      gcc_unreachable ();
6683 	    }
6684 	}
6685       for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6686 	{
6687 	  rtx pat = XVECEXP (x, 0, i);
6688 	  if (GET_CODE (pat) == SET)
6689 	    {
6690 	      if (GET_CODE (SET_SRC (pat)) != CALL)
6691 		{
6692 		  new_flags.is_write = 1;
6693 		  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6694 						     pred);
6695 		}
6696 	    }
6697 	  else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
6698 	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
6699 	}
6700       break;
6701 
6702     case SUBREG:
6703       need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6704       break;
6705     case REG:
6706       if (REGNO (x) == AR_UNAT_REGNUM)
6707 	{
6708 	  for (i = 0; i < 64; ++i)
6709 	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6710 	}
6711       else
6712 	need_barrier = rws_access_reg (x, flags, pred);
6713       break;
6714 
6715     case MEM:
6716       /* Find the regs used in memory address computation.  */
6717       new_flags.is_write = 0;
6718       need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6719       break;
6720 
6721     case CONST_INT:   case CONST_DOUBLE:  case CONST_VECTOR:
6722     case SYMBOL_REF:  case LABEL_REF:     case CONST:
6723       break;
6724 
6725       /* Operators with side-effects.  */
6726     case POST_INC:    case POST_DEC:
6727       gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6728 
6729       new_flags.is_write = 0;
6730       need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
6731       new_flags.is_write = 1;
6732       need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6733       break;
6734 
6735     case POST_MODIFY:
6736       gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6737 
6738       new_flags.is_write = 0;
6739       need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
6740       need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6741       new_flags.is_write = 1;
6742       need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6743       break;
6744 
6745       /* Handle common unary and binary ops for efficiency.  */
6746     case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
6747     case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
6748     case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
6749     case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
6750     case NE:       case EQ:      case GE:      case GT:        case LE:
6751     case LT:       case GEU:     case GTU:     case LEU:       case LTU:
6752       need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6753       need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6754       break;
6755 
6756     case NEG:      case NOT:	        case SIGN_EXTEND:     case ZERO_EXTEND:
6757     case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
6758     case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
6759     case SQRT:     case FFS:		case POPCOUNT:
6760       need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6761       break;
6762 
6763     case VEC_SELECT:
6764       /* VEC_SELECT's second argument is a PARALLEL with integers that
6765 	 describe the elements selected.  On ia64, those integers are
6766 	 always constants.  Avoid walking the PARALLEL so that we don't
6767 	 get confused with "normal" parallels and then die.  */
6768       need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6769       break;
6770 
6771     case UNSPEC:
6772       switch (XINT (x, 1))
6773 	{
6774 	case UNSPEC_LTOFF_DTPMOD:
6775 	case UNSPEC_LTOFF_DTPREL:
6776 	case UNSPEC_DTPREL:
6777 	case UNSPEC_LTOFF_TPREL:
6778 	case UNSPEC_TPREL:
6779 	case UNSPEC_PRED_REL_MUTEX:
6780 	case UNSPEC_PIC_CALL:
6781         case UNSPEC_MF:
6782         case UNSPEC_FETCHADD_ACQ:
6783         case UNSPEC_FETCHADD_REL:
6784 	case UNSPEC_BSP_VALUE:
6785 	case UNSPEC_FLUSHRS:
6786 	case UNSPEC_BUNDLE_SELECTOR:
6787           break;
6788 
6789 	case UNSPEC_GR_SPILL:
6790 	case UNSPEC_GR_RESTORE:
6791 	  {
6792 	    HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6793 	    HOST_WIDE_INT bit = (offset >> 3) & 63;
6794 
6795 	    need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6796 	    new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
6797 	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6798 					      new_flags, pred);
6799 	    break;
6800 	  }
6801 
6802 	case UNSPEC_FR_SPILL:
6803 	case UNSPEC_FR_RESTORE:
6804 	case UNSPEC_GETF_EXP:
6805 	case UNSPEC_SETF_EXP:
6806         case UNSPEC_ADDP4:
6807 	case UNSPEC_FR_SQRT_RECIP_APPROX:
6808 	case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
6809 	case UNSPEC_LDA:
6810 	case UNSPEC_LDS:
6811 	case UNSPEC_LDS_A:
6812 	case UNSPEC_LDSA:
6813 	case UNSPEC_CHKACLR:
6814         case UNSPEC_CHKS:
6815 	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6816 	  break;
6817 
6818 	case UNSPEC_FR_RECIP_APPROX:
6819 	case UNSPEC_SHRP:
6820 	case UNSPEC_COPYSIGN:
6821 	case UNSPEC_FR_RECIP_APPROX_RES:
6822 	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6823 	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6824 	  break;
6825 
6826         case UNSPEC_CMPXCHG_ACQ:
6827         case UNSPEC_CMPXCHG_REL:
6828 	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6829 	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6830 	  break;
6831 
6832 	default:
6833 	  gcc_unreachable ();
6834 	}
6835       break;
6836 
6837     case UNSPEC_VOLATILE:
6838       switch (XINT (x, 1))
6839 	{
6840 	case UNSPECV_ALLOC:
6841 	  /* Alloc must always be the first instruction of a group.
6842 	     We force this by always returning true.  */
6843 	  /* ??? We might get better scheduling if we explicitly check for
6844 	     input/local/output register dependencies, and modify the
6845 	     scheduler so that alloc is always reordered to the start of
6846 	     the current group.  We could then eliminate all of the
6847 	     first_instruction code.  */
6848 	  rws_access_regno (AR_PFS_REGNUM, flags, pred);
6849 
6850 	  new_flags.is_write = 1;
6851 	  rws_access_regno (REG_AR_CFM, new_flags, pred);
6852 	  return 1;
6853 
6854 	case UNSPECV_SET_BSP:
6855 	case UNSPECV_PROBE_STACK_RANGE:
6856 	  need_barrier = 1;
6857           break;
6858 
6859 	case UNSPECV_BLOCKAGE:
6860 	case UNSPECV_INSN_GROUP_BARRIER:
6861 	case UNSPECV_BREAK:
6862 	case UNSPECV_PSAC_ALL:
6863 	case UNSPECV_PSAC_NORMAL:
6864 	  return 0;
6865 
6866 	case UNSPECV_PROBE_STACK_ADDRESS:
6867 	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6868 	  break;
6869 
6870 	default:
6871 	  gcc_unreachable ();
6872 	}
6873       break;
6874 
6875     case RETURN:
6876       new_flags.is_write = 0;
6877       need_barrier  = rws_access_regno (REG_RP, flags, pred);
6878       need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
6879 
6880       new_flags.is_write = 1;
6881       need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6882       need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6883       break;
6884 
6885     default:
6886       format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6887       for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6888 	switch (format_ptr[i])
6889 	  {
6890 	  case '0':	/* unused field */
6891 	  case 'i':	/* integer */
6892 	  case 'n':	/* note */
6893 	  case 'w':	/* wide integer */
6894 	  case 's':	/* pointer to string */
6895 	  case 'S':	/* optional pointer to string */
6896 	    break;
6897 
6898 	  case 'e':
6899 	    if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6900 	      need_barrier = 1;
6901 	    break;
6902 
6903 	  case 'E':
6904 	    for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6905 	      if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6906 		need_barrier = 1;
6907 	    break;
6908 
6909 	  default:
6910 	    gcc_unreachable ();
6911 	  }
6912       break;
6913     }
6914   return need_barrier;
6915 }
6916 
6917 /* Clear out the state for group_barrier_needed at the start of a
6918    sequence of insns.  */
6919 
6920 static void
6921 init_insn_group_barriers (void)
6922 {
6923   memset (rws_sum, 0, sizeof (rws_sum));
6924   first_instruction = 1;
6925 }
6926 
6927 /* Given the current state, determine whether a group barrier (a stop bit) is
6928    necessary before INSN.  Return nonzero if so.  This modifies the state to
6929    include the effects of INSN as a side-effect.  */
6930 
6931 static int
6932 group_barrier_needed (rtx_insn *insn)
6933 {
6934   rtx pat;
6935   int need_barrier = 0;
6936   struct reg_flags flags;
6937 
6938   memset (&flags, 0, sizeof (flags));
6939   switch (GET_CODE (insn))
6940     {
6941     case NOTE:
6942     case DEBUG_INSN:
6943       break;
6944 
6945     case BARRIER:
6946       /* A barrier doesn't imply an instruction group boundary.  */
6947       break;
6948 
6949     case CODE_LABEL:
6950       memset (rws_insn, 0, sizeof (rws_insn));
6951       return 1;
6952 
6953     case CALL_INSN:
6954       flags.is_branch = 1;
6955       flags.is_sibcall = SIBLING_CALL_P (insn);
6956       memset (rws_insn, 0, sizeof (rws_insn));
6957 
6958       /* Don't bundle a call following another call.  */
6959       if ((pat = prev_active_insn (insn)) && CALL_P (pat))
6960 	{
6961 	  need_barrier = 1;
6962 	  break;
6963 	}
6964 
6965       need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6966       break;
6967 
6968     case JUMP_INSN:
6969       if (!ia64_spec_check_p (insn))
6970 	flags.is_branch = 1;
6971 
6972       /* Don't bundle a jump following a call.  */
6973       if ((pat = prev_active_insn (insn)) && CALL_P (pat))
6974 	{
6975 	  need_barrier = 1;
6976 	  break;
6977 	}
6978       /* FALLTHRU */
6979 
6980     case INSN:
6981       if (GET_CODE (PATTERN (insn)) == USE
6982 	  || GET_CODE (PATTERN (insn)) == CLOBBER)
6983 	/* Don't care about USE and CLOBBER "insns"---those are used to
6984 	   indicate to the optimizer that it shouldn't get rid of
6985 	   certain operations.  */
6986 	break;
6987 
6988       pat = PATTERN (insn);
6989 
6990       /* Ug.  Hack hacks hacked elsewhere.  */
6991       switch (recog_memoized (insn))
6992 	{
6993 	  /* We play dependency tricks with the epilogue in order
6994 	     to get proper schedules.  Undo this for dv analysis.  */
6995 	case CODE_FOR_epilogue_deallocate_stack:
6996 	case CODE_FOR_prologue_allocate_stack:
6997 	  pat = XVECEXP (pat, 0, 0);
6998 	  break;
6999 
7000 	  /* The pattern we use for br.cloop confuses the code above.
7001 	     The second element of the vector is representative.  */
7002 	case CODE_FOR_doloop_end_internal:
7003 	  pat = XVECEXP (pat, 0, 1);
7004 	  break;
7005 
7006 	  /* Doesn't generate code.  */
7007 	case CODE_FOR_pred_rel_mutex:
7008 	case CODE_FOR_prologue_use:
7009 	  return 0;
7010 
7011 	default:
7012 	  break;
7013 	}
7014 
7015       memset (rws_insn, 0, sizeof (rws_insn));
7016       need_barrier = rtx_needs_barrier (pat, flags, 0);
7017 
7018       /* Check to see if the previous instruction was a volatile
7019 	 asm.  */
7020       if (! need_barrier)
7021 	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
7022 
7023       break;
7024 
7025     default:
7026       gcc_unreachable ();
7027     }
7028 
7029   if (first_instruction && important_for_bundling_p (insn))
7030     {
7031       need_barrier = 0;
7032       first_instruction = 0;
7033     }
7034 
7035   return need_barrier;
7036 }
7037 
7038 /* Like group_barrier_needed, but do not clobber the current state.  */
7039 
7040 static int
7041 safe_group_barrier_needed (rtx_insn *insn)
7042 {
7043   int saved_first_instruction;
7044   int t;
7045 
7046   saved_first_instruction = first_instruction;
7047   in_safe_group_barrier = 1;
7048 
7049   t = group_barrier_needed (insn);
7050 
7051   first_instruction = saved_first_instruction;
7052   in_safe_group_barrier = 0;
7053 
7054   return t;
7055 }
7056 
7057 /* Scan the current function and insert stop bits as necessary to
7058    eliminate dependencies.  This function assumes that a final
7059    instruction scheduling pass has been run which has already
7060    inserted most of the necessary stop bits.  This function only
7061    inserts new ones at basic block boundaries, since these are
7062    invisible to the scheduler.  */
7063 
7064 static void
7065 emit_insn_group_barriers (FILE *dump)
7066 {
7067   rtx_insn *insn;
7068   rtx_insn *last_label = 0;
7069   int insns_since_last_label = 0;
7070 
7071   init_insn_group_barriers ();
7072 
7073   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7074     {
7075       if (LABEL_P (insn))
7076 	{
7077 	  if (insns_since_last_label)
7078 	    last_label = insn;
7079 	  insns_since_last_label = 0;
7080 	}
7081       else if (NOTE_P (insn)
7082 	       && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
7083 	{
7084 	  if (insns_since_last_label)
7085 	    last_label = insn;
7086 	  insns_since_last_label = 0;
7087 	}
7088       else if (NONJUMP_INSN_P (insn)
7089 	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7090 	       && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7091 	{
7092 	  init_insn_group_barriers ();
7093 	  last_label = 0;
7094 	}
7095       else if (NONDEBUG_INSN_P (insn))
7096 	{
7097 	  insns_since_last_label = 1;
7098 
7099 	  if (group_barrier_needed (insn))
7100 	    {
7101 	      if (last_label)
7102 		{
7103 		  if (dump)
7104 		    fprintf (dump, "Emitting stop before label %d\n",
7105 			     INSN_UID (last_label));
7106 		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
7107 		  insn = last_label;
7108 
7109 		  init_insn_group_barriers ();
7110 		  last_label = 0;
7111 		}
7112 	    }
7113 	}
7114     }
7115 }
7116 
7117 /* Like emit_insn_group_barriers, but used when no final scheduling pass
7118    has been run.  This function has to emit all necessary group barriers.  */
7119 
7120 static void
7121 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
7122 {
7123   rtx_insn *insn;
7124 
7125   init_insn_group_barriers ();
7126 
7127   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7128     {
7129       if (BARRIER_P (insn))
7130 	{
7131 	  rtx_insn *last = prev_active_insn (insn);
7132 
7133 	  if (! last)
7134 	    continue;
7135 	  if (JUMP_TABLE_DATA_P (last))
7136 	    last = prev_active_insn (last);
7137 	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7138 	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7139 
7140 	  init_insn_group_barriers ();
7141 	}
7142       else if (NONDEBUG_INSN_P (insn))
7143 	{
7144 	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7145 	    init_insn_group_barriers ();
7146 	  else if (group_barrier_needed (insn))
7147 	    {
7148 	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
7149 	      init_insn_group_barriers ();
7150 	      group_barrier_needed (insn);
7151 	    }
7152 	}
7153     }
7154 }
7155 
7156 
7157 
7158 /* Instruction scheduling support.  */
7159 
7160 #define NR_BUNDLES 10
7161 
7162 /* A list of names of all available bundles.  */
7163 
7164 static const char *bundle_name [NR_BUNDLES] =
7165 {
7166   ".mii",
7167   ".mmi",
7168   ".mfi",
7169   ".mmf",
7170 #if NR_BUNDLES == 10
7171   ".bbb",
7172   ".mbb",
7173 #endif
7174   ".mib",
7175   ".mmb",
7176   ".mfb",
7177   ".mlx"
7178 };
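
/* For reference, the bundle template letters follow the usual IA-64 unit
   naming: m = memory, i = integer, f = floating-point, b = branch, and
   "lx" is the long-immediate template (e.g. for movl).  */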
7179 
7180 /* Nonzero if we should insert stop bits into the schedule.  */
7181 
7182 int ia64_final_schedule = 0;
7183 
7184 /* Codes of the corresponding queried units: */
7185 
7186 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
7187 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
7188 
7189 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
7190 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
7191 
7192 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
7193 
7194 /* The following variable value is an insn group barrier.  */
7195 
7196 static rtx_insn *dfa_stop_insn;
7197 
7198 /* The following variable value is the last issued insn.  */
7199 
7200 static rtx_insn *last_scheduled_insn;
7201 
7202 /* The following variable value is a pointer to a DFA state used as a
7203    temporary variable.  */
7204 
7205 static state_t temp_dfa_state = NULL;
7206 
7207 /* The following variable value is the DFA state after issuing the last
7208    insn.  */
7209 
7210 static state_t prev_cycle_state = NULL;
7211 
7212 /* The following array element values are TRUE if the corresponding
7213    insn requires a stop bit to be added before it.  */
7214 
7215 static char *stops_p = NULL;
7216 
7217 /* The following variable is used to set up the array mentioned above.  */
7218 
7219 static int stop_before_p = 0;
7220 
7221 /* The following variable value is the length of the array `stops_p'
7222    defined above.  */
7223 
7224 static int clocks_length;
7225 
7226 /* The following variable value is the number of data speculations in progress.  */
7227 static int pending_data_specs = 0;
7228 
7229 /* Number of memory references on the current and the three following processor cycles.  */
7230 static char mem_ops_in_group[4];
7231 
7232 /* The number of the current processor cycle (from the scheduler's point of view).  */
7233 static int current_cycle;
7234 
7235 static rtx ia64_single_set (rtx_insn *);
7236 static void ia64_emit_insn_before (rtx, rtx_insn *);
7237 
7238 /* Map a bundle number to its pseudo-op.  */
7239 
7240 const char *
7241 get_bundle_name (int b)
7242 {
7243   return bundle_name[b];
7244 }
7245 
7246 
7247 /* Return the maximum number of instructions a cpu can issue.  */
7248 
7249 static int
7250 ia64_issue_rate (void)
7251 {
7252   return 6;
7253 }
7254 
7255 /* Helper function - like single_set, but look inside COND_EXEC.  */
7256 
7257 static rtx
7258 ia64_single_set (rtx_insn *insn)
7259 {
7260   rtx x = PATTERN (insn), ret;
7261   if (GET_CODE (x) == COND_EXEC)
7262     x = COND_EXEC_CODE (x);
7263   if (GET_CODE (x) == SET)
7264     return x;
7265 
7266   /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here.
7267      Although they are not classical single sets, the second set is there just
7268      to keep the first from being moved past FP-relative stack accesses.  */
7269   switch (recog_memoized (insn))
7270     {
7271     case CODE_FOR_prologue_allocate_stack:
7272     case CODE_FOR_prologue_allocate_stack_pr:
7273     case CODE_FOR_epilogue_deallocate_stack:
7274     case CODE_FOR_epilogue_deallocate_stack_pr:
7275       ret = XVECEXP (x, 0, 0);
7276       break;
7277 
7278     default:
7279       ret = single_set_2 (insn, x);
7280       break;
7281     }
7282 
7283   return ret;
7284 }
7285 
7286 /* Adjust the cost of a scheduling dependency.
7287    Return the new cost of a dependency of type DEP_TYPE of INSN on DEP_INSN.
7288    COST is the current cost, DW is dependency weakness.  */
7289 static int
7290 ia64_adjust_cost (rtx_insn *insn, int dep_type1, rtx_insn *dep_insn,
7291 		  int cost, dw_t dw)
7292 {
7293   enum reg_note dep_type = (enum reg_note) dep_type1;
7294   enum attr_itanium_class dep_class;
7295   enum attr_itanium_class insn_class;
7296 
7297   insn_class = ia64_safe_itanium_class (insn);
7298   dep_class = ia64_safe_itanium_class (dep_insn);
7299 
7300   /* Treat true memory dependencies separately.  Ignore apparent true
7301      dependence between store and call (call has a MEM inside a SYMBOL_REF).  */
7302   if (dep_type == REG_DEP_TRUE
7303       && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
7304       && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
7305     return 0;
7306 
7307   if (dw == MIN_DEP_WEAK)
7308     /* Store and load are likely to alias, use higher cost to avoid stall.  */
7309     return param_sched_mem_true_dep_cost;
7310   else if (dw > MIN_DEP_WEAK)
7311     {
7312       /* Store and load are less likely to alias.  */
7313       if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
7314 	/* Assume there will be no cache conflict for floating-point data.
7315 	   For integer data, L1 conflict penalty is huge (17 cycles), so we
7316 	   never assume it will not cause a conflict.  */
7317 	return 0;
7318       else
7319 	return cost;
7320     }
7321 
7322   if (dep_type != REG_DEP_OUTPUT)
7323     return cost;
7324 
7325   if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
7326       || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
7327     return 0;
7328 
7329   return cost;
7330 }
7331 
7332 /* Like emit_insn_before, but skip cycle_display notes.
7333    ??? When cycle display notes are implemented, update this.  */
7334 
7335 static void
7336 ia64_emit_insn_before (rtx insn, rtx_insn *before)
7337 {
7338   emit_insn_before (insn, before);
7339 }
7340 
7341 /* The following function marks insns that produce addresses for load
7342    and store insns.  Such insns will be placed into M slots because that
7343    decreases the latency on Itanium 1 (see function
7344    `ia64_produce_address_p' and the DFA descriptions).  */
7345 
7346 static void
7347 ia64_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
7348 {
7349   rtx_insn *insn, *next, *next_tail;
7350 
7351   /* Before reload, which_alternative is not set, which means that
7352      ia64_safe_itanium_class will produce wrong results for (at least)
7353      move instructions.  */
7354   if (!reload_completed)
7355     return;
7356 
7357   next_tail = NEXT_INSN (tail);
7358   for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7359     if (INSN_P (insn))
7360       insn->call = 0;
7361   for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7362     if (INSN_P (insn)
7363 	&& ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
7364       {
7365 	sd_iterator_def sd_it;
7366 	dep_t dep;
7367 	bool has_mem_op_consumer_p = false;
7368 
7369 	FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
7370 	  {
7371 	    enum attr_itanium_class c;
7372 
7373 	    if (DEP_TYPE (dep) != REG_DEP_TRUE)
7374 	      continue;
7375 
7376 	    next = DEP_CON (dep);
7377 	    c = ia64_safe_itanium_class (next);
7378 	    if ((c == ITANIUM_CLASS_ST
7379 		 || c == ITANIUM_CLASS_STF)
7380 		&& ia64_st_address_bypass_p (insn, next))
7381 	      {
7382 		has_mem_op_consumer_p = true;
7383 		break;
7384 	      }
7385 	    else if ((c == ITANIUM_CLASS_LD
7386 		      || c == ITANIUM_CLASS_FLD
7387 		      || c == ITANIUM_CLASS_FLDP)
7388 		     && ia64_ld_address_bypass_p (insn, next))
7389 	      {
7390 		has_mem_op_consumer_p = true;
7391 		break;
7392 	      }
7393 	  }
7394 
7395 	insn->call = has_mem_op_consumer_p;
7396       }
7397 }
7398 
7399 /* We're beginning a new block.  Initialize data structures as necessary.  */
7400 
7401 static void
7402 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
7403 		 int sched_verbose ATTRIBUTE_UNUSED,
7404 		 int max_ready ATTRIBUTE_UNUSED)
7405 {
7406   if (flag_checking && !sel_sched_p () && reload_completed)
7407     {
7408       for (rtx_insn *insn = NEXT_INSN (current_sched_info->prev_head);
7409 	   insn != current_sched_info->next_tail;
7410 	   insn = NEXT_INSN (insn))
7411 	gcc_assert (!SCHED_GROUP_P (insn));
7412     }
7413   last_scheduled_insn = NULL;
7414   init_insn_group_barriers ();
7415 
7416   current_cycle = 0;
7417   memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7418 }
7419 
7420 /* We're beginning a scheduling pass.  Check assertion.  */
7421 
7422 static void
7423 ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
7424                         int sched_verbose ATTRIBUTE_UNUSED,
7425                         int max_ready ATTRIBUTE_UNUSED)
7426 {
7427   gcc_assert (pending_data_specs == 0);
7428 }
7429 
7430 /* Scheduling pass is now finished.  Free/reset static variable.  */
7431 static void
7432 ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
7433 			  int sched_verbose ATTRIBUTE_UNUSED)
7434 {
7435   gcc_assert (pending_data_specs == 0);
7436 }
7437 
7438 /* Return TRUE if INSN is a load (either normal or speculative, but not a
7439    speculation check), FALSE otherwise.  */
7440 static bool
7441 is_load_p (rtx_insn *insn)
7442 {
7443   enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7444 
7445   return
7446    ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
7447     && get_attr_check_load (insn) == CHECK_LOAD_NO);
7448 }
7449 
7450 /* If INSN is a memory reference, record it in the MEM_OPS_IN_GROUP global
7451    array (taking into account the 3-cycle cache-reference postponement for
7452    stores: Intel Itanium 2 Reference Manual for Software Development and
7453    Optimization, 6.7.3.1).  */
7454 static void
7455 record_memory_reference (rtx_insn *insn)
7456 {
7457   enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7458 
7459   switch (insn_class) {
7460     case ITANIUM_CLASS_FLD:
7461     case ITANIUM_CLASS_LD:
7462       mem_ops_in_group[current_cycle % 4]++;
7463       break;
7464     case ITANIUM_CLASS_STF:
7465     case ITANIUM_CLASS_ST:
7466       mem_ops_in_group[(current_cycle + 3) % 4]++;
7467       break;
7468     default:;
7469   }
7470 }
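
/* An illustration of the bucketing above: a store issued on cycle 5 is
   counted in mem_ops_in_group[(5 + 3) % 4], i.e. bucket 0, so it is charged
   against cycle 8 rather than cycle 5, modelling the 3-cycle postponement
   cited in the comment before this function.  */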
7471 
7472 /* We are about to begin issuing insns for this clock cycle.
7473    Override the default sort algorithm to better slot instructions.  */
7474 
7475 static int
7476 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
7477 			int *pn_ready, int clock_var,
7478 			int reorder_type)
7479 {
7480   int n_asms;
7481   int n_ready = *pn_ready;
7482   rtx_insn **e_ready = ready + n_ready;
7483   rtx_insn **insnp;
7484 
7485   if (sched_verbose)
7486     fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
7487 
7488   if (reorder_type == 0)
7489     {
7490       /* First, move all USEs, CLOBBERs and other crud out of the way.  */
7491       n_asms = 0;
7492       for (insnp = ready; insnp < e_ready; insnp++)
7493 	if (insnp < e_ready)
7494 	  {
7495 	    rtx_insn *insn = *insnp;
7496 	    enum attr_type t = ia64_safe_type (insn);
7497 	    if (t == TYPE_UNKNOWN)
7498 	      {
7499 		if (GET_CODE (PATTERN (insn)) == ASM_INPUT
7500 		    || asm_noperands (PATTERN (insn)) >= 0)
7501 		  {
7502 		    rtx_insn *lowest = ready[n_asms];
7503 		    ready[n_asms] = insn;
7504 		    *insnp = lowest;
7505 		    n_asms++;
7506 		  }
7507 		else
7508 		  {
7509 		    rtx_insn *highest = ready[n_ready - 1];
7510 		    ready[n_ready - 1] = insn;
7511 		    *insnp = highest;
7512 		    return 1;
7513 		  }
7514 	      }
7515 	  }
7516 
7517       if (n_asms < n_ready)
7518 	{
7519 	  /* Some normal insns to process.  Skip the asms.  */
7520 	  ready += n_asms;
7521 	  n_ready -= n_asms;
7522 	}
7523       else if (n_ready > 0)
7524 	return 1;
7525     }
7526 
7527   if (ia64_final_schedule)
7528     {
7529       int deleted = 0;
7530       int nr_need_stop = 0;
7531 
7532       for (insnp = ready; insnp < e_ready; insnp++)
7533 	if (safe_group_barrier_needed (*insnp))
7534 	  nr_need_stop++;
7535 
7536       if (reorder_type == 1 && n_ready == nr_need_stop)
7537 	return 0;
7538       if (reorder_type == 0)
7539 	return 1;
7540       insnp = e_ready;
7541       /* Move down everything that needs a stop bit, preserving
7542 	 relative order.  */
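      /* Note: the haifa scheduler issues from the end of the ready array
	 (the highest-priority insn is last), so rotating an insn to
	 ready[0] here effectively gives it the lowest priority for this
	 cycle.  */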
7543       while (insnp-- > ready + deleted)
7544 	while (insnp >= ready + deleted)
7545 	  {
7546 	    rtx_insn *insn = *insnp;
7547 	    if (! safe_group_barrier_needed (insn))
7548 	      break;
7549 	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7550 	    *ready = insn;
7551 	    deleted++;
7552 	  }
7553       n_ready -= deleted;
7554       ready += deleted;
7555     }
7556 
7557   current_cycle = clock_var;
7558   if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
7559     {
7560       int moved = 0;
7561 
7562       insnp = e_ready;
7563       /* Move down loads/stores, preserving relative order.  */
7564       while (insnp-- > ready + moved)
7565 	while (insnp >= ready + moved)
7566 	  {
7567 	    rtx_insn *insn = *insnp;
7568 	    if (! is_load_p (insn))
7569 	      break;
7570 	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7571 	    *ready = insn;
7572 	    moved++;
7573 	  }
7574       n_ready -= moved;
7575       ready += moved;
7576     }
7577 
7578   return 1;
7579 }
7580 
7581 /* We are about to begin issuing insns for this clock cycle.  Override
7582    the default sort algorithm to better slot instructions.  */
7583 
7584 static int
7585 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
7586 		    int *pn_ready, int clock_var)
7587 {
7588   return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
7589 				 pn_ready, clock_var, 0);
7590 }
7591 
7592 /* Like ia64_sched_reorder, but called after issuing each insn.
7593    Override the default sort algorithm to better slot instructions.  */
7594 
7595 static int
7596 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
7597 		     int sched_verbose ATTRIBUTE_UNUSED, rtx_insn **ready,
7598 		     int *pn_ready, int clock_var)
7599 {
7600   return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
7601 				 clock_var, 1);
7602 }
7603 
7604 /* We are about to issue INSN.  Return the number of insns left on the
7605    ready queue that can be issued this cycle.  */
7606 
7607 static int
7608 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
7609 		     int sched_verbose ATTRIBUTE_UNUSED,
7610 		     rtx_insn *insn,
7611 		     int can_issue_more ATTRIBUTE_UNUSED)
7612 {
7613   if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
7614     /* Modulo scheduling does not extend h_i_d when emitting
7615        new instructions.  Don't use h_i_d if we don't have to.  */
7616     {
7617       if (DONE_SPEC (insn) & BEGIN_DATA)
7618 	pending_data_specs++;
7619       if (CHECK_SPEC (insn) & BEGIN_DATA)
7620 	pending_data_specs--;
7621     }
7622 
7623   if (DEBUG_INSN_P (insn))
7624     return 1;
7625 
7626   last_scheduled_insn = insn;
7627   memcpy (prev_cycle_state, curr_state, dfa_state_size);
7628   if (reload_completed)
7629     {
7630       int needed = group_barrier_needed (insn);
7631 
7632       gcc_assert (!needed);
7633       if (CALL_P (insn))
7634 	init_insn_group_barriers ();
7635       stops_p [INSN_UID (insn)] = stop_before_p;
7636       stop_before_p = 0;
7637 
7638       record_memory_reference (insn);
7639     }
7640   return 1;
7641 }
7642 
7643 /* We are choosing an insn from the ready queue.  Return zero if INSN
7644    can be chosen.  */
7645 
7646 static int
7647 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
7648 {
7649   gcc_assert (insn && INSN_P (insn));
7650 
7651   /* The size of the ALAT is 32.  Since we perform conservative
7652      data speculation, we keep the ALAT half-empty.  */
7653   if (pending_data_specs >= 16 && (TODO_SPEC (insn) & BEGIN_DATA))
7654     return ready_index == 0 ? -1 : 1;
7655 
7656   if (ready_index == 0)
7657     return 0;
7658 
7659   if ((!reload_completed
7660        || !safe_group_barrier_needed (insn))
7661       && (!mflag_sched_mem_insns_hard_limit
7662 	  || !is_load_p (insn)
7663 	  || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns))
7664     return 0;
7665 
7666   return 1;
7667 }
7668 
7669 /* The following variable value is a pseudo-insn used by the DFA insn
7670    scheduler to change the DFA state when the simulated clock is
7671    increased.  */
7672 
7673 static rtx_insn *dfa_pre_cycle_insn;
7674 
7675 /* Returns 1 when a meaningful insn was scheduled between the last group
7676    barrier and LAST.  */
7677 static int
7678 scheduled_good_insn (rtx_insn *last)
7679 {
7680   if (last && recog_memoized (last) >= 0)
7681     return 1;
7682 
7683   for ( ;
7684        last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7685        && !stops_p[INSN_UID (last)];
7686        last = PREV_INSN (last))
7687     /* We could hit a NOTE_INSN_DELETED here which is actually outside
7688        the ebb we're scheduling.  */
7689     if (INSN_P (last) && recog_memoized (last) >= 0)
7690       return 1;
7691 
7692   return 0;
7693 }
7694 
7695 /* We are about to begin issuing INSN.  Return nonzero if we cannot
7696    issue it on the given cycle CLOCK, and return zero if we should not sort
7697    the ready queue on the next clock start.  */
7698 
7699 static int
7700 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx_insn *insn, int last_clock,
7701 		    int clock, int *sort_p)
7702 {
7703   gcc_assert (insn && INSN_P (insn));
7704 
7705   if (DEBUG_INSN_P (insn))
7706     return 0;
7707 
7708   /* When a group barrier is needed for insn, last_scheduled_insn
7709      should be set.  */
7710   gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7711               || last_scheduled_insn);
7712 
7713   if ((reload_completed
7714        && (safe_group_barrier_needed (insn)
7715 	   || (mflag_sched_stop_bits_after_every_cycle
7716 	       && last_clock != clock
7717 	       && last_scheduled_insn
7718 	       && scheduled_good_insn (last_scheduled_insn))))
7719       || (last_scheduled_insn
7720 	  && (CALL_P (last_scheduled_insn)
7721 	      || unknown_for_bundling_p (last_scheduled_insn))))
7722     {
7723       init_insn_group_barriers ();
7724 
7725       if (verbose && dump)
7726 	fprintf (dump, "//    Stop should be before %d%s\n", INSN_UID (insn),
7727 		 last_clock == clock ? " + cycle advance" : "");
7728 
7729       stop_before_p = 1;
7730       current_cycle = clock;
7731       mem_ops_in_group[current_cycle % 4] = 0;
7732 
7733       if (last_clock == clock)
7734 	{
7735 	  state_transition (curr_state, dfa_stop_insn);
7736 	  if (TARGET_EARLY_STOP_BITS)
7737 	    *sort_p = (last_scheduled_insn == NULL_RTX
7738 		       || ! CALL_P (last_scheduled_insn));
7739 	  else
7740 	    *sort_p = 0;
7741 	  return 1;
7742 	}
7743 
7744       if (last_scheduled_insn)
7745 	{
7746 	  if (unknown_for_bundling_p (last_scheduled_insn))
7747 	    state_reset (curr_state);
7748 	  else
7749 	    {
7750 	      memcpy (curr_state, prev_cycle_state, dfa_state_size);
7751 	      state_transition (curr_state, dfa_stop_insn);
7752 	      state_transition (curr_state, dfa_pre_cycle_insn);
7753 	      state_transition (curr_state, NULL);
7754 	    }
7755 	}
7756     }
7757   return 0;
7758 }
7759 
7760 /* Implement targetm.sched.h_i_d_extended hook.
7761    Extend internal data structures.  */
7762 static void
7763 ia64_h_i_d_extended (void)
7764 {
7765   if (stops_p != NULL)
7766     {
7767       int new_clocks_length = get_max_uid () * 3 / 2;
7768       stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
7769       clocks_length = new_clocks_length;
7770     }
7771 }
7772 
7773 
7774 /* This structure describes the data used by the backend to guide scheduling.
7775    When the current scheduling point is switched, this data should be saved
7776    and restored later, if the scheduler returns to this point.  */
7777 struct _ia64_sched_context
7778 {
7779   state_t prev_cycle_state;
7780   rtx_insn *last_scheduled_insn;
7781   struct reg_write_state rws_sum[NUM_REGS];
7782   struct reg_write_state rws_insn[NUM_REGS];
7783   int first_instruction;
7784   int pending_data_specs;
7785   int current_cycle;
7786   char mem_ops_in_group[4];
7787 };
7788 typedef struct _ia64_sched_context *ia64_sched_context_t;
7789 
7790 /* Allocates a scheduling context.  */
7791 static void *
7792 ia64_alloc_sched_context (void)
7793 {
7794   return xmalloc (sizeof (struct _ia64_sched_context));
7795 }
7796 
7797 /* Initializes the _SC context with clean data, if CLEAN_P, and from
7798    the global context otherwise.  */
7799 static void
7800 ia64_init_sched_context (void *_sc, bool clean_p)
7801 {
7802   ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7803 
7804   sc->prev_cycle_state = xmalloc (dfa_state_size);
7805   if (clean_p)
7806     {
7807       state_reset (sc->prev_cycle_state);
7808       sc->last_scheduled_insn = NULL;
7809       memset (sc->rws_sum, 0, sizeof (rws_sum));
7810       memset (sc->rws_insn, 0, sizeof (rws_insn));
7811       sc->first_instruction = 1;
7812       sc->pending_data_specs = 0;
7813       sc->current_cycle = 0;
7814       memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7815     }
7816   else
7817     {
7818       memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7819       sc->last_scheduled_insn = last_scheduled_insn;
7820       memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7821       memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7822       sc->first_instruction = first_instruction;
7823       sc->pending_data_specs = pending_data_specs;
7824       sc->current_cycle = current_cycle;
7825       memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7826     }
7827 }
7828 
7829 /* Sets the global scheduling context to the one pointed to by _SC.  */
7830 static void
7831 ia64_set_sched_context (void *_sc)
7832 {
7833   ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7834 
7835   gcc_assert (sc != NULL);
7836 
7837   memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7838   last_scheduled_insn = sc->last_scheduled_insn;
7839   memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7840   memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7841   first_instruction = sc->first_instruction;
7842   pending_data_specs = sc->pending_data_specs;
7843   current_cycle = sc->current_cycle;
7844   memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7845 }
7846 
7847 /* Clears the data in the _SC scheduling context.  */
7848 static void
7849 ia64_clear_sched_context (void *_sc)
7850 {
7851   ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7852 
7853   free (sc->prev_cycle_state);
7854   sc->prev_cycle_state = NULL;
7855 }
7856 
7857 /* Frees the _SC scheduling context.  */
7858 static void
7859 ia64_free_sched_context (void *_sc)
7860 {
7861   gcc_assert (_sc != NULL);
7862 
7863   free (_sc);
7864 }
7865 
7866 typedef rtx (* gen_func_t) (rtx, rtx);
7867 
7868 /* Return a function that will generate a load of mode MODE_NO
7869    with speculation types TS.  */
7870 static gen_func_t
7871 get_spec_load_gen_function (ds_t ts, int mode_no)
7872 {
7873   static gen_func_t gen_ld_[] = {
7874     gen_movbi,
7875     gen_movqi_internal,
7876     gen_movhi_internal,
7877     gen_movsi_internal,
7878     gen_movdi_internal,
7879     gen_movsf_internal,
7880     gen_movdf_internal,
7881     gen_movxf_internal,
7882     gen_movti_internal,
7883     gen_zero_extendqidi2,
7884     gen_zero_extendhidi2,
7885     gen_zero_extendsidi2,
7886   };
7887 
7888   static gen_func_t gen_ld_a[] = {
7889     gen_movbi_advanced,
7890     gen_movqi_advanced,
7891     gen_movhi_advanced,
7892     gen_movsi_advanced,
7893     gen_movdi_advanced,
7894     gen_movsf_advanced,
7895     gen_movdf_advanced,
7896     gen_movxf_advanced,
7897     gen_movti_advanced,
7898     gen_zero_extendqidi2_advanced,
7899     gen_zero_extendhidi2_advanced,
7900     gen_zero_extendsidi2_advanced,
7901   };
7902   static gen_func_t gen_ld_s[] = {
7903     gen_movbi_speculative,
7904     gen_movqi_speculative,
7905     gen_movhi_speculative,
7906     gen_movsi_speculative,
7907     gen_movdi_speculative,
7908     gen_movsf_speculative,
7909     gen_movdf_speculative,
7910     gen_movxf_speculative,
7911     gen_movti_speculative,
7912     gen_zero_extendqidi2_speculative,
7913     gen_zero_extendhidi2_speculative,
7914     gen_zero_extendsidi2_speculative,
7915   };
7916   static gen_func_t gen_ld_sa[] = {
7917     gen_movbi_speculative_advanced,
7918     gen_movqi_speculative_advanced,
7919     gen_movhi_speculative_advanced,
7920     gen_movsi_speculative_advanced,
7921     gen_movdi_speculative_advanced,
7922     gen_movsf_speculative_advanced,
7923     gen_movdf_speculative_advanced,
7924     gen_movxf_speculative_advanced,
7925     gen_movti_speculative_advanced,
7926     gen_zero_extendqidi2_speculative_advanced,
7927     gen_zero_extendhidi2_speculative_advanced,
7928     gen_zero_extendsidi2_speculative_advanced,
7929   };
7930   static gen_func_t gen_ld_s_a[] = {
7931     gen_movbi_speculative_a,
7932     gen_movqi_speculative_a,
7933     gen_movhi_speculative_a,
7934     gen_movsi_speculative_a,
7935     gen_movdi_speculative_a,
7936     gen_movsf_speculative_a,
7937     gen_movdf_speculative_a,
7938     gen_movxf_speculative_a,
7939     gen_movti_speculative_a,
7940     gen_zero_extendqidi2_speculative_a,
7941     gen_zero_extendhidi2_speculative_a,
7942     gen_zero_extendsidi2_speculative_a,
7943   };
7944 
7945   gen_func_t *gen_ld;
7946 
7947   if (ts & BEGIN_DATA)
7948     {
7949       if (ts & BEGIN_CONTROL)
7950 	gen_ld = gen_ld_sa;
7951       else
7952 	gen_ld = gen_ld_a;
7953     }
7954   else if (ts & BEGIN_CONTROL)
7955     {
7956       if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7957 	  || ia64_needs_block_p (ts))
7958 	gen_ld = gen_ld_s;
7959       else
7960 	gen_ld = gen_ld_s_a;
7961     }
7962   else if (ts == 0)
7963     gen_ld = gen_ld_;
7964   else
7965     gcc_unreachable ();
7966 
7967   return gen_ld[mode_no];
7968 }
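
/* A usage sketch (hypothetical values, not a call made in this file): with
   TS == BEGIN_DATA and MODE_NO == 4 (DImode, see ia64_mode_to_int below),
   the table chosen above is gen_ld_a and the generator returned is
   gen_movdi_advanced, i.e. a data-speculative (advanced) 8-byte load.  */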
7969 
7970 /* Constants that help map 'machine_mode' to int.  */
7971 enum SPEC_MODES
7972   {
7973     SPEC_MODE_INVALID = -1,
7974     SPEC_MODE_FIRST = 0,
7975     SPEC_MODE_FOR_EXTEND_FIRST = 1,
7976     SPEC_MODE_FOR_EXTEND_LAST = 3,
7977     SPEC_MODE_LAST = 8
7978   };
7979 
7980 enum
7981   {
7982     /* Offset to reach ZERO_EXTEND patterns.  */
7983     SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
7984   };
7985 
7986 /* Return the index of MODE.  */
7987 static int
7988 ia64_mode_to_int (machine_mode mode)
7989 {
7990   switch (mode)
7991     {
7992     case E_BImode: return 0; /* SPEC_MODE_FIRST  */
7993     case E_QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST  */
7994     case E_HImode: return 2;
7995     case E_SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST  */
7996     case E_DImode: return 4;
7997     case E_SFmode: return 5;
7998     case E_DFmode: return 6;
7999     case E_XFmode: return 7;
8000     case E_TImode:
8001       /* ??? This mode needs testing.  Bypasses for ldfp8 instruction are not
8002 	 mentioned in itanium[12].md.  Predicate fp_register_operand also
8003 	 needs to be defined.  Bottom line: better disable for now.  */
8004       return SPEC_MODE_INVALID;
8005     default:     return SPEC_MODE_INVALID;
8006     }
8007 }
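
/* A worked example of how these indices combine (a sketch): the offset
   SPEC_GEN_EXTEND_OFFSET is SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
   == 8 - 1 + 1 == 8, so a zero-extending SImode load corresponds to index
   3 + 8 == 11, i.e. the gen_zero_extendsidi2* entries of the gen_ld_*
   tables above.  */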
8008 
8009 /* Provide information about speculation capabilities.  */
8010 static void
8011 ia64_set_sched_flags (spec_info_t spec_info)
8012 {
8013   unsigned int *flags = &(current_sched_info->flags);
8014 
8015   if (*flags & SCHED_RGN
8016       || *flags & SCHED_EBB
8017       || *flags & SEL_SCHED)
8018     {
8019       int mask = 0;
8020 
8021       if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
8022           || (mflag_sched_ar_data_spec && reload_completed))
8023 	{
8024 	  mask |= BEGIN_DATA;
8025 
8026 	  if (!sel_sched_p ()
8027 	      && ((mflag_sched_br_in_data_spec && !reload_completed)
8028 		  || (mflag_sched_ar_in_data_spec && reload_completed)))
8029 	    mask |= BE_IN_DATA;
8030 	}
8031 
8032       if (mflag_sched_control_spec
8033           && (!sel_sched_p ()
8034 	      || reload_completed))
8035 	{
8036 	  mask |= BEGIN_CONTROL;
8037 
8038 	  if (!sel_sched_p () && mflag_sched_in_control_spec)
8039 	    mask |= BE_IN_CONTROL;
8040 	}
8041 
8042       spec_info->mask = mask;
8043 
8044       if (mask)
8045 	{
8046 	  *flags |= USE_DEPS_LIST | DO_SPECULATION;
8047 
8048 	  if (mask & BE_IN_SPEC)
8049 	    *flags |= NEW_BBS;
8050 
8051 	  spec_info->flags = 0;
8052 
8053 	  if ((mask & CONTROL_SPEC)
8054 	      && sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
8055 	    spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
8056 
8057 	  if (sched_verbose >= 1)
8058 	    spec_info->dump = sched_dump;
8059 	  else
8060 	    spec_info->dump = 0;
8061 
8062 	  if (mflag_sched_count_spec_in_critical_path)
8063 	    spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
8064 	}
8065     }
8066   else
8067     spec_info->mask = 0;
8068 }
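/* For example, under the region scheduler before reload, enabling
   mflag_sched_br_data_spec and mflag_sched_br_in_data_spec (set by the
   corresponding -msched-* options) gives mask == BEGIN_DATA | BE_IN_DATA;
   enabling mflag_sched_control_spec and mflag_sched_in_control_spec as
   well adds BEGIN_CONTROL and BE_IN_CONTROL, and any nonzero mask turns
   on USE_DEPS_LIST and DO_SPECULATION in the scheduler flags.  */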
8069 
8070 /* If INSN is an appropriate load return its mode.
8071    Return -1 otherwise.  */
8072 static int
8073 get_mode_no_for_insn (rtx_insn *insn)
8074 {
8075   rtx reg, mem, mode_rtx;
8076   int mode_no;
8077   bool extend_p;
8078 
8079   extract_insn_cached (insn);
8080 
8081   /* We use WHICH_ALTERNATIVE only after reload.  This will
8082      guarantee that reload won't touch a speculative insn.  */
8083 
8084   if (recog_data.n_operands != 2)
8085     return -1;
8086 
8087   reg = recog_data.operand[0];
8088   mem = recog_data.operand[1];
8089 
8090   /* We should use MEM's mode since REG's mode in the presence of
8091      ZERO_EXTEND will always be DImode.  */
8092   if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
8093     /* Process non-speculative ld.  */
8094     {
8095       if (!reload_completed)
8096 	{
8097 	  /* Do not speculate into regs like ar.lc.  */
8098 	  if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
8099 	    return -1;
8100 
8101 	  if (!MEM_P (mem))
8102 	    return -1;
8103 
8104 	  {
8105 	    rtx mem_reg = XEXP (mem, 0);
8106 
8107 	    if (!REG_P (mem_reg))
8108 	      return -1;
8109 	  }
8110 
8111 	  mode_rtx = mem;
8112 	}
8113       else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
8114 	{
8115 	  gcc_assert (REG_P (reg) && MEM_P (mem));
8116 	  mode_rtx = mem;
8117 	}
8118       else
8119 	return -1;
8120     }
8121   else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
8122 	   || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
8123 	   || get_attr_check_load (insn) == CHECK_LOAD_YES)
8124     /* Process speculative ld or ld.c.  */
8125     {
8126       gcc_assert (REG_P (reg) && MEM_P (mem));
8127       mode_rtx = mem;
8128     }
8129   else
8130     {
8131       enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
8132 
8133       if (attr_class == ITANIUM_CLASS_CHK_A
8134 	  || attr_class == ITANIUM_CLASS_CHK_S_I
8135 	  || attr_class == ITANIUM_CLASS_CHK_S_F)
8136 	/* Process chk.  */
8137 	mode_rtx = reg;
8138       else
8139 	return -1;
8140     }
8141 
8142   mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
8143 
8144   if (mode_no == SPEC_MODE_INVALID)
8145     return -1;
8146 
8147   extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
8148 
8149   if (extend_p)
8150     {
8151       if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
8152 	    && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
8153 	return -1;
8154 
8155       mode_no += SPEC_GEN_EXTEND_OFFSET;
8156     }
8157 
8158   return mode_no;
8159 }
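/* As an illustration, a zero-extending QImode load such as
     (set (reg:DI r) (zero_extend:DI (mem:QI (reg:DI addr))))
   uses the QImode MEM as mode_rtx, so mode_no starts at 1 and, because
   GET_MODE (reg) differs from QImode, ends up as
   1 + SPEC_GEN_EXTEND_OFFSET == 9, the gen_zero_extendqidi2_* slot
   (r and addr are placeholder registers).  */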
8160 
8161 /* If X is an unspec part of a speculative load, return its code.
8162    Return -1 otherwise.  */
8163 static int
8164 get_spec_unspec_code (const_rtx x)
8165 {
8166   if (GET_CODE (x) != UNSPEC)
8167     return -1;
8168 
8169   {
8170     int code;
8171 
8172     code = XINT (x, 1);
8173 
8174     switch (code)
8175       {
8176       case UNSPEC_LDA:
8177       case UNSPEC_LDS:
8178       case UNSPEC_LDS_A:
8179       case UNSPEC_LDSA:
8180 	return code;
8181 
8182       default:
8183 	return -1;
8184       }
8185   }
8186 }
8187 
8188 /* Implement skip_rtx_p hook.  */
8189 static bool
8190 ia64_skip_rtx_p (const_rtx x)
8191 {
8192   return get_spec_unspec_code (x) != -1;
8193 }
8194 
8195 /* If INSN is a speculative load, return its UNSPEC code.
8196    Return -1 otherwise.  */
8197 static int
8198 get_insn_spec_code (const_rtx insn)
8199 {
8200   rtx pat, reg, mem;
8201 
8202   pat = PATTERN (insn);
8203 
8204   if (GET_CODE (pat) == COND_EXEC)
8205     pat = COND_EXEC_CODE (pat);
8206 
8207   if (GET_CODE (pat) != SET)
8208     return -1;
8209 
8210   reg = SET_DEST (pat);
8211   if (!REG_P (reg))
8212     return -1;
8213 
8214   mem = SET_SRC (pat);
8215   if (GET_CODE (mem) == ZERO_EXTEND)
8216     mem = XEXP (mem, 0);
8217 
8218   return get_spec_unspec_code (mem);
8219 }
8220 
8221 /* If INSN is a speculative load, return a ds with the speculation types.
8222    Otherwise [if INSN is a normal instruction] return 0.  */
8223 static ds_t
8224 ia64_get_insn_spec_ds (rtx_insn *insn)
8225 {
8226   int code = get_insn_spec_code (insn);
8227 
8228   switch (code)
8229     {
8230     case UNSPEC_LDA:
8231       return BEGIN_DATA;
8232 
8233     case UNSPEC_LDS:
8234     case UNSPEC_LDS_A:
8235       return BEGIN_CONTROL;
8236 
8237     case UNSPEC_LDSA:
8238       return BEGIN_DATA | BEGIN_CONTROL;
8239 
8240     default:
8241       return 0;
8242     }
8243 }
8244 
8245 /* If INSN is a speculative load return a ds with the speculation types that
8246    will be checked.
8247    Otherwise [if INSN is a normal instruction] return 0.  */
8248 static ds_t
8249 ia64_get_insn_checked_ds (rtx_insn *insn)
8250 {
8251   int code = get_insn_spec_code (insn);
8252 
8253   switch (code)
8254     {
8255     case UNSPEC_LDA:
8256       return BEGIN_DATA | BEGIN_CONTROL;
8257 
8258     case UNSPEC_LDS:
8259       return BEGIN_CONTROL;
8260 
8261     case UNSPEC_LDS_A:
8262     case UNSPEC_LDSA:
8263       return BEGIN_DATA | BEGIN_CONTROL;
8264 
8265     default:
8266       return 0;
8267     }
8268 }
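/* To illustrate the two hooks above: an advanced load (UNSPEC_LDA) is
   reported by ia64_get_insn_spec_ds as BEGIN_DATA, but by
   ia64_get_insn_checked_ds as BEGIN_DATA | BEGIN_CONTROL, while a
   control-speculative load (UNSPEC_LDS) yields BEGIN_CONTROL from both
   hooks.  */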
8269 
8270 /* Return a speculative pattern for INSN, speculating it with the
8271    speculation types TS and the generator selected for mode index
8272    MODE_NO.  Any COND_EXEC wrapper of the original pattern is
8273    preserved.  */
8274 static rtx
8275 ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
8276 {
8277   rtx pat, new_pat;
8278   gen_func_t gen_load;
8279 
8280   gen_load = get_spec_load_gen_function (ts, mode_no);
8281 
8282   new_pat = gen_load (copy_rtx (recog_data.operand[0]),
8283 		      copy_rtx (recog_data.operand[1]));
8284 
8285   pat = PATTERN (insn);
8286   if (GET_CODE (pat) == COND_EXEC)
8287     new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8288 				 new_pat);
8289 
8290   return new_pat;
8291 }
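/* A rough sketch of the transformation: speculating
     (cond_exec (ne (reg:BI p6) (const_int 0))
		(set (reg:DI r) (mem:DI (reg:DI addr))))
   with TS == BEGIN_CONTROL regenerates the load through the selected
   generator and re-wraps it in the same COND_EXEC, giving approximately
     (cond_exec (ne (reg:BI p6) (const_int 0))
		(set (reg:DI r)
		     (unspec:DI [(mem:DI (reg:DI addr))] UNSPEC_LDS)))
   where p6, r and addr are placeholders.  */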
8292 
8293 static bool
8294 insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
8295 			      ds_t ds ATTRIBUTE_UNUSED)
8296 {
8297   return false;
8298 }
8299 
8300 /* Implement targetm.sched.speculate_insn hook.
8301    Check if the INSN can be TS speculative.
8302    If 'no' - return -1.
8303    If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
8304    If current pattern of the INSN already provides TS speculation,
8305    return 0.  */
8306 static int
8307 ia64_speculate_insn (rtx_insn *insn, ds_t ts, rtx *new_pat)
8308 {
8309   int mode_no;
8310   int res;
8311 
8312   gcc_assert (!(ts & ~SPECULATIVE));
8313 
8314   if (ia64_spec_check_p (insn))
8315     return -1;
8316 
8317   if ((ts & BE_IN_SPEC)
8318       && !insn_can_be_in_speculative_p (insn, ts))
8319     return -1;
8320 
8321   mode_no = get_mode_no_for_insn (insn);
8322 
8323   if (mode_no != SPEC_MODE_INVALID)
8324     {
8325       if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
8326 	res = 0;
8327       else
8328 	{
8329 	  res = 1;
8330 	  *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
8331 	}
8332     }
8333   else
8334     res = -1;
8335 
8336   return res;
8337 }
8338 
8339 /* Return a function that will generate a check for speculation TS with mode
8340    MODE_NO.
8341    If simple check is needed, pass true for SIMPLE_CHECK_P.
8342    If clearing check is needed, pass true for CLEARING_CHECK_P.  */
8343 static gen_func_t
8344 get_spec_check_gen_function (ds_t ts, int mode_no,
8345 			     bool simple_check_p, bool clearing_check_p)
8346 {
8347   static gen_func_t gen_ld_c_clr[] = {
8348     gen_movbi_clr,
8349     gen_movqi_clr,
8350     gen_movhi_clr,
8351     gen_movsi_clr,
8352     gen_movdi_clr,
8353     gen_movsf_clr,
8354     gen_movdf_clr,
8355     gen_movxf_clr,
8356     gen_movti_clr,
8357     gen_zero_extendqidi2_clr,
8358     gen_zero_extendhidi2_clr,
8359     gen_zero_extendsidi2_clr,
8360   };
8361   static gen_func_t gen_ld_c_nc[] = {
8362     gen_movbi_nc,
8363     gen_movqi_nc,
8364     gen_movhi_nc,
8365     gen_movsi_nc,
8366     gen_movdi_nc,
8367     gen_movsf_nc,
8368     gen_movdf_nc,
8369     gen_movxf_nc,
8370     gen_movti_nc,
8371     gen_zero_extendqidi2_nc,
8372     gen_zero_extendhidi2_nc,
8373     gen_zero_extendsidi2_nc,
8374   };
8375   static gen_func_t gen_chk_a_clr[] = {
8376     gen_advanced_load_check_clr_bi,
8377     gen_advanced_load_check_clr_qi,
8378     gen_advanced_load_check_clr_hi,
8379     gen_advanced_load_check_clr_si,
8380     gen_advanced_load_check_clr_di,
8381     gen_advanced_load_check_clr_sf,
8382     gen_advanced_load_check_clr_df,
8383     gen_advanced_load_check_clr_xf,
8384     gen_advanced_load_check_clr_ti,
8385     gen_advanced_load_check_clr_di,
8386     gen_advanced_load_check_clr_di,
8387     gen_advanced_load_check_clr_di,
8388   };
8389   static gen_func_t gen_chk_a_nc[] = {
8390     gen_advanced_load_check_nc_bi,
8391     gen_advanced_load_check_nc_qi,
8392     gen_advanced_load_check_nc_hi,
8393     gen_advanced_load_check_nc_si,
8394     gen_advanced_load_check_nc_di,
8395     gen_advanced_load_check_nc_sf,
8396     gen_advanced_load_check_nc_df,
8397     gen_advanced_load_check_nc_xf,
8398     gen_advanced_load_check_nc_ti,
8399     gen_advanced_load_check_nc_di,
8400     gen_advanced_load_check_nc_di,
8401     gen_advanced_load_check_nc_di,
8402   };
8403   static gen_func_t gen_chk_s[] = {
8404     gen_speculation_check_bi,
8405     gen_speculation_check_qi,
8406     gen_speculation_check_hi,
8407     gen_speculation_check_si,
8408     gen_speculation_check_di,
8409     gen_speculation_check_sf,
8410     gen_speculation_check_df,
8411     gen_speculation_check_xf,
8412     gen_speculation_check_ti,
8413     gen_speculation_check_di,
8414     gen_speculation_check_di,
8415     gen_speculation_check_di,
8416   };
8417 
8418   gen_func_t *gen_check;
8419 
8420   if (ts & BEGIN_DATA)
8421     {
8422       /* We don't need recovery code because, even if this is an ld.sa,
8423 	 an ALAT entry will be allocated only if the NAT bit is set to
8424 	 zero.  So it is enough to use an ld.c here.  */
8425 
8426       if (simple_check_p)
8427 	{
8428 	  gcc_assert (mflag_sched_spec_ldc);
8429 
8430 	  if (clearing_check_p)
8431 	    gen_check = gen_ld_c_clr;
8432 	  else
8433 	    gen_check = gen_ld_c_nc;
8434 	}
8435       else
8436 	{
8437 	  if (clearing_check_p)
8438 	    gen_check = gen_chk_a_clr;
8439 	  else
8440 	    gen_check = gen_chk_a_nc;
8441 	}
8442     }
8443   else if (ts & BEGIN_CONTROL)
8444     {
8445       if (simple_check_p)
8446 	/* We might want to use ld.sa -> ld.c instead of
8447 	   ld.s -> chk.s.  */
8448 	{
8449 	  gcc_assert (!ia64_needs_block_p (ts));
8450 
8451 	  if (clearing_check_p)
8452 	    gen_check = gen_ld_c_clr;
8453 	  else
8454 	    gen_check = gen_ld_c_nc;
8455 	}
8456       else
8457 	{
8458 	  gen_check = gen_chk_s;
8459 	}
8460     }
8461   else
8462     gcc_unreachable ();
8463 
8464   gcc_assert (mode_no >= 0);
8465   return gen_check[mode_no];
8466 }
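/* For instance, for a data-speculative DImode load the simple clearing
   check is gen_movdi_clr (roughly an ld8.c.clr reload from the same
   address), while the non-simple clearing check is
   gen_advanced_load_check_clr_di (a chk.a.clr branching to recovery
   code); the mnemonics here are only indicative.  */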
8467 
8468 /* Return true if a check for the speculation types TS needs a branchy recovery block.  */
8469 static bool
8470 ia64_needs_block_p (ds_t ts)
8471 {
8472   if (ts & BEGIN_DATA)
8473     return !mflag_sched_spec_ldc;
8474 
8475   gcc_assert ((ts & BEGIN_CONTROL) != 0);
8476 
8477   return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
8478 }
8479 
8480 /* Generate (or regenerate) a recovery check for INSN.  */
8481 static rtx
8482 ia64_gen_spec_check (rtx_insn *insn, rtx_insn *label, ds_t ds)
8483 {
8484   rtx op1, pat, check_pat;
8485   gen_func_t gen_check;
8486   int mode_no;
8487 
8488   mode_no = get_mode_no_for_insn (insn);
8489   gcc_assert (mode_no >= 0);
8490 
8491   if (label)
8492     op1 = label;
8493   else
8494     {
8495       gcc_assert (!ia64_needs_block_p (ds));
8496       op1 = copy_rtx (recog_data.operand[1]);
8497     }
8498 
8499   gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
8500 					   true);
8501 
8502   check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
8503 
8504   pat = PATTERN (insn);
8505   if (GET_CODE (pat) == COND_EXEC)
8506     check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8507 				   check_pat);
8508 
8509   return check_pat;
8510 }
8511 
8512 /* Return nonzero if X is a speculation recovery check (ld.c, chk.a or chk.s).  */
8513 static int
8514 ia64_spec_check_p (rtx x)
8515 {
8516   x = PATTERN (x);
8517   if (GET_CODE (x) == COND_EXEC)
8518     x = COND_EXEC_CODE (x);
8519   if (GET_CODE (x) == SET)
8520     return ia64_spec_check_src_p (SET_SRC (x));
8521   return 0;
8522 }
8523 
8524 /* Return nonzero (the UNSPEC code) if SRC belongs to a recovery check.  */
8525 static int
8526 ia64_spec_check_src_p (rtx src)
8527 {
8528   if (GET_CODE (src) == IF_THEN_ELSE)
8529     {
8530       rtx t;
8531 
8532       t = XEXP (src, 0);
8533       if (GET_CODE (t) == NE)
8534 	{
8535 	  t = XEXP (t, 0);
8536 
8537 	  if (GET_CODE (t) == UNSPEC)
8538 	    {
8539 	      int code;
8540 
8541 	      code = XINT (t, 1);
8542 
8543 	      if (code == UNSPEC_LDCCLR
8544 		  || code == UNSPEC_LDCNC
8545 		  || code == UNSPEC_CHKACLR
8546 		  || code == UNSPEC_CHKANC
8547 		  || code == UNSPEC_CHKS)
8548 		{
8549 		  gcc_assert (code != 0);
8550 		  return code;
8551 		}
8552 	    }
8553 	}
8554     }
8555   return 0;
8556 }
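/* A recovery check recognized by the two functions above has the rough
   shape
     (set ... (if_then_else (ne (unspec [...] UNSPEC_CHKS) (const_int 0))
			    ... ...))
   (or one of the other UNSPEC codes listed above); the value returned is
   that UNSPEC code, which is always nonzero.  */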
8557 
8558 
8559 /* The following page contains abstract data `bundle states' which are
8560    used for bundling insns (inserting nops and template generation).  */
8561 
8562 /* The following describes state of insn bundling.  */
8563 
8564 struct bundle_state
8565 {
8566   /* Unique bundle state number to identify them in the debugging
8567      output  */
8568   int unique_num;
8569   rtx_insn *insn; /* corresponding insn, NULL for the 1st and the last state  */
8570   /* number of nops before and after the insn  */
8571   short before_nops_num, after_nops_num;
8572   int insn_num; /* insn number (0 for the initial state, 1 for the 1st
8573                    insn, and so on)  */
8574   int cost;     /* cost of the state in cycles */
8575   int accumulated_insns_num; /* number of all previous insns including
8576 				nops.  L is considered as 2 insns */
8577   int branch_deviation; /* deviation of previous branches from 3rd slots  */
8578   int middle_bundle_stops; /* number of stop bits in the middle of bundles */
8579   struct bundle_state *next;  /* next state with the same insn_num  */
8580   struct bundle_state *originator; /* originator (previous insn state)  */
8581   /* All bundle states are in the following chain.  */
8582   struct bundle_state *allocated_states_chain;
8583   /* The DFA State after issuing the insn and the nops.  */
8584   state_t dfa_state;
8585 };
8586 
8587 /* The following array maps an insn number to the corresponding bundle states.  */
8588 
8589 static struct bundle_state **index_to_bundle_states;
8590 
8591 /* The unique number of next bundle state.  */
8592 
8593 static int bundle_states_num;
8594 
8595 /* All allocated bundle states are in the following chain.  */
8596 
8597 static struct bundle_state *allocated_bundle_states_chain;
8598 
8599 /* All allocated but not used bundle states are in the following
8600    chain.  */
8601 
8602 static struct bundle_state *free_bundle_state_chain;
8603 
8604 
8605 /* The following function returns a free bundle state.  */
8606 
8607 static struct bundle_state *
8608 get_free_bundle_state (void)
8609 {
8610   struct bundle_state *result;
8611 
8612   if (free_bundle_state_chain != NULL)
8613     {
8614       result = free_bundle_state_chain;
8615       free_bundle_state_chain = result->next;
8616     }
8617   else
8618     {
8619       result = XNEW (struct bundle_state);
8620       result->dfa_state = xmalloc (dfa_state_size);
8621       result->allocated_states_chain = allocated_bundle_states_chain;
8622       allocated_bundle_states_chain = result;
8623     }
8624   result->unique_num = bundle_states_num++;
8625   return result;
8626 
8627 }
8628 
8629 /* The following function frees given bundle state.  */
8630 
8631 static void
8632 free_bundle_state (struct bundle_state *state)
8633 {
8634   state->next = free_bundle_state_chain;
8635   free_bundle_state_chain = state;
8636 }
8637 
8638 /* Start work with abstract data `bundle states'.  */
8639 
8640 static void
8641 initiate_bundle_states (void)
8642 {
8643   bundle_states_num = 0;
8644   free_bundle_state_chain = NULL;
8645   allocated_bundle_states_chain = NULL;
8646 }
8647 
8648 /* Finish work with abstract data `bundle states'.  */
8649 
8650 static void
8651 finish_bundle_states (void)
8652 {
8653   struct bundle_state *curr_state, *next_state;
8654 
8655   for (curr_state = allocated_bundle_states_chain;
8656        curr_state != NULL;
8657        curr_state = next_state)
8658     {
8659       next_state = curr_state->allocated_states_chain;
8660       free (curr_state->dfa_state);
8661       free (curr_state);
8662     }
8663 }
8664 
8665 /* Hashtable helpers.  */
8666 
8667 struct bundle_state_hasher : nofree_ptr_hash <bundle_state>
8668 {
8669   static inline hashval_t hash (const bundle_state *);
8670   static inline bool equal (const bundle_state *, const bundle_state *);
8671 };
8672 
8673 /* The function returns hash of BUNDLE_STATE.  */
8674 
8675 inline hashval_t
8676 bundle_state_hasher::hash (const bundle_state *state)
8677 {
8678   unsigned result, i;
8679 
8680   for (result = i = 0; i < dfa_state_size; i++)
8681     result += (((unsigned char *) state->dfa_state) [i]
8682 	       << ((i % CHAR_BIT) * 3 + CHAR_BIT));
8683   return result + state->insn_num;
8684 }
8685 
8686 /* The function returns nonzero if the bundle state keys are equal.  */
8687 
8688 inline bool
8689 bundle_state_hasher::equal (const bundle_state *state1,
8690 			    const bundle_state *state2)
8691 {
8692   return (state1->insn_num == state2->insn_num
8693 	  && memcmp (state1->dfa_state, state2->dfa_state,
8694 		     dfa_state_size) == 0);
8695 }
8696 
8697 /* Hash table of the bundle states.  The key is dfa_state and insn_num
8698    of the bundle states.  */
8699 
8700 static hash_table<bundle_state_hasher> *bundle_state_table;
8701 
8702 /* The function inserts the BUNDLE_STATE into the hash table.  The
8703    function returns nonzero if the bundle has been inserted into the
8704    table.  The table contains the best bundle state with given key.  */
8705 
8706 static int
8707 insert_bundle_state (struct bundle_state *bundle_state)
8708 {
8709   struct bundle_state **entry_ptr;
8710 
8711   entry_ptr = bundle_state_table->find_slot (bundle_state, INSERT);
8712   if (*entry_ptr == NULL)
8713     {
8714       bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8715       index_to_bundle_states [bundle_state->insn_num] = bundle_state;
8716       *entry_ptr = bundle_state;
8717       return TRUE;
8718     }
8719   else if (bundle_state->cost < (*entry_ptr)->cost
8720 	   || (bundle_state->cost == (*entry_ptr)->cost
8721 	       && ((*entry_ptr)->accumulated_insns_num
8722 		   > bundle_state->accumulated_insns_num
8723 		   || ((*entry_ptr)->accumulated_insns_num
8724 		       == bundle_state->accumulated_insns_num
8725 		       && ((*entry_ptr)->branch_deviation
8726 			   > bundle_state->branch_deviation
8727 			   || ((*entry_ptr)->branch_deviation
8728 			       == bundle_state->branch_deviation
8729 			       && (*entry_ptr)->middle_bundle_stops
8730 			       > bundle_state->middle_bundle_stops))))))
8731 
8732     {
8733       struct bundle_state temp;
8734 
8735       temp = **entry_ptr;
8736       **entry_ptr = *bundle_state;
8737       (*entry_ptr)->next = temp.next;
8738       *bundle_state = temp;
8739     }
8740   return FALSE;
8741 }
8742 
8743 /* Start work with the hash table.  */
8744 
8745 static void
8746 initiate_bundle_state_table (void)
8747 {
8748   bundle_state_table = new hash_table<bundle_state_hasher> (50);
8749 }
8750 
8751 /* Finish work with the hash table.  */
8752 
8753 static void
8754 finish_bundle_state_table (void)
8755 {
8756   delete bundle_state_table;
8757   bundle_state_table = NULL;
8758 }
8759 
8760 
8761 
8762 /* The following variable is an insn `nop' used to check bundle states
8763    with different numbers of inserted nops.  */
8764 
8765 static rtx_insn *ia64_nop;
8766 
8767 /* The following function tries to issue NOPS_NUM nops for the current
8768    state without advancing the processor cycle.  If it fails, the
8769    function returns FALSE and frees the current state.  */
8770 
8771 static int
8772 try_issue_nops (struct bundle_state *curr_state, int nops_num)
8773 {
8774   int i;
8775 
8776   for (i = 0; i < nops_num; i++)
8777     if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8778       {
8779 	free_bundle_state (curr_state);
8780 	return FALSE;
8781       }
8782   return TRUE;
8783 }
8784 
8785 /* The following function tries to issue INSN for the current
8786    state without advancing the processor cycle.  If it fails, the
8787    function returns FALSE and frees the current state.  */
8788 
8789 static int
8790 try_issue_insn (struct bundle_state *curr_state, rtx insn)
8791 {
8792   if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8793     {
8794       free_bundle_state (curr_state);
8795       return FALSE;
8796     }
8797   return TRUE;
8798 }
8799 
8800 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
8801    starting from ORIGINATOR without advancing the processor cycle.  If
8802    TRY_BUNDLE_END_P is TRUE, the function also (or only, if
8803    ONLY_BUNDLE_END_P is TRUE) tries to issue enough nops to fill up the
8804    current bundle.  On success, the function creates a new bundle state
8805    and inserts it into the hash table and into `index_to_bundle_states'.  */
8806 
8807 static void
8808 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
8809 		     rtx_insn *insn, int try_bundle_end_p,
8810 		     int only_bundle_end_p)
8811 {
8812   struct bundle_state *curr_state;
8813 
8814   curr_state = get_free_bundle_state ();
8815   memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8816   curr_state->insn = insn;
8817   curr_state->insn_num = originator->insn_num + 1;
8818   curr_state->cost = originator->cost;
8819   curr_state->originator = originator;
8820   curr_state->before_nops_num = before_nops_num;
8821   curr_state->after_nops_num = 0;
8822   curr_state->accumulated_insns_num
8823     = originator->accumulated_insns_num + before_nops_num;
8824   curr_state->branch_deviation = originator->branch_deviation;
8825   curr_state->middle_bundle_stops = originator->middle_bundle_stops;
8826   gcc_assert (insn);
8827   if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
8828     {
8829       gcc_assert (GET_MODE (insn) != TImode);
8830       if (!try_issue_nops (curr_state, before_nops_num))
8831 	return;
8832       if (!try_issue_insn (curr_state, insn))
8833 	return;
8834       memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
8835       if (curr_state->accumulated_insns_num % 3 != 0)
8836 	curr_state->middle_bundle_stops++;
8837       if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8838 	  && curr_state->accumulated_insns_num % 3 != 0)
8839 	{
8840 	  free_bundle_state (curr_state);
8841 	  return;
8842 	}
8843     }
8844   else if (GET_MODE (insn) != TImode)
8845     {
8846       if (!try_issue_nops (curr_state, before_nops_num))
8847 	return;
8848       if (!try_issue_insn (curr_state, insn))
8849 	return;
8850       curr_state->accumulated_insns_num++;
8851       gcc_assert (!unknown_for_bundling_p (insn));
8852 
8853       if (ia64_safe_type (insn) == TYPE_L)
8854 	curr_state->accumulated_insns_num++;
8855     }
8856   else
8857     {
8858       /* If this is an insn that must be first in a group, then don't allow
8859 	 nops to be emitted before it.  Currently, alloc is the only such
8860 	 supported instruction.  */
8861       /* ??? The bundling automatons should handle this for us, but they do
8862 	 not yet have support for the first_insn attribute.  */
8863       if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8864 	{
8865 	  free_bundle_state (curr_state);
8866 	  return;
8867 	}
8868 
8869       state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8870       state_transition (curr_state->dfa_state, NULL);
8871       curr_state->cost++;
8872       if (!try_issue_nops (curr_state, before_nops_num))
8873 	return;
8874       if (!try_issue_insn (curr_state, insn))
8875 	return;
8876       curr_state->accumulated_insns_num++;
8877       if (unknown_for_bundling_p (insn))
8878 	{
8879 	  /* Finish bundle containing asm insn.  */
8880 	  curr_state->after_nops_num
8881 	    = 3 - curr_state->accumulated_insns_num % 3;
8882 	  curr_state->accumulated_insns_num
8883 	    += 3 - curr_state->accumulated_insns_num % 3;
8884 	}
8885       else if (ia64_safe_type (insn) == TYPE_L)
8886 	curr_state->accumulated_insns_num++;
8887     }
8888   if (ia64_safe_type (insn) == TYPE_B)
8889     curr_state->branch_deviation
8890       += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8891   if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8892     {
8893       if (!only_bundle_end_p && insert_bundle_state (curr_state))
8894 	{
8895 	  state_t dfa_state;
8896 	  struct bundle_state *curr_state1;
8897 	  struct bundle_state *allocated_states_chain;
8898 
8899 	  curr_state1 = get_free_bundle_state ();
8900 	  dfa_state = curr_state1->dfa_state;
8901 	  allocated_states_chain = curr_state1->allocated_states_chain;
8902 	  *curr_state1 = *curr_state;
8903 	  curr_state1->dfa_state = dfa_state;
8904 	  curr_state1->allocated_states_chain = allocated_states_chain;
8905 	  memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8906 		  dfa_state_size);
8907 	  curr_state = curr_state1;
8908 	}
8909       if (!try_issue_nops (curr_state,
8910 			   3 - curr_state->accumulated_insns_num % 3))
8911 	return;
8912       curr_state->after_nops_num
8913 	= 3 - curr_state->accumulated_insns_num % 3;
8914       curr_state->accumulated_insns_num
8915 	+= 3 - curr_state->accumulated_insns_num % 3;
8916     }
8917   if (!insert_bundle_state (curr_state))
8918     free_bundle_state (curr_state);
8919   return;
8920 }
8921 
8922 /* The following function returns the position in the two-bundle window
8923    for the given STATE.  */
8924 
8925 static int
8926 get_max_pos (state_t state)
8927 {
8928   if (cpu_unit_reservation_p (state, pos_6))
8929     return 6;
8930   else if (cpu_unit_reservation_p (state, pos_5))
8931     return 5;
8932   else if (cpu_unit_reservation_p (state, pos_4))
8933     return 4;
8934   else if (cpu_unit_reservation_p (state, pos_3))
8935     return 3;
8936   else if (cpu_unit_reservation_p (state, pos_2))
8937     return 2;
8938   else if (cpu_unit_reservation_p (state, pos_1))
8939     return 1;
8940   else
8941     return 0;
8942 }
8943 
8944 /* The function returns the code of a possible template for the given
8945    position and state.  It should only be called with a position equal
8946    to 3 or 6.  We avoid generating F NOPs by putting templates
8947    containing F insns at the end of the template search, because of an
8948    undocumented anomaly in McKinley-derived cores which can cause
8949    stalls if an F-unit insn (including a NOP) is issued within a
8950    six-cycle window after reading certain application registers (such
8951    as ar.bsp).  Furthermore, power considerations also argue against
8952    the use of F-unit instructions unless they're really needed.  */
8953 
8954 static int
8955 get_template (state_t state, int pos)
8956 {
8957   switch (pos)
8958     {
8959     case 3:
8960       if (cpu_unit_reservation_p (state, _0mmi_))
8961 	return 1;
8962       else if (cpu_unit_reservation_p (state, _0mii_))
8963 	return 0;
8964       else if (cpu_unit_reservation_p (state, _0mmb_))
8965 	return 7;
8966       else if (cpu_unit_reservation_p (state, _0mib_))
8967 	return 6;
8968       else if (cpu_unit_reservation_p (state, _0mbb_))
8969 	return 5;
8970       else if (cpu_unit_reservation_p (state, _0bbb_))
8971 	return 4;
8972       else if (cpu_unit_reservation_p (state, _0mmf_))
8973 	return 3;
8974       else if (cpu_unit_reservation_p (state, _0mfi_))
8975 	return 2;
8976       else if (cpu_unit_reservation_p (state, _0mfb_))
8977 	return 8;
8978       else if (cpu_unit_reservation_p (state, _0mlx_))
8979 	return 9;
8980       else
8981 	gcc_unreachable ();
8982     case 6:
8983       if (cpu_unit_reservation_p (state, _1mmi_))
8984 	return 1;
8985       else if (cpu_unit_reservation_p (state, _1mii_))
8986 	return 0;
8987       else if (cpu_unit_reservation_p (state, _1mmb_))
8988 	return 7;
8989       else if (cpu_unit_reservation_p (state, _1mib_))
8990 	return 6;
8991       else if (cpu_unit_reservation_p (state, _1mbb_))
8992 	return 5;
8993       else if (cpu_unit_reservation_p (state, _1bbb_))
8994 	return 4;
8995       else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8996 	return 3;
8997       else if (cpu_unit_reservation_p (state, _1mfi_))
8998 	return 2;
8999       else if (cpu_unit_reservation_p (state, _1mfb_))
9000 	return 8;
9001       else if (cpu_unit_reservation_p (state, _1mlx_))
9002 	return 9;
9003       else
9004 	gcc_unreachable ();
9005     default:
9006       gcc_unreachable ();
9007     }
9008 }
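/* The codes returned above index the templates emitted by
   bundle_selector; assuming the usual ia64.md ordering they correspond to
     0 .mii   1 .mmi   2 .mfi   3 .mmf   4 .bbb
     5 .mbb   6 .mib   7 .mmb   8 .mfb   9 .mlx
   so e.g. a state that reserved _0mfi_ maps to template 2, an .mfi
   bundle.  */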
9009 
9010 /* True when INSN is important for bundling.  */
9011 
9012 static bool
9013 important_for_bundling_p (rtx_insn *insn)
9014 {
9015   return (INSN_P (insn)
9016 	  && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
9017 	  && GET_CODE (PATTERN (insn)) != USE
9018 	  && GET_CODE (PATTERN (insn)) != CLOBBER);
9019 }
9020 
9021 /* The following function returns the first insn important for insn
9022    bundling, starting at INSN and stopping before TAIL.  */
9023 
9024 static rtx_insn *
9025 get_next_important_insn (rtx_insn *insn, rtx_insn *tail)
9026 {
9027   for (; insn && insn != tail; insn = NEXT_INSN (insn))
9028     if (important_for_bundling_p (insn))
9029       return insn;
9030   return NULL;
9031 }
9032 
9033 /* True when INSN is unknown, but important, for bundling.  */
9034 
9035 static bool
9036 unknown_for_bundling_p (rtx_insn *insn)
9037 {
9038   return (INSN_P (insn)
9039 	  && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_UNKNOWN
9040 	  && GET_CODE (PATTERN (insn)) != USE
9041 	  && GET_CODE (PATTERN (insn)) != CLOBBER);
9042 }
9043 
9044 /* Add a bundle selector TEMPLATE0 before INSN.  */
9045 
9046 static void
9047 ia64_add_bundle_selector_before (int template0, rtx_insn *insn)
9048 {
9049   rtx b = gen_bundle_selector (GEN_INT (template0));
9050 
9051   ia64_emit_insn_before (b, insn);
9052 #if NR_BUNDLES == 10
9053   if ((template0 == 4 || template0 == 5)
9054       && ia64_except_unwind_info (&global_options) == UI_TARGET)
9055     {
9056       int i;
9057       rtx note = NULL_RTX;
9058 
9059       /* In .mbb and .bbb bundles, check whether a CALL_INSN is in the
9060 	 first or second slot.  If it is and has a REG_EH_REGION note, copy
9061 	 the note to the following nops, as br.call sets rp to the address
9062 	 of the following bundle and therefore an EH region end must be on
9063 	 a bundle boundary.  */
9064       insn = PREV_INSN (insn);
9065       for (i = 0; i < 3; i++)
9066 	{
9067 	  do
9068 	    insn = next_active_insn (insn);
9069 	  while (NONJUMP_INSN_P (insn)
9070 		 && get_attr_empty (insn) == EMPTY_YES);
9071 	  if (CALL_P (insn))
9072 	    note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
9073 	  else if (note)
9074 	    {
9075 	      int code;
9076 
9077 	      gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
9078 			  || code == CODE_FOR_nop_b);
9079 	      if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
9080 		note = NULL_RTX;
9081 	      else
9082 		add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
9083 	    }
9084 	}
9085     }
9086 #endif
9087 }
9088 
9089 /* The following function does insn bundling.  Bundling means
9090    inserting templates and nop insns to fit insn groups into permitted
9091    templates.  Instruction scheduling uses NDFA (non-deterministic
9092    finite automata) encoding information about the templates and the
9093    inserted nops.  Nondeterminism of the automata permits following
9094    all possible insn sequences very quickly.
9095 
9096    Unfortunately it is not possible to get information about inserting
9097    nop insns and used templates from the automata states.  The
9098    automata only says that we can issue an insn possibly inserting
9099    some nops before it and using some template.  Therefore insn
9100    bundling in this function is implemented by using DFA
9101    (deterministic finite automata).  We follow all possible insn
9102    sequences by inserting 0-2 nops (that is what the NDFA describes for
9103    insn scheduling) before/after each insn being bundled.  We know the
9104    start of simulated processor cycle from insn scheduling (insn
9105    starting a new cycle has TImode).
9106 
9107    A simple implementation of insn bundling would create an enormous
9108    number of possible insn sequences satisfying the information about
9109    new cycle ticks taken from the insn scheduling.  To make the algorithm
9110    practical we use dynamic programming.  Each decision (about
9111    inserting nops and implicitly about previous decisions) is described
9112    by structure bundle_state (see above).  If we generate the same
9113    bundle state (the key is the automaton state after issuing the insns
9114    and nops for it), we reuse the already generated one.  As a
9115    consequence we reject some decisions which cannot improve the
9116    solution and reduce the memory used by the algorithm.
9117 
9118    When we reach the end of EBB (extended basic block), we choose the
9119    best sequence and then, moving back in EBB, insert templates for
9120    the best alternative.  The templates are taken from querying
9121    automaton state for each insn in chosen bundle states.
9122 
9123    So the algorithm makes two (forward and backward) passes through
9124    EBB.  */
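/* Schematically, the forward pass computes, for each prefix of the EBB,
   a set of bundle states keyed by (insn_num, dfa_state):

     states[0] = { empty initial state };
     for each important insn I, in order:
       for each state S in states[n - 1]:
	 for nops in { 0, 1, 2 }:   (2 only for F, B, L and S insns)
	   try issuing the nops and then I from S; if the automaton
	   accepts, insert the result into states[n], keeping only the
	   best state per (insn_num, dfa_state) key;

   and the backward pass walks the originator links of the best final
   state, materializing the chosen nops and bundle templates.  This is
   only a sketch; the details are in issue_nops_and_insn and
   insert_bundle_state above.  */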
9125 
9126 static void
9127 bundling (FILE *dump, int verbose, rtx_insn *prev_head_insn, rtx_insn *tail)
9128 {
9129   struct bundle_state *curr_state, *next_state, *best_state;
9130   rtx_insn *insn, *next_insn;
9131   int insn_num;
9132   int i, bundle_end_p, only_bundle_end_p, asm_p;
9133   int pos = 0, max_pos, template0, template1;
9134   rtx_insn *b;
9135   enum attr_type type;
9136 
9137   insn_num = 0;
9138   /* Count insns in the EBB.  */
9139   for (insn = NEXT_INSN (prev_head_insn);
9140        insn && insn != tail;
9141        insn = NEXT_INSN (insn))
9142     if (INSN_P (insn))
9143       insn_num++;
9144   if (insn_num == 0)
9145     return;
9146   bundling_p = 1;
9147   dfa_clean_insn_cache ();
9148   initiate_bundle_state_table ();
9149   index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
9150   /* First (forward) pass -- generation of bundle states.  */
9151   curr_state = get_free_bundle_state ();
9152   curr_state->insn = NULL;
9153   curr_state->before_nops_num = 0;
9154   curr_state->after_nops_num = 0;
9155   curr_state->insn_num = 0;
9156   curr_state->cost = 0;
9157   curr_state->accumulated_insns_num = 0;
9158   curr_state->branch_deviation = 0;
9159   curr_state->middle_bundle_stops = 0;
9160   curr_state->next = NULL;
9161   curr_state->originator = NULL;
9162   state_reset (curr_state->dfa_state);
9163   index_to_bundle_states [0] = curr_state;
9164   insn_num = 0;
9165   /* Shift cycle mark if it is put on insn which could be ignored.  */
9166   for (insn = NEXT_INSN (prev_head_insn);
9167        insn != tail;
9168        insn = NEXT_INSN (insn))
9169     if (INSN_P (insn)
9170 	&& !important_for_bundling_p (insn)
9171 	&& GET_MODE (insn) == TImode)
9172       {
9173 	PUT_MODE (insn, VOIDmode);
9174 	for (next_insn = NEXT_INSN (insn);
9175 	     next_insn != tail;
9176 	     next_insn = NEXT_INSN (next_insn))
9177 	  if (important_for_bundling_p (next_insn)
9178 	      && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
9179 	    {
9180 	      PUT_MODE (next_insn, TImode);
9181 	      break;
9182 	    }
9183       }
9184   /* Forward pass: generation of bundle states.  */
9185   for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
9186        insn != NULL_RTX;
9187        insn = next_insn)
9188     {
9189       gcc_assert (important_for_bundling_p (insn));
9190       type = ia64_safe_type (insn);
9191       next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
9192       insn_num++;
9193       index_to_bundle_states [insn_num] = NULL;
9194       for (curr_state = index_to_bundle_states [insn_num - 1];
9195 	   curr_state != NULL;
9196 	   curr_state = next_state)
9197 	{
9198 	  pos = curr_state->accumulated_insns_num % 3;
9199 	  next_state = curr_state->next;
9200 	  /* We must fill up the current bundle in order to start a
9201 	     subsequent asm insn in a new bundle.  An asm insn is always
9202 	     placed in a separate bundle.  */
9203 	  only_bundle_end_p
9204 	    = (next_insn != NULL_RTX
9205 	       && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
9206 	       && unknown_for_bundling_p (next_insn));
9207 	  /* We may fill up the current bundle if it is the cycle end
9208 	     without a group barrier.  */
9209 	  bundle_end_p
9210 	    = (only_bundle_end_p || next_insn == NULL_RTX
9211 	       || (GET_MODE (next_insn) == TImode
9212 		   && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
9213 	  if (type == TYPE_F || type == TYPE_B || type == TYPE_L
9214 	      || type == TYPE_S)
9215 	    issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
9216 				 only_bundle_end_p);
9217 	  issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
9218 			       only_bundle_end_p);
9219 	  issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
9220 			       only_bundle_end_p);
9221 	}
9222       gcc_assert (index_to_bundle_states [insn_num]);
9223       for (curr_state = index_to_bundle_states [insn_num];
9224 	   curr_state != NULL;
9225 	   curr_state = curr_state->next)
9226 	if (verbose >= 2 && dump)
9227 	  {
9228 	    /* This structure is taken from generated code of the
9229 	       pipeline hazard recognizer (see file insn-attrtab.c).
9230 	       Please don't forget to change the structure if a new
9231 	       automaton is added to .md file.  */
9232 	    struct DFA_chip
9233 	    {
9234 	      unsigned short one_automaton_state;
9235 	      unsigned short oneb_automaton_state;
9236 	      unsigned short two_automaton_state;
9237 	      unsigned short twob_automaton_state;
9238 	    };
9239 
9240 	    fprintf
9241 	      (dump,
9242 	       "//    Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
9243 	       curr_state->unique_num,
9244 	       (curr_state->originator == NULL
9245 		? -1 : curr_state->originator->unique_num),
9246 	       curr_state->cost,
9247 	       curr_state->before_nops_num, curr_state->after_nops_num,
9248 	       curr_state->accumulated_insns_num, curr_state->branch_deviation,
9249 	       curr_state->middle_bundle_stops,
9250 	       ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
9251 	       INSN_UID (insn));
9252 	  }
9253     }
9254 
9255   /* We should find a solution because the 2nd insn scheduling has
9256      found one.  */
9257   gcc_assert (index_to_bundle_states [insn_num]);
9258   /* Find a state corresponding to the best insn sequence.  */
9259   best_state = NULL;
9260   for (curr_state = index_to_bundle_states [insn_num];
9261        curr_state != NULL;
9262        curr_state = curr_state->next)
9263     /* We only look at the states with a fully filled up last
9264        bundle.  First we prefer insn sequences with minimal cost, then
9265        those with fewer inserted nops, and finally those with branch
9266        insns placed in the 3rd slots.  */
9267     if (curr_state->accumulated_insns_num % 3 == 0
9268 	&& (best_state == NULL || best_state->cost > curr_state->cost
9269 	    || (best_state->cost == curr_state->cost
9270 		&& (curr_state->accumulated_insns_num
9271 		    < best_state->accumulated_insns_num
9272 		    || (curr_state->accumulated_insns_num
9273 			== best_state->accumulated_insns_num
9274 			&& (curr_state->branch_deviation
9275 			    < best_state->branch_deviation
9276 			    || (curr_state->branch_deviation
9277 				== best_state->branch_deviation
9278 				&& curr_state->middle_bundle_stops
9279 				< best_state->middle_bundle_stops)))))))
9280       best_state = curr_state;
9281   /* Second (backward) pass: adding nops and templates.  */
9282   gcc_assert (best_state);
9283   insn_num = best_state->before_nops_num;
9284   template0 = template1 = -1;
9285   for (curr_state = best_state;
9286        curr_state->originator != NULL;
9287        curr_state = curr_state->originator)
9288     {
9289       insn = curr_state->insn;
9290       asm_p = unknown_for_bundling_p (insn);
9291       insn_num++;
9292       if (verbose >= 2 && dump)
9293 	{
9294 	  struct DFA_chip
9295 	  {
9296 	    unsigned short one_automaton_state;
9297 	    unsigned short oneb_automaton_state;
9298 	    unsigned short two_automaton_state;
9299 	    unsigned short twob_automaton_state;
9300 	  };
9301 
9302 	  fprintf
9303 	    (dump,
9304 	     "//    Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
9305 	     curr_state->unique_num,
9306 	     (curr_state->originator == NULL
9307 	      ? -1 : curr_state->originator->unique_num),
9308 	     curr_state->cost,
9309 	     curr_state->before_nops_num, curr_state->after_nops_num,
9310 	     curr_state->accumulated_insns_num, curr_state->branch_deviation,
9311 	     curr_state->middle_bundle_stops,
9312 	     ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
9313 	     INSN_UID (insn));
9314 	}
9315       /* Find the position in the current bundle window.  The window can
9316 	 contain at most two bundles.  A two-bundle window means that
9317 	 the processor will make two bundle rotations.  */
9318       max_pos = get_max_pos (curr_state->dfa_state);
9319       if (max_pos == 6
9320 	  /* The following (negative template number) means that the
9321 	     processor did one bundle rotation.  */
9322 	  || (max_pos == 3 && template0 < 0))
9323 	{
9324 	  /* We are at the end of the window -- find template(s) for
9325 	     its bundle(s).  */
9326 	  pos = max_pos;
9327 	  if (max_pos == 3)
9328 	    template0 = get_template (curr_state->dfa_state, 3);
9329 	  else
9330 	    {
9331 	      template1 = get_template (curr_state->dfa_state, 3);
9332 	      template0 = get_template (curr_state->dfa_state, 6);
9333 	    }
9334 	}
9335       if (max_pos > 3 && template1 < 0)
9336 	/* It may happen when we have the stop inside a bundle.  */
9337 	{
9338 	  gcc_assert (pos <= 3);
9339 	  template1 = get_template (curr_state->dfa_state, 3);
9340 	  pos += 3;
9341 	}
9342       if (!asm_p)
9343 	/* Emit nops after the current insn.  */
9344 	for (i = 0; i < curr_state->after_nops_num; i++)
9345 	  {
9346 	    rtx nop_pat = gen_nop ();
9347 	    rtx_insn *nop = emit_insn_after (nop_pat, insn);
9348 	    pos--;
9349 	    gcc_assert (pos >= 0);
9350 	    if (pos % 3 == 0)
9351 	      {
9352 		/* We are at the start of a bundle: emit the template
9353 		   (it should be defined).  */
9354 		gcc_assert (template0 >= 0);
9355 		ia64_add_bundle_selector_before (template0, nop);
9356 		/* If we have a two-bundle window, we make one bundle
9357 		   rotation.  Otherwise template0 will be undefined
9358 		   (a negative value).  */
9359 		template0 = template1;
9360 		template1 = -1;
9361 	      }
9362 	  }
9363       /* Move the position backward in the window.  A group barrier has
9364 	 no slot.  An asm insn takes a whole bundle.  */
9365       if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9366 	  && !unknown_for_bundling_p (insn))
9367 	pos--;
9368       /* Long insn takes 2 slots.  */
9369       if (ia64_safe_type (insn) == TYPE_L)
9370 	pos--;
9371       gcc_assert (pos >= 0);
9372       if (pos % 3 == 0
9373 	  && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9374 	  && !unknown_for_bundling_p (insn))
9375 	{
9376 	  /* The current insn is at the bundle start: emit the
9377 	     template.  */
9378 	  gcc_assert (template0 >= 0);
9379 	  ia64_add_bundle_selector_before (template0, insn);
9380 	  b = PREV_INSN (insn);
9381 	  insn = b;
9382 	  /* See comment above in analogous place for emitting nops
9383 	     after the insn.  */
9384 	  template0 = template1;
9385 	  template1 = -1;
9386 	}
9387       /* Emit nops before the current insn.  */
9388       for (i = 0; i < curr_state->before_nops_num; i++)
9389 	{
9390 	  rtx nop_pat = gen_nop ();
9391 	  ia64_emit_insn_before (nop_pat, insn);
9392 	  rtx_insn *nop = PREV_INSN (insn);
9393 	  insn = nop;
9394 	  pos--;
9395 	  gcc_assert (pos >= 0);
9396 	  if (pos % 3 == 0)
9397 	    {
9398 	      /* See comment above in analogous place for emitting nops
9399 		 after the insn.  */
9400 	      gcc_assert (template0 >= 0);
9401 	      ia64_add_bundle_selector_before (template0, insn);
9402 	      b = PREV_INSN (insn);
9403 	      insn = b;
9404 	      template0 = template1;
9405 	      template1 = -1;
9406 	    }
9407 	}
9408     }
9409 
9410   if (flag_checking)
9411     {
9412       /* Assert right calculation of middle_bundle_stops.  */
9413       int num = best_state->middle_bundle_stops;
9414       bool start_bundle = true, end_bundle = false;
9415 
9416       for (insn = NEXT_INSN (prev_head_insn);
9417 	   insn && insn != tail;
9418 	   insn = NEXT_INSN (insn))
9419 	{
9420 	  if (!INSN_P (insn))
9421 	    continue;
9422 	  if (recog_memoized (insn) == CODE_FOR_bundle_selector)
9423 	    start_bundle = true;
9424 	  else
9425 	    {
9426 	      rtx_insn *next_insn;
9427 
9428 	      for (next_insn = NEXT_INSN (insn);
9429 		   next_insn && next_insn != tail;
9430 		   next_insn = NEXT_INSN (next_insn))
9431 		if (INSN_P (next_insn)
9432 		    && (ia64_safe_itanium_class (next_insn)
9433 			!= ITANIUM_CLASS_IGNORE
9434 			|| recog_memoized (next_insn)
9435 			== CODE_FOR_bundle_selector)
9436 		    && GET_CODE (PATTERN (next_insn)) != USE
9437 		    && GET_CODE (PATTERN (next_insn)) != CLOBBER)
9438 		  break;
9439 
9440 	      end_bundle = next_insn == NULL_RTX
9441 		|| next_insn == tail
9442 		|| (INSN_P (next_insn)
9443 		    && recog_memoized (next_insn) == CODE_FOR_bundle_selector);
9444 	      if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
9445 		  && !start_bundle && !end_bundle
9446 		  && next_insn
9447 		  && !unknown_for_bundling_p (next_insn))
9448 		num--;
9449 
9450 	      start_bundle = false;
9451 	    }
9452 	}
9453 
9454       gcc_assert (num == 0);
9455     }
9456 
9457   free (index_to_bundle_states);
9458   finish_bundle_state_table ();
9459   bundling_p = 0;
9460   dfa_clean_insn_cache ();
9461 }
9462 
9463 /* The following function is called at the end of scheduling BB or
9464    EBB.  After reload, it inserts stop bits and does insn bundling.  */
9465 
9466 static void
9467 ia64_sched_finish (FILE *dump, int sched_verbose)
9468 {
9469   if (sched_verbose)
9470     fprintf (dump, "// Finishing schedule.\n");
9471   if (!reload_completed)
9472     return;
9473   if (reload_completed)
9474     {
9475       final_emit_insn_group_barriers (dump);
9476       bundling (dump, sched_verbose, current_sched_info->prev_head,
9477 		current_sched_info->next_tail);
9478       if (sched_verbose && dump)
9479 	fprintf (dump, "//    finishing %d-%d\n",
9480 		 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
9481 		 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
9482 
9483       return;
9484     }
9485 }
9486 
9487 /* The following function inserts stop bits in scheduled BB or EBB.  */
9488 
9489 static void
9490 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
9491 {
9492   rtx_insn *insn;
9493   int need_barrier_p = 0;
9494   int seen_good_insn = 0;
9495 
9496   init_insn_group_barriers ();
9497 
9498   for (insn = NEXT_INSN (current_sched_info->prev_head);
9499        insn != current_sched_info->next_tail;
9500        insn = NEXT_INSN (insn))
9501     {
9502       if (BARRIER_P (insn))
9503 	{
9504 	  rtx_insn *last = prev_active_insn (insn);
9505 
9506 	  if (! last)
9507 	    continue;
9508 	  if (JUMP_TABLE_DATA_P (last))
9509 	    last = prev_active_insn (last);
9510 	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
9511 	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
9512 
9513 	  init_insn_group_barriers ();
9514 	  seen_good_insn = 0;
9515 	  need_barrier_p = 0;
9516 	}
9517       else if (NONDEBUG_INSN_P (insn))
9518 	{
9519 	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
9520 	    {
9521 	      init_insn_group_barriers ();
9522 	      seen_good_insn = 0;
9523 	      need_barrier_p = 0;
9524 	    }
9525 	  else if (need_barrier_p || group_barrier_needed (insn)
9526 		   || (mflag_sched_stop_bits_after_every_cycle
9527 		       && GET_MODE (insn) == TImode
9528 		       && seen_good_insn))
9529 	    {
9530 	      if (TARGET_EARLY_STOP_BITS)
9531 		{
9532 		  rtx_insn *last;
9533 
9534 		  for (last = insn;
9535 		       last != current_sched_info->prev_head;
9536 		       last = PREV_INSN (last))
9537 		    if (INSN_P (last) && GET_MODE (last) == TImode
9538 			&& stops_p [INSN_UID (last)])
9539 		      break;
9540 		  if (last == current_sched_info->prev_head)
9541 		    last = insn;
9542 		  last = prev_active_insn (last);
9543 		  if (last
9544 		      && recog_memoized (last) != CODE_FOR_insn_group_barrier)
9545 		    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
9546 				     last);
9547 		  init_insn_group_barriers ();
9548 		  for (last = NEXT_INSN (last);
9549 		       last != insn;
9550 		       last = NEXT_INSN (last))
9551 		    if (INSN_P (last))
9552 		      {
9553 			group_barrier_needed (last);
9554 			if (recog_memoized (last) >= 0
9555 			    && important_for_bundling_p (last))
9556 			  seen_good_insn = 1;
9557 		      }
9558 		}
9559 	      else
9560 		{
9561 		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
9562 				    insn);
9563 		  init_insn_group_barriers ();
9564 		  seen_good_insn = 0;
9565 		}
9566 	      group_barrier_needed (insn);
9567 	      if (recog_memoized (insn) >= 0
9568 		  && important_for_bundling_p (insn))
9569 		seen_good_insn = 1;
9570 	    }
9571 	  else if (recog_memoized (insn) >= 0
9572 		   && important_for_bundling_p (insn))
9573 	    seen_good_insn = 1;
9574 	  need_barrier_p = (CALL_P (insn) || unknown_for_bundling_p (insn));
9575 	}
9576     }
9577 }
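/* Sketch of the effect: wherever a group boundary is required, the pass
   emits
     (insn_group_barrier (const_int 3))
   before the insn that needs it (or, with TARGET_EARLY_STOP_BITS, after
   an earlier insn); in the assembly output this becomes an architectural
   stop bit (";;").  */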
9578 
9579 
9580 
9581 /* The following function returns the lookahead depth used by the
9582    first cycle multipass DFA insn scheduling.  */
9583 
9584 static int
9585 ia64_first_cycle_multipass_dfa_lookahead (void)
9586 {
9587   return (reload_completed ? 6 : 4);
9588 }
9589 
9590 /* The following function initiates variable `dfa_pre_cycle_insn'.  */
9591 
9592 static void
9593 ia64_init_dfa_pre_cycle_insn (void)
9594 {
9595   if (temp_dfa_state == NULL)
9596     {
9597       dfa_state_size = state_size ();
9598       temp_dfa_state = xmalloc (dfa_state_size);
9599       prev_cycle_state = xmalloc (dfa_state_size);
9600     }
9601   dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
9602   SET_PREV_INSN (dfa_pre_cycle_insn) = SET_NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
9603   recog_memoized (dfa_pre_cycle_insn);
9604   dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
9605   SET_PREV_INSN (dfa_stop_insn) = SET_NEXT_INSN (dfa_stop_insn) = NULL_RTX;
9606   recog_memoized (dfa_stop_insn);
9607 }
9608 
9609 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9610    used by the DFA insn scheduler.  */
9611 
9612 static rtx
9613 ia64_dfa_pre_cycle_insn (void)
9614 {
9615   return dfa_pre_cycle_insn;
9616 }
9617 
9618 /* The following function returns TRUE if PRODUCER (of type ilog or
9619    ld) produces the address for CONSUMER (of type st or stf).  */
9620 
9621 int
9622 ia64_st_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
9623 {
9624   rtx dest, reg, mem;
9625 
9626   gcc_assert (producer && consumer);
9627   dest = ia64_single_set (producer);
9628   gcc_assert (dest);
9629   reg = SET_DEST (dest);
9630   gcc_assert (reg);
9631   if (GET_CODE (reg) == SUBREG)
9632     reg = SUBREG_REG (reg);
9633   gcc_assert (GET_CODE (reg) == REG);
9634 
9635   dest = ia64_single_set (consumer);
9636   gcc_assert (dest);
9637   mem = SET_DEST (dest);
9638   gcc_assert (mem && GET_CODE (mem) == MEM);
9639   return reg_mentioned_p (reg, mem);
9640 }
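/* Illustration of the bypass condition, with placeholder register
   numbers:
     producer: (set (reg:DI r20) (plus:DI (reg:DI r21) (reg:DI r22)))
     consumer: (set (mem:DI (reg:DI r20)) (reg:DI r23))
   Here r20 feeds the store address, so reg_mentioned_p succeeds and the
   bypass applies; if r20 appeared only as the stored value, it would
   not.  */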
9641 
9642 /* The following function returns TRUE if PRODUCER (of type ilog or
9643    ld) produces the address for CONSUMER (of type ld or fld).  */
9644 
9645 int
9646 ia64_ld_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
9647 {
9648   rtx dest, src, reg, mem;
9649 
9650   gcc_assert (producer && consumer);
9651   dest = ia64_single_set (producer);
9652   gcc_assert (dest);
9653   reg = SET_DEST (dest);
9654   gcc_assert (reg);
9655   if (GET_CODE (reg) == SUBREG)
9656     reg = SUBREG_REG (reg);
9657   gcc_assert (GET_CODE (reg) == REG);
9658 
9659   src = ia64_single_set (consumer);
9660   gcc_assert (src);
9661   mem = SET_SRC (src);
9662   gcc_assert (mem);
9663 
9664   if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9665     mem = XVECEXP (mem, 0, 0);
9666   else if (GET_CODE (mem) == IF_THEN_ELSE)
9667     /* ??? Is this bypass necessary for ld.c?  */
9668     {
9669       gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9670       mem = XEXP (mem, 1);
9671     }
9672 
9673   while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9674     mem = XEXP (mem, 0);
9675 
9676   if (GET_CODE (mem) == UNSPEC)
9677     {
9678       int c = XINT (mem, 1);
9679 
9680       gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9681 		  || c == UNSPEC_LDSA);
9682       mem = XVECEXP (mem, 0, 0);
9683     }
9684 
9685   /* Note that LO_SUM is used for GOT loads.  */
9686   gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
9687 
9688   return reg_mentioned_p (reg, mem);
9689 }
9690 
9691 /* The following function returns TRUE if INSN produces the address for a
9692    load/store insn.  We place such insns into an M slot because that
9693    decreases their latency.  */
9694 
9695 int
9696 ia64_produce_address_p (rtx insn)
9697 {
9698   return insn->call;
9699 }
9700 
9701 
9702 /* Emit pseudo-ops for the assembler to describe predicate relations.
9703    At present this assumes that we only consider predicate pairs to
9704    be mutex, and that the assembler can deduce proper values from
9705    straight-line code.  */
9706 
9707 static void
9708 emit_predicate_relation_info (void)
9709 {
9710   basic_block bb;
9711 
9712   FOR_EACH_BB_REVERSE_FN (bb, cfun)
9713     {
9714       int r;
9715       rtx_insn *head = BB_HEAD (bb);
9716 
9717       /* We only need such notes at code labels.  */
9718       if (! LABEL_P (head))
9719 	continue;
9720       if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
9721 	head = NEXT_INSN (head);
9722 
9723       /* Skip p0, which may be thought to be live due to (reg:DI p0)
9724 	 grabbing the entire block of predicate registers.  */
9725       for (r = PR_REG (2); r < PR_REG (64); r += 2)
9726 	if (REGNO_REG_SET_P (df_get_live_in (bb), r))
9727 	  {
9728 	    rtx p = gen_rtx_REG (BImode, r);
9729 	    rtx_insn *n = emit_insn_after (gen_pred_rel_mutex (p), head);
9730 	    if (head == BB_END (bb))
9731 	      BB_END (bb) = n;
9732 	    head = n;
9733 	  }
9734     }
9735 
9736   /* Look for conditional calls that do not return, and protect predicate
9737      relations around them.  Otherwise the assembler will assume the call
9738      returns, and complain about uses of call-clobbered predicates after
9739      the call.  */
9740   FOR_EACH_BB_REVERSE_FN (bb, cfun)
9741     {
9742       rtx_insn *insn = BB_HEAD (bb);
9743 
9744       while (1)
9745 	{
9746 	  if (CALL_P (insn)
9747 	      && GET_CODE (PATTERN (insn)) == COND_EXEC
9748 	      && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9749 	    {
9750 	      rtx_insn *b =
9751 		emit_insn_before (gen_safe_across_calls_all (), insn);
9752 	      rtx_insn *a = emit_insn_after (gen_safe_across_calls_normal (), insn);
9753 	      if (BB_HEAD (bb) == insn)
9754 		BB_HEAD (bb) = b;
9755 	      if (BB_END (bb) == insn)
9756 		BB_END (bb) = a;
9757 	    }
9758 
9759 	  if (insn == BB_END (bb))
9760 	    break;
9761 	  insn = NEXT_INSN (insn);
9762 	}
9763     }
9764 }
9765 
9766 /* Perform machine dependent operations on the rtl chain INSNS.  */
9767 
9768 static void
9769 ia64_reorg (void)
9770 {
9771   /* We are freeing block_for_insn in the toplev to keep compatibility
9772      with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
9773   compute_bb_for_insn ();
9774 
9775   /* If optimizing, we'll have split before scheduling.  */
9776   if (optimize == 0)
9777     split_all_insns ();
9778 
9779   if (optimize && flag_schedule_insns_after_reload
9780       && dbg_cnt (ia64_sched2))
9781     {
9782       basic_block bb;
9783       timevar_push (TV_SCHED2);
9784       ia64_final_schedule = 1;
9785 
9786       /* We can't let modulo-sched prevent us from scheduling any bbs,
9787 	 since we need the final schedule to produce bundle information.  */
9788       FOR_EACH_BB_FN (bb, cfun)
9789 	bb->flags &= ~BB_DISABLE_SCHEDULE;
9790 
9791       initiate_bundle_states ();
9792       ia64_nop = make_insn_raw (gen_nop ());
9793       SET_PREV_INSN (ia64_nop) = SET_NEXT_INSN (ia64_nop) = NULL_RTX;
9794       recog_memoized (ia64_nop);
9795       clocks_length = get_max_uid () + 1;
9796       stops_p = XCNEWVEC (char, clocks_length);
9797 
9798       if (ia64_tune == PROCESSOR_ITANIUM2)
9799 	{
9800 	  pos_1 = get_cpu_unit_code ("2_1");
9801 	  pos_2 = get_cpu_unit_code ("2_2");
9802 	  pos_3 = get_cpu_unit_code ("2_3");
9803 	  pos_4 = get_cpu_unit_code ("2_4");
9804 	  pos_5 = get_cpu_unit_code ("2_5");
9805 	  pos_6 = get_cpu_unit_code ("2_6");
9806 	  _0mii_ = get_cpu_unit_code ("2b_0mii.");
9807 	  _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9808 	  _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9809 	  _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9810 	  _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9811 	  _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9812 	  _0mib_ = get_cpu_unit_code ("2b_0mib.");
9813 	  _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9814 	  _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9815 	  _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9816 	  _1mii_ = get_cpu_unit_code ("2b_1mii.");
9817 	  _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9818 	  _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9819 	  _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9820 	  _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9821 	  _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9822 	  _1mib_ = get_cpu_unit_code ("2b_1mib.");
9823 	  _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9824 	  _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9825 	  _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9826 	}
9827       else
9828 	{
9829 	  pos_1 = get_cpu_unit_code ("1_1");
9830 	  pos_2 = get_cpu_unit_code ("1_2");
9831 	  pos_3 = get_cpu_unit_code ("1_3");
9832 	  pos_4 = get_cpu_unit_code ("1_4");
9833 	  pos_5 = get_cpu_unit_code ("1_5");
9834 	  pos_6 = get_cpu_unit_code ("1_6");
9835 	  _0mii_ = get_cpu_unit_code ("1b_0mii.");
9836 	  _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9837 	  _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9838 	  _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9839 	  _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9840 	  _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9841 	  _0mib_ = get_cpu_unit_code ("1b_0mib.");
9842 	  _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9843 	  _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9844 	  _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9845 	  _1mii_ = get_cpu_unit_code ("1b_1mii.");
9846 	  _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9847 	  _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9848 	  _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9849 	  _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9850 	  _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9851 	  _1mib_ = get_cpu_unit_code ("1b_1mib.");
9852 	  _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9853 	  _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9854 	  _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9855 	}
9856 
9857       if (flag_selective_scheduling2
9858 	  && !maybe_skip_selective_scheduling ())
9859         run_selective_scheduling ();
9860       else
9861 	schedule_ebbs ();
9862 
9863       /* Redo the alignment computation, as it might have gone wrong.  */
9864       compute_alignments ();
9865 
9866       /* We cannot reuse this one because it has been corrupted by the
9867 	 evil glat.  */
9868       finish_bundle_states ();
9869       free (stops_p);
9870       stops_p = NULL;
9871       emit_insn_group_barriers (dump_file);
9872 
9873       ia64_final_schedule = 0;
9874       timevar_pop (TV_SCHED2);
9875     }
9876   else
9877     emit_all_insn_group_barriers (dump_file);
9878 
9879   df_analyze ();
9880 
9881   /* A call must not be the last instruction in a function, so that the
9882      return address stays within the function and unwinding works
9883      properly.  Note that IA-64 differs from dwarf2 on this point.  */
9884   if (ia64_except_unwind_info (&global_options) == UI_TARGET)
9885     {
9886       rtx_insn *insn;
9887       int saw_stop = 0;
9888 
9889       insn = get_last_insn ();
9890       if (! INSN_P (insn))
9891         insn = prev_active_insn (insn);
9892       if (insn)
9893 	{
9894 	  /* Skip over insns that expand to nothing.  */
9895 	  while (NONJUMP_INSN_P (insn)
9896 		 && get_attr_empty (insn) == EMPTY_YES)
9897 	    {
9898 	      if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9899 		  && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9900 		saw_stop = 1;
9901 	      insn = prev_active_insn (insn);
9902 	    }
9903 	  if (CALL_P (insn))
9904 	    {
9905 	      if (! saw_stop)
9906 		emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9907 	      emit_insn (gen_break_f ());
9908 	      emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9909 	    }
9910 	}
9911     }
9912 
9913   emit_predicate_relation_info ();
9914 
9915   if (flag_var_tracking)
9916     {
9917       timevar_push (TV_VAR_TRACKING);
9918       variable_tracking_main ();
9919       timevar_pop (TV_VAR_TRACKING);
9920     }
9921   df_finish_pass (false);
9922 }
9923 
9924 /* Return true if REGNO is used by the epilogue.  */
9925 
9926 int
9927 ia64_epilogue_uses (int regno)
9928 {
9929   switch (regno)
9930     {
9931     case R_GR (1):
9932       /* With a call to a function in another module, we will write a new
9933 	 value to "gp".  After returning from such a call, we need to make
9934 	 sure the function restores the original gp-value, even if the
9935 	 function itself does not use the gp anymore.  */
9936       return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
9937 
9938     case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9939     case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9940       /* For functions defined with the syscall_linkage attribute, all
9941 	 input registers are marked as live at all function exits.  This
9942 	 prevents the register allocator from using the input registers,
9943 	 which in turn makes it possible to restart a system call after
9944 	 an interrupt without having to save/restore the input registers.
9945 	 This also prevents kernel data from leaking to application code.  */
9946       return lookup_attribute ("syscall_linkage",
9947 	   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9948 
9949     case R_BR (0):
9950       /* Conditional return patterns can't represent the use of `b0' as
9951          the return address, so we force the value live this way.  */
9952       return 1;
9953 
9954     case AR_PFS_REGNUM:
9955       /* Likewise for ar.pfs, which is used by br.ret.  */
9956       return 1;
9957 
9958     default:
9959       return 0;
9960     }
9961 }
9962 
9963 /* Return true if REGNO is used by the frame unwinder.  */
9964 
9965 int
9966 ia64_eh_uses (int regno)
9967 {
9968   unsigned int r;
9969 
9970   if (! reload_completed)
9971     return 0;
9972 
9973   if (regno == 0)
9974     return 0;
9975 
9976   for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9977     if (regno == current_frame_info.r[r]
9978        || regno == emitted_frame_related_regs[r])
9979       return 1;
9980 
9981   return 0;
9982 }
9983 
9984 /* Return true if this goes in small data/bss.  */
9985 
9986 /* ??? We could also support our own long data here, generating movl/add/ld8
9987    instead of addl,ld8/ld8.  This makes the code bigger, but should make the
9988    code faster because there is one less load.  This check also covers
9989    incomplete types, which can't go in sdata/sbss.  */
9990 
9991 static bool
9992 ia64_in_small_data_p (const_tree exp)
9993 {
9994   if (TARGET_NO_SDATA)
9995     return false;
9996 
9997   /* We want to merge strings, so we never consider them small data.  */
9998   if (TREE_CODE (exp) == STRING_CST)
9999     return false;
10000 
10001   /* Functions are never small data.  */
10002   if (TREE_CODE (exp) == FUNCTION_DECL)
10003     return false;
10004 
10005   if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
10006     {
10007       const char *section = DECL_SECTION_NAME (exp);
10008 
10009       if (strcmp (section, ".sdata") == 0
10010 	  || strncmp (section, ".sdata.", 7) == 0
10011 	  || strncmp (section, ".gnu.linkonce.s.", 16) == 0
10012 	  || strcmp (section, ".sbss") == 0
10013 	  || strncmp (section, ".sbss.", 6) == 0
10014 	  || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
10015 	return true;
10016     }
10017   else
10018     {
10019       HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
10020 
10021       /* If this is an incomplete type with size 0, then we can't put it
10022 	 in sdata because it might be too big when completed.  */
10023       if (size > 0 && size <= ia64_section_threshold)
10024 	return true;
10025     }
10026 
10027   return false;
10028 }
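/* Illustrative example (not from the original source): a 4-byte global such
   as "int counter;" qualifies for .sdata/.sbss whenever
   ia64_section_threshold is at least 4, and is then addressed gp-relative
   (@gprel) with a short addl instead of going through a GOT entry.  */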
10029 
10030 /* Output assembly directives for prologue regions.  */
10031 
10032 /* The current basic block number.  */
10033 
10034 static bool last_block;
10035 
10036 /* True if we need a copy_state command at the start of the next block.  */
10037 
10038 static bool need_copy_state;
10039 
10040 #ifndef MAX_ARTIFICIAL_LABEL_BYTES
10041 # define MAX_ARTIFICIAL_LABEL_BYTES 30
10042 #endif
10043 
10044 /* The function emits unwind directives for the start of an epilogue.  */
10045 
10046 static void
10047 process_epilogue (FILE *asm_out_file, rtx insn ATTRIBUTE_UNUSED,
10048 		  bool unwind, bool frame ATTRIBUTE_UNUSED)
10049 {
10050   /* If this isn't the last block of the function, then we need to label the
10051      current state, and copy it back in at the start of the next block.  */
10052 
10053   if (!last_block)
10054     {
10055       if (unwind)
10056 	fprintf (asm_out_file, "\t.label_state %d\n",
10057 		 ++cfun->machine->state_num);
10058       need_copy_state = true;
10059     }
10060 
10061   if (unwind)
10062     fprintf (asm_out_file, "\t.restore sp\n");
10063 }
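/* Illustrative output (not from the original source): for an epilogue that
   is not in the last block this emits, e.g.,
       .label_state 1
       .restore sp
   and ia64_asm_unwind_emit later emits ".body" / ".copy_state 1" at the
   start of the next block because need_copy_state is set.  */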
10064 
10065 /* This function processes a SET pattern for REG_CFA_ADJUST_CFA.  */
10066 
10067 static void
10068 process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn,
10069 			bool unwind, bool frame)
10070 {
10071   rtx dest = SET_DEST (pat);
10072   rtx src = SET_SRC (pat);
10073 
10074   if (dest == stack_pointer_rtx)
10075     {
10076       if (GET_CODE (src) == PLUS)
10077 	{
10078 	  rtx op0 = XEXP (src, 0);
10079 	  rtx op1 = XEXP (src, 1);
10080 
10081 	  gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
10082 
10083 	  if (INTVAL (op1) < 0)
10084 	    {
10085 	      gcc_assert (!frame_pointer_needed);
10086 	      if (unwind)
10087 		fprintf (asm_out_file,
10088 			 "\t.fframe " HOST_WIDE_INT_PRINT_DEC"\n",
10089 			 -INTVAL (op1));
10090 	    }
10091 	  else
10092 	    process_epilogue (asm_out_file, insn, unwind, frame);
10093 	}
10094       else
10095 	{
10096 	  gcc_assert (src == hard_frame_pointer_rtx);
10097 	  process_epilogue (asm_out_file, insn, unwind, frame);
10098 	}
10099     }
10100   else if (dest == hard_frame_pointer_rtx)
10101     {
10102       gcc_assert (src == stack_pointer_rtx);
10103       gcc_assert (frame_pointer_needed);
10104 
10105       if (unwind)
10106 	fprintf (asm_out_file, "\t.vframe r%d\n",
10107 		 ia64_dbx_register_number (REGNO (dest)));
10108     }
10109   else
10110     gcc_unreachable ();
10111 }
10112 
10113 /* This function processes a SET pattern for REG_CFA_REGISTER.  */
10114 
10115 static void
10116 process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind)
10117 {
10118   rtx dest = SET_DEST (pat);
10119   rtx src = SET_SRC (pat);
10120   int dest_regno = REGNO (dest);
10121   int src_regno;
10122 
10123   if (src == pc_rtx)
10124     {
10125       /* Saving return address pointer.  */
10126       if (unwind)
10127 	fprintf (asm_out_file, "\t.save rp, r%d\n",
10128 		 ia64_dbx_register_number (dest_regno));
10129       return;
10130     }
10131 
10132   src_regno = REGNO (src);
10133 
10134   switch (src_regno)
10135     {
10136     case PR_REG (0):
10137       gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
10138       if (unwind)
10139 	fprintf (asm_out_file, "\t.save pr, r%d\n",
10140 		 ia64_dbx_register_number (dest_regno));
10141       break;
10142 
10143     case AR_UNAT_REGNUM:
10144       gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
10145       if (unwind)
10146 	fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
10147 		 ia64_dbx_register_number (dest_regno));
10148       break;
10149 
10150     case AR_LC_REGNUM:
10151       gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
10152       if (unwind)
10153 	fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
10154 		 ia64_dbx_register_number (dest_regno));
10155       break;
10156 
10157     default:
10158       /* Everything else should indicate being stored to memory.  */
10159       gcc_unreachable ();
10160     }
10161 }
10162 
10163 /* This function processes a SET pattern for REG_CFA_OFFSET.  */
10164 
10165 static void
10166 process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind)
10167 {
10168   rtx dest = SET_DEST (pat);
10169   rtx src = SET_SRC (pat);
10170   int src_regno = REGNO (src);
10171   const char *saveop;
10172   HOST_WIDE_INT off;
10173   rtx base;
10174 
10175   gcc_assert (MEM_P (dest));
10176   if (GET_CODE (XEXP (dest, 0)) == REG)
10177     {
10178       base = XEXP (dest, 0);
10179       off = 0;
10180     }
10181   else
10182     {
10183       gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
10184 		  && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
10185       base = XEXP (XEXP (dest, 0), 0);
10186       off = INTVAL (XEXP (XEXP (dest, 0), 1));
10187     }
10188 
10189   if (base == hard_frame_pointer_rtx)
10190     {
10191       saveop = ".savepsp";
10192       off = - off;
10193     }
10194   else
10195     {
10196       gcc_assert (base == stack_pointer_rtx);
10197       saveop = ".savesp";
10198     }
10199 
10200   src_regno = REGNO (src);
10201   switch (src_regno)
10202     {
10203     case BR_REG (0):
10204       gcc_assert (!current_frame_info.r[reg_save_b0]);
10205       if (unwind)
10206 	fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
10207 		 saveop, off);
10208       break;
10209 
10210     case PR_REG (0):
10211       gcc_assert (!current_frame_info.r[reg_save_pr]);
10212       if (unwind)
10213 	fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
10214 		 saveop, off);
10215       break;
10216 
10217     case AR_LC_REGNUM:
10218       gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
10219       if (unwind)
10220 	fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
10221 		 saveop, off);
10222       break;
10223 
10224     case AR_PFS_REGNUM:
10225       gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
10226       if (unwind)
10227 	fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
10228 		 saveop, off);
10229       break;
10230 
10231     case AR_UNAT_REGNUM:
10232       gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
10233       if (unwind)
10234 	fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
10235 		 saveop, off);
10236       break;
10237 
10238     case GR_REG (4):
10239     case GR_REG (5):
10240     case GR_REG (6):
10241     case GR_REG (7):
10242       if (unwind)
10243 	fprintf (asm_out_file, "\t.save.g 0x%x\n",
10244 		 1 << (src_regno - GR_REG (4)));
10245       break;
10246 
10247     case BR_REG (1):
10248     case BR_REG (2):
10249     case BR_REG (3):
10250     case BR_REG (4):
10251     case BR_REG (5):
10252       if (unwind)
10253 	fprintf (asm_out_file, "\t.save.b 0x%x\n",
10254 		 1 << (src_regno - BR_REG (1)));
10255       break;
10256 
10257     case FR_REG (2):
10258     case FR_REG (3):
10259     case FR_REG (4):
10260     case FR_REG (5):
10261       if (unwind)
10262 	fprintf (asm_out_file, "\t.save.f 0x%x\n",
10263 		 1 << (src_regno - FR_REG (2)));
10264       break;
10265 
10266     case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
10267     case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
10268     case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
10269     case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
10270       if (unwind)
10271 	fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
10272 		 1 << (src_regno - FR_REG (12)));
10273       break;
10274 
10275     default:
10276       /* ??? For some reason we mark other general registers, even those
10277 	 we can't represent in the unwind info.  Ignore them.  */
10278       break;
10279     }
10280 }
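/* Illustrative output (not from the original source): a save of b0 into the
   slot at sp+16, i.e. a SET of (mem (plus sp 16)) from (reg b0), takes the
   BR_REG (0) case above and emits
       .savesp rp, 16
   whereas the same save addressed off the hard frame pointer uses
   ".savepsp rp" with the negated offset.  */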
10281 
10282 /* This function looks at a single insn and emits any directives
10283    required to unwind this insn.  */
10284 
10285 static void
10286 ia64_asm_unwind_emit (FILE *asm_out_file, rtx_insn *insn)
10287 {
10288   bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET;
10289   bool frame = dwarf2out_do_frame ();
10290   rtx note, pat;
10291   bool handled_one;
10292 
10293   if (!unwind && !frame)
10294     return;
10295 
10296   if (NOTE_INSN_BASIC_BLOCK_P (insn))
10297     {
10298       last_block = NOTE_BASIC_BLOCK (insn)->next_bb
10299      == EXIT_BLOCK_PTR_FOR_FN (cfun);
10300 
10301       /* Restore unwind state from immediately before the epilogue.  */
10302       if (need_copy_state)
10303 	{
10304 	  if (unwind)
10305 	    {
10306 	      fprintf (asm_out_file, "\t.body\n");
10307 	      fprintf (asm_out_file, "\t.copy_state %d\n",
10308 		       cfun->machine->state_num);
10309 	    }
10310 	  need_copy_state = false;
10311 	}
10312     }
10313 
10314   if (NOTE_P (insn) || ! RTX_FRAME_RELATED_P (insn))
10315     return;
10316 
10317   /* Look for the ALLOC insn.  */
10318   if (INSN_CODE (insn) == CODE_FOR_alloc)
10319     {
10320       rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
10321       int dest_regno = REGNO (dest);
10322 
10323       /* If this is the final destination for ar.pfs, then this must
10324 	 be the alloc in the prologue.  */
10325       if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
10326 	{
10327 	  if (unwind)
10328 	    fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
10329 		     ia64_dbx_register_number (dest_regno));
10330 	}
10331       else
10332 	{
10333 	  /* This must be an alloc before a sibcall.  We must drop the
10334 	     old frame info.  The easiest way to drop the old frame
10335 	     info is to ensure we had a ".restore sp" directive
10336 	     followed by a new prologue.  If the procedure doesn't
10337 	     have a memory-stack frame, we'll issue a dummy ".restore
10338 	     sp" now.  */
10339 	  if (current_frame_info.total_size == 0 && !frame_pointer_needed)
10340 	    /* If we haven't done process_epilogue () yet, do it now.  */
10341 	    process_epilogue (asm_out_file, insn, unwind, frame);
10342 	  if (unwind)
10343 	    fprintf (asm_out_file, "\t.prologue\n");
10344 	}
10345       return;
10346     }
10347 
10348   handled_one = false;
10349   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
10350     switch (REG_NOTE_KIND (note))
10351       {
10352       case REG_CFA_ADJUST_CFA:
10353 	pat = XEXP (note, 0);
10354 	if (pat == NULL)
10355 	  pat = PATTERN (insn);
10356 	process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame);
10357 	handled_one = true;
10358 	break;
10359 
10360       case REG_CFA_OFFSET:
10361 	pat = XEXP (note, 0);
10362 	if (pat == NULL)
10363 	  pat = PATTERN (insn);
10364 	process_cfa_offset (asm_out_file, pat, unwind);
10365 	handled_one = true;
10366 	break;
10367 
10368       case REG_CFA_REGISTER:
10369 	pat = XEXP (note, 0);
10370 	if (pat == NULL)
10371 	  pat = PATTERN (insn);
10372 	process_cfa_register (asm_out_file, pat, unwind);
10373 	handled_one = true;
10374 	break;
10375 
10376       case REG_FRAME_RELATED_EXPR:
10377       case REG_CFA_DEF_CFA:
10378       case REG_CFA_EXPRESSION:
10379       case REG_CFA_RESTORE:
10380       case REG_CFA_SET_VDRAP:
10381 	/* Not used in the ia64 port.  */
10382 	gcc_unreachable ();
10383 
10384       default:
10385 	/* Not a frame-related note.  */
10386 	break;
10387       }
10388 
10389   /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
10390      explicit action to take.  No guessing required.  */
10391   gcc_assert (handled_one);
10392 }
10393 
10394 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */
10395 
10396 static void
10397 ia64_asm_emit_except_personality (rtx personality)
10398 {
10399   fputs ("\t.personality\t", asm_out_file);
10400   output_addr_const (asm_out_file, personality);
10401   fputc ('\n', asm_out_file);
10402 }
10403 
10404 /* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */
10405 
10406 static void
10407 ia64_asm_init_sections (void)
10408 {
10409   exception_section = get_unnamed_section (0, output_section_asm_op,
10410 					   "\t.handlerdata");
10411 }
10412 
10413 /* Implement TARGET_DEBUG_UNWIND_INFO.  */
10414 
10415 static enum unwind_info_type
10416 ia64_debug_unwind_info (void)
10417 {
10418   return UI_TARGET;
10419 }
10420 
10421 enum ia64_builtins
10422 {
10423   IA64_BUILTIN_BSP,
10424   IA64_BUILTIN_COPYSIGNQ,
10425   IA64_BUILTIN_FABSQ,
10426   IA64_BUILTIN_FLUSHRS,
10427   IA64_BUILTIN_INFQ,
10428   IA64_BUILTIN_HUGE_VALQ,
10429   IA64_BUILTIN_NANQ,
10430   IA64_BUILTIN_NANSQ,
10431   IA64_BUILTIN_max
10432 };
10433 
10434 static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max];
10435 
10436 void
10437 ia64_init_builtins (void)
10438 {
10439   tree fpreg_type;
10440   tree float80_type;
10441   tree decl;
10442 
10443   /* The __fpreg type.  */
10444   fpreg_type = make_node (REAL_TYPE);
10445   TYPE_PRECISION (fpreg_type) = 82;
10446   layout_type (fpreg_type);
10447   (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
10448 
10449   /* The __float80 type.  */
10450   if (float64x_type_node != NULL_TREE
10451       && TYPE_MODE (float64x_type_node) == XFmode)
10452     float80_type = float64x_type_node;
10453   else
10454     {
10455       float80_type = make_node (REAL_TYPE);
10456       TYPE_PRECISION (float80_type) = 80;
10457       layout_type (float80_type);
10458     }
10459   (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
10460 
10461   /* The __float128 type.  */
10462   if (!TARGET_HPUX)
10463     {
10464       tree ftype;
10465       tree const_string_type
10466 	= build_pointer_type (build_qualified_type
10467 			      (char_type_node, TYPE_QUAL_CONST));
10468 
10469       (*lang_hooks.types.register_builtin_type) (float128_type_node,
10470 						 "__float128");
10471 
10472       /* TFmode support builtins.  */
10473       ftype = build_function_type_list (float128_type_node, NULL_TREE);
10474       decl = add_builtin_function ("__builtin_infq", ftype,
10475 				   IA64_BUILTIN_INFQ, BUILT_IN_MD,
10476 				   NULL, NULL_TREE);
10477       ia64_builtins[IA64_BUILTIN_INFQ] = decl;
10478 
10479       decl = add_builtin_function ("__builtin_huge_valq", ftype,
10480 				   IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
10481 				   NULL, NULL_TREE);
10482       ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;
10483 
10484       ftype = build_function_type_list (float128_type_node,
10485 					const_string_type,
10486 					NULL_TREE);
10487       decl = add_builtin_function ("__builtin_nanq", ftype,
10488 				   IA64_BUILTIN_NANQ, BUILT_IN_MD,
10489 				   "nanq", NULL_TREE);
10490       TREE_READONLY (decl) = 1;
10491       ia64_builtins[IA64_BUILTIN_NANQ] = decl;
10492 
10493       decl = add_builtin_function ("__builtin_nansq", ftype,
10494 				   IA64_BUILTIN_NANSQ, BUILT_IN_MD,
10495 				   "nansq", NULL_TREE);
10496       TREE_READONLY (decl) = 1;
10497       ia64_builtins[IA64_BUILTIN_NANSQ] = decl;
10498 
10499       ftype = build_function_type_list (float128_type_node,
10500 					float128_type_node,
10501 					NULL_TREE);
10502       decl = add_builtin_function ("__builtin_fabsq", ftype,
10503 				   IA64_BUILTIN_FABSQ, BUILT_IN_MD,
10504 				   "__fabstf2", NULL_TREE);
10505       TREE_READONLY (decl) = 1;
10506       ia64_builtins[IA64_BUILTIN_FABSQ] = decl;
10507 
10508       ftype = build_function_type_list (float128_type_node,
10509 					float128_type_node,
10510 					float128_type_node,
10511 					NULL_TREE);
10512       decl = add_builtin_function ("__builtin_copysignq", ftype,
10513 				   IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
10514 				   "__copysigntf3", NULL_TREE);
10515       TREE_READONLY (decl) = 1;
10516       ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
10517     }
10518   else
10519     /* Under HPUX, this is a synonym for "long double".  */
10520     (*lang_hooks.types.register_builtin_type) (long_double_type_node,
10521 					       "__float128");
10522 
10523   /* Fwrite on VMS is non-standard.  */
10524 #if TARGET_ABI_OPEN_VMS
10525   vms_patch_builtins ();
10526 #endif
10527 
10528 #define def_builtin(name, type, code)					\
10529   add_builtin_function ((name), (type), (code), BUILT_IN_MD,	\
10530 		       NULL, NULL_TREE)
10531 
10532   decl = def_builtin ("__builtin_ia64_bsp",
10533 		      build_function_type_list (ptr_type_node, NULL_TREE),
10534 		      IA64_BUILTIN_BSP);
10535   ia64_builtins[IA64_BUILTIN_BSP] = decl;
10536 
10537   decl = def_builtin ("__builtin_ia64_flushrs",
10538 		      build_function_type_list (void_type_node, NULL_TREE),
10539 		      IA64_BUILTIN_FLUSHRS);
10540   ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;
10541 
10542 #undef def_builtin
10543 
10544   if (TARGET_HPUX)
10545     {
10546       if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
10547 	set_user_assembler_name (decl, "_Isfinite");
10548       if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
10549 	set_user_assembler_name (decl, "_Isfinitef");
10550       if ((decl = builtin_decl_explicit (BUILT_IN_FINITEL)) != NULL_TREE)
10551 	set_user_assembler_name (decl, "_Isfinitef128");
10552     }
10553 }
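/* Usage sketch (user code, not part of this file): the machine-specific
   builtins registered above can be used directly from C, e.g.

     void *bsp = __builtin_ia64_bsp ();   // read the RSE backing store pointer
     __builtin_ia64_flushrs ();           // flush the register stack to memory

   Both are expanded by ia64_expand_builtin below.  */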
10554 
10555 static tree
10556 ia64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10557 		   tree *args, bool ignore ATTRIBUTE_UNUSED)
10558 {
10559   if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
10560     {
10561       enum ia64_builtins fn_code
10562 	= (enum ia64_builtins) DECL_MD_FUNCTION_CODE (fndecl);
10563       switch (fn_code)
10564 	{
10565 	case IA64_BUILTIN_NANQ:
10566 	case IA64_BUILTIN_NANSQ:
10567 	  {
10568 	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
10569 	    const char *str = c_getstr (*args);
10570 	    int quiet = fn_code == IA64_BUILTIN_NANQ;
10571 	    REAL_VALUE_TYPE real;
10572 
10573 	    if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
10574 	      return build_real (type, real);
10575 	    return NULL_TREE;
10576 	  }
10577 
10578 	default:
10579 	  break;
10580 	}
10581     }
10582 
10583 #ifdef SUBTARGET_FOLD_BUILTIN
10584   return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
10585 #endif
10586 
10587   return NULL_TREE;
10588 }
10589 
10590 rtx
10591 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10592 		     machine_mode mode ATTRIBUTE_UNUSED,
10593 		     int ignore ATTRIBUTE_UNUSED)
10594 {
10595   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10596   unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
10597 
10598   switch (fcode)
10599     {
10600     case IA64_BUILTIN_BSP:
10601       if (! target || ! register_operand (target, DImode))
10602 	target = gen_reg_rtx (DImode);
10603       emit_insn (gen_bsp_value (target));
10604 #ifdef POINTERS_EXTEND_UNSIGNED
10605       target = convert_memory_address (ptr_mode, target);
10606 #endif
10607       return target;
10608 
10609     case IA64_BUILTIN_FLUSHRS:
10610       emit_insn (gen_flushrs ());
10611       return const0_rtx;
10612 
10613     case IA64_BUILTIN_INFQ:
10614     case IA64_BUILTIN_HUGE_VALQ:
10615       {
10616         machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
10617 	REAL_VALUE_TYPE inf;
10618 	rtx tmp;
10619 
10620 	real_inf (&inf);
10621 	tmp = const_double_from_real_value (inf, target_mode);
10622 
10623 	tmp = validize_mem (force_const_mem (target_mode, tmp));
10624 
10625 	if (target == 0)
10626 	  target = gen_reg_rtx (target_mode);
10627 
10628 	emit_move_insn (target, tmp);
10629 	return target;
10630       }
10631 
10632     case IA64_BUILTIN_NANQ:
10633     case IA64_BUILTIN_NANSQ:
10634     case IA64_BUILTIN_FABSQ:
10635     case IA64_BUILTIN_COPYSIGNQ:
10636       return expand_call (exp, target, ignore);
10637 
10638     default:
10639       gcc_unreachable ();
10640     }
10641 
10642   return NULL_RTX;
10643 }
10644 
10645 /* Return the ia64 builtin for CODE.  */
10646 
10647 static tree
10648 ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10649 {
10650   if (code >= IA64_BUILTIN_max)
10651     return error_mark_node;
10652 
10653   return ia64_builtins[code];
10654 }
10655 
10656 /* Implement TARGET_FUNCTION_ARG_PADDING.
10657 
10658    On HP-UX IA64, aggregate parameters are passed in the
10659    most significant bits of the stack slot.  */
10660 
10661 static pad_direction
10662 ia64_function_arg_padding (machine_mode mode, const_tree type)
10663 {
10664   /* Exception to normal case for structures/unions/etc.  */
10665   if (TARGET_HPUX
10666       && type
10667       && AGGREGATE_TYPE_P (type)
10668       && int_size_in_bytes (type) < UNITS_PER_WORD)
10669     return PAD_UPWARD;
10670 
10671   /* Fall back to the default.  */
10672   return default_function_arg_padding (mode, type);
10673 }
10674 
10675 /* Emit text to declare externally defined variables and functions, because
10676    the Intel assembler does not support undefined externals.  */
10677 
10678 void
10679 ia64_asm_output_external (FILE *file, tree decl, const char *name)
10680 {
10681   /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10682      set in order to avoid putting out names that are never really
10683      used. */
10684   if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
10685     {
10686       /* maybe_assemble_visibility will return 1 if the assembler
10687 	 visibility directive is output.  */
10688       int need_visibility = ((*targetm.binds_local_p) (decl)
10689 			     && maybe_assemble_visibility (decl));
10690 
10691       /* GNU as does not need anything here, but the HP linker does
10692 	 need something for external functions.  */
10693       if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
10694 	  && TREE_CODE (decl) == FUNCTION_DECL)
10695 	  (*targetm.asm_out.globalize_decl_name) (file, decl);
10696       else if (need_visibility && !TARGET_GNU_AS)
10697 	(*targetm.asm_out.globalize_label) (file, name);
10698     }
10699 }
10700 
10701 /* Set the SImode div/mod functions; init_integral_libfuncs only initializes
10702    modes of word_mode and larger.  Rename the TFmode libfuncs using the
10703    HP-UX conventions.  __divtf3 is used for XFmode; we need to keep it for
10704    backward compatibility.  */
10705 
10706 static void
10707 ia64_init_libfuncs (void)
10708 {
10709   set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
10710   set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
10711   set_optab_libfunc (smod_optab, SImode, "__modsi3");
10712   set_optab_libfunc (umod_optab, SImode, "__umodsi3");
10713 
10714   set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
10715   set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
10716   set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
10717   set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
10718   set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
10719 
10720   set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
10721   set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
10722   set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
10723   set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
10724   set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
10725   set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
10726 
10727   set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
10728   set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
10729   set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
10730   set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
10731   set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
10732 
10733   set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
10734   set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
10735   set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
10736   /* HP-UX 11.23 libc does not have a function for unsigned
10737      SImode-to-TFmode conversion.  */
10738   set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
10739 }
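/* Illustrative consequence (not from the original source): with the optab
   entries above, a TFmode (__float128) multiplication "a * b" becomes a
   call to _U_Qfmpy, and converting a TFmode value to a signed SImode
   integer becomes a call to _U_Qfcnvfxt_quad_to_sgl, following the HP-UX
   quad-float naming scheme.  */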
10740 
10741 /* Rename all the TFmode libfuncs using the HPUX conventions.  */
10742 
10743 static void
10744 ia64_hpux_init_libfuncs (void)
10745 {
10746   ia64_init_libfuncs ();
10747 
10748   /* The HP SI millicode division and mod functions expect DI arguments.
10749      By turning them off completely we avoid using both libgcc and the
10750      non-standard millicode routines and use the HP DI millicode routines
10751      instead.  */
10752 
10753   set_optab_libfunc (sdiv_optab, SImode, 0);
10754   set_optab_libfunc (udiv_optab, SImode, 0);
10755   set_optab_libfunc (smod_optab, SImode, 0);
10756   set_optab_libfunc (umod_optab, SImode, 0);
10757 
10758   set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10759   set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10760   set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10761   set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10762 
10763   /* HP-UX libc has TF min/max/abs routines in it.  */
10764   set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10765   set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10766   set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
10767 
10768   /* ia64_expand_compare uses this.  */
10769   cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10770 
10771   /* These should never be used.  */
10772   set_optab_libfunc (eq_optab, TFmode, 0);
10773   set_optab_libfunc (ne_optab, TFmode, 0);
10774   set_optab_libfunc (gt_optab, TFmode, 0);
10775   set_optab_libfunc (ge_optab, TFmode, 0);
10776   set_optab_libfunc (lt_optab, TFmode, 0);
10777   set_optab_libfunc (le_optab, TFmode, 0);
10778 }
10779 
10780 /* Rename the division and modulus functions in VMS.  */
10781 
10782 static void
10783 ia64_vms_init_libfuncs (void)
10784 {
10785   set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10786   set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10787   set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10788   set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10789   set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10790   set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10791   set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10792   set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
10793 #ifdef MEM_LIBFUNCS_INIT
10794   MEM_LIBFUNCS_INIT;
10795 #endif
10796 }
10797 
10798 /* Rename the TFmode libfuncs available from soft-fp in glibc using
10799    the HPUX conventions.  */
10800 
10801 static void
10802 ia64_sysv4_init_libfuncs (void)
10803 {
10804   ia64_init_libfuncs ();
10805 
10806   /* These functions are not part of the HPUX TFmode interface.  We
10807      use them instead of _U_Qfcmp, which doesn't work the way we
10808      expect.  */
10809   set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10810   set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10811   set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10812   set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10813   set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10814   set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10815 
10816   /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10817      glibc doesn't have them.  */
10818 }
10819 
10820 /* Use soft-fp.  */
10821 
10822 static void
10823 ia64_soft_fp_init_libfuncs (void)
10824 {
10825 }
10826 
10827 static bool
10828 ia64_vms_valid_pointer_mode (scalar_int_mode mode)
10829 {
10830   return (mode == SImode || mode == DImode);
10831 }
10832 
10833 /* For HPUX, it is illegal to have relocations in shared segments.  */
10834 
10835 static int
10836 ia64_hpux_reloc_rw_mask (void)
10837 {
10838   return 3;
10839 }
10840 
10841 /* For others, relax this so that relocations to local data go in
10842    read-only segments, but we still cannot allow global relocations
10843    in read-only segments.  */
10844 
10845 static int
10846 ia64_reloc_rw_mask (void)
10847 {
10848   return flag_pic ? 3 : 2;
10849 }
10850 
10851 /* Return the section to use for X.  The only special thing we do here
10852    is to honor small data.  */
10853 
10854 static section *
10855 ia64_select_rtx_section (machine_mode mode, rtx x,
10856 			 unsigned HOST_WIDE_INT align)
10857 {
10858   if (GET_MODE_SIZE (mode) > 0
10859       && GET_MODE_SIZE (mode) <= ia64_section_threshold
10860       && !TARGET_NO_SDATA)
10861     return sdata_section;
10862   else
10863     return default_elf_select_rtx_section (mode, x, align);
10864 }
10865 
10866 static unsigned int
10867 ia64_section_type_flags (tree decl, const char *name, int reloc)
10868 {
10869   unsigned int flags = 0;
10870 
10871   if (strcmp (name, ".sdata") == 0
10872       || strncmp (name, ".sdata.", 7) == 0
10873       || strncmp (name, ".gnu.linkonce.s.", 16) == 0
10874       || strncmp (name, ".sdata2.", 8) == 0
10875       || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
10876       || strcmp (name, ".sbss") == 0
10877       || strncmp (name, ".sbss.", 6) == 0
10878       || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
10879     flags = SECTION_SMALL;
10880 
10881   flags |= default_section_type_flags (decl, name, reloc);
10882   return flags;
10883 }
10884 
10885 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10886    structure type and the address of that type should be passed
10887    in out0, rather than in r8.  */
10888 
10889 static bool
10890 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10891 {
10892   tree ret_type = TREE_TYPE (fntype);
10893 
10894   /* The Itanium C++ ABI requires that out0, rather than r8, be used
10895      as the structure return address parameter, if the return value
10896      type has a non-trivial copy constructor or destructor.  It is not
10897      clear if this same convention should be used for other
10898      programming languages.  Until G++ 3.4, we incorrectly used r8 for
10899      these return values.  */
10900   return (abi_version_at_least (2)
10901 	  && ret_type
10902 	  && TYPE_MODE (ret_type) == BLKmode
10903 	  && TREE_ADDRESSABLE (ret_type)
10904 	  && lang_GNU_CXX ());
10905 }
10906 
10907 /* Output the assembler code for a thunk function.  THUNK_DECL is the
10908    declaration for the thunk function itself, FUNCTION is the decl for
10909    the target function.  DELTA is an immediate constant offset to be
10910    added to THIS.  If VCALL_OFFSET is nonzero, the word at
10911    *(*this + vcall_offset) should be added to THIS.  */
10912 
10913 static void
10914 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10915 		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10916 		      tree function)
10917 {
10918   const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
10919   rtx this_rtx, funexp;
10920   rtx_insn *insn;
10921   unsigned int this_parmno;
10922   unsigned int this_regno;
10923   rtx delta_rtx;
10924 
10925   reload_completed = 1;
10926   epilogue_completed = 1;
10927 
10928   /* Set things up as ia64_expand_prologue might.  */
10929   last_scratch_gr_reg = 15;
10930 
10931   memset (&current_frame_info, 0, sizeof (current_frame_info));
10932   current_frame_info.spill_cfa_off = -16;
10933   current_frame_info.n_input_regs = 1;
10934   current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10935 
10936   /* Mark the end of the (empty) prologue.  */
10937   emit_note (NOTE_INSN_PROLOGUE_END);
10938 
10939   /* Figure out whether "this" will be the first parameter (the
10940      typical case) or the second parameter (as happens when the
10941      virtual function returns certain class objects).  */
10942   this_parmno
10943     = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10944        ? 1 : 0);
10945   this_regno = IN_REG (this_parmno);
10946   if (!TARGET_REG_NAMES)
10947     reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10948 
10949   this_rtx = gen_rtx_REG (Pmode, this_regno);
10950 
10951   /* Apply the constant offset, if required.  */
10952   delta_rtx = GEN_INT (delta);
10953   if (TARGET_ILP32)
10954     {
10955       rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
10956       REG_POINTER (tmp) = 1;
10957       if (delta && satisfies_constraint_I (delta_rtx))
10958 	{
10959 	  emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
10960 	  delta = 0;
10961 	}
10962       else
10963 	emit_insn (gen_ptr_extend (this_rtx, tmp));
10964     }
10965   if (delta)
10966     {
10967       if (!satisfies_constraint_I (delta_rtx))
10968 	{
10969 	  rtx tmp = gen_rtx_REG (Pmode, 2);
10970 	  emit_move_insn (tmp, delta_rtx);
10971 	  delta_rtx = tmp;
10972 	}
10973       emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
10974     }
10975 
10976   /* Apply the offset from the vtable, if required.  */
10977   if (vcall_offset)
10978     {
10979       rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10980       rtx tmp = gen_rtx_REG (Pmode, 2);
10981 
10982       if (TARGET_ILP32)
10983 	{
10984 	  rtx t = gen_rtx_REG (ptr_mode, 2);
10985 	  REG_POINTER (t) = 1;
10986 	  emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
10987 	  if (satisfies_constraint_I (vcall_offset_rtx))
10988 	    {
10989 	      emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
10990 	      vcall_offset = 0;
10991 	    }
10992 	  else
10993 	    emit_insn (gen_ptr_extend (tmp, t));
10994 	}
10995       else
10996 	emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
10997 
10998       if (vcall_offset)
10999 	{
11000 	  if (!satisfies_constraint_J (vcall_offset_rtx))
11001 	    {
11002 	      rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
11003 	      emit_move_insn (tmp2, vcall_offset_rtx);
11004 	      vcall_offset_rtx = tmp2;
11005 	    }
11006 	  emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
11007 	}
11008 
11009       if (TARGET_ILP32)
11010 	emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
11011       else
11012 	emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
11013 
11014       emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
11015     }
11016 
11017   /* Generate a tail call to the target function.  */
11018   if (! TREE_USED (function))
11019     {
11020       assemble_external (function);
11021       TREE_USED (function) = 1;
11022     }
11023   funexp = XEXP (DECL_RTL (function), 0);
11024   funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11025   ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
11026   insn = get_last_insn ();
11027   SIBLING_CALL_P (insn) = 1;
11028 
11029   /* Code generation for calls relies on splitting.  */
11030   reload_completed = 1;
11031   epilogue_completed = 1;
11032   try_split (PATTERN (insn), insn, 0);
11033 
11034   emit_barrier ();
11035 
11036   /* Run just enough of rest_of_compilation to get the insns emitted.
11037      There's not really enough bulk here to make other passes such as
11038      instruction scheduling worthwhile.  */
11039 
11040   emit_all_insn_group_barriers (NULL);
11041   insn = get_insns ();
11042   shorten_branches (insn);
11043   assemble_start_function (thunk, fnname);
11044   final_start_function (insn, file, 1);
11045   final (insn, file, 1);
11046   final_end_function ();
11047   assemble_end_function (thunk, fnname);
11048 
11049   reload_completed = 0;
11050   epilogue_completed = 0;
11051 }
11052 
11053 /* Worker function for TARGET_STRUCT_VALUE_RTX.  */
11054 
11055 static rtx
11056 ia64_struct_value_rtx (tree fntype,
11057 		       int incoming ATTRIBUTE_UNUSED)
11058 {
11059   if (TARGET_ABI_OPEN_VMS ||
11060       (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
11061     return NULL_RTX;
11062   return gen_rtx_REG (Pmode, GR_REG (8));
11063 }
11064 
11065 static bool
11066 ia64_scalar_mode_supported_p (scalar_mode mode)
11067 {
11068   switch (mode)
11069     {
11070     case E_QImode:
11071     case E_HImode:
11072     case E_SImode:
11073     case E_DImode:
11074     case E_TImode:
11075       return true;
11076 
11077     case E_SFmode:
11078     case E_DFmode:
11079     case E_XFmode:
11080     case E_RFmode:
11081       return true;
11082 
11083     case E_TFmode:
11084       return true;
11085 
11086     default:
11087       return false;
11088     }
11089 }
11090 
11091 static bool
11092 ia64_vector_mode_supported_p (machine_mode mode)
11093 {
11094   switch (mode)
11095     {
11096     case E_V8QImode:
11097     case E_V4HImode:
11098     case E_V2SImode:
11099       return true;
11100 
11101     case E_V2SFmode:
11102       return true;
11103 
11104     default:
11105       return false;
11106     }
11107 }
11108 
11109 /* Implement the FUNCTION_PROFILER macro.  */
11110 
11111 void
11112 ia64_output_function_profiler (FILE *file, int labelno)
11113 {
11114   bool indirect_call;
11115 
11116   /* If the function needs a static chain and the static chain
11117      register is r15, we use an indirect call so as to bypass
11118      the PLT stub in case the executable is dynamically linked,
11119      because the stub clobbers r15 as per 5.3.6 of the psABI.
11120      We don't need to do that in non-canonical PIC mode.  */
11121 
11122   if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
11123     {
11124       gcc_assert (STATIC_CHAIN_REGNUM == 15);
11125       indirect_call = true;
11126     }
11127   else
11128     indirect_call = false;
11129 
11130   if (TARGET_GNU_AS)
11131     fputs ("\t.prologue 4, r40\n", file);
11132   else
11133     fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
11134   fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
11135 
11136   if (NO_PROFILE_COUNTERS)
11137     fputs ("\tmov out3 = r0\n", file);
11138   else
11139     {
11140       char buf[20];
11141       ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
11142 
11143       if (TARGET_AUTO_PIC)
11144 	fputs ("\tmovl out3 = @gprel(", file);
11145       else
11146 	fputs ("\taddl out3 = @ltoff(", file);
11147       assemble_name (file, buf);
11148       if (TARGET_AUTO_PIC)
11149 	fputs (")\n", file);
11150       else
11151 	fputs ("), r1\n", file);
11152     }
11153 
11154   if (indirect_call)
11155     fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
11156   fputs ("\t;;\n", file);
11157 
11158   fputs ("\t.save rp, r42\n", file);
11159   fputs ("\tmov out2 = b0\n", file);
11160   if (indirect_call)
11161     fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
11162   fputs ("\t.body\n", file);
11163   fputs ("\tmov out1 = r1\n", file);
11164   if (indirect_call)
11165     {
11166       fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
11167       fputs ("\tmov b6 = r16\n", file);
11168       fputs ("\tld8 r1 = [r14]\n", file);
11169       fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
11170     }
11171   else
11172     fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
11173 }
11174 
11175 static GTY(()) rtx mcount_func_rtx;
11176 static rtx
11177 gen_mcount_func_rtx (void)
11178 {
11179   if (!mcount_func_rtx)
11180     mcount_func_rtx = init_one_libfunc ("_mcount");
11181   return mcount_func_rtx;
11182 }
11183 
11184 void
11185 ia64_profile_hook (int labelno)
11186 {
11187   rtx label, ip;
11188 
11189   if (NO_PROFILE_COUNTERS)
11190     label = const0_rtx;
11191   else
11192     {
11193       char buf[30];
11194       const char *label_name;
11195       ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
11196       label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
11197       label = gen_rtx_SYMBOL_REF (Pmode, label_name);
11198       SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
11199     }
11200   ip = gen_reg_rtx (Pmode);
11201   emit_insn (gen_ip_value (ip));
11202   emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
11203                      VOIDmode,
11204 		     gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
11205 		     ip, Pmode,
11206 		     label, Pmode);
11207 }
11208 
11209 /* Return the mangling of TYPE if it is an extended fundamental type.  */
11210 
11211 static const char *
11212 ia64_mangle_type (const_tree type)
11213 {
11214   type = TYPE_MAIN_VARIANT (type);
11215 
11216   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
11217       && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
11218     return NULL;
11219 
11220   /* On HP-UX, "long double" is mangled as "e", so __float128 (which is
11221      the same type there) is also mangled as "e".  */
11222   if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
11223     return "g";
11224   /* On HP-UX, "e" is not available as a mangling of __float80 so use
11225      an extended mangling.  Elsewhere, "e" is available since long
11226      double is 80 bits.  */
11227   if (TYPE_MODE (type) == XFmode)
11228     return TARGET_HPUX ? "u9__float80" : "e";
11229   if (TYPE_MODE (type) == RFmode)
11230     return "u7__fpreg";
11231   return NULL;
11232 }
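/* Illustrative example (not from the original source): with the rules
   above, the C++ function "void f (__fpreg)" mangles to "_Z1fu7__fpreg",
   and on HP-UX "void g (__float80)" uses the vendor-extended form
   "u9__float80" rather than "e".  */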
11233 
11234 /* Return the diagnostic message string if conversion from FROMTYPE to
11235    TOTYPE is not allowed, NULL otherwise.  */
11236 static const char *
11237 ia64_invalid_conversion (const_tree fromtype, const_tree totype)
11238 {
11239   /* Reject nontrivial conversion to or from __fpreg.  */
11240   if (TYPE_MODE (fromtype) == RFmode
11241       && TYPE_MODE (totype) != RFmode
11242       && TYPE_MODE (totype) != VOIDmode)
11243     return N_("invalid conversion from %<__fpreg%>");
11244   if (TYPE_MODE (totype) == RFmode
11245       && TYPE_MODE (fromtype) != RFmode)
11246     return N_("invalid conversion to %<__fpreg%>");
11247   return NULL;
11248 }
11249 
11250 /* Return the diagnostic message string if the unary operation OP is
11251    not permitted on TYPE, NULL otherwise.  */
11252 static const char *
11253 ia64_invalid_unary_op (int op, const_tree type)
11254 {
11255   /* Reject operations on __fpreg other than unary + or &.  */
11256   if (TYPE_MODE (type) == RFmode
11257       && op != CONVERT_EXPR
11258       && op != ADDR_EXPR)
11259     return N_("invalid operation on %<__fpreg%>");
11260   return NULL;
11261 }
11262 
11263 /* Return the diagnostic message string if the binary operation OP is
11264    not permitted on TYPE1 and TYPE2, NULL otherwise.  */
11265 static const char *
11266 ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
11267 {
11268   /* Reject operations on __fpreg.  */
11269   if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
11270     return N_("invalid operation on %<__fpreg%>");
11271   return NULL;
11272 }
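/* Editorial example of the unary and binary restrictions above:

     __fpreg r, *p;
     p = &r;              /@ OK: ADDR_EXPR is permitted @/
     r = +r;              /@ OK: unary plus is a CONVERT_EXPR @/
     r = -r;              /@ rejected by ia64_invalid_unary_op @/
     r = r + r;           /@ rejected by ia64_invalid_binary_op @/

   (nested comment markers written as /@ @/ here).  */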
11273 
11274 /* HP-UX version_id attribute.
11275    For object foo, if the version_id is set to 1234 put out an alias
11276    of '.alias foo "foo{1234}"'.  We can't use "foo{1234}" in anything
11277    other than an alias statement because it is an illegal symbol name.  */
11278 
11279 static tree
11280 ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
11281                                  tree name ATTRIBUTE_UNUSED,
11282                                  tree args,
11283                                  int flags ATTRIBUTE_UNUSED,
11284                                  bool *no_add_attrs)
11285 {
11286   tree arg = TREE_VALUE (args);
11287 
11288   if (TREE_CODE (arg) != STRING_CST)
11289     {
11290       error ("version attribute is not a string");
11291       *no_add_attrs = true;
11292       return NULL_TREE;
11293     }
11294   return NULL_TREE;
11295 }
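/* Editorial usage sketch for the HP-UX attribute handled above (the
   attribute is real; the declaration is only an example):

     extern int foo (void) __attribute__ ((version_id ("1234")));

   With a string argument the attribute is recorded and later emitted as
   the '.alias' directive described before the handler; any other
   argument type is diagnosed here.  */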
11296 
11297 /* Target hook for c_mode_for_suffix.  */
11298 
11299 static machine_mode
11300 ia64_c_mode_for_suffix (char suffix)
11301 {
11302   if (suffix == 'q')
11303     return TFmode;
11304   if (suffix == 'w')
11305     return XFmode;
11306 
11307   return VOIDmode;
11308 }
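/* Editorial example: with this hook, floating constant suffixes map as

     1.0q  ->  TFmode  (__float128)
     1.0w  ->  XFmode  (__float80)

   which mirrors the extended-type support elsewhere in this file.  */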
11309 
11310 static GTY(()) rtx ia64_dconst_0_5_rtx;
11311 
11312 rtx
11313 ia64_dconst_0_5 (void)
11314 {
11315   if (! ia64_dconst_0_5_rtx)
11316     {
11317       REAL_VALUE_TYPE rv;
11318       real_from_string (&rv, "0.5");
11319       ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
11320     }
11321   return ia64_dconst_0_5_rtx;
11322 }
11323 
11324 static GTY(()) rtx ia64_dconst_0_375_rtx;
11325 
11326 rtx
11327 ia64_dconst_0_375 (void)
11328 {
11329   if (! ia64_dconst_0_375_rtx)
11330     {
11331       REAL_VALUE_TYPE rv;
11332       real_from_string (&rv, "0.375");
11333       ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
11334     }
11335   return ia64_dconst_0_375_rtx;
11336 }
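/* Editorial note: 0.5 and 0.375 are the first two Taylor coefficients of
   (1 - e)^(-1/2) = 1 + e/2 + 3*e*e/8 + ..., which is presumably why they
   are kept handy for the reciprocal-square-root refinement sequences
   generated elsewhere in the backend; this file itself only caches the
   CONST_DOUBLE values.  */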
11337 
11338 static fixed_size_mode
11339 ia64_get_reg_raw_mode (int regno)
11340 {
11341   if (FR_REGNO_P (regno))
11342     return XFmode;
11343   return default_get_reg_raw_mode (regno);
11344 }
11345 
11346 /* Implement TARGET_MEMBER_TYPE_FORCES_BLK.  ??? Might not be needed
11347    anymore.  */
11348 
11349 bool
11350 ia64_member_type_forces_blk (const_tree, machine_mode mode)
11351 {
11352   return TARGET_HPUX && mode == TFmode;
11353 }
11354 
11355 /* Always default to .text section until HP-UX linker is fixed.  */
11356 
11357 ATTRIBUTE_UNUSED static section *
11358 ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
11359 			    enum node_frequency freq ATTRIBUTE_UNUSED,
11360 			    bool startup ATTRIBUTE_UNUSED,
11361 			    bool exit ATTRIBUTE_UNUSED)
11362 {
11363   return NULL;
11364 }
11365 
11366 /* Construct (set target (vec_select op0 (parallel perm))) and
11367    return true if that's a valid instruction in the active ISA.  */
11368 
11369 static bool
11370 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
11371 {
11372   rtx rperm[MAX_VECT_LEN], x;
11373   unsigned i;
11374 
11375   for (i = 0; i < nelt; ++i)
11376     rperm[i] = GEN_INT (perm[i]);
11377 
11378   x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
11379   x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
11380   x = gen_rtx_SET (target, x);
11381 
11382   rtx_insn *insn = emit_insn (x);
11383   if (recog_memoized (insn) < 0)
11384     {
11385       remove_insn (insn);
11386       return false;
11387     }
11388   return true;
11389 }
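/* Editorial example of the RTL shape tried above, for a V4HImode swap of
   the two middle elements (perm = {0, 2, 1, 3}):

     (set (reg:V4HI target)
          (vec_select:V4HI (reg:V4HI op0)
                           (parallel [(const_int 0) (const_int 2)
                                      (const_int 1) (const_int 3)])))

   recog_memoized then decides whether some pattern in ia64.md accepts
   that exact selection; if not, the insn is removed again.  */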
11390 
11391 /* Similar, but generate a vec_concat from op0 and op1 as well.  */
11392 
11393 static bool
11394 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
11395 			const unsigned char *perm, unsigned nelt)
11396 {
11397   machine_mode v2mode;
11398   rtx x;
11399 
11400   if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode))
11401     return false;
11402   x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
11403   return expand_vselect (target, x, perm, nelt);
11404 }
11405 
11406 /* Try to expand a no-op permutation.  */
11407 
11408 static bool
11409 expand_vec_perm_identity (struct expand_vec_perm_d *d)
11410 {
11411   unsigned i, nelt = d->nelt;
11412 
11413   for (i = 0; i < nelt; ++i)
11414     if (d->perm[i] != i)
11415       return false;
11416 
11417   if (!d->testing_p)
11418     emit_move_insn (d->target, d->op0);
11419 
11420   return true;
11421 }
11422 
11423 /* Try to expand D via a shrp instruction.  */
11424 
11425 static bool
11426 expand_vec_perm_shrp (struct expand_vec_perm_d *d)
11427 {
11428   unsigned i, nelt = d->nelt, shift, mask;
11429   rtx tmp, hi, lo;
11430 
11431   /* ??? Don't force V2SFmode into the integer registers.  */
11432   if (d->vmode == V2SFmode)
11433     return false;
11434 
11435   mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1);
11436 
11437   shift = d->perm[0];
11438   if (BYTES_BIG_ENDIAN && shift > nelt)
11439     return false;
11440 
11441   for (i = 1; i < nelt; ++i)
11442     if (d->perm[i] != ((shift + i) & mask))
11443       return false;
11444 
11445   if (d->testing_p)
11446     return true;
11447 
11448   hi = shift < nelt ? d->op1 : d->op0;
11449   lo = shift < nelt ? d->op0 : d->op1;
11450 
11451   shift %= nelt;
11452 
11453   shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT;
11454 
11455   /* We've eliminated the shift 0 case via expand_vec_perm_identity.  */
11456   gcc_assert (IN_RANGE (shift, 1, 63));
11457 
11458   /* Recall that big-endian elements are numbered starting at the top of
11459      the register.  Ideally we'd have a shift-left-pair.  But since we
11460      don't, convert to a shift the other direction.  */
11461   if (BYTES_BIG_ENDIAN)
11462     shift = 64 - shift;
11463 
11464   tmp = gen_reg_rtx (DImode);
11465   hi = gen_lowpart (DImode, hi);
11466   lo = gen_lowpart (DImode, lo);
11467   emit_insn (gen_shrp (tmp, hi, lo, GEN_INT (shift)));
11468 
11469   emit_move_insn (d->target, gen_lowpart (d->vmode, tmp));
11470   return true;
11471 }
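/* Editorial worked example (little-endian V8QImode, two operands):
   perm = {3,4,5,6,7,8,9,10} passes the (shift + i) & mask test with
   shift = 3, so lo = op0 and hi = op1, and the shift becomes
   3 * 8 = 24 bits.  The emitted instruction is, in illustrative assembly,

     shrp tmp = hi, lo, 24

   which extracts bytes 3..10 of the 16-byte hi:lo concatenation, exactly
   the requested permutation.  */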
11472 
11473 /* Try to instantiate D in a single instruction.  */
11474 
11475 static bool
11476 expand_vec_perm_1 (struct expand_vec_perm_d *d)
11477 {
11478   unsigned i, nelt = d->nelt;
11479   unsigned char perm2[MAX_VECT_LEN];
11480 
11481   /* Try single-operand selections.  */
11482   if (d->one_operand_p)
11483     {
11484       if (expand_vec_perm_identity (d))
11485 	return true;
11486       if (expand_vselect (d->target, d->op0, d->perm, nelt))
11487 	return true;
11488     }
11489 
11490   /* Try two operand selections.  */
11491   if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
11492     return true;
11493 
11494   /* Recognize interleave style patterns with reversed operands.  */
11495   if (!d->one_operand_p)
11496     {
11497       for (i = 0; i < nelt; ++i)
11498 	{
11499 	  unsigned e = d->perm[i];
11500 	  if (e >= nelt)
11501 	    e -= nelt;
11502 	  else
11503 	    e += nelt;
11504 	  perm2[i] = e;
11505 	}
11506 
11507       if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
11508 	return true;
11509     }
11510 
11511   if (expand_vec_perm_shrp (d))
11512     return true;
11513 
11514   /* ??? Look for deposit-like permutations where most of the result
11515      comes from one vector unchanged and the rest comes from a
11516      sequential hunk of the other vector.  */
11517 
11518   return false;
11519 }
11520 
11521 /* Pattern match broadcast permutations.  */
11522 
11523 static bool
11524 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
11525 {
11526   unsigned i, elt, nelt = d->nelt;
11527   unsigned char perm2[2];
11528   rtx temp;
11529   bool ok;
11530 
11531   if (!d->one_operand_p)
11532     return false;
11533 
11534   elt = d->perm[0];
11535   for (i = 1; i < nelt; ++i)
11536     if (d->perm[i] != elt)
11537       return false;
11538 
11539   switch (d->vmode)
11540     {
11541     case E_V2SImode:
11542     case E_V2SFmode:
11543       /* Implementable by interleave.  */
11544       perm2[0] = elt;
11545       perm2[1] = elt + 2;
11546       ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
11547       gcc_assert (ok);
11548       break;
11549 
11550     case E_V8QImode:
11551       /* Implementable by extract + broadcast.  */
11552       if (BYTES_BIG_ENDIAN)
11553 	elt = 7 - elt;
11554       elt *= BITS_PER_UNIT;
11555       temp = gen_reg_rtx (DImode);
11556       emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
11557 			    GEN_INT (8), GEN_INT (elt)));
11558       emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
11559       break;
11560 
11561     case E_V4HImode:
11562       /* Should have been matched directly by vec_select.  */
11563     default:
11564       gcc_unreachable ();
11565     }
11566 
11567   return true;
11568 }
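/* Editorial worked example: broadcasting element 3 of a V8QImode vector
   on a little-endian target extracts bits 24..31 with extzv and then
   relies on the mux1-based broadcast pattern to replicate that byte into
   all eight lanes.  V4HImode broadcasts never reach this function because,
   as the gcc_unreachable () above documents, they are matched directly as
   a plain vec_select.  */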
11569 
11570 /* A subroutine of ia64_expand_vec_perm_const_1.  Try to simplify a
11571    two vector permutation into a single vector permutation by using
11572    an interleave operation to merge the vectors.  */
11573 
11574 static bool
11575 expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
11576 {
11577   struct expand_vec_perm_d dremap, dfinal;
11578   unsigned char remap[2 * MAX_VECT_LEN];
11579   unsigned contents, i, nelt, nelt2;
11580   unsigned h0, h1, h2, h3;
11581   rtx_insn *seq;
11582   bool ok;
11583 
11584   if (d->one_operand_p)
11585     return false;
11586 
11587   nelt = d->nelt;
11588   nelt2 = nelt / 2;
11589 
11590   /* Examine from whence the elements come.  */
11591   contents = 0;
11592   for (i = 0; i < nelt; ++i)
11593     contents |= 1u << d->perm[i];
11594 
11595   memset (remap, 0xff, sizeof (remap));
11596   dremap = *d;
11597 
11598   h0 = (1u << nelt2) - 1;
11599   h1 = h0 << nelt2;
11600   h2 = h0 << nelt;
11601   h3 = h0 << (nelt + nelt2);
11602 
11603   if ((contents & (h0 | h2)) == contents)	/* punpck even halves */
11604     {
11605       for (i = 0; i < nelt; ++i)
11606 	{
11607 	  unsigned which = i / 2 + (i & 1 ? nelt : 0);
11608 	  remap[which] = i;
11609 	  dremap.perm[i] = which;
11610 	}
11611     }
11612   else if ((contents & (h1 | h3)) == contents)	/* punpck odd halves */
11613     {
11614       for (i = 0; i < nelt; ++i)
11615 	{
11616 	  unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
11617 	  remap[which] = i;
11618 	  dremap.perm[i] = which;
11619 	}
11620     }
11621   else if ((contents & 0x5555) == contents)	/* mix even elements */
11622     {
11623       for (i = 0; i < nelt; ++i)
11624 	{
11625 	  unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
11626 	  remap[which] = i;
11627 	  dremap.perm[i] = which;
11628 	}
11629     }
11630   else if ((contents & 0xaaaa) == contents)	/* mix odd elements */
11631     {
11632       for (i = 0; i < nelt; ++i)
11633 	{
11634 	  unsigned which = (i | 1) + (i & 1 ? nelt : 0);
11635 	  remap[which] = i;
11636 	  dremap.perm[i] = which;
11637 	}
11638     }
11639   else if (floor_log2 (contents) - ctz_hwi (contents) < (int)nelt) /* shrp */
11640     {
11641       unsigned shift = ctz_hwi (contents);
11642       for (i = 0; i < nelt; ++i)
11643 	{
11644 	  unsigned which = (i + shift) & (2 * nelt - 1);
11645 	  remap[which] = i;
11646 	  dremap.perm[i] = which;
11647 	}
11648     }
11649   else
11650     return false;
11651 
11652   /* Use the remapping array set up above to move the elements from their
11653      swizzled locations into their final destinations.  */
11654   dfinal = *d;
11655   for (i = 0; i < nelt; ++i)
11656     {
11657       unsigned e = remap[d->perm[i]];
11658       gcc_assert (e < nelt);
11659       dfinal.perm[i] = e;
11660     }
11661   if (d->testing_p)
11662     dfinal.op0 = gen_raw_REG (dfinal.vmode, LAST_VIRTUAL_REGISTER + 1);
11663   else
11664     dfinal.op0 = gen_reg_rtx (dfinal.vmode);
11665   dfinal.op1 = dfinal.op0;
11666   dfinal.one_operand_p = true;
11667   dremap.target = dfinal.op0;
11668 
11669   /* Test if the final remap can be done with a single insn.  For V4HImode
11670      this *will* succeed.  For V8QImode or V2SImode it may not.  */
11671   start_sequence ();
11672   ok = expand_vec_perm_1 (&dfinal);
11673   seq = get_insns ();
11674   end_sequence ();
11675   if (!ok)
11676     return false;
11677   if (d->testing_p)
11678     return true;
11679 
11680   ok = expand_vec_perm_1 (&dremap);
11681   gcc_assert (ok);
11682 
11683   emit_insn (seq);
11684   return true;
11685 }
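/* Editorial worked example (V4HImode, nelt = 4): for perm = {0,1,4,5}
   the contents mask is 0x33, which is covered by h0 | h2, so dremap
   becomes the "even halves" interleave {0,4,1,5} of op0 and op1, and the
   final single-operand shuffle computed through remap[] is {0,2,1,3}
   applied to that interleaved temporary.  Composing the two steps
   reproduces the original two-operand selection.  */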
11686 
11687 /* A subroutine of ia64_expand_vec_perm_const_1.  Emit a full V4HImode
11688    constant permutation via two mux2 and a merge.  */
11689 
11690 static bool
11691 expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
11692 {
11693   unsigned char perm2[4];
11694   rtx rmask[4];
11695   unsigned i;
11696   rtx t0, t1, mask, x;
11697   bool ok;
11698 
11699   if (d->vmode != V4HImode || d->one_operand_p)
11700     return false;
11701   if (d->testing_p)
11702     return true;
11703 
11704   for (i = 0; i < 4; ++i)
11705     {
11706       perm2[i] = d->perm[i] & 3;
11707       rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
11708     }
11709   mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
11710   mask = force_reg (V4HImode, mask);
11711 
11712   t0 = gen_reg_rtx (V4HImode);
11713   t1 = gen_reg_rtx (V4HImode);
11714 
11715   ok = expand_vselect (t0, d->op0, perm2, 4);
11716   gcc_assert (ok);
11717   ok = expand_vselect (t1, d->op1, perm2, 4);
11718   gcc_assert (ok);
11719 
11720   x = gen_rtx_AND (V4HImode, mask, t0);
11721   emit_insn (gen_rtx_SET (t0, x));
11722 
11723   x = gen_rtx_NOT (V4HImode, mask);
11724   x = gen_rtx_AND (V4HImode, x, t1);
11725   emit_insn (gen_rtx_SET (t1, x));
11726 
11727   x = gen_rtx_IOR (V4HImode, t0, t1);
11728   emit_insn (gen_rtx_SET (d->target, x));
11729 
11730   return true;
11731 }
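/* Editorial worked example: for perm = {0,5,2,7} the two selections use
   perm2 = {0,1,2,3} (leaving op0 and op1 in place) and the mask vector is
   {-1, 0, -1, 0}, so the AND / AND-NOT / IOR merge picks elements 0 and 2
   from op0 and elements 1 and 3 from op1, i.e. elements {0,5,2,7} of the
   concatenation, as requested.  */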
11732 
11733 /* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
11734    With all of the interface bits taken care of, perform the expansion
11735    in D and return true on success.  */
11736 
11737 static bool
11738 ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
11739 {
11740   if (expand_vec_perm_1 (d))
11741     return true;
11742   if (expand_vec_perm_broadcast (d))
11743     return true;
11744   if (expand_vec_perm_interleave_2 (d))
11745     return true;
11746   if (expand_vec_perm_v4hi_5 (d))
11747     return true;
11748   return false;
11749 }
11750 
11751 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */
11752 
11753 static bool
11754 ia64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
11755 			       rtx op1, const vec_perm_indices &sel)
11756 {
11757   struct expand_vec_perm_d d;
11758   unsigned char perm[MAX_VECT_LEN];
11759   unsigned int i, nelt, which;
11760 
11761   d.target = target;
11762   d.op0 = op0;
11763   d.op1 = op1;
11764 
11765   d.vmode = vmode;
11766   gcc_assert (VECTOR_MODE_P (d.vmode));
11767   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
11768   d.testing_p = !target;
11769 
11770   gcc_assert (sel.length () == nelt);
11771   gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
11772 
11773   for (i = which = 0; i < nelt; ++i)
11774     {
11775       unsigned int ei = sel[i] & (2 * nelt - 1);
11776 
11777       which |= (ei < nelt ? 1 : 2);
11778       d.perm[i] = ei;
11779       perm[i] = ei;
11780     }
11781 
11782   switch (which)
11783     {
11784     default:
11785       gcc_unreachable ();
11786 
11787     case 3:
11788       if (d.testing_p || !rtx_equal_p (d.op0, d.op1))
11789 	{
11790 	  d.one_operand_p = false;
11791 	  break;
11792 	}
11793 
11794       /* The elements of PERM do not suggest that only the first operand
11795 	 is used, but both operands are identical.  Allow easier matching
11796 	 of the permutation by folding the permutation into the single
11797 	 input vector.  */
11798       for (i = 0; i < nelt; ++i)
11799 	if (d.perm[i] >= nelt)
11800 	  d.perm[i] -= nelt;
11801       /* FALLTHRU */
11802 
11803     case 1:
11804       d.op1 = d.op0;
11805       d.one_operand_p = true;
11806       break;
11807 
11808     case 2:
11809       for (i = 0; i < nelt; ++i)
11810         d.perm[i] -= nelt;
11811       d.op0 = d.op1;
11812       d.one_operand_p = true;
11813       break;
11814     }
11815 
11816   if (d.testing_p)
11817     {
11818       /* We have to go through the motions and see if we can
11819 	 figure out how to generate the requested permutation.  */
11820       d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
11821       d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
11822       if (!d.one_operand_p)
11823 	d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
11824 
11825       start_sequence ();
11826       bool ret = ia64_expand_vec_perm_const_1 (&d);
11827       end_sequence ();
11828 
11829       return ret;
11830     }
11831 
11832   if (ia64_expand_vec_perm_const_1 (&d))
11833     return true;
11834 
11835   /* If the mask says both arguments are needed, but they are the same,
11836      the above tried to expand with one_operand_p true.  If that didn't
11837      work, retry with one_operand_p false, as that's what we used in _ok.  */
11838   if (which == 3 && d.one_operand_p)
11839     {
11840       memcpy (d.perm, perm, sizeof (perm));
11841       d.one_operand_p = false;
11842       return ia64_expand_vec_perm_const_1 (&d);
11843     }
11844 
11845   return false;
11846 }
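/* Editorial note on the `which' classification above: bit 0 is set when
   some selector index refers to op0 (index < nelt) and bit 1 when some
   index refers to op1, so which == 1 or which == 2 means the permutation
   really has a single input and the unused operand is dropped, while
   which == 3 keeps both operands unless they are literally the same rtx,
   in which case the indices are folded onto op0.  */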
11847 
11848 void
11849 ia64_expand_vec_setv2sf (rtx operands[3])
11850 {
11851   struct expand_vec_perm_d d;
11852   unsigned int which;
11853   bool ok;
11854 
11855   d.target = operands[0];
11856   d.op0 = operands[0];
11857   d.op1 = gen_reg_rtx (V2SFmode);
11858   d.vmode = V2SFmode;
11859   d.nelt = 2;
11860   d.one_operand_p = false;
11861   d.testing_p = false;
11862 
11863   which = INTVAL (operands[2]);
11864   gcc_assert (which <= 1);
11865   d.perm[0] = 1 - which;
11866   d.perm[1] = which + 2;
11867 
11868   emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));
11869 
11870   ok = ia64_expand_vec_perm_const_1 (&d);
11871   gcc_assert (ok);
11872 }
11873 
11874 void
11875 ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
11876 {
11877   struct expand_vec_perm_d d;
11878   machine_mode vmode = GET_MODE (target);
11879   unsigned int i, nelt = GET_MODE_NUNITS (vmode);
11880   bool ok;
11881 
11882   d.target = target;
11883   d.op0 = op0;
11884   d.op1 = op1;
11885   d.vmode = vmode;
11886   d.nelt = nelt;
11887   d.one_operand_p = false;
11888   d.testing_p = false;
11889 
11890   for (i = 0; i < nelt; ++i)
11891     d.perm[i] = i * 2 + odd;
11892 
11893   ok = ia64_expand_vec_perm_const_1 (&d);
11894   gcc_assert (ok);
11895 }
11896 
11897 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
11898 
11899    In BR regs, we can't change the DImode at all.
11900    In FP regs, we can't change FP values to integer values and vice versa,
11901    but we can change e.g. DImode to SImode, and V2SFmode into DImode.  */
11902 
11903 static bool
11904 ia64_can_change_mode_class (machine_mode from, machine_mode to,
11905 			    reg_class_t rclass)
11906 {
11907   if (reg_classes_intersect_p (rclass, BR_REGS))
11908     return from == to;
11909   if (SCALAR_FLOAT_MODE_P (from) != SCALAR_FLOAT_MODE_P (to))
11910     return !reg_classes_intersect_p (rclass, FR_REGS);
11911   return true;
11912 }
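/* Editorial examples of the rule above: in BR_REGS any mode change is
   refused (FROM must equal TO); in FR_REGS a DImode <-> DFmode change is
   refused because exactly one side is a scalar float mode, while
   DImode <-> SImode or V2SFmode <-> DImode changes are allowed since
   neither side (or both sides) is scalar float.  */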
11913 
11914 #include "gt-ia64.h"
11915