/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992-2021 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "varasm.h"
#include "calls.h"
#include "output.h"
#include "except.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "cfgrtl.h"
#include "opts.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"

/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  machine_mode store_mode;
  machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
	  && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
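
/* Illustrative note (an assumption for exposition, not from the original
   source): the bypass case is a floating-point operation feeding a
   floating-point store of the same width, e.g. an fadd,dbl writing %fr6
   followed by an fstd of %fr6 (both DFmode), for which this returns 1.
   A single-precision result feeding a double-precision store has unequal
   mode sizes and returns 0.  */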

#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (machine_mode mode, reg_class_t,
				    reg_class_t);
static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static inline rtx force_mode (machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
			     rtx, rtx);
static bool forward_branch_p (rtx_insn *);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_cpymem_length (rtx_insn *);
static int compute_clrmem_length (rtx_insn *);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
static void pa_linux_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *);
static int pa_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int pa_issue_rate (void);
static int pa_reloc_rw_mask (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (scalar_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
static void pa_file_end (void);
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t,
				  const function_arg_info &);
static int pa_arg_partial_bytes (cumulative_args_t, const function_arg_info &);
static void pa_function_arg_advance (cumulative_args_t,
				     const function_arg_info &);
static rtx pa_function_arg (cumulative_args_t, const function_arg_info &);
static pad_direction pa_function_arg_padding (machine_mode, const_tree);
static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
					machine_mode,
					secondary_reload_info *);
static bool pa_secondary_memory_needed (machine_mode,
					reg_class_t, reg_class_t);
static void pa_extra_live_on_entry (bitmap);
static machine_mode pa_promote_function_mode (const_tree,
					      machine_mode, int *,
					      const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (machine_mode, rtx);
static bool pa_legitimate_constant_p (machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (machine_mode, rtx, bool);
static bool pa_callee_copies (cumulative_args_t, const function_arg_info &);
static unsigned int pa_hard_regno_nregs (unsigned int, machine_mode);
static bool pa_hard_regno_mode_ok (unsigned int, machine_mode);
static bool pa_modes_tieable_p (machine_mode, machine_mode);
static bool pa_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
static HOST_WIDE_INT pa_starting_frame_offset (void);
static section *pa_elf_select_rtx_section (machine_mode, rtx,
					   unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;

/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;

/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END pa_file_end

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES pa_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING pa_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED pa_secondary_memory_needed

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_false

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS pa_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK pa_hard_regno_mode_ok
#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P pa_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS pa_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings

#undef TARGET_STARTING_FRAME_OFFSET
#define TARGET_STARTING_FRAME_OFFSET pa_starting_frame_offset

#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed

struct gcc_target targetm = TARGET_INITIALIZER;

/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_SOFT_FLOAT;
}
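
/* Usage sketch (illustrative, not from the original source):

     gcc -mfixed-range=fr4-fr31 ...

   marks fr4 through fr31 as fixed and call-used so the register
   allocator never touches them; this is the kernel-mode scenario
   mentioned above.  If the ranges given end up fixing every FP
   register, the check above also turns on MASK_SOFT_FLOAT.  */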

/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
	switch (opt->opt_index)
	  {
	  case OPT_mfixed_range_:
	    fix_range (opt->arg);
	    break;

	  default:
	    gcc_unreachable ();
	  }
      }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "%<-g%> is only supported when using GAS on this processor");
      warning (0, "%<-g%> option disabled");
      write_symbols = NO_DEBUG;
    }

  if (TARGET_64BIT && TARGET_HPUX)
    {
      /* DWARF5 is not supported by gdb.  Don't emit DWARF5 unless
	 specifically selected.  */
      if (!OPTION_SET_P (dwarf_strict))
	dwarf_strict = 1;
      if (!OPTION_SET_P (dwarf_version))
	dwarf_version = 4;
    }
  /* We now support only the "big PIC" model, and we always generate
     PIC code in 64-bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "%<-freorder-blocks-and-partition%> does not work "
	      "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}

enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];

static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
		      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
						 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
				   PA_BUILTIN_FABSQ, BUILT_IN_MD,
				   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
				   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
				   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
				   PA_BUILTIN_INFQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
				   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}
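
/* Usage sketch (illustrative, not from the original source): with the
   HP-UX long-double library, user code such as

     __float128 y = __builtin_fabsq (x);
     __float128 z = __builtin_copysignq (y, x);

   resolves to the library entry points _U_Qfabs and _U_Qfcopysign
   registered above, while __builtin_infq () and __builtin_huge_valq ()
   are expanded inline by pa_expand_builtin below, loading a TFmode
   infinity from the constant pool.  */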

static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		   machine_mode mode ATTRIBUTE_UNUSED,
		   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
	machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = const_double_from_real_value (inf, target_mode);

	tmp = validize_mem (force_const_mem (target_mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (target_mode);

	emit_move_insn (target, tmp);
	return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}

/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
pa_symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return symbolic_operand (x, VOIDmode);
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
	  || pa_ldil_cint_p (ival)
	  || pa_zdepi_cint_p (ival));
}

/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
pa_ldil_cint_p (unsigned HOST_WIDE_INT ival)
{
  unsigned HOST_WIDE_INT x;

  x = ival & (((unsigned HOST_WIDE_INT) -1 << 31) | 0x7ff);
  return x == 0 || x == ((unsigned HOST_WIDE_INT) -1 << 31);
}
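
/* Worked example (illustrative, not from the original source): ldil
   loads the left 21 bits of a word, so the low 11 bits of the value
   must already be zero.  0x12345800 qualifies (0x12345800 & 0x7ff == 0)
   while 0x12345678 does not.  The masked comparison also rejects values
   whose bit 31 is set but whose upper bits are clear, since those
   change sign when widened from 32 to 64 bits.  */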

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.  This function
     is performance-critical, so we can't afford the time a loop-based
     check would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
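
/* Worked example (illustrative, not from the original source):
   x = 0x1f0 is the 5-bit immediate 0b11111 deposited at bit 4, so
   lsb_mask = 0x10 and t = ((0x1f0 >> 4) + 0x10) & ~0xf = 0x20, a power
   of two: accepted.  For x = 0x148 (bit pattern 101001000, a 6-bit
   field that is not a sign-extended 5-bit value) the same steps give
   t = 0x18, not a power of two: rejected.  */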

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit patterns like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
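
/* Worked example (illustrative, not from the original source): for
   mask = ~(unsigned HOST_WIDE_INT) 0x3ff (pattern 1...10...0), the
   complement is 0x3ff; adding its least significant bit yields 0x400,
   a power of two, so the mask is accepted.  An alternating pattern
   such as 0xf0f0f0f0 fails the power-of-two test and is rejected.  */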

/* True iff depi can be used to compute (reg | MASK).  */
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
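
/* Worked example (illustrative, not from the original source): depi can
   set one contiguous run of bits, so a mask such as 0x00ff0000 passes
   (0x00ff0000 plus its least significant bit, 0x00010000, gives
   0x01000000, a power of two), while 0x00ff00ff is rejected.  */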

/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx_insn *insn;

      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
	 and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
	{
	  /* Extract LABEL_REF.  */
	  if (GET_CODE (orig) == CONST)
	    orig = XEXP (XEXP (orig, 0), 0);
	  /* Extract CODE_LABEL.  */
	  orig = XEXP (orig, 0);
	  add_reg_note (insn, REG_LABEL_OPERAND, orig);
	  /* Make sure we have a label and not a note.  */
	  if (LABEL_P (orig))
	    LABEL_NUSES (orig)++;
	}
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx_insn *insn;
      rtx tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
	 result.  This allows the sequence to be deleted when the final
	 result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
		 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
	{
	  /* Force function label into memory in word mode.  */
	  orig = XEXP (force_const_mem (word_mode, orig), 0);
	  /* Load plabel address from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	  emit_move_insn (reg, pic_ref);
	  /* Now load address of function descriptor.  */
	  pic_ref = gen_rtx_MEM (Pmode, reg);
	}
      else
	{
	  /* Load symbol reference from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	}

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant (Pmode, base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}
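
/* Illustrative sketch (the assembly mnemonics are an assumption for
   exposition, not taken from this file): for a plain symbol reference
   the HIGH/LO_SUM pair above corresponds to a DLT load such as

     addil LT'sym,%r19        ; add left part to the PIC register
     ldw   RT'sym(%r1),%r28   ; load the address from the DLT slot

   with the REG_EQUAL note recording the original symbol so later
   passes can still identify the value.  */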

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
			   LCT_CONST, Pmode, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, tmp, t1, t2, tp;
  rtx_insn *insn;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
      case TLS_MODEL_GLOBAL_DYNAMIC:
	tmp = gen_reg_rtx (Pmode);
	if (flag_pic)
	  emit_insn (gen_tgd_load_pic (tmp, addr));
	else
	  emit_insn (gen_tgd_load (tmp, addr));
	ret = hppa_tls_call (tmp);
	break;

      case TLS_MODEL_LOCAL_DYNAMIC:
	ret = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	start_sequence ();
	if (flag_pic)
	  emit_insn (gen_tld_load_pic (tmp, addr));
	else
	  emit_insn (gen_tld_load (tmp, addr));
	t1 = hppa_tls_call (tmp);
	insn = get_insns ();
	end_sequence ();
	t2 = gen_reg_rtx (Pmode);
	emit_libcall_block (insn, t2, t1,
			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					    UNSPEC_TLSLDBASE));
	emit_insn (gen_tld_offset_load (ret, addr, t2));
	break;

      case TLS_MODEL_INITIAL_EXEC:
	tp = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	if (flag_pic)
	  emit_insn (gen_tie_load_pic (tmp, addr));
	else
	  emit_insn (gen_tie_load (tmp, addr));
	emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
	break;

      case TLS_MODEL_LOCAL_EXEC:
	tp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	emit_insn (gen_tle_load (ret, addr, tp));
	break;

      default:
	gcc_unreachable ();
    }

  return ret;
}

/* Helper for hppa_legitimize_address.  Given X, return true if it
   is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.

   These respectively represent canonical shift-add rtxs or scaled
   memory addresses.  */
static bool
mem_shadd_or_shadd_rtx_p (rtx x)
{
  return ((GET_CODE (x) == ASHIFT
	   || GET_CODE (x) == MULT)
	  && GET_CODE (XEXP (x, 1)) == CONST_INT
	  && ((GET_CODE (x) == ASHIFT
	       && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
	      || (GET_CODE (x) == MULT
		  && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
}
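
/* Illustrative examples (not from the original source): both
   (ashift (reg) (const_int 2)) and its scaled-address equivalent
   (mult (reg) (const_int 4)) satisfy this predicate; shifts by 1, 2
   or 3 correspond to multiplies by 2, 4 or 8.  */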

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= 16
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Note that the addresses passed into hppa_legitimize_address always
   come from a MEM, so we only have to match the MULT form on incoming
   addresses.  But to be future proof we also match the ASHIFT form.

   However, this routine always places those shift-add sequences into
   registers, so we have to generate the ASHIFT form as our output.

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
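
/* Worked example (illustrative, not from the original source): for an
   integer-mode reference to X + 0x4321, the mask is 0x3fff and
   0x4321 & 0x3fff = 0x321, below the halfway point 0x2000, so we round
   down to Y = 0x4000.  Z = X + 0x4000 is computed once and the
   reference becomes memory (Z + 0x321), whose displacement fits in 14
   bits; other nearby references can then CSE the same Z.  */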

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine cannot
	 handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (mem_shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
      && (OBJECT_P (XEXP (x, 1))
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
      if (GET_CODE (XEXP (x, 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      rtx reg1, reg2;
      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg2,
						      GEN_INT (shift_val)),
				      reg1));
    }

  /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
      && (mode == SFmode || mode == DFmode))
    {
      int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));

      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      /* Try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_ASHIFT (Pmode,
					      XEXP (XEXP (XEXP (x, 0), 0), 0),
					      GEN_INT (shift_val)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  int val = INTVAL (XEXP (idx, 1));
	  val /= (1 << shift_val);

	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_ASHIFT (Pmode, reg1,
						    GEN_INT (shift_val)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode,
			    gen_rtx_PLUS (Pmode,
					  gen_rtx_ASHIFT (Pmode, reg2,
							  GEN_INT (shift_val)),
					  reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg1,
						      GEN_INT (shift_val)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
    }
  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange
     the terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (mem_shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big but can be divided evenly by shadd_const,
	     it can instead be added to (reg), which allows more scaled
	     indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
		reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode,
							 reg2,
							 GEN_INT (shift_val)),
					 reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
	    {
	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode, regx2,
							 GEN_INT (shift_val)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}

/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			 reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
	   || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}

/* For the HPPA, REG and REG+CONST is cost 0
   and addresses involving symbolic constants are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as pa_legitimate_address_p.  */

static int
hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
		   addr_space_t as ATTRIBUTE_UNUSED,
		   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}

/* Return true if X represents a (possibly non-canonical) shNadd pattern.
   The machine mode of X is known to be SImode or DImode.  */

static bool
hppa_rtx_costs_shadd_p (rtx x)
{
  if (GET_CODE (x) != PLUS
      || !REG_P (XEXP (x, 1)))
    return false;
  rtx op0 = XEXP (x, 0);
  if (GET_CODE (op0) == ASHIFT
      && CONST_INT_P (XEXP (op0, 1))
      && REG_P (XEXP (op0, 0)))
    {
      unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1));
      return x == 1 || x == 2 || x == 3;
    }
  if (GET_CODE (op0) == MULT
      && CONST_INT_P (XEXP (op0, 1))
      && REG_P (XEXP (op0, 0)))
    {
      unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1));
      return x == 2 || x == 4 || x == 8;
    }
  return false;
}
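
/* Illustrative examples (not from the original source):
   (plus (ashift (reg) (const_int 3)) (reg)) and its non-canonical
   equivalent (plus (mult (reg) (const_int 8)) (reg)) both match, and
   each can be implemented with a single sh3add instruction.  */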
1531 
1532 /* Compute a (partial) cost for rtx X.  Return true if the complete
1533    cost has been computed, and false if subexpressions should be
1534    scanned.  In either case, *TOTAL contains the cost result.  */
1535 
1536 static bool
hppa_rtx_costs(rtx x,machine_mode mode,int outer_code,int opno ATTRIBUTE_UNUSED,int * total,bool speed)1537 hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
1538 		int opno ATTRIBUTE_UNUSED,
1539 		int *total, bool speed)
1540 {
1541   int code = GET_CODE (x);
1542 
1543   switch (code)
1544     {
1545     case CONST_INT:
1546       if (outer_code == SET)
1547 	*total = COSTS_N_INSNS (1);
1548       else if (INTVAL (x) == 0)
1549 	*total = 0;
1550       else if (INT_14_BITS (x))
1551 	*total = 1;
1552       else
1553 	*total = 2;
1554       return true;
1555 
1556     case HIGH:
1557       *total = 2;
1558       return true;
1559 
1560     case CONST:
1561     case LABEL_REF:
1562     case SYMBOL_REF:
1563       *total = 4;
1564       return true;
1565 
1566     case CONST_DOUBLE:
1567       if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1568 	  && outer_code != SET)
1569 	*total = 0;
1570       else
1571 	*total = 8;
1572       return true;
1573 
1574     case MULT:
1575       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1576 	{
1577 	  *total = COSTS_N_INSNS (3);
1578 	}
1579       else if (mode == DImode)
1580 	{
1581 	  if (TARGET_PA_11 && !TARGET_SOFT_FLOAT && !TARGET_SOFT_MULT)
1582 	    *total = COSTS_N_INSNS (25);
1583 	  else
1584 	    *total = COSTS_N_INSNS (80);
1585 	}
1586       else
1587 	{
1588 	  if (TARGET_PA_11 && !TARGET_SOFT_FLOAT && !TARGET_SOFT_MULT)
1589 	    *total = COSTS_N_INSNS (8);
1590 	  else
1591 	    *total = COSTS_N_INSNS (20);
1592 	}
1593       return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1));
1594 
1595     case DIV:
1596       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1597 	{
1598 	  *total = COSTS_N_INSNS (14);
1599 	  return false;
1600 	}
1601       /* FALLTHRU */
1602 
1603     case UDIV:
1604     case MOD:
1605     case UMOD:
1606       /* A mode size N times larger than SImode needs O(N*N) more insns.  */
1607       if (mode == DImode)
1608 	*total = COSTS_N_INSNS (240);
1609       else
1610 	*total = COSTS_N_INSNS (60);
1611       return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1));
1612 
1613     case PLUS: /* This includes shNadd insns.  */
1614     case MINUS:
1615       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1616 	*total = COSTS_N_INSNS (3);
1617       else if (mode == DImode)
1618 	{
1619 	  if (TARGET_64BIT)
1620 	    {
1621 	      *total = COSTS_N_INSNS (1);
1622 	      /* Handle shladd,l instructions.  */
1623 	      if (hppa_rtx_costs_shadd_p (x))
1624 		return true;
1625 	    }
1626 	  else
1627 	    *total = COSTS_N_INSNS (2);
1628 	}
1629       else
1630 	{
1631 	  *total = COSTS_N_INSNS (1);
1632 	  /* Handle shNadd instructions.  */
1633 	  if (hppa_rtx_costs_shadd_p (x))
1634 	    return true;
1635 	}
1636       return REG_P (XEXP (x, 0))
1637 	     && (REG_P (XEXP (x, 1))
1638 		 || CONST_INT_P (XEXP (x, 1)));
1639 
1640     case ASHIFT:
1641       if (mode == DImode)
1642 	{
1643 	  if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1644 	    {
1645 	      if (TARGET_64BIT)
1646 		*total = COSTS_N_INSNS (1);
1647 	      else
1648 		*total = COSTS_N_INSNS (2);
1649 	      return true;
1650 	    }
1651 	  else if (TARGET_64BIT)
1652 	    *total = COSTS_N_INSNS (3);
1653 	  else if (speed)
1654 	    *total = COSTS_N_INSNS (13);
1655 	  else
1656 	    *total = COSTS_N_INSNS (18);
1657 	}
1658       else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1659 	{
1660 	  if (TARGET_64BIT)
1661 	    *total = COSTS_N_INSNS (2);
1662 	  else
1663 	    *total = COSTS_N_INSNS (1);
1664 	  return true;
1665 	}
1666       else if (TARGET_64BIT)
1667 	*total = COSTS_N_INSNS (4);
1668       else
1669 	*total = COSTS_N_INSNS (2);
1670       return REG_P (XEXP (x, 0))
1671 	     && (REG_P (XEXP (x, 1))
1672 		 || CONST_INT_P (XEXP (x, 1)));
1673 
1674     case ASHIFTRT:
1675       if (mode == DImode)
1676 	{
1677 	  if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1678 	    {
1679 	      if (TARGET_64BIT)
1680 		*total = COSTS_N_INSNS (1);
1681 	      else
1682 		*total = COSTS_N_INSNS (2);
1683 	      return true;
1684 	    }
1685 	  else if (TARGET_64BIT)
1686 	    *total = COSTS_N_INSNS (3);
1687 	  else if (speed)
1688 	    *total = COSTS_N_INSNS (14);
1689 	  else
1690 	    *total = COSTS_N_INSNS (19);
1691 	}
1692       else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1693 	{
1694 	  if (TARGET_64BIT)
1695 	    *total = COSTS_N_INSNS (2);
1696 	  else
1697 	    *total = COSTS_N_INSNS (1);
1698 	  return true;
1699 	}
1700       else if (TARGET_64BIT)
1701 	*total = COSTS_N_INSNS (4);
1702       else
1703 	*total = COSTS_N_INSNS (2);
1704       return REG_P (XEXP (x, 0))
1705 	     && (REG_P (XEXP (x, 1))
1706 		 || CONST_INT_P (XEXP (x, 1)));
1707 
1708     case LSHIFTRT:
1709       if (mode == DImode)
1710 	{
1711 	  if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1712 	    {
1713 	      if (TARGET_64BIT)
1714 		*total = COSTS_N_INSNS (1);
1715 	      else
1716 		*total = COSTS_N_INSNS (2);
1717 	      return true;
1718 	    }
1719 	  else if (TARGET_64BIT)
1720 	    *total = COSTS_N_INSNS (2);
1721 	  else if (speed)
1722 	    *total = COSTS_N_INSNS (12);
1723 	  else
1724 	    *total = COSTS_N_INSNS (15);
1725 	}
1726       else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1727 	{
1728 	  *total = COSTS_N_INSNS (1);
1729 	  return true;
1730 	}
1731       else if (TARGET_64BIT)
1732 	*total = COSTS_N_INSNS (3);
1733       else
1734 	*total = COSTS_N_INSNS (2);
1735       return REG_P (XEXP (x, 0))
1736 	     && (REG_P (XEXP (x, 1))
1737 		 || CONST_INT_P (XEXP (x, 1)));
1738 
1739     default:
1740       return false;
1741     }
1742 }
1743 
1744 /* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
1745    new rtx with the correct mode.  */
1746 static inline rtx
1747 force_mode (machine_mode mode, rtx orig)
1748 {
1749   if (mode == GET_MODE (orig))
1750     return orig;
1751 
1752   gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
1753 
1754   return gen_rtx_REG (mode, REGNO (orig));
1755 }
1756 
1757 /* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
1758 
1759 static bool
1760 pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1761 {
1762   return tls_referenced_p (x);
1763 }
1764 
1765 /* Emit insns to move operands[1] into operands[0].
1766 
1767    Return 1 if we have written out everything that needs to be done to
1768    do the move.  Otherwise, return 0 and the caller will emit the move
1769    normally.
1770 
1771    Note SCRATCH_REG may not be in the proper mode depending on how it
1772    will be used.  This routine is responsible for creating a new copy
1773    of SCRATCH_REG in the proper mode.  */
1774 
1775 int
1776 pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
1777 {
1778   rtx operand0 = operands[0];
1779   rtx operand1 = operands[1];
1780   rtx tem;
1781 
1782   /* We can only handle indexed addresses in the destination operand
1783      of floating point stores.  Thus, we need to break out indexed
1784      addresses from the destination operand.  */
1785   if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1786     {
1787       gcc_assert (can_create_pseudo_p ());
1788 
1789       tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1790       operand0 = replace_equiv_address (operand0, tem);
1791     }
1792 
1793   /* On targets with non-equivalent space registers, break out unscaled
1794      indexed addresses from the source operand before the final CSE.
1795      We have to do this because the REG_POINTER flag is not correctly
1796      carried through various optimization passes and CSE may substitute
1797      a pseudo without the pointer set for one with the pointer set.  As
1798      a result, we lose various opportunities to create insns with
1799      unscaled indexed addresses.  */
1800   if (!TARGET_NO_SPACE_REGS
1801       && !cse_not_expected
1802       && GET_CODE (operand1) == MEM
1803       && GET_CODE (XEXP (operand1, 0)) == PLUS
1804       && REG_P (XEXP (XEXP (operand1, 0), 0))
1805       && REG_P (XEXP (XEXP (operand1, 0), 1)))
1806     operand1
1807       = replace_equiv_address (operand1,
1808 			       copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1809 
1810   if (scratch_reg
1811       && reload_in_progress && GET_CODE (operand0) == REG
1812       && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1813     operand0 = reg_equiv_mem (REGNO (operand0));
1814   else if (scratch_reg
1815 	   && reload_in_progress && GET_CODE (operand0) == SUBREG
1816 	   && GET_CODE (SUBREG_REG (operand0)) == REG
1817 	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1818     {
1819      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1820 	the code which tracks sets/uses for delete_output_reload.  */
1821       rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1822 				 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
1823 				 SUBREG_BYTE (operand0));
1824       operand0 = alter_subreg (&temp, true);
1825     }
1826 
1827   if (scratch_reg
1828       && reload_in_progress && GET_CODE (operand1) == REG
1829       && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1830     operand1 = reg_equiv_mem (REGNO (operand1));
1831   else if (scratch_reg
1832 	   && reload_in_progress && GET_CODE (operand1) == SUBREG
1833 	   && GET_CODE (SUBREG_REG (operand1)) == REG
1834 	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1835     {
1836      /* We must not alter SUBREG_BYTE (operand1) since that would confuse
1837 	the code which tracks sets/uses for delete_output_reload.  */
1838       rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1839 				 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
1840 				 SUBREG_BYTE (operand1));
1841       operand1 = alter_subreg (&temp, true);
1842     }
1843 
1844   if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1845       && ((tem = find_replacement (&XEXP (operand0, 0)))
1846 	  != XEXP (operand0, 0)))
1847     operand0 = replace_equiv_address (operand0, tem);
1848 
1849   if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1850       && ((tem = find_replacement (&XEXP (operand1, 0)))
1851 	  != XEXP (operand1, 0)))
1852     operand1 = replace_equiv_address (operand1, tem);
1853 
1854   /* Handle secondary reloads for loads/stores of FP registers from
1855      REG+D addresses where D does not fit in 5 or 14 bits, including
1856      (subreg (mem (addr))) cases, and reloads for other unsupported
1857      memory operands.  */
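  /* For instance, a load such as (mem (plus (reg) (const_int 8192))) fits
     neither the 5-bit nor the 14-bit displacement forms, so the
     displacement (or the whole address) is first built in SCRATCH_REG.  */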
1858   if (scratch_reg
1859       && FP_REG_P (operand0)
1860       && (MEM_P (operand1)
1861 	  || (GET_CODE (operand1) == SUBREG
1862 	      && MEM_P (XEXP (operand1, 0)))))
1863     {
1864       rtx op1 = operand1;
1865 
1866       if (GET_CODE (op1) == SUBREG)
1867 	op1 = XEXP (op1, 0);
1868 
1869       if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
1870 	{
1871 	  if (!(TARGET_PA_20
1872 		&& !TARGET_ELF32
1873 		&& INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1874 	      && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
1875 	    {
1876 	      /* SCRATCH_REG will hold an address and maybe the actual data.
1877 		 We want it in WORD_MODE regardless of what mode it was
1878 		 originally given to us.  */
1879 	      scratch_reg = force_mode (word_mode, scratch_reg);
1880 
1881 	      /* D might not fit in 14 bits either; for such cases load D
1882 		 into scratch reg.  */
1883 	      if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1884 		{
1885 		  emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
1886 		  emit_move_insn (scratch_reg,
1887 				  gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
1888 						  Pmode,
1889 						  XEXP (XEXP (op1, 0), 0),
1890 						  scratch_reg));
1891 		}
1892 	      else
1893 		emit_move_insn (scratch_reg, XEXP (op1, 0));
1894 	      op1 = replace_equiv_address (op1, scratch_reg);
1895 	    }
1896 	}
1897       else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
1898 	       || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
1899 	       || IS_INDEX_ADDR_P (XEXP (op1, 0)))
1900 	{
1901 	  /* Load memory address into SCRATCH_REG.  */
1902 	  scratch_reg = force_mode (word_mode, scratch_reg);
1903 	  emit_move_insn (scratch_reg, XEXP (op1, 0));
1904 	  op1 = replace_equiv_address (op1, scratch_reg);
1905 	}
1906       emit_insn (gen_rtx_SET (operand0, op1));
1907       return 1;
1908     }
1909   else if (scratch_reg
1910 	   && FP_REG_P (operand1)
1911 	   && (MEM_P (operand0)
1912 	       || (GET_CODE (operand0) == SUBREG
1913 		   && MEM_P (XEXP (operand0, 0)))))
1914     {
1915       rtx op0 = operand0;
1916 
1917       if (GET_CODE (op0) == SUBREG)
1918 	op0 = XEXP (op0, 0);
1919 
1920       if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
1921 	{
1922 	  if (!(TARGET_PA_20
1923 		&& !TARGET_ELF32
1924 		&& INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1925 	      && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
1926 	    {
1927 	      /* SCRATCH_REG will hold an address and maybe the actual data.
1928 		 We want it in WORD_MODE regardless of what mode it was
1929 		 originally given to us.  */
1930 	      scratch_reg = force_mode (word_mode, scratch_reg);
1931 
1932 	      /* D might not fit in 14 bits either; for such cases load D
1933 		 into scratch reg.  */
1934 	      if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1935 		{
1936 		  emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
1937 		  emit_move_insn (scratch_reg,
1938 				  gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
1939 						  Pmode,
1940 						  XEXP (XEXP (op0, 0), 0),
1941 						  scratch_reg));
1942 		}
1943 	      else
1944 		emit_move_insn (scratch_reg, XEXP (op0, 0));
1945 	      op0 = replace_equiv_address (op0, scratch_reg);
1946 	    }
1947 	}
1948       else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
1949 	       || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
1950 	       || IS_INDEX_ADDR_P (XEXP (op0, 0)))
1951 	{
1952 	  /* Load memory address into SCRATCH_REG.  */
1953 	  scratch_reg = force_mode (word_mode, scratch_reg);
1954 	  emit_move_insn (scratch_reg, XEXP (op0, 0));
1955 	  op0 = replace_equiv_address (op0, scratch_reg);
1956 	}
1957       emit_insn (gen_rtx_SET (op0, operand1));
1958       return 1;
1959     }
1960   /* Handle secondary reloads for loads of FP registers from constant
1961      expressions by forcing the constant into memory.  For the most part,
1962      this is only necessary for SImode and DImode.
1963 
1964      Use scratch_reg to hold the address of the memory location.  */
1965   else if (scratch_reg
1966 	   && CONSTANT_P (operand1)
1967 	   && FP_REG_P (operand0))
1968     {
1969       rtx const_mem, xoperands[2];
1970 
1971       if (operand1 == CONST0_RTX (mode))
1972 	{
1973 	  emit_insn (gen_rtx_SET (operand0, operand1));
1974 	  return 1;
1975 	}
1976 
1977       /* SCRATCH_REG will hold an address and maybe the actual data.  We want
1978 	 it in WORD_MODE regardless of what mode it was originally given
1979 	 to us.  */
1980       scratch_reg = force_mode (word_mode, scratch_reg);
1981 
1982       /* Force the constant into memory and put the address of the
1983 	 memory location into scratch_reg.  */
1984       const_mem = force_const_mem (mode, operand1);
1985       xoperands[0] = scratch_reg;
1986       xoperands[1] = XEXP (const_mem, 0);
1987       pa_emit_move_sequence (xoperands, Pmode, 0);
1988 
1989       /* Now load the destination register.  */
1990       emit_insn (gen_rtx_SET (operand0,
1991 			      replace_equiv_address (const_mem, scratch_reg)));
1992       return 1;
1993     }
1994   /* Handle secondary reloads for SAR.  These occur when trying to load
1995      the SAR from memory or a constant.  */
1996   else if (scratch_reg
1997 	   && GET_CODE (operand0) == REG
1998 	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1999 	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
2000 	   && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
2001     {
2002       /* D might not fit in 14 bits either; for such cases load D into
2003 	 scratch reg.  */
2004       if (GET_CODE (operand1) == MEM
2005 	  && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
2006 	{
2007 	  /* We are reloading the address into the scratch register, so we
2008 	     want to make sure the scratch register is a full register.  */
2009 	  scratch_reg = force_mode (word_mode, scratch_reg);
2010 
2011 	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
2012 	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
2013 								        0)),
2014 						       Pmode,
2015 						       XEXP (XEXP (operand1, 0),
2016 						       0),
2017 						       scratch_reg));
2018 
2019 	  /* Now we are going to load the scratch register from memory;
2020 	     we want to load it in the same width as the original MEM,
2021 	     which must be the same as the width of the ultimate destination,
2022 	     OPERAND0.  */
2023 	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
2024 
2025 	  emit_move_insn (scratch_reg,
2026 			  replace_equiv_address (operand1, scratch_reg));
2027 	}
2028       else
2029 	{
2030 	  /* We want to load the scratch register using the same mode as
2031 	     the ultimate destination.  */
2032 	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
2033 
2034 	  emit_move_insn (scratch_reg, operand1);
2035 	}
2036 
2037       /* And emit the insn to set the ultimate destination.  We know that
2038 	 the scratch register has the same mode as the destination at this
2039 	 point.  */
2040       emit_move_insn (operand0, scratch_reg);
2041       return 1;
2042     }
2043 
2044   /* Handle the most common case: storing into a register.  */
2045   if (register_operand (operand0, mode))
2046     {
2047       /* Legitimize TLS symbol references.  This happens for references
2048 	 that aren't a legitimate constant.  */
2049       if (PA_SYMBOL_REF_TLS_P (operand1))
2050 	operand1 = legitimize_tls_address (operand1);
2051 
2052       if (register_operand (operand1, mode)
2053 	  || (GET_CODE (operand1) == CONST_INT
2054 	      && pa_cint_ok_for_move (UINTVAL (operand1)))
2055 	  || (operand1 == CONST0_RTX (mode))
2056 	  || (GET_CODE (operand1) == HIGH
2057 	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
2058 	  /* Only `general_operands' can come here, so MEM is ok.  */
2059 	  || GET_CODE (operand1) == MEM)
2060 	{
2061 	  /* Various sets are created during RTL generation which don't
2062 	     have the REG_POINTER flag correctly set.  After the CSE pass,
2063 	     instruction recognition can fail if we don't consistently
2064 	     set this flag when performing register copies.  This should
2065 	     also improve the opportunities for creating insns that use
2066 	     unscaled indexing.  */
2067 	  if (REG_P (operand0) && REG_P (operand1))
2068 	    {
2069 	      if (REG_POINTER (operand1)
2070 		  && !REG_POINTER (operand0)
2071 		  && !HARD_REGISTER_P (operand0))
2072 		copy_reg_pointer (operand0, operand1);
2073 	    }
2074 
2075 	  /* When MEMs are broken out, the REG_POINTER flag doesn't
2076 	     get set.  In some cases, we can set the REG_POINTER flag
2077 	     from the declaration for the MEM.  */
2078 	  if (REG_P (operand0)
2079 	      && GET_CODE (operand1) == MEM
2080 	      && !REG_POINTER (operand0))
2081 	    {
2082 	      tree decl = MEM_EXPR (operand1);
2083 
2084 	      /* Set the register pointer flag and register alignment
2085 		 if the declaration for this memory reference is a
2086 		 pointer type.  */
2087 	      if (decl)
2088 		{
2089 		  tree type;
2090 
2091 		  /* If this is a COMPONENT_REF, use the FIELD_DECL from
2092 		     tree operand 1.  */
2093 		  if (TREE_CODE (decl) == COMPONENT_REF)
2094 		    decl = TREE_OPERAND (decl, 1);
2095 
2096 		  type = TREE_TYPE (decl);
2097 		  type = strip_array_types (type);
2098 
2099 		  if (POINTER_TYPE_P (type))
2100 		    mark_reg_pointer (operand0, BITS_PER_UNIT);
2101 		}
2102 	    }
2103 
2104 	  emit_insn (gen_rtx_SET (operand0, operand1));
2105 	  return 1;
2106 	}
2107     }
2108   else if (GET_CODE (operand0) == MEM)
2109     {
2110       if (mode == DFmode && operand1 == CONST0_RTX (mode)
2111 	  && !(reload_in_progress || reload_completed))
2112 	{
2113 	  rtx temp = gen_reg_rtx (DFmode);
2114 
2115 	  emit_insn (gen_rtx_SET (temp, operand1));
2116 	  emit_insn (gen_rtx_SET (operand0, temp));
2117 	  return 1;
2118 	}
2119       if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
2120 	{
2121 	  /* Run this case quickly.  */
2122 	  emit_insn (gen_rtx_SET (operand0, operand1));
2123 	  return 1;
2124 	}
2125       if (! (reload_in_progress || reload_completed))
2126 	{
2127 	  operands[0] = validize_mem (operand0);
2128 	  operands[1] = operand1 = force_reg (mode, operand1);
2129 	}
2130     }
2131 
2132   /* Simplify the source if we need to.
2133      Note we do have to handle function labels here, even though we do
2134      not consider them legitimate constants.  Loop optimizations can
2135      call emit_move_xxx with one as a source.  */
2136   if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
2137       || (GET_CODE (operand1) == HIGH
2138 	  && symbolic_operand (XEXP (operand1, 0), mode))
2139       || function_label_operand (operand1, VOIDmode)
2140       || tls_referenced_p (operand1))
2141     {
2142       int ishighonly = 0;
2143 
2144       if (GET_CODE (operand1) == HIGH)
2145 	{
2146 	  ishighonly = 1;
2147 	  operand1 = XEXP (operand1, 0);
2148 	}
2149       if (symbolic_operand (operand1, mode))
2150 	{
2151 	  /* Argh.  The assembler and linker can't handle arithmetic
2152 	     involving plabels.
2153 
2154 	     So we force the plabel into memory, load operand0 from
2155 	     the memory location, then add in the constant part.  */
2156 	  if ((GET_CODE (operand1) == CONST
2157 	       && GET_CODE (XEXP (operand1, 0)) == PLUS
2158 	       && function_label_operand (XEXP (XEXP (operand1, 0), 0),
2159 					  VOIDmode))
2160 	      || function_label_operand (operand1, VOIDmode))
2161 	    {
2162 	      rtx temp, const_part;
2163 
2164 	      /* Figure out what (if any) scratch register to use.  */
2165 	      if (reload_in_progress || reload_completed)
2166 		{
2167 		  scratch_reg = scratch_reg ? scratch_reg : operand0;
2168 		  /* SCRATCH_REG will hold an address and maybe the actual
2169 		     data.  We want it in WORD_MODE regardless of what mode it
2170 		     was originally given to us.  */
2171 		  scratch_reg = force_mode (word_mode, scratch_reg);
2172 		}
2173 	      else if (flag_pic)
2174 		scratch_reg = gen_reg_rtx (Pmode);
2175 
2176 	      if (GET_CODE (operand1) == CONST)
2177 		{
2178 		  /* Save away the constant part of the expression.  */
2179 		  const_part = XEXP (XEXP (operand1, 0), 1);
2180 		  gcc_assert (GET_CODE (const_part) == CONST_INT);
2181 
2182 		  /* Force the function label into memory.  */
2183 		  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2184 		}
2185 	      else
2186 		{
2187 		  /* No constant part.  */
2188 		  const_part = NULL_RTX;
2189 
2190 		  /* Force the function label into memory.  */
2191 		  temp = force_const_mem (mode, operand1);
2192 		}
2193 
2194 
2195 	      /* Get the address of the memory location.  PIC-ify it if
2196 		 necessary.  */
2197 	      temp = XEXP (temp, 0);
2198 	      if (flag_pic)
2199 		temp = legitimize_pic_address (temp, mode, scratch_reg);
2200 
2201 	      /* Put the address of the memory location into our destination
2202 		 register.  */
2203 	      operands[1] = temp;
2204 	      pa_emit_move_sequence (operands, mode, scratch_reg);
2205 
2206 	      /* Now load from the memory location into our destination
2207 		 register.  */
2208 	      operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2209 	      pa_emit_move_sequence (operands, mode, scratch_reg);
2210 
2211 	      /* And add back in the constant part.  */
2212 	      if (const_part != NULL_RTX)
2213 		expand_inc (operand0, const_part);
2214 
2215 	      return 1;
2216 	    }
2217 
2218 	  if (flag_pic)
2219 	    {
2220 	      rtx_insn *insn;
2221 	      rtx temp;
2222 
2223 	      if (reload_in_progress || reload_completed)
2224 		{
2225 		  temp = scratch_reg ? scratch_reg : operand0;
2226 		  /* TEMP will hold an address and maybe the actual
2227 		     data.  We want it in WORD_MODE regardless of what mode it
2228 		     was originally given to us.  */
2229 		  temp = force_mode (word_mode, temp);
2230 		}
2231 	      else
2232 		temp = gen_reg_rtx (Pmode);
2233 
2234 	      /* Force (const (plus (symbol) (const_int))) to memory
2235 	         if the const_int will not fit in 14 bits.  Although
2236 		 this requires a relocation, the instruction sequence
2237 		 needed to load the value is shorter.  */
2238 	      if (GET_CODE (operand1) == CONST
2239 		  && GET_CODE (XEXP (operand1, 0)) == PLUS
2240 		  && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2241 		  && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2242 		{
2243 		  rtx x, m = force_const_mem (mode, operand1);
2244 
2245 		  x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2246 		  x = replace_equiv_address (m, x);
2247 		  insn = emit_move_insn (operand0, x);
2248 		}
2249 	      else
2250 		{
2251 		  operands[1] = legitimize_pic_address (operand1, mode, temp);
2252 		  if (REG_P (operand0) && REG_P (operands[1]))
2253 		    copy_reg_pointer (operand0, operands[1]);
2254 		  insn = emit_move_insn (operand0, operands[1]);
2255 		}
2256 
2257 	      /* Put a REG_EQUAL note on this insn.  */
2258 	      set_unique_reg_note (insn, REG_EQUAL, operand1);
2259 	    }
2260 	  /* On the HPPA, references to data space are supposed to use dp,
2261 	     register 27, but showing it in the RTL inhibits various cse
2262 	     and loop optimizations.  */
2263 	  else
2264 	    {
2265 	      rtx temp, set;
2266 
2267 	      if (reload_in_progress || reload_completed)
2268 		{
2269 		  temp = scratch_reg ? scratch_reg : operand0;
2270 		  /* TEMP will hold an address and maybe the actual
2271 		     data.  We want it in WORD_MODE regardless of what mode it
2272 		     was originally given to us.  */
2273 		  temp = force_mode (word_mode, temp);
2274 		}
2275 	      else
2276 		temp = gen_reg_rtx (mode);
2277 
2278 	      /* Loading a SYMBOL_REF into a register makes that register
2279 		 safe to be used as the base in an indexed address.
2280 
2281 		 Don't mark hard registers though.  That loses.  */
2282 	      if (GET_CODE (operand0) == REG
2283 		  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2284 		mark_reg_pointer (operand0, BITS_PER_UNIT);
2285 	      if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2286 		mark_reg_pointer (temp, BITS_PER_UNIT);
2287 
2288 	      if (ishighonly)
2289 		set = gen_rtx_SET (operand0, temp);
2290 	      else
2291 		set = gen_rtx_SET (operand0,
2292 				   gen_rtx_LO_SUM (mode, temp, operand1));
2293 
2294 	      emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2295 	      emit_insn (set);
2296 
2297 	    }
2298 	  return 1;
2299 	}
2300       else if (tls_referenced_p (operand1))
2301 	{
2302 	  rtx tmp = operand1;
2303 	  rtx addend = NULL;
2304 
2305 	  if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2306 	    {
2307 	      addend = XEXP (XEXP (tmp, 0), 1);
2308 	      tmp = XEXP (XEXP (tmp, 0), 0);
2309 	    }
2310 
2311 	  gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2312 	  tmp = legitimize_tls_address (tmp);
2313 	  if (addend)
2314 	    {
2315 	      tmp = gen_rtx_PLUS (mode, tmp, addend);
2316 	      tmp = force_operand (tmp, operands[0]);
2317 	    }
2318 	  operands[1] = tmp;
2319 	}
2320       else if (GET_CODE (operand1) != CONST_INT
2321 	       || !pa_cint_ok_for_move (UINTVAL (operand1)))
2322 	{
2323 	  rtx temp;
2324 	  rtx_insn *insn;
2325 	  rtx op1 = operand1;
2326 	  HOST_WIDE_INT value = 0;
2327 	  HOST_WIDE_INT insv = 0;
2328 	  int insert = 0;
2329 
2330 	  if (GET_CODE (operand1) == CONST_INT)
2331 	    value = INTVAL (operand1);
2332 
2333 	  if (TARGET_64BIT
2334 	      && GET_CODE (operand1) == CONST_INT
2335 	      && HOST_BITS_PER_WIDE_INT > 32
2336 	      && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2337 	    {
2338 	      HOST_WIDE_INT nval;
2339 
2340 	      /* Extract the low order 32 bits of the value and sign extend.
2341 		 If the new value is the same as the original value, we can
2342 		 use the original value as-is.  If the new value is
2343 		 different, we use it and insert the most-significant 32-bits
2344 		 of the original value into the final result.  */
2345 	      nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2346 		      ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2347 	      if (value != nval)
2348 		{
2349 #if HOST_BITS_PER_WIDE_INT > 32
2350 		  insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2351 #endif
2352 		  insert = 1;
2353 		  value = nval;
2354 		  operand1 = GEN_INT (nval);
2355 		}
2356 	    }
2357 
2358 	  if (reload_in_progress || reload_completed)
2359 	    temp = scratch_reg ? scratch_reg : operand0;
2360 	  else
2361 	    temp = gen_reg_rtx (mode);
2362 
2363 	  /* We don't directly split DImode constants on 32-bit targets
2364 	     because PLUS uses an 11-bit immediate and the insn sequence
2365 	     generated is not as efficient as the one using HIGH/LO_SUM.  */
2366 	  if (GET_CODE (operand1) == CONST_INT
2367 	      && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2368 	      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2369 	      && !insert)
2370 	    {
2371 	      /* Directly break constant into high and low parts.  This
2372 		 provides better optimization opportunities because various
2373 		 passes recognize constants split with PLUS but not LO_SUM.
2374 		 We use a 14-bit signed low part except when the addition
2375 		 of 0x4000 to the high part might change the sign of the
2376 		 high part.  */
2377 	      HOST_WIDE_INT low = value & 0x3fff;
2378 	      HOST_WIDE_INT high = value & ~ 0x3fff;
2379 
2380 	      if (low >= 0x2000)
2381 		{
2382 		  if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2383 		    high += 0x2000;
2384 		  else
2385 		    high += 0x4000;
2386 		}
2387 
2388 	      low = value - high;
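	      /* For example, VALUE 0x12347678 has low part 0x3678 >= 0x2000,
		 so HIGH becomes 0x12348000 and LOW the negative 14-bit
		 offset -0x988.  */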
2389 
2390 	      emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
2391 	      operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2392 	    }
2393 	  else
2394 	    {
2395 	      emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2396 	      operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2397 	    }
2398 
2399 	  insn = emit_move_insn (operands[0], operands[1]);
2400 
2401 	  /* Now insert the most significant 32 bits of the value
2402 	     into the register.  When we don't have a second register
2403 	     available, it could take up to nine instructions to load
2404 	     a 64-bit integer constant.  Prior to reload, we force
2405 	     constants that would take more than three instructions
2406 	     to load to the constant pool.  During and after reload,
2407 	     we have to handle all possible values.  */
2408 	  if (insert)
2409 	    {
2410 	      /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2411 		 register and the value to be inserted is outside the
2412 		 range that can be loaded with three depdi instructions.  */
2413 	      if (temp != operand0 && (insv >= 16384 || insv < -16384))
2414 		{
2415 		  operand1 = GEN_INT (insv);
2416 
2417 		  emit_insn (gen_rtx_SET (temp,
2418 					  gen_rtx_HIGH (mode, operand1)));
2419 		  emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2420 		  if (mode == DImode)
2421 		    insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2422 						  const0_rtx, temp));
2423 		  else
2424 		    insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2425 						  const0_rtx, temp));
2426 		}
2427 	      else
2428 		{
2429 		  int len = 5, pos = 27;
2430 
2431 		  /* Insert the bits using the depdi instruction.  */
2432 		  while (pos >= 0)
2433 		    {
2434 		      HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2435 		      HOST_WIDE_INT sign = v5 < 0;
2436 
2437 		      /* Left extend the insertion.  */
2438 		      insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2439 		      while (pos > 0 && (insv & 1) == sign)
2440 			{
2441 			  insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2442 			  len += 1;
2443 			  pos -= 1;
2444 			}
2445 
2446 		      if (mode == DImode)
2447 			insn = emit_insn (gen_insvdi (operand0,
2448 						      GEN_INT (len),
2449 						      GEN_INT (pos),
2450 						      GEN_INT (v5)));
2451 		      else
2452 			insn = emit_insn (gen_insvsi (operand0,
2453 						      GEN_INT (len),
2454 						      GEN_INT (pos),
2455 						      GEN_INT (v5)));
2456 
2457 		      len = pos > 0 && pos < 5 ? pos : 5;
2458 		      pos -= len;
2459 		    }
2460 		}
2461 	    }
2462 
2463 	  set_unique_reg_note (insn, REG_EQUAL, op1);
2464 
2465 	  return 1;
2466 	}
2467     }
2468   /* Now have insn-emit do whatever it normally does.  */
2469   return 0;
2470 }
2471 
2472 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2473    it will need a link/runtime reloc).  */
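/* For example, a static initializer such as "&foo + 4" is a PLUS_EXPR
   over an ADDR_EXPR, so it needs a relocation.  */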
2474 
2475 int
2476 pa_reloc_needed (tree exp)
2477 {
2478   int reloc = 0;
2479 
2480   switch (TREE_CODE (exp))
2481     {
2482     case ADDR_EXPR:
2483       return 1;
2484 
2485     case POINTER_PLUS_EXPR:
2486     case PLUS_EXPR:
2487     case MINUS_EXPR:
2488       reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2489       reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2490       break;
2491 
2492     CASE_CONVERT:
2493     case NON_LVALUE_EXPR:
2494       reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2495       break;
2496 
2497     case CONSTRUCTOR:
2498       {
2499 	tree value;
2500 	unsigned HOST_WIDE_INT ix;
2501 
2502 	FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2503 	  if (value)
2504 	    reloc |= pa_reloc_needed (value);
2505       }
2506       break;
2507 
2508     case ERROR_MARK:
2509       break;
2510 
2511     default:
2512       break;
2513     }
2514   return reloc;
2515 }
2516 
2517 
2518 /* Return the best assembler insn template
2519    for moving operands[1] into operands[0] as a fullword.  */
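/* For example, a 14-bit constant is a single "ldi", a constant with the
   low eleven bits clear is a single "ldil", and a general 32-bit
   constant needs the two-insn "ldil"/"ldo" pair.  */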
2520 const char *
2521 pa_singlemove_string (rtx *operands)
2522 {
2523   HOST_WIDE_INT intval;
2524 
2525   if (GET_CODE (operands[0]) == MEM)
2526     return "stw %r1,%0";
2527   if (GET_CODE (operands[1]) == MEM)
2528     return "ldw %1,%0";
2529   if (GET_CODE (operands[1]) == CONST_DOUBLE)
2530     {
2531       long i;
2532 
2533       gcc_assert (GET_MODE (operands[1]) == SFmode);
2534 
2535       /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2536 	 bit pattern.  */
2537       REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);
2538 
2539       operands[1] = GEN_INT (i);
2540       /* Fall through to CONST_INT case.  */
2541     }
2542   if (GET_CODE (operands[1]) == CONST_INT)
2543     {
2544       intval = INTVAL (operands[1]);
2545 
2546       if (VAL_14_BITS_P (intval))
2547 	return "ldi %1,%0";
2548       else if ((intval & 0x7ff) == 0)
2549 	return "ldil L'%1,%0";
2550       else if (pa_zdepi_cint_p (intval))
2551 	return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2552       else
2553 	return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2554     }
2555   return "copy %1,%0";
2556 }
2557 
2558 
2559 /* Compute position (in OP[1]) and width (in OP[2])
2560    useful for copying IMM to a register using the zdepi
2561    instructions.  Store the immediate value to insert in OP[0].  */
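/* For example, IMM = 0x3e0 (a run of five ones starting at bit 5) gives
   OP[0] = -1, OP[1] = 26 (the PA bit number of the field's rightmost
   bit) and OP[2] = 5: depositing the low 5 bits of -1 there with zero
   extension reconstructs 0x3e0.  */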
2562 static void
2563 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2564 {
2565   int lsb, len;
2566 
2567   /* Find the least significant set bit in IMM.  */
2568   for (lsb = 0; lsb < 32; lsb++)
2569     {
2570       if ((imm & 1) != 0)
2571         break;
2572       imm >>= 1;
2573     }
2574 
2575   /* Choose variants based on *sign* of the 5-bit field.  */
2576   if ((imm & 0x10) == 0)
2577     len = (lsb <= 28) ? 4 : 32 - lsb;
2578   else
2579     {
2580       /* Find the width of the bitstring in IMM.  */
2581       for (len = 5; len < 32 - lsb; len++)
2582 	{
2583 	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2584 	    break;
2585 	}
2586 
2587       /* Sign extend IMM as a 5-bit value.  */
2588       imm = (imm & 0xf) - 0x10;
2589     }
2590 
2591   op[0] = imm;
2592   op[1] = 31 - lsb;
2593   op[2] = len;
2594 }
2595 
2596 /* Compute position (in OP[1]) and width (in OP[2])
2597    useful for copying IMM to a register using the depdi,z
2598    instructions.  Store the immediate value to insert in OP[0].  */
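/* For example, IMM = 0xffff0000 gives OP[0] = -1, OP[1] = 47 and
   OP[2] = 16: a "depdi,z -1,47,16" rebuilds the sixteen-one bitstring
   starting at bit 16.  */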
2599 
2600 static void
2601 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2602 {
2603   int lsb, len, maxlen;
2604 
2605   maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2606 
2607   /* Find the least significant set bit in IMM.  */
2608   for (lsb = 0; lsb < maxlen; lsb++)
2609     {
2610       if ((imm & 1) != 0)
2611         break;
2612       imm >>= 1;
2613     }
2614 
2615   /* Choose variants based on *sign* of the 5-bit field.  */
2616   if ((imm & 0x10) == 0)
2617     len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2618   else
2619     {
2620       /* Find the width of the bitstring in IMM.  */
2621       for (len = 5; len < maxlen - lsb; len++)
2622 	{
2623 	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2624 	    break;
2625 	}
2626 
2627       /* Extend length if host is narrow and IMM is negative.  */
2628       if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2629 	len += 32;
2630 
2631       /* Sign extend IMM as a 5-bit value.  */
2632       imm = (imm & 0xf) - 0x10;
2633     }
2634 
2635   op[0] = imm;
2636   op[1] = 63 - lsb;
2637   op[2] = len;
2638 }
2639 
2640 /* Output assembler code to perform a doubleword move insn
2641    with operands OPERANDS.  */
2642 
2643 const char *
2644 pa_output_move_double (rtx *operands)
2645 {
2646   enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2647   rtx latehalf[2];
2648   rtx addreg0 = 0, addreg1 = 0;
2649   int highonly = 0;
2650 
2651   /* First classify both operands.  */
2652 
2653   if (REG_P (operands[0]))
2654     optype0 = REGOP;
2655   else if (offsettable_memref_p (operands[0]))
2656     optype0 = OFFSOP;
2657   else if (GET_CODE (operands[0]) == MEM)
2658     optype0 = MEMOP;
2659   else
2660     optype0 = RNDOP;
2661 
2662   if (REG_P (operands[1]))
2663     optype1 = REGOP;
2664   else if (CONSTANT_P (operands[1]))
2665     optype1 = CNSTOP;
2666   else if (offsettable_memref_p (operands[1]))
2667     optype1 = OFFSOP;
2668   else if (GET_CODE (operands[1]) == MEM)
2669     optype1 = MEMOP;
2670   else
2671     optype1 = RNDOP;
2672 
2673   /* Check for the cases that the operand constraints are not
2674      supposed to allow.  */
2675   gcc_assert (optype0 == REGOP || optype1 == REGOP);
2676 
2677   /* Handle copies between general and floating registers.  */
2678 
2679   if (optype0 == REGOP && optype1 == REGOP
2680       && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2681     {
2682       if (FP_REG_P (operands[0]))
2683 	{
2684 	  output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2685 	  output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2686 	  return "{fldds|fldd} -16(%%sp),%0";
2687 	}
2688       else
2689 	{
2690 	  output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2691 	  output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2692 	  return "{ldws|ldw} -12(%%sp),%R0";
2693 	}
2694     }
2695 
2696    /* Handle auto decrementing and incrementing loads and stores
2697      specifically, since the structure of the function doesn't work
2698      for them without major modification.  Do this better once we teach
2699      this port about the general inc/dec addressing of the PA.
2700      (This was written by tege.  Chide him if it doesn't work.)  */
2701 
2702   if (optype0 == MEMOP)
2703     {
2704       /* We have to output the address syntax ourselves, since print_operand
2705 	 doesn't deal with the addresses we want to use.  Fix this later.  */
2706 
2707       rtx addr = XEXP (operands[0], 0);
2708       if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2709 	{
2710 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2711 
2712 	  operands[0] = XEXP (addr, 0);
2713 	  gcc_assert (GET_CODE (operands[1]) == REG
2714 		      && GET_CODE (operands[0]) == REG);
2715 
2716 	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2717 
2718 	  /* No overlap between high target register and address
2719 	     register.  (We do this in a non-obvious way to
2720 	     save a register file writeback)  */
2721 	  if (GET_CODE (addr) == POST_INC)
2722 	    return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2723 	  return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2724 	}
2725       else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2726 	{
2727 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2728 
2729 	  operands[0] = XEXP (addr, 0);
2730 	  gcc_assert (GET_CODE (operands[1]) == REG
2731 		      && GET_CODE (operands[0]) == REG);
2732 
2733 	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2734 	  /* No overlap between high target register and address
2735 	     register.  (We do this in a non-obvious way to save a
2736 	     register file writeback)  */
2737 	  if (GET_CODE (addr) == PRE_INC)
2738 	    return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2739 	  return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2740 	}
2741     }
2742   if (optype1 == MEMOP)
2743     {
2744       /* We have to output the address syntax ourselves, since print_operand
2745 	 doesn't deal with the addresses we want to use.  Fix this later.  */
2746 
2747       rtx addr = XEXP (operands[1], 0);
2748       if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2749 	{
2750 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2751 
2752 	  operands[1] = XEXP (addr, 0);
2753 	  gcc_assert (GET_CODE (operands[0]) == REG
2754 		      && GET_CODE (operands[1]) == REG);
2755 
2756 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2757 	    {
2758 	      /* No overlap between high target register and address
2759 		 register.  (We do this in a non-obvious way to
2760 		 save a register file writeback)  */
2761 	      if (GET_CODE (addr) == POST_INC)
2762 		return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2763 	      return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2764 	    }
2765 	  else
2766 	    {
2767 	      /* This is an undefined situation.  We should load into the
2768 		 address register *and* update that register.  Probably
2769 		 we don't need to handle this at all.  */
2770 	      if (GET_CODE (addr) == POST_INC)
2771 		return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2772 	      return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2773 	    }
2774 	}
2775       else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2776 	{
2777 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2778 
2779 	  operands[1] = XEXP (addr, 0);
2780 	  gcc_assert (GET_CODE (operands[0]) == REG
2781 		      && GET_CODE (operands[1]) == REG);
2782 
2783 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2784 	    {
2785 	      /* No overlap between high target register and address
2786 		 register.  (We do this in a non-obvious way to
2787 		 save a register file writeback)  */
2788 	      if (GET_CODE (addr) == PRE_INC)
2789 		return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2790 	      return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2791 	    }
2792 	  else
2793 	    {
2794 	      /* This is an undefined situation.  We should load into the
2795 		 address register *and* update that register.  Probably
2796 		 we don't need to handle this at all.  */
2797 	      if (GET_CODE (addr) == PRE_INC)
2798 		return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2799 	      return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2800 	    }
2801 	}
2802       else if (GET_CODE (addr) == PLUS
2803 	       && GET_CODE (XEXP (addr, 0)) == MULT)
2804 	{
2805 	  rtx xoperands[4];
2806 
2807 	  /* Load address into left half of destination register.  */
2808 	  xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2809 	  xoperands[1] = XEXP (addr, 1);
2810 	  xoperands[2] = XEXP (XEXP (addr, 0), 0);
2811 	  xoperands[3] = XEXP (XEXP (addr, 0), 1);
2812 	  output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2813 			   xoperands);
2814 	  return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2815 	}
2816       else if (GET_CODE (addr) == PLUS
2817 	       && REG_P (XEXP (addr, 0))
2818 	       && REG_P (XEXP (addr, 1)))
2819 	{
2820 	  rtx xoperands[3];
2821 
2822 	  /* Load address into left half of destination register.  */
2823 	  xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2824 	  xoperands[1] = XEXP (addr, 0);
2825 	  xoperands[2] = XEXP (addr, 1);
2826 	  output_asm_insn ("{addl|add,l} %1,%2,%0",
2827 			   xoperands);
2828 	  return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2829 	}
2830     }
2831 
2832   /* If an operand is an unoffsettable memory ref, find a register
2833      we can increment temporarily to make it refer to the second word.  */
2834 
2835   if (optype0 == MEMOP)
2836     addreg0 = find_addr_reg (XEXP (operands[0], 0));
2837 
2838   if (optype1 == MEMOP)
2839     addreg1 = find_addr_reg (XEXP (operands[1], 0));
2840 
2841   /* Ok, we can do one word at a time.
2842      Normally we do the low-numbered word first.
2843 
2844      In either case, set up in LATEHALF the operands to use
2845      for the high-numbered word and in some cases alter the
2846      operands in OPERANDS to be suitable for the low-numbered word.  */
2847 
2848   if (optype0 == REGOP)
2849     latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2850   else if (optype0 == OFFSOP)
2851     latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
2852   else
2853     latehalf[0] = operands[0];
2854 
2855   if (optype1 == REGOP)
2856     latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2857   else if (optype1 == OFFSOP)
2858     latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
2859   else if (optype1 == CNSTOP)
2860     {
2861       if (GET_CODE (operands[1]) == HIGH)
2862 	{
2863 	  operands[1] = XEXP (operands[1], 0);
2864 	  highonly = 1;
2865 	}
2866       split_double (operands[1], &operands[1], &latehalf[1]);
2867     }
2868   else
2869     latehalf[1] = operands[1];
2870 
2871   /* If the first move would clobber the source of the second one,
2872      do them in the other order.
2873 
2874      This can happen in two cases:
2875 
2876 	mem -> register where the first half of the destination register
2877  	is the same register used in the memory's address.  Reload
2878 	can create such insns.
2879 
2880 	mem in this case will be either register indirect or register
2881 	indirect plus a valid offset.
2882 
2883 	register -> register move where REGNO(dst) == REGNO(src) + 1.
2884 	Someone (Tim/Tege?) claimed this can happen for parameter loads.
2885 
2886      Handle mem -> register case first.  */
2887   if (optype0 == REGOP
2888       && (optype1 == MEMOP || optype1 == OFFSOP)
2889       && refers_to_regno_p (REGNO (operands[0]), operands[1]))
2890     {
2891       /* Do the late half first.  */
2892       if (addreg1)
2893 	output_asm_insn ("ldo 4(%0),%0", &addreg1);
2894       output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2895 
2896       /* Then clobber.  */
2897       if (addreg1)
2898 	output_asm_insn ("ldo -4(%0),%0", &addreg1);
2899       return pa_singlemove_string (operands);
2900     }
2901 
2902   /* Now handle register -> register case.  */
2903   if (optype0 == REGOP && optype1 == REGOP
2904       && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2905     {
2906       output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2907       return pa_singlemove_string (operands);
2908     }
2909 
2910   /* Normal case: do the two words, low-numbered first.  */
2911 
2912   output_asm_insn (pa_singlemove_string (operands), operands);
2913 
2914   /* Make any unoffsettable addresses point at high-numbered word.  */
2915   if (addreg0)
2916     output_asm_insn ("ldo 4(%0),%0", &addreg0);
2917   if (addreg1)
2918     output_asm_insn ("ldo 4(%0),%0", &addreg1);
2919 
2920   /* Do high-numbered word.  */
2921   if (highonly)
2922     output_asm_insn ("ldil L'%1,%0", latehalf);
2923   else
2924     output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2925 
2926   /* Undo the adds we just did.  */
2927   if (addreg0)
2928     output_asm_insn ("ldo -4(%0),%0", &addreg0);
2929   if (addreg1)
2930     output_asm_insn ("ldo -4(%0),%0", &addreg1);
2931 
2932   return "";
2933 }
2934 
2935 const char *
2936 pa_output_fp_move_double (rtx *operands)
2937 {
2938   if (FP_REG_P (operands[0]))
2939     {
2940       if (FP_REG_P (operands[1])
2941 	  || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2942 	output_asm_insn ("fcpy,dbl %f1,%0", operands);
2943       else
2944 	output_asm_insn ("fldd%F1 %1,%0", operands);
2945     }
2946   else if (FP_REG_P (operands[1]))
2947     {
2948       output_asm_insn ("fstd%F0 %1,%0", operands);
2949     }
2950   else
2951     {
2952       rtx xoperands[2];
2953 
2954       gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2955 
2956       /* This is a pain.  You have to be prepared to deal with an
2957 	 arbitrary address here including pre/post increment/decrement.
2958 
2959 	 So avoid this in the MD.  */
2960       gcc_assert (GET_CODE (operands[0]) == REG);
2961 
2962       xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2963       xoperands[0] = operands[0];
2964       output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2965     }
2966   return "";
2967 }
2968 
2969 /* Return a REG that occurs in ADDR with coefficient 1.
2970    ADDR can be effectively incremented by incrementing REG.  */
2971 
2972 static rtx
2973 find_addr_reg (rtx addr)
2974 {
2975   while (GET_CODE (addr) == PLUS)
2976     {
2977       if (GET_CODE (XEXP (addr, 0)) == REG)
2978 	addr = XEXP (addr, 0);
2979       else if (GET_CODE (XEXP (addr, 1)) == REG)
2980 	addr = XEXP (addr, 1);
2981       else if (CONSTANT_P (XEXP (addr, 0)))
2982 	addr = XEXP (addr, 1);
2983       else if (CONSTANT_P (XEXP (addr, 1)))
2984 	addr = XEXP (addr, 0);
2985       else
2986 	gcc_unreachable ();
2987     }
2988   gcc_assert (GET_CODE (addr) == REG);
2989   return addr;
2990 }
2991 
2992 /* Emit code to perform a block move.
2993 
2994    OPERANDS[0] is the destination pointer as a REG, clobbered.
2995    OPERANDS[1] is the source pointer as a REG, clobbered.
2996    OPERANDS[2] is a register for temporary storage.
2997    OPERANDS[3] is a register for temporary storage.
2998    OPERANDS[4] is the size as a CONST_INT
2999    OPERANDS[5] is the alignment safe to use, as a CONST_INT.
3000    OPERANDS[6] is another temporary register.  */
3001 
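/* A 16-byte copy at word alignment, for instance, pre-loads the counter
   with 8 and runs the unrolled word-copy loop below exactly twice,
   leaving no residual.  */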
3002 const char *
3003 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3004 {
3005   int align = INTVAL (operands[5]);
3006   unsigned long n_bytes = INTVAL (operands[4]);
3007 
3008   /* We can't move more than a word at a time because the PA
3009      has no integer move insns longer than a word.  (Could use fp mem ops?)  */
3010   if (align > (TARGET_64BIT ? 8 : 4))
3011     align = (TARGET_64BIT ? 8 : 4);
3012 
3013   /* Note that we know each loop below will execute at least twice
3014      (else we would have open-coded the copy).  */
3015   switch (align)
3016     {
3017       case 8:
3018 	/* Pre-adjust the loop counter.  */
3019 	operands[4] = GEN_INT (n_bytes - 16);
3020 	output_asm_insn ("ldi %4,%2", operands);
3021 
3022 	/* Copying loop.  */
3023 	output_asm_insn ("ldd,ma 8(%1),%3", operands);
3024 	output_asm_insn ("ldd,ma 8(%1),%6", operands);
3025 	output_asm_insn ("std,ma %3,8(%0)", operands);
3026 	output_asm_insn ("addib,>= -16,%2,.-12", operands);
3027 	output_asm_insn ("std,ma %6,8(%0)", operands);
3028 
3029 	/* Handle the residual.  There could be up to 15 bytes of
3030 	   residual to copy!  */
3031 	if (n_bytes % 16 != 0)
3032 	  {
3033 	    operands[4] = GEN_INT (n_bytes % 8);
3034 	    if (n_bytes % 16 >= 8)
3035 	      output_asm_insn ("ldd,ma 8(%1),%3", operands);
3036 	    if (n_bytes % 8 != 0)
3037 	      output_asm_insn ("ldd 0(%1),%6", operands);
3038 	    if (n_bytes % 16 >= 8)
3039 	      output_asm_insn ("std,ma %3,8(%0)", operands);
3040 	    if (n_bytes % 8 != 0)
3041 	      output_asm_insn ("stdby,e %6,%4(%0)", operands);
3042 	  }
3043 	return "";
3044 
3045       case 4:
3046 	/* Pre-adjust the loop counter.  */
3047 	operands[4] = GEN_INT (n_bytes - 8);
3048 	output_asm_insn ("ldi %4,%2", operands);
3049 
3050 	/* Copying loop.  */
3051 	output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
3052 	output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
3053 	output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
3054 	output_asm_insn ("addib,>= -8,%2,.-12", operands);
3055 	output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
3056 
3057 	/* Handle the residual.  There could be up to 7 bytes of
3058 	   residual to copy!  */
3059 	if (n_bytes % 8 != 0)
3060 	  {
3061 	    operands[4] = GEN_INT (n_bytes % 4);
3062 	    if (n_bytes % 8 >= 4)
3063 	      output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
3064 	    if (n_bytes % 4 != 0)
3065 	      output_asm_insn ("ldw 0(%1),%6", operands);
3066 	    if (n_bytes % 8 >= 4)
3067 	      output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
3068 	    if (n_bytes % 4 != 0)
3069 	      output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
3070 	  }
3071 	return "";
3072 
3073       case 2:
3074 	/* Pre-adjust the loop counter.  */
3075 	operands[4] = GEN_INT (n_bytes - 4);
3076 	output_asm_insn ("ldi %4,%2", operands);
3077 
3078 	/* Copying loop.  */
3079 	output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
3080 	output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
3081 	output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
3082 	output_asm_insn ("addib,>= -4,%2,.-12", operands);
3083 	output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
3084 
3085 	/* Handle the residual.  */
3086 	if (n_bytes % 4 != 0)
3087 	  {
3088 	    if (n_bytes % 4 >= 2)
3089 	      output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
3090 	    if (n_bytes % 2 != 0)
3091 	      output_asm_insn ("ldb 0(%1),%6", operands);
3092 	    if (n_bytes % 4 >= 2)
3093 	      output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
3094 	    if (n_bytes % 2 != 0)
3095 	      output_asm_insn ("stb %6,0(%0)", operands);
3096 	  }
3097 	return "";
3098 
3099       case 1:
3100 	/* Pre-adjust the loop counter.  */
3101 	operands[4] = GEN_INT (n_bytes - 2);
3102 	output_asm_insn ("ldi %4,%2", operands);
3103 
3104 	/* Copying loop.  */
3105 	output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
3106 	output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
3107 	output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
3108 	output_asm_insn ("addib,>= -2,%2,.-12", operands);
3109 	output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
3110 
3111 	/* Handle the residual.  */
3112 	if (n_bytes % 2 != 0)
3113 	  {
3114 	    output_asm_insn ("ldb 0(%1),%3", operands);
3115 	    output_asm_insn ("stb %3,0(%0)", operands);
3116 	  }
3117 	return "";
3118 
3119       default:
3120 	gcc_unreachable ();
3121     }
3122 }
3123 
3124 /* Count the number of insns necessary to handle this block move.
3125 
3126    Basic structure is the same as pa_output_block_move, except that we
3127    count insns rather than emit them.  */
3128 
3129 static int
3130 compute_cpymem_length (rtx_insn *insn)
3131 {
3132   rtx pat = PATTERN (insn);
3133   unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
3134   unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
3135   unsigned int n_insns = 0;
3136 
3137   /* We can't move more than a word at a time because the PA
3138      has no integer move insns longer than a word.  (Could use fp mem ops?)  */
3139   if (align > (TARGET_64BIT ? 8 : 4))
3140     align = (TARGET_64BIT ? 8 : 4);
3141 
3142   /* The basic copying loop.  */
3143   n_insns = 6;
3144 
3145   /* Residuals.  */
3146   if (n_bytes % (2 * align) != 0)
3147     {
3148       if ((n_bytes % (2 * align)) >= align)
3149 	n_insns += 2;
3150 
3151       if ((n_bytes % align) != 0)
3152 	n_insns += 2;
3153     }
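  /* For example, a 19-byte copy at word alignment needs the 6-insn
     loop plus 2 residual insns (19 % 4 != 0), i.e. 32 bytes of code.  */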
3154 
3155   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
3156   return n_insns * 4;
3157 }
3158 
3159 /* Emit code to perform a block clear.
3160 
3161    OPERANDS[0] is the destination pointer as a REG, clobbered.
3162    OPERANDS[1] is a register for temporary storage.
3163    OPERANDS[2] is the size as a CONST_INT
3164    OPERANDS[3] is the alignment safe to use, as a CONST_INT.  */
3165 
3166 const char *
3167 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3168 {
3169   int align = INTVAL (operands[3]);
3170   unsigned long n_bytes = INTVAL (operands[2]);
3171 
3172   /* We can't clear more than a word at a time because the PA
3173      has no integer move insns longer than a word.  */
3174   if (align > (TARGET_64BIT ? 8 : 4))
3175     align = (TARGET_64BIT ? 8 : 4);
3176 
3177   /* Note that we know each loop below will execute at least twice
3178      (else we would have open-coded the clear).  */
3179   switch (align)
3180     {
3181       case 8:
3182 	/* Pre-adjust the loop counter.  */
3183 	operands[2] = GEN_INT (n_bytes - 16);
3184 	output_asm_insn ("ldi %2,%1", operands);
3185 
3186 	/* Loop.  */
3187 	output_asm_insn ("std,ma %%r0,8(%0)", operands);
3188 	output_asm_insn ("addib,>= -16,%1,.-4", operands);
3189 	output_asm_insn ("std,ma %%r0,8(%0)", operands);
3190 
3191 	/* Handle the residual.  There could be up to 7 bytes of
3192 	   residual to copy!  */
3193 	if (n_bytes % 16 != 0)
3194 	  {
3195 	    operands[2] = GEN_INT (n_bytes % 8);
3196 	    if (n_bytes % 16 >= 8)
3197 	      output_asm_insn ("std,ma %%r0,8(%0)", operands);
3198 	    if (n_bytes % 8 != 0)
3199 	      output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3200 	  }
3201 	return "";
3202 
3203       case 4:
3204 	/* Pre-adjust the loop counter.  */
3205 	operands[2] = GEN_INT (n_bytes - 8);
3206 	output_asm_insn ("ldi %2,%1", operands);
3207 
3208 	/* Loop.  */
3209 	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3210 	output_asm_insn ("addib,>= -8,%1,.-4", operands);
3211 	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3212 
3213 	/* Handle the residual.  There could be up to 7 bytes of
3214 	   residual to copy!  */
3215 	if (n_bytes % 8 != 0)
3216 	  {
3217 	    operands[2] = GEN_INT (n_bytes % 4);
3218 	    if (n_bytes % 8 >= 4)
3219 	      output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3220 	    if (n_bytes % 4 != 0)
3221 	      output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3222 	  }
3223 	return "";
3224 
3225       case 2:
3226 	/* Pre-adjust the loop counter.  */
3227 	operands[2] = GEN_INT (n_bytes - 4);
3228 	output_asm_insn ("ldi %2,%1", operands);
3229 
3230 	/* Loop.  */
3231 	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3232 	output_asm_insn ("addib,>= -4,%1,.-4", operands);
3233 	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3234 
3235 	/* Handle the residual.  */
3236 	if (n_bytes % 4 != 0)
3237 	  {
3238 	    if (n_bytes % 4 >= 2)
3239 	      output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3240 	    if (n_bytes % 2 != 0)
3241 	      output_asm_insn ("stb %%r0,0(%0)", operands);
3242 	  }
3243 	return "";
3244 
3245       case 1:
3246 	/* Pre-adjust the loop counter.  */
3247 	operands[2] = GEN_INT (n_bytes - 2);
3248 	output_asm_insn ("ldi %2,%1", operands);
3249 
3250 	/* Loop.  */
3251 	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3252 	output_asm_insn ("addib,>= -2,%1,.-4", operands);
3253 	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3254 
3255 	/* Handle the residual.  */
3256 	if (n_bytes % 2 != 0)
3257 	  output_asm_insn ("stb %%r0,0(%0)", operands);
3258 
3259 	return "";
3260 
3261       default:
3262 	gcc_unreachable ();
3263     }
3264 }
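
/* For example (a sketch following the ALIGN == 4 case above, shown with
   the PA 2.0 mnemonics): clearing 13 bytes emits

	ldi 5,%1		; loop counter = n_bytes - 8
	stw,ma %r0,4(%0)	; clearing loop, two words per iteration
	addib,>= -8,%1,.-4
	stw,ma %r0,4(%0)
	stw,ma %r0,4(%0)	; 13 % 8 >= 4, clear one more word
	stby,e %r0,1(%0)	; clear the final 13 % 4 == 1 byte

   which is six insns, matching compute_clrmem_length below.  */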

/* Count the number of insns necessary to handle this block clear.

   Basic structure is the same as pa_output_block_clear, except that we
   count insns rather than emit them.  */

static int
compute_clrmem_length (rtx_insn *insn)
{
  rtx pat = PATTERN (insn);
  unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
  unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
  unsigned int n_insns = 0;

  /* We can't clear more than a word at a time because the PA
     has no longer integer move insns.  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);

  /* The basic loop.  */
  n_insns = 4;

  /* Residuals.  */
  if (n_bytes % (2 * align) != 0)
    {
      if ((n_bytes % (2 * align)) >= align)
	n_insns++;

      if ((n_bytes % align) != 0)
	n_insns++;
    }

  /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
  return n_insns * 4;
}
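
/* Worked example: with align == 4 and n_bytes == 13 (the sequence shown
   after pa_output_block_clear above), the base loop is 4 insns,
   13 % 8 == 5 >= 4 adds one word store, and 13 % 4 == 1 adds the stby,
   giving 6 insns, i.e. a length of 24 bytes.  */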

const char *
pa_output_and (rtx *operands)
{
  if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
    {
      unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
      int ls0, ls1, ms0, p, len;

      for (ls0 = 0; ls0 < 32; ls0++)
	if ((mask & (1 << ls0)) == 0)
	  break;

      for (ls1 = ls0; ls1 < 32; ls1++)
	if ((mask & (1 << ls1)) != 0)
	  break;

      for (ms0 = ls1; ms0 < 32; ms0++)
	if ((mask & (1 << ms0)) == 0)
	  break;

      gcc_assert (ms0 == 32);

      if (ls1 == 32)
	{
	  len = ls0;

	  gcc_assert (len);

	  operands[2] = GEN_INT (len);
	  return "{extru|extrw,u} %1,31,%2,%0";
	}
      else
	{
	  /* We could use this `depi' for the case above as well, but `depi'
	     requires one more register file access than an `extru'.  */

	  p = 31 - ls0;
	  len = ls1 - ls0;

	  operands[2] = GEN_INT (p);
	  operands[3] = GEN_INT (len);
	  return "{depi|depwi} 0,%2,%3,%0";
	}
    }
  else
    return "and %1,%2,%0";
}
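
/* Two worked examples of the decomposition above.  A mask of low ones,
   e.g. 0xff, gives ls0 == 8 and ls1 == 32, so the result is
   "extru %1,31,8,%0", extracting the low 8 bits.  A mask with a single
   hole of zeros, e.g. 0xffff00ff, gives ls0 == 8, ls1 == 16 and
   ms0 == 32, so p == 23 and len == 8, and "depi 0,23,8,%0" deposits
   zeros over bits 8..15 (PA bit positions count from the MSB, hence
   the 31 - ls0 conversion).  Any other mask shape fails the
   gcc_assert (ms0 == 32) and is expected to be screened out by the
   callers.  */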

/* Return a string to perform a bitwise-and of operands[1] with operands[2]
   storing the result in operands[0].  */
const char *
pa_output_64bit_and (rtx *operands)
{
  if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
    {
      unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
      int ls0, ls1, ms0, p, len;

      for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
	  break;

      for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
	  break;

      for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
	  break;

      gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);

      if (ls1 == HOST_BITS_PER_WIDE_INT)
	{
	  len = ls0;

	  gcc_assert (len);

	  operands[2] = GEN_INT (len);
	  return "extrd,u %1,63,%2,%0";
	}
      else
	{
	  /* We could use this `depdi' for the case above as well, but `depdi'
	     requires one more register file access than an `extrd'.  */

	  p = 63 - ls0;
	  len = ls1 - ls0;

	  operands[2] = GEN_INT (p);
	  operands[3] = GEN_INT (len);
	  return "depdi 0,%2,%3,%0";
	}
    }
  else
    return "and %1,%2,%0";
}

const char *
pa_output_ior (rtx *operands)
{
  unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
  int bs0, bs1, p, len;

  if (INTVAL (operands[2]) == 0)
    return "copy %1,%0";

  for (bs0 = 0; bs0 < 32; bs0++)
    if ((mask & (1 << bs0)) != 0)
      break;

  for (bs1 = bs0; bs1 < 32; bs1++)
    if ((mask & (1 << bs1)) == 0)
      break;

  gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);

  p = 31 - bs0;
  len = bs1 - bs0;

  operands[2] = GEN_INT (p);
  operands[3] = GEN_INT (len);
  return "{depi|depwi} -1,%2,%3,%0";
}
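
/* Worked example: operands[2] == 0xff0 has its set bits in the single
   contiguous run 4..11, so bs0 == 4, bs1 == 12, p == 27, len == 8 and
   the result is "depi -1,27,8,%0", depositing ones over exactly those
   bits.  The gcc_assert above checks that the run really is contiguous
   (no set bit at or above bs1).  */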

/* Return a string to perform a bitwise inclusive-or of operands[1] with
   operands[2], storing the result in operands[0].  */
const char *
pa_output_64bit_ior (rtx *operands)
{
  unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
  int bs0, bs1, p, len;

  if (INTVAL (operands[2]) == 0)
    return "copy %1,%0";

  for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
      break;

  for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
      break;

  gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
	      || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);

  p = 63 - bs0;
  len = bs1 - bs0;

  operands[2] = GEN_INT (p);
  operands[3] = GEN_INT (len);
  return "depdi -1,%2,%3,%0";
}

/* Target hook for assembling integer objects.  This code handles
   aligned SI and DI integers specially since function references
   must be preceded by P%.  */

static bool
pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  bool result;
  tree decl = NULL;

  /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to call
     assemble_external and set the SYMBOL_REF_DECL to NULL before
     calling output_addr_const.  Otherwise, it may call assemble_external
     in the midst of outputting the assembler code for the SYMBOL_REF.
     We restore the SYMBOL_REF_DECL after the output is done.  */
  if (GET_CODE (x) == SYMBOL_REF)
    {
      decl = SYMBOL_REF_DECL (x);
      if (decl)
	{
	  assemble_external (decl);
	  SET_SYMBOL_REF_DECL (x, NULL);
	}
    }

  if (size == UNITS_PER_WORD
      && aligned_p
      && function_label_operand (x, VOIDmode))
    {
      fputs (size == 8 ? "\t.dword\t" : "\t.word\t", asm_out_file);

      /* We don't want an OPD when generating fast indirect calls.  */
      if (!TARGET_FAST_INDIRECT_CALLS)
	fputs ("P%", asm_out_file);

      output_addr_const (asm_out_file, x);
      fputc ('\n', asm_out_file);
      result = true;
    }
  else
    result = default_assemble_integer (x, size, aligned_p);

  if (decl)
    SET_SYMBOL_REF_DECL (x, decl);

  return result;
}
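
/* For instance (a sketch, for a 32-bit target where UNITS_PER_WORD is 4):
   an aligned, word-sized reference to a function `foo' assembles as

	.word	P%foo

   so the linker materializes a procedure label for the reference, while
   under -mfast-indirect-calls the P% prefix is omitted (no OPD wanted,
   per the comment above), and non-function data falls through to
   default_assemble_integer.  */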

/* Output an ascii string.  */
void
pa_output_ascii (FILE *file, const char *p, int size)
{
  int i;
  int chars_output;
  unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */

  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string.  Sigh.  Even worse, it seems that the
     restriction is in number of input characters (see \xnn &
     \whatever).  So we have to do this very carefully.  */

  fputs ("\t.STRING \"", file);

  chars_output = 0;
  for (i = 0; i < size; i += 4)
    {
      int co = 0;
      int io = 0;
      for (io = 0, co = 0; io < MIN (4, size - i); io++)
	{
	  unsigned int c = (unsigned char) p[i + io];

	  if (c == '\"' || c == '\\')
	    partial_output[co++] = '\\';
	  if (c >= ' ' && c < 0177)
	    partial_output[co++] = c;
	  else
	    {
	      unsigned int hexd;
	      partial_output[co++] = '\\';
	      partial_output[co++] = 'x';
	      hexd = c / 16 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	      hexd = c % 16 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	    }
	}
      if (chars_output + co > 243)
	{
	  fputs ("\"\n\t.STRING \"", file);
	  chars_output = 0;
	}
      fwrite (partial_output, 1, (size_t) co, file);
      chars_output += co;
      co = 0;
    }
  fputs ("\"\n", file);
}
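
/* A worked example of the escape arithmetic above: for the unprintable
   byte c == 0x1b, c / 16 == 1 maps to '1', while c % 16 == 11 first
   maps to ';' (0x3b); that is greater than '9', so subtracting
   '9' - 'a' + 1 (a negative value) lands on 'b', and the string
   contains "\x1b".  Flushing at the 243-character threshold keeps each
   .STRING line comfortably under the assembler's 256-character input
   line limit even when all four bytes of a group expand to
   multi-character escapes.  */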

/* Try to rewrite floating point comparisons & branches to avoid
   useless add,tr insns.

   CHECK_NOTES is nonzero if we should examine REG_DEAD notes
   to see if FPCC is dead.  CHECK_NOTES is nonzero for the
   first attempt to remove useless add,tr insns.  It is zero
   for the second pass as reorg sometimes leaves bogus REG_DEAD
   notes lying around.

   When CHECK_NOTES is zero we can only eliminate add,tr insns
   when there's a 1:1 correspondence between fcmp and ftest/fbranch
   instructions.  */
static void
remove_useless_addtr_insns (int check_notes)
{
  rtx_insn *insn;
  static int pass = 0;

  /* This is fairly cheap, so always run it when optimizing.  */
  if (optimize > 0)
    {
      int fcmp_count = 0;
      int fbranch_count = 0;

      /* Walk all the insns in this function looking for fcmp & fbranch
	 instructions.  Keep track of how many of each we find.  */
      for (insn = get_insns (); insn; insn = next_insn (insn))
	{
	  rtx tmp;

	  /* Ignore anything that isn't an INSN or a JUMP_INSN.  */
	  if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
	    continue;

	  tmp = PATTERN (insn);

	  /* It must be a set.  */
	  if (GET_CODE (tmp) != SET)
	    continue;

	  /* If the destination is CCFP, then we've found an fcmp insn.  */
	  tmp = SET_DEST (tmp);
	  if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
	    {
	      fcmp_count++;
	      continue;
	    }

	  tmp = PATTERN (insn);
	  /* If this is an fbranch instruction, bump the fbranch counter.  */
	  if (GET_CODE (tmp) == SET
	      && SET_DEST (tmp) == pc_rtx
	      && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
	      && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
	      && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
	      && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
	    {
	      fbranch_count++;
	      continue;
	    }
	}

      /* Find all floating point compare + branch insns.  If possible,
	 reverse the comparison & the branch to avoid add,tr insns.  */
      for (insn = get_insns (); insn; insn = next_insn (insn))
	{
	  rtx tmp;
	  rtx_insn *next;

	  /* Ignore anything that isn't an INSN.  */
	  if (! NONJUMP_INSN_P (insn))
	    continue;

	  tmp = PATTERN (insn);

	  /* It must be a set.  */
	  if (GET_CODE (tmp) != SET)
	    continue;

	  /* The destination must be CCFP, which is register zero.  */
	  tmp = SET_DEST (tmp);
	  if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
	    continue;

	  /* INSN should be a set of CCFP.

	     See if the result of this insn is used in a reversed FP
	     conditional branch.  If so, reverse our condition and
	     the branch.  Doing so avoids useless add,tr insns.  */
	  next = next_insn (insn);
	  while (next)
	    {
	      /* Jumps, calls and labels stop our search.  */
	      if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
		break;

	      /* As does another fcmp insn.  */
	      if (NONJUMP_INSN_P (next)
		  && GET_CODE (PATTERN (next)) == SET
		  && GET_CODE (SET_DEST (PATTERN (next))) == REG
		  && REGNO (SET_DEST (PATTERN (next))) == 0)
		break;

	      next = next_insn (next);
	    }

	  /* Is NEXT a branch?  */
	  if (next && JUMP_P (next))
	    {
	      rtx pattern = PATTERN (next);

	      /* If it is a reversed fp conditional branch (e.g. uses add,tr)
		 and CCFP dies, then reverse our conditional and the branch
		 to avoid the add,tr.  */
	      if (GET_CODE (pattern) == SET
		  && SET_DEST (pattern) == pc_rtx
		  && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
		  && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
		  && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
		  && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
		  && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
		  && (fcmp_count == fbranch_count
		      || (check_notes
			  && find_regno_note (next, REG_DEAD, 0))))
		{
		  /* Reverse the branch.  */
		  tmp = XEXP (SET_SRC (pattern), 1);
		  XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
		  XEXP (SET_SRC (pattern), 2) = tmp;
		  INSN_CODE (next) = -1;

		  /* Reverse our condition.  */
		  tmp = PATTERN (insn);
		  PUT_CODE (XEXP (tmp, 1),
			    (reverse_condition_maybe_unordered
			     (GET_CODE (XEXP (tmp, 1)))));
		}
	    }
	}
    }

  pass = !pass;
}

/* You may have trouble believing this, but this is the 32-bit HP-PA
   stack layout.  Wow.

   Offset		Contents

   Variable arguments	(optional; any number may be allocated)

   SP-(4*(N+9))		arg word N
	:		    :
      SP-56		arg word 5
      SP-52		arg word 4

   Fixed arguments	(must be allocated; may remain unused)

      SP-48		arg word 3
      SP-44		arg word 2
      SP-40		arg word 1
      SP-36		arg word 0

   Frame Marker

      SP-32		External Data Pointer (DP)
      SP-28		External sr4
      SP-24		External/stub RP (RP')
      SP-20		Current RP
      SP-16		Static Link
      SP-12		Clean up
      SP-8		Calling Stub RP (RP'')
      SP-4		Previous SP

   Top of Frame

      SP-0		Stack Pointer (points to next available address)

*/

/* This function saves registers as follows.  Registers marked with ' are
   this function's registers (as opposed to the previous function's).
   If a frame_pointer isn't needed, r4 is saved as a general register;
   the space for the frame pointer is still allocated, though, to keep
   things simple.


   Top of Frame

       SP (FP')		Previous FP
       SP + 4		Alignment filler (sigh)
       SP + 8		Space for locals reserved here.
       .
       .
       .
       SP + n		All call-saved registers used.
       .
       .
       .
       SP + o		All call-saved FP registers used.
       .
       .
       .
       SP + p (SP')	points to next available address.

*/

/* Global variables set by pa_expand_prologue().  */
/* Size of frame.  Need to know this to emit return insns from
   leaf procedures.  */
static HOST_WIDE_INT actual_fsize, local_fsize;
static int save_fregs;

/* Emit RTL to store REG at the memory location specified by BASE+DISP.
   Handle case where DISP > 8k by using the add_high_const patterns.

   Note in DISP > 8k case, we will leave the high part of the address
   in %r1.  There is code in pa_expand_{prologue,epilogue} that knows
   this.  */

static void
store_reg (int reg, HOST_WIDE_INT disp, int base)
{
  rtx dest, src, basereg;
  rtx_insn *insn;

  src = gen_rtx_REG (word_mode, reg);
  basereg = gen_rtx_REG (Pmode, base);
  if (VAL_14_BITS_P (disp))
    {
      dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
      insn = emit_move_insn (dest, src);
    }
  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
    {
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, delta);
      insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
      if (DO_FRAME_NOTES)
	{
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_SET (tmpreg,
				     gen_rtx_PLUS (Pmode, basereg, delta)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      dest = gen_rtx_MEM (word_mode, tmpreg);
      insn = emit_move_insn (dest, src);
    }
  else
    {
      rtx delta = GEN_INT (disp);
      rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, high);
      dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
      insn = emit_move_insn (dest, src);
      if (DO_FRAME_NOTES)
	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
		      gen_rtx_SET (gen_rtx_MEM (word_mode,
						gen_rtx_PLUS (word_mode,
							      basereg,
							      delta)),
				   src));
    }

  if (DO_FRAME_NOTES)
    RTX_FRAME_RELATED_P (insn) = 1;
}
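
/* To illustrate the three paths above (a sketch; the final insns are
   chosen by the move patterns): a 14-bit displacement such as -20 from
   the stack pointer becomes a single "stw %reg,-20(%r30)", while a
   larger 32-bit displacement takes the add_high_const route, roughly

	addil L'disp,%base	; %r1 = %base + high part of disp
	stw %reg,R'disp(%r1)	; store using the low part

   leaving the high part of the address in %r1 as the comment above
   warns.  */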

/* Emit RTL to store REG at the memory location specified by BASE and then
   add MOD to BASE.  MOD must be <= 8k.  */

static void
store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
{
  rtx basereg, srcreg, delta;
  rtx_insn *insn;

  gcc_assert (VAL_14_BITS_P (mod));

  basereg = gen_rtx_REG (Pmode, base);
  srcreg = gen_rtx_REG (word_mode, reg);
  delta = GEN_INT (mod);

  insn = emit_insn (gen_post_store (basereg, srcreg, delta));
  if (DO_FRAME_NOTES)
    {
      RTX_FRAME_RELATED_P (insn) = 1;

      /* RTX_FRAME_RELATED_P must be set on each frame related set
	 in a parallel with more than one element.  */
      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
    }
}

/* Emit RTL to set REG to the value specified by BASE+DISP.  Handle case
   where DISP > 8k by using the add_high_const patterns.  NOTE indicates
   whether to add a frame note or not.

   In the DISP > 8k case, we leave the high part of the address in %r1.
   There is code in pa_expand_{prologue,epilogue} that knows about this.  */

static void
set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
{
  rtx_insn *insn;

  if (VAL_14_BITS_P (disp))
    {
      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
			     plus_constant (Pmode,
					    gen_rtx_REG (Pmode, base), disp));
    }
  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
    {
      rtx basereg = gen_rtx_REG (Pmode, base);
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, delta);
      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
			     gen_rtx_PLUS (Pmode, tmpreg, basereg));
      if (DO_FRAME_NOTES)
	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
		      gen_rtx_SET (tmpreg,
				   gen_rtx_PLUS (Pmode, basereg, delta)));
    }
  else
    {
      rtx basereg = gen_rtx_REG (Pmode, base);
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg,
		      gen_rtx_PLUS (Pmode, basereg,
				    gen_rtx_HIGH (Pmode, delta)));
      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
			     gen_rtx_LO_SUM (Pmode, tmpreg, delta));
    }

  if (DO_FRAME_NOTES && note)
    RTX_FRAME_RELATED_P (insn) = 1;
}

HOST_WIDE_INT
pa_compute_frame_size (poly_int64 size, int *fregs_live)
{
  int freg_saved = 0;
  int i, j;

  /* The code in pa_expand_prologue and pa_expand_epilogue must
     be consistent with the rounding and size calculation done here.
     Change them at the same time.  */

  /* We do our own stack alignment.  First, round the size of the
     stack locals up to a word boundary.  */
  size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);

  /* Space for previous frame pointer + filler.  If any frame is
     allocated, we need to add in the TARGET_STARTING_FRAME_OFFSET.  We
     waste some space here for the sake of HP compatibility.  The
     first slot is only used when the frame pointer is needed.  */
  if (size || frame_pointer_needed)
    size += pa_starting_frame_offset ();

  /* If the current function calls __builtin_eh_return, then we need
     to allocate stack space for registers that will hold data for
     the exception handler.  */
  if (DO_FRAME_NOTES && crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
	continue;
      size += i * UNITS_PER_WORD;
    }

  /* Account for space used by the callee general register saves.  */
  for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
    if (df_regs_ever_live_p (i))
      size += UNITS_PER_WORD;

  /* Account for space used by the callee floating point register saves.  */
  for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
    if (df_regs_ever_live_p (i)
	|| (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
      {
	freg_saved = 1;

	/* We always save both halves of the FP register, so always
	   increment the frame size by 8 bytes.  */
	size += 8;
      }

  /* If any of the floating registers are saved, account for the
     alignment needed for the floating point register save block.  */
  if (freg_saved)
    {
      size = (size + 7) & ~7;
      if (fregs_live)
	*fregs_live = 1;
    }

  /* The various ABIs include space for the outgoing parameters in the
     size of the current function's stack frame.  We don't need to align
     for the outgoing arguments as their alignment is set by the final
     rounding for the frame as a whole.  */
  size += crtl->outgoing_args_size;

  /* Allocate space for the fixed frame marker.  This space must be
     allocated for any function that makes calls or allocates
     stack space.  */
  if (!crtl->is_leaf || size)
    size += TARGET_64BIT ? 48 : 32;

  /* Finally, round to the preferred stack boundary.  */
  return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
	  & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
}
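
/* A worked example (a sketch only, assuming a 32-bit target with
   UNITS_PER_WORD of 4, a starting frame offset of 8 as in the layout
   comment above, a 32-byte frame marker and a 64-byte preferred stack
   boundary): a non-leaf function with 18 bytes of locals, registers
   %r4-%r6 saved under a frame pointer, 16 bytes of outgoing arguments
   and no FP saves computes 20 (rounded locals) + 8 + 12 + 16 + 32 == 88,
   which rounds up to a 128-byte frame.  */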

/* Output function label, and associated .PROC and .CALLINFO statements.  */

void
pa_output_function_label (FILE *file)
{
  /* The function's label and associated .PROC must never be
     separated and must be output *after* any profiling declarations
     to avoid changing spaces/subspaces within a procedure.  */
  ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
  fputs ("\t.PROC\n", file);

  /* pa_expand_prologue does the dirty work now.  We just need
     to output the assembler directives which denote the start
     of a function.  */
  fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
  if (crtl->is_leaf)
    fputs (",NO_CALLS", file);
  else
    fputs (",CALLS", file);
  if (rp_saved)
    fputs (",SAVE_RP", file);

  /* The SAVE_SP flag is used to indicate that register %r3 is stored
     at the beginning of the frame and that it is used as the frame
     pointer for the frame.  We do this because our current frame
     layout doesn't conform to that specified in the HP runtime
     documentation and we need a way to indicate to programs such as
     GDB where %r3 is saved.  The SAVE_SP flag was chosen because it
     isn't used by HP compilers but is supported by the assembler.
     However, SAVE_SP is supposed to indicate that the previous stack
     pointer has been saved in the frame marker.  */
  if (frame_pointer_needed)
    fputs (",SAVE_SP", file);

  /* Pass on information about the number of callee register saves
     performed in the prologue.

     The compiler is supposed to pass the highest register number
     saved, the assembler then has to adjust that number before
     entering it into the unwind descriptor (to account for any
     caller saved registers with lower register numbers than the
     first callee saved register).  */
  if (gr_saved)
    fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);

  if (fr_saved)
    fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);

  fputs ("\n\t.ENTRY\n", file);
}

/* Output function prologue.  */

static void
pa_output_function_prologue (FILE *file)
{
  pa_output_function_label (file);
  remove_useless_addtr_insns (0);
}

/* The label is output by ASM_DECLARE_FUNCTION_NAME on linux.  */

static void
pa_linux_output_function_prologue (FILE *file ATTRIBUTE_UNUSED)
{
  remove_useless_addtr_insns (0);
}

void
pa_expand_prologue (void)
{
  int merge_sp_adjust_with_store = 0;
  HOST_WIDE_INT size = get_frame_size ();
  HOST_WIDE_INT offset;
  int i;
  rtx tmpreg;
  rtx_insn *insn;

  gr_saved = 0;
  fr_saved = 0;
  save_fregs = 0;

  /* Compute total size for frame pointer, filler, locals and rounding to
     the next word boundary.  Similar code appears in pa_compute_frame_size
     and must be changed in tandem with this code.  */
  local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
  if (local_fsize || frame_pointer_needed)
    local_fsize += pa_starting_frame_offset ();

  actual_fsize = pa_compute_frame_size (size, &save_fregs);
  if (flag_stack_usage_info)
    current_function_static_stack_size = actual_fsize;

  /* Compute a few things we will use often.  */
  tmpreg = gen_rtx_REG (word_mode, 1);

  /* Save RP first.  The calling conventions manual states RP will
     always be stored into the caller's frame at sp - 20 or sp - 16
     depending on which ABI is in use.  */
  if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
    {
      store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
      rp_saved = true;
    }
  else
    rp_saved = false;

  /* Allocate the local frame and set up the frame pointer if needed.  */
  if (actual_fsize != 0)
    {
      if (frame_pointer_needed)
	{
	  /* Copy the old frame pointer temporarily into %r1.  Set up the
	     new stack pointer, then store away the saved old frame pointer
	     into the stack at sp and at the same time update the stack
	     pointer by actual_fsize bytes.  Two versions: the first
	     handles small (<8k) frames, the second handles large (>=8k)
	     frames.  */
	  insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
	  if (DO_FRAME_NOTES)
	    RTX_FRAME_RELATED_P (insn) = 1;

	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
	  if (DO_FRAME_NOTES)
	    RTX_FRAME_RELATED_P (insn) = 1;

	  if (VAL_14_BITS_P (actual_fsize))
	    store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
	  else
	    {
	      /* It is incorrect to store the saved frame pointer at *sp,
		 then increment sp (writes beyond the current stack boundary).

		 So instead use stwm to store at *sp and post-increment the
		 stack pointer as an atomic operation.  Then increment sp to
		 finish allocating the new frame.  */
	      HOST_WIDE_INT adjust1 = 8192 - 64;
	      HOST_WIDE_INT adjust2 = actual_fsize - adjust1;

	      store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
	      set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
			      adjust2, 1);
	    }

	  /* We set SAVE_SP in frames that need a frame pointer.  Thus,
	     we need to store the previous stack pointer (frame pointer)
	     into the frame marker on targets that use the HP unwind
	     library.  This allows the HP unwind library to be used to
	     unwind GCC frames.  However, we are not fully compatible
	     with the HP library because our frame layout differs from
	     that specified in the HP runtime specification.

	     We don't want a frame note on this instruction as the frame
	     marker moves during dynamic stack allocation.

	     This instruction also serves as a blockage to prevent
	     register spills from being scheduled before the stack
	     pointer is raised.  This is necessary as we store
	     registers using the frame pointer as a base register,
	     and the frame pointer is set before sp is raised.  */
	  if (TARGET_HPUX_UNWIND_LIBRARY)
	    {
	      rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
				       GEN_INT (TARGET_64BIT ? -8 : -4));

	      emit_move_insn (gen_rtx_MEM (word_mode, addr),
			      hard_frame_pointer_rtx);
	    }
	  else
	    emit_insn (gen_blockage ());
	}
      /* No frame pointer needed.  */
      else
	{
	  /* In some cases we can perform the first callee register save
	     and allocate the stack frame at the same time.  If so, just
	     make a note of it and defer allocating the frame until saving
	     the callee registers.  */
	  if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
	    merge_sp_adjust_with_store = 1;
	  /* Cannot optimize.  Adjust the stack frame by actual_fsize
	     bytes.  */
	  else
	    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
			    actual_fsize, 1);
	}
    }

  /* Normal register save.

     Do not save the frame pointer in the frame_pointer_needed case.  It
     was done earlier.  */
  if (frame_pointer_needed)
    {
      offset = local_fsize;

      /* Saving the EH return data registers in the frame is the simplest
	 way to get the frame unwind information emitted.  We put them
	 just before the general registers.  */
      if (DO_FRAME_NOTES && crtl->calls_eh_return)
	{
	  unsigned int i, regno;

	  for (i = 0; ; ++i)
	    {
	      regno = EH_RETURN_DATA_REGNO (i);
	      if (regno == INVALID_REGNUM)
		break;

	      store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}

      for (i = 18; i >= 4; i--)
	if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
	  {
	    store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
	    offset += UNITS_PER_WORD;
	    gr_saved++;
	  }
      /* Account for %r3 which is saved in a special place.  */
      gr_saved++;
    }
  /* No frame pointer needed.  */
  else
    {
      offset = local_fsize - actual_fsize;

      /* Saving the EH return data registers in the frame is the simplest
	 way to get the frame unwind information emitted.  */
      if (DO_FRAME_NOTES && crtl->calls_eh_return)
	{
	  unsigned int i, regno;

	  for (i = 0; ; ++i)
	    {
	      regno = EH_RETURN_DATA_REGNO (i);
	      if (regno == INVALID_REGNUM)
		break;

	      /* If merge_sp_adjust_with_store is nonzero, then we can
		 optimize the first save.  */
	      if (merge_sp_adjust_with_store)
		{
		  store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
		  merge_sp_adjust_with_store = 0;
		}
	      else
		store_reg (regno, offset, STACK_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}

      for (i = 18; i >= 3; i--)
	if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
	  {
	    /* If merge_sp_adjust_with_store is nonzero, then we can
	       optimize the first GR save.  */
	    if (merge_sp_adjust_with_store)
	      {
		store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
		merge_sp_adjust_with_store = 0;
	      }
	    else
	      store_reg (i, offset, STACK_POINTER_REGNUM);
	    offset += UNITS_PER_WORD;
	    gr_saved++;
	  }

      /* If we wanted to merge the SP adjustment with a GR save, but we never
	 did any GR saves, then just emit the adjustment here.  */
      if (merge_sp_adjust_with_store)
	set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
			actual_fsize, 1);
    }

  /* The hppa calling conventions say that %r19, the pic offset
     register, is saved at sp - 32 (in this function's frame)
     when generating PIC code.  FIXME:  What is the correct thing
     to do for functions which make no calls and allocate no
     frame?  Do we need to allocate a frame, or can we just omit
     the save?  For now we'll just omit the save.

     We don't want a note on this insn as the frame marker can
     move if there is a dynamic stack allocation.  */
  if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
    {
      rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));

      emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
    }

  /* Align pointer properly (doubleword boundary).  */
  offset = (offset + 7) & ~7;

  /* Floating point register store.  */
  if (save_fregs)
    {
      rtx base;

      /* First get the frame or stack pointer to the start of the FP register
	 save area.  */
      if (frame_pointer_needed)
	{
	  set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
	  base = hard_frame_pointer_rtx;
	}
      else
	{
	  set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
	  base = stack_pointer_rtx;
	}

      /* Now actually save the FP registers.  */
      for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
	{
	  if (df_regs_ever_live_p (i)
	      || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
	    {
	      rtx addr, reg;
	      rtx_insn *insn;
	      addr = gen_rtx_MEM (DFmode,
				  gen_rtx_POST_INC (word_mode, tmpreg));
	      reg = gen_rtx_REG (DFmode, i);
	      insn = emit_move_insn (addr, reg);
	      if (DO_FRAME_NOTES)
		{
		  RTX_FRAME_RELATED_P (insn) = 1;
		  if (TARGET_64BIT)
		    {
		      rtx mem = gen_rtx_MEM (DFmode,
					     plus_constant (Pmode, base,
							    offset));
		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
				    gen_rtx_SET (mem, reg));
		    }
		  else
		    {
		      rtx meml = gen_rtx_MEM (SFmode,
					      plus_constant (Pmode, base,
							     offset));
		      rtx memr = gen_rtx_MEM (SFmode,
					      plus_constant (Pmode, base,
							     offset + 4));
		      rtx regl = gen_rtx_REG (SFmode, i);
		      rtx regr = gen_rtx_REG (SFmode, i + 1);
		      rtx setl = gen_rtx_SET (meml, regl);
		      rtx setr = gen_rtx_SET (memr, regr);
		      rtvec vec;

		      RTX_FRAME_RELATED_P (setl) = 1;
		      RTX_FRAME_RELATED_P (setr) = 1;
		      vec = gen_rtvec (2, setl, setr);
		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
				    gen_rtx_SEQUENCE (VOIDmode, vec));
		    }
		}
	      offset += GET_MODE_SIZE (DFmode);
	      fr_saved++;
	    }
	}
    }
}

/* Emit RTL to load REG from the memory location specified by BASE+DISP.
   Handle case where DISP > 8k by using the add_high_const patterns.  */

static void
load_reg (int reg, HOST_WIDE_INT disp, int base)
{
  rtx dest = gen_rtx_REG (word_mode, reg);
  rtx basereg = gen_rtx_REG (Pmode, base);
  rtx src;

  if (VAL_14_BITS_P (disp))
    src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
    {
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, delta);
      if (TARGET_DISABLE_INDEXING)
	{
	  emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
	  src = gen_rtx_MEM (word_mode, tmpreg);
	}
      else
	src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
    }
  else
    {
      rtx delta = GEN_INT (disp);
      rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, high);
      src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
    }

  emit_move_insn (dest, src);
}

/* Update the total code bytes output to the text section.  */

static void
update_total_code_bytes (unsigned int nbytes)
{
  if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
      && !IN_NAMED_SECTION_P (cfun->decl))
    {
      unsigned int old_total = total_code_bytes;

      total_code_bytes += nbytes;

      /* Be prepared to handle overflows.  */
      if (old_total > total_code_bytes)
	total_code_bytes = UINT_MAX;
    }
}

/* This function generates the assembly code for function exit.
   Args are as for output_function_prologue ().

   The function epilogue should not depend on the current stack
   pointer!  It should use the frame pointer only.  This is mandatory
   because of alloca; we also take advantage of it to omit stack
   adjustments before returning.  */

static void
pa_output_function_epilogue (FILE *file)
{
  rtx_insn *insn = get_last_insn ();
  bool extra_nop;

  /* pa_expand_epilogue does the dirty work now.  We just need
     to output the assembler directives which denote the end
     of a function.

     To make debuggers happy, emit a nop if the epilogue was completely
     eliminated due to a volatile call as the last insn in the
     current function.  That way the return address (in %r2) will
     always point to a valid instruction in the current function.  */

  /* Get the last real insn.  */
  if (NOTE_P (insn))
    insn = prev_real_insn (insn);

  /* If it is a sequence, then look inside.  */
  if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
    insn = as_a <rtx_sequence *> (PATTERN (insn))->insn (0);

  /* If insn is a CALL_INSN, then it must be a call to a volatile
     function (otherwise there would be epilogue insns).  */
  if (insn && CALL_P (insn))
    {
      fputs ("\tnop\n", file);
      extra_nop = true;
    }
  else
    extra_nop = false;

  fputs ("\t.EXIT\n\t.PROCEND\n", file);

  if (TARGET_SOM && TARGET_GAS)
    {
      /* We are done with this subspace except possibly for some additional
	 debug information.  Forget that we are in this subspace to ensure
	 that the next function is output in its own subspace.  */
      in_section = NULL;
      cfun->machine->in_nsubspa = 2;
    }

  /* Thunks do their own insn accounting.  */
  if (cfun->is_thunk)
    return;

  if (INSN_ADDRESSES_SET_P ())
    {
      last_address = extra_nop ? 4 : 0;
      insn = get_last_nonnote_insn ();
      if (insn)
	{
	  last_address += INSN_ADDRESSES (INSN_UID (insn));
	  if (INSN_P (insn))
	    last_address += insn_default_length (insn);
	}
      last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
		      & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
    }
  else
    last_address = UINT_MAX;

  /* Finally, update the total number of code bytes output so far.  */
  update_total_code_bytes (last_address);
}

void
pa_expand_epilogue (void)
{
  rtx tmpreg;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT ret_off = 0;
  int i;
  int merge_sp_adjust_with_load = 0;

  /* We will use this often.  */
  tmpreg = gen_rtx_REG (word_mode, 1);

  /* Try to restore RP early to avoid load/use interlocks when
     RP gets used in the return (bv) instruction.  This appears to still
     be necessary even when we schedule the prologue and epilogue.  */
  if (rp_saved)
    {
      ret_off = TARGET_64BIT ? -16 : -20;
      if (frame_pointer_needed)
	{
	  load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
	  ret_off = 0;
	}
      else
	{
	  /* No frame pointer, and stack is smaller than 8k.  */
	  if (VAL_14_BITS_P (ret_off - actual_fsize))
	    {
	      load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
	      ret_off = 0;
	    }
	}
    }

  /* General register restores.  */
  if (frame_pointer_needed)
    {
      offset = local_fsize;

      /* If the current function calls __builtin_eh_return, then we need
	 to restore the saved EH data registers.  */
      if (DO_FRAME_NOTES && crtl->calls_eh_return)
	{
	  unsigned int i, regno;

	  for (i = 0; ; ++i)
	    {
	      regno = EH_RETURN_DATA_REGNO (i);
	      if (regno == INVALID_REGNUM)
		break;

	      load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}

      for (i = 18; i >= 4; i--)
	if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
	  {
	    load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
	    offset += UNITS_PER_WORD;
	  }
    }
  else
    {
      offset = local_fsize - actual_fsize;

      /* If the current function calls __builtin_eh_return, then we need
	 to restore the saved EH data registers.  */
      if (DO_FRAME_NOTES && crtl->calls_eh_return)
	{
	  unsigned int i, regno;

	  for (i = 0; ; ++i)
	    {
	      regno = EH_RETURN_DATA_REGNO (i);
	      if (regno == INVALID_REGNUM)
		break;

	      /* Only for the first load.
		 merge_sp_adjust_with_load holds the register number of
		 the load with which we will merge the sp adjustment.  */
	      if (merge_sp_adjust_with_load == 0
		  && local_fsize == 0
		  && VAL_14_BITS_P (-actual_fsize))
		merge_sp_adjust_with_load = regno;
	      else
		load_reg (regno, offset, STACK_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}

      for (i = 18; i >= 3; i--)
	{
	  if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
	    {
	      /* Only for the first load.
		 merge_sp_adjust_with_load holds the register number of
		 the load with which we will merge the sp adjustment.  */
	      if (merge_sp_adjust_with_load == 0
		  && local_fsize == 0
		  && VAL_14_BITS_P (-actual_fsize))
		merge_sp_adjust_with_load = i;
	      else
		load_reg (i, offset, STACK_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}
    }

  /* Align pointer properly (doubleword boundary).  */
  offset = (offset + 7) & ~7;

  /* FP register restores.  */
  if (save_fregs)
    {
      /* Adjust the register to index off of.  */
      if (frame_pointer_needed)
	set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
      else
	set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);

      /* Actually do the restores now.  */
      for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
	if (df_regs_ever_live_p (i)
	    || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
	  {
	    rtx src = gen_rtx_MEM (DFmode,
				   gen_rtx_POST_INC (word_mode, tmpreg));
	    rtx dest = gen_rtx_REG (DFmode, i);
	    emit_move_insn (dest, src);
	  }
    }

  /* Emit a blockage insn here to keep these insns from being moved to
     an earlier spot in the epilogue, or into the main instruction stream.

     This is necessary as we must not cut the stack back before all the
     restores are finished.  */
  emit_insn (gen_blockage ());

  /* Reset stack pointer (and possibly frame pointer).  The stack
     pointer is initially set to fp + 64 to avoid a race condition.  */
  if (frame_pointer_needed)
    {
      rtx delta = GEN_INT (-64);

      set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
      emit_insn (gen_pre_load (hard_frame_pointer_rtx,
			       stack_pointer_rtx, delta));
    }
  /* If we were deferring a callee register restore, do it now.  */
  else if (merge_sp_adjust_with_load)
    {
      rtx delta = GEN_INT (-actual_fsize);
      rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);

      emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
    }
  else if (actual_fsize != 0)
    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
		    - actual_fsize, 0);

  /* If we haven't restored %r2 yet (no frame pointer, and a stack
     frame greater than 8k), do so now.  */
  if (ret_off != 0)
    load_reg (2, ret_off, STACK_POINTER_REGNUM);

  if (DO_FRAME_NOTES && crtl->calls_eh_return)
    {
      rtx sa = EH_RETURN_STACKADJ_RTX;

      emit_insn (gen_blockage ());
      emit_insn (TARGET_64BIT
		 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
		 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
    }
}

bool
pa_can_use_return_insn (void)
{
  if (!reload_completed)
    return false;

  if (frame_pointer_needed)
    return false;

  if (df_regs_ever_live_p (2))
    return false;

  if (crtl->profile)
    return false;

  return pa_compute_frame_size (get_frame_size (), 0) == 0;
}

rtx
hppa_pic_save_rtx (void)
{
  return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
}

#ifndef NO_DEFERRED_PROFILE_COUNTERS
#define NO_DEFERRED_PROFILE_COUNTERS 0
#endif


/* Vector of funcdef numbers.  */
static vec<int> funcdef_nos;

/* Output deferred profile counters.  */
static void
output_deferred_profile_counters (void)
{
  unsigned int i;
  int align, n;

  if (funcdef_nos.is_empty ())
    return;

  switch_to_section (data_section);
  align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
  ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));

  for (i = 0; funcdef_nos.iterate (i, &n); i++)
    {
      targetm.asm_out.internal_label (asm_out_file, "LP", n);
      assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
    }

  funcdef_nos.release ();
}

void
hppa_profile_hook (int label_no)
{
  rtx_code_label *label_rtx = gen_label_rtx ();
  int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE);
  rtx arg_bytes, begin_label_rtx, mcount, sym;
  rtx_insn *call_insn;
  char begin_label_name[16];
  bool use_mcount_pcrel_call;

  /* Set up call destination.  */
  sym = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
  pa_encode_label (sym);
  mcount = gen_rtx_MEM (Pmode, sym);

  /* If we can reach _mcount with a pc-relative call, we can optimize
     loading the address of the current function.  This requires linker
     long branch stub support.  */
  if (!TARGET_PORTABLE_RUNTIME
      && !TARGET_LONG_CALLS
      && (TARGET_SOM || flag_function_sections))
    use_mcount_pcrel_call = TRUE;
  else
    use_mcount_pcrel_call = FALSE;

  ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
			       label_no);
  begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));

  emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));

  if (!use_mcount_pcrel_call)
    {
      /* The address of the function is loaded into %r25 with an instruction-
	 relative sequence that avoids the use of relocations.  We use SImode
	 for the address of the function in both 32 and 64-bit code to avoid
	 having to provide DImode versions of the lcla2 pattern.  */
      if (TARGET_PA_20)
	emit_insn (gen_lcla2 (gen_rtx_REG (SImode, 25), label_rtx));
      else
	emit_insn (gen_lcla1 (gen_rtx_REG (SImode, 25), label_rtx));
    }

  if (!NO_DEFERRED_PROFILE_COUNTERS)
    {
      rtx count_label_rtx, addr, r24;
      char count_label_name[16];

      funcdef_nos.safe_push (label_no);
      ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
      count_label_rtx = gen_rtx_SYMBOL_REF (Pmode,
					    ggc_strdup (count_label_name));

      addr = force_reg (Pmode, count_label_rtx);
      r24 = gen_rtx_REG (Pmode, 24);
      emit_move_insn (r24, addr);

      arg_bytes = GEN_INT (TARGET_64BIT ? 24 : 12);
      if (use_mcount_pcrel_call)
	call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
						     begin_label_rtx));
      else
	call_insn = emit_call_insn (gen_call (mcount, arg_bytes));

      use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
    }
  else
    {
      arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8);
      if (use_mcount_pcrel_call)
	call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
						     begin_label_rtx));
      else
	call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
    }

  use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
  use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));

  /* Indicate the _mcount call cannot throw, nor will it execute a
     non-local goto.  */
  make_reg_eh_region_note_nothrow_nononlocal (call_insn);

  /* Allocate space for fixed arguments.  */
  if (reg_parm_stack_space > crtl->outgoing_args_size)
    crtl->outgoing_args_size = reg_parm_stack_space;
}
4806 
4807 /* Fetch the return address for the frame COUNT steps up from
4808    the current frame, after the prologue.  FRAMEADDR is the
4809    frame pointer of the COUNT frame.
4810 
4811    We want to ignore any export stub remnants here.  To handle this,
4812    we examine the code at the return address, and if it is an export
4813    stub, we return a memory rtx for the stub return address stored
4814    at frame-24.
4815 
4816    The value returned is used in two different ways:
4817 
4818 	1. To find a function's caller.
4819 
4820 	2. To change the return address for a function.
4821 
4822    This function handles most instances of case 1; however, it will
4823    fail if there are two levels of stubs to execute on the return
4824    path.  The only way I believe that can happen is if the return value
4825    needs a parameter relocation, which never happens for C code.
4826 
4827    This function handles most instances of case 2; however, it will
4828    fail if we did not originally have stub code on the return path
4829    but will need stub code on the new return path.  This can happen if
4830    the caller & callee are both in the main program, but the new
4831    return location is in a shared library.  */
4832 
4833 rtx
4834 pa_return_addr_rtx (int count, rtx frameaddr)
4835 {
4836   rtx label;
4837   rtx rp;
4838   rtx saved_rp;
4839   rtx ins;
4840 
4841   /* The instruction stream at the return address of a PA1.X export stub is:
4842 
4843 	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
4844 	0x004010a1 | stub+12:  ldsid (sr0,rp),r1
4845 	0x00011820 | stub+16:  mtsp r1,sr0
4846 	0xe0400002 | stub+20:  be,n 0(sr0,rp)
4847 
4848      0xe0400002 must be specified as -532676606 so that it won't be
4849      rejected as an invalid immediate operand on 64-bit hosts.
4850 
4851      The instruction stream at the return address of a PA2.0 export stub is:
4852 
4853 	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
4854 	0xe840d002 | stub+12:  bve,n (rp)
4855   */
4856 
4857   HOST_WIDE_INT insns[4];
4858   int i, len;
4859 
4860   if (count != 0)
4861     return NULL_RTX;
4862 
4863   rp = get_hard_reg_initial_val (Pmode, 2);
4864 
4865   if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4866     return rp;
4867 
4868   /* If there is no export stub then just use the value saved from
4869      the return pointer register.  */
4870 
4871   saved_rp = gen_reg_rtx (Pmode);
4872   emit_move_insn (saved_rp, rp);
4873 
4874   /* Get pointer to the instruction stream.  We have to mask out the
4875      privilege level from the two low order bits of the return address
4876      pointer here so that ins will point to the start of the first
4877      instruction that would have been executed if we returned.  */
4878   ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4879   label = gen_label_rtx ();
4880 
4881   if (TARGET_PA_20)
4882     {
4883       insns[0] = 0x4bc23fd1;
4884       insns[1] = -398405630;
4885       len = 2;
4886     }
4887   else
4888     {
4889       insns[0] = 0x4bc23fd1;
4890       insns[1] = 0x004010a1;
4891       insns[2] = 0x00011820;
4892       insns[3] = -532676606;
4893       len = 4;
4894     }
4895 
4896   /* Check the instruction stream at the normal return address for the
4897      export stub.  If it is an export stub, then our return address is
4898      really in -24[frameaddr].  */
4899 
4900   for (i = 0; i < len; i++)
4901     {
4902       rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4903       rtx op1 = GEN_INT (insns[i]);
4904       emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4905     }
4906 
4907   /* Here we know that our return address points to an export
4908      stub.  We don't want to return the address of the export stub,
4909      but rather the return address of the export stub.  That return
4910      address is stored at -24[frameaddr].  */
4911 
4912   emit_move_insn (saved_rp,
4913 		  gen_rtx_MEM (Pmode,
4914 			       memory_address (Pmode,
4915 					       plus_constant (Pmode, frameaddr,
4916 							      -24))));
4917 
4918   emit_label (label);
4919 
4920   return saved_rp;
4921 }
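
/* For example, a return address of 0x00012343 carries privilege level 3
   in its two low-order bits; masking with MASK_RETURN_ADDR yields
   0x00012340, the first instruction that would execute on return.  The
   word comparisons above then decide whether that address lies inside
   an export stub.  */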
4922 
4923 void
4924 pa_emit_bcond_fp (rtx operands[])
4925 {
4926   enum rtx_code code = GET_CODE (operands[0]);
4927   rtx operand0 = operands[1];
4928   rtx operand1 = operands[2];
4929   rtx label = operands[3];
4930 
4931   emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
4932 		          gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4933 
4934   emit_jump_insn (gen_rtx_SET (pc_rtx,
4935 			       gen_rtx_IF_THEN_ELSE (VOIDmode,
4936 						     gen_rtx_fmt_ee (NE,
4937 							      VOIDmode,
4938 							      gen_rtx_REG (CCFPmode, 0),
4939 							      const0_rtx),
4940 						     gen_rtx_LABEL_REF (VOIDmode, label),
4941 						     pc_rtx)));
4942 
4943 }
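
/* A sketch of the RTL emitted above for, say, (lt:DF operand0 operand1)
   branching to LABEL: first a compare that sets the FP status register,

	(set (reg:CCFP 0) (lt:CCFP operand0 operand1))

   then a jump that tests the result:

	(set (pc) (if_then_else (ne (reg:CCFP 0) (const_int 0))
				(label_ref LABEL) (pc)))  */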
4944 
4945 /* Adjust the cost of a scheduling dependency.  Return the new cost of
4946    a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
4947 
4948 static int
4949 pa_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4950 		unsigned int)
4951 {
4952   enum attr_type attr_type;
4953 
4954   /* Don't adjust costs for a pa8000 chip; also do not adjust any
4955      true dependencies, as they are described with bypasses now.  */
4956   if (pa_cpu >= PROCESSOR_8000 || dep_type == 0)
4957     return cost;
4958 
4959   if (! recog_memoized (insn))
4960     return 0;
4961 
4962   attr_type = get_attr_type (insn);
4963 
4964   switch (dep_type)
4965     {
4966     case REG_DEP_ANTI:
4967       /* Anti dependency; DEP_INSN reads a register that INSN writes some
4968 	 cycles later.  */
4969 
4970       if (attr_type == TYPE_FPLOAD)
4971 	{
4972 	  rtx pat = PATTERN (insn);
4973 	  rtx dep_pat = PATTERN (dep_insn);
4974 	  if (GET_CODE (pat) == PARALLEL)
4975 	    {
4976 	      /* This happens for the fldXs,mb patterns.  */
4977 	      pat = XVECEXP (pat, 0, 0);
4978 	    }
4979 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4980 	    /* If this happens, we have to extend this to schedule
4981 	       optimally.  Return 0 for now.  */
4982 	    return 0;
4983 
4984 	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4985 	    {
4986 	      if (! recog_memoized (dep_insn))
4987 		return 0;
4988 	      switch (get_attr_type (dep_insn))
4989 		{
4990 		case TYPE_FPALU:
4991 		case TYPE_FPMULSGL:
4992 		case TYPE_FPMULDBL:
4993 		case TYPE_FPDIVSGL:
4994 		case TYPE_FPDIVDBL:
4995 		case TYPE_FPSQRTSGL:
4996 		case TYPE_FPSQRTDBL:
4997 		  /* A fpload can't be issued until one cycle before a
4998 		     preceding arithmetic operation has finished if
4999 		     the target of the fpload is any of the sources
5000 		     (or destination) of the arithmetic operation.  */
5001 		  return insn_default_latency (dep_insn) - 1;
5002 
5003 		default:
5004 		  return 0;
5005 		}
5006 	    }
5007 	}
5008       else if (attr_type == TYPE_FPALU)
5009 	{
5010 	  rtx pat = PATTERN (insn);
5011 	  rtx dep_pat = PATTERN (dep_insn);
5012 	  if (GET_CODE (pat) == PARALLEL)
5013 	    {
5014 	      /* This happens for the fldXs,mb patterns.  */
5015 	      pat = XVECEXP (pat, 0, 0);
5016 	    }
5017 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5018 	    /* If this happens, we have to extend this to schedule
5019 	       optimally.  Return 0 for now.  */
5020 	    return 0;
5021 
5022 	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
5023 	    {
5024 	      if (! recog_memoized (dep_insn))
5025 		return 0;
5026 	      switch (get_attr_type (dep_insn))
5027 		{
5028 		case TYPE_FPDIVSGL:
5029 		case TYPE_FPDIVDBL:
5030 		case TYPE_FPSQRTSGL:
5031 		case TYPE_FPSQRTDBL:
5032 		  /* An ALU flop can't be issued until two cycles before a
5033 		     preceding divide or sqrt operation has finished if
5034 		     the target of the ALU flop is any of the sources
5035 		     (or destination) of the divide or sqrt operation.  */
5036 		  return insn_default_latency (dep_insn) - 2;
5037 
5038 		default:
5039 		  return 0;
5040 		}
5041 	    }
5042 	}
5043 
5044       /* For other anti dependencies, the cost is 0.  */
5045       return 0;
5046 
5047     case REG_DEP_OUTPUT:
5048       /* Output dependency; DEP_INSN writes a register that INSN writes some
5049 	 cycles later.  */
5050       if (attr_type == TYPE_FPLOAD)
5051 	{
5052 	  rtx pat = PATTERN (insn);
5053 	  rtx dep_pat = PATTERN (dep_insn);
5054 	  if (GET_CODE (pat) == PARALLEL)
5055 	    {
5056 	      /* This happens for the fldXs,mb patterns.  */
5057 	      pat = XVECEXP (pat, 0, 0);
5058 	    }
5059 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5060 	    /* If this happens, we have to extend this to schedule
5061 	       optimally.  Return 0 for now.  */
5062 	    return 0;
5063 
5064 	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
5065 	    {
5066 	      if (! recog_memoized (dep_insn))
5067 		return 0;
5068 	      switch (get_attr_type (dep_insn))
5069 		{
5070 		case TYPE_FPALU:
5071 		case TYPE_FPMULSGL:
5072 		case TYPE_FPMULDBL:
5073 		case TYPE_FPDIVSGL:
5074 		case TYPE_FPDIVDBL:
5075 		case TYPE_FPSQRTSGL:
5076 		case TYPE_FPSQRTDBL:
5077 		  /* A fpload can't be issued until one cycle before a
5078 		     preceding arithmetic operation has finished if
5079 		     the target of the fpload is the destination of the
5080 		     arithmetic operation.
5081 
5082 		     Exception: For PA7100LC, PA7200 and PA7300, the cost
5083 		     is 3 cycles, unless they bundle together.   We also
5084 		     pay the penalty if the second insn is a fpload.  */
5085 		  return insn_default_latency (dep_insn) - 1;
5086 
5087 		default:
5088 		  return 0;
5089 		}
5090 	    }
5091 	}
5092       else if (attr_type == TYPE_FPALU)
5093 	{
5094 	  rtx pat = PATTERN (insn);
5095 	  rtx dep_pat = PATTERN (dep_insn);
5096 	  if (GET_CODE (pat) == PARALLEL)
5097 	    {
5098 	      /* This happens for the fldXs,mb patterns.  */
5099 	      pat = XVECEXP (pat, 0, 0);
5100 	    }
5101 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5102 	    /* If this happens, we have to extend this to schedule
5103 	       optimally.  Return 0 for now.  */
5104 	    return 0;
5105 
5106 	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
5107 	    {
5108 	      if (! recog_memoized (dep_insn))
5109 		return 0;
5110 	      switch (get_attr_type (dep_insn))
5111 		{
5112 		case TYPE_FPDIVSGL:
5113 		case TYPE_FPDIVDBL:
5114 		case TYPE_FPSQRTSGL:
5115 		case TYPE_FPSQRTDBL:
5116 		  /* An ALU flop can't be issued until two cycles before a
5117 		     preceding divide or sqrt operation has finished if
5118 		     the target of the ALU flop is also the target of
5119 		     the divide or sqrt operation.  */
5120 		  return insn_default_latency (dep_insn) - 2;
5121 
5122 		default:
5123 		  return 0;
5124 		}
5125 	    }
5126 	}
5127 
5128       /* For other output dependencies, the cost is 0.  */
5129       return 0;
5130 
5131     default:
5132       gcc_unreachable ();
5133     }
5134 }
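
/* For example, assuming an FP multiply with a default latency of three
   cycles, an anti-dependent fpload whose target feeds that multiply is
   given a cost of two above (latency - 1), and an FPALU op anti-dependent
   on a divide with a latency of eight would cost six (latency - 2).  */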
5135 
5136 /* The 700 can only issue a single insn at a time.
5137    The 7XXX processors can issue two insns at a time.
5138    The 8000 can issue four insns at a time.  */
5139 static int
5140 pa_issue_rate (void)
5141 {
5142   switch (pa_cpu)
5143     {
5144     case PROCESSOR_700:		return 1;
5145     case PROCESSOR_7100:	return 2;
5146     case PROCESSOR_7100LC:	return 2;
5147     case PROCESSOR_7200:	return 2;
5148     case PROCESSOR_7300:	return 2;
5149     case PROCESSOR_8000:	return 4;
5150 
5151     default:
5152       gcc_unreachable ();
5153     }
5154 }
5155 
5156 
5157 
5158 /* Return the length of INSN, adjusted as needed, given that its length
5159    has already been computed as LENGTH.  Return LENGTH if no adjustment is
5160    necessary.
5161 
5162    Also compute the length of an inline block move here as it is too
5163    complicated to express as a length attribute in pa.md.  */
5164 int
5165 pa_adjust_insn_length (rtx_insn *insn, int length)
5166 {
5167   rtx pat = PATTERN (insn);
5168 
5169   /* If length is negative or undefined, provide initial length.  */
5170   if ((unsigned int) length >= INT_MAX)
5171     {
5172       if (GET_CODE (pat) == SEQUENCE)
5173 	insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
5174 
5175       switch (get_attr_type (insn))
5176 	{
5177 	case TYPE_MILLI:
5178 	  length = pa_attr_length_millicode_call (insn);
5179 	  break;
5180 	case TYPE_CALL:
5181 	  length = pa_attr_length_call (insn, 0);
5182 	  break;
5183 	case TYPE_SIBCALL:
5184 	  length = pa_attr_length_call (insn, 1);
5185 	  break;
5186 	case TYPE_DYNCALL:
5187 	  length = pa_attr_length_indirect_call (insn);
5188 	  break;
5189 	case TYPE_SH_FUNC_ADRS:
5190 	  length = pa_attr_length_millicode_call (insn) + 20;
5191 	  break;
5192 	default:
5193 	  gcc_unreachable ();
5194 	}
5195     }
5196 
5197   /* Block move pattern.  */
5198   if (NONJUMP_INSN_P (insn)
5199       && GET_CODE (pat) == PARALLEL
5200       && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5201       && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5202       && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
5203       && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5204       && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
5205     length += compute_cpymem_length (insn) - 4;
5206   /* Block clear pattern.  */
5207   else if (NONJUMP_INSN_P (insn)
5208 	   && GET_CODE (pat) == PARALLEL
5209 	   && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5210 	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5211 	   && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5212 	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
5213     length += compute_clrmem_length (insn) - 4;
5214   /* Conditional branch with an unfilled delay slot.  */
5215   else if (JUMP_P (insn) && ! simplejump_p (insn))
5216     {
5217       /* Adjust a short backwards conditional with an unfilled delay slot.  */
5218       if (GET_CODE (pat) == SET
5219 	  && length == 4
5220 	  && JUMP_LABEL (insn) != NULL_RTX
5221 	  && ! forward_branch_p (insn))
5222 	length += 4;
5223       else if (GET_CODE (pat) == PARALLEL
5224 	       && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5225 	       && length == 4)
5226 	length += 4;
5227       /* Adjust dbra insn with short backwards conditional branch with
5228 	 unfilled delay slot -- only for the case where the counter is in a
5229 	 general register.  */
5230       else if (GET_CODE (pat) == PARALLEL
5231 	       && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5232 	       && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5233  	       && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5234 	       && length == 4
5235 	       && ! forward_branch_p (insn))
5236 	length += 4;
5237     }
5238   return length;
5239 }
5240 
5241 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook.  */
5242 
5243 static bool
5244 pa_print_operand_punct_valid_p (unsigned char code)
5245 {
5246   if (code == '@'
5247       || code == '#'
5248       || code == '*'
5249       || code == '^')
5250     return true;
5251 
5252   return false;
5253 }
5254 
5255 /* Print operand X (an rtx) in assembler syntax to file FILE.
5256    CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5257    For `%' followed by punctuation, CODE is the punctuation and X is null.  */
5258 
5259 void
5260 pa_print_operand (FILE *file, rtx x, int code)
5261 {
5262   switch (code)
5263     {
5264     case '#':
5265       /* Output a 'nop' if there's nothing for the delay slot.  */
5266       if (dbr_sequence_length () == 0)
5267 	fputs ("\n\tnop", file);
5268       return;
5269     case '*':
5270       /* Output a nullification completer if there's nothing for the
5271 	 delay slot or nullification is requested.  */
5272       if (dbr_sequence_length () == 0 ||
5273 	  (final_sequence &&
5274 	   INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5275         fputs (",n", file);
5276       return;
5277     case 'R':
5278       /* Print out the second register name of a register pair.
5279 	 I.e., R (6) => 7.  */
5280       fputs (reg_names[REGNO (x) + 1], file);
5281       return;
5282     case 'r':
5283       /* A register or zero.  */
5284       if (x == const0_rtx
5285 	  || (x == CONST0_RTX (DFmode))
5286 	  || (x == CONST0_RTX (SFmode)))
5287 	{
5288 	  fputs ("%r0", file);
5289 	  return;
5290 	}
5291       else
5292 	break;
5293     case 'f':
5294       /* A register or zero (floating point).  */
5295       if (x == const0_rtx
5296 	  || (x == CONST0_RTX (DFmode))
5297 	  || (x == CONST0_RTX (SFmode)))
5298 	{
5299 	  fputs ("%fr0", file);
5300 	  return;
5301 	}
5302       else
5303 	break;
5304     case 'A':
5305       {
5306 	rtx xoperands[2];
5307 
5308 	xoperands[0] = XEXP (XEXP (x, 0), 0);
5309 	xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5310 	pa_output_global_address (file, xoperands[1], 0);
5311         fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5312 	return;
5313       }
5314 
5315     case 'C':			/* Plain (C)ondition */
5316     case 'X':
5317       switch (GET_CODE (x))
5318 	{
5319 	case EQ:
5320 	  fputs ("=", file);  break;
5321 	case NE:
5322 	  fputs ("<>", file);  break;
5323 	case GT:
5324 	  fputs (">", file);  break;
5325 	case GE:
5326 	  fputs (">=", file);  break;
5327 	case GEU:
5328 	  fputs (">>=", file);  break;
5329 	case GTU:
5330 	  fputs (">>", file);  break;
5331 	case LT:
5332 	  fputs ("<", file);  break;
5333 	case LE:
5334 	  fputs ("<=", file);  break;
5335 	case LEU:
5336 	  fputs ("<<=", file);  break;
5337 	case LTU:
5338 	  fputs ("<<", file);  break;
5339 	default:
5340 	  gcc_unreachable ();
5341 	}
5342       return;
5343     case 'N':			/* Condition, (N)egated */
5344       switch (GET_CODE (x))
5345 	{
5346 	case EQ:
5347 	  fputs ("<>", file);  break;
5348 	case NE:
5349 	  fputs ("=", file);  break;
5350 	case GT:
5351 	  fputs ("<=", file);  break;
5352 	case GE:
5353 	  fputs ("<", file);  break;
5354 	case GEU:
5355 	  fputs ("<<", file);  break;
5356 	case GTU:
5357 	  fputs ("<<=", file);  break;
5358 	case LT:
5359 	  fputs (">=", file);  break;
5360 	case LE:
5361 	  fputs (">", file);  break;
5362 	case LEU:
5363 	  fputs (">>", file);  break;
5364 	case LTU:
5365 	  fputs (">>=", file);  break;
5366 	default:
5367 	  gcc_unreachable ();
5368 	}
5369       return;
5370     /* For floating point comparisons.  Note that the output
5371        predicates are the complement of the desired mode.  The
5372        conditions for GT, GE, LT, LE and LTGT cause an invalid
5373        operation exception if the result is unordered and this
5374        exception is enabled in the floating-point status register.  */
5375     case 'Y':
5376       switch (GET_CODE (x))
5377 	{
5378 	case EQ:
5379 	  fputs ("!=", file);  break;
5380 	case NE:
5381 	  fputs ("=", file);  break;
5382 	case GT:
5383 	  fputs ("!>", file);  break;
5384 	case GE:
5385 	  fputs ("!>=", file);  break;
5386 	case LT:
5387 	  fputs ("!<", file);  break;
5388 	case LE:
5389 	  fputs ("!<=", file);  break;
5390 	case LTGT:
5391 	  fputs ("!<>", file);  break;
5392 	case UNLE:
5393 	  fputs ("!?<=", file);  break;
5394 	case UNLT:
5395 	  fputs ("!?<", file);  break;
5396 	case UNGE:
5397 	  fputs ("!?>=", file);  break;
5398 	case UNGT:
5399 	  fputs ("!?>", file);  break;
5400 	case UNEQ:
5401 	  fputs ("!?=", file);  break;
5402 	case UNORDERED:
5403 	  fputs ("!?", file);  break;
5404 	case ORDERED:
5405 	  fputs ("?", file);  break;
5406 	default:
5407 	  gcc_unreachable ();
5408 	}
5409       return;
5410     case 'S':			/* Condition, operands are (S)wapped.  */
5411       switch (GET_CODE (x))
5412 	{
5413 	case EQ:
5414 	  fputs ("=", file);  break;
5415 	case NE:
5416 	  fputs ("<>", file);  break;
5417 	case GT:
5418 	  fputs ("<", file);  break;
5419 	case GE:
5420 	  fputs ("<=", file);  break;
5421 	case GEU:
5422 	  fputs ("<<=", file);  break;
5423 	case GTU:
5424 	  fputs ("<<", file);  break;
5425 	case LT:
5426 	  fputs (">", file);  break;
5427 	case LE:
5428 	  fputs (">=", file);  break;
5429 	case LEU:
5430 	  fputs (">>=", file);  break;
5431 	case LTU:
5432 	  fputs (">>", file);  break;
5433 	default:
5434 	  gcc_unreachable ();
5435 	}
5436       return;
5437     case 'B':			/* Condition, (B)oth swapped and negate.  */
5438       switch (GET_CODE (x))
5439 	{
5440 	case EQ:
5441 	  fputs ("<>", file);  break;
5442 	case NE:
5443 	  fputs ("=", file);  break;
5444 	case GT:
5445 	  fputs (">=", file);  break;
5446 	case GE:
5447 	  fputs (">", file);  break;
5448 	case GEU:
5449 	  fputs (">>", file);  break;
5450 	case GTU:
5451 	  fputs (">>=", file);  break;
5452 	case LT:
5453 	  fputs ("<=", file);  break;
5454 	case LE:
5455 	  fputs ("<", file);  break;
5456 	case LEU:
5457 	  fputs ("<<", file);  break;
5458 	case LTU:
5459 	  fputs ("<<=", file);  break;
5460 	default:
5461 	  gcc_unreachable ();
5462 	}
5463       return;
5464     case 'k':
5465       gcc_assert (GET_CODE (x) == CONST_INT);
5466       fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5467       return;
5468     case 'Q':
5469       gcc_assert (GET_CODE (x) == CONST_INT);
5470       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5471       return;
5472     case 'L':
5473       gcc_assert (GET_CODE (x) == CONST_INT);
5474       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5475       return;
5476     case 'o':
5477       gcc_assert (GET_CODE (x) == CONST_INT
5478 		  && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
5479       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5480       return;
5481     case 'O':
5482       gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5483       fprintf (file, "%d", exact_log2 (INTVAL (x)));
5484       return;
5485     case 'p':
5486       gcc_assert (GET_CODE (x) == CONST_INT);
5487       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5488       return;
5489     case 'P':
5490       gcc_assert (GET_CODE (x) == CONST_INT);
5491       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5492       return;
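    /* As examples of the bit-position operands above: with x == 5, 'k'
       prints -6 (~5), 'Q' prints 59 (64 - 5), 'L' prints 27 (32 - 5),
       'p' prints 58 (63 - 5), and 'P' prints 26 (31 - 5).  */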
5493     case 'I':
5494       if (GET_CODE (x) == CONST_INT)
5495 	fputs ("i", file);
5496       return;
5497     case 'M':
5498     case 'F':
5499       switch (GET_CODE (XEXP (x, 0)))
5500 	{
5501 	case PRE_DEC:
5502 	case PRE_INC:
5503 	  if (ASSEMBLER_DIALECT == 0)
5504 	    fputs ("s,mb", file);
5505 	  else
5506 	    fputs (",mb", file);
5507 	  break;
5508 	case POST_DEC:
5509 	case POST_INC:
5510 	  if (ASSEMBLER_DIALECT == 0)
5511 	    fputs ("s,ma", file);
5512 	  else
5513 	    fputs (",ma", file);
5514 	  break;
5515 	case PLUS:
5516 	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5517 	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5518 	    {
5519 	      if (ASSEMBLER_DIALECT == 0)
5520 		fputs ("x", file);
5521 	    }
5522 	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5523 		   || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5524 	    {
5525 	      if (ASSEMBLER_DIALECT == 0)
5526 		fputs ("x,s", file);
5527 	      else
5528 		fputs (",s", file);
5529 	    }
5530 	  else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5531 	    fputs ("s", file);
5532 	  break;
5533 	default:
5534 	  if (code == 'F' && ASSEMBLER_DIALECT == 0)
5535 	    fputs ("s", file);
5536 	  break;
5537 	}
5538       return;
5539     case 'G':
5540       pa_output_global_address (file, x, 0);
5541       return;
5542     case 'H':
5543       pa_output_global_address (file, x, 1);
5544       return;
5545     case 0:			/* Don't do anything special */
5546       break;
5547     case 'Z':
5548       {
5549 	unsigned op[3];
5550 	compute_zdepwi_operands (INTVAL (x), op);
5551 	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5552 	return;
5553       }
5554     case 'z':
5555       {
5556 	unsigned op[3];
5557 	compute_zdepdi_operands (INTVAL (x), op);
5558 	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5559 	return;
5560       }
5561     case 'c':
5562       /* We can get here from a .vtable_inherit due to our
5563 	 CONSTANT_ADDRESS_P rejecting perfectly good constant
5564 	 addresses.  */
5565       break;
5566     default:
5567       gcc_unreachable ();
5568     }
5569   if (GET_CODE (x) == REG)
5570     {
5571       fputs (reg_names [REGNO (x)], file);
5572       if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5573 	{
5574 	  fputs ("R", file);
5575 	  return;
5576 	}
5577       if (FP_REG_P (x)
5578 	  && GET_MODE_SIZE (GET_MODE (x)) <= 4
5579 	  && (REGNO (x) & 1) == 0)
5580 	fputs ("L", file);
5581     }
5582   else if (GET_CODE (x) == MEM)
5583     {
5584       int size = GET_MODE_SIZE (GET_MODE (x));
5585       rtx base = NULL_RTX;
5586       switch (GET_CODE (XEXP (x, 0)))
5587 	{
5588 	case PRE_DEC:
5589 	case POST_DEC:
5590           base = XEXP (XEXP (x, 0), 0);
5591 	  fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5592 	  break;
5593 	case PRE_INC:
5594 	case POST_INC:
5595           base = XEXP (XEXP (x, 0), 0);
5596 	  fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5597 	  break;
5598 	case PLUS:
5599 	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5600 	    fprintf (file, "%s(%s)",
5601 		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5602 		     reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5603 	  else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5604 	    fprintf (file, "%s(%s)",
5605 		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5606 		     reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5607 	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5608 		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5609 	    {
5610 	      /* Because the REG_POINTER flag can get lost during reload,
5611 		 pa_legitimate_address_p canonicalizes the order of the
5612 		 index and base registers in the combined move patterns.  */
5613 	      rtx base = XEXP (XEXP (x, 0), 1);
5614 	      rtx index = XEXP (XEXP (x, 0), 0);
5615 
5616 	      fprintf (file, "%s(%s)",
5617 		       reg_names [REGNO (index)], reg_names [REGNO (base)]);
5618 	    }
5619 	  else
5620 	    output_address (GET_MODE (x), XEXP (x, 0));
5621 	  break;
5622 	default:
5623 	  output_address (GET_MODE (x), XEXP (x, 0));
5624 	  break;
5625 	}
5626     }
5627   else
5628     output_addr_const (file, x);
5629 }
5630 
5631 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.  */
5632 
5633 void
5634 pa_output_global_address (FILE *file, rtx x, int round_constant)
5635 {
5636 
5637   /* Imagine  (high (const (plus ...))).  */
5638   if (GET_CODE (x) == HIGH)
5639     x = XEXP (x, 0);
5640 
5641   if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5642     output_addr_const (file, x);
5643   else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5644     {
5645       output_addr_const (file, x);
5646       fputs ("-$global$", file);
5647     }
5648   else if (GET_CODE (x) == CONST)
5649     {
5650       const char *sep = "";
5651       int offset = 0;		/* assembler wants -$global$ at end */
5652       rtx base = NULL_RTX;
5653 
5654       switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5655 	{
5656 	case LABEL_REF:
5657 	case SYMBOL_REF:
5658 	  base = XEXP (XEXP (x, 0), 0);
5659 	  output_addr_const (file, base);
5660 	  break;
5661 	case CONST_INT:
5662 	  offset = INTVAL (XEXP (XEXP (x, 0), 0));
5663 	  break;
5664 	default:
5665 	  gcc_unreachable ();
5666 	}
5667 
5668       switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5669 	{
5670 	case LABEL_REF:
5671 	case SYMBOL_REF:
5672 	  base = XEXP (XEXP (x, 0), 1);
5673 	  output_addr_const (file, base);
5674 	  break;
5675 	case CONST_INT:
5676 	  offset = INTVAL (XEXP (XEXP (x, 0), 1));
5677 	  break;
5678 	default:
5679 	  gcc_unreachable ();
5680 	}
5681 
5682       /* How bogus.  The compiler is apparently responsible for
5683 	 rounding the constant if it uses an LR field selector.
5684 
5685 	 The linker and/or assembler seem a better place since
5686 	 they have to do this kind of thing already.
5687 
5688 	 If we fail to do this, HP's optimizing linker may eliminate
5689 	 an addil, but not update the ldw/stw/ldo instruction that
5690 	 uses the result of the addil.  */
5691       if (round_constant)
5692 	offset = ((offset + 0x1000) & ~0x1fff);
5693 
5694       switch (GET_CODE (XEXP (x, 0)))
5695 	{
5696 	case PLUS:
5697 	  if (offset < 0)
5698 	    {
5699 	      offset = -offset;
5700 	      sep = "-";
5701 	    }
5702 	  else
5703 	    sep = "+";
5704 	  break;
5705 
5706 	case MINUS:
5707 	  gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5708 	  sep = "-";
5709 	  break;
5710 
5711 	default:
5712 	  gcc_unreachable ();
5713 	}
5714 
5715       if (!read_only_operand (base, VOIDmode) && !flag_pic)
5716 	fputs ("-$global$", file);
5717       if (offset)
5718 	fprintf (file, "%s%d", sep, offset);
5719     }
5720   else
5721     output_addr_const (file, x);
5722 }
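
/* For example, given (const (plus (symbol_ref "foo") (const_int 4))) in
   non-PIC code where foo is not read-only, this prints "foo-$global$+4".
   With ROUND_CONSTANT set, the offset is first rounded to a multiple of
   0x2000, so the 4 would be dropped entirely.  */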
5723 
5724 /* Output boilerplate text to appear at the beginning of the file.
5725    There are several possible versions.  */
5726 #define aputs(x) fputs(x, asm_out_file)
5727 static inline void
5728 pa_file_start_level (void)
5729 {
5730   if (TARGET_64BIT)
5731     aputs ("\t.LEVEL 2.0w\n");
5732   else if (TARGET_PA_20)
5733     aputs ("\t.LEVEL 2.0\n");
5734   else if (TARGET_PA_11)
5735     aputs ("\t.LEVEL 1.1\n");
5736   else
5737     aputs ("\t.LEVEL 1.0\n");
5738 }
5739 
5740 static inline void
5741 pa_file_start_space (int sortspace)
5742 {
5743   aputs ("\t.SPACE $PRIVATE$");
5744   if (sortspace)
5745     aputs (",SORT=16");
5746   aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5747   if (flag_tm)
5748     aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5749   aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5750 	 "\n\t.SPACE $TEXT$");
5751   if (sortspace)
5752     aputs (",SORT=8");
5753   aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5754 	 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5755 }
5756 
5757 static inline void
5758 pa_file_start_file (int want_version)
5759 {
5760   if (write_symbols != NO_DEBUG)
5761     {
5762       output_file_directive (asm_out_file, main_input_filename);
5763       if (want_version)
5764 	aputs ("\t.version\t\"01.01\"\n");
5765     }
5766 }
5767 
5768 static inline void
5769 pa_file_start_mcount (const char *aswhat)
5770 {
5771   if (profile_flag)
5772     fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5773 }
5774 
5775 static void
5776 pa_elf_file_start (void)
5777 {
5778   pa_file_start_level ();
5779   pa_file_start_mcount ("ENTRY");
5780   pa_file_start_file (0);
5781 }
5782 
5783 static void
5784 pa_som_file_start (void)
5785 {
5786   pa_file_start_level ();
5787   pa_file_start_space (0);
5788   aputs ("\t.IMPORT $global$,DATA\n"
5789          "\t.IMPORT $$dyncall,MILLICODE\n");
5790   pa_file_start_mcount ("CODE");
5791   pa_file_start_file (0);
5792 }
5793 
5794 static void
5795 pa_linux_file_start (void)
5796 {
5797   pa_file_start_file (1);
5798   pa_file_start_level ();
5799   pa_file_start_mcount ("CODE");
5800 }
5801 
5802 static void
5803 pa_hpux64_gas_file_start (void)
5804 {
5805   pa_file_start_level ();
5806 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5807   if (profile_flag)
5808     ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5809 #endif
5810   pa_file_start_file (1);
5811 }
5812 
5813 static void
5814 pa_hpux64_hpas_file_start (void)
5815 {
5816   pa_file_start_level ();
5817   pa_file_start_space (1);
5818   pa_file_start_mcount ("CODE");
5819   pa_file_start_file (0);
5820 }
5821 #undef aputs
5822 
5823 /* Search the deferred plabel list for SYMBOL and return its internal
5824    label.  If an entry for SYMBOL is not found, a new entry is created.  */
5825 
5826 rtx
5827 pa_get_deferred_plabel (rtx symbol)
5828 {
5829   const char *fname = XSTR (symbol, 0);
5830   size_t i;
5831 
5832   /* See if we have already put this function on the list of deferred
5833      plabels.  This list is generally small, so a linear search is not
5834      too ugly.  If it proves too slow, replace it with something faster.  */
5835   for (i = 0; i < n_deferred_plabels; i++)
5836     if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5837       break;
5838 
5839   /* If the deferred plabel list is empty, or this entry was not found
5840      on the list, create a new entry on the list.  */
5841   if (deferred_plabels == NULL || i == n_deferred_plabels)
5842     {
5843       tree id;
5844 
5845       if (deferred_plabels == 0)
5846 	deferred_plabels =  ggc_alloc<deferred_plabel> ();
5847       else
5848         deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5849                                           deferred_plabels,
5850                                           n_deferred_plabels + 1);
5851 
5852       i = n_deferred_plabels++;
5853       deferred_plabels[i].internal_label = gen_label_rtx ();
5854       deferred_plabels[i].symbol = symbol;
5855 
5856       /* Gross.  We have just implicitly taken the address of this
5857 	 function.  Mark it in the same manner as assemble_name.  */
5858       id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5859       if (id)
5860 	mark_referenced (id);
5861     }
5862 
5863   return deferred_plabels[i].internal_label;
5864 }
5865 
5866 static void
5867 output_deferred_plabels (void)
5868 {
5869   size_t i;
5870 
5871   /* If we have some deferred plabels, then we need to switch into the
5872      data or readonly data section, and align it to a 4 byte boundary
5873      before outputting the deferred plabels.  */
5874   if (n_deferred_plabels)
5875     {
5876       switch_to_section (flag_pic ? data_section : readonly_data_section);
5877       ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5878     }
5879 
5880   /* Now output the deferred plabels.  */
5881   for (i = 0; i < n_deferred_plabels; i++)
5882     {
5883       targetm.asm_out.internal_label (asm_out_file, "L",
5884 		 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5885       assemble_integer (deferred_plabels[i].symbol,
5886 			TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5887     }
5888 }
5889 
5890 /* Initialize optabs to point to emulation routines.  */
5891 
5892 static void
5893 pa_init_libfuncs (void)
5894 {
5895   if (HPUX_LONG_DOUBLE_LIBRARY)
5896     {
5897       set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5898       set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5899       set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5900       set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5901       set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5902       set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5903       set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5904       set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5905       set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5906 
5907       set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5908       set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5909       set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5910       set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5911       set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5912       set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5913       set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5914 
5915       set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5916       set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5917       set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5918       set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5919 
5920       set_conv_libfunc (sfix_optab, SImode, TFmode,
5921 			TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5922 				     : "_U_Qfcnvfxt_quad_to_sgl");
5923       set_conv_libfunc (sfix_optab, DImode, TFmode,
5924 			"_U_Qfcnvfxt_quad_to_dbl");
5925       set_conv_libfunc (ufix_optab, SImode, TFmode,
5926 			"_U_Qfcnvfxt_quad_to_usgl");
5927       set_conv_libfunc (ufix_optab, DImode, TFmode,
5928 			"_U_Qfcnvfxt_quad_to_udbl");
5929 
5930       set_conv_libfunc (sfloat_optab, TFmode, SImode,
5931 			"_U_Qfcnvxf_sgl_to_quad");
5932       set_conv_libfunc (sfloat_optab, TFmode, DImode,
5933 			"_U_Qfcnvxf_dbl_to_quad");
5934       set_conv_libfunc (ufloat_optab, TFmode, SImode,
5935 			"_U_Qfcnvxf_usgl_to_quad");
5936       set_conv_libfunc (ufloat_optab, TFmode, DImode,
5937 			"_U_Qfcnvxf_udbl_to_quad");
5938     }
5939 
5940   if (TARGET_SYNC_LIBCALL)
5941     init_sync_libfuncs (8);
5942 }
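
/* With the mappings above in place, a quad-precision addition such as a
   long double "a + b" on HP-UX expands to a libcall to _U_Qfadd instead
   of inline code.  */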
5943 
5944 /* HP's millicode routines mean something special to the assembler.
5945    Keep track of which ones we have used.  */
5946 
5947 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5948 static void import_milli (enum millicodes);
5949 static char imported[(int) end1000];
5950 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5951 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5952 #define MILLI_START 10
5953 
5954 static void
5955 import_milli (enum millicodes code)
5956 {
5957   char str[sizeof (import_string)];
5958 
5959   if (!imported[(int) code])
5960     {
5961       imported[(int) code] = 1;
5962       strcpy (str, import_string);
5963       memcpy (str + MILLI_START, milli_names[(int) code], 4);
5964       output_asm_insn (str, 0);
5965     }
5966 }
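
/* For example, import_milli (mulI) outputs ".IMPORT $$mulI,MILLICODE"
   the first time it is called and is a no-op on subsequent calls.  */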
5967 
5968 /* The register constraints have put the operands and return value in
5969    the proper registers.  */
5970 
5971 const char *
5972 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
5973 {
5974   import_milli (mulI);
5975   return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5976 }
5977 
5978 /* Emit the rtl for doing a division by a constant.  */
5979 
5980 /* Do magic division millicodes exist for this value? */
5981 const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5982 
5983 /* We'll use an array to keep track of the magic millicodes and
5984    whether or not we've used them already. [n][0] is signed, [n][1] is
5985    unsigned.  */
5986 
5987 static int div_milli[16][2];
5988 
5989 int
5990 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5991 {
5992   if (GET_CODE (operands[2]) == CONST_INT
5993       && INTVAL (operands[2]) > 0
5994       && INTVAL (operands[2]) < 16
5995       && pa_magic_milli[INTVAL (operands[2])])
5996     {
5997       rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5998 
5999       emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
6000       emit
6001 	(gen_rtx_PARALLEL
6002 	 (VOIDmode,
6003 	  gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
6004 				     gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
6005 						     SImode,
6006 						     gen_rtx_REG (SImode, 26),
6007 						     operands[2])),
6008 		     gen_rtx_CLOBBER (VOIDmode, operands[4]),
6009 		     gen_rtx_CLOBBER (VOIDmode, operands[3]),
6010 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
6011 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
6012 		     gen_rtx_CLOBBER (VOIDmode, ret))));
6013       emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
6014       return 1;
6015     }
6016   return 0;
6017 }
6018 
6019 const char *
6020 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
6021 {
6022   int divisor;
6023 
6024   /* If the divisor is a constant, try to use one of the special
6025      opcodes.  */
6026   if (GET_CODE (operands[0]) == CONST_INT)
6027     {
6028       static char buf[100];
6029       divisor = INTVAL (operands[0]);
6030       if (!div_milli[divisor][unsignedp])
6031 	{
6032 	  div_milli[divisor][unsignedp] = 1;
6033 	  if (unsignedp)
6034 	    output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
6035 	  else
6036 	    output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
6037 	}
6038       if (unsignedp)
6039 	{
6040 	  sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
6041 		   INTVAL (operands[0]));
6042 	  return pa_output_millicode_call (insn,
6043 					   gen_rtx_SYMBOL_REF (SImode, buf));
6044 	}
6045       else
6046 	{
6047 	  sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
6048 		   INTVAL (operands[0]));
6049 	  return pa_output_millicode_call (insn,
6050 					   gen_rtx_SYMBOL_REF (SImode, buf));
6051 	}
6052     }
6053   /* Divisor isn't a special constant.  */
6054   else
6055     {
6056       if (unsignedp)
6057 	{
6058 	  import_milli (divU);
6059 	  return pa_output_millicode_call (insn,
6060 					gen_rtx_SYMBOL_REF (SImode, "$$divU"));
6061 	}
6062       else
6063 	{
6064 	  import_milli (divI);
6065 	  return pa_output_millicode_call (insn,
6066 					gen_rtx_SYMBOL_REF (SImode, "$$divI"));
6067 	}
6068     }
6069 }
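
/* As an example, an unsigned division by the magic constant 3 emits
   ".IMPORT $$divU_3,MILLICODE" once and then a millicode call to
   $$divU_3, while a non-constant divisor falls back to the generic
   $$divU routine.  */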
6070 
6071 /* Output a $$rem millicode to do mod.  */
6072 
6073 const char *
6074 pa_output_mod_insn (int unsignedp, rtx_insn *insn)
6075 {
6076   if (unsignedp)
6077     {
6078       import_milli (remU);
6079       return pa_output_millicode_call (insn,
6080 				       gen_rtx_SYMBOL_REF (SImode, "$$remU"));
6081     }
6082   else
6083     {
6084       import_milli (remI);
6085       return pa_output_millicode_call (insn,
6086 				       gen_rtx_SYMBOL_REF (SImode, "$$remI"));
6087     }
6088 }
6089 
6090 void
6091 pa_output_arg_descriptor (rtx_insn *call_insn)
6092 {
6093   const char *arg_regs[4];
6094   machine_mode arg_mode;
6095   rtx link;
6096   int i, output_flag = 0;
6097   int regno;
6098 
6099   /* We neither need nor want argument location descriptors for the
6100      64-bit runtime environment or the ELF32 environment.  */
6101   if (TARGET_64BIT || TARGET_ELF32)
6102     return;
6103 
6104   for (i = 0; i < 4; i++)
6105     arg_regs[i] = 0;
6106 
6107   /* Specify explicitly that no argument relocations should take place
6108      if using the portable runtime calling conventions.  */
6109   if (TARGET_PORTABLE_RUNTIME)
6110     {
6111       fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
6112 	     asm_out_file);
6113       return;
6114     }
6115 
6116   gcc_assert (CALL_P (call_insn));
6117   for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
6118        link; link = XEXP (link, 1))
6119     {
6120       rtx use = XEXP (link, 0);
6121 
6122       if (! (GET_CODE (use) == USE
6123 	     && GET_CODE (XEXP (use, 0)) == REG
6124 	     && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6125 	continue;
6126 
6127       arg_mode = GET_MODE (XEXP (use, 0));
6128       regno = REGNO (XEXP (use, 0));
6129       if (regno >= 23 && regno <= 26)
6130 	{
6131 	  arg_regs[26 - regno] = "GR";
6132 	  if (arg_mode == DImode)
6133 	    arg_regs[25 - regno] = "GR";
6134 	}
6135       else if (regno >= 32 && regno <= 39)
6136 	{
6137 	  if (arg_mode == SFmode)
6138 	    arg_regs[(regno - 32) / 2] = "FR";
6139 	  else
6140 	    {
6141 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
6142 	      arg_regs[(regno - 34) / 2] = "FR";
6143 	      arg_regs[(regno - 34) / 2 + 1] = "FU";
6144 #else
6145 	      arg_regs[(regno - 34) / 2] = "FU";
6146 	      arg_regs[(regno - 34) / 2 + 1] = "FR";
6147 #endif
6148 	    }
6149 	}
6150     }
6151   fputs ("\t.CALL ", asm_out_file);
6152   for (i = 0; i < 4; i++)
6153     {
6154       if (arg_regs[i])
6155 	{
6156 	  if (output_flag++)
6157 	    fputc (',', asm_out_file);
6158 	  fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
6159 	}
6160     }
6161   fputc ('\n', asm_out_file);
6162 }
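
/* For a call such as f (int, int, double) under the 32-bit SOM runtime,
   the loop above would typically produce something along the lines of

	.CALL ARGW0=GR,ARGW1=GR,ARGW2=FR,ARGW3=FU

   where the exact FR/FU ordering for the double's two words depends on
   HP_FP_ARG_DESCRIPTOR_REVERSED.  */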
6163 
6164 /* Inform reload about cases where moving X with a mode MODE to or from
6165    a register in RCLASS requires an extra scratch or immediate register.
6166    Return the class needed for the immediate register.  */
6167 
6168 static reg_class_t
6169 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
6170 		     machine_mode mode, secondary_reload_info *sri)
6171 {
6172   int regno;
6173   enum reg_class rclass = (enum reg_class) rclass_i;
6174 
6175   /* Handle the easy stuff first.  */
6176   if (rclass == R1_REGS)
6177     return NO_REGS;
6178 
6179   if (REG_P (x))
6180     {
6181       regno = REGNO (x);
6182       if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
6183 	return NO_REGS;
6184     }
6185   else
6186     regno = -1;
6187 
6188   /* If we have something like (mem (mem (...))), we can safely assume the
6189      inner MEM will end up in a general register after reloading, so there's
6190      no need for a secondary reload.  */
6191   if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
6192     return NO_REGS;
6193 
6194   /* Trying to load a constant into a FP register during PIC code
6195      generation requires %r1 as a scratch register.  For float modes,
6196      the only legitimate constant is CONST0_RTX.  However, there are
6197      a few patterns that accept constant double operands.  */
6198   if (flag_pic
6199       && FP_REG_CLASS_P (rclass)
6200       && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
6201     {
6202       switch (mode)
6203 	{
6204 	case E_SImode:
6205 	  sri->icode = CODE_FOR_reload_insi_r1;
6206 	  break;
6207 
6208 	case E_DImode:
6209 	  sri->icode = CODE_FOR_reload_indi_r1;
6210 	  break;
6211 
6212 	case E_SFmode:
6213 	  sri->icode = CODE_FOR_reload_insf_r1;
6214 	  break;
6215 
6216 	case E_DFmode:
6217 	  sri->icode = CODE_FOR_reload_indf_r1;
6218 	  break;
6219 
6220 	default:
6221 	  gcc_unreachable ();
6222 	}
6223       return NO_REGS;
6224     }
6225 
6226   /* Secondary reloads of symbolic expressions require %r1 as a scratch
6227      register when we're generating PIC code or when the operand isn't
6228      readonly.  */
6229   if (pa_symbolic_expression_p (x))
6230     {
6231       if (GET_CODE (x) == HIGH)
6232 	x = XEXP (x, 0);
6233 
6234       if (flag_pic || !read_only_operand (x, VOIDmode))
6235 	{
6236 	  switch (mode)
6237 	    {
6238 	    case E_SImode:
6239 	      sri->icode = CODE_FOR_reload_insi_r1;
6240 	      break;
6241 
6242 	    case E_DImode:
6243 	      sri->icode = CODE_FOR_reload_indi_r1;
6244 	      break;
6245 
6246 	    default:
6247 	      gcc_unreachable ();
6248 	    }
6249 	  return NO_REGS;
6250 	}
6251     }
6252 
6253   /* Profiling showed the PA port spends about 1.3% of its compilation
6254      time in true_regnum from calls inside pa_secondary_reload_class.  */
6255   if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6256     regno = true_regnum (x);
6257 
6258   /* Handle reloads for floating point loads and stores.  */
6259   if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
6260       && FP_REG_CLASS_P (rclass))
6261     {
6262       if (MEM_P (x))
6263 	{
6264 	  x = XEXP (x, 0);
6265 
6266 	  /* We don't need a secondary reload for indexed memory addresses.
6267 
6268 	     When INT14_OK_STRICT is true, it might appear that we could
6269 	     directly allow register indirect memory addresses.  However,
6270 	     this doesn't work because we don't support SUBREGs in
6271 	     floating-point register copies and reload doesn't tell us
6272 	     when it's going to use a SUBREG.  */
6273 	  if (IS_INDEX_ADDR_P (x))
6274 	    return NO_REGS;
6275 	}
6276 
6277       /* Request a secondary reload with a general scratch register
6278 	 for everything else.  ??? Could symbolic operands be handled
6279 	 directly when generating non-pic PA 2.0 code?  */
6280       sri->icode = (in_p
6281 		    ? direct_optab_handler (reload_in_optab, mode)
6282 		    : direct_optab_handler (reload_out_optab, mode));
6283       return NO_REGS;
6284     }
6285 
6286   /* A SAR<->FP register copy requires an intermediate general register
6287      and secondary memory.  We need a secondary reload with a general
6288      scratch register for spills.  */
6289   if (rclass == SHIFT_REGS)
6290     {
6291       /* Handle spill.  */
6292       if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6293 	{
6294 	  sri->icode = (in_p
6295 			? direct_optab_handler (reload_in_optab, mode)
6296 			: direct_optab_handler (reload_out_optab, mode));
6297 	  return NO_REGS;
6298 	}
6299 
6300       /* Handle FP copy.  */
6301       if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6302 	return GENERAL_REGS;
6303     }
6304 
6305   if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6306       && REGNO_REG_CLASS (regno) == SHIFT_REGS
6307       && FP_REG_CLASS_P (rclass))
6308     return GENERAL_REGS;
6309 
6310   return NO_REGS;
6311 }
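
/* As a concrete case, reloading an integer constant into a floating-point
   register while generating PIC code takes the CODE_FOR_reload_insi_r1
   path above, which provides %r1 as the scratch register.  */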
6312 
6313 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.  */
6314 
6315 static bool
6316 pa_secondary_memory_needed (machine_mode mode ATTRIBUTE_UNUSED,
6317 			    reg_class_t class1 ATTRIBUTE_UNUSED,
6318 			    reg_class_t class2 ATTRIBUTE_UNUSED)
6319 {
6320 #ifdef PA_SECONDARY_MEMORY_NEEDED
6321   return PA_SECONDARY_MEMORY_NEEDED (mode, class1, class2);
6322 #else
6323   return false;
6324 #endif
6325 }
6326 
6327 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY.  The argument pointer
6328    is only marked as live on entry by df-scan when it is a fixed
6329    register.  It isn't a fixed register in the 64-bit runtime,
6330    so we need to mark it here.  */
6331 
6332 static void
6333 pa_extra_live_on_entry (bitmap regs)
6334 {
6335   if (TARGET_64BIT)
6336     bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6337 }
6338 
6339 /* Implement EH_RETURN_HANDLER_RTX.  The MEM needs to be volatile
6340    to prevent it from being deleted.  */
6341 
6342 rtx
6343 pa_eh_return_handler_rtx (void)
6344 {
6345   rtx tmp;
6346 
6347   tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6348 		      TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6349   tmp = gen_rtx_MEM (word_mode, tmp);
6350   tmp->volatil = 1;
6351   return tmp;
6352 }
6353 
6354 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6355    by invisible reference.  As a GCC extension, we also pass anything
6356    with a zero or variable size by reference.
6357 
6358    The 64-bit runtime does not describe passing any types by invisible
6359    reference.  The internals of GCC can't currently handle passing
6360    empty structures, and zero or variable length arrays when they are
6361    not passed entirely on the stack or by reference.  Thus, as a GCC
6362    extension, we pass these types by reference.  The HP compiler doesn't
6363    support these types, so hopefully there shouldn't be any compatibility
6364    issues.  This may have to be revisited when HP releases a C99 compiler
6365    or updates the ABI.  */
6366 
6367 static bool
6368 pa_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6369 {
6370   HOST_WIDE_INT size = arg.type_size_in_bytes ();
6371   if (TARGET_64BIT)
6372     return size <= 0;
6373   else
6374     return size <= 0 || size > 8;
6375 }
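
/* So, for example, a 16-byte struct is passed by reference in the 32-bit
   runtime (size > 8) but by value in the 64-bit runtime, while zero-sized
   and variable-sized objects are passed by reference in both.  */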
6376 
6377 /* Implement TARGET_FUNCTION_ARG_PADDING.  */
6378 
6379 static pad_direction
6380 pa_function_arg_padding (machine_mode mode, const_tree type)
6381 {
6382   if (mode == BLKmode
6383       || (TARGET_64BIT
6384 	  && type
6385 	  && (AGGREGATE_TYPE_P (type)
6386 	      || TREE_CODE (type) == COMPLEX_TYPE
6387 	      || TREE_CODE (type) == VECTOR_TYPE)))
6388     {
6389       /* Return PAD_NONE if justification is not required.  */
6390       if (type
6391 	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6392 	  && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6393 	return PAD_NONE;
6394 
6395       /* The directions set here are ignored when a BLKmode argument larger
6396 	 than a word is placed in a register.  Different code is used for
6397 	 the stack and registers.  This makes it difficult to have a
6398 	 consistent data representation for both the stack and registers.
6399 	 For both runtimes, the justification and padding for arguments on
6400 	 the stack and in registers should be identical.  */
6401       if (TARGET_64BIT)
6402 	/* The 64-bit runtime specifies left justification for aggregates.  */
6403 	return PAD_UPWARD;
6404       else
6405 	/* The 32-bit runtime architecture specifies right justification.
6406 	   When the argument is passed on the stack, the argument is padded
6407 	   with garbage on the left.  The HP compiler pads with zeros.  */
6408 	return PAD_DOWNWARD;
6409     }
6410 
6411   if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6412     return PAD_DOWNWARD;
6413   else
6414     return PAD_NONE;
6415 }
6416 
6417 
6418 /* Do what is necessary for `va_start'.  We look at the current function
6419    to determine if stdargs or varargs is used and fill in an initial
6420    va_list.  A pointer to this constructor is returned.  */
6421 
6422 static rtx
6423 hppa_builtin_saveregs (void)
6424 {
6425   rtx offset, dest;
6426   tree fntype = TREE_TYPE (current_function_decl);
6427   int argadj = ((!stdarg_p (fntype))
6428 		? UNITS_PER_WORD : 0);
6429 
6430   if (argadj)
6431     offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6432   else
6433     offset = crtl->args.arg_offset_rtx;
6434 
6435   if (TARGET_64BIT)
6436     {
6437       int i, off;
6438 
6439       /* Adjust for varargs/stdarg differences.  */
6440       if (argadj)
6441 	offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6442       else
6443 	offset = crtl->args.arg_offset_rtx;
6444 
6445       /* We need to save %r26 .. %r19 inclusive starting at offset -64
6446 	 from the incoming arg pointer and growing to larger addresses.  */
6447       for (i = 26, off = -64; i >= 19; i--, off += 8)
6448 	emit_move_insn (gen_rtx_MEM (word_mode,
6449 				     plus_constant (Pmode,
6450 						    arg_pointer_rtx, off)),
6451 			gen_rtx_REG (word_mode, i));
6452 
6453       /* The incoming args pointer points just beyond the flushback area;
6454 	 normally this is not a serious concern.  However, when we are doing
6455 	 varargs/stdargs we want to make the arg pointer point to the start
6456 	 of the incoming argument area.  */
6457       emit_move_insn (virtual_incoming_args_rtx,
6458 		      plus_constant (Pmode, arg_pointer_rtx, -64));
6459 
6460       /* Now return a pointer to the first anonymous argument.  */
6461       return copy_to_reg (expand_binop (Pmode, add_optab,
6462 					virtual_incoming_args_rtx,
6463 					offset, 0, 0, OPTAB_LIB_WIDEN));
6464     }
6465 
6466   /* Store general registers on the stack.  */
6467   dest = gen_rtx_MEM (BLKmode,
6468 		      plus_constant (Pmode, crtl->args.internal_arg_pointer,
6469 				     -16));
6470   set_mem_alias_set (dest, get_varargs_alias_set ());
6471   set_mem_align (dest, BITS_PER_WORD);
6472   move_block_from_reg (23, dest, 4);
6473 
6474   /* move_block_from_reg will emit code to store the argument registers
6475      individually as scalar stores.
6476 
6477      However, other insns may later load from the same addresses for
6478      a structure load (passing a struct to a varargs routine).
6479 
6480      The alias code assumes that such aliasing can never happen, so we
6481      have to keep memory referencing insns from moving up beyond the
6482      last argument register store.  So we emit a blockage insn here.  */
6483   emit_insn (gen_blockage ());
6484 
6485   return copy_to_reg (expand_binop (Pmode, add_optab,
6486 				    crtl->args.internal_arg_pointer,
6487 				    offset, 0, 0, OPTAB_LIB_WIDEN));
6488 }
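
/* In the 64-bit case above, the register save area ends up with %r26 at
   AP-64, %r25 at AP-56, ..., %r19 at AP-8, where AP is the incoming arg
   pointer, so anonymous arguments can later be fetched with ordinary
   word loads.  */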
6489 
6490 static void
6491 hppa_va_start (tree valist, rtx nextarg)
6492 {
6493   nextarg = expand_builtin_saveregs ();
6494   std_expand_builtin_va_start (valist, nextarg);
6495 }
6496 
6497 static tree
6498 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6499 			   gimple_seq *post_p)
6500 {
6501   if (TARGET_64BIT)
6502     {
6503       /* Args grow upward.  We can use the generic routines.  */
6504       return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6505     }
6506   else /* !TARGET_64BIT */
6507     {
6508       tree ptr = build_pointer_type (type);
6509       tree valist_type;
6510       tree t, u;
6511       unsigned int size, ofs;
6512       bool indirect;
6513 
6514       indirect = pass_va_arg_by_reference (type);
6515       if (indirect)
6516 	{
6517 	  type = ptr;
6518 	  ptr = build_pointer_type (type);
6519 	}
6520       size = int_size_in_bytes (type);
6521       valist_type = TREE_TYPE (valist);
6522 
6523       /* Args grow down.  Not handled by generic routines.  */
6524 
6525       u = fold_convert (sizetype, size_in_bytes (type));
6526       u = fold_build1 (NEGATE_EXPR, sizetype, u);
6527       t = fold_build_pointer_plus (valist, u);
6528 
6529       /* Align to 4 or 8 byte boundary depending on argument size.  */
6530 
6531       u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6532       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6533       t = fold_convert (valist_type, t);
6534 
6535       t = build2 (MODIFY_EXPR, valist_type, valist, t);
6536 
6537       ofs = (8 - size) % 4;
6538       if (ofs != 0)
6539 	t = fold_build_pointer_plus_hwi (t, ofs);
6540 
6541       t = fold_convert (ptr, t);
6542       t = build_va_arg_indirect_ref (t);
6543 
6544       if (indirect)
6545 	t = build_va_arg_indirect_ref (t);
6546 
6547       return t;
6548     }
6549 }
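
/* A worked example of the 32-bit va_arg arithmetic above: for a 4-byte
   int, valist is decremented by the argument size and masked with -4,
   and ofs = (8 - 4) % 4 = 0, so the value is fetched at the new valist
   directly.  For a 2-byte short, valist - 2 is masked with -4 and
   ofs = (8 - 2) % 4 = 2 skips the pad bytes, so the value is read from
   the high-address half of the word, matching the big-endian argument
   layout.  */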
6550 
6551 /* True if MODE is valid for the target.  By "valid", we mean able to
6552    be manipulated in non-trivial ways.  In particular, this means all
6553    the arithmetic is supported.  */
6554 
6555 static bool
6556 pa_scalar_mode_supported_p (scalar_mode mode)
6557 {
6558   int precision = GET_MODE_PRECISION (mode);
6559 
6560   if (TARGET_64BIT && mode == TImode)
6561     return true;
6562 
6563   switch (GET_MODE_CLASS (mode))
6564     {
6565     case MODE_PARTIAL_INT:
6566     case MODE_INT:
6567       if (precision == CHAR_TYPE_SIZE)
6568 	return true;
6569       if (precision == SHORT_TYPE_SIZE)
6570 	return true;
6571       if (precision == INT_TYPE_SIZE)
6572 	return true;
6573       if (precision == LONG_TYPE_SIZE)
6574 	return true;
6575       if (precision == LONG_LONG_TYPE_SIZE)
6576 	return true;
6577       return false;
6578 
6579     case MODE_FLOAT:
6580       if (precision == FLOAT_TYPE_SIZE)
6581 	return true;
6582       if (precision == DOUBLE_TYPE_SIZE)
6583 	return true;
6584       if (precision == LONG_DOUBLE_TYPE_SIZE)
6585 	return true;
6586       return false;
6587 
6588     case MODE_DECIMAL_FLOAT:
6589       return false;
6590 
6591     default:
6592       gcc_unreachable ();
6593     }
6594 }
6595 
6596 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6597    it branches into the delay slot.  Otherwise, return FALSE.  */
6598 
6599 static bool
6600 branch_to_delay_slot_p (rtx_insn *insn)
6601 {
6602   rtx_insn *jump_insn;
6603 
6604   if (dbr_sequence_length ())
6605     return FALSE;
6606 
6607   jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6608   while (insn)
6609     {
6610       insn = next_active_insn (insn);
6611       if (jump_insn == insn)
6612 	return TRUE;
6613 
6614       /* We can't rely on the length of asms.  So, we return FALSE when
6615 	 the branch is followed by an asm.  */
6616       if (!insn
6617 	  || GET_CODE (PATTERN (insn)) == ASM_INPUT
6618 	  || asm_noperands (PATTERN (insn)) >= 0
6619 	  || get_attr_length (insn) > 0)
6620 	break;
6621     }
6622 
6623   return FALSE;
6624 }
6625 
6626 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6627 
6628    This occurs when INSN has an unfilled delay slot and is followed
6629    by an asm.  Disaster can occur if the asm is empty and the jump
6630    branches into the delay slot.  So, we add a nop in the delay slot
6631    when this occurs.  */
6632 
6633 static bool
6634 branch_needs_nop_p (rtx_insn *insn)
6635 {
6636   rtx_insn *jump_insn;
6637 
6638   if (dbr_sequence_length ())
6639     return FALSE;
6640 
6641   jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6642   while (insn)
6643     {
6644       insn = next_active_insn (insn);
6645       if (!insn || jump_insn == insn)
6646 	return TRUE;
6647 
6648       if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6649 	   || asm_noperands (PATTERN (insn)) >= 0)
6650 	  && get_attr_length (insn) > 0)
6651 	break;
6652     }
6653 
6654   return FALSE;
6655 }
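
/* Illustrating the hazard handled above: when the delay slot is
   unfilled and the branch is followed only by an asm that assembles to
   nothing, the branch target can coincide with the branch's own delay
   slot.  The output routines guard this with %#, which emits an
   explicit nop.  A sketch with illustrative operands:

	cmpb,=,n %r4,%r5,L
	nop			; emitted via %# when this predicate is true
	; asm statement that assembled to zero bytes
   L:	...  */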
6656 
6657 /* Return TRUE if INSN, a forward jump insn, can use nullification
6658    to skip the following instruction.  This avoids an extra cycle due
6659    to a mis-predicted branch when we fall through.  */
6660 
6661 static bool
6662 use_skip_p (rtx_insn *insn)
6663 {
6664   rtx_insn *jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6665 
6666   while (insn)
6667     {
6668       insn = next_active_insn (insn);
6669 
6670       /* We can't rely on the length of asms, so we can't skip asms.  */
6671       if (!insn
6672 	  || GET_CODE (PATTERN (insn)) == ASM_INPUT
6673 	  || asm_noperands (PATTERN (insn)) >= 0)
6674 	break;
6675       if (get_attr_length (insn) == 4
6676 	  && jump_insn == next_active_insn (insn))
6677 	return TRUE;
6678       if (get_attr_length (insn) > 0)
6679 	break;
6680     }
6681 
6682   return FALSE;
6683 }
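
/* The skip idiom tested for above: a forward branch over exactly one
   4-byte insn can be replaced by a compare-and-clear that conditionally
   nullifies its successor.  With illustrative operands,

	cmpb,=,n %r4,%r5,L
	ldo 1(%r3),%r3
   L:	...

   can be emitted instead as

	cmpclr,= %r4,%r5,%r0	; nullify the ldo when the condition holds
	ldo 1(%r3),%r3

   avoiding the mis-predict penalty when we fall through.  */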
6684 
6685 /* This routine handles all the normal conditional branch sequences we
6686    might need to generate.  It handles compare immediate vs compare
6687    register, nullification of delay slots, varying length branches,
6688    negated branches, and all combinations of the above.  It returns the
6689    output appropriate to emit the branch corresponding to all given
6690    parameters.  */
6691 
6692 const char *
6693 pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
6694 {
6695   static char buf[100];
6696   bool useskip;
6697   int nullify = INSN_ANNULLED_BRANCH_P (insn);
6698   int length = get_attr_length (insn);
6699   int xdelay;
6700 
6701   /* A conditional branch to the following instruction (e.g. the delay slot)
6702      is asking for a disaster.  This can happen when not optimizing and
6703      when jump optimization fails.
6704 
6705      While it is usually safe to emit nothing, this can fail if the
6706      preceding instruction is a nullified branch with an empty delay
6707      slot and the same branch target as this branch.  We could check
6708      for this but jump optimization should eliminate nop jumps.  It
6709      is always safe to emit a nop.  */
6710   if (branch_to_delay_slot_p (insn))
6711     return "nop";
6712 
6713   /* The doubleword form of the cmpib instruction doesn't have the LEU
6714      and GTU conditions while the cmpb instruction does.  Since we accept
6715      zero for cmpb, we must ensure that we use cmpb for the comparison.  */
6716   if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6717     operands[2] = gen_rtx_REG (DImode, 0);
6718   if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6719     operands[1] = gen_rtx_REG (DImode, 0);
6720 
6721   /* If this is a long branch with its delay slot unfilled, set `nullify'
6722      as it can nullify the delay slot and save a nop.  */
6723   if (length == 8 && dbr_sequence_length () == 0)
6724     nullify = 1;
6725 
6726   /* If this is a short forward conditional branch which did not get
6727      its delay slot filled, the delay slot can still be nullified.  */
6728   if (! nullify && length == 4 && dbr_sequence_length () == 0)
6729     nullify = forward_branch_p (insn);
6730 
6731   /* A forward branch over a single nullified insn can be done with a
6732      comclr instruction.  This avoids a single cycle penalty due to a
6733      mis-predicted branch if we fall through (branch not taken).  */
6734   useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6735 
6736   switch (length)
6737     {
6738       /* All short conditional branches except backwards with an unfilled
6739 	 delay slot.  */
6740       case 4:
6741 	if (useskip)
6742 	  strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6743 	else
6744 	  strcpy (buf, "{com%I2b,|cmp%I2b,}");
6745 	if (GET_MODE (operands[1]) == DImode)
6746 	  strcat (buf, "*");
6747 	if (negated)
6748 	  strcat (buf, "%B3");
6749 	else
6750 	  strcat (buf, "%S3");
6751 	if (useskip)
6752 	  strcat (buf, " %2,%r1,%%r0");
6753 	else if (nullify)
6754 	  {
6755 	    if (branch_needs_nop_p (insn))
6756 	      strcat (buf, ",n %2,%r1,%0%#");
6757 	    else
6758 	      strcat (buf, ",n %2,%r1,%0");
6759 	  }
6760 	else
6761 	  strcat (buf, " %2,%r1,%0");
6762 	break;
6763 
6764      /* All long conditionals.  Note a short backward branch with an
6765 	unfilled delay slot is treated just like a long backward branch
6766 	with an unfilled delay slot.  */
6767       case 8:
6768 	/* Handle weird backwards branch with a filled delay slot
6769 	   which is nullified.  */
6770 	if (dbr_sequence_length () != 0
6771 	    && ! forward_branch_p (insn)
6772 	    && nullify)
6773 	  {
6774 	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
6775 	    if (GET_MODE (operands[1]) == DImode)
6776 	      strcat (buf, "*");
6777 	    if (negated)
6778 	      strcat (buf, "%S3");
6779 	    else
6780 	      strcat (buf, "%B3");
6781 	    strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6782 	  }
6783 	/* Handle short backwards branch with an unfilled delay slot.
6784 	   Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6785 	   taken and untaken branches.  */
6786 	else if (dbr_sequence_length () == 0
6787 		 && ! forward_branch_p (insn)
6788 		 && INSN_ADDRESSES_SET_P ()
6789 		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6790 				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6791 	  {
6792 	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
6793 	    if (GET_MODE (operands[1]) == DImode)
6794 	      strcat (buf, "*");
6795 	    if (negated)
6796 	      strcat (buf, "%B3 %2,%r1,%0%#");
6797 	    else
6798 	      strcat (buf, "%S3 %2,%r1,%0%#");
6799 	  }
6800 	else
6801 	  {
6802 	    strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6803 	    if (GET_MODE (operands[1]) == DImode)
6804 	      strcat (buf, "*");
6805 	    if (negated)
6806 	      strcat (buf, "%S3");
6807 	    else
6808 	      strcat (buf, "%B3");
6809 	    if (nullify)
6810 	      strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6811 	    else
6812 	      strcat (buf, " %2,%r1,%%r0\n\tb %0");
6813 	  }
6814 	break;
6815 
6816       default:
6817 	/* The reversed conditional branch must branch over one additional
6818 	   instruction if the delay slot is filled and needs to be extracted
6819 	   by pa_output_lbranch.  If the delay slot is empty or this is a
6820 	   nullified forward branch, the instruction after the reversed
6821 	   condition branch must be nullified.  */
6822 	if (dbr_sequence_length () == 0
6823 	    || (nullify && forward_branch_p (insn)))
6824 	  {
6825 	    nullify = 1;
6826 	    xdelay = 0;
6827 	    operands[4] = GEN_INT (length);
6828 	  }
6829 	else
6830 	  {
6831 	    xdelay = 1;
6832 	    operands[4] = GEN_INT (length + 4);
6833 	  }
6834 
6835 	/* Create a reversed conditional branch which branches around
6836 	   the following insns.  */
6837 	if (GET_MODE (operands[1]) != DImode)
6838 	  {
6839 	    if (nullify)
6840 	      {
6841 		if (negated)
6842 		  strcpy (buf,
6843 		    "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6844 		else
6845 		  strcpy (buf,
6846 		    "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6847 	      }
6848 	    else
6849 	      {
6850 		if (negated)
6851 		  strcpy (buf,
6852 		    "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6853 		else
6854 		  strcpy (buf,
6855 		    "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6856 	      }
6857 	  }
6858 	else
6859 	  {
6860 	    if (nullify)
6861 	      {
6862 		if (negated)
6863 		  strcpy (buf,
6864 		    "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6865 		else
6866 		  strcpy (buf,
6867 		    "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6868 	      }
6869 	    else
6870 	      {
6871 		if (negated)
6872 		  strcpy (buf,
6873 		    "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6874 		else
6875 		  strcpy (buf,
6876 		    "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6877 	      }
6878 	  }
6879 
6880 	output_asm_insn (buf, operands);
6881 	return pa_output_lbranch (operands[0], insn, xdelay);
6882     }
6883   return buf;
6884 }
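
/* Concrete instances of the templates assembled above, with
   illustrative operands and PA 2.0 mnemonics: the short nullified form
   from case 4 comes out as

	cmpb,=,n %r4,%r5,L

   while the length-8 fallback is the skip-over-branch pair

	cmpclr,<> %r4,%r5,%r0
	b,n L

   where the compare uses the reversed condition so the unconditional
   branch is nullified exactly when the original condition is false.  */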
6885 
6886 /* Output a PIC pc-relative instruction sequence to load the address of
6887    OPERANDS[0] to register OPERANDS[2].  OPERANDS[0] is a symbol ref
6888    or a code label.  OPERANDS[1] specifies the register to use to load
6889    the program counter.  OPERANDS[3] may be used for label generation.
6890    The sequence is always three instructions in length.  The program
6891    counter recorded for PA 1.X is eight bytes more than that for PA 2.0.
6892    Register %r1 is clobbered.  */
6893 
6894 static void
6895 pa_output_pic_pcrel_sequence (rtx *operands)
6896 {
6897   gcc_assert (SYMBOL_REF_P (operands[0]) || LABEL_P (operands[0]));
6898   if (TARGET_PA_20)
6899     {
6900       /* We can use mfia to determine the current program counter.  */
6901       if (TARGET_SOM || !TARGET_GAS)
6902 	{
6903 	  operands[3] = gen_label_rtx ();
6904 	  targetm.asm_out.internal_label (asm_out_file, "L",
6905 					  CODE_LABEL_NUMBER (operands[3]));
6906 	  output_asm_insn ("mfia %1", operands);
6907 	  output_asm_insn ("addil L'%0-%l3,%1", operands);
6908 	  output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6909 	}
6910       else
6911 	{
6912 	  output_asm_insn ("mfia %1", operands);
6913 	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands);
6914 	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands);
6915 	}
6916     }
6917   else
6918     {
6919       /* We need to use a branch to determine the current program counter.  */
6920       output_asm_insn ("{bl|b,l} .+8,%1", operands);
6921       if (TARGET_SOM || !TARGET_GAS)
6922 	{
6923 	  operands[3] = gen_label_rtx ();
6924 	  output_asm_insn ("addil L'%0-%l3,%1", operands);
6925 	  targetm.asm_out.internal_label (asm_out_file, "L",
6926 					  CODE_LABEL_NUMBER (operands[3]));
6927 	  output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6928 	}
6929       else
6930 	{
6931 	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands);
6932 	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands);
6933 	}
6934     }
6935 }
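
/* A note on the displacements above: the PA 1.X "bl .+8" records a
   program counter eight bytes beyond what mfia records on PA 2.0 (see
   the function comment), which is why the GAS displacements grow from
   +4/+8 in the PA 1.X sequence to +12/+16 in the PA 2.0 sequence.  The
   SOM variants encode the same adjustment through the placement of the
   internal label instead.  */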
6936 
6937 /* This routine handles output of long unconditional branches that
6938    exceed the maximum range of a simple branch instruction.  Since
6939    we don't have a register available for the branch, we save register
6940    %r1 in the frame marker, load the branch destination DEST into %r1,
6941    execute the branch, and restore %r1 in the delay slot of the branch.
6942 
6943    Since long branches may have an insn in the delay slot and the
6944    delay slot is used to restore %r1, we in general need to extract
6945    this insn and execute it before the branch.  However, to facilitate
6946    use of this function by conditional branches, we also provide an
6947    option to not extract the delay insn so that it will be emitted
6948    after the long branch.  So, if there is an insn in the delay slot,
6949    it is extracted if XDELAY is nonzero.
6950 
6951    The lengths of the various long-branch sequences are 20, 16 and 24
6952    bytes for the portable runtime, non-PIC and PIC cases, respectively.  */
6953 
6954 const char *
6955 pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
6956 {
6957   rtx xoperands[4];
6958 
6959   xoperands[0] = dest;
6960 
6961   /* First, free up the delay slot.  */
6962   if (xdelay && dbr_sequence_length () != 0)
6963     {
6964       /* We can't handle a jump in the delay slot.  */
6965       gcc_assert (! JUMP_P (NEXT_INSN (insn)));
6966 
6967       final_scan_insn (NEXT_INSN (insn), asm_out_file,
6968 		       optimize, 0, NULL);
6969 
6970       /* Now delete the delay insn.  */
6971       SET_INSN_DELETED (NEXT_INSN (insn));
6972     }
6973 
6974   /* Output an insn to save %r1.  The runtime documentation doesn't
6975      specify whether the "Clean Up" slot in the caller's frame can
6976      be clobbered by the callee.  It isn't copied by HP's builtin
6977      alloca, so this suggests that it can be clobbered if necessary.
6978      The "Static Link" location is copied by HP builtin alloca, so
6979      we avoid using it.  Using the cleanup slot might be a problem
6980      if we have to interoperate with languages that pass cleanup
6981      information.  However, it should be possible to handle these
6982      situations with GCC's asm feature.
6983 
6984      The "Current RP" slot is reserved for the called procedure, so
6985      we try to use it when we don't have a frame of our own.  It's
6986      rather unlikely that we won't have a frame when we need to emit
6987      a very long branch.
6988 
6989      Really the way to go long term is a register scavenger; goto
6990      the target of the jump and find a register which we can use
6991      as a scratch to hold the value in %r1.  Then, we wouldn't have
6992      to free up the delay slot or clobber a slot that may be needed
6993      for other purposes.  */
6994   if (TARGET_64BIT)
6995     {
6996       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6997 	/* Use the return pointer slot in the frame marker.  */
6998 	output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6999       else
7000 	/* Use the slot at -40 in the frame marker since HP builtin
7001 	   alloca doesn't copy it.  */
7002 	output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
7003     }
7004   else
7005     {
7006       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7007 	/* Use the return pointer slot in the frame marker.  */
7008 	output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
7009       else
7010 	/* Use the "Clean Up" slot in the frame marker.  In GCC,
7011 	   the only other use of this location is for copying a
7012 	   floating point double argument from a floating-point
7013 	   register to two general registers.  The copy is done
7014 	   as an "atomic" operation when outputting a call, so it
7015 	   won't interfere with our using the location here.  */
7016 	output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
7017     }
7018 
7019   if (TARGET_PORTABLE_RUNTIME)
7020     {
7021       output_asm_insn ("ldil L'%0,%%r1", xoperands);
7022       output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7023       output_asm_insn ("bv %%r0(%%r1)", xoperands);
7024     }
7025   else if (flag_pic)
7026     {
7027       xoperands[1] = gen_rtx_REG (Pmode, 1);
7028       xoperands[2] = xoperands[1];
7029       pa_output_pic_pcrel_sequence (xoperands);
7030       output_asm_insn ("bv %%r0(%%r1)", xoperands);
7031     }
7032   else
7033     /* Now output a very long branch to the original target.  */
7034     output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
7035 
7036   /* Now restore the value of %r1 in the delay slot.  */
7037   if (TARGET_64BIT)
7038     {
7039       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7040 	return "ldd -16(%%r30),%%r1";
7041       else
7042 	return "ldd -40(%%r30),%%r1";
7043     }
7044   else
7045     {
7046       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
7047 	return "ldw -20(%%r30),%%r1";
7048       else
7049 	return "ldw -12(%%r30),%%r1";
7050     }
7051 }
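
/* Putting the pieces above together, the 32-bit non-PIC long branch
   for a function with a frame is the 16-byte sequence

	stw %r1,-12(%r30)	; save %r1 in the "Clean Up" slot
	ldil L'target,%r1
	be R'target(%sr4,%r1)	; inter-space branch
	ldw -12(%r30),%r1	; restore %r1 in the delay slot

   while the PIC variant replaces the ldil/be pair with the pc-relative
   sequence plus "bv %r0(%r1)", giving the 24-byte length quoted in the
   function comment.  */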
7052 
7053 /* This routine handles all the branch-on-bit conditional branch sequences we
7054    might need to generate.  It handles nullification of delay slots,
7055    varying length branches, negated branches and all combinations of the
7056    above.  It returns the appropriate output template to emit the branch.  */
7057 
7058 const char *
7059 pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
7060 {
7061   static char buf[100];
7062   bool useskip;
7063   int nullify = INSN_ANNULLED_BRANCH_P (insn);
7064   int length = get_attr_length (insn);
7065   int xdelay;
7066 
7067   /* A conditional branch to the following instruction (e.g. the delay slot) is
7068      asking for a disaster.  I do not think this can happen as this pattern
7069      is only used when optimizing; jump optimization should eliminate the
7070      jump.  But be prepared just in case.  */
7071 
7072   if (branch_to_delay_slot_p (insn))
7073     return "nop";
7074 
7075   /* If this is a long branch with its delay slot unfilled, set `nullify'
7076      as it can nullify the delay slot and save a nop.  */
7077   if (length == 8 && dbr_sequence_length () == 0)
7078     nullify = 1;
7079 
7080   /* If this is a short forward conditional branch which did not get
7081      its delay slot filled, the delay slot can still be nullified.  */
7082   if (! nullify && length == 4 && dbr_sequence_length () == 0)
7083     nullify = forward_branch_p (insn);
7084 
7085   /* A forward branch over a single nullified insn can be done with an
7086      extrs instruction.  This avoids a single cycle penalty due to a
7087      mis-predicted branch if we fall through (branch not taken).  */
7088   useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7089 
7090   switch (length)
7091     {
7092 
7093       /* All short conditional branches except backwards with an unfilled
7094 	 delay slot.  */
7095       case 4:
7096 	if (useskip)
7097 	  strcpy (buf, "{extrs,|extrw,s,}");
7098 	else
7099 	  strcpy (buf, "bb,");
7100 	if (useskip && GET_MODE (operands[0]) == DImode)
7101 	  strcpy (buf, "extrd,s,*");
7102 	else if (GET_MODE (operands[0]) == DImode)
7103 	  strcpy (buf, "bb,*");
7104 	if ((which == 0 && negated)
7105 	     || (which == 1 && ! negated))
7106 	  strcat (buf, ">=");
7107 	else
7108 	  strcat (buf, "<");
7109 	if (useskip)
7110 	  strcat (buf, " %0,%1,1,%%r0");
7111 	else if (nullify && negated)
7112 	  {
7113 	    if (branch_needs_nop_p (insn))
7114 	      strcat (buf, ",n %0,%1,%3%#");
7115 	    else
7116 	      strcat (buf, ",n %0,%1,%3");
7117 	  }
7118 	else if (nullify && ! negated)
7119 	  {
7120 	    if (branch_needs_nop_p (insn))
7121 	      strcat (buf, ",n %0,%1,%2%#");
7122 	    else
7123 	      strcat (buf, ",n %0,%1,%2");
7124 	  }
7125 	else if (! nullify && negated)
7126 	  strcat (buf, " %0,%1,%3");
7127 	else if (! nullify && ! negated)
7128 	  strcat (buf, " %0,%1,%2");
7129 	break;
7130 
7131      /* All long conditionals.  Note a short backward branch with an
7132 	unfilled delay slot is treated just like a long backward branch
7133 	with an unfilled delay slot.  */
7134       case 8:
7135 	/* Handle weird backwards branch with a filled delay slot
7136 	   which is nullified.  */
7137 	if (dbr_sequence_length () != 0
7138 	    && ! forward_branch_p (insn)
7139 	    && nullify)
7140 	  {
7141 	    strcpy (buf, "bb,");
7142 	    if (GET_MODE (operands[0]) == DImode)
7143 	      strcat (buf, "*");
7144 	    if ((which == 0 && negated)
7145 		|| (which == 1 && ! negated))
7146 	      strcat (buf, "<");
7147 	    else
7148 	      strcat (buf, ">=");
7149 	    if (negated)
7150 	      strcat (buf, ",n %0,%1,.+12\n\tb %3");
7151 	    else
7152 	      strcat (buf, ",n %0,%1,.+12\n\tb %2");
7153 	  }
7154 	/* Handle short backwards branch with an unfilled delay slot.
7155 	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
7156 	   taken and untaken branches.  */
7157 	else if (dbr_sequence_length () == 0
7158 		 && ! forward_branch_p (insn)
7159 		 && INSN_ADDRESSES_SET_P ()
7160 		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7161 				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7162 	  {
7163 	    strcpy (buf, "bb,");
7164 	    if (GET_MODE (operands[0]) == DImode)
7165 	      strcat (buf, "*");
7166 	    if ((which == 0 && negated)
7167 		|| (which == 1 && ! negated))
7168 	      strcat (buf, ">=");
7169 	    else
7170 	      strcat (buf, "<");
7171 	    if (negated)
7172 	      strcat (buf, " %0,%1,%3%#");
7173 	    else
7174 	      strcat (buf, " %0,%1,%2%#");
7175 	  }
7176 	else
7177 	  {
7178 	    if (GET_MODE (operands[0]) == DImode)
7179 	      strcpy (buf, "extrd,s,*");
7180 	    else
7181 	      strcpy (buf, "{extrs,|extrw,s,}");
7182 	    if ((which == 0 && negated)
7183 		|| (which == 1 && ! negated))
7184 	      strcat (buf, "<");
7185 	    else
7186 	      strcat (buf, ">=");
7187 	    if (nullify && negated)
7188 	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
7189 	    else if (nullify && ! negated)
7190 	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
7191 	    else if (negated)
7192 	      strcat (buf, " %0,%1,1,%%r0\n\tb %3");
7193 	    else
7194 	      strcat (buf, " %0,%1,1,%%r0\n\tb %2");
7195 	  }
7196 	break;
7197 
7198       default:
7199 	/* The reversed conditional branch must branch over one additional
7200 	   instruction if the delay slot is filled and needs to be extracted
7201 	   by pa_output_lbranch.  If the delay slot is empty or this is a
7202 	   nullified forward branch, the instruction after the reversed
7203 	   condition branch must be nullified.  */
7204 	if (dbr_sequence_length () == 0
7205 	    || (nullify && forward_branch_p (insn)))
7206 	  {
7207 	    nullify = 1;
7208 	    xdelay = 0;
7209 	    operands[4] = GEN_INT (length);
7210 	  }
7211 	else
7212 	  {
7213 	    xdelay = 1;
7214 	    operands[4] = GEN_INT (length + 4);
7215 	  }
7216 
7217 	if (GET_MODE (operands[0]) == DImode)
7218 	  strcpy (buf, "bb,*");
7219 	else
7220 	  strcpy (buf, "bb,");
7221 	if ((which == 0 && negated)
7222 	    || (which == 1 && !negated))
7223 	  strcat (buf, "<");
7224 	else
7225 	  strcat (buf, ">=");
7226 	if (nullify)
7227 	  strcat (buf, ",n %0,%1,.+%4");
7228 	else
7229 	  strcat (buf, " %0,%1,.+%4");
7230 	output_asm_insn (buf, operands);
7231 	return pa_output_lbranch (negated ? operands[3] : operands[2],
7232 				  insn, xdelay);
7233     }
7234   return buf;
7235 }
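
/* A short instance of the templates above, with which == 0, a
   non-negated comparison, and illustrative operands (bit 5 of %r4):
   case 4 emits

	bb,< %r4,5,L		; branch when the selected bit is set

   and the useskip form becomes

	extrw,s,< %r4,5,1,%r0	; nullify the next insn instead

   which nullifies its successor exactly when the bb would have been
   taken.  */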
7236 
7237 /* This routine handles all the branch-on-variable-bit conditional branch
7238    sequences we might need to generate.  It handles nullification of delay
7239    slots, varying length branches, negated branches and all combinations
7240    of the above.  It returns the appropriate output template to emit the
7241    branch.  */
7242 
7243 const char *
7244 pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
7245 	       int which)
7246 {
7247   static char buf[100];
7248   bool useskip;
7249   int nullify = INSN_ANNULLED_BRANCH_P (insn);
7250   int length = get_attr_length (insn);
7251   int xdelay;
7252 
7253   /* A conditional branch to the following instruction (e.g. the delay slot) is
7254      asking for a disaster.  I do not think this can happen as this pattern
7255      is only used when optimizing; jump optimization should eliminate the
7256      jump.  But be prepared just in case.  */
7257 
7258   if (branch_to_delay_slot_p (insn))
7259     return "nop";
7260 
7261   /* If this is a long branch with its delay slot unfilled, set `nullify'
7262      as it can nullify the delay slot and save a nop.  */
7263   if (length == 8 && dbr_sequence_length () == 0)
7264     nullify = 1;
7265 
7266   /* If this is a short forward conditional branch which did not get
7267      its delay slot filled, the delay slot can still be nullified.  */
7268   if (! nullify && length == 4 && dbr_sequence_length () == 0)
7269     nullify = forward_branch_p (insn);
7270 
7271   /* A forward branch over a single nullified insn can be done with an
7272      extrs instruction.  This avoids a single cycle penalty due to a
7273      mis-predicted branch if we fall through (branch not taken).  */
7274   useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7275 
7276   switch (length)
7277     {
7278 
7279       /* All short conditional branches except backwards with an unfilled
7280 	 delay slot.  */
7281       case 4:
7282 	if (useskip)
7283 	  strcpy (buf, "{vextrs,|extrw,s,}");
7284 	else
7285 	  strcpy (buf, "{bvb,|bb,}");
7286 	if (useskip && GET_MODE (operands[0]) == DImode)
7287 	  strcpy (buf, "extrd,s,*");
7288 	else if (GET_MODE (operands[0]) == DImode)
7289 	  strcpy (buf, "bb,*");
7290 	if ((which == 0 && negated)
7291 	     || (which == 1 && ! negated))
7292 	  strcat (buf, ">=");
7293 	else
7294 	  strcat (buf, "<");
7295 	if (useskip)
7296 	  strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7297 	else if (nullify && negated)
7298 	  {
7299 	    if (branch_needs_nop_p (insn))
7300 	      strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7301 	    else
7302 	      strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
7303 	  }
7304 	else if (nullify && ! negated)
7305 	  {
7306 	    if (branch_needs_nop_p (insn))
7307 	      strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7308 	    else
7309 	      strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
7310 	  }
7311 	else if (! nullify && negated)
7312 	  strcat (buf, "{ %0,%3| %0,%%sar,%3}");
7313 	else if (! nullify && ! negated)
7314 	  strcat (buf, "{ %0,%2| %0,%%sar,%2}");
7315 	break;
7316 
7317      /* All long conditionals.  Note a short backward branch with an
7318 	unfilled delay slot is treated just like a long backward branch
7319 	with an unfilled delay slot.  */
7320       case 8:
7321 	/* Handle weird backwards branch with a filled delay slot
7322 	   which is nullified.  */
7323 	if (dbr_sequence_length () != 0
7324 	    && ! forward_branch_p (insn)
7325 	    && nullify)
7326 	  {
7327 	    strcpy (buf, "{bvb,|bb,}");
7328 	    if (GET_MODE (operands[0]) == DImode)
7329 	      strcat (buf, "*");
7330 	    if ((which == 0 && negated)
7331 		|| (which == 1 && ! negated))
7332 	      strcat (buf, "<");
7333 	    else
7334 	      strcat (buf, ">=");
7335 	    if (negated)
7336 	      strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7337 	    else
7338 	      strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7339 	  }
7340 	/* Handle short backwards branch with an unfilled delay slot.
7341 	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
7342 	   taken and untaken branches.  */
7343 	else if (dbr_sequence_length () == 0
7344 		 && ! forward_branch_p (insn)
7345 		 && INSN_ADDRESSES_SET_P ()
7346 		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7347 				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7348 	  {
7349 	    strcpy (buf, "{bvb,|bb,}");
7350 	    if (GET_MODE (operands[0]) == DImode)
7351 	      strcat (buf, "*");
7352 	    if ((which == 0 && negated)
7353 		|| (which == 1 && ! negated))
7354 	      strcat (buf, ">=");
7355 	    else
7356 	      strcat (buf, "<");
7357 	    if (negated)
7358 	      strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
7359 	    else
7360 	      strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
7361 	  }
7362 	else
7363 	  {
7364 	    strcpy (buf, "{vextrs,|extrw,s,}");
7365 	    if (GET_MODE (operands[0]) == DImode)
7366 	      strcpy (buf, "extrd,s,*");
7367 	    if ((which == 0 && negated)
7368 		|| (which == 1 && ! negated))
7369 	      strcat (buf, "<");
7370 	    else
7371 	      strcat (buf, ">=");
7372 	    if (nullify && negated)
7373 	      strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7374 	    else if (nullify && ! negated)
7375 	      strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7376 	    else if (negated)
7377 	      strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7378 	    else
7379 	      strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7380 	  }
7381 	break;
7382 
7383       default:
7384 	/* The reversed conditional branch must branch over one additional
7385 	   instruction if the delay slot is filled and needs to be extracted
7386 	   by pa_output_lbranch.  If the delay slot is empty or this is a
7387 	   nullified forward branch, the instruction after the reversed
7388 	   condition branch must be nullified.  */
7389 	if (dbr_sequence_length () == 0
7390 	    || (nullify && forward_branch_p (insn)))
7391 	  {
7392 	    nullify = 1;
7393 	    xdelay = 0;
7394 	    operands[4] = GEN_INT (length);
7395 	  }
7396 	else
7397 	  {
7398 	    xdelay = 1;
7399 	    operands[4] = GEN_INT (length + 4);
7400 	  }
7401 
7402 	if (GET_MODE (operands[0]) == DImode)
7403 	  strcpy (buf, "bb,*");
7404 	else
7405 	  strcpy (buf, "{bvb,|bb,}");
7406 	if ((which == 0 && negated)
7407 	    || (which == 1 && !negated))
7408 	  strcat (buf, "<");
7409 	else
7410 	  strcat (buf, ">=");
7411 	if (nullify)
7412 	  strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
7413 	else
7414 	  strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
7415 	output_asm_insn (buf, operands);
7416 	return pa_output_lbranch (negated ? operands[3] : operands[2],
7417 				  insn, xdelay);
7418     }
7419   return buf;
7420 }
7421 
7422 /* Return the output template for emitting a dbra type insn.
7423 
7424    Note it may perform some output operations on its own before
7425    returning the final output string.  */
7426 const char *
7427 pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
7428 {
7429   int length = get_attr_length (insn);
7430 
7431   /* A conditional branch to the following instruction (e.g. the delay slot) is
7432      asking for a disaster.  Be prepared!  */
7433 
7434   if (branch_to_delay_slot_p (insn))
7435     {
7436       if (which_alternative == 0)
7437 	return "ldo %1(%0),%0";
7438       else if (which_alternative == 1)
7439 	{
7440 	  output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7441 	  output_asm_insn ("ldw -16(%%r30),%4", operands);
7442 	  output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7443 	  return "{fldws|fldw} -16(%%r30),%0";
7444 	}
7445       else
7446 	{
7447 	  output_asm_insn ("ldw %0,%4", operands);
7448 	  return "ldo %1(%4),%4\n\tstw %4,%0";
7449 	}
7450     }
7451 
7452   if (which_alternative == 0)
7453     {
7454       int nullify = INSN_ANNULLED_BRANCH_P (insn);
7455       int xdelay;
7456 
7457       /* If this is a long branch with its delay slot unfilled, set `nullify'
7458 	 as it can nullify the delay slot and save a nop.  */
7459       if (length == 8 && dbr_sequence_length () == 0)
7460 	nullify = 1;
7461 
7462       /* If this is a short forward conditional branch which did not get
7463 	 its delay slot filled, the delay slot can still be nullified.  */
7464       if (! nullify && length == 4 && dbr_sequence_length () == 0)
7465 	nullify = forward_branch_p (insn);
7466 
7467       switch (length)
7468 	{
7469 	case 4:
7470 	  if (nullify)
7471 	    {
7472 	      if (branch_needs_nop_p (insn))
7473 		return "addib,%C2,n %1,%0,%3%#";
7474 	      else
7475 		return "addib,%C2,n %1,%0,%3";
7476 	    }
7477 	  else
7478 	    return "addib,%C2 %1,%0,%3";
7479 
7480 	case 8:
7481 	  /* Handle weird backwards branch with a filled delay slot
7482 	     which is nullified.  */
7483 	  if (dbr_sequence_length () != 0
7484 	      && ! forward_branch_p (insn)
7485 	      && nullify)
7486 	    return "addib,%N2,n %1,%0,.+12\n\tb %3";
7487 	  /* Handle short backwards branch with an unfilled delay slot.
7488 	     Using an addb;nop rather than addi;bl saves 1 cycle for both
7489 	     taken and untaken branches.  */
7490 	  else if (dbr_sequence_length () == 0
7491 		   && ! forward_branch_p (insn)
7492 		   && INSN_ADDRESSES_SET_P ()
7493 		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7494 				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7495 	      return "addib,%C2 %1,%0,%3%#";
7496 
7497 	  /* Handle normal cases.  */
7498 	  if (nullify)
7499 	    return "addi,%N2 %1,%0,%0\n\tb,n %3";
7500 	  else
7501 	    return "addi,%N2 %1,%0,%0\n\tb %3";
7502 
7503 	default:
7504 	  /* The reversed conditional branch must branch over one additional
7505 	     instruction if the delay slot is filled and needs to be extracted
7506 	     by pa_output_lbranch.  If the delay slot is empty or this is a
7507 	     nullified forward branch, the instruction after the reversed
7508 	     condition branch must be nullified.  */
7509 	  if (dbr_sequence_length () == 0
7510 	      || (nullify && forward_branch_p (insn)))
7511 	    {
7512 	      nullify = 1;
7513 	      xdelay = 0;
7514 	      operands[4] = GEN_INT (length);
7515 	    }
7516 	  else
7517 	    {
7518 	      xdelay = 1;
7519 	      operands[4] = GEN_INT (length + 4);
7520 	    }
7521 
7522 	  if (nullify)
7523 	    output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7524 	  else
7525 	    output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7526 
7527 	  return pa_output_lbranch (operands[3], insn, xdelay);
7528 	}
7529 
7530     }
7531   /* Deal with gross reload from FP register case.  */
7532   else if (which_alternative == 1)
7533     {
7534       /* Move loop counter from FP register to MEM then into a GR,
7535 	 increment the GR, store the GR into MEM, and finally reload
7536 	 the FP register from MEM from within the branch's delay slot.  */
7537       output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7538 		       operands);
7539       output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7540       if (length == 24)
7541 	return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7542       else if (length == 28)
7543 	return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7544       else
7545 	{
7546 	  operands[5] = GEN_INT (length - 16);
7547 	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7548 	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7549 	  return pa_output_lbranch (operands[3], insn, 0);
7550 	}
7551     }
7552   /* Deal with gross reload from memory case.  */
7553   else
7554     {
7555       /* Reload loop counter from memory, the store back to memory
7556 	 happens in the branch's delay slot.  */
7557       output_asm_insn ("ldw %0,%4", operands);
7558       if (length == 12)
7559 	return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7560       else if (length == 16)
7561 	return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7562       else
7563 	{
7564 	  operands[5] = GEN_INT (length - 4);
7565 	  output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7566 	  return pa_output_lbranch (operands[3], insn, 0);
7567 	}
7568     }
7569 }
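
/* The 4-byte case above is the classic decrement-and-branch idiom;
   e.g. with the counter in %r3, a step of -1, and %C2 printing as "<>"
   (operands illustrative), it emits

	addib,<> -1,%r3,L	; %r3 += -1, loop while nonzero

   The ",n" variant nullifies the delay slot; on PA, a backward branch
   nullifies it on the fall-through (loop exit) path.  */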
7570 
7571 /* Return the output template for emitting a movb type insn.
7572 
7573    Note it may perform some output operations on its own before
7574    returning the final output string.  */
7575 const char *
7576 pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
7577 	     int reverse_comparison)
7578 {
7579   int length = get_attr_length (insn);
7580 
7581   /* A conditional branch to the following instruction (e.g. the delay slot) is
7582      asking for a disaster.  Be prepared!  */
7583 
7584   if (branch_to_delay_slot_p (insn))
7585     {
7586       if (which_alternative == 0)
7587 	return "copy %1,%0";
7588       else if (which_alternative == 1)
7589 	{
7590 	  output_asm_insn ("stw %1,-16(%%r30)", operands);
7591 	  return "{fldws|fldw} -16(%%r30),%0";
7592 	}
7593       else if (which_alternative == 2)
7594 	return "stw %1,%0";
7595       else
7596 	return "mtsar %r1";
7597     }
7598 
7599   /* Support the second variant.  */
7600   if (reverse_comparison)
7601     PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7602 
7603   if (which_alternative == 0)
7604     {
7605       int nullify = INSN_ANNULLED_BRANCH_P (insn);
7606       int xdelay;
7607 
7608       /* If this is a long branch with its delay slot unfilled, set `nullify'
7609 	 as it can nullify the delay slot and save a nop.  */
7610       if (length == 8 && dbr_sequence_length () == 0)
7611 	nullify = 1;
7612 
7613       /* If this is a short forward conditional branch which did not get
7614 	 its delay slot filled, the delay slot can still be nullified.  */
7615       if (! nullify && length == 4 && dbr_sequence_length () == 0)
7616 	nullify = forward_branch_p (insn);
7617 
7618       switch (length)
7619 	{
7620 	case 4:
7621 	  if (nullify)
7622 	    {
7623 	      if (branch_needs_nop_p (insn))
7624 		return "movb,%C2,n %1,%0,%3%#";
7625 	      else
7626 		return "movb,%C2,n %1,%0,%3";
7627 	    }
7628 	  else
7629 	    return "movb,%C2 %1,%0,%3";
7630 
7631 	case 8:
7632 	  /* Handle weird backwards branch with a filled delay slot
7633 	     which is nullified.  */
7634 	  if (dbr_sequence_length () != 0
7635 	      && ! forward_branch_p (insn)
7636 	      && nullify)
7637 	    return "movb,%N2,n %1,%0,.+12\n\tb %3";
7638 
7639 	  /* Handle short backwards branch with an unfilled delay slot.
7640 	     Using a movb;nop rather than or;bl saves 1 cycle for both
7641 	     taken and untaken branches.  */
7642 	  else if (dbr_sequence_length () == 0
7643 		   && ! forward_branch_p (insn)
7644 		   && INSN_ADDRESSES_SET_P ()
7645 		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7646 				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7647 	    return "movb,%C2 %1,%0,%3%#";
7648 	  /* Handle normal cases.  */
7649 	  if (nullify)
7650 	    return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7651 	  else
7652 	    return "or,%N2 %1,%%r0,%0\n\tb %3";
7653 
7654 	default:
7655 	  /* The reversed conditional branch must branch over one additional
7656 	     instruction if the delay slot is filled and needs to be extracted
7657 	     by pa_output_lbranch.  If the delay slot is empty or this is a
7658 	     nullified forward branch, the instruction after the reversed
7659 	     condition branch must be nullified.  */
7660 	  if (dbr_sequence_length () == 0
7661 	      || (nullify && forward_branch_p (insn)))
7662 	    {
7663 	      nullify = 1;
7664 	      xdelay = 0;
7665 	      operands[4] = GEN_INT (length);
7666 	    }
7667 	  else
7668 	    {
7669 	      xdelay = 1;
7670 	      operands[4] = GEN_INT (length + 4);
7671 	    }
7672 
7673 	  if (nullify)
7674 	    output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7675 	  else
7676 	    output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7677 
7678 	  return pa_output_lbranch (operands[3], insn, xdelay);
7679 	}
7680     }
7681   /* Deal with gross reload for FP destination register case.  */
7682   else if (which_alternative == 1)
7683     {
7684       /* Move source register to MEM, perform the branch test, then
7685 	 finally load the FP register from MEM from within the branch's
7686 	 delay slot.  */
7687       output_asm_insn ("stw %1,-16(%%r30)", operands);
7688       if (length == 12)
7689 	return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7690       else if (length == 16)
7691 	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7692       else
7693 	{
7694 	  operands[4] = GEN_INT (length - 4);
7695 	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7696 	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7697 	  return pa_output_lbranch (operands[3], insn, 0);
7698 	}
7699     }
7700   /* Deal with gross reload from memory case.  */
7701   else if (which_alternative == 2)
7702     {
7703       /* Reload loop counter from memory, the store back to memory
7704 	 happens in the branch's delay slot.  */
7705       if (length == 8)
7706 	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7707       else if (length == 12)
7708 	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7709       else
7710 	{
7711 	  operands[4] = GEN_INT (length);
7712 	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7713 			   operands);
7714 	  return pa_output_lbranch (operands[3], insn, 0);
7715 	}
7716     }
7717   /* Handle SAR as a destination.  */
7718   else
7719     {
7720       if (length == 8)
7721 	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7722       else if (length == 12)
7723 	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7724       else
7725 	{
7726 	  operands[4] = GEN_INT (length);
7727 	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7728 			   operands);
7729 	  return pa_output_lbranch (operands[3], insn, 0);
7730 	}
7731     }
7732 }
7733 
7734 /* Copy any FP arguments in INSN into integer registers.  */
7735 static void
7736 copy_fp_args (rtx_insn *insn)
7737 {
7738   rtx link;
7739   rtx xoperands[2];
7740 
7741   for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7742     {
7743       int arg_mode, regno;
7744       rtx use = XEXP (link, 0);
7745 
7746       if (! (GET_CODE (use) == USE
7747 	  && GET_CODE (XEXP (use, 0)) == REG
7748 	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7749 	continue;
7750 
7751       arg_mode = GET_MODE (XEXP (use, 0));
7752       regno = REGNO (XEXP (use, 0));
7753 
7754       /* Is it a floating point register?  */
7755       if (regno >= 32 && regno <= 39)
7756 	{
7757 	  /* Copy the FP register into an integer register via memory.  */
7758 	  if (arg_mode == SFmode)
7759 	    {
7760 	      xoperands[0] = XEXP (use, 0);
7761 	      xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7762 	      output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7763 	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7764 	    }
7765 	  else
7766 	    {
7767 	      xoperands[0] = XEXP (use, 0);
7768 	      xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7769 	      output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7770 	      output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7771 	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7772 	    }
7773 	}
7774     }
7775 }
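
/* Concretely: for an SFmode argument, the pair of insns above spills
   the FP register to the -16 slot of the frame marker and reloads it
   into a general register (regno 32 maps to %r26 via
   26 - (regno - 32) / 2).  A DFmode argument takes a third insn,
   loading the second word through the %R1 modifier so the value lands
   in a general register pair.  */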
7776 
7777 /* Compute length of the FP argument copy sequence for INSN.  */
7778 static int
7779 length_fp_args (rtx_insn *insn)
7780 {
7781   int length = 0;
7782   rtx link;
7783 
7784   for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7785     {
7786       int arg_mode, regno;
7787       rtx use = XEXP (link, 0);
7788 
7789       if (! (GET_CODE (use) == USE
7790 	  && GET_CODE (XEXP (use, 0)) == REG
7791 	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7792 	continue;
7793 
7794       arg_mode = GET_MODE (XEXP (use, 0));
7795       regno = REGNO (XEXP (use, 0));
7796 
7797       /* Is it a floating point register?  */
7798       if (regno >= 32 && regno <= 39)
7799 	{
7800 	  if (arg_mode == SFmode)
7801 	    length += 8;
7802 	  else
7803 	    length += 12;
7804 	}
7805     }
7806 
7807   return length;
7808 }
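
/* For example, a call with one SFmode and one DFmode argument in FP
   registers is charged 8 + 12 = 20 bytes, matching the two- and
   three-insn copy sequences emitted by copy_fp_args.  */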
7809 
7810 /* Return the attribute length for the millicode call instruction INSN.
7811    The length must match the code generated by pa_output_millicode_call.
7812    We include the delay slot in the returned length as it is better to
7813    overestimate the length than to underestimate it.  */
7814 
7815 int
7816 pa_attr_length_millicode_call (rtx_insn *insn)
7817 {
7818   unsigned long distance = -1;
7819   unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7820 
7821   if (INSN_ADDRESSES_SET_P ())
7822     {
7823       distance = (total + insn_current_reference_address (insn));
7824       if (distance < total)
7825 	distance = -1;
7826     }
7827 
7828   if (TARGET_64BIT)
7829     {
7830       if (!TARGET_LONG_CALLS && distance < 7600000)
7831 	return 8;
7832 
7833       return 20;
7834     }
7835   else if (TARGET_PORTABLE_RUNTIME)
7836     return 24;
7837   else
7838     {
7839       if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
7840 	return 8;
7841 
7842       if (!flag_pic)
7843 	return 12;
7844 
7845       return 24;
7846     }
7847 }
7848 
7849 /* INSN is a function call.
7850 
7851    CALL_DEST is the routine we are calling.  */
7852 
7853 const char *
7854 pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
7855 {
7856   int attr_length = get_attr_length (insn);
7857   int seq_length = dbr_sequence_length ();
7858   rtx xoperands[4];
7859 
7860   xoperands[0] = call_dest;
7861 
7862   /* Handle the common case where we are sure that the branch will
7863      reach the beginning of the $CODE$ subspace.  The within reach
7864      form of the $$sh_func_adrs call has a length of 28.  Because it
7865      has an attribute type of sh_func_adrs, it never has a nonzero
7866      sequence length (i.e., the delay slot is never filled).  */
7867   if (!TARGET_LONG_CALLS
7868       && (attr_length == 8
7869 	  || (attr_length == 28
7870 	      && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
7871     {
7872       xoperands[1] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7873       output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7874     }
7875   else
7876     {
7877       if (TARGET_64BIT)
7878 	{
7879 	  /* It might seem that one insn could be saved by accessing
7880 	     the millicode function using the linkage table.  However,
7881 	     this doesn't work in shared libraries and other dynamically
7882 	     loaded objects.  Using a pc-relative sequence also avoids
7883 	     problems related to the implicit use of the gp register.  */
7884 	  xoperands[1] = gen_rtx_REG (Pmode, 1);
7885 	  xoperands[2] = xoperands[1];
7886 	  pa_output_pic_pcrel_sequence (xoperands);
7887 	  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7888 	}
7889       else if (TARGET_PORTABLE_RUNTIME)
7890 	{
7891 	  /* Pure portable runtime doesn't allow be/ble; we also don't
7892 	     have PIC support in the assembler/linker, so this sequence
7893 	     is needed.  */
7894 
7895 	  /* Get the address of our target into %r1.  */
7896 	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
7897 	  output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7898 
7899 	  /* Get our return address into %r31.  */
7900 	  output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7901 	  output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7902 
7903 	  /* Jump to our target address in %r1.  */
7904 	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
7905 	}
7906       else if (!flag_pic)
7907 	{
7908 	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
7909 	  if (TARGET_PA_20)
7910 	    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7911 	  else
7912 	    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7913 	}
7914       else
7915 	{
7916 	  xoperands[1] = gen_rtx_REG (Pmode, 31);
7917 	  xoperands[2] = gen_rtx_REG (Pmode, 1);
7918 	  pa_output_pic_pcrel_sequence (xoperands);
7919 
7920 	  /* Adjust return address.  */
7921 	  output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands);
7922 
7923 	  /* Jump to our target address in %r1.  */
7924 	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
7925 	}
7926     }
7927 
7928   if (seq_length == 0)
7929     output_asm_insn ("nop", xoperands);
7930 
7931   return "";
7932 }
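
/* In the common short-reach case this is a single branch-and-link with
   the return point in %r31 (32-bit) or %r2 (64-bit); e.g., for a
   software multiply (millicode entry name illustrative):

	bl $$mulI,%r31
	nop			; when the delay slot is unfilled  */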
7933 
7934 /* Return the attribute length of the call instruction INSN.  The SIBCALL
7935    flag indicates whether INSN is a regular call or a sibling call.  The
7936    length returned must be longer than the code actually generated by
7937    pa_output_call.  Since branch shortening is done before delay branch
7938    sequencing, there is no way to determine whether or not the delay
7939    slot will be filled during branch shortening.  Even when the delay
7940    slot is filled, we may have to add a nop if the delay slot contains
7941    a branch that can't reach its target.  Thus, we always have to include
7942    the delay slot in the length estimate.  This used to be done in
7943    pa_adjust_insn_length but we do it here now as some sequences always
7944    fill the delay slot and we can save four bytes in the estimate for
7945    these sequences.  */
7946 
7947 int
7948 pa_attr_length_call (rtx_insn *insn, int sibcall)
7949 {
7950   int local_call;
7951   rtx call, call_dest;
7952   tree call_decl;
7953   int length = 0;
7954   rtx pat = PATTERN (insn);
7955   unsigned long distance = -1;
7956 
7957   gcc_assert (CALL_P (insn));
7958 
7959   if (INSN_ADDRESSES_SET_P ())
7960     {
7961       unsigned long total;
7962 
7963       total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7964       distance = (total + insn_current_reference_address (insn));
7965       if (distance < total)
7966 	distance = -1;
7967     }
7968 
7969   gcc_assert (GET_CODE (pat) == PARALLEL);
7970 
7971   /* Get the call rtx.  */
7972   call = XVECEXP (pat, 0, 0);
7973   if (GET_CODE (call) == SET)
7974     call = SET_SRC (call);
7975 
7976   gcc_assert (GET_CODE (call) == CALL);
7977 
7978   /* Determine if this is a local call.  */
7979   call_dest = XEXP (XEXP (call, 0), 0);
7980   call_decl = SYMBOL_REF_DECL (call_dest);
7981   local_call = call_decl && targetm.binds_local_p (call_decl);
7982 
7983   /* pc-relative branch.  */
7984   if (!TARGET_LONG_CALLS
7985       && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7986 	  || distance < MAX_PCREL17F_OFFSET))
7987     length += 8;
7988 
7989   /* 64-bit plabel sequence.  */
7990   else if (TARGET_64BIT && !local_call)
7991     length += 24;
7992 
7993   /* non-pic long absolute branch sequence.  */
7994   else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7995     length += 12;
7996 
7997   /* long pc-relative branch sequence.  */
7998   else if (TARGET_LONG_PIC_SDIFF_CALL
7999 	   || (TARGET_GAS && !TARGET_SOM && local_call))
8000     {
8001       length += 20;
8002 
8003       if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8004 	length += 8;
8005     }
8006 
8007   /* 32-bit plabel sequence.  */
8008   else
8009     {
8010       length += 32;
8011 
8012       if (TARGET_SOM)
8013 	length += length_fp_args (insn);
8014 
8015       if (flag_pic)
8016 	length += 4;
8017 
8018       if (!TARGET_PA_20)
8019 	{
8020 	  if (!sibcall)
8021 	    length += 8;
8022 
8023 	  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8024 	    length += 8;
8025 	}
8026     }
8027 
8028   return length;
8029 }
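
/* A worked instance of the accounting above: a 32-bit, PIC, non-local
   call on PA 1.X that falls through to the plabel sequence totals
   32 (base) + 4 (PIC) + 8 (not a sibcall) + 8 (space registers)
   = 52 bytes, plus the FP argument copy code when generating SOM
   code.  */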
8030 
8031 /* INSN is a function call.
8032 
8033    CALL_DEST is the routine we are calling.  */
8034 
8035 const char *
8036 pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
8037 {
8038   int seq_length = dbr_sequence_length ();
8039   tree call_decl = SYMBOL_REF_DECL (call_dest);
8040   int local_call = call_decl && targetm.binds_local_p (call_decl);
8041   rtx xoperands[4];
8042 
8043   xoperands[0] = call_dest;
8044 
8045   /* Handle the common case where we're sure that the branch will reach
8046      the beginning of the "$CODE$" subspace.  This is the beginning of
8047      the current function if we are in a named section.  */
8048   if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
8049     {
8050       xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
8051       output_asm_insn ("{bl|b,l} %0,%1", xoperands);
8052     }
8053   else
8054     {
8055       if (TARGET_64BIT && !local_call)
8056 	{
8057 	  /* ??? As far as I can tell, the HP linker doesn't support the
8058 	     long pc-relative sequence described in the 64-bit runtime
8059 	     architecture.  So, we use a slightly longer indirect call.  */
8060 	  xoperands[0] = pa_get_deferred_plabel (call_dest);
8061 	  xoperands[1] = gen_label_rtx ();
8062 
8063 	  /* Put the load of %r27 into the delay slot.  We don't need to
8064 	     do anything when generating fast indirect calls.  */
8065 	  if (seq_length != 0)
8066 	    {
8067 	      final_scan_insn (NEXT_INSN (insn), asm_out_file,
8068 			       optimize, 0, NULL);
8069 
8070 	      /* Now delete the delay insn.  */
8071 	      SET_INSN_DELETED (NEXT_INSN (insn));
8072 	    }
8073 
8074 	  output_asm_insn ("addil LT'%0,%%r27", xoperands);
8075 	  output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
8076 	  output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
8077 	  output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
8078 	  output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
8079 	  output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
8080 	  seq_length = 1;
8081 	}
8082       else
8083 	{
8084 	  int indirect_call = 0;
8085 
8086 	  /* Emit a long call.  There are several different sequences
8087 	     of increasing length and complexity.  In most cases,
8088 	     they don't allow an instruction in the delay slot.  */
8089 	  if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
8090 	      && !TARGET_LONG_PIC_SDIFF_CALL
8091 	      && !(TARGET_GAS && !TARGET_SOM && local_call)
8092 	      && !TARGET_64BIT)
8093 	    indirect_call = 1;
8094 
8095 	  if (seq_length != 0
8096 	      && !sibcall
8097 	      && (!TARGET_PA_20
8098 		  || indirect_call
8099 		  || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
8100 	    {
8101 	      /* A non-jump insn in the delay slot.  By definition we can
8102 		 emit this insn before the call (and in fact before argument
8103 		 relocation).  */
8104 	      final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
8105 			       NULL);
8106 
8107 	      /* Now delete the delay insn.  */
8108 	      SET_INSN_DELETED (NEXT_INSN (insn));
8109 	      seq_length = 0;
8110 	    }
8111 
8112 	  if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
8113 	    {
8114 	      /* This is the best sequence for making long calls in
8115 		 non-pic code.  Unfortunately, GNU ld doesn't provide
8116 		 the stub needed for external calls, and GAS's support
8117 		 for this with the SOM linker is buggy.  It is safe
8118 		 to use this for local calls.  */
8119 	      output_asm_insn ("ldil L'%0,%%r1", xoperands);
8120 	      if (sibcall)
8121 		output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
8122 	      else
8123 		{
8124 		  if (TARGET_PA_20)
8125 		    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
8126 				     xoperands);
8127 		  else
8128 		    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
8129 
8130 		  output_asm_insn ("copy %%r31,%%r2", xoperands);
8131 		  seq_length = 1;
8132 		}
8133 	    }
8134 	  else
8135 	    {
8136 	      /* The HP assembler and linker can handle relocations for
8137 		 the difference of two symbols.  The HP assembler
8138 		 recognizes the sequence as a pc-relative call and
8139 		 the linker provides stubs when needed.  */
8140 
8141 	      /* GAS currently can't generate the relocations that
8142 		 are needed for the SOM linker under HP-UX using this
8143 		 sequence.  The GNU linker doesn't generate the stubs
8144 		 that are needed for external calls on TARGET_ELF32
8145 		 with this sequence.  For now, we have to use a longer
8146 		 plabel sequence when using GAS for non-local calls.  */
8147 	      if (TARGET_LONG_PIC_SDIFF_CALL
8148 		  || (TARGET_GAS && !TARGET_SOM && local_call))
8149 		{
8150 		  xoperands[1] = gen_rtx_REG (Pmode, 1);
8151 		  xoperands[2] = xoperands[1];
8152 		  pa_output_pic_pcrel_sequence (xoperands);
8153 		}
8154 	      else
8155 		{
8156 		  /* Emit a long plabel-based call sequence.  This is
8157 		     essentially an inline implementation of $$dyncall.
8158 		     We don't actually try to call $$dyncall as this is
8159 		     as difficult as calling the function itself.  */
8160 		  xoperands[0] = pa_get_deferred_plabel (call_dest);
8161 		  xoperands[1] = gen_label_rtx ();
8162 
8163 		  /* Since the call is indirect, FP arguments in registers
8164 		     need to be copied to the general registers.  Then, the
8165 		     argument relocation stub will copy them back.  */
8166 		  if (TARGET_SOM)
8167 		    copy_fp_args (insn);
8168 
8169 		  if (flag_pic)
8170 		    {
8171 		      output_asm_insn ("addil LT'%0,%%r19", xoperands);
8172 		      output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
8173 		      output_asm_insn ("ldw 0(%%r1),%%r22", xoperands);
8174 		    }
8175 		  else
8176 		    {
8177 		      output_asm_insn ("addil LR'%0-$global$,%%r27",
8178 				       xoperands);
8179 		      output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r22",
8180 				       xoperands);
8181 		    }
8182 
8183 		  output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8184 		  output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8185 		  /* Should this be an ordered load to ensure the target
8186 	             address is loaded before the global pointer?  */
8187 		  output_asm_insn ("ldw 0(%%r22),%%r1", xoperands);
8188 		  output_asm_insn ("ldw 4(%%r22),%%r19", xoperands);
8189 
8190 		  if (!sibcall && !TARGET_PA_20)
8191 		    {
8192 		      output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8193 		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8194 			output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
8195 		      else
8196 			output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
8197 		    }
8198 		}
8199 
8200 	      if (TARGET_PA_20)
8201 		{
8202 		  if (sibcall)
8203 		    output_asm_insn ("bve (%%r1)", xoperands);
8204 		  else
8205 		    {
8206 		      if (indirect_call)
8207 			{
8208 			  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8209 			  output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
8210 			  seq_length = 1;
8211 			}
8212 		      else
8213 			output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8214 		    }
8215 		}
8216 	      else
8217 		{
8218 		  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8219 		    output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8220 				     xoperands);
8221 
8222 		  if (sibcall)
8223 		    {
8224 		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8225 			output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
8226 		      else
8227 			output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
8228 		    }
8229 		  else
8230 		    {
8231 		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8232 			output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
8233 		      else
8234 			output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
8235 
8236 		      if (indirect_call)
8237 			output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
8238 		      else
8239 			output_asm_insn ("copy %%r31,%%r2", xoperands);
8240 		      seq_length = 1;
8241 		    }
8242 		}
8243 	    }
8244 	}
8245     }
8246 
8247   if (seq_length == 0)
8248     output_asm_insn ("nop", xoperands);
8249 
8250   return "";
8251 }
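
/* By way of example (added commentary): with TARGET_LONG_ABS_CALL on a
   pre-2.0, non-PIC target, a non-sibling call to foo produced by the
   code above is

	ldil L'foo,%r1		; high part of the absolute address
	ble R'foo(%sr4,%r1)	; branch external, link in %r31
	copy %r31,%r2		; delay slot: move link into %r2

   where the copy executes in the delay slot of the ble, so no trailing
   nop is needed (seq_length is set to 1).  */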
8252 
8253 /* Return the attribute length of the indirect call instruction INSN.
8254    The length must match the code generated by pa_output_indirect_call.
8255    The returned length includes the delay slot.  Currently, the delay
8256    slot of an indirect call sequence is not exposed and it is used by
8257    the sequence itself.  */
8258 
8259 int
8260 pa_attr_length_indirect_call (rtx_insn *insn)
8261 {
8262   unsigned long distance = -1;
8263   unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8264 
8265   if (INSN_ADDRESSES_SET_P ())
8266     {
8267       distance = (total + insn_current_reference_address (insn));
8268       if (distance < total)
8269 	distance = -1;
8270     }
8271 
8272   if (TARGET_64BIT)
8273     return 12;
8274 
8275   if (TARGET_FAST_INDIRECT_CALLS)
8276     return 8;
8277 
8278   if (TARGET_PORTABLE_RUNTIME)
8279     return 16;
8280 
8281   if (!TARGET_LONG_CALLS
8282       && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8283 	  || distance < MAX_PCREL17F_OFFSET))
8284     return 8;
8285 
8286   /* Out of reach, can use ble.  */
8287   if (!flag_pic)
8288     return 12;
8289 
8290   /* Inline versions of $$dyncall.  */
8291   if (!optimize_size)
8292     {
8293       if (TARGET_NO_SPACE_REGS)
8294 	return 28;
8295 
8296       if (TARGET_PA_20)
8297 	return 32;
8298     }
8299 
8300   /* Long PIC pc-relative call.  */
8301   return 20;
8302 }
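
/* Worked example (added for clarity): on a 32-bit PA 2.0 PIC target
   with space registers enabled, an out-of-range indirect call returns
   32 here when not optimizing for size (the inline $$dyncall version),
   but 20 with -Os, where the long PIC pc-relative call to $$dyncall is
   used instead.  */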
8303 
8304 const char *
8305 pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
8306 {
8307   rtx xoperands[4];
8308   int length;
8309 
8310   if (TARGET_64BIT)
8311     {
8312       xoperands[0] = call_dest;
8313       output_asm_insn ("ldd 16(%0),%%r2\n\t"
8314 		       "bve,l (%%r2),%%r2\n\t"
8315 		       "ldd 24(%0),%%r27", xoperands);
8316       return "";
8317     }
8318 
8319   /* First the special case for kernels, level 0 systems, etc.  */
8320   if (TARGET_FAST_INDIRECT_CALLS)
8321     {
8322       pa_output_arg_descriptor (insn);
8323       if (TARGET_PA_20)
8324 	return "bve,l,n (%%r22),%%r2\n\tnop";
8325       return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8326     }
8327 
8328   if (TARGET_PORTABLE_RUNTIME)
8329     {
8330       output_asm_insn ("ldil L'$$dyncall,%%r31\n\t"
8331 		       "ldo R'$$dyncall(%%r31),%%r31", xoperands);
8332       pa_output_arg_descriptor (insn);
8333       return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8334     }
8335 
8336   /* Now the normal case -- we can reach $$dyncall directly or
8337      we're sure that we can get there via a long-branch stub.
8338 
8339      No need to check target flags as the length uniquely identifies
8340      the remaining cases.  */
8341   length = pa_attr_length_indirect_call (insn);
8342   if (length == 8)
8343     {
8344       pa_output_arg_descriptor (insn);
8345 
8346       /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8347 	 $$dyncall.  Since BLE uses %r31 as the link register, the 22-bit
8348 	 variant of the B,L instruction can't be used on the SOM target.  */
8349       if (TARGET_PA_20 && !TARGET_SOM)
8350 	return "b,l,n $$dyncall,%%r2\n\tnop";
8351       else
8352 	return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8353     }
8354 
8355   /* Long millicode call, but we are not generating PIC or portable runtime
8356      code.  */
8357   if (length == 12)
8358     {
8359       output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands);
8360       pa_output_arg_descriptor (insn);
8361       return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8362     }
8363 
8364   /* The long PIC pc-relative call sequence is five instructions.  So,
8365      let's use an inline version of $$dyncall when the calling sequence
8366      has a roughly similar number of instructions and we are not optimizing
8367      for size.  We need two instructions to load the return pointer plus
8368      the $$dyncall implementation.  */
8369   if (!optimize_size)
8370     {
8371       if (TARGET_NO_SPACE_REGS)
8372 	{
8373 	  pa_output_arg_descriptor (insn);
8374 	  output_asm_insn ("bl .+8,%%r2\n\t"
8375 			   "ldo 20(%%r2),%%r2\n\t"
8376 			   "extru,<> %%r22,30,1,%%r0\n\t"
8377 			   "bv,n %%r0(%%r22)\n\t"
8378 			   "ldw -2(%%r22),%%r21\n\t"
8379 			   "bv %%r0(%%r21)\n\t"
8380 			   "ldw 2(%%r22),%%r19", xoperands);
8381 	  return "";
8382 	}
8383       if (TARGET_PA_20)
8384 	{
8385 	  pa_output_arg_descriptor (insn);
8386 	  output_asm_insn ("bl .+8,%%r2\n\t"
8387 			   "ldo 24(%%r2),%%r2\n\t"
8388 			   "stw %%r2,-24(%%sp)\n\t"
8389 			   "extru,<> %%r22,30,1,%%r0\n\t"
8390 			   "bve,n (%%r22)\n\t"
8391 			   "ldw -2(%%r22),%%r21\n\t"
8392 			   "bve (%%r21)\n\t"
8393 			   "ldw 2(%%r22),%%r19", xoperands);
8394 	  return "";
8395 	}
8396     }
8397 
8398   /* We need a long PIC call to $$dyncall.  */
8399   xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall");
8400   xoperands[1] = gen_rtx_REG (Pmode, 2);
8401   xoperands[2] = gen_rtx_REG (Pmode, 1);
8402   pa_output_pic_pcrel_sequence (xoperands);
8403   pa_output_arg_descriptor (insn);
8404   return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2";
8405 }
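
/* A note on the bit-30 tests in the sequences above (annotation added):
   in the 32-bit runtime, a function "pointer" with bit 30 set is a
   plabel pointer (descriptor address + 2) rather than a plain code
   address.  The extru,<> nullifies the direct "bv,n %r0(%r22)" exactly
   when the bit is set; the ldw -2(%r22) and ldw 2(%r22) then fetch the
   entry point and the new global pointer from the descriptor, the -2/+2
   offsets cancelling the flag bit.  */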
8406 
8407 /* In HP-UX 8.0's shared library scheme, special relocations are needed
8408    for function labels if they might be passed to a function
8409    in a shared library (because shared libraries don't live in code
8410    space), and special magic is needed to construct their address.  */
8411 
8412 void
8413 pa_encode_label (rtx sym)
8414 {
8415   const char *str = XSTR (sym, 0);
8416   int len = strlen (str) + 1;
8417   char *newstr, *p;
8418 
8419   p = newstr = XALLOCAVEC (char, len + 1);
8420   *p++ = '@';
8421   strcpy (p, str);
8422 
8423   XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8424 }
8425 
8426 static void
8427 pa_encode_section_info (tree decl, rtx rtl, int first)
8428 {
8429   int old_referenced = 0;
8430 
8431   if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8432     old_referenced
8433       = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8434 
8435   default_encode_section_info (decl, rtl, first);
8436 
8437   if (first && TEXT_SPACE_P (decl))
8438     {
8439       SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8440       if (TREE_CODE (decl) == FUNCTION_DECL)
8441 	pa_encode_label (XEXP (rtl, 0));
8442     }
8443   else if (old_referenced)
8444     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8445 }
8446 
8447 /* This is sort of inverse to pa_encode_section_info.  */
8448 
8449 static const char *
8450 pa_strip_name_encoding (const char *str)
8451 {
8452   str += (*str == '@');
8453   str += (*str == '*');
8454   return str;
8455 }
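
/* Example (added): for a function symbol named "foo", pa_encode_label
   rewrites the name to "@foo"; pa_strip_name_encoding recovers "foo",
   and also drops the generic '*' user-label prefix when present.  */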
8456 
8457 /* Returns 1 if OP is a function label involved in a simple addition
8458    with a constant.  Used to keep certain patterns from matching
8459    during instruction combination.  */
8460 int
8461 pa_is_function_label_plus_const (rtx op)
8462 {
8463   /* Strip off any CONST.  */
8464   if (GET_CODE (op) == CONST)
8465     op = XEXP (op, 0);
8466 
8467   return (GET_CODE (op) == PLUS
8468 	  && function_label_operand (XEXP (op, 0), VOIDmode)
8469 	  && GET_CODE (XEXP (op, 1)) == CONST_INT);
8470 }
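
/* For instance (illustrative), this returns 1 for an operand such as

     (const (plus (symbol_ref "@foo") (const_int 4)))

   where "@foo" is an encoded function label, and 0 for anything else.  */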
8471 
8472 /* Output the assembler code for a thunk function.  THUNK_DECL is the
8473    declaration for the thunk function itself, FUNCTION is the decl for
8474    the target function.  DELTA is an immediate constant offset to be
8475    added to THIS.  If VCALL_OFFSET is nonzero, the word at
8476    *(*this + vcall_offset) should be added to THIS.  */
8477 
8478 static void
8479 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8480 			HOST_WIDE_INT vcall_offset, tree function)
8481 {
8482   const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
8483   static unsigned int current_thunk_number;
8484   int val_14 = VAL_14_BITS_P (delta);
8485   unsigned int old_last_address = last_address, nbytes = 0;
8486   char label[17];
8487   rtx xoperands[4];
8488 
8489   xoperands[0] = XEXP (DECL_RTL (function), 0);
8490   xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8491   xoperands[2] = GEN_INT (delta);
8492 
8493   assemble_start_function (thunk_fndecl, fnname);
8494   final_start_function (emit_barrier (), file, 1);
8495 
8496   if (!vcall_offset)
8497     {
8498       /* Output the thunk.  We know that the function is in the same
8499 	 translation unit (i.e., the same space) as the thunk, and that
8500 	 thunks are output after their method.  Thus, we don't need an
8501 	 external branch to reach the function.  With SOM and GAS,
8502 	 functions and thunks are effectively in different sections.
8503 	 Thus, we can always use an IA-relative branch and the linker
8504 	 will add a long branch stub if necessary.
8505 
8506 	 However, we have to be careful when generating PIC code on the
8507 	 SOM port to ensure that the sequence does not transfer to an
8508 	 import stub for the target function as this could clobber the
8509 	 return value saved at SP-24.  This would also apply to the
8510 	 32-bit Linux port if the multi-space model is implemented.  */
8511       if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8512 	   && !(flag_pic && TREE_PUBLIC (function))
8513 	   && (TARGET_GAS || last_address < 262132))
8514 	  || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8515 	      && ((targetm_common.have_named_sections
8516 		   && DECL_SECTION_NAME (thunk_fndecl) != NULL
8517 		   /* The GNU 64-bit linker has rather poor stub management.
8518 		      So, we use a long branch from thunks that aren't in
8519 		      the same section as the target function.  */
8520 		    && ((!TARGET_64BIT
8521 			 && (DECL_SECTION_NAME (thunk_fndecl)
8522 			     != DECL_SECTION_NAME (function)))
8523 			|| ((DECL_SECTION_NAME (thunk_fndecl)
8524 			     == DECL_SECTION_NAME (function))
8525 			    && last_address < 262132)))
8526 		  /* In this case, we need to be able to reach the start of
8527 		     the stub table even though the function is likely closer
8528 		     and can be jumped to directly.  */
8529 		  || (targetm_common.have_named_sections
8530 		      && DECL_SECTION_NAME (thunk_fndecl) == NULL
8531 		      && DECL_SECTION_NAME (function) == NULL
8532 		      && total_code_bytes < MAX_PCREL17F_OFFSET)
8533 		  /* Likewise.  */
8534 		  || (!targetm_common.have_named_sections
8535 		      && total_code_bytes < MAX_PCREL17F_OFFSET))))
8536 	{
8537 	  if (!val_14)
8538 	    output_asm_insn ("addil L'%2,%%r26", xoperands);
8539 
8540 	  output_asm_insn ("b %0", xoperands);
8541 
8542 	  if (val_14)
8543 	    {
8544 	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8545 	      nbytes += 8;
8546 	    }
8547 	  else
8548 	    {
8549 	      output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8550 	      nbytes += 12;
8551 	    }
8552 	}
8553       else if (TARGET_64BIT)
8554 	{
8555 	  rtx xop[4];
8556 
8557 	  /* We only have one call-clobbered scratch register, so we can't
8558 	     make use of the delay slot if delta doesn't fit in 14 bits.  */
8559 	  if (!val_14)
8560 	    {
8561 	      output_asm_insn ("addil L'%2,%%r26", xoperands);
8562 	      output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8563 	    }
8564 
8565 	  /* Load function address into %r1.  */
8566 	  xop[0] = xoperands[0];
8567 	  xop[1] = gen_rtx_REG (Pmode, 1);
8568 	  xop[2] = xop[1];
8569 	  pa_output_pic_pcrel_sequence (xop);
8570 
8571 	  if (val_14)
8572 	    {
8573 	      output_asm_insn ("bv %%r0(%%r1)", xoperands);
8574 	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8575 	      nbytes += 20;
8576 	    }
8577 	  else
8578 	    {
8579 	      output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8580 	      nbytes += 24;
8581 	    }
8582 	}
8583       else if (TARGET_PORTABLE_RUNTIME)
8584 	{
8585 	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
8586 	  output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8587 
8588 	  if (!val_14)
8589 	    output_asm_insn ("ldil L'%2,%%r26", xoperands);
8590 
8591 	  output_asm_insn ("bv %%r0(%%r22)", xoperands);
8592 
8593 	  if (val_14)
8594 	    {
8595 	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8596 	      nbytes += 16;
8597 	    }
8598 	  else
8599 	    {
8600 	      output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands);
8601 	      nbytes += 20;
8602 	    }
8603 	}
8604       else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8605 	{
8606 	  /* The function is accessible from outside this module.  The only
8607 	     way to avoid an import stub between the thunk and function is to
8608 	     call the function directly with an indirect sequence similar to
8609 	     that used by $$dyncall.  This is possible because $$dyncall acts
8610 	     as the import stub in an indirect call.  */
8611 	  ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8612 	  xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8613 	  output_asm_insn ("addil LT'%3,%%r19", xoperands);
8614 	  output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8615 	  output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8616 	  output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8617 	  output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8618 	  output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8619 	  output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8620 
8621 	  if (!val_14)
8622 	    {
8623 	      output_asm_insn ("addil L'%2,%%r26", xoperands);
8624 	      nbytes += 4;
8625 	    }
8626 
8627 	  if (TARGET_PA_20)
8628 	    {
8629 	      output_asm_insn ("bve (%%r22)", xoperands);
8630 	      nbytes += 36;
8631 	    }
8632 	  else if (TARGET_NO_SPACE_REGS)
8633 	    {
8634 	      output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8635 	      nbytes += 36;
8636 	    }
8637 	  else
8638 	    {
8639 	      output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8640 	      output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8641 	      output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8642 	      nbytes += 44;
8643 	    }
8644 
8645 	  if (val_14)
8646 	    output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8647 	  else
8648 	    output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8649 	}
8650       else if (flag_pic)
8651 	{
8652 	  rtx xop[4];
8653 
8654 	  /* Load function address into %r22.  */
8655 	  xop[0] = xoperands[0];
8656 	  xop[1] = gen_rtx_REG (Pmode, 1);
8657 	  xop[2] = gen_rtx_REG (Pmode, 22);
8658 	  pa_output_pic_pcrel_sequence (xop);
8659 
8660 	  if (!val_14)
8661 	    output_asm_insn ("addil L'%2,%%r26", xoperands);
8662 
8663 	  output_asm_insn ("bv %%r0(%%r22)", xoperands);
8664 
8665 	  if (val_14)
8666 	    {
8667 	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8668 	      nbytes += 20;
8669 	    }
8670 	  else
8671 	    {
8672 	      output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8673 	      nbytes += 24;
8674 	    }
8675 	}
8676       else
8677 	{
8678 	  if (!val_14)
8679 	    output_asm_insn ("addil L'%2,%%r26", xoperands);
8680 
8681 	  output_asm_insn ("ldil L'%0,%%r22", xoperands);
8682 	  output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8683 
8684 	  if (val_14)
8685 	    {
8686 	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8687 	      nbytes += 12;
8688 	    }
8689 	  else
8690 	    {
8691 	      output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8692 	      nbytes += 16;
8693 	    }
8694 	}
8695     }
8696   else
8697     {
8698       rtx xop[4];
8699 
8700       /* Add DELTA to THIS.  */
8701       if (val_14)
8702 	{
8703 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8704 	  nbytes += 4;
8705 	}
8706       else
8707 	{
8708 	  output_asm_insn ("addil L'%2,%%r26", xoperands);
8709 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8710 	  nbytes += 8;
8711 	}
8712 
8713       if (TARGET_64BIT)
8714 	{
8715 	  /* Load *(THIS + DELTA) to %r1.  */
8716 	  output_asm_insn ("ldd 0(%%r26),%%r1", xoperands);
8717 
8718 	  val_14 = VAL_14_BITS_P (vcall_offset);
8719 	  xoperands[2] = GEN_INT (vcall_offset);
8720 
8721 	  /* Load  *(*(THIS + DELTA) + VCALL_OFFSET) to %r1.  */
8722 	  if (val_14)
8723 	    {
8724 	      output_asm_insn ("ldd %2(%%r1),%%r1", xoperands);
8725 	      nbytes += 8;
8726 	    }
8727 	  else
8728 	    {
8729 	      output_asm_insn ("addil L'%2,%%r1", xoperands);
8730 	      output_asm_insn ("ldd R'%2(%%r1),%%r1", xoperands);
8731 	      nbytes += 12;
8732 	    }
8733 	}
8734       else
8735 	{
8736 	  /* Load *(THIS + DELTA) to %r1.  */
8737 	  output_asm_insn ("ldw 0(%%r26),%%r1", xoperands);
8738 
8739 	  val_14 = VAL_14_BITS_P (vcall_offset);
8740 	  xoperands[2] = GEN_INT (vcall_offset);
8741 
8742 	  /* Load  *(*(THIS + DELTA) + VCALL_OFFSET) to %r1.  */
8743 	  if (val_14)
8744 	    {
8745 	      output_asm_insn ("ldw %2(%%r1),%%r1", xoperands);
8746 	      nbytes += 8;
8747 	    }
8748 	  else
8749 	    {
8750 	      output_asm_insn ("addil L'%2,%%r1", xoperands);
8751 	      output_asm_insn ("ldw R'%2(%%r1),%%r1", xoperands);
8752 	      nbytes += 12;
8753 	    }
8754 	}
8755 
8756       /* Branch to FUNCTION and add %r1 to THIS in delay slot if possible.  */
8757       if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8758 	   && !(flag_pic && TREE_PUBLIC (function))
8759 	   && (TARGET_GAS || last_address < 262132))
8760 	  || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8761 	      && ((targetm_common.have_named_sections
8762 		   && DECL_SECTION_NAME (thunk_fndecl) != NULL
8763 		   /* The GNU 64-bit linker has rather poor stub management.
8764 		      So, we use a long branch from thunks that aren't in
8765 		      the same section as the target function.  */
8766 		    && ((!TARGET_64BIT
8767 			 && (DECL_SECTION_NAME (thunk_fndecl)
8768 			     != DECL_SECTION_NAME (function)))
8769 			|| ((DECL_SECTION_NAME (thunk_fndecl)
8770 			     == DECL_SECTION_NAME (function))
8771 			    && last_address < 262132)))
8772 		  /* In this case, we need to be able to reach the start of
8773 		     the stub table even though the function is likely closer
8774 		     and can be jumped to directly.  */
8775 		  || (targetm_common.have_named_sections
8776 		      && DECL_SECTION_NAME (thunk_fndecl) == NULL
8777 		      && DECL_SECTION_NAME (function) == NULL
8778 		      && total_code_bytes < MAX_PCREL17F_OFFSET)
8779 		  /* Likewise.  */
8780 		  || (!targetm_common.have_named_sections
8781 		      && total_code_bytes < MAX_PCREL17F_OFFSET))))
8782 	{
8783 	  nbytes += 4;
8784 	  output_asm_insn ("b %0", xoperands);
8785 
8786 	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
8787 	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8788 	}
8789       else if (TARGET_64BIT)
8790 	{
8791 	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
8792 	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8793 
8794 	  /* Load function address into %r1.  */
8795 	  nbytes += 16;
8796 	  xop[0] = xoperands[0];
8797 	  xop[1] = gen_rtx_REG (Pmode, 1);
8798 	  xop[2] = xop[1];
8799 	  pa_output_pic_pcrel_sequence (xop);
8800 
8801 	  output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8802 	}
8803       else if (TARGET_PORTABLE_RUNTIME)
8804 	{
8805 	  /* Load function address into %r22.  */
8806 	  nbytes += 12;
8807 	  output_asm_insn ("ldil L'%0,%%r22", xoperands);
8808 	  output_asm_insn ("ldo R'%0(%%r22),%%r22", xoperands);
8809 
8810 	  output_asm_insn ("bv %%r0(%%r22)", xoperands);
8811 
8812 	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
8813 	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8814 	}
8815       else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8816 	{
8817 	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
8818 	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8819 
8820 	  /* The function is accessible from outside this module.  The only
8821 	     way to avoid an import stub between the thunk and function is to
8822 	     call the function directly with an indirect sequence similar to
8823 	     that used by $$dyncall.  This is possible because $$dyncall acts
8824 	     as the import stub in an indirect call.  */
8825 	  ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8826 	  xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8827 	  output_asm_insn ("addil LT'%3,%%r19", xoperands);
8828 	  output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8829 	  output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8830 	  output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8831 	  output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8832 	  output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8833 	  output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8834 
8835 	  if (TARGET_PA_20)
8836 	    {
8837 	      output_asm_insn ("bve,n (%%r22)", xoperands);
8838 	      nbytes += 32;
8839 	    }
8840 	  else if (TARGET_NO_SPACE_REGS)
8841 	    {
8842 	      output_asm_insn ("be,n 0(%%sr4,%%r22)", xoperands);
8843 	      nbytes += 32;
8844 	    }
8845 	  else
8846 	    {
8847 	      output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8848 	      output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8849 	      output_asm_insn ("be,n 0(%%sr0,%%r22)", xoperands);
8850 	      nbytes += 40;
8851 	    }
8852 	}
8853       else if (flag_pic)
8854 	{
8855 	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
8856 	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8857 
8858 	  /* Load function address into %r1.  */
8859 	  nbytes += 16;
8860 	  xop[0] = xoperands[0];
8861 	  xop[1] = gen_rtx_REG (Pmode, 1);
8862 	  xop[2] = xop[1];
8863 	  pa_output_pic_pcrel_sequence (xop);
8864 
8865 	  output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8866 	}
8867       else
8868 	{
8869 	  /* Load function address into %r22.  */
8870 	  nbytes += 8;
8871 	  output_asm_insn ("ldil L'%0,%%r22", xoperands);
8872 	  output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8873 
8874 	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
8875 	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8876 	}
8877     }
8878 
8879   final_end_function ();
8880 
8881   if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8882     {
8883       switch_to_section (data_section);
8884       output_asm_insn (".align 4", xoperands);
8885       ASM_OUTPUT_LABEL (file, label);
8886       output_asm_insn (".word P'%0", xoperands);
8887     }
8888 
8889   current_thunk_number++;
8890   nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8891 	    & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8892   last_address += nbytes;
8893   if (old_last_address > last_address)
8894     last_address = UINT_MAX;
8895   update_total_code_bytes (nbytes);
8896   assemble_end_function (thunk_fndecl, fnname);
8897 }
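
/* As a rough C-level sketch (added commentary, not part of the original
   sources), the thunks emitted above behave like

     char *
     thunk (char *this)
     {
       this += delta;
       if (vcall_offset)
	 this += *(long *) (*(char **) this + vcall_offset);
       return function (this);	/- tail transfer; THIS lives in %r26 -/
     }

   with the adjustment done in a handful of instructions and the final
   branch never returning here.  */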
8898 
8899 /* Only direct calls to static functions are allowed to be sibling (tail)
8900    call optimized.
8901 
8902    This restriction is necessary because some linker generated stubs will
8903    store return pointers into rp' in some cases which might clobber a
8904    live value already in rp'.
8905 
8906    In a sibcall the current function and the target function share stack
8907    space.  Thus if the path to the current function and the path to the
8908    target function save a value in rp', they save the value into the
8909    same stack slot, which has undesirable consequences.
8910 
8911    Because of the deferred binding nature of shared libraries any function
8912    with external scope could be in a different load module and thus require
8913    rp' to be saved when calling that function.  So sibcall optimizations
8914    can only be safe for static functions.
8915 
8916    Note that GCC never needs return value relocations, so we don't have to
8917    worry about static calls with return value relocations (which require
8918    saving rp').
8919 
8920    It is safe to perform a sibcall optimization when the target function
8921    will never return.  */
8922 static bool
8923 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8924 {
8925   /* Sibcalls are not ok because the arg pointer register is not a fixed
8926      register.  This prevents the sibcall optimization from occurring.  In
8927      addition, there are problems with stub placement using GNU ld.  This
8928      is because a normal sibcall branch uses a 17-bit relocation while
8929      a regular call branch uses a 22-bit relocation.  As a result, more
8930      care needs to be taken in the placement of long-branch stubs.  */
8931   if (TARGET_64BIT)
8932     return false;
8933 
8934   if (TARGET_PORTABLE_RUNTIME)
8935     return false;
8936 
8937   /* Sibcalls are only ok within a translation unit.  */
8938   return decl && targetm.binds_local_p (decl);
8939 }
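
/* Concretely (example added): a call to a file-scope "static" function
   can be sibcall optimized here, while a call to any extern function is
   rejected, since the latter may bind to another load module and reach
   it through a stub that saves rp'.  */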
8940 
8941 /* ??? Addition is not commutative on the PA due to the weird implicit
8942    space register selection rules for memory addresses.  Therefore, we
8943    don't consider a + b == b + a, as this might be inside a MEM.  */
8944 static bool
8945 pa_commutative_p (const_rtx x, int outer_code)
8946 {
8947   return (COMMUTATIVE_P (x)
8948 	  && (TARGET_NO_SPACE_REGS
8949 	      || (outer_code != UNKNOWN && outer_code != MEM)
8950 	      || GET_CODE (x) != PLUS));
8951 }
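
/* Example (added; our reading of the rule above): within a MEM,
   (plus (reg %r25) (reg %r26)) and (plus (reg %r26) (reg %r25)) are not
   interchangeable because the implicit space register is derived from
   the base operand of the address, so swapping the operands can change
   which space the access goes to.  */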
8952 
8953 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8954    use in fmpyadd instructions.  */
8955 int
8956 pa_fmpyaddoperands (rtx *operands)
8957 {
8958   machine_mode mode = GET_MODE (operands[0]);
8959 
8960   /* Must be a floating point mode.  */
8961   if (mode != SFmode && mode != DFmode)
8962     return 0;
8963 
8964   /* All modes must be the same.  */
8965   if (! (mode == GET_MODE (operands[1])
8966 	 && mode == GET_MODE (operands[2])
8967 	 && mode == GET_MODE (operands[3])
8968 	 && mode == GET_MODE (operands[4])
8969 	 && mode == GET_MODE (operands[5])))
8970     return 0;
8971 
8972   /* All operands must be registers.  */
8973   if (! (GET_CODE (operands[1]) == REG
8974 	 && GET_CODE (operands[2]) == REG
8975 	 && GET_CODE (operands[3]) == REG
8976 	 && GET_CODE (operands[4]) == REG
8977 	 && GET_CODE (operands[5]) == REG))
8978     return 0;
8979 
8980   /* Only 2 real operands to the addition.  One of the input operands must
8981      be the same as the output operand.  */
8982   if (! rtx_equal_p (operands[3], operands[4])
8983       && ! rtx_equal_p (operands[3], operands[5]))
8984     return 0;
8985 
8986   /* Inout operand of add cannot conflict with any operands from multiply.  */
8987   if (rtx_equal_p (operands[3], operands[0])
8988      || rtx_equal_p (operands[3], operands[1])
8989      || rtx_equal_p (operands[3], operands[2]))
8990     return 0;
8991 
8992   /* multiply cannot feed into addition operands.  */
8993   if (rtx_equal_p (operands[4], operands[0])
8994       || rtx_equal_p (operands[5], operands[0]))
8995     return 0;
8996 
8997   /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
8998   if (mode == SFmode
8999       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
9000 	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
9001 	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
9002 	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
9003 	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
9004 	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
9005     return 0;
9006 
9007   /* Passed.  Operands are suitable for fmpyadd.  */
9008   return 1;
9009 }
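
/* Usage sketch (added): the six operands describe a multiply and an add
   that must be independent:

     operands[0] = operands[1] * operands[2]	; fmpy part
     operands[3] = operands[4] + operands[5]	; fadd part

   For example, fr4 = fr5 * fr6 paired with fr7 = fr7 + fr8 passes all
   of the checks above, whereas reusing fr4 anywhere in the add part
   does not.  */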
9010 
9011 #if !defined(USE_COLLECT2)
9012 static void
9013 pa_asm_out_constructor (rtx symbol, int priority)
9014 {
9015   if (!function_label_operand (symbol, VOIDmode))
9016     pa_encode_label (symbol);
9017 
9018 #ifdef CTORS_SECTION_ASM_OP
9019   default_ctor_section_asm_out_constructor (symbol, priority);
9020 #else
9021 # ifdef TARGET_ASM_NAMED_SECTION
9022   default_named_section_asm_out_constructor (symbol, priority);
9023 # else
9024   default_stabs_asm_out_constructor (symbol, priority);
9025 # endif
9026 #endif
9027 }
9028 
9029 static void
9030 pa_asm_out_destructor (rtx symbol, int priority)
9031 {
9032   if (!function_label_operand (symbol, VOIDmode))
9033     pa_encode_label (symbol);
9034 
9035 #ifdef DTORS_SECTION_ASM_OP
9036   default_dtor_section_asm_out_destructor (symbol, priority);
9037 #else
9038 # ifdef TARGET_ASM_NAMED_SECTION
9039   default_named_section_asm_out_destructor (symbol, priority);
9040 # else
9041   default_stabs_asm_out_destructor (symbol, priority);
9042 # endif
9043 #endif
9044 }
9045 #endif
9046 
9047 /* This function places uninitialized global data in the bss section.
9048    The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
9049    function on the SOM port to prevent uninitialized global data from
9050    being placed in the data section.  */
9051 
9052 void
9053 pa_asm_output_aligned_bss (FILE *stream,
9054 			   const char *name,
9055 			   unsigned HOST_WIDE_INT size,
9056 			   unsigned int align)
9057 {
9058   switch_to_section (bss_section);
9059 
9060 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
9061   ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
9062 #endif
9063 
9064 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
9065   ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
9066 #endif
9067 
9068   fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
9069   ASM_OUTPUT_LABEL (stream, name);
9070   fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
9071 }
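
/* Sample output (illustrative): for a 16-byte object "buf" with 8-byte
   alignment this emits roughly

	.align 8
   buf:
	.block 16

   preceded by the type and size directives when the assembler supports
   them.  */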
9072 
9073 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
9074    that doesn't allow the alignment of global common storage to be directly
9075    specified.  The SOM linker aligns common storage based on the rounded
9076    value of the NUM_BYTES parameter in the .comm directive.  It's not
9077    possible to use the .align directive as it doesn't affect the alignment
9078    of the label associated with a .comm directive.  */
9079 
9080 void
9081 pa_asm_output_aligned_common (FILE *stream,
9082 			      const char *name,
9083 			      unsigned HOST_WIDE_INT size,
9084 			      unsigned int align)
9085 {
9086   unsigned int max_common_align;
9087 
9088   max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
9089   if (align > max_common_align)
9090     {
9091       /* Alignment exceeds maximum alignment for global common data.  */
9092       align = max_common_align;
9093     }
9094 
9095   switch_to_section (bss_section);
9096 
9097   assemble_name (stream, name);
9098   fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
9099            MAX (size, align / BITS_PER_UNIT));
9100 }
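
/* Worked example (added): a 6-byte common symbol requesting 64-bit
   (8-byte) alignment is emitted as ".comm 8" after its name, since
   MAX (6, 64 / 8) = 8; the SOM linker then derives the 8-byte alignment
   from the rounded size.  */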
9101 
9102 /* We can't use .comm for local common storage as the SOM linker effectively
9103    treats the symbol as universal and uses the same storage for local symbols
9104    with the same name in different object files.  The .block directive
9105    reserves an uninitialized block of storage.  However, it's not common
9106    storage.  Fortunately, GCC never requests common storage with the same
9107    name in any given translation unit.  */
9108 
9109 void
9110 pa_asm_output_aligned_local (FILE *stream,
9111 			     const char *name,
9112 			     unsigned HOST_WIDE_INT size,
9113 			     unsigned int align)
9114 {
9115   switch_to_section (bss_section);
9116   fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
9117 
9118 #ifdef LOCAL_ASM_OP
9119   fprintf (stream, "%s", LOCAL_ASM_OP);
9120   assemble_name (stream, name);
9121   fprintf (stream, "\n");
9122 #endif
9123 
9124   ASM_OUTPUT_LABEL (stream, name);
9125   fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
9126 }
9127 
9128 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
9129    use in fmpysub instructions.  */
9130 int
9131 pa_fmpysuboperands (rtx *operands)
9132 {
9133   machine_mode mode = GET_MODE (operands[0]);
9134 
9135   /* Must be a floating point mode.  */
9136   if (mode != SFmode && mode != DFmode)
9137     return 0;
9138 
9139   /* All modes must be the same.  */
9140   if (! (mode == GET_MODE (operands[1])
9141 	 && mode == GET_MODE (operands[2])
9142 	 && mode == GET_MODE (operands[3])
9143 	 && mode == GET_MODE (operands[4])
9144 	 && mode == GET_MODE (operands[5])))
9145     return 0;
9146 
9147   /* All operands must be registers.  */
9148   if (! (GET_CODE (operands[1]) == REG
9149 	 && GET_CODE (operands[2]) == REG
9150 	 && GET_CODE (operands[3]) == REG
9151 	 && GET_CODE (operands[4]) == REG
9152 	 && GET_CODE (operands[5]) == REG))
9153     return 0;
9154 
9155   /* Only 2 real operands to the subtraction.  Subtraction is not a commutative
9156      operation, so operands[4] must be the same as operands[3].  */
9157   if (! rtx_equal_p (operands[3], operands[4]))
9158     return 0;
9159 
9160   /* multiply cannot feed into subtraction.  */
9161   if (rtx_equal_p (operands[5], operands[0]))
9162     return 0;
9163 
9164   /* Inout operand of sub cannot conflict with any operands from multiply.  */
9165   if (rtx_equal_p (operands[3], operands[0])
9166      || rtx_equal_p (operands[3], operands[1])
9167      || rtx_equal_p (operands[3], operands[2]))
9168     return 0;
9169 
9170   /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
9171   if (mode == SFmode
9172       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
9173 	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
9174 	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
9175 	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
9176 	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
9177 	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
9178     return 0;
9179 
9180   /* Passed.  Operands are suitable for fmpysub.  */
9181   return 1;
9182 }
9183 
9184 /* Return 1 if the given constant is 2, 4, or 8.  These are the valid
9185    constants for a MULT embedded inside a memory address.  */
9186 int
9187 pa_mem_shadd_constant_p (int val)
9188 {
9189   if (val == 2 || val == 4 || val == 8)
9190     return 1;
9191   else
9192     return 0;
9193 }
9194 
9195 /* Return 1 if the given constant is 1, 2, or 3.  These are the valid
9196    constants for shadd instructions.  */
9197 int
9198 pa_shadd_constant_p (int val)
9199 {
9200   if (val == 1 || val == 2 || val == 3)
9201     return 1;
9202   else
9203     return 0;
9204 }
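
/* The two predicates are related (observation added for clarity): a
   shadd shift count VAL of 1, 2 or 3 scales by 2, 4 or 8, so
   pa_mem_shadd_constant_p (1 << val) holds exactly when
   pa_shadd_constant_p (val) does.  */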
9205 
9206 /* Return TRUE if INSN branches forward.  */
9207 
9208 static bool
9209 forward_branch_p (rtx_insn *insn)
9210 {
9211   rtx lab = JUMP_LABEL (insn);
9212 
9213   /* The INSN must have a jump label.  */
9214   gcc_assert (lab != NULL_RTX);
9215 
9216   if (INSN_ADDRESSES_SET_P ())
9217     return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
9218 
9219   while (insn)
9220     {
9221       if (insn == lab)
9222 	return true;
9223       else
9224 	insn = NEXT_INSN (insn);
9225     }
9226 
9227   return false;
9228 }
9229 
9230 /* Output an unconditional move and branch insn.  */
9231 
9232 const char *
9233 pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
9234 {
9235   int length = get_attr_length (insn);
9236 
9237   /* These are the cases in which we win.  */
9238   if (length == 4)
9239     return "mov%I1b,tr %1,%0,%2";
9240 
9241   /* None of the following cases win, but they don't lose either.  */
9242   if (length == 8)
9243     {
9244       if (dbr_sequence_length () == 0)
9245 	{
9246 	  /* Nothing in the delay slot, fake it by putting the combined
9247 	     insn (the copy or add) in the delay slot of a bl.  */
9248 	  if (GET_CODE (operands[1]) == CONST_INT)
9249 	    return "b %2\n\tldi %1,%0";
9250 	  else
9251 	    return "b %2\n\tcopy %1,%0";
9252 	}
9253       else
9254 	{
9255 	  /* Something in the delay slot, but we've got a long branch.  */
9256 	  if (GET_CODE (operands[1]) == CONST_INT)
9257 	    return "ldi %1,%0\n\tb %2";
9258 	  else
9259 	    return "copy %1,%0\n\tb %2";
9260 	}
9261     }
9262 
9263   if (GET_CODE (operands[1]) == CONST_INT)
9264     output_asm_insn ("ldi %1,%0", operands);
9265   else
9266     output_asm_insn ("copy %1,%0", operands);
9267   return pa_output_lbranch (operands[2], insn, 1);
9268 }
9269 
9270 /* Output an unconditional add and branch insn.  */
9271 
9272 const char *
9273 pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
9274 {
9275   int length = get_attr_length (insn);
9276 
9277   /* To make life easy we want operand0 to be the shared input/output
9278      operand and operand1 to be the readonly operand.  */
9279   if (operands[0] == operands[1])
9280     operands[1] = operands[2];
9281 
9282   /* These are the cases in which we win.  */
9283   if (length == 4)
9284     return "add%I1b,tr %1,%0,%3";
9285 
9286   /* None of the following cases win, but they don't lose either.  */
9287   if (length == 8)
9288     {
9289       if (dbr_sequence_length () == 0)
9290 	/* Nothing in the delay slot, fake it by putting the combined
9291 	   insn (the copy or add) in the delay slot of a bl.  */
9292 	return "b %3\n\tadd%I1 %1,%0,%0";
9293       else
9294 	/* Something in the delay slot, but we've got a long branch.  */
9295 	return "add%I1 %1,%0,%0\n\tb %3";
9296     }
9297 
9298   output_asm_insn ("add%I1 %1,%0,%0", operands);
9299   return pa_output_lbranch (operands[3], insn, 1);
9300 }
9301 
9302 /* We use this hook to perform a PA specific optimization which is difficult
9303    to do in earlier passes.  */
9304 
9305 static void
9306 pa_reorg (void)
9307 {
9308   remove_useless_addtr_insns (1);
9309 
9310   if (pa_cpu < PROCESSOR_8000)
9311     pa_combine_instructions ();
9312 }
9313 
9314 /* The PA has a number of odd instructions which can perform multiple
9315    tasks at once.  On first generation PA machines (PA1.0 and PA1.1)
9316    it may be profitable to combine two instructions into one instruction
9317    with two outputs.  It's not profitable on PA2.0 machines because the
9318    two outputs would take two slots in the reorder buffers.
9319 
9320    This routine finds instructions which can be combined and combines
9321    them.  We only support some of the potential combinations, and we
9322    only try common ways to find suitable instructions.
9323 
9324       * addb can add two registers or a register and a small integer
9325       and jump to a nearby (+-8k) location.  Normally the jump to the
9326       nearby location is conditional on the result of the add, but by
9327       using the "true" condition we can make the jump unconditional.
9328       Thus addb can perform two independent operations in one insn.
9329 
9330       * movb is similar to addb in that it can perform a reg->reg
9331       or small immediate->reg copy and jump to a nearby (+-8k) location.
9332 
9333       * fmpyadd and fmpysub can perform a FP multiply and either an
9334       FP add or FP sub if the operands of the multiply and add/sub are
9335       independent (there are other minor restrictions).  Note both
9336       the fmpy and fadd/fsub can in theory move to better spots according
9337       to data dependencies, but for now we require the fmpy stay at a
9338       fixed location.
9339 
9340       * Many of the memory operations can perform pre & post updates
9341       of index registers.  GCC's pre/post increment/decrement addressing
9342       is far too simple to take advantage of all the possibilities.  This
9343       pass may not be suitable since those insns may not be independent.
9344 
9345       * comclr can compare two ints or an int and a register, nullify
9346       the following instruction and zero some other register.  This
9347       is more difficult to use as it's harder to find an insn which
9348       will generate a comclr than finding something like an unconditional
9349       branch.  (conditional moves & long branches create comclr insns).
9350 
9351       * Most arithmetic operations can conditionally skip the next
9352       instruction.  They can be viewed as "perform this operation
9353       and conditionally jump to this nearby location" (where nearby
9354       is an insn away).  These are difficult to use due to the
9355       branch length restrictions.  */
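
/* For example (illustrative sketch), a reg->reg copy floating insn and
   an unconditional branch anchor

	copy %r4,%r5
	b,n L$0017

   can be combined into the single instruction

	movb,tr %r4,%r5,L$0017

   which performs the copy and the always-taken branch at once.  */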
9356 
9357 static void
9358 pa_combine_instructions (void)
9359 {
9360   rtx_insn *anchor;
9361 
9362   /* This can get expensive since the basic algorithm is on the
9363      order of O(n^2) (or worse).  Only do it for -O2 or higher
9364      levels of optimization.  */
9365   if (optimize < 2)
9366     return;
9367 
9368   /* Walk down the list of insns looking for "anchor" insns which
9369      may be combined with "floating" insns.  As the name implies,
9370      "anchor" instructions don't move, while "floating" insns may
9371      move around.  */
9372   rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9373   rtx_insn *new_rtx = make_insn_raw (par);
9374 
9375   for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9376     {
9377       enum attr_pa_combine_type anchor_attr;
9378       enum attr_pa_combine_type floater_attr;
9379 
9380       /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9381 	 Also ignore any special USE insns.  */
9382       if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
9383 	  || GET_CODE (PATTERN (anchor)) == USE
9384 	  || GET_CODE (PATTERN (anchor)) == CLOBBER)
9385 	continue;
9386 
9387       anchor_attr = get_attr_pa_combine_type (anchor);
9388       /* See if anchor is an insn suitable for combination.  */
9389       if (anchor_attr == PA_COMBINE_TYPE_FMPY
9390 	  || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9391 	  || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9392 	      && ! forward_branch_p (anchor)))
9393 	{
9394 	  rtx_insn *floater;
9395 
9396 	  for (floater = PREV_INSN (anchor);
9397 	       floater;
9398 	       floater = PREV_INSN (floater))
9399 	    {
9400 	      if (NOTE_P (floater)
9401 		  || (NONJUMP_INSN_P (floater)
9402 		      && (GET_CODE (PATTERN (floater)) == USE
9403 			  || GET_CODE (PATTERN (floater)) == CLOBBER)))
9404 		continue;
9405 
9406 	      /* Anything except a regular INSN will stop our search.  */
9407 	      if (! NONJUMP_INSN_P (floater))
9408 		{
9409 		  floater = NULL;
9410 		  break;
9411 		}
9412 
9413 	      /* See if FLOATER is suitable for combination with the
9414 		 anchor.  */
9415 	      floater_attr = get_attr_pa_combine_type (floater);
9416 	      if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9417 		   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9418 		  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9419 		      && floater_attr == PA_COMBINE_TYPE_FMPY))
9420 		{
9421 		  /* If ANCHOR and FLOATER can be combined, then we're
9422 		     done with this pass.  */
9423 		  if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9424 					SET_DEST (PATTERN (floater)),
9425 					XEXP (SET_SRC (PATTERN (floater)), 0),
9426 					XEXP (SET_SRC (PATTERN (floater)), 1)))
9427 		    break;
9428 		}
9429 
9430 	      else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9431 		       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9432 		{
9433 		  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9434 		    {
9435 		      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9436 					    SET_DEST (PATTERN (floater)),
9437 					XEXP (SET_SRC (PATTERN (floater)), 0),
9438 					XEXP (SET_SRC (PATTERN (floater)), 1)))
9439 			break;
9440 		    }
9441 		  else
9442 		    {
9443 		      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9444 					    SET_DEST (PATTERN (floater)),
9445 					    SET_SRC (PATTERN (floater)),
9446 					    SET_SRC (PATTERN (floater))))
9447 			break;
9448 		    }
9449 		}
9450 	    }
9451 
9452 	  /* If we didn't find anything on the backwards scan try forwards.  */
9453 	  if (!floater
9454 	      && (anchor_attr == PA_COMBINE_TYPE_FMPY
9455 		  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9456 	    {
9457 	      for (floater = anchor; floater; floater = NEXT_INSN (floater))
9458 		{
9459 		  if (NOTE_P (floater)
9460 		      || (NONJUMP_INSN_P (floater)
9461 			  && (GET_CODE (PATTERN (floater)) == USE
9462 			      || GET_CODE (PATTERN (floater)) == CLOBBER)))
9464 		    continue;
9465 
9466 		  /* Anything except a regular INSN will stop our search.  */
9467 		  if (! NONJUMP_INSN_P (floater))
9468 		    {
9469 		      floater = NULL;
9470 		      break;
9471 		    }
9472 
9473 		  /* See if FLOATER is suitable for combination with the
9474 		     anchor.  */
9475 		  floater_attr = get_attr_pa_combine_type (floater);
9476 		  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9477 		       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9478 		      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9479 			  && floater_attr == PA_COMBINE_TYPE_FMPY))
9480 		    {
9481 		      /* If ANCHOR and FLOATER can be combined, then we're
9482 			 done with this pass.  */
9483 		      if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9484 					    SET_DEST (PATTERN (floater)),
9485 					    XEXP (SET_SRC (PATTERN (floater)),
9486 						  0),
9487 					    XEXP (SET_SRC (PATTERN (floater)),
9488 						  1)))
9489 			break;
9490 		    }
9491 		}
9492 	    }
9493 
9494 	  /* FLOATER will be nonzero if we found a suitable floating
9495 	     insn for combination with ANCHOR.  */
9496 	  if (floater
9497 	      && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9498 		  || anchor_attr == PA_COMBINE_TYPE_FMPY))
9499 	    {
9500 	      /* Emit the new instruction and delete the old anchor.  */
9501 	      rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9502 				       copy_rtx (PATTERN (floater)));
9503 	      rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9504 	      emit_insn_before (temp, anchor);
9505 
9506 	      SET_INSN_DELETED (anchor);
9507 
9508 	      /* Emit a special USE insn for FLOATER, then delete
9509 		 the floating insn.  */
9510 	      temp = copy_rtx (PATTERN (floater));
9511 	      emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9512 	      delete_insn (floater);
9513 
9514 	      continue;
9515 	    }
9516 	  else if (floater
9517 		   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9518 	    {
9519 	      /* Emit the new_jump instruction and delete the old anchor.  */
9520 	      rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9521 				       copy_rtx (PATTERN (floater)));
9522 	      rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9523 	      temp = emit_jump_insn_before (temp, anchor);
9524 
9525 	      JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9526 	      SET_INSN_DELETED (anchor);
9527 
9528 	      /* Emit a special USE insn for FLOATER, then delete
9529 		 the floating insn.  */
9530 	      temp = copy_rtx (PATTERN (floater));
9531 	      emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9532 	      delete_insn (floater);
9533 	      continue;
9534 	    }
9535 	}
9536     }
9537 }
9538 
9539 static int
9540 pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
9541 		  int reversed, rtx dest,
9542 		  rtx src1, rtx src2)
9543 {
9544   int insn_code_number;
9545   rtx_insn *start, *end;
9546 
9547   /* Create a PARALLEL with the patterns of ANCHOR and
9548      FLOATER, try to recognize it, then test constraints
9549      for the resulting pattern.
9550 
9551      If the pattern doesn't match or the constraints
9552      aren't met keep searching for a suitable floater
9553      insn.  */
9554   XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9555   XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9556   INSN_CODE (new_rtx) = -1;
9557   insn_code_number = recog_memoized (new_rtx);
9558   basic_block bb = BLOCK_FOR_INSN (anchor);
9559   if (insn_code_number < 0
9560       || (extract_insn (new_rtx),
9561 	  !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
9562     return 0;
9563 
9564   if (reversed)
9565     {
9566       start = anchor;
9567       end = floater;
9568     }
9569   else
9570     {
9571       start = floater;
9572       end = anchor;
9573     }
9574 
9575   /* There are up to three operands to consider.  One
9576      output and two inputs.
9577 
9578      The output must not be used between FLOATER & ANCHOR
9579      exclusive.  The inputs must not be set between
9580      FLOATER and ANCHOR exclusive.  */
9581 
9582   if (reg_used_between_p (dest, start, end))
9583     return 0;
9584 
9585   if (reg_set_between_p (src1, start, end))
9586     return 0;
9587 
9588   if (reg_set_between_p (src2, start, end))
9589     return 0;
9590 
9591   /* If we get here, then everything is good.  */
9592   return 1;
9593 }
9594 
9595 /* Return nonzero if references for INSN are delayed.
9596 
9597    Millicode insns are actually function calls with some special
9598    constraints on arguments and register usage.
9599 
9600    Millicode calls always expect their arguments in the integer argument
9601    registers, and always return their result in %r29 (ret1).  They
9602    are expected to clobber their arguments, %r1, %r29, and the return
9603    pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9604 
9605    This function tells reorg that the references to arguments made by
9606    millicode calls do not appear to happen until after the millicode call.
9607    This allows reorg to put insns which set the argument registers into the
9608    delay slot of the millicode call -- thus they act more like traditional
9609    CALL_INSNs.
9610 
9611    Note we cannot consider side effects of the insn to be delayed because
9612    the branch and link insn will clobber the return pointer.  If we happened
9613    to use the return pointer in the delay slot of the call, then we lose.
9614 
9615    get_attr_type will try to recognize the given insn, so make sure to
9616    filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9617    in particular.  */
9618 int
9619 pa_insn_refs_are_delayed (rtx_insn *insn)
9620 {
9621   return ((NONJUMP_INSN_P (insn)
9622 	   && GET_CODE (PATTERN (insn)) != SEQUENCE
9623 	   && GET_CODE (PATTERN (insn)) != USE
9624 	   && GET_CODE (PATTERN (insn)) != CLOBBER
9625 	   && get_attr_type (insn) == TYPE_MILLI));
9626 }
9627 
9628 /* Promote the return value, but not the arguments.  */
9629 
9630 static machine_mode
9631 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9632                           machine_mode mode,
9633                           int *punsignedp ATTRIBUTE_UNUSED,
9634                           const_tree fntype ATTRIBUTE_UNUSED,
9635                           int for_return)
9636 {
9637   if (for_return == 0)
9638     return mode;
9639   return promote_mode (type, mode, punsignedp);
9640 }
9641 
9642 /* On the HP-PA the value is found in register(s) 28(-29), unless
9643    the mode is SF or DF. Then the value is returned in fr4 (32).
9644 
9645    This must perform the same promotions as PROMOTE_MODE, else promoting
9646    return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9647 
9648    Small structures must be returned in a PARALLEL on PA64 in order
9649    to match the HP Compiler ABI.  */
9650 
9651 static rtx
9652 pa_function_value (const_tree valtype,
9653                    const_tree func ATTRIBUTE_UNUSED,
9654                    bool outgoing ATTRIBUTE_UNUSED)
9655 {
9656   machine_mode valmode;
9657 
9658   if (AGGREGATE_TYPE_P (valtype)
9659       || TREE_CODE (valtype) == COMPLEX_TYPE
9660       || TREE_CODE (valtype) == VECTOR_TYPE)
9661     {
9662       HOST_WIDE_INT valsize = int_size_in_bytes (valtype);
9663 
9664       /* Handle aggregates that fit exactly in a word or double word.  */
9665       if (valsize == UNITS_PER_WORD || valsize == 2 * UNITS_PER_WORD)
9666 	return gen_rtx_REG (TYPE_MODE (valtype), 28);
9667 
9668       if (TARGET_64BIT)
9669 	{
9670           /* Aggregates with a size less than or equal to 128 bits are
9671 	     returned in GR 28(-29).  They are left justified.  The pad
9672 	     bits are undefined.  Larger aggregates are returned in
9673 	     memory.  */
9674 	  rtx loc[2];
9675 	  int i, offset = 0;
9676 	  int ub = valsize <= UNITS_PER_WORD ? 1 : 2;
9677 
9678 	  for (i = 0; i < ub; i++)
9679 	    {
9680 	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9681 					  gen_rtx_REG (DImode, 28 + i),
9682 					  GEN_INT (offset));
9683 	      offset += 8;
9684 	    }
9685 
9686 	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9687 	}
9688       else if (valsize > UNITS_PER_WORD)
9689 	{
9690 	  /* Aggregates 5 to 8 bytes in size are returned in general
9691 	     registers r28-r29 in the same manner as other non
9692 	     floating-point objects.  The data is right-justified and
9693 	     zero-extended to 64 bits.  This is opposite to the normal
9694 	     justification used on big endian targets and requires
9695 	     special treatment.  */
9696 	  rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9697 				       gen_rtx_REG (DImode, 28), const0_rtx);
9698 	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9699 	}
9700     }
9701 
9702   if ((INTEGRAL_TYPE_P (valtype)
9703        && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9704       || POINTER_TYPE_P (valtype))
9705     valmode = word_mode;
9706   else
9707     valmode = TYPE_MODE (valtype);
9708 
9709   if (TREE_CODE (valtype) == REAL_TYPE
9710       && !AGGREGATE_TYPE_P (valtype)
9711       && TYPE_MODE (valtype) != TFmode
9712       && !TARGET_SOFT_FLOAT)
9713     return gen_rtx_REG (valmode, 32);
9714 
9715   return gen_rtx_REG (valmode, 28);
9716 }
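
/* A minimal sketch (illustrative only, not part of the port) of the PA64
   return-value PARALLEL built above: a 12-byte aggregate comes back left
   justified in GR 28 and GR 29, at byte offsets 0 and 8.  */
#if 0
static rtx
pa_example_pa64_aggregate_return (void)
{
  rtx lo = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (DImode, 28),
			      GEN_INT (0));
  rtx hi = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (DImode, 29),
			      GEN_INT (8));
  return gen_rtx_PARALLEL (BLKmode, gen_rtvec (2, lo, hi));
}
#endif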
9717 
9718 /* Implement the TARGET_LIBCALL_VALUE hook.  */
9719 
9720 static rtx
9721 pa_libcall_value (machine_mode mode,
9722 		  const_rtx fun ATTRIBUTE_UNUSED)
9723 {
9724   if (! TARGET_SOFT_FLOAT
9725       && (mode == SFmode || mode == DFmode))
9726     return  gen_rtx_REG (mode, 32);
9727   else
9728     return  gen_rtx_REG (mode, 28);
9729 }
9730 
9731 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook.  */
9732 
9733 static bool
9734 pa_function_value_regno_p (const unsigned int regno)
9735 {
9736   if (regno == 28
9737       || (! TARGET_SOFT_FLOAT &&  regno == 32))
9738     return true;
9739 
9740   return false;
9741 }
9742 
9743 /* Update the data in CUM to advance over argument ARG.  */
9744 
9745 static void
9746 pa_function_arg_advance (cumulative_args_t cum_v,
9747 			 const function_arg_info &arg)
9748 {
9749   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9750   int arg_size = pa_function_arg_size (arg.mode, arg.type);
9751 
9752   cum->nargs_prototype--;
9753   cum->words += (arg_size
9754 		 + ((cum->words & 01)
9755 		    && arg.type != NULL_TREE
9756 		    && arg_size > 1));
9757 }
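
/* A minimal sketch (illustrative only, not part of the port) of the
   advance rule above as a pure function.  For example, with words == 1
   and a two-word typed argument, one word of alignment padding is added,
   so the slot counter moves from 1 to 4.  */
#if 0
static int
pa_example_advance_words (int words, int arg_size, bool typed)
{
  int pad = (words & 1) && typed && arg_size > 1;
  return words + arg_size + pad;
}
#endif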
9758 
9759 /* Return the location of a parameter that is passed in a register or NULL
9760    if the parameter has any component that is passed in memory.
9761 
9762    This is new code and will be pushed into the net sources after
9763    further testing.
9764 
9765    ??? We might want to restructure this so that it looks more like other
9766    ports.  */
9767 static rtx
9768 pa_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
9769 {
9770   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9771   tree type = arg.type;
9772   machine_mode mode = arg.mode;
9773   int max_arg_words = (TARGET_64BIT ? 8 : 4);
9774   int alignment = 0;
9775   int arg_size;
9776   int fpr_reg_base;
9777   int gpr_reg_base;
9778   rtx retval;
9779 
9780   if (arg.end_marker_p ())
9781     return NULL_RTX;
9782 
9783   arg_size = pa_function_arg_size (mode, type);
9784 
9785   /* If this arg would be passed partially or totally on the stack, then
9786      this routine should return zero.  pa_arg_partial_bytes will
9787      handle arguments which are split between regs and stack slots if
9788      the ABI mandates split arguments.  */
9789   if (!TARGET_64BIT)
9790     {
9791       /* The 32-bit ABI does not split arguments.  */
9792       if (cum->words + arg_size > max_arg_words)
9793 	return NULL_RTX;
9794     }
9795   else
9796     {
9797       if (arg_size > 1)
9798 	alignment = cum->words & 1;
9799       if (cum->words + alignment >= max_arg_words)
9800 	return NULL_RTX;
9801     }
9802 
9803   /* The 32bit ABIs and the 64bit ABIs are rather different,
9804      particularly in their handling of FP registers.  We might
9805      be able to cleverly share code between them, but I'm not
9806      going to bother in the hope that splitting them up results
9807      in code that is more easily understood.  */
9808 
9809   if (TARGET_64BIT)
9810     {
9811       /* Advance the base registers to their current locations.
9812 
9813          Remember, gprs grow towards smaller register numbers while
9814 	 fprs grow to higher register numbers.  Also remember that
9815 	 although FP regs are 32-bit addressable, we pretend that
9816 	 the registers are 64 bits wide.  */
9817       gpr_reg_base = 26 - cum->words;
9818       fpr_reg_base = 32 + cum->words;
9819 
9820       /* Arguments wider than one word and small aggregates need special
9821 	 treatment.  */
9822       if (arg_size > 1
9823 	  || mode == BLKmode
9824 	  || (type && (AGGREGATE_TYPE_P (type)
9825 		       || TREE_CODE (type) == COMPLEX_TYPE
9826 		       || TREE_CODE (type) == VECTOR_TYPE)))
9827 	{
9828 	  /* Double-extended precision (80-bit), quad-precision (128-bit)
9829 	     and aggregates including complex numbers are aligned on
9830 	     128-bit boundaries.  The first eight 64-bit argument slots
9831 	     are associated one-to-one, with general registers r26
9832 	     through r19, and also with floating-point registers fr4
9833 	     through fr11.  Arguments larger than one word are always
9834 	     passed in general registers.
9835 
9836 	     Using a PARALLEL with a word mode register results in left
9837 	     justified data on a big-endian target.  */
9838 
9839 	  rtx loc[8];
9840 	  int i, offset = 0, ub = arg_size;
9841 
9842 	  /* Align the base register.  */
9843 	  gpr_reg_base -= alignment;
9844 
9845 	  ub = MIN (ub, max_arg_words - cum->words - alignment);
9846 	  for (i = 0; i < ub; i++)
9847 	    {
9848 	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9849 					  gen_rtx_REG (DImode, gpr_reg_base),
9850 					  GEN_INT (offset));
9851 	      gpr_reg_base -= 1;
9852 	      offset += 8;
9853 	    }
9854 
9855 	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9856 	}
9857      }
9858   else
9859     {
9860       /* If the argument is larger than a word, then we know precisely
9861 	 which registers we must use.  */
9862       if (arg_size > 1)
9863 	{
9864 	  if (cum->words)
9865 	    {
9866 	      gpr_reg_base = 23;
9867 	      fpr_reg_base = 38;
9868 	    }
9869 	  else
9870 	    {
9871 	      gpr_reg_base = 25;
9872 	      fpr_reg_base = 34;
9873 	    }
9874 
9875 	  /* Structures 5 to 8 bytes in size are passed in the general
9876 	     registers in the same manner as other non floating-point
9877 	     objects.  The data is right-justified and zero-extended
9878 	     to 64 bits.  This is opposite to the normal justification
9879 	     used on big endian targets and requires special treatment.
9880 	     We now define BLOCK_REG_PADDING to pad these objects.
9881 	     Aggregates, complex and vector types are passed in the same
9882 	     manner as structures.  */
9883 	  if (mode == BLKmode
9884 	      || (type && (AGGREGATE_TYPE_P (type)
9885 			   || TREE_CODE (type) == COMPLEX_TYPE
9886 			   || TREE_CODE (type) == VECTOR_TYPE)))
9887 	    {
9888 	      rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9889 					   gen_rtx_REG (DImode, gpr_reg_base),
9890 					   const0_rtx);
9891 	      return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9892 	    }
9893 	}
9894       else
9895         {
9896 	   /* We have a single word (32 bits).  A simple computation
9897 	      will get us the register #s we need.  */
9898 	   gpr_reg_base = 26 - cum->words;
9899 	   fpr_reg_base = 32 + 2 * cum->words;
9900 	}
9901     }
9902 
9903   /* Determine if the argument needs to be passed in both general and
9904      floating point registers.  */
9905   if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9906        /* If we are doing soft-float with portable runtime, then there
9907 	  is no need to worry about FP regs.  */
9908        && !TARGET_SOFT_FLOAT
9909        /* The parameter must be some kind of scalar float, else we just
9910 	  pass it in integer registers.  */
9911        && GET_MODE_CLASS (mode) == MODE_FLOAT
9912        /* The target function must not have a prototype.  */
9913        && cum->nargs_prototype <= 0
9914        /* libcalls do not need to pass items in both FP and general
9915 	  registers.  */
9916        && type != NULL_TREE
9917        /* All this hair applies to "outgoing" args only.  This includes
9918 	  sibcall arguments set up with FUNCTION_INCOMING_ARG.  */
9919        && !cum->incoming)
9920       /* Also pass outgoing floating arguments in both registers in indirect
9921 	 calls with the 32 bit ABI and the HP assembler since there is no
9922 	 way to specify the argument locations in static functions.  */
9923       || (!TARGET_64BIT
9924 	  && !TARGET_GAS
9925 	  && !cum->incoming
9926 	  && cum->indirect
9927 	  && GET_MODE_CLASS (mode) == MODE_FLOAT))
9928     {
9929       retval
9930 	= gen_rtx_PARALLEL
9931 	    (mode,
9932 	     gen_rtvec (2,
9933 			gen_rtx_EXPR_LIST (VOIDmode,
9934 					   gen_rtx_REG (mode, fpr_reg_base),
9935 					   const0_rtx),
9936 			gen_rtx_EXPR_LIST (VOIDmode,
9937 					   gen_rtx_REG (mode, gpr_reg_base),
9938 					   const0_rtx)));
9939     }
9940   else
9941     {
9942       /* See if we should pass this parameter in a general register.  */
9943       if (TARGET_SOFT_FLOAT
9944 	  /* Indirect calls in the normal 32bit ABI require all arguments
9945 	     to be passed in general registers.  */
9946 	  || (!TARGET_PORTABLE_RUNTIME
9947 	      && !TARGET_64BIT
9948 	      && !TARGET_ELF32
9949 	      && cum->indirect)
9950 	  /* If the parameter is not a scalar floating-point parameter,
9951 	     then it belongs in GPRs.  */
9952 	  || GET_MODE_CLASS (mode) != MODE_FLOAT
9953 	  /* Structure with single SFmode field belongs in GPR.  */
9954 	  || (type && AGGREGATE_TYPE_P (type)))
9955 	retval = gen_rtx_REG (mode, gpr_reg_base);
9956       else
9957 	retval = gen_rtx_REG (mode, fpr_reg_base);
9958     }
9959   return retval;
9960 }
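
/* A minimal sketch (illustrative only, not part of the port) of the
   register numbering used above for single-word arguments in the 32-bit
   ABI: the first slot maps to GR 26 and FR 4 (register 32), the second
   to GR 25 and FR 5 (register 34), and so on.  */
#if 0
static void
pa_example_arg_regs_32bit (int words, int *gpr, int *fpr)
{
  *gpr = 26 - words;		/* GRs grow downward from %r26.  */
  *fpr = 32 + 2 * words;	/* FRs grow upward from %fr4.  */
}
#endif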
9961 
9962 /* Arguments larger than one word are double word aligned.  */
9963 
9964 static unsigned int
9965 pa_function_arg_boundary (machine_mode mode, const_tree type)
9966 {
9967   bool singleword = (type
9968 		     ? (integer_zerop (TYPE_SIZE (type))
9969 			|| !TREE_CONSTANT (TYPE_SIZE (type))
9970 			|| int_size_in_bytes (type) <= UNITS_PER_WORD)
9971 		     : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9972 
9973   return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
9974 }
9975 
9976 /* If this arg would be passed totally in registers or totally on the stack,
9977    then this routine should return zero.  */
9978 
9979 static int
9980 pa_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
9981 {
9982   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9983   unsigned int max_arg_words = 8;
9984   unsigned int offset = 0;
9985 
9986   if (!TARGET_64BIT)
9987     return 0;
9988 
9989   if (pa_function_arg_size (arg.mode, arg.type) > 1 && (cum->words & 1))
9990     offset = 1;
9991 
9992   if (cum->words + offset + pa_function_arg_size (arg.mode, arg.type)
9993       <= max_arg_words)
9994     /* Arg fits fully into registers.  */
9995     return 0;
9996   else if (cum->words + offset >= max_arg_words)
9997     /* Arg fully on the stack.  */
9998     return 0;
9999   else
10000     /* Arg is split.  */
10001     return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
10002 }
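
/* A minimal sketch (illustrative only, not part of the port): with the
   eight 64-bit argument slots above, a two-word argument starting at
   words == 7 is split; one word lands in the last register slot, so the
   hook reports (8 - 7) * UNITS_PER_WORD == 8 bytes passed in registers.  */
#if 0
static int
pa_example_partial_bytes (int words, int size)
{
  int offset = (size > 1 && (words & 1)) ? 1 : 0;
  if (words + offset + size <= 8 || words + offset >= 8)
    return 0;			/* Fully in registers or fully on stack.  */
  return (8 - words - offset) * UNITS_PER_WORD;
}
#endif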
10003 
10004 
10005 /* A get_unnamed_section callback for switching to the text section.
10006 
10007    This function is only used with SOM.  Because we don't support
10008    named subspaces, we can only create a new subspace or switch back
10009    to the default text subspace.  */
10010 
10011 static void
10012 som_output_text_section_asm_op (const char *data ATTRIBUTE_UNUSED)
10013 {
10014   gcc_assert (TARGET_SOM);
10015   if (TARGET_GAS)
10016     {
10017       if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
10018 	{
10019 	  /* We only want to emit a .nsubspa directive once at the
10020 	     start of the function.  */
10021 	  cfun->machine->in_nsubspa = 1;
10022 
10023 	  /* Create a new subspace for the text.  This provides
10024 	     better stub placement and one-only functions.  */
10025 	  if (cfun->decl
10026 	      && DECL_ONE_ONLY (cfun->decl)
10027 	      && !DECL_WEAK (cfun->decl))
10028 	    {
10029 	      output_section_asm_op ("\t.SPACE $TEXT$\n"
10030 				     "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
10031 				     "ACCESS=44,SORT=24,COMDAT");
10032 	      return;
10033 	    }
10034 	}
10035       else
10036 	{
10037 	  /* There isn't a current function or the body of the current
10038 	     function has been completed.  So, we are changing to the
10039 	     text section to output debugging information.  Thus, we
10040 	     need to forget that we are in the text section so that
10041 	     varasm.c will call us when text_section is selected again.  */
10042 	  gcc_assert (!cfun || !cfun->machine
10043 		      || cfun->machine->in_nsubspa == 2);
10044 	  in_section = NULL;
10045 	}
10046       output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
10047       return;
10048     }
10049   output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
10050 }
10051 
10052 /* A get_unnamed_section callback for switching to comdat data
10053    sections.  This function is only used with SOM.  */
10054 
10055 static void
10056 som_output_comdat_data_section_asm_op (const char *data)
10057 {
10058   in_section = NULL;
10059   output_section_asm_op (data);
10060 }
10061 
10062 /* Implement TARGET_ASM_INIT_SECTIONS.  */
10063 
10064 static void
10065 pa_som_asm_init_sections (void)
10066 {
10067   text_section
10068     = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
10069 
10070   /* SOM puts readonly data in the default $LIT$ subspace when PIC code
10071      is not being generated.  */
10072   som_readonly_data_section
10073     = get_unnamed_section (0, output_section_asm_op,
10074 			   "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
10075 
10076   /* When secondary definitions are not supported, SOM makes readonly
10077      data one-only by creating a new $LIT$ subspace in $TEXT$ with
10078      the comdat flag.  */
10079   som_one_only_readonly_data_section
10080     = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
10081 			   "\t.SPACE $TEXT$\n"
10082 			   "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
10083 			   "ACCESS=0x2c,SORT=16,COMDAT");
10084 
10085 
10086   /* When secondary definitions are not supported, SOM makes data one-only
10087      by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
10088   som_one_only_data_section
10089     = get_unnamed_section (SECTION_WRITE,
10090 			   som_output_comdat_data_section_asm_op,
10091 			   "\t.SPACE $PRIVATE$\n"
10092 			   "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
10093 			   "ACCESS=31,SORT=24,COMDAT");
10094 
10095   if (flag_tm)
10096     som_tm_clone_table_section
10097       = get_unnamed_section (0, output_section_asm_op,
10098 			     "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
10099 
10100   /* HPUX ld generates incorrect GOT entries for "T" fixups which
10101      reference data within the $TEXT$ space (for example constant
10102      strings in the $LIT$ subspace).
10103 
10104      The assemblers (GAS and HP as) both have problems with handling
10105      the difference of two symbols.  This is the other correct way to
10106      reference constant data during PIC code generation.
10107 
10108      Thus, we can't put constant data needing relocation in the $TEXT$
10109      space during PIC generation.
10110 
10111      Previously, we placed all constant data into the $DATA$ subspace
10112      when generating PIC code.  This reduces sharing, but it works
10113      correctly.  Now we rely on pa_reloc_rw_mask() for section selection.
10114      This puts constant data not needing relocation into the $TEXT$ space.  */
10115   readonly_data_section = som_readonly_data_section;
10116 
10117   /* We must not have a reference to an external symbol defined in a
10118      shared library in a readonly section, else the SOM linker will
10119      complain.
10120 
10121      So, we force exception information into the data section.  */
10122   exception_section = data_section;
10123 }
10124 
10125 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION.  */
10126 
10127 static section *
10128 pa_som_tm_clone_table_section (void)
10129 {
10130   return som_tm_clone_table_section;
10131 }
10132 
10133 /* On hpux10, the linker will give an error if we have a reference
10134    in the read-only data section to a symbol defined in a shared
10135    library.  Therefore, expressions that might require a reloc
10136    cannot be placed in the read-only data section.  */
10137 
10138 static section *
10139 pa_select_section (tree exp, int reloc,
10140 		   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
10141 {
10142   if (TREE_CODE (exp) == VAR_DECL
10143       && TREE_READONLY (exp)
10144       && !TREE_THIS_VOLATILE (exp)
10145       && DECL_INITIAL (exp)
10146       && (DECL_INITIAL (exp) == error_mark_node
10147           || TREE_CONSTANT (DECL_INITIAL (exp)))
10148       && !(reloc & pa_reloc_rw_mask ()))
10149     {
10150       if (TARGET_SOM
10151 	  && DECL_ONE_ONLY (exp)
10152 	  && !DECL_WEAK (exp))
10153 	return som_one_only_readonly_data_section;
10154       else
10155 	return readonly_data_section;
10156     }
10157   else if (CONSTANT_CLASS_P (exp)
10158 	   && !(reloc & pa_reloc_rw_mask ()))
10159     return readonly_data_section;
10160   else if (TARGET_SOM
10161 	   && TREE_CODE (exp) == VAR_DECL
10162 	   && DECL_ONE_ONLY (exp)
10163 	   && !DECL_WEAK (exp))
10164     return som_one_only_data_section;
10165   else
10166     return data_section;
10167 }
10168 
10169 /* Implement pa_elf_select_rtx_section.  If X is a function label operand
10170    and the function is in a COMDAT group, place the plabel reference in the
10171    .data.rel.ro.local section.  The linker ignores references to symbols in
10172    discarded sections from this section.  */
10173 
10174 static section *
10175 pa_elf_select_rtx_section (machine_mode mode, rtx x,
10176 			   unsigned HOST_WIDE_INT align)
10177 {
10178   if (function_label_operand (x, VOIDmode))
10179     {
10180       tree decl = SYMBOL_REF_DECL (x);
10181 
10182       if (!decl || (DECL_P (decl) && DECL_COMDAT_GROUP (decl)))
10183 	return get_named_section (NULL, ".data.rel.ro.local", 1);
10184     }
10185 
10186   return default_elf_select_rtx_section (mode, x, align);
10187 }
10188 
10189 /* Implement pa_reloc_rw_mask.  */
10190 
10191 static int
10192 pa_reloc_rw_mask (void)
10193 {
10194   if (flag_pic || (TARGET_SOM && !TARGET_HPUX_11))
10195     return 3;
10196 
10197   /* HP linker does not support global relocs in readonly memory.  */
10198   return TARGET_SOM ? 2 : 0;
10199 }
10200 
10201 static void
10202 pa_globalize_label (FILE *stream, const char *name)
10203 {
10204   /* We only handle DATA objects here; functions are globalized in
10205      ASM_DECLARE_FUNCTION_NAME.  */
10206   if (! FUNCTION_NAME_P (name))
10207   {
10208     fputs ("\t.EXPORT ", stream);
10209     assemble_name (stream, name);
10210     fputs (",DATA\n", stream);
10211   }
10212 }
10213 
10214 /* Worker function for TARGET_STRUCT_VALUE_RTX.  */
10215 
10216 static rtx
10217 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
10218 		     int incoming ATTRIBUTE_UNUSED)
10219 {
10220   return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
10221 }
10222 
10223 /* Worker function for TARGET_RETURN_IN_MEMORY.  */
10224 
10225 bool
10226 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
10227 {
10228   /* SOM ABI says that objects larger than 64 bits are returned in memory.
10229      PA64 ABI says that objects larger than 128 bits are returned in memory.
10230      Note, int_size_in_bytes can return -1 if the size of the object is
10231      variable or larger than the maximum value that can be expressed as
10232      a HOST_WIDE_INT.   It can also return zero for an empty type.  The
10233      simplest way to handle variable and empty types is to pass them in
10234      memory.  This avoids problems in defining the boundaries of argument
10235      slots, allocating registers, etc.  */
10236   return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
10237 	  || int_size_in_bytes (type) <= 0);
10238 }
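
/* A minimal sketch (illustrative only, not part of the port) of the size
   test above: on the 64-bit port a 16-byte struct is returned in
   registers, while a 24-byte struct, a variable-sized object (size -1)
   and an empty type (size 0) all go in memory.  */
#if 0
static bool
pa_example_return_in_memory (HOST_WIDE_INT size, bool pa64)
{
  return size > (pa64 ? 16 : 8) || size <= 0;
}
#endif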
10239 
10240 /* Structure to hold declaration and name of external symbols that are
10241    emitted by GCC.  We generate a vector of these symbols and output them
10242    at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
10243    This avoids putting out names that are never really used.  */
10244 
10245 typedef struct GTY(()) extern_symbol
10246 {
10247   tree decl;
10248   const char *name;
10249 } extern_symbol;
10250 
10251 /* Define gc'd vector type for extern_symbol.  */
10252 
10253 /* Vector of extern_symbol pointers.  */
10254 static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
10255 
10256 #ifdef ASM_OUTPUT_EXTERNAL_REAL
10257 /* Mark DECL (name NAME) as an external reference (assembler output
10258    file FILE).  This saves the names to output at the end of the file
10259    if actually referenced.  */
10260 
10261 void
10262 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
10263 {
10264   gcc_assert (file == asm_out_file);
10265   extern_symbol p = {decl, name};
10266   vec_safe_push (extern_symbols, p);
10267 }
10268 #endif
10269 
10270 /* Output text required at the end of an assembler file.
10271    This includes deferred plabels and .import directives for
10272    all external symbols that were actually referenced.  */
10273 
10274 static void
10275 pa_file_end (void)
10276 {
10277 #ifdef ASM_OUTPUT_EXTERNAL_REAL
10278   unsigned int i;
10279   extern_symbol *p;
10280 
10281   if (!NO_DEFERRED_PROFILE_COUNTERS)
10282     output_deferred_profile_counters ();
10283 #endif
10284 
10285   output_deferred_plabels ();
10286 
10287 #ifdef ASM_OUTPUT_EXTERNAL_REAL
10288   for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
10289     {
10290       tree decl = p->decl;
10291 
10292       if (!TREE_ASM_WRITTEN (decl)
10293 	  && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
10294 	ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
10295     }
10296 
10297   vec_free (extern_symbols);
10298 #endif
10299 
10300   if (NEED_INDICATE_EXEC_STACK)
10301     file_end_indicate_exec_stack ();
10302 }
10303 
10304 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
10305 
10306 static bool
10307 pa_can_change_mode_class (machine_mode from, machine_mode to,
10308 			  reg_class_t rclass)
10309 {
10310   if (from == to)
10311     return true;
10312 
10313   if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
10314     return true;
10315 
10316   /* Reject changes to/from modes with zero size.  */
10317   if (!GET_MODE_SIZE (from) || !GET_MODE_SIZE (to))
10318     return false;
10319 
10320   /* Reject changes to/from complex and vector modes.  */
10321   if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
10322       || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
10323     return false;
10324 
10325   /* There is no way to load QImode or HImode values directly from memory
10326      to a FP register.  SImode loads to the FP registers are not zero
10327      extended.  On the 64-bit target, this conflicts with the definition
10328      of LOAD_EXTEND_OP.  Thus, we reject all mode changes in the FP registers
10329      except for DImode to SImode on the 64-bit target.  It is handled by
10330      register renaming in pa_print_operand.  */
10331   if (MAYBE_FP_REG_CLASS_P (rclass))
10332     return TARGET_64BIT && from == DImode && to == SImode;
10333 
10334   /* TARGET_HARD_REGNO_MODE_OK places modes with sizes larger than a word
10335      in specific sets of registers.  Thus, we cannot allow changing
10336      to a larger mode when it's larger than a word.  */
10337   if (GET_MODE_SIZE (to) > UNITS_PER_WORD
10338       && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
10339     return false;
10340 
10341   return true;
10342 }
10343 
10344 /* Implement TARGET_MODES_TIEABLE_P.
10345 
10346    We should return FALSE for QImode and HImode because these modes
10347    are not ok in the floating-point registers.  However, this prevents
10348    tying these modes to SImode and DImode in the general registers.
10349    So, this isn't a good idea.  We rely on TARGET_HARD_REGNO_MODE_OK and
10350    TARGET_CAN_CHANGE_MODE_CLASS to prevent these modes from being used
10351    in the floating-point registers.  */
10352 
10353 static bool
10354 pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10355 {
10356   /* Don't tie modes in different classes.  */
10357   if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
10358     return false;
10359 
10360   return true;
10361 }
10362 
10363 
10364 /* Length in units of the trampoline instruction code.  */
10365 
10366 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 36 : 48))
10367 
10368 
10369 /* Output assembler code for a block containing the constant parts
10370    of a trampoline, leaving space for the variable parts.
10371 
10372    The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
10373    and then branches to the specified routine.
10374 
10375    This code template is copied from text segment to stack location
10376    and then patched with pa_trampoline_init to contain valid values,
10377    and then entered as a subroutine.
10378 
10379    It is best to keep this as small as possible to avoid having to
10380    flush multiple lines in the cache.  */
10381 
10382 static void
10383 pa_asm_trampoline_template (FILE *f)
10384 {
10385   if (!TARGET_64BIT)
10386     {
10387       if (TARGET_PA_20)
10388 	{
10389 	  fputs ("\tmfia	%r20\n", f);
10390 	  fputs ("\tldw		48(%r20),%r22\n", f);
10391 	  fputs ("\tcopy	%r22,%r21\n", f);
10392 	  fputs ("\tbb,>=,n	%r22,30,.+16\n", f);
10393 	  fputs ("\tdepwi	0,31,2,%r22\n", f);
10394 	  fputs ("\tldw		0(%r22),%r21\n", f);
10395 	  fputs ("\tldw		4(%r22),%r19\n", f);
10396 	  fputs ("\tbve		(%r21)\n", f);
10397 	  fputs ("\tldw		52(%r1),%r29\n", f);
10398 	  fputs ("\t.word	0\n", f);
10399 	  fputs ("\t.word	0\n", f);
10400 	  fputs ("\t.word	0\n", f);
10401 	}
10402       else
10403 	{
10404 	  if (ASSEMBLER_DIALECT == 0)
10405 	    {
10406 	      fputs ("\tbl	.+8,%r20\n", f);
10407 	      fputs ("\tdepi	0,31,2,%r20\n", f);
10408 	    }
10409 	  else
10410 	    {
10411 	      fputs ("\tb,l	.+8,%r20\n", f);
10412 	      fputs ("\tdepwi	0,31,2,%r20\n", f);
10413 	    }
10414 	  fputs ("\tldw		40(%r20),%r22\n", f);
10415 	  fputs ("\tcopy	%r22,%r21\n", f);
10416 	  fputs ("\tbb,>=,n	%r22,30,.+16\n", f);
10417 	  if (ASSEMBLER_DIALECT == 0)
10418 	    fputs ("\tdepi	0,31,2,%r22\n", f);
10419 	  else
10420 	    fputs ("\tdepwi	0,31,2,%r22\n", f);
10421 	  fputs ("\tldw		0(%r22),%r21\n", f);
10422 	  fputs ("\tldw		4(%r22),%r19\n", f);
10423 	  fputs ("\tldsid	(%r21),%r1\n", f);
10424 	  fputs ("\tmtsp	%r1,%sr0\n", f);
10425 	  fputs ("\tbe		0(%sr0,%r21)\n", f);
10426 	  fputs ("\tldw		44(%r20),%r29\n", f);
10427 	}
10428       fputs ("\t.word	0\n", f);
10429       fputs ("\t.word	0\n", f);
10430       fputs ("\t.word	0\n", f);
10431       fputs ("\t.word	0\n", f);
10432     }
10433   else
10434     {
10435       fputs ("\t.dword 0\n", f);
10436       fputs ("\t.dword 0\n", f);
10437       fputs ("\t.dword 0\n", f);
10438       fputs ("\t.dword 0\n", f);
10439       fputs ("\tmfia	%r31\n", f);
10440       fputs ("\tldd	24(%r31),%r27\n", f);
10441       fputs ("\tldd	32(%r31),%r31\n", f);
10442       fputs ("\tldd	16(%r27),%r1\n", f);
10443       fputs ("\tbve	(%r1)\n", f);
10444       fputs ("\tldd	24(%r27),%r27\n", f);
10445       fputs ("\t.dword 0  ; fptr\n", f);
10446       fputs ("\t.dword 0  ; static link\n", f);
10447     }
10448 }
10449 
10450 /* Emit RTL insns to initialize the variable parts of a trampoline.
10451    FNADDR is an RTX for the address of the function's pure code.
10452    CXT is an RTX for the static chain value for the function.
10453 
10454    Move the function address to the trampoline template at offset 48.
10455    Move the static chain value to trampoline template at offset 52.
10456    Move the trampoline address to trampoline template at offset 56.
10457    Move r19 to trampoline template at offset 60.  The latter two
10458    words create a plabel for the indirect call to the trampoline.
10459 
10460    A similar sequence is used for the 64-bit port but the plabel is
10461    at the beginning of the trampoline.
10462 
10463    Finally, the cache entries for the trampoline code are flushed.
10464    This is necessary to ensure that the trampoline instruction sequence
10465    is written to memory prior to any attempts at prefetching the code
10466    sequence.  */
10467 
10468 static void
10469 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10470 {
10471   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10472   rtx start_addr = gen_reg_rtx (Pmode);
10473   rtx end_addr = gen_reg_rtx (Pmode);
10474   rtx line_length = gen_reg_rtx (Pmode);
10475   rtx r_tramp, tmp;
10476 
10477   emit_block_move (m_tramp, assemble_trampoline_template (),
10478 		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10479   r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10480 
10481   if (!TARGET_64BIT)
10482     {
10483       tmp = adjust_address (m_tramp, Pmode, 48);
10484       emit_move_insn (tmp, fnaddr);
10485       tmp = adjust_address (m_tramp, Pmode, 52);
10486       emit_move_insn (tmp, chain_value);
10487 
10488       /* Create a fat pointer for the trampoline.  */
10489       tmp = adjust_address (m_tramp, Pmode, 56);
10490       emit_move_insn (tmp, r_tramp);
10491       tmp = adjust_address (m_tramp, Pmode, 60);
10492       emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10493 
10494       /* fdc and fic only use registers for the address to flush,
10495 	 they do not accept integer displacements.  We align the
10496 	 start and end addresses to the beginning of their respective
10497 	 cache lines to minimize the number of lines flushed.  */
10498       emit_insn (gen_andsi3 (start_addr, r_tramp,
10499 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10500       tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
10501 					     TRAMPOLINE_CODE_SIZE-1));
10502       emit_insn (gen_andsi3 (end_addr, tmp,
10503 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10504       emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10505       emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10506       emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10507 				    gen_reg_rtx (Pmode),
10508 				    gen_reg_rtx (Pmode)));
10509     }
10510   else
10511     {
10512       tmp = adjust_address (m_tramp, Pmode, 56);
10513       emit_move_insn (tmp, fnaddr);
10514       tmp = adjust_address (m_tramp, Pmode, 64);
10515       emit_move_insn (tmp, chain_value);
10516 
10517       /* Create a fat pointer for the trampoline.  */
10518       tmp = adjust_address (m_tramp, Pmode, 16);
10519       emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
10520 							    r_tramp, 32)));
10521       tmp = adjust_address (m_tramp, Pmode, 24);
10522       emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10523 
10524       /* fdc and fic only use registers for the address to flush,
10525 	 they do not accept integer displacements.  We align the
10526 	 start and end addresses to the beginning of their respective
10527 	 cache lines to minimize the number of lines flushed.  */
10528       tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
10529       emit_insn (gen_anddi3 (start_addr, tmp,
10530 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10531       tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
10532 					     TRAMPOLINE_CODE_SIZE - 1));
10533       emit_insn (gen_anddi3 (end_addr, tmp,
10534 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10535       emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10536       emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10537       emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10538 				    gen_reg_rtx (Pmode),
10539 				    gen_reg_rtx (Pmode)));
10540     }
10541 
10542 #ifdef HAVE_ENABLE_EXECUTE_STACK
10543   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10544 		     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10545 #endif
10546 }
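
/* A minimal sketch (illustrative only, not part of the port) of the
   cache-line rounding used above.  Assuming MIN_CACHELINE_SIZE == 32, a
   48-byte code sequence at 0x7f001234 yields start 0x7f001220 and end
   0x7f001260, so three lines are flushed.  */
#if 0
static void
pa_example_flush_bounds (unsigned HOST_WIDE_INT addr,
			 unsigned HOST_WIDE_INT code_size,
			 unsigned HOST_WIDE_INT *start,
			 unsigned HOST_WIDE_INT *end)
{
  unsigned HOST_WIDE_INT mask = -(unsigned HOST_WIDE_INT) MIN_CACHELINE_SIZE;
  *start = addr & mask;
  *end = (addr + code_size - 1) & mask;
}
#endif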
10547 
10548 /* Perform any machine-specific adjustment in the address of the trampoline.
10549    ADDR contains the address that was passed to pa_trampoline_init.
10550    Adjust the trampoline address to point to the plabel at offset 56.
   The extra 2 in the offset of 58 used below marks the pointer as a
   plabel; $$dyncall tests this bit (bit 30) and dereferences the plabel
   when it is set.  */
10551 
10552 static rtx
10553 pa_trampoline_adjust_address (rtx addr)
10554 {
10555   if (!TARGET_64BIT)
10556     addr = memory_address (Pmode, plus_constant (Pmode, addr, 58));
10557   return addr;
10558 }
10559 
10560 static rtx
10561 pa_delegitimize_address (rtx orig_x)
10562 {
10563   rtx x = delegitimize_mem_from_attrs (orig_x);
10564 
10565   if (GET_CODE (x) == LO_SUM
10566       && GET_CODE (XEXP (x, 1)) == UNSPEC
10567       && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10568     return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10569   return x;
10570 }
10571 
10572 static rtx
10573 pa_internal_arg_pointer (void)
10574 {
10575   /* The argument pointer and the hard frame pointer are the same in
10576      the 32-bit runtime, so we don't need a copy.  */
10577   if (TARGET_64BIT)
10578     return copy_to_reg (virtual_incoming_args_rtx);
10579   else
10580     return virtual_incoming_args_rtx;
10581 }
10582 
10583 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10584    Frame pointer elimination is automatically handled.  */
10585 
10586 static bool
10587 pa_can_eliminate (const int from, const int to)
10588 {
10589   /* The argument pointer cannot be eliminated in the 64-bit runtime.  */
10590   if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10591     return false;
10592 
10593   return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10594           ? ! frame_pointer_needed
10595           : true);
10596 }
10597 
10598 /* Define the offset between two registers, FROM to be eliminated and its
10599    replacement TO, at the start of a routine.  */
10600 HOST_WIDE_INT
10601 pa_initial_elimination_offset (int from, int to)
10602 {
10603   HOST_WIDE_INT offset;
10604 
10605   if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10606       && to == STACK_POINTER_REGNUM)
10607     offset = -pa_compute_frame_size (get_frame_size (), 0);
10608   else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10609     offset = 0;
10610   else
10611     gcc_unreachable ();
10612 
10613   return offset;
10614 }
10615 
10616 static void
10617 pa_conditional_register_usage (void)
10618 {
10619   int i;
10620 
10621   if (!TARGET_64BIT && !TARGET_PA_11)
10622     {
10623       for (i = 56; i <= FP_REG_LAST; i++)
10624 	fixed_regs[i] = call_used_regs[i] = 1;
10625       for (i = 33; i < 56; i += 2)
10626 	fixed_regs[i] = call_used_regs[i] = 1;
10627     }
10628   if (TARGET_SOFT_FLOAT)
10629     {
10630       for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10631 	fixed_regs[i] = call_used_regs[i] = 1;
10632     }
10633   if (flag_pic)
10634     fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10635 }
10636 
10637 /* Target hook for c_mode_for_suffix.  */
10638 
10639 static machine_mode
10640 pa_c_mode_for_suffix (char suffix)
10641 {
10642   if (HPUX_LONG_DOUBLE_LIBRARY)
10643     {
10644       if (suffix == 'q')
10645 	return TFmode;
10646     }
10647 
10648   return VOIDmode;
10649 }
10650 
10651 /* Target hook for function_section.  */
10652 
10653 static section *
10654 pa_function_section (tree decl, enum node_frequency freq,
10655 		     bool startup, bool exit)
10656 {
10657   /* Put functions in text section if target doesn't have named sections.  */
10658   if (!targetm_common.have_named_sections)
10659     return text_section;
10660 
10661   /* Force nested functions into the same section as the containing
10662      function.  */
10663   if (decl
10664       && DECL_SECTION_NAME (decl) == NULL
10665       && DECL_CONTEXT (decl) != NULL_TREE
10666       && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10667       && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
10668     return function_section (DECL_CONTEXT (decl));
10669 
10670   /* Otherwise, use the default function section.  */
10671   return default_function_section (decl, freq, startup, exit);
10672 }
10673 
10674 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10675 
10676    In 64-bit mode, we reject CONST_DOUBLES.  We also reject CONST_INTS
10677    that need more than three instructions to load prior to reload.  This
10678    limit is somewhat arbitrary.  It takes three instructions to load a
10679    CONST_INT from memory but two are memory accesses.  It may be better
10680    to increase the allowed range for CONST_INTS.  We may also be able
10681    to handle CONST_DOUBLES.  */
10682 
10683 static bool
10684 pa_legitimate_constant_p (machine_mode mode, rtx x)
10685 {
10686   if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10687     return false;
10688 
10689   if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10690     return false;
10691 
10692   /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10693      legitimate constants.  The other variants can't be handled by
10694      the move patterns after reload starts.  */
10695   if (tls_referenced_p (x))
10696     return false;
10697 
10698   if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10699     return false;
10700 
10701   if (TARGET_64BIT
10702       && HOST_BITS_PER_WIDE_INT > 32
10703       && GET_CODE (x) == CONST_INT
10704       && !reload_in_progress
10705       && !reload_completed
10706       && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
10707       && !pa_cint_ok_for_move (UINTVAL (x)))
10708     return false;
10709 
10710   if (function_label_operand (x, mode))
10711     return false;
10712 
10713   return true;
10714 }
10715 
10716 /* Implement TARGET_SECTION_TYPE_FLAGS.  */
10717 
10718 static unsigned int
10719 pa_section_type_flags (tree decl, const char *name, int reloc)
10720 {
10721   unsigned int flags;
10722 
10723   flags = default_section_type_flags (decl, name, reloc);
10724 
10725   /* Function labels are placed in the constant pool.  This can
10726      cause a section conflict if decls are put in ".data.rel.ro"
10727      or ".data.rel.ro.local" using the __attribute__ construct.  */
10728   if (strcmp (name, ".data.rel.ro") == 0
10729       || strcmp (name, ".data.rel.ro.local") == 0)
10730     flags |= SECTION_WRITE | SECTION_RELRO;
10731 
10732   return flags;
10733 }
10734 
10735 /* pa_legitimate_address_p recognizes an RTL expression that is a
10736    valid memory address for an instruction.  The MODE argument is the
10737    machine mode for the MEM expression that wants to use this address.
10738 
10739    On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10740    REG+REG, and REG+(REG*SCALE).  The indexed address forms are only
10741    available with floating point loads and stores, and integer loads.
10742    We get better code by allowing indexed addresses in the initial
10743    RTL generation.
10744 
10745    The acceptance of indexed addresses as legitimate implies that we
10746    must provide patterns for doing indexed integer stores, or the move
10747    expanders must force the address of an indexed store to a register.
10748    We have adopted the latter approach.
10749 
10750    Another function of pa_legitimate_address_p is to ensure that
10751    the base register is a valid pointer for indexed instructions.
10752    On targets that have non-equivalent space registers, we have to
10753    know at the time of assembler output which register in a REG+REG
10754    pair is the base register.  The REG_POINTER flag is sometimes lost
10755    in reload and the following passes, so it can't be relied on during
10756    code generation.  Thus, we either have to canonicalize the order
10757    of the registers in REG+REG indexed addresses, or treat REG+REG
10758    addresses separately and provide patterns for both permutations.
10759 
10760    The latter approach requires several hundred additional lines of
10761    code in pa.md.  The downside to canonicalizing is that a PLUS
10762    in the wrong order can't combine to form a scaled indexed
10763    memory operand.  As we won't need to canonicalize the operands if
10764    the REG_POINTER lossage can be fixed, it seems better to canonicalize.
10765 
10766    We initially break out scaled indexed addresses in canonical order
10767    in pa_emit_move_sequence.  LEGITIMIZE_ADDRESS also canonicalizes
10768    scaled indexed addresses during RTL generation.  However, fold_rtx
10769    has its own opinion on how the operands of a PLUS should be ordered.
10770    If one of the operands is equivalent to a constant, it will make
10771    that operand the second operand.  As the base register is likely to
10772    be equivalent to a SYMBOL_REF, we have made it the second operand.
10773 
10774    pa_legitimate_address_p accepts REG+REG as legitimate when the
10775    operands are in the order INDEX+BASE on targets with non-equivalent
10776    space registers, and in any order on targets with equivalent space
10777    registers.  It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10778 
10779    We treat a SYMBOL_REF as legitimate if it is part of the current
10780    function's constant-pool, because such addresses can actually be
10781    output as REG+SMALLINT.  */
10782 
10783 static bool
10784 pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
10785 {
10786   if ((REG_P (x)
10787        && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
10788 		  : REG_OK_FOR_BASE_P (x)))
10789       || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
10790 	   || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
10791 	  && REG_P (XEXP (x, 0))
10792 	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10793 		     : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
10794     return true;
10795 
10796   if (GET_CODE (x) == PLUS)
10797     {
10798       rtx base, index;
10799 
10800       /* For REG+REG, the base register should be in XEXP (x, 1),
10801 	 so check it first.  */
10802       if (REG_P (XEXP (x, 1))
10803 	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
10804 		     : REG_OK_FOR_BASE_P (XEXP (x, 1))))
10805 	base = XEXP (x, 1), index = XEXP (x, 0);
10806       else if (REG_P (XEXP (x, 0))
10807 	       && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10808 			  : REG_OK_FOR_BASE_P (XEXP (x, 0))))
10809 	base = XEXP (x, 0), index = XEXP (x, 1);
10810       else
10811 	return false;
10812 
10813       if (GET_CODE (index) == CONST_INT)
10814 	{
10815 	  if (INT_5_BITS (index))
10816 	    return true;
10817 
10818 	  /* When INT14_OK_STRICT is false, a secondary reload is needed
10819 	     to adjust the displacement of SImode and DImode floating point
10820 	     instructions but this may fail when the register also needs
10821 	     reloading.  So, we return false when STRICT is true.  We
10822 	     also reject long displacements for float mode addresses since
10823 	     the majority of accesses will use floating point instructions
10824 	     that don't support 14-bit offsets.  */
10825 	  if (!INT14_OK_STRICT
10826 	      && (strict || !(reload_in_progress || reload_completed))
10827 	      && mode != QImode
10828 	      && mode != HImode)
10829 	    return false;
10830 
10831 	  return base14_operand (index, mode);
10832 	}
10833 
10834       if (!TARGET_DISABLE_INDEXING
10835 	  /* Only accept the "canonical" INDEX+BASE operand order
10836 	     on targets with non-equivalent space registers.  */
10837 	  && (TARGET_NO_SPACE_REGS
10838 	      ? REG_P (index)
10839 	      : (base == XEXP (x, 1) && REG_P (index)
10840 		 && (reload_completed
10841 		     || (reload_in_progress && HARD_REGISTER_P (base))
10842 		     || REG_POINTER (base))
10843 		 && (reload_completed
10844 		     || (reload_in_progress && HARD_REGISTER_P (index))
10845 		     || !REG_POINTER (index))))
10846 	  && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
10847 	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
10848 		     : REG_OK_FOR_INDEX_P (index))
10849 	  && borx_reg_operand (base, Pmode)
10850 	  && borx_reg_operand (index, Pmode))
10851 	return true;
10852 
10853       if (!TARGET_DISABLE_INDEXING
10854 	  && GET_CODE (index) == MULT
10855 	  /* Only accept base operands with the REG_POINTER flag prior to
10856 	     reload on targets with non-equivalent space registers.  */
10857 	  && (TARGET_NO_SPACE_REGS
10858 	      || (base == XEXP (x, 1)
10859 		  && (reload_completed
10860 		      || (reload_in_progress && HARD_REGISTER_P (base))
10861 		      || REG_POINTER (base))))
10862 	  && REG_P (XEXP (index, 0))
10863 	  && GET_MODE (XEXP (index, 0)) == Pmode
10864 	  && MODE_OK_FOR_SCALED_INDEXING_P (mode)
10865 	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
10866 		     : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
10867 	  && GET_CODE (XEXP (index, 1)) == CONST_INT
10868 	  && INTVAL (XEXP (index, 1))
10869 	     == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
10870 	  && borx_reg_operand (base, Pmode))
10871 	return true;
10872 
10873       return false;
10874     }
10875 
10876   if (GET_CODE (x) == LO_SUM)
10877     {
10878       rtx y = XEXP (x, 0);
10879 
10880       if (GET_CODE (y) == SUBREG)
10881 	y = SUBREG_REG (y);
10882 
10883       if (REG_P (y)
10884 	  && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
10885 		     : REG_OK_FOR_BASE_P (y)))
10886 	{
10887 	  /* Needed for -fPIC */
10888 	  if (mode == Pmode
10889 	      && GET_CODE (XEXP (x, 1)) == UNSPEC)
10890 	    return true;
10891 
10892 	  if (!INT14_OK_STRICT
10893 	      && (strict || !(reload_in_progress || reload_completed))
10894 	      && mode != QImode
10895 	      && mode != HImode)
10896 	    return false;
10897 
10898 	  if (CONSTANT_P (XEXP (x, 1)))
10899 	    return true;
10900 	}
10901       return false;
10902     }
10903 
10904   if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
10905     return true;
10906 
10907   return false;
10908 }
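
/* A minimal sketch (illustrative only, not part of the port) of the
   canonical scaled-index form accepted above: (plus (mult index scale)
   base), with the base register in XEXP (x, 1) and the scale equal to
   the mode size.  */
#if 0
static rtx
pa_example_scaled_index_address (rtx base, rtx index, machine_mode mode)
{
  rtx scale = GEN_INT ((HOST_WIDE_INT) GET_MODE_SIZE (mode));
  return gen_rtx_PLUS (Pmode, gen_rtx_MULT (Pmode, index, scale), base);
}
#endif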
10909 
10910 /* Look for machine dependent ways to make the invalid address AD a
10911    valid address.
10912 
10913    For the PA, transform:
10914 
10915         memory(X + <large int>)
10916 
10917    into:
10918 
10919         if (<large int> & mask) >= 16
10920           Y = (<large int> & ~mask) + mask + 1  Round up.
10921         else
10922           Y = (<large int> & ~mask)             Round down.
10923         Z = X + Y
10924         memory (Z + (<large int> - Y));
10925 
10926    This makes reload inheritance and reload_cse work better since Z
10927    can be reused.
10928 
10929    There may be more opportunities to improve code with this hook.  */
10930 
10931 rtx
10932 pa_legitimize_reload_address (rtx ad, machine_mode mode,
10933 			      int opnum, int type,
10934 			      int ind_levels ATTRIBUTE_UNUSED)
10935 {
10936   long offset, newoffset, mask;
10937   rtx new_rtx, temp = NULL_RTX;
10938 
10939   mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
10940 	  && !INT14_OK_STRICT ? 0x1f : 0x3fff);
10941 
10942   if (optimize && GET_CODE (ad) == PLUS)
10943     temp = simplify_binary_operation (PLUS, Pmode,
10944 				      XEXP (ad, 0), XEXP (ad, 1));
10945 
10946   new_rtx = temp ? temp : ad;
10947 
10948   if (optimize
10949       && GET_CODE (new_rtx) == PLUS
10950       && GET_CODE (XEXP (new_rtx, 0)) == REG
10951       && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
10952     {
10953       offset = INTVAL (XEXP ((new_rtx), 1));
10954 
10955       /* Choose rounding direction.  Round up if we are >= halfway.  */
10956       if ((offset & mask) >= ((mask + 1) / 2))
10957 	newoffset = (offset & ~mask) + mask + 1;
10958       else
10959 	newoffset = offset & ~mask;
10960 
10961       /* Ensure that long displacements are aligned.  */
10962       if (mask == 0x3fff
10963 	  && (GET_MODE_CLASS (mode) == MODE_FLOAT
10964 	      || (TARGET_64BIT && (mode) == DImode)))
10965 	newoffset &= ~(GET_MODE_SIZE (mode) - 1);
10966 
10967       if (newoffset != 0 && VAL_14_BITS_P (newoffset))
10968 	{
10969 	  temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
10970 			       GEN_INT (newoffset));
10971 	  ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
10972 	  push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
10973 		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10974 		       opnum, (enum reload_type) type);
10975 	  return ad;
10976 	}
10977     }
10978 
10979   return NULL_RTX;
10980 }
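
/* A minimal sketch (illustrative only, not part of the port) of the
   rounding above.  With mask == 0x3fff and offset == 0x12345,
   offset & mask == 0x2345 is at least half the mask range, so we round
   up: newoffset == 0x14000, and the residual displacement
   0x12345 - 0x14000 == -7355 fits in 14 bits.  */
#if 0
static long
pa_example_round_offset (long offset, long mask)
{
  if ((offset & mask) >= (mask + 1) / 2)
    return (offset & ~mask) + mask + 1;	/* Round up.  */
  return offset & ~mask;		/* Round down.  */
}
#endif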
10981 
10982 /* Output address vector.  */
10983 
10984 void
10985 pa_output_addr_vec (rtx lab, rtx body)
10986 {
10987   int idx, vlen = XVECLEN (body, 0);
10988 
10989   if (!TARGET_SOM)
10990     fputs ("\t.align 4\n", asm_out_file);
10991   targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10992   if (TARGET_GAS)
10993     fputs ("\t.begin_brtab\n", asm_out_file);
10994   for (idx = 0; idx < vlen; idx++)
10995     {
10996       ASM_OUTPUT_ADDR_VEC_ELT
10997 	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10998     }
10999   if (TARGET_GAS)
11000     fputs ("\t.end_brtab\n", asm_out_file);
11001 }
11002 
11003 /* Output address difference vector.  */
11004 
11005 void
11006 pa_output_addr_diff_vec (rtx lab, rtx body)
11007 {
11008   rtx base = XEXP (XEXP (body, 0), 0);
11009   int idx, vlen = XVECLEN (body, 1);
11010 
11011   targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
11012   if (TARGET_GAS)
11013     fputs ("\t.begin_brtab\n", asm_out_file);
11014   for (idx = 0; idx < vlen; idx++)
11015     {
11016       ASM_OUTPUT_ADDR_DIFF_ELT
11017 	(asm_out_file,
11018 	 body,
11019 	 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
11020 	 CODE_LABEL_NUMBER (base));
11021     }
11022   if (TARGET_GAS)
11023     fputs ("\t.end_brtab\n", asm_out_file);
11024 }
11025 
11026 /* This is a helper function for the other atomic operations.  This function
11027    emits a loop that contains SEQ that iterates until a compare-and-swap
11028    operation at the end succeeds.  MEM is the memory to be modified.  SEQ is
11029    a set of instructions that takes a value from OLD_REG as an input and
11030    produces a value in NEW_REG as an output.  Before SEQ, OLD_REG will be
11031    set to the current contents of MEM.  After SEQ, a compare-and-swap will
11032    attempt to update MEM with NEW_REG.  The function returns true when the
11033    loop was generated successfully.  */
11034 
11035 static bool
11036 pa_expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
11037 {
11038   machine_mode mode = GET_MODE (mem);
11039   rtx_code_label *label;
11040   rtx cmp_reg, success, oldval;
11041 
11042   /* The loop we want to generate looks like
11043 
11044         cmp_reg = mem;
11045       label:
11046         old_reg = cmp_reg;
11047         seq;
11048         (success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg)
11049         if (!success)
11050           goto label;
11051 
11052      Note that we only do the plain load from memory once.  Subsequent
11053      iterations use the value loaded by the compare-and-swap pattern.  */
11054 
11055   label = gen_label_rtx ();
11056   cmp_reg = gen_reg_rtx (mode);
11057 
11058   emit_move_insn (cmp_reg, mem);
11059   emit_label (label);
11060   emit_move_insn (old_reg, cmp_reg);
11061   if (seq)
11062     emit_insn (seq);
11063 
11064   success = NULL_RTX;
11065   oldval = cmp_reg;
11066   if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg,
11067                                        new_reg, false, MEMMODEL_SYNC_SEQ_CST,
11068                                        MEMMODEL_RELAXED))
11069     return false;
11070 
11071   if (oldval != cmp_reg)
11072     emit_move_insn (cmp_reg, oldval);
11073 
11074   /* Mark this jump predicted not taken.  */
11075   emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
11076                            GET_MODE (success), 1, label,
11077 			   profile_probability::guessed_never ());
11078   return true;
11079 }
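
/* Usage sketch (illustrative only; this caller does not appear in this
   file): an atomic fetch-and-add expander could record a SEQ computing
   NEW_REG = OLD_REG + VAL and hand it to the loop expander above:

	rtx old_reg = gen_reg_rtx (mode);
	rtx new_reg = gen_reg_rtx (mode);
	start_sequence ();
	emit_insn (gen_add3_insn (new_reg, old_reg, val));
	rtx seq = get_insns ();
	end_sequence ();
	ok = pa_expand_compare_and_swap_loop (mem, old_reg, new_reg, seq);

   On success, OLD_REG holds the value that was fetched from MEM.  */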

/* This function tries to implement an atomic exchange operation using a
   compare_and_swap loop.  VAL is written to *MEM.  The previous contents of
   *MEM are returned, using TARGET if possible.  No memory model is required
   since a compare_and_swap loop is seq-cst.  */

rtx
pa_maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val)
{
  machine_mode mode = GET_MODE (mem);

  if (can_compare_and_swap_p (mode, true))
    {
      if (!target || !register_operand (target, mode))
        target = gen_reg_rtx (mode);
      if (pa_expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
        return target;
    }

  return NULL_RTX;
}
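
/* Illustrative call site (assumed, e.g. from an atomic_exchange
   expander in the machine description; not taken from this file):

	rtx ret = pa_maybe_emit_compare_and_swap_exchange_loop (operands[0],
								operands[1],
								operands[2]);
	if (ret == NULL_RTX)
	  FAIL;

   The caller must be prepared to fall back when NULL_RTX is returned,
   i.e. when no compare-and-swap pattern exists for the mode.  */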

/* Implement TARGET_CALLEE_COPIES.  The callee is responsible for copying
   arguments passed by hidden reference in the 32-bit HP runtime.  Users
   can override this behavior for better compatibility with OpenMP at the
   risk of library incompatibilities.  Arguments are always passed by value
   in the 64-bit HP runtime.  */

static bool
pa_callee_copies (cumulative_args_t, const function_arg_info &)
{
  return !TARGET_CALLER_COPIES;
}

/* Implement TARGET_HARD_REGNO_NREGS.  */

static unsigned int
pa_hard_regno_nregs (unsigned int regno ATTRIBUTE_UNUSED, machine_mode mode)
{
  return PA_HARD_REGNO_NREGS (regno, mode);
}

/* Implement TARGET_HARD_REGNO_MODE_OK.  */

static bool
pa_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  return PA_HARD_REGNO_MODE_OK (regno, mode);
}

/* Implement TARGET_STARTING_FRAME_OFFSET.

   On the 32-bit ports, we reserve one slot for the previous frame
   pointer and one fill slot.  The fill slot is for compatibility
   with HP compiled programs.  On the 64-bit ports, we reserve one
   slot for the previous frame pointer.  Either way, the reserved
   space amounts to 8 bytes: two 4-byte slots on the 32-bit ports
   and one 8-byte slot on the 64-bit ports.  */

static HOST_WIDE_INT
pa_starting_frame_offset (void)
{
  return 8;
}

/* Figure out the size in words of the function argument.  The size
   returned by this function should always be greater than zero because
   we pass variable and zero sized objects by reference.  */

HOST_WIDE_INT
pa_function_arg_size (machine_mode mode, const_tree type)
{
  HOST_WIDE_INT size;

  size = mode != BLKmode ? GET_MODE_SIZE (mode) : int_size_in_bytes (type);
  return CEIL (size, UNITS_PER_WORD);
}
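
/* Worked example (illustrative only): on a 32-bit target, where
   UNITS_PER_WORD is 4, a 6-byte BLKmode structure gives
   CEIL (6, 4) == 2 words, while an SImode argument gives
   CEIL (4, 4) == 1 word.  Variable and zero sized objects never reach
   this computation because they are passed by reference.  */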

#include "gt-pa.h"