xref: /netbsd/external/gpl3/gcc/dist/gcc/config/pa/pa.cc (revision f0fbc68b)
1 /* Subroutines for insn-output.cc for HPPA.
2    Copyright (C) 1992-2022 Free Software Foundation, Inc.
3    Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.cc
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #define IN_TARGET_CODE 1
22 
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "df.h"
32 #include "tm_p.h"
33 #include "stringpool.h"
34 #include "attribs.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "diagnostic-core.h"
40 #include "insn-attr.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
44 #include "varasm.h"
45 #include "calls.h"
46 #include "output.h"
47 #include "except.h"
48 #include "explow.h"
49 #include "expr.h"
50 #include "reload.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "cfgrtl.h"
54 #include "opts.h"
55 #include "builtins.h"
56 
57 /* This file should be included last.  */
58 #include "target-def.h"
59 
60 /* Return nonzero if there is a bypass for the output of
61    OUT_INSN and the fp store IN_INSN.  */
62 int
pa_fpstore_bypass_p(rtx_insn * out_insn,rtx_insn * in_insn)63 pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
64 {
65   machine_mode store_mode;
66   machine_mode other_mode;
67   rtx set;
68 
69   if (recog_memoized (in_insn) < 0
70       || (get_attr_type (in_insn) != TYPE_FPSTORE
71 	  && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
72       || recog_memoized (out_insn) < 0)
73     return 0;
74 
75   store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
76 
77   set = single_set (out_insn);
78   if (!set)
79     return 0;
80 
81   other_mode = GET_MODE (SET_SRC (set));
82 
83   return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
84 }
85 
86 
87 #ifndef DO_FRAME_NOTES
88 #ifdef INCOMING_RETURN_ADDR_RTX
89 #define DO_FRAME_NOTES 1
90 #else
91 #define DO_FRAME_NOTES 0
92 #endif
93 #endif
94 
95 static void pa_option_override (void);
96 static void copy_reg_pointer (rtx, rtx);
97 static void fix_range (const char *);
98 static int hppa_register_move_cost (machine_mode mode, reg_class_t,
99 				    reg_class_t);
100 static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
101 static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool);
102 static inline rtx force_mode (machine_mode, rtx);
103 static void pa_reorg (void);
104 static void pa_combine_instructions (void);
105 static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
106 			     rtx, rtx);
107 static bool forward_branch_p (rtx_insn *);
108 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
109 static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
110 static int compute_cpymem_length (rtx_insn *);
111 static int compute_clrmem_length (rtx_insn *);
112 static bool pa_assemble_integer (rtx, unsigned int, int);
113 static void remove_useless_addtr_insns (int);
114 static void store_reg (int, HOST_WIDE_INT, int);
115 static void store_reg_modify (int, int, HOST_WIDE_INT);
116 static void load_reg (int, HOST_WIDE_INT, int);
117 static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
118 static rtx pa_function_value (const_tree, const_tree, bool);
119 static rtx pa_libcall_value (machine_mode, const_rtx);
120 static bool pa_function_value_regno_p (const unsigned int);
121 static void pa_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
122 static void pa_linux_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
123 static void update_total_code_bytes (unsigned int);
124 static void pa_output_function_epilogue (FILE *);
125 static int pa_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
126 static int pa_issue_rate (void);
127 static int pa_reloc_rw_mask (void);
128 static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
129 static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
130 static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
131      ATTRIBUTE_UNUSED;
132 static void pa_encode_section_info (tree, rtx, int);
133 static const char *pa_strip_name_encoding (const char *);
134 static bool pa_function_ok_for_sibcall (tree, tree);
135 static void pa_globalize_label (FILE *, const char *)
136      ATTRIBUTE_UNUSED;
137 static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
138 				    HOST_WIDE_INT, tree);
139 #if !defined(USE_COLLECT2)
140 static void pa_asm_out_constructor (rtx, int);
141 static void pa_asm_out_destructor (rtx, int);
142 #endif
143 static void pa_init_builtins (void);
144 static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
145 static rtx hppa_builtin_saveregs (void);
146 static void hppa_va_start (tree, rtx);
147 static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
148 static bool pa_scalar_mode_supported_p (scalar_mode);
149 static bool pa_commutative_p (const_rtx x, int outer_code);
150 static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
151 static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
152 static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
153 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
154 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
155 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
156 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
157 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
158 static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
159 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
160 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
161 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
162 static void output_deferred_plabels (void);
163 static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
164 static void pa_file_end (void);
165 static void pa_init_libfuncs (void);
166 static rtx pa_struct_value_rtx (tree, int);
167 static bool pa_pass_by_reference (cumulative_args_t,
168 				  const function_arg_info &);
169 static int pa_arg_partial_bytes (cumulative_args_t, const function_arg_info &);
170 static void pa_function_arg_advance (cumulative_args_t,
171 				     const function_arg_info &);
172 static rtx pa_function_arg (cumulative_args_t, const function_arg_info &);
173 static pad_direction pa_function_arg_padding (machine_mode, const_tree);
174 static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
175 static struct machine_function * pa_init_machine_status (void);
176 static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
177 					machine_mode,
178 					secondary_reload_info *);
179 static bool pa_secondary_memory_needed (machine_mode,
180 					reg_class_t, reg_class_t);
181 static void pa_extra_live_on_entry (bitmap);
182 static machine_mode pa_promote_function_mode (const_tree,
183 						   machine_mode, int *,
184 						   const_tree, int);
185 
186 static void pa_asm_trampoline_template (FILE *);
187 static void pa_trampoline_init (rtx, tree, rtx);
188 static rtx pa_trampoline_adjust_address (rtx);
189 static rtx pa_delegitimize_address (rtx);
190 static bool pa_print_operand_punct_valid_p (unsigned char);
191 static rtx pa_internal_arg_pointer (void);
192 static bool pa_can_eliminate (const int, const int);
193 static void pa_conditional_register_usage (void);
194 static machine_mode pa_c_mode_for_suffix (char);
195 static section *pa_function_section (tree, enum node_frequency, bool, bool);
196 static bool pa_cannot_force_const_mem (machine_mode, rtx);
197 static bool pa_legitimate_constant_p (machine_mode, rtx);
198 static unsigned int pa_section_type_flags (tree, const char *, int);
199 static bool pa_legitimate_address_p (machine_mode, rtx, bool);
200 static bool pa_callee_copies (cumulative_args_t, const function_arg_info &);
201 static unsigned int pa_hard_regno_nregs (unsigned int, machine_mode);
202 static bool pa_hard_regno_mode_ok (unsigned int, machine_mode);
203 static bool pa_modes_tieable_p (machine_mode, machine_mode);
204 static bool pa_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
205 static HOST_WIDE_INT pa_starting_frame_offset (void);
206 static section* pa_elf_select_rtx_section(machine_mode, rtx, unsigned HOST_WIDE_INT) ATTRIBUTE_UNUSED;
207 
208 /* The following extra sections are only used for SOM.  */
209 static GTY(()) section *som_readonly_data_section;
210 static GTY(()) section *som_one_only_readonly_data_section;
211 static GTY(()) section *som_one_only_data_section;
212 static GTY(()) section *som_tm_clone_table_section;
213 
214 /* Counts for the number of callee-saved general and floating point
215    registers which were saved by the current function's prologue.  */
216 static int gr_saved, fr_saved;
217 
218 /* Boolean indicating whether the return pointer was saved by the
219    current function's prologue.  */
220 static bool rp_saved;
221 
222 static rtx find_addr_reg (rtx);
223 
224 /* Keep track of the number of bytes we have output in the CODE subspace
225    during this compilation so we'll know when to emit inline long-calls.  */
226 unsigned long total_code_bytes;
227 
228 /* The last address of the previous function plus the number of bytes in
229    associated thunks that have been output.  This is used to determine if
230    a thunk can use an IA-relative branch to reach its target function.  */
231 static unsigned int last_address;
232 
233 /* Variables to handle plabels that we discover are necessary at assembly
234    output time.  They are output after the current function.  */
235 struct GTY(()) deferred_plabel
236 {
237   rtx internal_label;
238   rtx symbol;
239 };
240 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
241   deferred_plabels;
242 static size_t n_deferred_plabels = 0;
243 
244 /* Initialize the GCC target structure.  */
245 
246 #undef TARGET_OPTION_OVERRIDE
247 #define TARGET_OPTION_OVERRIDE pa_option_override
248 
249 #undef TARGET_ASM_ALIGNED_HI_OP
250 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
251 #undef TARGET_ASM_ALIGNED_SI_OP
252 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
253 #undef TARGET_ASM_ALIGNED_DI_OP
254 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
255 #undef TARGET_ASM_UNALIGNED_HI_OP
256 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
257 #undef TARGET_ASM_UNALIGNED_SI_OP
258 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
259 #undef TARGET_ASM_UNALIGNED_DI_OP
260 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
261 #undef TARGET_ASM_INTEGER
262 #define TARGET_ASM_INTEGER pa_assemble_integer
263 
264 #undef TARGET_ASM_FUNCTION_EPILOGUE
265 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
266 
267 #undef TARGET_FUNCTION_VALUE
268 #define TARGET_FUNCTION_VALUE pa_function_value
269 #undef TARGET_LIBCALL_VALUE
270 #define TARGET_LIBCALL_VALUE pa_libcall_value
271 #undef TARGET_FUNCTION_VALUE_REGNO_P
272 #define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p
273 
274 #undef TARGET_LEGITIMIZE_ADDRESS
275 #define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address
276 
277 #undef TARGET_SCHED_ADJUST_COST
278 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
279 #undef TARGET_SCHED_ISSUE_RATE
280 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
281 
282 #undef TARGET_ENCODE_SECTION_INFO
283 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
284 #undef TARGET_STRIP_NAME_ENCODING
285 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
286 
287 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
288 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
289 
290 #undef TARGET_COMMUTATIVE_P
291 #define TARGET_COMMUTATIVE_P pa_commutative_p
292 
293 #undef TARGET_ASM_OUTPUT_MI_THUNK
294 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
295 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
296 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
297 
298 #undef TARGET_ASM_FILE_END
299 #define TARGET_ASM_FILE_END pa_file_end
300 
301 #undef TARGET_ASM_RELOC_RW_MASK
302 #define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask
303 
304 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
305 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p
306 
307 #if !defined(USE_COLLECT2)
308 #undef TARGET_ASM_CONSTRUCTOR
309 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
310 #undef TARGET_ASM_DESTRUCTOR
311 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
312 #endif
313 
314 #undef TARGET_INIT_BUILTINS
315 #define TARGET_INIT_BUILTINS pa_init_builtins
316 
317 #undef TARGET_EXPAND_BUILTIN
318 #define TARGET_EXPAND_BUILTIN pa_expand_builtin
319 
320 #undef TARGET_REGISTER_MOVE_COST
321 #define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
322 #undef TARGET_RTX_COSTS
323 #define TARGET_RTX_COSTS hppa_rtx_costs
324 #undef TARGET_ADDRESS_COST
325 #define TARGET_ADDRESS_COST hppa_address_cost
326 
327 #undef TARGET_MACHINE_DEPENDENT_REORG
328 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
329 
330 #undef TARGET_INIT_LIBFUNCS
331 #define TARGET_INIT_LIBFUNCS pa_init_libfuncs
332 
333 #undef TARGET_PROMOTE_FUNCTION_MODE
334 #define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
335 #undef TARGET_PROMOTE_PROTOTYPES
336 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
337 
338 #undef TARGET_STRUCT_VALUE_RTX
339 #define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
340 #undef TARGET_RETURN_IN_MEMORY
341 #define TARGET_RETURN_IN_MEMORY pa_return_in_memory
342 #undef TARGET_MUST_PASS_IN_STACK
343 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
344 #undef TARGET_PASS_BY_REFERENCE
345 #define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
346 #undef TARGET_CALLEE_COPIES
347 #define TARGET_CALLEE_COPIES pa_callee_copies
348 #undef TARGET_ARG_PARTIAL_BYTES
349 #define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
350 #undef TARGET_FUNCTION_ARG
351 #define TARGET_FUNCTION_ARG pa_function_arg
352 #undef TARGET_FUNCTION_ARG_ADVANCE
353 #define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
354 #undef TARGET_FUNCTION_ARG_PADDING
355 #define TARGET_FUNCTION_ARG_PADDING pa_function_arg_padding
356 #undef TARGET_FUNCTION_ARG_BOUNDARY
357 #define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary
358 
359 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
360 #define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
361 #undef TARGET_EXPAND_BUILTIN_VA_START
362 #define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
363 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
364 #define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
365 
366 #undef TARGET_SCALAR_MODE_SUPPORTED_P
367 #define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
368 
369 #undef TARGET_CANNOT_FORCE_CONST_MEM
370 #define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem
371 
372 #undef TARGET_SECONDARY_RELOAD
373 #define TARGET_SECONDARY_RELOAD pa_secondary_reload
374 #undef TARGET_SECONDARY_MEMORY_NEEDED
375 #define TARGET_SECONDARY_MEMORY_NEEDED pa_secondary_memory_needed
376 
377 #undef TARGET_EXTRA_LIVE_ON_ENTRY
378 #define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry
379 
380 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
381 #define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
382 #undef TARGET_TRAMPOLINE_INIT
383 #define TARGET_TRAMPOLINE_INIT pa_trampoline_init
384 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
385 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
386 #undef TARGET_DELEGITIMIZE_ADDRESS
387 #define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
388 #undef TARGET_INTERNAL_ARG_POINTER
389 #define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
390 #undef TARGET_CAN_ELIMINATE
391 #define TARGET_CAN_ELIMINATE pa_can_eliminate
392 #undef TARGET_CONDITIONAL_REGISTER_USAGE
393 #define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
394 #undef TARGET_C_MODE_FOR_SUFFIX
395 #define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
396 #undef TARGET_ASM_FUNCTION_SECTION
397 #define TARGET_ASM_FUNCTION_SECTION pa_function_section
398 
399 #undef TARGET_LEGITIMATE_CONSTANT_P
400 #define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
401 #undef TARGET_SECTION_TYPE_FLAGS
402 #define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
403 #undef TARGET_LEGITIMATE_ADDRESS_P
404 #define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p
405 
406 #undef TARGET_LRA_P
407 #define TARGET_LRA_P hook_bool_void_false
408 
409 #undef TARGET_HARD_REGNO_NREGS
410 #define TARGET_HARD_REGNO_NREGS pa_hard_regno_nregs
411 #undef TARGET_HARD_REGNO_MODE_OK
412 #define TARGET_HARD_REGNO_MODE_OK pa_hard_regno_mode_ok
413 #undef TARGET_MODES_TIEABLE_P
414 #define TARGET_MODES_TIEABLE_P pa_modes_tieable_p
415 
416 #undef TARGET_CAN_CHANGE_MODE_CLASS
417 #define TARGET_CAN_CHANGE_MODE_CLASS pa_can_change_mode_class
418 
419 #undef TARGET_CONSTANT_ALIGNMENT
420 #define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings
421 
422 #undef TARGET_STARTING_FRAME_OFFSET
423 #define TARGET_STARTING_FRAME_OFFSET pa_starting_frame_offset
424 
425 #undef TARGET_HAVE_SPECULATION_SAFE_VALUE
426 #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
427 
428 struct gcc_target targetm = TARGET_INITIALIZER;
429 
430 /* Parse the -mfixed-range= option string.  */
431 
432 static void
fix_range(const char * const_str)433 fix_range (const char *const_str)
434 {
435   int i, first, last;
436   char *str, *dash, *comma;
437 
438   /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
439      REG2 are either register names or register numbers.  The effect
440      of this option is to mark the registers in the range from REG1 to
441      REG2 as ``fixed'' so they won't be used by the compiler.  This is
442      used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */
443 
444   i = strlen (const_str);
445   str = (char *) alloca (i + 1);
446   memcpy (str, const_str, i + 1);
447 
448   while (1)
449     {
450       dash = strchr (str, '-');
451       if (!dash)
452 	{
453 	  warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
454 	  return;
455 	}
456       *dash = '\0';
457 
458       comma = strchr (dash + 1, ',');
459       if (comma)
460 	*comma = '\0';
461 
462       first = decode_reg_name (str);
463       if (first < 0)
464 	{
465 	  warning (0, "unknown register name: %s", str);
466 	  return;
467 	}
468 
469       last = decode_reg_name (dash + 1);
470       if (last < 0)
471 	{
472 	  warning (0, "unknown register name: %s", dash + 1);
473 	  return;
474 	}
475 
476       *dash = '-';
477 
478       if (first > last)
479 	{
480 	  warning (0, "%s-%s is an empty range", str, dash + 1);
481 	  return;
482 	}
483 
484       for (i = first; i <= last; ++i)
485 	fixed_regs[i] = call_used_regs[i] = 1;
486 
487       if (!comma)
488 	break;
489 
490       *comma = ',';
491       str = comma + 1;
492     }
493 
494   /* Check if all floating point registers have been fixed.  */
495   for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
496     if (!fixed_regs[i])
497       break;
498 
499   if (i > FP_REG_LAST)
500     target_flags |= MASK_SOFT_FLOAT;
501 }
502 
/* Implement the TARGET_OPTION_OVERRIDE hook.  Validates and adjusts
   option state after all command-line options have been parsed.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  /* Process options deferred from the generic option handlers;
     currently only -mfixed-range=.  */
  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
	switch (opt->opt_index)
	  {
	  case OPT_mfixed_range_:
	    fix_range (opt->arg);
	    break;

	  default:
	    gcc_unreachable ();
	  }
      }

  /* Diagnose option combinations that PIC does not work with.  */
  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
   {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
   }

  /* Debug info on this target requires the GNU assembler.  */
  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "%<-g%> is only supported when using GAS on this processor");
      warning (0, "%<-g%> option disabled");
      write_symbols = NO_DEBUG;
    }

  if (TARGET_64BIT && TARGET_HPUX)
    {
      /* DWARF5 is not supported by gdb.  Don't emit DWARF5 unless
	 specifically selected.  */
      if (!OPTION_SET_P (dwarf_strict))
	dwarf_strict = 1;
      if (!OPTION_SET_P (dwarf_version))
	dwarf_version = 4;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "%<-freorder-blocks-and-partition%> does not work "
	      "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* Disable -fstack-protector to suppress warning.  */
  flag_stack_protect = 0;

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  /* Install the per-function machine_function allocator.  */
  init_machine_status = pa_init_machine_status;
}
587 
/* Codes identifying the PA-specific builtin functions registered by
   pa_init_builtins and dispatched in pa_expand_builtin.  */
enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,	/* __builtin_copysignq (maps to _U_Qfcopysign).  */
  PA_BUILTIN_FABSQ,	/* __builtin_fabsq (maps to _U_Qfabs).  */
  PA_BUILTIN_INFQ,	/* __builtin_infq (expanded inline to +inf).  */
  PA_BUILTIN_HUGE_VALQ,	/* __builtin_huge_valq (expanded inline).  */
  PA_BUILTIN_max	/* Number of PA builtins; must remain last.  */
};

/* Builtin declarations, indexed by the pa_builtins codes above.  */
static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];
598 
/* Implement TARGET_INIT_BUILTINS: register the PA-specific builtin
   functions and adjust assembler names of some library builtins.  */
static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    /* Redirect fputc_unlocked to putc_unlocked when the C library
       lacks the former.  */
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
		      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    /* HP-UX 11 spells the finite checks _Isfinite/_Isfinitef.  */
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
						 "__float128");

      /* TFmode support builtins.  */
      /* fabsq: long double -> long double, implemented by _U_Qfabs.  */
      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
				   PA_BUILTIN_FABSQ, BUILT_IN_MD,
				   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      /* copysignq: (long double, long double) -> long double,
	 implemented by _U_Qfcopysign.  */
      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
				   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
				   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      /* infq/huge_valq take no arguments and have no library fallback;
	 they are expanded inline.  */
      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
				   PA_BUILTIN_INFQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
                                   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}
660 
661 static rtx
pa_expand_builtin(tree exp,rtx target,rtx subtarget ATTRIBUTE_UNUSED,machine_mode mode ATTRIBUTE_UNUSED,int ignore ATTRIBUTE_UNUSED)662 pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
663 		   machine_mode mode ATTRIBUTE_UNUSED,
664 		   int ignore ATTRIBUTE_UNUSED)
665 {
666   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
667   unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
668 
669   switch (fcode)
670     {
671     case PA_BUILTIN_FABSQ:
672     case PA_BUILTIN_COPYSIGNQ:
673       return expand_call (exp, target, ignore);
674 
675     case PA_BUILTIN_INFQ:
676     case PA_BUILTIN_HUGE_VALQ:
677       {
678 	machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
679 	REAL_VALUE_TYPE inf;
680 	rtx tmp;
681 
682 	real_inf (&inf);
683 	tmp = const_double_from_real_value (inf, target_mode);
684 
685 	tmp = validize_mem (force_const_mem (target_mode, tmp));
686 
687 	if (target == 0)
688 	  target = gen_reg_rtx (target_mode);
689 
690 	emit_move_insn (target, tmp);
691 	return target;
692       }
693 
694     default:
695       gcc_unreachable ();
696     }
697 
698   return NULL_RTX;
699 }
700 
/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  /* Zero-initialized, garbage-collected allocation.  */
  return ggc_cleared_alloc<machine_function> ();
}
710 
/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  /* Propagate the pointer property so alignment information derived
     from FROM is not lost when its value is copied to TO.  */
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}
720 
721 /* Return 1 if X contains a symbolic expression.  We know these
722    expressions will have one of a few well defined forms, so
723    we need only check those forms.  */
724 int
pa_symbolic_expression_p(rtx x)725 pa_symbolic_expression_p (rtx x)
726 {
727 
728   /* Strip off any HIGH.  */
729   if (GET_CODE (x) == HIGH)
730     x = XEXP (x, 0);
731 
732   return symbolic_operand (x, VOIDmode);
733 }
734 
735 /* Accept any constant that can be moved in one instruction into a
736    general register.  */
737 int
pa_cint_ok_for_move(unsigned HOST_WIDE_INT ival)738 pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival)
739 {
740   /* OK if ldo, ldil, or zdepi, can be used.  */
741   return (VAL_14_BITS_P (ival)
742 	  || pa_ldil_cint_p (ival)
743 	  || pa_zdepi_cint_p (ival));
744 }
745 
746 /* True iff ldil can be used to load this CONST_INT.  The least
747    significant 11 bits of the value must be zero and the value must
748    not change sign when extended from 32 to 64 bits.  */
749 int
pa_ldil_cint_p(unsigned HOST_WIDE_INT ival)750 pa_ldil_cint_p (unsigned HOST_WIDE_INT ival)
751 {
752   unsigned HOST_WIDE_INT x;
753 
754   x = ival & (((unsigned HOST_WIDE_INT) -1 << 31) | 0x7ff);
755   return x == 0 || x == ((unsigned HOST_WIDE_INT) -1 << 31);
756 }
757 
/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;		/* Isolate the lowest set bit of X.  */
  /* NOTE(review): the trick appears to fold the bits of X above the
     4 positions that the sign-extended 5-bit field can cover into a
     single carry; representable values then leave T a power of two.
     Grounded only in the comment above -- verify against the PA zdepi
     instruction description before altering.  */
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
773 
774 /* True iff depi or extru can be used to compute (reg & mask).
775    Accept bit pattern like these:
776    0....01....1
777    1....10....0
778    1..10..01..1  */
779 int
pa_and_mask_p(unsigned HOST_WIDE_INT mask)780 pa_and_mask_p (unsigned HOST_WIDE_INT mask)
781 {
782   mask = ~mask;
783   mask += mask & -mask;
784   return (mask & (mask - 1)) == 0;
785 }
786 
787 /* True iff depi can be used to compute (reg | MASK).  */
788 int
pa_ior_mask_p(unsigned HOST_WIDE_INT mask)789 pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
790 {
791   mask += mask & -mask;
792   return (mask & (mask - 1)) == 0;
793 }
794 
795 /* Legitimize PIC addresses.  If the address is already
796    position-independent, we return ORIG.  Newly generated
797    position-independent addresses go to REG.  If we need more
798    than one register, we lose.  */
799 
800 static rtx
legitimize_pic_address(rtx orig,machine_mode mode,rtx reg)801 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
802 {
803   rtx pic_ref = orig;
804 
805   gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));
806 
807   /* Labels need special handling.  */
808   if (pic_label_operand (orig, mode))
809     {
810       rtx_insn *insn;
811 
812       /* We do not want to go through the movXX expanders here since that
813 	 would create recursion.
814 
815 	 Nor do we really want to call a generator for a named pattern
816 	 since that requires multiple patterns if we want to support
817 	 multiple word sizes.
818 
819 	 So instead we just emit the raw set, which avoids the movXX
820 	 expanders completely.  */
821       mark_reg_pointer (reg, BITS_PER_UNIT);
822       insn = emit_insn (gen_rtx_SET (reg, orig));
823 
824       /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
825       add_reg_note (insn, REG_EQUAL, orig);
826 
827       /* During and after reload, we need to generate a REG_LABEL_OPERAND note
828 	 and update LABEL_NUSES because this is not done automatically.  */
829       if (reload_in_progress || reload_completed)
830 	{
831 	  /* Extract LABEL_REF.  */
832 	  if (GET_CODE (orig) == CONST)
833 	    orig = XEXP (XEXP (orig, 0), 0);
834 	  /* Extract CODE_LABEL.  */
835 	  orig = XEXP (orig, 0);
836 	  add_reg_note (insn, REG_LABEL_OPERAND, orig);
837 	  /* Make sure we have label and not a note.  */
838 	  if (LABEL_P (orig))
839 	    LABEL_NUSES (orig)++;
840 	}
841       crtl->uses_pic_offset_table = 1;
842       return reg;
843     }
844   if (GET_CODE (orig) == SYMBOL_REF)
845     {
846       rtx_insn *insn;
847       rtx tmp_reg;
848 
849       gcc_assert (reg);
850 
851       /* Before reload, allocate a temporary register for the intermediate
852 	 result.  This allows the sequence to be deleted when the final
853 	 result is unused and the insns are trivially dead.  */
854       tmp_reg = ((reload_in_progress || reload_completed)
855 		 ? reg : gen_reg_rtx (Pmode));
856 
857       if (function_label_operand (orig, VOIDmode))
858 	{
859 	  /* Force function label into memory in word mode.  */
860 	  orig = XEXP (force_const_mem (word_mode, orig), 0);
861 	  /* Load plabel address from DLT.  */
862 	  emit_move_insn (tmp_reg,
863 			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
864 					gen_rtx_HIGH (word_mode, orig)));
865 	  pic_ref
866 	    = gen_const_mem (Pmode,
867 			     gen_rtx_LO_SUM (Pmode, tmp_reg,
868 					     gen_rtx_UNSPEC (Pmode,
869 						         gen_rtvec (1, orig),
870 						         UNSPEC_DLTIND14R)));
871 	  emit_move_insn (reg, pic_ref);
872 	  /* Now load address of function descriptor.  */
873 	  pic_ref = gen_rtx_MEM (Pmode, reg);
874 	}
875       else
876 	{
877 	  /* Load symbol reference from DLT.  */
878 	  emit_move_insn (tmp_reg,
879 			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
880 					gen_rtx_HIGH (word_mode, orig)));
881 	  pic_ref
882 	    = gen_const_mem (Pmode,
883 			     gen_rtx_LO_SUM (Pmode, tmp_reg,
884 					     gen_rtx_UNSPEC (Pmode,
885 						         gen_rtvec (1, orig),
886 						         UNSPEC_DLTIND14R)));
887 	}
888 
889       crtl->uses_pic_offset_table = 1;
890       mark_reg_pointer (reg, BITS_PER_UNIT);
891       insn = emit_move_insn (reg, pic_ref);
892 
893       /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
894       set_unique_reg_note (insn, REG_EQUAL, orig);
895 
896       return reg;
897     }
898   else if (GET_CODE (orig) == CONST)
899     {
900       rtx base;
901 
902       if (GET_CODE (XEXP (orig, 0)) == PLUS
903 	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
904 	return orig;
905 
906       gcc_assert (reg);
907       gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
908 
909       base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
910       orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
911 				     base == reg ? 0 : reg);
912 
913       if (GET_CODE (orig) == CONST_INT)
914 	{
915 	  if (INT_14_BITS (orig))
916 	    return plus_constant (Pmode, base, INTVAL (orig));
917 	  orig = force_reg (Pmode, orig);
918 	}
919       pic_ref = gen_rtx_PLUS (Pmode, base, orig);
920       /* Likewise, should we set special REG_NOTEs here?  */
921     }
922 
923   return pic_ref;
924 }
925 
926 static GTY(()) rtx gen_tls_tga;
927 
928 static rtx
gen_tls_get_addr(void)929 gen_tls_get_addr (void)
930 {
931   if (!gen_tls_tga)
932     gen_tls_tga = init_one_libfunc ("__tls_get_addr");
933   return gen_tls_tga;
934 }
935 
936 static rtx
hppa_tls_call(rtx arg)937 hppa_tls_call (rtx arg)
938 {
939   rtx ret;
940 
941   ret = gen_reg_rtx (Pmode);
942   emit_library_call_value (gen_tls_get_addr (), ret,
943 			   LCT_CONST, Pmode, arg, Pmode);
944 
945   return ret;
946 }
947 
/* Legitimize the TLS symbol reference ADDR, emitting any insns needed,
   and return an rtx holding the resulting address.  ADDR is returned
   unchanged if it is not a SYMBOL_REF.  The access sequence emitted is
   selected by the symbol's TLS model.  */
static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, tmp, t1, t2, tp;
  rtx_insn *insn;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
      case TLS_MODEL_GLOBAL_DYNAMIC:
	/* Load the GD argument into TMP (PIC or non-PIC form), then
	   call __tls_get_addr to obtain the address.  */
	tmp = gen_reg_rtx (Pmode);
	if (flag_pic)
	  emit_insn (gen_tgd_load_pic (tmp, addr));
	else
	  emit_insn (gen_tgd_load (tmp, addr));
	ret = hppa_tls_call (tmp);
	break;

      case TLS_MODEL_LOCAL_DYNAMIC:
	/* Call __tls_get_addr once for the module base, wrapping the
	   call in a libcall block equivalent to UNSPEC_TLSLDBASE so CSE
	   can share the result, then add the symbol's offset.  */
	ret = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	start_sequence ();
	if (flag_pic)
	  emit_insn (gen_tld_load_pic (tmp, addr));
	else
	  emit_insn (gen_tld_load (tmp, addr));
	t1 = hppa_tls_call (tmp);
	insn = get_insns ();
	end_sequence ();
	t2 = gen_reg_rtx (Pmode);
	emit_libcall_block (insn, t2, t1,
			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				            UNSPEC_TLSLDBASE));
	emit_insn (gen_tld_offset_load (ret, addr, t2));
	break;

      case TLS_MODEL_INITIAL_EXEC:
	/* Load the thread pointer and the symbol's offset (PIC or
	   non-PIC form of the IE load), then add them.  */
	tp = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	if (flag_pic)
	  emit_insn (gen_tie_load_pic (tmp, addr));
	else
	  emit_insn (gen_tie_load (tmp, addr));
	emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
	break;

      case TLS_MODEL_LOCAL_EXEC:
	/* The cheapest model: combine the thread pointer with the
	   link-time-known offset in a single tle_load pattern.  */
	tp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	emit_insn (gen_tle_load (ret, addr, tp));
	break;

      default:
	gcc_unreachable ();
    }

  return ret;
}
1012 
1013 /* Helper for hppa_legitimize_address.  Given X, return true if it
1014    is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.
1015 
1016    This respectively represent canonical shift-add rtxs or scaled
1017    memory addresses.  */
1018 static bool
mem_shadd_or_shadd_rtx_p(rtx x)1019 mem_shadd_or_shadd_rtx_p (rtx x)
1020 {
1021   return ((GET_CODE (x) == ASHIFT
1022 	   || GET_CODE (x) == MULT)
1023 	  && GET_CODE (XEXP (x, 1)) == CONST_INT
1024 	  && ((GET_CODE (x) == ASHIFT
1025 	       && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
1026 	      || (GET_CODE (x) == MULT
1027 		  && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
1028 }
1029 
1030 /* Try machine-dependent ways of modifying an illegitimate address
1031    to be legitimate.  If we find one, return the new, valid address.
1032    This macro is used in only one place: `memory_address' in explow.cc.
1033 
1034    OLDX is the address as it was before break_out_memory_refs was called.
1035    In some cases it is useful to look at this to decide what needs to be done.
1036 
1037    It is always safe for this macro to do nothing.  It exists to recognize
1038    opportunities to optimize the output.
1039 
1040    For the PA, transform:
1041 
1042 	memory(X + <large int>)
1043 
1044    into:
1045 
1046 	if (<large int> & mask) >= 16
1047 	  Y = (<large int> & ~mask) + mask + 1	Round up.
1048 	else
1049 	  Y = (<large int> & ~mask)		Round down.
1050 	Z = X + Y
1051 	memory (Z + (<large int> - Y));
1052 
1053    This is for CSE to find several similar references, and only use one Z.
1054 
1055    X can either be a SYMBOL_REF or REG, but because combine cannot
1056    perform a 4->2 combination we do nothing for SYMBOL_REF + D where
1057    D will not fit in 14 bits.
1058 
1059    MODE_FLOAT references allow displacements which fit in 5 bits, so use
1060    0x1f as the mask.
1061 
1062    MODE_INT references allow displacements which fit in 14 bits, so use
1063    0x3fff as the mask.
1064 
1065    This relies on the fact that most mode MODE_FLOAT references will use FP
1066    registers and most mode MODE_INT references will use integer registers.
1067    (In the rare case of an FP register used in an integer MODE, we depend
1068    on secondary reloads to clean things up.)
1069 
1070 
1071    It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
1072    manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
1073    addressing modes to be used).
1074 
1075    Note that the addresses passed into hppa_legitimize_address always
1076    come from a MEM, so we only have to match the MULT form on incoming
1077    addresses.  But to be future proof we also match the ASHIFT form.
1078 
1079    However, this routine always places those shift-add sequences into
1080    registers, so we have to generate the ASHIFT form as our output.
1081 
1082    Put X and Z into registers.  Then put the entire expression into
1083    a register.  */
1084 
rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 machine_mode mode)
{
  /* Remember the incoming form so we can fall back to it unchanged.  */
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  /* TLS and PIC references each get their own legitimization path.  */
  if (tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      /* 5-bit displacements for strict FP references, 14-bit otherwise
	 (see the block comment above this function).  */
      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine cannot
	 handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  /* Materialize SYMBOL_REF+newoffset with a HIGH/LO_SUM pair.  */
	  rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      /* The remainder (offset - newoffset) fits in the displacement
	 field by construction of the rounding above.  */
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (mem_shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
      && (OBJECT_P (XEXP (x, 1))
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
      if (GET_CODE (XEXP (x, 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      rtx reg1, reg2;
      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg2,
						      GEN_INT (shift_val)),
				      reg1));
    }

  /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
      && (mode == SFmode || mode == DFmode))
    {
      int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));

      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      /* Try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_ASHIFT (Pmode,
					      XEXP (XEXP (XEXP (x, 0), 0), 0),
					      GEN_INT (shift_val)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  idx = XEXP (x, 0);
	}

      /* No usable base register found; leave the address alone.  */
      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  int val = INTVAL (XEXP (idx, 1));
	  val /= (1 << shift_val);

	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_ASHIFT (Pmode, reg1,
						    GEN_INT (shift_val)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode,
			    gen_rtx_PLUS (Pmode,
					  gen_rtx_ASHIFT (Pmode, reg2,
							  GEN_INT (shift_val)),
					  reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg1,
						      GEN_INT (shift_val)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));

    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (mem_shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big, but can be divided evenly by shadd_const
	     and added to (reg).  This allows more scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
	        reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode,
							 reg2,
							 GEN_INT (shift_val)),
					 reg1));
	    }
	  /* NOTE(review): the modulo test below assumes the ASHIFT form
	     (scale is 1 << c), while the division in the body divides by
	     the raw constant, which matches the MULT form.  Per the block
	     comment above this function, incoming addresses use the MULT
	     form, so the division is the operative one -- confirm whether
	     ASHIFT inputs can ever reach this branch.  */
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
	    {
	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode, regx2,
						         GEN_INT (shift_val)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}
1453 
1454 /* Implement the TARGET_REGISTER_MOVE_COST hook.
1455 
1456    Compute extra cost of moving data between one register class
1457    and another.
1458 
1459    Make moves from SAR so expensive they should never happen.  We used to
1460    have 0xffff here, but that generates overflow in rare cases.
1461 
1462    Copies involving a FP register and a non-FP register are relatively
1463    expensive because they must go through memory.
1464 
1465    Other copies are reasonably cheap.  */
1466 
1467 static int
hppa_register_move_cost(machine_mode mode ATTRIBUTE_UNUSED,reg_class_t from,reg_class_t to)1468 hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
1469 			 reg_class_t from, reg_class_t to)
1470 {
1471   if (from == SHIFT_REGS)
1472     return 0x100;
1473   else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
1474     return 18;
1475   else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
1476            || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
1477     return 16;
1478   else
1479     return 2;
1480 }
1481 
1482 /* For the HPPA, REG and REG+CONST is cost 0
1483    and addresses involving symbolic constants are cost 2.
1484 
1485    PIC addresses are very expensive.
1486 
1487    It is no coincidence that this has the same structure
1488    as pa_legitimate_address_p.  */
1489 
1490 static int
hppa_address_cost(rtx X,machine_mode mode ATTRIBUTE_UNUSED,addr_space_t as ATTRIBUTE_UNUSED,bool speed ATTRIBUTE_UNUSED)1491 hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
1492 		   addr_space_t as ATTRIBUTE_UNUSED,
1493 		   bool speed ATTRIBUTE_UNUSED)
1494 {
1495   switch (GET_CODE (X))
1496     {
1497     case REG:
1498     case PLUS:
1499     case LO_SUM:
1500       return 1;
1501     case HIGH:
1502       return 2;
1503     default:
1504       return 4;
1505     }
1506 }
1507 
1508 /* Return true if X represents a (possibly non-canonical) shNadd pattern.
1509    The machine mode of X is known to be SImode or DImode.  */
1510 
1511 static bool
hppa_rtx_costs_shadd_p(rtx x)1512 hppa_rtx_costs_shadd_p (rtx x)
1513 {
1514   if (GET_CODE (x) != PLUS
1515       || !REG_P (XEXP (x, 1)))
1516     return false;
1517   rtx op0 = XEXP (x, 0);
1518   if (GET_CODE (op0) == ASHIFT
1519       && CONST_INT_P (XEXP (op0, 1))
1520       && REG_P (XEXP (op0, 0)))
1521     {
1522       unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1));
1523       return x == 1 || x == 2 || x == 3;
1524     }
1525   if (GET_CODE (op0) == MULT
1526       && CONST_INT_P (XEXP (op0, 1))
1527       && REG_P (XEXP (op0, 0)))
1528     {
1529       unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1));
1530       return x == 2 || x == 4 || x == 8;
1531     }
1532   return false;
1533 }
1534 
1535 /* Compute a (partial) cost for rtx X.  Return true if the complete
1536    cost has been computed, and false if subexpressions should be
1537    scanned.  In either case, *TOTAL contains the cost result.  */
1538 
/* Implement TARGET_RTX_COSTS.  Sets *TOTAL to the cost of X; a return
   of true means the cost is final, false means the caller should also
   scan X's subexpressions (see the comment above this function).  */
static bool
hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
		int opno ATTRIBUTE_UNUSED,
		int *total, bool speed)
{
  int code = GET_CODE (x);

  switch (code)
    {
    case CONST_INT:
      /* Loading a constant into a register costs a full insn; as an
	 operand, zero is free and a 14-bit immediate nearly so.  */
      if (outer_code == SET)
	*total = COSTS_N_INSNS (1);
      else if (INTVAL (x) == 0)
	*total = 0;
      else if (INT_14_BITS (x))
	*total = 1;
      else
	*total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      /* FP zero used as an operand is free; other FP constants must
	 come from memory.  */
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
	  && outer_code != SET)
	*total = 0;
      else
	*total = 8;
      return true;

    case MULT:
      /* Integer multiplies are cheap only when the FPU's xmpyu is
	 usable (PA 1.1, hard float, no -msoft-mult).  */
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (3);
	}
      else if (mode == DImode)
	{
	  if (TARGET_PA_11 && !TARGET_SOFT_FLOAT && !TARGET_SOFT_MULT)
	    *total = COSTS_N_INSNS (25);
	  else
	    *total = COSTS_N_INSNS (80);
	}
      else
	{
	  if (TARGET_PA_11 && !TARGET_SOFT_FLOAT && !TARGET_SOFT_MULT)
	    *total = COSTS_N_INSNS (8);
	  else
	    *total = COSTS_N_INSNS (20);
	}
      /* Cost is final only when both operands are simple registers.  */
      return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1));

    case DIV:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (14);
	  return false;
	}
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      if (mode == DImode)
	*total = COSTS_N_INSNS (240);
      else
	*total = COSTS_N_INSNS (60);
      return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1));

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	*total = COSTS_N_INSNS (3);
      else if (mode == DImode)
	{
	  if (TARGET_64BIT)
	    {
	      *total = COSTS_N_INSNS (1);
	      /* Handle shladd,l instructions.  */
	      if (hppa_rtx_costs_shadd_p (x))
		return true;
	    }
	  else
	    *total = COSTS_N_INSNS (2);
	}
      else
	{
	  *total = COSTS_N_INSNS (1);
	  /* Handle shNadd instructions.  */
	  if (hppa_rtx_costs_shadd_p (x))
	    return true;
	}
      return REG_P (XEXP (x, 0))
	     && (REG_P (XEXP (x, 1))
		 || CONST_INT_P (XEXP (x, 1)));

    case ASHIFT:
      /* Constant shift counts are cheap; variable DImode shifts on
	 32-bit need a multi-insn (or out-of-line) sequence.  */
      if (mode == DImode)
	{
	  if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
	    {
	      if (TARGET_64BIT)
		*total = COSTS_N_INSNS (1);
	      else
		*total = COSTS_N_INSNS (2);
	      return true;
	    }
	  else if (TARGET_64BIT)
	    *total = COSTS_N_INSNS (3);
	  else if (speed)
	    *total = COSTS_N_INSNS (13);
	  else
	    *total = COSTS_N_INSNS (18);
	}
      else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
	{
	  if (TARGET_64BIT)
	    *total = COSTS_N_INSNS (2);
	  else
	    *total = COSTS_N_INSNS (1);
	  return true;
	}
      else if (TARGET_64BIT)
	*total = COSTS_N_INSNS (4);
      else
	*total = COSTS_N_INSNS (2);
      return REG_P (XEXP (x, 0))
	     && (REG_P (XEXP (x, 1))
		 || CONST_INT_P (XEXP (x, 1)));

    case ASHIFTRT:
      /* Same structure as ASHIFT, with slightly different counts for
	 the variable DImode case.  */
      if (mode == DImode)
	{
	  if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
	    {
	      if (TARGET_64BIT)
		*total = COSTS_N_INSNS (1);
	      else
		*total = COSTS_N_INSNS (2);
	      return true;
	    }
	  else if (TARGET_64BIT)
	    *total = COSTS_N_INSNS (3);
	  else if (speed)
	    *total = COSTS_N_INSNS (14);
	  else
	    *total = COSTS_N_INSNS (19);
	}
      else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
	{
	  if (TARGET_64BIT)
	    *total = COSTS_N_INSNS (2);
	  else
	    *total = COSTS_N_INSNS (1);
	  return true;
	}
      else if (TARGET_64BIT)
	*total = COSTS_N_INSNS (4);
      else
	*total = COSTS_N_INSNS (2);
      return REG_P (XEXP (x, 0))
	     && (REG_P (XEXP (x, 1))
		 || CONST_INT_P (XEXP (x, 1)));

    case LSHIFTRT:
      /* Same structure again; logical right shifts are the cheapest of
	 the variable DImode shifts.  */
      if (mode == DImode)
	{
	  if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
	    {
	      if (TARGET_64BIT)
		*total = COSTS_N_INSNS (1);
	      else
		*total = COSTS_N_INSNS (2);
	      return true;
	    }
	  else if (TARGET_64BIT)
	    *total = COSTS_N_INSNS (2);
	  else if (speed)
	    *total = COSTS_N_INSNS (12);
	  else
	    *total = COSTS_N_INSNS (15);
	}
      else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      else if (TARGET_64BIT)
	*total = COSTS_N_INSNS (3);
      else
	*total = COSTS_N_INSNS (2);
      return REG_P (XEXP (x, 0))
	     && (REG_P (XEXP (x, 1))
		 || CONST_INT_P (XEXP (x, 1)));

    default:
      return false;
    }
}
1746 
1747 /* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
1748    new rtx with the correct mode.  */
1749 static inline rtx
force_mode(machine_mode mode,rtx orig)1750 force_mode (machine_mode mode, rtx orig)
1751 {
1752   if (mode == GET_MODE (orig))
1753     return orig;
1754 
1755   gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
1756 
1757   return gen_rtx_REG (mode, REGNO (orig));
1758 }
1759 
1760 /* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
1761 
1762 static bool
pa_cannot_force_const_mem(machine_mode mode ATTRIBUTE_UNUSED,rtx x)1763 pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1764 {
1765   return tls_referenced_p (x);
1766 }
1767 
1768 /* Emit insns to move operands[1] into operands[0].
1769 
1770    Return 1 if we have written out everything that needs to be done to
1771    do the move.  Otherwise, return 0 and the caller will emit the move
1772    normally.
1773 
1774    Note SCRATCH_REG may not be in the proper mode depending on how it
1775    will be used.  This routine is responsible for creating a new copy
1776    of SCRATCH_REG in the proper mode.  */
1777 
1778 int
pa_emit_move_sequence(rtx * operands,machine_mode mode,rtx scratch_reg)1779 pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
1780 {
1781   rtx operand0 = operands[0];
1782   rtx operand1 = operands[1];
1783   rtx tem;
1784 
1785   /* We can only handle indexed addresses in the destination operand
1786      of floating point stores.  Thus, we need to break out indexed
1787      addresses from the destination operand.  */
1788   if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1789     {
1790       gcc_assert (can_create_pseudo_p ());
1791 
1792       tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1793       operand0 = replace_equiv_address (operand0, tem);
1794     }
1795 
1796   /* On targets with non-equivalent space registers, break out unscaled
1797      indexed addresses from the source operand before the final CSE.
1798      We have to do this because the REG_POINTER flag is not correctly
1799      carried through various optimization passes and CSE may substitute
1800      a pseudo without the pointer set for one with the pointer set.  As
     a result, we lose various opportunities to create insns with
1802      unscaled indexed addresses.  */
1803   if (!TARGET_NO_SPACE_REGS
1804       && !cse_not_expected
1805       && GET_CODE (operand1) == MEM
1806       && GET_CODE (XEXP (operand1, 0)) == PLUS
1807       && REG_P (XEXP (XEXP (operand1, 0), 0))
1808       && REG_P (XEXP (XEXP (operand1, 0), 1)))
1809     operand1
1810       = replace_equiv_address (operand1,
1811 			       copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1812 
1813   if (scratch_reg
1814       && reload_in_progress && GET_CODE (operand0) == REG
1815       && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1816     operand0 = reg_equiv_mem (REGNO (operand0));
1817   else if (scratch_reg
1818 	   && reload_in_progress && GET_CODE (operand0) == SUBREG
1819 	   && GET_CODE (SUBREG_REG (operand0)) == REG
1820 	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1821     {
1822      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1823 	the code which tracks sets/uses for delete_output_reload.  */
1824       rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1825 				 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
1826 				 SUBREG_BYTE (operand0));
1827       operand0 = alter_subreg (&temp, true);
1828     }
1829 
1830   if (scratch_reg
1831       && reload_in_progress && GET_CODE (operand1) == REG
1832       && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1833     operand1 = reg_equiv_mem (REGNO (operand1));
1834   else if (scratch_reg
1835 	   && reload_in_progress && GET_CODE (operand1) == SUBREG
1836 	   && GET_CODE (SUBREG_REG (operand1)) == REG
1837 	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1838     {
1839      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1840 	the code which tracks sets/uses for delete_output_reload.  */
1841       rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1842 				 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
1843 				 SUBREG_BYTE (operand1));
1844       operand1 = alter_subreg (&temp, true);
1845     }
1846 
1847   if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1848       && ((tem = find_replacement (&XEXP (operand0, 0)))
1849 	  != XEXP (operand0, 0)))
1850     operand0 = replace_equiv_address (operand0, tem);
1851 
1852   if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1853       && ((tem = find_replacement (&XEXP (operand1, 0)))
1854 	  != XEXP (operand1, 0)))
1855     operand1 = replace_equiv_address (operand1, tem);
1856 
1857   /* Handle secondary reloads for loads/stores of FP registers from
1858      REG+D addresses where D does not fit in 5 or 14 bits, including
1859      (subreg (mem (addr))) cases, and reloads for other unsupported
1860      memory operands.  */
1861   if (scratch_reg
1862       && FP_REG_P (operand0)
1863       && (MEM_P (operand1)
1864 	  || (GET_CODE (operand1) == SUBREG
1865 	      && MEM_P (XEXP (operand1, 0)))))
1866     {
1867       rtx op1 = operand1;
1868 
1869       if (GET_CODE (op1) == SUBREG)
1870 	op1 = XEXP (op1, 0);
1871 
1872       if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
1873 	{
1874 	  if (!(TARGET_PA_20
1875 		&& !TARGET_ELF32
1876 		&& INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1877 	      && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
1878 	    {
1879 	      /* SCRATCH_REG will hold an address and maybe the actual data.
1880 		 We want it in WORD_MODE regardless of what mode it was
1881 		 originally given to us.  */
1882 	      scratch_reg = force_mode (word_mode, scratch_reg);
1883 
1884 	      /* D might not fit in 14 bits either; for such cases load D
1885 		 into scratch reg.  */
1886 	      if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1887 		{
1888 		  emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
1889 		  emit_move_insn (scratch_reg,
1890 				  gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
1891 						  Pmode,
1892 						  XEXP (XEXP (op1, 0), 0),
1893 						  scratch_reg));
1894 		}
1895 	      else
1896 		emit_move_insn (scratch_reg, XEXP (op1, 0));
1897 	      op1 = replace_equiv_address (op1, scratch_reg);
1898 	    }
1899 	}
1900       else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
1901 	       || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
1902 	       || IS_INDEX_ADDR_P (XEXP (op1, 0)))
1903 	{
1904 	  /* Load memory address into SCRATCH_REG.  */
1905 	  scratch_reg = force_mode (word_mode, scratch_reg);
1906 	  emit_move_insn (scratch_reg, XEXP (op1, 0));
1907 	  op1 = replace_equiv_address (op1, scratch_reg);
1908 	}
1909       emit_insn (gen_rtx_SET (operand0, op1));
1910       return 1;
1911     }
1912   else if (scratch_reg
1913 	   && FP_REG_P (operand1)
1914 	   && (MEM_P (operand0)
1915 	       || (GET_CODE (operand0) == SUBREG
1916 		   && MEM_P (XEXP (operand0, 0)))))
1917     {
1918       rtx op0 = operand0;
1919 
1920       if (GET_CODE (op0) == SUBREG)
1921 	op0 = XEXP (op0, 0);
1922 
1923       if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
1924 	{
1925 	  if (!(TARGET_PA_20
1926 		&& !TARGET_ELF32
1927 		&& INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1928 	      && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
1929 	    {
1930 	      /* SCRATCH_REG will hold an address and maybe the actual data.
1931 		 We want it in WORD_MODE regardless of what mode it was
1932 		 originally given to us.  */
1933 	      scratch_reg = force_mode (word_mode, scratch_reg);
1934 
1935 	      /* D might not fit in 14 bits either; for such cases load D
1936 		 into scratch reg.  */
1937 	      if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1938 		{
1939 		  emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
1940 		  emit_move_insn (scratch_reg,
1941 				  gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
1942 						  Pmode,
1943 						  XEXP (XEXP (op0, 0), 0),
1944 						  scratch_reg));
1945 		}
1946 	      else
1947 		emit_move_insn (scratch_reg, XEXP (op0, 0));
1948 	      op0 = replace_equiv_address (op0, scratch_reg);
1949 	    }
1950 	}
1951       else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
1952 	       || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
1953 	       || IS_INDEX_ADDR_P (XEXP (op0, 0)))
1954 	{
1955 	  /* Load memory address into SCRATCH_REG.  */
1956 	  scratch_reg = force_mode (word_mode, scratch_reg);
1957 	  emit_move_insn (scratch_reg, XEXP (op0, 0));
1958 	  op0 = replace_equiv_address (op0, scratch_reg);
1959 	}
1960       emit_insn (gen_rtx_SET (op0, operand1));
1961       return 1;
1962     }
1963   /* Handle secondary reloads for loads of FP registers from constant
1964      expressions by forcing the constant into memory.  For the most part,
1965      this is only necessary for SImode and DImode.
1966 
1967      Use scratch_reg to hold the address of the memory location.  */
1968   else if (scratch_reg
1969 	   && CONSTANT_P (operand1)
1970 	   && FP_REG_P (operand0))
1971     {
1972       rtx const_mem, xoperands[2];
1973 
1974       if (operand1 == CONST0_RTX (mode))
1975 	{
1976 	  emit_insn (gen_rtx_SET (operand0, operand1));
1977 	  return 1;
1978 	}
1979 
1980       /* SCRATCH_REG will hold an address and maybe the actual data.  We want
1981 	 it in WORD_MODE regardless of what mode it was originally given
1982 	 to us.  */
1983       scratch_reg = force_mode (word_mode, scratch_reg);
1984 
1985       /* Force the constant into memory and put the address of the
1986 	 memory location into scratch_reg.  */
1987       const_mem = force_const_mem (mode, operand1);
1988       xoperands[0] = scratch_reg;
1989       xoperands[1] = XEXP (const_mem, 0);
1990       pa_emit_move_sequence (xoperands, Pmode, 0);
1991 
1992       /* Now load the destination register.  */
1993       emit_insn (gen_rtx_SET (operand0,
1994 			      replace_equiv_address (const_mem, scratch_reg)));
1995       return 1;
1996     }
1997   /* Handle secondary reloads for SAR.  These occur when trying to load
1998      the SAR from memory or a constant.  */
1999   else if (scratch_reg
2000 	   && GET_CODE (operand0) == REG
2001 	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
2002 	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
2003 	   && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
2004     {
2005       /* D might not fit in 14 bits either; for such cases load D into
2006 	 scratch reg.  */
2007       if (GET_CODE (operand1) == MEM
2008 	  && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
2009 	{
2010 	  /* We are reloading the address into the scratch register, so we
2011 	     want to make sure the scratch register is a full register.  */
2012 	  scratch_reg = force_mode (word_mode, scratch_reg);
2013 
2014 	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
2015 	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
2016 								        0)),
2017 						       Pmode,
2018 						       XEXP (XEXP (operand1, 0),
2019 						       0),
2020 						       scratch_reg));
2021 
2022 	  /* Now we are going to load the scratch register from memory,
2023 	     we want to load it in the same width as the original MEM,
2024 	     which must be the same as the width of the ultimate destination,
2025 	     OPERAND0.  */
2026 	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
2027 
2028 	  emit_move_insn (scratch_reg,
2029 			  replace_equiv_address (operand1, scratch_reg));
2030 	}
2031       else
2032 	{
2033 	  /* We want to load the scratch register using the same mode as
2034 	     the ultimate destination.  */
2035 	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
2036 
2037 	  emit_move_insn (scratch_reg, operand1);
2038 	}
2039 
2040       /* And emit the insn to set the ultimate destination.  We know that
2041 	 the scratch register has the same mode as the destination at this
2042 	 point.  */
2043       emit_move_insn (operand0, scratch_reg);
2044       return 1;
2045     }
2046 
2047   /* Handle the most common case: storing into a register.  */
2048   if (register_operand (operand0, mode))
2049     {
2050       /* Legitimize TLS symbol references.  This happens for references
2051 	 that aren't a legitimate constant.  */
2052       if (PA_SYMBOL_REF_TLS_P (operand1))
2053 	operand1 = legitimize_tls_address (operand1);
2054 
2055       if (register_operand (operand1, mode)
2056 	  || (GET_CODE (operand1) == CONST_INT
2057 	      && pa_cint_ok_for_move (UINTVAL (operand1)))
2058 	  || (operand1 == CONST0_RTX (mode))
2059 	  || (GET_CODE (operand1) == HIGH
2060 	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
2061 	  /* Only `general_operands' can come here, so MEM is ok.  */
2062 	  || GET_CODE (operand1) == MEM)
2063 	{
2064 	  /* Various sets are created during RTL generation which don't
2065 	     have the REG_POINTER flag correctly set.  After the CSE pass,
2066 	     instruction recognition can fail if we don't consistently
2067 	     set this flag when performing register copies.  This should
2068 	     also improve the opportunities for creating insns that use
2069 	     unscaled indexing.  */
2070 	  if (REG_P (operand0) && REG_P (operand1))
2071 	    {
2072 	      if (REG_POINTER (operand1)
2073 		  && !REG_POINTER (operand0)
2074 		  && !HARD_REGISTER_P (operand0))
2075 		copy_reg_pointer (operand0, operand1);
2076 	    }
2077 
2078 	  /* When MEMs are broken out, the REG_POINTER flag doesn't
2079 	     get set.  In some cases, we can set the REG_POINTER flag
2080 	     from the declaration for the MEM.  */
2081 	  if (REG_P (operand0)
2082 	      && GET_CODE (operand1) == MEM
2083 	      && !REG_POINTER (operand0))
2084 	    {
2085 	      tree decl = MEM_EXPR (operand1);
2086 
2087 	      /* Set the register pointer flag and register alignment
2088 		 if the declaration for this memory reference is a
2089 		 pointer type.  */
2090 	      if (decl)
2091 		{
2092 		  tree type;
2093 
2094 		  /* If this is a COMPONENT_REF, use the FIELD_DECL from
2095 		     tree operand 1.  */
2096 		  if (TREE_CODE (decl) == COMPONENT_REF)
2097 		    decl = TREE_OPERAND (decl, 1);
2098 
2099 		  type = TREE_TYPE (decl);
2100 		  type = strip_array_types (type);
2101 
2102 		  if (POINTER_TYPE_P (type))
2103 		    mark_reg_pointer (operand0, BITS_PER_UNIT);
2104 		}
2105 	    }
2106 
2107 	  emit_insn (gen_rtx_SET (operand0, operand1));
2108 	  return 1;
2109 	}
2110     }
2111   else if (GET_CODE (operand0) == MEM)
2112     {
2113       if (mode == DFmode && operand1 == CONST0_RTX (mode)
2114 	  && !(reload_in_progress || reload_completed))
2115 	{
2116 	  rtx temp = gen_reg_rtx (DFmode);
2117 
2118 	  emit_insn (gen_rtx_SET (temp, operand1));
2119 	  emit_insn (gen_rtx_SET (operand0, temp));
2120 	  return 1;
2121 	}
2122       if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
2123 	{
2124 	  /* Run this case quickly.  */
2125 	  emit_insn (gen_rtx_SET (operand0, operand1));
2126 	  return 1;
2127 	}
2128       if (! (reload_in_progress || reload_completed))
2129 	{
2130 	  operands[0] = validize_mem (operand0);
2131 	  operands[1] = operand1 = force_reg (mode, operand1);
2132 	}
2133     }
2134 
2135   /* Simplify the source if we need to.
2136      Note we do have to handle function labels here, even though we do
2137      not consider them legitimate constants.  Loop optimizations can
2138      call the emit_move_xxx with one as a source.  */
2139   if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
2140       || (GET_CODE (operand1) == HIGH
2141 	  && symbolic_operand (XEXP (operand1, 0), mode))
2142       || function_label_operand (operand1, VOIDmode)
2143       || tls_referenced_p (operand1))
2144     {
2145       int ishighonly = 0;
2146 
2147       if (GET_CODE (operand1) == HIGH)
2148 	{
2149 	  ishighonly = 1;
2150 	  operand1 = XEXP (operand1, 0);
2151 	}
2152       if (symbolic_operand (operand1, mode))
2153 	{
2154 	  /* Argh.  The assembler and linker can't handle arithmetic
2155 	     involving plabels.
2156 
2157 	     So we force the plabel into memory, load operand0 from
2158 	     the memory location, then add in the constant part.  */
2159 	  if ((GET_CODE (operand1) == CONST
2160 	       && GET_CODE (XEXP (operand1, 0)) == PLUS
2161 	       && function_label_operand (XEXP (XEXP (operand1, 0), 0),
2162 					  VOIDmode))
2163 	      || function_label_operand (operand1, VOIDmode))
2164 	    {
2165 	      rtx temp, const_part;
2166 
2167 	      /* Figure out what (if any) scratch register to use.  */
2168 	      if (reload_in_progress || reload_completed)
2169 		{
2170 		  scratch_reg = scratch_reg ? scratch_reg : operand0;
2171 		  /* SCRATCH_REG will hold an address and maybe the actual
2172 		     data.  We want it in WORD_MODE regardless of what mode it
2173 		     was originally given to us.  */
2174 		  scratch_reg = force_mode (word_mode, scratch_reg);
2175 		}
2176 	      else if (flag_pic)
2177 		scratch_reg = gen_reg_rtx (Pmode);
2178 
2179 	      if (GET_CODE (operand1) == CONST)
2180 		{
2181 		  /* Save away the constant part of the expression.  */
2182 		  const_part = XEXP (XEXP (operand1, 0), 1);
2183 		  gcc_assert (GET_CODE (const_part) == CONST_INT);
2184 
2185 		  /* Force the function label into memory.  */
2186 		  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2187 		}
2188 	      else
2189 		{
2190 		  /* No constant part.  */
2191 		  const_part = NULL_RTX;
2192 
2193 		  /* Force the function label into memory.  */
2194 		  temp = force_const_mem (mode, operand1);
2195 		}
2196 
2197 
2198 	      /* Get the address of the memory location.  PIC-ify it if
2199 		 necessary.  */
2200 	      temp = XEXP (temp, 0);
2201 	      if (flag_pic)
2202 		temp = legitimize_pic_address (temp, mode, scratch_reg);
2203 
2204 	      /* Put the address of the memory location into our destination
2205 		 register.  */
2206 	      operands[1] = temp;
2207 	      pa_emit_move_sequence (operands, mode, scratch_reg);
2208 
2209 	      /* Now load from the memory location into our destination
2210 		 register.  */
2211 	      operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2212 	      pa_emit_move_sequence (operands, mode, scratch_reg);
2213 
2214 	      /* And add back in the constant part.  */
2215 	      if (const_part != NULL_RTX)
2216 		expand_inc (operand0, const_part);
2217 
2218 	      return 1;
2219 	    }
2220 
2221 	  if (flag_pic)
2222 	    {
2223 	      rtx_insn *insn;
2224 	      rtx temp;
2225 
2226 	      if (reload_in_progress || reload_completed)
2227 		{
2228 		  temp = scratch_reg ? scratch_reg : operand0;
2229 		  /* TEMP will hold an address and maybe the actual
2230 		     data.  We want it in WORD_MODE regardless of what mode it
2231 		     was originally given to us.  */
2232 		  temp = force_mode (word_mode, temp);
2233 		}
2234 	      else
2235 		temp = gen_reg_rtx (Pmode);
2236 
2237 	      /* Force (const (plus (symbol) (const_int))) to memory
2238 	         if the const_int will not fit in 14 bits.  Although
2239 		 this requires a relocation, the instruction sequence
2240 		 needed to load the value is shorter.  */
2241 	      if (GET_CODE (operand1) == CONST
2242 		       && GET_CODE (XEXP (operand1, 0)) == PLUS
2243 		       && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2244 		       && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2245 		{
2246 		  rtx x, m = force_const_mem (mode, operand1);
2247 
2248 		  x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2249 		  x = replace_equiv_address (m, x);
2250 		  insn = emit_move_insn (operand0, x);
2251 		}
2252 	      else
2253 		{
2254 		  operands[1] = legitimize_pic_address (operand1, mode, temp);
2255 		  if (REG_P (operand0) && REG_P (operands[1]))
2256 		    copy_reg_pointer (operand0, operands[1]);
2257 		  insn = emit_move_insn (operand0, operands[1]);
2258 		}
2259 
2260 	      /* Put a REG_EQUAL note on this insn.  */
2261 	      set_unique_reg_note (insn, REG_EQUAL, operand1);
2262 	    }
2263 	  /* On the HPPA, references to data space are supposed to use dp,
2264 	     register 27, but showing it in the RTL inhibits various cse
2265 	     and loop optimizations.  */
2266 	  else
2267 	    {
2268 	      rtx temp, set;
2269 
2270 	      if (reload_in_progress || reload_completed)
2271 		{
2272 		  temp = scratch_reg ? scratch_reg : operand0;
2273 		  /* TEMP will hold an address and maybe the actual
2274 		     data.  We want it in WORD_MODE regardless of what mode it
2275 		     was originally given to us.  */
2276 		  temp = force_mode (word_mode, temp);
2277 		}
2278 	      else
2279 		temp = gen_reg_rtx (mode);
2280 
2281 	      /* Loading a SYMBOL_REF into a register makes that register
2282 		 safe to be used as the base in an indexed address.
2283 
2284 		 Don't mark hard registers though.  That loses.  */
2285 	      if (GET_CODE (operand0) == REG
2286 		  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2287 		mark_reg_pointer (operand0, BITS_PER_UNIT);
2288 	      if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2289 		mark_reg_pointer (temp, BITS_PER_UNIT);
2290 
2291 	      if (ishighonly)
2292 		set = gen_rtx_SET (operand0, temp);
2293 	      else
2294 		set = gen_rtx_SET (operand0,
2295 				   gen_rtx_LO_SUM (mode, temp, operand1));
2296 
2297 	      emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2298 	      emit_insn (set);
2299 
2300 	    }
2301 	  return 1;
2302 	}
2303       else if (tls_referenced_p (operand1))
2304 	{
2305 	  rtx tmp = operand1;
2306 	  rtx addend = NULL;
2307 
2308 	  if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2309 	    {
2310 	      addend = XEXP (XEXP (tmp, 0), 1);
2311 	      tmp = XEXP (XEXP (tmp, 0), 0);
2312 	    }
2313 
2314 	  gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2315 	  tmp = legitimize_tls_address (tmp);
2316 	  if (addend)
2317 	    {
2318 	      tmp = gen_rtx_PLUS (mode, tmp, addend);
2319 	      tmp = force_operand (tmp, operands[0]);
2320 	    }
2321 	  operands[1] = tmp;
2322 	}
2323       else if (GET_CODE (operand1) != CONST_INT
2324 	       || !pa_cint_ok_for_move (UINTVAL (operand1)))
2325 	{
2326 	  rtx temp;
2327 	  rtx_insn *insn;
2328 	  rtx op1 = operand1;
2329 	  HOST_WIDE_INT value = 0;
2330 	  HOST_WIDE_INT insv = 0;
2331 	  int insert = 0;
2332 
2333 	  if (GET_CODE (operand1) == CONST_INT)
2334 	    value = INTVAL (operand1);
2335 
2336 	  if (TARGET_64BIT
2337 	      && GET_CODE (operand1) == CONST_INT
2338 	      && HOST_BITS_PER_WIDE_INT > 32
2339 	      && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2340 	    {
2341 	      HOST_WIDE_INT nval;
2342 
2343 	      /* Extract the low order 32 bits of the value and sign extend.
2344 		 If the new value is the same as the original value, we can
2345 		 can use the original value as-is.  If the new value is
2346 		 different, we use it and insert the most-significant 32-bits
2347 		 of the original value into the final result.  */
2348 	      nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2349 		      ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2350 	      if (value != nval)
2351 		{
2352 #if HOST_BITS_PER_WIDE_INT > 32
2353 		  insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2354 #endif
2355 		  insert = 1;
2356 		  value = nval;
2357 		  operand1 = GEN_INT (nval);
2358 		}
2359 	    }
2360 
2361 	  if (reload_in_progress || reload_completed)
2362 	    temp = scratch_reg ? scratch_reg : operand0;
2363 	  else
2364 	    temp = gen_reg_rtx (mode);
2365 
2366 	  /* We don't directly split DImode constants on 32-bit targets
2367 	     because PLUS uses an 11-bit immediate and the insn sequence
2368 	     generated is not as efficient as the one using HIGH/LO_SUM.  */
2369 	  if (GET_CODE (operand1) == CONST_INT
2370 	      && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2371 	      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2372 	      && !insert)
2373 	    {
2374 	      /* Directly break constant into high and low parts.  This
2375 		 provides better optimization opportunities because various
2376 		 passes recognize constants split with PLUS but not LO_SUM.
2377 		 We use a 14-bit signed low part except when the addition
2378 		 of 0x4000 to the high part might change the sign of the
2379 		 high part.  */
2380 	      HOST_WIDE_INT low = value & 0x3fff;
2381 	      HOST_WIDE_INT high = value & ~ 0x3fff;
2382 
2383 	      if (low >= 0x2000)
2384 		{
2385 		  if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2386 		    high += 0x2000;
2387 		  else
2388 		    high += 0x4000;
2389 		}
2390 
2391 	      low = value - high;
2392 
2393 	      emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
2394 	      operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2395 	    }
2396 	  else
2397 	    {
2398 	      emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2399 	      operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2400 	    }
2401 
2402 	  insn = emit_move_insn (operands[0], operands[1]);
2403 
2404 	  /* Now insert the most significant 32 bits of the value
2405 	     into the register.  When we don't have a second register
2406 	     available, it could take up to nine instructions to load
2407 	     a 64-bit integer constant.  Prior to reload, we force
2408 	     constants that would take more than three instructions
2409 	     to load to the constant pool.  During and after reload,
2410 	     we have to handle all possible values.  */
2411 	  if (insert)
2412 	    {
2413 	      /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2414 		 register and the value to be inserted is outside the
2415 		 range that can be loaded with three depdi instructions.  */
2416 	      if (temp != operand0 && (insv >= 16384 || insv < -16384))
2417 		{
2418 		  operand1 = GEN_INT (insv);
2419 
2420 		  emit_insn (gen_rtx_SET (temp,
2421 					  gen_rtx_HIGH (mode, operand1)));
2422 		  emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2423 		  if (mode == DImode)
2424 		    insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2425 						  const0_rtx, temp));
2426 		  else
2427 		    insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2428 						  const0_rtx, temp));
2429 		}
2430 	      else
2431 		{
2432 		  int len = 5, pos = 27;
2433 
2434 		  /* Insert the bits using the depdi instruction.  */
2435 		  while (pos >= 0)
2436 		    {
2437 		      HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2438 		      HOST_WIDE_INT sign = v5 < 0;
2439 
2440 		      /* Left extend the insertion.  */
2441 		      insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2442 		      while (pos > 0 && (insv & 1) == sign)
2443 			{
2444 			  insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2445 			  len += 1;
2446 			  pos -= 1;
2447 			}
2448 
2449 		      if (mode == DImode)
2450 			insn = emit_insn (gen_insvdi (operand0,
2451 						      GEN_INT (len),
2452 						      GEN_INT (pos),
2453 						      GEN_INT (v5)));
2454 		      else
2455 			insn = emit_insn (gen_insvsi (operand0,
2456 						      GEN_INT (len),
2457 						      GEN_INT (pos),
2458 						      GEN_INT (v5)));
2459 
2460 		      len = pos > 0 && pos < 5 ? pos : 5;
2461 		      pos -= len;
2462 		    }
2463 		}
2464 	    }
2465 
2466 	  set_unique_reg_note (insn, REG_EQUAL, op1);
2467 
2468 	  return 1;
2469 	}
2470     }
2471   /* Now have insn-emit do whatever it normally does.  */
2472   return 0;
2473 }
2474 
2475 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2476    it will need a link/runtime reloc).  */
2477 
2478 int
pa_reloc_needed(tree exp)2479 pa_reloc_needed (tree exp)
2480 {
2481   int reloc = 0;
2482 
2483   switch (TREE_CODE (exp))
2484     {
2485     case ADDR_EXPR:
2486       return 1;
2487 
2488     case POINTER_PLUS_EXPR:
2489     case PLUS_EXPR:
2490     case MINUS_EXPR:
2491       reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2492       reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2493       break;
2494 
2495     CASE_CONVERT:
2496     case NON_LVALUE_EXPR:
2497       reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2498       break;
2499 
2500     case CONSTRUCTOR:
2501       {
2502 	tree value;
2503 	unsigned HOST_WIDE_INT ix;
2504 
2505 	FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2506 	  if (value)
2507 	    reloc |= pa_reloc_needed (value);
2508       }
2509       break;
2510 
2511     case ERROR_MARK:
2512       break;
2513 
2514     default:
2515       break;
2516     }
2517   return reloc;
2518 }
2519 
2520 
2521 /* Return the best assembler insn template
2522    for moving operands[1] into operands[0] as a fullword.  */
2523 const char *
pa_singlemove_string(rtx * operands)2524 pa_singlemove_string (rtx *operands)
2525 {
2526   HOST_WIDE_INT intval;
2527 
2528   if (GET_CODE (operands[0]) == MEM)
2529     return "stw %r1,%0";
2530   if (GET_CODE (operands[1]) == MEM)
2531     return "ldw %1,%0";
2532   if (GET_CODE (operands[1]) == CONST_DOUBLE)
2533     {
2534       long i;
2535 
2536       gcc_assert (GET_MODE (operands[1]) == SFmode);
2537 
2538       /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2539 	 bit pattern.  */
2540       REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);
2541 
2542       operands[1] = GEN_INT (i);
2543       /* Fall through to CONST_INT case.  */
2544     }
2545   if (GET_CODE (operands[1]) == CONST_INT)
2546     {
2547       intval = INTVAL (operands[1]);
2548 
2549       if (VAL_14_BITS_P (intval))
2550 	return "ldi %1,%0";
2551       else if ((intval & 0x7ff) == 0)
2552 	return "ldil L'%1,%0";
2553       else if (pa_zdepi_cint_p (intval))
2554 	return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2555       else
2556 	return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2557     }
2558   return "copy %1,%0";
2559 }
2560 
2561 
2562 /* Compute position (in OP[1]) and width (in OP[2])
2563    useful for copying IMM to a register using the zdepi
2564    instructions.  Store the immediate value to insert in OP[0].  */
2565 static void
compute_zdepwi_operands(unsigned HOST_WIDE_INT imm,unsigned * op)2566 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2567 {
2568   int lsb, len;
2569 
2570   /* Find the least significant set bit in IMM.  */
2571   for (lsb = 0; lsb < 32; lsb++)
2572     {
2573       if ((imm & 1) != 0)
2574         break;
2575       imm >>= 1;
2576     }
2577 
2578   /* Choose variants based on *sign* of the 5-bit field.  */
2579   if ((imm & 0x10) == 0)
2580     len = (lsb <= 28) ? 4 : 32 - lsb;
2581   else
2582     {
2583       /* Find the width of the bitstring in IMM.  */
2584       for (len = 5; len < 32 - lsb; len++)
2585 	{
2586 	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2587 	    break;
2588 	}
2589 
2590       /* Sign extend IMM as a 5-bit value.  */
2591       imm = (imm & 0xf) - 0x10;
2592     }
2593 
2594   op[0] = imm;
2595   op[1] = 31 - lsb;
2596   op[2] = len;
2597 }
2598 
2599 /* Compute position (in OP[1]) and width (in OP[2])
2600    useful for copying IMM to a register using the depdi,z
2601    instructions.  Store the immediate value to insert in OP[0].  */
2602 
2603 static void
compute_zdepdi_operands(unsigned HOST_WIDE_INT imm,unsigned * op)2604 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2605 {
2606   int lsb, len, maxlen;
2607 
2608   maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2609 
2610   /* Find the least significant set bit in IMM.  */
2611   for (lsb = 0; lsb < maxlen; lsb++)
2612     {
2613       if ((imm & 1) != 0)
2614         break;
2615       imm >>= 1;
2616     }
2617 
2618   /* Choose variants based on *sign* of the 5-bit field.  */
2619   if ((imm & 0x10) == 0)
2620     len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2621   else
2622     {
2623       /* Find the width of the bitstring in IMM.  */
2624       for (len = 5; len < maxlen - lsb; len++)
2625 	{
2626 	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2627 	    break;
2628 	}
2629 
2630       /* Extend length if host is narrow and IMM is negative.  */
2631       if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2632 	len += 32;
2633 
2634       /* Sign extend IMM as a 5-bit value.  */
2635       imm = (imm & 0xf) - 0x10;
2636     }
2637 
2638   op[0] = imm;
2639   op[1] = 63 - lsb;
2640   op[2] = len;
2641 }
2642 
/* Output assembler code to perform a doubleword move insn
   with operands OPERANDS.  Earlier instructions in the sequence are
   emitted directly with output_asm_insn; the returned string is the
   template for the final instruction.  Note that several branches
   rewrite entries of OPERANDS in place before returning.  */

const char *
pa_output_move_double (rtx *operands)
{
  enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
  rtx latehalf[2];
  rtx addreg0 = 0, addreg1 = 0;
  int highonly = 0;

  /* First classify both operands.  */

  if (REG_P (operands[0]))
    optype0 = REGOP;
  else if (offsettable_memref_p (operands[0]))
    optype0 = OFFSOP;
  else if (GET_CODE (operands[0]) == MEM)
    optype0 = MEMOP;
  else
    optype0 = RNDOP;

  if (REG_P (operands[1]))
    optype1 = REGOP;
  else if (CONSTANT_P (operands[1]))
    optype1 = CNSTOP;
  else if (offsettable_memref_p (operands[1]))
    optype1 = OFFSOP;
  else if (GET_CODE (operands[1]) == MEM)
    optype1 = MEMOP;
  else
    optype1 = RNDOP;

  /* Check for the cases that the operand constraints are not
     supposed to allow to happen.  */
  gcc_assert (optype0 == REGOP || optype1 == REGOP);

  /* Handle copies between general and floating registers.  There is
     no direct GR<->FR doubleword copy insn, so go through memory at
     sp-16/sp-12.  */

  if (optype0 == REGOP && optype1 == REGOP
      && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
    {
      if (FP_REG_P (operands[0]))
	{
	  output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
	  output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
	  return "{fldds|fldd} -16(%%sp),%0";
	}
      else
	{
	  output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
	  output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
	  return "{ldws|ldw} -12(%%sp),%R0";
	}
    }

   /* Handle auto decrementing and incrementing loads and stores
     specifically, since the structure of the function doesn't work
     for them without major modification.  Do it better when we learn
     this port about the general inc/dec addressing of PA.
     (This was written by tege.  Chide him if it doesn't work.)  */

  if (optype0 == MEMOP)
    {
      /* We have to output the address syntax ourselves, since print_operand
	 doesn't deal with the addresses we want to use.  Fix this later.  */

      rtx addr = XEXP (operands[0], 0);
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);

	  /* Replace the MEM destination by its address register for
	     the %0 substitutions below.  */
	  operands[0] = XEXP (addr, 0);
	  gcc_assert (GET_CODE (operands[1]) == REG
		      && GET_CODE (operands[0]) == REG);

	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));

	  /* No overlap between high target register and address
	     register.  (We do this in a non-obvious way to
	     save a register file writeback)  */
	  if (GET_CODE (addr) == POST_INC)
	    return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
	  return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
	}
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);

	  operands[0] = XEXP (addr, 0);
	  gcc_assert (GET_CODE (operands[1]) == REG
		      && GET_CODE (operands[0]) == REG);

	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
	  /* No overlap between high target register and address
	     register.  (We do this in a non-obvious way to save a
	     register file writeback)  */
	  if (GET_CODE (addr) == PRE_INC)
	    return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
	  return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
	}
    }
  if (optype1 == MEMOP)
    {
      /* We have to output the address syntax ourselves, since print_operand
	 doesn't deal with the addresses we want to use.  Fix this later.  */

      rtx addr = XEXP (operands[1], 0);
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);

	  /* Replace the MEM source by its address register for the
	     %1 substitutions below.  */
	  operands[1] = XEXP (addr, 0);
	  gcc_assert (GET_CODE (operands[0]) == REG
		      && GET_CODE (operands[1]) == REG);

	  if (!reg_overlap_mentioned_p (high_reg, addr))
	    {
	      /* No overlap between high target register and address
		 register.  (We do this in a non-obvious way to
		 save a register file writeback)  */
	      if (GET_CODE (addr) == POST_INC)
		return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
	      return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
	    }
	  else
	    {
	      /* This is an undefined situation.  We should load into the
		 address register *and* update that register.  Probably
		 we don't need to handle this at all.  */
	      if (GET_CODE (addr) == POST_INC)
		return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
	      return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
	    }
	}
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);

	  operands[1] = XEXP (addr, 0);
	  gcc_assert (GET_CODE (operands[0]) == REG
		      && GET_CODE (operands[1]) == REG);

	  if (!reg_overlap_mentioned_p (high_reg, addr))
	    {
	      /* No overlap between high target register and address
		 register.  (We do this in a non-obvious way to
		 save a register file writeback)  */
	      if (GET_CODE (addr) == PRE_INC)
		return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
	      return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
	    }
	  else
	    {
	      /* This is an undefined situation.  We should load into the
		 address register *and* update that register.  Probably
		 we don't need to handle this at all.  */
	      if (GET_CODE (addr) == PRE_INC)
		return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
	      return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
	    }
	}
      else if (GET_CODE (addr) == PLUS
	       && GET_CODE (XEXP (addr, 0)) == MULT)
	{
	  /* Scaled-index address: materialize it in the left (high)
	     half of the destination, then load high half last so the
	     address survives the first load.  */
	  rtx xoperands[4];

	  /* Load address into left half of destination register.  */
	  xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
	  xoperands[1] = XEXP (addr, 1);
	  xoperands[2] = XEXP (XEXP (addr, 0), 0);
	  xoperands[3] = XEXP (XEXP (addr, 0), 1);
	  output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
			   xoperands);
	  return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
	}
      else if (GET_CODE (addr) == PLUS
	       && REG_P (XEXP (addr, 0))
	       && REG_P (XEXP (addr, 1)))
	{
	  /* Reg+reg address: same trick as the scaled-index case.  */
	  rtx xoperands[3];

	  /* Load address into left half of destination register.  */
	  xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
	  xoperands[1] = XEXP (addr, 0);
	  xoperands[2] = XEXP (addr, 1);
	  output_asm_insn ("{addl|add,l} %1,%2,%0",
			   xoperands);
	  return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
	}
    }

  /* If an operand is an unoffsettable memory ref, find a register
     we can increment temporarily to make it refer to the second word.  */

  if (optype0 == MEMOP)
    addreg0 = find_addr_reg (XEXP (operands[0], 0));

  if (optype1 == MEMOP)
    addreg1 = find_addr_reg (XEXP (operands[1], 0));

  /* Ok, we can do one word at a time.
     Normally we do the low-numbered word first.

     In either case, set up in LATEHALF the operands to use
     for the high-numbered word and in some cases alter the
     operands in OPERANDS to be suitable for the low-numbered word.  */

  if (optype0 == REGOP)
    latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
  else if (optype0 == OFFSOP)
    latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
  else
    latehalf[0] = operands[0];

  if (optype1 == REGOP)
    latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
  else if (optype1 == OFFSOP)
    latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
  else if (optype1 == CNSTOP)
    {
      if (GET_CODE (operands[1]) == HIGH)
	{
	  /* Strip the HIGH wrapper; remember to emit only the "ldil"
	     for the second word (see HIGHONLY use below).  */
	  operands[1] = XEXP (operands[1], 0);
	  highonly = 1;
	}
      split_double (operands[1], &operands[1], &latehalf[1]);
    }
  else
    latehalf[1] = operands[1];

  /* If the first move would clobber the source of the second one,
     do them in the other order.

     This can happen in two cases:

	mem -> register where the first half of the destination register
	is the same register used in the memory's address.  Reload
	can create such insns.

	mem in this case will be either register indirect or register
	indirect plus a valid offset.

	register -> register move where REGNO(dst) == REGNO(src + 1)
	someone (Tim/Tege?) claimed this can happen for parameter loads.

     Handle mem -> register case first.  */
  if (optype0 == REGOP
      && (optype1 == MEMOP || optype1 == OFFSOP)
      && refers_to_regno_p (REGNO (operands[0]), operands[1]))
    {
      /* Do the late half first.  */
      if (addreg1)
	output_asm_insn ("ldo 4(%0),%0", &addreg1);
      output_asm_insn (pa_singlemove_string (latehalf), latehalf);

      /* Then clobber.  */
      if (addreg1)
	output_asm_insn ("ldo -4(%0),%0", &addreg1);
      return pa_singlemove_string (operands);
    }

  /* Now handle register -> register case.  */
  if (optype0 == REGOP && optype1 == REGOP
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    {
      output_asm_insn (pa_singlemove_string (latehalf), latehalf);
      return pa_singlemove_string (operands);
    }

  /* Normal case: do the two words, low-numbered first.  */

  output_asm_insn (pa_singlemove_string (operands), operands);

  /* Make any unoffsettable addresses point at high-numbered word.  */
  if (addreg0)
    output_asm_insn ("ldo 4(%0),%0", &addreg0);
  if (addreg1)
    output_asm_insn ("ldo 4(%0),%0", &addreg1);

  /* Do high-numbered word.  */
  if (highonly)
    output_asm_insn ("ldil L'%1,%0", latehalf);
  else
    output_asm_insn (pa_singlemove_string (latehalf), latehalf);

  /* Undo the adds we just did.  */
  if (addreg0)
    output_asm_insn ("ldo -4(%0),%0", &addreg0);
  if (addreg1)
    output_asm_insn ("ldo -4(%0),%0", &addreg1);

  return "";
}
2937 
2938 const char *
pa_output_fp_move_double(rtx * operands)2939 pa_output_fp_move_double (rtx *operands)
2940 {
2941   if (FP_REG_P (operands[0]))
2942     {
2943       if (FP_REG_P (operands[1])
2944 	  || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2945 	output_asm_insn ("fcpy,dbl %f1,%0", operands);
2946       else
2947 	output_asm_insn ("fldd%F1 %1,%0", operands);
2948     }
2949   else if (FP_REG_P (operands[1]))
2950     {
2951       output_asm_insn ("fstd%F0 %1,%0", operands);
2952     }
2953   else
2954     {
2955       rtx xoperands[2];
2956 
2957       gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2958 
2959       /* This is a pain.  You have to be prepared to deal with an
2960 	 arbitrary address here including pre/post increment/decrement.
2961 
2962 	 so avoid this in the MD.  */
2963       gcc_assert (GET_CODE (operands[0]) == REG);
2964 
2965       xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2966       xoperands[0] = operands[0];
2967       output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2968     }
2969   return "";
2970 }
2971 
2972 /* Return a REG that occurs in ADDR with coefficient 1.
2973    ADDR can be effectively incremented by incrementing REG.  */
2974 
2975 static rtx
find_addr_reg(rtx addr)2976 find_addr_reg (rtx addr)
2977 {
2978   while (GET_CODE (addr) == PLUS)
2979     {
2980       if (GET_CODE (XEXP (addr, 0)) == REG)
2981 	addr = XEXP (addr, 0);
2982       else if (GET_CODE (XEXP (addr, 1)) == REG)
2983 	addr = XEXP (addr, 1);
2984       else if (CONSTANT_P (XEXP (addr, 0)))
2985 	addr = XEXP (addr, 1);
2986       else if (CONSTANT_P (XEXP (addr, 1)))
2987 	addr = XEXP (addr, 0);
2988       else
2989 	gcc_unreachable ();
2990     }
2991   gcc_assert (GET_CODE (addr) == REG);
2992   return addr;
2993 }
2994 
/* Emit code to perform a block move.

   OPERANDS[0] is the destination pointer as a REG, clobbered.
   OPERANDS[1] is the source pointer as a REG, clobbered.
   OPERANDS[2] is a register for temporary storage.
   OPERANDS[3] is a register for temporary storage.
   OPERANDS[4] is the size as a CONST_INT
   OPERANDS[5] is the alignment safe to use, as a CONST_INT.
   OPERANDS[6] is another temporary register.

   The loop in each alignment case is unrolled by two; ".-12" in the
   addib branches back three 4-byte insns to the top of the loop.
   compute_cpymem_length must be kept in sync with the insn counts
   emitted here.  */

const char *
pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
{
  int align = INTVAL (operands[5]);
  unsigned long n_bytes = INTVAL (operands[4]);

  /* We can't move more than a word at a time because the PA
     has no longer integer move insns.  (Could use fp mem ops?)  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);

  /* Note that we know each loop below will execute at least twice
     (else we would have open-coded the copy).  */
  switch (align)
    {
      case 8:
	/* Pre-adjust the loop counter.  */
	operands[4] = GEN_INT (n_bytes - 16);
	output_asm_insn ("ldi %4,%2", operands);

	/* Copying loop.  */
	output_asm_insn ("ldd,ma 8(%1),%3", operands);
	output_asm_insn ("ldd,ma 8(%1),%6", operands);
	output_asm_insn ("std,ma %3,8(%0)", operands);
	output_asm_insn ("addib,>= -16,%2,.-12", operands);
	output_asm_insn ("std,ma %6,8(%0)", operands);

	/* Handle the residual.  There could be up to 7 bytes of
	   residual to copy!  */
	if (n_bytes % 16 != 0)
	  {
	    /* Reuse operands[4] as the stdby offset for the partial
	       final doubleword.  */
	    operands[4] = GEN_INT (n_bytes % 8);
	    if (n_bytes % 16 >= 8)
	      output_asm_insn ("ldd,ma 8(%1),%3", operands);
	    if (n_bytes % 8 != 0)
	      output_asm_insn ("ldd 0(%1),%6", operands);
	    if (n_bytes % 16 >= 8)
	      output_asm_insn ("std,ma %3,8(%0)", operands);
	    if (n_bytes % 8 != 0)
	      output_asm_insn ("stdby,e %6,%4(%0)", operands);
	  }
	return "";

      case 4:
	/* Pre-adjust the loop counter.  */
	operands[4] = GEN_INT (n_bytes - 8);
	output_asm_insn ("ldi %4,%2", operands);

	/* Copying loop.  */
	output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
	output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
	output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
	output_asm_insn ("addib,>= -8,%2,.-12", operands);
	output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);

	/* Handle the residual.  There could be up to 7 bytes of
	   residual to copy!  */
	if (n_bytes % 8 != 0)
	  {
	    operands[4] = GEN_INT (n_bytes % 4);
	    if (n_bytes % 8 >= 4)
	      output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
	    if (n_bytes % 4 != 0)
	      output_asm_insn ("ldw 0(%1),%6", operands);
	    if (n_bytes % 8 >= 4)
	      output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
	    if (n_bytes % 4 != 0)
	      output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
	  }
	return "";

      case 2:
	/* Pre-adjust the loop counter.  */
	operands[4] = GEN_INT (n_bytes - 4);
	output_asm_insn ("ldi %4,%2", operands);

	/* Copying loop.  */
	output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
	output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
	output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
	output_asm_insn ("addib,>= -4,%2,.-12", operands);
	output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);

	/* Handle the residual.  */
	if (n_bytes % 4 != 0)
	  {
	    if (n_bytes % 4 >= 2)
	      output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
	    if (n_bytes % 2 != 0)
	      output_asm_insn ("ldb 0(%1),%6", operands);
	    if (n_bytes % 4 >= 2)
	      output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
	    if (n_bytes % 2 != 0)
	      output_asm_insn ("stb %6,0(%0)", operands);
	  }
	return "";

      case 1:
	/* Pre-adjust the loop counter.  */
	operands[4] = GEN_INT (n_bytes - 2);
	output_asm_insn ("ldi %4,%2", operands);

	/* Copying loop.  */
	output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
	output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
	output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
	output_asm_insn ("addib,>= -2,%2,.-12", operands);
	output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);

	/* Handle the residual.  */
	if (n_bytes % 2 != 0)
	  {
	    output_asm_insn ("ldb 0(%1),%3", operands);
	    output_asm_insn ("stb %3,0(%0)", operands);
	  }
	return "";

      default:
	gcc_unreachable ();
    }
}
3126 
3127 /* Count the number of insns necessary to handle this block move.
3128 
3129    Basic structure is the same as emit_block_move, except that we
3130    count insns rather than emit them.  */
3131 
3132 static int
compute_cpymem_length(rtx_insn * insn)3133 compute_cpymem_length (rtx_insn *insn)
3134 {
3135   rtx pat = PATTERN (insn);
3136   unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
3137   unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
3138   unsigned int n_insns = 0;
3139 
3140   /* We can't move more than four bytes at a time because the PA
3141      has no longer integer move insns.  (Could use fp mem ops?)  */
3142   if (align > (TARGET_64BIT ? 8 : 4))
3143     align = (TARGET_64BIT ? 8 : 4);
3144 
3145   /* The basic copying loop.  */
3146   n_insns = 6;
3147 
3148   /* Residuals.  */
3149   if (n_bytes % (2 * align) != 0)
3150     {
3151       if ((n_bytes % (2 * align)) >= align)
3152 	n_insns += 2;
3153 
3154       if ((n_bytes % align) != 0)
3155 	n_insns += 2;
3156     }
3157 
3158   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
3159   return n_insns * 4;
3160 }
3161 
/* Emit code to perform a block clear.

   OPERANDS[0] is the destination pointer as a REG, clobbered.
   OPERANDS[1] is a register for temporary storage.
   OPERANDS[2] is the size as a CONST_INT
   OPERANDS[3] is the alignment safe to use, as a CONST_INT.

   The loop in each alignment case is unrolled by two; ".-4" in the
   addib branches back one 4-byte insn, so the store before the addib
   is re-executed each iteration.  compute_clrmem_length must be kept
   in sync with the insn counts emitted here.  */

const char *
pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
{
  int align = INTVAL (operands[3]);
  unsigned long n_bytes = INTVAL (operands[2]);

  /* We can't clear more than a word at a time because the PA
     has no longer integer move insns.  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);

  /* Note that we know each loop below will execute at least twice
     (else we would have open-coded the copy).  */
  switch (align)
    {
      case 8:
	/* Pre-adjust the loop counter.  */
	operands[2] = GEN_INT (n_bytes - 16);
	output_asm_insn ("ldi %2,%1", operands);

	/* Loop.  */
	output_asm_insn ("std,ma %%r0,8(%0)", operands);
	output_asm_insn ("addib,>= -16,%1,.-4", operands);
	output_asm_insn ("std,ma %%r0,8(%0)", operands);

	/* Handle the residual.  There could be up to 7 bytes of
	   residual to copy!  */
	if (n_bytes % 16 != 0)
	  {
	    /* Reuse operands[2] as the stdby offset for the partial
	       final doubleword.  */
	    operands[2] = GEN_INT (n_bytes % 8);
	    if (n_bytes % 16 >= 8)
	      output_asm_insn ("std,ma %%r0,8(%0)", operands);
	    if (n_bytes % 8 != 0)
	      output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
	  }
	return "";

      case 4:
	/* Pre-adjust the loop counter.  */
	operands[2] = GEN_INT (n_bytes - 8);
	output_asm_insn ("ldi %2,%1", operands);

	/* Loop.  */
	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
	output_asm_insn ("addib,>= -8,%1,.-4", operands);
	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);

	/* Handle the residual.  There could be up to 7 bytes of
	   residual to copy!  */
	if (n_bytes % 8 != 0)
	  {
	    operands[2] = GEN_INT (n_bytes % 4);
	    if (n_bytes % 8 >= 4)
	      output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
	    if (n_bytes % 4 != 0)
	      output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
	  }
	return "";

      case 2:
	/* Pre-adjust the loop counter.  */
	operands[2] = GEN_INT (n_bytes - 4);
	output_asm_insn ("ldi %2,%1", operands);

	/* Loop.  */
	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
	output_asm_insn ("addib,>= -4,%1,.-4", operands);
	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);

	/* Handle the residual.  */
	if (n_bytes % 4 != 0)
	  {
	    if (n_bytes % 4 >= 2)
	      output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
	    if (n_bytes % 2 != 0)
	      output_asm_insn ("stb %%r0,0(%0)", operands);
	  }
	return "";

      case 1:
	/* Pre-adjust the loop counter.  */
	operands[2] = GEN_INT (n_bytes - 2);
	output_asm_insn ("ldi %2,%1", operands);

	/* Loop.  */
	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
	output_asm_insn ("addib,>= -2,%1,.-4", operands);
	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);

	/* Handle the residual.  */
	if (n_bytes % 2 != 0)
	  output_asm_insn ("stb %%r0,0(%0)", operands);

	return "";

      default:
	gcc_unreachable ();
    }
}
3268 
3269 /* Count the number of insns necessary to handle this block move.
3270 
3271    Basic structure is the same as emit_block_move, except that we
3272    count insns rather than emit them.  */
3273 
3274 static int
compute_clrmem_length(rtx_insn * insn)3275 compute_clrmem_length (rtx_insn *insn)
3276 {
3277   rtx pat = PATTERN (insn);
3278   unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3279   unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3280   unsigned int n_insns = 0;
3281 
3282   /* We can't clear more than a word at a time because the PA
3283      has no longer integer move insns.  */
3284   if (align > (TARGET_64BIT ? 8 : 4))
3285     align = (TARGET_64BIT ? 8 : 4);
3286 
3287   /* The basic loop.  */
3288   n_insns = 4;
3289 
3290   /* Residuals.  */
3291   if (n_bytes % (2 * align) != 0)
3292     {
3293       if ((n_bytes % (2 * align)) >= align)
3294 	n_insns++;
3295 
3296       if ((n_bytes % align) != 0)
3297 	n_insns++;
3298     }
3299 
3300   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
3301   return n_insns * 4;
3302 }
3303 
3304 
3305 const char *
pa_output_and(rtx * operands)3306 pa_output_and (rtx *operands)
3307 {
3308   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3309     {
3310       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3311       int ls0, ls1, ms0, p, len;
3312 
3313       for (ls0 = 0; ls0 < 32; ls0++)
3314 	if ((mask & (1 << ls0)) == 0)
3315 	  break;
3316 
3317       for (ls1 = ls0; ls1 < 32; ls1++)
3318 	if ((mask & (1 << ls1)) != 0)
3319 	  break;
3320 
3321       for (ms0 = ls1; ms0 < 32; ms0++)
3322 	if ((mask & (1 << ms0)) == 0)
3323 	  break;
3324 
3325       gcc_assert (ms0 == 32);
3326 
3327       if (ls1 == 32)
3328 	{
3329 	  len = ls0;
3330 
3331 	  gcc_assert (len);
3332 
3333 	  operands[2] = GEN_INT (len);
3334 	  return "{extru|extrw,u} %1,31,%2,%0";
3335 	}
3336       else
3337 	{
3338 	  /* We could use this `depi' for the case above as well, but `depi'
3339 	     requires one more register file access than an `extru'.  */
3340 
3341 	  p = 31 - ls0;
3342 	  len = ls1 - ls0;
3343 
3344 	  operands[2] = GEN_INT (p);
3345 	  operands[3] = GEN_INT (len);
3346 	  return "{depi|depwi} 0,%2,%3,%0";
3347 	}
3348     }
3349   else
3350     return "and %1,%2,%0";
3351 }
3352 
3353 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3354    storing the result in operands[0].  */
3355 const char *
pa_output_64bit_and(rtx * operands)3356 pa_output_64bit_and (rtx *operands)
3357 {
3358   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3359     {
3360       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3361       int ls0, ls1, ms0, p, len;
3362 
3363       for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3364 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3365 	  break;
3366 
3367       for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3368 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3369 	  break;
3370 
3371       for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3372 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3373 	  break;
3374 
3375       gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3376 
3377       if (ls1 == HOST_BITS_PER_WIDE_INT)
3378 	{
3379 	  len = ls0;
3380 
3381 	  gcc_assert (len);
3382 
3383 	  operands[2] = GEN_INT (len);
3384 	  return "extrd,u %1,63,%2,%0";
3385 	}
3386       else
3387 	{
3388 	  /* We could use this `depi' for the case above as well, but `depi'
3389 	     requires one more register file access than an `extru'.  */
3390 
3391 	  p = 63 - ls0;
3392 	  len = ls1 - ls0;
3393 
3394 	  operands[2] = GEN_INT (p);
3395 	  operands[3] = GEN_INT (len);
3396 	  return "depdi 0,%2,%3,%0";
3397 	}
3398     }
3399   else
3400     return "and %1,%2,%0";
3401 }
3402 
3403 const char *
pa_output_ior(rtx * operands)3404 pa_output_ior (rtx *operands)
3405 {
3406   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3407   int bs0, bs1, p, len;
3408 
3409   if (INTVAL (operands[2]) == 0)
3410     return "copy %1,%0";
3411 
3412   for (bs0 = 0; bs0 < 32; bs0++)
3413     if ((mask & (1 << bs0)) != 0)
3414       break;
3415 
3416   for (bs1 = bs0; bs1 < 32; bs1++)
3417     if ((mask & (1 << bs1)) == 0)
3418       break;
3419 
3420   gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3421 
3422   p = 31 - bs0;
3423   len = bs1 - bs0;
3424 
3425   operands[2] = GEN_INT (p);
3426   operands[3] = GEN_INT (len);
3427   return "{depi|depwi} -1,%2,%3,%0";
3428 }
3429 
/* Return a string to perform a bitwise inclusive-or of operands[1] with
   the CONST_INT operands[2], storing the result in operands[0].  The
   mask must be zero or a single contiguous run of ones (checked by the
   assert); operands[2]/operands[3] are rewritten with the deposit
   position and length.  */
const char *
pa_output_64bit_ior (rtx *operands)
{
  unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
  int bs0, bs1, p, len;

  if (INTVAL (operands[2]) == 0)
    return "copy %1,%0";

  /* BS0 = position of the lowest set bit.  */
  for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
      break;

  /* BS1 = position of the first clear bit above BS0.  */
  for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
      break;

  /* Nothing may be set above the run; otherwise a single deposit
     can't form the mask.  */
  gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
	      || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);

  /* Deposit LEN ones at (big-endian) bit position P.  */
  p = 63 - bs0;
  len = bs1 - bs0;

  operands[2] = GEN_INT (p);
  operands[3] = GEN_INT (len);
  return "depdi -1,%2,%3,%0";
}
3459 
/* Target hook for assembling integer objects.  This code handles
   aligned SI and DI integers specially since function references
   must be preceded by P%.  Returns true if the object was emitted
   here, otherwise the result of default_assemble_integer.  */

static bool
pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  bool result;
  tree decl = NULL;

  /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to
     call assemble_external and set the SYMBOL_REF_DECL to NULL before
     calling output_addr_const.  Otherwise, it may call assemble_external
     in the midst of outputting the assembler code for the SYMBOL_REF.
     We restore the SYMBOL_REF_DECL after the output is done.  */
  if (GET_CODE (x) == SYMBOL_REF)
    {
      decl = SYMBOL_REF_DECL (x);
      if (decl)
	{
	  assemble_external (decl);
	  SET_SYMBOL_REF_DECL (x, NULL);
	}
    }

  if (size == UNITS_PER_WORD
      && aligned_p
      && function_label_operand (x, VOIDmode))
    {
      /* SIZE equals UNITS_PER_WORD here, so ".dword" is used for an
	 8-byte word and ".word" otherwise.  */
      fputs (size == 8? "\t.dword\t" : "\t.word\t", asm_out_file);

      /* We don't want an OPD when generating fast indirect calls.  */
      if (!TARGET_FAST_INDIRECT_CALLS)
	fputs ("P%", asm_out_file);

      output_addr_const (asm_out_file, x);
      fputc ('\n', asm_out_file);
      result = true;
    }
  else
    result = default_assemble_integer (x, size, aligned_p);

  /* Restore the decl cleared above.  */
  if (decl)
    SET_SYMBOL_REF_DECL (x, decl);

  return result;
}
3507 
/* Output an ascii string of SIZE bytes from P to FILE as one or more
   .STRING directives, escaping quote, backslash and non-printable
   bytes as \xNN.  */
void
pa_output_ascii (FILE *file, const char *p, int size)
{
  int i;
  int chars_output;
  unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */

  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string.  Sigh.  Even worse, it seems that the
     restriction is in number of input characters (see \xnn &
     \whatever).  So we have to do this very carefully.  */

  fputs ("\t.STRING \"", file);

  chars_output = 0;
  for (i = 0; i < size; i += 4)
    {
      int co = 0;
      int io;
      int chunk = (size - i < 4) ? size - i : 4;

      /* Escape up to four input bytes into PARTIAL_OUTPUT.  */
      for (io = 0; io < chunk; io++)
	{
	  unsigned int c = (unsigned char) p[i + io];

	  if (c == '\"' || c == '\\')
	    partial_output[co++] = '\\';
	  if (c >= ' ' && c < 0177)
	    partial_output[co++] = c;
	  else
	    {
	      /* Non-printable byte: emit \xNN with lower-case hex.  */
	      unsigned int hexd;

	      partial_output[co++] = '\\';
	      partial_output[co++] = 'x';
	      hexd = c / 16 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	      hexd = c % 16 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	    }
	}

      /* Start a fresh directive before the input line gets too long.  */
      if (chars_output + co > 243)
	{
	  fputs ("\"\n\t.STRING \"", file);
	  chars_output = 0;
	}
      fwrite (partial_output, 1, (size_t) co, file);
      chars_output += co;
    }
  fputs ("\"\n", file);
}
3563 
3564 /* Try to rewrite floating point comparisons & branches to avoid
3565    useless add,tr insns.
3566 
3567    CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3568    to see if FPCC is dead.  CHECK_NOTES is nonzero for the
3569    first attempt to remove useless add,tr insns.  It is zero
3570    for the second pass as reorg sometimes leaves bogus REG_DEAD
3571    notes lying around.
3572 
3573    When CHECK_NOTES is zero we can only eliminate add,tr insns
3574    when there's a 1:1 correspondence between fcmp and ftest/fbranch
3575    instructions.  */
3576 static void
remove_useless_addtr_insns(int check_notes)3577 remove_useless_addtr_insns (int check_notes)
3578 {
3579   rtx_insn *insn;
3580   static int pass = 0;
3581 
3582   /* This is fairly cheap, so always run it when optimizing.  */
3583   if (optimize > 0)
3584     {
3585       int fcmp_count = 0;
3586       int fbranch_count = 0;
3587 
3588       /* Walk all the insns in this function looking for fcmp & fbranch
3589 	 instructions.  Keep track of how many of each we find.  */
3590       for (insn = get_insns (); insn; insn = next_insn (insn))
3591 	{
3592 	  rtx tmp;
3593 
3594 	  /* Ignore anything that isn't an INSN or a JUMP_INSN.  */
3595 	  if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
3596 	    continue;
3597 
3598 	  tmp = PATTERN (insn);
3599 
3600 	  /* It must be a set.  */
3601 	  if (GET_CODE (tmp) != SET)
3602 	    continue;
3603 
3604 	  /* If the destination is CCFP, then we've found an fcmp insn.  */
3605 	  tmp = SET_DEST (tmp);
3606 	  if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3607 	    {
3608 	      fcmp_count++;
3609 	      continue;
3610 	    }
3611 
3612 	  tmp = PATTERN (insn);
3613 	  /* If this is an fbranch instruction, bump the fbranch counter.  */
3614 	  if (GET_CODE (tmp) == SET
3615 	      && SET_DEST (tmp) == pc_rtx
3616 	      && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3617 	      && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3618 	      && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3619 	      && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3620 	    {
3621 	      fbranch_count++;
3622 	      continue;
3623 	    }
3624 	}
3625 
3626 
3627       /* Find all floating point compare + branch insns.  If possible,
3628 	 reverse the comparison & the branch to avoid add,tr insns.  */
3629       for (insn = get_insns (); insn; insn = next_insn (insn))
3630 	{
3631 	  rtx tmp;
3632 	  rtx_insn *next;
3633 
3634 	  /* Ignore anything that isn't an INSN.  */
3635 	  if (! NONJUMP_INSN_P (insn))
3636 	    continue;
3637 
3638 	  tmp = PATTERN (insn);
3639 
3640 	  /* It must be a set.  */
3641 	  if (GET_CODE (tmp) != SET)
3642 	    continue;
3643 
3644 	  /* The destination must be CCFP, which is register zero.  */
3645 	  tmp = SET_DEST (tmp);
3646 	  if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3647 	    continue;
3648 
3649 	  /* INSN should be a set of CCFP.
3650 
3651 	     See if the result of this insn is used in a reversed FP
3652 	     conditional branch.  If so, reverse our condition and
3653 	     the branch.  Doing so avoids useless add,tr insns.  */
3654 	  next = next_insn (insn);
3655 	  while (next)
3656 	    {
3657 	      /* Jumps, calls and labels stop our search.  */
3658 	      if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
3659 		break;
3660 
3661 	      /* As does another fcmp insn.  */
3662 	      if (NONJUMP_INSN_P (next)
3663 		  && GET_CODE (PATTERN (next)) == SET
3664 		  && GET_CODE (SET_DEST (PATTERN (next))) == REG
3665 		  && REGNO (SET_DEST (PATTERN (next))) == 0)
3666 		break;
3667 
3668 	      next = next_insn (next);
3669 	    }
3670 
3671 	  /* Is NEXT_INSN a branch?  */
3672 	  if (next && JUMP_P (next))
3673 	    {
3674 	      rtx pattern = PATTERN (next);
3675 
3676 	      /* If it a reversed fp conditional branch (e.g. uses add,tr)
3677 		 and CCFP dies, then reverse our conditional and the branch
3678 		 to avoid the add,tr.  */
3679 	      if (GET_CODE (pattern) == SET
3680 		  && SET_DEST (pattern) == pc_rtx
3681 		  && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3682 		  && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3683 		  && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3684 		  && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3685 		  && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3686 		  && (fcmp_count == fbranch_count
3687 		      || (check_notes
3688 			  && find_regno_note (next, REG_DEAD, 0))))
3689 		{
3690 		  /* Reverse the branch.  */
3691 		  tmp = XEXP (SET_SRC (pattern), 1);
3692 		  XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3693 		  XEXP (SET_SRC (pattern), 2) = tmp;
3694 		  INSN_CODE (next) = -1;
3695 
3696 		  /* Reverse our condition.  */
3697 		  tmp = PATTERN (insn);
3698 		  PUT_CODE (XEXP (tmp, 1),
3699 			    (reverse_condition_maybe_unordered
3700 			     (GET_CODE (XEXP (tmp, 1)))));
3701 		}
3702 	    }
3703 	}
3704     }
3705 
3706   pass = !pass;
3707 
3708 }
3709 
3710 /* You may have trouble believing this, but this is the 32 bit HP-PA
3711    stack layout.  Wow.
3712 
3713    Offset		Contents
3714 
3715    Variable arguments	(optional; any number may be allocated)
3716 
3717    SP-(4*(N+9))		arg word N
3718    	:		    :
3719       SP-56		arg word 5
3720       SP-52		arg word 4
3721 
3722    Fixed arguments	(must be allocated; may remain unused)
3723 
3724       SP-48		arg word 3
3725       SP-44		arg word 2
3726       SP-40		arg word 1
3727       SP-36		arg word 0
3728 
3729    Frame Marker
3730 
3731       SP-32		External Data Pointer (DP)
3732       SP-28		External sr4
3733       SP-24		External/stub RP (RP')
3734       SP-20		Current RP
3735       SP-16		Static Link
3736       SP-12		Clean up
3737       SP-8		Calling Stub RP (RP'')
3738       SP-4		Previous SP
3739 
3740    Top of Frame
3741 
3742       SP-0		Stack Pointer (points to next available address)
3743 
3744 */
3745 
3746 /* This function saves registers as follows.  Registers marked with ' are
3747    this function's registers (as opposed to the previous function's).
3748    If a frame_pointer isn't needed, r4 is saved as a general register;
3749    the space for the frame pointer is still allocated, though, to keep
3750    things simple.
3751 
3752 
3753    Top of Frame
3754 
3755        SP (FP')		Previous FP
3756        SP + 4		Alignment filler (sigh)
3757        SP + 8		Space for locals reserved here.
3758        .
3759        .
3760        .
3761        SP + n		All call saved registers used.
3762        .
3763        .
3764        .
3765        SP + o		All call saved fp registers used.
3766        .
3767        .
3768        .
3769        SP + p (SP')	points to next available address.
3770 
3771 */
3772 
3773 /* Global variables set by output_function_prologue().  */
3774 /* Size of frame.  Need to know this to emit return insns from
3775    leaf procedures.  */
3776 static HOST_WIDE_INT actual_fsize, local_fsize;
/* Nonzero if any callee floating point register must be saved; set by
   pa_compute_frame_size (via pa_expand_prologue) and tested there when
   emitting the FP register stores.  */
3777 static int save_fregs;
3778 
3779 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3780    Handle case where DISP > 8k by using the add_high_const patterns.
3781 
3782    Note in DISP > 8k case, we will leave the high part of the address
3783    in %r1.  There is code in expand_hppa_{prologue,epilogue} that knows this.*/
3784 
3785 static void
store_reg(int reg,HOST_WIDE_INT disp,int base)3786 store_reg (int reg, HOST_WIDE_INT disp, int base)
3787 {
3788   rtx dest, src, basereg;
3789   rtx_insn *insn;
3790 
3791   src = gen_rtx_REG (word_mode, reg);
3792   basereg = gen_rtx_REG (Pmode, base);
  /* Small displacement: a single reg+d store reaches the slot.  */
3793   if (VAL_14_BITS_P (disp))
3794     {
3795       dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3796       insn = emit_move_insn (dest, src);
3797     }
3798   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3799     {
      /* Displacement wider than 32 bits: materialize it in %r1, add the
	 base into %r1, then store through %r1.  The frame note records
	 the address computation as basereg + delta for the unwinder.  */
3800       rtx delta = GEN_INT (disp);
3801       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3802 
3803       emit_move_insn (tmpreg, delta);
3804       insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3805       if (DO_FRAME_NOTES)
3806 	{
3807 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3808 			gen_rtx_SET (tmpreg,
3809 				     gen_rtx_PLUS (Pmode, basereg, delta)));
3810 	  RTX_FRAME_RELATED_P (insn) = 1;
3811 	}
3812       dest = gen_rtx_MEM (word_mode, tmpreg);
3813       insn = emit_move_insn (dest, src);
3814     }
3815   else
3816     {
      /* Medium displacement: HIGH/LO_SUM pair.  The high part of the
	 address is deliberately left in %r1 (see the header comment);
	 the prologue/epilogue expansion code depends on that.  */
3817       rtx delta = GEN_INT (disp);
3818       rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3819       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3820 
3821       emit_move_insn (tmpreg, high);
3822       dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3823       insn = emit_move_insn (dest, src);
3824       if (DO_FRAME_NOTES)
3825 	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3826 		      gen_rtx_SET (gen_rtx_MEM (word_mode,
3827 						gen_rtx_PLUS (word_mode,
3828 							      basereg,
3829 							      delta)),
3830 				   src));
3831     }
3832 
  /* Mark the store itself as frame related for unwind information.  */
3833   if (DO_FRAME_NOTES)
3834     RTX_FRAME_RELATED_P (insn) = 1;
3835 }
3836 
3837 /* Emit RTL to store REG at the memory location specified by BASE and then
3838    add MOD to BASE.  MOD must be <= 8k.  */
3839 
3840 static void
store_reg_modify(int base,int reg,HOST_WIDE_INT mod)3841 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3842 {
3843   rtx basereg, srcreg, delta;
3844   rtx_insn *insn;
3845 
3846   gcc_assert (VAL_14_BITS_P (mod));
3847 
3848   basereg = gen_rtx_REG (Pmode, base);
3849   srcreg = gen_rtx_REG (word_mode, reg);
3850   delta = GEN_INT (mod);
3851 
3852   insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3853   if (DO_FRAME_NOTES)
3854     {
3855       RTX_FRAME_RELATED_P (insn) = 1;
3856 
3857       /* RTX_FRAME_RELATED_P must be set on each frame related set
3858 	 in a parallel with more than one element.  */
3859       RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3860       RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3861     }
3862 }
3863 
3864 /* Emit RTL to set REG to the value specified by BASE+DISP.  Handle case
3865    where DISP > 8k by using the add_high_const patterns.  NOTE indicates
3866    whether to add a frame note or not.
3867 
3868    In the DISP > 8k case, we leave the high part of the address in %r1.
3869    There is code in expand_hppa_{prologue,epilogue} that knows about this.  */
3870 
3871 static void
set_reg_plus_d(int reg,int base,HOST_WIDE_INT disp,int note)3872 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3873 {
3874   rtx_insn *insn;
3875 
  /* Small displacement: one add-immediate style move suffices.  */
3876   if (VAL_14_BITS_P (disp))
3877     {
3878       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3879 			     plus_constant (Pmode,
3880 					    gen_rtx_REG (Pmode, base), disp));
3881     }
3882   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3883     {
      /* Displacement wider than 32 bits: load it into %r1 first, then
	 add the base.  The REG_FRAME_RELATED_EXPR note describes the
	 net effect (base + delta into %r1) for the unwinder.  */
3884       rtx basereg = gen_rtx_REG (Pmode, base);
3885       rtx delta = GEN_INT (disp);
3886       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3887 
3888       emit_move_insn (tmpreg, delta);
3889       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3890 			     gen_rtx_PLUS (Pmode, tmpreg, basereg));
3891       if (DO_FRAME_NOTES)
3892 	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3893 		      gen_rtx_SET (tmpreg,
3894 				   gen_rtx_PLUS (Pmode, basereg, delta)));
3895     }
3896   else
3897     {
      /* Medium displacement: HIGH/LO_SUM pair; the high part of the
	 address is left in %r1, which the prologue/epilogue expansion
	 code relies on (see header comment).  */
3898       rtx basereg = gen_rtx_REG (Pmode, base);
3899       rtx delta = GEN_INT (disp);
3900       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3901 
3902       emit_move_insn (tmpreg,
3903 		      gen_rtx_PLUS (Pmode, basereg,
3904 				    gen_rtx_HIGH (Pmode, delta)));
3905       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3906 			     gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3907     }
3908 
  /* Only attach a frame note when the caller asked for one.  */
3909   if (DO_FRAME_NOTES && note)
3910     RTX_FRAME_RELATED_P (insn) = 1;
3911 }
3912 
3913 HOST_WIDE_INT
pa_compute_frame_size(poly_int64 size,int * fregs_live)3914 pa_compute_frame_size (poly_int64 size, int *fregs_live)
3915 {
3916   int freg_saved = 0;
3917   int i, j;
3918 
3919   /* The code in pa_expand_prologue and pa_expand_epilogue must
3920      be consistent with the rounding and size calculation done here.
3921      Change them at the same time.  */
3922 
3923   /* We do our own stack alignment.  First, round the size of the
3924      stack locals up to a word boundary.  */
3925   size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3926 
3927   /* Space for previous frame pointer + filler.  If any frame is
3928      allocated, we need to add in the TARGET_STARTING_FRAME_OFFSET.  We
3929      waste some space here for the sake of HP compatibility.  The
3930      first slot is only used when the frame pointer is needed.  */
3931   if (size || frame_pointer_needed)
3932     size += pa_starting_frame_offset ();
3933 
3934   /* If the current function calls __builtin_eh_return, then we need
3935      to allocate stack space for registers that will hold data for
3936      the exception handler.  */
3937   if (DO_FRAME_NOTES && crtl->calls_eh_return)
3938     {
3939       unsigned int i;
3940 
3941       for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3942 	continue;
3943       size += i * UNITS_PER_WORD;
3944     }
3945 
3946   /* Account for space used by the callee general register saves.  */
3947   for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3948     if (df_regs_ever_live_p (i))
3949       size += UNITS_PER_WORD;
3950 
3951   /* Account for space used by the callee floating point register saves.  */
3952   for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3953     if (df_regs_ever_live_p (i)
3954 	|| (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3955       {
3956 	freg_saved = 1;
3957 
3958 	/* We always save both halves of the FP register, so always
3959 	   increment the frame size by 8 bytes.  */
3960 	size += 8;
3961       }
3962 
3963   /* If any of the floating registers are saved, account for the
3964      alignment needed for the floating point register save block.  */
3965   if (freg_saved)
3966     {
3967       size = (size + 7) & ~7;
3968       if (fregs_live)
3969 	*fregs_live = 1;
3970     }
3971 
3972   /* The various ABIs include space for the outgoing parameters in the
3973      size of the current function's stack frame.  We don't need to align
3974      for the outgoing arguments as their alignment is set by the final
3975      rounding for the frame as a whole.  */
3976   size += crtl->outgoing_args_size;
3977 
3978   /* Allocate space for the fixed frame marker.  This space must be
3979      allocated for any function that makes calls or allocates
3980      stack space.  */
3981   if (!crtl->is_leaf || size)
3982     size += TARGET_64BIT ? 48 : 32;
3983 
3984   /* Finally, round to the preferred stack boundary.  */
3985   return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3986 	  & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3987 }
3988 
3989 /* Output function label, and associated .PROC and .CALLINFO statements.  */
3990 
3991 void
pa_output_function_label(FILE * file)3992 pa_output_function_label (FILE *file)
3993 {
3994   /* The function's label and associated .PROC must never be
3995      separated and must be output *after* any profiling declarations
3996      to avoid changing spaces/subspaces within a procedure.  */
3997   ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3998   fputs ("\t.PROC\n", file);
3999 
4000   /* pa_expand_prologue does the dirty work now.  We just need
4001      to output the assembler directives which denote the start
4002      of a function.  */
4003   fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
4004   if (crtl->is_leaf)
4005     fputs (",NO_CALLS", file);
4006   else
4007     fputs (",CALLS", file);
4008   if (rp_saved)
4009     fputs (",SAVE_RP", file);
4010 
4011   /* The SAVE_SP flag is used to indicate that register %r3 is stored
4012      at the beginning of the frame and that it is used as the frame
4013      pointer for the frame.  We do this because our current frame
4014      layout doesn't conform to that specified in the HP runtime
4015      documentation and we need a way to indicate to programs such as
4016      GDB where %r3 is saved.  The SAVE_SP flag was chosen because it
4017      isn't used by HP compilers but is supported by the assembler.
4018      However, SAVE_SP is supposed to indicate that the previous stack
4019      pointer has been saved in the frame marker.  */
4020   if (frame_pointer_needed)
4021     fputs (",SAVE_SP", file);
4022 
4023   /* Pass on information about the number of callee register saves
4024      performed in the prologue.
4025 
4026      The compiler is supposed to pass the highest register number
4027      saved, the assembler then has to adjust that number before
4028      entering it into the unwind descriptor (to account for any
4029      caller saved registers with lower register numbers than the
4030      first callee saved register).  */
4031   if (gr_saved)
4032     fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
4033 
4034   if (fr_saved)
4035     fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
4036 
4037   fputs ("\n\t.ENTRY\n", file);
4038 }
4039 
4040 /* Output function prologue.  */
4041 
4042 static void
pa_output_function_prologue(FILE * file)4043 pa_output_function_prologue (FILE *file)
4044 {
  /* Emit the function label plus the .PROC/.CALLINFO directives, then
     run the add,tr elimination with CHECK_NOTES zero (REG_DEAD notes
     are not consulted -- see remove_useless_addtr_insns).  */
4045   pa_output_function_label (file);
4046   remove_useless_addtr_insns (0);
4047 }
4048 
4049 /* The label is output by ASM_DECLARE_FUNCTION_NAME on linux.  */
4050 
4051 static void
pa_linux_output_function_prologue(FILE * file ATTRIBUTE_UNUSED)4052 pa_linux_output_function_prologue (FILE *file ATTRIBUTE_UNUSED)
4053 {
  /* On linux the label is emitted by ASM_DECLARE_FUNCTION_NAME (see
     comment above), so only the add,tr cleanup pass runs here.  */
4054   remove_useless_addtr_insns (0);
4055 }
4056 
/* Expand the function prologue into RTL: save RP, allocate the frame
   (setting up the frame pointer when needed), save the callee-saved
   general registers and, if required, the callee-saved floating point
   registers.  Sets the globals rp_saved, gr_saved, fr_saved and
   save_fregs, later consumed by pa_output_function_label.  */
4057 void
pa_expand_prologue(void)4058 pa_expand_prologue (void)
4059 {
4060   int merge_sp_adjust_with_store = 0;
4061   HOST_WIDE_INT size = get_frame_size ();
4062   HOST_WIDE_INT offset;
4063   int i;
4064   rtx tmpreg;
4065   rtx_insn *insn;
4066 
4067   gr_saved = 0;
4068   fr_saved = 0;
4069   save_fregs = 0;
4070 
4071   /* Compute total size for frame pointer, filler, locals and rounding to
4072      the next word boundary.  Similar code appears in pa_compute_frame_size
4073      and must be changed in tandem with this code.  */
4074   local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
4075   if (local_fsize || frame_pointer_needed)
4076     local_fsize += pa_starting_frame_offset ();
4077 
4078   actual_fsize = pa_compute_frame_size (size, &save_fregs);
4079   if (flag_stack_usage_info)
4080     current_function_static_stack_size = actual_fsize;
4081 
4082   /* Compute a few things we will use often.  */
4083   tmpreg = gen_rtx_REG (word_mode, 1);
4084 
4085   /* Save RP first.  The calling conventions manual states RP will
4086      always be stored into the caller's frame at sp - 20 or sp - 16
4087      depending on which ABI is in use.  */
4088   if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
4089     {
4090       store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
4091       rp_saved = true;
4092     }
4093   else
4094     rp_saved = false;
4095 
4096   /* Allocate the local frame and set up the frame pointer if needed.  */
4097   if (actual_fsize != 0)
4098     {
4099       if (frame_pointer_needed)
4100 	{
4101 	  /* Copy the old frame pointer temporarily into %r1.  Set up the
4102 	     new stack pointer, then store away the saved old frame pointer
4103 	     into the stack at sp and at the same time update the stack
4104 	     pointer by actual_fsize bytes.  Two versions, first
4105 	     handles small (<8k) frames.  The second handles large (>=8k)
4106 	     frames.  */
4107 	  insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
4108 	  if (DO_FRAME_NOTES)
4109 	    RTX_FRAME_RELATED_P (insn) = 1;
4110 
4111 	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4112 	  if (DO_FRAME_NOTES)
4113 	    RTX_FRAME_RELATED_P (insn) = 1;
4114 
4115 	  if (VAL_14_BITS_P (actual_fsize))
4116 	    store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize)
4117 	  else
4118 	    {
4119 	      /* It is incorrect to store the saved frame pointer at *sp,
4120 		 then increment sp (writes beyond the current stack boundary).
4121 
4122 		 So instead use stwm to store at *sp and post-increment the
4123 		 stack pointer as an atomic operation.  Then increment sp to
4124 		 finish allocating the new frame.  */
4125 	      HOST_WIDE_INT adjust1 = 8192 - 64;
4126 	      HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
4127 
4128 	      store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
4129 	      set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4130 			      adjust2, 1);
4131 	    }
4132 
4133 	  /* We set SAVE_SP in frames that need a frame pointer.  Thus,
4134 	     we need to store the previous stack pointer (frame pointer)
4135 	     into the frame marker on targets that use the HP unwind
4136 	     library.  This allows the HP unwind library to be used to
4137 	     unwind GCC frames.  However, we are not fully compatible
4138 	     with the HP library because our frame layout differs from
4139 	     that specified in the HP runtime specification.
4140 
4141 	     We don't want a frame note on this instruction as the frame
4142 	     marker moves during dynamic stack allocation.
4143 
4144 	     This instruction also serves as a blockage to prevent
4145 	     register spills from being scheduled before the stack
4146 	     pointer is raised.  This is necessary as we store
4147 	     registers using the frame pointer as a base register,
4148 	     and the frame pointer is set before sp is raised.  */
4149 	  if (TARGET_HPUX_UNWIND_LIBRARY)
4150 	    {
4151 	      rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
4152 				       GEN_INT (TARGET_64BIT ? -8 : -4));
4153 
4154 	      emit_move_insn (gen_rtx_MEM (word_mode, addr),
4155 			      hard_frame_pointer_rtx);
4156 	    }
4157 	  else
4158 	    emit_insn (gen_blockage ());
4159 	}
4160       /* no frame pointer needed.  */
4161       else
4162 	{
4163 	  /* In some cases we can perform the first callee register save
4164 	     and allocating the stack frame at the same time.   If so, just
4165 	     make a note of it and defer allocating the frame until saving
4166 	     the callee registers.  */
4167 	  if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
4168 	    merge_sp_adjust_with_store = 1;
4169 	  /* Cannot optimize.  Adjust the stack frame by actual_fsize
4170 	     bytes.  */
4171 	  else
4172 	    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4173 			    actual_fsize, 1);
4174 	}
4175     }
4176 
4177   /* Normal register save.
4178 
4179      Do not save the frame pointer in the frame_pointer_needed case.  It
4180      was done earlier.  */
4181   if (frame_pointer_needed)
4182     {
4183       offset = local_fsize;
4184 
4185       /* Saving the EH return data registers in the frame is the simplest
4186 	 way to get the frame unwind information emitted.  We put them
4187 	 just before the general registers.  */
4188       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4189 	{
4190 	  unsigned int i, regno;
4191 
4192 	  for (i = 0; ; ++i)
4193 	    {
4194 	      regno = EH_RETURN_DATA_REGNO (i);
4195 	      if (regno == INVALID_REGNUM)
4196 		break;
4197 
4198 	      store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4199 	      offset += UNITS_PER_WORD;
4200 	    }
4201 	}
4202 
4203       for (i = 18; i >= 4; i--)
4204 	if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4205 	  {
4206 	    store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4207 	    offset += UNITS_PER_WORD;
4208 	    gr_saved++;
4209 	  }
4210       /* Account for %r3 which is saved in a special place.  */
4211       gr_saved++;
4212     }
4213   /* No frame pointer needed.  */
4214   else
4215     {
4216       offset = local_fsize - actual_fsize;
4217 
4218       /* Saving the EH return data registers in the frame is the simplest
4219          way to get the frame unwind information emitted.  */
4220       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4221 	{
4222 	  unsigned int i, regno;
4223 
4224 	  for (i = 0; ; ++i)
4225 	    {
4226 	      regno = EH_RETURN_DATA_REGNO (i);
4227 	      if (regno == INVALID_REGNUM)
4228 		break;
4229 
4230 	      /* If merge_sp_adjust_with_store is nonzero, then we can
4231 		 optimize the first save.  */
4232 	      if (merge_sp_adjust_with_store)
4233 		{
4234 		  store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4235 		  merge_sp_adjust_with_store = 0;
4236 		}
4237 	      else
4238 		store_reg (regno, offset, STACK_POINTER_REGNUM);
4239 	      offset += UNITS_PER_WORD;
4240 	    }
4241 	}
4242 
4243       for (i = 18; i >= 3; i--)
4244 	if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4245 	  {
4246 	    /* If merge_sp_adjust_with_store is nonzero, then we can
4247 	       optimize the first GR save.  */
4248 	    if (merge_sp_adjust_with_store)
4249 	      {
4250 		store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4251 		merge_sp_adjust_with_store = 0;
4252 	      }
4253 	    else
4254 	      store_reg (i, offset, STACK_POINTER_REGNUM);
4255 	    offset += UNITS_PER_WORD;
4256 	    gr_saved++;
4257 	  }
4258 
4259       /* If we wanted to merge the SP adjustment with a GR save, but we never
4260 	 did any GR saves, then just emit the adjustment here.  */
4261       if (merge_sp_adjust_with_store)
4262 	set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4263 			actual_fsize, 1);
4264     }
4265 
4266   /* The hppa calling conventions say that %r19, the pic offset
4267      register, is saved at sp - 32 (in this function's frame)
4268      when generating PIC code.  FIXME:  What is the correct thing
4269      to do for functions which make no calls and allocate no
4270      frame?  Do we need to allocate a frame, or can we just omit
4271      the save?   For now we'll just omit the save.
4272 
4273      We don't want a note on this insn as the frame marker can
4274      move if there is a dynamic stack allocation.  */
4275   if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4276     {
4277       rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4278 
4279       emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4280 
4281     }
4282 
4283   /* Align pointer properly (doubleword boundary).  */
4284   offset = (offset + 7) & ~7;
4285 
4286   /* Floating point register store.  */
4287   if (save_fregs)
4288     {
4289       rtx base;
4290 
4291       /* First get the frame or stack pointer to the start of the FP register
4292 	 save area.  */
4293       if (frame_pointer_needed)
4294 	{
4295 	  set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4296 	  base = hard_frame_pointer_rtx;
4297 	}
4298       else
4299 	{
4300 	  set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4301 	  base = stack_pointer_rtx;
4302 	}
4303 
4304       /* Now actually save the FP registers.  */
      /* %r1 (tmpreg) now points at the FP save area; each store below
	 goes through a POST_INC of %r1 so it advances automatically.  */
4305       for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4306 	{
4307 	  if (df_regs_ever_live_p (i)
4308 	      || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4309 	    {
4310 	      rtx addr, reg;
4311 	      rtx_insn *insn;
4312 	      addr = gen_rtx_MEM (DFmode,
4313 				  gen_rtx_POST_INC (word_mode, tmpreg));
4314 	      reg = gen_rtx_REG (DFmode, i);
4315 	      insn = emit_move_insn (addr, reg);
4316 	      if (DO_FRAME_NOTES)
4317 		{
4318 		  RTX_FRAME_RELATED_P (insn) = 1;
4319 		  if (TARGET_64BIT)
4320 		    {
4321 		      rtx mem = gen_rtx_MEM (DFmode,
4322 					     plus_constant (Pmode, base,
4323 							    offset));
4324 		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4325 				    gen_rtx_SET (mem, reg));
4326 		    }
4327 		  else
4328 		    {
		      /* On 32-bit targets describe the save as two
			 SFmode halves so the unwind info records both
			 words of the register.  */
4329 		      rtx meml = gen_rtx_MEM (SFmode,
4330 					      plus_constant (Pmode, base,
4331 							     offset));
4332 		      rtx memr = gen_rtx_MEM (SFmode,
4333 					      plus_constant (Pmode, base,
4334 							     offset + 4));
4335 		      rtx regl = gen_rtx_REG (SFmode, i);
4336 		      rtx regr = gen_rtx_REG (SFmode, i + 1);
4337 		      rtx setl = gen_rtx_SET (meml, regl);
4338 		      rtx setr = gen_rtx_SET (memr, regr);
4339 		      rtvec vec;
4340 
4341 		      RTX_FRAME_RELATED_P (setl) = 1;
4342 		      RTX_FRAME_RELATED_P (setr) = 1;
4343 		      vec = gen_rtvec (2, setl, setr);
4344 		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4345 				    gen_rtx_SEQUENCE (VOIDmode, vec));
4346 		    }
4347 		}
4348 	      offset += GET_MODE_SIZE (DFmode);
4349 	      fr_saved++;
4350 	    }
4351 	}
4352     }
4353 }
4354 
4355 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4356    Handle case where DISP > 8k by using the add_high_const patterns.  */
4357 
4358 static void
load_reg(int reg,HOST_WIDE_INT disp,int base)4359 load_reg (int reg, HOST_WIDE_INT disp, int base)
4360 {
4361   rtx dest = gen_rtx_REG (word_mode, reg);
4362   rtx basereg = gen_rtx_REG (Pmode, base);
4363   rtx src;
4364 
4365   if (VAL_14_BITS_P (disp))
4366     src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4367   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4368     {
4369       rtx delta = GEN_INT (disp);
4370       rtx tmpreg = gen_rtx_REG (Pmode, 1);
4371 
4372       emit_move_insn (tmpreg, delta);
4373       if (TARGET_DISABLE_INDEXING)
4374 	{
4375 	  emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4376 	  src = gen_rtx_MEM (word_mode, tmpreg);
4377 	}
4378       else
4379 	src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4380     }
4381   else
4382     {
4383       rtx delta = GEN_INT (disp);
4384       rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4385       rtx tmpreg = gen_rtx_REG (Pmode, 1);
4386 
4387       emit_move_insn (tmpreg, high);
4388       src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4389     }
4390 
4391   emit_move_insn (dest, src);
4392 }
4393 
4394 /* Update the total code bytes output to the text section.  */
4395 
4396 static void
update_total_code_bytes(unsigned int nbytes)4397 update_total_code_bytes (unsigned int nbytes)
4398 {
4399   if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4400       && !IN_NAMED_SECTION_P (cfun->decl))
4401     {
4402       unsigned int old_total = total_code_bytes;
4403 
4404       total_code_bytes += nbytes;
4405 
4406       /* Be prepared to handle overflows.  */
4407       if (old_total > total_code_bytes)
4408         total_code_bytes = UINT_MAX;
4409     }
4410 }
4411 
4412 /* This function generates the assembly code for function exit.
4413    Args are as for output_function_prologue ().
4414 
4415    The function epilogue should not depend on the current stack
4416    pointer!  It should use the frame pointer only.  This is mandatory
4417    because of alloca; we also take advantage of it to omit stack
4418    adjustments before returning.  */
4419 
4420 static void
pa_output_function_epilogue(FILE * file)4421 pa_output_function_epilogue (FILE *file)
4422 {
4423   rtx_insn *insn = get_last_insn ();
4424   bool extra_nop;
4425 
4426   /* pa_expand_epilogue does the dirty work now.  We just need
4427      to output the assembler directives which denote the end
4428      of a function.
4429 
4430      To make debuggers happy, emit a nop if the epilogue was completely
4431      eliminated due to a volatile call as the last insn in the
4432      current function.  That way the return address (in %r2) will
4433      always point to a valid instruction in the current function.  */
4434 
4435   /* Get the last real insn.  */
4436   if (NOTE_P (insn))
4437     insn = prev_real_insn (insn);
4438 
4439   /* If it is a sequence, then look inside.  */
4440   if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
4441     insn = as_a <rtx_sequence *> (PATTERN (insn))-> insn (0);
4442 
4443   /* If insn is a CALL_INSN, then it must be a call to a volatile
4444      function (otherwise there would be epilogue insns).  */
4445   if (insn && CALL_P (insn))
4446     {
4447       fputs ("\tnop\n", file);
4448       extra_nop = true;
4449     }
4450   else
4451     extra_nop = false;
4452 
4453   fputs ("\t.EXIT\n\t.PROCEND\n", file);
4454 
4455   if (TARGET_SOM && TARGET_GAS)
4456     {
4457       /* We are done with this subspace except possibly for some additional
4458 	 debug information.  Forget that we are in this subspace to ensure
4459 	 that the next function is output in its own subspace.  */
4460       in_section = NULL;
4461       cfun->machine->in_nsubspa = 2;
4462     }
4463 
4464   /* Thunks do their own insn accounting.  */
4465   if (cfun->is_thunk)
4466     return;
4467 
  /* Estimate the byte length of this function (including the extra nop,
     rounded to the function boundary) so the code-size total stays
     current; fall back to UINT_MAX when insn addresses are unknown.  */
4468   if (INSN_ADDRESSES_SET_P ())
4469     {
4470       last_address = extra_nop ? 4 : 0;
4471       insn = get_last_nonnote_insn ();
4472       if (insn)
4473 	{
4474 	  last_address += INSN_ADDRESSES (INSN_UID (insn));
4475 	  if (INSN_P (insn))
4476 	    last_address += insn_default_length (insn);
4477 	}
4478       last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4479 		      & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4480     }
4481   else
4482     last_address = UINT_MAX;
4483 
4484   /* Finally, update the total number of code bytes output so far.  */
4485   update_total_code_bytes (last_address);
4486 }
4487 
/* Emit RTL for the epilogue of the current function: restore the
   return pointer and the callee-saved general and floating-point
   registers, then deallocate the stack frame.  Where possible the
   final stack adjustment is merged with one of the register loads.  */
void
pa_expand_epilogue (void)
{
  rtx tmpreg;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT ret_off = 0;
  int i;
  /* Nonzero means the sp adjustment will be folded into the load of
     this register (see gen_pre_load below); register number when set.  */
  int merge_sp_adjust_with_load = 0;

  /* We will use this often.  */
  tmpreg = gen_rtx_REG (word_mode, 1);

  /* Try to restore RP early to avoid load/use interlocks when
     RP gets used in the return (bv) instruction.  This appears to still
     be necessary even when we schedule the prologue and epilogue.  */
  if (rp_saved)
    {
      /* RP's save slot is at -20 (-16 in 64-bit mode) in the frame.  */
      ret_off = TARGET_64BIT ? -16 : -20;
      if (frame_pointer_needed)
	{
	  load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
	  /* ret_off == 0 marks RP as already restored.  */
	  ret_off = 0;
	}
      else
	{
	  /* No frame pointer, and stack is smaller than 8k.  */
	  if (VAL_14_BITS_P (ret_off - actual_fsize))
	    {
	      load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
	      ret_off = 0;
	    }
	}
    }

  /* General register restores.  */
  if (frame_pointer_needed)
    {
      offset = local_fsize;

      /* If the current function calls __builtin_eh_return, then we need
         to restore the saved EH data registers.  */
      if (DO_FRAME_NOTES && crtl->calls_eh_return)
	{
	  unsigned int i, regno;

	  for (i = 0; ; ++i)
	    {
	      regno = EH_RETURN_DATA_REGNO (i);
	      if (regno == INVALID_REGNUM)
		break;

	      load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}

      for (i = 18; i >= 4; i--)
	if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
	  {
	    load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
	    offset += UNITS_PER_WORD;
	  }
    }
  else
    {
      /* Without a frame pointer, saves are addressed relative to the
	 (still-decremented) stack pointer.  */
      offset = local_fsize - actual_fsize;

      /* If the current function calls __builtin_eh_return, then we need
         to restore the saved EH data registers.  */
      if (DO_FRAME_NOTES && crtl->calls_eh_return)
	{
	  unsigned int i, regno;

	  for (i = 0; ; ++i)
	    {
	      regno = EH_RETURN_DATA_REGNO (i);
	      if (regno == INVALID_REGNUM)
		break;

	      /* Only for the first load.
	         merge_sp_adjust_with_load holds the register load
	         with which we will merge the sp adjustment.  */
	      if (merge_sp_adjust_with_load == 0
		  && local_fsize == 0
		  && VAL_14_BITS_P (-actual_fsize))
	        merge_sp_adjust_with_load = regno;
	      else
		load_reg (regno, offset, STACK_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}

      for (i = 18; i >= 3; i--)
	{
	  if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
	    {
	      /* Only for the first load.
	         merge_sp_adjust_with_load holds the register load
	         with which we will merge the sp adjustment.  */
	      if (merge_sp_adjust_with_load == 0
		  && local_fsize == 0
		  && VAL_14_BITS_P (-actual_fsize))
	        merge_sp_adjust_with_load = i;
	      else
		load_reg (i, offset, STACK_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}
    }

  /* Align pointer properly (doubleword boundary).  */
  offset = (offset + 7) & ~7;

  /* FP register restores.  */
  if (save_fregs)
    {
      /* Adjust the register to index off of.  */
      if (frame_pointer_needed)
	set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
      else
	set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);

      /* Actually do the restores now.  Each load post-increments
	 %r1 (tmpreg) so the saves are walked sequentially.  */
      for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
	if (df_regs_ever_live_p (i)
	    || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
	  {
	    rtx src = gen_rtx_MEM (DFmode,
				   gen_rtx_POST_INC (word_mode, tmpreg));
	    rtx dest = gen_rtx_REG (DFmode, i);
	    emit_move_insn (dest, src);
	  }
    }

  /* Emit a blockage insn here to keep these insns from being moved to
     an earlier spot in the epilogue, or into the main instruction stream.

     This is necessary as we must not cut the stack back before all the
     restores are finished.  */
  emit_insn (gen_blockage ());

  /* Reset stack pointer (and possibly frame pointer).  The stack
     pointer is initially set to fp + 64 to avoid a race condition.  */
  if (frame_pointer_needed)
    {
      rtx delta = GEN_INT (-64);

      set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
      emit_insn (gen_pre_load (hard_frame_pointer_rtx,
			       stack_pointer_rtx, delta));
    }
  /* If we were deferring a callee register restore, do it now.  */
  else if (merge_sp_adjust_with_load)
    {
      rtx delta = GEN_INT (-actual_fsize);
      rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);

      emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
    }
  else if (actual_fsize != 0)
    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
		    - actual_fsize, 0);

  /* If we haven't restored %r2 yet (no frame pointer, and a stack
     frame greater than 8k), do so now.  */
  if (ret_off != 0)
    load_reg (2, ret_off, STACK_POINTER_REGNUM);

  if (DO_FRAME_NOTES && crtl->calls_eh_return)
    {
      rtx sa = EH_RETURN_STACKADJ_RTX;

      /* The blockage keeps the stack-adjust subtraction from being
	 scheduled before the register restores above.  */
      emit_insn (gen_blockage ());
      emit_insn (TARGET_64BIT
		 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
		 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
    }
}
4666 
4667 bool
pa_can_use_return_insn(void)4668 pa_can_use_return_insn (void)
4669 {
4670   if (!reload_completed)
4671     return false;
4672 
4673   if (frame_pointer_needed)
4674     return false;
4675 
4676   if (df_regs_ever_live_p (2))
4677     return false;
4678 
4679   if (crtl->profile)
4680     return false;
4681 
4682   return pa_compute_frame_size (get_frame_size (), 0) == 0;
4683 }
4684 
/* Return a pseudo register holding the value the PIC offset table
   register had on entry to the current function.  */
rtx
hppa_pic_save_rtx (void)
{
  return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
}
4690 
4691 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4692 #define NO_DEFERRED_PROFILE_COUNTERS 0
4693 #endif
4694 
4695 
/* Funcdef numbers of functions whose deferred profile counters have
   not been emitted yet.  Filled by hppa_profile_hook and drained by
   output_deferred_profile_counters.  */
static vec<int> funcdef_nos;
4698 
4699 /* Output deferred profile counters.  */
4700 static void
output_deferred_profile_counters(void)4701 output_deferred_profile_counters (void)
4702 {
4703   unsigned int i;
4704   int align, n;
4705 
4706   if (funcdef_nos.is_empty ())
4707    return;
4708 
4709   switch_to_section (data_section);
4710   align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4711   ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4712 
4713   for (i = 0; funcdef_nos.iterate (i, &n); i++)
4714     {
4715       targetm.asm_out.internal_label (asm_out_file, "LP", n);
4716       assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4717     }
4718 
4719   funcdef_nos.release ();
4720 }
4721 
4722 void
hppa_profile_hook(int label_no)4723 hppa_profile_hook (int label_no)
4724 {
4725   rtx_code_label *label_rtx = gen_label_rtx ();
4726   int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE);
4727   rtx arg_bytes, begin_label_rtx, mcount, sym;
4728   rtx_insn *call_insn;
4729   char begin_label_name[16];
4730   bool use_mcount_pcrel_call;
4731 
4732   /* Set up call destination.  */
4733   sym = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
4734   pa_encode_label (sym);
4735   mcount = gen_rtx_MEM (Pmode, sym);
4736 
4737   /* If we can reach _mcount with a pc-relative call, we can optimize
4738      loading the address of the current function.  This requires linker
4739      long branch stub support.  */
4740   if (!TARGET_PORTABLE_RUNTIME
4741       && !TARGET_LONG_CALLS
4742       && (TARGET_SOM || flag_function_sections))
4743     use_mcount_pcrel_call = TRUE;
4744   else
4745     use_mcount_pcrel_call = FALSE;
4746 
4747   ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4748 			       label_no);
4749   begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4750 
4751   emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4752 
4753   if (!use_mcount_pcrel_call)
4754     {
4755       /* The address of the function is loaded into %r25 with an instruction-
4756 	 relative sequence that avoids the use of relocations.  We use SImode
4757 	 for the address of the function in both 32 and 64-bit code to avoid
4758 	 having to provide DImode versions of the lcla2 pattern.  */
4759       if (TARGET_PA_20)
4760 	emit_insn (gen_lcla2 (gen_rtx_REG (SImode, 25), label_rtx));
4761       else
4762 	emit_insn (gen_lcla1 (gen_rtx_REG (SImode, 25), label_rtx));
4763     }
4764 
4765   if (!NO_DEFERRED_PROFILE_COUNTERS)
4766     {
4767       rtx count_label_rtx, addr, r24;
4768       char count_label_name[16];
4769 
4770       funcdef_nos.safe_push (label_no);
4771       ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4772       count_label_rtx = gen_rtx_SYMBOL_REF (Pmode,
4773 					    ggc_strdup (count_label_name));
4774 
4775       addr = force_reg (Pmode, count_label_rtx);
4776       r24 = gen_rtx_REG (Pmode, 24);
4777       emit_move_insn (r24, addr);
4778 
4779       arg_bytes = GEN_INT (TARGET_64BIT ? 24 : 12);
4780       if (use_mcount_pcrel_call)
4781 	call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4782 						     begin_label_rtx));
4783       else
4784 	call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4785 
4786       use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4787     }
4788   else
4789     {
4790       arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8);
4791       if (use_mcount_pcrel_call)
4792 	call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4793 						     begin_label_rtx));
4794       else
4795 	call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4796     }
4797 
4798   use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4799   use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4800 
4801   /* Indicate the _mcount call cannot throw, nor will it execute a
4802      non-local goto.  */
4803   make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4804 
4805   /* Allocate space for fixed arguments.  */
4806   if (reg_parm_stack_space > crtl->outgoing_args_size)
4807     crtl->outgoing_args_size = reg_parm_stack_space;
4808 }
4809 
4810 /* Fetch the return address for the frame COUNT steps up from
4811    the current frame, after the prologue.  FRAMEADDR is the
4812    frame pointer of the COUNT frame.
4813 
4814    We want to ignore any export stub remnants here.  To handle this,
4815    we examine the code at the return address, and if it is an export
4816    stub, we return a memory rtx for the stub return address stored
4817    at frame-24.
4818 
4819    The value returned is used in two different ways:
4820 
4821 	1. To find a function's caller.
4822 
4823 	2. To change the return address for a function.
4824 
4825    This function handles most instances of case 1; however, it will
4826    fail if there are two levels of stubs to execute on the return
4827    path.  The only way I believe that can happen is if the return value
4828    needs a parameter relocation, which never happens for C code.
4829 
4830    This function handles most instances of case 2; however, it will
4831    fail if we did not originally have stub code on the return path
4832    but will need stub code on the new return path.  This can happen if
4833    the caller & callee are both in the main program, but the new
4834    return location is in a shared library.  */
4835 
rtx
pa_return_addr_rtx (int count, rtx frameaddr)
{
  rtx label;
  rtx rp;
  rtx saved_rp;
  rtx ins;

  /* The instruction stream at the return address of a PA1.X export stub is:

	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
	0x004010a1 | stub+12:  ldsid (sr0,rp),r1
	0x00011820 | stub+16:  mtsp r1,sr0
	0xe0400002 | stub+20:  be,n 0(sr0,rp)

     0xe0400002 must be specified as -532676606 so that it won't be
     rejected as an invalid immediate operand on 64-bit hosts.

     The instruction stream at the return address of a PA2.0 export stub is:

	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
	0xe840d002 | stub+12:  bve,n (rp)
  */

  HOST_WIDE_INT insns[4];
  int i, len;

  /* Only the innermost frame (COUNT == 0) is supported.  */
  if (count != 0)
    return NULL_RTX;

  rp = get_hard_reg_initial_val (Pmode, 2);

  /* No export stubs in these configurations, so %r2's entry value is
     the answer directly.  */
  if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
    return rp;

  /* If there is no export stub then just use the value saved from
     the return pointer register.  */

  saved_rp = gen_reg_rtx (Pmode);
  emit_move_insn (saved_rp, rp);

  /* Get pointer to the instruction stream.  We have to mask out the
     privilege level from the two low order bits of the return address
     pointer here so that ins will point to the start of the first
     instruction that would have been executed if we returned.  */
  ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
  /* LABEL is reached as soon as a word fails to match the stub
     pattern; at that point saved_rp already holds the result.  */
  label = gen_label_rtx ();

  /* Expected stub instruction words (see the comment above).  */
  if (TARGET_PA_20)
    {
      insns[0] = 0x4bc23fd1;
      insns[1] = -398405630;	/* 0xe840d002, bve,n (rp).  */
      len = 2;
    }
  else
    {
      insns[0] = 0x4bc23fd1;
      insns[1] = 0x004010a1;
      insns[2] = 0x00011820;
      insns[3] = -532676606;	/* 0xe0400002, be,n 0(sr0,rp).  */
      len = 4;
    }

  /* Check the instruction stream at the normal return address for the
     export stub.  If it is an export stub, than our return address is
     really in -24[frameaddr].  */

  for (i = 0; i < len; i++)
    {
      rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
      rtx op1 = GEN_INT (insns[i]);
      emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
    }

  /* Here we know that our return address points to an export
     stub.  We don't want to return the address of the export stub,
     but rather the return address of the export stub.  That return
     address is stored at -24[frameaddr].  */

  emit_move_insn (saved_rp,
		  gen_rtx_MEM (Pmode,
			       memory_address (Pmode,
					       plus_constant (Pmode, frameaddr,
							      -24))));

  emit_label (label);

  return saved_rp;
}
4925 
4926 void
pa_emit_bcond_fp(rtx operands[])4927 pa_emit_bcond_fp (rtx operands[])
4928 {
4929   enum rtx_code code = GET_CODE (operands[0]);
4930   rtx operand0 = operands[1];
4931   rtx operand1 = operands[2];
4932   rtx label = operands[3];
4933 
4934   emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
4935 		          gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4936 
4937   emit_jump_insn (gen_rtx_SET (pc_rtx,
4938 			       gen_rtx_IF_THEN_ELSE (VOIDmode,
4939 						     gen_rtx_fmt_ee (NE,
4940 							      VOIDmode,
4941 							      gen_rtx_REG (CCFPmode, 0),
4942 							      const0_rtx),
4943 						     gen_rtx_LABEL_REF (VOIDmode, label),
4944 						     pc_rtx)));
4945 
4946 }
4947 
/* Adjust the cost of a scheduling dependency.  Return the new cost of
   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.

   Only anti and output dependencies between FP loads/ALU ops and the
   long-latency FP operations (mul/div/sqrt) are adjusted; everything
   else costs 0 here because true dependencies are modeled with
   bypasses in the pipeline description.  */

static int
pa_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
		unsigned int)
{
  enum attr_type attr_type;

  /* Don't adjust costs for a pa8000 chip, also do not adjust any
     true dependencies as they are described with bypasses now.  */
  if (pa_cpu >= PROCESSOR_8000 || dep_type == 0)
    return cost;

  if (! recog_memoized (insn))
    return 0;

  attr_type = get_attr_type (insn);

  switch (dep_type)
    {
    case REG_DEP_ANTI:
      /* Anti dependency; DEP_INSN reads a register that INSN writes some
	 cycles later.  */

      if (attr_type == TYPE_FPLOAD)
	{
	  rtx pat = PATTERN (insn);
	  rtx dep_pat = PATTERN (dep_insn);
	  if (GET_CODE (pat) == PARALLEL)
	    {
	      /* This happens for the fldXs,mb patterns.  */
	      pat = XVECEXP (pat, 0, 0);
	    }
	  /* Note: the return below is the body of this IF.  */
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    /* If this happens, we have to extend this to schedule
	       optimally.  Return 0 for now.  */
	  return 0;

	  /* Does the load target appear among the FP op's sources?  */
	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
	    {
	      if (! recog_memoized (dep_insn))
		return 0;
	      switch (get_attr_type (dep_insn))
		{
		case TYPE_FPALU:
		case TYPE_FPMULSGL:
		case TYPE_FPMULDBL:
		case TYPE_FPDIVSGL:
		case TYPE_FPDIVDBL:
		case TYPE_FPSQRTSGL:
		case TYPE_FPSQRTDBL:
		  /* A fpload can't be issued until one cycle before a
		     preceding arithmetic operation has finished if
		     the target of the fpload is any of the sources
		     (or destination) of the arithmetic operation.  */
		  return insn_default_latency (dep_insn) - 1;

		default:
		  return 0;
		}
	    }
	}
      else if (attr_type == TYPE_FPALU)
	{
	  rtx pat = PATTERN (insn);
	  rtx dep_pat = PATTERN (dep_insn);
	  if (GET_CODE (pat) == PARALLEL)
	    {
	      /* This happens for the fldXs,mb patterns.  */
	      pat = XVECEXP (pat, 0, 0);
	    }
	  /* Note: the return below is the body of this IF.  */
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    /* If this happens, we have to extend this to schedule
	       optimally.  Return 0 for now.  */
	  return 0;

	  /* Does the ALU op's target appear among the div/sqrt sources?  */
	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
	    {
	      if (! recog_memoized (dep_insn))
		return 0;
	      switch (get_attr_type (dep_insn))
		{
		case TYPE_FPDIVSGL:
		case TYPE_FPDIVDBL:
		case TYPE_FPSQRTSGL:
		case TYPE_FPSQRTDBL:
		  /* An ALU flop can't be issued until two cycles before a
		     preceding divide or sqrt operation has finished if
		     the target of the ALU flop is any of the sources
		     (or destination) of the divide or sqrt operation.  */
		  return insn_default_latency (dep_insn) - 2;

		default:
		  return 0;
		}
	    }
	}

      /* For other anti dependencies, the cost is 0.  */
      return 0;

    case REG_DEP_OUTPUT:
      /* Output dependency; DEP_INSN writes a register that INSN writes some
	 cycles later.  */
      if (attr_type == TYPE_FPLOAD)
	{
	  rtx pat = PATTERN (insn);
	  rtx dep_pat = PATTERN (dep_insn);
	  if (GET_CODE (pat) == PARALLEL)
	    {
	      /* This happens for the fldXs,mb patterns.  */
	      pat = XVECEXP (pat, 0, 0);
	    }
	  /* Note: the return below is the body of this IF.  */
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    /* If this happens, we have to extend this to schedule
	       optimally.  Return 0 for now.  */
	  return 0;

	  /* Same destination register in both insns?  */
	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
	    {
	      if (! recog_memoized (dep_insn))
		return 0;
	      switch (get_attr_type (dep_insn))
		{
		case TYPE_FPALU:
		case TYPE_FPMULSGL:
		case TYPE_FPMULDBL:
		case TYPE_FPDIVSGL:
		case TYPE_FPDIVDBL:
		case TYPE_FPSQRTSGL:
		case TYPE_FPSQRTDBL:
		  /* A fpload can't be issued until one cycle before a
		     preceding arithmetic operation has finished if
		     the target of the fpload is the destination of the
		     arithmetic operation.

		     Exception: For PA7100LC, PA7200 and PA7300, the cost
		     is 3 cycles, unless they bundle together.   We also
		     pay the penalty if the second insn is a fpload.  */
		  return insn_default_latency (dep_insn) - 1;

		default:
		  return 0;
		}
	    }
	}
      else if (attr_type == TYPE_FPALU)
	{
	  rtx pat = PATTERN (insn);
	  rtx dep_pat = PATTERN (dep_insn);
	  if (GET_CODE (pat) == PARALLEL)
	    {
	      /* This happens for the fldXs,mb patterns.  */
	      pat = XVECEXP (pat, 0, 0);
	    }
	  /* Note: the return below is the body of this IF.  */
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    /* If this happens, we have to extend this to schedule
	       optimally.  Return 0 for now.  */
	  return 0;

	  /* Same destination register in both insns?  */
	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
	    {
	      if (! recog_memoized (dep_insn))
		return 0;
	      switch (get_attr_type (dep_insn))
		{
		case TYPE_FPDIVSGL:
		case TYPE_FPDIVDBL:
		case TYPE_FPSQRTSGL:
		case TYPE_FPSQRTDBL:
		  /* An ALU flop can't be issued until two cycles before a
		     preceding divide or sqrt operation has finished if
		     the target of the ALU flop is also the target of
		     the divide or sqrt operation.  */
		  return insn_default_latency (dep_insn) - 2;

		default:
		  return 0;
		}
	    }
	}

      /* For other output dependencies, the cost is 0.  */
      return 0;

    default:
      gcc_unreachable ();
    }
}
5138 
5139 /* The 700 can only issue a single insn at a time.
5140    The 7XXX processors can issue two insns at a time.
5141    The 8000 can issue 4 insns at a time.  */
5142 static int
pa_issue_rate(void)5143 pa_issue_rate (void)
5144 {
5145   switch (pa_cpu)
5146     {
5147     case PROCESSOR_700:		return 1;
5148     case PROCESSOR_7100:	return 2;
5149     case PROCESSOR_7100LC:	return 2;
5150     case PROCESSOR_7200:	return 2;
5151     case PROCESSOR_7300:	return 2;
5152     case PROCESSOR_8000:	return 4;
5153 
5154     default:
5155       gcc_unreachable ();
5156     }
5157 }
5158 
5159 
5160 
/* Return any length plus adjustment needed by INSN which already has
   its length computed as LENGTH.   Return LENGTH if no adjustment is
   necessary.

   Also compute the length of an inline block move here as it is too
   complicated to express as a length attribute in pa.md.  */
int
pa_adjust_insn_length (rtx_insn *insn, int length)
{
  rtx pat = PATTERN (insn);

  /* If length is negative or undefined, provide initial length.
     (The unsigned comparison catches both -1 and INT_MAX.)  */
  if ((unsigned int) length >= INT_MAX)
    {
      /* For a delay-slot SEQUENCE, measure the branch itself.  */
      if (GET_CODE (pat) == SEQUENCE)
	insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));

      switch (get_attr_type (insn))
	{
	case TYPE_MILLI:
	  length = pa_attr_length_millicode_call (insn);
	  break;
	case TYPE_CALL:
	  length = pa_attr_length_call (insn, 0);
	  break;
	case TYPE_SIBCALL:
	  length = pa_attr_length_call (insn, 1);
	  break;
	case TYPE_DYNCALL:
	  length = pa_attr_length_indirect_call (insn);
	  break;
	case TYPE_SH_FUNC_ADRS:
	  length = pa_attr_length_millicode_call (insn) + 20;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Block move pattern: a PARALLEL whose first SET moves BLKmode
     MEM to BLKmode MEM.  */
  if (NONJUMP_INSN_P (insn)
      && GET_CODE (pat) == PARALLEL
      && GET_CODE (XVECEXP (pat, 0, 0)) == SET
      && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
      && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
      && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
      && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
    length += compute_cpymem_length (insn) - 4;
  /* Block clear pattern: a PARALLEL whose first SET stores zero to a
     BLKmode MEM.  */
  else if (NONJUMP_INSN_P (insn)
	   && GET_CODE (pat) == PARALLEL
	   && GET_CODE (XVECEXP (pat, 0, 0)) == SET
	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
	   && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
    length += compute_clrmem_length (insn) - 4;
  /* Conditional branch with an unfilled delay slot.  */
  else if (JUMP_P (insn) && ! simplejump_p (insn))
    {
      /* Adjust a short backwards conditional with an unfilled delay slot.  */
      if (GET_CODE (pat) == SET
	  && length == 4
	  && JUMP_LABEL (insn) != NULL_RTX
	  && ! forward_branch_p (insn))
	length += 4;
      else if (GET_CODE (pat) == PARALLEL
	       && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
	       && length == 4)
	length += 4;
      /* Adjust dbra insn with short backwards conditional branch with
	 unfilled delay slot -- only for case where counter is in a
	 general register.  */
      else if (GET_CODE (pat) == PARALLEL
	       && GET_CODE (XVECEXP (pat, 0, 1)) == SET
	       && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
 	       && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
	       && length == 4
	       && ! forward_branch_p (insn))
	length += 4;
    }
  return length;
}
5243 
/* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook: the only
   punctuation characters pa_print_operand accepts are '@', '#',
   '*' and '^'.  */

static bool
pa_print_operand_punct_valid_p (unsigned char code)
{
  return code == '@' || code == '#' || code == '*' || code == '^';
}
5257 
5258 /* Print operand X (an rtx) in assembler syntax to file FILE.
5259    CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5260    For `%' followed by punctuation, CODE is the punctuation and X is null.  */
5261 
5262 void
pa_print_operand(FILE * file,rtx x,int code)5263 pa_print_operand (FILE *file, rtx x, int code)
5264 {
5265   switch (code)
5266     {
5267     case '#':
5268       /* Output a 'nop' if there's nothing for the delay slot.  */
5269       if (dbr_sequence_length () == 0)
5270 	fputs ("\n\tnop", file);
5271       return;
5272     case '*':
5273       /* Output a nullification completer if there's nothing for the */
5274       /* delay slot or nullification is requested.  */
5275       if (dbr_sequence_length () == 0 ||
5276 	  (final_sequence &&
5277 	   INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5278         fputs (",n", file);
5279       return;
5280     case 'R':
5281       /* Print out the second register name of a register pair.
5282 	 I.e., R (6) => 7.  */
5283       fputs (reg_names[REGNO (x) + 1], file);
5284       return;
5285     case 'r':
5286       /* A register or zero.  */
5287       if (x == const0_rtx
5288 	  || (x == CONST0_RTX (DFmode))
5289 	  || (x == CONST0_RTX (SFmode)))
5290 	{
5291 	  fputs ("%r0", file);
5292 	  return;
5293 	}
5294       else
5295 	break;
5296     case 'f':
5297       /* A register or zero (floating point).  */
5298       if (x == const0_rtx
5299 	  || (x == CONST0_RTX (DFmode))
5300 	  || (x == CONST0_RTX (SFmode)))
5301 	{
5302 	  fputs ("%fr0", file);
5303 	  return;
5304 	}
5305       else
5306 	break;
5307     case 'A':
5308       {
5309 	rtx xoperands[2];
5310 
5311 	xoperands[0] = XEXP (XEXP (x, 0), 0);
5312 	xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5313 	pa_output_global_address (file, xoperands[1], 0);
5314         fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5315 	return;
5316       }
5317 
5318     case 'C':			/* Plain (C)ondition */
5319     case 'X':
5320       switch (GET_CODE (x))
5321 	{
5322 	case EQ:
5323 	  fputs ("=", file);  break;
5324 	case NE:
5325 	  fputs ("<>", file);  break;
5326 	case GT:
5327 	  fputs (">", file);  break;
5328 	case GE:
5329 	  fputs (">=", file);  break;
5330 	case GEU:
5331 	  fputs (">>=", file);  break;
5332 	case GTU:
5333 	  fputs (">>", file);  break;
5334 	case LT:
5335 	  fputs ("<", file);  break;
5336 	case LE:
5337 	  fputs ("<=", file);  break;
5338 	case LEU:
5339 	  fputs ("<<=", file);  break;
5340 	case LTU:
5341 	  fputs ("<<", file);  break;
5342 	default:
5343 	  gcc_unreachable ();
5344 	}
5345       return;
5346     case 'N':			/* Condition, (N)egated */
5347       switch (GET_CODE (x))
5348 	{
5349 	case EQ:
5350 	  fputs ("<>", file);  break;
5351 	case NE:
5352 	  fputs ("=", file);  break;
5353 	case GT:
5354 	  fputs ("<=", file);  break;
5355 	case GE:
5356 	  fputs ("<", file);  break;
5357 	case GEU:
5358 	  fputs ("<<", file);  break;
5359 	case GTU:
5360 	  fputs ("<<=", file);  break;
5361 	case LT:
5362 	  fputs (">=", file);  break;
5363 	case LE:
5364 	  fputs (">", file);  break;
5365 	case LEU:
5366 	  fputs (">>", file);  break;
5367 	case LTU:
5368 	  fputs (">>=", file);  break;
5369 	default:
5370 	  gcc_unreachable ();
5371 	}
5372       return;
5373     /* For floating point comparisons.  Note that the output
5374        predicates are the complement of the desired mode.  The
5375        conditions for GT, GE, LT, LE and LTGT cause an invalid
5376        operation exception if the result is unordered and this
5377        exception is enabled in the floating-point status register.  */
5378     case 'Y':
5379       switch (GET_CODE (x))
5380 	{
5381 	case EQ:
5382 	  fputs ("!=", file);  break;
5383 	case NE:
5384 	  fputs ("=", file);  break;
5385 	case GT:
5386 	  fputs ("!>", file);  break;
5387 	case GE:
5388 	  fputs ("!>=", file);  break;
5389 	case LT:
5390 	  fputs ("!<", file);  break;
5391 	case LE:
5392 	  fputs ("!<=", file);  break;
5393 	case LTGT:
5394 	  fputs ("!<>", file);  break;
5395 	case UNLE:
5396 	  fputs ("!?<=", file);  break;
5397 	case UNLT:
5398 	  fputs ("!?<", file);  break;
5399 	case UNGE:
5400 	  fputs ("!?>=", file);  break;
5401 	case UNGT:
5402 	  fputs ("!?>", file);  break;
5403 	case UNEQ:
5404 	  fputs ("!?=", file);  break;
5405 	case UNORDERED:
5406 	  fputs ("!?", file);  break;
5407 	case ORDERED:
5408 	  fputs ("?", file);  break;
5409 	default:
5410 	  gcc_unreachable ();
5411 	}
5412       return;
5413     case 'S':			/* Condition, operands are (S)wapped.  */
5414       switch (GET_CODE (x))
5415 	{
5416 	case EQ:
5417 	  fputs ("=", file);  break;
5418 	case NE:
5419 	  fputs ("<>", file);  break;
5420 	case GT:
5421 	  fputs ("<", file);  break;
5422 	case GE:
5423 	  fputs ("<=", file);  break;
5424 	case GEU:
5425 	  fputs ("<<=", file);  break;
5426 	case GTU:
5427 	  fputs ("<<", file);  break;
5428 	case LT:
5429 	  fputs (">", file);  break;
5430 	case LE:
5431 	  fputs (">=", file);  break;
5432 	case LEU:
5433 	  fputs (">>=", file);  break;
5434 	case LTU:
5435 	  fputs (">>", file);  break;
5436 	default:
5437 	  gcc_unreachable ();
5438 	}
5439       return;
5440     case 'B':			/* Condition, (B)oth swapped and negate.  */
5441       switch (GET_CODE (x))
5442 	{
5443 	case EQ:
5444 	  fputs ("<>", file);  break;
5445 	case NE:
5446 	  fputs ("=", file);  break;
5447 	case GT:
5448 	  fputs (">=", file);  break;
5449 	case GE:
5450 	  fputs (">", file);  break;
5451 	case GEU:
5452 	  fputs (">>", file);  break;
5453 	case GTU:
5454 	  fputs (">>=", file);  break;
5455 	case LT:
5456 	  fputs ("<=", file);  break;
5457 	case LE:
5458 	  fputs ("<", file);  break;
5459 	case LEU:
5460 	  fputs ("<<", file);  break;
5461 	case LTU:
5462 	  fputs ("<<=", file);  break;
5463 	default:
5464 	  gcc_unreachable ();
5465 	}
5466       return;
5467     case 'k':
5468       gcc_assert (GET_CODE (x) == CONST_INT);
5469       fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5470       return;
5471     case 'Q':
5472       gcc_assert (GET_CODE (x) == CONST_INT);
5473       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5474       return;
5475     case 'L':
5476       gcc_assert (GET_CODE (x) == CONST_INT);
5477       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5478       return;
5479     case 'o':
5480       gcc_assert (GET_CODE (x) == CONST_INT
5481 		  && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
5482       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5483       return;
5484     case 'O':
5485       gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5486       fprintf (file, "%d", exact_log2 (INTVAL (x)));
5487       return;
5488     case 'p':
5489       gcc_assert (GET_CODE (x) == CONST_INT);
5490       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5491       return;
5492     case 'P':
5493       gcc_assert (GET_CODE (x) == CONST_INT);
5494       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5495       return;
5496     case 'I':
5497       if (GET_CODE (x) == CONST_INT)
5498 	fputs ("i", file);
5499       return;
5500     case 'M':
5501     case 'F':
5502       switch (GET_CODE (XEXP (x, 0)))
5503 	{
5504 	case PRE_DEC:
5505 	case PRE_INC:
5506 	  if (ASSEMBLER_DIALECT == 0)
5507 	    fputs ("s,mb", file);
5508 	  else
5509 	    fputs (",mb", file);
5510 	  break;
5511 	case POST_DEC:
5512 	case POST_INC:
5513 	  if (ASSEMBLER_DIALECT == 0)
5514 	    fputs ("s,ma", file);
5515 	  else
5516 	    fputs (",ma", file);
5517 	  break;
5518 	case PLUS:
5519 	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5520 	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5521 	    {
5522 	      if (ASSEMBLER_DIALECT == 0)
5523 		fputs ("x", file);
5524 	    }
5525 	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5526 		   || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5527 	    {
5528 	      if (ASSEMBLER_DIALECT == 0)
5529 		fputs ("x,s", file);
5530 	      else
5531 		fputs (",s", file);
5532 	    }
5533 	  else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5534 	    fputs ("s", file);
5535 	  break;
5536 	default:
5537 	  if (code == 'F' && ASSEMBLER_DIALECT == 0)
5538 	    fputs ("s", file);
5539 	  break;
5540 	}
5541       return;
5542     case 'G':
5543       pa_output_global_address (file, x, 0);
5544       return;
5545     case 'H':
5546       pa_output_global_address (file, x, 1);
5547       return;
5548     case 0:			/* Don't do anything special */
5549       break;
5550     case 'Z':
5551       {
5552 	unsigned op[3];
5553 	compute_zdepwi_operands (INTVAL (x), op);
5554 	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5555 	return;
5556       }
5557     case 'z':
5558       {
5559 	unsigned op[3];
5560 	compute_zdepdi_operands (INTVAL (x), op);
5561 	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5562 	return;
5563       }
5564     case 'c':
5565       /* We can get here from a .vtable_inherit due to our
5566 	 CONSTANT_ADDRESS_P rejecting perfectly good constant
5567 	 addresses.  */
5568       break;
5569     default:
5570       gcc_unreachable ();
5571     }
5572   if (GET_CODE (x) == REG)
5573     {
5574       fputs (reg_names [REGNO (x)], file);
5575       if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5576 	{
5577 	  fputs ("R", file);
5578 	  return;
5579 	}
5580       if (FP_REG_P (x)
5581 	  && GET_MODE_SIZE (GET_MODE (x)) <= 4
5582 	  && (REGNO (x) & 1) == 0)
5583 	fputs ("L", file);
5584     }
5585   else if (GET_CODE (x) == MEM)
5586     {
5587       int size = GET_MODE_SIZE (GET_MODE (x));
5588       rtx base = NULL_RTX;
5589       switch (GET_CODE (XEXP (x, 0)))
5590 	{
5591 	case PRE_DEC:
5592 	case POST_DEC:
5593           base = XEXP (XEXP (x, 0), 0);
5594 	  fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5595 	  break;
5596 	case PRE_INC:
5597 	case POST_INC:
5598           base = XEXP (XEXP (x, 0), 0);
5599 	  fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5600 	  break;
5601 	case PLUS:
5602 	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5603 	    fprintf (file, "%s(%s)",
5604 		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5605 		     reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5606 	  else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5607 	    fprintf (file, "%s(%s)",
5608 		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5609 		     reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5610 	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5611 		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5612 	    {
5613 	      /* Because the REG_POINTER flag can get lost during reload,
5614 		 pa_legitimate_address_p canonicalizes the order of the
5615 		 index and base registers in the combined move patterns.  */
5616 	      rtx base = XEXP (XEXP (x, 0), 1);
5617 	      rtx index = XEXP (XEXP (x, 0), 0);
5618 
5619 	      fprintf (file, "%s(%s)",
5620 		       reg_names [REGNO (index)], reg_names [REGNO (base)]);
5621 	    }
5622 	  else
5623 	    output_address (GET_MODE (x), XEXP (x, 0));
5624 	  break;
5625 	default:
5626 	  output_address (GET_MODE (x), XEXP (x, 0));
5627 	  break;
5628 	}
5629     }
5630   else
5631     output_addr_const (file, x);
5632 }
5633 
/* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.

   FILE is the assembly output stream and X the (possibly HIGH-wrapped)
   address expression.  ROUND_CONSTANT is nonzero when the constant part
   of the address must be rounded to an 8 KB boundary; see the comment
   in the CONST case below for why.  */

void
pa_output_global_address (FILE *file, rtx x, int round_constant)
{

  /* Imagine  (high (const (plus ...))).  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  /* Read-only symbols are emitted as-is; writable non-PIC symbols are
     emitted relative to the $global$ data pointer.  */
  if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
    output_addr_const (file, x);
  else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
    {
      output_addr_const (file, x);
      fputs ("-$global$", file);
    }
  else if (GET_CODE (x) == CONST)
    {
      const char *sep = "";
      int offset = 0;		/* assembler wants -$global$ at end */
      rtx base = NULL_RTX;

      /* The CONST wraps a two-operand PLUS or MINUS.  Scan both
	 operands: the symbolic one becomes BASE (and is printed now),
	 the CONST_INT one becomes OFFSET (printed last).

	 NOTE(review): if both operands were CONST_INT, BASE would stay
	 NULL_RTX and the read_only_operand call below would dereference
	 it; presumably such a CONST never reaches here — confirm.  */
      switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
	{
	case LABEL_REF:
	case SYMBOL_REF:
	  base = XEXP (XEXP (x, 0), 0);
	  output_addr_const (file, base);
	  break;
	case CONST_INT:
	  offset = INTVAL (XEXP (XEXP (x, 0), 0));
	  break;
	default:
	  gcc_unreachable ();
	}

      switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
	{
	case LABEL_REF:
	case SYMBOL_REF:
	  base = XEXP (XEXP (x, 0), 1);
	  output_addr_const (file, base);
	  break;
	case CONST_INT:
	  offset = INTVAL (XEXP (XEXP (x, 0), 1));
	  break;
	default:
	  gcc_unreachable ();
	}

      /* How bogus.  The compiler is apparently responsible for
	 rounding the constant if it uses an LR field selector.

	 The linker and/or assembler seem a better place since
	 they have to do this kind of thing already.

	 If we fail to do this, HP's optimizing linker may eliminate
	 an addil, but not update the ldw/stw/ldo instruction that
	 uses the result of the addil.  */
      if (round_constant)
	offset = ((offset + 0x1000) & ~0x1fff);

      /* Choose the separator printed between BASE and OFFSET.  */
      switch (GET_CODE (XEXP (x, 0)))
	{
	case PLUS:
	  if (offset < 0)
	    {
	      offset = -offset;
	      sep = "-";
	    }
	  else
	    sep = "+";
	  break;

	case MINUS:
	  gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
	  sep = "-";
	  break;

	default:
	  gcc_unreachable ();
	}

      if (!read_only_operand (base, VOIDmode) && !flag_pic)
	fputs ("-$global$", file);
      if (offset)
	fprintf (file, "%s%d", sep, offset);
    }
  else
    output_addr_const (file, x);
}
5726 
5727 /* Output boilerplate text to appear at the beginning of the file.
5728    There are several possible versions.  */
5729 #define aputs(x) fputs(x, asm_out_file)
5730 static inline void
pa_file_start_level(void)5731 pa_file_start_level (void)
5732 {
5733   if (TARGET_64BIT)
5734     aputs ("\t.LEVEL 2.0w\n");
5735   else if (TARGET_PA_20)
5736     aputs ("\t.LEVEL 2.0\n");
5737   else if (TARGET_PA_11)
5738     aputs ("\t.LEVEL 1.1\n");
5739   else
5740     aputs ("\t.LEVEL 1.0\n");
5741 }
5742 
5743 static inline void
pa_file_start_space(int sortspace)5744 pa_file_start_space (int sortspace)
5745 {
5746   aputs ("\t.SPACE $PRIVATE$");
5747   if (sortspace)
5748     aputs (",SORT=16");
5749   aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5750   if (flag_tm)
5751     aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5752   aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5753 	 "\n\t.SPACE $TEXT$");
5754   if (sortspace)
5755     aputs (",SORT=8");
5756   aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5757 	 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5758 }
5759 
5760 static inline void
pa_file_start_file(int want_version)5761 pa_file_start_file (int want_version)
5762 {
5763   if (write_symbols != NO_DEBUG)
5764     {
5765       output_file_directive (asm_out_file, main_input_filename);
5766       if (want_version)
5767 	aputs ("\t.version\t\"01.01\"\n");
5768     }
5769 }
5770 
5771 static inline void
pa_file_start_mcount(const char * aswhat)5772 pa_file_start_mcount (const char *aswhat)
5773 {
5774   if (profile_flag)
5775     fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5776 }
5777 
/* TARGET_ASM_FILE_START for the ELF32 configuration: level directive,
   optional _mcount import, then the .file directive.  */
static void
pa_elf_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_mcount ("ENTRY");
  pa_file_start_file (0);
}
5785 
/* TARGET_ASM_FILE_START for the SOM configuration.  */
static void
pa_som_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_space (0);
  /* Import the global data pointer and the $$dyncall millicode stub.  */
  aputs ("\t.IMPORT $global$,DATA\n"
         "\t.IMPORT $$dyncall,MILLICODE\n");
  pa_file_start_mcount ("CODE");
  pa_file_start_file (0);
}
5796 
/* TARGET_ASM_FILE_START for the Linux configuration.  Note the .file
   directive is emitted before the .LEVEL directive here.  */
static void
pa_linux_file_start (void)
{
  pa_file_start_file (1);
  pa_file_start_level ();
  pa_file_start_mcount ("CODE");
}
5804 
/* TARGET_ASM_FILE_START for 64-bit HP-UX with the GNU assembler.  */
static void
pa_hpux64_gas_file_start (void)
{
  pa_file_start_level ();
#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
  /* Mark _mcount as a function so the linker resolves it properly.  */
  if (profile_flag)
    ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
#endif
  pa_file_start_file (1);
}
5815 
/* TARGET_ASM_FILE_START for 64-bit HP-UX with the HP assembler.  */
static void
pa_hpux64_hpas_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_space (1);
  pa_file_start_mcount ("CODE");
  pa_file_start_file (0);
}
5824 #undef aputs
5825 
/* Search the deferred plabel list for SYMBOL and return its internal
   label.  If an entry for SYMBOL is not found, a new entry is created.  */

rtx
pa_get_deferred_plabel (rtx symbol)
{
  const char *fname = XSTR (symbol, 0);
  size_t i;

  /* See if we have already put this function on the list of deferred
     plabels.  This list is generally small, so a linear search is not
     too ugly.  If it proves too slow replace it with something faster.  */
  for (i = 0; i < n_deferred_plabels; i++)
    if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
      break;

  /* If the deferred plabel list is empty, or this entry was not found
     on the list, create a new entry on the list.  */
  if (deferred_plabels == NULL || i == n_deferred_plabels)
    {
      tree id;

      /* Allocate the first element, or grow the GC-managed vector by one.  */
      if (deferred_plabels == 0)
	deferred_plabels =  ggc_alloc<deferred_plabel> ();
      else
        deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
                                          deferred_plabels,
                                          n_deferred_plabels + 1);

      i = n_deferred_plabels++;
      deferred_plabels[i].internal_label = gen_label_rtx ();
      deferred_plabels[i].symbol = symbol;

      /* Gross.  We have just implicitly taken the address of this
	 function.  Mark it in the same manner as assemble_name.  */
      id = maybe_get_identifier (targetm.strip_name_encoding (fname));
      if (id)
	mark_referenced (id);
    }

  return deferred_plabels[i].internal_label;
}
5868 
5869 static void
output_deferred_plabels(void)5870 output_deferred_plabels (void)
5871 {
5872   size_t i;
5873 
5874   /* If we have some deferred plabels, then we need to switch into the
5875      data or readonly data section, and align it to a 4 byte boundary
5876      before outputting the deferred plabels.  */
5877   if (n_deferred_plabels)
5878     {
5879       switch_to_section (flag_pic ? data_section : readonly_data_section);
5880       ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5881     }
5882 
5883   /* Now output the deferred plabels.  */
5884   for (i = 0; i < n_deferred_plabels; i++)
5885     {
5886       targetm.asm_out.internal_label (asm_out_file, "L",
5887 		 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5888       assemble_integer (deferred_plabels[i].symbol,
5889 			TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5890     }
5891 }
5892 
/* Initialize optabs to point to emulation routines.  */

static void
pa_init_libfuncs (void)
{
  /* On HP-UX, TFmode (quad precision) arithmetic, comparisons and
     conversions are provided by the HP library routines below.  */
  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      /* Arithmetic.  NOTE(review): "_U_Qmin" (no 'f') alongside
	 "_U_Qfmax" looks asymmetric but matches the library's actual
	 entry point names — confirm before "fixing".  */
      set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
      set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
      set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
      set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
      set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
      set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
      set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
      set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
      set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
      set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
      set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
      set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
      set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
      set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
      set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");

      /* Float <-> float conversions.  */
      set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
      set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
      set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
      set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");

      /* Float -> integer conversions.  NOTE(review): the 64-bit name
	 really has two leading underscores — presumably the 64-bit
	 library spells it that way; confirm before normalizing.  */
      set_conv_libfunc (sfix_optab, SImode, TFmode,
			TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
				     : "_U_Qfcnvfxt_quad_to_sgl");
      set_conv_libfunc (sfix_optab, DImode, TFmode,
			"_U_Qfcnvfxt_quad_to_dbl");
      set_conv_libfunc (ufix_optab, SImode, TFmode,
			"_U_Qfcnvfxt_quad_to_usgl");
      set_conv_libfunc (ufix_optab, DImode, TFmode,
			"_U_Qfcnvfxt_quad_to_udbl");

      /* Integer -> float conversions.  */
      set_conv_libfunc (sfloat_optab, TFmode, SImode,
			"_U_Qfcnvxf_sgl_to_quad");
      set_conv_libfunc (sfloat_optab, TFmode, DImode,
			"_U_Qfcnvxf_dbl_to_quad");
      set_conv_libfunc (ufloat_optab, TFmode, SImode,
			"_U_Qfcnvxf_usgl_to_quad");
      set_conv_libfunc (ufloat_optab, TFmode, DImode,
			"_U_Qfcnvxf_udbl_to_quad");
    }

  /* Use libcalls for the __sync_* primitives when requested.  */
  if (TARGET_SYNC_LIBCALL)
    init_sync_libfuncs (8);
}
5946 
/* HP's millicode routines mean something special to the assembler.
   Keep track of which ones we have used.  */

/* Index of each tracked millicode routine; end1000 is the count.  */
enum millicodes { remI, remU, divI, divU, mulI, end1000 };
static void import_milli (enum millicodes);
/* imported[CODE] is nonzero once the .IMPORT for CODE was emitted.  */
static char imported[(int) end1000];
/* Routine names, indexed by enum millicodes.  Each name is exactly four
   characters, which import_milli splices into IMPORT_STRING.  */
static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
/* Directive template; the "$$...." placeholder starts at MILLI_START
   + 0 past the "$$" (".IMPORT $$" is 10 characters).  */
static const char import_string[] = ".IMPORT $$....,MILLICODE";
#define MILLI_START 10
5956 
5957 static void
import_milli(enum millicodes code)5958 import_milli (enum millicodes code)
5959 {
5960   char str[sizeof (import_string)];
5961 
5962   if (!imported[(int) code])
5963     {
5964       imported[(int) code] = 1;
5965       strcpy (str, import_string);
5966       memcpy (str + MILLI_START, milli_names[(int) code], 4);
5967       output_asm_insn (str, 0);
5968     }
5969 }
5970 
/* The register constraints have put the operands and return value in
   the proper registers.  */

const char *
pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
{
  /* Import $$mulI on first use, then emit the millicode call.  */
  import_milli (mulI);
  return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
}
5980 
/* Emit the rtl for doing a division by a constant.  */

/* Do magic division millicodes exist for this value?  Indexed by the
   divisor (0..15); nonzero means a $$divI_N / $$divU_N routine exists.  */
const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};

/* We'll use an array to keep track of the magic millicodes and
   whether or not we've used them already. [n][0] is signed, [n][1] is
   unsigned.  */

static int div_milli[16][2];
5991 
5992 int
pa_emit_hpdiv_const(rtx * operands,int unsignedp)5993 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5994 {
5995   if (GET_CODE (operands[2]) == CONST_INT
5996       && INTVAL (operands[2]) > 0
5997       && INTVAL (operands[2]) < 16
5998       && pa_magic_milli[INTVAL (operands[2])])
5999     {
6000       rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
6001 
6002       emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
6003       emit
6004 	(gen_rtx_PARALLEL
6005 	 (VOIDmode,
6006 	  gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
6007 				     gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
6008 						     SImode,
6009 						     gen_rtx_REG (SImode, 26),
6010 						     operands[2])),
6011 		     gen_rtx_CLOBBER (VOIDmode, operands[4]),
6012 		     gen_rtx_CLOBBER (VOIDmode, operands[3]),
6013 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
6014 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
6015 		     gen_rtx_CLOBBER (VOIDmode, ret))));
6016       emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
6017       return 1;
6018     }
6019   return 0;
6020 }
6021 
6022 const char *
pa_output_div_insn(rtx * operands,int unsignedp,rtx_insn * insn)6023 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
6024 {
6025   int divisor;
6026 
6027   /* If the divisor is a constant, try to use one of the special
6028      opcodes .*/
6029   if (GET_CODE (operands[0]) == CONST_INT)
6030     {
6031       static char buf[100];
6032       divisor = INTVAL (operands[0]);
6033       if (!div_milli[divisor][unsignedp])
6034 	{
6035 	  div_milli[divisor][unsignedp] = 1;
6036 	  if (unsignedp)
6037 	    output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
6038 	  else
6039 	    output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
6040 	}
6041       if (unsignedp)
6042 	{
6043 	  sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
6044 		   INTVAL (operands[0]));
6045 	  return pa_output_millicode_call (insn,
6046 					   gen_rtx_SYMBOL_REF (SImode, buf));
6047 	}
6048       else
6049 	{
6050 	  sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
6051 		   INTVAL (operands[0]));
6052 	  return pa_output_millicode_call (insn,
6053 					   gen_rtx_SYMBOL_REF (SImode, buf));
6054 	}
6055     }
6056   /* Divisor isn't a special constant.  */
6057   else
6058     {
6059       if (unsignedp)
6060 	{
6061 	  import_milli (divU);
6062 	  return pa_output_millicode_call (insn,
6063 					gen_rtx_SYMBOL_REF (SImode, "$$divU"));
6064 	}
6065       else
6066 	{
6067 	  import_milli (divI);
6068 	  return pa_output_millicode_call (insn,
6069 					gen_rtx_SYMBOL_REF (SImode, "$$divI"));
6070 	}
6071     }
6072 }
6073 
6074 /* Output a $$rem millicode to do mod.  */
6075 
6076 const char *
pa_output_mod_insn(int unsignedp,rtx_insn * insn)6077 pa_output_mod_insn (int unsignedp, rtx_insn *insn)
6078 {
6079   if (unsignedp)
6080     {
6081       import_milli (remU);
6082       return pa_output_millicode_call (insn,
6083 				       gen_rtx_SYMBOL_REF (SImode, "$$remU"));
6084     }
6085   else
6086     {
6087       import_milli (remI);
6088       return pa_output_millicode_call (insn,
6089 				       gen_rtx_SYMBOL_REF (SImode, "$$remI"));
6090     }
6091 }
6092 
/* Output the .CALL argument-relocation descriptor for CALL_INSN,
   describing whether each of the four argument words (ARGW0..ARGW3)
   is passed in a general register ("GR") or a floating register
   ("FR"/"FU").  Used only for the 32-bit SOM runtime.  */
void
pa_output_arg_descriptor (rtx_insn *call_insn)
{
  const char *arg_regs[4];	/* descriptor keyword per argument word */
  machine_mode arg_mode;
  rtx link;
  int i, output_flag = 0;
  int regno;

  /* We neither need nor want argument location descriptors for the
     64bit runtime environment or the ELF32 environment.  */
  if (TARGET_64BIT || TARGET_ELF32)
    return;

  for (i = 0; i < 4; i++)
    arg_regs[i] = 0;

  /* Specify explicitly that no argument relocations should take place
     if using the portable runtime calling conventions.  */
  if (TARGET_PORTABLE_RUNTIME)
    {
      fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
	     asm_out_file);
      return;
    }

  /* Walk the USEs attached to the call to find which argument
     registers are live at the call.  */
  gcc_assert (CALL_P (call_insn));
  for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
       link; link = XEXP (link, 1))
    {
      rtx use = XEXP (link, 0);

      if (! (GET_CODE (use) == USE
	     && GET_CODE (XEXP (use, 0)) == REG
	     && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
	continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));
      if (regno >= 23 && regno <= 26)
	{
	  /* General argument registers %r26..%r23 map to argument
	     words 0..3; a DImode value also occupies the next word.  */
	  arg_regs[26 - regno] = "GR";
	  if (arg_mode == DImode)
	    arg_regs[25 - regno] = "GR";
	}
      else if (regno >= 32 && regno <= 39)
	{
	  /* Floating argument registers.  SFmode uses one word;
	     wider modes use an FR/FU pair whose order depends on
	     HP_FP_ARG_DESCRIPTOR_REVERSED.  */
	  if (arg_mode == SFmode)
	    arg_regs[(regno - 32) / 2] = "FR";
	  else
	    {
#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
	      arg_regs[(regno - 34) / 2] = "FR";
	      arg_regs[(regno - 34) / 2 + 1] = "FU";
#else
	      arg_regs[(regno - 34) / 2] = "FU";
	      arg_regs[(regno - 34) / 2 + 1] = "FR";
#endif
	    }
	}
    }
  /* Emit the descriptor, separating the ARGWn entries with commas.  */
  fputs ("\t.CALL ", asm_out_file);
  for (i = 0; i < 4; i++)
    {
      if (arg_regs[i])
	{
	  if (output_flag++)
	    fputc (',', asm_out_file);
	  fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
	}
    }
  fputc ('\n', asm_out_file);
}
6166 
/* Inform reload about cases where moving X with a mode MODE to or from
   a register in RCLASS requires an extra scratch or immediate register.
   Return the class needed for the immediate register.

   IN_P is true for a load (X -> RCLASS) and false for a store.  SRI
   receives the insn code of the secondary-reload pattern to use, when
   one is needed; the returned class is NO_REGS in those cases.  */

static reg_class_t
pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
		     machine_mode mode, secondary_reload_info *sri)
{
  int regno;
  enum reg_class rclass = (enum reg_class) rclass_i;

  /* Handle the easy stuff first.  */
  if (rclass == R1_REGS)
    return NO_REGS;

  if (REG_P (x))
    {
      regno = REGNO (x);
      if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
	return NO_REGS;
    }
  else
    regno = -1;

  /* If we have something like (mem (mem (...)), we can safely assume the
     inner MEM will end up in a general register after reloading, so there's
     no need for a secondary reload.  */
  if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
    return NO_REGS;

  /* Trying to load a constant into a FP register during PIC code
     generation requires %r1 as a scratch register.  For float modes,
     the only legitimate constant is CONST0_RTX.  However, there are
     a few patterns that accept constant double operands.  */
  if (flag_pic
      && FP_REG_CLASS_P (rclass)
      && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
    {
      switch (mode)
	{
	case E_SImode:
	  sri->icode = CODE_FOR_reload_insi_r1;
	  break;

	case E_DImode:
	  sri->icode = CODE_FOR_reload_indi_r1;
	  break;

	case E_SFmode:
	  sri->icode = CODE_FOR_reload_insf_r1;
	  break;

	case E_DFmode:
	  sri->icode = CODE_FOR_reload_indf_r1;
	  break;

	default:
	  gcc_unreachable ();
	}
      return NO_REGS;
    }

  /* Secondary reloads of symbolic expressions require %r1 as a scratch
     register when we're generating PIC code or when the operand isn't
     readonly.  */
  if (pa_symbolic_expression_p (x))
    {
      if (GET_CODE (x) == HIGH)
	x = XEXP (x, 0);

      if (flag_pic || !read_only_operand (x, VOIDmode))
	{
	  switch (mode)
	    {
	    case E_SImode:
	      sri->icode = CODE_FOR_reload_insi_r1;
	      break;

	    case E_DImode:
	      sri->icode = CODE_FOR_reload_indi_r1;
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  return NO_REGS;
	}
    }

  /* Profiling showed the PA port spends about 1.3% of its compilation
     time in true_regnum from calls inside pa_secondary_reload_class.  */
  if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
    regno = true_regnum (x);

  /* Handle reloads for floating point loads and stores.  */
  if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
      && FP_REG_CLASS_P (rclass))
    {
      if (MEM_P (x))
	{
	  x = XEXP (x, 0);

	  /* We don't need a secondary reload for indexed memory addresses.

	     When INT14_OK_STRICT is true, it might appear that we could
	     directly allow register indirect memory addresses.  However,
	     this doesn't work because we don't support SUBREGs in
	     floating-point register copies and reload doesn't tell us
	     when it's going to use a SUBREG.  */
	  if (IS_INDEX_ADDR_P (x))
	    return NO_REGS;
	}

      /* Request a secondary reload with a general scratch register
	 for everything else.  ??? Could symbolic operands be handled
	 directly when generating non-pic PA 2.0 code?  */
      sri->icode = (in_p
		    ? direct_optab_handler (reload_in_optab, mode)
		    : direct_optab_handler (reload_out_optab, mode));
      return NO_REGS;
    }

  /* A SAR<->FP register copy requires an intermediate general register
     and secondary memory.  We need a secondary reload with a general
     scratch register for spills.  */
  if (rclass == SHIFT_REGS)
    {
      /* Handle spill.  */
      if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
	{
	  sri->icode = (in_p
			? direct_optab_handler (reload_in_optab, mode)
			: direct_optab_handler (reload_out_optab, mode));
	  return NO_REGS;
	}

      /* Handle FP copy.  */
      if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
	return GENERAL_REGS;
    }

  /* The mirror case: copying a SHIFT_REGS hard register into an FP
     class also needs a general-register intermediary.  */
  if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
      && REGNO_REG_CLASS (regno) == SHIFT_REGS
      && FP_REG_CLASS_P (rclass))
    return GENERAL_REGS;

  return NO_REGS;
}
6315 
/* Implement TARGET_SECONDARY_MEMORY_NEEDED.  */

static bool
pa_secondary_memory_needed (machine_mode mode ATTRIBUTE_UNUSED,
			    reg_class_t class1 ATTRIBUTE_UNUSED,
			    reg_class_t class2 ATTRIBUTE_UNUSED)
{
  /* Defer to the target headers when they provide a definition;
     otherwise no register-class pair needs to copy through memory.  */
#ifdef PA_SECONDARY_MEMORY_NEEDED
  return PA_SECONDARY_MEMORY_NEEDED (mode, class1, class2);
#else
  return false;
#endif
}
6329 
6330 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY.  The argument pointer
6331    is only marked as live on entry by df-scan when it is a fixed
6332    register.  It isn't a fixed register in the 64-bit runtime,
6333    so we need to mark it here.  */
6334 
6335 static void
pa_extra_live_on_entry(bitmap regs)6336 pa_extra_live_on_entry (bitmap regs)
6337 {
6338   if (TARGET_64BIT)
6339     bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6340 }
6341 
6342 /* Implement EH_RETURN_HANDLER_RTX.  The MEM needs to be volatile
6343    to prevent it from being deleted.  */
6344 
6345 rtx
pa_eh_return_handler_rtx(void)6346 pa_eh_return_handler_rtx (void)
6347 {
6348   rtx tmp;
6349 
6350   tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6351 		      TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6352   tmp = gen_rtx_MEM (word_mode, tmp);
6353   tmp->volatil = 1;
6354   return tmp;
6355 }
6356 
6357 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6358    by invisible reference.  As a GCC extension, we also pass anything
6359    with a zero or variable size by reference.
6360 
6361    The 64-bit runtime does not describe passing any types by invisible
6362    reference.  The internals of GCC can't currently handle passing
6363    empty structures, and zero or variable length arrays when they are
6364    not passed entirely on the stack or by reference.  Thus, as a GCC
6365    extension, we pass these types by reference.  The HP compiler doesn't
6366    support these types, so hopefully there shouldn't be any compatibility
6367    issues.  This may have to be revisited when HP releases a C99 compiler
6368    or updates the ABI.  */
6369 
6370 static bool
pa_pass_by_reference(cumulative_args_t,const function_arg_info & arg)6371 pa_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6372 {
6373   HOST_WIDE_INT size = arg.type_size_in_bytes ();
6374   if (TARGET_64BIT)
6375     return size <= 0;
6376   else
6377     return size <= 0 || size > 8;
6378 }
6379 
6380 /* Implement TARGET_FUNCTION_ARG_PADDING.  */
6381 
6382 static pad_direction
pa_function_arg_padding(machine_mode mode,const_tree type)6383 pa_function_arg_padding (machine_mode mode, const_tree type)
6384 {
6385   if (mode == BLKmode
6386       || (TARGET_64BIT
6387 	  && type
6388 	  && (AGGREGATE_TYPE_P (type)
6389 	      || TREE_CODE (type) == COMPLEX_TYPE
6390 	      || TREE_CODE (type) == VECTOR_TYPE)))
6391     {
6392       /* Return PAD_NONE if justification is not required.  */
6393       if (type
6394 	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6395 	  && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6396 	return PAD_NONE;
6397 
6398       /* The directions set here are ignored when a BLKmode argument larger
6399 	 than a word is placed in a register.  Different code is used for
6400 	 the stack and registers.  This makes it difficult to have a
6401 	 consistent data representation for both the stack and registers.
6402 	 For both runtimes, the justification and padding for arguments on
6403 	 the stack and in registers should be identical.  */
6404       if (TARGET_64BIT)
6405 	/* The 64-bit runtime specifies left justification for aggregates.  */
6406 	return PAD_UPWARD;
6407       else
6408 	/* The 32-bit runtime architecture specifies right justification.
6409 	   When the argument is passed on the stack, the argument is padded
6410 	   with garbage on the left.  The HP compiler pads with zeros.  */
6411 	return PAD_DOWNWARD;
6412     }
6413 
6414   if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6415     return PAD_DOWNWARD;
6416   else
6417     return PAD_NONE;
6418 }
6419 
6420 
6421 /* Do what is necessary for `va_start'.  We look at the current function
6422    to determine if stdargs or varargs is used and fill in an initial
6423    va_list.  A pointer to this constructor is returned.  */
6424 
static rtx
hppa_builtin_saveregs (void)
{
  rtx offset, dest;
  tree fntype = TREE_TYPE (current_function_decl);
  /* For a non-stdarg function (old-style varargs), back up one word;
     presumably the last "named" argument is really anonymous there —
     NOTE(review): confirm against the varargs.h conventions.  */
  int argadj = ((!stdarg_p (fntype))
		? UNITS_PER_WORD : 0);

  /* OFFSET is the distance from the incoming argument pointer to the
     first anonymous argument.  */
  if (argadj)
    offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
  else
    offset = crtl->args.arg_offset_rtx;

  if (TARGET_64BIT)
    {
      int i, off;

      /* Adjust for varargs/stdarg differences.  The adjustment is
	 negated relative to the 32-bit case above.  */
      if (argadj)
	offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
      else
	offset = crtl->args.arg_offset_rtx;

      /* We need to save %r26 .. %r19 inclusive starting at offset -64
	 from the incoming arg pointer and growing to larger addresses.  */
      for (i = 26, off = -64; i >= 19; i--, off += 8)
	emit_move_insn (gen_rtx_MEM (word_mode,
				     plus_constant (Pmode,
						    arg_pointer_rtx, off)),
			gen_rtx_REG (word_mode, i));

      /* The incoming args pointer points just beyond the flushback area;
	 normally this is not a serious concern.  However, when we are doing
	 varargs/stdargs we want to make the arg pointer point to the start
	 of the incoming argument area.  */
      emit_move_insn (virtual_incoming_args_rtx,
		      plus_constant (Pmode, arg_pointer_rtx, -64));

      /* Now return a pointer to the first anonymous argument.  */
      return copy_to_reg (expand_binop (Pmode, add_optab,
					virtual_incoming_args_rtx,
					offset, 0, 0, OPTAB_LIB_WIDEN));
    }

  /* 32-bit runtime: store general registers %r23..%r26 on the stack in
     the 16 bytes below the internal argument pointer.  */
  dest = gen_rtx_MEM (BLKmode,
		      plus_constant (Pmode, crtl->args.internal_arg_pointer,
				     -16));
  set_mem_alias_set (dest, get_varargs_alias_set ());
  set_mem_align (dest, BITS_PER_WORD);
  move_block_from_reg (23, dest, 4);

  /* move_block_from_reg will emit code to store the argument registers
     individually as scalar stores.

     However, other insns may later load from the same addresses for
     a structure load (passing a struct to a varargs routine).

     The alias code assumes that such aliasing can never happen, so we
     have to keep memory referencing insns from moving up beyond the
     last argument register store.  So we emit a blockage insn here.  */
  emit_insn (gen_blockage ());

  return copy_to_reg (expand_binop (Pmode, add_optab,
				    crtl->args.internal_arg_pointer,
				    offset, 0, 0, OPTAB_LIB_WIDEN));
}
6492 
/* Implement va_start: spill the argument registers via
   hppa_builtin_saveregs and initialize VALIST with the resulting
   pointer; the caller-supplied NEXTARG is deliberately ignored.  */
static void
hppa_va_start (tree valist, rtx nextarg)
{
  nextarg = expand_builtin_saveregs ();
  std_expand_builtin_va_start (valist, nextarg);
}
6499 
6500 static tree
hppa_gimplify_va_arg_expr(tree valist,tree type,gimple_seq * pre_p,gimple_seq * post_p)6501 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6502 			   gimple_seq *post_p)
6503 {
6504   if (TARGET_64BIT)
6505     {
6506       /* Args grow upward.  We can use the generic routines.  */
6507       return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6508     }
6509   else /* !TARGET_64BIT */
6510     {
6511       tree ptr = build_pointer_type (type);
6512       tree valist_type;
6513       tree t, u;
6514       unsigned int size, ofs;
6515       bool indirect;
6516 
6517       indirect = pass_va_arg_by_reference (type);
6518       if (indirect)
6519 	{
6520 	  type = ptr;
6521 	  ptr = build_pointer_type (type);
6522 	}
6523       size = int_size_in_bytes (type);
6524       valist_type = TREE_TYPE (valist);
6525 
6526       /* Args grow down.  Not handled by generic routines.  */
6527 
6528       u = fold_convert (sizetype, size_in_bytes (type));
6529       u = fold_build1 (NEGATE_EXPR, sizetype, u);
6530       t = fold_build_pointer_plus (valist, u);
6531 
6532       /* Align to 4 or 8 byte boundary depending on argument size.  */
6533 
6534       u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6535       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6536       t = fold_convert (valist_type, t);
6537 
6538       t = build2 (MODIFY_EXPR, valist_type, valist, t);
6539 
6540       ofs = (8 - size) % 4;
6541       if (ofs != 0)
6542 	t = fold_build_pointer_plus_hwi (t, ofs);
6543 
6544       t = fold_convert (ptr, t);
6545       t = build_va_arg_indirect_ref (t);
6546 
6547       if (indirect)
6548 	t = build_va_arg_indirect_ref (t);
6549 
6550       return t;
6551     }
6552 }
6553 
6554 /* True if MODE is valid for the target.  By "valid", we mean able to
6555    be manipulated in non-trivial ways.  In particular, this means all
6556    the arithmetic is supported.  */
6557 
6558 static bool
pa_scalar_mode_supported_p(scalar_mode mode)6559 pa_scalar_mode_supported_p (scalar_mode mode)
6560 {
6561   int precision = GET_MODE_PRECISION (mode);
6562 
6563   if (TARGET_64BIT && mode == TImode)
6564     return true;
6565 
6566   switch (GET_MODE_CLASS (mode))
6567     {
6568     case MODE_PARTIAL_INT:
6569     case MODE_INT:
6570       if (precision == CHAR_TYPE_SIZE)
6571 	return true;
6572       if (precision == SHORT_TYPE_SIZE)
6573 	return true;
6574       if (precision == INT_TYPE_SIZE)
6575 	return true;
6576       if (precision == LONG_TYPE_SIZE)
6577 	return true;
6578       if (precision == LONG_LONG_TYPE_SIZE)
6579 	return true;
6580       return false;
6581 
6582     case MODE_FLOAT:
6583       if (precision == FLOAT_TYPE_SIZE)
6584 	return true;
6585       if (precision == DOUBLE_TYPE_SIZE)
6586 	return true;
6587       if (precision == LONG_DOUBLE_TYPE_SIZE)
6588 	return true;
6589       return false;
6590 
6591     case MODE_DECIMAL_FLOAT:
6592       return false;
6593 
6594     default:
6595       gcc_unreachable ();
6596     }
6597 }
6598 
6599 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6600    it branches into the delay slot.  Otherwise, return FALSE.  */
6601 
6602 static bool
branch_to_delay_slot_p(rtx_insn * insn)6603 branch_to_delay_slot_p (rtx_insn *insn)
6604 {
6605   rtx_insn *jump_insn;
6606 
6607   if (dbr_sequence_length ())
6608     return FALSE;
6609 
6610   jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6611   while (insn)
6612     {
6613       insn = next_active_insn (insn);
6614       if (jump_insn == insn)
6615 	return TRUE;
6616 
6617       /* We can't rely on the length of asms.  So, we return FALSE when
6618 	 the branch is followed by an asm.  */
6619       if (!insn
6620 	  || GET_CODE (PATTERN (insn)) == ASM_INPUT
6621 	  || asm_noperands (PATTERN (insn)) >= 0
6622 	  || get_attr_length (insn) > 0)
6623 	break;
6624     }
6625 
6626   return FALSE;
6627 }
6628 
6629 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6630 
6631    This occurs when INSN has an unfilled delay slot and is followed
6632    by an asm.  Disaster can occur if the asm is empty and the jump
6633    branches into the delay slot.  So, we add a nop in the delay slot
6634    when this occurs.  */
6635 
static bool
branch_needs_nop_p (rtx_insn *insn)
{
  rtx_insn *jump_insn;

  /* Nothing to do when the delay slot is already filled.  */
  if (dbr_sequence_length ())
    return FALSE;

  jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
  while (insn)
    {
      insn = next_active_insn (insn);
      /* A nop is needed if we reach the branch target (or run out of
	 insns) without having seen a real instruction in between.  */
      if (!insn || jump_insn == insn)
	return TRUE;

      /* Stop at the first non-asm insn with a nonzero length; asms are
	 skipped because their length can't be relied upon.  */
      if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
	   || asm_noperands (PATTERN (insn)) >= 0)
	  && get_attr_length (insn) > 0)
	break;
    }

  return FALSE;
}
6659 
6660 /* Return TRUE if INSN, a forward jump insn, can use nullification
6661    to skip the following instruction.  This avoids an extra cycle due
6662    to a mis-predicted branch when we fall through.  */
6663 
6664 static bool
use_skip_p(rtx_insn * insn)6665 use_skip_p (rtx_insn *insn)
6666 {
6667   rtx_insn *jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6668 
6669   while (insn)
6670     {
6671       insn = next_active_insn (insn);
6672 
6673       /* We can't rely on the length of asms, so we can't skip asms.  */
6674       if (!insn
6675 	  || GET_CODE (PATTERN (insn)) == ASM_INPUT
6676 	  || asm_noperands (PATTERN (insn)) >= 0)
6677 	break;
6678       if (get_attr_length (insn) == 4
6679 	  && jump_insn == next_active_insn (insn))
6680 	return TRUE;
6681       if (get_attr_length (insn) > 0)
6682 	break;
6683     }
6684 
6685   return FALSE;
6686 }
6687 
6688 /* This routine handles all the normal conditional branch sequences we
6689    might need to generate.  It handles compare immediate vs compare
6690    register, nullification of delay slots, varying length branches,
6691    negated branches, and all combinations of the above.  It returns the
6692    output appropriate to emit the branch corresponding to all given
6693    parameters.  */
6694 
const char *
pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
{
  static char buf[100];
  bool useskip;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot)
     is asking for a disaster.  This can happen when not optimizing and
     when jump optimization fails.

     While it is usually safe to emit nothing, this can fail if the
     preceding instruction is a nullified branch with an empty delay
     slot and the same branch target as this branch.  We could check
     for this but jump optimization should eliminate nop jumps.  It
     is always safe to emit a nop.  */
  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* The doubleword form of the cmpib instruction doesn't have the LEU
     and GTU conditions while the cmpb instruction does.  Since we accept
     zero for cmpb, we must ensure that we use cmpb for the comparison.  */
  if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
    operands[2] = gen_rtx_REG (DImode, 0);
  if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
    operands[1] = gen_rtx_REG (DImode, 0);

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with a
     comclr instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;

  switch (length)
    {
      /* All short conditional branches except backwards with an unfilled
	 delay slot.  */
      case 4:
	if (useskip)
	  strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
	else
	  strcpy (buf, "{com%I2b,|cmp%I2b,}");
	/* Doubleword comparisons need the '*' condition completer.  */
	if (GET_MODE (operands[1]) == DImode)
	  strcat (buf, "*");
	/* %S3/%B3 print the condition and its negation respectively —
	   NOTE(review): confirm against pa_print_operand.  */
	if (negated)
	  strcat (buf, "%B3");
	else
	  strcat (buf, "%S3");
	if (useskip)
	  strcat (buf, " %2,%r1,%%r0");
	else if (nullify)
	  {
	    if (branch_needs_nop_p (insn))
	      strcat (buf, ",n %2,%r1,%0%#");
	    else
	      strcat (buf, ",n %2,%r1,%0");
	  }
	else
	  strcat (buf, " %2,%r1,%0");
	break;

     /* All long conditionals.  Note a short backward branch with an
	unfilled delay slot is treated just like a long backward branch
	with an unfilled delay slot.  */
      case 8:
	/* Handle weird backwards branch with a filled delay slot
	   which is nullified.  The compare skips over an unconditional
	   branch to the real target, so the condition sense is inverted
	   relative to the short form above.  */
	if (dbr_sequence_length () != 0
	    && ! forward_branch_p (insn)
	    && nullify)
	  {
	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
	    if (GET_MODE (operands[1]) == DImode)
	      strcat (buf, "*");
	    if (negated)
	      strcat (buf, "%S3");
	    else
	      strcat (buf, "%B3");
	    strcat (buf, ",n %2,%r1,.+12\n\tb %0");
	  }
	/* Handle short backwards branch with an unfilled delay slot.
	   Using a comb;nop rather than comiclr;bl saves 1 cycle for both
	   taken and untaken branches.  */
	else if (dbr_sequence_length () == 0
		 && ! forward_branch_p (insn)
		 && INSN_ADDRESSES_SET_P ()
		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	  {
	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
	    if (GET_MODE (operands[1]) == DImode)
	      strcat (buf, "*");
	    if (negated)
	      strcat (buf, "%B3 %2,%r1,%0%#");
	    else
	      strcat (buf, "%S3 %2,%r1,%0%#");
	  }
	else
	  {
	    /* General long branch: clear-on-condition to skip an
	       unconditional branch to the target.  */
	    strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
	    if (GET_MODE (operands[1]) == DImode)
	      strcat (buf, "*");
	    if (negated)
	      strcat (buf, "%S3");
	    else
	      strcat (buf, "%B3");
	    if (nullify)
	      strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
	    else
	      strcat (buf, " %2,%r1,%%r0\n\tb %0");
	  }
	break;

      default:
	/* The reversed conditional branch must branch over one additional
	   instruction if the delay slot is filled and needs to be extracted
	   by pa_output_lbranch.  If the delay slot is empty or this is a
	   nullified forward branch, the instruction after the reversed
	   condition branch must be nullified.  */
	if (dbr_sequence_length () == 0
	    || (nullify && forward_branch_p (insn)))
	  {
	    nullify = 1;
	    xdelay = 0;
	    operands[4] = GEN_INT (length);
	  }
	else
	  {
	    xdelay = 1;
	    operands[4] = GEN_INT (length + 4);
	  }

	/* Create a reversed conditional branch which branches around
	   the following insns.  */
	if (GET_MODE (operands[1]) != DImode)
	  {
	    if (nullify)
	      {
		if (negated)
		  strcpy (buf,
		    "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
		else
		  strcpy (buf,
		    "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
	      }
	    else
	      {
		if (negated)
		  strcpy (buf,
		    "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
		else
		  strcpy (buf,
		    "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
	      }
	  }
	else
	  {
	    /* Doubleword variants carry the '*' completer.  */
	    if (nullify)
	      {
		if (negated)
		  strcpy (buf,
		    "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
		else
		  strcpy (buf,
		    "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
	      }
	    else
	      {
		if (negated)
		  strcpy (buf,
		    "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
		else
		  strcpy (buf,
		    "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
	      }
	  }

	/* Emit the reversed branch, then the (possibly extracted-delay)
	   long branch to the real target.  */
	output_asm_insn (buf, operands);
	return pa_output_lbranch (operands[0], insn, xdelay);
    }
  return buf;
}
6888 
6889 /* Output a PIC pc-relative instruction sequence to load the address of
6890    OPERANDS[0] to register OPERANDS[2].  OPERANDS[0] is a symbol ref
6891    or a code label.  OPERANDS[1] specifies the register to use to load
6892    the program counter.  OPERANDS[3] may be used for label generation
6893    The sequence is always three instructions in length.  The program
6894    counter recorded for PA 1.X is eight bytes more than that for PA 2.0.
6895    Register %r1 is clobbered.  */
6896 
static void
pa_output_pic_pcrel_sequence (rtx *operands)
{
  gcc_assert (SYMBOL_REF_P (operands[0]) || LABEL_P (operands[0]));
  if (TARGET_PA_20)
    {
      /* We can use mfia to determine the current program counter.  */
      if (TARGET_SOM || !TARGET_GAS)
	{
	  /* Without GAS's $PIC_pcrel$ support, emit a local label at the
	     mfia and express the offset relative to that label.  */
	  operands[3] = gen_label_rtx ();
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (operands[3]));
	  output_asm_insn ("mfia %1", operands);
	  output_asm_insn ("addil L'%0-%l3,%1", operands);
	  output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
	}
      else
	{
	  /* GAS resolves $PIC_pcrel$0; the +12/+16 adjust for the
	     distance from the mfia to each use.  */
	  output_asm_insn ("mfia %1", operands);
	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands);
	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands);
	}
    }
  else
    {
      /* We need to use a branch to determine the current program counter.
	 "{bl|b,l} .+8" links the PC into %1 without changing control
	 flow.  */
      output_asm_insn ("{bl|b,l} .+8,%1", operands);
      if (TARGET_SOM || !TARGET_GAS)
	{
	  /* The label is placed between the addil and ldo; the recorded
	     PC for PA 1.X is eight bytes more than for PA 2.0.  */
	  operands[3] = gen_label_rtx ();
	  output_asm_insn ("addil L'%0-%l3,%1", operands);
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (operands[3]));
	  output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
	}
      else
	{
	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands);
	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands);
	}
    }
}
6939 
6940 /* This routine handles output of long unconditional branches that
6941    exceed the maximum range of a simple branch instruction.  Since
6942    we don't have a register available for the branch, we save register
6943    %r1 in the frame marker, load the branch destination DEST into %r1,
6944    execute the branch, and restore %r1 in the delay slot of the branch.
6945 
6946    Since long branches may have an insn in the delay slot and the
6947    delay slot is used to restore %r1, we in general need to extract
6948    this insn and execute it before the branch.  However, to facilitate
6949    use of this function by conditional branches, we also provide an
6950    option to not extract the delay insn so that it will be emitted
6951    after the long branch.  So, if there is an insn in the delay slot,
6952    it is extracted if XDELAY is nonzero.
6953 
6954    The lengths of the various long-branch sequences are 20, 16 and 24
6955    bytes for the portable runtime, non-PIC and PIC cases, respectively.  */
6956 
const char *
pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
{
  rtx xoperands[4];

  xoperands[0] = dest;

  /* First, free up the delay slot.  */
  if (xdelay && dbr_sequence_length () != 0)
    {
      /* We can't handle a jump in the delay slot.  */
      gcc_assert (! JUMP_P (NEXT_INSN (insn)));

      /* Emit the delay insn ahead of the branch sequence.  */
      final_scan_insn (NEXT_INSN (insn), asm_out_file,
		       optimize, 0, NULL);

      /* Now delete the delay insn.  */
      SET_INSN_DELETED (NEXT_INSN (insn));
    }

  /* Output an insn to save %r1.  The runtime documentation doesn't
     specify whether the "Clean Up" slot in the callers frame can
     be clobbered by the callee.  It isn't copied by HP's builtin
     alloca, so this suggests that it can be clobbered if necessary.
     The "Static Link" location is copied by HP builtin alloca, so
     we avoid using it.  Using the cleanup slot might be a problem
     if we have to interoperate with languages that pass cleanup
     information.  However, it should be possible to handle these
     situations with GCC's asm feature.

     The "Current RP" slot is reserved for the called procedure, so
     we try to use it when we don't have a frame of our own.  It's
     rather unlikely that we won't have a frame when we need to emit
     a very long branch.

     Really the way to go long term is a register scavenger; goto
     the target of the jump and find a register which we can use
     as a scratch to hold the value in %r1.  Then, we wouldn't have
     to free up the delay slot or clobber a slot that may be needed
     for other purposes.  */
  if (TARGET_64BIT)
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
	/* Use the return pointer slot in the frame marker.  */
	output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
      else
	/* Use the slot at -40 in the frame marker since HP builtin
	   alloca doesn't copy it.  */
	output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
    }
  else
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
	/* Use the return pointer slot in the frame marker.  */
	output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
      else
	/* Use the "Clean Up" slot in the frame marker.  In GCC,
	   the only other use of this location is for copying a
	   floating point double argument from a floating-point
	   register to two general registers.  The copy is done
	   as an "atomic" operation when outputting a call, so it
	   won't interfere with our using the location here.  */
	output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
    }

  /* Load the branch destination into %r1 and branch through it.  The
     sequence chosen depends on the runtime/PIC model (20, 16 and 24
     bytes for portable runtime, non-PIC and PIC respectively).  */
  if (TARGET_PORTABLE_RUNTIME)
    {
      output_asm_insn ("ldil L'%0,%%r1", xoperands);
      output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
      output_asm_insn ("bv %%r0(%%r1)", xoperands);
    }
  else if (flag_pic)
    {
      xoperands[1] = gen_rtx_REG (Pmode, 1);
      xoperands[2] = xoperands[1];
      pa_output_pic_pcrel_sequence (xoperands);
      output_asm_insn ("bv %%r0(%%r1)", xoperands);
    }
  else
    /* Now output a very long branch to the original target.  */
    output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);

  /* Now restore the value of %r1 in the delay slot.  The returned
     template must reload from the same slot chosen above.  */
  if (TARGET_64BIT)
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
	return "ldd -16(%%r30),%%r1";
      else
	return "ldd -40(%%r30),%%r1";
    }
  else
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
	return "ldw -20(%%r30),%%r1";
      else
	return "ldw -12(%%r30),%%r1";
    }
}
7055 
7056 /* This routine handles all the branch-on-bit conditional branch sequences we
7057    might need to generate.  It handles nullification of delay slots,
7058    varying length branches, negated branches and all combinations of the
7059    above.  it returns the appropriate output template to emit the branch.  */
7060 
const char *
pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
{
  static char buf[100];
  bool useskip;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  I do not think this can happen as this pattern
     is only used when optimizing; jump optimization should eliminate the
     jump.  But be prepared just in case.  */

  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with a
     extrs instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;

  switch (length)
    {

      /* All short conditional branches except backwards with an unfilled
	 delay slot.  */
      case 4:
	if (useskip)
	  strcpy (buf, "{extrs,|extrw,s,}");
	else
	  strcpy (buf, "bb,");
	/* Doubleword operands use the 64-bit mnemonics; note these
	   strcpy calls overwrite the choice just made above.  */
	if (useskip && GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "extrd,s,*");
	else if (GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "bb,*");
	/* WHICH selects which of the two bit-branch patterns this is;
	   combined with NEGATED it picks the sense of the condition.  */
	if ((which == 0 && negated)
	     || (which == 1 && ! negated))
	  strcat (buf, ">=");
	else
	  strcat (buf, "<");
	if (useskip)
	  strcat (buf, " %0,%1,1,%%r0");
	else if (nullify && negated)
	  {
	    if (branch_needs_nop_p (insn))
	      strcat (buf, ",n %0,%1,%3%#");
	    else
	      strcat (buf, ",n %0,%1,%3");
	  }
	else if (nullify && ! negated)
	  {
	    if (branch_needs_nop_p (insn))
	      strcat (buf, ",n %0,%1,%2%#");
	    else
	      strcat (buf, ",n %0,%1,%2");
	  }
	else if (! nullify && negated)
	  strcat (buf, " %0,%1,%3");
	else if (! nullify && ! negated)
	  strcat (buf, " %0,%1,%2");
	break;

     /* All long conditionals.  Note a short backward branch with an
	unfilled delay slot is treated just like a long backward branch
	with an unfilled delay slot.  */
      case 8:
	/* Handle weird backwards branch with a filled delay slot
	   which is nullified.  The condition sense is inverted because
	   the bb skips over an unconditional branch to the target.  */
	if (dbr_sequence_length () != 0
	    && ! forward_branch_p (insn)
	    && nullify)
	  {
	    strcpy (buf, "bb,");
	    if (GET_MODE (operands[0]) == DImode)
	      strcat (buf, "*");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, "<");
	    else
	      strcat (buf, ">=");
	    if (negated)
	      strcat (buf, ",n %0,%1,.+12\n\tb %3");
	    else
	      strcat (buf, ",n %0,%1,.+12\n\tb %2");
	  }
	/* Handle short backwards branch with an unfilled delay slot.
	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
	   taken and untaken branches.  */
	else if (dbr_sequence_length () == 0
		 && ! forward_branch_p (insn)
		 && INSN_ADDRESSES_SET_P ()
		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	  {
	    strcpy (buf, "bb,");
	    if (GET_MODE (operands[0]) == DImode)
	      strcat (buf, "*");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, ">=");
	    else
	      strcat (buf, "<");
	    if (negated)
	      strcat (buf, " %0,%1,%3%#");
	    else
	      strcat (buf, " %0,%1,%2%#");
	  }
	else
	  {
	    /* General long branch: extract the bit into %r0 (nullifying
	       on condition) then branch unconditionally.  */
	    if (GET_MODE (operands[0]) == DImode)
	      strcpy (buf, "extrd,s,*");
	    else
	      strcpy (buf, "{extrs,|extrw,s,}");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, "<");
	    else
	      strcat (buf, ">=");
	    if (nullify && negated)
	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
	    else if (nullify && ! negated)
	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
	    else if (negated)
	      strcat (buf, " %0,%1,1,%%r0\n\tb %3");
	    else
	      strcat (buf, " %0,%1,1,%%r0\n\tb %2");
	  }
	break;

      default:
	/* The reversed conditional branch must branch over one additional
	   instruction if the delay slot is filled and needs to be extracted
	   by pa_output_lbranch.  If the delay slot is empty or this is a
	   nullified forward branch, the instruction after the reversed
	   condition branch must be nullified.  */
	if (dbr_sequence_length () == 0
	    || (nullify && forward_branch_p (insn)))
	  {
	    nullify = 1;
	    xdelay = 0;
	    operands[4] = GEN_INT (length);
	  }
	else
	  {
	    xdelay = 1;
	    operands[4] = GEN_INT (length + 4);
	  }

	if (GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "bb,*");
	else
	  strcpy (buf, "bb,");
	if ((which == 0 && negated)
	    || (which == 1 && !negated))
	  strcat (buf, "<");
	else
	  strcat (buf, ">=");
	if (nullify)
	  strcat (buf, ",n %0,%1,.+%4");
	else
	  strcat (buf, " %0,%1,.+%4");
	output_asm_insn (buf, operands);
	/* Emit the long branch to whichever target the condition
	   selects (operands[3] when negated, else operands[2]).  */
	return pa_output_lbranch (negated ? operands[3] : operands[2],
				  insn, xdelay);
    }
  return buf;
}
7239 
/* This routine handles all the branch-on-variable-bit conditional branch
   sequences we might need to generate.  It handles nullification of delay
   slots, varying length branches, negated branches and all combinations
   of the above.  It returns the appropriate output template to emit the
   branch.  WHICH and NEGATED jointly select the sense of the bit test;
   every case below combines them with the same
   (which == 0 && negated) || (which == 1 && !negated) expression.  */

const char *
pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
	       int which)
{
  /* BUF is static because the returned template pointer must remain
     valid after this function returns.  */
  static char buf[100];
  bool useskip;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  I do not think this can happen as this pattern
     is only used when optimizing; jump optimization should eliminate the
     jump.  But be prepared just in case.  */

  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with an
     extrs instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;

  switch (length)
    {

      /* All short conditional branches except backwards with an unfilled
	 delay slot.  */
      case 4:
	if (useskip)
	  strcpy (buf, "{vextrs,|extrw,s,}");
	else
	  strcpy (buf, "{bvb,|bb,}");
	/* DImode requires the 64-bit forms; these overwrite the 32-bit
	   opcode chosen above.  */
	if (useskip && GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "extrd,s,*");
	else if (GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "bb,*");
	if ((which == 0 && negated)
	     || (which == 1 && ! negated))
	  strcat (buf, ">=");
	else
	  strcat (buf, "<");
	if (useskip)
	  strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
	else if (nullify && negated)
	  {
	    if (branch_needs_nop_p (insn))
	      strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
	    else
	      strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
	  }
	else if (nullify && ! negated)
	  {
	    if (branch_needs_nop_p (insn))
	      strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
	    else
	      strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
	  }
	else if (! nullify && negated)
	  strcat (buf, "{ %0,%3| %0,%%sar,%3}");
	else if (! nullify && ! negated)
	  strcat (buf, "{ %0,%2| %0,%%sar,%2}");
	break;

     /* All long conditionals.  Note a short backward branch with an
	unfilled delay slot is treated just like a long backward branch
	with an unfilled delay slot.  */
      case 8:
	/* Handle weird backwards branch with a filled delay slot
	   which is nullified.  */
	if (dbr_sequence_length () != 0
	    && ! forward_branch_p (insn)
	    && nullify)
	  {
	    strcpy (buf, "{bvb,|bb,}");
	    if (GET_MODE (operands[0]) == DImode)
	      strcat (buf, "*");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, "<");
	    else
	      strcat (buf, ">=");
	    if (negated)
	      strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
	    else
	      strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
	  }
	/* Handle short backwards branch with an unfilled delay slot.
	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
	   taken and untaken branches.  */
	else if (dbr_sequence_length () == 0
		 && ! forward_branch_p (insn)
		 && INSN_ADDRESSES_SET_P ()
		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	  {
	    strcpy (buf, "{bvb,|bb,}");
	    if (GET_MODE (operands[0]) == DImode)
	      strcat (buf, "*");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, ">=");
	    else
	      strcat (buf, "<");
	    if (negated)
	      strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
	    else
	      strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
	  }
	else
	  {
	    strcpy (buf, "{vextrs,|extrw,s,}");
	    if (GET_MODE (operands[0]) == DImode)
	      strcpy (buf, "extrd,s,*");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, "<");
	    else
	      strcat (buf, ">=");
	    if (nullify && negated)
	      strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
	    else if (nullify && ! negated)
	      strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
	    else if (negated)
	      strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
	    else
	      strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
	  }
	break;

      default:
	/* The reversed conditional branch must branch over one additional
	   instruction if the delay slot is filled and needs to be extracted
	   by pa_output_lbranch.  If the delay slot is empty or this is a
	   nullified forward branch, the instruction after the reversed
	   condition branch must be nullified.  */
	if (dbr_sequence_length () == 0
	    || (nullify && forward_branch_p (insn)))
	  {
	    nullify = 1;
	    xdelay = 0;
	    operands[4] = GEN_INT (length);
	  }
	else
	  {
	    xdelay = 1;
	    operands[4] = GEN_INT (length + 4);
	  }

	if (GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "bb,*");
	else
	  strcpy (buf, "{bvb,|bb,}");
	if ((which == 0 && negated)
	    || (which == 1 && !negated))
	  strcat (buf, "<");
	else
	  strcat (buf, ">=");
	if (nullify)
	  strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
	else
	  strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
	/* Emit the reversed short branch here; the long branch to the
	   real target is produced by pa_output_lbranch.  */
	output_asm_insn (buf, operands);
	return pa_output_lbranch (negated ? operands[3] : operands[2],
				  insn, xdelay);
    }
  return buf;
}
7424 
/* Return the output template for emitting a dbra type insn.

   WHICH_ALTERNATIVE 0 is the normal case (loop counter in a general
   register); alternative 1 reloads the counter through an FP register,
   and any other value reloads it through memory.

   Note it may perform some output operations on its own before
   returning the final output string.  */
const char *
pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
{
  int length = get_attr_length (insn);

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (branch_to_delay_slot_p (insn))
    {
      /* The branch is useless; just perform the decrement for the
	 selected alternative.  */
      if (which_alternative == 0)
	return "ldo %1(%0),%0";
      else if (which_alternative == 1)
	{
	  output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
	  output_asm_insn ("ldw -16(%%r30),%4", operands);
	  output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
	  return "{fldws|fldw} -16(%%r30),%0";
	}
      else
	{
	  output_asm_insn ("ldw %0,%4", operands);
	  return "ldo %1(%4),%4\n\tstw %4,%0";
	}
    }

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int xdelay;

      /* If this is a long branch with its delay slot unfilled, set `nullify'
	 as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
	nullify = 1;

      /* If this is a short forward conditional branch which did not get
	 its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
	nullify = forward_branch_p (insn);

      switch (length)
	{
	case 4:
	  if (nullify)
	    {
	      if (branch_needs_nop_p (insn))
		return "addib,%C2,n %1,%0,%3%#";
	      else
		return "addib,%C2,n %1,%0,%3";
	    }
	  else
	    return "addib,%C2 %1,%0,%3";

	case 8:
	  /* Handle weird backwards branch with a filled delay slot
	     which is nullified.  */
	  if (dbr_sequence_length () != 0
	      && ! forward_branch_p (insn)
	      && nullify)
	    return "addib,%N2,n %1,%0,.+12\n\tb %3";
	  /* Handle short backwards branch with an unfilled delay slot.
	     Using a addb;nop rather than addi;bl saves 1 cycle for both
	     taken and untaken branches.  */
	  else if (dbr_sequence_length () == 0
		   && ! forward_branch_p (insn)
		   && INSN_ADDRESSES_SET_P ()
		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	      return "addib,%C2 %1,%0,%3%#";

	  /* Handle normal cases.  */
	  if (nullify)
	    return "addi,%N2 %1,%0,%0\n\tb,n %3";
	  else
	    return "addi,%N2 %1,%0,%0\n\tb %3";

	default:
	  /* The reversed conditional branch must branch over one additional
	     instruction if the delay slot is filled and needs to be extracted
	     by pa_output_lbranch.  If the delay slot is empty or this is a
	     nullified forward branch, the instruction after the reversed
	     condition branch must be nullified.  */
	  if (dbr_sequence_length () == 0
	      || (nullify && forward_branch_p (insn)))
	    {
	      nullify = 1;
	      xdelay = 0;
	      operands[4] = GEN_INT (length);
	    }
	  else
	    {
	      xdelay = 1;
	      operands[4] = GEN_INT (length + 4);
	    }

	  if (nullify)
	    output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
	  else
	    output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);

	  return pa_output_lbranch (operands[3], insn, xdelay);
	}

    }
  /* Deal with gross reload from FP register case.  */
  else if (which_alternative == 1)
    {
      /* Move loop counter from FP register to MEM then into a GR,
	 increment the GR, store the GR into MEM, and finally reload
	 the FP register from MEM from within the branch's delay slot.  */
      output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
		       operands);
      output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
      if (length == 24)
	return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
      else if (length == 28)
	return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
      else
	{
	  operands[5] = GEN_INT (length - 16);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
  /* Deal with gross reload from memory case.  */
  else
    {
      /* Reload loop counter from memory, the store back to memory
	 happens in the branch's delay slot.  */
      output_asm_insn ("ldw %0,%4", operands);
      if (length == 12)
	return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
      else if (length == 16)
	return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
      else
	{
	  operands[5] = GEN_INT (length - 4);
	  output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
}
7573 
/* Return the output template for emitting a movb type insn.

   WHICH_ALTERNATIVE 0 is the normal case (destination in a general
   register); alternative 1 targets an FP register, alternative 2
   targets memory, and any other value targets the SAR register.

   Note it may perform some output operations on its own before
   returning the final output string.  Note also that when
   REVERSE_COMPARISON is set, operands[2] is modified in place via
   PUT_CODE.  */
const char *
pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
	     int reverse_comparison)
{
  int length = get_attr_length (insn);

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (branch_to_delay_slot_p (insn))
    {
      /* The branch is useless; just perform the move for the selected
	 destination alternative.  */
      if (which_alternative == 0)
	return "copy %1,%0";
      else if (which_alternative == 1)
	{
	  output_asm_insn ("stw %1,-16(%%r30)", operands);
	  return "{fldws|fldw} -16(%%r30),%0";
	}
      else if (which_alternative == 2)
	return "stw %1,%0";
      else
	return "mtsar %r1";
    }

  /* Support the second variant.  */
  if (reverse_comparison)
    PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int xdelay;

      /* If this is a long branch with its delay slot unfilled, set `nullify'
	 as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
	nullify = 1;

      /* If this is a short forward conditional branch which did not get
	 its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
	nullify = forward_branch_p (insn);

      switch (length)
	{
	case 4:
	  if (nullify)
	    {
	      if (branch_needs_nop_p (insn))
		return "movb,%C2,n %1,%0,%3%#";
	      else
		return "movb,%C2,n %1,%0,%3";
	    }
	  else
	    return "movb,%C2 %1,%0,%3";

	case 8:
	  /* Handle weird backwards branch with a filled delay slot
	     which is nullified.  */
	  if (dbr_sequence_length () != 0
	      && ! forward_branch_p (insn)
	      && nullify)
	    return "movb,%N2,n %1,%0,.+12\n\tb %3";

	  /* Handle short backwards branch with an unfilled delay slot.
	     Using a movb;nop rather than or;bl saves 1 cycle for both
	     taken and untaken branches.  */
	  else if (dbr_sequence_length () == 0
		   && ! forward_branch_p (insn)
		   && INSN_ADDRESSES_SET_P ()
		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	    return "movb,%C2 %1,%0,%3%#";
	  /* Handle normal cases.  */
	  if (nullify)
	    return "or,%N2 %1,%%r0,%0\n\tb,n %3";
	  else
	    return "or,%N2 %1,%%r0,%0\n\tb %3";

	default:
	  /* The reversed conditional branch must branch over one additional
	     instruction if the delay slot is filled and needs to be extracted
	     by pa_output_lbranch.  If the delay slot is empty or this is a
	     nullified forward branch, the instruction after the reversed
	     condition branch must be nullified.  */
	  if (dbr_sequence_length () == 0
	      || (nullify && forward_branch_p (insn)))
	    {
	      nullify = 1;
	      xdelay = 0;
	      operands[4] = GEN_INT (length);
	    }
	  else
	    {
	      xdelay = 1;
	      operands[4] = GEN_INT (length + 4);
	    }

	  if (nullify)
	    output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
	  else
	    output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);

	  return pa_output_lbranch (operands[3], insn, xdelay);
	}
    }
  /* Deal with gross reload for FP destination register case.  */
  else if (which_alternative == 1)
    {
      /* Move source register to MEM, perform the branch test, then
	 finally load the FP register from MEM from within the branch's
	 delay slot.  */
      output_asm_insn ("stw %1,-16(%%r30)", operands);
      if (length == 12)
	return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
      else if (length == 16)
	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
      else
	{
	  operands[4] = GEN_INT (length - 4);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
  /* Deal with gross reload from memory case.  */
  else if (which_alternative == 2)
    {
      /* Reload loop counter from memory, the store back to memory
	 happens in the branch's delay slot.  */
      if (length == 8)
	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
      else if (length == 12)
	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
      else
	{
	  operands[4] = GEN_INT (length);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
			   operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
  /* Handle SAR as a destination.  */
  else
    {
      if (length == 8)
	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
      else if (length == 12)
	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
      else
	{
	  operands[4] = GEN_INT (length);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
			   operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
}
7736 
7737 /* Copy any FP arguments in INSN into integer registers.  */
7738 static void
copy_fp_args(rtx_insn * insn)7739 copy_fp_args (rtx_insn *insn)
7740 {
7741   rtx link;
7742   rtx xoperands[2];
7743 
7744   for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7745     {
7746       int arg_mode, regno;
7747       rtx use = XEXP (link, 0);
7748 
7749       if (! (GET_CODE (use) == USE
7750 	  && GET_CODE (XEXP (use, 0)) == REG
7751 	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7752 	continue;
7753 
7754       arg_mode = GET_MODE (XEXP (use, 0));
7755       regno = REGNO (XEXP (use, 0));
7756 
7757       /* Is it a floating point register?  */
7758       if (regno >= 32 && regno <= 39)
7759 	{
7760 	  /* Copy the FP register into an integer register via memory.  */
7761 	  if (arg_mode == SFmode)
7762 	    {
7763 	      xoperands[0] = XEXP (use, 0);
7764 	      xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7765 	      output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7766 	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7767 	    }
7768 	  else
7769 	    {
7770 	      xoperands[0] = XEXP (use, 0);
7771 	      xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7772 	      output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7773 	      output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7774 	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7775 	    }
7776 	}
7777     }
7778 }
7779 
7780 /* Compute length of the FP argument copy sequence for INSN.  */
7781 static int
length_fp_args(rtx_insn * insn)7782 length_fp_args (rtx_insn *insn)
7783 {
7784   int length = 0;
7785   rtx link;
7786 
7787   for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7788     {
7789       int arg_mode, regno;
7790       rtx use = XEXP (link, 0);
7791 
7792       if (! (GET_CODE (use) == USE
7793 	  && GET_CODE (XEXP (use, 0)) == REG
7794 	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7795 	continue;
7796 
7797       arg_mode = GET_MODE (XEXP (use, 0));
7798       regno = REGNO (XEXP (use, 0));
7799 
7800       /* Is it a floating point register?  */
7801       if (regno >= 32 && regno <= 39)
7802 	{
7803 	  if (arg_mode == SFmode)
7804 	    length += 8;
7805 	  else
7806 	    length += 12;
7807 	}
7808     }
7809 
7810   return length;
7811 }
7812 
7813 /* Return the attribute length for the millicode call instruction INSN.
7814    The length must match the code generated by pa_output_millicode_call.
7815    We include the delay slot in the returned length as it is better to
7816    over estimate the length than to under estimate it.  */
7817 
7818 int
pa_attr_length_millicode_call(rtx_insn * insn)7819 pa_attr_length_millicode_call (rtx_insn *insn)
7820 {
7821   unsigned long distance = -1;
7822   unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7823 
7824   if (INSN_ADDRESSES_SET_P ())
7825     {
7826       distance = (total + insn_current_reference_address (insn));
7827       if (distance < total)
7828 	distance = -1;
7829     }
7830 
7831   if (TARGET_64BIT)
7832     {
7833       if (!TARGET_LONG_CALLS && distance < 7600000)
7834 	return 8;
7835 
7836       return 20;
7837     }
7838   else if (TARGET_PORTABLE_RUNTIME)
7839     return 24;
7840   else
7841     {
7842       if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
7843 	return 8;
7844 
7845       if (!flag_pic)
7846 	return 12;
7847 
7848       return 24;
7849     }
7850 }
7851 
/* INSN is a function call.

   CALL_DEST is the routine we are calling.

   Returns the output template (possibly "") after emitting any long
   call sequence directly; the length of each sequence must agree with
   pa_attr_length_millicode_call.  */

const char *
pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
{
  int attr_length = get_attr_length (insn);
  int seq_length = dbr_sequence_length ();
  rtx xoperands[4];

  xoperands[0] = call_dest;

  /* Handle the common case where we are sure that the branch will
     reach the beginning of the $CODE$ subspace.  The within reach
     form of the $$sh_func_adrs call has a length of 28.  Because it
     has an attribute type of sh_func_adrs, it never has a nonzero
     sequence length (i.e., the delay slot is never filled).  */
  if (!TARGET_LONG_CALLS
      && (attr_length == 8
	  || (attr_length == 28
	      && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
    {
      xoperands[1] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
      output_asm_insn ("{bl|b,l} %0,%1", xoperands);
    }
  else
    {
      if (TARGET_64BIT)
	{
	  /* It might seem that one insn could be saved by accessing
	     the millicode function using the linkage table.  However,
	     this doesn't work in shared libraries and other dynamically
	     loaded objects.  Using a pc-relative sequence also avoids
	     problems related to the implicit use of the gp register.  */
	  xoperands[1] = gen_rtx_REG (Pmode, 1);
	  xoperands[2] = xoperands[1];
	  pa_output_pic_pcrel_sequence (xoperands);
	  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
	}
      else if (TARGET_PORTABLE_RUNTIME)
	{
	  /* Pure portable runtime doesn't allow be/ble; we also don't
	     have PIC support in the assembler/linker, so this sequence
	     is needed.  */

	  /* Get the address of our target into %r1.  */
	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
	  output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);

	  /* Get our return address into %r31.  */
	  output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
	  output_asm_insn ("addi 8,%%r31,%%r31", xoperands);

	  /* Jump to our target address in %r1.  */
	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
	}
      else if (!flag_pic)
	{
	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
	  if (TARGET_PA_20)
	    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
	  else
	    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
	}
      else
	{
	  xoperands[1] = gen_rtx_REG (Pmode, 31);
	  xoperands[2] = gen_rtx_REG (Pmode, 1);
	  pa_output_pic_pcrel_sequence (xoperands);

	  /* Adjust return address.  */
	  output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands);

	  /* Jump to our target address in %r1.  */
	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
	}
    }

  /* Emit a nop when the delay slot was not filled.  */
  if (seq_length == 0)
    output_asm_insn ("nop", xoperands);

  return "";
}
7936 
7937 /* Return the attribute length of the call instruction INSN.  The SIBCALL
7938    flag indicates whether INSN is a regular call or a sibling call.  The
7939    length returned must be longer than the code actually generated by
7940    pa_output_call.  Since branch shortening is done before delay branch
7941    sequencing, there is no way to determine whether or not the delay
7942    slot will be filled during branch shortening.  Even when the delay
7943    slot is filled, we may have to add a nop if the delay slot contains
7944    a branch that can't reach its target.  Thus, we always have to include
7945    the delay slot in the length estimate.  This used to be done in
7946    pa_adjust_insn_length but we do it here now as some sequences always
7947    fill the delay slot and we can save four bytes in the estimate for
7948    these sequences.  */
7949 
7950 int
pa_attr_length_call(rtx_insn * insn,int sibcall)7951 pa_attr_length_call (rtx_insn *insn, int sibcall)
7952 {
7953   int local_call;
7954   rtx call, call_dest;
7955   tree call_decl;
7956   int length = 0;
7957   rtx pat = PATTERN (insn);
7958   unsigned long distance = -1;
7959 
7960   gcc_assert (CALL_P (insn));
7961 
7962   if (INSN_ADDRESSES_SET_P ())
7963     {
7964       unsigned long total;
7965 
7966       total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7967       distance = (total + insn_current_reference_address (insn));
7968       if (distance < total)
7969 	distance = -1;
7970     }
7971 
7972   gcc_assert (GET_CODE (pat) == PARALLEL);
7973 
7974   /* Get the call rtx.  */
7975   call = XVECEXP (pat, 0, 0);
7976   if (GET_CODE (call) == SET)
7977     call = SET_SRC (call);
7978 
7979   gcc_assert (GET_CODE (call) == CALL);
7980 
7981   /* Determine if this is a local call.  */
7982   call_dest = XEXP (XEXP (call, 0), 0);
7983   call_decl = SYMBOL_REF_DECL (call_dest);
7984   local_call = call_decl && targetm.binds_local_p (call_decl);
7985 
7986   /* pc-relative branch.  */
7987   if (!TARGET_LONG_CALLS
7988       && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7989 	  || distance < MAX_PCREL17F_OFFSET))
7990     length += 8;
7991 
7992   /* 64-bit plabel sequence.  */
7993   else if (TARGET_64BIT && !local_call)
7994     length += 24;
7995 
7996   /* non-pic long absolute branch sequence.  */
7997   else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7998     length += 12;
7999 
8000   /* long pc-relative branch sequence.  */
8001   else if (TARGET_LONG_PIC_SDIFF_CALL
8002 	   || (TARGET_GAS && !TARGET_SOM && local_call))
8003     {
8004       length += 20;
8005 
8006       if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8007 	length += 8;
8008     }
8009 
8010   /* 32-bit plabel sequence.  */
8011   else
8012     {
8013       length += 32;
8014 
8015       if (TARGET_SOM)
8016 	length += length_fp_args (insn);
8017 
8018       if (flag_pic)
8019 	length += 4;
8020 
8021       if (!TARGET_PA_20)
8022 	{
8023 	  if (!sibcall)
8024 	    length += 8;
8025 
8026 	  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8027 	    length += 8;
8028 	}
8029     }
8030 
8031   return length;
8032 }
8033 
8034 /* INSN is a function call.
8035 
8036    CALL_DEST is the routine we are calling.  */
8037 
8038 const char *
pa_output_call(rtx_insn * insn,rtx call_dest,int sibcall)8039 pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
8040 {
8041   int seq_length = dbr_sequence_length ();
8042   tree call_decl = SYMBOL_REF_DECL (call_dest);
8043   int local_call = call_decl && targetm.binds_local_p (call_decl);
8044   rtx xoperands[4];
8045 
8046   xoperands[0] = call_dest;
8047 
8048   /* Handle the common case where we're sure that the branch will reach
8049      the beginning of the "$CODE$" subspace.  This is the beginning of
8050      the current function if we are in a named section.  */
8051   if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
8052     {
8053       xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
8054       output_asm_insn ("{bl|b,l} %0,%1", xoperands);
8055     }
8056   else
8057     {
8058       if (TARGET_64BIT && !local_call)
8059 	{
8060 	  /* ??? As far as I can tell, the HP linker doesn't support the
8061 	     long pc-relative sequence described in the 64-bit runtime
8062 	     architecture.  So, we use a slightly longer indirect call.  */
8063 	  xoperands[0] = pa_get_deferred_plabel (call_dest);
8064 	  xoperands[1] = gen_label_rtx ();
8065 
8066 	  /* Put the load of %r27 into the delay slot.  We don't need to
8067 	     do anything when generating fast indirect calls.  */
8068 	  if (seq_length != 0)
8069 	    {
8070 	      final_scan_insn (NEXT_INSN (insn), asm_out_file,
8071 			       optimize, 0, NULL);
8072 
8073 	      /* Now delete the delay insn.  */
8074 	      SET_INSN_DELETED (NEXT_INSN (insn));
8075 	    }
8076 
8077 	  output_asm_insn ("addil LT'%0,%%r27", xoperands);
8078 	  output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
8079 	  output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
8080 	  output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
8081 	  output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
8082 	  output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
8083 	  seq_length = 1;
8084 	}
8085       else
8086 	{
8087 	  int indirect_call = 0;
8088 
8089 	  /* Emit a long call.  There are several different sequences
8090 	     of increasing length and complexity.  In most cases,
8091              they don't allow an instruction in the delay slot.  */
8092 	  if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
8093 	      && !TARGET_LONG_PIC_SDIFF_CALL
8094 	      && !(TARGET_GAS && !TARGET_SOM && local_call)
8095 	      && !TARGET_64BIT)
8096 	    indirect_call = 1;
8097 
8098 	  if (seq_length != 0
8099 	      && !sibcall
8100 	      && (!TARGET_PA_20
8101 		  || indirect_call
8102 		  || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
8103 	    {
8104 	      /* A non-jump insn in the delay slot.  By definition we can
8105 		 emit this insn before the call (and in fact before argument
8106 		 relocating.  */
8107 	      final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
8108 			       NULL);
8109 
8110 	      /* Now delete the delay insn.  */
8111 	      SET_INSN_DELETED (NEXT_INSN (insn));
8112 	      seq_length = 0;
8113 	    }
8114 
8115 	  if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
8116 	    {
8117 	      /* This is the best sequence for making long calls in
8118 		 non-pic code.  Unfortunately, GNU ld doesn't provide
8119 		 the stub needed for external calls, and GAS's support
8120 		 for this with the SOM linker is buggy.  It is safe
8121 		 to use this for local calls.  */
8122 	      output_asm_insn ("ldil L'%0,%%r1", xoperands);
8123 	      if (sibcall)
8124 		output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
8125 	      else
8126 		{
8127 		  if (TARGET_PA_20)
8128 		    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
8129 				     xoperands);
8130 		  else
8131 		    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
8132 
8133 		  output_asm_insn ("copy %%r31,%%r2", xoperands);
8134 		  seq_length = 1;
8135 		}
8136 	    }
8137 	  else
8138 	    {
8139 	      /* The HP assembler and linker can handle relocations for
8140 		 the difference of two symbols.  The HP assembler
8141 		 recognizes the sequence as a pc-relative call and
8142 		 the linker provides stubs when needed.  */
8143 
8144 	      /* GAS currently can't generate the relocations that
8145 		 are needed for the SOM linker under HP-UX using this
8146 		 sequence.  The GNU linker doesn't generate the stubs
8147 		 that are needed for external calls on TARGET_ELF32
8148 		 with this sequence.  For now, we have to use a longer
8149 	         plabel sequence when using GAS for non local calls.  */
8150 	      if (TARGET_LONG_PIC_SDIFF_CALL
8151 		  || (TARGET_GAS && !TARGET_SOM && local_call))
8152 		{
8153 		  xoperands[1] = gen_rtx_REG (Pmode, 1);
8154 		  xoperands[2] = xoperands[1];
8155 		  pa_output_pic_pcrel_sequence (xoperands);
8156 		}
8157 	      else
8158 		{
8159 		  /* Emit a long plabel-based call sequence.  This is
8160 		     essentially an inline implementation of $$dyncall.
8161 		     We don't actually try to call $$dyncall as this is
8162 		     as difficult as calling the function itself.  */
8163 		  xoperands[0] = pa_get_deferred_plabel (call_dest);
8164 		  xoperands[1] = gen_label_rtx ();
8165 
8166 		  /* Since the call is indirect, FP arguments in registers
8167 		     need to be copied to the general registers.  Then, the
8168 		     argument relocation stub will copy them back.  */
8169 		  if (TARGET_SOM)
8170 		    copy_fp_args (insn);
8171 
8172 		  if (flag_pic)
8173 		    {
8174 		      output_asm_insn ("addil LT'%0,%%r19", xoperands);
8175 		      output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
8176 		      output_asm_insn ("ldw 0(%%r1),%%r22", xoperands);
8177 		    }
8178 		  else
8179 		    {
8180 		      output_asm_insn ("addil LR'%0-$global$,%%r27",
8181 				       xoperands);
8182 		      output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r22",
8183 				       xoperands);
8184 		    }
8185 
8186 		  output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8187 		  output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8188 		  /* Should this be an ordered load to ensure the target
8189 	             address is loaded before the global pointer?  */
8190 		  output_asm_insn ("ldw 0(%%r22),%%r1", xoperands);
8191 		  output_asm_insn ("ldw 4(%%r22),%%r19", xoperands);
8192 
8193 		  if (!sibcall && !TARGET_PA_20)
8194 		    {
8195 		      output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8196 		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8197 			output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
8198 		      else
8199 			output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
8200 		    }
8201 		}
8202 
8203 	      if (TARGET_PA_20)
8204 		{
8205 		  if (sibcall)
8206 		    output_asm_insn ("bve (%%r1)", xoperands);
8207 		  else
8208 		    {
8209 		      if (indirect_call)
8210 			{
8211 			  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8212 			  output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
8213 			  seq_length = 1;
8214 			}
8215 		      else
8216 			output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8217 		    }
8218 		}
8219 	      else
8220 		{
8221 		  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8222 		    output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8223 				     xoperands);
8224 
8225 		  if (sibcall)
8226 		    {
8227 		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8228 			output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
8229 		      else
8230 			output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
8231 		    }
8232 		  else
8233 		    {
8234 		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8235 			output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
8236 		      else
8237 			output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
8238 
8239 		      if (indirect_call)
8240 			output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
8241 		      else
8242 			output_asm_insn ("copy %%r31,%%r2", xoperands);
8243 		      seq_length = 1;
8244 		    }
8245 		}
8246 	    }
8247 	}
8248     }
8249 
8250   if (seq_length == 0)
8251     output_asm_insn ("nop", xoperands);
8252 
8253   return "";
8254 }
8255 
8256 /* Return the attribute length of the indirect call instruction INSN.
8257    The length must match the code generated by output_indirect call.
8258    The returned length includes the delay slot.  Currently, the delay
8259    slot of an indirect call sequence is not exposed and it is used by
8260    the sequence itself.  */
8261 
8262 int
pa_attr_length_indirect_call(rtx_insn * insn)8263 pa_attr_length_indirect_call (rtx_insn *insn)
8264 {
8265   unsigned long distance = -1;
8266   unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8267 
8268   if (INSN_ADDRESSES_SET_P ())
8269     {
8270       distance = (total + insn_current_reference_address (insn));
8271       if (distance < total)
8272 	distance = -1;
8273     }
8274 
8275   if (TARGET_64BIT)
8276     return 12;
8277 
8278   if (TARGET_FAST_INDIRECT_CALLS)
8279     return 8;
8280 
8281   if (TARGET_PORTABLE_RUNTIME)
8282     return 16;
8283 
8284   if (!TARGET_LONG_CALLS
8285       && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8286 	  || distance < MAX_PCREL17F_OFFSET))
8287     return 8;
8288 
8289   /* Out of reach, can use ble.  */
8290   if (!flag_pic)
8291     return 12;
8292 
8293   /* Inline versions of $$dyncall.  */
8294   if (!optimize_size)
8295     {
8296       if (TARGET_NO_SPACE_REGS)
8297 	return 28;
8298 
8299       if (TARGET_PA_20)
8300 	return 32;
8301     }
8302 
8303   /* Long PIC pc-relative call.  */
8304   return 20;
8305 }
8306 
8307 const char *
pa_output_indirect_call(rtx_insn * insn,rtx call_dest)8308 pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
8309 {
8310   rtx xoperands[4];
8311   int length;
8312 
8313   if (TARGET_64BIT)
8314     {
8315       xoperands[0] = call_dest;
8316       output_asm_insn ("ldd 16(%0),%%r2\n\t"
8317 		       "bve,l (%%r2),%%r2\n\t"
8318 		       "ldd 24(%0),%%r27", xoperands);
8319       return "";
8320     }
8321 
8322   /* First the special case for kernels, level 0 systems, etc.  */
8323   if (TARGET_FAST_INDIRECT_CALLS)
8324     {
8325       pa_output_arg_descriptor (insn);
8326       if (TARGET_PA_20)
8327 	return "bve,l,n (%%r22),%%r2\n\tnop";
8328       return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8329     }
8330 
8331   if (TARGET_PORTABLE_RUNTIME)
8332     {
8333       output_asm_insn ("ldil L'$$dyncall,%%r31\n\t"
8334 		       "ldo R'$$dyncall(%%r31),%%r31", xoperands);
8335       pa_output_arg_descriptor (insn);
8336       return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8337     }
8338 
8339   /* Now the normal case -- we can reach $$dyncall directly or
8340      we're sure that we can get there via a long-branch stub.
8341 
8342      No need to check target flags as the length uniquely identifies
8343      the remaining cases.  */
8344   length = pa_attr_length_indirect_call (insn);
8345   if (length == 8)
8346     {
8347       pa_output_arg_descriptor (insn);
8348 
8349       /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8350 	 $$dyncall.  Since BLE uses %r31 as the link register, the 22-bit
8351 	 variant of the B,L instruction can't be used on the SOM target.  */
8352       if (TARGET_PA_20 && !TARGET_SOM)
8353 	return "b,l,n $$dyncall,%%r2\n\tnop";
8354       else
8355 	return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8356     }
8357 
8358   /* Long millicode call, but we are not generating PIC or portable runtime
8359      code.  */
8360   if (length == 12)
8361     {
8362       output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands);
8363       pa_output_arg_descriptor (insn);
8364       return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8365     }
8366 
8367   /* The long PIC pc-relative call sequence is five instructions.  So,
8368      let's use an inline version of $$dyncall when the calling sequence
8369      has a roughly similar number of instructions and we are not optimizing
8370      for size.  We need two instructions to load the return pointer plus
8371      the $$dyncall implementation.  */
8372   if (!optimize_size)
8373     {
8374       if (TARGET_NO_SPACE_REGS)
8375 	{
8376 	  pa_output_arg_descriptor (insn);
8377 	  output_asm_insn ("bl .+8,%%r2\n\t"
8378 			   "ldo 20(%%r2),%%r2\n\t"
8379 			   "extru,<> %%r22,30,1,%%r0\n\t"
8380 			   "bv,n %%r0(%%r22)\n\t"
8381 			   "ldw -2(%%r22),%%r21\n\t"
8382 			   "bv %%r0(%%r21)\n\t"
8383 			   "ldw 2(%%r22),%%r19", xoperands);
8384 	  return "";
8385 	}
8386       if (TARGET_PA_20)
8387 	{
8388 	  pa_output_arg_descriptor (insn);
8389 	  output_asm_insn ("bl .+8,%%r2\n\t"
8390 			   "ldo 24(%%r2),%%r2\n\t"
8391 			   "stw %%r2,-24(%%sp)\n\t"
8392 			   "extru,<> %r22,30,1,%%r0\n\t"
8393 			   "bve,n (%%r22)\n\t"
8394 			   "ldw -2(%%r22),%%r21\n\t"
8395 			   "bve (%%r21)\n\t"
8396 			   "ldw 2(%%r22),%%r19", xoperands);
8397 	  return "";
8398 	}
8399     }
8400 
8401   /* We need a long PIC call to $$dyncall.  */
8402   xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall");
8403   xoperands[1] = gen_rtx_REG (Pmode, 2);
8404   xoperands[2] = gen_rtx_REG (Pmode, 1);
8405   pa_output_pic_pcrel_sequence (xoperands);
8406   pa_output_arg_descriptor (insn);
8407   return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2";
8408 }
8409 
8410 /* In HPUX 8.0's shared library scheme, special relocations are needed
8411    for function labels if they might be passed to a function
8412    in a shared library (because shared libraries don't live in code
8413    space), and special magic is needed to construct their address.  */
8414 
8415 void
pa_encode_label(rtx sym)8416 pa_encode_label (rtx sym)
8417 {
8418   const char *str = XSTR (sym, 0);
8419   int len = strlen (str) + 1;
8420   char *newstr, *p;
8421 
8422   p = newstr = XALLOCAVEC (char, len + 1);
8423   *p++ = '@';
8424   strcpy (p, str);
8425 
8426   XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8427 }
8428 
8429 static void
pa_encode_section_info(tree decl,rtx rtl,int first)8430 pa_encode_section_info (tree decl, rtx rtl, int first)
8431 {
8432   int old_referenced = 0;
8433 
8434   if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8435     old_referenced
8436       = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8437 
8438   default_encode_section_info (decl, rtl, first);
8439 
8440   if (first && TEXT_SPACE_P (decl))
8441     {
8442       SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8443       if (TREE_CODE (decl) == FUNCTION_DECL)
8444 	pa_encode_label (XEXP (rtl, 0));
8445     }
8446   else if (old_referenced)
8447     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8448 }
8449 
8450 /* This is sort of inverse to pa_encode_section_info.  */
8451 
/* This is sort of inverse to pa_encode_section_info: skip a leading '@'
   (the function-label marker) and then a leading '*' (the no-prefix
   escape), each at most once, in that order.  */

static const char *
pa_strip_name_encoding (const char *str)
{
  if (str[0] == '@')
    str++;
  if (str[0] == '*')
    str++;
  return str;
}
8459 
8460 /* Returns 1 if OP is a function label involved in a simple addition
8461    with a constant.  Used to keep certain patterns from matching
8462    during instruction combination.  */
8463 int
pa_is_function_label_plus_const(rtx op)8464 pa_is_function_label_plus_const (rtx op)
8465 {
8466   /* Strip off any CONST.  */
8467   if (GET_CODE (op) == CONST)
8468     op = XEXP (op, 0);
8469 
8470   return (GET_CODE (op) == PLUS
8471 	  && function_label_operand (XEXP (op, 0), VOIDmode)
8472 	  && GET_CODE (XEXP (op, 1)) == CONST_INT);
8473 }
8474 
8475 /* Output the assembler code for a thunk function.  THUNK_DECL is the
8476    declaration for the thunk function itself, FUNCTION is the decl for
8477    the target function.  DELTA is an immediate constant offset to be
8478    added to THIS.  If VCALL_OFFSET is nonzero, the word at
8479    *(*this + vcall_offset) should be added to THIS.  */
8480 
static void
pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
			HOST_WIDE_INT vcall_offset, tree function)
{
  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
  static unsigned int current_thunk_number;
  /* Nonzero when DELTA fits in a signed 14-bit immediate, i.e. a single
     ldo can apply it in a delay slot.  */
  int val_14 = VAL_14_BITS_P (delta);
  /* NBYTES counts the bytes of code emitted for this thunk so the
     code-size totals can be updated at the end.  */
  unsigned int old_last_address = last_address, nbytes = 0;
  char label[17];
  rtx xoperands[4];

  /* %0 = target function, %1 = thunk, %2 = delta.  */
  xoperands[0] = XEXP (DECL_RTL (function), 0);
  xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
  xoperands[2] = GEN_INT (delta);

  assemble_start_function (thunk_fndecl, fnname);
  final_start_function (emit_barrier (), file, 1);

  if (!vcall_offset)
    {
      /* Output the thunk.  We know that the function is in the same
	 translation unit (i.e., the same space) as the thunk, and that
	 thunks are output after their method.  Thus, we don't need an
	 external branch to reach the function.  With SOM and GAS,
	 functions and thunks are effectively in different sections.
	 Thus, we can always use a IA-relative branch and the linker
	 will add a long branch stub if necessary.

	 However, we have to be careful when generating PIC code on the
	 SOM port to ensure that the sequence does not transfer to an
	 import stub for the target function as this could clobber the
	 return value saved at SP-24.  This would also apply to the
	 32-bit linux port if the multi-space model is implemented.  */
      if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
	   && !(flag_pic && TREE_PUBLIC (function))
	   && (TARGET_GAS || last_address < 262132))
	  || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
	      && ((targetm_common.have_named_sections
		   && DECL_SECTION_NAME (thunk_fndecl) != NULL
		   /* The GNU 64-bit linker has rather poor stub management.
		      So, we use a long branch from thunks that aren't in
		      the same section as the target function.  */
		    && ((!TARGET_64BIT
			 && (DECL_SECTION_NAME (thunk_fndecl)
			     != DECL_SECTION_NAME (function)))
			|| ((DECL_SECTION_NAME (thunk_fndecl)
			     == DECL_SECTION_NAME (function))
			    && last_address < 262132)))
		  /* In this case, we need to be able to reach the start of
		     the stub table even though the function is likely closer
		     and can be jumped to directly.  */
		  || (targetm_common.have_named_sections
		      && DECL_SECTION_NAME (thunk_fndecl) == NULL
		      && DECL_SECTION_NAME (function) == NULL
		      && total_code_bytes < MAX_PCREL17F_OFFSET)
		  /* Likewise.  */
		  || (!targetm_common.have_named_sections
		      && total_code_bytes < MAX_PCREL17F_OFFSET))))
	{
	  /* Short case: direct branch, applying DELTA to %r26 (THIS)
	     either in the delay slot (val_14) or via addil/ldo.  */
	  if (!val_14)
	    output_asm_insn ("addil L'%2,%%r26", xoperands);

	  output_asm_insn ("b %0", xoperands);

	  if (val_14)
	    {
	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	      nbytes += 8;
	    }
	  else
	    {
	      output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
	      nbytes += 12;
	    }
	}
      else if (TARGET_64BIT)
	{
	  rtx xop[4];

	  /* We only have one call-clobbered scratch register, so we can't
	     make use of the delay slot if delta doesn't fit in 14 bits.  */
	  if (!val_14)
	    {
	      output_asm_insn ("addil L'%2,%%r26", xoperands);
	      output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
	    }

	  /* Load function address into %r1.  */
	  xop[0] = xoperands[0];
	  xop[1] = gen_rtx_REG (Pmode, 1);
	  xop[2] = xop[1];
	  pa_output_pic_pcrel_sequence (xop);

	  if (val_14)
	    {
	      output_asm_insn ("bv %%r0(%%r1)", xoperands);
	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	      nbytes += 20;
	    }
	  else
	    {
	      output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
	      nbytes += 24;
	    }
	}
      else if (TARGET_PORTABLE_RUNTIME)
	{
	  /* Load the function address into %r22 with ldil/ldo and branch
	     through it.  */
	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
	  output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);

	  if (!val_14)
	    output_asm_insn ("ldil L'%2,%%r26", xoperands);

	  output_asm_insn ("bv %%r0(%%r22)", xoperands);

	  if (val_14)
	    {
	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	      nbytes += 16;
	    }
	  else
	    {
	      output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands);
	      nbytes += 20;
	    }
	}
      else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
	{
	  /* The function is accessible from outside this module.  The only
	     way to avoid an import stub between the thunk and function is to
	     call the function directly with an indirect sequence similar to
	     that used by $$dyncall.  This is possible because $$dyncall acts
	     as the import stub in an indirect call.  */
	  ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
	  xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
	  output_asm_insn ("addil LT'%3,%%r19", xoperands);
	  output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
	  output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
	  output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
	  output_asm_insn ("depi 0,31,2,%%r22", xoperands);
	  output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
	  output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);

	  if (!val_14)
	    {
	      output_asm_insn ("addil L'%2,%%r26", xoperands);
	      nbytes += 4;
	    }

	  if (TARGET_PA_20)
	    {
	      output_asm_insn ("bve (%%r22)", xoperands);
	      nbytes += 36;
	    }
	  else if (TARGET_NO_SPACE_REGS)
	    {
	      output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
	      nbytes += 36;
	    }
	  else
	    {
	      output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
	      output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
	      output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
	      nbytes += 44;
	    }

	  if (val_14)
	    output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	  else
	    output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
	}
      else if (flag_pic)
	{
	  rtx xop[4];

	  /* Load function address into %r22.  */
	  xop[0] = xoperands[0];
	  xop[1] = gen_rtx_REG (Pmode, 1);
	  xop[2] = gen_rtx_REG (Pmode, 22);
	  pa_output_pic_pcrel_sequence (xop);

	  if (!val_14)
	    output_asm_insn ("addil L'%2,%%r26", xoperands);

	  output_asm_insn ("bv %%r0(%%r22)", xoperands);

	  if (val_14)
	    {
	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	      nbytes += 20;
	    }
	  else
	    {
	      output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
	      nbytes += 24;
	    }
	}
      else
	{
	  /* Non-PIC long branch via an external branch (be) through %r22.  */
	  if (!val_14)
	    output_asm_insn ("addil L'%2,%%r26", xoperands);

	  output_asm_insn ("ldil L'%0,%%r22", xoperands);
	  output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);

	  if (val_14)
	    {
	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	      nbytes += 12;
	    }
	  else
	    {
	      output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
	      nbytes += 16;
	    }
	}
    }
  else
    {
      rtx xop[4];

      /* Add DELTA to THIS.  */
      if (val_14)
	{
	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	  nbytes += 4;
	}
      else
	{
	  output_asm_insn ("addil L'%2,%%r26", xoperands);
	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
	  nbytes += 8;
	}

      if (TARGET_64BIT)
	{
	  /* Load *(THIS + DELTA) to %r1.  */
	  output_asm_insn ("ldd 0(%%r26),%%r1", xoperands);

	  /* Reuse val_14 and %2 for VCALL_OFFSET from here on.  */
	  val_14 = VAL_14_BITS_P (vcall_offset);
	  xoperands[2] = GEN_INT (vcall_offset);

	  /* Load  *(*(THIS + DELTA) + VCALL_OFFSET) to %r1.  */
	  if (val_14)
	    {
	      output_asm_insn ("ldd %2(%%r1),%%r1", xoperands);
	      nbytes += 8;
	    }
	  else
	    {
	      output_asm_insn ("addil L'%2,%%r1", xoperands);
	      output_asm_insn ("ldd R'%2(%%r1),%%r1", xoperands);
	      nbytes += 12;
	    }
	}
      else
	{
	  /* Load *(THIS + DELTA) to %r1.  */
	  output_asm_insn ("ldw 0(%%r26),%%r1", xoperands);

	  /* Reuse val_14 and %2 for VCALL_OFFSET from here on.  */
	  val_14 = VAL_14_BITS_P (vcall_offset);
	  xoperands[2] = GEN_INT (vcall_offset);

	  /* Load  *(*(THIS + DELTA) + VCALL_OFFSET) to %r1.  */
	  if (val_14)
	    {
	      output_asm_insn ("ldw %2(%%r1),%%r1", xoperands);
	      nbytes += 8;
	    }
	  else
	    {
	      output_asm_insn ("addil L'%2,%%r1", xoperands);
	      output_asm_insn ("ldw R'%2(%%r1),%%r1", xoperands);
	      nbytes += 12;
	    }
	}

      /* Branch to FUNCTION and add %r1 to THIS in delay slot if possible.
	 The reachability condition mirrors the !vcall_offset case above.  */
      if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
	   && !(flag_pic && TREE_PUBLIC (function))
	   && (TARGET_GAS || last_address < 262132))
	  || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
	      && ((targetm_common.have_named_sections
		   && DECL_SECTION_NAME (thunk_fndecl) != NULL
		   /* The GNU 64-bit linker has rather poor stub management.
		      So, we use a long branch from thunks that aren't in
		      the same section as the target function.  */
		    && ((!TARGET_64BIT
			 && (DECL_SECTION_NAME (thunk_fndecl)
			     != DECL_SECTION_NAME (function)))
			|| ((DECL_SECTION_NAME (thunk_fndecl)
			     == DECL_SECTION_NAME (function))
			    && last_address < 262132)))
		  /* In this case, we need to be able to reach the start of
		     the stub table even though the function is likely closer
		     and can be jumped to directly.  */
		  || (targetm_common.have_named_sections
		      && DECL_SECTION_NAME (thunk_fndecl) == NULL
		      && DECL_SECTION_NAME (function) == NULL
		      && total_code_bytes < MAX_PCREL17F_OFFSET)
		  /* Likewise.  */
		  || (!targetm_common.have_named_sections
		      && total_code_bytes < MAX_PCREL17F_OFFSET))))
	{
	  nbytes += 4;
	  output_asm_insn ("b %0", xoperands);

	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
	}
      else if (TARGET_64BIT)
	{
	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);

	  /* Load function address into %r1.  */
	  nbytes += 16;
	  xop[0] = xoperands[0];
	  xop[1] = gen_rtx_REG (Pmode, 1);
	  xop[2] = xop[1];
	  pa_output_pic_pcrel_sequence (xop);

	  output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
	}
      else if (TARGET_PORTABLE_RUNTIME)
	{
	  /* Load function address into %r22.  */
	  nbytes += 12;
	  output_asm_insn ("ldil L'%0,%%r22", xoperands);
	  output_asm_insn ("ldo R'%0(%%r22),%%r22", xoperands);

	  output_asm_insn ("bv %%r0(%%r22)", xoperands);

	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
	}
      else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
	{
	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);

	  /* The function is accessible from outside this module.  The only
	     way to avoid an import stub between the thunk and function is to
	     call the function directly with an indirect sequence similar to
	     that used by $$dyncall.  This is possible because $$dyncall acts
	     as the import stub in an indirect call.  */
	  ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
	  xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
	  output_asm_insn ("addil LT'%3,%%r19", xoperands);
	  output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
	  output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
	  output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
	  output_asm_insn ("depi 0,31,2,%%r22", xoperands);
	  output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
	  output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);

	  if (TARGET_PA_20)
	    {
	      output_asm_insn ("bve,n (%%r22)", xoperands);
	      nbytes += 32;
	    }
	  else if (TARGET_NO_SPACE_REGS)
	    {
	      output_asm_insn ("be,n 0(%%sr4,%%r22)", xoperands);
	      nbytes += 32;
	    }
	  else
	    {
	      output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
	      output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
	      output_asm_insn ("be,n 0(%%sr0,%%r22)", xoperands);
	      nbytes += 40;
	    }
	}
      else if (flag_pic)
	{
	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);

	  /* Load function address into %r1.  */
	  nbytes += 16;
	  xop[0] = xoperands[0];
	  xop[1] = gen_rtx_REG (Pmode, 1);
	  xop[2] = xop[1];
	  pa_output_pic_pcrel_sequence (xop);

	  output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
	}
      else
	{
	  /* Load function address into %r22.  */
	  nbytes += 8;
	  output_asm_insn ("ldil L'%0,%%r22", xoperands);
	  output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);

	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
	}
    }

  final_end_function ();

  /* For the SOM PIC indirect sequences above, emit the data word that
     the LTHN label addresses (a P' reference to the target function).  */
  if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
    {
      switch_to_section (data_section);
      output_asm_insn (".align 4", xoperands);
      ASM_OUTPUT_LABEL (file, label);
      output_asm_insn (".word P'%0", xoperands);
    }

  current_thunk_number++;
  /* Round the emitted size up to the function alignment boundary and
     fold it into the running code-size totals; saturate last_address
     on wraparound.  */
  nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
	    & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
  last_address += nbytes;
  if (old_last_address > last_address)
    last_address = UINT_MAX;
  update_total_code_bytes (nbytes);
  assemble_end_function (thunk_fndecl, fnname);
}
8901 
8902 /* Only direct calls to static functions are allowed to be sibling (tail)
8903    call optimized.
8904 
8905    This restriction is necessary because some linker generated stubs will
8906    store return pointers into rp' in some cases which might clobber a
8907    live value already in rp'.
8908 
8909    In a sibcall the current function and the target function share stack
8910    space.  Thus if the path to the current function and the path to the
8911    target function save a value in rp', they save the value into the
8912    same stack slot, which has undesirable consequences.
8913 
8914    Because of the deferred binding nature of shared libraries any function
8915    with external scope could be in a different load module and thus require
8916    rp' to be saved when calling that function.  So sibcall optimizations
8917    can only be safe for static function.
8918 
8919    Note that GCC never needs return value relocations, so we don't have to
8920    worry about static calls with return value relocations (which require
8921    saving rp').
8922 
8923    It is safe to perform a sibcall optimization when the target function
8924    will never return.  */
8925 static bool
pa_function_ok_for_sibcall(tree decl,tree exp ATTRIBUTE_UNUSED)8926 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8927 {
8928   /* Sibcalls are not ok because the arg pointer register is not a fixed
8929      register.  This prevents the sibcall optimization from occurring.  In
8930      addition, there are problems with stub placement using GNU ld.  This
8931      is because a normal sibcall branch uses a 17-bit relocation while
8932      a regular call branch uses a 22-bit relocation.  As a result, more
8933      care needs to be taken in the placement of long-branch stubs.  */
8934   if (TARGET_64BIT)
8935     return false;
8936 
8937   if (TARGET_PORTABLE_RUNTIME)
8938     return false;
8939 
8940   /* Sibcalls are only ok within a translation unit.  */
8941   return decl && targetm.binds_local_p (decl);
8942 }
8943 
8944 /* ??? Addition is not commutative on the PA due to the weird implicit
8945    space register selection rules for memory addresses.  Therefore, we
8946    don't consider a + b == b + a, as this might be inside a MEM.  */
8947 static bool
pa_commutative_p(const_rtx x,int outer_code)8948 pa_commutative_p (const_rtx x, int outer_code)
8949 {
8950   return (COMMUTATIVE_P (x)
8951 	  && (TARGET_NO_SPACE_REGS
8952 	      || (outer_code != UNKNOWN && outer_code != MEM)
8953 	      || GET_CODE (x) != PLUS));
8954 }
8955 
8956 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8957    use in fmpyadd instructions.  */
8958 int
pa_fmpyaddoperands(rtx * operands)8959 pa_fmpyaddoperands (rtx *operands)
8960 {
8961   machine_mode mode = GET_MODE (operands[0]);
8962 
8963   /* Must be a floating point mode.  */
8964   if (mode != SFmode && mode != DFmode)
8965     return 0;
8966 
8967   /* All modes must be the same.  */
8968   if (! (mode == GET_MODE (operands[1])
8969 	 && mode == GET_MODE (operands[2])
8970 	 && mode == GET_MODE (operands[3])
8971 	 && mode == GET_MODE (operands[4])
8972 	 && mode == GET_MODE (operands[5])))
8973     return 0;
8974 
8975   /* All operands must be registers.  */
8976   if (! (GET_CODE (operands[1]) == REG
8977 	 && GET_CODE (operands[2]) == REG
8978 	 && GET_CODE (operands[3]) == REG
8979 	 && GET_CODE (operands[4]) == REG
8980 	 && GET_CODE (operands[5]) == REG))
8981     return 0;
8982 
8983   /* Only 2 real operands to the addition.  One of the input operands must
8984      be the same as the output operand.  */
8985   if (! rtx_equal_p (operands[3], operands[4])
8986       && ! rtx_equal_p (operands[3], operands[5]))
8987     return 0;
8988 
8989   /* Inout operand of add cannot conflict with any operands from multiply.  */
8990   if (rtx_equal_p (operands[3], operands[0])
8991      || rtx_equal_p (operands[3], operands[1])
8992      || rtx_equal_p (operands[3], operands[2]))
8993     return 0;
8994 
8995   /* multiply cannot feed into addition operands.  */
8996   if (rtx_equal_p (operands[4], operands[0])
8997       || rtx_equal_p (operands[5], operands[0]))
8998     return 0;
8999 
9000   /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
9001   if (mode == SFmode
9002       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
9003 	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
9004 	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
9005 	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
9006 	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
9007 	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
9008     return 0;
9009 
9010   /* Passed.  Operands are suitable for fmpyadd.  */
9011   return 1;
9012 }
9013 
9014 #if !defined(USE_COLLECT2)
/* Record constructor SYMBOL with priority PRIORITY via whichever
   mechanism the configuration supports (.ctors section, a named
   section, or stabs).  Apply the '@' function-label encoding first if
   SYMBOL doesn't already carry it, since the address is stored in a
   data section (see pa_encode_label).  */
static void
pa_asm_out_constructor (rtx symbol, int priority)
{
  if (!function_label_operand (symbol, VOIDmode))
    pa_encode_label (symbol);

#ifdef CTORS_SECTION_ASM_OP
  default_ctor_section_asm_out_constructor (symbol, priority);
#else
# ifdef TARGET_ASM_NAMED_SECTION
  default_named_section_asm_out_constructor (symbol, priority);
# else
  default_stabs_asm_out_constructor (symbol, priority);
# endif
#endif
}
9031 
/* Record destructor SYMBOL with priority PRIORITY; the mirror image of
   pa_asm_out_constructor above, using the .dtors/named-section/stabs
   mechanisms.  SYMBOL gets the '@' function-label encoding if it
   doesn't already have it.  */
static void
pa_asm_out_destructor (rtx symbol, int priority)
{
  if (!function_label_operand (symbol, VOIDmode))
    pa_encode_label (symbol);

#ifdef DTORS_SECTION_ASM_OP
  default_dtor_section_asm_out_destructor (symbol, priority);
#else
# ifdef TARGET_ASM_NAMED_SECTION
  default_named_section_asm_out_destructor (symbol, priority);
# else
  default_stabs_asm_out_destructor (symbol, priority);
# endif
#endif
}
9048 #endif
9049 
9050 /* This function places uninitialized global data in the bss section.
9051    The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
9052    function on the SOM port to prevent uninitialized global data from
9053    being placed in the data section.  */
9054 
void
pa_asm_output_aligned_bss (FILE *stream,
			   const char *name,
			   unsigned HOST_WIDE_INT size,
			   unsigned int align)
{
  switch_to_section (bss_section);

  /* Emit .type/.size directives only when the assembler has them.  */
#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
  ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
#endif

#ifdef ASM_OUTPUT_SIZE_DIRECTIVE
  ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
#endif

  /* ALIGN is in bits; the .align directive takes bytes.  */
  fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
  ASM_OUTPUT_LABEL (stream, name);
  /* .block reserves SIZE bytes of storage after the label.  */
  fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
}
9075 
9076 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
9077    that doesn't allow the alignment of global common storage to be directly
9078    specified.  The SOM linker aligns common storage based on the rounded
9079    value of the NUM_BYTES parameter in the .comm directive.  It's not
9080    possible to use the .align directive as it doesn't affect the alignment
9081    of the label associated with a .comm directive.  */
9082 
9083 void
pa_asm_output_aligned_common(FILE * stream,const char * name,unsigned HOST_WIDE_INT size,unsigned int align)9084 pa_asm_output_aligned_common (FILE *stream,
9085 			      const char *name,
9086 			      unsigned HOST_WIDE_INT size,
9087 			      unsigned int align)
9088 {
9089   unsigned int max_common_align;
9090 
9091   max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
9092   if (align > max_common_align)
9093     {
9094       /* Alignment exceeds maximum alignment for global common data.  */
9095       align = max_common_align;
9096     }
9097 
9098   switch_to_section (bss_section);
9099 
9100   assemble_name (stream, name);
9101   fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
9102            MAX (size, align / BITS_PER_UNIT));
9103 }
9104 
9105 /* We can't use .comm for local common storage as the SOM linker effectively
9106    treats the symbol as universal and uses the same storage for local symbols
9107    with the same name in different object files.  The .block directive
9108    reserves an uninitialized block of storage.  However, it's not common
9109    storage.  Fortunately, GCC never requests common storage with the same
9110    name in any given translation unit.  */
9111 
void
pa_asm_output_aligned_local (FILE *stream,
			     const char *name,
			     unsigned HOST_WIDE_INT size,
			     unsigned int align)
{
  switch_to_section (bss_section);
  /* ALIGN is in bits; the .align directive takes bytes.  */
  fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);

  /* Mark the symbol as local when the assembler has a directive for it.  */
#ifdef LOCAL_ASM_OP
  fprintf (stream, "%s", LOCAL_ASM_OP);
  assemble_name (stream, name);
  fprintf (stream, "\n");
#endif

  ASM_OUTPUT_LABEL (stream, name);
  /* .block reserves SIZE bytes of uninitialized (non-common) storage.  */
  fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
}
9130 
9131 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
9132    use in fmpysub instructions.  */
9133 int
pa_fmpysuboperands(rtx * operands)9134 pa_fmpysuboperands (rtx *operands)
9135 {
9136   machine_mode mode = GET_MODE (operands[0]);
9137 
9138   /* Must be a floating point mode.  */
9139   if (mode != SFmode && mode != DFmode)
9140     return 0;
9141 
9142   /* All modes must be the same.  */
9143   if (! (mode == GET_MODE (operands[1])
9144 	 && mode == GET_MODE (operands[2])
9145 	 && mode == GET_MODE (operands[3])
9146 	 && mode == GET_MODE (operands[4])
9147 	 && mode == GET_MODE (operands[5])))
9148     return 0;
9149 
9150   /* All operands must be registers.  */
9151   if (! (GET_CODE (operands[1]) == REG
9152 	 && GET_CODE (operands[2]) == REG
9153 	 && GET_CODE (operands[3]) == REG
9154 	 && GET_CODE (operands[4]) == REG
9155 	 && GET_CODE (operands[5]) == REG))
9156     return 0;
9157 
9158   /* Only 2 real operands to the subtraction.  Subtraction is not a commutative
9159      operation, so operands[4] must be the same as operand[3].  */
9160   if (! rtx_equal_p (operands[3], operands[4]))
9161     return 0;
9162 
9163   /* multiply cannot feed into subtraction.  */
9164   if (rtx_equal_p (operands[5], operands[0]))
9165     return 0;
9166 
9167   /* Inout operand of sub cannot conflict with any operands from multiply.  */
9168   if (rtx_equal_p (operands[3], operands[0])
9169      || rtx_equal_p (operands[3], operands[1])
9170      || rtx_equal_p (operands[3], operands[2]))
9171     return 0;
9172 
9173   /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
9174   if (mode == SFmode
9175       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
9176 	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
9177 	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
9178 	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
9179 	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
9180 	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
9181     return 0;
9182 
9183   /* Passed.  Operands are suitable for fmpysub.  */
9184   return 1;
9185 }
9186 
/* Return 1 if the given constant is 2, 4, or 8.  These are the valid
   scale factors for a MULT embedded inside a memory address.  */
int
pa_mem_shadd_constant_p (int val)
{
  switch (val)
    {
    case 2:
    case 4:
    case 8:
      return 1;
    default:
      return 0;
    }
}
9197 
/* Return 1 if the given constant is 1, 2, or 3.  These are the valid
   shift counts for shadd instructions.  */
int
pa_shadd_constant_p (int val)
{
  return val >= 1 && val <= 3;
}
9208 
9209 /* Return TRUE if INSN branches forward.  */
9210 
9211 static bool
forward_branch_p(rtx_insn * insn)9212 forward_branch_p (rtx_insn *insn)
9213 {
9214   rtx lab = JUMP_LABEL (insn);
9215 
9216   /* The INSN must have a jump label.  */
9217   gcc_assert (lab != NULL_RTX);
9218 
9219   if (INSN_ADDRESSES_SET_P ())
9220     return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
9221 
9222   while (insn)
9223     {
9224       if (insn == lab)
9225 	return true;
9226       else
9227 	insn = NEXT_INSN (insn);
9228     }
9229 
9230   return false;
9231 }
9232 
9233 /* Output an unconditional move and branch insn.  */
9234 
9235 const char *
pa_output_parallel_movb(rtx * operands,rtx_insn * insn)9236 pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
9237 {
9238   int length = get_attr_length (insn);
9239 
9240   /* These are the cases in which we win.  */
9241   if (length == 4)
9242     return "mov%I1b,tr %1,%0,%2";
9243 
9244   /* None of the following cases win, but they don't lose either.  */
9245   if (length == 8)
9246     {
9247       if (dbr_sequence_length () == 0)
9248 	{
9249 	  /* Nothing in the delay slot, fake it by putting the combined
9250 	     insn (the copy or add) in the delay slot of a bl.  */
9251 	  if (GET_CODE (operands[1]) == CONST_INT)
9252 	    return "b %2\n\tldi %1,%0";
9253 	  else
9254 	    return "b %2\n\tcopy %1,%0";
9255 	}
9256       else
9257 	{
9258 	  /* Something in the delay slot, but we've got a long branch.  */
9259 	  if (GET_CODE (operands[1]) == CONST_INT)
9260 	    return "ldi %1,%0\n\tb %2";
9261 	  else
9262 	    return "copy %1,%0\n\tb %2";
9263 	}
9264     }
9265 
9266   if (GET_CODE (operands[1]) == CONST_INT)
9267     output_asm_insn ("ldi %1,%0", operands);
9268   else
9269     output_asm_insn ("copy %1,%0", operands);
9270   return pa_output_lbranch (operands[2], insn, 1);
9271 }
9272 
9273 /* Output an unconditional add and branch insn.  */
9274 
9275 const char *
pa_output_parallel_addb(rtx * operands,rtx_insn * insn)9276 pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
9277 {
9278   int length = get_attr_length (insn);
9279 
9280   /* To make life easy we want operand0 to be the shared input/output
9281      operand and operand1 to be the readonly operand.  */
9282   if (operands[0] == operands[1])
9283     operands[1] = operands[2];
9284 
9285   /* These are the cases in which we win.  */
9286   if (length == 4)
9287     return "add%I1b,tr %1,%0,%3";
9288 
9289   /* None of the following cases win, but they don't lose either.  */
9290   if (length == 8)
9291     {
9292       if (dbr_sequence_length () == 0)
9293 	/* Nothing in the delay slot, fake it by putting the combined
9294 	   insn (the copy or add) in the delay slot of a bl.  */
9295 	return "b %3\n\tadd%I1 %1,%0,%0";
9296       else
9297 	/* Something in the delay slot, but we've got a long branch.  */
9298 	return "add%I1 %1,%0,%0\n\tb %3";
9299     }
9300 
9301   output_asm_insn ("add%I1 %1,%0,%0", operands);
9302   return pa_output_lbranch (operands[3], insn, 1);
9303 }
9304 
/* We use this hook to perform a PA specific optimization which is difficult
   to do in earlier passes.  */

static void
pa_reorg (void)
{
  /* Delete add,tr insns whose result is unused (see
     remove_useless_addtr_insns elsewhere in this file).  */
  remove_useless_addtr_insns (1);

  /* Combining instructions only pays off on pre-PA8000 processors;
     see the comment above pa_combine_instructions.  */
  if (pa_cpu < PROCESSOR_8000)
    pa_combine_instructions ();
}
9316 
9317 /* The PA has a number of odd instructions which can perform multiple
9318    tasks at once.  On first generation PA machines (PA1.0 and PA1.1)
9319    it may be profitable to combine two instructions into one instruction
9320    with two outputs.  It's not profitable PA2.0 machines because the
9321    two outputs would take two slots in the reorder buffers.
9322 
9323    This routine finds instructions which can be combined and combines
9324    them.  We only support some of the potential combinations, and we
9325    only try common ways to find suitable instructions.
9326 
9327       * addb can add two registers or a register and a small integer
9328       and jump to a nearby (+-8k) location.  Normally the jump to the
9329       nearby location is conditional on the result of the add, but by
9330       using the "true" condition we can make the jump unconditional.
9331       Thus addb can perform two independent operations in one insn.
9332 
9333       * movb is similar to addb in that it can perform a reg->reg
9334       or small immediate->reg copy and jump to a nearby (+-8k location).
9335 
9336       * fmpyadd and fmpysub can perform a FP multiply and either an
9337       FP add or FP sub if the operands of the multiply and add/sub are
9338       independent (there are other minor restrictions).  Note both
9339       the fmpy and fadd/fsub can in theory move to better spots according
9340       to data dependencies, but for now we require the fmpy stay at a
9341       fixed location.
9342 
9343       * Many of the memory operations can perform pre & post updates
9344       of index registers.  GCC's pre/post increment/decrement addressing
9345       is far too simple to take advantage of all the possibilities.  This
9346       pass may not be suitable since those insns may not be independent.
9347 
9348       * comclr can compare two ints or an int and a register, nullify
9349       the following instruction and zero some other register.  This
9350       is more difficult to use as it's harder to find an insn which
9351       will generate a comclr than finding something like an unconditional
9352       branch.  (conditional moves & long branches create comclr insns).
9353 
9354       * Most arithmetic operations can conditionally skip the next
9355       instruction.  They can be viewed as "perform this operation
9356       and conditionally jump to this nearby location" (where nearby
9357       is an insns away).  These are difficult to use due to the
9358       branch length restrictions.  */
9359 
static void
pa_combine_instructions (void)
{
  rtx_insn *anchor;

  /* This can get expensive since the basic algorithm is on the
     order of O(n^2) (or worse).  Only do it for -O2 or higher
     levels of optimization.  */
  if (optimize < 2)
    return;

  /* Walk down the list of insns looking for "anchor" insns which
     may be combined with "floating" insns.  As the name implies,
     "anchor" instructions don't move, while "floating" insns may
     move around.  */

  /* Scratch two-element PARALLEL reused by pa_can_combine_p to test
     whether an anchor/floater pair matches a combined pattern.  */
  rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
  rtx_insn *new_rtx = make_insn_raw (par);

  for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
    {
      enum attr_pa_combine_type anchor_attr;
      enum attr_pa_combine_type floater_attr;

      /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
	 Also ignore any special USE insns.  */
      if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
	  || GET_CODE (PATTERN (anchor)) == USE
	  || GET_CODE (PATTERN (anchor)) == CLOBBER)
	continue;

      anchor_attr = get_attr_pa_combine_type (anchor);
      /* See if anchor is an insn suitable for combination.  */
      if (anchor_attr == PA_COMBINE_TYPE_FMPY
	  || anchor_attr == PA_COMBINE_TYPE_FADDSUB
	  || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
	      && ! forward_branch_p (anchor)))
	{
	  rtx_insn *floater;

	  /* First scan backwards from the anchor for a suitable
	     floating insn.  */
	  for (floater = PREV_INSN (anchor);
	       floater;
	       floater = PREV_INSN (floater))
	    {
	      /* Notes and USE/CLOBBER insns are transparent to the scan.  */
	      if (NOTE_P (floater)
		  || (NONJUMP_INSN_P (floater)
		      && (GET_CODE (PATTERN (floater)) == USE
			  || GET_CODE (PATTERN (floater)) == CLOBBER)))
		continue;

	      /* Anything except a regular INSN will stop our search.  */
	      if (! NONJUMP_INSN_P (floater))
		{
		  floater = NULL;
		  break;
		}

	      /* See if FLOATER is suitable for combination with the
		 anchor.  */
	      floater_attr = get_attr_pa_combine_type (floater);
	      if ((anchor_attr == PA_COMBINE_TYPE_FMPY
		   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
		  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
		      && floater_attr == PA_COMBINE_TYPE_FMPY))
		{
		  /* If ANCHOR and FLOATER can be combined, then we're
		     done with this pass.  */
		  if (pa_can_combine_p (new_rtx, anchor, floater, 0,
					SET_DEST (PATTERN (floater)),
					XEXP (SET_SRC (PATTERN (floater)), 0),
					XEXP (SET_SRC (PATTERN (floater)), 1)))
		    break;
		}

	      else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
		       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
		{
		  /* An ADDMOVE floater is either an add (PLUS source)
		     or a reg/immediate copy; pass the operands that
		     pa_can_combine_p should range-check accordingly.  */
		  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
		    {
		      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
					    SET_DEST (PATTERN (floater)),
					XEXP (SET_SRC (PATTERN (floater)), 0),
					XEXP (SET_SRC (PATTERN (floater)), 1)))
			break;
		    }
		  else
		    {
		      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
					    SET_DEST (PATTERN (floater)),
					    SET_SRC (PATTERN (floater)),
					    SET_SRC (PATTERN (floater))))
			break;
		    }
		}
	    }

	  /* If we didn't find anything on the backwards scan try forwards.  */
	  if (!floater
	      && (anchor_attr == PA_COMBINE_TYPE_FMPY
		  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
	    {
	      for (floater = anchor; floater; floater = NEXT_INSN (floater))
		{
		  if (NOTE_P (floater)
		      || (NONJUMP_INSN_P (floater)
			  && (GET_CODE (PATTERN (floater)) == USE
			      || GET_CODE (PATTERN (floater)) == CLOBBER)))

		    continue;

		  /* Anything except a regular INSN will stop our search.  */
		  if (! NONJUMP_INSN_P (floater))
		    {
		      floater = NULL;
		      break;
		    }

		  /* See if FLOATER is suitable for combination with the
		     anchor.  */
		  floater_attr = get_attr_pa_combine_type (floater);
		  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
		       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
		      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
			  && floater_attr == PA_COMBINE_TYPE_FMPY))
		    {
		      /* If ANCHOR and FLOATER can be combined, then we're
			 done with this pass.  Note REVERSED is 1 here
			 since the floater follows the anchor.  */
		      if (pa_can_combine_p (new_rtx, anchor, floater, 1,
					    SET_DEST (PATTERN (floater)),
					    XEXP (SET_SRC (PATTERN (floater)),
						  0),
					    XEXP (SET_SRC (PATTERN (floater)),
						  1)))
			break;
		    }
		}
	    }

	  /* FLOATER will be nonzero if we found a suitable floating
	     insn for combination with ANCHOR.  */
	  if (floater
	      && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
		  || anchor_attr == PA_COMBINE_TYPE_FMPY))
	    {
	      /* Emit the new instruction and delete the old anchor.  */
	      rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
				       copy_rtx (PATTERN (floater)));
	      rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
	      emit_insn_before (temp, anchor);

	      SET_INSN_DELETED (anchor);

	      /* Emit a special USE insn for FLOATER, then delete
		 the floating insn.  */
	      temp = copy_rtx (PATTERN (floater));
	      emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
	      delete_insn (floater);

	      continue;
	    }
	  else if (floater
		   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
	    {
	      /* Emit the new_jump instruction and delete the old anchor.  */
	      rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
				       copy_rtx (PATTERN (floater)));
	      rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
	      temp = emit_jump_insn_before (temp, anchor);

	      JUMP_LABEL (temp) = JUMP_LABEL (anchor);
	      SET_INSN_DELETED (anchor);

	      /* Emit a special USE insn for FLOATER, then delete
		 the floating insn.  */
	      temp = copy_rtx (PATTERN (floater));
	      emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
	      delete_insn (floater);
	      continue;
	    }
	}
    }
}
9541 
/* Return nonzero if ANCHOR and FLOATER can be combined into the scratch
   PARALLEL insn NEW_RTX.  DEST is the output of the floater; SRC1/SRC2
   are its inputs.  REVERSED is nonzero when FLOATER follows ANCHOR in
   the insn stream (the forward scan in pa_combine_instructions).  */
static int
pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
		  int reversed, rtx dest,
		  rtx src1, rtx src2)
{
  int insn_code_number;
  rtx_insn *start, *end;

  /* Create a PARALLEL with the patterns of ANCHOR and
     FLOATER, try to recognize it, then test constraints
     for the resulting pattern.

     If the pattern doesn't match or the constraints
     aren't met keep searching for a suitable floater
     insn.  */
  XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
  XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
  INSN_CODE (new_rtx) = -1;
  insn_code_number = recog_memoized (new_rtx);
  basic_block bb = BLOCK_FOR_INSN (anchor);
  if (insn_code_number < 0
      || (extract_insn (new_rtx),
	  !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
    return 0;

  /* Orient the range [START, END] so that START precedes END in the
     insn stream regardless of which direction we scanned.  */
  if (reversed)
    {
      start = anchor;
      end = floater;
    }
  else
    {
      start = floater;
      end = anchor;
    }

  /* There's up to three operands to consider.  One
     output and two inputs.

     The output must not be used between FLOATER & ANCHOR
     exclusive.  The inputs must not be set between
     FLOATER and ANCHOR exclusive.  */

  if (reg_used_between_p (dest, start, end))
    return 0;

  if (reg_set_between_p (src1, start, end))
    return 0;

  if (reg_set_between_p (src2, start, end))
    return 0;

  /* If we get here, then everything is good.  */
  return 1;
}
9597 
9598 /* Return nonzero if references for INSN are delayed.
9599 
9600    Millicode insns are actually function calls with some special
9601    constraints on arguments and register usage.
9602 
9603    Millicode calls always expect their arguments in the integer argument
9604    registers, and always return their result in %r29 (ret1).  They
9605    are expected to clobber their arguments, %r1, %r29, and the return
9606    pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9607 
9608    This function tells reorg that the references to arguments and
9609    millicode calls do not appear to happen until after the millicode call.
9610    This allows reorg to put insns which set the argument registers into the
9611    delay slot of the millicode call -- thus they act more like traditional
9612    CALL_INSNs.
9613 
9614    Note we cannot consider side effects of the insn to be delayed because
9615    the branch and link insn will clobber the return pointer.  If we happened
9616    to use the return pointer in the delay slot of the call, then we lose.
9617 
9618    get_attr_type will try to recognize the given insn, so make sure to
9619    filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9620    in particular.  */
9621 int
pa_insn_refs_are_delayed(rtx_insn * insn)9622 pa_insn_refs_are_delayed (rtx_insn *insn)
9623 {
9624   return ((NONJUMP_INSN_P (insn)
9625 	   && GET_CODE (PATTERN (insn)) != SEQUENCE
9626 	   && GET_CODE (PATTERN (insn)) != USE
9627 	   && GET_CODE (PATTERN (insn)) != CLOBBER
9628 	   && get_attr_type (insn) == TYPE_MILLI));
9629 }
9630 
9631 /* Promote the return value, but not the arguments.  */
9632 
9633 static machine_mode
pa_promote_function_mode(const_tree type ATTRIBUTE_UNUSED,machine_mode mode,int * punsignedp ATTRIBUTE_UNUSED,const_tree fntype ATTRIBUTE_UNUSED,int for_return)9634 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9635                           machine_mode mode,
9636                           int *punsignedp ATTRIBUTE_UNUSED,
9637                           const_tree fntype ATTRIBUTE_UNUSED,
9638                           int for_return)
9639 {
9640   if (for_return == 0)
9641     return mode;
9642   return promote_mode (type, mode, punsignedp);
9643 }
9644 
9645 /* On the HP-PA the value is found in register(s) 28(-29), unless
9646    the mode is SF or DF. Then the value is returned in fr4 (32).
9647 
9648    This must perform the same promotions as PROMOTE_MODE, else promoting
9649    return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9650 
9651    Small structures must be returned in a PARALLEL on PA64 in order
9652    to match the HP Compiler ABI.  */
9653 
static rtx
pa_function_value (const_tree valtype,
                   const_tree func ATTRIBUTE_UNUSED,
                   bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode valmode;

  /* Aggregates, complex and vector types need layout-specific return
     locations; scalars fall through to the simple cases below.  */
  if (AGGREGATE_TYPE_P (valtype)
      || TREE_CODE (valtype) == COMPLEX_TYPE
      || TREE_CODE (valtype) == VECTOR_TYPE)
    {
      HOST_WIDE_INT valsize = int_size_in_bytes (valtype);

      /* Handle aggregates that fit exactly in a word or double word.  */
      if (valsize == UNITS_PER_WORD || valsize == 2 * UNITS_PER_WORD)
	return gen_rtx_REG (TYPE_MODE (valtype), 28);

      if (TARGET_64BIT)
	{
          /* Aggregates with a size less than or equal to 128 bits are
	     returned in GR 28(-29).  They are left justified.  The pad
	     bits are undefined.  Larger aggregates are returned in
	     memory.  */
	  rtx loc[2];
	  int i, offset = 0;
	  int ub = valsize <= UNITS_PER_WORD ? 1 : 2;

	  /* Build one EXPR_LIST entry per 64-bit register used.  */
	  for (i = 0; i < ub; i++)
	    {
	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode, 28 + i),
					  GEN_INT (offset));
	      offset += 8;
	    }

	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
	}
      else if (valsize > UNITS_PER_WORD)
	{
	  /* Aggregates 5 to 8 bytes in size are returned in general
	     registers r28-r29 in the same manner as other non
	     floating-point objects.  The data is right-justified and
	     zero-extended to 64 bits.  This is opposite to the normal
	     justification used on big endian targets and requires
	     special treatment.  */
	  rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
				       gen_rtx_REG (DImode, 28), const0_rtx);
	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
	}
    }

  /* Sub-word integers and pointers are promoted to word_mode.  This must
     agree with PROMOTE_MODE (see the comment above this function).  */
  if ((INTEGRAL_TYPE_P (valtype)
       && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
      || POINTER_TYPE_P (valtype))
    valmode = word_mode;
  else
    valmode = TYPE_MODE (valtype);

  /* Scalar floating-point values (except TFmode) are returned in
     fr4 (32) when hard float is in use.  */
  if (TREE_CODE (valtype) == REAL_TYPE
      && !AGGREGATE_TYPE_P (valtype)
      && TYPE_MODE (valtype) != TFmode
      && !TARGET_SOFT_FLOAT)
    return gen_rtx_REG (valmode, 32);

  /* Everything else comes back in gr28.  */
  return gen_rtx_REG (valmode, 28);
}
9720 
9721 /* Implement the TARGET_LIBCALL_VALUE hook.  */
9722 
9723 static rtx
pa_libcall_value(machine_mode mode,const_rtx fun ATTRIBUTE_UNUSED)9724 pa_libcall_value (machine_mode mode,
9725 		  const_rtx fun ATTRIBUTE_UNUSED)
9726 {
9727   if (! TARGET_SOFT_FLOAT
9728       && (mode == SFmode || mode == DFmode))
9729     return  gen_rtx_REG (mode, 32);
9730   else
9731     return  gen_rtx_REG (mode, 28);
9732 }
9733 
9734 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook.  */
9735 
9736 static bool
pa_function_value_regno_p(const unsigned int regno)9737 pa_function_value_regno_p (const unsigned int regno)
9738 {
9739   if (regno == 28
9740       || (! TARGET_SOFT_FLOAT &&  regno == 32))
9741     return true;
9742 
9743   return false;
9744 }
9745 
/* Update the data in CUM to advance over argument ARG.  */

static void
pa_function_arg_advance (cumulative_args_t cum_v,
			 const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int arg_size = pa_function_arg_size (arg.mode, arg.type);

  cum->nargs_prototype--;
  /* Advance by the argument's size in words, plus one padding word
     when a multi-word argument with a type starts on an odd word
     boundary (the && chain below evaluates to 0 or 1).  */
  cum->words += (arg_size
		 + ((cum->words & 01)
		    && arg.type != NULL_TREE
		    && arg_size > 1));
}
9761 
9762 /* Return the location of a parameter that is passed in a register or NULL
9763    if the parameter has any component that is passed in memory.
9764 
9765    This is new code and will be pushed to into the net sources after
9766    further testing.
9767 
9768    ??? We might want to restructure this so that it looks more like other
9769    ports.  */
9770 static rtx
pa_function_arg(cumulative_args_t cum_v,const function_arg_info & arg)9771 pa_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
9772 {
9773   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9774   tree type = arg.type;
9775   machine_mode mode = arg.mode;
9776   int max_arg_words = (TARGET_64BIT ? 8 : 4);
9777   int alignment = 0;
9778   int arg_size;
9779   int fpr_reg_base;
9780   int gpr_reg_base;
9781   rtx retval;
9782 
9783   if (arg.end_marker_p ())
9784     return NULL_RTX;
9785 
9786   arg_size = pa_function_arg_size (mode, type);
9787   if (!arg_size)
9788     return NULL_RTX;
9789 
9790   /* If this arg would be passed partially or totally on the stack, then
9791      this routine should return zero.  pa_arg_partial_bytes will
9792      handle arguments which are split between regs and stack slots if
9793      the ABI mandates split arguments.  */
9794   if (!TARGET_64BIT)
9795     {
9796       /* The 32-bit ABI does not split arguments.  */
9797       if (cum->words + arg_size > max_arg_words)
9798 	return NULL_RTX;
9799     }
9800   else
9801     {
9802       if (arg_size > 1)
9803 	alignment = cum->words & 1;
9804       if (cum->words + alignment >= max_arg_words)
9805 	return NULL_RTX;
9806     }
9807 
9808   /* The 32bit ABIs and the 64bit ABIs are rather different,
9809      particularly in their handling of FP registers.  We might
9810      be able to cleverly share code between them, but I'm not
9811      going to bother in the hope that splitting them up results
9812      in code that is more easily understood.  */
9813 
9814   if (TARGET_64BIT)
9815     {
9816       /* Advance the base registers to their current locations.
9817 
9818          Remember, gprs grow towards smaller register numbers while
9819 	 fprs grow to higher register numbers.  Also remember that
9820 	 although FP regs are 32-bit addressable, we pretend that
9821 	 the registers are 64-bits wide.  */
9822       gpr_reg_base = 26 - cum->words;
9823       fpr_reg_base = 32 + cum->words;
9824 
9825       /* Arguments wider than one word and small aggregates need special
9826 	 treatment.  */
9827       if (arg_size > 1
9828 	  || mode == BLKmode
9829 	  || (type && (AGGREGATE_TYPE_P (type)
9830 		       || TREE_CODE (type) == COMPLEX_TYPE
9831 		       || TREE_CODE (type) == VECTOR_TYPE)))
9832 	{
9833 	  /* Double-extended precision (80-bit), quad-precision (128-bit)
9834 	     and aggregates including complex numbers are aligned on
9835 	     128-bit boundaries.  The first eight 64-bit argument slots
9836 	     are associated one-to-one, with general registers r26
9837 	     through r19, and also with floating-point registers fr4
9838 	     through fr11.  Arguments larger than one word are always
9839 	     passed in general registers.
9840 
9841 	     Using a PARALLEL with a word mode register results in left
9842 	     justified data on a big-endian target.  */
9843 
9844 	  rtx loc[8];
9845 	  int i, offset = 0, ub = arg_size;
9846 
9847 	  /* Align the base register.  */
9848 	  gpr_reg_base -= alignment;
9849 
9850 	  ub = MIN (ub, max_arg_words - cum->words - alignment);
9851 	  for (i = 0; i < ub; i++)
9852 	    {
9853 	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9854 					  gen_rtx_REG (DImode, gpr_reg_base),
9855 					  GEN_INT (offset));
9856 	      gpr_reg_base -= 1;
9857 	      offset += 8;
9858 	    }
9859 
9860 	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9861 	}
9862      }
9863   else
9864     {
9865       /* If the argument is larger than a word, then we know precisely
9866 	 which registers we must use.  */
9867       if (arg_size > 1)
9868 	{
9869 	  if (cum->words)
9870 	    {
9871 	      gpr_reg_base = 23;
9872 	      fpr_reg_base = 38;
9873 	    }
9874 	  else
9875 	    {
9876 	      gpr_reg_base = 25;
9877 	      fpr_reg_base = 34;
9878 	    }
9879 
9880 	  /* Structures 5 to 8 bytes in size are passed in the general
9881 	     registers in the same manner as other non floating-point
9882 	     objects.  The data is right-justified and zero-extended
9883 	     to 64 bits.  This is opposite to the normal justification
9884 	     used on big endian targets and requires special treatment.
9885 	     We now define BLOCK_REG_PADDING to pad these objects.
9886 	     Aggregates, complex and vector types are passed in the same
9887 	     manner as structures.  */
9888 	  if (mode == BLKmode
9889 	      || (type && (AGGREGATE_TYPE_P (type)
9890 			   || TREE_CODE (type) == COMPLEX_TYPE
9891 			   || TREE_CODE (type) == VECTOR_TYPE)))
9892 	    {
9893 	      rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9894 					   gen_rtx_REG (DImode, gpr_reg_base),
9895 					   const0_rtx);
9896 	      return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9897 	    }
9898 	}
9899       else
9900         {
9901 	   /* We have a single word (32 bits).  A simple computation
9902 	      will get us the register #s we need.  */
9903 	   gpr_reg_base = 26 - cum->words;
9904 	   fpr_reg_base = 32 + 2 * cum->words;
9905 	}
9906     }
9907 
9908   /* Determine if the argument needs to be passed in both general and
9909      floating point registers.  */
9910   if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9911        /* If we are doing soft-float with portable runtime, then there
9912 	  is no need to worry about FP regs.  */
9913        && !TARGET_SOFT_FLOAT
9914        /* The parameter must be some kind of scalar float, else we just
9915 	  pass it in integer registers.  */
9916        && GET_MODE_CLASS (mode) == MODE_FLOAT
9917        /* The target function must not have a prototype.  */
9918        && cum->nargs_prototype <= 0
9919        /* libcalls do not need to pass items in both FP and general
9920 	  registers.  */
9921        && type != NULL_TREE
9922        /* All this hair applies to "outgoing" args only.  This includes
9923 	  sibcall arguments setup with FUNCTION_INCOMING_ARG.  */
9924        && !cum->incoming)
9925       /* Also pass outgoing floating arguments in both registers in indirect
9926 	 calls with the 32 bit ABI and the HP assembler since there is no
9927 	 way to the specify argument locations in static functions.  */
9928       || (!TARGET_64BIT
9929 	  && !TARGET_GAS
9930 	  && !cum->incoming
9931 	  && cum->indirect
9932 	  && GET_MODE_CLASS (mode) == MODE_FLOAT))
9933     {
9934       retval
9935 	= gen_rtx_PARALLEL
9936 	    (mode,
9937 	     gen_rtvec (2,
9938 			gen_rtx_EXPR_LIST (VOIDmode,
9939 					   gen_rtx_REG (mode, fpr_reg_base),
9940 					   const0_rtx),
9941 			gen_rtx_EXPR_LIST (VOIDmode,
9942 					   gen_rtx_REG (mode, gpr_reg_base),
9943 					   const0_rtx)));
9944     }
9945   else
9946     {
9947       /* See if we should pass this parameter in a general register.  */
9948       if (TARGET_SOFT_FLOAT
9949 	  /* Indirect calls in the normal 32bit ABI require all arguments
9950 	     to be passed in general registers.  */
9951 	  || (!TARGET_PORTABLE_RUNTIME
9952 	      && !TARGET_64BIT
9953 	      && !TARGET_ELF32
9954 	      && cum->indirect)
9955 	  /* If the parameter is not a scalar floating-point parameter,
9956 	     then it belongs in GPRs.  */
9957 	  || GET_MODE_CLASS (mode) != MODE_FLOAT
9958 	  /* Structure with single SFmode field belongs in GPR.  */
9959 	  || (type && AGGREGATE_TYPE_P (type)))
9960 	retval = gen_rtx_REG (mode, gpr_reg_base);
9961       else
9962 	retval = gen_rtx_REG (mode, fpr_reg_base);
9963     }
9964   return retval;
9965 }
9966 
9967 /* Arguments larger than one word are double word aligned.  */
9968 
9969 static unsigned int
pa_function_arg_boundary(machine_mode mode,const_tree type)9970 pa_function_arg_boundary (machine_mode mode, const_tree type)
9971 {
9972   bool singleword = (type
9973 		     ? (integer_zerop (TYPE_SIZE (type))
9974 			|| !TREE_CONSTANT (TYPE_SIZE (type))
9975 			|| int_size_in_bytes (type) <= UNITS_PER_WORD)
9976 		     : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9977 
9978   return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
9979 }
9980 
9981 /* If this arg would be passed totally in registers or totally on the stack,
9982    then this routine should return zero.  */
9983 
9984 static int
pa_arg_partial_bytes(cumulative_args_t cum_v,const function_arg_info & arg)9985 pa_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
9986 {
9987   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9988   unsigned int max_arg_words = 8;
9989   unsigned int offset = 0;
9990   int arg_size;
9991 
9992   if (!TARGET_64BIT)
9993     return 0;
9994 
9995   arg_size = pa_function_arg_size (arg.mode, arg.type);
9996   if (arg_size > 1 && (cum->words & 1))
9997     offset = 1;
9998 
9999   if (cum->words + offset + arg_size <= max_arg_words)
10000     /* Arg fits fully into registers.  */
10001     return 0;
10002   else if (cum->words + offset >= max_arg_words)
10003     /* Arg fully on the stack.  */
10004     return 0;
10005   else
10006     /* Arg is split.  */
10007     return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
10008 }
10009 
10010 
10011 /* A get_unnamed_section callback for switching to the text section.
10012 
10013    This function is only used with SOM.  Because we don't support
10014    named subspaces, we can only create a new subspace or switch back
10015    to the default text subspace.  */
10016 
static void
som_output_text_section_asm_op (const char *data ATTRIBUTE_UNUSED)
{
  gcc_assert (TARGET_SOM);
  if (TARGET_GAS)
    {
      if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
	{
	  /* We only want to emit a .nsubspa directive once at the
	     start of the function.  */
	  cfun->machine->in_nsubspa = 1;

	  /* Create a new subspace for the text.  This provides
	     better stub placement and one-only functions.  */
	  if (cfun->decl
	      && DECL_ONE_ONLY (cfun->decl)
	      && !DECL_WEAK (cfun->decl))
	    {
	      /* One-only (COMDAT) function: emit a comdat $CODE$
		 subspace so duplicate copies can be discarded.  */
	      output_section_asm_op ("\t.SPACE $TEXT$\n"
				     "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
				     "ACCESS=44,SORT=24,COMDAT");
	      return;
	    }
	}
      else
	{
	  /* There isn't a current function or the body of the current
	     function has been completed.  So, we are changing to the
	     text section to output debugging information.  Thus, we
	     need to forget that we are in the text section so that
	     varasm.cc will call us when text_section is selected again.  */
	  gcc_assert (!cfun || !cfun->machine
		      || cfun->machine->in_nsubspa == 2);
	  in_section = NULL;
	}
      /* Default (non-comdat) new $CODE$ subspace for GAS.  */
      output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
      return;
    }
  /* HP assembler: switch back to the default text subspace; named
     subspaces are not supported.  */
  output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
}
10057 
10058 /* A get_unnamed_section callback for switching to comdat data
10059    sections.  This function is only used with SOM.  */
10060 
static void
som_output_comdat_data_section_asm_op (const char *data)
{
  /* Forget the current section so that varasm.cc always re-emits the
     section directive; each comdat data subspace must be restated.  */
  in_section = NULL;
  output_section_asm_op (data);
}
10067 
10068 /* Implement TARGET_ASM_INIT_SECTIONS.  */
10069 
static void
pa_som_asm_init_sections (void)
{
  /* Text goes through som_output_text_section_asm_op so that comdat
     and GAS-specific subspaces can be selected per function.  */
  text_section
    = get_unnamed_section (0, som_output_text_section_asm_op, NULL);

  /* SOM puts readonly data in the default $LIT$ subspace when PIC code
     is not being generated.  */
  som_readonly_data_section
    = get_unnamed_section (0, output_section_asm_op,
			   "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");

  /* When secondary definitions are not supported, SOM makes readonly
     data one-only by creating a new $LIT$ subspace in $TEXT$ with
     the comdat flag.  */
  som_one_only_readonly_data_section
    = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
			   "\t.SPACE $TEXT$\n"
			   "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
			   "ACCESS=0x2c,SORT=16,COMDAT");


  /* When secondary definitions are not supported, SOM makes data one-only
     by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
  som_one_only_data_section
    = get_unnamed_section (SECTION_WRITE,
			   som_output_comdat_data_section_asm_op,
			   "\t.SPACE $PRIVATE$\n"
			   "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
			   "ACCESS=31,SORT=24,COMDAT");

  /* Transactional-memory clone table, only when -fgnu-tm is enabled.  */
  if (flag_tm)
    som_tm_clone_table_section
      = get_unnamed_section (0, output_section_asm_op,
			     "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");

  /* HPUX ld generates incorrect GOT entries for "T" fixups which
     reference data within the $TEXT$ space (for example constant
     strings in the $LIT$ subspace).

     The assemblers (GAS and HP as) both have problems with handling
     the difference of two symbols.  This is the other correct way to
     reference constant data during PIC code generation.

     Thus, we can't put constant data needing relocation in the $TEXT$
     space during PIC generation.

     Previously, we placed all constant data into the $DATA$ subspace
     when generating PIC code.  This reduces sharing, but it works
     correctly.  Now we rely on pa_reloc_rw_mask() for section selection.
     This puts constant data not needing relocation into the $TEXT$ space.  */
  readonly_data_section = som_readonly_data_section;

  /* We must not have a reference to an external symbol defined in a
     shared library in a readonly section, else the SOM linker will
     complain.

     So, we force exception information into the data section.  */
  exception_section = data_section;
}
10130 
10131 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION.  */
10132 
static section *
pa_som_tm_clone_table_section (void)
{
  /* Set up (when flag_tm) in pa_som_asm_init_sections; may be NULL
     otherwise.  */
  return som_tm_clone_table_section;
}
10138 
10139 /* On hpux10, the linker will give an error if we have a reference
10140    in the read-only data section to a symbol defined in a shared
10141    library.  Therefore, expressions that might require a reloc
10142    cannot be placed in the read-only data section.  */
10143 
static section *
pa_select_section (tree exp, int reloc,
		   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  /* Constant-initialized, non-volatile readonly variables whose
     relocations are allowed in read-only memory (see pa_reloc_rw_mask)
     can live in a readonly section.  */
  if (TREE_CODE (exp) == VAR_DECL
      && TREE_READONLY (exp)
      && !TREE_THIS_VOLATILE (exp)
      && DECL_INITIAL (exp)
      && (DECL_INITIAL (exp) == error_mark_node
          || TREE_CONSTANT (DECL_INITIAL (exp)))
      && !(reloc & pa_reloc_rw_mask ()))
    {
      /* One-only (COMDAT) readonly data gets its own SOM subspace.  */
      if (TARGET_SOM
	  && DECL_ONE_ONLY (exp)
	  && !DECL_WEAK (exp))
	return som_one_only_readonly_data_section;
      else
	return readonly_data_section;
    }
  else if (CONSTANT_CLASS_P (exp)
	   && !(reloc & pa_reloc_rw_mask ()))
    return readonly_data_section;
  else if (TARGET_SOM
	   && TREE_CODE (exp) == VAR_DECL
	   && DECL_ONE_ONLY (exp)
	   && !DECL_WEAK (exp))
    /* One-only writable data gets the comdat $DATA$ subspace.  */
    return som_one_only_data_section;
  else
    return data_section;
}
10174 
10175 /* Implement pa_elf_select_rtx_section.  If X is a function label operand
10176    and the function is in a COMDAT group, place the plabel reference in the
10177    .data.rel.ro.local section.  The linker ignores references to symbols in
10178    discarded sections from this section.  */
10179 
static section *
pa_elf_select_rtx_section (machine_mode mode, rtx x,
			   unsigned HOST_WIDE_INT align)
{
  if (function_label_operand (x, VOIDmode))
    {
      tree decl = SYMBOL_REF_DECL (x);

      /* No decl (e.g. a libcall symbol) is treated conservatively the
	 same way as a COMDAT function.  */
      if (!decl || (DECL_P (decl) && DECL_COMDAT_GROUP (decl)))
	return get_named_section (NULL, ".data.rel.ro.local", 1);
    }

  return default_elf_select_rtx_section (mode, x, align);
}
10194 
10195 /* Implement pa_reloc_rw_mask.  */
10196 
10197 static int
pa_reloc_rw_mask(void)10198 pa_reloc_rw_mask (void)
10199 {
10200   if (flag_pic || (TARGET_SOM && !TARGET_HPUX_11))
10201     return 3;
10202 
10203   /* HP linker does not support global relocs in readonly memory.  */
10204   return TARGET_SOM ? 2 : 0;
10205 }
10206 
10207 static void
pa_globalize_label(FILE * stream,const char * name)10208 pa_globalize_label (FILE *stream, const char *name)
10209 {
10210   /* We only handle DATA objects here, functions are globalized in
10211      ASM_DECLARE_FUNCTION_NAME.  */
10212   if (! FUNCTION_NAME_P (name))
10213   {
10214     fputs ("\t.EXPORT ", stream);
10215     assemble_name (stream, name);
10216     fputs (",DATA\n", stream);
10217   }
10218 }
10219 
10220 /* Worker function for TARGET_STRUCT_VALUE_RTX.  */
10221 
static rtx
pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
		     int incoming ATTRIBUTE_UNUSED)
{
  /* The hidden aggregate-return pointer is always passed in the same
     hard register, regardless of function type or direction.  */
  return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
}
10228 
10229 /* Worker function for TARGET_RETURN_IN_MEMORY.  */
10230 
10231 bool
pa_return_in_memory(const_tree type,const_tree fntype ATTRIBUTE_UNUSED)10232 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
10233 {
10234   /* SOM ABI says that objects larger than 64 bits are returned in memory.
10235      PA64 ABI says that objects larger than 128 bits are returned in memory.
10236      Note, int_size_in_bytes can return -1 if the size of the object is
10237      variable or larger than the maximum value that can be expressed as
10238      a HOST_WIDE_INT.   It can also return zero for an empty type.  The
10239      simplest way to handle variable and empty types is to pass them in
10240      memory.  This avoids problems in defining the boundaries of argument
10241      slots, allocating registers, etc.  */
10242   return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
10243 	  || int_size_in_bytes (type) <= 0);
10244 }
10245 
10246 /* Structure to hold declaration and name of external symbols that are
10247    emitted by GCC.  We generate a vector of these symbols and output them
10248    at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
10249    This avoids putting out names that are never really used.  */
10250 
typedef struct GTY(()) extern_symbol
{
  tree decl;		/* Declaration of the external symbol.  */
  const char *name;	/* Its assembler name.  */
} extern_symbol;

/* Define gc'd vector type for extern_symbol.  */

/* Vector of extern_symbol pointers.  Filled by
   pa_hpux_asm_output_external and drained in pa_file_end.  */
static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
10261 
10262 #ifdef ASM_OUTPUT_EXTERNAL_REAL
10263 /* Mark DECL (name NAME) as an external reference (assembler output
10264    file FILE).  This saves the names to output at the end of the file
10265    if actually referenced.  */
10266 
void
pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
{
  /* Nothing is written now; the symbol is recorded and emitted at the
     end of the file (pa_file_end) only if actually referenced.  */
  gcc_assert (file == asm_out_file);
  extern_symbol p = {decl, name};
  vec_safe_push (extern_symbols, p);
}
10274 #endif
10275 
10276 /* Output text required at the end of an assembler file.
10277    This includes deferred plabels and .import directives for
10278    all external symbols that were actually referenced.  */
10279 
static void
pa_file_end (void)
{
#ifdef ASM_OUTPUT_EXTERNAL_REAL
  unsigned int i;
  extern_symbol *p;

  if (!NO_DEFERRED_PROFILE_COUNTERS)
    output_deferred_profile_counters ();
#endif

  output_deferred_plabels ();

#ifdef ASM_OUTPUT_EXTERNAL_REAL
  /* Emit .import directives only for the recorded external symbols
     that were actually referenced and not defined in this unit.  */
  for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
    {
      tree decl = p->decl;

      if (!TREE_ASM_WRITTEN (decl)
	  && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
	ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
    }

  vec_free (extern_symbols);
#endif

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
}
10309 
10310 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
10311 
static bool
pa_can_change_mode_class (machine_mode from, machine_mode to,
			  reg_class_t rclass)
{
  if (from == to)
    return true;

  /* Same-size changes are always fine.  */
  if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
    return true;

  /* Reject changes to/from modes with zero size.  */
  if (!GET_MODE_SIZE (from) || !GET_MODE_SIZE (to))
    return false;

  /* Reject changes to/from complex and vector modes.  */
  if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
      || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
    return false;

  /* There is no way to load QImode or HImode values directly from memory
     to a FP register.  SImode loads to the FP registers are not zero
     extended.  On the 64-bit target, this conflicts with the definition
     of LOAD_EXTEND_OP.  Thus, we reject all mode changes in the FP registers
     except for DImode to SImode on the 64-bit target.  It is handled by
     register renaming in pa_print_operand.  */
  if (MAYBE_FP_REG_CLASS_P (rclass))
    return TARGET_64BIT && from == DImode && to == SImode;

  /* TARGET_HARD_REGNO_MODE_OK places modes with sizes larger than a word
     in specific sets of registers.  Thus, we cannot allow changing
     to a larger mode when it's larger than a word.  */
  if (GET_MODE_SIZE (to) > UNITS_PER_WORD
      && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
    return false;

  return true;
}
10349 
10350 /* Implement TARGET_MODES_TIEABLE_P.
10351 
10352    We should return FALSE for QImode and HImode because these modes
10353    are not ok in the floating-point registers.  However, this prevents
10354    tieing these modes to SImode and DImode in the general registers.
10355    So, this isn't a good idea.  We rely on TARGET_HARD_REGNO_MODE_OK and
10356    TARGET_CAN_CHANGE_MODE_CLASS to prevent these modes from being used
10357    in the floating-point registers.  */
10358 
10359 static bool
pa_modes_tieable_p(machine_mode mode1,machine_mode mode2)10360 pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10361 {
10362   /* Don't tie modes in different classes.  */
10363   if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
10364     return false;
10365 
10366   return true;
10367 }
10368 
10369 
10370 /* Length in units of the trampoline instruction code.  */
10371 
/* Byte count of the instruction portion emitted by
   pa_asm_trampoline_template below (excludes the data words).  */
#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 36 : 48))
10373 
10374 
10375 /* Output assembler code for a block containing the constant parts
10376    of a trampoline, leaving space for the variable parts.
10377 
10378    The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
10379    and then branches to the specified routine.
10380 
10381    This code template is copied from text segment to stack location
10382    and then patched with pa_trampoline_init to contain valid values,
10383    and then entered as a subroutine.
10384 
10385    It is best to keep this as small as possible to avoid having to
10386    flush multiple lines in the cache.  */
10387 
static void
pa_asm_trampoline_template (FILE *f)
{
  if (!TARGET_64BIT)
    {
      if (TARGET_PA_20)
	{
	  /* PA 2.0: get the trampoline address with mfia, load the
	     target address and static chain relative to it, then
	     branch with bve.  */
	  fputs ("\tmfia	%r20\n", f);
	  fputs ("\tldw		48(%r20),%r22\n", f);
	  fputs ("\tcopy	%r22,%r21\n", f);
	  fputs ("\tbb,>=,n	%r22,30,.+16\n", f);
	  fputs ("\tdepwi	0,31,2,%r22\n", f);
	  fputs ("\tldw		0(%r22),%r21\n", f);
	  fputs ("\tldw		4(%r22),%r19\n", f);
	  fputs ("\tbve		(%r21)\n", f);
	  /* NOTE(review): the base register here is %r1, but nothing in
	     this path sets %r1 (the trampoline address is in %r20 from
	     mfia above, and the pre-2.0 path uses 44(%r20)).  Verify
	     against upstream whether this should be 52(%r20).  */
	  fputs ("\tldw		52(%r1),%r29\n", f);
	  fputs ("\t.word	0\n", f);
	  fputs ("\t.word	0\n", f);
	  fputs ("\t.word	0\n", f);
	}
      else
	{
	  /* Pre-2.0: compute the trampoline address with a local
	     branch-and-link, then clear the low bits of %r20.  */
	  if (ASSEMBLER_DIALECT == 0)
	    {
	      fputs ("\tbl	.+8,%r20\n", f);
	      fputs ("\tdepi	0,31,2,%r20\n", f);
	    }
	  else
	    {
	      fputs ("\tb,l	.+8,%r20\n", f);
	      fputs ("\tdepwi	0,31,2,%r20\n", f);
	    }
	  fputs ("\tldw		40(%r20),%r22\n", f);
	  fputs ("\tcopy	%r22,%r21\n", f);
	  fputs ("\tbb,>=,n	%r22,30,.+16\n", f);
	  if (ASSEMBLER_DIALECT == 0)
	    fputs ("\tdepi	0,31,2,%r22\n", f);
	  else
	    fputs ("\tdepwi	0,31,2,%r22\n", f);
	  fputs ("\tldw		0(%r22),%r21\n", f);
	  fputs ("\tldw		4(%r22),%r19\n", f);
	  /* Inter-space branch through %sr0.  */
	  fputs ("\tldsid	(%r21),%r1\n", f);
	  fputs ("\tmtsp	%r1,%sr0\n", f);
	  fputs ("\tbe		0(%sr0,%r21)\n", f);
	  fputs ("\tldw		44(%r20),%r29\n", f);
	}
      /* Space for the variable words patched in by pa_trampoline_init.  */
      fputs ("\t.word	0\n", f);
      fputs ("\t.word	0\n", f);
      fputs ("\t.word	0\n", f);
      fputs ("\t.word	0\n", f);
    }
  else
    {
      /* 64-bit: the plabel data precedes the code (see the comment
	 above pa_trampoline_init).  */
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\tmfia	%r31\n", f);
      fputs ("\tldd	24(%r31),%r27\n", f);
      fputs ("\tldd	32(%r31),%r31\n", f);
      fputs ("\tldd	16(%r27),%r1\n", f);
      fputs ("\tbve	(%r1)\n", f);
      fputs ("\tldd	24(%r27),%r27\n", f);
      fputs ("\t.dword 0  ; fptr\n", f);
      fputs ("\t.dword 0  ; static link\n", f);
    }
}
10455 
10456 /* Emit RTL insns to initialize the variable parts of a trampoline.
10457    FNADDR is an RTX for the address of the function's pure code.
10458    CXT is an RTX for the static chain value for the function.
10459 
10460    Move the function address to the trampoline template at offset 48.
10461    Move the static chain value to trampoline template at offset 52.
10462    Move the trampoline address to trampoline template at offset 56.
10463    Move r19 to trampoline template at offset 60.  The latter two
10464    words create a plabel for the indirect call to the trampoline.
10465 
10466    A similar sequence is used for the 64-bit port but the plabel is
10467    at the beginning of the trampoline.
10468 
10469    Finally, the cache entries for the trampoline code are flushed.
10470    This is necessary to ensure that the trampoline instruction sequence
10471    is written to memory prior to any attempts at prefetching the code
10472    sequence.  */
10473 
static void
pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx end_addr = gen_reg_rtx (Pmode);
  rtx line_length = gen_reg_rtx (Pmode);
  rtx r_tramp, tmp;

  /* Copy the constant template onto the stack slot, then patch in the
     variable words below.  */
  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
  r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));

  if (!TARGET_64BIT)
    {
      /* Function address at offset 48, static chain at 52 (the offsets
	 read by the 32-bit template).  */
      tmp = adjust_address (m_tramp, Pmode, 48);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 52);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  */
      tmp = adjust_address (m_tramp, Pmode, 56);
      emit_move_insn (tmp, r_tramp);
      tmp = adjust_address (m_tramp, Pmode, 60);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));

      /* fdc and fic only use registers for the address to flush,
	 they do not accept integer displacements.  We align the
	 start and end addresses to the beginning of their respective
	 cache lines to minimize the number of lines flushed.  */
      emit_insn (gen_andsi3 (start_addr, r_tramp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
					     TRAMPOLINE_CODE_SIZE-1));
      emit_insn (gen_andsi3 (end_addr, tmp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      /* Flush data cache, then instruction cache, over the code range.  */
      emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
				    gen_reg_rtx (Pmode),
				    gen_reg_rtx (Pmode)));
    }
  else
    {
      /* 64-bit layout: function address at 56, static chain at 64.  */
      tmp = adjust_address (m_tramp, Pmode, 56);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 64);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  The code starts at
	 offset 32; %r27 is the global pointer.  */
      tmp = adjust_address (m_tramp, Pmode, 16);
      emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
							    r_tramp, 32)));
      tmp = adjust_address (m_tramp, Pmode, 24);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));

      /* fdc and fic only use registers for the address to flush,
	 they do not accept integer displacements.  We align the
	 start and end addresses to the beginning of their respective
	 cache lines to minimize the number of lines flushed.  */
      tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
      emit_insn (gen_anddi3 (start_addr, tmp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
					     TRAMPOLINE_CODE_SIZE - 1));
      emit_insn (gen_anddi3 (end_addr, tmp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
				    gen_reg_rtx (Pmode),
				    gen_reg_rtx (Pmode)));
    }

#ifdef HAVE_ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
#endif
}
10553 
10554 /* Perform any machine-specific adjustment in the address of the trampoline.
10555    ADDR contains the address that was passed to pa_trampoline_init.
10556    Adjust the trampoline address to point to the plabel at offset 56.  */
10557 
10558 static rtx
pa_trampoline_adjust_address(rtx addr)10559 pa_trampoline_adjust_address (rtx addr)
10560 {
10561   if (!TARGET_64BIT)
10562     addr = memory_address (Pmode, plus_constant (Pmode, addr, 58));
10563   return addr;
10564 }
10565 
10566 static rtx
pa_delegitimize_address(rtx orig_x)10567 pa_delegitimize_address (rtx orig_x)
10568 {
10569   rtx x = delegitimize_mem_from_attrs (orig_x);
10570 
10571   if (GET_CODE (x) == LO_SUM
10572       && GET_CODE (XEXP (x, 1)) == UNSPEC
10573       && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10574     return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10575   return x;
10576 }
10577 
10578 static rtx
pa_internal_arg_pointer(void)10579 pa_internal_arg_pointer (void)
10580 {
10581   /* The argument pointer and the hard frame pointer are the same in
10582      the 32-bit runtime, so we don't need a copy.  */
10583   if (TARGET_64BIT)
10584     return copy_to_reg (virtual_incoming_args_rtx);
10585   else
10586     return virtual_incoming_args_rtx;
10587 }
10588 
10589 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10590    Frame pointer elimination is automatically handled.  */
10591 
10592 static bool
pa_can_eliminate(const int from,const int to)10593 pa_can_eliminate (const int from, const int to)
10594 {
10595   /* The argument cannot be eliminated in the 64-bit runtime.  */
10596   if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10597     return false;
10598 
10599   return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10600           ? ! frame_pointer_needed
10601           : true);
10602 }
10603 
10604 /* Define the offset between two registers, FROM to be eliminated and its
10605    replacement TO, at the start of a routine.  */
10606 HOST_WIDE_INT
pa_initial_elimination_offset(int from,int to)10607 pa_initial_elimination_offset (int from, int to)
10608 {
10609   HOST_WIDE_INT offset;
10610 
10611   if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10612       && to == STACK_POINTER_REGNUM)
10613     offset = -pa_compute_frame_size (get_frame_size (), 0);
10614   else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10615     offset = 0;
10616   else
10617     gcc_unreachable ();
10618 
10619   return offset;
10620 }
10621 
static void
pa_conditional_register_usage (void)
{
  int i;

  /* PA 1.0 (32-bit, !TARGET_PA_11): disable the FP registers that only
     exist on PA 1.1 and later — registers 56 and up, and the odd-numbered
     registers in 33..55.  */
  if (!TARGET_64BIT && !TARGET_PA_11)
    {
      for (i = 56; i <= FP_REG_LAST; i++)
	fixed_regs[i] = call_used_regs[i] = 1;
      for (i = 33; i < 56; i += 2)
	fixed_regs[i] = call_used_regs[i] = 1;
    }
  /* With soft float, no FP register is usable at all.  */
  if (TARGET_SOFT_FLOAT)
    {
      for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
	fixed_regs[i] = call_used_regs[i] = 1;
    }
  /* Reserve the PIC register when generating position-independent code.  */
  if (flag_pic)
    fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
}
10642 
10643 /* Target hook for c_mode_for_suffix.  */
10644 
10645 static machine_mode
pa_c_mode_for_suffix(char suffix)10646 pa_c_mode_for_suffix (char suffix)
10647 {
10648   if (HPUX_LONG_DOUBLE_LIBRARY)
10649     {
10650       if (suffix == 'q')
10651 	return TFmode;
10652     }
10653 
10654   return VOIDmode;
10655 }
10656 
10657 /* Target hook for function_section.  */
10658 
10659 static section *
pa_function_section(tree decl,enum node_frequency freq,bool startup,bool exit)10660 pa_function_section (tree decl, enum node_frequency freq,
10661 		     bool startup, bool exit)
10662 {
10663   /* Put functions in text section if target doesn't have named sections.  */
10664   if (!targetm_common.have_named_sections)
10665     return text_section;
10666 
10667   /* Force nested functions into the same section as the containing
10668      function.  */
10669   if (decl
10670       && DECL_SECTION_NAME (decl) == NULL
10671       && DECL_CONTEXT (decl) != NULL_TREE
10672       && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10673       && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
10674     return function_section (DECL_CONTEXT (decl));
10675 
10676   /* Otherwise, use the default function section.  */
10677   return default_function_section (decl, freq, startup, exit);
10678 }
10679 
10680 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10681 
10682    In 64-bit mode, we reject CONST_DOUBLES.  We also reject CONST_INTS
10683    that need more than three instructions to load prior to reload.  This
10684    limit is somewhat arbitrary.  It takes three instructions to load a
10685    CONST_INT from memory but two are memory accesses.  It may be better
10686    to increase the allowed range for CONST_INTS.  We may also be able
10687    to handle CONST_DOUBLES.  */
10688 
static bool
pa_legitimate_constant_p (machine_mode mode, rtx x)
{
  /* Only zero is a legitimate floating-point constant.  */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
    return false;

  /* Old HP assemblers cannot handle label references as constants.  */
  if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
    return false;

  /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
     legitimate constants.  The other variants can't be handled by
     the move patterns after reload starts.  */
  if (tls_referenced_p (x))
    return false;

  if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
    return false;

  /* Prior to reload, reject 64-bit CONST_INTs that would take more than
     three instructions to load; they are better loaded from memory.  */
  if (TARGET_64BIT
      && HOST_BITS_PER_WIDE_INT > 32
      && GET_CODE (x) == CONST_INT
      && !reload_in_progress
      && !reload_completed
      && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
      && !pa_cint_ok_for_move (UINTVAL (x)))
    return false;

  /* Function labels need plabel treatment and are never plain constants.  */
  if (function_label_operand (x, mode))
    return false;

  return true;
}
10721 
10722 /* Implement TARGET_SECTION_TYPE_FLAGS.  */
10723 
10724 static unsigned int
pa_section_type_flags(tree decl,const char * name,int reloc)10725 pa_section_type_flags (tree decl, const char *name, int reloc)
10726 {
10727   unsigned int flags;
10728 
10729   flags = default_section_type_flags (decl, name, reloc);
10730 
10731   /* Function labels are placed in the constant pool.  This can
10732      cause a section conflict if decls are put in ".data.rel.ro"
10733      or ".data.rel.ro.local" using the __attribute__ construct.  */
10734   if (strcmp (name, ".data.rel.ro") == 0
10735       || strcmp (name, ".data.rel.ro.local") == 0)
10736     flags |= SECTION_WRITE | SECTION_RELRO;
10737 
10738   return flags;
10739 }
10740 
10741 /* pa_legitimate_address_p recognizes an RTL expression that is a
10742    valid memory address for an instruction.  The MODE argument is the
10743    machine mode for the MEM expression that wants to use this address.
10744 
10745    On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10746    REG+REG, and REG+(REG*SCALE).  The indexed address forms are only
10747    available with floating point loads and stores, and integer loads.
10748    We get better code by allowing indexed addresses in the initial
10749    RTL generation.
10750 
10751    The acceptance of indexed addresses as legitimate implies that we
10752    must provide patterns for doing indexed integer stores, or the move
10753    expanders must force the address of an indexed store to a register.
10754    We have adopted the latter approach.
10755 
10756    Another function of pa_legitimate_address_p is to ensure that
10757    the base register is a valid pointer for indexed instructions.
10758    On targets that have non-equivalent space registers, we have to
10759    know at the time of assembler output which register in a REG+REG
10760    pair is the base register.  The REG_POINTER flag is sometimes lost
10761    in reload and the following passes, so it can't be relied on during
10762    code generation.  Thus, we either have to canonicalize the order
10763    of the registers in REG+REG indexed addresses, or treat REG+REG
10764    addresses separately and provide patterns for both permutations.
10765 
10766    The latter approach requires several hundred additional lines of
10767    code in pa.md.  The downside to canonicalizing is that a PLUS
   in the wrong order can't combine to form a scaled indexed
   memory operand.  As we won't need to canonicalize the operands if
   the REG_POINTER lossage can be fixed, it seems better to canonicalize.
10771 
10772    We initially break out scaled indexed addresses in canonical order
10773    in pa_emit_move_sequence.  LEGITIMIZE_ADDRESS also canonicalizes
10774    scaled indexed addresses during RTL generation.  However, fold_rtx
10775    has its own opinion on how the operands of a PLUS should be ordered.
10776    If one of the operands is equivalent to a constant, it will make
10777    that operand the second operand.  As the base register is likely to
10778    be equivalent to a SYMBOL_REF, we have made it the second operand.
10779 
10780    pa_legitimate_address_p accepts REG+REG as legitimate when the
10781    operands are in the order INDEX+BASE on targets with non-equivalent
10782    space registers, and in any order on targets with equivalent space
10783    registers.  It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10784 
10785    We treat a SYMBOL_REF as legitimate if it is part of the current
10786    function's constant-pool, because such addresses can actually be
10787    output as REG+SMALLINT.  */
10788 
static bool
pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
{
  /* Case 1: a plain base register, or a base register wrapped in a
     pre/post increment/decrement side-effect address.  */
  if ((REG_P (x)
       && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
		  : REG_OK_FOR_BASE_P (x)))
      || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
	   || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
	  && REG_P (XEXP (x, 0))
	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
		     : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
    return true;

  /* Case 2: REG+CONST_INT, REG+REG, or (REG*SCALE)+REG.  */
  if (GET_CODE (x) == PLUS)
    {
      rtx base, index;

      /* For REG+REG, the base register should be in XEXP (x, 1),
	 so check it first.  */
      if (REG_P (XEXP (x, 1))
	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
		     : REG_OK_FOR_BASE_P (XEXP (x, 1))))
	base = XEXP (x, 1), index = XEXP (x, 0);
      else if (REG_P (XEXP (x, 0))
	       && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
			  : REG_OK_FOR_BASE_P (XEXP (x, 0))))
	base = XEXP (x, 0), index = XEXP (x, 1);
      else
	return false;

      /* REG+SMALLINT displacement.  */
      if (GET_CODE (index) == CONST_INT)
	{
	  /* Short (5-bit) displacements are valid for all modes.  */
	  if (INT_5_BITS (index))
	    return true;

	  /* When INT14_OK_STRICT is false, a secondary reload is needed
	     to adjust the displacement of SImode and DImode floating point
	     instructions but this may fail when the register also needs
	     reloading.  So, we return false when STRICT is true.  We
	     also reject long displacements for float mode addresses since
	     the majority of accesses will use floating point instructions
	     that don't support 14-bit offsets.  */
	  if (!INT14_OK_STRICT
	      && (strict || !(reload_in_progress || reload_completed))
	      && mode != QImode
	      && mode != HImode)
	    return false;

	  /* Otherwise accept any displacement that fits in 14 bits.  */
	  return base14_operand (index, mode);
	}

      /* Unscaled REG+REG indexed address.  */
      if (!TARGET_DISABLE_INDEXING
	  /* Only accept the "canonical" INDEX+BASE operand order
	     on targets with non-equivalent space registers.  */
	  && (TARGET_NO_SPACE_REGS
	      ? REG_P (index)
	      : (base == XEXP (x, 1) && REG_P (index)
		 && (reload_completed
		     || (reload_in_progress && HARD_REGISTER_P (base))
		     || REG_POINTER (base))
		 && (reload_completed
		     || (reload_in_progress && HARD_REGISTER_P (index))
		     || !REG_POINTER (index))))
	  && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
		     : REG_OK_FOR_INDEX_P (index))
	  && borx_reg_operand (base, Pmode)
	  && borx_reg_operand (index, Pmode))
	return true;

      /* Scaled (REG*SCALE)+REG indexed address; the scale factor must
	 equal the access size for MODE.  */
      if (!TARGET_DISABLE_INDEXING
	  && GET_CODE (index) == MULT
	  /* Only accept base operands with the REG_POINTER flag prior to
	     reload on targets with non-equivalent space registers.  */
	  && (TARGET_NO_SPACE_REGS
	      || (base == XEXP (x, 1)
		  && (reload_completed
		      || (reload_in_progress && HARD_REGISTER_P (base))
		      || REG_POINTER (base))))
	  && REG_P (XEXP (index, 0))
	  && GET_MODE (XEXP (index, 0)) == Pmode
	  && MODE_OK_FOR_SCALED_INDEXING_P (mode)
	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
		     : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
	  && GET_CODE (XEXP (index, 1)) == CONST_INT
	  && INTVAL (XEXP (index, 1))
	     == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
	  && borx_reg_operand (base, Pmode))
	return true;

      return false;
    }

  /* Case 3: LO_SUM of a base register and either a PIC UNSPEC or a
     constant (the low part of a symbolic address).  */
  if (GET_CODE (x) == LO_SUM)
    {
      rtx y = XEXP (x, 0);

      if (GET_CODE (y) == SUBREG)
	y = SUBREG_REG (y);

      if (REG_P (y)
	  && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
		     : REG_OK_FOR_BASE_P (y)))
	{
	  /* Needed for -fPIC */
	  if (mode == Pmode
	      && GET_CODE (XEXP (x, 1)) == UNSPEC)
	    return true;

	  /* Same INT14_OK_STRICT reasoning as in the REG+CONST_INT
	     case above.  */
	  if (!INT14_OK_STRICT
	      && (strict || !(reload_in_progress || reload_completed))
	      && mode != QImode
	      && mode != HImode)
	    return false;

	  if (CONSTANT_P (XEXP (x, 1)))
	    return true;
	}
      return false;
    }

  /* Case 4: a bare short integer constant.  */
  if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
    return true;

  return false;
}
10915 
10916 /* Look for machine dependent ways to make the invalid address AD a
10917    valid address.
10918 
10919    For the PA, transform:
10920 
10921         memory(X + <large int>)
10922 
10923    into:
10924 
10925         if (<large int> & mask) >= 16
10926           Y = (<large int> & ~mask) + mask + 1  Round up.
10927         else
10928           Y = (<large int> & ~mask)             Round down.
10929         Z = X + Y
10930         memory (Z + (<large int> - Y));
10931 
10932    This makes reload inheritance and reload_cse work better since Z
10933    can be reused.
10934 
10935    There may be more opportunities to improve code with this hook.  */
10936 
rtx
pa_legitimize_reload_address (rtx ad, machine_mode mode,
			      int opnum, int type,
			      int ind_levels ATTRIBUTE_UNUSED)
{
  long offset, newoffset, mask;
  rtx new_rtx, temp = NULL_RTX;

  /* Floating-point accesses only have a 5-bit displacement unless
     INT14_OK_STRICT; other accesses get the full 14-bit range.  */
  mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	  && !INT14_OK_STRICT ? 0x1f : 0x3fff);

  /* Try folding the PLUS first so any constant parts combine.  */
  if (optimize && GET_CODE (ad) == PLUS)
    temp = simplify_binary_operation (PLUS, Pmode,
				      XEXP (ad, 0), XEXP (ad, 1));

  new_rtx = temp ? temp : ad;

  if (optimize
      && GET_CODE (new_rtx) == PLUS
      && GET_CODE (XEXP (new_rtx, 0)) == REG
      && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
    {
      offset = INTVAL (XEXP ((new_rtx), 1));

      /* Choose rounding direction.  Round up if we are >= halfway.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~mask) + mask + 1;
      else
	newoffset = offset & ~mask;

      /* Ensure that long displacements are aligned.  */
      if (mask == 0x3fff
	  && (GET_MODE_CLASS (mode) == MODE_FLOAT
	      || (TARGET_64BIT && (mode) == DImode)))
	newoffset &= ~(GET_MODE_SIZE (mode) - 1);

      if (newoffset != 0 && VAL_14_BITS_P (newoffset))
	{
	  /* Reload REG + rounded offset into a base register; the
	     residual (offset - newoffset) stays in the address so the
	     base can be reused by reload inheritance/reload_cse.  */
	  temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
			       GEN_INT (newoffset));
	  ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
	  push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
		       opnum, (enum reload_type) type);
	  return ad;
	}
    }

  /* No transformation applied; let reload handle AD as-is.  */
  return NULL_RTX;
}
10987 
10988 /* Output address vector.  */
10989 
10990 void
pa_output_addr_vec(rtx lab,rtx body)10991 pa_output_addr_vec (rtx lab, rtx body)
10992 {
10993   int idx, vlen = XVECLEN (body, 0);
10994 
10995   if (!TARGET_SOM)
10996     fputs ("\t.align 4\n", asm_out_file);
10997   targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10998   if (TARGET_GAS)
10999     fputs ("\t.begin_brtab\n", asm_out_file);
11000   for (idx = 0; idx < vlen; idx++)
11001     {
11002       ASM_OUTPUT_ADDR_VEC_ELT
11003 	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
11004     }
11005   if (TARGET_GAS)
11006     fputs ("\t.end_brtab\n", asm_out_file);
11007 }
11008 
11009 /* Output address difference vector.  */
11010 
11011 void
pa_output_addr_diff_vec(rtx lab,rtx body)11012 pa_output_addr_diff_vec (rtx lab, rtx body)
11013 {
11014   rtx base = XEXP (XEXP (body, 0), 0);
11015   int idx, vlen = XVECLEN (body, 1);
11016 
11017   targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
11018   if (TARGET_GAS)
11019     fputs ("\t.begin_brtab\n", asm_out_file);
11020   for (idx = 0; idx < vlen; idx++)
11021     {
11022       ASM_OUTPUT_ADDR_DIFF_ELT
11023 	(asm_out_file,
11024 	 body,
11025 	 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
11026 	 CODE_LABEL_NUMBER (base));
11027     }
11028   if (TARGET_GAS)
11029     fputs ("\t.end_brtab\n", asm_out_file);
11030 }
11031 
11032 /* Implement TARGET_CALLEE_COPIES.  The callee is responsible for copying
11033    arguments passed by hidden reference in the 32-bit HP runtime.  Users
11034    can override this behavior for better compatibility with openmp at the
11035    risk of library incompatibilities.  Arguments are always passed by value
11036    in the 64-bit HP runtime.  */
11037 
11038 static bool
pa_callee_copies(cumulative_args_t,const function_arg_info &)11039 pa_callee_copies (cumulative_args_t, const function_arg_info &)
11040 {
11041   return !TARGET_CALLER_COPIES;
11042 }
11043 
11044 /* Implement TARGET_HARD_REGNO_NREGS.  */
11045 
11046 static unsigned int
pa_hard_regno_nregs(unsigned int regno ATTRIBUTE_UNUSED,machine_mode mode)11047 pa_hard_regno_nregs (unsigned int regno ATTRIBUTE_UNUSED, machine_mode mode)
11048 {
11049   return PA_HARD_REGNO_NREGS (regno, mode);
11050 }
11051 
11052 /* Implement TARGET_HARD_REGNO_MODE_OK.  */
11053 
11054 static bool
pa_hard_regno_mode_ok(unsigned int regno,machine_mode mode)11055 pa_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
11056 {
11057   return PA_HARD_REGNO_MODE_OK (regno, mode);
11058 }
11059 
11060 /* Implement TARGET_STARTING_FRAME_OFFSET.
11061 
11062    On the 32-bit ports, we reserve one slot for the previous frame
11063    pointer and one fill slot.  The fill slot is for compatibility
11064    with HP compiled programs.  On the 64-bit ports, we reserve one
11065    slot for the previous frame pointer.  */
11066 
static HOST_WIDE_INT
pa_starting_frame_offset (void)
{
  /* 8 bytes in all cases: on the 32-bit ports this is the previous
     frame pointer slot plus a fill slot (see comment above); on the
     64-bit ports it is the single previous-frame-pointer slot.  */
  return 8;
}
11072 
11073 /* Figure out the size in words of the function argument.  */
11074 
11075 int
pa_function_arg_size(machine_mode mode,const_tree type)11076 pa_function_arg_size (machine_mode mode, const_tree type)
11077 {
11078   HOST_WIDE_INT size;
11079 
11080   size = mode != BLKmode ? GET_MODE_SIZE (mode) : int_size_in_bytes (type);
11081 
11082   /* The 64-bit runtime does not restrict the size of stack frames,
11083      but the gcc calling conventions limit argument sizes to 1G.  Our
11084      prologue/epilogue code limits frame sizes to just under 32 bits.
11085      1G is also the maximum frame size that can be handled by the HPUX
11086      unwind descriptor.  Since very large TYPE_SIZE_UNIT values can
11087      occur for (parallel:BLK []), we need to ignore large arguments
11088      passed by value.  */
11089   if (size >= (1 << (HOST_BITS_PER_INT - 2)))
11090     size = 0;
11091   return (int) CEIL (size, UNITS_PER_WORD);
11092 }
11093 
11094 #include "gt-pa.h"
11095