/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992-2021 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "varasm.h"
#include "calls.h"
#include "output.h"
#include "except.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "cfgrtl.h"
#include "opts.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"

/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  machine_mode store_mode;
  machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
	  && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}


#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (machine_mode mode, reg_class_t,
				    reg_class_t);
static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static inline rtx force_mode (machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
			     rtx, rtx);
static bool forward_branch_p (rtx_insn *);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_cpymem_length (rtx_insn *);
static int compute_clrmem_length (rtx_insn *);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
static void pa_linux_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *);
static int pa_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int pa_issue_rate (void);
static int pa_reloc_rw_mask (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (scalar_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
static void pa_file_end (void);
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t,
				  const function_arg_info &);
static int pa_arg_partial_bytes (cumulative_args_t, const function_arg_info &);
static void pa_function_arg_advance (cumulative_args_t,
				     const function_arg_info &);
static rtx pa_function_arg (cumulative_args_t, const function_arg_info &);
static pad_direction pa_function_arg_padding (machine_mode, const_tree);
static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
					machine_mode,
					secondary_reload_info *);
static bool pa_secondary_memory_needed (machine_mode,
					reg_class_t, reg_class_t);
static void pa_extra_live_on_entry (bitmap);
static machine_mode pa_promote_function_mode (const_tree,
					      machine_mode, int *,
					      const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (machine_mode, rtx);
static bool pa_legitimate_constant_p (machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (machine_mode, rtx, bool);
static bool pa_callee_copies (cumulative_args_t, const function_arg_info &);
static unsigned int pa_hard_regno_nregs (unsigned int, machine_mode);
static bool pa_hard_regno_mode_ok (unsigned int, machine_mode);
static bool pa_modes_tieable_p (machine_mode, machine_mode);
static bool pa_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
static HOST_WIDE_INT pa_starting_frame_offset (void);
static section *pa_elf_select_rtx_section (machine_mode, rtx,
					   unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;

/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;

/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END pa_file_end

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES pa_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING pa_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED pa_secondary_memory_needed

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_false

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS pa_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK pa_hard_regno_mode_ok
#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P pa_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS pa_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings

#undef TARGET_STARTING_FRAME_OFFSET
#define TARGET_STARTING_FRAME_OFFSET pa_starting_frame_offset

#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed

struct gcc_target targetm = TARGET_INITIALIZER;

/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}

/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
	switch (opt->opt_index)
	  {
	  case OPT_mfixed_range_:
	    fix_range (opt->arg);
	    break;

	  default:
	    gcc_unreachable ();
	  }
      }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "%<-g%> is only supported when using GAS on this processor");
      warning (0, "%<-g%> option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "%<-freorder-blocks-and-partition%> does not work "
	      "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}

enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];

static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
		      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
						 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
				   PA_BUILTIN_FABSQ, BUILT_IN_MD,
				   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
				   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
				   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
				   PA_BUILTIN_INFQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
				   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}

static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		   machine_mode mode ATTRIBUTE_UNUSED,
		   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
	machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = const_double_from_real_value (inf, target_mode);

	tmp = validize_mem (force_const_mem (target_mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (target_mode);

	emit_move_insn (target, tmp);
	return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}

/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
pa_symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return symbolic_operand (x, VOIDmode);
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
	  || pa_ldil_cint_p (ival)
	  || pa_zdepi_cint_p (ival));
}

/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
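/* A couple of worked examples, assuming the usual 64-bit
   HOST_WIDE_INT:

     0x0000000012345000 is accepted: the low 11 bits are zero and bit
     31 is clear, so sign extension from 32 to 64 bits leaves the
     value unchanged.

     0x0000000080000000 is rejected: bit 31 is set but bits 32-63 are
     clear, so the value changes sign when extended.

     0xffffffff80000000 is accepted: it is the sign extension of the
     32-bit constant 0x80000000.  */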
int
pa_ldil_cint_p (unsigned HOST_WIDE_INT ival)
{
  unsigned HOST_WIDE_INT x;

  x = ival & (((unsigned HOST_WIDE_INT) -1 << 31) | 0x7ff);
  return x == 0 || x == ((unsigned HOST_WIDE_INT) -1 << 31);
}

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
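/* For example, 0x3e0 (a run of five 1 bits starting at bit 5) is
   accepted: zdepi can sign extend the 5-bit value -1 to a 5-bit
   field and deposit it there.  By contrast, 0x41 is rejected: the
   pattern 1000001 spans seven bits, and no sign extension of a
   5-bit value produces it.  */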
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit patterns like these:
   0....01....1
   1....10....0
   1..10..01..1  */
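/* For example, with a 64-bit HOST_WIDE_INT, 0xfffffffffffffff0 is
   accepted: its complement is 0xf, and adding the least significant
   set bit gives 0x10, a power of two.  The complement of each
   pattern above is a single contiguous run of ones, which the
   addition collapses to a power of two (or to zero on overflow).  */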
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* True iff depi can be used to compute (reg | MASK).  */
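/* For example, 0x7f is accepted: depi can deposit -1 into a 7-bit
   field to set those bits.  0x41 is rejected because its set bits
   do not form a single contiguous run.  */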
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx_insn *insn;

      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
	 and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
	{
	  /* Extract LABEL_REF.  */
	  if (GET_CODE (orig) == CONST)
	    orig = XEXP (XEXP (orig, 0), 0);
	  /* Extract CODE_LABEL.  */
	  orig = XEXP (orig, 0);
	  add_reg_note (insn, REG_LABEL_OPERAND, orig);
	  /* Make sure we have a label and not a note.  */
	  if (LABEL_P (orig))
	    LABEL_NUSES (orig)++;
	}
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx_insn *insn;
      rtx tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
	 result.  This allows the sequence to be deleted when the final
	 result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
		 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
	{
	  /* Force function label into memory in word mode.  */
	  orig = XEXP (force_const_mem (word_mode, orig), 0);
	  /* Load plabel address from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	  emit_move_insn (reg, pic_ref);
	  /* Now load address of function descriptor.  */
	  pic_ref = gen_rtx_MEM (Pmode, reg);
	}
      else
	{
	  /* Load symbol reference from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	}

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant (Pmode, base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
			   LCT_CONST, Pmode, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, tmp, t1, t2, tp;
  rtx_insn *insn;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
      case TLS_MODEL_GLOBAL_DYNAMIC:
	tmp = gen_reg_rtx (Pmode);
	if (flag_pic)
	  emit_insn (gen_tgd_load_pic (tmp, addr));
	else
	  emit_insn (gen_tgd_load (tmp, addr));
	ret = hppa_tls_call (tmp);
	break;

      case TLS_MODEL_LOCAL_DYNAMIC:
	ret = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	start_sequence ();
	if (flag_pic)
	  emit_insn (gen_tld_load_pic (tmp, addr));
	else
	  emit_insn (gen_tld_load (tmp, addr));
	t1 = hppa_tls_call (tmp);
	insn = get_insns ();
	end_sequence ();
	t2 = gen_reg_rtx (Pmode);
	emit_libcall_block (insn, t2, t1,
			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					    UNSPEC_TLSLDBASE));
	emit_insn (gen_tld_offset_load (ret, addr, t2));
	break;

      case TLS_MODEL_INITIAL_EXEC:
	tp = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	if (flag_pic)
	  emit_insn (gen_tie_load_pic (tmp, addr));
	else
	  emit_insn (gen_tie_load (tmp, addr));
	emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
	break;

      case TLS_MODEL_LOCAL_EXEC:
	tp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	emit_insn (gen_tle_load (ret, addr, tp));
	break;

      default:
	gcc_unreachable ();
    }

  return ret;
}

/* Helper for hppa_legitimize_address.  Given X, return true if it
   is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.

   These respectively represent canonical shift-add rtxs or scaled
   memory addresses.  */
static bool
mem_shadd_or_shadd_rtx_p (rtx x)
{
  return ((GET_CODE (x) == ASHIFT
	   || GET_CODE (x) == MULT)
	  && GET_CODE (XEXP (x, 1)) == CONST_INT
	  && ((GET_CODE (x) == ASHIFT
	       && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
	      || (GET_CODE (x) == MULT
		  && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= (mask + 1) / 2
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)

   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Note that the addresses passed into hppa_legitimize_address always
   come from a MEM, so we only have to match the MULT form on incoming
   addresses.  But to be future proof we also match the ASHIFT form.

   However, this routine always places those shift-add sequences into
   registers, so we have to generate the ASHIFT form as our output.

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
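/* As a concrete example, consider a MODE_INT reference to X + 0x12345.
   The mask is 0x3fff, and 0x12345 & 0x3fff is 0x2345, which is at
   least halfway to the next boundary, so Y rounds up to 0x14000.  We
   compute Z = X + 0x14000 and use memory (Z + (0x12345 - 0x14000)),
   i.e. memory (Z - 0x1cbb), whose displacement fits in 14 bits.  */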

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine cannot
	 handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (mem_shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
      && (OBJECT_P (XEXP (x, 1))
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
      if (GET_CODE (XEXP (x, 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      rtx reg1, reg2;
      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg2,
						      GEN_INT (shift_val)),
				      reg1));
    }

  /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
      && (mode == SFmode || mode == DFmode))
    {
      int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));

      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      /* Try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_ASHIFT (Pmode,
					      XEXP (XEXP (XEXP (x, 0), 0), 0),
					      GEN_INT (shift_val)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  int val = INTVAL (XEXP (idx, 1));
	  val /= (1 << shift_val);

	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_ASHIFT (Pmode, reg1,
						    GEN_INT (shift_val)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode,
			    gen_rtx_PLUS (Pmode,
					  gen_rtx_ASHIFT (Pmode, reg2,
							  GEN_INT (shift_val)),
					  reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg1,
						      GEN_INT (shift_val)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (mem_shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big but can be divided evenly by shadd_const,
	     it is divided and added to (reg).  This allows more scaled
	     indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
		reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode,
							 reg2,
							 GEN_INT (shift_val)),
					 reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
	    {
	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode, regx2,
							 GEN_INT (shift_val)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}

/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			 reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
	   || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}

/* For the HPPA, REG and REG+CONST is cost 0
   and addresses involving symbolic constants are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as pa_legitimate_address_p.  */

static int
hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
		   addr_space_t as ATTRIBUTE_UNUSED,
		   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}

/* Return true if X represents a (possibly non-canonical) shNadd pattern.
   The machine mode of X is known to be SImode or DImode.  */
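/* For example, both of the following match:

     (plus:SI (ashift:SI (reg) (const_int 2)) (reg))
     (plus:SI (mult:SI (reg) (const_int 4)) (reg))

   The first is the canonical shift-add form; the second is the scaled
   form that appears inside memory addresses.  */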
1497 
1498 static bool
hppa_rtx_costs_shadd_p(rtx x)1499 hppa_rtx_costs_shadd_p (rtx x)
1500 {
1501   if (GET_CODE (x) != PLUS
1502       || !REG_P (XEXP (x, 1)))
1503     return false;
1504   rtx op0 = XEXP (x, 0);
1505   if (GET_CODE (op0) == ASHIFT
1506       && CONST_INT_P (XEXP (op0, 1))
1507       && REG_P (XEXP (op0, 0)))
1508     {
1509       unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1));
1510       return x == 1 || x == 2 || x == 3;
1511     }
1512   if (GET_CODE (op0) == MULT
1513       && CONST_INT_P (XEXP (op0, 1))
1514       && REG_P (XEXP (op0, 0)))
1515     {
1516       unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1));
1517       return x == 2 || x == 4 || x == 8;
1518     }
1519   return false;
1520 }
1521 
1522 /* Compute a (partial) cost for rtx X.  Return true if the complete
1523    cost has been computed, and false if subexpressions should be
1524    scanned.  In either case, *TOTAL contains the cost result.  */
1525 
1526 static bool
hppa_rtx_costs(rtx x,machine_mode mode,int outer_code,int opno ATTRIBUTE_UNUSED,int * total,bool speed)1527 hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
1528 		int opno ATTRIBUTE_UNUSED,
1529 		int *total, bool speed)
1530 {
1531   int code = GET_CODE (x);
1532 
1533   switch (code)
1534     {
1535     case CONST_INT:
1536       if (outer_code == SET)
1537 	*total = COSTS_N_INSNS (1);
1538       else if (INTVAL (x) == 0)
1539 	*total = 0;
1540       else if (INT_14_BITS (x))
1541 	*total = 1;
1542       else
1543 	*total = 2;
1544       return true;
1545 
1546     case HIGH:
1547       *total = 2;
1548       return true;
1549 
1550     case CONST:
1551     case LABEL_REF:
1552     case SYMBOL_REF:
1553       *total = 4;
1554       return true;
1555 
1556     case CONST_DOUBLE:
1557       if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1558 	  && outer_code != SET)
1559 	*total = 0;
1560       else
1561 	*total = 8;
1562       return true;
1563 
1564     case MULT:
1565       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1566 	{
1567 	  *total = COSTS_N_INSNS (3);
1568 	}
1569       else if (mode == DImode)
1570 	{
1571 	  if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1572 	    *total = COSTS_N_INSNS (32);
1573 	  else
1574 	    *total = COSTS_N_INSNS (80);
1575 	}
1576       else
1577 	{
1578 	  if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1579 	    *total = COSTS_N_INSNS (8);
1580 	  else
1581 	    *total = COSTS_N_INSNS (20);
1582 	}
1583       return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1));
1584 
1585     case DIV:
1586       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1587 	{
1588 	  *total = COSTS_N_INSNS (14);
1589 	  return false;
1590 	}
1591       /* FALLTHRU */
1592 
1593     case UDIV:
1594     case MOD:
1595     case UMOD:
1596       /* A mode size N times larger than SImode needs O(N*N) more insns.  */
1597       if (mode == DImode)
1598 	*total = COSTS_N_INSNS (240);
1599       else
1600 	*total = COSTS_N_INSNS (60);
1601       return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1));
1602 
1603     case PLUS: /* This includes shNadd insns.  */
1604     case MINUS:
1605       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1606 	*total = COSTS_N_INSNS (3);
1607       else if (mode == DImode)
1608 	{
1609 	  if (TARGET_64BIT)
1610 	    {
1611 	      *total = COSTS_N_INSNS (1);
1612 	      /* Handle shladd,l instructions.  */
1613 	      if (hppa_rtx_costs_shadd_p (x))
1614 		return true;
1615 	    }
1616 	  else
1617 	    *total = COSTS_N_INSNS (2);
1618 	}
1619       else
1620 	{
1621 	  *total = COSTS_N_INSNS (1);
1622 	  /* Handle shNadd instructions.  */
1623 	  if (hppa_rtx_costs_shadd_p (x))
1624 	    return true;
1625 	}
1626       return REG_P (XEXP (x, 0))
1627 	     && (REG_P (XEXP (x, 1))
1628 		 || CONST_INT_P (XEXP (x, 1)));
1629 
1630     case ASHIFT:
1631       if (mode == DImode)
1632 	{
1633 	  if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1634 	    {
1635 	      if (TARGET_64BIT)
1636 		*total = COSTS_N_INSNS (1);
1637 	      else
1638 		*total = COSTS_N_INSNS (2);
1639 	      return true;
1640 	    }
1641 	  else if (TARGET_64BIT)
1642 	    *total = COSTS_N_INSNS (3);
1643 	  else if (speed)
1644 	    *total = COSTS_N_INSNS (13);
1645 	  else
1646 	    *total = COSTS_N_INSNS (18);
1647 	}
1648       else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1649 	{
1650 	  if (TARGET_64BIT)
1651 	    *total = COSTS_N_INSNS (2);
1652 	  else
1653 	    *total = COSTS_N_INSNS (1);
1654 	  return true;
1655 	}
1656       else if (TARGET_64BIT)
1657 	*total = COSTS_N_INSNS (4);
1658       else
1659 	*total = COSTS_N_INSNS (2);
1660       return REG_P (XEXP (x, 0))
1661 	     && (REG_P (XEXP (x, 1))
1662 		 || CONST_INT_P (XEXP (x, 1)));
1663 
1664     case ASHIFTRT:
1665       if (mode == DImode)
1666 	{
1667 	  if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1668 	    {
1669 	      if (TARGET_64BIT)
1670 		*total = COSTS_N_INSNS (1);
1671 	      else
1672 		*total = COSTS_N_INSNS (2);
1673 	      return true;
1674 	    }
1675 	  else if (TARGET_64BIT)
1676 	    *total = COSTS_N_INSNS (3);
1677 	  else if (speed)
1678 	    *total = COSTS_N_INSNS (14);
1679 	  else
1680 	    *total = COSTS_N_INSNS (19);
1681 	}
1682       else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1683 	{
1684 	  if (TARGET_64BIT)
1685 	    *total = COSTS_N_INSNS (2);
1686 	  else
1687 	    *total = COSTS_N_INSNS (1);
1688 	  return true;
1689 	}
1690       else if (TARGET_64BIT)
1691 	*total = COSTS_N_INSNS (4);
1692       else
1693 	*total = COSTS_N_INSNS (2);
1694       return REG_P (XEXP (x, 0))
1695 	     && (REG_P (XEXP (x, 1))
1696 		 || CONST_INT_P (XEXP (x, 1)));
1697 
1698     case LSHIFTRT:
1699       if (mode == DImode)
1700 	{
1701 	  if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1702 	    {
1703 	      if (TARGET_64BIT)
1704 		*total = COSTS_N_INSNS (1);
1705 	      else
1706 		*total = COSTS_N_INSNS (2);
1707 	      return true;
1708 	    }
1709 	  else if (TARGET_64BIT)
1710 	    *total = COSTS_N_INSNS (2);
1711 	  else if (speed)
1712 	    *total = COSTS_N_INSNS (12);
1713 	  else
1714 	    *total = COSTS_N_INSNS (15);
1715 	}
1716       else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
1717 	{
1718 	  *total = COSTS_N_INSNS (1);
1719 	  return true;
1720 	}
1721       else if (TARGET_64BIT)
1722 	*total = COSTS_N_INSNS (3);
1723       else
1724 	*total = COSTS_N_INSNS (2);
1725       return REG_P (XEXP (x, 0))
1726 	     && (REG_P (XEXP (x, 1))
1727 		 || CONST_INT_P (XEXP (x, 1)));
1728 
1729     default:
1730       return false;
1731     }
1732 }
1733 
1734 /* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
1735    new rtx with the correct mode.  */
1736 static inline rtx
1737 force_mode (machine_mode mode, rtx orig)
1738 {
1739   if (mode == GET_MODE (orig))
1740     return orig;
1741 
1742   gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
1743 
1744   return gen_rtx_REG (mode, REGNO (orig));
1745 }
1746 
1747 /* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
1748 
1749 static bool
1750 pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1751 {
1752   return tls_referenced_p (x);
1753 }
1754 
1755 /* Emit insns to move operands[1] into operands[0].
1756 
1757    Return 1 if we have written out everything that needs to be done to
1758    do the move.  Otherwise, return 0 and the caller will emit the move
1759    normally.
1760 
1761    Note SCRATCH_REG may not be in the proper mode depending on how it
1762    will be used.  This routine is responsible for creating a new copy
1763    of SCRATCH_REG in the proper mode.  */
1764 
1765 int
1766 pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
1767 {
1768   rtx operand0 = operands[0];
1769   rtx operand1 = operands[1];
1770   rtx tem;
1771 
1772   /* We can only handle indexed addresses in the destination operand
1773      of floating point stores.  Thus, we need to break out indexed
1774      addresses from the destination operand.  */
1775   if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1776     {
1777       gcc_assert (can_create_pseudo_p ());
1778 
1779       tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1780       operand0 = replace_equiv_address (operand0, tem);
1781     }
1782 
1783   /* On targets with non-equivalent space registers, break out unscaled
1784      indexed addresses from the source operand before the final CSE.
1785      We have to do this because the REG_POINTER flag is not correctly
1786      carried through various optimization passes and CSE may substitute
1787      a pseudo without the pointer set for one with the pointer set.  As
1788      a result, we lose various opportunities to create insns with
1789      unscaled indexed addresses.  */
1790   if (!TARGET_NO_SPACE_REGS
1791       && !cse_not_expected
1792       && GET_CODE (operand1) == MEM
1793       && GET_CODE (XEXP (operand1, 0)) == PLUS
1794       && REG_P (XEXP (XEXP (operand1, 0), 0))
1795       && REG_P (XEXP (XEXP (operand1, 0), 1)))
1796     operand1
1797       = replace_equiv_address (operand1,
1798 			       copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1799 
1800   if (scratch_reg
1801       && reload_in_progress && GET_CODE (operand0) == REG
1802       && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1803     operand0 = reg_equiv_mem (REGNO (operand0));
1804   else if (scratch_reg
1805 	   && reload_in_progress && GET_CODE (operand0) == SUBREG
1806 	   && GET_CODE (SUBREG_REG (operand0)) == REG
1807 	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1808     {
1809      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1810 	the code which tracks sets/uses for delete_output_reload.  */
1811       rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1812 				 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
1813 				 SUBREG_BYTE (operand0));
1814       operand0 = alter_subreg (&temp, true);
1815     }
1816 
1817   if (scratch_reg
1818       && reload_in_progress && GET_CODE (operand1) == REG
1819       && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1820     operand1 = reg_equiv_mem (REGNO (operand1));
1821   else if (scratch_reg
1822 	   && reload_in_progress && GET_CODE (operand1) == SUBREG
1823 	   && GET_CODE (SUBREG_REG (operand1)) == REG
1824 	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1825     {
1826      /* We must not alter SUBREG_BYTE (operand1) since that would confuse
1827 	the code which tracks sets/uses for delete_output_reload.  */
1828       rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1829 				 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
1830 				 SUBREG_BYTE (operand1));
1831       operand1 = alter_subreg (&temp, true);
1832     }
1833 
1834   if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1835       && ((tem = find_replacement (&XEXP (operand0, 0)))
1836 	  != XEXP (operand0, 0)))
1837     operand0 = replace_equiv_address (operand0, tem);
1838 
1839   if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1840       && ((tem = find_replacement (&XEXP (operand1, 0)))
1841 	  != XEXP (operand1, 0)))
1842     operand1 = replace_equiv_address (operand1, tem);
1843 
1844   /* Handle secondary reloads for loads/stores of FP registers from
1845      REG+D addresses where D does not fit in 5 or 14 bits, including
1846      (subreg (mem (addr))) cases, and reloads for other unsupported
1847      memory operands.  */
1848   if (scratch_reg
1849       && FP_REG_P (operand0)
1850       && (MEM_P (operand1)
1851 	  || (GET_CODE (operand1) == SUBREG
1852 	      && MEM_P (XEXP (operand1, 0)))))
1853     {
1854       rtx op1 = operand1;
1855 
1856       if (GET_CODE (op1) == SUBREG)
1857 	op1 = XEXP (op1, 0);
1858 
1859       if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
1860 	{
1861 	  if (!(TARGET_PA_20
1862 		&& !TARGET_ELF32
1863 		&& INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1864 	      && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
1865 	    {
1866 	      /* SCRATCH_REG will hold an address and maybe the actual data.
1867 		 We want it in WORD_MODE regardless of what mode it was
1868 		 originally given to us.  */
1869 	      scratch_reg = force_mode (word_mode, scratch_reg);
1870 
1871 	      /* D might not fit in 14 bits either; for such cases load D
1872 		 into scratch reg.  */
1873 	      if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1874 		{
1875 		  emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
1876 		  emit_move_insn (scratch_reg,
1877 				  gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
1878 						  Pmode,
1879 						  XEXP (XEXP (op1, 0), 0),
1880 						  scratch_reg));
1881 		}
1882 	      else
1883 		emit_move_insn (scratch_reg, XEXP (op1, 0));
1884 	      op1 = replace_equiv_address (op1, scratch_reg);
1885 	    }
1886 	}
1887       else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
1888 	       || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
1889 	       || IS_INDEX_ADDR_P (XEXP (op1, 0)))
1890 	{
1891 	  /* Load memory address into SCRATCH_REG.  */
1892 	  scratch_reg = force_mode (word_mode, scratch_reg);
1893 	  emit_move_insn (scratch_reg, XEXP (op1, 0));
1894 	  op1 = replace_equiv_address (op1, scratch_reg);
1895 	}
1896       emit_insn (gen_rtx_SET (operand0, op1));
1897       return 1;
1898     }
1899   else if (scratch_reg
1900 	   && FP_REG_P (operand1)
1901 	   && (MEM_P (operand0)
1902 	       || (GET_CODE (operand0) == SUBREG
1903 		   && MEM_P (XEXP (operand0, 0)))))
1904     {
1905       rtx op0 = operand0;
1906 
1907       if (GET_CODE (op0) == SUBREG)
1908 	op0 = XEXP (op0, 0);
1909 
1910       if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
1911 	{
1912 	  if (!(TARGET_PA_20
1913 		&& !TARGET_ELF32
1914 		&& INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1915 	      && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
1916 	    {
1917 	      /* SCRATCH_REG will hold an address and maybe the actual data.
1918 		 We want it in WORD_MODE regardless of what mode it was
1919 		 originally given to us.  */
1920 	      scratch_reg = force_mode (word_mode, scratch_reg);
1921 
1922 	      /* D might not fit in 14 bits either; for such cases load D
1923 		 into scratch reg.  */
1924 	      if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1925 		{
1926 		  emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
1927 		  emit_move_insn (scratch_reg,
1928 				  gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
1929 						  Pmode,
1930 						  XEXP (XEXP (op0, 0), 0),
1931 						  scratch_reg));
1932 		}
1933 	      else
1934 		emit_move_insn (scratch_reg, XEXP (op0, 0));
1935 	      op0 = replace_equiv_address (op0, scratch_reg);
1936 	    }
1937 	}
1938       else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
1939 	       || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
1940 	       || IS_INDEX_ADDR_P (XEXP (op0, 0)))
1941 	{
1942 	  /* Load memory address into SCRATCH_REG.  */
1943 	  scratch_reg = force_mode (word_mode, scratch_reg);
1944 	  emit_move_insn (scratch_reg, XEXP (op0, 0));
1945 	  op0 = replace_equiv_address (op0, scratch_reg);
1946 	}
1947       emit_insn (gen_rtx_SET (op0, operand1));
1948       return 1;
1949     }
1950   /* Handle secondary reloads for loads of FP registers from constant
1951      expressions by forcing the constant into memory.  For the most part,
1952      this is only necessary for SImode and DImode.
1953 
1954      Use scratch_reg to hold the address of the memory location.  */
1955   else if (scratch_reg
1956 	   && CONSTANT_P (operand1)
1957 	   && FP_REG_P (operand0))
1958     {
1959       rtx const_mem, xoperands[2];
1960 
1961       if (operand1 == CONST0_RTX (mode))
1962 	{
1963 	  emit_insn (gen_rtx_SET (operand0, operand1));
1964 	  return 1;
1965 	}
1966 
1967       /* SCRATCH_REG will hold an address and maybe the actual data.  We want
1968 	 it in WORD_MODE regardless of what mode it was originally given
1969 	 to us.  */
1970       scratch_reg = force_mode (word_mode, scratch_reg);
1971 
1972       /* Force the constant into memory and put the address of the
1973 	 memory location into scratch_reg.  */
1974       const_mem = force_const_mem (mode, operand1);
1975       xoperands[0] = scratch_reg;
1976       xoperands[1] = XEXP (const_mem, 0);
1977       pa_emit_move_sequence (xoperands, Pmode, 0);
1978 
1979       /* Now load the destination register.  */
1980       emit_insn (gen_rtx_SET (operand0,
1981 			      replace_equiv_address (const_mem, scratch_reg)));
1982       return 1;
1983     }
1984   /* Handle secondary reloads for SAR.  These occur when trying to load
1985      the SAR from memory or a constant.  */
1986   else if (scratch_reg
1987 	   && GET_CODE (operand0) == REG
1988 	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1989 	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1990 	   && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
1991     {
1992       /* D might not fit in 14 bits either; for such cases load D into
1993 	 scratch reg.  */
1994       if (GET_CODE (operand1) == MEM
1995 	  && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
1996 	{
1997 	  /* We are reloading the address into the scratch register, so we
1998 	     want to make sure the scratch register is a full register.  */
1999 	  scratch_reg = force_mode (word_mode, scratch_reg);
2000 
2001 	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
2002 	  emit_move_insn (scratch_reg,
2003 			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
2004 					  Pmode,
2005 					  XEXP (XEXP (operand1, 0), 0),
2006 					  scratch_reg));
2008 
2009 	  /* Now we are going to load the scratch register from memory,
2010 	     we want to load it in the same width as the original MEM,
2011 	     which must be the same as the width of the ultimate destination,
2012 	     OPERAND0.  */
2013 	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
2014 
2015 	  emit_move_insn (scratch_reg,
2016 			  replace_equiv_address (operand1, scratch_reg));
2017 	}
2018       else
2019 	{
2020 	  /* We want to load the scratch register using the same mode as
2021 	     the ultimate destination.  */
2022 	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
2023 
2024 	  emit_move_insn (scratch_reg, operand1);
2025 	}
2026 
2027       /* And emit the insn to set the ultimate destination.  We know that
2028 	 the scratch register has the same mode as the destination at this
2029 	 point.  */
2030       emit_move_insn (operand0, scratch_reg);
2031       return 1;
2032     }
2033 
2034   /* Handle the most common case: storing into a register.  */
2035   if (register_operand (operand0, mode))
2036     {
2037       /* Legitimize TLS symbol references.  This happens for references
2038 	 that aren't a legitimate constant.  */
2039       if (PA_SYMBOL_REF_TLS_P (operand1))
2040 	operand1 = legitimize_tls_address (operand1);
2041 
2042       if (register_operand (operand1, mode)
2043 	  || (GET_CODE (operand1) == CONST_INT
2044 	      && pa_cint_ok_for_move (UINTVAL (operand1)))
2045 	  || (operand1 == CONST0_RTX (mode))
2046 	  || (GET_CODE (operand1) == HIGH
2047 	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
2048 	  /* Only `general_operands' can come here, so MEM is ok.  */
2049 	  || GET_CODE (operand1) == MEM)
2050 	{
2051 	  /* Various sets are created during RTL generation which don't
2052 	     have the REG_POINTER flag correctly set.  After the CSE pass,
2053 	     instruction recognition can fail if we don't consistently
2054 	     set this flag when performing register copies.  This should
2055 	     also improve the opportunities for creating insns that use
2056 	     unscaled indexing.  */
2057 	  if (REG_P (operand0) && REG_P (operand1))
2058 	    {
2059 	      if (REG_POINTER (operand1)
2060 		  && !REG_POINTER (operand0)
2061 		  && !HARD_REGISTER_P (operand0))
2062 		copy_reg_pointer (operand0, operand1);
2063 	    }
2064 
2065 	  /* When MEMs are broken out, the REG_POINTER flag doesn't
2066 	     get set.  In some cases, we can set the REG_POINTER flag
2067 	     from the declaration for the MEM.  */
2068 	  if (REG_P (operand0)
2069 	      && GET_CODE (operand1) == MEM
2070 	      && !REG_POINTER (operand0))
2071 	    {
2072 	      tree decl = MEM_EXPR (operand1);
2073 
2074 	      /* Set the register pointer flag and register alignment
2075 		 if the declaration for this memory reference is a
2076 		 pointer type.  */
2077 	      if (decl)
2078 		{
2079 		  tree type;
2080 
2081 		  /* If this is a COMPONENT_REF, use the FIELD_DECL from
2082 		     tree operand 1.  */
2083 		  if (TREE_CODE (decl) == COMPONENT_REF)
2084 		    decl = TREE_OPERAND (decl, 1);
2085 
2086 		  type = TREE_TYPE (decl);
2087 		  type = strip_array_types (type);
2088 
2089 		  if (POINTER_TYPE_P (type))
2090 		    mark_reg_pointer (operand0, BITS_PER_UNIT);
2091 		}
2092 	    }
2093 
2094 	  emit_insn (gen_rtx_SET (operand0, operand1));
2095 	  return 1;
2096 	}
2097     }
2098   else if (GET_CODE (operand0) == MEM)
2099     {
2100       if (mode == DFmode && operand1 == CONST0_RTX (mode)
2101 	  && !(reload_in_progress || reload_completed))
2102 	{
2103 	  rtx temp = gen_reg_rtx (DFmode);
2104 
2105 	  emit_insn (gen_rtx_SET (temp, operand1));
2106 	  emit_insn (gen_rtx_SET (operand0, temp));
2107 	  return 1;
2108 	}
2109       if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
2110 	{
2111 	  /* Run this case quickly.  */
2112 	  emit_insn (gen_rtx_SET (operand0, operand1));
2113 	  return 1;
2114 	}
2115       if (! (reload_in_progress || reload_completed))
2116 	{
2117 	  operands[0] = validize_mem (operand0);
2118 	  operands[1] = operand1 = force_reg (mode, operand1);
2119 	}
2120     }
2121 
2122   /* Simplify the source if we need to.
2123      Note we do have to handle function labels here, even though we do
2124      not consider them legitimate constants.  Loop optimizations can
2125      call the emit_move_xxx routines with one as a source.  */
2126   if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
2127       || (GET_CODE (operand1) == HIGH
2128 	  && symbolic_operand (XEXP (operand1, 0), mode))
2129       || function_label_operand (operand1, VOIDmode)
2130       || tls_referenced_p (operand1))
2131     {
2132       int ishighonly = 0;
2133 
2134       if (GET_CODE (operand1) == HIGH)
2135 	{
2136 	  ishighonly = 1;
2137 	  operand1 = XEXP (operand1, 0);
2138 	}
2139       if (symbolic_operand (operand1, mode))
2140 	{
2141 	  /* Argh.  The assembler and linker can't handle arithmetic
2142 	     involving plabels.
2143 
2144 	     So we force the plabel into memory, load operand0 from
2145 	     the memory location, then add in the constant part.  */
2146 	  if ((GET_CODE (operand1) == CONST
2147 	       && GET_CODE (XEXP (operand1, 0)) == PLUS
2148 	       && function_label_operand (XEXP (XEXP (operand1, 0), 0),
2149 					  VOIDmode))
2150 	      || function_label_operand (operand1, VOIDmode))
2151 	    {
2152 	      rtx temp, const_part;
2153 
2154 	      /* Figure out what (if any) scratch register to use.  */
2155 	      if (reload_in_progress || reload_completed)
2156 		{
2157 		  scratch_reg = scratch_reg ? scratch_reg : operand0;
2158 		  /* SCRATCH_REG will hold an address and maybe the actual
2159 		     data.  We want it in WORD_MODE regardless of what mode it
2160 		     was originally given to us.  */
2161 		  scratch_reg = force_mode (word_mode, scratch_reg);
2162 		}
2163 	      else if (flag_pic)
2164 		scratch_reg = gen_reg_rtx (Pmode);
2165 
2166 	      if (GET_CODE (operand1) == CONST)
2167 		{
2168 		  /* Save away the constant part of the expression.  */
2169 		  const_part = XEXP (XEXP (operand1, 0), 1);
2170 		  gcc_assert (GET_CODE (const_part) == CONST_INT);
2171 
2172 		  /* Force the function label into memory.  */
2173 		  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2174 		}
2175 	      else
2176 		{
2177 		  /* No constant part.  */
2178 		  const_part = NULL_RTX;
2179 
2180 		  /* Force the function label into memory.  */
2181 		  temp = force_const_mem (mode, operand1);
2182 		}
2183 
2184 
2185 	      /* Get the address of the memory location.  PIC-ify it if
2186 		 necessary.  */
2187 	      temp = XEXP (temp, 0);
2188 	      if (flag_pic)
2189 		temp = legitimize_pic_address (temp, mode, scratch_reg);
2190 
2191 	      /* Put the address of the memory location into our destination
2192 		 register.  */
2193 	      operands[1] = temp;
2194 	      pa_emit_move_sequence (operands, mode, scratch_reg);
2195 
2196 	      /* Now load from the memory location into our destination
2197 		 register.  */
2198 	      operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2199 	      pa_emit_move_sequence (operands, mode, scratch_reg);
2200 
2201 	      /* And add back in the constant part.  */
2202 	      if (const_part != NULL_RTX)
2203 		expand_inc (operand0, const_part);
2204 
2205 	      return 1;
2206 	    }
2207 
2208 	  if (flag_pic)
2209 	    {
2210 	      rtx_insn *insn;
2211 	      rtx temp;
2212 
2213 	      if (reload_in_progress || reload_completed)
2214 		{
2215 		  temp = scratch_reg ? scratch_reg : operand0;
2216 		  /* TEMP will hold an address and maybe the actual
2217 		     data.  We want it in WORD_MODE regardless of what mode it
2218 		     was originally given to us.  */
2219 		  temp = force_mode (word_mode, temp);
2220 		}
2221 	      else
2222 		temp = gen_reg_rtx (Pmode);
2223 
2224 	      /* Force (const (plus (symbol) (const_int))) to memory
2225 	         if the const_int will not fit in 14 bits.  Although
2226 		 this requires a relocation, the instruction sequence
2227 		 needed to load the value is shorter.  */
2228 	      if (GET_CODE (operand1) == CONST
2229 		       && GET_CODE (XEXP (operand1, 0)) == PLUS
2230 		       && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2231 		       && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2232 		{
2233 		  rtx x, m = force_const_mem (mode, operand1);
2234 
2235 		  x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2236 		  x = replace_equiv_address (m, x);
2237 		  insn = emit_move_insn (operand0, x);
2238 		}
2239 	      else
2240 		{
2241 		  operands[1] = legitimize_pic_address (operand1, mode, temp);
2242 		  if (REG_P (operand0) && REG_P (operands[1]))
2243 		    copy_reg_pointer (operand0, operands[1]);
2244 		  insn = emit_move_insn (operand0, operands[1]);
2245 		}
2246 
2247 	      /* Put a REG_EQUAL note on this insn.  */
2248 	      set_unique_reg_note (insn, REG_EQUAL, operand1);
2249 	    }
2250 	  /* On the HPPA, references to data space are supposed to use dp,
2251 	     register 27, but showing it in the RTL inhibits various cse
2252 	     and loop optimizations.  */
2253 	  else
2254 	    {
2255 	      rtx temp, set;
2256 
2257 	      if (reload_in_progress || reload_completed)
2258 		{
2259 		  temp = scratch_reg ? scratch_reg : operand0;
2260 		  /* TEMP will hold an address and maybe the actual
2261 		     data.  We want it in WORD_MODE regardless of what mode it
2262 		     was originally given to us.  */
2263 		  temp = force_mode (word_mode, temp);
2264 		}
2265 	      else
2266 		temp = gen_reg_rtx (mode);
2267 
2268 	      /* Loading a SYMBOL_REF into a register makes that register
2269 		 safe to be used as the base in an indexed address.
2270 
2271 		 Don't mark hard registers though.  That loses.  */
2272 	      if (GET_CODE (operand0) == REG
2273 		  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2274 		mark_reg_pointer (operand0, BITS_PER_UNIT);
2275 	      if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2276 		mark_reg_pointer (temp, BITS_PER_UNIT);
2277 
2278 	      if (ishighonly)
2279 		set = gen_rtx_SET (operand0, temp);
2280 	      else
2281 		set = gen_rtx_SET (operand0,
2282 				   gen_rtx_LO_SUM (mode, temp, operand1));
2283 
2284 	      emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2285 	      emit_insn (set);
2286 
2287 	    }
2288 	  return 1;
2289 	}
2290       else if (tls_referenced_p (operand1))
2291 	{
2292 	  rtx tmp = operand1;
2293 	  rtx addend = NULL;
2294 
2295 	  if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2296 	    {
2297 	      addend = XEXP (XEXP (tmp, 0), 1);
2298 	      tmp = XEXP (XEXP (tmp, 0), 0);
2299 	    }
2300 
2301 	  gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2302 	  tmp = legitimize_tls_address (tmp);
2303 	  if (addend)
2304 	    {
2305 	      tmp = gen_rtx_PLUS (mode, tmp, addend);
2306 	      tmp = force_operand (tmp, operands[0]);
2307 	    }
2308 	  operands[1] = tmp;
2309 	}
2310       else if (GET_CODE (operand1) != CONST_INT
2311 	       || !pa_cint_ok_for_move (UINTVAL (operand1)))
2312 	{
2313 	  rtx temp;
2314 	  rtx_insn *insn;
2315 	  rtx op1 = operand1;
2316 	  HOST_WIDE_INT value = 0;
2317 	  HOST_WIDE_INT insv = 0;
2318 	  int insert = 0;
2319 
2320 	  if (GET_CODE (operand1) == CONST_INT)
2321 	    value = INTVAL (operand1);
2322 
2323 	  if (TARGET_64BIT
2324 	      && GET_CODE (operand1) == CONST_INT
2325 	      && HOST_BITS_PER_WIDE_INT > 32
2326 	      && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2327 	    {
2328 	      HOST_WIDE_INT nval;
2329 
2330 	      /* Extract the low order 32 bits of the value and sign extend.
2331 		 If the new value is the same as the original value, we
2332 		 can use the original value as-is.  If the new value is
2333 		 different, we use it and insert the most significant
2334 		 32 bits of the original value into the final result.  */
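	      /* A worked example (sketch): for value 0x123456789, the
		 sign-extended low-order 32 bits give nval 0x23456789.
		 Since nval differs from value, we load nval first and
		 later insert insv 0x1 (the upper 32 bits).  */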
2335 	      nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2336 		      ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2337 	      if (value != nval)
2338 		{
2339 #if HOST_BITS_PER_WIDE_INT > 32
2340 		  insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2341 #endif
2342 		  insert = 1;
2343 		  value = nval;
2344 		  operand1 = GEN_INT (nval);
2345 		}
2346 	    }
2347 
2348 	  if (reload_in_progress || reload_completed)
2349 	    temp = scratch_reg ? scratch_reg : operand0;
2350 	  else
2351 	    temp = gen_reg_rtx (mode);
2352 
2353 	  /* We don't directly split DImode constants on 32-bit targets
2354 	     because PLUS uses an 11-bit immediate and the insn sequence
2355 	     generated is not as efficient as the one using HIGH/LO_SUM.  */
2356 	  if (GET_CODE (operand1) == CONST_INT
2357 	      && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2358 	      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2359 	      && !insert)
2360 	    {
2361 	      /* Directly break constant into high and low parts.  This
2362 		 provides better optimization opportunities because various
2363 		 passes recognize constants split with PLUS but not LO_SUM.
2364 		 We use a 14-bit signed low part except when the addition
2365 		 of 0x4000 to the high part might change the sign of the
2366 		 high part.  */
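	      /* A sketch of the split: value 0x12345678 gives low 0x1678
		 and high 0x12344000; low < 0x2000, so no adjustment is
		 needed.  For value 0x12343fff, low 0x3fff >= 0x2000, so
		 high becomes 0x12344000 and the final low part is the
		 14-bit signed value -1.  */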
2367 	      HOST_WIDE_INT low = value & 0x3fff;
2368 	      HOST_WIDE_INT high = value & ~ 0x3fff;
2369 
2370 	      if (low >= 0x2000)
2371 		{
2372 		  if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2373 		    high += 0x2000;
2374 		  else
2375 		    high += 0x4000;
2376 		}
2377 
2378 	      low = value - high;
2379 
2380 	      emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
2381 	      operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2382 	    }
2383 	  else
2384 	    {
2385 	      emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2386 	      operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2387 	    }
2388 
2389 	  insn = emit_move_insn (operands[0], operands[1]);
2390 
2391 	  /* Now insert the most significant 32 bits of the value
2392 	     into the register.  When we don't have a second register
2393 	     available, it could take up to nine instructions to load
2394 	     a 64-bit integer constant.  Prior to reload, we force
2395 	     constants that would take more than three instructions
2396 	     to load to the constant pool.  During and after reload,
2397 	     we have to handle all possible values.  */
2398 	  if (insert)
2399 	    {
2400 	      /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2401 		 register and the value to be inserted is outside the
2402 		 range that can be loaded with three depdi instructions.  */
2403 	      if (temp != operand0 && (insv >= 16384 || insv < -16384))
2404 		{
2405 		  operand1 = GEN_INT (insv);
2406 
2407 		  emit_insn (gen_rtx_SET (temp,
2408 					  gen_rtx_HIGH (mode, operand1)));
2409 		  emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2410 		  if (mode == DImode)
2411 		    insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2412 						  const0_rtx, temp));
2413 		  else
2414 		    insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2415 						  const0_rtx, temp));
2416 		}
2417 	      else
2418 		{
2419 		  int len = 5, pos = 27;
2420 
2421 		  /* Insert the bits using the depdi instruction.  */
2422 		  while (pos >= 0)
2423 		    {
2424 		      HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2425 		      HOST_WIDE_INT sign = v5 < 0;
2426 
2427 		      /* Left extend the insertion.  */
2428 		      insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2429 		      while (pos > 0 && (insv & 1) == sign)
2430 			{
2431 			  insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2432 			  len += 1;
2433 			  pos -= 1;
2434 			}
2435 
2436 		      if (mode == DImode)
2437 			insn = emit_insn (gen_insvdi (operand0,
2438 						      GEN_INT (len),
2439 						      GEN_INT (pos),
2440 						      GEN_INT (v5)));
2441 		      else
2442 			insn = emit_insn (gen_insvsi (operand0,
2443 						      GEN_INT (len),
2444 						      GEN_INT (pos),
2445 						      GEN_INT (v5)));
2446 
2447 		      len = pos > 0 && pos < 5 ? pos : 5;
2448 		      pos -= len;
2449 		    }
2450 		}
2451 	    }
2452 
2453 	  set_unique_reg_note (insn, REG_EQUAL, op1);
2454 
2455 	  return 1;
2456 	}
2457     }
2458   /* Now have insn-emit do whatever it normally does.  */
2459   return 0;
2460 }
2461 
2462 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2463    it will need a link/runtime reloc).  */
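/* For example, a static initializer such as "int *p = &x;" reaches here
   as an ADDR_EXPR (possibly wrapped in a PLUS_EXPR or a conversion) and
   needs a reloc, while a plain integer constant does not.  */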
2464 
2465 int
2466 pa_reloc_needed (tree exp)
2467 {
2468   int reloc = 0;
2469 
2470   switch (TREE_CODE (exp))
2471     {
2472     case ADDR_EXPR:
2473       return 1;
2474 
2475     case POINTER_PLUS_EXPR:
2476     case PLUS_EXPR:
2477     case MINUS_EXPR:
2478       reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2479       reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2480       break;
2481 
2482     CASE_CONVERT:
2483     case NON_LVALUE_EXPR:
2484       reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2485       break;
2486 
2487     case CONSTRUCTOR:
2488       {
2489 	tree value;
2490 	unsigned HOST_WIDE_INT ix;
2491 
2492 	FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2493 	  if (value)
2494 	    reloc |= pa_reloc_needed (value);
2495       }
2496       break;
2497 
2498     case ERROR_MARK:
2499       break;
2500 
2501     default:
2502       break;
2503     }
2504   return reloc;
2505 }
2506 
2507 
2508 /* Return the best assembler insn template
2509    for moving operands[1] into operands[0] as a fullword.  */
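/* Some sample selections made below (a sketch; the %-escapes are
   expanded by print_operand):

     register <- 14-bit constant    "ldi %1,%0"
     register <- 0x12345678         "ldil L'%1,%0" then "ldo R'%1(%0),%0"
     register <- register           "copy %1,%0"
     memory   <- register           "stw %r1,%0"  */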
2510 const char *
2511 pa_singlemove_string (rtx *operands)
2512 {
2513   HOST_WIDE_INT intval;
2514 
2515   if (GET_CODE (operands[0]) == MEM)
2516     return "stw %r1,%0";
2517   if (GET_CODE (operands[1]) == MEM)
2518     return "ldw %1,%0";
2519   if (GET_CODE (operands[1]) == CONST_DOUBLE)
2520     {
2521       long i;
2522 
2523       gcc_assert (GET_MODE (operands[1]) == SFmode);
2524 
2525       /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2526 	 bit pattern.  */
2527       REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);
2528 
2529       operands[1] = GEN_INT (i);
2530       /* Fall through to CONST_INT case.  */
2531     }
2532   if (GET_CODE (operands[1]) == CONST_INT)
2533     {
2534       intval = INTVAL (operands[1]);
2535 
2536       if (VAL_14_BITS_P (intval))
2537 	return "ldi %1,%0";
2538       else if ((intval & 0x7ff) == 0)
2539 	return "ldil L'%1,%0";
2540       else if (pa_zdepi_cint_p (intval))
2541 	return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2542       else
2543 	return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2544     }
2545   return "copy %1,%0";
2546 }
2547 
2548 
2549 /* Compute position (in OP[1]) and width (in OP[2])
2550    useful for copying IMM to a register using the zdepi
2551    instruction.  Store the immediate value to insert in OP[0].  */
2552 static void
2553 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2554 {
2555   int lsb, len;
2556 
2557   /* Find the least significant set bit in IMM.  */
2558   for (lsb = 0; lsb < 32; lsb++)
2559     {
2560       if ((imm & 1) != 0)
2561         break;
2562       imm >>= 1;
2563     }
2564 
2565   /* Choose variants based on the *sign* of the 5-bit field.  */
2566   if ((imm & 0x10) == 0)
2567     len = (lsb <= 28) ? 4 : 32 - lsb;
2568   else
2569     {
2570       /* Find the width of the bitstring in IMM.  */
2571       for (len = 5; len < 32 - lsb; len++)
2572 	{
2573 	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2574 	    break;
2575 	}
2576 
2577       /* Sign extend IMM as a 5-bit value.  */
2578       imm = (imm & 0xf) - 0x10;
2579     }
2580 
2581   op[0] = imm;
2582   op[1] = 31 - lsb;
2583   op[2] = len;
2584 }
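/* A worked example (sketch): for IMM = 0x3c00 (bits 10..13 set), the
   loop above finds lsb = 10 and shifts IMM down to 0xf.  Bit 4 of the
   shifted value is clear, so len = 4, giving op[0] = 0xf,
   op[1] = 31 - 10 = 21 and op[2] = 4; depositing the 4-bit value 0xf
   ending at PA bit position 21 reconstructs 0x3c00.  */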
2585 
2586 /* Compute position (in OP[1]) and width (in OP[2])
2587    useful for copying IMM to a register using the depdi,z
2588    instruction.  Store the immediate value to insert in OP[0].  */
2589 
2590 static void
2591 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2592 {
2593   int lsb, len, maxlen;
2594 
2595   maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2596 
2597   /* Find the least significant set bit in IMM.  */
2598   for (lsb = 0; lsb < maxlen; lsb++)
2599     {
2600       if ((imm & 1) != 0)
2601         break;
2602       imm >>= 1;
2603     }
2604 
2605   /* Choose variants based on the *sign* of the 5-bit field.  */
2606   if ((imm & 0x10) == 0)
2607     len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2608   else
2609     {
2610       /* Find the width of the bitstring in IMM.  */
2611       for (len = 5; len < maxlen - lsb; len++)
2612 	{
2613 	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2614 	    break;
2615 	}
2616 
2617       /* Extend length if host is narrow and IMM is negative.  */
2618       if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2619 	len += 32;
2620 
2621       /* Sign extend IMM as a 5-bit value.  */
2622       imm = (imm & 0xf) - 0x10;
2623     }
2624 
2625   op[0] = imm;
2626   op[1] = 63 - lsb;
2627   op[2] = len;
2628 }
2629 
2630 /* Output assembler code to perform a doubleword move insn
2631    with operands OPERANDS.  */
2632 
2633 const char *
2634 pa_output_move_double (rtx *operands)
2635 {
2636   enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2637   rtx latehalf[2];
2638   rtx addreg0 = 0, addreg1 = 0;
2639   int highonly = 0;
2640 
2641   /* First classify both operands.  */
2642 
2643   if (REG_P (operands[0]))
2644     optype0 = REGOP;
2645   else if (offsettable_memref_p (operands[0]))
2646     optype0 = OFFSOP;
2647   else if (GET_CODE (operands[0]) == MEM)
2648     optype0 = MEMOP;
2649   else
2650     optype0 = RNDOP;
2651 
2652   if (REG_P (operands[1]))
2653     optype1 = REGOP;
2654   else if (CONSTANT_P (operands[1]))
2655     optype1 = CNSTOP;
2656   else if (offsettable_memref_p (operands[1]))
2657     optype1 = OFFSOP;
2658   else if (GET_CODE (operands[1]) == MEM)
2659     optype1 = MEMOP;
2660   else
2661     optype1 = RNDOP;
2662 
2663   /* Check for the cases that the operand constraints are not
2664      supposed to allow to happen.  */
2665   gcc_assert (optype0 == REGOP || optype1 == REGOP);
2666 
2667   /* Handle copies between general and floating registers.  */
2668 
2669   if (optype0 == REGOP && optype1 == REGOP
2670       && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2671     {
2672       if (FP_REG_P (operands[0]))
2673 	{
2674 	  output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2675 	  output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2676 	  return "{fldds|fldd} -16(%%sp),%0";
2677 	}
2678       else
2679 	{
2680 	  output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2681 	  output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2682 	  return "{ldws|ldw} -12(%%sp),%R0";
2683 	}
2684     }
2685 
2686   /* Handle auto-decrementing and incrementing loads and stores
2687      specifically, since the structure of the function doesn't work
2688      for them without major modification.  Do this better once this
2689      port is taught about the general inc/dec addressing of the PA.
2690      (This was written by tege.  Chide him if it doesn't work.)  */
2691 
2692   if (optype0 == MEMOP)
2693     {
2694       /* We have to output the address syntax ourselves, since print_operand
2695 	 doesn't deal with the addresses we want to use.  Fix this later.  */
2696 
2697       rtx addr = XEXP (operands[0], 0);
2698       if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2699 	{
2700 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2701 
2702 	  operands[0] = XEXP (addr, 0);
2703 	  gcc_assert (GET_CODE (operands[1]) == REG
2704 		      && GET_CODE (operands[0]) == REG);
2705 
2706 	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2707 
2708 	  /* No overlap between high target register and address
2709 	     register.  (We do this in a non-obvious way to
2710 	     save a register file writeback)  */
2711 	  if (GET_CODE (addr) == POST_INC)
2712 	    return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2713 	  return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2714 	}
2715       else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2716 	{
2717 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2718 
2719 	  operands[0] = XEXP (addr, 0);
2720 	  gcc_assert (GET_CODE (operands[1]) == REG
2721 		      && GET_CODE (operands[0]) == REG);
2722 
2723 	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2724 	  /* No overlap between high target register and address
2725 	     register.  (We do this in a non-obvious way to save a
2726 	     register file writeback)  */
2727 	  if (GET_CODE (addr) == PRE_INC)
2728 	    return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2729 	  return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2730 	}
2731     }
2732   if (optype1 == MEMOP)
2733     {
2734       /* We have to output the address syntax ourselves, since print_operand
2735 	 doesn't deal with the addresses we want to use.  Fix this later.  */
2736 
2737       rtx addr = XEXP (operands[1], 0);
2738       if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2739 	{
2740 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2741 
2742 	  operands[1] = XEXP (addr, 0);
2743 	  gcc_assert (GET_CODE (operands[0]) == REG
2744 		      && GET_CODE (operands[1]) == REG);
2745 
2746 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2747 	    {
2748 	      /* No overlap between high target register and address
2749 		 register.  (We do this in a non-obvious way to
2750 		 save a register file writeback)  */
2751 	      if (GET_CODE (addr) == POST_INC)
2752 		return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2753 	      return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2754 	    }
2755 	  else
2756 	    {
2757 	      /* This is an undefined situation.  We should load into the
2758 		 address register *and* update that register.  Probably
2759 		 we don't need to handle this at all.  */
2760 	      if (GET_CODE (addr) == POST_INC)
2761 		return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2762 	      return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2763 	    }
2764 	}
2765       else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2766 	{
2767 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2768 
2769 	  operands[1] = XEXP (addr, 0);
2770 	  gcc_assert (GET_CODE (operands[0]) == REG
2771 		      && GET_CODE (operands[1]) == REG);
2772 
2773 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2774 	    {
2775 	      /* No overlap between high target register and address
2776 		 register.  (We do this in a non-obvious way to
2777 		 save a register file writeback)  */
2778 	      if (GET_CODE (addr) == PRE_INC)
2779 		return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2780 	      return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2781 	    }
2782 	  else
2783 	    {
2784 	      /* This is an undefined situation.  We should load into the
2785 		 address register *and* update that register.  Probably
2786 		 we don't need to handle this at all.  */
2787 	      if (GET_CODE (addr) == PRE_INC)
2788 		return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2789 	      return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2790 	    }
2791 	}
2792       else if (GET_CODE (addr) == PLUS
2793 	       && GET_CODE (XEXP (addr, 0)) == MULT)
2794 	{
2795 	  rtx xoperands[4];
2796 
2797 	  /* Load address into left half of destination register.  */
2798 	  xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2799 	  xoperands[1] = XEXP (addr, 1);
2800 	  xoperands[2] = XEXP (XEXP (addr, 0), 0);
2801 	  xoperands[3] = XEXP (XEXP (addr, 0), 1);
2802 	  output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2803 			   xoperands);
2804 	  return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2805 	}
2806       else if (GET_CODE (addr) == PLUS
2807 	       && REG_P (XEXP (addr, 0))
2808 	       && REG_P (XEXP (addr, 1)))
2809 	{
2810 	  rtx xoperands[3];
2811 
2812 	  /* Load address into left half of destination register.  */
2813 	  xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2814 	  xoperands[1] = XEXP (addr, 0);
2815 	  xoperands[2] = XEXP (addr, 1);
2816 	  output_asm_insn ("{addl|add,l} %1,%2,%0",
2817 			   xoperands);
2818 	  return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2819 	}
2820     }
2821 
2822   /* If an operand is an unoffsettable memory ref, find a register
2823      we can increment temporarily to make it refer to the second word.  */
2824 
2825   if (optype0 == MEMOP)
2826     addreg0 = find_addr_reg (XEXP (operands[0], 0));
2827 
2828   if (optype1 == MEMOP)
2829     addreg1 = find_addr_reg (XEXP (operands[1], 0));
2830 
2831   /* Ok, we can do one word at a time.
2832      Normally we do the low-numbered word first.
2833 
2834      In either case, set up in LATEHALF the operands to use
2835      for the high-numbered word and in some cases alter the
2836      operands in OPERANDS to be suitable for the low-numbered word.  */
2837 
2838   if (optype0 == REGOP)
2839     latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2840   else if (optype0 == OFFSOP)
2841     latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
2842   else
2843     latehalf[0] = operands[0];
2844 
2845   if (optype1 == REGOP)
2846     latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2847   else if (optype1 == OFFSOP)
2848     latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
2849   else if (optype1 == CNSTOP)
2850     {
2851       if (GET_CODE (operands[1]) == HIGH)
2852 	{
2853 	  operands[1] = XEXP (operands[1], 0);
2854 	  highonly = 1;
2855 	}
2856       split_double (operands[1], &operands[1], &latehalf[1]);
2857     }
2858   else
2859     latehalf[1] = operands[1];
2860 
2861   /* If the first move would clobber the source of the second one,
2862      do them in the other order.
2863 
2864      This can happen in two cases:
2865 
2866 	mem -> register where the first half of the destination register
2867  	is the same register used in the memory's address.  Reload
2868 	can create such insns.
2869 
2870 	mem in this case will be either register indirect or register
2871 	indirect plus a valid offset.
2872 
2873 	register -> register move where REGNO(dst) == REGNO(src) + 1;
2874 	someone (Tim/Tege?) claimed this can happen for parameter loads.
2875 
2876      Handle mem -> register case first.  */
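  /* For instance, reload can produce a DImode copy of (mem (reg %r4))
     into %r4/%r5.  Copying the low word first would emit
     "ldw 0(%r4),%r4" and clobber the base register before
     "ldw 4(%r4),%r5" runs, so the late half must be copied first.  */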
2877   if (optype0 == REGOP
2878       && (optype1 == MEMOP || optype1 == OFFSOP)
2879       && refers_to_regno_p (REGNO (operands[0]), operands[1]))
2880     {
2881       /* Do the late half first.  */
2882       if (addreg1)
2883 	output_asm_insn ("ldo 4(%0),%0", &addreg1);
2884       output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2885 
2886       /* Then clobber.  */
2887       if (addreg1)
2888 	output_asm_insn ("ldo -4(%0),%0", &addreg1);
2889       return pa_singlemove_string (operands);
2890     }
2891 
2892   /* Now handle register -> register case.  */
2893   if (optype0 == REGOP && optype1 == REGOP
2894       && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2895     {
2896       output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2897       return pa_singlemove_string (operands);
2898     }
2899 
2900   /* Normal case: do the two words, low-numbered first.  */
2901 
2902   output_asm_insn (pa_singlemove_string (operands), operands);
2903 
2904   /* Make any unoffsettable addresses point at high-numbered word.  */
2905   if (addreg0)
2906     output_asm_insn ("ldo 4(%0),%0", &addreg0);
2907   if (addreg1)
2908     output_asm_insn ("ldo 4(%0),%0", &addreg1);
2909 
2910   /* Do high-numbered word.  */
2911   if (highonly)
2912     output_asm_insn ("ldil L'%1,%0", latehalf);
2913   else
2914     output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2915 
2916   /* Undo the adds we just did.  */
2917   if (addreg0)
2918     output_asm_insn ("ldo -4(%0),%0", &addreg0);
2919   if (addreg1)
2920     output_asm_insn ("ldo -4(%0),%0", &addreg1);
2921 
2922   return "";
2923 }
2924 
2925 const char *
2926 pa_output_fp_move_double (rtx *operands)
2927 {
2928   if (FP_REG_P (operands[0]))
2929     {
2930       if (FP_REG_P (operands[1])
2931 	  || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2932 	output_asm_insn ("fcpy,dbl %f1,%0", operands);
2933       else
2934 	output_asm_insn ("fldd%F1 %1,%0", operands);
2935     }
2936   else if (FP_REG_P (operands[1]))
2937     {
2938       output_asm_insn ("fstd%F0 %1,%0", operands);
2939     }
2940   else
2941     {
2942       rtx xoperands[2];
2943 
2944       gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2945 
2946       /* This is a pain.  You have to be prepared to deal with an
2947 	 arbitrary address here including pre/post increment/decrement.
2948 
2949 	 So we avoid this in the MD.  */
2950       gcc_assert (GET_CODE (operands[0]) == REG);
2951 
2952       xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2953       xoperands[0] = operands[0];
2954       output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2955     }
2956   return "";
2957 }
2958 
2959 /* Return a REG that occurs in ADDR with coefficient 1.
2960    ADDR can be effectively incremented by incrementing REG.  */
2961 
2962 static rtx
2963 find_addr_reg (rtx addr)
2964 {
2965   while (GET_CODE (addr) == PLUS)
2966     {
2967       if (GET_CODE (XEXP (addr, 0)) == REG)
2968 	addr = XEXP (addr, 0);
2969       else if (GET_CODE (XEXP (addr, 1)) == REG)
2970 	addr = XEXP (addr, 1);
2971       else if (CONSTANT_P (XEXP (addr, 0)))
2972 	addr = XEXP (addr, 1);
2973       else if (CONSTANT_P (XEXP (addr, 1)))
2974 	addr = XEXP (addr, 0);
2975       else
2976 	gcc_unreachable ();
2977     }
2978   gcc_assert (GET_CODE (addr) == REG);
2979   return addr;
2980 }
2981 
2982 /* Emit code to perform a block move.
2983 
2984    OPERANDS[0] is the destination pointer as a REG, clobbered.
2985    OPERANDS[1] is the source pointer as a REG, clobbered.
2986    OPERANDS[2] is a register for temporary storage.
2987    OPERANDS[3] is a register for temporary storage.
2988    OPERANDS[4] is the size as a CONST_INT
2989    OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2990    OPERANDS[6] is another temporary register.  */
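/* A worked example (sketch): for n_bytes = 23 and align = 4, the case 4
   code below emits "ldi 15,%2", a loop that copies 8 bytes per
   iteration (two iterations, 16 bytes), then the residual: one word
   copy for the next 4 bytes and an ldw/stby,e pair for the final
   3 bytes.  */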
2991 
2992 const char *
2993 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2994 {
2995   int align = INTVAL (operands[5]);
2996   unsigned long n_bytes = INTVAL (operands[4]);
2997 
2998   /* We can't move more than a word at a time because the PA
2999      has no integer move insns wider than a word.  (Could use fp mem ops?)  */
3000   if (align > (TARGET_64BIT ? 8 : 4))
3001     align = (TARGET_64BIT ? 8 : 4);
3002 
3003   /* Note that we know each loop below will execute at least twice
3004      (else we would have open-coded the copy).  */
3005   switch (align)
3006     {
3007       case 8:
3008 	/* Pre-adjust the loop counter.  */
3009 	operands[4] = GEN_INT (n_bytes - 16);
3010 	output_asm_insn ("ldi %4,%2", operands);
3011 
3012 	/* Copying loop.  */
3013 	output_asm_insn ("ldd,ma 8(%1),%3", operands);
3014 	output_asm_insn ("ldd,ma 8(%1),%6", operands);
3015 	output_asm_insn ("std,ma %3,8(%0)", operands);
3016 	output_asm_insn ("addib,>= -16,%2,.-12", operands);
3017 	output_asm_insn ("std,ma %6,8(%0)", operands);
3018 
3019 	/* Handle the residual.  There could be up to 15 bytes of
3020 	   residual to copy!  */
3021 	if (n_bytes % 16 != 0)
3022 	  {
3023 	    operands[4] = GEN_INT (n_bytes % 8);
3024 	    if (n_bytes % 16 >= 8)
3025 	      output_asm_insn ("ldd,ma 8(%1),%3", operands);
3026 	    if (n_bytes % 8 != 0)
3027 	      output_asm_insn ("ldd 0(%1),%6", operands);
3028 	    if (n_bytes % 16 >= 8)
3029 	      output_asm_insn ("std,ma %3,8(%0)", operands);
3030 	    if (n_bytes % 8 != 0)
3031 	      output_asm_insn ("stdby,e %6,%4(%0)", operands);
3032 	  }
3033 	return "";
3034 
3035       case 4:
3036 	/* Pre-adjust the loop counter.  */
3037 	operands[4] = GEN_INT (n_bytes - 8);
3038 	output_asm_insn ("ldi %4,%2", operands);
3039 
3040 	/* Copying loop.  */
3041 	output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
3042 	output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
3043 	output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
3044 	output_asm_insn ("addib,>= -8,%2,.-12", operands);
3045 	output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
3046 
3047 	/* Handle the residual.  There could be up to 7 bytes of
3048 	   residual to copy!  */
3049 	if (n_bytes % 8 != 0)
3050 	  {
3051 	    operands[4] = GEN_INT (n_bytes % 4);
3052 	    if (n_bytes % 8 >= 4)
3053 	      output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
3054 	    if (n_bytes % 4 != 0)
3055 	      output_asm_insn ("ldw 0(%1),%6", operands);
3056 	    if (n_bytes % 8 >= 4)
3057 	      output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
3058 	    if (n_bytes % 4 != 0)
3059 	      output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
3060 	  }
3061 	return "";
3062 
3063       case 2:
3064 	/* Pre-adjust the loop counter.  */
3065 	operands[4] = GEN_INT (n_bytes - 4);
3066 	output_asm_insn ("ldi %4,%2", operands);
3067 
3068 	/* Copying loop.  */
3069 	output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
3070 	output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
3071 	output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
3072 	output_asm_insn ("addib,>= -4,%2,.-12", operands);
3073 	output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
3074 
3075 	/* Handle the residual.  */
3076 	if (n_bytes % 4 != 0)
3077 	  {
3078 	    if (n_bytes % 4 >= 2)
3079 	      output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
3080 	    if (n_bytes % 2 != 0)
3081 	      output_asm_insn ("ldb 0(%1),%6", operands);
3082 	    if (n_bytes % 4 >= 2)
3083 	      output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
3084 	    if (n_bytes % 2 != 0)
3085 	      output_asm_insn ("stb %6,0(%0)", operands);
3086 	  }
3087 	return "";
3088 
3089       case 1:
3090 	/* Pre-adjust the loop counter.  */
3091 	operands[4] = GEN_INT (n_bytes - 2);
3092 	output_asm_insn ("ldi %4,%2", operands);
3093 
3094 	/* Copying loop.  */
3095 	output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
3096 	output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
3097 	output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
3098 	output_asm_insn ("addib,>= -2,%2,.-12", operands);
3099 	output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
3100 
3101 	/* Handle the residual.  */
3102 	if (n_bytes % 2 != 0)
3103 	  {
3104 	    output_asm_insn ("ldb 0(%1),%3", operands);
3105 	    output_asm_insn ("stb %3,0(%0)", operands);
3106 	  }
3107 	return "";
3108 
3109       default:
3110 	gcc_unreachable ();
3111     }
3112 }
3113 
3114 /* Count the number of insns necessary to handle this block move.
3115 
3116    Basic structure is the same as pa_output_block_move, except that we
3117    count insns rather than emit them.  */
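/* For example (a sketch): n_bytes = 23 with align = 4 counts 6 insns
   for the ldi and copy loop, 2 for the word residual (23 % 8 >= 4) and
   2 for the byte residual (23 % 4 != 0), i.e. 10 insns or 40 bytes.  */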
3118 
3119 static int
3120 compute_cpymem_length (rtx_insn *insn)
3121 {
3122   rtx pat = PATTERN (insn);
3123   unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
3124   unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
3125   unsigned int n_insns = 0;
3126 
3127   /* We can't move more than a word at a time because the PA
3128      has no integer move insns wider than a word.  (Could use fp mem ops?)  */
3129   if (align > (TARGET_64BIT ? 8 : 4))
3130     align = (TARGET_64BIT ? 8 : 4);
3131 
3132   /* The basic copying loop.  */
3133   n_insns = 6;
3134 
3135   /* Residuals.  */
3136   if (n_bytes % (2 * align) != 0)
3137     {
3138       if ((n_bytes % (2 * align)) >= align)
3139 	n_insns += 2;
3140 
3141       if ((n_bytes % align) != 0)
3142 	n_insns += 2;
3143     }
3144 
3145   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
3146   return n_insns * 4;
3147 }
3148 
3149 /* Emit code to perform a block clear.
3150 
3151    OPERANDS[0] is the destination pointer as a REG, clobbered.
3152    OPERANDS[1] is a register for temporary storage.
3153    OPERANDS[2] is the size as a CONST_INT
3154    OPERANDS[3] is the alignment safe to use, as a CONST_INT.  */
3155 
3156 const char *
3157 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3158 {
3159   int align = INTVAL (operands[3]);
3160   unsigned long n_bytes = INTVAL (operands[2]);
3161 
3162   /* We can't clear more than a word at a time because the PA
3163      has no integer move insns wider than a word.  */
3164   if (align > (TARGET_64BIT ? 8 : 4))
3165     align = (TARGET_64BIT ? 8 : 4);
3166 
3167   /* Note that we know each loop below will execute at least twice
3168      (else we would have open-coded the clear).  */
3169   switch (align)
3170     {
3171       case 8:
3172 	/* Pre-adjust the loop counter.  */
3173 	operands[2] = GEN_INT (n_bytes - 16);
3174 	output_asm_insn ("ldi %2,%1", operands);
3175 
3176 	/* Loop.  */
3177 	output_asm_insn ("std,ma %%r0,8(%0)", operands);
3178 	output_asm_insn ("addib,>= -16,%1,.-4", operands);
3179 	output_asm_insn ("std,ma %%r0,8(%0)", operands);
3180 
	/* Handle the residual.  There could be up to 15 bytes of
	   residual to clear!  */
3183 	if (n_bytes % 16 != 0)
3184 	  {
3185 	    operands[2] = GEN_INT (n_bytes % 8);
3186 	    if (n_bytes % 16 >= 8)
3187 	      output_asm_insn ("std,ma %%r0,8(%0)", operands);
3188 	    if (n_bytes % 8 != 0)
3189 	      output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3190 	  }
3191 	return "";
3192 
3193       case 4:
3194 	/* Pre-adjust the loop counter.  */
3195 	operands[2] = GEN_INT (n_bytes - 8);
3196 	output_asm_insn ("ldi %2,%1", operands);
3197 
3198 	/* Loop.  */
3199 	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3200 	output_asm_insn ("addib,>= -8,%1,.-4", operands);
3201 	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
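	/* Each iteration clears eight bytes; the second store executes
	   in the addib's delay slot.  E.g., n_bytes == 16 runs the body
	   twice, with the counter going 8, 0, -8.  */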
3202 
	/* Handle the residual.  There could be up to 7 bytes of
	   residual to clear!  */
3205 	if (n_bytes % 8 != 0)
3206 	  {
3207 	    operands[2] = GEN_INT (n_bytes % 4);
3208 	    if (n_bytes % 8 >= 4)
3209 	      output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3210 	    if (n_bytes % 4 != 0)
3211 	      output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3212 	  }
3213 	return "";
3214 
3215       case 2:
3216 	/* Pre-adjust the loop counter.  */
3217 	operands[2] = GEN_INT (n_bytes - 4);
3218 	output_asm_insn ("ldi %2,%1", operands);
3219 
3220 	/* Loop.  */
3221 	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3222 	output_asm_insn ("addib,>= -4,%1,.-4", operands);
3223 	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3224 
3225 	/* Handle the residual.  */
3226 	if (n_bytes % 4 != 0)
3227 	  {
3228 	    if (n_bytes % 4 >= 2)
3229 	      output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3230 	    if (n_bytes % 2 != 0)
3231 	      output_asm_insn ("stb %%r0,0(%0)", operands);
3232 	  }
3233 	return "";
3234 
3235       case 1:
3236 	/* Pre-adjust the loop counter.  */
3237 	operands[2] = GEN_INT (n_bytes - 2);
3238 	output_asm_insn ("ldi %2,%1", operands);
3239 
3240 	/* Loop.  */
3241 	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3242 	output_asm_insn ("addib,>= -2,%1,.-4", operands);
3243 	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3244 
3245 	/* Handle the residual.  */
3246 	if (n_bytes % 2 != 0)
3247 	  output_asm_insn ("stb %%r0,0(%0)", operands);
3248 
3249 	return "";
3250 
3251       default:
3252 	gcc_unreachable ();
3253     }
3254 }
3255 
/* Count the number of insns necessary to handle this block clear.

   Basic structure is the same as pa_output_block_clear, except that we
   count insns rather than emit them.  */
3260 
3261 static int
compute_clrmem_length (rtx_insn *insn)
3263 {
3264   rtx pat = PATTERN (insn);
3265   unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3266   unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3267   unsigned int n_insns = 0;
3268 
  /* We can't clear more than a word at a time because the PA
     has no integer move insns wider than a word.  */
3271   if (align > (TARGET_64BIT ? 8 : 4))
3272     align = (TARGET_64BIT ? 8 : 4);
3273 
3274   /* The basic loop.  */
3275   n_insns = 4;
3276 
3277   /* Residuals.  */
3278   if (n_bytes % (2 * align) != 0)
3279     {
3280       if ((n_bytes % (2 * align)) >= align)
3281 	n_insns++;
3282 
3283       if ((n_bytes % align) != 0)
3284 	n_insns++;
3285     }
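  /* For example, with align == 4 and n_bytes == 11 the residual is
     11 % 8 == 3: one extra insn for the sub-word tail, five in all.  */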
3286 
3287   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
3288   return n_insns * 4;
3289 }
3290 
3291 
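/* Return a string to perform a bitwise-and of operands[1] with operands[2]
   storing the result in operands[0].  */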
3292 const char *
pa_output_and (rtx *operands)
3294 {
3295   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3296     {
3297       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3298       int ls0, ls1, ms0, p, len;
3299 
      for (ls0 = 0; ls0 < 32; ls0++)
	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
	  break;

      for (ls1 = ls0; ls1 < 32; ls1++)
	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
	  break;

      for (ms0 = ls1; ms0 < 32; ms0++)
	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
	  break;
3311 
3312       gcc_assert (ms0 == 32);
3313 
3314       if (ls1 == 32)
3315 	{
3316 	  len = ls0;
3317 
3318 	  gcc_assert (len);
3319 
3320 	  operands[2] = GEN_INT (len);
3321 	  return "{extru|extrw,u} %1,31,%2,%0";
3322 	}
3323       else
3324 	{
3325 	  /* We could use this `depi' for the case above as well, but `depi'
3326 	     requires one more register file access than an `extru'.  */
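	  /* For example, a mask of 0xffffff0f has ls0 = 4 and ls1 = 8,
	     giving p = 27 and len = 4, so we emit "depwi 0,27,4,%0" to
	     clear bits 4..7 (PA bit positions number from the MSB).  */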
3327 
3328 	  p = 31 - ls0;
3329 	  len = ls1 - ls0;
3330 
3331 	  operands[2] = GEN_INT (p);
3332 	  operands[3] = GEN_INT (len);
3333 	  return "{depi|depwi} 0,%2,%3,%0";
3334 	}
3335     }
3336   else
3337     return "and %1,%2,%0";
3338 }
3339 
3340 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3341    storing the result in operands[0].  */
3342 const char *
pa_output_64bit_and (rtx *operands)
3344 {
3345   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3346     {
3347       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3348       int ls0, ls1, ms0, p, len;
3349 
3350       for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3351 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3352 	  break;
3353 
3354       for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3355 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3356 	  break;
3357 
3358       for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3359 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3360 	  break;
3361 
3362       gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3363 
3364       if (ls1 == HOST_BITS_PER_WIDE_INT)
3365 	{
3366 	  len = ls0;
3367 
3368 	  gcc_assert (len);
3369 
3370 	  operands[2] = GEN_INT (len);
3371 	  return "extrd,u %1,63,%2,%0";
3372 	}
3373       else
3374 	{
	  /* We could use this `depi' for the case above as well, but `depi'
	     requires one more register file access than an `extrd,u'.  */
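	  /* For example, a mask of 0xffffffffffffff0f has ls0 = 4 and
	     ls1 = 8, giving p = 59 and len = 4, hence "depdi 0,59,4,%0".  */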
3377 
3378 	  p = 63 - ls0;
3379 	  len = ls1 - ls0;
3380 
3381 	  operands[2] = GEN_INT (p);
3382 	  operands[3] = GEN_INT (len);
3383 	  return "depdi 0,%2,%3,%0";
3384 	}
3385     }
3386   else
3387     return "and %1,%2,%0";
3388 }
3389 
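/* Return a string to perform a bitwise-or of operands[1] with operands[2]
   storing the result in operands[0].  */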
3390 const char *
pa_output_ior (rtx *operands)
3392 {
3393   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3394   int bs0, bs1, p, len;
3395 
3396   if (INTVAL (operands[2]) == 0)
3397     return "copy %1,%0";
3398 
  for (bs0 = 0; bs0 < 32; bs0++)
    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
      break;

  for (bs1 = bs0; bs1 < 32; bs1++)
    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
      break;
3406 
3407   gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3408 
3409   p = 31 - bs0;
3410   len = bs1 - bs0;
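  /* For example, a mask of 0xff0 has bs0 = 4 and bs1 = 12, so p = 27
     and len = 8, and "depwi -1,27,8,%0" sets bits 4..11.  */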
3411 
3412   operands[2] = GEN_INT (p);
3413   operands[3] = GEN_INT (len);
3414   return "{depi|depwi} -1,%2,%3,%0";
3415 }
3416 
/* Return a string to perform a bitwise-or of operands[1] with operands[2]
   storing the result in operands[0].  */
3419 const char *
pa_output_64bit_ior (rtx *operands)
3421 {
3422   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3423   int bs0, bs1, p, len;
3424 
3425   if (INTVAL (operands[2]) == 0)
3426     return "copy %1,%0";
3427 
3428   for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3429     if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3430       break;
3431 
3432   for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3433     if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3434       break;
3435 
3436   gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3437 	      || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3438 
3439   p = 63 - bs0;
3440   len = bs1 - bs0;
3441 
3442   operands[2] = GEN_INT (p);
3443   operands[3] = GEN_INT (len);
3444   return "depdi -1,%2,%3,%0";
3445 }
3446 
3447 /* Target hook for assembling integer objects.  This code handles
3448    aligned SI and DI integers specially since function references
3449    must be preceded by P%.  */
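/* For example, an aligned word-size reference to function foo would be
   emitted as ".word P%foo" (".dword" in 64-bit code); the P% prefix
   requests a plabel (function descriptor) rather than the raw code
   address.  */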
3450 
3451 static bool
pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3453 {
3454   bool result;
3455   tree decl = NULL;
3456 
  /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to call
     assemble_external and set the SYMBOL_REF_DECL to NULL before
     calling output_addr_const.  Otherwise, it may call assemble_external
     in the midst of outputting the assembler code for the SYMBOL_REF.
     We restore the SYMBOL_REF_DECL after the output is done.  */
3462   if (GET_CODE (x) == SYMBOL_REF)
3463     {
3464       decl = SYMBOL_REF_DECL (x);
3465       if (decl)
3466 	{
3467 	  assemble_external (decl);
3468 	  SET_SYMBOL_REF_DECL (x, NULL);
3469 	}
3470     }
3471 
3472   if (size == UNITS_PER_WORD
3473       && aligned_p
3474       && function_label_operand (x, VOIDmode))
3475     {
      fputs (size == 8 ? "\t.dword\t" : "\t.word\t", asm_out_file);
3477 
3478       /* We don't want an OPD when generating fast indirect calls.  */
3479       if (!TARGET_FAST_INDIRECT_CALLS)
3480 	fputs ("P%", asm_out_file);
3481 
3482       output_addr_const (asm_out_file, x);
3483       fputc ('\n', asm_out_file);
3484       result = true;
3485     }
3486   else
3487     result = default_assemble_integer (x, size, aligned_p);
3488 
3489   if (decl)
3490     SET_SYMBOL_REF_DECL (x, decl);
3491 
3492   return result;
3493 }
3494 
3495 /* Output an ascii string.  */
3496 void
pa_output_ascii (FILE *file, const char *p, int size)
3498 {
3499   int i;
3500   int chars_output;
3501   unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */
3502 
3503   /* The HP assembler can only take strings of 256 characters at one
3504      time.  This is a limitation on input line length, *not* the
3505      length of the string.  Sigh.  Even worse, it seems that the
3506      restriction is in number of input characters (see \xnn &
3507      \whatever).  So we have to do this very carefully.  */
3508 
3509   fputs ("\t.STRING \"", file);
3510 
3511   chars_output = 0;
3512   for (i = 0; i < size; i += 4)
3513     {
3514       int co = 0;
3515       int io = 0;
3516       for (io = 0, co = 0; io < MIN (4, size - i); io++)
3517 	{
3518 	  unsigned int c = (unsigned char) p[i + io];
3519 
3520 	  if (c == '\"' || c == '\\')
3521 	    partial_output[co++] = '\\';
3522 	  if (c >= ' ' && c < 0177)
3523 	    partial_output[co++] = c;
3524 	  else
3525 	    {
3526 	      unsigned int hexd;
3527 	      partial_output[co++] = '\\';
3528 	      partial_output[co++] = 'x';
	      hexd = c / 16 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	      hexd = c % 16 + '0';
3534 	      if (hexd > '9')
3535 		hexd -= '9' - 'a' + 1;
3536 	      partial_output[co++] = hexd;
3537 	    }
3538 	}
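      /* Wrap before an output line exceeds 243 string characters;
	 together with the "\t.STRING \"" prefix and the closing quote
	 this stays within the assembler's 256-character line limit.  */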
3539       if (chars_output + co > 243)
3540 	{
3541 	  fputs ("\"\n\t.STRING \"", file);
3542 	  chars_output = 0;
3543 	}
3544       fwrite (partial_output, 1, (size_t) co, file);
3545       chars_output += co;
3546       co = 0;
3547     }
3548   fputs ("\"\n", file);
3549 }
3550 
3551 /* Try to rewrite floating point comparisons & branches to avoid
3552    useless add,tr insns.
3553 
3554    CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3555    to see if FPCC is dead.  CHECK_NOTES is nonzero for the
3556    first attempt to remove useless add,tr insns.  It is zero
3557    for the second pass as reorg sometimes leaves bogus REG_DEAD
3558    notes lying around.
3559 
3560    When CHECK_NOTES is zero we can only eliminate add,tr insns
3561    when there's a 1:1 correspondence between fcmp and ftest/fbranch
3562    instructions.  */
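/* (An "add,tr" is an add that unconditionally nullifies the following
   instruction; it is the filler emitted for a reversed FP compare/branch
   sequence, so eliminating it saves an insn.)  */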
3563 static void
remove_useless_addtr_insns (int check_notes)
3565 {
3566   rtx_insn *insn;
3567   static int pass = 0;
3568 
3569   /* This is fairly cheap, so always run it when optimizing.  */
3570   if (optimize > 0)
3571     {
3572       int fcmp_count = 0;
3573       int fbranch_count = 0;
3574 
3575       /* Walk all the insns in this function looking for fcmp & fbranch
3576 	 instructions.  Keep track of how many of each we find.  */
3577       for (insn = get_insns (); insn; insn = next_insn (insn))
3578 	{
3579 	  rtx tmp;
3580 
3581 	  /* Ignore anything that isn't an INSN or a JUMP_INSN.  */
3582 	  if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
3583 	    continue;
3584 
3585 	  tmp = PATTERN (insn);
3586 
3587 	  /* It must be a set.  */
3588 	  if (GET_CODE (tmp) != SET)
3589 	    continue;
3590 
3591 	  /* If the destination is CCFP, then we've found an fcmp insn.  */
3592 	  tmp = SET_DEST (tmp);
3593 	  if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3594 	    {
3595 	      fcmp_count++;
3596 	      continue;
3597 	    }
3598 
3599 	  tmp = PATTERN (insn);
3600 	  /* If this is an fbranch instruction, bump the fbranch counter.  */
3601 	  if (GET_CODE (tmp) == SET
3602 	      && SET_DEST (tmp) == pc_rtx
3603 	      && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3604 	      && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3605 	      && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3606 	      && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3607 	    {
3608 	      fbranch_count++;
3609 	      continue;
3610 	    }
3611 	}
3612 
3613 
3614       /* Find all floating point compare + branch insns.  If possible,
3615 	 reverse the comparison & the branch to avoid add,tr insns.  */
3616       for (insn = get_insns (); insn; insn = next_insn (insn))
3617 	{
3618 	  rtx tmp;
3619 	  rtx_insn *next;
3620 
3621 	  /* Ignore anything that isn't an INSN.  */
3622 	  if (! NONJUMP_INSN_P (insn))
3623 	    continue;
3624 
3625 	  tmp = PATTERN (insn);
3626 
3627 	  /* It must be a set.  */
3628 	  if (GET_CODE (tmp) != SET)
3629 	    continue;
3630 
3631 	  /* The destination must be CCFP, which is register zero.  */
3632 	  tmp = SET_DEST (tmp);
3633 	  if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3634 	    continue;
3635 
3636 	  /* INSN should be a set of CCFP.
3637 
3638 	     See if the result of this insn is used in a reversed FP
3639 	     conditional branch.  If so, reverse our condition and
3640 	     the branch.  Doing so avoids useless add,tr insns.  */
3641 	  next = next_insn (insn);
3642 	  while (next)
3643 	    {
3644 	      /* Jumps, calls and labels stop our search.  */
3645 	      if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
3646 		break;
3647 
3648 	      /* As does another fcmp insn.  */
3649 	      if (NONJUMP_INSN_P (next)
3650 		  && GET_CODE (PATTERN (next)) == SET
3651 		  && GET_CODE (SET_DEST (PATTERN (next))) == REG
3652 		  && REGNO (SET_DEST (PATTERN (next))) == 0)
3653 		break;
3654 
3655 	      next = next_insn (next);
3656 	    }
3657 
	  /* Is NEXT a branch?  */
3659 	  if (next && JUMP_P (next))
3660 	    {
3661 	      rtx pattern = PATTERN (next);
3662 
	      /* If it is a reversed fp conditional branch (e.g. uses add,tr)
		 and CCFP dies, then reverse our conditional and the branch
		 to avoid the add,tr.  */
3666 	      if (GET_CODE (pattern) == SET
3667 		  && SET_DEST (pattern) == pc_rtx
3668 		  && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3669 		  && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3670 		  && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3671 		  && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3672 		  && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3673 		  && (fcmp_count == fbranch_count
3674 		      || (check_notes
3675 			  && find_regno_note (next, REG_DEAD, 0))))
3676 		{
3677 		  /* Reverse the branch.  */
3678 		  tmp = XEXP (SET_SRC (pattern), 1);
3679 		  XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3680 		  XEXP (SET_SRC (pattern), 2) = tmp;
3681 		  INSN_CODE (next) = -1;
3682 
3683 		  /* Reverse our condition.  */
3684 		  tmp = PATTERN (insn);
3685 		  PUT_CODE (XEXP (tmp, 1),
3686 			    (reverse_condition_maybe_unordered
3687 			     (GET_CODE (XEXP (tmp, 1)))));
3688 		}
3689 	    }
3690 	}
3691     }
3692 
  pass = !pass;
}
3696 
3697 /* You may have trouble believing this, but this is the 32 bit HP-PA
3698    stack layout.  Wow.
3699 
3700    Offset		Contents
3701 
3702    Variable arguments	(optional; any number may be allocated)
3703 
3704    SP-(4*(N+9))		arg word N
3705    	:		    :
3706       SP-56		arg word 5
3707       SP-52		arg word 4
3708 
3709    Fixed arguments	(must be allocated; may remain unused)
3710 
3711       SP-48		arg word 3
3712       SP-44		arg word 2
3713       SP-40		arg word 1
3714       SP-36		arg word 0
3715 
3716    Frame Marker
3717 
3718       SP-32		External Data Pointer (DP)
3719       SP-28		External sr4
3720       SP-24		External/stub RP (RP')
3721       SP-20		Current RP
3722       SP-16		Static Link
3723       SP-12		Clean up
3724       SP-8		Calling Stub RP (RP'')
3725       SP-4		Previous SP
3726 
3727    Top of Frame
3728 
3729       SP-0		Stack Pointer (points to next available address)
3730 
3731 */
3732 
3733 /* This function saves registers as follows.  Registers marked with ' are
3734    this function's registers (as opposed to the previous function's).
3735    If a frame_pointer isn't needed, r4 is saved as a general register;
3736    the space for the frame pointer is still allocated, though, to keep
3737    things simple.
3738 
3739 
3740    Top of Frame
3741 
3742        SP (FP')		Previous FP
3743        SP + 4		Alignment filler (sigh)
3744        SP + 8		Space for locals reserved here.
3745        .
3746        .
3747        .
       SP + n		All call saved registers used.
3749        .
3750        .
3751        .
3752        SP + o		All call saved fp registers used.
3753        .
3754        .
3755        .
3756        SP + p (SP')	points to next available address.
3757 
3758 */
3759 
3760 /* Global variables set by output_function_prologue().  */
3761 /* Size of frame.  Need to know this to emit return insns from
3762    leaf procedures.  */
3763 static HOST_WIDE_INT actual_fsize, local_fsize;
3764 static int save_fregs;
3765 
3766 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3767    Handle case where DISP > 8k by using the add_high_const patterns.
3768 
   Note that in the DISP > 8k case, we will leave the high part of the
   address in %r1.  There is code in pa_expand_{prologue,epilogue} that
   knows this.  */
3771 
3772 static void
store_reg (int reg, HOST_WIDE_INT disp, int base)
3774 {
3775   rtx dest, src, basereg;
3776   rtx_insn *insn;
3777 
3778   src = gen_rtx_REG (word_mode, reg);
3779   basereg = gen_rtx_REG (Pmode, base);
3780   if (VAL_14_BITS_P (disp))
3781     {
3782       dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3783       insn = emit_move_insn (dest, src);
3784     }
3785   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3786     {
3787       rtx delta = GEN_INT (disp);
3788       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3789 
3790       emit_move_insn (tmpreg, delta);
3791       insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3792       if (DO_FRAME_NOTES)
3793 	{
3794 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3795 			gen_rtx_SET (tmpreg,
3796 				     gen_rtx_PLUS (Pmode, basereg, delta)));
3797 	  RTX_FRAME_RELATED_P (insn) = 1;
3798 	}
3799       dest = gen_rtx_MEM (word_mode, tmpreg);
3800       insn = emit_move_insn (dest, src);
3801     }
3802   else
3803     {
3804       rtx delta = GEN_INT (disp);
3805       rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3806       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3807 
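      /* Conceptually this is "addil L'disp,%base" to form the high part
	 of the address in %r1, followed by a store that supplies the
	 low part, e.g. "stw %reg,R'disp(%r1)".  */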
3808       emit_move_insn (tmpreg, high);
3809       dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3810       insn = emit_move_insn (dest, src);
3811       if (DO_FRAME_NOTES)
3812 	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3813 		      gen_rtx_SET (gen_rtx_MEM (word_mode,
3814 						gen_rtx_PLUS (word_mode,
3815 							      basereg,
3816 							      delta)),
3817 				   src));
3818     }
3819 
3820   if (DO_FRAME_NOTES)
3821     RTX_FRAME_RELATED_P (insn) = 1;
3822 }
3823 
3824 /* Emit RTL to store REG at the memory location specified by BASE and then
3825    add MOD to BASE.  MOD must be <= 8k.  */
3826 
3827 static void
store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3829 {
3830   rtx basereg, srcreg, delta;
3831   rtx_insn *insn;
3832 
3833   gcc_assert (VAL_14_BITS_P (mod));
3834 
3835   basereg = gen_rtx_REG (Pmode, base);
3836   srcreg = gen_rtx_REG (word_mode, reg);
3837   delta = GEN_INT (mod);
3838 
3839   insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3840   if (DO_FRAME_NOTES)
3841     {
3842       RTX_FRAME_RELATED_P (insn) = 1;
3843 
3844       /* RTX_FRAME_RELATED_P must be set on each frame related set
3845 	 in a parallel with more than one element.  */
3846       RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3847       RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3848     }
3849 }
3850 
3851 /* Emit RTL to set REG to the value specified by BASE+DISP.  Handle case
3852    where DISP > 8k by using the add_high_const patterns.  NOTE indicates
3853    whether to add a frame note or not.
3854 
   In the DISP > 8k case, we leave the high part of the address in %r1.
   There is code in pa_expand_{prologue,epilogue} that knows about this.  */
3857 
3858 static void
set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3860 {
3861   rtx_insn *insn;
3862 
3863   if (VAL_14_BITS_P (disp))
3864     {
3865       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3866 			     plus_constant (Pmode,
3867 					    gen_rtx_REG (Pmode, base), disp));
3868     }
3869   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3870     {
3871       rtx basereg = gen_rtx_REG (Pmode, base);
3872       rtx delta = GEN_INT (disp);
3873       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3874 
3875       emit_move_insn (tmpreg, delta);
3876       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3877 			     gen_rtx_PLUS (Pmode, tmpreg, basereg));
3878       if (DO_FRAME_NOTES)
3879 	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3880 		      gen_rtx_SET (tmpreg,
3881 				   gen_rtx_PLUS (Pmode, basereg, delta)));
3882     }
3883   else
3884     {
3885       rtx basereg = gen_rtx_REG (Pmode, base);
3886       rtx delta = GEN_INT (disp);
3887       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3888 
3889       emit_move_insn (tmpreg,
3890 		      gen_rtx_PLUS (Pmode, basereg,
3891 				    gen_rtx_HIGH (Pmode, delta)));
3892       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3893 			     gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3894     }
3895 
3896   if (DO_FRAME_NOTES && note)
3897     RTX_FRAME_RELATED_P (insn) = 1;
3898 }
3899 
3900 HOST_WIDE_INT
pa_compute_frame_size (poly_int64 size, int *fregs_live)
3902 {
3903   int freg_saved = 0;
3904   int i, j;
3905 
3906   /* The code in pa_expand_prologue and pa_expand_epilogue must
3907      be consistent with the rounding and size calculation done here.
3908      Change them at the same time.  */
3909 
3910   /* We do our own stack alignment.  First, round the size of the
3911      stack locals up to a word boundary.  */
3912   size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
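  /* E.g., with 4-byte words, a 10-byte local area rounds up to 12.  */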
3913 
3914   /* Space for previous frame pointer + filler.  If any frame is
3915      allocated, we need to add in the TARGET_STARTING_FRAME_OFFSET.  We
3916      waste some space here for the sake of HP compatibility.  The
3917      first slot is only used when the frame pointer is needed.  */
3918   if (size || frame_pointer_needed)
3919     size += pa_starting_frame_offset ();
3920 
3921   /* If the current function calls __builtin_eh_return, then we need
3922      to allocate stack space for registers that will hold data for
3923      the exception handler.  */
3924   if (DO_FRAME_NOTES && crtl->calls_eh_return)
3925     {
3926       unsigned int i;
3927 
3928       for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3929 	continue;
3930       size += i * UNITS_PER_WORD;
3931     }
3932 
3933   /* Account for space used by the callee general register saves.  */
3934   for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3935     if (df_regs_ever_live_p (i))
3936       size += UNITS_PER_WORD;
3937 
3938   /* Account for space used by the callee floating point register saves.  */
3939   for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3940     if (df_regs_ever_live_p (i)
3941 	|| (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3942       {
3943 	freg_saved = 1;
3944 
3945 	/* We always save both halves of the FP register, so always
3946 	   increment the frame size by 8 bytes.  */
3947 	size += 8;
3948       }
3949 
3950   /* If any of the floating registers are saved, account for the
3951      alignment needed for the floating point register save block.  */
3952   if (freg_saved)
3953     {
3954       size = (size + 7) & ~7;
3955       if (fregs_live)
3956 	*fregs_live = 1;
3957     }
3958 
3959   /* The various ABIs include space for the outgoing parameters in the
3960      size of the current function's stack frame.  We don't need to align
3961      for the outgoing arguments as their alignment is set by the final
3962      rounding for the frame as a whole.  */
3963   size += crtl->outgoing_args_size;
3964 
3965   /* Allocate space for the fixed frame marker.  This space must be
3966      allocated for any function that makes calls or allocates
3967      stack space.  */
3968   if (!crtl->is_leaf || size)
3969     size += TARGET_64BIT ? 48 : 32;
3970 
3971   /* Finally, round to the preferred stack boundary.  */
3972   return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3973 	  & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3974 }
3975 
3976 /* Output function label, and associated .PROC and .CALLINFO statements.  */
3977 
3978 void
pa_output_function_label (FILE *file)
3980 {
3981   /* The function's label and associated .PROC must never be
3982      separated and must be output *after* any profiling declarations
3983      to avoid changing spaces/subspaces within a procedure.  */
3984   ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3985   fputs ("\t.PROC\n", file);
3986 
3987   /* pa_expand_prologue does the dirty work now.  We just need
3988      to output the assembler directives which denote the start
3989      of a function.  */
3990   fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3991   if (crtl->is_leaf)
3992     fputs (",NO_CALLS", file);
3993   else
3994     fputs (",CALLS", file);
3995   if (rp_saved)
3996     fputs (",SAVE_RP", file);
3997 
3998   /* The SAVE_SP flag is used to indicate that register %r3 is stored
3999      at the beginning of the frame and that it is used as the frame
4000      pointer for the frame.  We do this because our current frame
4001      layout doesn't conform to that specified in the HP runtime
4002      documentation and we need a way to indicate to programs such as
4003      GDB where %r3 is saved.  The SAVE_SP flag was chosen because it
4004      isn't used by HP compilers but is supported by the assembler.
4005      However, SAVE_SP is supposed to indicate that the previous stack
4006      pointer has been saved in the frame marker.  */
4007   if (frame_pointer_needed)
4008     fputs (",SAVE_SP", file);
4009 
4010   /* Pass on information about the number of callee register saves
4011      performed in the prologue.
4012 
4013      The compiler is supposed to pass the highest register number
4014      saved, the assembler then has to adjust that number before
4015      entering it into the unwind descriptor (to account for any
4016      caller saved registers with lower register numbers than the
4017      first callee saved register).  */
4018   if (gr_saved)
4019     fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
4020 
4021   if (fr_saved)
4022     fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
4023 
4024   fputs ("\n\t.ENTRY\n", file);
4025 }
4026 
4027 /* Output function prologue.  */
4028 
4029 static void
pa_output_function_prologue (FILE *file)
4031 {
4032   pa_output_function_label (file);
4033   remove_useless_addtr_insns (0);
4034 }
4035 
4036 /* The label is output by ASM_DECLARE_FUNCTION_NAME on linux.  */
4037 
4038 static void
pa_linux_output_function_prologue (FILE *file ATTRIBUTE_UNUSED)
4040 {
4041   remove_useless_addtr_insns (0);
4042 }
4043 
4044 void
pa_expand_prologue (void)
4046 {
4047   int merge_sp_adjust_with_store = 0;
4048   HOST_WIDE_INT size = get_frame_size ();
4049   HOST_WIDE_INT offset;
4050   int i;
4051   rtx tmpreg;
4052   rtx_insn *insn;
4053 
4054   gr_saved = 0;
4055   fr_saved = 0;
4056   save_fregs = 0;
4057 
4058   /* Compute total size for frame pointer, filler, locals and rounding to
4059      the next word boundary.  Similar code appears in pa_compute_frame_size
4060      and must be changed in tandem with this code.  */
4061   local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
4062   if (local_fsize || frame_pointer_needed)
4063     local_fsize += pa_starting_frame_offset ();
4064 
4065   actual_fsize = pa_compute_frame_size (size, &save_fregs);
4066   if (flag_stack_usage_info)
4067     current_function_static_stack_size = actual_fsize;
4068 
4069   /* Compute a few things we will use often.  */
4070   tmpreg = gen_rtx_REG (word_mode, 1);
4071 
4072   /* Save RP first.  The calling conventions manual states RP will
4073      always be stored into the caller's frame at sp - 20 or sp - 16
4074      depending on which ABI is in use.  */
4075   if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
4076     {
4077       store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
4078       rp_saved = true;
4079     }
4080   else
4081     rp_saved = false;
4082 
4083   /* Allocate the local frame and set up the frame pointer if needed.  */
4084   if (actual_fsize != 0)
4085     {
4086       if (frame_pointer_needed)
4087 	{
4088 	  /* Copy the old frame pointer temporarily into %r1.  Set up the
4089 	     new stack pointer, then store away the saved old frame pointer
4090 	     into the stack at sp and at the same time update the stack
4091 	     pointer by actual_fsize bytes.  Two versions, first
4092 	     handles small (<8k) frames.  The second handles large (>=8k)
4093 	     frames.  */
4094 	  insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
4095 	  if (DO_FRAME_NOTES)
4096 	    RTX_FRAME_RELATED_P (insn) = 1;
4097 
4098 	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4099 	  if (DO_FRAME_NOTES)
4100 	    RTX_FRAME_RELATED_P (insn) = 1;
4101 
4102 	  if (VAL_14_BITS_P (actual_fsize))
4103 	    store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
4104 	  else
4105 	    {
4106 	      /* It is incorrect to store the saved frame pointer at *sp,
4107 		 then increment sp (writes beyond the current stack boundary).
4108 
4109 		 So instead use stwm to store at *sp and post-increment the
4110 		 stack pointer as an atomic operation.  Then increment sp to
4111 		 finish allocating the new frame.  */
4112 	      HOST_WIDE_INT adjust1 = 8192 - 64;
4113 	      HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
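	      /* E.g., for actual_fsize == 20000, adjust1 == 8128 and
		 adjust2 == 11872.  */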
4114 
4115 	      store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
4116 	      set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4117 			      adjust2, 1);
4118 	    }
4119 
4120 	  /* We set SAVE_SP in frames that need a frame pointer.  Thus,
4121 	     we need to store the previous stack pointer (frame pointer)
4122 	     into the frame marker on targets that use the HP unwind
4123 	     library.  This allows the HP unwind library to be used to
4124 	     unwind GCC frames.  However, we are not fully compatible
4125 	     with the HP library because our frame layout differs from
4126 	     that specified in the HP runtime specification.
4127 
4128 	     We don't want a frame note on this instruction as the frame
4129 	     marker moves during dynamic stack allocation.
4130 
4131 	     This instruction also serves as a blockage to prevent
4132 	     register spills from being scheduled before the stack
4133 	     pointer is raised.  This is necessary as we store
4134 	     registers using the frame pointer as a base register,
4135 	     and the frame pointer is set before sp is raised.  */
4136 	  if (TARGET_HPUX_UNWIND_LIBRARY)
4137 	    {
4138 	      rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
4139 				       GEN_INT (TARGET_64BIT ? -8 : -4));
4140 
4141 	      emit_move_insn (gen_rtx_MEM (word_mode, addr),
4142 			      hard_frame_pointer_rtx);
4143 	    }
4144 	  else
4145 	    emit_insn (gen_blockage ());
4146 	}
      /* No frame pointer needed.  */
4148       else
4149 	{
	  /* In some cases we can perform the first callee register save
	     and allocate the stack frame at the same time.  If so, just
	     make a note of it and defer allocating the frame until saving
	     the callee registers.  */
4154 	  if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
4155 	    merge_sp_adjust_with_store = 1;
4156 	  /* Cannot optimize.  Adjust the stack frame by actual_fsize
4157 	     bytes.  */
4158 	  else
4159 	    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4160 			    actual_fsize, 1);
4161 	}
4162     }
4163 
4164   /* Normal register save.
4165 
4166      Do not save the frame pointer in the frame_pointer_needed case.  It
4167      was done earlier.  */
4168   if (frame_pointer_needed)
4169     {
4170       offset = local_fsize;
4171 
4172       /* Saving the EH return data registers in the frame is the simplest
4173 	 way to get the frame unwind information emitted.  We put them
4174 	 just before the general registers.  */
4175       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4176 	{
4177 	  unsigned int i, regno;
4178 
4179 	  for (i = 0; ; ++i)
4180 	    {
4181 	      regno = EH_RETURN_DATA_REGNO (i);
4182 	      if (regno == INVALID_REGNUM)
4183 		break;
4184 
4185 	      store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4186 	      offset += UNITS_PER_WORD;
4187 	    }
4188 	}
4189 
4190       for (i = 18; i >= 4; i--)
4191 	if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4192 	  {
4193 	    store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4194 	    offset += UNITS_PER_WORD;
4195 	    gr_saved++;
4196 	  }
4197       /* Account for %r3 which is saved in a special place.  */
4198       gr_saved++;
4199     }
4200   /* No frame pointer needed.  */
4201   else
4202     {
4203       offset = local_fsize - actual_fsize;
4204 
4205       /* Saving the EH return data registers in the frame is the simplest
4206          way to get the frame unwind information emitted.  */
4207       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4208 	{
4209 	  unsigned int i, regno;
4210 
4211 	  for (i = 0; ; ++i)
4212 	    {
4213 	      regno = EH_RETURN_DATA_REGNO (i);
4214 	      if (regno == INVALID_REGNUM)
4215 		break;
4216 
4217 	      /* If merge_sp_adjust_with_store is nonzero, then we can
4218 		 optimize the first save.  */
4219 	      if (merge_sp_adjust_with_store)
4220 		{
4221 		  store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4222 		  merge_sp_adjust_with_store = 0;
4223 		}
4224 	      else
4225 		store_reg (regno, offset, STACK_POINTER_REGNUM);
4226 	      offset += UNITS_PER_WORD;
4227 	    }
4228 	}
4229 
4230       for (i = 18; i >= 3; i--)
4231 	if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4232 	  {
4233 	    /* If merge_sp_adjust_with_store is nonzero, then we can
4234 	       optimize the first GR save.  */
4235 	    if (merge_sp_adjust_with_store)
4236 	      {
4237 		store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4238 		merge_sp_adjust_with_store = 0;
4239 	      }
4240 	    else
4241 	      store_reg (i, offset, STACK_POINTER_REGNUM);
4242 	    offset += UNITS_PER_WORD;
4243 	    gr_saved++;
4244 	  }
4245 
4246       /* If we wanted to merge the SP adjustment with a GR save, but we never
4247 	 did any GR saves, then just emit the adjustment here.  */
4248       if (merge_sp_adjust_with_store)
4249 	set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4250 			actual_fsize, 1);
4251     }
4252 
4253   /* The hppa calling conventions say that %r19, the pic offset
4254      register, is saved at sp - 32 (in this function's frame)
4255      when generating PIC code.  FIXME:  What is the correct thing
4256      to do for functions which make no calls and allocate no
4257      frame?  Do we need to allocate a frame, or can we just omit
4258      the save?   For now we'll just omit the save.
4259 
4260      We don't want a note on this insn as the frame marker can
4261      move if there is a dynamic stack allocation.  */
4262   if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4263     {
4264       rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4265 
      emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
    }
4269 
4270   /* Align pointer properly (doubleword boundary).  */
4271   offset = (offset + 7) & ~7;
4272 
4273   /* Floating point register store.  */
4274   if (save_fregs)
4275     {
4276       rtx base;
4277 
4278       /* First get the frame or stack pointer to the start of the FP register
4279 	 save area.  */
4280       if (frame_pointer_needed)
4281 	{
4282 	  set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4283 	  base = hard_frame_pointer_rtx;
4284 	}
4285       else
4286 	{
4287 	  set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4288 	  base = stack_pointer_rtx;
4289 	}
4290 
4291       /* Now actually save the FP registers.  */
4292       for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4293 	{
4294 	  if (df_regs_ever_live_p (i)
4295 	      || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4296 	    {
4297 	      rtx addr, reg;
4298 	      rtx_insn *insn;
4299 	      addr = gen_rtx_MEM (DFmode,
4300 				  gen_rtx_POST_INC (word_mode, tmpreg));
4301 	      reg = gen_rtx_REG (DFmode, i);
4302 	      insn = emit_move_insn (addr, reg);
4303 	      if (DO_FRAME_NOTES)
4304 		{
4305 		  RTX_FRAME_RELATED_P (insn) = 1;
4306 		  if (TARGET_64BIT)
4307 		    {
4308 		      rtx mem = gen_rtx_MEM (DFmode,
4309 					     plus_constant (Pmode, base,
4310 							    offset));
4311 		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4312 				    gen_rtx_SET (mem, reg));
4313 		    }
4314 		  else
4315 		    {
4316 		      rtx meml = gen_rtx_MEM (SFmode,
4317 					      plus_constant (Pmode, base,
4318 							     offset));
4319 		      rtx memr = gen_rtx_MEM (SFmode,
4320 					      plus_constant (Pmode, base,
4321 							     offset + 4));
4322 		      rtx regl = gen_rtx_REG (SFmode, i);
4323 		      rtx regr = gen_rtx_REG (SFmode, i + 1);
4324 		      rtx setl = gen_rtx_SET (meml, regl);
4325 		      rtx setr = gen_rtx_SET (memr, regr);
4326 		      rtvec vec;
4327 
4328 		      RTX_FRAME_RELATED_P (setl) = 1;
4329 		      RTX_FRAME_RELATED_P (setr) = 1;
4330 		      vec = gen_rtvec (2, setl, setr);
4331 		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4332 				    gen_rtx_SEQUENCE (VOIDmode, vec));
4333 		    }
4334 		}
4335 	      offset += GET_MODE_SIZE (DFmode);
4336 	      fr_saved++;
4337 	    }
4338 	}
4339     }
4340 }
4341 
4342 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4343    Handle case where DISP > 8k by using the add_high_const patterns.  */
4344 
4345 static void
load_reg (int reg, HOST_WIDE_INT disp, int base)
4347 {
4348   rtx dest = gen_rtx_REG (word_mode, reg);
4349   rtx basereg = gen_rtx_REG (Pmode, base);
4350   rtx src;
4351 
4352   if (VAL_14_BITS_P (disp))
4353     src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4354   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4355     {
4356       rtx delta = GEN_INT (disp);
4357       rtx tmpreg = gen_rtx_REG (Pmode, 1);
4358 
4359       emit_move_insn (tmpreg, delta);
4360       if (TARGET_DISABLE_INDEXING)
4361 	{
4362 	  emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4363 	  src = gen_rtx_MEM (word_mode, tmpreg);
4364 	}
4365       else
4366 	src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4367     }
4368   else
4369     {
4370       rtx delta = GEN_INT (disp);
4371       rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4372       rtx tmpreg = gen_rtx_REG (Pmode, 1);
4373 
4374       emit_move_insn (tmpreg, high);
4375       src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4376     }
4377 
4378   emit_move_insn (dest, src);
4379 }
4380 
4381 /* Update the total code bytes output to the text section.  */
4382 
4383 static void
update_total_code_bytes (unsigned int nbytes)
4385 {
4386   if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4387       && !IN_NAMED_SECTION_P (cfun->decl))
4388     {
4389       unsigned int old_total = total_code_bytes;
4390 
4391       total_code_bytes += nbytes;
4392 
4393       /* Be prepared to handle overflows.  */
4394       if (old_total > total_code_bytes)
4395         total_code_bytes = UINT_MAX;
4396     }
4397 }
4398 
4399 /* This function generates the assembly code for function exit.
4400    Args are as for output_function_prologue ().
4401 
4402    The function epilogue should not depend on the current stack
4403    pointer!  It should use the frame pointer only.  This is mandatory
4404    because of alloca; we also take advantage of it to omit stack
4405    adjustments before returning.  */
4406 
4407 static void
pa_output_function_epilogue (FILE *file)
4409 {
4410   rtx_insn *insn = get_last_insn ();
4411   bool extra_nop;
4412 
4413   /* pa_expand_epilogue does the dirty work now.  We just need
4414      to output the assembler directives which denote the end
4415      of a function.
4416 
4417      To make debuggers happy, emit a nop if the epilogue was completely
4418      eliminated due to a volatile call as the last insn in the
4419      current function.  That way the return address (in %r2) will
4420      always point to a valid instruction in the current function.  */
4421 
4422   /* Get the last real insn.  */
4423   if (NOTE_P (insn))
4424     insn = prev_real_insn (insn);
4425 
4426   /* If it is a sequence, then look inside.  */
4427   if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
    insn = as_a <rtx_sequence *> (PATTERN (insn))->insn (0);
4429 
4430   /* If insn is a CALL_INSN, then it must be a call to a volatile
4431      function (otherwise there would be epilogue insns).  */
4432   if (insn && CALL_P (insn))
4433     {
4434       fputs ("\tnop\n", file);
4435       extra_nop = true;
4436     }
4437   else
4438     extra_nop = false;
4439 
4440   fputs ("\t.EXIT\n\t.PROCEND\n", file);
4441 
4442   if (TARGET_SOM && TARGET_GAS)
4443     {
4444       /* We are done with this subspace except possibly for some additional
4445 	 debug information.  Forget that we are in this subspace to ensure
4446 	 that the next function is output in its own subspace.  */
4447       in_section = NULL;
4448       cfun->machine->in_nsubspa = 2;
4449     }
4450 
4451   /* Thunks do their own insn accounting.  */
4452   if (cfun->is_thunk)
4453     return;
4454 
4455   if (INSN_ADDRESSES_SET_P ())
4456     {
4457       last_address = extra_nop ? 4 : 0;
4458       insn = get_last_nonnote_insn ();
4459       if (insn)
4460 	{
4461 	  last_address += INSN_ADDRESSES (INSN_UID (insn));
4462 	  if (INSN_P (insn))
4463 	    last_address += insn_default_length (insn);
4464 	}
4465       last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4466 		      & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4467     }
4468   else
4469     last_address = UINT_MAX;
4470 
4471   /* Finally, update the total number of code bytes output so far.  */
4472   update_total_code_bytes (last_address);
4473 }
4474 
4475 void
pa_expand_epilogue (void)
4477 {
4478   rtx tmpreg;
4479   HOST_WIDE_INT offset;
4480   HOST_WIDE_INT ret_off = 0;
4481   int i;
4482   int merge_sp_adjust_with_load = 0;
4483 
4484   /* We will use this often.  */
4485   tmpreg = gen_rtx_REG (word_mode, 1);
4486 
4487   /* Try to restore RP early to avoid load/use interlocks when
4488      RP gets used in the return (bv) instruction.  This appears to still
4489      be necessary even when we schedule the prologue and epilogue.  */
4490   if (rp_saved)
4491     {
4492       ret_off = TARGET_64BIT ? -16 : -20;
4493       if (frame_pointer_needed)
4494 	{
4495 	  load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4496 	  ret_off = 0;
4497 	}
4498       else
4499 	{
4500 	  /* No frame pointer, and stack is smaller than 8k.  */
4501 	  if (VAL_14_BITS_P (ret_off - actual_fsize))
4502 	    {
4503 	      load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4504 	      ret_off = 0;
4505 	    }
4506 	}
4507     }
4508 
4509   /* General register restores.  */
4510   if (frame_pointer_needed)
4511     {
4512       offset = local_fsize;
4513 
4514       /* If the current function calls __builtin_eh_return, then we need
4515          to restore the saved EH data registers.  */
4516       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4517 	{
4518 	  unsigned int i, regno;
4519 
4520 	  for (i = 0; ; ++i)
4521 	    {
4522 	      regno = EH_RETURN_DATA_REGNO (i);
4523 	      if (regno == INVALID_REGNUM)
4524 		break;
4525 
4526 	      load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4527 	      offset += UNITS_PER_WORD;
4528 	    }
4529 	}
4530 
4531       for (i = 18; i >= 4; i--)
4532 	if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4533 	  {
4534 	    load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4535 	    offset += UNITS_PER_WORD;
4536 	  }
4537     }
4538   else
4539     {
4540       offset = local_fsize - actual_fsize;
4541 
4542       /* If the current function calls __builtin_eh_return, then we need
4543          to restore the saved EH data registers.  */
4544       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4545 	{
4546 	  unsigned int i, regno;
4547 
4548 	  for (i = 0; ; ++i)
4549 	    {
4550 	      regno = EH_RETURN_DATA_REGNO (i);
4551 	      if (regno == INVALID_REGNUM)
4552 		break;
4553 
4554 	      /* Only for the first load.
4555 	         merge_sp_adjust_with_load holds the register load
4556 	         with which we will merge the sp adjustment.  */
4557 	      if (merge_sp_adjust_with_load == 0
4558 		  && local_fsize == 0
4559 		  && VAL_14_BITS_P (-actual_fsize))
4560 	        merge_sp_adjust_with_load = regno;
4561 	      else
4562 		load_reg (regno, offset, STACK_POINTER_REGNUM);
4563 	      offset += UNITS_PER_WORD;
4564 	    }
4565 	}
4566 
4567       for (i = 18; i >= 3; i--)
4568 	{
4569 	  if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4570 	    {
4571 	      /* Only for the first load.
4572 	         merge_sp_adjust_with_load holds the register load
4573 	         with which we will merge the sp adjustment.  */
4574 	      if (merge_sp_adjust_with_load == 0
4575 		  && local_fsize == 0
4576 		  && VAL_14_BITS_P (-actual_fsize))
4577 	        merge_sp_adjust_with_load = i;
4578 	      else
4579 		load_reg (i, offset, STACK_POINTER_REGNUM);
4580 	      offset += UNITS_PER_WORD;
4581 	    }
4582 	}
4583     }
4584 
4585   /* Align pointer properly (doubleword boundary).  */
4586   offset = (offset + 7) & ~7;
4587 
4588   /* FP register restores.  */
4589   if (save_fregs)
4590     {
4591       /* Adjust the register to index off of.  */
4592       if (frame_pointer_needed)
4593 	set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4594       else
4595 	set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4596 
4597       /* Actually do the restores now.  */
4598       for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4599 	if (df_regs_ever_live_p (i)
4600 	    || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4601 	  {
4602 	    rtx src = gen_rtx_MEM (DFmode,
4603 				   gen_rtx_POST_INC (word_mode, tmpreg));
4604 	    rtx dest = gen_rtx_REG (DFmode, i);
4605 	    emit_move_insn (dest, src);
4606 	  }
4607     }
4608 
4609   /* Emit a blockage insn here to keep these insns from being moved to
4610      an earlier spot in the epilogue, or into the main instruction stream.
4611 
4612      This is necessary as we must not cut the stack back before all the
4613      restores are finished.  */
4614   emit_insn (gen_blockage ());
4615 
4616   /* Reset stack pointer (and possibly frame pointer).  The stack
4617      pointer is initially set to fp + 64 to avoid a race condition.  */
4618   if (frame_pointer_needed)
4619     {
4620       rtx delta = GEN_INT (-64);
4621 
4622       set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4623       emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4624 			       stack_pointer_rtx, delta));
4625     }
4626   /* If we were deferring a callee register restore, do it now.  */
4627   else if (merge_sp_adjust_with_load)
4628     {
4629       rtx delta = GEN_INT (-actual_fsize);
4630       rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4631 
4632       emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4633     }
4634   else if (actual_fsize != 0)
4635     set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4636 		    - actual_fsize, 0);
4637 
4638   /* If we haven't restored %r2 yet (no frame pointer, and a stack
4639      frame greater than 8k), do so now.  */
4640   if (ret_off != 0)
4641     load_reg (2, ret_off, STACK_POINTER_REGNUM);
4642 
4643   if (DO_FRAME_NOTES && crtl->calls_eh_return)
4644     {
4645       rtx sa = EH_RETURN_STACKADJ_RTX;
4646 
4647       emit_insn (gen_blockage ());
4648       emit_insn (TARGET_64BIT
4649 		 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4650 		 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4651     }
4652 }
4653 
4654 bool
pa_can_use_return_insn (void)
4656 {
4657   if (!reload_completed)
4658     return false;
4659 
4660   if (frame_pointer_needed)
4661     return false;
4662 
4663   if (df_regs_ever_live_p (2))
4664     return false;
4665 
4666   if (crtl->profile)
4667     return false;
4668 
4669   return pa_compute_frame_size (get_frame_size (), 0) == 0;
4670 }
4671 
4672 rtx
hppa_pic_save_rtx (void)
4674 {
4675   return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4676 }
4677 
4678 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4679 #define NO_DEFERRED_PROFILE_COUNTERS 0
4680 #endif
4681 
4682 
4683 /* Vector of funcdef numbers.  */
4684 static vec<int> funcdef_nos;
4685 
4686 /* Output deferred profile counters.  */
4687 static void
output_deferred_profile_counters (void)
4689 {
4690   unsigned int i;
4691   int align, n;
4692 
  if (funcdef_nos.is_empty ())
    return;
4695 
4696   switch_to_section (data_section);
4697   align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4698   ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4699 
4700   for (i = 0; funcdef_nos.iterate (i, &n); i++)
4701     {
4702       targetm.asm_out.internal_label (asm_out_file, "LP", n);
4703       assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4704     }
4705 
4706   funcdef_nos.release ();
4707 }
4708 
4709 void
hppa_profile_hook (int label_no)
4711 {
4712   rtx_code_label *label_rtx = gen_label_rtx ();
4713   int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE);
4714   rtx arg_bytes, begin_label_rtx, mcount, sym;
4715   rtx_insn *call_insn;
4716   char begin_label_name[16];
4717   bool use_mcount_pcrel_call;
4718 
4719   /* Set up call destination.  */
4720   sym = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
4721   pa_encode_label (sym);
4722   mcount = gen_rtx_MEM (Pmode, sym);
4723 
4724   /* If we can reach _mcount with a pc-relative call, we can optimize
4725      loading the address of the current function.  This requires linker
4726      long branch stub support.  */
4727   if (!TARGET_PORTABLE_RUNTIME
4728       && !TARGET_LONG_CALLS
4729       && (TARGET_SOM || flag_function_sections))
    use_mcount_pcrel_call = true;
  else
    use_mcount_pcrel_call = false;
4733 
4734   ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4735 			       label_no);
4736   begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4737 
4738   emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4739 
4740   if (!use_mcount_pcrel_call)
4741     {
4742       /* The address of the function is loaded into %r25 with an instruction-
4743 	 relative sequence that avoids the use of relocations.  We use SImode
4744 	 for the address of the function in both 32 and 64-bit code to avoid
4745 	 having to provide DImode versions of the lcla2 pattern.  */
4746       if (TARGET_PA_20)
4747 	emit_insn (gen_lcla2 (gen_rtx_REG (SImode, 25), label_rtx));
4748       else
4749 	emit_insn (gen_lcla1 (gen_rtx_REG (SImode, 25), label_rtx));
4750     }
4751 
4752   if (!NO_DEFERRED_PROFILE_COUNTERS)
4753     {
4754       rtx count_label_rtx, addr, r24;
4755       char count_label_name[16];
4756 
4757       funcdef_nos.safe_push (label_no);
4758       ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4759       count_label_rtx = gen_rtx_SYMBOL_REF (Pmode,
4760 					    ggc_strdup (count_label_name));
4761 
4762       addr = force_reg (Pmode, count_label_rtx);
4763       r24 = gen_rtx_REG (Pmode, 24);
4764       emit_move_insn (r24, addr);
4765 
4766       arg_bytes = GEN_INT (TARGET_64BIT ? 24 : 12);
4767       if (use_mcount_pcrel_call)
4768 	call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4769 						     begin_label_rtx));
4770       else
4771 	call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4772 
4773       use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4774     }
4775   else
4776     {
4777       arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8);
4778       if (use_mcount_pcrel_call)
4779 	call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4780 						     begin_label_rtx));
4781       else
4782 	call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4783     }
4784 
4785   use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4786   use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4787 
4788   /* Indicate the _mcount call cannot throw, nor will it execute a
4789      non-local goto.  */
4790   make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4791 
4792   /* Allocate space for fixed arguments.  */
4793   if (reg_parm_stack_space > crtl->outgoing_args_size)
4794     crtl->outgoing_args_size = reg_parm_stack_space;
4795 }
4796 
4797 /* Fetch the return address for the frame COUNT steps up from
4798    the current frame, after the prologue.  FRAMEADDR is the
4799    frame pointer of the COUNT frame.
4800 
4801    We want to ignore any export stub remnants here.  To handle this,
4802    we examine the code at the return address, and if it is an export
4803    stub, we return a memory rtx for the stub return address stored
4804    at frame-24.
4805 
4806    The value returned is used in two different ways:
4807 
4808 	1. To find a function's caller.
4809 
4810 	2. To change the return address for a function.
4811 
4812    This function handles most instances of case 1; however, it will
4813    fail if there are two levels of stubs to execute on the return
4814    path.  The only way I believe that can happen is if the return value
4815    needs a parameter relocation, which never happens for C code.
4816 
4817    This function handles most instances of case 2; however, it will
4818    fail if we did not originally have stub code on the return path
4819    but will need stub code on the new return path.  This can happen if
4820    the caller & callee are both in the main program, but the new
4821    return location is in a shared library.  */
4822 
4823 rtx
4824 pa_return_addr_rtx (int count, rtx frameaddr)
4825 {
4826   rtx label;
4827   rtx rp;
4828   rtx saved_rp;
4829   rtx ins;
4830 
4831   /* The instruction stream at the return address of a PA1.X export stub is:
4832 
4833 	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
4834 	0x004010a1 | stub+12:  ldsid (sr0,rp),r1
4835 	0x00011820 | stub+16:  mtsp r1,sr0
4836 	0xe0400002 | stub+20:  be,n 0(sr0,rp)
4837 
4838      0xe0400002 must be specified as -532676606 so that it won't be
4839      rejected as an invalid immediate operand on 64-bit hosts.
4840 
4841      The instruction stream at the return address of a PA2.0 export stub is:
4842 
4843 	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
4844 	0xe840d002 | stub+12:  bve,n (rp)
4845   */
4846 
4847   HOST_WIDE_INT insns[4];
4848   int i, len;
4849 
4850   if (count != 0)
4851     return NULL_RTX;
4852 
4853   rp = get_hard_reg_initial_val (Pmode, 2);
4854 
4855   if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4856     return rp;
4857 
4858   /* If there is no export stub then just use the value saved from
4859      the return pointer register.  */
4860 
4861   saved_rp = gen_reg_rtx (Pmode);
4862   emit_move_insn (saved_rp, rp);
4863 
4864   /* Get pointer to the instruction stream.  We have to mask out the
4865      privilege level from the two low order bits of the return address
4866      pointer here so that ins will point to the start of the first
4867      instruction that would have been executed if we returned.  */
4868   ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4869   label = gen_label_rtx ();
4870 
4871   if (TARGET_PA_20)
4872     {
4873       insns[0] = 0x4bc23fd1;
4874       insns[1] = -398405630;
4875       len = 2;
4876     }
4877   else
4878     {
4879       insns[0] = 0x4bc23fd1;
4880       insns[1] = 0x004010a1;
4881       insns[2] = 0x00011820;
4882       insns[3] = -532676606;
4883       len = 4;
4884     }
4885 
4886   /* Check the instruction stream at the normal return address for the
4887      export stub.  If it is an export stub, then our return address is
4888      really in -24[frameaddr].  */
4889 
4890   for (i = 0; i < len; i++)
4891     {
4892       rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4893       rtx op1 = GEN_INT (insns[i]);
4894       emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4895     }
4896 
4897   /* Here we know that our return address points to an export
4898      stub.  We don't want to return the address of the export stub,
4899      but rather the return address of the export stub.  That return
4900      address is stored at -24[frameaddr].  */
4901 
4902   emit_move_insn (saved_rp,
4903 		  gen_rtx_MEM (Pmode,
4904 			       memory_address (Pmode,
4905 					       plus_constant (Pmode, frameaddr,
4906 							      -24))));
4907 
4908   emit_label (label);
4909 
4910   return saved_rp;
4911 }
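
/* Operationally (a sketch, not code from this file), the RTL emitted
   above behaves like:

	saved_rp = rp;
	ins = rp & MASK_RETURN_ADDR;
	if (ins[0..len-1] match the stub sequence word for word)
	  saved_rp = *(frameaddr - 24);
	return saved_rp;

   Any mismatch branches to LABEL and keeps the value saved from %r2.  */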
4912 
4913 void
4914 pa_emit_bcond_fp (rtx operands[])
4915 {
4916   enum rtx_code code = GET_CODE (operands[0]);
4917   rtx operand0 = operands[1];
4918   rtx operand1 = operands[2];
4919   rtx label = operands[3];
4920 
4921   emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
4922 		          gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4923 
4924   emit_jump_insn (gen_rtx_SET (pc_rtx,
4925 			       gen_rtx_IF_THEN_ELSE (VOIDmode,
4926 						     gen_rtx_fmt_ee (NE,
4927 							      VOIDmode,
4928 							      gen_rtx_REG (CCFPmode, 0),
4929 							      const0_rtx),
4930 						     gen_rtx_LABEL_REF (VOIDmode, label),
4931 						     pc_rtx)));
4932 
4933 }
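
/* As an illustrative sketch, for a comparison (gt x y) branching to
   LAB, the two insns emitted above have roughly this shape:

	(set (reg:CCFP 0) (gt:CCFP x y))
	(set (pc) (if_then_else (ne (reg:CCFP 0) (const_int 0))
				(label_ref LAB)
				(pc)))

   The condition is computed into the CCFP flag register, and the jump
   itself always tests that register against zero with NE.  */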
4934 
4935 /* Adjust the cost of a scheduling dependency.  Return the new cost
4936    of a dependency of type DEP_TYPE of INSN on DEP_INSN.  COST is the current cost.  */
4937 
4938 static int
4939 pa_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4940 		unsigned int)
4941 {
4942   enum attr_type attr_type;
4943 
4944   /* Don't adjust costs for a pa8000 chip, and do not adjust any
4945      true dependencies, as they are described with bypasses now.  */
4946   if (pa_cpu >= PROCESSOR_8000 || dep_type == 0)
4947     return cost;
4948 
4949   if (! recog_memoized (insn))
4950     return 0;
4951 
4952   attr_type = get_attr_type (insn);
4953 
4954   switch (dep_type)
4955     {
4956     case REG_DEP_ANTI:
4957       /* Anti dependency; DEP_INSN reads a register that INSN writes some
4958 	 cycles later.  */
4959 
4960       if (attr_type == TYPE_FPLOAD)
4961 	{
4962 	  rtx pat = PATTERN (insn);
4963 	  rtx dep_pat = PATTERN (dep_insn);
4964 	  if (GET_CODE (pat) == PARALLEL)
4965 	    {
4966 	      /* This happens for the fldXs,mb patterns.  */
4967 	      pat = XVECEXP (pat, 0, 0);
4968 	    }
4969 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4970 	    /* If this happens, we have to extend this to schedule
4971 	       optimally.  Return 0 for now.  */
4972 	    return 0;
4973 
4974 	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4975 	    {
4976 	      if (! recog_memoized (dep_insn))
4977 		return 0;
4978 	      switch (get_attr_type (dep_insn))
4979 		{
4980 		case TYPE_FPALU:
4981 		case TYPE_FPMULSGL:
4982 		case TYPE_FPMULDBL:
4983 		case TYPE_FPDIVSGL:
4984 		case TYPE_FPDIVDBL:
4985 		case TYPE_FPSQRTSGL:
4986 		case TYPE_FPSQRTDBL:
4987 		  /* A fpload can't be issued until one cycle before a
4988 		     preceding arithmetic operation has finished if
4989 		     the target of the fpload is any of the sources
4990 		     (or destination) of the arithmetic operation.  */
4991 		  return insn_default_latency (dep_insn) - 1;
4992 
4993 		default:
4994 		  return 0;
4995 		}
4996 	    }
4997 	}
4998       else if (attr_type == TYPE_FPALU)
4999 	{
5000 	  rtx pat = PATTERN (insn);
5001 	  rtx dep_pat = PATTERN (dep_insn);
5002 	  if (GET_CODE (pat) == PARALLEL)
5003 	    {
5004 	      /* This happens for the fldXs,mb patterns.  */
5005 	      pat = XVECEXP (pat, 0, 0);
5006 	    }
5007 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5008 	    /* If this happens, we have to extend this to schedule
5009 	       optimally.  Return 0 for now.  */
5010 	    return 0;
5011 
5012 	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
5013 	    {
5014 	      if (! recog_memoized (dep_insn))
5015 		return 0;
5016 	      switch (get_attr_type (dep_insn))
5017 		{
5018 		case TYPE_FPDIVSGL:
5019 		case TYPE_FPDIVDBL:
5020 		case TYPE_FPSQRTSGL:
5021 		case TYPE_FPSQRTDBL:
5022 		  /* An ALU flop can't be issued until two cycles before a
5023 		     preceding divide or sqrt operation has finished if
5024 		     the target of the ALU flop is any of the sources
5025 		     (or destination) of the divide or sqrt operation.  */
5026 		  return insn_default_latency (dep_insn) - 2;
5027 
5028 		default:
5029 		  return 0;
5030 		}
5031 	    }
5032 	}
5033 
5034       /* For other anti dependencies, the cost is 0.  */
5035       return 0;
5036 
5037     case REG_DEP_OUTPUT:
5038       /* Output dependency; DEP_INSN writes a register that INSN writes some
5039 	 cycles later.  */
5040       if (attr_type == TYPE_FPLOAD)
5041 	{
5042 	  rtx pat = PATTERN (insn);
5043 	  rtx dep_pat = PATTERN (dep_insn);
5044 	  if (GET_CODE (pat) == PARALLEL)
5045 	    {
5046 	      /* This happens for the fldXs,mb patterns.  */
5047 	      pat = XVECEXP (pat, 0, 0);
5048 	    }
5049 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5050 	    /* If this happens, we have to extend this to schedule
5051 	       optimally.  Return 0 for now.  */
5052 	    return 0;
5053 
5054 	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
5055 	    {
5056 	      if (! recog_memoized (dep_insn))
5057 		return 0;
5058 	      switch (get_attr_type (dep_insn))
5059 		{
5060 		case TYPE_FPALU:
5061 		case TYPE_FPMULSGL:
5062 		case TYPE_FPMULDBL:
5063 		case TYPE_FPDIVSGL:
5064 		case TYPE_FPDIVDBL:
5065 		case TYPE_FPSQRTSGL:
5066 		case TYPE_FPSQRTDBL:
5067 		  /* A fpload can't be issued until one cycle before a
5068 		     preceding arithmetic operation has finished if
5069 		     the target of the fpload is the destination of the
5070 		     arithmetic operation.
5071 
5072 		     Exception: For PA7100LC, PA7200 and PA7300, the cost
5073 		     is 3 cycles, unless they bundle together.   We also
5074 		     pay the penalty if the second insn is a fpload.  */
5075 		  return insn_default_latency (dep_insn) - 1;
5076 
5077 		default:
5078 		  return 0;
5079 		}
5080 	    }
5081 	}
5082       else if (attr_type == TYPE_FPALU)
5083 	{
5084 	  rtx pat = PATTERN (insn);
5085 	  rtx dep_pat = PATTERN (dep_insn);
5086 	  if (GET_CODE (pat) == PARALLEL)
5087 	    {
5088 	      /* This happens for the fldXs,mb patterns.  */
5089 	      pat = XVECEXP (pat, 0, 0);
5090 	    }
5091 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
5092 	    /* If this happens, we have to extend this to schedule
5093 	       optimally.  Return 0 for now.  */
5094 	    return 0;
5095 
5096 	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
5097 	    {
5098 	      if (! recog_memoized (dep_insn))
5099 		return 0;
5100 	      switch (get_attr_type (dep_insn))
5101 		{
5102 		case TYPE_FPDIVSGL:
5103 		case TYPE_FPDIVDBL:
5104 		case TYPE_FPSQRTSGL:
5105 		case TYPE_FPSQRTDBL:
5106 		  /* An ALU flop can't be issued until two cycles before a
5107 		     preceding divide or sqrt operation has finished if
5108 		     the target of the ALU flop is also the target of
5109 		     the divide or sqrt operation.  */
5110 		  return insn_default_latency (dep_insn) - 2;
5111 
5112 		default:
5113 		  return 0;
5114 		}
5115 	    }
5116 	}
5117 
5118       /* For other output dependencies, the cost is 0.  */
5119       return 0;
5120 
5121     default:
5122       gcc_unreachable ();
5123     }
5124 }
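
/* Illustrative case (a hypothetical schedule): if INSN is an fpload
   writing %fr4 and DEP_INSN is an earlier fpadd whose sources include
   %fr4, the REG_DEP_ANTI path above returns
   insn_default_latency (DEP_INSN) - 1, i.e. the load may issue one
   cycle before the arithmetic operation completes.  */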
5125 
5126 /* The 700 can only issue a single insn at a time.
5127    The 7XXX processors can issue two insns at a time.
5128    The 8000 can issue 4 insns at a time.  */
5129 static int
5130 pa_issue_rate (void)
5131 {
5132   switch (pa_cpu)
5133     {
5134     case PROCESSOR_700:		return 1;
5135     case PROCESSOR_7100:	return 2;
5136     case PROCESSOR_7100LC:	return 2;
5137     case PROCESSOR_7200:	return 2;
5138     case PROCESSOR_7300:	return 2;
5139     case PROCESSOR_8000:	return 4;
5140 
5141     default:
5142       gcc_unreachable ();
5143     }
5144 }
5145 
5146 
5147 
5148 /* Return the length of INSN plus any adjustment it needs; LENGTH is
5149    the length already computed for it.  Return LENGTH if no adjustment
5150    is necessary.
5151 
5152    Also compute the length of an inline block move here as it is too
5153    complicated to express as a length attribute in pa.md.  */
5154 int
5155 pa_adjust_insn_length (rtx_insn *insn, int length)
5156 {
5157   rtx pat = PATTERN (insn);
5158 
5159   /* If length is negative or undefined, provide initial length.  */
5160   if ((unsigned int) length >= INT_MAX)
5161     {
5162       if (GET_CODE (pat) == SEQUENCE)
5163 	insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
5164 
5165       switch (get_attr_type (insn))
5166 	{
5167 	case TYPE_MILLI:
5168 	  length = pa_attr_length_millicode_call (insn);
5169 	  break;
5170 	case TYPE_CALL:
5171 	  length = pa_attr_length_call (insn, 0);
5172 	  break;
5173 	case TYPE_SIBCALL:
5174 	  length = pa_attr_length_call (insn, 1);
5175 	  break;
5176 	case TYPE_DYNCALL:
5177 	  length = pa_attr_length_indirect_call (insn);
5178 	  break;
5179 	case TYPE_SH_FUNC_ADRS:
5180 	  length = pa_attr_length_millicode_call (insn) + 20;
5181 	  break;
5182 	default:
5183 	  gcc_unreachable ();
5184 	}
5185     }
5186 
5187   /* Block move pattern.  */
5188   if (NONJUMP_INSN_P (insn)
5189       && GET_CODE (pat) == PARALLEL
5190       && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5191       && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5192       && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
5193       && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5194       && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
5195     length += compute_cpymem_length (insn) - 4;
5196   /* Block clear pattern.  */
5197   else if (NONJUMP_INSN_P (insn)
5198 	   && GET_CODE (pat) == PARALLEL
5199 	   && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5200 	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5201 	   && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5202 	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
5203     length += compute_clrmem_length (insn) - 4;
5204   /* Conditional branch with an unfilled delay slot.  */
5205   else if (JUMP_P (insn) && ! simplejump_p (insn))
5206     {
5207       /* Adjust a short backwards conditional with an unfilled delay slot.  */
5208       if (GET_CODE (pat) == SET
5209 	  && length == 4
5210 	  && JUMP_LABEL (insn) != NULL_RTX
5211 	  && ! forward_branch_p (insn))
5212 	length += 4;
5213       else if (GET_CODE (pat) == PARALLEL
5214 	       && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5215 	       && length == 4)
5216 	length += 4;
5217       /* Adjust dbra insn with short backwards conditional branch with
5218 	 unfilled delay slot -- only for case where counter is in a
5219 	 general register.  */
5220       else if (GET_CODE (pat) == PARALLEL
5221 	       && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5222 	       && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5223  	       && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5224 	       && length == 4
5225 	       && ! forward_branch_p (insn))
5226 	length += 4;
5227     }
5228   return length;
5229 }
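
/* For instance (a sketch), a short backwards conditional branch whose
   delay slot could not be filled starts out with LENGTH == 4 and is
   bumped to 8 above, accounting for the extra instruction needed when
   the slot stays empty.  */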
5230 
5231 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook.  */
5232 
5233 static bool
5234 pa_print_operand_punct_valid_p (unsigned char code)
5235 {
5236   if (code == '@'
5237       || code == '#'
5238       || code == '*'
5239       || code == '^')
5240     return true;
5241 
5242   return false;
5243 }
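
/* As a hypothetical example of how these codes appear in pa.md output
   templates, something along the lines of

	"b%* %l0"

   relies on '*' to append the ",n" nullification completer, while a
   template ending in "%#" gets "\n\tnop" appended when the delay slot
   is empty.  */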
5244 
5245 /* Print operand X (an rtx) in assembler syntax to file FILE.
5246    CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5247    For `%' followed by punctuation, CODE is the punctuation and X is null.  */
5248 
5249 void
5250 pa_print_operand (FILE *file, rtx x, int code)
5251 {
5252   switch (code)
5253     {
5254     case '#':
5255       /* Output a 'nop' if there's nothing for the delay slot.  */
5256       if (dbr_sequence_length () == 0)
5257 	fputs ("\n\tnop", file);
5258       return;
5259     case '*':
5260       /* Output a nullification completer if there's nothing for the
5261 	 delay slot or nullification is requested.  */
5262       if (dbr_sequence_length () == 0 ||
5263 	  (final_sequence &&
5264 	   INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5265         fputs (",n", file);
5266       return;
5267     case 'R':
5268       /* Print out the second register name of a register pair.
5269 	 I.e., R (6) => 7.  */
5270       fputs (reg_names[REGNO (x) + 1], file);
5271       return;
5272     case 'r':
5273       /* A register or zero.  */
5274       if (x == const0_rtx
5275 	  || (x == CONST0_RTX (DFmode))
5276 	  || (x == CONST0_RTX (SFmode)))
5277 	{
5278 	  fputs ("%r0", file);
5279 	  return;
5280 	}
5281       else
5282 	break;
5283     case 'f':
5284       /* A register or zero (floating point).  */
5285       if (x == const0_rtx
5286 	  || (x == CONST0_RTX (DFmode))
5287 	  || (x == CONST0_RTX (SFmode)))
5288 	{
5289 	  fputs ("%fr0", file);
5290 	  return;
5291 	}
5292       else
5293 	break;
5294     case 'A':
5295       {
5296 	rtx xoperands[2];
5297 
5298 	xoperands[0] = XEXP (XEXP (x, 0), 0);
5299 	xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5300 	pa_output_global_address (file, xoperands[1], 0);
5301         fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5302 	return;
5303       }
5304 
5305     case 'C':			/* Plain (C)ondition */
5306     case 'X':
5307       switch (GET_CODE (x))
5308 	{
5309 	case EQ:
5310 	  fputs ("=", file);  break;
5311 	case NE:
5312 	  fputs ("<>", file);  break;
5313 	case GT:
5314 	  fputs (">", file);  break;
5315 	case GE:
5316 	  fputs (">=", file);  break;
5317 	case GEU:
5318 	  fputs (">>=", file);  break;
5319 	case GTU:
5320 	  fputs (">>", file);  break;
5321 	case LT:
5322 	  fputs ("<", file);  break;
5323 	case LE:
5324 	  fputs ("<=", file);  break;
5325 	case LEU:
5326 	  fputs ("<<=", file);  break;
5327 	case LTU:
5328 	  fputs ("<<", file);  break;
5329 	default:
5330 	  gcc_unreachable ();
5331 	}
5332       return;
5333     case 'N':			/* Condition, (N)egated */
5334       switch (GET_CODE (x))
5335 	{
5336 	case EQ:
5337 	  fputs ("<>", file);  break;
5338 	case NE:
5339 	  fputs ("=", file);  break;
5340 	case GT:
5341 	  fputs ("<=", file);  break;
5342 	case GE:
5343 	  fputs ("<", file);  break;
5344 	case GEU:
5345 	  fputs ("<<", file);  break;
5346 	case GTU:
5347 	  fputs ("<<=", file);  break;
5348 	case LT:
5349 	  fputs (">=", file);  break;
5350 	case LE:
5351 	  fputs (">", file);  break;
5352 	case LEU:
5353 	  fputs (">>", file);  break;
5354 	case LTU:
5355 	  fputs (">>=", file);  break;
5356 	default:
5357 	  gcc_unreachable ();
5358 	}
5359       return;
5360     /* For floating point comparisons.  Note that the output
5361        predicates are the complement of the desired mode.  The
5362        conditions for GT, GE, LT, LE and LTGT cause an invalid
5363        operation exception if the result is unordered and this
5364        exception is enabled in the floating-point status register.  */
5365     case 'Y':
5366       switch (GET_CODE (x))
5367 	{
5368 	case EQ:
5369 	  fputs ("!=", file);  break;
5370 	case NE:
5371 	  fputs ("=", file);  break;
5372 	case GT:
5373 	  fputs ("!>", file);  break;
5374 	case GE:
5375 	  fputs ("!>=", file);  break;
5376 	case LT:
5377 	  fputs ("!<", file);  break;
5378 	case LE:
5379 	  fputs ("!<=", file);  break;
5380 	case LTGT:
5381 	  fputs ("!<>", file);  break;
5382 	case UNLE:
5383 	  fputs ("!?<=", file);  break;
5384 	case UNLT:
5385 	  fputs ("!?<", file);  break;
5386 	case UNGE:
5387 	  fputs ("!?>=", file);  break;
5388 	case UNGT:
5389 	  fputs ("!?>", file);  break;
5390 	case UNEQ:
5391 	  fputs ("!?=", file);  break;
5392 	case UNORDERED:
5393 	  fputs ("!?", file);  break;
5394 	case ORDERED:
5395 	  fputs ("?", file);  break;
5396 	default:
5397 	  gcc_unreachable ();
5398 	}
5399       return;
5400     case 'S':			/* Condition, operands are (S)wapped.  */
5401       switch (GET_CODE (x))
5402 	{
5403 	case EQ:
5404 	  fputs ("=", file);  break;
5405 	case NE:
5406 	  fputs ("<>", file);  break;
5407 	case GT:
5408 	  fputs ("<", file);  break;
5409 	case GE:
5410 	  fputs ("<=", file);  break;
5411 	case GEU:
5412 	  fputs ("<<=", file);  break;
5413 	case GTU:
5414 	  fputs ("<<", file);  break;
5415 	case LT:
5416 	  fputs (">", file);  break;
5417 	case LE:
5418 	  fputs (">=", file);  break;
5419 	case LEU:
5420 	  fputs (">>=", file);  break;
5421 	case LTU:
5422 	  fputs (">>", file);  break;
5423 	default:
5424 	  gcc_unreachable ();
5425 	}
5426       return;
5427     case 'B':			/* Condition, (B)oth swapped and negate.  */
5428       switch (GET_CODE (x))
5429 	{
5430 	case EQ:
5431 	  fputs ("<>", file);  break;
5432 	case NE:
5433 	  fputs ("=", file);  break;
5434 	case GT:
5435 	  fputs (">=", file);  break;
5436 	case GE:
5437 	  fputs (">", file);  break;
5438 	case GEU:
5439 	  fputs (">>", file);  break;
5440 	case GTU:
5441 	  fputs (">>=", file);  break;
5442 	case LT:
5443 	  fputs ("<=", file);  break;
5444 	case LE:
5445 	  fputs ("<", file);  break;
5446 	case LEU:
5447 	  fputs ("<<", file);  break;
5448 	case LTU:
5449 	  fputs ("<<=", file);  break;
5450 	default:
5451 	  gcc_unreachable ();
5452 	}
5453       return;
5454     case 'k':
5455       gcc_assert (GET_CODE (x) == CONST_INT);
5456       fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5457       return;
5458     case 'Q':
5459       gcc_assert (GET_CODE (x) == CONST_INT);
5460       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5461       return;
5462     case 'L':
5463       gcc_assert (GET_CODE (x) == CONST_INT);
5464       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5465       return;
5466     case 'o':
5467       gcc_assert (GET_CODE (x) == CONST_INT
5468 		  && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
5469       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5470       return;
5471     case 'O':
5472       gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5473       fprintf (file, "%d", exact_log2 (INTVAL (x)));
5474       return;
5475     case 'p':
5476       gcc_assert (GET_CODE (x) == CONST_INT);
5477       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5478       return;
5479     case 'P':
5480       gcc_assert (GET_CODE (x) == CONST_INT);
5481       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5482       return;
5483     case 'I':
5484       if (GET_CODE (x) == CONST_INT)
5485 	fputs ("i", file);
5486       return;
5487     case 'M':
5488     case 'F':
5489       switch (GET_CODE (XEXP (x, 0)))
5490 	{
5491 	case PRE_DEC:
5492 	case PRE_INC:
5493 	  if (ASSEMBLER_DIALECT == 0)
5494 	    fputs ("s,mb", file);
5495 	  else
5496 	    fputs (",mb", file);
5497 	  break;
5498 	case POST_DEC:
5499 	case POST_INC:
5500 	  if (ASSEMBLER_DIALECT == 0)
5501 	    fputs ("s,ma", file);
5502 	  else
5503 	    fputs (",ma", file);
5504 	  break;
5505 	case PLUS:
5506 	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5507 	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5508 	    {
5509 	      if (ASSEMBLER_DIALECT == 0)
5510 		fputs ("x", file);
5511 	    }
5512 	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5513 		   || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5514 	    {
5515 	      if (ASSEMBLER_DIALECT == 0)
5516 		fputs ("x,s", file);
5517 	      else
5518 		fputs (",s", file);
5519 	    }
5520 	  else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5521 	    fputs ("s", file);
5522 	  break;
5523 	default:
5524 	  if (code == 'F' && ASSEMBLER_DIALECT == 0)
5525 	    fputs ("s", file);
5526 	  break;
5527 	}
5528       return;
5529     case 'G':
5530       pa_output_global_address (file, x, 0);
5531       return;
5532     case 'H':
5533       pa_output_global_address (file, x, 1);
5534       return;
5535     case 0:			/* Don't do anything special */
5536       break;
5537     case 'Z':
5538       {
5539 	unsigned op[3];
5540 	compute_zdepwi_operands (INTVAL (x), op);
5541 	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5542 	return;
5543       }
5544     case 'z':
5545       {
5546 	unsigned op[3];
5547 	compute_zdepdi_operands (INTVAL (x), op);
5548 	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5549 	return;
5550       }
5551     case 'c':
5552       /* We can get here from a .vtable_inherit due to our
5553 	 CONSTANT_ADDRESS_P rejecting perfectly good constant
5554 	 addresses.  */
5555       break;
5556     default:
5557       gcc_unreachable ();
5558     }
5559   if (GET_CODE (x) == REG)
5560     {
5561       fputs (reg_names [REGNO (x)], file);
5562       if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5563 	{
5564 	  fputs ("R", file);
5565 	  return;
5566 	}
5567       if (FP_REG_P (x)
5568 	  && GET_MODE_SIZE (GET_MODE (x)) <= 4
5569 	  && (REGNO (x) & 1) == 0)
5570 	fputs ("L", file);
5571     }
5572   else if (GET_CODE (x) == MEM)
5573     {
5574       int size = GET_MODE_SIZE (GET_MODE (x));
5575       rtx base = NULL_RTX;
5576       switch (GET_CODE (XEXP (x, 0)))
5577 	{
5578 	case PRE_DEC:
5579 	case POST_DEC:
5580           base = XEXP (XEXP (x, 0), 0);
5581 	  fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5582 	  break;
5583 	case PRE_INC:
5584 	case POST_INC:
5585           base = XEXP (XEXP (x, 0), 0);
5586 	  fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5587 	  break;
5588 	case PLUS:
5589 	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5590 	    fprintf (file, "%s(%s)",
5591 		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5592 		     reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5593 	  else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5594 	    fprintf (file, "%s(%s)",
5595 		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5596 		     reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5597 	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5598 		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5599 	    {
5600 	      /* Because the REG_POINTER flag can get lost during reload,
5601 		 pa_legitimate_address_p canonicalizes the order of the
5602 		 index and base registers in the combined move patterns.  */
5603 	      rtx base = XEXP (XEXP (x, 0), 1);
5604 	      rtx index = XEXP (XEXP (x, 0), 0);
5605 
5606 	      fprintf (file, "%s(%s)",
5607 		       reg_names [REGNO (index)], reg_names [REGNO (base)]);
5608 	    }
5609 	  else
5610 	    output_address (GET_MODE (x), XEXP (x, 0));
5611 	  break;
5612 	default:
5613 	  output_address (GET_MODE (x), XEXP (x, 0));
5614 	  break;
5615 	}
5616     }
5617   else
5618     output_addr_const (file, x);
5619 }
5620 
5621 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.  */
5622 
5623 void
5624 pa_output_global_address (FILE *file, rtx x, int round_constant)
5625 {
5626 
5627   /* Imagine  (high (const (plus ...))).  */
5628   if (GET_CODE (x) == HIGH)
5629     x = XEXP (x, 0);
5630 
5631   if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5632     output_addr_const (file, x);
5633   else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5634     {
5635       output_addr_const (file, x);
5636       fputs ("-$global$", file);
5637     }
5638   else if (GET_CODE (x) == CONST)
5639     {
5640       const char *sep = "";
5641       int offset = 0;		/* assembler wants -$global$ at end */
5642       rtx base = NULL_RTX;
5643 
5644       switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5645 	{
5646 	case LABEL_REF:
5647 	case SYMBOL_REF:
5648 	  base = XEXP (XEXP (x, 0), 0);
5649 	  output_addr_const (file, base);
5650 	  break;
5651 	case CONST_INT:
5652 	  offset = INTVAL (XEXP (XEXP (x, 0), 0));
5653 	  break;
5654 	default:
5655 	  gcc_unreachable ();
5656 	}
5657 
5658       switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5659 	{
5660 	case LABEL_REF:
5661 	case SYMBOL_REF:
5662 	  base = XEXP (XEXP (x, 0), 1);
5663 	  output_addr_const (file, base);
5664 	  break;
5665 	case CONST_INT:
5666 	  offset = INTVAL (XEXP (XEXP (x, 0), 1));
5667 	  break;
5668 	default:
5669 	  gcc_unreachable ();
5670 	}
5671 
5672       /* How bogus.  The compiler is apparently responsible for
5673 	 rounding the constant if it uses an LR field selector.
5674 
5675 	 The linker and/or assembler seem a better place since
5676 	 they have to do this kind of thing already.
5677 
5678 	 If we fail to do this, HP's optimizing linker may eliminate
5679 	 an addil, but not update the ldw/stw/ldo instruction that
5680 	 uses the result of the addil.  */
5681       if (round_constant)
5682 	offset = ((offset + 0x1000) & ~0x1fff);
5683 
5684       switch (GET_CODE (XEXP (x, 0)))
5685 	{
5686 	case PLUS:
5687 	  if (offset < 0)
5688 	    {
5689 	      offset = -offset;
5690 	      sep = "-";
5691 	    }
5692 	  else
5693 	    sep = "+";
5694 	  break;
5695 
5696 	case MINUS:
5697 	  gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5698 	  sep = "-";
5699 	  break;
5700 
5701 	default:
5702 	  gcc_unreachable ();
5703 	}
5704 
5705       if (!read_only_operand (base, VOIDmode) && !flag_pic)
5706 	fputs ("-$global$", file);
5707       if (offset)
5708 	fprintf (file, "%s%d", sep, offset);
5709     }
5710   else
5711     output_addr_const (file, x);
5712 }
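
/* Illustrative outputs, assuming non-PIC code and a symbol foo that
   is not read-only:

	(symbol_ref "foo")			  =>  foo-$global$
	(const (plus (symbol_ref "foo")
		     (const_int 4)))		  =>  foo-$global$+4

   Read-only and PIC references are printed without -$global$.  */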
5713 
5714 /* Output boilerplate text to appear at the beginning of the file.
5715    There are several possible versions.  */
5716 #define aputs(x) fputs(x, asm_out_file)
5717 static inline void
5718 pa_file_start_level (void)
5719 {
5720   if (TARGET_64BIT)
5721     aputs ("\t.LEVEL 2.0w\n");
5722   else if (TARGET_PA_20)
5723     aputs ("\t.LEVEL 2.0\n");
5724   else if (TARGET_PA_11)
5725     aputs ("\t.LEVEL 1.1\n");
5726   else
5727     aputs ("\t.LEVEL 1.0\n");
5728 }
5729 
5730 static inline void
5731 pa_file_start_space (int sortspace)
5732 {
5733   aputs ("\t.SPACE $PRIVATE$");
5734   if (sortspace)
5735     aputs (",SORT=16");
5736   aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5737   if (flag_tm)
5738     aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5739   aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5740 	 "\n\t.SPACE $TEXT$");
5741   if (sortspace)
5742     aputs (",SORT=8");
5743   aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5744 	 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5745 }
5746 
5747 static inline void
5748 pa_file_start_file (int want_version)
5749 {
5750   if (write_symbols != NO_DEBUG)
5751     {
5752       output_file_directive (asm_out_file, main_input_filename);
5753       if (want_version)
5754 	aputs ("\t.version\t\"01.01\"\n");
5755     }
5756 }
5757 
5758 static inline void
5759 pa_file_start_mcount (const char *aswhat)
5760 {
5761   if (profile_flag)
5762     fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5763 }
5764 
5765 static void
5766 pa_elf_file_start (void)
5767 {
5768   pa_file_start_level ();
5769   pa_file_start_mcount ("ENTRY");
5770   pa_file_start_file (0);
5771 }
5772 
5773 static void
5774 pa_som_file_start (void)
5775 {
5776   pa_file_start_level ();
5777   pa_file_start_space (0);
5778   aputs ("\t.IMPORT $global$,DATA\n"
5779          "\t.IMPORT $$dyncall,MILLICODE\n");
5780   pa_file_start_mcount ("CODE");
5781   pa_file_start_file (0);
5782 }
5783 
5784 static void
5785 pa_linux_file_start (void)
5786 {
5787   pa_file_start_file (1);
5788   pa_file_start_level ();
5789   pa_file_start_mcount ("CODE");
5790 }
5791 
5792 static void
5793 pa_hpux64_gas_file_start (void)
5794 {
5795   pa_file_start_level ();
5796 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5797   if (profile_flag)
5798     ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5799 #endif
5800   pa_file_start_file (1);
5801 }
5802 
5803 static void
5804 pa_hpux64_hpas_file_start (void)
5805 {
5806   pa_file_start_level ();
5807   pa_file_start_space (1);
5808   pa_file_start_mcount ("CODE");
5809   pa_file_start_file (0);
5810 }
5811 #undef aputs
5812 
5813 /* Search the deferred plabel list for SYMBOL and return its internal
5814    label.  If an entry for SYMBOL is not found, a new entry is created.  */
5815 
5816 rtx
5817 pa_get_deferred_plabel (rtx symbol)
5818 {
5819   const char *fname = XSTR (symbol, 0);
5820   size_t i;
5821 
5822   /* See if we have already put this function on the list of deferred
5823      plabels.  This list is generally small, so a linear search is not
5824      too ugly.  If it proves too slow, replace it with something faster.  */
5825   for (i = 0; i < n_deferred_plabels; i++)
5826     if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5827       break;
5828 
5829   /* If the deferred plabel list is empty, or this entry was not found
5830      on the list, create a new entry on the list.  */
5831   if (deferred_plabels == NULL || i == n_deferred_plabels)
5832     {
5833       tree id;
5834 
5835       if (deferred_plabels == 0)
5836 	deferred_plabels =  ggc_alloc<deferred_plabel> ();
5837       else
5838         deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5839                                           deferred_plabels,
5840                                           n_deferred_plabels + 1);
5841 
5842       i = n_deferred_plabels++;
5843       deferred_plabels[i].internal_label = gen_label_rtx ();
5844       deferred_plabels[i].symbol = symbol;
5845 
5846       /* Gross.  We have just implicitly taken the address of this
5847 	 function.  Mark it in the same manner as assemble_name.  */
5848       id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5849       if (id)
5850 	mark_referenced (id);
5851     }
5852 
5853   return deferred_plabels[i].internal_label;
5854 }
5855 
5856 static void
5857 output_deferred_plabels (void)
5858 {
5859   size_t i;
5860 
5861   /* If we have some deferred plabels, then we need to switch into the
5862      data or readonly data section, and align it to a 4 byte boundary
5863      before outputting the deferred plabels.  */
5864   if (n_deferred_plabels)
5865     {
5866       switch_to_section (flag_pic ? data_section : readonly_data_section);
5867       ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5868     }
5869 
5870   /* Now output the deferred plabels.  */
5871   for (i = 0; i < n_deferred_plabels; i++)
5872     {
5873       targetm.asm_out.internal_label (asm_out_file, "L",
5874 		 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5875       assemble_integer (deferred_plabels[i].symbol,
5876 			TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5877     }
5878 }
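
/* The output is a small table of code pointers, roughly (the label
   number is arbitrary):

	L$0042:
		.word	foo

   with double-word entries in the 64-bit runtime.  */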
5879 
5880 /* Initialize optabs to point to emulation routines.  */
5881 
5882 static void
5883 pa_init_libfuncs (void)
5884 {
5885   if (HPUX_LONG_DOUBLE_LIBRARY)
5886     {
5887       set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5888       set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5889       set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5890       set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5891       set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5892       set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5893       set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5894       set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5895       set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5896 
5897       set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5898       set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5899       set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5900       set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5901       set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5902       set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5903       set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5904 
5905       set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5906       set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5907       set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5908       set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5909 
5910       set_conv_libfunc (sfix_optab, SImode, TFmode,
5911 			TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5912 				     : "_U_Qfcnvfxt_quad_to_sgl");
5913       set_conv_libfunc (sfix_optab, DImode, TFmode,
5914 			"_U_Qfcnvfxt_quad_to_dbl");
5915       set_conv_libfunc (ufix_optab, SImode, TFmode,
5916 			"_U_Qfcnvfxt_quad_to_usgl");
5917       set_conv_libfunc (ufix_optab, DImode, TFmode,
5918 			"_U_Qfcnvfxt_quad_to_udbl");
5919 
5920       set_conv_libfunc (sfloat_optab, TFmode, SImode,
5921 			"_U_Qfcnvxf_sgl_to_quad");
5922       set_conv_libfunc (sfloat_optab, TFmode, DImode,
5923 			"_U_Qfcnvxf_dbl_to_quad");
5924       set_conv_libfunc (ufloat_optab, TFmode, SImode,
5925 			"_U_Qfcnvxf_usgl_to_quad");
5926       set_conv_libfunc (ufloat_optab, TFmode, DImode,
5927 			"_U_Qfcnvxf_udbl_to_quad");
5928     }
5929 
5930   if (TARGET_SYNC_LIBCALL)
5931     init_sync_libfuncs (8);
5932 }
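
/* The effect is that, e.g., a TFmode (long double) addition in code
   built against the HP-UX long-double library expands into a call to
   _U_Qfadd instead of inline floating-point arithmetic.  */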
5933 
5934 /* HP's millicode routines mean something special to the assembler.
5935    Keep track of which ones we have used.  */
5936 
5937 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5938 static void import_milli (enum millicodes);
5939 static char imported[(int) end1000];
5940 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5941 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5942 #define MILLI_START 10
5943 
5944 static void
5945 import_milli (enum millicodes code)
5946 {
5947   char str[sizeof (import_string)];
5948 
5949   if (!imported[(int) code])
5950     {
5951       imported[(int) code] = 1;
5952       strcpy (str, import_string);
5953       memcpy (str + MILLI_START, milli_names[(int) code], 4);
5954       output_asm_insn (str, 0);
5955     }
5956 }
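
/* For example, import_milli (mulI) splices "mulI" over the dots in
   import_string and emits

	.IMPORT $$mulI,MILLICODE

   exactly once per output file; later requests find the imported[]
   cache set and emit nothing.  */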
5957 
5958 /* The register constraints have put the operands and return value in
5959    the proper registers.  */
5960 
5961 const char *
5962 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
5963 {
5964   import_milli (mulI);
5965   return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5966 }
5967 
5968 /* Emit the rtl for doing a division by a constant.  */
5969 
5970 /* Do magic division millicodes exist for this value? */
5971 const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5972 
5973 /* We'll use an array to keep track of the magic millicodes and
5974    whether or not we've used them already. [n][0] is signed, [n][1] is
5975    unsigned.  */
5976 
5977 static int div_milli[16][2];
5978 
5979 int
5980 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5981 {
5982   if (GET_CODE (operands[2]) == CONST_INT
5983       && INTVAL (operands[2]) > 0
5984       && INTVAL (operands[2]) < 16
5985       && pa_magic_milli[INTVAL (operands[2])])
5986     {
5987       rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5988 
5989       emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5990       emit
5991 	(gen_rtx_PARALLEL
5992 	 (VOIDmode,
5993 	  gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
5994 				     gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5995 						     SImode,
5996 						     gen_rtx_REG (SImode, 26),
5997 						     operands[2])),
5998 		     gen_rtx_CLOBBER (VOIDmode, operands[4]),
5999 		     gen_rtx_CLOBBER (VOIDmode, operands[3]),
6000 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
6001 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
6002 		     gen_rtx_CLOBBER (VOIDmode, ret))));
6003       emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
6004       return 1;
6005     }
6006   return 0;
6007 }
6008 
6009 const char *
6010 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
6011 {
6012   int divisor;
6013 
6014   /* If the divisor is a constant, try to use one of the special
6015      opcodes.  */
6016   if (GET_CODE (operands[0]) == CONST_INT)
6017     {
6018       static char buf[100];
6019       divisor = INTVAL (operands[0]);
6020       if (!div_milli[divisor][unsignedp])
6021 	{
6022 	  div_milli[divisor][unsignedp] = 1;
6023 	  if (unsignedp)
6024 	    output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
6025 	  else
6026 	    output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
6027 	}
6028       if (unsignedp)
6029 	{
6030 	  sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
6031 		   INTVAL (operands[0]));
6032 	  return pa_output_millicode_call (insn,
6033 					   gen_rtx_SYMBOL_REF (SImode, buf));
6034 	}
6035       else
6036 	{
6037 	  sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
6038 		   INTVAL (operands[0]));
6039 	  return pa_output_millicode_call (insn,
6040 					   gen_rtx_SYMBOL_REF (SImode, buf));
6041 	}
6042     }
6043   /* Divisor isn't a special constant.  */
6044   else
6045     {
6046       if (unsignedp)
6047 	{
6048 	  import_milli (divU);
6049 	  return pa_output_millicode_call (insn,
6050 					gen_rtx_SYMBOL_REF (SImode, "$$divU"));
6051 	}
6052       else
6053 	{
6054 	  import_milli (divI);
6055 	  return pa_output_millicode_call (insn,
6056 					gen_rtx_SYMBOL_REF (SImode, "$$divI"));
6057 	}
6058     }
6059 }
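
/* E.g., an unsigned division by the magic constant 7 imports and
   calls the specialized routine "$$divU_7", whereas a division by a
   run-time value falls back to the generic "$$divU" ("$$divI" when
   signed).  */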
6060 
6061 /* Output a $$rem millicode to do mod.  */
6062 
6063 const char *
6064 pa_output_mod_insn (int unsignedp, rtx_insn *insn)
6065 {
6066   if (unsignedp)
6067     {
6068       import_milli (remU);
6069       return pa_output_millicode_call (insn,
6070 				       gen_rtx_SYMBOL_REF (SImode, "$$remU"));
6071     }
6072   else
6073     {
6074       import_milli (remI);
6075       return pa_output_millicode_call (insn,
6076 				       gen_rtx_SYMBOL_REF (SImode, "$$remI"));
6077     }
6078 }
6079 
6080 void
6081 pa_output_arg_descriptor (rtx_insn *call_insn)
6082 {
6083   const char *arg_regs[4];
6084   machine_mode arg_mode;
6085   rtx link;
6086   int i, output_flag = 0;
6087   int regno;
6088 
6089   /* We neither need nor want argument location descriptors for the
6090      64-bit runtime environment or the ELF32 environment.  */
6091   if (TARGET_64BIT || TARGET_ELF32)
6092     return;
6093 
6094   for (i = 0; i < 4; i++)
6095     arg_regs[i] = 0;
6096 
6097   /* Specify explicitly that no argument relocations should take place
6098      if using the portable runtime calling conventions.  */
6099   if (TARGET_PORTABLE_RUNTIME)
6100     {
6101       fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
6102 	     asm_out_file);
6103       return;
6104     }
6105 
6106   gcc_assert (CALL_P (call_insn));
6107   for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
6108        link; link = XEXP (link, 1))
6109     {
6110       rtx use = XEXP (link, 0);
6111 
6112       if (! (GET_CODE (use) == USE
6113 	     && GET_CODE (XEXP (use, 0)) == REG
6114 	     && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6115 	continue;
6116 
6117       arg_mode = GET_MODE (XEXP (use, 0));
6118       regno = REGNO (XEXP (use, 0));
6119       if (regno >= 23 && regno <= 26)
6120 	{
6121 	  arg_regs[26 - regno] = "GR";
6122 	  if (arg_mode == DImode)
6123 	    arg_regs[25 - regno] = "GR";
6124 	}
6125       else if (regno >= 32 && regno <= 39)
6126 	{
6127 	  if (arg_mode == SFmode)
6128 	    arg_regs[(regno - 32) / 2] = "FR";
6129 	  else
6130 	    {
6131 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
6132 	      arg_regs[(regno - 34) / 2] = "FR";
6133 	      arg_regs[(regno - 34) / 2 + 1] = "FU";
6134 #else
6135 	      arg_regs[(regno - 34) / 2] = "FU";
6136 	      arg_regs[(regno - 34) / 2 + 1] = "FR";
6137 #endif
6138 	    }
6139 	}
6140     }
6141   fputs ("\t.CALL ", asm_out_file);
6142   for (i = 0; i < 4; i++)
6143     {
6144       if (arg_regs[i])
6145 	{
6146 	  if (output_flag++)
6147 	    fputc (',', asm_out_file);
6148 	  fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
6149 	}
6150     }
6151   fputc ('\n', asm_out_file);
6152 }
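
/* As an illustration, a call passing two ints (in %r26 and %r25) and
   a double in the floating-point argument registers would produce
   something like

	.CALL ARGW0=GR,ARGW1=GR,ARGW2=FR,ARGW3=FU

   with FR/FU swapped when HP_FP_ARG_DESCRIPTOR_REVERSED is defined.  */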
6153 
6154 /* Inform reload about cases where moving X with a mode MODE to or from
6155    a register in RCLASS requires an extra scratch or immediate register.
6156    Return the class needed for the immediate register.  */
6157 
6158 static reg_class_t
6159 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
6160 		     machine_mode mode, secondary_reload_info *sri)
6161 {
6162   int regno;
6163   enum reg_class rclass = (enum reg_class) rclass_i;
6164 
6165   /* Handle the easy stuff first.  */
6166   if (rclass == R1_REGS)
6167     return NO_REGS;
6168 
6169   if (REG_P (x))
6170     {
6171       regno = REGNO (x);
6172       if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
6173 	return NO_REGS;
6174     }
6175   else
6176     regno = -1;
6177 
6178   /* If we have something like (mem (mem (...))), we can safely assume the
6179      inner MEM will end up in a general register after reloading, so there's
6180      no need for a secondary reload.  */
6181   if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
6182     return NO_REGS;
6183 
6184   /* Trying to load a constant into a FP register during PIC code
6185      generation requires %r1 as a scratch register.  For float modes,
6186      the only legitimate constant is CONST0_RTX.  However, there are
6187      a few patterns that accept constant double operands.  */
6188   if (flag_pic
6189       && FP_REG_CLASS_P (rclass)
6190       && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
6191     {
6192       switch (mode)
6193 	{
6194 	case E_SImode:
6195 	  sri->icode = CODE_FOR_reload_insi_r1;
6196 	  break;
6197 
6198 	case E_DImode:
6199 	  sri->icode = CODE_FOR_reload_indi_r1;
6200 	  break;
6201 
6202 	case E_SFmode:
6203 	  sri->icode = CODE_FOR_reload_insf_r1;
6204 	  break;
6205 
6206 	case E_DFmode:
6207 	  sri->icode = CODE_FOR_reload_indf_r1;
6208 	  break;
6209 
6210 	default:
6211 	  gcc_unreachable ();
6212 	}
6213       return NO_REGS;
6214     }
6215 
6216   /* Secondary reloads of symbolic expressions require %r1 as a scratch
6217      register when we're generating PIC code or when the operand isn't
6218      readonly.  */
6219   if (pa_symbolic_expression_p (x))
6220     {
6221       if (GET_CODE (x) == HIGH)
6222 	x = XEXP (x, 0);
6223 
6224       if (flag_pic || !read_only_operand (x, VOIDmode))
6225 	{
6226 	  switch (mode)
6227 	    {
6228 	    case E_SImode:
6229 	      sri->icode = CODE_FOR_reload_insi_r1;
6230 	      break;
6231 
6232 	    case E_DImode:
6233 	      sri->icode = CODE_FOR_reload_indi_r1;
6234 	      break;
6235 
6236 	    default:
6237 	      gcc_unreachable ();
6238 	    }
6239 	  return NO_REGS;
6240 	}
6241     }
6242 
6243   /* Profiling showed the PA port spends about 1.3% of its compilation
6244      time in true_regnum from calls inside pa_secondary_reload_class.  */
6245   if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6246     regno = true_regnum (x);
6247 
6248   /* Handle reloads for floating point loads and stores.  */
6249   if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
6250       && FP_REG_CLASS_P (rclass))
6251     {
6252       if (MEM_P (x))
6253 	{
6254 	  x = XEXP (x, 0);
6255 
6256 	  /* We don't need a secondary reload for indexed memory addresses.
6257 
6258 	     When INT14_OK_STRICT is true, it might appear that we could
6259 	     directly allow register indirect memory addresses.  However,
6260 	     this doesn't work because we don't support SUBREGs in
6261 	     floating-point register copies and reload doesn't tell us
6262 	     when it's going to use a SUBREG.  */
6263 	  if (IS_INDEX_ADDR_P (x))
6264 	    return NO_REGS;
6265 	}
6266 
6267       /* Request a secondary reload with a general scratch register
6268 	 for everything else.  ??? Could symbolic operands be handled
6269 	 directly when generating non-pic PA 2.0 code?  */
6270       sri->icode = (in_p
6271 		    ? direct_optab_handler (reload_in_optab, mode)
6272 		    : direct_optab_handler (reload_out_optab, mode));
6273       return NO_REGS;
6274     }
6275 
6276   /* A SAR<->FP register copy requires an intermediate general register
6277      and secondary memory.  We need a secondary reload with a general
6278      scratch register for spills.  */
6279   if (rclass == SHIFT_REGS)
6280     {
6281       /* Handle spill.  */
6282       if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6283 	{
6284 	  sri->icode = (in_p
6285 			? direct_optab_handler (reload_in_optab, mode)
6286 			: direct_optab_handler (reload_out_optab, mode));
6287 	  return NO_REGS;
6288 	}
6289 
6290       /* Handle FP copy.  */
6291       if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6292 	return GENERAL_REGS;
6293     }
6294 
6295   if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6296       && REGNO_REG_CLASS (regno) == SHIFT_REGS
6297       && FP_REG_CLASS_P (rclass))
6298     return GENERAL_REGS;
6299 
6300   return NO_REGS;
6301 }
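
/* For instance, reloading (const_int 42) into a floating-point
   register while generating PIC SImode code takes the CONST_INT path
   above and selects CODE_FOR_reload_insi_r1, which supplies %r1 as
   the scratch register.  */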
6302 
6303 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.  */
6304 
6305 static bool
6306 pa_secondary_memory_needed (machine_mode mode ATTRIBUTE_UNUSED,
6307 			    reg_class_t class1 ATTRIBUTE_UNUSED,
6308 			    reg_class_t class2 ATTRIBUTE_UNUSED)
6309 {
6310 #ifdef PA_SECONDARY_MEMORY_NEEDED
6311   return PA_SECONDARY_MEMORY_NEEDED (mode, class1, class2);
6312 #else
6313   return false;
6314 #endif
6315 }
6316 
6317 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY.  The argument pointer
6318    is only marked as live on entry by df-scan when it is a fixed
6319    register.  It isn't a fixed register in the 64-bit runtime,
6320    so we need to mark it here.  */
6321 
6322 static void
6323 pa_extra_live_on_entry (bitmap regs)
6324 {
6325   if (TARGET_64BIT)
6326     bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6327 }
6328 
6329 /* Implement EH_RETURN_HANDLER_RTX.  The MEM needs to be volatile
6330    to prevent it from being deleted.  */
6331 
6332 rtx
6333 pa_eh_return_handler_rtx (void)
6334 {
6335   rtx tmp;
6336 
6337   tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6338 		      TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6339   tmp = gen_rtx_MEM (word_mode, tmp);
6340   tmp->volatil = 1;
6341   return tmp;
6342 }
6343 
6344 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6345    by invisible reference.  As a GCC extension, we also pass anything
6346    with a zero or variable size by reference.
6347 
6348    The 64-bit runtime does not describe passing any types by invisible
6349    reference.  The internals of GCC can't currently handle passing
6350    empty structures, and zero or variable length arrays when they are
6351    not passed entirely on the stack or by reference.  Thus, as a GCC
6352    extension, we pass these types by reference.  The HP compiler doesn't
6353    support these types, so hopefully there shouldn't be any compatibility
6354    issues.  This may have to be revisited when HP releases a C99 compiler
6355    or updates the ABI.  */
6356 
6357 static bool
6358 pa_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6359 {
6360   HOST_WIDE_INT size = arg.type_size_in_bytes ();
6361   if (TARGET_64BIT)
6362     return size <= 0;
6363   else
6364     return size <= 0 || size > 8;
6365 }
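
/* For example, a 12-byte struct is passed by invisible reference in
   the 32-bit runtime (size > 8) but by value in the 64-bit runtime;
   a zero-sized struct is passed by reference in both.  */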
6366 
6367 /* Implement TARGET_FUNCTION_ARG_PADDING.  */
6368 
6369 static pad_direction
6370 pa_function_arg_padding (machine_mode mode, const_tree type)
6371 {
6372   if (mode == BLKmode
6373       || (TARGET_64BIT
6374 	  && type
6375 	  && (AGGREGATE_TYPE_P (type)
6376 	      || TREE_CODE (type) == COMPLEX_TYPE
6377 	      || TREE_CODE (type) == VECTOR_TYPE)))
6378     {
6379       /* Return PAD_NONE if justification is not required.  */
6380       if (type
6381 	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6382 	  && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6383 	return PAD_NONE;
6384 
6385       /* The directions set here are ignored when a BLKmode argument larger
6386 	 than a word is placed in a register.  Different code is used for
6387 	 the stack and registers.  This makes it difficult to have a
6388 	 consistent data representation for both the stack and registers.
6389 	 For both runtimes, the justification and padding for arguments on
6390 	 the stack and in registers should be identical.  */
6391       if (TARGET_64BIT)
6392 	/* The 64-bit runtime specifies left justification for aggregates.  */
6393 	return PAD_UPWARD;
6394       else
6395 	/* The 32-bit runtime architecture specifies right justification.
6396 	   When the argument is passed on the stack, the argument is padded
6397 	   with garbage on the left.  The HP compiler pads with zeros.  */
6398 	return PAD_DOWNWARD;
6399     }
6400 
6401   if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6402     return PAD_DOWNWARD;
6403   else
6404     return PAD_NONE;
6405 }
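
/* E.g., a 3-byte BLKmode argument in the 32-bit runtime gets
   PAD_DOWNWARD: it is right-justified in its argument word, with the
   padding (zeros from the HP compiler, garbage here) on the left.  */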
6406 
6407 
6408 /* Do what is necessary for `va_start'.  We look at the current function
6409    to determine if stdargs or varargs is used and fill in an initial
6410    va_list.  A pointer to this constructor is returned.  */
6411 
6412 static rtx
6413 hppa_builtin_saveregs (void)
6414 {
6415   rtx offset, dest;
6416   tree fntype = TREE_TYPE (current_function_decl);
6417   int argadj = ((!stdarg_p (fntype))
6418 		? UNITS_PER_WORD : 0);
6419 
6420   if (argadj)
6421     offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6422   else
6423     offset = crtl->args.arg_offset_rtx;
6424 
6425   if (TARGET_64BIT)
6426     {
6427       int i, off;
6428 
6429       /* Adjust for varargs/stdarg differences.  */
6430       if (argadj)
6431 	offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6432       else
6433 	offset = crtl->args.arg_offset_rtx;
6434 
6435       /* We need to save %r26 .. %r19 inclusive starting at offset -64
6436 	 from the incoming arg pointer and growing to larger addresses.  */
6437       for (i = 26, off = -64; i >= 19; i--, off += 8)
6438 	emit_move_insn (gen_rtx_MEM (word_mode,
6439 				     plus_constant (Pmode,
6440 						    arg_pointer_rtx, off)),
6441 			gen_rtx_REG (word_mode, i));
6442 
6443       /* The incoming args pointer points just beyond the flushback area;
6444 	 normally this is not a serious concern.  However, when we are doing
6445 	 varargs/stdargs we want to make the arg pointer point to the start
6446 	 of the incoming argument area.  */
6447       emit_move_insn (virtual_incoming_args_rtx,
6448 		      plus_constant (Pmode, arg_pointer_rtx, -64));
6449 
6450       /* Now return a pointer to the first anonymous argument.  */
6451       return copy_to_reg (expand_binop (Pmode, add_optab,
6452 					virtual_incoming_args_rtx,
6453 					offset, 0, 0, OPTAB_LIB_WIDEN));
6454     }
6455 
6456   /* Store general registers on the stack.  */
6457   dest = gen_rtx_MEM (BLKmode,
6458 		      plus_constant (Pmode, crtl->args.internal_arg_pointer,
6459 				     -16));
6460   set_mem_alias_set (dest, get_varargs_alias_set ());
6461   set_mem_align (dest, BITS_PER_WORD);
6462   move_block_from_reg (23, dest, 4);
6463 
6464   /* move_block_from_reg will emit code to store the argument registers
6465      individually as scalar stores.
6466 
6467      However, other insns may later load from the same addresses for
6468      a structure load (passing a struct to a varargs routine).
6469 
6470      The alias code assumes that such aliasing can never happen, so we
6471      have to keep memory referencing insns from moving up beyond the
6472      last argument register store.  So we emit a blockage insn here.  */
6473   emit_insn (gen_blockage ());
6474 
6475   return copy_to_reg (expand_binop (Pmode, add_optab,
6476 				    crtl->args.internal_arg_pointer,
6477 				    offset, 0, 0, OPTAB_LIB_WIDEN));
6478 }
6479 
6480 static void
6481 hppa_va_start (tree valist, rtx nextarg)
6482 {
6483   nextarg = expand_builtin_saveregs ();
6484   std_expand_builtin_va_start (valist, nextarg);
6485 }
6486 
6487 static tree
6488 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6489 			   gimple_seq *post_p)
6490 {
6491   if (TARGET_64BIT)
6492     {
6493       /* Args grow upward.  We can use the generic routines.  */
6494       return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6495     }
6496   else /* !TARGET_64BIT */
6497     {
6498       tree ptr = build_pointer_type (type);
6499       tree valist_type;
6500       tree t, u;
6501       unsigned int size, ofs;
6502       bool indirect;
6503 
6504       indirect = pass_va_arg_by_reference (type);
6505       if (indirect)
6506 	{
6507 	  type = ptr;
6508 	  ptr = build_pointer_type (type);
6509 	}
6510       size = int_size_in_bytes (type);
6511       valist_type = TREE_TYPE (valist);
6512 
6513       /* Args grow down.  Not handled by generic routines.  */
6514 
6515       u = fold_convert (sizetype, size_in_bytes (type));
6516       u = fold_build1 (NEGATE_EXPR, sizetype, u);
6517       t = fold_build_pointer_plus (valist, u);
6518 
6519       /* Align to 4 or 8 byte boundary depending on argument size.  */
6520 
6521       u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6522       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6523       t = fold_convert (valist_type, t);
6524 
6525       t = build2 (MODIFY_EXPR, valist_type, valist, t);
6526 
6527       ofs = (8 - size) % 4;
6528       if (ofs != 0)
6529 	t = fold_build_pointer_plus_hwi (t, ofs);
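      /* For example, a 6-byte structure gives size = 6: the pointer is
	 decremented by 6, aligned down to an 8-byte boundary, and then
	 advanced by ofs = (8 - 6) % 4 = 2 so that it addresses the
	 argument within its slot.  */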

      t = fold_convert (ptr, t);
      t = build_va_arg_indirect_ref (t);

      if (indirect)
	t = build_va_arg_indirect_ref (t);

      return t;
    }
}

/* True if MODE is valid for the target.  By "valid", we mean able to
   be manipulated in non-trivial ways.  In particular, this means all
   the arithmetic is supported.

   Currently, TImode is not valid as the HP 64-bit runtime documentation
   doesn't document the alignment and calling conventions for this type.
   Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
   2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE.  */

static bool
pa_scalar_mode_supported_p (scalar_mode mode)
{
  int precision = GET_MODE_PRECISION (mode);

  switch (GET_MODE_CLASS (mode))
    {
    case MODE_PARTIAL_INT:
    case MODE_INT:
      if (precision == CHAR_TYPE_SIZE)
	return true;
      if (precision == SHORT_TYPE_SIZE)
	return true;
      if (precision == INT_TYPE_SIZE)
	return true;
      if (precision == LONG_TYPE_SIZE)
	return true;
      if (precision == LONG_LONG_TYPE_SIZE)
	return true;
      return false;

    case MODE_FLOAT:
      if (precision == FLOAT_TYPE_SIZE)
	return true;
      if (precision == DOUBLE_TYPE_SIZE)
	return true;
      if (precision == LONG_DOUBLE_TYPE_SIZE)
	return true;
      return false;

    case MODE_DECIMAL_FLOAT:
      return false;

    default:
      gcc_unreachable ();
    }
}

/* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
   it branches into the delay slot.  Otherwise, return FALSE.  */

static bool
branch_to_delay_slot_p (rtx_insn *insn)
{
  rtx_insn *jump_insn;

  if (dbr_sequence_length ())
    return FALSE;

  jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
  while (insn)
    {
      insn = next_active_insn (insn);
      if (jump_insn == insn)
	return TRUE;

      /* We can't rely on the length of asms.  So, we return FALSE when
	 the branch is followed by an asm.  */
      if (!insn
	  || GET_CODE (PATTERN (insn)) == ASM_INPUT
	  || asm_noperands (PATTERN (insn)) >= 0
	  || get_attr_length (insn) > 0)
	break;
    }

  return FALSE;
}

/* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.

   This occurs when INSN has an unfilled delay slot and is followed
   by an asm.  Disaster can occur if the asm is empty and the jump
   branches into the delay slot.  So, we add a nop in the delay slot
   when this occurs.  */

static bool
branch_needs_nop_p (rtx_insn *insn)
{
  rtx_insn *jump_insn;

  if (dbr_sequence_length ())
    return FALSE;

  jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
  while (insn)
    {
      insn = next_active_insn (insn);
      if (!insn || jump_insn == insn)
	return TRUE;

      if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
	   || asm_noperands (PATTERN (insn)) >= 0)
	  && get_attr_length (insn) > 0)
	break;
    }

  return FALSE;
}

/* Return TRUE if INSN, a forward jump insn, can use nullification
   to skip the following instruction.  This avoids an extra cycle due
   to a mis-predicted branch when we fall through.  */

static bool
use_skip_p (rtx_insn *insn)
{
  rtx_insn *jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));

  while (insn)
    {
      insn = next_active_insn (insn);

      /* We can't rely on the length of asms, so we can't skip asms.  */
      if (!insn
	  || GET_CODE (PATTERN (insn)) == ASM_INPUT
	  || asm_noperands (PATTERN (insn)) >= 0)
	break;
      if (get_attr_length (insn) == 4
	  && jump_insn == next_active_insn (insn))
	return TRUE;
      if (get_attr_length (insn) > 0)
	break;
    }

  return FALSE;
}

/* This routine handles all the normal conditional branch sequences we
   might need to generate.  It handles compare immediate vs compare
   register, nullification of delay slots, varying length branches,
   negated branches, and all combinations of the above.  It returns the
   output appropriate to emit the branch corresponding to all given
   parameters.  */

const char *
pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
{
  static char buf[100];
  bool useskip;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot)
     is asking for a disaster.  This can happen when not optimizing and
     when jump optimization fails.

     While it is usually safe to emit nothing, this can fail if the
     preceding instruction is a nullified branch with an empty delay
     slot and the same branch target as this branch.  We could check
     for this but jump optimization should eliminate nop jumps.  It
     is always safe to emit a nop.  */
  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* The doubleword form of the cmpib instruction doesn't have the LEU
     and GTU conditions while the cmpb instruction does.  Since we accept
     zero for cmpb, we must ensure that we use cmpb for the comparison.  */
  if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
    operands[2] = gen_rtx_REG (DImode, 0);
  if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
    operands[1] = gen_rtx_REG (DImode, 0);
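  /* For example, an unsigned DImode "less than or equal to zero" test
     would need the unavailable cmpib,*<<= form; replacing the zero
     immediate with %r0 lets it be emitted as a cmpb instruction with
     a register operand instead.  */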

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with a
     comclr instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;

  switch (length)
    {
      /* All short conditional branches except backwards with an unfilled
	 delay slot.  */
      case 4:
	if (useskip)
	  strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
	else
	  strcpy (buf, "{com%I2b,|cmp%I2b,}");
	if (GET_MODE (operands[1]) == DImode)
	  strcat (buf, "*");
	if (negated)
	  strcat (buf, "%B3");
	else
	  strcat (buf, "%S3");
	if (useskip)
	  strcat (buf, " %2,%r1,%%r0");
	else if (nullify)
	  {
	    if (branch_needs_nop_p (insn))
	      strcat (buf, ",n %2,%r1,%0%#");
	    else
	      strcat (buf, ",n %2,%r1,%0");
	  }
	else
	  strcat (buf, " %2,%r1,%0");
	break;

     /* All long conditionals.  Note a short backward branch with an
	unfilled delay slot is treated just like a long backward branch
	with an unfilled delay slot.  */
      case 8:
	/* Handle weird backwards branch with a filled delay slot
	   which is nullified.  */
	if (dbr_sequence_length () != 0
	    && ! forward_branch_p (insn)
	    && nullify)
	  {
	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
	    if (GET_MODE (operands[1]) == DImode)
	      strcat (buf, "*");
	    if (negated)
	      strcat (buf, "%S3");
	    else
	      strcat (buf, "%B3");
	    strcat (buf, ",n %2,%r1,.+12\n\tb %0");
	  }
	/* Handle short backwards branch with an unfilled delay slot.
	   Using a comb;nop rather than comiclr;bl saves 1 cycle for both
	   taken and untaken branches.  */
	else if (dbr_sequence_length () == 0
		 && ! forward_branch_p (insn)
		 && INSN_ADDRESSES_SET_P ()
		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	  {
	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
	    if (GET_MODE (operands[1]) == DImode)
	      strcat (buf, "*");
	    if (negated)
	      strcat (buf, "%B3 %2,%r1,%0%#");
	    else
	      strcat (buf, "%S3 %2,%r1,%0%#");
	  }
	else
	  {
	    strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
	    if (GET_MODE (operands[1]) == DImode)
	      strcat (buf, "*");
	    if (negated)
	      strcat (buf, "%S3");
	    else
	      strcat (buf, "%B3");
	    if (nullify)
	      strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
	    else
	      strcat (buf, " %2,%r1,%%r0\n\tb %0");
	  }
	break;

      default:
	/* The reversed conditional branch must branch over one additional
	   instruction if the delay slot is filled and needs to be extracted
	   by pa_output_lbranch.  If the delay slot is empty or this is a
	   nullified forward branch, the instruction after the reversed
	   condition branch must be nullified.  */
	if (dbr_sequence_length () == 0
	    || (nullify && forward_branch_p (insn)))
	  {
	    nullify = 1;
	    xdelay = 0;
	    operands[4] = GEN_INT (length);
	  }
	else
	  {
	    xdelay = 1;
	    operands[4] = GEN_INT (length + 4);
	  }

	/* Create a reversed conditional branch which branches around
	   the following insns.  */
	if (GET_MODE (operands[1]) != DImode)
	  {
	    if (nullify)
	      {
		if (negated)
		  strcpy (buf,
		    "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
		else
		  strcpy (buf,
		    "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
	      }
	    else
	      {
		if (negated)
		  strcpy (buf,
		    "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
		else
		  strcpy (buf,
		    "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
	      }
	  }
	else
	  {
	    if (nullify)
	      {
		if (negated)
		  strcpy (buf,
		    "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
		else
		  strcpy (buf,
		    "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
	      }
	    else
	      {
		if (negated)
		  strcpy (buf,
		    "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
		else
		  strcpy (buf,
		    "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
	      }
	  }

	output_asm_insn (buf, operands);
	return pa_output_lbranch (operands[0], insn, xdelay);
    }
  return buf;
}
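
/* An illustrative instantiation of the short form above (case 4, no
   skip, no nullification) is "cmpb,= %r0,%r4,L$0002" -- compare %r0
   with %r4 and branch to L$0002 when they are equal.  */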

/* Output a PIC pc-relative instruction sequence to load the address of
   OPERANDS[0] to register OPERANDS[2].  OPERANDS[0] is a symbol ref
   or a code label.  OPERANDS[1] specifies the register to use to load
   the program counter.  OPERANDS[3] may be used for label generation.
   The sequence is always three instructions in length.  The program
   counter recorded for PA 1.X is eight bytes more than that for PA 2.0.
   Register %r1 is clobbered.  */

static void
pa_output_pic_pcrel_sequence (rtx *operands)
{
  gcc_assert (SYMBOL_REF_P (operands[0]) || LABEL_P (operands[0]));
  if (TARGET_PA_20)
    {
      /* We can use mfia to determine the current program counter.  */
      if (TARGET_SOM || !TARGET_GAS)
	{
	  operands[3] = gen_label_rtx ();
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (operands[3]));
	  output_asm_insn ("mfia %1", operands);
	  output_asm_insn ("addil L'%0-%l3,%1", operands);
	  output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
	}
      else
	{
	  output_asm_insn ("mfia %1", operands);
	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands);
	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands);
	}
    }
  else
    {
      /* We need to use a branch to determine the current program counter.  */
      output_asm_insn ("{bl|b,l} .+8,%1", operands);
      if (TARGET_SOM || !TARGET_GAS)
	{
	  operands[3] = gen_label_rtx ();
	  output_asm_insn ("addil L'%0-%l3,%1", operands);
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (operands[3]));
	  output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
	}
      else
	{
	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands);
	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands);
	}
    }
}

/* This routine handles output of long unconditional branches that
   exceed the maximum range of a simple branch instruction.  Since
   we don't have a register available for the branch, we save register
   %r1 in the frame marker, load the branch destination DEST into %r1,
   execute the branch, and restore %r1 in the delay slot of the branch.

   Since long branches may have an insn in the delay slot and the
   delay slot is used to restore %r1, we in general need to extract
   this insn and execute it before the branch.  However, to facilitate
   use of this function by conditional branches, we also provide an
   option to not extract the delay insn so that it will be emitted
   after the long branch.  So, if there is an insn in the delay slot,
   it is extracted if XDELAY is nonzero.

   The lengths of the various long-branch sequences are 20, 16 and 24
   bytes for the portable runtime, non-PIC and PIC cases, respectively.  */
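
/* For instance, when the return pointer slot is free, the 16 byte
   non-PIC sequence emitted below is:

	stw %r1,-20(%r30)
	ldil L'dest,%r1
	be R'dest(%sr4,%r1)
	ldw -20(%r30),%r1

   with the final load executing in the delay slot of the branch.  */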

const char *
pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
{
  rtx xoperands[4];

  xoperands[0] = dest;

  /* First, free up the delay slot.  */
  if (xdelay && dbr_sequence_length () != 0)
    {
      /* We can't handle a jump in the delay slot.  */
      gcc_assert (! JUMP_P (NEXT_INSN (insn)));

      final_scan_insn (NEXT_INSN (insn), asm_out_file,
		       optimize, 0, NULL);

      /* Now delete the delay insn.  */
      SET_INSN_DELETED (NEXT_INSN (insn));
    }

  /* Output an insn to save %r1.  The runtime documentation doesn't
     specify whether the "Clean Up" slot in the caller's frame can
     be clobbered by the callee.  It isn't copied by HP's builtin
     alloca, so this suggests that it can be clobbered if necessary.
     The "Static Link" location is copied by HP builtin alloca, so
     we avoid using it.  Using the cleanup slot might be a problem
     if we have to interoperate with languages that pass cleanup
     information.  However, it should be possible to handle these
     situations with GCC's asm feature.

     The "Current RP" slot is reserved for the called procedure, so
     we try to use it when we don't have a frame of our own.  It's
     rather unlikely that we won't have a frame when we need to emit
     a very long branch.

     Really the way to go long term is a register scavenger; go to
     the target of the jump and find a register which we can use
     as a scratch to hold the value in %r1.  Then, we wouldn't have
     to free up the delay slot or clobber a slot that may be needed
     for other purposes.  */
  if (TARGET_64BIT)
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
	/* Use the return pointer slot in the frame marker.  */
	output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
      else
	/* Use the slot at -40 in the frame marker since HP builtin
	   alloca doesn't copy it.  */
	output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
    }
  else
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
	/* Use the return pointer slot in the frame marker.  */
	output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
      else
	/* Use the "Clean Up" slot in the frame marker.  In GCC,
	   the only other use of this location is for copying a
	   floating point double argument from a floating-point
	   register to two general registers.  The copy is done
	   as an "atomic" operation when outputting a call, so it
	   won't interfere with our using the location here.  */
	output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
    }

  if (TARGET_PORTABLE_RUNTIME)
    {
      output_asm_insn ("ldil L'%0,%%r1", xoperands);
      output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
      output_asm_insn ("bv %%r0(%%r1)", xoperands);
    }
  else if (flag_pic)
    {
      xoperands[1] = gen_rtx_REG (Pmode, 1);
      xoperands[2] = xoperands[1];
      pa_output_pic_pcrel_sequence (xoperands);
      output_asm_insn ("bv %%r0(%%r1)", xoperands);
    }
  else
    /* Now output a very long branch to the original target.  */
    output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);

  /* Now restore the value of %r1 in the delay slot.  */
  if (TARGET_64BIT)
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
	return "ldd -16(%%r30),%%r1";
      else
	return "ldd -40(%%r30),%%r1";
    }
  else
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
	return "ldw -20(%%r30),%%r1";
      else
	return "ldw -12(%%r30),%%r1";
    }
}

/* This routine handles all the branch-on-bit conditional branch sequences we
   might need to generate.  It handles nullification of delay slots,
   varying length branches, negated branches and all combinations of the
   above.  It returns the appropriate output template to emit the branch.  */

const char *
pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
{
  static char buf[100];
  bool useskip;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  I do not think this can happen as this pattern
     is only used when optimizing; jump optimization should eliminate the
     jump.  But be prepared just in case.  */

  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with an
     extrs instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;

  switch (length)
    {

      /* All short conditional branches except backwards with an unfilled
	 delay slot.  */
      case 4:
	if (useskip)
	  strcpy (buf, "{extrs,|extrw,s,}");
	else
	  strcpy (buf, "bb,");
	if (useskip && GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "extrd,s,*");
	else if (GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "bb,*");
	if ((which == 0 && negated)
	     || (which == 1 && ! negated))
	  strcat (buf, ">=");
	else
	  strcat (buf, "<");
	if (useskip)
	  strcat (buf, " %0,%1,1,%%r0");
	else if (nullify && negated)
	  {
	    if (branch_needs_nop_p (insn))
	      strcat (buf, ",n %0,%1,%3%#");
	    else
	      strcat (buf, ",n %0,%1,%3");
	  }
	else if (nullify && ! negated)
	  {
	    if (branch_needs_nop_p (insn))
	      strcat (buf, ",n %0,%1,%2%#");
	    else
	      strcat (buf, ",n %0,%1,%2");
	  }
	else if (! nullify && negated)
	  strcat (buf, " %0,%1,%3");
	else if (! nullify && ! negated)
	  strcat (buf, " %0,%1,%2");
	break;

     /* All long conditionals.  Note a short backward branch with an
	unfilled delay slot is treated just like a long backward branch
	with an unfilled delay slot.  */
      case 8:
	/* Handle weird backwards branch with a filled delay slot
	   which is nullified.  */
	if (dbr_sequence_length () != 0
	    && ! forward_branch_p (insn)
	    && nullify)
	  {
	    strcpy (buf, "bb,");
	    if (GET_MODE (operands[0]) == DImode)
	      strcat (buf, "*");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, "<");
	    else
	      strcat (buf, ">=");
	    if (negated)
	      strcat (buf, ",n %0,%1,.+12\n\tb %3");
	    else
	      strcat (buf, ",n %0,%1,.+12\n\tb %2");
	  }
	/* Handle short backwards branch with an unfilled delay slot.
	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
	   taken and untaken branches.  */
	else if (dbr_sequence_length () == 0
		 && ! forward_branch_p (insn)
		 && INSN_ADDRESSES_SET_P ()
		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	  {
	    strcpy (buf, "bb,");
	    if (GET_MODE (operands[0]) == DImode)
	      strcat (buf, "*");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, ">=");
	    else
	      strcat (buf, "<");
	    if (negated)
	      strcat (buf, " %0,%1,%3%#");
	    else
	      strcat (buf, " %0,%1,%2%#");
	  }
	else
	  {
	    if (GET_MODE (operands[0]) == DImode)
	      strcpy (buf, "extrd,s,*");
	    else
	      strcpy (buf, "{extrs,|extrw,s,}");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, "<");
	    else
	      strcat (buf, ">=");
	    if (nullify && negated)
	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
	    else if (nullify && ! negated)
	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
	    else if (negated)
	      strcat (buf, " %0,%1,1,%%r0\n\tb %3");
	    else
	      strcat (buf, " %0,%1,1,%%r0\n\tb %2");
	  }
	break;

      default:
	/* The reversed conditional branch must branch over one additional
	   instruction if the delay slot is filled and needs to be extracted
	   by pa_output_lbranch.  If the delay slot is empty or this is a
	   nullified forward branch, the instruction after the reversed
	   condition branch must be nullified.  */
	if (dbr_sequence_length () == 0
	    || (nullify && forward_branch_p (insn)))
	  {
	    nullify = 1;
	    xdelay = 0;
	    operands[4] = GEN_INT (length);
	  }
	else
	  {
	    xdelay = 1;
	    operands[4] = GEN_INT (length + 4);
	  }

	if (GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "bb,*");
	else
	  strcpy (buf, "bb,");
	if ((which == 0 && negated)
	    || (which == 1 && !negated))
	  strcat (buf, "<");
	else
	  strcat (buf, ">=");
	if (nullify)
	  strcat (buf, ",n %0,%1,.+%4");
	else
	  strcat (buf, " %0,%1,.+%4");
	output_asm_insn (buf, operands);
	return pa_output_lbranch (negated ? operands[3] : operands[2],
				  insn, xdelay);
    }
  return buf;
}
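
/* An illustrative instantiation of the short bb form above is
   "bb,< %r4,5,L$0003" -- branch to L$0003 if bit 5 (numbered from
   the most significant bit) of %r4 is set.  */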

/* This routine handles all the branch-on-variable-bit conditional branch
   sequences we might need to generate.  It handles nullification of delay
   slots, varying length branches, negated branches and all combinations
   of the above.  It returns the appropriate output template to emit the
   branch.  */

const char *
pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
	       int which)
{
  static char buf[100];
  bool useskip;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  I do not think this can happen as this pattern
     is only used when optimizing; jump optimization should eliminate the
     jump.  But be prepared just in case.  */

  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with an
     extrs instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;

  switch (length)
    {

      /* All short conditional branches except backwards with an unfilled
	 delay slot.  */
      case 4:
	if (useskip)
	  strcpy (buf, "{vextrs,|extrw,s,}");
	else
	  strcpy (buf, "{bvb,|bb,}");
	if (useskip && GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "extrd,s,*");
	else if (GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "bb,*");
	if ((which == 0 && negated)
	     || (which == 1 && ! negated))
	  strcat (buf, ">=");
	else
	  strcat (buf, "<");
	if (useskip)
	  strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
	else if (nullify && negated)
	  {
	    if (branch_needs_nop_p (insn))
	      strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
	    else
	      strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
	  }
	else if (nullify && ! negated)
	  {
	    if (branch_needs_nop_p (insn))
	      strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
	    else
	      strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
	  }
	else if (! nullify && negated)
	  strcat (buf, "{ %0,%3| %0,%%sar,%3}");
	else if (! nullify && ! negated)
	  strcat (buf, "{ %0,%2| %0,%%sar,%2}");
	break;

     /* All long conditionals.  Note a short backward branch with an
	unfilled delay slot is treated just like a long backward branch
	with an unfilled delay slot.  */
      case 8:
	/* Handle weird backwards branch with a filled delay slot
	   which is nullified.  */
	if (dbr_sequence_length () != 0
	    && ! forward_branch_p (insn)
	    && nullify)
	  {
	    strcpy (buf, "{bvb,|bb,}");
	    if (GET_MODE (operands[0]) == DImode)
	      strcat (buf, "*");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, "<");
	    else
	      strcat (buf, ">=");
	    if (negated)
	      strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
	    else
	      strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
	  }
	/* Handle short backwards branch with an unfilled delay slot.
	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
	   taken and untaken branches.  */
	else if (dbr_sequence_length () == 0
		 && ! forward_branch_p (insn)
		 && INSN_ADDRESSES_SET_P ()
		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	  {
	    strcpy (buf, "{bvb,|bb,}");
	    if (GET_MODE (operands[0]) == DImode)
	      strcat (buf, "*");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, ">=");
	    else
	      strcat (buf, "<");
	    if (negated)
	      strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
	    else
	      strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
	  }
	else
	  {
	    strcpy (buf, "{vextrs,|extrw,s,}");
	    if (GET_MODE (operands[0]) == DImode)
	      strcpy (buf, "extrd,s,*");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, "<");
	    else
	      strcat (buf, ">=");
	    if (nullify && negated)
	      strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
	    else if (nullify && ! negated)
	      strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
	    else if (negated)
	      strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
	    else
	      strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
	  }
	break;

      default:
	/* The reversed conditional branch must branch over one additional
	   instruction if the delay slot is filled and needs to be extracted
	   by pa_output_lbranch.  If the delay slot is empty or this is a
	   nullified forward branch, the instruction after the reversed
	   condition branch must be nullified.  */
	if (dbr_sequence_length () == 0
	    || (nullify && forward_branch_p (insn)))
	  {
	    nullify = 1;
	    xdelay = 0;
	    operands[4] = GEN_INT (length);
	  }
	else
	  {
	    xdelay = 1;
	    operands[4] = GEN_INT (length + 4);
	  }

	if (GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "bb,*");
	else
	  strcpy (buf, "{bvb,|bb,}");
	if ((which == 0 && negated)
	    || (which == 1 && !negated))
	  strcat (buf, "<");
	else
	  strcat (buf, ">=");
	if (nullify)
	  strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
	else
	  strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
	output_asm_insn (buf, operands);
	return pa_output_lbranch (negated ? operands[3] : operands[2],
				  insn, xdelay);
    }
  return buf;
}

/* Return the output template for emitting a dbra type insn.

   Note it may perform some output operations on its own before
   returning the final output string.  */
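
/* An illustrative instantiation of the short form below is
   "addib,> -1,%r3,L$0002" -- add -1 to %r3 and branch to L$0002
   while the result is still greater than zero.  */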
const char *
pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
{
  int length = get_attr_length (insn);

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (branch_to_delay_slot_p (insn))
    {
      if (which_alternative == 0)
	return "ldo %1(%0),%0";
      else if (which_alternative == 1)
	{
	  output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
	  output_asm_insn ("ldw -16(%%r30),%4", operands);
	  output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
	  return "{fldws|fldw} -16(%%r30),%0";
	}
      else
	{
	  output_asm_insn ("ldw %0,%4", operands);
	  return "ldo %1(%4),%4\n\tstw %4,%0";
	}
    }

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int xdelay;

      /* If this is a long branch with its delay slot unfilled, set `nullify'
	 as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
	nullify = 1;

      /* If this is a short forward conditional branch which did not get
	 its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
	nullify = forward_branch_p (insn);

      switch (length)
	{
	case 4:
	  if (nullify)
	    {
	      if (branch_needs_nop_p (insn))
		return "addib,%C2,n %1,%0,%3%#";
	      else
		return "addib,%C2,n %1,%0,%3";
	    }
	  else
	    return "addib,%C2 %1,%0,%3";

	case 8:
	  /* Handle weird backwards branch with a filled delay slot
	     which is nullified.  */
	  if (dbr_sequence_length () != 0
	      && ! forward_branch_p (insn)
	      && nullify)
	    return "addib,%N2,n %1,%0,.+12\n\tb %3";
	  /* Handle short backwards branch with an unfilled delay slot.
	     Using an addb;nop rather than addi;bl saves 1 cycle for both
	     taken and untaken branches.  */
	  else if (dbr_sequence_length () == 0
		   && ! forward_branch_p (insn)
		   && INSN_ADDRESSES_SET_P ()
		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	      return "addib,%C2 %1,%0,%3%#";

	  /* Handle normal cases.  */
	  if (nullify)
	    return "addi,%N2 %1,%0,%0\n\tb,n %3";
	  else
	    return "addi,%N2 %1,%0,%0\n\tb %3";

	default:
	  /* The reversed conditional branch must branch over one additional
	     instruction if the delay slot is filled and needs to be extracted
	     by pa_output_lbranch.  If the delay slot is empty or this is a
	     nullified forward branch, the instruction after the reversed
	     condition branch must be nullified.  */
	  if (dbr_sequence_length () == 0
	      || (nullify && forward_branch_p (insn)))
	    {
	      nullify = 1;
	      xdelay = 0;
	      operands[4] = GEN_INT (length);
	    }
	  else
	    {
	      xdelay = 1;
	      operands[4] = GEN_INT (length + 4);
	    }

	  if (nullify)
	    output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
	  else
	    output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);

	  return pa_output_lbranch (operands[3], insn, xdelay);
	}

    }
  /* Deal with gross reload from FP register case.  */
  else if (which_alternative == 1)
    {
      /* Move loop counter from FP register to MEM then into a GR,
	 increment the GR, store the GR into MEM, and finally reload
	 the FP register from MEM from within the branch's delay slot.  */
      output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
		       operands);
      output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
      if (length == 24)
	return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
      else if (length == 28)
	return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
      else
	{
	  operands[5] = GEN_INT (length - 16);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
  /* Deal with gross reload from memory case.  */
  else
    {
      /* Reload loop counter from memory, the store back to memory
	 happens in the branch's delay slot.  */
      output_asm_insn ("ldw %0,%4", operands);
      if (length == 12)
	return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
      else if (length == 16)
	return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
      else
	{
	  operands[5] = GEN_INT (length - 4);
	  output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
}

/* Return the output template for emitting a movb type insn.

   Note it may perform some output operations on its own before
   returning the final output string.  */
const char *
pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
		int reverse_comparison)
{
  int length = get_attr_length (insn);

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (branch_to_delay_slot_p (insn))
    {
      if (which_alternative == 0)
	return "copy %1,%0";
      else if (which_alternative == 1)
	{
	  output_asm_insn ("stw %1,-16(%%r30)", operands);
	  return "{fldws|fldw} -16(%%r30),%0";
	}
      else if (which_alternative == 2)
	return "stw %1,%0";
      else
	return "mtsar %r1";
    }

  /* Support the second variant.  */
  if (reverse_comparison)
    PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int xdelay;

      /* If this is a long branch with its delay slot unfilled, set `nullify'
	 as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
	nullify = 1;

      /* If this is a short forward conditional branch which did not get
	 its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
	nullify = forward_branch_p (insn);

      switch (length)
	{
	case 4:
	  if (nullify)
	    {
	      if (branch_needs_nop_p (insn))
		return "movb,%C2,n %1,%0,%3%#";
	      else
		return "movb,%C2,n %1,%0,%3";
	    }
	  else
	    return "movb,%C2 %1,%0,%3";

	case 8:
	  /* Handle weird backwards branch with a filled delay slot
	     which is nullified.  */
	  if (dbr_sequence_length () != 0
	      && ! forward_branch_p (insn)
	      && nullify)
	    return "movb,%N2,n %1,%0,.+12\n\tb %3";

	  /* Handle short backwards branch with an unfilled delay slot.
	     Using a movb;nop rather than or;bl saves 1 cycle for both
	     taken and untaken branches.  */
	  else if (dbr_sequence_length () == 0
		   && ! forward_branch_p (insn)
		   && INSN_ADDRESSES_SET_P ()
		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	    return "movb,%C2 %1,%0,%3%#";
	  /* Handle normal cases.  */
	  if (nullify)
	    return "or,%N2 %1,%%r0,%0\n\tb,n %3";
	  else
	    return "or,%N2 %1,%%r0,%0\n\tb %3";

	default:
	  /* The reversed conditional branch must branch over one additional
	     instruction if the delay slot is filled and needs to be extracted
	     by pa_output_lbranch.  If the delay slot is empty or this is a
	     nullified forward branch, the instruction after the reversed
	     condition branch must be nullified.  */
	  if (dbr_sequence_length () == 0
	      || (nullify && forward_branch_p (insn)))
	    {
	      nullify = 1;
	      xdelay = 0;
	      operands[4] = GEN_INT (length);
	    }
	  else
	    {
	      xdelay = 1;
	      operands[4] = GEN_INT (length + 4);
	    }

	  if (nullify)
	    output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
	  else
	    output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);

	  return pa_output_lbranch (operands[3], insn, xdelay);
	}
    }
  /* Deal with gross reload for FP destination register case.  */
  else if (which_alternative == 1)
    {
      /* Move source register to MEM, perform the branch test, then
	 finally load the FP register from MEM from within the branch's
	 delay slot.  */
      output_asm_insn ("stw %1,-16(%%r30)", operands);
      if (length == 12)
	return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
      else if (length == 16)
	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
      else
	{
	  operands[4] = GEN_INT (length - 4);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
  /* Deal with gross reload from memory case.  */
  else if (which_alternative == 2)
    {
      /* Reload loop counter from memory, the store back to memory
	 happens in the branch's delay slot.  */
      if (length == 8)
	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
      else if (length == 12)
	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
      else
	{
	  operands[4] = GEN_INT (length);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
			   operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
  /* Handle SAR as a destination.  */
  else
    {
      if (length == 8)
	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
      else if (length == 12)
	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
      else
	{
	  operands[4] = GEN_INT (length);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
			   operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
}

/* Copy any FP arguments in INSN into integer registers.  */
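/* In the 32-bit SOM convention, an SFmode value in FP register 32 or
   33 is copied to %r26, 34-35 to %r25, 36-37 to %r24 and 38-39 to
   %r23; DFmode values go to the corresponding general register pair
   (mapping derived from the register numbers computed below).  */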
static void
copy_fp_args (rtx_insn *insn)
{
  rtx link;
  rtx xoperands[2];

  for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
    {
      int arg_mode, regno;
      rtx use = XEXP (link, 0);

      if (! (GET_CODE (use) == USE
	  && GET_CODE (XEXP (use, 0)) == REG
	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
	continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));

      /* Is it a floating point register?  */
      if (regno >= 32 && regno <= 39)
	{
	  /* Copy the FP register into an integer register via memory.  */
	  if (arg_mode == SFmode)
	    {
	      xoperands[0] = XEXP (use, 0);
	      xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
	      output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
	    }
	  else
	    {
	      xoperands[0] = XEXP (use, 0);
	      xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
	      output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
	      output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
	    }
	}
    }
}

/* Compute length of the FP argument copy sequence for INSN.  */
static int
length_fp_args (rtx_insn *insn)
{
  int length = 0;
  rtx link;

  for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
    {
      int arg_mode, regno;
      rtx use = XEXP (link, 0);

      if (! (GET_CODE (use) == USE
	  && GET_CODE (XEXP (use, 0)) == REG
	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
	continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));

      /* Is it a floating point register?  */
      if (regno >= 32 && regno <= 39)
	{
	  if (arg_mode == SFmode)
	    length += 8;
	  else
	    length += 12;
	}
    }

  return length;
}
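
/* For example, a call with two SFmode FP arguments and one DFmode FP
   argument gives a copy sequence of 8 + 8 + 12 = 28 bytes.  */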

/* Return the attribute length for the millicode call instruction INSN.
   The length must match the code generated by pa_output_millicode_call.
   We include the delay slot in the returned length as it is better to
   overestimate the length than to underestimate it.  */

int
pa_attr_length_millicode_call (rtx_insn *insn)
{
  unsigned long distance = -1;
  unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;

  if (INSN_ADDRESSES_SET_P ())
    {
      distance = (total + insn_current_reference_address (insn));
      if (distance < total)
	distance = -1;
    }

  if (TARGET_64BIT)
    {
      if (!TARGET_LONG_CALLS && distance < 7600000)
	return 8;

      return 20;
    }
  else if (TARGET_PORTABLE_RUNTIME)
    return 24;
  else
    {
      if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
	return 8;

      if (!flag_pic)
	return 12;

      return 24;
    }
}

/* INSN is a function call.

   CALL_DEST is the routine we are calling.  */

const char *
pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
{
  int attr_length = get_attr_length (insn);
  int seq_length = dbr_sequence_length ();
  rtx xoperands[4];

  xoperands[0] = call_dest;

  /* Handle the common case where we are sure that the branch will
     reach the beginning of the $CODE$ subspace.  The within reach
     form of the $$sh_func_adrs call has a length of 28.  Because it
     has an attribute type of sh_func_adrs, it never has a nonzero
     sequence length (i.e., the delay slot is never filled).  */
  if (!TARGET_LONG_CALLS
      && (attr_length == 8
	  || (attr_length == 28
	      && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
    {
      xoperands[1] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
      output_asm_insn ("{bl|b,l} %0,%1", xoperands);
    }
  else
    {
      if (TARGET_64BIT)
	{
	  /* It might seem that one insn could be saved by accessing
	     the millicode function using the linkage table.  However,
	     this doesn't work in shared libraries and other dynamically
	     loaded objects.  Using a pc-relative sequence also avoids
	     problems related to the implicit use of the gp register.  */
	  xoperands[1] = gen_rtx_REG (Pmode, 1);
	  xoperands[2] = xoperands[1];
	  pa_output_pic_pcrel_sequence (xoperands);
	  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
	}
      else if (TARGET_PORTABLE_RUNTIME)
	{
	  /* Pure portable runtime doesn't allow be/ble; we also don't
	     have PIC support in the assembler/linker, so this sequence
	     is needed.  */

	  /* Get the address of our target into %r1.  */
	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
	  output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);

	  /* Get our return address into %r31.  */
	  output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
	  output_asm_insn ("addi 8,%%r31,%%r31", xoperands);

	  /* Jump to our target address in %r1.  */
	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
	}
      else if (!flag_pic)
	{
	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
	  if (TARGET_PA_20)
	    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
	  else
	    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
	}
      else
	{
	  xoperands[1] = gen_rtx_REG (Pmode, 31);
	  xoperands[2] = gen_rtx_REG (Pmode, 1);
	  pa_output_pic_pcrel_sequence (xoperands);

	  /* Adjust return address.  */
	  output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands);

	  /* Jump to our target address in %r1.  */
	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
	}
    }

  if (seq_length == 0)
    output_asm_insn ("nop", xoperands);

  return "";
}

/* Return the attribute length of the call instruction INSN.  The SIBCALL
   flag indicates whether INSN is a regular call or a sibling call.  The
   length returned must be longer than the code actually generated by
   pa_output_call.  Since branch shortening is done before delay branch
   sequencing, there is no way to determine whether or not the delay
   slot will be filled during branch shortening.  Even when the delay
   slot is filled, we may have to add a nop if the delay slot contains
   a branch that can't reach its target.  Thus, we always have to include
   the delay slot in the length estimate.  This used to be done in
   pa_adjust_insn_length but we do it here now as some sequences always
   fill the delay slot and we can save four bytes in the estimate for
   these sequences.  */

int
pa_attr_length_call (rtx_insn *insn, int sibcall)
{
  int local_call;
  rtx call, call_dest;
  tree call_decl;
  int length = 0;
  rtx pat = PATTERN (insn);
  unsigned long distance = -1;

  gcc_assert (CALL_P (insn));

  if (INSN_ADDRESSES_SET_P ())
    {
      unsigned long total;

      total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
      distance = (total + insn_current_reference_address (insn));
      if (distance < total)
	distance = -1;
    }

  gcc_assert (GET_CODE (pat) == PARALLEL);

  /* Get the call rtx.  */
  call = XVECEXP (pat, 0, 0);
  if (GET_CODE (call) == SET)
    call = SET_SRC (call);

  gcc_assert (GET_CODE (call) == CALL);

  /* Determine if this is a local call.  */
  call_dest = XEXP (XEXP (call, 0), 0);
  call_decl = SYMBOL_REF_DECL (call_dest);
  local_call = call_decl && targetm.binds_local_p (call_decl);

  /* pc-relative branch.  */
  if (!TARGET_LONG_CALLS
      && ((TARGET_PA_20 && !sibcall && distance < 7600000)
	  || distance < MAX_PCREL17F_OFFSET))
    length += 8;

  /* 64-bit plabel sequence.  */
  else if (TARGET_64BIT && !local_call)
    length += 24;

  /* non-pic long absolute branch sequence.  */
  else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
    length += 12;

  /* long pc-relative branch sequence.  */
  else if (TARGET_LONG_PIC_SDIFF_CALL
	   || (TARGET_GAS && !TARGET_SOM && local_call))
    {
      length += 20;

      if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
	length += 8;
    }

  /* 32-bit plabel sequence.  */
  else
    {
      length += 32;

      if (TARGET_SOM)
	length += length_fp_args (insn);

      if (flag_pic)
	length += 4;

      if (!TARGET_PA_20)
	{
	  if (!sibcall)
	    length += 8;

	  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
	    length += 8;
	}
    }

  return length;
}
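
/* For example, a non-local, non-sibcall PIC call on PA 1.1 (ELF, so
   no SOM FP argument copies) that falls through to the 32-bit plabel
   case is estimated at 32 + 4 + 8 + 8 = 52 bytes when space registers
   are in use.  */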

/* INSN is a function call.

   CALL_DEST is the routine we are calling.  */

const char *
pa_output_call(rtx_insn * insn,rtx call_dest,int sibcall)8028 pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
8029 {
8030   int seq_length = dbr_sequence_length ();
8031   tree call_decl = SYMBOL_REF_DECL (call_dest);
8032   int local_call = call_decl && targetm.binds_local_p (call_decl);
8033   rtx xoperands[4];
8034 
8035   xoperands[0] = call_dest;
8036 
8037   /* Handle the common case where we're sure that the branch will reach
8038      the beginning of the "$CODE$" subspace.  This is the beginning of
8039      the current function if we are in a named section.  */
8040   if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
8041     {
8042       xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
8043       output_asm_insn ("{bl|b,l} %0,%1", xoperands);
8044     }
8045   else
8046     {
8047       if (TARGET_64BIT && !local_call)
8048 	{
8049 	  /* ??? As far as I can tell, the HP linker doesn't support the
8050 	     long pc-relative sequence described in the 64-bit runtime
8051 	     architecture.  So, we use a slightly longer indirect call.  */
8052 	  xoperands[0] = pa_get_deferred_plabel (call_dest);
8053 	  xoperands[1] = gen_label_rtx ();
8054 
8055 	  /* Put the load of %r27 into the delay slot.  We don't need to
8056 	     do anything when generating fast indirect calls.  */
8057 	  if (seq_length != 0)
8058 	    {
8059 	      final_scan_insn (NEXT_INSN (insn), asm_out_file,
8060 			       optimize, 0, NULL);
8061 
8062 	      /* Now delete the delay insn.  */
8063 	      SET_INSN_DELETED (NEXT_INSN (insn));
8064 	    }
8065 
8066 	  output_asm_insn ("addil LT'%0,%%r27", xoperands);
8067 	  output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
8068 	  output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
8069 	  output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
8070 	  output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
8071 	  output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
8072 	  seq_length = 1;
8073 	}
8074       else
8075 	{
8076 	  int indirect_call = 0;
8077 
8078 	  /* Emit a long call.  There are several different sequences
8079 	     of increasing length and complexity.  In most cases,
8080              they don't allow an instruction in the delay slot.  */
8081 	  if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
8082 	      && !TARGET_LONG_PIC_SDIFF_CALL
8083 	      && !(TARGET_GAS && !TARGET_SOM && local_call)
8084 	      && !TARGET_64BIT)
8085 	    indirect_call = 1;
8086 
8087 	  if (seq_length != 0
8088 	      && !sibcall
8089 	      && (!TARGET_PA_20
8090 		  || indirect_call
8091 		  || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
8092 	    {
8093 	      /* A non-jump insn in the delay slot.  By definition we can
8094 		 emit this insn before the call (and in fact before argument
8095 		 relocating).  */
8096 	      final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
8097 			       NULL);
8098 
8099 	      /* Now delete the delay insn.  */
8100 	      SET_INSN_DELETED (NEXT_INSN (insn));
8101 	      seq_length = 0;
8102 	    }
8103 
8104 	  if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
8105 	    {
8106 	      /* This is the best sequence for making long calls in
8107 		 non-pic code.  Unfortunately, GNU ld doesn't provide
8108 		 the stub needed for external calls, and GAS's support
8109 		 for this with the SOM linker is buggy.  It is safe
8110 		 to use this for local calls.  */
8111 	      output_asm_insn ("ldil L'%0,%%r1", xoperands);
8112 	      if (sibcall)
8113 		output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
8114 	      else
8115 		{
8116 		  if (TARGET_PA_20)
8117 		    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
8118 				     xoperands);
8119 		  else
8120 		    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
8121 
8122 		  output_asm_insn ("copy %%r31,%%r2", xoperands);
8123 		  seq_length = 1;
8124 		}
8125 	    }
8126 	  else
8127 	    {
8128 	      /* The HP assembler and linker can handle relocations for
8129 		 the difference of two symbols.  The HP assembler
8130 		 recognizes the sequence as a pc-relative call and
8131 		 the linker provides stubs when needed.  */
8132 
8133 	      /* GAS currently can't generate the relocations that
8134 		 are needed for the SOM linker under HP-UX using this
8135 		 sequence.  The GNU linker doesn't generate the stubs
8136 		 that are needed for external calls on TARGET_ELF32
8137 		 with this sequence.  For now, we have to use a longer
8138 		 plabel sequence when using GAS for non-local calls.  */
8139 	      if (TARGET_LONG_PIC_SDIFF_CALL
8140 		  || (TARGET_GAS && !TARGET_SOM && local_call))
8141 		{
8142 		  xoperands[1] = gen_rtx_REG (Pmode, 1);
8143 		  xoperands[2] = xoperands[1];
8144 		  pa_output_pic_pcrel_sequence (xoperands);
8145 		}
8146 	      else
8147 		{
8148 		  /* Emit a long plabel-based call sequence.  This is
8149 		     essentially an inline implementation of $$dyncall.
8150 		     We don't actually try to call $$dyncall as this is
8151 		     as difficult as calling the function itself.  */
8152 		  xoperands[0] = pa_get_deferred_plabel (call_dest);
8153 		  xoperands[1] = gen_label_rtx ();
8154 
8155 		  /* Since the call is indirect, FP arguments in registers
8156 		     need to be copied to the general registers.  Then, the
8157 		     argument relocation stub will copy them back.  */
8158 		  if (TARGET_SOM)
8159 		    copy_fp_args (insn);
8160 
8161 		  if (flag_pic)
8162 		    {
8163 		      output_asm_insn ("addil LT'%0,%%r19", xoperands);
8164 		      output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
8165 		      output_asm_insn ("ldw 0(%%r1),%%r22", xoperands);
8166 		    }
8167 		  else
8168 		    {
8169 		      output_asm_insn ("addil LR'%0-$global$,%%r27",
8170 				       xoperands);
8171 		      output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r22",
8172 				       xoperands);
8173 		    }
8174 
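		  /* If bit 30 of %r22 is clear, %r22 already holds the
		     target address and the branch below skips the next
		     three insns.  Otherwise, %r22 points at a plabel:
		     clear the low flag bits, then load the real target
		     address and the new %r19 from it.  */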
8175 		  output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8176 		  output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8177 		  /* Should this be an ordered load to ensure the target
8178 		     address is loaded before the global pointer?  */
8179 		  output_asm_insn ("ldw 0(%%r22),%%r1", xoperands);
8180 		  output_asm_insn ("ldw 4(%%r22),%%r19", xoperands);
8181 
8182 		  if (!sibcall && !TARGET_PA_20)
8183 		    {
8184 		      output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8185 		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8186 			output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
8187 		      else
8188 			output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
8189 		    }
8190 		}
8191 
8192 	      if (TARGET_PA_20)
8193 		{
8194 		  if (sibcall)
8195 		    output_asm_insn ("bve (%%r1)", xoperands);
8196 		  else
8197 		    {
8198 		      if (indirect_call)
8199 			{
8200 			  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8201 			  output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
8202 			  seq_length = 1;
8203 			}
8204 		      else
8205 			output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8206 		    }
8207 		}
8208 	      else
8209 		{
8210 		  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8211 		    output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8212 				     xoperands);
8213 
8214 		  if (sibcall)
8215 		    {
8216 		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8217 			output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
8218 		      else
8219 			output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
8220 		    }
8221 		  else
8222 		    {
8223 		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8224 			output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
8225 		      else
8226 			output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
8227 
8228 		      if (indirect_call)
8229 			output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
8230 		      else
8231 			output_asm_insn ("copy %%r31,%%r2", xoperands);
8232 		      seq_length = 1;
8233 		    }
8234 		}
8235 	    }
8236 	}
8237     }
8238 
8239   if (seq_length == 0)
8240     output_asm_insn ("nop", xoperands);
8241 
8242   return "";
8243 }
8244 
8245 /* Return the attribute length of the indirect call instruction INSN.
8246    The length must match the code generated by pa_output_indirect_call.
8247    The returned length includes the delay slot.  Currently, the delay
8248    slot of an indirect call sequence is not exposed and it is used by
8249    the sequence itself.  */
8250 
8251 int
8252 pa_attr_length_indirect_call (rtx_insn *insn)
8253 {
8254   unsigned long distance = -1;
8255   unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8256 
8257   if (INSN_ADDRESSES_SET_P ())
8258     {
8259       distance = (total + insn_current_reference_address (insn));
8260       if (distance < total)
8261 	distance = -1;
8262     }
8263 
8264   if (TARGET_64BIT)
8265     return 12;
8266 
8267   if (TARGET_FAST_INDIRECT_CALLS)
8268     return 8;
8269 
8270   if (TARGET_PORTABLE_RUNTIME)
8271     return 16;
8272 
8273   if (!TARGET_LONG_CALLS
8274       && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8275 	  || distance < MAX_PCREL17F_OFFSET))
8276     return 8;
8277 
8278   /* Out of reach, can use ble.  */
8279   if (!flag_pic)
8280     return 12;
8281 
8282   /* Inline versions of $$dyncall.  */
8283   if (!optimize_size)
8284     {
8285       if (TARGET_NO_SPACE_REGS)
8286 	return 28;
8287 
8288       if (TARGET_PA_20)
8289 	return 32;
8290     }
8291 
8292   /* Long PIC pc-relative call.  */
8293   return 20;
8294 }
8295 
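/* Output the indirect call sequence for INSN to CALL_DEST.  The cases
   below mirror the lengths computed by pa_attr_length_indirect_call:
   12 for the 64-bit descriptor call, 8 for fast indirect calls, 16 for
   the portable runtime, then 8 for a direct branch to $$dyncall, 12
   for the ldil/ble absolute form, 28 and 32 for the inline $$dyncall
   expansions, and 20 for the long PIC pc-relative call.  */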
8296 const char *
8297 pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
8298 {
8299   rtx xoperands[4];
8300   int length;
8301 
8302   if (TARGET_64BIT)
8303     {
8304       xoperands[0] = call_dest;
8305       output_asm_insn ("ldd 16(%0),%%r2\n\t"
8306 		       "bve,l (%%r2),%%r2\n\t"
8307 		       "ldd 24(%0),%%r27", xoperands);
8308       return "";
8309     }
8310 
8311   /* First the special case for kernels, level 0 systems, etc.  */
8312   if (TARGET_FAST_INDIRECT_CALLS)
8313     {
8314       pa_output_arg_descriptor (insn);
8315       if (TARGET_PA_20)
8316 	return "bve,l,n (%%r22),%%r2\n\tnop";
8317       return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8318     }
8319 
8320   if (TARGET_PORTABLE_RUNTIME)
8321     {
8322       output_asm_insn ("ldil L'$$dyncall,%%r31\n\t"
8323 		       "ldo R'$$dyncall(%%r31),%%r31", xoperands);
8324       pa_output_arg_descriptor (insn);
8325       return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8326     }
8327 
8328   /* Now the normal case -- we can reach $$dyncall directly or
8329      we're sure that we can get there via a long-branch stub.
8330 
8331      No need to check target flags as the length uniquely identifies
8332      the remaining cases.  */
8333   length = pa_attr_length_indirect_call (insn);
8334   if (length == 8)
8335     {
8336       pa_output_arg_descriptor (insn);
8337 
8338       /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8339 	 $$dyncall.  Since BLE uses %r31 as the link register, the 22-bit
8340 	 variant of the B,L instruction can't be used on the SOM target.  */
8341       if (TARGET_PA_20 && !TARGET_SOM)
8342 	return "b,l,n $$dyncall,%%r2\n\tnop";
8343       else
8344 	return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8345     }
8346 
8347   /* Long millicode call, but we are not generating PIC or portable runtime
8348      code.  */
8349   if (length == 12)
8350     {
8351       output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands);
8352       pa_output_arg_descriptor (insn);
8353       return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8354     }
8355 
8356   /* The long PIC pc-relative call sequence is five instructions.  So,
8357      let's use an inline version of $$dyncall when the calling sequence
8358      has a roughly similar number of instructions and we are not optimizing
8359      for size.  We need two instructions to load the return pointer plus
8360      the $$dyncall implementation.  */
8361   if (!optimize_size)
8362     {
8363       if (TARGET_NO_SPACE_REGS)
8364 	{
8365 	  pa_output_arg_descriptor (insn);
8366 	  output_asm_insn ("bl .+8,%%r2\n\t"
8367 			   "ldo 20(%%r2),%%r2\n\t"
8368 			   "extru,<> %%r22,30,1,%%r0\n\t"
8369 			   "bv,n %%r0(%%r22)\n\t"
8370 			   "ldw -2(%%r22),%%r21\n\t"
8371 			   "bv %%r0(%%r21)\n\t"
8372 			   "ldw 2(%%r22),%%r19", xoperands);
8373 	  return "";
8374 	}
8375       if (TARGET_PA_20)
8376 	{
8377 	  pa_output_arg_descriptor (insn);
8378 	  output_asm_insn ("bl .+8,%%r2\n\t"
8379 			   "ldo 24(%%r2),%%r2\n\t"
8380 			   "stw %%r2,-24(%%sp)\n\t"
8381 			   "extru,<> %%r22,30,1,%%r0\n\t"
8382 			   "bve,n (%%r22)\n\t"
8383 			   "ldw -2(%%r22),%%r21\n\t"
8384 			   "bve (%%r21)\n\t"
8385 			   "ldw 2(%%r22),%%r19", xoperands);
8386 	  return "";
8387 	}
8388     }
8389 
8390   /* We need a long PIC call to $$dyncall.  */
8391   xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall");
8392   xoperands[1] = gen_rtx_REG (Pmode, 2);
8393   xoperands[2] = gen_rtx_REG (Pmode, 1);
8394   pa_output_pic_pcrel_sequence (xoperands);
8395   pa_output_arg_descriptor (insn);
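  /* As in the {bl|b,l} template above, the braces in {12|20} select
     between the two assembler dialects, here choosing the return-pointer
     adjustment that matches the emitted sequence.  */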
8396   return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2";
8397 }
8398 
8399 /* In HP-UX 8.0's shared library scheme, special relocations are needed
8400    for function labels if they might be passed to a function
8401    in a shared library (because shared libraries don't live in code
8402    space), and special magic is needed to construct their address.  */
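/* For example, the symbol "foo" is rewritten as "@foo" below;
   pa_strip_name_encoding undoes this for assembler output.  */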
8403 
8404 void
8405 pa_encode_label (rtx sym)
8406 {
8407   const char *str = XSTR (sym, 0);
8408   int len = strlen (str) + 1;
8409   char *newstr, *p;
8410 
8411   p = newstr = XALLOCAVEC (char, len + 1);
8412   *p++ = '@';
8413   strcpy (p, str);
8414 
8415   XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8416 }
8417 
8418 static void
8419 pa_encode_section_info (tree decl, rtx rtl, int first)
8420 {
8421   int old_referenced = 0;
8422 
8423   if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8424     old_referenced
8425       = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8426 
8427   default_encode_section_info (decl, rtl, first);
8428 
8429   if (first && TEXT_SPACE_P (decl))
8430     {
8431       SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8432       if (TREE_CODE (decl) == FUNCTION_DECL)
8433 	pa_encode_label (XEXP (rtl, 0));
8434     }
8435   else if (old_referenced)
8436     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8437 }
8438 
8439 /* This is sort of inverse to pa_encode_section_info.  */
8440 
8441 static const char *
8442 pa_strip_name_encoding (const char *str)
8443 {
8444   str += (*str == '@');
8445   str += (*str == '*');
8446   return str;
8447 }
8448 
8449 /* Returns 1 if OP is a function label involved in a simple addition
8450    with a constant.  Used to keep certain patterns from matching
8451    during instruction combination.  */
8452 int
8453 pa_is_function_label_plus_const (rtx op)
8454 {
8455   /* Strip off any CONST.  */
8456   if (GET_CODE (op) == CONST)
8457     op = XEXP (op, 0);
8458 
8459   return (GET_CODE (op) == PLUS
8460 	  && function_label_operand (XEXP (op, 0), VOIDmode)
8461 	  && GET_CODE (XEXP (op, 1)) == CONST_INT);
8462 }
8463 
8464 /* Output the assembler code for a thunk function.  THUNK_DECL is the
8465    declaration for the thunk function itself, FUNCTION is the decl for
8466    the target function.  DELTA is an immediate constant offset to be
8467    added to THIS.  If VCALL_OFFSET is nonzero, the word at
8468    *(*this + vcall_offset) should be added to THIS.  */
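/* That is, the generated thunk adjusts the incoming THIS pointer in
   %r26, the first argument register:

     THIS += DELTA;
     if (VCALL_OFFSET)
       THIS += *(*THIS + VCALL_OFFSET);

   and then branches directly to FUNCTION.  */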
8469 
8470 static void
8471 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8472 			HOST_WIDE_INT vcall_offset, tree function)
8473 {
8474   const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
8475   static unsigned int current_thunk_number;
8476   int val_14 = VAL_14_BITS_P (delta);
8477   unsigned int old_last_address = last_address, nbytes = 0;
8478   char label[17];
8479   rtx xoperands[4];
8480 
8481   xoperands[0] = XEXP (DECL_RTL (function), 0);
8482   xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8483   xoperands[2] = GEN_INT (delta);
8484 
8485   assemble_start_function (thunk_fndecl, fnname);
8486   final_start_function (emit_barrier (), file, 1);
8487 
8488   if (!vcall_offset)
8489     {
8490       /* Output the thunk.  We know that the function is in the same
8491 	 translation unit (i.e., the same space) as the thunk, and that
8492 	 thunks are output after their method.  Thus, we don't need an
8493 	 external branch to reach the function.  With SOM and GAS,
8494 	 functions and thunks are effectively in different sections.
8495 	 Thus, we can always use an IA-relative branch and the linker
8496 	 will add a long branch stub if necessary.
8497 
8498 	 However, we have to be careful when generating PIC code on the
8499 	 SOM port to ensure that the sequence does not transfer to an
8500 	 import stub for the target function as this could clobber the
8501 	 return value saved at SP-24.  This would also apply to the
8502 	 32-bit Linux port if the multi-space model is implemented.  */
8503       if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8504 	   && !(flag_pic && TREE_PUBLIC (function))
8505 	   && (TARGET_GAS || last_address < 262132))
8506 	  || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8507 	      && ((targetm_common.have_named_sections
8508 		   && DECL_SECTION_NAME (thunk_fndecl) != NULL
8509 		   /* The GNU 64-bit linker has rather poor stub management.
8510 		      So, we use a long branch from thunks that aren't in
8511 		      the same section as the target function.  */
8512 		    && ((!TARGET_64BIT
8513 			 && (DECL_SECTION_NAME (thunk_fndecl)
8514 			     != DECL_SECTION_NAME (function)))
8515 			|| ((DECL_SECTION_NAME (thunk_fndecl)
8516 			     == DECL_SECTION_NAME (function))
8517 			    && last_address < 262132)))
8518 		  /* In this case, we need to be able to reach the start of
8519 		     the stub table even though the function is likely closer
8520 		     and can be jumped to directly.  */
8521 		  || (targetm_common.have_named_sections
8522 		      && DECL_SECTION_NAME (thunk_fndecl) == NULL
8523 		      && DECL_SECTION_NAME (function) == NULL
8524 		      && total_code_bytes < MAX_PCREL17F_OFFSET)
8525 		  /* Likewise.  */
8526 		  || (!targetm_common.have_named_sections
8527 		      && total_code_bytes < MAX_PCREL17F_OFFSET))))
8528 	{
8529 	  if (!val_14)
8530 	    output_asm_insn ("addil L'%2,%%r26", xoperands);
8531 
8532 	  output_asm_insn ("b %0", xoperands);
8533 
8534 	  if (val_14)
8535 	    {
8536 	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8537 	      nbytes += 8;
8538 	    }
8539 	  else
8540 	    {
8541 	      output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8542 	      nbytes += 12;
8543 	    }
8544 	}
8545       else if (TARGET_64BIT)
8546 	{
8547 	  rtx xop[4];
8548 
8549 	  /* We only have one call-clobbered scratch register, so we can't
8550 	     make use of the delay slot if delta doesn't fit in 14 bits.  */
8551 	  if (!val_14)
8552 	    {
8553 	      output_asm_insn ("addil L'%2,%%r26", xoperands);
8554 	      output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8555 	    }
8556 
8557 	  /* Load function address into %r1.  */
8558 	  xop[0] = xoperands[0];
8559 	  xop[1] = gen_rtx_REG (Pmode, 1);
8560 	  xop[2] = xop[1];
8561 	  pa_output_pic_pcrel_sequence (xop);
8562 
8563 	  if (val_14)
8564 	    {
8565 	      output_asm_insn ("bv %%r0(%%r1)", xoperands);
8566 	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8567 	      nbytes += 20;
8568 	    }
8569 	  else
8570 	    {
8571 	      output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8572 	      nbytes += 24;
8573 	    }
8574 	}
8575       else if (TARGET_PORTABLE_RUNTIME)
8576 	{
8577 	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
8578 	  output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8579 
8580 	  if (!val_14)
8581 	    output_asm_insn ("ldil L'%2,%%r26", xoperands);
8582 
8583 	  output_asm_insn ("bv %%r0(%%r22)", xoperands);
8584 
8585 	  if (val_14)
8586 	    {
8587 	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8588 	      nbytes += 16;
8589 	    }
8590 	  else
8591 	    {
8592 	      output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands);
8593 	      nbytes += 20;
8594 	    }
8595 	}
8596       else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8597 	{
8598 	  /* The function is accessible from outside this module.  The only
8599 	     way to avoid an import stub between the thunk and function is to
8600 	     call the function directly with an indirect sequence similar to
8601 	     that used by $$dyncall.  This is possible because $$dyncall acts
8602 	     as the import stub in an indirect call.  */
8603 	  ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8604 	  xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8605 	  output_asm_insn ("addil LT'%3,%%r19", xoperands);
8606 	  output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8607 	  output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8608 	  output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8609 	  output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8610 	  output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8611 	  output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8612 
8613 	  if (!val_14)
8614 	    {
8615 	      output_asm_insn ("addil L'%2,%%r26", xoperands);
8616 	      nbytes += 4;
8617 	    }
8618 
8619 	  if (TARGET_PA_20)
8620 	    {
8621 	      output_asm_insn ("bve (%%r22)", xoperands);
8622 	      nbytes += 36;
8623 	    }
8624 	  else if (TARGET_NO_SPACE_REGS)
8625 	    {
8626 	      output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8627 	      nbytes += 36;
8628 	    }
8629 	  else
8630 	    {
8631 	      output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8632 	      output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8633 	      output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8634 	      nbytes += 44;
8635 	    }
8636 
8637 	  if (val_14)
8638 	    output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8639 	  else
8640 	    output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8641 	}
8642       else if (flag_pic)
8643 	{
8644 	  rtx xop[4];
8645 
8646 	  /* Load function address into %r22.  */
8647 	  xop[0] = xoperands[0];
8648 	  xop[1] = gen_rtx_REG (Pmode, 1);
8649 	  xop[2] = gen_rtx_REG (Pmode, 22);
8650 	  pa_output_pic_pcrel_sequence (xop);
8651 
8652 	  if (!val_14)
8653 	    output_asm_insn ("addil L'%2,%%r26", xoperands);
8654 
8655 	  output_asm_insn ("bv %%r0(%%r22)", xoperands);
8656 
8657 	  if (val_14)
8658 	    {
8659 	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8660 	      nbytes += 20;
8661 	    }
8662 	  else
8663 	    {
8664 	      output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8665 	      nbytes += 24;
8666 	    }
8667 	}
8668       else
8669 	{
8670 	  if (!val_14)
8671 	    output_asm_insn ("addil L'%2,%%r26", xoperands);
8672 
8673 	  output_asm_insn ("ldil L'%0,%%r22", xoperands);
8674 	  output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8675 
8676 	  if (val_14)
8677 	    {
8678 	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8679 	      nbytes += 12;
8680 	    }
8681 	  else
8682 	    {
8683 	      output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8684 	      nbytes += 16;
8685 	    }
8686 	}
8687     }
8688   else
8689     {
8690       rtx xop[4];
8691 
8692       /* Add DELTA to THIS.  */
8693       if (val_14)
8694 	{
8695 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8696 	  nbytes += 4;
8697 	}
8698       else
8699 	{
8700 	  output_asm_insn ("addil L'%2,%%r26", xoperands);
8701 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8702 	  nbytes += 8;
8703 	}
8704 
8705       if (TARGET_64BIT)
8706 	{
8707 	  /* Load *(THIS + DELTA) to %r1.  */
8708 	  output_asm_insn ("ldd 0(%%r26),%%r1", xoperands);
8709 
8710 	  val_14 = VAL_14_BITS_P (vcall_offset);
8711 	  xoperands[2] = GEN_INT (vcall_offset);
8712 
8713 	  /* Load  *(*(THIS + DELTA) + VCALL_OFFSET) to %r1.  */
8714 	  if (val_14)
8715 	    {
8716 	      output_asm_insn ("ldd %2(%%r1),%%r1", xoperands);
8717 	      nbytes += 8;
8718 	    }
8719 	  else
8720 	    {
8721 	      output_asm_insn ("addil L'%2,%%r1", xoperands);
8722 	      output_asm_insn ("ldd R'%2(%%r1),%%r1", xoperands);
8723 	      nbytes += 12;
8724 	    }
8725 	}
8726       else
8727 	{
8728 	  /* Load *(THIS + DELTA) to %r1.  */
8729 	  output_asm_insn ("ldw 0(%%r26),%%r1", xoperands);
8730 
8731 	  val_14 = VAL_14_BITS_P (vcall_offset);
8732 	  xoperands[2] = GEN_INT (vcall_offset);
8733 
8734 	  /* Load  *(*(THIS + DELTA) + VCALL_OFFSET) to %r1.  */
8735 	  if (val_14)
8736 	    {
8737 	      output_asm_insn ("ldw %2(%%r1),%%r1", xoperands);
8738 	      nbytes += 8;
8739 	    }
8740 	  else
8741 	    {
8742 	      output_asm_insn ("addil L'%2,%%r1", xoperands);
8743 	      output_asm_insn ("ldw R'%2(%%r1),%%r1", xoperands);
8744 	      nbytes += 12;
8745 	    }
8746 	}
8747 
8748       /* Branch to FUNCTION and add %r1 to THIS in delay slot if possible.  */
8749       if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8750 	   && !(flag_pic && TREE_PUBLIC (function))
8751 	   && (TARGET_GAS || last_address < 262132))
8752 	  || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8753 	      && ((targetm_common.have_named_sections
8754 		   && DECL_SECTION_NAME (thunk_fndecl) != NULL
8755 		   /* The GNU 64-bit linker has rather poor stub management.
8756 		      So, we use a long branch from thunks that aren't in
8757 		      the same section as the target function.  */
8758 		    && ((!TARGET_64BIT
8759 			 && (DECL_SECTION_NAME (thunk_fndecl)
8760 			     != DECL_SECTION_NAME (function)))
8761 			|| ((DECL_SECTION_NAME (thunk_fndecl)
8762 			     == DECL_SECTION_NAME (function))
8763 			    && last_address < 262132)))
8764 		  /* In this case, we need to be able to reach the start of
8765 		     the stub table even though the function is likely closer
8766 		     and can be jumped to directly.  */
8767 		  || (targetm_common.have_named_sections
8768 		      && DECL_SECTION_NAME (thunk_fndecl) == NULL
8769 		      && DECL_SECTION_NAME (function) == NULL
8770 		      && total_code_bytes < MAX_PCREL17F_OFFSET)
8771 		  /* Likewise.  */
8772 		  || (!targetm_common.have_named_sections
8773 		      && total_code_bytes < MAX_PCREL17F_OFFSET))))
8774 	{
8775 	  nbytes += 4;
8776 	  output_asm_insn ("b %0", xoperands);
8777 
8778 	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
8779 	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8780 	}
8781       else if (TARGET_64BIT)
8782 	{
8783 	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
8784 	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8785 
8786 	  /* Load function address into %r1.  */
8787 	  nbytes += 16;
8788 	  xop[0] = xoperands[0];
8789 	  xop[1] = gen_rtx_REG (Pmode, 1);
8790 	  xop[2] = xop[1];
8791 	  pa_output_pic_pcrel_sequence (xop);
8792 
8793 	  output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8794 	}
8795       else if (TARGET_PORTABLE_RUNTIME)
8796 	{
8797 	  /* Load function address into %r22.  */
8798 	  nbytes += 12;
8799 	  output_asm_insn ("ldil L'%0,%%r22", xoperands);
8800 	  output_asm_insn ("ldo R'%0(%%r22),%%r22", xoperands);
8801 
8802 	  output_asm_insn ("bv %%r0(%%r22)", xoperands);
8803 
8804 	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
8805 	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8806 	}
8807       else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8808 	{
8809 	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
8810 	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8811 
8812 	  /* The function is accessible from outside this module.  The only
8813 	     way to avoid an import stub between the thunk and function is to
8814 	     call the function directly with an indirect sequence similar to
8815 	     that used by $$dyncall.  This is possible because $$dyncall acts
8816 	     as the import stub in an indirect call.  */
8817 	  ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8818 	  xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8819 	  output_asm_insn ("addil LT'%3,%%r19", xoperands);
8820 	  output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8821 	  output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8822 	  output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8823 	  output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8824 	  output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8825 	  output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8826 
8827 	  if (TARGET_PA_20)
8828 	    {
8829 	      output_asm_insn ("bve,n (%%r22)", xoperands);
8830 	      nbytes += 32;
8831 	    }
8832 	  else if (TARGET_NO_SPACE_REGS)
8833 	    {
8834 	      output_asm_insn ("be,n 0(%%sr4,%%r22)", xoperands);
8835 	      nbytes += 32;
8836 	    }
8837 	  else
8838 	    {
8839 	      output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8840 	      output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8841 	      output_asm_insn ("be,n 0(%%sr0,%%r22)", xoperands);
8842 	      nbytes += 40;
8843 	    }
8844 	}
8845       else if (flag_pic)
8846 	{
8847 	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
8848 	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8849 
8850 	  /* Load function address into %r1.  */
8851 	  nbytes += 16;
8852 	  xop[0] = xoperands[0];
8853 	  xop[1] = gen_rtx_REG (Pmode, 1);
8854 	  xop[2] = xop[1];
8855 	  pa_output_pic_pcrel_sequence (xop);
8856 
8857 	  output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8858 	}
8859       else
8860 	{
8861 	  /* Load function address into %r22.  */
8862 	  nbytes += 8;
8863 	  output_asm_insn ("ldil L'%0,%%r22", xoperands);
8864 	  output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8865 
8866 	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
8867 	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
8868 	}
8869     }
8870 
8871   final_end_function ();
8872 
8873   if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8874     {
8875       switch_to_section (data_section);
8876       output_asm_insn (".align 4", xoperands);
8877       ASM_OUTPUT_LABEL (file, label);
8878       output_asm_insn (".word P'%0", xoperands);
8879     }
8880 
8881   current_thunk_number++;
8882   nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8883 	    & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8884   last_address += nbytes;
8885   if (old_last_address > last_address)
8886     last_address = UINT_MAX;
8887   update_total_code_bytes (nbytes);
8888   assemble_end_function (thunk_fndecl, fnname);
8889 }
8890 
8891 /* Only direct calls to static functions are allowed to be sibling (tail)
8892    call optimized.
8893 
8894    This restriction is necessary because some linker generated stubs will
8895    store return pointers into rp' in some cases which might clobber a
8896    live value already in rp'.
8897 
8898    In a sibcall the current function and the target function share stack
8899    space.  Thus if the path to the current function and the path to the
8900    target function save a value in rp', they save the value into the
8901    same stack slot, which has undesirable consequences.
8902 
8903    Because of the deferred binding nature of shared libraries any function
8904    with external scope could be in a different load module and thus require
8905    rp' to be saved when calling that function.  So sibcall optimizations
8906    can only be safe for static functions.
8907 
8908    Note that GCC never needs return value relocations, so we don't have to
8909    worry about static calls with return value relocations (which require
8910    saving rp').
8911 
8912    It is safe to perform a sibcall optimization when the target function
8913    will never return.  */
8914 static bool
8915 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8916 {
8917   /* Sibcalls are not ok because the arg pointer register is not a fixed
8918      register.  This prevents the sibcall optimization from occurring.  In
8919      addition, there are problems with stub placement using GNU ld.  This
8920      is because a normal sibcall branch uses a 17-bit relocation while
8921      a regular call branch uses a 22-bit relocation.  As a result, more
8922      care needs to be taken in the placement of long-branch stubs.  */
8923   if (TARGET_64BIT)
8924     return false;
8925 
8926   if (TARGET_PORTABLE_RUNTIME)
8927     return false;
8928 
8929   /* Sibcalls are only ok within a translation unit.  */
8930   return decl && targetm.binds_local_p (decl);
8931 }
8932 
8933 /* ??? Addition is not commutative on the PA due to the weird implicit
8934    space register selection rules for memory addresses.  Therefore, we
8935    don't consider a + b == b + a, as this might be inside a MEM.  */
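/* (The space register for a memory reference is selected from the base
   register of the address, so swapping the operands of a PLUS inside a
   MEM could change which space is addressed.)  */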
8936 static bool
8937 pa_commutative_p (const_rtx x, int outer_code)
8938 {
8939   return (COMMUTATIVE_P (x)
8940 	  && (TARGET_NO_SPACE_REGS
8941 	      || (outer_code != UNKNOWN && outer_code != MEM)
8942 	      || GET_CODE (x) != PLUS));
8943 }
8944 
8945 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8946    use in fmpyadd instructions.  */
8947 int
8948 pa_fmpyaddoperands (rtx *operands)
8949 {
8950   machine_mode mode = GET_MODE (operands[0]);
8951 
8952   /* Must be a floating point mode.  */
8953   if (mode != SFmode && mode != DFmode)
8954     return 0;
8955 
8956   /* All modes must be the same.  */
8957   if (! (mode == GET_MODE (operands[1])
8958 	 && mode == GET_MODE (operands[2])
8959 	 && mode == GET_MODE (operands[3])
8960 	 && mode == GET_MODE (operands[4])
8961 	 && mode == GET_MODE (operands[5])))
8962     return 0;
8963 
8964   /* All operands must be registers.  */
8965   if (! (GET_CODE (operands[1]) == REG
8966 	 && GET_CODE (operands[2]) == REG
8967 	 && GET_CODE (operands[3]) == REG
8968 	 && GET_CODE (operands[4]) == REG
8969 	 && GET_CODE (operands[5]) == REG))
8970     return 0;
8971 
8972   /* Only 2 real operands to the addition.  One of the input operands must
8973      be the same as the output operand.  */
8974   if (! rtx_equal_p (operands[3], operands[4])
8975       && ! rtx_equal_p (operands[3], operands[5]))
8976     return 0;
8977 
8978   /* Inout operand of add cannot conflict with any operands from multiply.  */
8979   if (rtx_equal_p (operands[3], operands[0])
8980      || rtx_equal_p (operands[3], operands[1])
8981      || rtx_equal_p (operands[3], operands[2]))
8982     return 0;
8983 
8984   /* The output of the multiply cannot be an input to the addition.  */
8985   if (rtx_equal_p (operands[4], operands[0])
8986       || rtx_equal_p (operands[5], operands[0]))
8987     return 0;
8988 
8989   /* SFmode limits the registers to the upper 32 of the 32-bit FP regs.  */
8990   if (mode == SFmode
8991       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8992 	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8993 	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8994 	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8995 	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8996 	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8997     return 0;
8998 
8999   /* Passed.  Operands are suitable for fmpyadd.  */
9000   return 1;
9001 }
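/* For illustration, the operand layout checked above corresponds to
   the pair

     op0 = op1 * op2		(the fmpy)
     op3 = op4 + op5		(the fadd, with op3 equal to op4 or op5)

   subject to the independence restrictions verified above.  */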
9002 
9003 #if !defined(USE_COLLECT2)
9004 static void
9005 pa_asm_out_constructor (rtx symbol, int priority)
9006 {
9007   if (!function_label_operand (symbol, VOIDmode))
9008     pa_encode_label (symbol);
9009 
9010 #ifdef CTORS_SECTION_ASM_OP
9011   default_ctor_section_asm_out_constructor (symbol, priority);
9012 #else
9013 # ifdef TARGET_ASM_NAMED_SECTION
9014   default_named_section_asm_out_constructor (symbol, priority);
9015 # else
9016   default_stabs_asm_out_constructor (symbol, priority);
9017 # endif
9018 #endif
9019 }
9020 
9021 static void
9022 pa_asm_out_destructor (rtx symbol, int priority)
9023 {
9024   if (!function_label_operand (symbol, VOIDmode))
9025     pa_encode_label (symbol);
9026 
9027 #ifdef DTORS_SECTION_ASM_OP
9028   default_dtor_section_asm_out_destructor (symbol, priority);
9029 #else
9030 # ifdef TARGET_ASM_NAMED_SECTION
9031   default_named_section_asm_out_destructor (symbol, priority);
9032 # else
9033   default_stabs_asm_out_destructor (symbol, priority);
9034 # endif
9035 #endif
9036 }
9037 #endif
9038 
9039 /* This function places uninitialized global data in the bss section.
9040    The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
9041    function on the SOM port to prevent uninitialized global data from
9042    being placed in the data section.  */
9043 
9044 void
9045 pa_asm_output_aligned_bss (FILE *stream,
9046 			   const char *name,
9047 			   unsigned HOST_WIDE_INT size,
9048 			   unsigned int align)
9049 {
9050   switch_to_section (bss_section);
9051 
9052 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
9053   ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
9054 #endif
9055 
9056 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
9057   ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
9058 #endif
9059 
9060   fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
9061   ASM_OUTPUT_LABEL (stream, name);
9062   fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
9063 }
9064 
9065 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
9066    that doesn't allow the alignment of global common storage to be directly
9067    specified.  The SOM linker aligns common storage based on the rounded
9068    value of the NUM_BYTES parameter in the .comm directive.  It's not
9069    possible to use the .align directive as it doesn't affect the alignment
9070    of the label associated with a .comm directive.  */
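/* For example, a request for 4 bytes of common storage with 16-byte
   alignment is emitted below as "name\t.comm 16"; the linker then
   derives the alignment from the rounded size.  */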
9071 
9072 void
9073 pa_asm_output_aligned_common (FILE *stream,
9074 			      const char *name,
9075 			      unsigned HOST_WIDE_INT size,
9076 			      unsigned int align)
9077 {
9078   unsigned int max_common_align;
9079 
9080   max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
9081   if (align > max_common_align)
9082     {
9083       warning (0, "alignment (%u) for %s exceeds maximum alignment "
9084 	       "for global common data.  Using %u",
9085 	       align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
9086       align = max_common_align;
9087     }
9088 
9089   switch_to_section (bss_section);
9090 
9091   assemble_name (stream, name);
9092   fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
9093            MAX (size, align / BITS_PER_UNIT));
9094 }
9095 
9096 /* We can't use .comm for local common storage as the SOM linker effectively
9097    treats the symbol as universal and uses the same storage for local symbols
9098    with the same name in different object files.  The .block directive
9099    reserves an uninitialized block of storage.  However, it's not common
9100    storage.  Fortunately, GCC never requests common storage with the same
9101    name in any given translation unit.  */
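/* The sequence emitted below is an .align for the requested alignment,
   an optional local-symbol directive when LOCAL_ASM_OP is defined, the
   label itself, and a .block reserving SIZE bytes.  */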
9102 
9103 void
9104 pa_asm_output_aligned_local (FILE *stream,
9105 			     const char *name,
9106 			     unsigned HOST_WIDE_INT size,
9107 			     unsigned int align)
9108 {
9109   switch_to_section (bss_section);
9110   fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
9111 
9112 #ifdef LOCAL_ASM_OP
9113   fprintf (stream, "%s", LOCAL_ASM_OP);
9114   assemble_name (stream, name);
9115   fprintf (stream, "\n");
9116 #endif
9117 
9118   ASM_OUTPUT_LABEL (stream, name);
9119   fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
9120 }
9121 
9122 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
9123    use in fmpysub instructions.  */
9124 int
9125 pa_fmpysuboperands (rtx *operands)
9126 {
9127   machine_mode mode = GET_MODE (operands[0]);
9128 
9129   /* Must be a floating point mode.  */
9130   if (mode != SFmode && mode != DFmode)
9131     return 0;
9132 
9133   /* All modes must be the same.  */
9134   if (! (mode == GET_MODE (operands[1])
9135 	 && mode == GET_MODE (operands[2])
9136 	 && mode == GET_MODE (operands[3])
9137 	 && mode == GET_MODE (operands[4])
9138 	 && mode == GET_MODE (operands[5])))
9139     return 0;
9140 
9141   /* All operands must be registers.  */
9142   if (! (GET_CODE (operands[1]) == REG
9143 	 && GET_CODE (operands[2]) == REG
9144 	 && GET_CODE (operands[3]) == REG
9145 	 && GET_CODE (operands[4]) == REG
9146 	 && GET_CODE (operands[5]) == REG))
9147     return 0;
9148 
9149   /* Only 2 real operands to the subtraction.  Subtraction is not a commutative
9150      operation, so operands[4] must be the same as operands[3].  */
9151   if (! rtx_equal_p (operands[3], operands[4]))
9152     return 0;
9153 
9154   /* The output of the multiply cannot be an input to the subtraction.  */
9155   if (rtx_equal_p (operands[5], operands[0]))
9156     return 0;
9157 
9158   /* Inout operand of sub cannot conflict with any operands from multiply.  */
9159   if (rtx_equal_p (operands[3], operands[0])
9160      || rtx_equal_p (operands[3], operands[1])
9161      || rtx_equal_p (operands[3], operands[2]))
9162     return 0;
9163 
9164   /* SFmode limits the registers to the upper 32 of the 32-bit FP regs.  */
9165   if (mode == SFmode
9166       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
9167 	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
9168 	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
9169 	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
9170 	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
9171 	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
9172     return 0;
9173 
9174   /* Passed.  Operands are suitable for fmpysub.  */
9175   return 1;
9176 }
9177 
9178 /* Return 1 if the given constant is 2, 4, or 8.  These are the valid
9179    constants for a MULT embedded inside a memory address.  */
9180 int
9181 pa_mem_shadd_constant_p (int val)
9182 {
9183   if (val == 2 || val == 4 || val == 8)
9184     return 1;
9185   else
9186     return 0;
9187 }
9188 
9189 /* Return 1 if the given constant is 1, 2, or 3.  These are the valid
9190    constants for shadd instructions.  */
9191 int
9192 pa_shadd_constant_p (int val)
9193 {
9194   if (val == 1 || val == 2 || val == 3)
9195     return 1;
9196   else
9197     return 0;
9198 }
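/* The two predicates above are related: a MULT by 2, 4, or 8 inside a
   memory address is implemented with a shift of 1, 2, or 3, i.e., the
   sh1add, sh2add, and sh3add forms.  */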
9199 
9200 /* Return TRUE if INSN branches forward.  */
9201 
9202 static bool
9203 forward_branch_p (rtx_insn *insn)
9204 {
9205   rtx lab = JUMP_LABEL (insn);
9206 
9207   /* The INSN must have a jump label.  */
9208   gcc_assert (lab != NULL_RTX);
9209 
9210   if (INSN_ADDRESSES_SET_P ())
9211     return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
9212 
9213   while (insn)
9214     {
9215       if (insn == lab)
9216 	return true;
9217       else
9218 	insn = NEXT_INSN (insn);
9219     }
9220 
9221   return false;
9222 }
9223 
9224 /* Output an unconditional move and branch insn.  */
9225 
9226 const char *
9227 pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
9228 {
9229   int length = get_attr_length (insn);
9230 
9231   /* These are the cases in which we win.  */
9232   if (length == 4)
9233     return "mov%I1b,tr %1,%0,%2";
9234 
9235   /* None of the following cases win, but they don't lose either.  */
9236   if (length == 8)
9237     {
9238       if (dbr_sequence_length () == 0)
9239 	{
9240 	  /* Nothing in the delay slot, fake it by putting the combined
9241 	     insn (the copy or add) in the delay slot of a bl.  */
9242 	  if (GET_CODE (operands[1]) == CONST_INT)
9243 	    return "b %2\n\tldi %1,%0";
9244 	  else
9245 	    return "b %2\n\tcopy %1,%0";
9246 	}
9247       else
9248 	{
9249 	  /* Something in the delay slot, but we've got a long branch.  */
9250 	  if (GET_CODE (operands[1]) == CONST_INT)
9251 	    return "ldi %1,%0\n\tb %2";
9252 	  else
9253 	    return "copy %1,%0\n\tb %2";
9254 	}
9255     }
9256 
9257   if (GET_CODE (operands[1]) == CONST_INT)
9258     output_asm_insn ("ldi %1,%0", operands);
9259   else
9260     output_asm_insn ("copy %1,%0", operands);
9261   return pa_output_lbranch (operands[2], insn, 1);
9262 }
9263 
9264 /* Output an unconditional add and branch insn.  */
9265 
9266 const char *
9267 pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
9268 {
9269   int length = get_attr_length (insn);
9270 
9271   /* To make life easy we want operand0 to be the shared input/output
9272      operand and operand1 to be the readonly operand.  */
9273   if (operands[0] == operands[1])
9274     operands[1] = operands[2];
9275 
9276   /* These are the cases in which we win.  */
9277   if (length == 4)
9278     return "add%I1b,tr %1,%0,%3";
9279 
9280   /* None of the following cases win, but they don't lose either.  */
9281   if (length == 8)
9282     {
9283       if (dbr_sequence_length () == 0)
9284 	/* Nothing in the delay slot, fake it by putting the combined
9285 	   insn (the copy or add) in the delay slot of a bl.  */
9286 	return "b %3\n\tadd%I1 %1,%0,%0";
9287       else
9288 	/* Something in the delay slot, but we've got a long branch.  */
9289 	return "add%I1 %1,%0,%0\n\tb %3";
9290     }
9291 
9292   output_asm_insn ("add%I1 %1,%0,%0", operands);
9293   return pa_output_lbranch (operands[3], insn, 1);
9294 }
9295 
9296 /* We use this hook to perform a PA specific optimization which is difficult
9297    to do in earlier passes.  */
9298 
9299 static void
9300 pa_reorg (void)
9301 {
9302   remove_useless_addtr_insns (1);
9303 
9304   if (pa_cpu < PROCESSOR_8000)
9305     pa_combine_instructions ();
9306 }
9307 
9308 /* The PA has a number of odd instructions which can perform multiple
9309    tasks at once.  On first generation PA machines (PA1.0 and PA1.1)
9310    it may be profitable to combine two instructions into one instruction
9311    with two outputs.  It's not profitable on PA2.0 machines because the
9312    two outputs would take two slots in the reorder buffers.
9313 
9314    This routine finds instructions which can be combined and combines
9315    them.  We only support some of the potential combinations, and we
9316    only try common ways to find suitable instructions.
9317 
9318       * addb can add two registers or a register and a small integer
9319       and jump to a nearby (+-8k) location.  Normally the jump to the
9320       nearby location is conditional on the result of the add, but by
9321       using the "true" condition we can make the jump unconditional.
9322       Thus addb can perform two independent operations in one insn.
9323 
9324       * movb is similar to addb in that it can perform a reg->reg
9325       or small immediate->reg copy and jump to a nearby (+-8k) location.
9326 
9327       * fmpyadd and fmpysub can perform a FP multiply and either an
9328       FP add or FP sub if the operands of the multiply and add/sub are
9329       independent (there are other minor restrictions).  Note both
9330       the fmpy and fadd/fsub can in theory move to better spots according
9331       to data dependencies, but for now we require the fmpy stay at a
9332       fixed location.
9333 
9334       * Many of the memory operations can perform pre & post updates
9335       of index registers.  GCC's pre/post increment/decrement addressing
9336       is far too simple to take advantage of all the possibilities.  This
9337       pass may not be suitable since those insns may not be independent.
9338 
9339       * comclr can compare two ints or an int and a register, nullify
9340       the following instruction and zero some other register.  This
9341       is more difficult to use as it's harder to find an insn which
9342       will generate a comclr than finding something like an unconditional
9343       branch.  (conditional moves & long branches create comclr insns).
9344 
9345       * Most arithmetic operations can conditionally skip the next
9346       instruction.  They can be viewed as "perform this operation
9347       and conditionally jump to this nearby location" (where nearby
9348       is an insn away).  These are difficult to use due to the
9349       branch length restrictions.  */
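/* For example, on a PA1.1 machine an fmpy followed (not necessarily
   immediately) by an independent fadd can be rewritten by this pass as
   a single two-output fmpyadd insn.  */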
9350 
9351 static void
9352 pa_combine_instructions (void)
9353 {
9354   rtx_insn *anchor;
9355 
9356   /* This can get expensive since the basic algorithm is on the
9357      order of O(n^2) (or worse).  Only do it for -O2 or higher
9358      levels of optimization.  */
9359   if (optimize < 2)
9360     return;
9361 
9362   /* Walk down the list of insns looking for "anchor" insns which
9363      may be combined with "floating" insns.  As the name implies,
9364      "anchor" instructions don't move, while "floating" insns may
9365      move around.  */
9366   rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9367   rtx_insn *new_rtx = make_insn_raw (par);
9368 
9369   for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9370     {
9371       enum attr_pa_combine_type anchor_attr;
9372       enum attr_pa_combine_type floater_attr;
9373 
9374       /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9375 	 Also ignore any special USE insns.  */
9376       if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
9377 	  || GET_CODE (PATTERN (anchor)) == USE
9378 	  || GET_CODE (PATTERN (anchor)) == CLOBBER)
9379 	continue;
9380 
9381       anchor_attr = get_attr_pa_combine_type (anchor);
9382       /* See if anchor is an insn suitable for combination.  */
9383       if (anchor_attr == PA_COMBINE_TYPE_FMPY
9384 	  || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9385 	  || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9386 	      && ! forward_branch_p (anchor)))
9387 	{
9388 	  rtx_insn *floater;
9389 
9390 	  for (floater = PREV_INSN (anchor);
9391 	       floater;
9392 	       floater = PREV_INSN (floater))
9393 	    {
9394 	      if (NOTE_P (floater)
9395 		  || (NONJUMP_INSN_P (floater)
9396 		      && (GET_CODE (PATTERN (floater)) == USE
9397 			  || GET_CODE (PATTERN (floater)) == CLOBBER)))
9398 		continue;
9399 
9400 	      /* Anything except a regular INSN will stop our search.  */
9401 	      if (! NONJUMP_INSN_P (floater))
9402 		{
9403 		  floater = NULL;
9404 		  break;
9405 		}
9406 
9407 	      /* See if FLOATER is suitable for combination with the
9408 		 anchor.  */
9409 	      floater_attr = get_attr_pa_combine_type (floater);
9410 	      if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9411 		   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9412 		  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9413 		      && floater_attr == PA_COMBINE_TYPE_FMPY))
9414 		{
9415 		  /* If ANCHOR and FLOATER can be combined, then we're
9416 		     done with this pass.  */
9417 		  if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9418 					SET_DEST (PATTERN (floater)),
9419 					XEXP (SET_SRC (PATTERN (floater)), 0),
9420 					XEXP (SET_SRC (PATTERN (floater)), 1)))
9421 		    break;
9422 		}
9423 
9424 	      else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9425 		       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9426 		{
9427 		  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9428 		    {
9429 		      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9430 					    SET_DEST (PATTERN (floater)),
9431 					XEXP (SET_SRC (PATTERN (floater)), 0),
9432 					XEXP (SET_SRC (PATTERN (floater)), 1)))
9433 			break;
9434 		    }
9435 		  else
9436 		    {
9437 		      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9438 					    SET_DEST (PATTERN (floater)),
9439 					    SET_SRC (PATTERN (floater)),
9440 					    SET_SRC (PATTERN (floater))))
9441 			break;
9442 		    }
9443 		}
9444 	    }
9445 
9446 	  /* If we didn't find anything on the backwards scan try forwards.  */
9447 	  if (!floater
9448 	      && (anchor_attr == PA_COMBINE_TYPE_FMPY
9449 		  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9450 	    {
9451 	      for (floater = anchor; floater; floater = NEXT_INSN (floater))
9452 		{
9453 		  if (NOTE_P (floater)
9454 		      || (NONJUMP_INSN_P (floater)
9455 			  && (GET_CODE (PATTERN (floater)) == USE
9456 			      || GET_CODE (PATTERN (floater)) == CLOBBER)))
9458 		    continue;
9459 
9460 		  /* Anything except a regular INSN will stop our search.  */
9461 		  if (! NONJUMP_INSN_P (floater))
9462 		    {
9463 		      floater = NULL;
9464 		      break;
9465 		    }
9466 
9467 		  /* See if FLOATER is suitable for combination with the
9468 		     anchor.  */
9469 		  floater_attr = get_attr_pa_combine_type (floater);
9470 		  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9471 		       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9472 		      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9473 			  && floater_attr == PA_COMBINE_TYPE_FMPY))
9474 		    {
9475 		      /* If ANCHOR and FLOATER can be combined, then we're
9476 			 done with this pass.  */
9477 		      if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9478 					    SET_DEST (PATTERN (floater)),
9479 					    XEXP (SET_SRC (PATTERN (floater)),
9480 						  0),
9481 					    XEXP (SET_SRC (PATTERN (floater)),
9482 						  1)))
9483 			break;
9484 		    }
9485 		}
9486 	    }
9487 
9488 	  /* FLOATER will be nonzero if we found a suitable floating
9489 	     insn for combination with ANCHOR.  */
9490 	  if (floater
9491 	      && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9492 		  || anchor_attr == PA_COMBINE_TYPE_FMPY))
9493 	    {
9494 	      /* Emit the new instruction and delete the old anchor.  */
9495 	      rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9496 				       copy_rtx (PATTERN (floater)));
9497 	      rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9498 	      emit_insn_before (temp, anchor);
9499 
9500 	      SET_INSN_DELETED (anchor);
9501 
9502 	      /* Emit a special USE insn for FLOATER, then delete
9503 		 the floating insn.  */
9504 	      temp = copy_rtx (PATTERN (floater));
9505 	      emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9506 	      delete_insn (floater);
9507 
9508 	      continue;
9509 	    }
9510 	  else if (floater
9511 		   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9512 	    {
9513 	      /* Emit the new_jump instruction and delete the old anchor.  */
9514 	      rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9515 				       copy_rtx (PATTERN (floater)));
9516 	      rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9517 	      temp = emit_jump_insn_before (temp, anchor);
9518 
9519 	      JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9520 	      SET_INSN_DELETED (anchor);
9521 
9522 	      /* Emit a special USE insn for FLOATER, then delete
9523 		 the floating insn.  */
9524 	      temp = copy_rtx (PATTERN (floater));
9525 	      emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9526 	      delete_insn (floater);
9527 	      continue;
9528 	    }
9529 	}
9530     }
9531 }
9532 
9533 static int
9534 pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
9535 		  int reversed, rtx dest,
9536 		  rtx src1, rtx src2)
9537 {
9538   int insn_code_number;
9539   rtx_insn *start, *end;
9540 
9541   /* Create a PARALLEL with the patterns of ANCHOR and
9542      FLOATER, try to recognize it, then test constraints
9543      for the resulting pattern.
9544 
9545      If the pattern doesn't match or the constraints
9546      aren't met keep searching for a suitable floater
9547      insn.  */
9548   XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9549   XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9550   INSN_CODE (new_rtx) = -1;
9551   insn_code_number = recog_memoized (new_rtx);
9552   basic_block bb = BLOCK_FOR_INSN (anchor);
9553   if (insn_code_number < 0
9554       || (extract_insn (new_rtx),
9555 	  !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
9556     return 0;
9557 
9558   if (reversed)
9559     {
9560       start = anchor;
9561       end = floater;
9562     }
9563   else
9564     {
9565       start = floater;
9566       end = anchor;
9567     }
9568 
9569   /* There are up to three operands to consider: one
9570      output and two inputs.
9571 
9572      The output must not be used between FLOATER and ANCHOR
9573      exclusive.  The inputs must not be set between
9574      FLOATER and ANCHOR exclusive.  */
9575 
9576   if (reg_used_between_p (dest, start, end))
9577     return 0;
9578 
9579   if (reg_set_between_p (src1, start, end))
9580     return 0;
9581 
9582   if (reg_set_between_p (src2, start, end))
9583     return 0;
9584 
9585   /* If we get here, then everything is good.  */
9586   return 1;
9587 }
9588 
9589 /* Return nonzero if references for INSN are delayed.
9590 
9591    Millicode insns are actually function calls with some special
9592    constraints on arguments and register usage.
9593 
9594    Millicode calls always expect their arguments in the integer argument
9595    registers, and always return their result in %r29 (ret1).  They
9596    are expected to clobber their arguments, %r1, %r29, and the return
9597    pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9598 
9599    This function tells reorg that the references to arguments by millicode
9600    calls do not appear to happen until after the millicode call.
9601    This allows reorg to put insns which set the argument registers into the
9602    delay slot of the millicode call -- thus they act more like traditional
9603    CALL_INSNs.
9604 
9605    Note we cannot consider side effects of the insn to be delayed because
9606    the branch and link insn will clobber the return pointer.  If we happened
9607    to use the return pointer in the delay slot of the call, then we lose.
9608 
9609    get_attr_type will try to recognize the given insn, so make sure to
9610    filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9611    in particular.  */
9612 int
9613 pa_insn_refs_are_delayed (rtx_insn *insn)
9614 {
9615   return ((NONJUMP_INSN_P (insn)
9616 	   && GET_CODE (PATTERN (insn)) != SEQUENCE
9617 	   && GET_CODE (PATTERN (insn)) != USE
9618 	   && GET_CODE (PATTERN (insn)) != CLOBBER
9619 	   && get_attr_type (insn) == TYPE_MILLI));
9620 }
9621 
9622 /* Promote the return value, but not the arguments.  */
9623 
9624 static machine_mode
9625 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9626                           machine_mode mode,
9627                           int *punsignedp ATTRIBUTE_UNUSED,
9628                           const_tree fntype ATTRIBUTE_UNUSED,
9629                           int for_return)
9630 {
9631   if (for_return == 0)
9632     return mode;
9633   return promote_mode (type, mode, punsignedp);
9634 }
9635 
9636 /* On the HP-PA the value is found in register(s) 28(-29), unless
9637    the mode is SF or DF. Then the value is returned in fr4 (32).
9638 
9639    This must perform the same promotions as PROMOTE_MODE, else promoting
9640    return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9641 
9642    Small structures must be returned in a PARALLEL on PA64 in order
9643    to match the HP Compiler ABI.  */
9644 
9645 static rtx
9646 pa_function_value (const_tree valtype,
9647                    const_tree func ATTRIBUTE_UNUSED,
9648                    bool outgoing ATTRIBUTE_UNUSED)
9649 {
9650   machine_mode valmode;
9651 
9652   if (AGGREGATE_TYPE_P (valtype)
9653       || TREE_CODE (valtype) == COMPLEX_TYPE
9654       || TREE_CODE (valtype) == VECTOR_TYPE)
9655     {
9656       HOST_WIDE_INT valsize = int_size_in_bytes (valtype);
9657 
9658       /* Handle aggregates that fit exactly in a word or double word.  */
9659       if (valsize == UNITS_PER_WORD || valsize == 2 * UNITS_PER_WORD)
9660 	return gen_rtx_REG (TYPE_MODE (valtype), 28);
9661 
9662       if (TARGET_64BIT)
9663 	{
9664           /* Aggregates with a size less than or equal to 128 bits are
9665 	     returned in GR 28(-29).  They are left justified.  The pad
9666 	     bits are undefined.  Larger aggregates are returned in
9667 	     memory.  */
9668 	  rtx loc[2];
9669 	  int i, offset = 0;
9670 	  int ub = valsize <= UNITS_PER_WORD ? 1 : 2;
9671 
9672 	  for (i = 0; i < ub; i++)
9673 	    {
9674 	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9675 					  gen_rtx_REG (DImode, 28 + i),
9676 					  GEN_INT (offset));
9677 	      offset += 8;
9678 	    }
9679 
9680 	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9681 	}
9682       else if (valsize > UNITS_PER_WORD)
9683 	{
9684 	  /* Aggregates 5 to 8 bytes in size are returned in general
9685 	     registers r28-r29 in the same manner as other non
9686 	     floating-point objects.  The data is right-justified and
9687 	     zero-extended to 64 bits.  This is opposite to the normal
9688 	     justification used on big endian targets and requires
9689 	     special treatment.  */
9690 	  rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9691 				       gen_rtx_REG (DImode, 28), const0_rtx);
9692 	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9693 	}
9694     }
9695 
9696   if ((INTEGRAL_TYPE_P (valtype)
9697        && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9698       || POINTER_TYPE_P (valtype))
9699     valmode = word_mode;
9700   else
9701     valmode = TYPE_MODE (valtype);
9702 
9703   if (TREE_CODE (valtype) == REAL_TYPE
9704       && !AGGREGATE_TYPE_P (valtype)
9705       && TYPE_MODE (valtype) != TFmode
9706       && !TARGET_SOFT_FLOAT)
9707     return gen_rtx_REG (valmode, 32);
9708 
9709   return gen_rtx_REG (valmode, 28);
9710 }
9711 
9712 /* Implement the TARGET_LIBCALL_VALUE hook.  */
9713 
9714 static rtx
9715 pa_libcall_value (machine_mode mode,
9716 		  const_rtx fun ATTRIBUTE_UNUSED)
9717 {
9718   if (! TARGET_SOFT_FLOAT
9719       && (mode == SFmode || mode == DFmode))
9720     return gen_rtx_REG (mode, 32);
9721   else
9722     return gen_rtx_REG (mode, 28);
9723 }
9724 
9725 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook.  */
9726 
9727 static bool
9728 pa_function_value_regno_p (const unsigned int regno)
9729 {
9730   if (regno == 28
9731       || (! TARGET_SOFT_FLOAT && regno == 32))
9732     return true;
9733 
9734   return false;
9735 }
9736 
9737 /* Update the data in CUM to advance over argument ARG.  */
9738 
9739 static void
9740 pa_function_arg_advance (cumulative_args_t cum_v,
9741 			 const function_arg_info &arg)
9742 {
9743   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9744   int arg_size = pa_function_arg_size (arg.mode, arg.type);
9745 
9746   cum->nargs_prototype--;
9747   cum->words += (arg_size
9748 		 + ((cum->words & 01)
9749 		    && arg.type != NULL_TREE
9750 		    && arg_size > 1));
9751 }
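
/* For example, with cum->words equal to 3 and a two-word DFmode
   argument that has a non-null type, the padding term above is 1, so
   cum->words advances by three words to 6 and the double-word argument
   stays aligned on an even word boundary.  */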
9752 
9753 /* Return the location of a parameter that is passed in a register or NULL
9754    if the parameter has any component that is passed in memory.
9755 
9756    This is new code and will be pushed into the net sources after
9757    further testing.
9758 
9759    ??? We might want to restructure this so that it looks more like other
9760    ports.  */
9761 static rtx
9762 pa_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
9763 {
9764   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9765   tree type = arg.type;
9766   machine_mode mode = arg.mode;
9767   int max_arg_words = (TARGET_64BIT ? 8 : 4);
9768   int alignment = 0;
9769   int arg_size;
9770   int fpr_reg_base;
9771   int gpr_reg_base;
9772   rtx retval;
9773 
9774   if (arg.end_marker_p ())
9775     return NULL_RTX;
9776 
9777   arg_size = pa_function_arg_size (mode, type);
9778 
9779   /* If this arg would be passed partially or totally on the stack, then
9780      this routine should return zero.  pa_arg_partial_bytes will
9781      handle arguments which are split between regs and stack slots if
9782      the ABI mandates split arguments.  */
9783   if (!TARGET_64BIT)
9784     {
9785       /* The 32-bit ABI does not split arguments.  */
9786       if (cum->words + arg_size > max_arg_words)
9787 	return NULL_RTX;
9788     }
9789   else
9790     {
9791       if (arg_size > 1)
9792 	alignment = cum->words & 1;
9793       if (cum->words + alignment >= max_arg_words)
9794 	return NULL_RTX;
9795     }
9796 
9797   /* The 32-bit and 64-bit ABIs are rather different,
9798      particularly in their handling of FP registers.  We might
9799      be able to cleverly share code between them, but I'm not
9800      going to bother in the hope that splitting them up results
9801      in code that is more easily understood.  */
9802 
9803   if (TARGET_64BIT)
9804     {
9805       /* Advance the base registers to their current locations.
9806 
9807          Remember, gprs grow towards smaller register numbers while
9808 	 fprs grow towards higher register numbers.  Also remember that
9809 	 although FP regs are 32-bit addressable, we pretend that
9810 	 the registers are 64-bits wide.  */
9811       gpr_reg_base = 26 - cum->words;
9812       fpr_reg_base = 32 + cum->words;
9813 
9814       /* Arguments wider than one word and small aggregates need special
9815 	 treatment.  */
9816       if (arg_size > 1
9817 	  || mode == BLKmode
9818 	  || (type && (AGGREGATE_TYPE_P (type)
9819 		       || TREE_CODE (type) == COMPLEX_TYPE
9820 		       || TREE_CODE (type) == VECTOR_TYPE)))
9821 	{
9822 	  /* Double-extended precision (80-bit), quad-precision (128-bit)
9823 	     and aggregates including complex numbers are aligned on
9824 	     128-bit boundaries.  The first eight 64-bit argument slots
9825 	     are associated one-to-one, with general registers r26
9826 	     through r19, and also with floating-point registers fr4
9827 	     through fr11.  Arguments larger than one word are always
9828 	     passed in general registers.
9829 
9830 	     Using a PARALLEL with a word mode register results in left
9831 	     justified data on a big-endian target.  */
9832 
9833 	  rtx loc[8];
9834 	  int i, offset = 0, ub = arg_size;
9835 
9836 	  /* Align the base register.  */
9837 	  gpr_reg_base -= alignment;
9838 
9839 	  ub = MIN (ub, max_arg_words - cum->words - alignment);
9840 	  for (i = 0; i < ub; i++)
9841 	    {
9842 	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9843 					  gen_rtx_REG (DImode, gpr_reg_base),
9844 					  GEN_INT (offset));
9845 	      gpr_reg_base -= 1;
9846 	      offset += 8;
9847 	    }
9848 
9849 	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9850 	}
9851     }
9852   else
9853     {
9854       /* If the argument is larger than a word, then we know precisely
9855 	 which registers we must use.  */
9856       if (arg_size > 1)
9857 	{
9858 	  if (cum->words)
9859 	    {
9860 	      gpr_reg_base = 23;
9861 	      fpr_reg_base = 38;
9862 	    }
9863 	  else
9864 	    {
9865 	      gpr_reg_base = 25;
9866 	      fpr_reg_base = 34;
9867 	    }
9868 
9869 	  /* Structures 5 to 8 bytes in size are passed in the general
9870 	     registers in the same manner as other non floating-point
9871 	     objects.  The data is right-justified and zero-extended
9872 	     to 64 bits.  This is opposite to the normal justification
9873 	     used on big endian targets and requires special treatment.
9874 	     We now define BLOCK_REG_PADDING to pad these objects.
9875 	     Aggregates, complex and vector types are passed in the same
9876 	     manner as structures.  */
9877 	  if (mode == BLKmode
9878 	      || (type && (AGGREGATE_TYPE_P (type)
9879 			   || TREE_CODE (type) == COMPLEX_TYPE
9880 			   || TREE_CODE (type) == VECTOR_TYPE)))
9881 	    {
9882 	      rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9883 					   gen_rtx_REG (DImode, gpr_reg_base),
9884 					   const0_rtx);
9885 	      return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9886 	    }
9887 	}
9888       else
9889 	{
9890 	  /* We have a single word (32 bits).  A simple computation
9891 	     will get us the register #s we need.  */
9892 	  gpr_reg_base = 26 - cum->words;
9893 	  fpr_reg_base = 32 + 2 * cum->words;
9894 	}
9895     }
9896 
9897   /* Determine if the argument needs to be passed in both general and
9898      floating point registers.  */
9899   if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9900        /* If we are doing soft-float with portable runtime, then there
9901 	  is no need to worry about FP regs.  */
9902        && !TARGET_SOFT_FLOAT
9903        /* The parameter must be some kind of scalar float, else we just
9904 	  pass it in integer registers.  */
9905        && GET_MODE_CLASS (mode) == MODE_FLOAT
9906        /* The target function must not have a prototype.  */
9907        && cum->nargs_prototype <= 0
9908        /* libcalls do not need to pass items in both FP and general
9909 	  registers.  */
9910        && type != NULL_TREE
9911        /* All this hair applies to "outgoing" args only.  This includes
9912 	  sibcall arguments setup with FUNCTION_INCOMING_ARG.  */
9913        && !cum->incoming)
9914       /* Also pass outgoing floating arguments in both registers in indirect
9915 	 calls with the 32-bit ABI and the HP assembler since there is no
9916 	 way to specify the argument locations in static functions.  */
9917       || (!TARGET_64BIT
9918 	  && !TARGET_GAS
9919 	  && !cum->incoming
9920 	  && cum->indirect
9921 	  && GET_MODE_CLASS (mode) == MODE_FLOAT))
9922     {
9923       retval
9924 	= gen_rtx_PARALLEL
9925 	    (mode,
9926 	     gen_rtvec (2,
9927 			gen_rtx_EXPR_LIST (VOIDmode,
9928 					   gen_rtx_REG (mode, fpr_reg_base),
9929 					   const0_rtx),
9930 			gen_rtx_EXPR_LIST (VOIDmode,
9931 					   gen_rtx_REG (mode, gpr_reg_base),
9932 					   const0_rtx)));
9933     }
9934   else
9935     {
9936       /* See if we should pass this parameter in a general register.  */
9937       if (TARGET_SOFT_FLOAT
9938 	  /* Indirect calls in the normal 32-bit ABI require all arguments
9939 	     to be passed in general registers.  */
9940 	  || (!TARGET_PORTABLE_RUNTIME
9941 	      && !TARGET_64BIT
9942 	      && !TARGET_ELF32
9943 	      && cum->indirect)
9944 	  /* If the parameter is not a scalar floating-point parameter,
9945 	     then it belongs in GPRs.  */
9946 	  || GET_MODE_CLASS (mode) != MODE_FLOAT
9947 	  /* Structure with single SFmode field belongs in GPR.  */
9948 	  || (type && AGGREGATE_TYPE_P (type)))
9949 	retval = gen_rtx_REG (mode, gpr_reg_base);
9950       else
9951 	retval = gen_rtx_REG (mode, fpr_reg_base);
9952     }
9953   return retval;
9954 }
9955 
9956 /* Arguments larger than one word are double word aligned.  */
9957 
9958 static unsigned int
9959 pa_function_arg_boundary (machine_mode mode, const_tree type)
9960 {
9961   bool singleword = (type
9962 		     ? (integer_zerop (TYPE_SIZE (type))
9963 			|| !TREE_CONSTANT (TYPE_SIZE (type))
9964 			|| int_size_in_bytes (type) <= UNITS_PER_WORD)
9965 		     : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9966 
9967   return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
9968 }
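
/* For example, on the 32-bit port, where UNITS_PER_WORD is 4, an
   SImode argument is aligned to PARM_BOUNDARY while an 8-byte DFmode
   argument is aligned to MAX_PARM_BOUNDARY, the double-word
   boundary.  */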
9969 
9970 /* If this arg would be passed totally in registers or totally on the stack,
9971    then this routine should return zero.  */
9972 
9973 static int
9974 pa_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
9975 {
9976   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9977   unsigned int max_arg_words = 8;
9978   unsigned int offset = 0;
9979 
9980   if (!TARGET_64BIT)
9981     return 0;
9982 
9983   if (pa_function_arg_size (arg.mode, arg.type) > 1 && (cum->words & 1))
9984     offset = 1;
9985 
9986   if (cum->words + offset + pa_function_arg_size (arg.mode, arg.type)
9987       <= max_arg_words)
9988     /* Arg fits fully into registers.  */
9989     return 0;
9990   else if (cum->words + offset >= max_arg_words)
9991     /* Arg fully on the stack.  */
9992     return 0;
9993   else
9994     /* Arg is split.  */
9995     return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
9996 }
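
/* For example, on the 64-bit target with cum->words equal to 6 and a
   four-word argument, the argument neither fits fully in the eight
   argument register slots (6 + 4 > 8) nor starts on the stack (6 < 8),
   so (8 - 6) * UNITS_PER_WORD, or 16 bytes, are passed in registers
   and the remaining 16 bytes are passed on the stack.  */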
9997 
9998 
9999 /* A get_unnamed_section callback for switching to the text section.
10000 
10001    This function is only used with SOM.  Because we don't support
10002    named subspaces, we can only create a new subspace or switch back
10003    to the default text subspace.  */
10004 
10005 static void
10006 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
10007 {
10008   gcc_assert (TARGET_SOM);
10009   if (TARGET_GAS)
10010     {
10011       if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
10012 	{
10013 	  /* We only want to emit a .nsubspa directive once at the
10014 	     start of the function.  */
10015 	  cfun->machine->in_nsubspa = 1;
10016 
10017 	  /* Create a new subspace for the text.  This provides
10018 	     better stub placement and one-only functions.  */
10019 	  if (cfun->decl
10020 	      && DECL_ONE_ONLY (cfun->decl)
10021 	      && !DECL_WEAK (cfun->decl))
10022 	    {
10023 	      output_section_asm_op ("\t.SPACE $TEXT$\n"
10024 				     "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
10025 				     "ACCESS=44,SORT=24,COMDAT");
10026 	      return;
10027 	    }
10028 	}
10029       else
10030 	{
10031 	  /* There isn't a current function or the body of the current
10032 	     function has been completed.  So, we are changing to the
10033 	     text section to output debugging information.  Thus, we
10034 	     need to forget that we are in the text section so that
10035 	     varasm.c will call us when text_section is selected again.  */
10036 	  gcc_assert (!cfun || !cfun->machine
10037 		      || cfun->machine->in_nsubspa == 2);
10038 	  in_section = NULL;
10039 	}
10040       output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
10041       return;
10042     }
10043   output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
10044 }
10045 
10046 /* A get_unnamed_section callback for switching to comdat data
10047    sections.  This function is only used with SOM.  */
10048 
10049 static void
10050 som_output_comdat_data_section_asm_op (const void *data)
10051 {
10052   in_section = NULL;
10053   output_section_asm_op (data);
10054 }
10055 
10056 /* Implement TARGET_ASM_INIT_SECTIONS.  */
10057 
10058 static void
10059 pa_som_asm_init_sections (void)
10060 {
10061   text_section
10062     = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
10063 
10064   /* SOM puts readonly data in the default $LIT$ subspace when PIC code
10065      is not being generated.  */
10066   som_readonly_data_section
10067     = get_unnamed_section (0, output_section_asm_op,
10068 			   "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
10069 
10070   /* When secondary definitions are not supported, SOM makes readonly
10071      data one-only by creating a new $LIT$ subspace in $TEXT$ with
10072      the comdat flag.  */
10073   som_one_only_readonly_data_section
10074     = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
10075 			   "\t.SPACE $TEXT$\n"
10076 			   "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
10077 			   "ACCESS=0x2c,SORT=16,COMDAT");
10078 
10079 
10080   /* When secondary definitions are not supported, SOM makes data one-only
10081      by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
10082   som_one_only_data_section
10083     = get_unnamed_section (SECTION_WRITE,
10084 			   som_output_comdat_data_section_asm_op,
10085 			   "\t.SPACE $PRIVATE$\n"
10086 			   "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
10087 			   "ACCESS=31,SORT=24,COMDAT");
10088 
10089   if (flag_tm)
10090     som_tm_clone_table_section
10091       = get_unnamed_section (0, output_section_asm_op,
10092 			     "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
10093 
10094   /* HPUX ld generates incorrect GOT entries for "T" fixups which
10095      reference data within the $TEXT$ space (for example constant
10096      strings in the $LIT$ subspace).
10097 
10098      The assemblers (GAS and HP as) both have problems handling the
10099      difference of two symbols, which is the other correct way to
10100      reference constant data during PIC code generation.
10101 
10102      Thus, we can't put constant data needing relocation in the $TEXT$
10103      space during PIC generation.
10104 
10105      Previously, we placed all constant data into the $DATA$ subspace
10106      when generating PIC code.  This reduces sharing, but it works
10107      correctly.  Now we rely on pa_reloc_rw_mask() for section selection.
10108      This puts constant data not needing relocation into the $TEXT$ space.  */
10109   readonly_data_section = som_readonly_data_section;
10110 
10111   /* We must not have a reference to an external symbol defined in a
10112      shared library in a readonly section, else the SOM linker will
10113      complain.
10114 
10115      So, we force exception information into the data section.  */
10116   exception_section = data_section;
10117 }
10118 
10119 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION.  */
10120 
10121 static section *
10122 pa_som_tm_clone_table_section (void)
10123 {
10124   return som_tm_clone_table_section;
10125 }
10126 
10127 /* On hpux10, the linker will give an error if we have a reference
10128    in the read-only data section to a symbol defined in a shared
10129    library.  Therefore, expressions that might require a reloc
10130    cannot be placed in the read-only data section.  */
10131 
10132 static section *
10133 pa_select_section (tree exp, int reloc,
10134 		   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
10135 {
10136   if (TREE_CODE (exp) == VAR_DECL
10137       && TREE_READONLY (exp)
10138       && !TREE_THIS_VOLATILE (exp)
10139       && DECL_INITIAL (exp)
10140       && (DECL_INITIAL (exp) == error_mark_node
10141           || TREE_CONSTANT (DECL_INITIAL (exp)))
10142       && !(reloc & pa_reloc_rw_mask ()))
10143     {
10144       if (TARGET_SOM
10145 	  && DECL_ONE_ONLY (exp)
10146 	  && !DECL_WEAK (exp))
10147 	return som_one_only_readonly_data_section;
10148       else
10149 	return readonly_data_section;
10150     }
10151   else if (CONSTANT_CLASS_P (exp)
10152 	   && !(reloc & pa_reloc_rw_mask ()))
10153     return readonly_data_section;
10154   else if (TARGET_SOM
10155 	   && TREE_CODE (exp) == VAR_DECL
10156 	   && DECL_ONE_ONLY (exp)
10157 	   && !DECL_WEAK (exp))
10158     return som_one_only_data_section;
10159   else
10160     return data_section;
10161 }
10162 
10163 /* Implement pa_elf_select_rtx_section.  If X is a function label operand
10164    and the function is in a COMDAT group, place the plabel reference in the
10165    .data.rel.ro.local section.  The linker ignores references to symbols in
10166    discarded sections from this section.  */
10167 
10168 static section *
10169 pa_elf_select_rtx_section (machine_mode mode, rtx x,
10170 			   unsigned HOST_WIDE_INT align)
10171 {
10172   if (function_label_operand (x, VOIDmode))
10173     {
10174       tree decl = SYMBOL_REF_DECL (x);
10175 
10176       if (!decl || (DECL_P (decl) && DECL_COMDAT_GROUP (decl)))
10177 	return get_named_section (NULL, ".data.rel.ro.local", 1);
10178     }
10179 
10180   return default_elf_select_rtx_section (mode, x, align);
10181 }
10182 
10183 /* Implement pa_reloc_rw_mask.  */
10184 
10185 static int
10186 pa_reloc_rw_mask (void)
10187 {
10188   if (flag_pic || (TARGET_SOM && !TARGET_HPUX_11))
10189     return 3;
10190 
10191   /* HP linker does not support global relocs in readonly memory.  */
10192   return TARGET_SOM ? 2 : 0;
10193 }
10194 
10195 static void
10196 pa_globalize_label (FILE *stream, const char *name)
10197 {
10198   /* We only handle DATA objects here, functions are globalized in
10199      ASM_DECLARE_FUNCTION_NAME.  */
10200   if (! FUNCTION_NAME_P (name))
10201     {
10202       fputs ("\t.EXPORT ", stream);
10203       assemble_name (stream, name);
10204       fputs (",DATA\n", stream);
10205     }
10206 }
10207 
10208 /* Worker function for TARGET_STRUCT_VALUE_RTX.  */
10209 
10210 static rtx
10211 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
10212 		     int incoming ATTRIBUTE_UNUSED)
10213 {
10214   return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
10215 }
10216 
10217 /* Worker function for TARGET_RETURN_IN_MEMORY.  */
10218 
10219 bool
10220 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
10221 {
10222   /* SOM ABI says that objects larger than 64 bits are returned in memory.
10223      PA64 ABI says that objects larger than 128 bits are returned in memory.
10224      Note, int_size_in_bytes can return -1 if the size of the object is
10225      variable or larger than the maximum value that can be expressed as
10226      a HOST_WIDE_INT.  It can also return zero for an empty type.  The
10227      simplest way to handle variable and empty types is to pass them in
10228      memory.  This avoids problems in defining the boundaries of argument
10229      slots, allocating registers, etc.  */
10230   return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
10231 	  || int_size_in_bytes (type) <= 0);
10232 }
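
/* For example, a 12-byte structure is returned in memory on the
   32-bit target (12 > 8) but in registers on the 64-bit target
   (12 <= 16).  Variable-sized objects (int_size_in_bytes returns -1)
   and empty types (size 0) are returned in memory on both targets.  */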
10233 
10234 /* Structure to hold declaration and name of external symbols that are
10235    emitted by GCC.  We generate a vector of these symbols and output them
10236    at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
10237    This avoids putting out names that are never really used.  */
10238 
10239 typedef struct GTY(()) extern_symbol
10240 {
10241   tree decl;
10242   const char *name;
10243 } extern_symbol;
10244 
10245 /* Define gc'd vector type for extern_symbol.  */
10246 
10247 /* Vector of extern_symbol pointers.  */
10248 static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
10249 
10250 #ifdef ASM_OUTPUT_EXTERNAL_REAL
10251 /* Mark DECL (name NAME) as an external reference (assembler output
10252    file FILE).  This saves the names to output at the end of the file
10253    if actually referenced.  */
10254 
10255 void
10256 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
10257 {
10258   gcc_assert (file == asm_out_file);
10259   extern_symbol p = {decl, name};
10260   vec_safe_push (extern_symbols, p);
10261 }
10262 #endif
10263 
10264 /* Output text required at the end of an assembler file.
10265    This includes deferred plabels and .import directives for
10266    all external symbols that were actually referenced.  */
10267 
10268 static void
10269 pa_file_end (void)
10270 {
10271 #ifdef ASM_OUTPUT_EXTERNAL_REAL
10272   unsigned int i;
10273   extern_symbol *p;
10274 
10275   if (!NO_DEFERRED_PROFILE_COUNTERS)
10276     output_deferred_profile_counters ();
10277 #endif
10278 
10279   output_deferred_plabels ();
10280 
10281 #ifdef ASM_OUTPUT_EXTERNAL_REAL
10282   for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
10283     {
10284       tree decl = p->decl;
10285 
10286       if (!TREE_ASM_WRITTEN (decl)
10287 	  && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
10288 	ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
10289     }
10290 
10291   vec_free (extern_symbols);
10292 #endif
10293 
10294   if (NEED_INDICATE_EXEC_STACK)
10295     file_end_indicate_exec_stack ();
10296 }
10297 
10298 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
10299 
10300 static bool
10301 pa_can_change_mode_class (machine_mode from, machine_mode to,
10302 			  reg_class_t rclass)
10303 {
10304   if (from == to)
10305     return true;
10306 
10307   if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
10308     return true;
10309 
10310   /* Reject changes to/from modes with zero size.  */
10311   if (!GET_MODE_SIZE (from) || !GET_MODE_SIZE (to))
10312     return false;
10313 
10314   /* Reject changes to/from complex and vector modes.  */
10315   if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
10316       || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
10317     return false;
10318 
10319   /* There is no way to load QImode or HImode values directly from memory
10320      to a FP register.  SImode loads to the FP registers are not zero
10321      extended.  On the 64-bit target, this conflicts with the definition
10322      of LOAD_EXTEND_OP.  Thus, we reject all mode changes in the FP registers
10323      except for DImode to SImode on the 64-bit target.  It is handled by
10324      register renaming in pa_print_operand.  */
10325   if (MAYBE_FP_REG_CLASS_P (rclass))
10326     return TARGET_64BIT && from == DImode && to == SImode;
10327 
10328   /* TARGET_HARD_REGNO_MODE_OK places modes with sizes larger than a word
10329      in specific sets of registers.  Thus, we cannot allow changing
10330      to a larger mode when it's larger than a word.  */
10331   if (GET_MODE_SIZE (to) > UNITS_PER_WORD
10332       && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
10333     return false;
10334 
10335   return true;
10336 }
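
/* For example, a DFmode to SImode change in a floating-point register
   is always rejected, while a DImode to SImode change is allowed only
   on the 64-bit target.  In the general registers on the 32-bit
   target, an SImode to DImode change is rejected because DImode is
   both larger than a word and larger than SImode.  */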
10337 
10338 /* Implement TARGET_MODES_TIEABLE_P.
10339 
10340    We should return FALSE for QImode and HImode because these modes
10341    are not ok in the floating-point registers.  However, this prevents
10342    tieing these modes to SImode and DImode in the general registers.
10343    So, this isn't a good idea.  We rely on TARGET_HARD_REGNO_MODE_OK and
10344    TARGET_CAN_CHANGE_MODE_CLASS to prevent these modes from being used
10345    in the floating-point registers.  */
10346 
10347 static bool
10348 pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10349 {
10350   /* Don't tie modes in different classes.  */
10351   if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
10352     return false;
10353 
10354   return true;
10355 }
10356 
10357 
10358 /* Length in units of the trampoline instruction code.  */
10359 
10360 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 36 : 48))
10361 
10362 
10363 /* Output assembler code for a block containing the constant parts
10364    of a trampoline, leaving space for the variable parts.
10365 
10366    The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
10367    and then branches to the specified routine.
10368 
10369    This code template is copied from the text segment to a stack location,
10370    patched by pa_trampoline_init to contain valid values, and then
10371    entered as a subroutine.
10372 
10373    It is best to keep this as small as possible to avoid having to
10374    flush multiple lines in the cache.  */
10375 
10376 static void
10377 pa_asm_trampoline_template (FILE *f)
10378 {
10379   if (!TARGET_64BIT)
10380     {
10381       if (TARGET_PA_20)
10382 	{
10383 	  fputs ("\tmfia	%r20\n", f);
10384 	  fputs ("\tldw		48(%r20),%r22\n", f);
10385 	  fputs ("\tcopy	%r22,%r21\n", f);
10386 	  fputs ("\tbb,>=,n	%r22,30,.+16\n", f);
10387 	  fputs ("\tdepwi	0,31,2,%r22\n", f);
10388 	  fputs ("\tldw		0(%r22),%r21\n", f);
10389 	  fputs ("\tldw		4(%r22),%r19\n", f);
10390 	  fputs ("\tbve		(%r21)\n", f);
10391 	  fputs ("\tldw		52(%r1),%r29\n", f);
10392 	  fputs ("\t.word	0\n", f);
10393 	  fputs ("\t.word	0\n", f);
10394 	  fputs ("\t.word	0\n", f);
10395 	}
10396       else
10397 	{
10398 	  if (ASSEMBLER_DIALECT == 0)
10399 	    {
10400 	      fputs ("\tbl	.+8,%r20\n", f);
10401 	      fputs ("\tdepi	0,31,2,%r20\n", f);
10402 	    }
10403 	  else
10404 	    {
10405 	      fputs ("\tb,l	.+8,%r20\n", f);
10406 	      fputs ("\tdepwi	0,31,2,%r20\n", f);
10407 	    }
10408 	  fputs ("\tldw		40(%r20),%r22\n", f);
10409 	  fputs ("\tcopy	%r22,%r21\n", f);
10410 	  fputs ("\tbb,>=,n	%r22,30,.+16\n", f);
10411 	  if (ASSEMBLER_DIALECT == 0)
10412 	    fputs ("\tdepi	0,31,2,%r22\n", f);
10413 	  else
10414 	    fputs ("\tdepwi	0,31,2,%r22\n", f);
10415 	  fputs ("\tldw		0(%r22),%r21\n", f);
10416 	  fputs ("\tldw		4(%r22),%r19\n", f);
10417 	  fputs ("\tldsid	(%r21),%r1\n", f);
10418 	  fputs ("\tmtsp	%r1,%sr0\n", f);
10419 	  fputs ("\tbe		0(%sr0,%r21)\n", f);
10420 	  fputs ("\tldw		44(%r20),%r29\n", f);
10421 	}
10422       fputs ("\t.word	0\n", f);
10423       fputs ("\t.word	0\n", f);
10424       fputs ("\t.word	0\n", f);
10425       fputs ("\t.word	0\n", f);
10426     }
10427   else
10428     {
10429       fputs ("\t.dword 0\n", f);
10430       fputs ("\t.dword 0\n", f);
10431       fputs ("\t.dword 0\n", f);
10432       fputs ("\t.dword 0\n", f);
10433       fputs ("\tmfia	%r31\n", f);
10434       fputs ("\tldd	24(%r31),%r27\n", f);
10435       fputs ("\tldd	32(%r31),%r31\n", f);
10436       fputs ("\tldd	16(%r27),%r1\n", f);
10437       fputs ("\tbve	(%r1)\n", f);
10438       fputs ("\tldd	24(%r27),%r27\n", f);
10439       fputs ("\t.dword 0  ; fptr\n", f);
10440       fputs ("\t.dword 0  ; static link\n", f);
10441     }
10442 }
10443 
10444 /* Emit RTL insns to initialize the variable parts of a trampoline.
10445    FNADDR is an RTX for the address of the function's pure code.
10446    CXT is an RTX for the static chain value for the function.
10447 
10448    Move the function address to the trampoline template at offset 48.
10449    Move the static chain value to the trampoline template at offset 52.
10450    Move the trampoline address to the trampoline template at offset 56.
10451    Move r19 to the trampoline template at offset 60.  The latter two
10452    words create a plabel for the indirect call to the trampoline.
10453 
10454    A similar sequence is used for the 64-bit port but the plabel is
10455    at the beginning of the trampoline.
10456 
10457    Finally, the cache entries for the trampoline code are flushed.
10458    This is necessary to ensure that the trampoline instruction sequence
10459    is written to memory prior to any attempts at prefetching the code
10460    sequence.  */
10461 
10462 static void
10463 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10464 {
10465   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10466   rtx start_addr = gen_reg_rtx (Pmode);
10467   rtx end_addr = gen_reg_rtx (Pmode);
10468   rtx line_length = gen_reg_rtx (Pmode);
10469   rtx r_tramp, tmp;
10470 
10471   emit_block_move (m_tramp, assemble_trampoline_template (),
10472 		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10473   r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10474 
10475   if (!TARGET_64BIT)
10476     {
10477       tmp = adjust_address (m_tramp, Pmode, 48);
10478       emit_move_insn (tmp, fnaddr);
10479       tmp = adjust_address (m_tramp, Pmode, 52);
10480       emit_move_insn (tmp, chain_value);
10481 
10482       /* Create a fat pointer for the trampoline.  */
10483       tmp = adjust_address (m_tramp, Pmode, 56);
10484       emit_move_insn (tmp, r_tramp);
10485       tmp = adjust_address (m_tramp, Pmode, 60);
10486       emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10487 
10488       /* fdc and fic only use registers for the address to flush,
10489 	 they do not accept integer displacements.  We align the
10490 	 start and end addresses to the beginning of their respective
10491 	 cache lines to minimize the number of lines flushed.  */
10492       emit_insn (gen_andsi3 (start_addr, r_tramp,
10493 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10494       tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
10495 					     TRAMPOLINE_CODE_SIZE-1));
10496       emit_insn (gen_andsi3 (end_addr, tmp,
10497 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10498       emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10499       emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10500       emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10501 				    gen_reg_rtx (Pmode),
10502 				    gen_reg_rtx (Pmode)));
10503     }
10504   else
10505     {
10506       tmp = adjust_address (m_tramp, Pmode, 56);
10507       emit_move_insn (tmp, fnaddr);
10508       tmp = adjust_address (m_tramp, Pmode, 64);
10509       emit_move_insn (tmp, chain_value);
10510 
10511       /* Create a fat pointer for the trampoline.  */
10512       tmp = adjust_address (m_tramp, Pmode, 16);
10513       emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
10514 							    r_tramp, 32)));
10515       tmp = adjust_address (m_tramp, Pmode, 24);
10516       emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10517 
10518       /* fdc and fic only use registers for the address to flush,
10519 	 they do not accept integer displacements.  We align the
10520 	 start and end addresses to the beginning of their respective
10521 	 cache lines to minimize the number of lines flushed.  */
10522       tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
10523       emit_insn (gen_anddi3 (start_addr, tmp,
10524 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10525       tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
10526 					     TRAMPOLINE_CODE_SIZE - 1));
10527       emit_insn (gen_anddi3 (end_addr, tmp,
10528 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10529       emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10530       emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10531       emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10532 				    gen_reg_rtx (Pmode),
10533 				    gen_reg_rtx (Pmode)));
10534     }
10535 
10536 #ifdef HAVE_ENABLE_EXECUTE_STACK
10537   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10538 		     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10539 #endif
10540 }
10541 
10542 /* Perform any machine-specific adjustment in the address of the trampoline.
10543    ADDR contains the address that was passed to pa_trampoline_init.
10544    Adjust the trampoline address to point to the plabel at offset 56.  The
   extra 2 added below flags the address as a plabel pointer; the trampoline
   code tests and clears this flag before using the address.  */
10545 
10546 static rtx
10547 pa_trampoline_adjust_address (rtx addr)
10548 {
10549   if (!TARGET_64BIT)
10550     addr = memory_address (Pmode, plus_constant (Pmode, addr, 58));
10551   return addr;
10552 }
10553 
10554 static rtx
10555 pa_delegitimize_address (rtx orig_x)
10556 {
10557   rtx x = delegitimize_mem_from_attrs (orig_x);
10558 
10559   if (GET_CODE (x) == LO_SUM
10560       && GET_CODE (XEXP (x, 1)) == UNSPEC
10561       && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10562     return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10563   return x;
10564 }
10565 
10566 static rtx
10567 pa_internal_arg_pointer (void)
10568 {
10569   /* The argument pointer and the hard frame pointer are the same in
10570      the 32-bit runtime, so we don't need a copy.  */
10571   if (TARGET_64BIT)
10572     return copy_to_reg (virtual_incoming_args_rtx);
10573   else
10574     return virtual_incoming_args_rtx;
10575 }
10576 
10577 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10578    Frame pointer elimination is automatically handled.  */
10579 
10580 static bool
10581 pa_can_eliminate (const int from, const int to)
10582 {
10583   /* The argument cannot be eliminated in the 64-bit runtime.  */
10584   if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10585     return false;
10586 
10587   return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10588           ? ! frame_pointer_needed
10589           : true);
10590 }
10591 
10592 /* Define the offset between two registers, FROM to be eliminated and its
10593    replacement TO, at the start of a routine.  */
10594 HOST_WIDE_INT
10595 pa_initial_elimination_offset (int from, int to)
10596 {
10597   HOST_WIDE_INT offset;
10598 
10599   if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10600       && to == STACK_POINTER_REGNUM)
10601     offset = -pa_compute_frame_size (get_frame_size (), 0);
10602   else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10603     offset = 0;
10604   else
10605     gcc_unreachable ();
10606 
10607   return offset;
10608 }
10609 
10610 static void
10611 pa_conditional_register_usage (void)
10612 {
10613   int i;
10614 
10615   if (!TARGET_64BIT && !TARGET_PA_11)
10616     {
10617       for (i = 56; i <= FP_REG_LAST; i++)
10618 	fixed_regs[i] = call_used_regs[i] = 1;
10619       for (i = 33; i < 56; i += 2)
10620 	fixed_regs[i] = call_used_regs[i] = 1;
10621     }
10622   if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
10623     {
10624       for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10625 	fixed_regs[i] = call_used_regs[i] = 1;
10626     }
10627   if (flag_pic)
10628     fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10629 }
10630 
10631 /* Target hook for c_mode_for_suffix.  */
10632 
10633 static machine_mode
10634 pa_c_mode_for_suffix (char suffix)
10635 {
10636   if (HPUX_LONG_DOUBLE_LIBRARY)
10637     {
10638       if (suffix == 'q')
10639 	return TFmode;
10640     }
10641 
10642   return VOIDmode;
10643 }
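
/* For example, with an HP-UX long-double library, a constant written
   as 1.0q is given TFmode, so the 128-bit floating-point type is
   directly usable from C.  */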
10644 
10645 /* Target hook for function_section.  */
10646 
10647 static section *
10648 pa_function_section (tree decl, enum node_frequency freq,
10649 		     bool startup, bool exit)
10650 {
10651   /* Put functions in text section if target doesn't have named sections.  */
10652   if (!targetm_common.have_named_sections)
10653     return text_section;
10654 
10655   /* Force nested functions into the same section as the containing
10656      function.  */
10657   if (decl
10658       && DECL_SECTION_NAME (decl) == NULL
10659       && DECL_CONTEXT (decl) != NULL_TREE
10660       && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10661       && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
10662     return function_section (DECL_CONTEXT (decl));
10663 
10664   /* Otherwise, use the default function section.  */
10665   return default_function_section (decl, freq, startup, exit);
10666 }
10667 
10668 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10669 
10670    In 64-bit mode, we reject CONST_DOUBLES.  We also reject CONST_INTS
10671    that need more than three instructions to load prior to reload.  This
10672    limit is somewhat arbitrary.  It takes three instructions to load a
10673    CONST_INT from memory but two are memory accesses.  It may be better
10674    to increase the allowed range for CONST_INTS.  We may also be able
10675    to handle CONST_DOUBLES.  */
10676 
10677 static bool
10678 pa_legitimate_constant_p (machine_mode mode, rtx x)
10679 {
10680   if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10681     return false;
10682 
10683   if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10684     return false;
10685 
10686   /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10687      legitimate constants.  The other variants can't be handled by
10688      the move patterns after reload starts.  */
10689   if (tls_referenced_p (x))
10690     return false;
10691 
10692   if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10693     return false;
10694 
10695   if (TARGET_64BIT
10696       && HOST_BITS_PER_WIDE_INT > 32
10697       && GET_CODE (x) == CONST_INT
10698       && !reload_in_progress
10699       && !reload_completed
10700       && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
10701       && !pa_cint_ok_for_move (UINTVAL (x)))
10702     return false;
10703 
10704   if (function_label_operand (x, mode))
10705     return false;
10706 
10707   return true;
10708 }
10709 
10710 /* Implement TARGET_SECTION_TYPE_FLAGS.  */
10711 
10712 static unsigned int
10713 pa_section_type_flags (tree decl, const char *name, int reloc)
10714 {
10715   unsigned int flags;
10716 
10717   flags = default_section_type_flags (decl, name, reloc);
10718 
10719   /* Function labels are placed in the constant pool.  This can
10720      cause a section conflict if decls are put in ".data.rel.ro"
10721      or ".data.rel.ro.local" using the __attribute__ construct.  */
10722   if (strcmp (name, ".data.rel.ro") == 0
10723       || strcmp (name, ".data.rel.ro.local") == 0)
10724     flags |= SECTION_WRITE | SECTION_RELRO;
10725 
10726   return flags;
10727 }
10728 
10729 /* pa_legitimate_address_p recognizes an RTL expression that is a
10730    valid memory address for an instruction.  The MODE argument is the
10731    machine mode for the MEM expression that wants to use this address.
10732 
10733    On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10734    REG+REG, and REG+(REG*SCALE).  The indexed address forms are only
10735    available with floating point loads and stores, and integer loads.
10736    We get better code by allowing indexed addresses in the initial
10737    RTL generation.
10738 
10739    The acceptance of indexed addresses as legitimate implies that we
10740    must provide patterns for doing indexed integer stores, or the move
10741    expanders must force the address of an indexed store to a register.
10742    We have adopted the latter approach.
10743 
10744    Another function of pa_legitimate_address_p is to ensure that
10745    the base register is a valid pointer for indexed instructions.
10746    On targets that have non-equivalent space registers, we have to
10747    know at the time of assembler output which register in a REG+REG
10748    pair is the base register.  The REG_POINTER flag is sometimes lost
10749    in reload and the following passes, so it can't be relied on during
10750    code generation.  Thus, we either have to canonicalize the order
10751    of the registers in REG+REG indexed addresses, or treat REG+REG
10752    addresses separately and provide patterns for both permutations.
10753 
10754    The latter approach requires several hundred additional lines of
10755    code in pa.md.  The downside to canonicalizing is that a PLUS
10756    in the wrong order can't combine to form a scaled indexed
10757    memory operand.  As we won't need to canonicalize the operands if
10758    the REG_POINTER lossage can be fixed, it seems better to canonicalize.
10759 
10760    We initially break out scaled indexed addresses in canonical order
10761    in pa_emit_move_sequence.  LEGITIMIZE_ADDRESS also canonicalizes
10762    scaled indexed addresses during RTL generation.  However, fold_rtx
10763    has its own opinion on how the operands of a PLUS should be ordered.
10764    If one of the operands is equivalent to a constant, it will make
10765    that operand the second operand.  As the base register is likely to
10766    be equivalent to a SYMBOL_REF, we have made it the second operand.
10767 
10768    pa_legitimate_address_p accepts REG+REG as legitimate when the
10769    operands are in the order INDEX+BASE on targets with non-equivalent
10770    space registers, and in any order on targets with equivalent space
10771    registers.  It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10772 
10773    We treat a SYMBOL_REF as legitimate if it is part of the current
10774    function's constant-pool, because such addresses can actually be
10775    output as REG+SMALLINT.  */
10776 
10777 static bool
10778 pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
10779 {
10780   if ((REG_P (x)
10781        && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
10782 		  : REG_OK_FOR_BASE_P (x)))
10783       || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
10784 	   || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
10785 	  && REG_P (XEXP (x, 0))
10786 	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10787 		     : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
10788     return true;
10789 
10790   if (GET_CODE (x) == PLUS)
10791     {
10792       rtx base, index;
10793 
10794       /* For REG+REG, the base register should be in XEXP (x, 1),
10795 	 so check it first.  */
10796       if (REG_P (XEXP (x, 1))
10797 	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
10798 		     : REG_OK_FOR_BASE_P (XEXP (x, 1))))
10799 	base = XEXP (x, 1), index = XEXP (x, 0);
10800       else if (REG_P (XEXP (x, 0))
10801 	       && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10802 			  : REG_OK_FOR_BASE_P (XEXP (x, 0))))
10803 	base = XEXP (x, 0), index = XEXP (x, 1);
10804       else
10805 	return false;
10806 
10807       if (GET_CODE (index) == CONST_INT)
10808 	{
10809 	  if (INT_5_BITS (index))
10810 	    return true;
10811 
10812 	  /* When INT14_OK_STRICT is false, a secondary reload is needed
10813 	     to adjust the displacement of SImode and DImode floating point
10814 	     instructions but this may fail when the register also needs
10815 	     reloading.  So, we return false when STRICT is true.  We
10816 	     also reject long displacements for float mode addresses since
10817 	     the majority of accesses will use floating point instructions
10818 	     that don't support 14-bit offsets.  */
10819 	  if (!INT14_OK_STRICT
10820 	      && (strict || !(reload_in_progress || reload_completed))
10821 	      && mode != QImode
10822 	      && mode != HImode)
10823 	    return false;
10824 
10825 	  return base14_operand (index, mode);
10826 	}
10827 
10828       if (!TARGET_DISABLE_INDEXING
10829 	  /* Only accept the "canonical" INDEX+BASE operand order
10830 	     on targets with non-equivalent space registers.  */
10831 	  && (TARGET_NO_SPACE_REGS
10832 	      ? REG_P (index)
10833 	      : (base == XEXP (x, 1) && REG_P (index)
10834 		 && (reload_completed
10835 		     || (reload_in_progress && HARD_REGISTER_P (base))
10836 		     || REG_POINTER (base))
10837 		 && (reload_completed
10838 		     || (reload_in_progress && HARD_REGISTER_P (index))
10839 		     || !REG_POINTER (index))))
10840 	  && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
10841 	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
10842 		     : REG_OK_FOR_INDEX_P (index))
10843 	  && borx_reg_operand (base, Pmode)
10844 	  && borx_reg_operand (index, Pmode))
10845 	return true;
10846 
10847       if (!TARGET_DISABLE_INDEXING
10848 	  && GET_CODE (index) == MULT
10849 	  /* Only accept base operands with the REG_POINTER flag prior to
10850 	     reload on targets with non-equivalent space registers.  */
10851 	  && (TARGET_NO_SPACE_REGS
10852 	      || (base == XEXP (x, 1)
10853 		  && (reload_completed
10854 		      || (reload_in_progress && HARD_REGISTER_P (base))
10855 		      || REG_POINTER (base))))
10856 	  && REG_P (XEXP (index, 0))
10857 	  && GET_MODE (XEXP (index, 0)) == Pmode
10858 	  && MODE_OK_FOR_SCALED_INDEXING_P (mode)
10859 	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
10860 		     : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
10861 	  && GET_CODE (XEXP (index, 1)) == CONST_INT
10862 	  && INTVAL (XEXP (index, 1))
10863 	     == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
10864 	  && borx_reg_operand (base, Pmode))
10865 	return true;
10866 
10867       return false;
10868     }
10869 
10870   if (GET_CODE (x) == LO_SUM)
10871     {
10872       rtx y = XEXP (x, 0);
10873 
10874       if (GET_CODE (y) == SUBREG)
10875 	y = SUBREG_REG (y);
10876 
10877       if (REG_P (y)
10878 	  && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
10879 		     : REG_OK_FOR_BASE_P (y)))
10880 	{
10881 	  /* Needed for -fPIC */
10882 	  if (mode == Pmode
10883 	      && GET_CODE (XEXP (x, 1)) == UNSPEC)
10884 	    return true;
10885 
10886 	  if (!INT14_OK_STRICT
10887 	      && (strict || !(reload_in_progress || reload_completed))
10888 	      && mode != QImode
10889 	      && mode != HImode)
10890 	    return false;
10891 
10892 	  if (CONSTANT_P (XEXP (x, 1)))
10893 	    return true;
10894 	}
10895       return false;
10896     }
10897 
10898   if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
10899     return true;
10900 
10901   return false;
10902 }
10903 
10904 /* Look for machine dependent ways to make the invalid address AD a
10905    valid address.
10906 
10907    For the PA, transform:
10908 
10909         memory(X + <large int>)
10910 
10911    into:
10912 
10913         if (<large int> & mask) >= 16
10914           Y = (<large int> & ~mask) + mask + 1  Round up.
10915         else
10916           Y = (<large int> & ~mask)             Round down.
10917         Z = X + Y
10918         memory (Z + (<large int> - Y));
10919 
10920    This makes reload inheritance and reload_cse work better since Z
10921    can be reused.
10922 
10923    There may be more opportunities to improve code with this hook.  */
10924 
10925 rtx
10926 pa_legitimize_reload_address (rtx ad, machine_mode mode,
10927 			      int opnum, int type,
10928 			      int ind_levels ATTRIBUTE_UNUSED)
10929 {
10930   long offset, newoffset, mask;
10931   rtx new_rtx, temp = NULL_RTX;
10932 
10933   mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
10934 	  && !INT14_OK_STRICT ? 0x1f : 0x3fff);
10935 
10936   if (optimize && GET_CODE (ad) == PLUS)
10937     temp = simplify_binary_operation (PLUS, Pmode,
10938 				      XEXP (ad, 0), XEXP (ad, 1));
10939 
10940   new_rtx = temp ? temp : ad;
10941 
10942   if (optimize
10943       && GET_CODE (new_rtx) == PLUS
10944       && GET_CODE (XEXP (new_rtx, 0)) == REG
10945       && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
10946     {
10947       offset = INTVAL (XEXP ((new_rtx), 1));
10948 
10949       /* Choose rounding direction.  Round up if we are >= halfway.  */
10950       if ((offset & mask) >= ((mask + 1) / 2))
10951 	newoffset = (offset & ~mask) + mask + 1;
10952       else
10953 	newoffset = offset & ~mask;
10954 
10955       /* Ensure that long displacements are aligned.  */
10956       if (mask == 0x3fff
10957 	  && (GET_MODE_CLASS (mode) == MODE_FLOAT
10958 	      || (TARGET_64BIT && (mode) == DImode)))
10959 	newoffset &= ~(GET_MODE_SIZE (mode) - 1);
10960 
10961       if (newoffset != 0 && VAL_14_BITS_P (newoffset))
10962 	{
10963 	  temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
10964 			       GEN_INT (newoffset));
10965 	  ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
10966 	  push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
10967 		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10968 		       opnum, (enum reload_type) type);
10969 	  return ad;
10970 	}
10971     }
10972 
10973   return NULL_RTX;
10974 }
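
/* A worked example: for an SFmode access when INT14_OK_STRICT is
   false, the mask is 0x1f.  Given memory (X + 133), offset & mask is 5,
   which is below the halfway point of 16, so we round down and
   newoffset is 128.  Reload then materializes Z = X + 128 and the
   access becomes memory (Z + 5), where the residual displacement of 5
   fits the 5-bit field of the floating-point loads and stores.  */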
10975 
10976 /* Output address vector.  */
10977 
10978 void
10979 pa_output_addr_vec (rtx lab, rtx body)
10980 {
10981   int idx, vlen = XVECLEN (body, 0);
10982 
10983   if (!TARGET_SOM)
10984     fputs ("\t.align 4\n", asm_out_file);
10985   targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10986   if (TARGET_GAS)
10987     fputs ("\t.begin_brtab\n", asm_out_file);
10988   for (idx = 0; idx < vlen; idx++)
10989     {
10990       ASM_OUTPUT_ADDR_VEC_ELT
10991 	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10992     }
10993   if (TARGET_GAS)
10994     fputs ("\t.end_brtab\n", asm_out_file);
10995 }
10996 
10997 /* Output address difference vector.  */
10998 
10999 void
11000 pa_output_addr_diff_vec (rtx lab, rtx body)
11001 {
11002   rtx base = XEXP (XEXP (body, 0), 0);
11003   int idx, vlen = XVECLEN (body, 1);
11004 
11005   targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
11006   if (TARGET_GAS)
11007     fputs ("\t.begin_brtab\n", asm_out_file);
11008   for (idx = 0; idx < vlen; idx++)
11009     {
11010       ASM_OUTPUT_ADDR_DIFF_ELT
11011 	(asm_out_file,
11012 	 body,
11013 	 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
11014 	 CODE_LABEL_NUMBER (base));
11015     }
11016   if (TARGET_GAS)
11017     fputs ("\t.end_brtab\n", asm_out_file);
11018 }
11019 
11020 /* This is a helper function for the other atomic operations.  This function
11021    emits a loop that contains SEQ that iterates until a compare-and-swap
11022    operation at the end succeeds.  MEM is the memory to be modified.  SEQ is
11023    a set of instructions that takes a value from OLD_REG as an input and
11024    produces a value in NEW_REG as an output.  Before SEQ, OLD_REG will be
11025    set to the current contents of MEM.  After SEQ, a compare-and-swap will
11026    attempt to update MEM with NEW_REG.  The function returns true when the
11027    loop was generated successfully.  */
11028 
11029 static bool
11030 pa_expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
11031 {
11032   machine_mode mode = GET_MODE (mem);
11033   rtx_code_label *label;
11034   rtx cmp_reg, success, oldval;
11035 
11036   /* The loop we want to generate looks like
11037 
11038         cmp_reg = mem;
11039       label:
11040         old_reg = cmp_reg;
11041         seq;
11042         (success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg)
11043         if (success)
11044           goto label;
11045 
11046      Note that we only do the plain load from memory once.  Subsequent
11047      iterations use the value loaded by the compare-and-swap pattern.  */
11048 
11049   label = gen_label_rtx ();
11050   cmp_reg = gen_reg_rtx (mode);
11051 
11052   emit_move_insn (cmp_reg, mem);
11053   emit_label (label);
11054   emit_move_insn (old_reg, cmp_reg);
11055   if (seq)
11056     emit_insn (seq);
11057 
11058   success = NULL_RTX;
11059   oldval = cmp_reg;
11060   if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg,
11061                                        new_reg, false, MEMMODEL_SYNC_SEQ_CST,
11062                                        MEMMODEL_RELAXED))
11063     return false;
11064 
11065   if (oldval != cmp_reg)
11066     emit_move_insn (cmp_reg, oldval);
11067 
11068   /* Mark this jump predicted not taken.  */
11069   emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
11070                            GET_MODE (success), 1, label,
11071 			   profile_probability::guessed_never ());
11072   return true;
11073 }
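
/* The helper above is general enough to implement any atomic
   read-modify-write.  The following compiled-out sketch (a hypothetical
   function, not part of this port) shows how an atomic fetch-and-add
   could be layered on it, mirroring what optabs does generically.  */
#if 0
static rtx
pa_expand_atomic_fetch_add_sketch (rtx mem, rtx val, rtx target)
{
  machine_mode mode = GET_MODE (mem);
  rtx old_reg = gen_reg_rtx (mode);	/* Old value loaded from MEM.  */
  rtx new_reg = gen_reg_rtx (mode);	/* Old value plus VAL.  */
  rtx t;
  rtx_insn *seq;

  /* Capture the instructions computing NEW_REG from OLD_REG; the CAS
     loop re-executes them on every retry.  */
  start_sequence ();
  t = expand_simple_binop (mode, PLUS, old_reg, val, new_reg,
			   1, OPTAB_LIB_WIDEN);
  if (t != new_reg)
    emit_move_insn (new_reg, t);
  seq = get_insns ();
  end_sequence ();

  if (!pa_expand_compare_and_swap_loop (mem, old_reg, new_reg, seq))
    return NULL_RTX;

  /* A fetch-and-add returns the value seen before the addition.  */
  if (target && target != old_reg)
    emit_move_insn (target, old_reg);
  return target ? target : old_reg;
}
#endif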
11074 
11075 /* This function tries to implement an atomic exchange operation using a
11076    compare_and_swap loop. VAL is written to *MEM.  The previous contents of
11077    *MEM are returned, using TARGET if possible.  No memory model is required
11078    since a compare_and_swap loop is seq-cst.  */
11079 
11080 rtx
11081 pa_maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val)
11082 {
11083   machine_mode mode = GET_MODE (mem);
11084 
11085   if (can_compare_and_swap_p (mode, true))
11086     {
11087       if (!target || !register_operand (target, mode))
11088         target = gen_reg_rtx (mode);
11089       if (pa_expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
11090         return target;
11091     }
11092 
11093   return NULL_RTX;
11094 }
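
/* A hedged sketch of a caller: an atomic exchange or store expander in
   pa.md might use the function above along these lines (the operand
   numbering is illustrative):

     if (pa_maybe_emit_compare_and_swap_exchange_loop (operands[0],
						       operands[1],
						       operands[2]))
       DONE;
     FAIL;

   When the mode has no usable compare-and-swap pattern the function
   returns NULL_RTX and the expander must fall back or FAIL.  */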
11095 
11096 /* Implement TARGET_CALLEE_COPIES.  The callee is responsible for copying
11097    arguments passed by hidden reference in the 32-bit HP runtime.  Users
11098    can override this behavior for better compatibility with OpenMP at the
11099    risk of library incompatibilities.  Arguments are always passed by value
11100    in the 64-bit HP runtime.  */
11101 
11102 static bool
11103 pa_callee_copies (cumulative_args_t, const function_arg_info &)
11104 {
11105   return !TARGET_CALLER_COPIES;
11106 }
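
/* For instance (a hedged illustration), given

     struct big { char c[64]; };
     void f (struct big b);

   the 32-bit HP runtime passes B by hidden reference.  By default the
   callee makes any private copy it needs; with -mcaller-copies the
   caller copies the aggregate to a temporary before the call, which
   matches the convention most other targets use.  */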
11107 
11108 /* Implement TARGET_HARD_REGNO_NREGS.  */
11109 
11110 static unsigned int
11111 pa_hard_regno_nregs (unsigned int regno ATTRIBUTE_UNUSED, machine_mode mode)
11112 {
11113   return PA_HARD_REGNO_NREGS (regno, mode);
11114 }
11115 
11116 /* Implement TARGET_HARD_REGNO_MODE_OK.  */
11117 
11118 static bool
11119 pa_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
11120 {
11121   return PA_HARD_REGNO_MODE_OK (regno, mode);
11122 }
11123 
11124 /* Implement TARGET_STARTING_FRAME_OFFSET.
11125 
11126    On the 32-bit ports, we reserve one slot for the previous frame
11127    pointer and one fill slot.  The fill slot is for compatibility
11128    with HP compiled programs.  On the 64-bit ports, we reserve one
11129    slot for the previous frame pointer.  Both layouts reserve 8 bytes
   (two 4-byte slots or one 8-byte slot), which is why a single constant
   serves both ABIs.  */
11130 
11131 static HOST_WIDE_INT
11132 pa_starting_frame_offset (void)
11133 {
11134   return 8;
11135 }
11136 
11137 /* Figure out the size in words of the function argument.  The size
11138    returned by this function should always be greater than zero because
11139    we pass variable and zero sized objects by reference.  */
11140 
11141 HOST_WIDE_INT
11142 pa_function_arg_size (machine_mode mode, const_tree type)
11143 {
11144   HOST_WIDE_INT size;
11145 
11146   size = mode != BLKmode ? GET_MODE_SIZE (mode) : int_size_in_bytes (type);
11147   return CEIL (size, UNITS_PER_WORD);
11148 }
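
/* Worked examples, assuming a 32-bit runtime where UNITS_PER_WORD is 4:
   an SImode argument gives CEIL (4, 4) == 1 word, while a 10-byte
   BLKmode aggregate gives CEIL (10, 4) == 3 words.  Variable and zero
   sized objects never reach this computation; they are passed by
   reference as noted above.  */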
11149 
11150 #include "gt-pa.h"
11151