/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992-2020 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "varasm.h"
#include "calls.h"
#include "output.h"
#include "except.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "cfgrtl.h"
#include "opts.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"

/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  machine_mode store_mode;
  machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
	  && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}


#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (machine_mode mode, reg_class_t,
				    reg_class_t);
static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static inline rtx force_mode (machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
			     rtx, rtx);
static bool forward_branch_p (rtx_insn *);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_cpymem_length (rtx_insn *);
static int compute_clrmem_length (rtx_insn *);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
static void pa_linux_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *);
static int pa_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int pa_issue_rate (void);
static int pa_reloc_rw_mask (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (scalar_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
static void pa_file_end (void);
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t,
				  const function_arg_info &);
static int pa_arg_partial_bytes (cumulative_args_t, const function_arg_info &);
static void pa_function_arg_advance (cumulative_args_t,
				     const function_arg_info &);
static rtx pa_function_arg (cumulative_args_t, const function_arg_info &);
static pad_direction pa_function_arg_padding (machine_mode, const_tree);
static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
					machine_mode,
					secondary_reload_info *);
static bool pa_secondary_memory_needed (machine_mode,
					reg_class_t, reg_class_t);
static void pa_extra_live_on_entry (bitmap);
static machine_mode pa_promote_function_mode (const_tree,
					      machine_mode, int *,
					      const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (machine_mode, rtx);
static bool pa_legitimate_constant_p (machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (machine_mode, rtx, bool);
static bool pa_callee_copies (cumulative_args_t, const function_arg_info &);
static unsigned int pa_hard_regno_nregs (unsigned int, machine_mode);
static bool pa_hard_regno_mode_ok (unsigned int, machine_mode);
static bool pa_modes_tieable_p (machine_mode, machine_mode);
static bool pa_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
static HOST_WIDE_INT pa_starting_frame_offset (void);
static section *pa_elf_select_rtx_section (machine_mode, rtx,
					   unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;

/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;

/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END pa_file_end

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES pa_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING pa_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED pa_secondary_memory_needed

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_false

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS pa_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK pa_hard_regno_mode_ok
#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P pa_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS pa_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings

#undef TARGET_STARTING_FRAME_OFFSET
#define TARGET_STARTING_FRAME_OFFSET pa_starting_frame_offset

#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed

struct gcc_target targetm = TARGET_INITIALIZER;

/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}

/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
	switch (opt->opt_index)
	  {
	  case OPT_mfixed_range_:
	    fix_range (opt->arg);
	    break;

	  default:
	    gcc_unreachable ();
	  }
      }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "%<-g%> is only supported when using GAS on this processor");
      warning (0, "%<-g%> option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "%<-freorder-blocks-and-partition%> does not work "
	      "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}

enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];

static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
		      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
						 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
				   PA_BUILTIN_FABSQ, BUILT_IN_MD,
				   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
				   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
				   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
				   PA_BUILTIN_INFQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
				   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}

static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		   machine_mode mode ATTRIBUTE_UNUSED,
		   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
	machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = const_double_from_real_value (inf, target_mode);

	tmp = validize_mem (force_const_mem (target_mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (target_mode);

	emit_move_insn (target, tmp);
	return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}

/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
pa_symbolic_expression_p (rtx x)
{

  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return symbolic_operand (x, VOIDmode);
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi can be used.  */
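  /* For example, 5 can be loaded with ldo, 0x12000 with ldil, and
     0xf0 with zdepi.  */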
  return (VAL_14_BITS_P (ival)
	  || pa_ldil_cint_p (ival)
	  || pa_zdepi_cint_p (ival));
}

/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
pa_ldil_cint_p (unsigned HOST_WIDE_INT ival)
{
  unsigned HOST_WIDE_INT x;

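  /* For example, ival = 0x12345800 is accepted: its low 11 bits and
     its bits above bit 30 are all zero, so x below is zero.  A value
     such as 0x123 is rejected because its low 11 bits are nonzero.  */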
  x = ival & (((unsigned HOST_WIDE_INT) -1 << 31) | 0x7ff);
  return x == 0 || x == ((unsigned HOST_WIDE_INT) -1 << 31);
}

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
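  /* Worked example: x = 0xf0 (binary 11110000) gives lsb_mask = 0x10
     and t = ((0x0f + 0x10) & ~0x0f) = 0x10, a power of two, so the
     value is accepted (zdepi of -1 in a 4-bit field at position 4).  */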
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit patterns like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
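  /* After complementing, a valid mask leaves a single contiguous run
     of 1s; adding its lowest set bit must then give zero or a power
     of two.  E.g., for mask = ~0xf (binary 1...10000), ~mask = 0xf
     and 0xf + 1 = 0x10, so the mask is accepted.  */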
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* True iff depi can be used to compute (reg | MASK).  */
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
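  /* MASK must be a single contiguous run of 1s.  E.g., mask = 0x6
     (binary 0110): adding its lowest set bit gives 0x8, a power of
     two, so depi can set those bits in one insn.  */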
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx_insn *insn;

      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
	 and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
	{
	  /* Extract LABEL_REF.  */
	  if (GET_CODE (orig) == CONST)
	    orig = XEXP (XEXP (orig, 0), 0);
	  /* Extract CODE_LABEL.  */
	  orig = XEXP (orig, 0);
	  add_reg_note (insn, REG_LABEL_OPERAND, orig);
	  /* Make sure we have a label and not a note.  */
	  if (LABEL_P (orig))
	    LABEL_NUSES (orig)++;
	}
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx_insn *insn;
      rtx tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
	 result.  This allows the sequence to be deleted when the final
	 result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
		 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
	{
	  /* Force function label into memory in word mode.  */
	  orig = XEXP (force_const_mem (word_mode, orig), 0);
	  /* Load plabel address from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	  emit_move_insn (reg, pic_ref);
	  /* Now load address of function descriptor.  */
	  pic_ref = gen_rtx_MEM (Pmode, reg);
	}
      else
	{
	  /* Load symbol reference from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	}

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant (Pmode, base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
			   LCT_CONST, Pmode, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, tmp, t1, t2, tp;
  rtx_insn *insn;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
      case TLS_MODEL_GLOBAL_DYNAMIC:
	tmp = gen_reg_rtx (Pmode);
	if (flag_pic)
	  emit_insn (gen_tgd_load_pic (tmp, addr));
	else
	  emit_insn (gen_tgd_load (tmp, addr));
	ret = hppa_tls_call (tmp);
	break;

      case TLS_MODEL_LOCAL_DYNAMIC:
	ret = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	start_sequence ();
	if (flag_pic)
	  emit_insn (gen_tld_load_pic (tmp, addr));
	else
	  emit_insn (gen_tld_load (tmp, addr));
	t1 = hppa_tls_call (tmp);
	insn = get_insns ();
	end_sequence ();
	t2 = gen_reg_rtx (Pmode);
	emit_libcall_block (insn, t2, t1,
			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					    UNSPEC_TLSLDBASE));
	emit_insn (gen_tld_offset_load (ret, addr, t2));
	break;

      case TLS_MODEL_INITIAL_EXEC:
	tp = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	if (flag_pic)
	  emit_insn (gen_tie_load_pic (tmp, addr));
	else
	  emit_insn (gen_tie_load (tmp, addr));
	emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
	break;

      case TLS_MODEL_LOCAL_EXEC:
	tp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	emit_insn (gen_tle_load (ret, addr, tp));
	break;

      default:
	gcc_unreachable ();
    }

  return ret;
}

/* Helper for hppa_legitimize_address.  Given X, return true if it
   is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.

   These respectively represent canonical shift-add rtxs or scaled
   memory addresses.  */
static bool
mem_shadd_or_shadd_rtx_p (rtx x)
{
  return ((GET_CODE (x) == ASHIFT
	   || GET_CODE (x) == MULT)
	  && GET_CODE (XEXP (x, 1)) == CONST_INT
	  && ((GET_CODE (x) == ASHIFT
	       && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
	      || (GET_CODE (x) == MULT
		  && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= 16
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.
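   As a concrete example, a MODE_INT reference to X + 0x12345 uses the
   mask 0x3fff: since (0x12345 & 0x3fff) = 0x2345 >= 0x2000, we round
   up to Y = 0x14000, emit Z = X + Y, and then reference
   memory (Z - 0x1cbb).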

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)


   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Note that the addresses passed into hppa_legitimize_address always
   come from a MEM, so we only have to match the MULT form on incoming
   addresses.  But to be future proof we also match the ASHIFT form.

   However, this routine always places those shift-add sequences into
   registers, so we have to generate the ASHIFT form as our output.

   Put X and Z into registers.  Then put the entire expression into
   a register.  */

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine cannot
	 handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (mem_shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
      && (OBJECT_P (XEXP (x, 1))
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
      if (GET_CODE (XEXP (x, 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      rtx reg1, reg2;
      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg2,
						      GEN_INT (shift_val)),
				      reg1));
    }

  /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
      && (mode == SFmode || mode == DFmode))
    {
      int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));

      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      /* Try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_ASHIFT (Pmode,
					      XEXP (XEXP (XEXP (x, 0), 0), 0),
					      GEN_INT (shift_val)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  int val = INTVAL (XEXP (idx, 1));
	  val /= (1 << shift_val);

	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_ASHIFT (Pmode, reg1,
						    GEN_INT (shift_val)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode,
			    gen_rtx_PLUS (Pmode,
					  gen_rtx_ASHIFT (Pmode, reg2,
							  GEN_INT (shift_val)),
					  reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg1,
						      GEN_INT (shift_val)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));

    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (mem_shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big, but can be divided evenly by shadd_const
	     and added to (reg), that also works.  This allows more scaled
	     indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
		reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode,
							 reg2,
							 GEN_INT (shift_val)),
					 reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
	    {
	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode, regx2,
							 GEN_INT (shift_val)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}

/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			 reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
	   || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}

/* For the HPPA, REG, REG+CONST and LO_SUM addresses are cost 1, the
   HIGH of a symbolic constant is cost 2, and everything else is
   cost 4.

   It is no coincidence that this has the same structure
   as pa_legitimate_address_p.  */

static int
hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
		   addr_space_t as ATTRIBUTE_UNUSED,
		   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
		int opno ATTRIBUTE_UNUSED,
		int *total, bool speed ATTRIBUTE_UNUSED)
{
  int factor;
  int code = GET_CODE (x);

  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
	*total = 0;
      else if (INT_14_BITS (x))
	*total = 1;
      else
	*total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
	  && outer_code != SET)
	*total = 0;
      else
	*total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (3);
	  return true;
	}

      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (mode) / 4;
      if (factor == 0)
	factor = 1;

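      /* E.g., a DImode multiply on a 32-bit target has factor 2, so
	 it costs four times a SImode multiply.  */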
1548       if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1549 	*total = factor * factor * COSTS_N_INSNS (8);
1550       else
1551 	*total = factor * factor * COSTS_N_INSNS (20);
1552       return true;
1553 
1554     case DIV:
1555       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1556 	{
1557 	  *total = COSTS_N_INSNS (14);
1558 	  return true;
1559 	}
1560       /* FALLTHRU */
1561 
1562     case UDIV:
1563     case MOD:
1564     case UMOD:
1565       /* A mode size N times larger than SImode needs O(N*N) more insns.  */
1566       factor = GET_MODE_SIZE (mode) / 4;
1567       if (factor == 0)
1568 	factor = 1;
1569 
1570       *total = factor * factor * COSTS_N_INSNS (60);
1571       return true;
1572 
1573     case PLUS: /* this includes shNadd insns */
1574     case MINUS:
1575       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1576 	{
1577 	  *total = COSTS_N_INSNS (3);
1578 	  return true;
1579 	}
1580 
1581       /* A size N times larger than UNITS_PER_WORD needs N times as
1582 	 many insns, taking N times as long.  */
1583       factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
1584       if (factor == 0)
1585 	factor = 1;
1586       *total = factor * COSTS_N_INSNS (1);
1587       return true;
1588 
1589     case ASHIFT:
1590     case ASHIFTRT:
1591     case LSHIFTRT:
1592       *total = COSTS_N_INSNS (1);
1593       return true;
1594 
1595     default:
1596       return false;
1597     }
1598 }
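
/* Illustrative sketch (not part of the build): worked examples of the
   size scaling in hppa_rtx_costs.  COSTS_N_INSNS (N) expands to N * 4
   in GCC's rtl.h; the standalone macro below assumes that definition.  */
#if 0
#include <stdio.h>

#define MODEL_COSTS_N_INSNS(N) ((N) * 4)

int
main (void)
{
  /* DImode on a 32-bit target: mode size 8, factor = 8 / 4 = 2, so a
     soft multiply costs 2 * 2 * COSTS_N_INSNS (20) = 320.  */
  int factor = 8 / 4;
  printf ("DImode soft mult: %d\n", factor * factor * MODEL_COSTS_N_INSNS (20));

  /* PLUS scales linearly with the word count: a 16-byte mode on a
     64-bit target (UNITS_PER_WORD = 8) costs 2 * COSTS_N_INSNS (1).  */
  printf ("16-byte add: %d\n", (16 / 8) * MODEL_COSTS_N_INSNS (1));
  return 0;
}
#endif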
1599 
1600 /* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
1601    new rtx with the correct mode.  */
1602 static inline rtx
1603 force_mode (machine_mode mode, rtx orig)
1604 {
1605   if (mode == GET_MODE (orig))
1606     return orig;
1607 
1608   gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
1609 
1610   return gen_rtx_REG (mode, REGNO (orig));
1611 }
1612 
1613 /* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
1614 
1615 static bool
1616 pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1617 {
1618   return tls_referenced_p (x);
1619 }
1620 
1621 /* Emit insns to move operands[1] into operands[0].
1622 
1623    Return 1 if we have written out everything that needs to be done to
1624    do the move.  Otherwise, return 0 and the caller will emit the move
1625    normally.
1626 
1627    Note SCRATCH_REG may not be in the proper mode depending on how it
1628    will be used.  This routine is responsible for creating a new copy
1629    of SCRATCH_REG in the proper mode.  */
1630 
1631 int
1632 pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
1633 {
1634   register rtx operand0 = operands[0];
1635   register rtx operand1 = operands[1];
1636   register rtx tem;
1637 
1638   /* We can only handle indexed addresses in the destination operand
1639      of floating point stores.  Thus, we need to break out indexed
1640      addresses from the destination operand.  */
1641   if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1642     {
1643       gcc_assert (can_create_pseudo_p ());
1644 
1645       tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1646       operand0 = replace_equiv_address (operand0, tem);
1647     }
1648 
1649   /* On targets with non-equivalent space registers, break out unscaled
1650      indexed addresses from the source operand before the final CSE.
1651      We have to do this because the REG_POINTER flag is not correctly
1652      carried through various optimization passes and CSE may substitute
1653      a pseudo without the pointer set for one with the pointer set.  As
1654      a result, we lose various opportunities to create insns with
1655      unscaled indexed addresses.  */
1656   if (!TARGET_NO_SPACE_REGS
1657       && !cse_not_expected
1658       && GET_CODE (operand1) == MEM
1659       && GET_CODE (XEXP (operand1, 0)) == PLUS
1660       && REG_P (XEXP (XEXP (operand1, 0), 0))
1661       && REG_P (XEXP (XEXP (operand1, 0), 1)))
1662     operand1
1663       = replace_equiv_address (operand1,
1664 			       copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1665 
1666   if (scratch_reg
1667       && reload_in_progress && GET_CODE (operand0) == REG
1668       && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1669     operand0 = reg_equiv_mem (REGNO (operand0));
1670   else if (scratch_reg
1671 	   && reload_in_progress && GET_CODE (operand0) == SUBREG
1672 	   && GET_CODE (SUBREG_REG (operand0)) == REG
1673 	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1674     {
1675      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1676 	the code which tracks sets/uses for delete_output_reload.  */
1677       rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1678 				 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
1679 				 SUBREG_BYTE (operand0));
1680       operand0 = alter_subreg (&temp, true);
1681     }
1682 
1683   if (scratch_reg
1684       && reload_in_progress && GET_CODE (operand1) == REG
1685       && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1686     operand1 = reg_equiv_mem (REGNO (operand1));
1687   else if (scratch_reg
1688 	   && reload_in_progress && GET_CODE (operand1) == SUBREG
1689 	   && GET_CODE (SUBREG_REG (operand1)) == REG
1690 	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1691     {
1692      /* We must not alter SUBREG_BYTE (operand1) since that would confuse
1693 	the code which tracks sets/uses for delete_output_reload.  */
1694       rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1695 				 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
1696 				 SUBREG_BYTE (operand1));
1697       operand1 = alter_subreg (&temp, true);
1698     }
1699 
1700   if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1701       && ((tem = find_replacement (&XEXP (operand0, 0)))
1702 	  != XEXP (operand0, 0)))
1703     operand0 = replace_equiv_address (operand0, tem);
1704 
1705   if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1706       && ((tem = find_replacement (&XEXP (operand1, 0)))
1707 	  != XEXP (operand1, 0)))
1708     operand1 = replace_equiv_address (operand1, tem);
1709 
1710   /* Handle secondary reloads for loads/stores of FP registers from
1711      REG+D addresses where D does not fit in 5 or 14 bits, including
1712      (subreg (mem (addr))) cases, and reloads for other unsupported
1713      memory operands.  */
1714   if (scratch_reg
1715       && FP_REG_P (operand0)
1716       && (MEM_P (operand1)
1717 	  || (GET_CODE (operand1) == SUBREG
1718 	      && MEM_P (XEXP (operand1, 0)))))
1719     {
1720       rtx op1 = operand1;
1721 
1722       if (GET_CODE (op1) == SUBREG)
1723 	op1 = XEXP (op1, 0);
1724 
1725       if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
1726 	{
1727 	  if (!(TARGET_PA_20
1728 		&& !TARGET_ELF32
1729 		&& INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1730 	      && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
1731 	    {
1732 	      /* SCRATCH_REG will hold an address and maybe the actual data.
1733 		 We want it in WORD_MODE regardless of what mode it was
1734 		 originally given to us.  */
1735 	      scratch_reg = force_mode (word_mode, scratch_reg);
1736 
1737 	      /* D might not fit in 14 bits either; for such cases load D
1738 		 into scratch reg.  */
1739 	      if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1740 		{
1741 		  emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
1742 		  emit_move_insn (scratch_reg,
1743 				  gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
1744 						  Pmode,
1745 						  XEXP (XEXP (op1, 0), 0),
1746 						  scratch_reg));
1747 		}
1748 	      else
1749 		emit_move_insn (scratch_reg, XEXP (op1, 0));
1750 	      op1 = replace_equiv_address (op1, scratch_reg);
1751 	    }
1752 	}
1753       else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
1754 	       || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
1755 	       || IS_INDEX_ADDR_P (XEXP (op1, 0)))
1756 	{
1757 	  /* Load memory address into SCRATCH_REG.  */
1758 	  scratch_reg = force_mode (word_mode, scratch_reg);
1759 	  emit_move_insn (scratch_reg, XEXP (op1, 0));
1760 	  op1 = replace_equiv_address (op1, scratch_reg);
1761 	}
1762       emit_insn (gen_rtx_SET (operand0, op1));
1763       return 1;
1764     }
1765   else if (scratch_reg
1766 	   && FP_REG_P (operand1)
1767 	   && (MEM_P (operand0)
1768 	       || (GET_CODE (operand0) == SUBREG
1769 		   && MEM_P (XEXP (operand0, 0)))))
1770     {
1771       rtx op0 = operand0;
1772 
1773       if (GET_CODE (op0) == SUBREG)
1774 	op0 = XEXP (op0, 0);
1775 
1776       if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
1777 	{
1778 	  if (!(TARGET_PA_20
1779 		&& !TARGET_ELF32
1780 		&& INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1781 	      && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
1782 	    {
1783 	      /* SCRATCH_REG will hold an address and maybe the actual data.
1784 		 We want it in WORD_MODE regardless of what mode it was
1785 		 originally given to us.  */
1786 	      scratch_reg = force_mode (word_mode, scratch_reg);
1787 
1788 	      /* D might not fit in 14 bits either; for such cases load D
1789 		 into scratch reg.  */
1790 	      if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1791 		{
1792 		  emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
1793 		  emit_move_insn (scratch_reg,
1794 				  gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
1795 						  Pmode,
1796 						  XEXP (XEXP (op0, 0), 0),
1797 						  scratch_reg));
1798 		}
1799 	      else
1800 		emit_move_insn (scratch_reg, XEXP (op0, 0));
1801 	      op0 = replace_equiv_address (op0, scratch_reg);
1802 	    }
1803 	}
1804       else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
1805 	       || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
1806 	       || IS_INDEX_ADDR_P (XEXP (op0, 0)))
1807 	{
1808 	  /* Load memory address into SCRATCH_REG.  */
1809 	  scratch_reg = force_mode (word_mode, scratch_reg);
1810 	  emit_move_insn (scratch_reg, XEXP (op0, 0));
1811 	  op0 = replace_equiv_address (op0, scratch_reg);
1812 	}
1813       emit_insn (gen_rtx_SET (op0, operand1));
1814       return 1;
1815     }
1816   /* Handle secondary reloads for loads of FP registers from constant
1817      expressions by forcing the constant into memory.  For the most part,
1818      this is only necessary for SImode and DImode.
1819 
1820      Use scratch_reg to hold the address of the memory location.  */
1821   else if (scratch_reg
1822 	   && CONSTANT_P (operand1)
1823 	   && FP_REG_P (operand0))
1824     {
1825       rtx const_mem, xoperands[2];
1826 
1827       if (operand1 == CONST0_RTX (mode))
1828 	{
1829 	  emit_insn (gen_rtx_SET (operand0, operand1));
1830 	  return 1;
1831 	}
1832 
1833       /* SCRATCH_REG will hold an address and maybe the actual data.  We want
1834 	 it in WORD_MODE regardless of what mode it was originally given
1835 	 to us.  */
1836       scratch_reg = force_mode (word_mode, scratch_reg);
1837 
1838       /* Force the constant into memory and put the address of the
1839 	 memory location into scratch_reg.  */
1840       const_mem = force_const_mem (mode, operand1);
1841       xoperands[0] = scratch_reg;
1842       xoperands[1] = XEXP (const_mem, 0);
1843       pa_emit_move_sequence (xoperands, Pmode, 0);
1844 
1845       /* Now load the destination register.  */
1846       emit_insn (gen_rtx_SET (operand0,
1847 			      replace_equiv_address (const_mem, scratch_reg)));
1848       return 1;
1849     }
1850   /* Handle secondary reloads for SAR.  These occur when trying to load
1851      the SAR from memory or a constant.  */
1852   else if (scratch_reg
1853 	   && GET_CODE (operand0) == REG
1854 	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1855 	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1856 	   && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
1857     {
1858       /* The address might not be directly valid; in such cases load it
1859 	 into the scratch register.  */
1860       if (GET_CODE (operand1) == MEM
1861 	  && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
1862 	{
1863 	  /* We are reloading the address into the scratch register, so we
1864 	     want to make sure the scratch register is a full register.  */
1865 	  scratch_reg = force_mode (word_mode, scratch_reg);
1866 
1867 	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1868 	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1869 								        0)),
1870 						       Pmode,
1871 						       XEXP (XEXP (operand1, 0),
1872 						       0),
1873 						       scratch_reg));
1874 
1875 	  /* Now we are going to load the scratch register from memory,
1876 	     we want to load it in the same width as the original MEM,
1877 	     which must be the same as the width of the ultimate destination,
1878 	     OPERAND0.  */
1879 	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1880 
1881 	  emit_move_insn (scratch_reg,
1882 			  replace_equiv_address (operand1, scratch_reg));
1883 	}
1884       else
1885 	{
1886 	  /* We want to load the scratch register using the same mode as
1887 	     the ultimate destination.  */
1888 	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1889 
1890 	  emit_move_insn (scratch_reg, operand1);
1891 	}
1892 
1893       /* And emit the insn to set the ultimate destination.  We know that
1894 	 the scratch register has the same mode as the destination at this
1895 	 point.  */
1896       emit_move_insn (operand0, scratch_reg);
1897       return 1;
1898     }
1899 
1900   /* Handle the most common case: storing into a register.  */
1901   if (register_operand (operand0, mode))
1902     {
1903       /* Legitimize TLS symbol references.  This happens for references
1904 	 that aren't a legitimate constant.  */
1905       if (PA_SYMBOL_REF_TLS_P (operand1))
1906 	operand1 = legitimize_tls_address (operand1);
1907 
1908       if (register_operand (operand1, mode)
1909 	  || (GET_CODE (operand1) == CONST_INT
1910 	      && pa_cint_ok_for_move (UINTVAL (operand1)))
1911 	  || (operand1 == CONST0_RTX (mode))
1912 	  || (GET_CODE (operand1) == HIGH
1913 	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1914 	  /* Only `general_operands' can come here, so MEM is ok.  */
1915 	  || GET_CODE (operand1) == MEM)
1916 	{
1917 	  /* Various sets are created during RTL generation which don't
1918 	     have the REG_POINTER flag correctly set.  After the CSE pass,
1919 	     instruction recognition can fail if we don't consistently
1920 	     set this flag when performing register copies.  This should
1921 	     also improve the opportunities for creating insns that use
1922 	     unscaled indexing.  */
1923 	  if (REG_P (operand0) && REG_P (operand1))
1924 	    {
1925 	      if (REG_POINTER (operand1)
1926 		  && !REG_POINTER (operand0)
1927 		  && !HARD_REGISTER_P (operand0))
1928 		copy_reg_pointer (operand0, operand1);
1929 	    }
1930 
1931 	  /* When MEMs are broken out, the REG_POINTER flag doesn't
1932 	     get set.  In some cases, we can set the REG_POINTER flag
1933 	     from the declaration for the MEM.  */
1934 	  if (REG_P (operand0)
1935 	      && GET_CODE (operand1) == MEM
1936 	      && !REG_POINTER (operand0))
1937 	    {
1938 	      tree decl = MEM_EXPR (operand1);
1939 
1940 	      /* Set the register pointer flag and register alignment
1941 		 if the declaration for this memory reference is a
1942 		 pointer type.  */
1943 	      if (decl)
1944 		{
1945 		  tree type;
1946 
1947 		  /* If this is a COMPONENT_REF, use the FIELD_DECL from
1948 		     tree operand 1.  */
1949 		  if (TREE_CODE (decl) == COMPONENT_REF)
1950 		    decl = TREE_OPERAND (decl, 1);
1951 
1952 		  type = TREE_TYPE (decl);
1953 		  type = strip_array_types (type);
1954 
1955 		  if (POINTER_TYPE_P (type))
1956 		    mark_reg_pointer (operand0, BITS_PER_UNIT);
1957 		}
1958 	    }
1959 
1960 	  emit_insn (gen_rtx_SET (operand0, operand1));
1961 	  return 1;
1962 	}
1963     }
1964   else if (GET_CODE (operand0) == MEM)
1965     {
1966       if (mode == DFmode && operand1 == CONST0_RTX (mode)
1967 	  && !(reload_in_progress || reload_completed))
1968 	{
1969 	  rtx temp = gen_reg_rtx (DFmode);
1970 
1971 	  emit_insn (gen_rtx_SET (temp, operand1));
1972 	  emit_insn (gen_rtx_SET (operand0, temp));
1973 	  return 1;
1974 	}
1975       if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1976 	{
1977 	  /* Run this case quickly.  */
1978 	  emit_insn (gen_rtx_SET (operand0, operand1));
1979 	  return 1;
1980 	}
1981       if (! (reload_in_progress || reload_completed))
1982 	{
1983 	  operands[0] = validize_mem (operand0);
1984 	  operands[1] = operand1 = force_reg (mode, operand1);
1985 	}
1986     }
1987 
1988   /* Simplify the source if we need to.
1989      Note we do have to handle function labels here, even though we do
1990      not consider them legitimate constants.  Loop optimizations can
1991      call the emit_move_xxx routines with one as a source.  */
1992   if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1993       || (GET_CODE (operand1) == HIGH
1994 	  && symbolic_operand (XEXP (operand1, 0), mode))
1995       || function_label_operand (operand1, VOIDmode)
1996       || tls_referenced_p (operand1))
1997     {
1998       int ishighonly = 0;
1999 
2000       if (GET_CODE (operand1) == HIGH)
2001 	{
2002 	  ishighonly = 1;
2003 	  operand1 = XEXP (operand1, 0);
2004 	}
2005       if (symbolic_operand (operand1, mode))
2006 	{
2007 	  /* Argh.  The assembler and linker can't handle arithmetic
2008 	     involving plabels.
2009 
2010 	     So we force the plabel into memory, load operand0 from
2011 	     the memory location, then add in the constant part.  */
2012 	  if ((GET_CODE (operand1) == CONST
2013 	       && GET_CODE (XEXP (operand1, 0)) == PLUS
2014 	       && function_label_operand (XEXP (XEXP (operand1, 0), 0),
2015 					  VOIDmode))
2016 	      || function_label_operand (operand1, VOIDmode))
2017 	    {
2018 	      rtx temp, const_part;
2019 
2020 	      /* Figure out what (if any) scratch register to use.  */
2021 	      if (reload_in_progress || reload_completed)
2022 		{
2023 		  scratch_reg = scratch_reg ? scratch_reg : operand0;
2024 		  /* SCRATCH_REG will hold an address and maybe the actual
2025 		     data.  We want it in WORD_MODE regardless of what mode it
2026 		     was originally given to us.  */
2027 		  scratch_reg = force_mode (word_mode, scratch_reg);
2028 		}
2029 	      else if (flag_pic)
2030 		scratch_reg = gen_reg_rtx (Pmode);
2031 
2032 	      if (GET_CODE (operand1) == CONST)
2033 		{
2034 		  /* Save away the constant part of the expression.  */
2035 		  const_part = XEXP (XEXP (operand1, 0), 1);
2036 		  gcc_assert (GET_CODE (const_part) == CONST_INT);
2037 
2038 		  /* Force the function label into memory.  */
2039 		  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2040 		}
2041 	      else
2042 		{
2043 		  /* No constant part.  */
2044 		  const_part = NULL_RTX;
2045 
2046 		  /* Force the function label into memory.  */
2047 		  temp = force_const_mem (mode, operand1);
2048 		}
2049 
2050 
2051 	      /* Get the address of the memory location.  PIC-ify it if
2052 		 necessary.  */
2053 	      temp = XEXP (temp, 0);
2054 	      if (flag_pic)
2055 		temp = legitimize_pic_address (temp, mode, scratch_reg);
2056 
2057 	      /* Put the address of the memory location into our destination
2058 		 register.  */
2059 	      operands[1] = temp;
2060 	      pa_emit_move_sequence (operands, mode, scratch_reg);
2061 
2062 	      /* Now load from the memory location into our destination
2063 		 register.  */
2064 	      operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2065 	      pa_emit_move_sequence (operands, mode, scratch_reg);
2066 
2067 	      /* And add back in the constant part.  */
2068 	      if (const_part != NULL_RTX)
2069 		expand_inc (operand0, const_part);
2070 
2071 	      return 1;
2072 	    }
2073 
2074 	  if (flag_pic)
2075 	    {
2076 	      rtx_insn *insn;
2077 	      rtx temp;
2078 
2079 	      if (reload_in_progress || reload_completed)
2080 		{
2081 		  temp = scratch_reg ? scratch_reg : operand0;
2082 		  /* TEMP will hold an address and maybe the actual
2083 		     data.  We want it in WORD_MODE regardless of what mode it
2084 		     was originally given to us.  */
2085 		  temp = force_mode (word_mode, temp);
2086 		}
2087 	      else
2088 		temp = gen_reg_rtx (Pmode);
2089 
2090 	      /* Force (const (plus (symbol) (const_int))) to memory
2091 	         if the const_int will not fit in 14 bits.  Although
2092 		 this requires a relocation, the instruction sequence
2093 		 needed to load the value is shorter.  */
2094 	      if (GET_CODE (operand1) == CONST
2095 		       && GET_CODE (XEXP (operand1, 0)) == PLUS
2096 		       && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2097 		       && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2098 		{
2099 		  rtx x, m = force_const_mem (mode, operand1);
2100 
2101 		  x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2102 		  x = replace_equiv_address (m, x);
2103 		  insn = emit_move_insn (operand0, x);
2104 		}
2105 	      else
2106 		{
2107 		  operands[1] = legitimize_pic_address (operand1, mode, temp);
2108 		  if (REG_P (operand0) && REG_P (operands[1]))
2109 		    copy_reg_pointer (operand0, operands[1]);
2110 		  insn = emit_move_insn (operand0, operands[1]);
2111 		}
2112 
2113 	      /* Put a REG_EQUAL note on this insn.  */
2114 	      set_unique_reg_note (insn, REG_EQUAL, operand1);
2115 	    }
2116 	  /* On the HPPA, references to data space are supposed to use dp,
2117 	     register 27, but showing it in the RTL inhibits various cse
2118 	     and loop optimizations.  */
2119 	  else
2120 	    {
2121 	      rtx temp, set;
2122 
2123 	      if (reload_in_progress || reload_completed)
2124 		{
2125 		  temp = scratch_reg ? scratch_reg : operand0;
2126 		  /* TEMP will hold an address and maybe the actual
2127 		     data.  We want it in WORD_MODE regardless of what mode it
2128 		     was originally given to us.  */
2129 		  temp = force_mode (word_mode, temp);
2130 		}
2131 	      else
2132 		temp = gen_reg_rtx (mode);
2133 
2134 	      /* Loading a SYMBOL_REF into a register makes that register
2135 		 safe to be used as the base in an indexed address.
2136 
2137 		 Don't mark hard registers though.  That loses.  */
2138 	      if (GET_CODE (operand0) == REG
2139 		  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2140 		mark_reg_pointer (operand0, BITS_PER_UNIT);
2141 	      if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2142 		mark_reg_pointer (temp, BITS_PER_UNIT);
2143 
2144 	      if (ishighonly)
2145 		set = gen_rtx_SET (operand0, temp);
2146 	      else
2147 		set = gen_rtx_SET (operand0,
2148 				   gen_rtx_LO_SUM (mode, temp, operand1));
2149 
2150 	      emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2151 	      emit_insn (set);
2152 
2153 	    }
2154 	  return 1;
2155 	}
2156       else if (tls_referenced_p (operand1))
2157 	{
2158 	  rtx tmp = operand1;
2159 	  rtx addend = NULL;
2160 
2161 	  if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2162 	    {
2163 	      addend = XEXP (XEXP (tmp, 0), 1);
2164 	      tmp = XEXP (XEXP (tmp, 0), 0);
2165 	    }
2166 
2167 	  gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2168 	  tmp = legitimize_tls_address (tmp);
2169 	  if (addend)
2170 	    {
2171 	      tmp = gen_rtx_PLUS (mode, tmp, addend);
2172 	      tmp = force_operand (tmp, operands[0]);
2173 	    }
2174 	  operands[1] = tmp;
2175 	}
2176       else if (GET_CODE (operand1) != CONST_INT
2177 	       || !pa_cint_ok_for_move (UINTVAL (operand1)))
2178 	{
2179 	  rtx temp;
2180 	  rtx_insn *insn;
2181 	  rtx op1 = operand1;
2182 	  HOST_WIDE_INT value = 0;
2183 	  HOST_WIDE_INT insv = 0;
2184 	  int insert = 0;
2185 
2186 	  if (GET_CODE (operand1) == CONST_INT)
2187 	    value = INTVAL (operand1);
2188 
2189 	  if (TARGET_64BIT
2190 	      && GET_CODE (operand1) == CONST_INT
2191 	      && HOST_BITS_PER_WIDE_INT > 32
2192 	      && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2193 	    {
2194 	      HOST_WIDE_INT nval;
2195 
2196 	      /* Extract the low order 32 bits of the value and sign extend.
2197 		 If the new value is the same as the original value, we can
2198 		 use the original value as-is.  If the new value is
2199 		 different, we use it and insert the most-significant 32-bits
2200 		 of the original value into the final result.  */
2201 	      nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2202 		      ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2203 	      if (value != nval)
2204 		{
2205 #if HOST_BITS_PER_WIDE_INT > 32
2206 		  insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2207 #endif
2208 		  insert = 1;
2209 		  value = nval;
2210 		  operand1 = GEN_INT (nval);
2211 		}
2212 	    }
2213 
2214 	  if (reload_in_progress || reload_completed)
2215 	    temp = scratch_reg ? scratch_reg : operand0;
2216 	  else
2217 	    temp = gen_reg_rtx (mode);
2218 
2219 	  /* We don't directly split DImode constants on 32-bit targets
2220 	     because PLUS uses an 11-bit immediate and the insn sequence
2221 	     generated is not as efficient as the one using HIGH/LO_SUM.  */
2222 	  if (GET_CODE (operand1) == CONST_INT
2223 	      && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2224 	      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2225 	      && !insert)
2226 	    {
2227 	      /* Directly break constant into high and low parts.  This
2228 		 provides better optimization opportunities because various
2229 		 passes recognize constants split with PLUS but not LO_SUM.
2230 		 We use a 14-bit signed low part except when the addition
2231 		 of 0x4000 to the high part might change the sign of the
2232 		 high part.  */
2233 	      HOST_WIDE_INT low = value & 0x3fff;
2234 	      HOST_WIDE_INT high = value & ~ 0x3fff;
2235 
2236 	      if (low >= 0x2000)
2237 		{
2238 		  if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2239 		    high += 0x2000;
2240 		  else
2241 		    high += 0x4000;
2242 		}
2243 
2244 	      low = value - high;
2245 
2246 	      emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
2247 	      operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2248 	    }
2249 	  else
2250 	    {
2251 	      emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2252 	      operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2253 	    }
2254 
2255 	  insn = emit_move_insn (operands[0], operands[1]);
2256 
2257 	  /* Now insert the most significant 32 bits of the value
2258 	     into the register.  When we don't have a second register
2259 	     available, it could take up to nine instructions to load
2260 	     a 64-bit integer constant.  Prior to reload, we force
2261 	     constants that would take more than three instructions
2262 	     to load to the constant pool.  During and after reload,
2263 	     we have to handle all possible values.  */
2264 	  if (insert)
2265 	    {
2266 	      /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2267 		 register and the value to be inserted is outside the
2268 		 range that can be loaded with three depdi instructions.  */
2269 	      if (temp != operand0 && (insv >= 16384 || insv < -16384))
2270 		{
2271 		  operand1 = GEN_INT (insv);
2272 
2273 		  emit_insn (gen_rtx_SET (temp,
2274 					  gen_rtx_HIGH (mode, operand1)));
2275 		  emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2276 		  if (mode == DImode)
2277 		    insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2278 						  const0_rtx, temp));
2279 		  else
2280 		    insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2281 						  const0_rtx, temp));
2282 		}
2283 	      else
2284 		{
2285 		  int len = 5, pos = 27;
2286 
2287 		  /* Insert the bits using the depdi instruction.  */
2288 		  while (pos >= 0)
2289 		    {
2290 		      HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2291 		      HOST_WIDE_INT sign = v5 < 0;
2292 
2293 		      /* Left extend the insertion.  */
2294 		      insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2295 		      while (pos > 0 && (insv & 1) == sign)
2296 			{
2297 			  insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2298 			  len += 1;
2299 			  pos -= 1;
2300 			}
2301 
2302 		      if (mode == DImode)
2303 			insn = emit_insn (gen_insvdi (operand0,
2304 						      GEN_INT (len),
2305 						      GEN_INT (pos),
2306 						      GEN_INT (v5)));
2307 		      else
2308 			insn = emit_insn (gen_insvsi (operand0,
2309 						      GEN_INT (len),
2310 						      GEN_INT (pos),
2311 						      GEN_INT (v5)));
2312 
2313 		      len = pos > 0 && pos < 5 ? pos : 5;
2314 		      pos -= len;
2315 		    }
2316 		}
2317 	    }
2318 
2319 	  set_unique_reg_note (insn, REG_EQUAL, op1);
2320 
2321 	  return 1;
2322 	}
2323     }
2324   /* Now have insn-emit do whatever it normally does.  */
2325   return 0;
2326 }
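
/* Illustrative sketch (not part of the build): the constant split used
   above, as standalone arithmetic.  The helper name is hypothetical and
   the special cases near the sign boundary (0x7fffc000 and the HImode
   adjustment) are omitted.  Compile separately to check the invariants.  */
#if 0
#include <assert.h>

/* Split VALUE so that HIGH + LOW == VALUE with LOW a signed 14-bit
   immediate, as in the PLUS path of pa_emit_move_sequence.  */
static void
model_split_const (long value, long *high, long *low)
{
  *low = value & 0x3fff;
  *high = value & ~0x3fff;
  if (*low >= 0x2000)		/* LOW would exceed 13 bits; carry up.  */
    *high += 0x4000;
  *low = value - *high;
}

int
main (void)
{
  long high, low;

  model_split_const (0x12347fffL, &high, &low);
  assert (high + low == 0x12347fffL);
  assert (low >= -0x2000 && low < 0x2000);  /* fits a 14-bit signed field */

  /* The depdi loop above sign-extends each 5-bit field the same way:
     0b11111 becomes -1.  */
  assert ((((31 & 31) ^ 16) - 16) == -1);
  return 0;
}
#endif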
2327 
2328 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2329    it will need a link/runtime reloc).  */
2330 
2331 int
2332 pa_reloc_needed (tree exp)
2333 {
2334   int reloc = 0;
2335 
2336   switch (TREE_CODE (exp))
2337     {
2338     case ADDR_EXPR:
2339       return 1;
2340 
2341     case POINTER_PLUS_EXPR:
2342     case PLUS_EXPR:
2343     case MINUS_EXPR:
2344       reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2345       reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2346       break;
2347 
2348     CASE_CONVERT:
2349     case NON_LVALUE_EXPR:
2350       reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2351       break;
2352 
2353     case CONSTRUCTOR:
2354       {
2355 	tree value;
2356 	unsigned HOST_WIDE_INT ix;
2357 
2358 	FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2359 	  if (value)
2360 	    reloc |= pa_reloc_needed (value);
2361       }
2362       break;
2363 
2364     case ERROR_MARK:
2365       break;
2366 
2367     default:
2368       break;
2369     }
2370   return reloc;
2371 }
2372 
2373 
2374 /* Return the best assembler insn template
2375    for moving operands[1] into operands[0] as a fullword.  */
2376 const char *
2377 pa_singlemove_string (rtx *operands)
2378 {
2379   HOST_WIDE_INT intval;
2380 
2381   if (GET_CODE (operands[0]) == MEM)
2382     return "stw %r1,%0";
2383   if (GET_CODE (operands[1]) == MEM)
2384     return "ldw %1,%0";
2385   if (GET_CODE (operands[1]) == CONST_DOUBLE)
2386     {
2387       long i;
2388 
2389       gcc_assert (GET_MODE (operands[1]) == SFmode);
2390 
2391       /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2392 	 bit pattern.  */
2393       REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);
2394 
2395       operands[1] = GEN_INT (i);
2396       /* Fall through to CONST_INT case.  */
2397     }
2398   if (GET_CODE (operands[1]) == CONST_INT)
2399     {
2400       intval = INTVAL (operands[1]);
2401 
2402       if (VAL_14_BITS_P (intval))
2403 	return "ldi %1,%0";
2404       else if ((intval & 0x7ff) == 0)
2405 	return "ldil L'%1,%0";
2406       else if (pa_zdepi_cint_p (intval))
2407 	return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2408       else
2409 	return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2410     }
2411   return "copy %1,%0";
2412 }
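
/* Illustrative sketch (not part of the build): the constant
   classification behind the template choice above, as standalone C.
   The helper names are hypothetical; pa_zdepi_cint_p is not modeled.  */
#if 0
#include <stdio.h>

/* A 14-bit signed immediate fits a single `ldi'.  */
static int model_val_14_bits_p (long v) { return v >= -0x2000 && v < 0x2000; }

/* With the low 11 bits clear, `ldil' alone can materialize the value;
   it loads the left 21 bits of a 32-bit word.  */
static int model_ldil_ok_p (long v) { return (v & 0x7ff) == 0; }

int
main (void)
{
  long tests[] = { 42, 0x12345800L, 0x12345678L };

  for (int i = 0; i < 3; i++)
    {
      long v = tests[i];
      const char *t = model_val_14_bits_p (v) ? "ldi"
		      : model_ldil_ok_p (v) ? "ldil"
		      : "ldil + ldo (two insns)";  /* or zdepi when possible */
      printf ("%#lx -> %s\n", v, t);
    }
  return 0;
}
#endif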
2413 
2414 
2415 /* Compute position (in OP[1]) and width (in OP[2])
2416    useful for copying IMM to a register using the zdepi
2417    instructions.  Store the immediate value to insert in OP[0].  */
2418 static void
2419 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2420 {
2421   int lsb, len;
2422 
2423   /* Find the least significant set bit in IMM.  */
2424   for (lsb = 0; lsb < 32; lsb++)
2425     {
2426       if ((imm & 1) != 0)
2427         break;
2428       imm >>= 1;
2429     }
2430 
2431   /* Choose a variant based on the *sign* of the 5-bit field.  */
2432   if ((imm & 0x10) == 0)
2433     len = (lsb <= 28) ? 4 : 32 - lsb;
2434   else
2435     {
2436       /* Find the width of the bitstring in IMM.  */
2437       for (len = 5; len < 32 - lsb; len++)
2438 	{
2439 	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2440 	    break;
2441 	}
2442 
2443       /* Sign extend IMM as a 5-bit value.  */
2444       imm = (imm & 0xf) - 0x10;
2445     }
2446 
2447   op[0] = imm;
2448   op[1] = 31 - lsb;
2449   op[2] = len;
2450 }
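
/* Illustrative sketch (not part of the build): verify that the
   (value, position, length) triple computed above reconstructs IMM.
   This only holds for immediates pa_zdepi_cint_p accepts; the model
   function below is hypothetical.  */
#if 0
#include <assert.h>

/* zdepi semantics: sign-extend the 5-bit VAL5 to LEN bits, deposit it
   LSB = 31 - POS bits from the right, and zero everything else.  */
static unsigned int
model_zdepi (long val5, unsigned int pos, unsigned int len)
{
  unsigned int lsb = 31 - pos;
  unsigned int field = (unsigned int) val5 & ((1u << len) - 1);
  return field << lsb;
}

int
main (void)
{
  /* 0xff0 is a run of eight ones shifted left by 4; the code above
     computes op = { -1, 27, 8 }, and depositing -1 rebuilds it.  */
  assert (model_zdepi (-1, 27, 8) == 0xff0u);
  return 0;
}
#endif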
2451 
2452 /* Compute position (in OP[1]) and width (in OP[2])
2453    useful for copying IMM to a register using the depdi,z
2454    instructions.  Store the immediate value to insert in OP[0].  */
2455 
2456 static void
2457 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2458 {
2459   int lsb, len, maxlen;
2460 
2461   maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2462 
2463   /* Find the least significant set bit in IMM.  */
2464   for (lsb = 0; lsb < maxlen; lsb++)
2465     {
2466       if ((imm & 1) != 0)
2467         break;
2468       imm >>= 1;
2469     }
2470 
2471   /* Choose a variant based on the *sign* of the 5-bit field.  */
2472   if ((imm & 0x10) == 0)
2473     len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2474   else
2475     {
2476       /* Find the width of the bitstring in IMM.  */
2477       for (len = 5; len < maxlen - lsb; len++)
2478 	{
2479 	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2480 	    break;
2481 	}
2482 
2483       /* Extend length if host is narrow and IMM is negative.  */
2484       if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2485 	len += 32;
2486 
2487       /* Sign extend IMM as a 5-bit value.  */
2488       imm = (imm & 0xf) - 0x10;
2489     }
2490 
2491   op[0] = imm;
2492   op[1] = 63 - lsb;
2493   op[2] = len;
2494 }
2495 
2496 /* Output assembler code to perform a doubleword move insn
2497    with operands OPERANDS.  */
2498 
2499 const char *
2500 pa_output_move_double (rtx *operands)
2501 {
2502   enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2503   rtx latehalf[2];
2504   rtx addreg0 = 0, addreg1 = 0;
2505   int highonly = 0;
2506 
2507   /* First classify both operands.  */
2508 
2509   if (REG_P (operands[0]))
2510     optype0 = REGOP;
2511   else if (offsettable_memref_p (operands[0]))
2512     optype0 = OFFSOP;
2513   else if (GET_CODE (operands[0]) == MEM)
2514     optype0 = MEMOP;
2515   else
2516     optype0 = RNDOP;
2517 
2518   if (REG_P (operands[1]))
2519     optype1 = REGOP;
2520   else if (CONSTANT_P (operands[1]))
2521     optype1 = CNSTOP;
2522   else if (offsettable_memref_p (operands[1]))
2523     optype1 = OFFSOP;
2524   else if (GET_CODE (operands[1]) == MEM)
2525     optype1 = MEMOP;
2526   else
2527     optype1 = RNDOP;
2528 
2529   /* Check for cases that the operand constraints are not
2530      supposed to allow.  */
2531   gcc_assert (optype0 == REGOP || optype1 == REGOP);
2532 
2533   /* Handle copies between general and floating registers.  */
2534 
2535   if (optype0 == REGOP && optype1 == REGOP
2536       && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2537     {
2538       if (FP_REG_P (operands[0]))
2539 	{
2540 	  output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2541 	  output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2542 	  return "{fldds|fldd} -16(%%sp),%0";
2543 	}
2544       else
2545 	{
2546 	  output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2547 	  output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2548 	  return "{ldws|ldw} -12(%%sp),%R0";
2549 	}
2550     }
2551 
2552   /* Handle auto-decrementing and auto-incrementing loads and stores
2553      specifically, since the structure of the function doesn't work
2554      for them without major modification.  Do this better when we teach
2555      this port about the general inc/dec addressing of the PA.
2556      (This was written by tege.  Chide him if it doesn't work.)  */
2557 
2558   if (optype0 == MEMOP)
2559     {
2560       /* We have to output the address syntax ourselves, since print_operand
2561 	 doesn't deal with the addresses we want to use.  Fix this later.  */
2562 
2563       rtx addr = XEXP (operands[0], 0);
2564       if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2565 	{
2566 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2567 
2568 	  operands[0] = XEXP (addr, 0);
2569 	  gcc_assert (GET_CODE (operands[1]) == REG
2570 		      && GET_CODE (operands[0]) == REG);
2571 
2572 	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2573 
2574 	  /* No overlap between high target register and address
2575 	     register.  (We do this in a non-obvious way to
2576 	     save a register file writeback)  */
2577 	  if (GET_CODE (addr) == POST_INC)
2578 	    return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2579 	  return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2580 	}
2581       else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2582 	{
2583 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2584 
2585 	  operands[0] = XEXP (addr, 0);
2586 	  gcc_assert (GET_CODE (operands[1]) == REG
2587 		      && GET_CODE (operands[0]) == REG);
2588 
2589 	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2590 	  /* No overlap between high target register and address
2591 	     register.  (We do this in a non-obvious way to save a
2592 	     register file writeback)  */
2593 	  if (GET_CODE (addr) == PRE_INC)
2594 	    return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2595 	  return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2596 	}
2597     }
2598   if (optype1 == MEMOP)
2599     {
2600       /* We have to output the address syntax ourselves, since print_operand
2601 	 doesn't deal with the addresses we want to use.  Fix this later.  */
2602 
2603       rtx addr = XEXP (operands[1], 0);
2604       if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2605 	{
2606 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2607 
2608 	  operands[1] = XEXP (addr, 0);
2609 	  gcc_assert (GET_CODE (operands[0]) == REG
2610 		      && GET_CODE (operands[1]) == REG);
2611 
2612 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2613 	    {
2614 	      /* No overlap between high target register and address
2615 		 register.  (We do this in a non-obvious way to
2616 		 save a register file writeback)  */
2617 	      if (GET_CODE (addr) == POST_INC)
2618 		return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2619 	      return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2620 	    }
2621 	  else
2622 	    {
2623 	      /* This is an undefined situation.  We should load into the
2624 		 address register *and* update that register.  Probably
2625 		 we don't need to handle this at all.  */
2626 	      if (GET_CODE (addr) == POST_INC)
2627 		return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2628 	      return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2629 	    }
2630 	}
2631       else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2632 	{
2633 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2634 
2635 	  operands[1] = XEXP (addr, 0);
2636 	  gcc_assert (GET_CODE (operands[0]) == REG
2637 		      && GET_CODE (operands[1]) == REG);
2638 
2639 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2640 	    {
2641 	      /* No overlap between high target register and address
2642 		 register.  (We do this in a non-obvious way to
2643 		 save a register file writeback)  */
2644 	      if (GET_CODE (addr) == PRE_INC)
2645 		return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2646 	      return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2647 	    }
2648 	  else
2649 	    {
2650 	      /* This is an undefined situation.  We should load into the
2651 		 address register *and* update that register.  Probably
2652 		 we don't need to handle this at all.  */
2653 	      if (GET_CODE (addr) == PRE_INC)
2654 		return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2655 	      return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2656 	    }
2657 	}
2658       else if (GET_CODE (addr) == PLUS
2659 	       && GET_CODE (XEXP (addr, 0)) == MULT)
2660 	{
2661 	  rtx xoperands[4];
2662 
2663 	  /* Load address into left half of destination register.  */
2664 	  xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2665 	  xoperands[1] = XEXP (addr, 1);
2666 	  xoperands[2] = XEXP (XEXP (addr, 0), 0);
2667 	  xoperands[3] = XEXP (XEXP (addr, 0), 1);
2668 	  output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2669 			   xoperands);
2670 	  return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2671 	}
2672       else if (GET_CODE (addr) == PLUS
2673 	       && REG_P (XEXP (addr, 0))
2674 	       && REG_P (XEXP (addr, 1)))
2675 	{
2676 	  rtx xoperands[3];
2677 
2678 	  /* Load address into left half of destination register.  */
2679 	  xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2680 	  xoperands[1] = XEXP (addr, 0);
2681 	  xoperands[2] = XEXP (addr, 1);
2682 	  output_asm_insn ("{addl|add,l} %1,%2,%0",
2683 			   xoperands);
2684 	  return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2685 	}
2686     }
2687 
2688   /* If an operand is an unoffsettable memory ref, find a register
2689      we can increment temporarily to make it refer to the second word.  */
2690 
2691   if (optype0 == MEMOP)
2692     addreg0 = find_addr_reg (XEXP (operands[0], 0));
2693 
2694   if (optype1 == MEMOP)
2695     addreg1 = find_addr_reg (XEXP (operands[1], 0));
2696 
2697   /* Ok, we can do one word at a time.
2698      Normally we do the low-numbered word first.
2699 
2700      In either case, set up in LATEHALF the operands to use
2701      for the high-numbered word and in some cases alter the
2702      operands in OPERANDS to be suitable for the low-numbered word.  */
2703 
2704   if (optype0 == REGOP)
2705     latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2706   else if (optype0 == OFFSOP)
2707     latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
2708   else
2709     latehalf[0] = operands[0];
2710 
2711   if (optype1 == REGOP)
2712     latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2713   else if (optype1 == OFFSOP)
2714     latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
2715   else if (optype1 == CNSTOP)
2716     {
2717       if (GET_CODE (operands[1]) == HIGH)
2718 	{
2719 	  operands[1] = XEXP (operands[1], 0);
2720 	  highonly = 1;
2721 	}
2722       split_double (operands[1], &operands[1], &latehalf[1]);
2723     }
2724   else
2725     latehalf[1] = operands[1];
2726 
2727   /* If the first move would clobber the source of the second one,
2728      do them in the other order.
2729 
2730      This can happen in two cases:
2731 
2732 	mem -> register where the first half of the destination register
2733  	is the same register used in the memory's address.  Reload
2734 	can create such insns.
2735 
2736 	mem in this case will be either register indirect or register
2737 	indirect plus a valid offset.
2738 
2739 	register -> register move where REGNO (dst) == REGNO (src) + 1.
2740 	Someone (Tim/Tege?) claimed this can happen for parameter loads.
2741 
2742      Handle mem -> register case first.  */
2743   if (optype0 == REGOP
2744       && (optype1 == MEMOP || optype1 == OFFSOP)
2745       && refers_to_regno_p (REGNO (operands[0]), operands[1]))
2746     {
2747       /* Do the late half first.  */
2748       if (addreg1)
2749 	output_asm_insn ("ldo 4(%0),%0", &addreg1);
2750       output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2751 
2752       /* Then clobber.  */
2753       if (addreg1)
2754 	output_asm_insn ("ldo -4(%0),%0", &addreg1);
2755       return pa_singlemove_string (operands);
2756     }
2757 
2758   /* Now handle register -> register case.  */
2759   if (optype0 == REGOP && optype1 == REGOP
2760       && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2761     {
2762       output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2763       return pa_singlemove_string (operands);
2764     }
2765 
2766   /* Normal case: do the two words, low-numbered first.  */
2767 
2768   output_asm_insn (pa_singlemove_string (operands), operands);
2769 
2770   /* Make any unoffsettable addresses point at high-numbered word.  */
2771   if (addreg0)
2772     output_asm_insn ("ldo 4(%0),%0", &addreg0);
2773   if (addreg1)
2774     output_asm_insn ("ldo 4(%0),%0", &addreg1);
2775 
2776   /* Do high-numbered word.  */
2777   if (highonly)
2778     output_asm_insn ("ldil L'%1,%0", latehalf);
2779   else
2780     output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2781 
2782   /* Undo the adds we just did.  */
2783   if (addreg0)
2784     output_asm_insn ("ldo -4(%0),%0", &addreg0);
2785   if (addreg1)
2786     output_asm_insn ("ldo -4(%0),%0", &addreg1);
2787 
2788   return "";
2789 }
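
/* Illustrative sketch (not part of the build): the clobber hazard that
   forces the late half to be moved first.  When the low destination
   register is also the address register of the source MEM, doing the
   low word first would destroy the address needed for the high word.
   Pointers stand in for registers here.  */
#if 0
#include <assert.h>

int
main (void)
{
  unsigned int mem[2] = { 0x1111, 0x2222 };
  unsigned int *addr = mem;	/* doubles as the low destination */
  unsigned int lo, hi;

  /* Late half first: the address is still intact for the second load,
     and only then is the register holding it overwritten.  */
  hi = addr[1];
  lo = addr[0];
  assert (lo == 0x1111 && hi == 0x2222);
  return 0;
}
#endif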
2790 
2791 const char *
2792 pa_output_fp_move_double (rtx *operands)
2793 {
2794   if (FP_REG_P (operands[0]))
2795     {
2796       if (FP_REG_P (operands[1])
2797 	  || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2798 	output_asm_insn ("fcpy,dbl %f1,%0", operands);
2799       else
2800 	output_asm_insn ("fldd%F1 %1,%0", operands);
2801     }
2802   else if (FP_REG_P (operands[1]))
2803     {
2804       output_asm_insn ("fstd%F0 %1,%0", operands);
2805     }
2806   else
2807     {
2808       rtx xoperands[2];
2809 
2810       gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2811 
2812       /* This is a pain.  You have to be prepared to deal with an
2813 	 arbitrary address here, including pre/post increment/decrement,
2814 	 so we avoid such addresses in the MD.  */
2815 
2816       gcc_assert (GET_CODE (operands[0]) == REG);
2817 
2818       xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2819       xoperands[0] = operands[0];
2820       output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2821     }
2822   return "";
2823 }
2824 
2825 /* Return a REG that occurs in ADDR with coefficient 1.
2826    ADDR can be effectively incremented by incrementing REG.  */
2827 
2828 static rtx
2829 find_addr_reg (rtx addr)
2830 {
2831   while (GET_CODE (addr) == PLUS)
2832     {
2833       if (GET_CODE (XEXP (addr, 0)) == REG)
2834 	addr = XEXP (addr, 0);
2835       else if (GET_CODE (XEXP (addr, 1)) == REG)
2836 	addr = XEXP (addr, 1);
2837       else if (CONSTANT_P (XEXP (addr, 0)))
2838 	addr = XEXP (addr, 1);
2839       else if (CONSTANT_P (XEXP (addr, 1)))
2840 	addr = XEXP (addr, 0);
2841       else
2842 	gcc_unreachable ();
2843     }
2844   gcc_assert (GET_CODE (addr) == REG);
2845   return addr;
2846 }
2847 
2848 /* Emit code to perform a block move.
2849 
2850    OPERANDS[0] is the destination pointer as a REG, clobbered.
2851    OPERANDS[1] is the source pointer as a REG, clobbered.
2852    OPERANDS[2] is a register for temporary storage.
2853    OPERANDS[3] is a register for temporary storage.
2854    OPERANDS[4] is the size as a CONST_INT
2855    OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2856    OPERANDS[6] is another temporary register.  */
2857 
2858 const char *
2859 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2860 {
2861   int align = INTVAL (operands[5]);
2862   unsigned long n_bytes = INTVAL (operands[4]);
2863 
2864   /* We can't move more than a word at a time because the PA
2865      has no integer move insns longer than a word.  (Could use fp mem ops?)  */
2866   if (align > (TARGET_64BIT ? 8 : 4))
2867     align = (TARGET_64BIT ? 8 : 4);
2868 
2869   /* Note that we know each loop below will execute at least twice
2870      (else we would have open-coded the copy).  */
2871   switch (align)
2872     {
2873       case 8:
2874 	/* Pre-adjust the loop counter.  */
2875 	operands[4] = GEN_INT (n_bytes - 16);
2876 	output_asm_insn ("ldi %4,%2", operands);
2877 
2878 	/* Copying loop.  */
2879 	output_asm_insn ("ldd,ma 8(%1),%3", operands);
2880 	output_asm_insn ("ldd,ma 8(%1),%6", operands);
2881 	output_asm_insn ("std,ma %3,8(%0)", operands);
2882 	output_asm_insn ("addib,>= -16,%2,.-12", operands);
2883 	output_asm_insn ("std,ma %6,8(%0)", operands);
2884 
2885 	/* Handle the residual.  There could be up to 15 bytes of
2886 	   residual to copy!  */
2887 	if (n_bytes % 16 != 0)
2888 	  {
2889 	    operands[4] = GEN_INT (n_bytes % 8);
2890 	    if (n_bytes % 16 >= 8)
2891 	      output_asm_insn ("ldd,ma 8(%1),%3", operands);
2892 	    if (n_bytes % 8 != 0)
2893 	      output_asm_insn ("ldd 0(%1),%6", operands);
2894 	    if (n_bytes % 16 >= 8)
2895 	      output_asm_insn ("std,ma %3,8(%0)", operands);
2896 	    if (n_bytes % 8 != 0)
2897 	      output_asm_insn ("stdby,e %6,%4(%0)", operands);
2898 	  }
2899 	return "";
2900 
2901       case 4:
2902 	/* Pre-adjust the loop counter.  */
2903 	operands[4] = GEN_INT (n_bytes - 8);
2904 	output_asm_insn ("ldi %4,%2", operands);
2905 
2906 	/* Copying loop.  */
2907 	output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2908 	output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2909 	output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2910 	output_asm_insn ("addib,>= -8,%2,.-12", operands);
2911 	output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2912 
2913 	/* Handle the residual.  There could be up to 7 bytes of
2914 	   residual to copy!  */
2915 	if (n_bytes % 8 != 0)
2916 	  {
2917 	    operands[4] = GEN_INT (n_bytes % 4);
2918 	    if (n_bytes % 8 >= 4)
2919 	      output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2920 	    if (n_bytes % 4 != 0)
2921 	      output_asm_insn ("ldw 0(%1),%6", operands);
2922 	    if (n_bytes % 8 >= 4)
2923 	      output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2924 	    if (n_bytes % 4 != 0)
2925 	      output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2926 	  }
2927 	return "";
2928 
2929       case 2:
2930 	/* Pre-adjust the loop counter.  */
2931 	operands[4] = GEN_INT (n_bytes - 4);
2932 	output_asm_insn ("ldi %4,%2", operands);
2933 
2934 	/* Copying loop.  */
2935 	output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2936 	output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2937 	output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2938 	output_asm_insn ("addib,>= -4,%2,.-12", operands);
2939 	output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2940 
2941 	/* Handle the residual.  */
2942 	if (n_bytes % 4 != 0)
2943 	  {
2944 	    if (n_bytes % 4 >= 2)
2945 	      output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2946 	    if (n_bytes % 2 != 0)
2947 	      output_asm_insn ("ldb 0(%1),%6", operands);
2948 	    if (n_bytes % 4 >= 2)
2949 	      output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2950 	    if (n_bytes % 2 != 0)
2951 	      output_asm_insn ("stb %6,0(%0)", operands);
2952 	  }
2953 	return "";
2954 
2955       case 1:
2956 	/* Pre-adjust the loop counter.  */
2957 	operands[4] = GEN_INT (n_bytes - 2);
2958 	output_asm_insn ("ldi %4,%2", operands);
2959 
2960 	/* Copying loop.  */
2961 	output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2962 	output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2963 	output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2964 	output_asm_insn ("addib,>= -2,%2,.-12", operands);
2965 	output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2966 
2967 	/* Handle the residual.  */
2968 	if (n_bytes % 2 != 0)
2969 	  {
2970 	    output_asm_insn ("ldb 0(%1),%3", operands);
2971 	    output_asm_insn ("stb %3,0(%0)", operands);
2972 	  }
2973 	return "";
2974 
2975       default:
2976 	gcc_unreachable ();
2977     }
2978 }
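
/* Illustrative sketch (not part of the build): the align-4 case above
   expressed in C.  The main loop moves two words per iteration, then
   up to 7 residual bytes are finished with one full word and a partial
   store (the role of `stbys,e').  memcpy stands in for ldw/stw.  */
#if 0
#include <assert.h>
#include <string.h>

static void
model_block_move_4 (unsigned char *dst, const unsigned char *src,
		    unsigned long n_bytes)
{
  unsigned long n = n_bytes & ~7UL;	/* the loop copies 8 per pass */

  for (unsigned long i = 0; i < n; i += 8)
    memcpy (dst + i, src + i, 8);

  if (n_bytes % 8 >= 4)			/* one extra full word */
    {
      memcpy (dst + n, src + n, 4);
      n += 4;
    }
  if (n_bytes % 4 != 0)			/* partial final word */
    memcpy (dst + n, src + n, n_bytes % 4);
}

int
main (void)
{
  unsigned char s[23], d[23];

  for (int i = 0; i < 23; i++)
    s[i] = (unsigned char) i;
  model_block_move_4 (d, s, 23);
  assert (memcmp (d, s, 23) == 0);
  return 0;
}
#endif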
2979 
2980 /* Count the number of insns necessary to handle this block move.
2981 
2982    Basic structure is the same as pa_output_block_move, except that
2983    we count insns rather than emit them.  */
2984 
2985 static int
2986 compute_cpymem_length (rtx_insn *insn)
2987 {
2988   rtx pat = PATTERN (insn);
2989   unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2990   unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2991   unsigned int n_insns = 0;
2992 
2993   /* We can't move more than a word at a time because the PA
2994      has no integer move insns longer than a word.  (Could use fp mem ops?)  */
2995   if (align > (TARGET_64BIT ? 8 : 4))
2996     align = (TARGET_64BIT ? 8 : 4);
2997 
2998   /* The basic copying loop.  */
2999   n_insns = 6;
3000 
3001   /* Residuals.  */
3002   if (n_bytes % (2 * align) != 0)
3003     {
3004       if ((n_bytes % (2 * align)) >= align)
3005 	n_insns += 2;
3006 
3007       if ((n_bytes % align) != 0)
3008 	n_insns += 2;
3009     }
3010 
3011   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
3012   return n_insns * 4;
3013 }
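
/* Illustrative sketch (not part of the build): the length formula above
   applied to sample sizes.  Copying 23 bytes at alignment 4 needs the
   6-insn loop plus 2 insns for the full-word residual plus 2 for the
   partial word: 10 insns, i.e. 40 bytes of code.  */
#if 0
#include <assert.h>

static int
model_cpymem_length (unsigned long n_bytes, unsigned int align)
{
  unsigned int n_insns = 6;		/* the basic copying loop */

  if (n_bytes % (2 * align) != 0)
    {
      if (n_bytes % (2 * align) >= align)
	n_insns += 2;			/* full-word residual */
      if (n_bytes % align != 0)
	n_insns += 2;			/* partial-word residual */
    }
  return n_insns * 4;			/* each PA insn is 4 bytes */
}

int
main (void)
{
  assert (model_cpymem_length (23, 4) == 40);	/* 10 insns */
  assert (model_cpymem_length (16, 4) == 24);	/* loop only */
  return 0;
}
#endif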
3014 
3015 /* Emit code to perform a block clear.
3016 
3017    OPERANDS[0] is the destination pointer as a REG, clobbered.
3018    OPERANDS[1] is a register for temporary storage.
3019    OPERANDS[2] is the size as a CONST_INT
3020    OPERANDS[3] is the alignment safe to use, as a CONST_INT.  */
3021 
3022 const char *
3023 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3024 {
3025   int align = INTVAL (operands[3]);
3026   unsigned long n_bytes = INTVAL (operands[2]);
3027 
3028   /* We can't clear more than a word at a time because the PA
3029      has no integer move insns longer than a word.  */
3030   if (align > (TARGET_64BIT ? 8 : 4))
3031     align = (TARGET_64BIT ? 8 : 4);
3032 
3033   /* Note that we know each loop below will execute at least twice
3034      (else we would have open-coded the copy).  */
3035   switch (align)
3036     {
3037       case 8:
3038 	/* Pre-adjust the loop counter.  */
3039 	operands[2] = GEN_INT (n_bytes - 16);
3040 	output_asm_insn ("ldi %2,%1", operands);
3041 
3042 	/* Loop.  */
3043 	output_asm_insn ("std,ma %%r0,8(%0)", operands);
3044 	output_asm_insn ("addib,>= -16,%1,.-4", operands);
3045 	output_asm_insn ("std,ma %%r0,8(%0)", operands);
3046 
3047 	/* Handle the residual.  There could be up to 15 bytes of
3048 	   residual to clear!  */
3049 	if (n_bytes % 16 != 0)
3050 	  {
3051 	    operands[2] = GEN_INT (n_bytes % 8);
3052 	    if (n_bytes % 16 >= 8)
3053 	      output_asm_insn ("std,ma %%r0,8(%0)", operands);
3054 	    if (n_bytes % 8 != 0)
3055 	      output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3056 	  }
3057 	return "";
3058 
3059       case 4:
3060 	/* Pre-adjust the loop counter.  */
3061 	operands[2] = GEN_INT (n_bytes - 8);
3062 	output_asm_insn ("ldi %2,%1", operands);
3063 
3064 	/* Loop.  */
3065 	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3066 	output_asm_insn ("addib,>= -8,%1,.-4", operands);
3067 	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3068 
3069 	/* Handle the residual.  There could be up to 7 bytes of
3070 	   residual to clear!  */
3071 	if (n_bytes % 8 != 0)
3072 	  {
3073 	    operands[2] = GEN_INT (n_bytes % 4);
3074 	    if (n_bytes % 8 >= 4)
3075 	      output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3076 	    if (n_bytes % 4 != 0)
3077 	      output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3078 	  }
3079 	return "";
3080 
3081       case 2:
3082 	/* Pre-adjust the loop counter.  */
3083 	operands[2] = GEN_INT (n_bytes - 4);
3084 	output_asm_insn ("ldi %2,%1", operands);
3085 
3086 	/* Loop.  */
3087 	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3088 	output_asm_insn ("addib,>= -4,%1,.-4", operands);
3089 	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3090 
3091 	/* Handle the residual.  */
3092 	if (n_bytes % 4 != 0)
3093 	  {
3094 	    if (n_bytes % 4 >= 2)
3095 	      output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3096 	    if (n_bytes % 2 != 0)
3097 	      output_asm_insn ("stb %%r0,0(%0)", operands);
3098 	  }
3099 	return "";
3100 
3101       case 1:
3102 	/* Pre-adjust the loop counter.  */
3103 	operands[2] = GEN_INT (n_bytes - 2);
3104 	output_asm_insn ("ldi %2,%1", operands);
3105 
3106 	/* Loop.  */
3107 	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3108 	output_asm_insn ("addib,>= -2,%1,.-4", operands);
3109 	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3110 
3111 	/* Handle the residual.  */
3112 	if (n_bytes % 2 != 0)
3113 	  output_asm_insn ("stb %%r0,0(%0)", operands);
3114 
3115 	return "";
3116 
3117       default:
3118 	gcc_unreachable ();
3119     }
3120 }
3121 
3122 /* Count the number of insns necessary to handle this block clear.
3123 
3124    Basic structure is the same as pa_output_block_clear, except that
3125    we count insns rather than emit them.  */
3126 
3127 static int
3128 compute_clrmem_length (rtx_insn *insn)
3129 {
3130   rtx pat = PATTERN (insn);
3131   unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3132   unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3133   unsigned int n_insns = 0;
3134 
3135   /* We can't clear more than a word at a time because the PA
3136      has no integer move insns longer than a word.  */
3137   if (align > (TARGET_64BIT ? 8 : 4))
3138     align = (TARGET_64BIT ? 8 : 4);
3139 
3140   /* The basic loop.  */
3141   n_insns = 4;
3142 
3143   /* Residuals.  */
3144   if (n_bytes % (2 * align) != 0)
3145     {
3146       if ((n_bytes % (2 * align)) >= align)
3147 	n_insns++;
3148 
3149       if ((n_bytes % align) != 0)
3150 	n_insns++;
3151     }
3152 
3153   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
3154   return n_insns * 4;
3155 }
3156 
3157 
3158 const char *
3159 pa_output_and (rtx *operands)
3160 {
3161   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3162     {
3163       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3164       int ls0, ls1, ms0, p, len;
3165 
3166       for (ls0 = 0; ls0 < 32; ls0++)
3167 	if ((mask & (1 << ls0)) == 0)
3168 	  break;
3169 
3170       for (ls1 = ls0; ls1 < 32; ls1++)
3171 	if ((mask & (1 << ls1)) != 0)
3172 	  break;
3173 
3174       for (ms0 = ls1; ms0 < 32; ms0++)
3175 	if ((mask & (1 << ms0)) == 0)
3176 	  break;
3177 
3178       gcc_assert (ms0 == 32);
3179 
3180       if (ls1 == 32)
3181 	{
3182 	  len = ls0;
3183 
3184 	  gcc_assert (len);
3185 
3186 	  operands[2] = GEN_INT (len);
3187 	  return "{extru|extrw,u} %1,31,%2,%0";
3188 	}
3189       else
3190 	{
3191 	  /* We could use this `depi' for the case above as well, but `depi'
3192 	     requires one more register file access than an `extru'.  */
3193 
3194 	  p = 31 - ls0;
3195 	  len = ls1 - ls0;
3196 
3197 	  operands[2] = GEN_INT (p);
3198 	  operands[3] = GEN_INT (len);
3199 	  return "{depi|depwi} 0,%2,%3,%0";
3200 	}
3201     }
3202   else
3203     return "and %1,%2,%0";
3204 }
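
/* Two worked examples of the decomposition above: mask 0x0000ffff
   yields ls0 = 16 and ls1 = 32, so the AND is emitted as the
   zero-extraction "extru %1,31,16,%0"; mask 0xffffff00 yields
   ls0 = 0 and ls1 = 8, hence p = 31 and len = 8, and the AND is
   emitted as "depwi 0,31,8,%0", which clears the low byte.  */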
3205 
3206 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3207    storing the result in operands[0].  */
3208 const char *
3209 pa_output_64bit_and (rtx *operands)
3210 {
3211   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3212     {
3213       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3214       int ls0, ls1, ms0, p, len;
3215 
3216       for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3217 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3218 	  break;
3219 
3220       for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3221 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3222 	  break;
3223 
3224       for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3225 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3226 	  break;
3227 
3228       gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3229 
3230       if (ls1 == HOST_BITS_PER_WIDE_INT)
3231 	{
3232 	  len = ls0;
3233 
3234 	  gcc_assert (len);
3235 
3236 	  operands[2] = GEN_INT (len);
3237 	  return "extrd,u %1,63,%2,%0";
3238 	}
3239       else
3240 	{
3241 	  /* We could use this `depdi' for the case above as well, but `depdi'
3242 	     requires one more register file access than an `extrd,u'.  */
3243 
3244 	  p = 63 - ls0;
3245 	  len = ls1 - ls0;
3246 
3247 	  operands[2] = GEN_INT (p);
3248 	  operands[3] = GEN_INT (len);
3249 	  return "depdi 0,%2,%3,%0";
3250 	}
3251     }
3252   else
3253     return "and %1,%2,%0";
3254 }
3255 
3256 const char *
3257 pa_output_ior (rtx *operands)
3258 {
3259   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3260   int bs0, bs1, p, len;
3261 
3262   if (INTVAL (operands[2]) == 0)
3263     return "copy %1,%0";
3264 
3265   for (bs0 = 0; bs0 < 32; bs0++)
3266     if ((mask & (1 << bs0)) != 0)
3267       break;
3268 
3269   for (bs1 = bs0; bs1 < 32; bs1++)
3270     if ((mask & (1 << bs1)) == 0)
3271       break;
3272 
3273   gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3274 
3275   p = 31 - bs0;
3276   len = bs1 - bs0;
3277 
3278   operands[2] = GEN_INT (p);
3279   operands[3] = GEN_INT (len);
3280   return "{depi|depwi} -1,%2,%3,%0";
3281 }
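
/* For example, mask 0x0000ff00 yields bs0 = 8 and bs1 = 16, hence
   p = 23 and len = 8, and the IOR is emitted as "depwi -1,23,8,%0",
   setting bits 8..15 of the result.  The assert above checks that
   the mask is a single contiguous run of ones.  */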
3282 
3283 /* Return a string to perform a bitwise inclusive-or of operands[1] with
3284    operands[2], storing the result in operands[0].  */
3285 const char *
3286 pa_output_64bit_ior (rtx *operands)
3287 {
3288   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3289   int bs0, bs1, p, len;
3290 
3291   if (INTVAL (operands[2]) == 0)
3292     return "copy %1,%0";
3293 
3294   for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3295     if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3296       break;
3297 
3298   for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3299     if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3300       break;
3301 
3302   gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3303 	      || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3304 
3305   p = 63 - bs0;
3306   len = bs1 - bs0;
3307 
3308   operands[2] = GEN_INT (p);
3309   operands[3] = GEN_INT (len);
3310   return "depdi -1,%2,%3,%0";
3311 }
3312 
3313 /* Target hook for assembling integer objects.  This code handles
3314    aligned SI and DI integers specially since function references
3315    must be preceded by P%.  */
3316 
3317 static bool
3318 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3319 {
3320   bool result;
3321   tree decl = NULL;
3322 
3323   /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to call
3324      assemble_external and set the SYMBOL_REF_DECL to NULL before
3325      calling output_addr_const.  Otherwise, it may call assemble_external
3326      in the midst of outputting the assembler code for the SYMBOL_REF.
3327      We restore the SYMBOL_REF_DECL after the output is done.  */
3328   if (GET_CODE (x) == SYMBOL_REF)
3329     {
3330       decl = SYMBOL_REF_DECL (x);
3331       if (decl)
3332 	{
3333 	  assemble_external (decl);
3334 	  SET_SYMBOL_REF_DECL (x, NULL);
3335 	}
3336     }
3337 
3338   if (size == UNITS_PER_WORD
3339       && aligned_p
3340       && function_label_operand (x, VOIDmode))
3341     {
3342       fputs (size == 8? "\t.dword\t" : "\t.word\t", asm_out_file);
3343 
3344       /* We don't want an OPD when generating fast indirect calls.  */
3345       if (!TARGET_FAST_INDIRECT_CALLS)
3346 	fputs ("P%", asm_out_file);
3347 
3348       output_addr_const (asm_out_file, x);
3349       fputc ('\n', asm_out_file);
3350       result = true;
3351     }
3352   else
3353     result = default_assemble_integer (x, size, aligned_p);
3354 
3355   if (decl)
3356     SET_SYMBOL_REF_DECL (x, decl);
3357 
3358   return result;
3359 }
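
/* For a word-sized, aligned reference to a function foo, this emits
   something along the lines of

	.word P%foo

   where the P% prefix asks the linker for a function pointer (a
   plabel) rather than the raw code address.  */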
3360 
3361 /* Output an ascii string.  */
3362 void
3363 pa_output_ascii (FILE *file, const char *p, int size)
3364 {
3365   int i;
3366   int chars_output;
3367   unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */
3368 
3369   /* The HP assembler can only take strings of 256 characters at one
3370      time.  This is a limitation on input line length, *not* the
3371      length of the string.  Sigh.  Even worse, it seems that the
3372      restriction is in number of input characters (see \xnn &
3373      \whatever).  So we have to do this very carefully.  */
3374 
3375   fputs ("\t.STRING \"", file);
3376 
3377   chars_output = 0;
3378   for (i = 0; i < size; i += 4)
3379     {
3380       int co = 0;
3381       int io = 0;
3382       for (io = 0, co = 0; io < MIN (4, size - i); io++)
3383 	{
3384 	  register unsigned int c = (unsigned char) p[i + io];
3385 
3386 	  if (c == '\"' || c == '\\')
3387 	    partial_output[co++] = '\\';
3388 	  if (c >= ' ' && c < 0177)
3389 	    partial_output[co++] = c;
3390 	  else
3391 	    {
3392 	      unsigned int hexd;
3393 	      partial_output[co++] = '\\';
3394 	      partial_output[co++] = 'x';
3395 	      hexd = c / 16 + '0';
3396 	      if (hexd > '9')
3397 		hexd -= '9' - 'a' + 1;
3398 	      partial_output[co++] = hexd;
3399 	      hexd = c % 16 + '0';
3400 	      if (hexd > '9')
3401 		hexd -= '9' - 'a' + 1;
3402 	      partial_output[co++] = hexd;
3403 	    }
3404 	}
3405       if (chars_output + co > 243)
3406 	{
3407 	  fputs ("\"\n\t.STRING \"", file);
3408 	  chars_output = 0;
3409 	}
3410       fwrite (partial_output, 1, (size_t) co, file);
3411       chars_output += co;
3412       co = 0;
3413     }
3414   fputs ("\"\n", file);
3415 }
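
/* For example, the bytes of the C string "He\"llo\n" come out as

	.STRING "He\"llo\x0a"

   since '"' receives a backslash escape and the unprintable '\n'
   (0x0a) is emitted via the \x escape computed above.  */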
3416 
3417 /* Try to rewrite floating point comparisons & branches to avoid
3418    useless add,tr insns.
3419 
3420    CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3421    to see if FPCC is dead.  CHECK_NOTES is nonzero for the
3422    first attempt to remove useless add,tr insns.  It is zero
3423    for the second pass as reorg sometimes leaves bogus REG_DEAD
3424    notes lying around.
3425 
3426    When CHECK_NOTES is zero we can only eliminate add,tr insns
3427    when there's a 1:1 correspondence between fcmp and ftest/fbranch
3428    instructions.  */
3429 static void
3430 remove_useless_addtr_insns (int check_notes)
3431 {
3432   rtx_insn *insn;
3433   static int pass = 0;
3434 
3435   /* This is fairly cheap, so always run it when optimizing.  */
3436   if (optimize > 0)
3437     {
3438       int fcmp_count = 0;
3439       int fbranch_count = 0;
3440 
3441       /* Walk all the insns in this function looking for fcmp & fbranch
3442 	 instructions.  Keep track of how many of each we find.  */
3443       for (insn = get_insns (); insn; insn = next_insn (insn))
3444 	{
3445 	  rtx tmp;
3446 
3447 	  /* Ignore anything that isn't an INSN or a JUMP_INSN.  */
3448 	  if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
3449 	    continue;
3450 
3451 	  tmp = PATTERN (insn);
3452 
3453 	  /* It must be a set.  */
3454 	  if (GET_CODE (tmp) != SET)
3455 	    continue;
3456 
3457 	  /* If the destination is CCFP, then we've found an fcmp insn.  */
3458 	  tmp = SET_DEST (tmp);
3459 	  if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3460 	    {
3461 	      fcmp_count++;
3462 	      continue;
3463 	    }
3464 
3465 	  tmp = PATTERN (insn);
3466 	  /* If this is an fbranch instruction, bump the fbranch counter.  */
3467 	  if (GET_CODE (tmp) == SET
3468 	      && SET_DEST (tmp) == pc_rtx
3469 	      && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3470 	      && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3471 	      && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3472 	      && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3473 	    {
3474 	      fbranch_count++;
3475 	      continue;
3476 	    }
3477 	}
3478 
3479 
3480       /* Find all floating point compare + branch insns.  If possible,
3481 	 reverse the comparison & the branch to avoid add,tr insns.  */
3482       for (insn = get_insns (); insn; insn = next_insn (insn))
3483 	{
3484 	  rtx tmp;
3485 	  rtx_insn *next;
3486 
3487 	  /* Ignore anything that isn't an INSN.  */
3488 	  if (! NONJUMP_INSN_P (insn))
3489 	    continue;
3490 
3491 	  tmp = PATTERN (insn);
3492 
3493 	  /* It must be a set.  */
3494 	  if (GET_CODE (tmp) != SET)
3495 	    continue;
3496 
3497 	  /* The destination must be CCFP, which is register zero.  */
3498 	  tmp = SET_DEST (tmp);
3499 	  if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3500 	    continue;
3501 
3502 	  /* INSN should be a set of CCFP.
3503 
3504 	     See if the result of this insn is used in a reversed FP
3505 	     conditional branch.  If so, reverse our condition and
3506 	     the branch.  Doing so avoids useless add,tr insns.  */
3507 	  next = next_insn (insn);
3508 	  while (next)
3509 	    {
3510 	      /* Jumps, calls and labels stop our search.  */
3511 	      if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
3512 		break;
3513 
3514 	      /* As does another fcmp insn.  */
3515 	      if (NONJUMP_INSN_P (next)
3516 		  && GET_CODE (PATTERN (next)) == SET
3517 		  && GET_CODE (SET_DEST (PATTERN (next))) == REG
3518 		  && REGNO (SET_DEST (PATTERN (next))) == 0)
3519 		break;
3520 
3521 	      next = next_insn (next);
3522 	    }
3523 
3524 	  /* Is NEXT a branch?  */
3525 	  if (next && JUMP_P (next))
3526 	    {
3527 	      rtx pattern = PATTERN (next);
3528 
3529 	      /* If it is a reversed fp conditional branch (e.g. uses add,tr)
3530 		 and CCFP dies, then reverse our conditional and the branch
3531 		 to avoid the add,tr.  */
3532 	      if (GET_CODE (pattern) == SET
3533 		  && SET_DEST (pattern) == pc_rtx
3534 		  && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3535 		  && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3536 		  && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3537 		  && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3538 		  && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3539 		  && (fcmp_count == fbranch_count
3540 		      || (check_notes
3541 			  && find_regno_note (next, REG_DEAD, 0))))
3542 		{
3543 		  /* Reverse the branch.  */
3544 		  tmp = XEXP (SET_SRC (pattern), 1);
3545 		  XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3546 		  XEXP (SET_SRC (pattern), 2) = tmp;
3547 		  INSN_CODE (next) = -1;
3548 
3549 		  /* Reverse our condition.  */
3550 		  tmp = PATTERN (insn);
3551 		  PUT_CODE (XEXP (tmp, 1),
3552 			    (reverse_condition_maybe_unordered
3553 			     (GET_CODE (XEXP (tmp, 1)))));
3554 		}
3555 	    }
3556 	}
3557     }
3558 
3559   pass = !pass;
3560 
3561 }
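
/* Schematically: if the fcmp above computes, say, LT and the
   following branch tests CCFP in the reversed sense (the form that
   assembles to an add,tr), the compare is rewritten to its
   reverse_condition_maybe_unordered counterpart (UNGE, so NaN
   behavior is preserved) and the branch's taken and fall-through
   arms are exchanged, leaving an ordinary conditional branch with
   no add,tr.  */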
3562 
3563 /* You may have trouble believing this, but this is the 32 bit HP-PA
3564    stack layout.  Wow.
3565 
3566    Offset		Contents
3567 
3568    Variable arguments	(optional; any number may be allocated)
3569 
3570    SP-(4*(N+9))		arg word N
3571    	:		    :
3572       SP-56		arg word 5
3573       SP-52		arg word 4
3574 
3575    Fixed arguments	(must be allocated; may remain unused)
3576 
3577       SP-48		arg word 3
3578       SP-44		arg word 2
3579       SP-40		arg word 1
3580       SP-36		arg word 0
3581 
3582    Frame Marker
3583 
3584       SP-32		External Data Pointer (DP)
3585       SP-28		External sr4
3586       SP-24		External/stub RP (RP')
3587       SP-20		Current RP
3588       SP-16		Static Link
3589       SP-12		Clean up
3590       SP-8		Calling Stub RP (RP'')
3591       SP-4		Previous SP
3592 
3593    Top of Frame
3594 
3595       SP-0		Stack Pointer (points to next available address)
3596 
3597 */
3598 
3599 /* This function saves registers as follows.  Registers marked with ' are
3600    this function's registers (as opposed to the previous function's).
3601    If a frame_pointer isn't needed, r4 is saved as a general register;
3602    the space for the frame pointer is still allocated, though, to keep
3603    things simple.
3604 
3605 
3606    Top of Frame
3607 
3608        SP (FP')		Previous FP
3609        SP + 4		Alignment filler (sigh)
3610        SP + 8		Space for locals reserved here.
3611        .
3612        .
3613        .
3614        SP + n		All call-saved registers used.
3615        .
3616        .
3617        .
3618        SP + o		All call-saved FP registers used.
3619        .
3620        .
3621        .
3622        SP + p (SP')	points to next available address.
3623 
3624 */
3625 
3626 /* Global variables set by pa_expand_prologue ().  */
3627 /* Size of frame.  Need to know this to emit return insns from
3628    leaf procedures.  */
3629 static HOST_WIDE_INT actual_fsize, local_fsize;
3630 static int save_fregs;
3631 
3632 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3633    Handle case where DISP > 8k by using the add_high_const patterns.
3634 
3635    Note in DISP > 8k case, we will leave the high part of the address
3636    in %r1.  There is code in pa_expand_{prologue,epilogue} that knows this.  */
3637 
3638 static void
3639 store_reg (int reg, HOST_WIDE_INT disp, int base)
3640 {
3641   rtx dest, src, basereg;
3642   rtx_insn *insn;
3643 
3644   src = gen_rtx_REG (word_mode, reg);
3645   basereg = gen_rtx_REG (Pmode, base);
3646   if (VAL_14_BITS_P (disp))
3647     {
3648       dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3649       insn = emit_move_insn (dest, src);
3650     }
3651   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3652     {
3653       rtx delta = GEN_INT (disp);
3654       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3655 
3656       emit_move_insn (tmpreg, delta);
3657       insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3658       if (DO_FRAME_NOTES)
3659 	{
3660 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3661 			gen_rtx_SET (tmpreg,
3662 				     gen_rtx_PLUS (Pmode, basereg, delta)));
3663 	  RTX_FRAME_RELATED_P (insn) = 1;
3664 	}
3665       dest = gen_rtx_MEM (word_mode, tmpreg);
3666       insn = emit_move_insn (dest, src);
3667     }
3668   else
3669     {
3670       rtx delta = GEN_INT (disp);
3671       rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3672       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3673 
3674       emit_move_insn (tmpreg, high);
3675       dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3676       insn = emit_move_insn (dest, src);
3677       if (DO_FRAME_NOTES)
3678 	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3679 		      gen_rtx_SET (gen_rtx_MEM (word_mode,
3680 						gen_rtx_PLUS (word_mode,
3681 							      basereg,
3682 							      delta)),
3683 				   src));
3684     }
3685 
3686   if (DO_FRAME_NOTES)
3687     RTX_FRAME_RELATED_P (insn) = 1;
3688 }
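
/* Roughly, a 14-bit DISP becomes a single "stw %reg,disp(%base)",
   while a large 32-bit DISP becomes something like

	addil L'disp,%base	; %r1 = %base + left part of disp
	stw %reg,R'disp(%r1)

   which is why callers may rely on the high part remaining in %r1.  */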
3689 
3690 /* Emit RTL to store REG at the memory location specified by BASE and then
3691    add MOD to BASE.  MOD must be <= 8k.  */
3692 
3693 static void
3694 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3695 {
3696   rtx basereg, srcreg, delta;
3697   rtx_insn *insn;
3698 
3699   gcc_assert (VAL_14_BITS_P (mod));
3700 
3701   basereg = gen_rtx_REG (Pmode, base);
3702   srcreg = gen_rtx_REG (word_mode, reg);
3703   delta = GEN_INT (mod);
3704 
3705   insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3706   if (DO_FRAME_NOTES)
3707     {
3708       RTX_FRAME_RELATED_P (insn) = 1;
3709 
3710       /* RTX_FRAME_RELATED_P must be set on each frame related set
3711 	 in a parallel with more than one element.  */
3712       RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3713       RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3714     }
3715 }
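
/* On 32-bit targets this is the familiar "stwm %reg,mod(%base)"
   (store word and modify): it stores through the old %base and then
   bumps %base by MOD in a single instruction.  */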
3716 
3717 /* Emit RTL to set REG to the value specified by BASE+DISP.  Handle case
3718    where DISP > 8k by using the add_high_const patterns.  NOTE indicates
3719    whether to add a frame note or not.
3720 
3721    In the DISP > 8k case, we leave the high part of the address in %r1.
3722    There is code in pa_expand_{prologue,epilogue} that knows about this.  */
3723 
3724 static void
3725 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3726 {
3727   rtx_insn *insn;
3728 
3729   if (VAL_14_BITS_P (disp))
3730     {
3731       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3732 			     plus_constant (Pmode,
3733 					    gen_rtx_REG (Pmode, base), disp));
3734     }
3735   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3736     {
3737       rtx basereg = gen_rtx_REG (Pmode, base);
3738       rtx delta = GEN_INT (disp);
3739       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3740 
3741       emit_move_insn (tmpreg, delta);
3742       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3743 			     gen_rtx_PLUS (Pmode, tmpreg, basereg));
3744       if (DO_FRAME_NOTES)
3745 	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3746 		      gen_rtx_SET (tmpreg,
3747 				   gen_rtx_PLUS (Pmode, basereg, delta)));
3748     }
3749   else
3750     {
3751       rtx basereg = gen_rtx_REG (Pmode, base);
3752       rtx delta = GEN_INT (disp);
3753       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3754 
3755       emit_move_insn (tmpreg,
3756 		      gen_rtx_PLUS (Pmode, basereg,
3757 				    gen_rtx_HIGH (Pmode, delta)));
3758       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3759 			     gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3760     }
3761 
3762   if (DO_FRAME_NOTES && note)
3763     RTX_FRAME_RELATED_P (insn) = 1;
3764 }
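
/* Roughly, a 14-bit DISP becomes "ldo disp(%base),%reg", while a
   large 32-bit DISP becomes something like

	addil L'disp,%base	; high part, result lands in %r1
	ldo R'disp(%r1),%reg

   again leaving the high part of the address in %r1.  */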
3765 
3766 HOST_WIDE_INT
3767 pa_compute_frame_size (poly_int64 size, int *fregs_live)
3768 {
3769   int freg_saved = 0;
3770   int i, j;
3771 
3772   /* The code in pa_expand_prologue and pa_expand_epilogue must
3773      be consistent with the rounding and size calculation done here.
3774      Change them at the same time.  */
3775 
3776   /* We do our own stack alignment.  First, round the size of the
3777      stack locals up to a word boundary.  */
3778   size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3779 
3780   /* Space for previous frame pointer + filler.  If any frame is
3781      allocated, we need to add in the TARGET_STARTING_FRAME_OFFSET.  We
3782      waste some space here for the sake of HP compatibility.  The
3783      first slot is only used when the frame pointer is needed.  */
3784   if (size || frame_pointer_needed)
3785     size += pa_starting_frame_offset ();
3786 
3787   /* If the current function calls __builtin_eh_return, then we need
3788      to allocate stack space for registers that will hold data for
3789      the exception handler.  */
3790   if (DO_FRAME_NOTES && crtl->calls_eh_return)
3791     {
3792       unsigned int i;
3793 
3794       for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3795 	continue;
3796       size += i * UNITS_PER_WORD;
3797     }
3798 
3799   /* Account for space used by the callee general register saves.  */
3800   for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3801     if (df_regs_ever_live_p (i))
3802       size += UNITS_PER_WORD;
3803 
3804   /* Account for space used by the callee floating point register saves.  */
3805   for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3806     if (df_regs_ever_live_p (i)
3807 	|| (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3808       {
3809 	freg_saved = 1;
3810 
3811 	/* We always save both halves of the FP register, so always
3812 	   increment the frame size by 8 bytes.  */
3813 	size += 8;
3814       }
3815 
3816   /* If any of the floating registers are saved, account for the
3817      alignment needed for the floating point register save block.  */
3818   if (freg_saved)
3819     {
3820       size = (size + 7) & ~7;
3821       if (fregs_live)
3822 	*fregs_live = 1;
3823     }
3824 
3825   /* The various ABIs include space for the outgoing parameters in the
3826      size of the current function's stack frame.  We don't need to align
3827      for the outgoing arguments as their alignment is set by the final
3828      rounding for the frame as a whole.  */
3829   size += crtl->outgoing_args_size;
3830 
3831   /* Allocate space for the fixed frame marker.  This space must be
3832      allocated for any function that makes calls or allocates
3833      stack space.  */
3834   if (!crtl->is_leaf || size)
3835     size += TARGET_64BIT ? 48 : 32;
3836 
3837   /* Finally, round to the preferred stack boundary.  */
3838   return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3839 	  & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3840 }
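
/* A worked example, assuming the 32-bit ABI where
   pa_starting_frame_offset () is 8 and the preferred stack boundary
   is 64 bytes: a non-leaf function with 40 bytes of locals, two
   callee general register saves, no FP saves and 16 bytes of
   outgoing arguments accumulates 40 + 8 + 2*4 + 16 + 32 = 104
   bytes, which the final rounding pushes up to a 128-byte frame.  */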
3841 
3842 /* Output function label, and associated .PROC and .CALLINFO statements.  */
3843 
3844 void
3845 pa_output_function_label (FILE *file)
3846 {
3847   /* The function's label and associated .PROC must never be
3848      separated and must be output *after* any profiling declarations
3849      to avoid changing spaces/subspaces within a procedure.  */
3850   ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3851   fputs ("\t.PROC\n", file);
3852 
3853   /* pa_expand_prologue does the dirty work now.  We just need
3854      to output the assembler directives which denote the start
3855      of a function.  */
3856   fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3857   if (crtl->is_leaf)
3858     fputs (",NO_CALLS", file);
3859   else
3860     fputs (",CALLS", file);
3861   if (rp_saved)
3862     fputs (",SAVE_RP", file);
3863 
3864   /* The SAVE_SP flag is used to indicate that register %r3 is stored
3865      at the beginning of the frame and that it is used as the frame
3866      pointer for the frame.  We do this because our current frame
3867      layout doesn't conform to that specified in the HP runtime
3868      documentation and we need a way to indicate to programs such as
3869      GDB where %r3 is saved.  The SAVE_SP flag was chosen because it
3870      isn't used by HP compilers but is supported by the assembler.
3871      However, SAVE_SP is supposed to indicate that the previous stack
3872      pointer has been saved in the frame marker.  */
3873   if (frame_pointer_needed)
3874     fputs (",SAVE_SP", file);
3875 
3876   /* Pass on information about the number of callee register saves
3877      performed in the prologue.
3878 
3879      The compiler is supposed to pass the highest register number
3880      saved, the assembler then has to adjust that number before
3881      entering it into the unwind descriptor (to account for any
3882      caller saved registers with lower register numbers than the
3883      first callee saved register).  */
3884   if (gr_saved)
3885     fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3886 
3887   if (fr_saved)
3888     fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3889 
3890   fputs ("\n\t.ENTRY\n", file);
3891 }
3892 
3893 /* Output function prologue.  */
3894 
3895 static void
3896 pa_output_function_prologue (FILE *file)
3897 {
3898   pa_output_function_label (file);
3899   remove_useless_addtr_insns (0);
3900 }
3901 
3902 /* The label is output by ASM_DECLARE_FUNCTION_NAME on linux.  */
3903 
3904 static void
3905 pa_linux_output_function_prologue (FILE *file ATTRIBUTE_UNUSED)
3906 {
3907   remove_useless_addtr_insns (0);
3908 }
3909 
3910 void
3911 pa_expand_prologue (void)
3912 {
3913   int merge_sp_adjust_with_store = 0;
3914   HOST_WIDE_INT size = get_frame_size ();
3915   HOST_WIDE_INT offset;
3916   int i;
3917   rtx tmpreg;
3918   rtx_insn *insn;
3919 
3920   gr_saved = 0;
3921   fr_saved = 0;
3922   save_fregs = 0;
3923 
3924   /* Compute total size for frame pointer, filler, locals and rounding to
3925      the next word boundary.  Similar code appears in pa_compute_frame_size
3926      and must be changed in tandem with this code.  */
3927   local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3928   if (local_fsize || frame_pointer_needed)
3929     local_fsize += pa_starting_frame_offset ();
3930 
3931   actual_fsize = pa_compute_frame_size (size, &save_fregs);
3932   if (flag_stack_usage_info)
3933     current_function_static_stack_size = actual_fsize;
3934 
3935   /* Compute a few things we will use often.  */
3936   tmpreg = gen_rtx_REG (word_mode, 1);
3937 
3938   /* Save RP first.  The calling conventions manual states RP will
3939      always be stored into the caller's frame at sp - 20 or sp - 16
3940      depending on which ABI is in use.  */
3941   if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3942     {
3943       store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3944       rp_saved = true;
3945     }
3946   else
3947     rp_saved = false;
3948 
3949   /* Allocate the local frame and set up the frame pointer if needed.  */
3950   if (actual_fsize != 0)
3951     {
3952       if (frame_pointer_needed)
3953 	{
3954 	  /* Copy the old frame pointer temporarily into %r1.  Set up the
3955 	     new stack pointer, then store away the saved old frame pointer
3956 	     into the stack at sp and at the same time update the stack
3957 	     pointer by actual_fsize bytes.  There are two versions: the
3958 	     first handles small (<8k) frames, the second large (>=8k)
3959 	     frames.  */
3960 	  insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
3961 	  if (DO_FRAME_NOTES)
3962 	    RTX_FRAME_RELATED_P (insn) = 1;
3963 
3964 	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3965 	  if (DO_FRAME_NOTES)
3966 	    RTX_FRAME_RELATED_P (insn) = 1;
3967 
3968 	  if (VAL_14_BITS_P (actual_fsize))
3969 	    store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3970 	  else
3971 	    {
3972 	      /* It is incorrect to store the saved frame pointer at *sp,
3973 		 then increment sp (writes beyond the current stack boundary).
3974 
3975 		 So instead use stwm to store at *sp and post-increment the
3976 		 stack pointer as an atomic operation.  Then increment sp to
3977 		 finish allocating the new frame.  */
3978 	      HOST_WIDE_INT adjust1 = 8192 - 64;
3979 	      HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3980 
3981 	      store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3982 	      set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3983 			      adjust2, 1);
3984 	    }
3985 
3986 	  /* We set SAVE_SP in frames that need a frame pointer.  Thus,
3987 	     we need to store the previous stack pointer (frame pointer)
3988 	     into the frame marker on targets that use the HP unwind
3989 	     library.  This allows the HP unwind library to be used to
3990 	     unwind GCC frames.  However, we are not fully compatible
3991 	     with the HP library because our frame layout differs from
3992 	     that specified in the HP runtime specification.
3993 
3994 	     We don't want a frame note on this instruction as the frame
3995 	     marker moves during dynamic stack allocation.
3996 
3997 	     This instruction also serves as a blockage to prevent
3998 	     register spills from being scheduled before the stack
3999 	     pointer is raised.  This is necessary as we store
4000 	     registers using the frame pointer as a base register,
4001 	     and the frame pointer is set before sp is raised.  */
4002 	  if (TARGET_HPUX_UNWIND_LIBRARY)
4003 	    {
4004 	      rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
4005 				       GEN_INT (TARGET_64BIT ? -8 : -4));
4006 
4007 	      emit_move_insn (gen_rtx_MEM (word_mode, addr),
4008 			      hard_frame_pointer_rtx);
4009 	    }
4010 	  else
4011 	    emit_insn (gen_blockage ());
4012 	}
4013       /* No frame pointer needed.  */
4014       else
4015 	{
4016 	  /* In some cases we can perform the first callee register save
4017 	     and allocate the stack frame at the same time.  If so, just
4018 	     make a note of it and defer allocating the frame until saving
4019 	     the callee registers.  */
4020 	  if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
4021 	    merge_sp_adjust_with_store = 1;
4022 	  /* Cannot optimize.  Adjust the stack frame by actual_fsize
4023 	     bytes.  */
4024 	  else
4025 	    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4026 			    actual_fsize, 1);
4027 	}
4028     }
4029 
4030   /* Normal register save.
4031 
4032      Do not save the frame pointer in the frame_pointer_needed case.  It
4033      was done earlier.  */
4034   if (frame_pointer_needed)
4035     {
4036       offset = local_fsize;
4037 
4038       /* Saving the EH return data registers in the frame is the simplest
4039 	 way to get the frame unwind information emitted.  We put them
4040 	 just before the general registers.  */
4041       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4042 	{
4043 	  unsigned int i, regno;
4044 
4045 	  for (i = 0; ; ++i)
4046 	    {
4047 	      regno = EH_RETURN_DATA_REGNO (i);
4048 	      if (regno == INVALID_REGNUM)
4049 		break;
4050 
4051 	      store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4052 	      offset += UNITS_PER_WORD;
4053 	    }
4054 	}
4055 
4056       for (i = 18; i >= 4; i--)
4057 	if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4058 	  {
4059 	    store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4060 	    offset += UNITS_PER_WORD;
4061 	    gr_saved++;
4062 	  }
4063       /* Account for %r3 which is saved in a special place.  */
4064       gr_saved++;
4065     }
4066   /* No frame pointer needed.  */
4067   else
4068     {
4069       offset = local_fsize - actual_fsize;
4070 
4071       /* Saving the EH return data registers in the frame is the simplest
4072          way to get the frame unwind information emitted.  */
4073       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4074 	{
4075 	  unsigned int i, regno;
4076 
4077 	  for (i = 0; ; ++i)
4078 	    {
4079 	      regno = EH_RETURN_DATA_REGNO (i);
4080 	      if (regno == INVALID_REGNUM)
4081 		break;
4082 
4083 	      /* If merge_sp_adjust_with_store is nonzero, then we can
4084 		 optimize the first save.  */
4085 	      if (merge_sp_adjust_with_store)
4086 		{
4087 		  store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4088 		  merge_sp_adjust_with_store = 0;
4089 		}
4090 	      else
4091 		store_reg (regno, offset, STACK_POINTER_REGNUM);
4092 	      offset += UNITS_PER_WORD;
4093 	    }
4094 	}
4095 
4096       for (i = 18; i >= 3; i--)
4097 	if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4098 	  {
4099 	    /* If merge_sp_adjust_with_store is nonzero, then we can
4100 	       optimize the first GR save.  */
4101 	    if (merge_sp_adjust_with_store)
4102 	      {
4103 		store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4104 		merge_sp_adjust_with_store = 0;
4105 	      }
4106 	    else
4107 	      store_reg (i, offset, STACK_POINTER_REGNUM);
4108 	    offset += UNITS_PER_WORD;
4109 	    gr_saved++;
4110 	  }
4111 
4112       /* If we wanted to merge the SP adjustment with a GR save, but we never
4113 	 did any GR saves, then just emit the adjustment here.  */
4114       if (merge_sp_adjust_with_store)
4115 	set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4116 			actual_fsize, 1);
4117     }
4118 
4119   /* The hppa calling conventions say that %r19, the pic offset
4120      register, is saved at sp - 32 (in this function's frame)
4121      when generating PIC code.  FIXME:  What is the correct thing
4122      to do for functions which make no calls and allocate no
4123      frame?  Do we need to allocate a frame, or can we just omit
4124      the save?   For now we'll just omit the save.
4125 
4126      We don't want a note on this insn as the frame marker can
4127      move if there is a dynamic stack allocation.  */
4128   if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4129     {
4130       rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4131 
4132       emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4133 
4134     }
4135 
4136   /* Align pointer properly (doubleword boundary).  */
4137   offset = (offset + 7) & ~7;
4138 
4139   /* Floating point register store.  */
4140   if (save_fregs)
4141     {
4142       rtx base;
4143 
4144       /* First get the frame or stack pointer to the start of the FP register
4145 	 save area.  */
4146       if (frame_pointer_needed)
4147 	{
4148 	  set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4149 	  base = hard_frame_pointer_rtx;
4150 	}
4151       else
4152 	{
4153 	  set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4154 	  base = stack_pointer_rtx;
4155 	}
4156 
4157       /* Now actually save the FP registers.  */
4158       for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4159 	{
4160 	  if (df_regs_ever_live_p (i)
4161 	      || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4162 	    {
4163 	      rtx addr, reg;
4164 	      rtx_insn *insn;
4165 	      addr = gen_rtx_MEM (DFmode,
4166 				  gen_rtx_POST_INC (word_mode, tmpreg));
4167 	      reg = gen_rtx_REG (DFmode, i);
4168 	      insn = emit_move_insn (addr, reg);
4169 	      if (DO_FRAME_NOTES)
4170 		{
4171 		  RTX_FRAME_RELATED_P (insn) = 1;
4172 		  if (TARGET_64BIT)
4173 		    {
4174 		      rtx mem = gen_rtx_MEM (DFmode,
4175 					     plus_constant (Pmode, base,
4176 							    offset));
4177 		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4178 				    gen_rtx_SET (mem, reg));
4179 		    }
4180 		  else
4181 		    {
4182 		      rtx meml = gen_rtx_MEM (SFmode,
4183 					      plus_constant (Pmode, base,
4184 							     offset));
4185 		      rtx memr = gen_rtx_MEM (SFmode,
4186 					      plus_constant (Pmode, base,
4187 							     offset + 4));
4188 		      rtx regl = gen_rtx_REG (SFmode, i);
4189 		      rtx regr = gen_rtx_REG (SFmode, i + 1);
4190 		      rtx setl = gen_rtx_SET (meml, regl);
4191 		      rtx setr = gen_rtx_SET (memr, regr);
4192 		      rtvec vec;
4193 
4194 		      RTX_FRAME_RELATED_P (setl) = 1;
4195 		      RTX_FRAME_RELATED_P (setr) = 1;
4196 		      vec = gen_rtvec (2, setl, setr);
4197 		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4198 				    gen_rtx_SEQUENCE (VOIDmode, vec));
4199 		    }
4200 		}
4201 	      offset += GET_MODE_SIZE (DFmode);
4202 	      fr_saved++;
4203 	    }
4204 	}
4205     }
4206 }
4207 
4208 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4209    Handle case where DISP > 8k by using the add_high_const patterns.  */
4210 
4211 static void
4212 load_reg (int reg, HOST_WIDE_INT disp, int base)
4213 {
4214   rtx dest = gen_rtx_REG (word_mode, reg);
4215   rtx basereg = gen_rtx_REG (Pmode, base);
4216   rtx src;
4217 
4218   if (VAL_14_BITS_P (disp))
4219     src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4220   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4221     {
4222       rtx delta = GEN_INT (disp);
4223       rtx tmpreg = gen_rtx_REG (Pmode, 1);
4224 
4225       emit_move_insn (tmpreg, delta);
4226       if (TARGET_DISABLE_INDEXING)
4227 	{
4228 	  emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4229 	  src = gen_rtx_MEM (word_mode, tmpreg);
4230 	}
4231       else
4232 	src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4233     }
4234   else
4235     {
4236       rtx delta = GEN_INT (disp);
4237       rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4238       rtx tmpreg = gen_rtx_REG (Pmode, 1);
4239 
4240       emit_move_insn (tmpreg, high);
4241       src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4242     }
4243 
4244   emit_move_insn (dest, src);
4245 }
4246 
4247 /* Update the total code bytes output to the text section.  */
4248 
4249 static void
4250 update_total_code_bytes (unsigned int nbytes)
4251 {
4252   if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4253       && !IN_NAMED_SECTION_P (cfun->decl))
4254     {
4255       unsigned int old_total = total_code_bytes;
4256 
4257       total_code_bytes += nbytes;
4258 
4259       /* Be prepared to handle overflows.  */
4260       if (old_total > total_code_bytes)
4261         total_code_bytes = UINT_MAX;
4262     }
4263 }
4264 
4265 /* This function generates the assembly code for function exit.
4266    Args are as for output_function_prologue ().
4267 
4268    The function epilogue should not depend on the current stack
4269    pointer!  It should use the frame pointer only.  This is mandatory
4270    because of alloca; we also take advantage of it to omit stack
4271    adjustments before returning.  */
4272 
4273 static void
4274 pa_output_function_epilogue (FILE *file)
4275 {
4276   rtx_insn *insn = get_last_insn ();
4277   bool extra_nop;
4278 
4279   /* pa_expand_epilogue does the dirty work now.  We just need
4280      to output the assembler directives which denote the end
4281      of a function.
4282 
4283      To make debuggers happy, emit a nop if the epilogue was completely
4284      eliminated due to a volatile call as the last insn in the
4285      current function.  That way the return address (in %r2) will
4286      always point to a valid instruction in the current function.  */
4287 
4288   /* Get the last real insn.  */
4289   if (NOTE_P (insn))
4290     insn = prev_real_insn (insn);
4291 
4292   /* If it is a sequence, then look inside.  */
4293   if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
4294     insn = as_a <rtx_sequence *> (PATTERN (insn))->insn (0);
4295 
4296   /* If insn is a CALL_INSN, then it must be a call to a volatile
4297      function (otherwise there would be epilogue insns).  */
4298   if (insn && CALL_P (insn))
4299     {
4300       fputs ("\tnop\n", file);
4301       extra_nop = true;
4302     }
4303   else
4304     extra_nop = false;
4305 
4306   fputs ("\t.EXIT\n\t.PROCEND\n", file);
4307 
4308   if (TARGET_SOM && TARGET_GAS)
4309     {
4310       /* We are done with this subspace except possibly for some additional
4311 	 debug information.  Forget that we are in this subspace to ensure
4312 	 that the next function is output in its own subspace.  */
4313       in_section = NULL;
4314       cfun->machine->in_nsubspa = 2;
4315     }
4316 
4317   /* Thunks do their own insn accounting.  */
4318   if (cfun->is_thunk)
4319     return;
4320 
4321   if (INSN_ADDRESSES_SET_P ())
4322     {
4323       last_address = extra_nop ? 4 : 0;
4324       insn = get_last_nonnote_insn ();
4325       if (insn)
4326 	{
4327 	  last_address += INSN_ADDRESSES (INSN_UID (insn));
4328 	  if (INSN_P (insn))
4329 	    last_address += insn_default_length (insn);
4330 	}
4331       last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4332 		      & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4333     }
4334   else
4335     last_address = UINT_MAX;
4336 
4337   /* Finally, update the total number of code bytes output so far.  */
4338   update_total_code_bytes (last_address);
4339 }
4340 
4341 void
4342 pa_expand_epilogue (void)
4343 {
4344   rtx tmpreg;
4345   HOST_WIDE_INT offset;
4346   HOST_WIDE_INT ret_off = 0;
4347   int i;
4348   int merge_sp_adjust_with_load = 0;
4349 
4350   /* We will use this often.  */
4351   tmpreg = gen_rtx_REG (word_mode, 1);
4352 
4353   /* Try to restore RP early to avoid load/use interlocks when
4354      RP gets used in the return (bv) instruction.  This appears to still
4355      be necessary even when we schedule the prologue and epilogue.  */
4356   if (rp_saved)
4357     {
4358       ret_off = TARGET_64BIT ? -16 : -20;
4359       if (frame_pointer_needed)
4360 	{
4361 	  load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4362 	  ret_off = 0;
4363 	}
4364       else
4365 	{
4366 	  /* No frame pointer, and stack is smaller than 8k.  */
4367 	  if (VAL_14_BITS_P (ret_off - actual_fsize))
4368 	    {
4369 	      load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4370 	      ret_off = 0;
4371 	    }
4372 	}
4373     }
4374 
4375   /* General register restores.  */
4376   if (frame_pointer_needed)
4377     {
4378       offset = local_fsize;
4379 
4380       /* If the current function calls __builtin_eh_return, then we need
4381          to restore the saved EH data registers.  */
4382       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4383 	{
4384 	  unsigned int i, regno;
4385 
4386 	  for (i = 0; ; ++i)
4387 	    {
4388 	      regno = EH_RETURN_DATA_REGNO (i);
4389 	      if (regno == INVALID_REGNUM)
4390 		break;
4391 
4392 	      load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4393 	      offset += UNITS_PER_WORD;
4394 	    }
4395 	}
4396 
4397       for (i = 18; i >= 4; i--)
4398 	if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4399 	  {
4400 	    load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4401 	    offset += UNITS_PER_WORD;
4402 	  }
4403     }
4404   else
4405     {
4406       offset = local_fsize - actual_fsize;
4407 
4408       /* If the current function calls __builtin_eh_return, then we need
4409          to restore the saved EH data registers.  */
4410       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4411 	{
4412 	  unsigned int i, regno;
4413 
4414 	  for (i = 0; ; ++i)
4415 	    {
4416 	      regno = EH_RETURN_DATA_REGNO (i);
4417 	      if (regno == INVALID_REGNUM)
4418 		break;
4419 
4420 	      /* Only for the first load.
4421 	         merge_sp_adjust_with_load holds the register load
4422 	         with which we will merge the sp adjustment.  */
4423 	      if (merge_sp_adjust_with_load == 0
4424 		  && local_fsize == 0
4425 		  && VAL_14_BITS_P (-actual_fsize))
4426 	        merge_sp_adjust_with_load = regno;
4427 	      else
4428 		load_reg (regno, offset, STACK_POINTER_REGNUM);
4429 	      offset += UNITS_PER_WORD;
4430 	    }
4431 	}
4432 
4433       for (i = 18; i >= 3; i--)
4434 	{
4435 	  if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
4436 	    {
4437 	      /* Only for the first load.
4438 	         merge_sp_adjust_with_load holds the register load
4439 	         with which we will merge the sp adjustment.  */
4440 	      if (merge_sp_adjust_with_load == 0
4441 		  && local_fsize == 0
4442 		  && VAL_14_BITS_P (-actual_fsize))
4443 	        merge_sp_adjust_with_load = i;
4444 	      else
4445 		load_reg (i, offset, STACK_POINTER_REGNUM);
4446 	      offset += UNITS_PER_WORD;
4447 	    }
4448 	}
4449     }
4450 
4451   /* Align pointer properly (doubleword boundary).  */
4452   offset = (offset + 7) & ~7;
4453 
4454   /* FP register restores.  */
4455   if (save_fregs)
4456     {
4457       /* Adjust the register to index off of.  */
4458       if (frame_pointer_needed)
4459 	set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4460       else
4461 	set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4462 
4463       /* Actually do the restores now.  */
4464       for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4465 	if (df_regs_ever_live_p (i)
4466 	    || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4467 	  {
4468 	    rtx src = gen_rtx_MEM (DFmode,
4469 				   gen_rtx_POST_INC (word_mode, tmpreg));
4470 	    rtx dest = gen_rtx_REG (DFmode, i);
4471 	    emit_move_insn (dest, src);
4472 	  }
4473     }
4474 
4475   /* Emit a blockage insn here to keep these insns from being moved to
4476      an earlier spot in the epilogue, or into the main instruction stream.
4477 
4478      This is necessary as we must not cut the stack back before all the
4479      restores are finished.  */
4480   emit_insn (gen_blockage ());
4481 
4482   /* Reset stack pointer (and possibly frame pointer).  The stack
4483      pointer is initially set to fp + 64 to avoid a race condition.  */
4484   if (frame_pointer_needed)
4485     {
4486       rtx delta = GEN_INT (-64);
4487 
4488       set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4489       emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4490 			       stack_pointer_rtx, delta));
4491     }
4492   /* If we were deferring a callee register restore, do it now.  */
4493   else if (merge_sp_adjust_with_load)
4494     {
4495       rtx delta = GEN_INT (-actual_fsize);
4496       rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4497 
4498       emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4499     }
4500   else if (actual_fsize != 0)
4501     set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4502 		    - actual_fsize, 0);
4503 
4504   /* If we haven't restored %r2 yet (no frame pointer, and a stack
4505      frame greater than 8k), do so now.  */
4506   if (ret_off != 0)
4507     load_reg (2, ret_off, STACK_POINTER_REGNUM);
4508 
4509   if (DO_FRAME_NOTES && crtl->calls_eh_return)
4510     {
4511       rtx sa = EH_RETURN_STACKADJ_RTX;
4512 
4513       emit_insn (gen_blockage ());
4514       emit_insn (TARGET_64BIT
4515 		 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4516 		 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4517     }
4518 }
4519 
4520 bool
4521 pa_can_use_return_insn (void)
4522 {
4523   if (!reload_completed)
4524     return false;
4525 
4526   if (frame_pointer_needed)
4527     return false;
4528 
4529   if (df_regs_ever_live_p (2))
4530     return false;
4531 
4532   if (crtl->profile)
4533     return false;
4534 
4535   return pa_compute_frame_size (get_frame_size (), 0) == 0;
4536 }
4537 
4538 rtx
4539 hppa_pic_save_rtx (void)
4540 {
4541   return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4542 }
4543 
4544 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4545 #define NO_DEFERRED_PROFILE_COUNTERS 0
4546 #endif
4547 
4548 
4549 /* Vector of funcdef numbers.  */
4550 static vec<int> funcdef_nos;
4551 
4552 /* Output deferred profile counters.  */
4553 static void
4554 output_deferred_profile_counters (void)
4555 {
4556   unsigned int i;
4557   int align, n;
4558 
4559   if (funcdef_nos.is_empty ())
4560     return;
4561 
4562   switch_to_section (data_section);
4563   align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4564   ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4565 
4566   for (i = 0; funcdef_nos.iterate (i, &n); i++)
4567     {
4568       targetm.asm_out.internal_label (asm_out_file, "LP", n);
4569       assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4570     }
4571 
4572   funcdef_nos.release ();
4573 }
4574 
4575 void
4576 hppa_profile_hook (int label_no)
4577 {
4578   rtx_code_label *label_rtx = gen_label_rtx ();
4579   int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE);
4580   rtx arg_bytes, begin_label_rtx, mcount, sym;
4581   rtx_insn *call_insn;
4582   char begin_label_name[16];
4583   bool use_mcount_pcrel_call;
4584 
4585   /* Set up call destination.  */
4586   sym = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
4587   pa_encode_label (sym);
4588   mcount = gen_rtx_MEM (Pmode, sym);
4589 
4590   /* If we can reach _mcount with a pc-relative call, we can optimize
4591      loading the address of the current function.  This requires linker
4592      long branch stub support.  */
4593   if (!TARGET_PORTABLE_RUNTIME
4594       && !TARGET_LONG_CALLS
4595       && (TARGET_SOM || flag_function_sections))
4596     use_mcount_pcrel_call = TRUE;
4597   else
4598     use_mcount_pcrel_call = FALSE;
4599 
4600   ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4601 			       label_no);
4602   begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4603 
4604   emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4605 
4606   if (!use_mcount_pcrel_call)
4607     {
4608       /* The address of the function is loaded into %r25 with an instruction-
4609 	 relative sequence that avoids the use of relocations.  We use SImode
4610 	 for the address of the function in both 32 and 64-bit code to avoid
4611 	 having to provide DImode versions of the lcla2 pattern.  */
4612       if (TARGET_PA_20)
4613 	emit_insn (gen_lcla2 (gen_rtx_REG (SImode, 25), label_rtx));
4614       else
4615 	emit_insn (gen_lcla1 (gen_rtx_REG (SImode, 25), label_rtx));
4616     }
4617 
4618   if (!NO_DEFERRED_PROFILE_COUNTERS)
4619     {
4620       rtx count_label_rtx, addr, r24;
4621       char count_label_name[16];
4622 
4623       funcdef_nos.safe_push (label_no);
4624       ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4625       count_label_rtx = gen_rtx_SYMBOL_REF (Pmode,
4626 					    ggc_strdup (count_label_name));
4627 
4628       addr = force_reg (Pmode, count_label_rtx);
4629       r24 = gen_rtx_REG (Pmode, 24);
4630       emit_move_insn (r24, addr);
4631 
4632       arg_bytes = GEN_INT (TARGET_64BIT ? 24 : 12);
4633       if (use_mcount_pcrel_call)
4634 	call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4635 						     begin_label_rtx));
4636       else
4637 	call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4638 
4639       use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4640     }
4641   else
4642     {
4643       arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8);
4644       if (use_mcount_pcrel_call)
4645 	call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4646 						     begin_label_rtx));
4647       else
4648 	call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4649     }
4650 
4651   use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4652   use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4653 
4654   /* Indicate the _mcount call cannot throw, nor will it execute a
4655      non-local goto.  */
4656   make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4657 
4658   /* Allocate space for fixed arguments.  */
4659   if (reg_parm_stack_space > crtl->outgoing_args_size)
4660     crtl->outgoing_args_size = reg_parm_stack_space;
4661 }
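
/* In outline, the 32-bit sequence with deferred counters and no
   pc-relative call is: copy the return pointer %r2 into %r26, load
   the address of the current function into %r25 via the lcla
   pattern, load the address of the LP counter into %r24, and call
   _mcount with 12 bytes of arguments in registers.  */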
4662 
4663 /* Fetch the return address for the frame COUNT steps up from
4664    the current frame, after the prologue.  FRAMEADDR is the
4665    frame pointer of the COUNT frame.
4666 
4667    We want to ignore any export stub remnants here.  To handle this,
4668    we examine the code at the return address, and if it is an export
4669    stub, we return a memory rtx for the stub return address stored
4670    at frame-24.
4671 
4672    The value returned is used in two different ways:
4673 
4674 	1. To find a function's caller.
4675 
4676 	2. To change the return address for a function.
4677 
4678    This function handles most instances of case 1; however, it will
4679    fail if there are two levels of stubs to execute on the return
4680    path.  The only way I believe that can happen is if the return value
4681    needs a parameter relocation, which never happens for C code.
4682 
4683    This function handles most instances of case 2; however, it will
4684    fail if we did not originally have stub code on the return path
4685    but will need stub code on the new return path.  This can happen if
4686    the caller & callee are both in the main program, but the new
4687    return location is in a shared library.  */
4688 
4689 rtx
4690 pa_return_addr_rtx (int count, rtx frameaddr)
4691 {
4692   rtx label;
4693   rtx rp;
4694   rtx saved_rp;
4695   rtx ins;
4696 
4697   /* The instruction stream at the return address of a PA1.X export stub is:
4698 
4699 	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
4700 	0x004010a1 | stub+12:  ldsid (sr0,rp),r1
4701 	0x00011820 | stub+16:  mtsp r1,sr0
4702 	0xe0400002 | stub+20:  be,n 0(sr0,rp)
4703 
4704      0xe0400002 must be specified as -532676606 so that it won't be
4705      rejected as an invalid immediate operand on 64-bit hosts.
4706 
4707      The instruction stream at the return address of a PA2.0 export stub is:
4708 
4709 	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
4710 	0xe840d002 | stub+12:  bve,n (rp)
4711   */
4712 
4713   HOST_WIDE_INT insns[4];
4714   int i, len;
4715 
4716   if (count != 0)
4717     return NULL_RTX;
4718 
4719   rp = get_hard_reg_initial_val (Pmode, 2);
4720 
4721   if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4722     return rp;
4723 
4724   /* If there is no export stub then just use the value saved from
4725      the return pointer register.  */
4726 
4727   saved_rp = gen_reg_rtx (Pmode);
4728   emit_move_insn (saved_rp, rp);
4729 
4730   /* Get pointer to the instruction stream.  We have to mask out the
4731      privilege level from the two low order bits of the return address
4732      pointer here so that ins will point to the start of the first
4733      instruction that would have been executed if we returned.  */
4734   ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4735   label = gen_label_rtx ();
4736 
4737   if (TARGET_PA_20)
4738     {
4739       insns[0] = 0x4bc23fd1;
4740       insns[1] = -398405630;	/* 0xe840d002, see note above.  */
4741       len = 2;
4742     }
4743   else
4744     {
4745       insns[0] = 0x4bc23fd1;
4746       insns[1] = 0x004010a1;
4747       insns[2] = 0x00011820;
4748       insns[3] = -532676606;
4749       len = 4;
4750     }
4751 
4752   /* Check the instruction stream at the normal return address for the
4753      export stub.  If it is an export stub, then our return address is
4754      really in -24[frameaddr].  */
4755 
4756   for (i = 0; i < len; i++)
4757     {
4758       rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4759       rtx op1 = GEN_INT (insns[i]);
4760       emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4761     }
4762 
4763   /* Here we know that our return address points to an export
4764      stub.  We don't want to return the address of the export stub,
4765      but rather the return address of the export stub.  That return
4766      address is stored at -24[frameaddr].  */
4767 
4768   emit_move_insn (saved_rp,
4769 		  gen_rtx_MEM (Pmode,
4770 			       memory_address (Pmode,
4771 					       plus_constant (Pmode, frameaddr,
4772 							      -24))));
4773 
4774   emit_label (label);
4775 
4776   return saved_rp;
4777 }
4778 
4779 void
4780 pa_emit_bcond_fp (rtx operands[])
4781 {
4782   enum rtx_code code = GET_CODE (operands[0]);
4783   rtx operand0 = operands[1];
4784   rtx operand1 = operands[2];
4785   rtx label = operands[3];
4786 
4787   emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
4788 		          gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4789 
4790   emit_jump_insn (gen_rtx_SET (pc_rtx,
4791 			       gen_rtx_IF_THEN_ELSE (VOIDmode,
4792 						     gen_rtx_fmt_ee (NE,
4793 							      VOIDmode,
4794 							      gen_rtx_REG (CCFPmode, 0),
4795 							      const0_rtx),
4796 						     gen_rtx_LABEL_REF (VOIDmode, label),
4797 						     pc_rtx)));
4798 
4799 }
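
/* A sketch of the RTL generated above for, e.g., a GT comparison of
   operands[1] and operands[2] branching to operands[3]:

	(set (reg:CCFP 0) (gt:CCFP (operands[1]) (operands[2])))
	(set (pc) (if_then_else (ne (reg:CCFP 0) (const_int 0))
				(label_ref operands[3])
				(pc)))  */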
4800 
4801 /* Adjust the cost of a scheduling dependency.  Return the new cost of
4802    a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
4803 
4804 static int
4805 pa_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4806 		unsigned int)
4807 {
4808   enum attr_type attr_type;
4809 
4810   /* Don't adjust costs for a pa8000 chip; also, do not adjust any
4811      true dependencies, as they are described with bypasses now.  */
4812   if (pa_cpu >= PROCESSOR_8000 || dep_type == 0)
4813     return cost;
4814 
4815   if (! recog_memoized (insn))
4816     return 0;
4817 
4818   attr_type = get_attr_type (insn);
4819 
4820   switch (dep_type)
4821     {
4822     case REG_DEP_ANTI:
4823       /* Anti dependency; DEP_INSN reads a register that INSN writes some
4824 	 cycles later.  */
4825 
4826       if (attr_type == TYPE_FPLOAD)
4827 	{
4828 	  rtx pat = PATTERN (insn);
4829 	  rtx dep_pat = PATTERN (dep_insn);
4830 	  if (GET_CODE (pat) == PARALLEL)
4831 	    {
4832 	      /* This happens for the fldXs,mb patterns.  */
4833 	      pat = XVECEXP (pat, 0, 0);
4834 	    }
4835 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4836 	    /* If this happens, we have to extend this to schedule
4837 	       optimally.  Return 0 for now.  */
4838 	    return 0;
4839 
4840 	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4841 	    {
4842 	      if (! recog_memoized (dep_insn))
4843 		return 0;
4844 	      switch (get_attr_type (dep_insn))
4845 		{
4846 		case TYPE_FPALU:
4847 		case TYPE_FPMULSGL:
4848 		case TYPE_FPMULDBL:
4849 		case TYPE_FPDIVSGL:
4850 		case TYPE_FPDIVDBL:
4851 		case TYPE_FPSQRTSGL:
4852 		case TYPE_FPSQRTDBL:
4853 		  /* A fpload can't be issued until one cycle before a
4854 		     preceding arithmetic operation has finished if
4855 		     the target of the fpload is any of the sources
4856 		     (or destination) of the arithmetic operation.  */
4857 		  return insn_default_latency (dep_insn) - 1;
4858 
4859 		default:
4860 		  return 0;
4861 		}
4862 	    }
4863 	}
4864       else if (attr_type == TYPE_FPALU)
4865 	{
4866 	  rtx pat = PATTERN (insn);
4867 	  rtx dep_pat = PATTERN (dep_insn);
4868 	  if (GET_CODE (pat) == PARALLEL)
4869 	    {
4870 	      /* This happens for the fldXs,mb patterns.  */
4871 	      pat = XVECEXP (pat, 0, 0);
4872 	    }
4873 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4874 	    /* If this happens, we have to extend this to schedule
4875 	       optimally.  Return 0 for now.  */
4876 	    return 0;
4877 
4878 	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4879 	    {
4880 	      if (! recog_memoized (dep_insn))
4881 		return 0;
4882 	      switch (get_attr_type (dep_insn))
4883 		{
4884 		case TYPE_FPDIVSGL:
4885 		case TYPE_FPDIVDBL:
4886 		case TYPE_FPSQRTSGL:
4887 		case TYPE_FPSQRTDBL:
4888 		  /* An ALU flop can't be issued until two cycles before a
4889 		     preceding divide or sqrt operation has finished if
4890 		     the target of the ALU flop is any of the sources
4891 		     (or destination) of the divide or sqrt operation.  */
4892 		  return insn_default_latency (dep_insn) - 2;
4893 
4894 		default:
4895 		  return 0;
4896 		}
4897 	    }
4898 	}
4899 
4900       /* For other anti dependencies, the cost is 0.  */
4901       return 0;
4902 
4903     case REG_DEP_OUTPUT:
4904       /* Output dependency; DEP_INSN writes a register that INSN writes some
4905 	 cycles later.  */
4906       if (attr_type == TYPE_FPLOAD)
4907 	{
4908 	  rtx pat = PATTERN (insn);
4909 	  rtx dep_pat = PATTERN (dep_insn);
4910 	  if (GET_CODE (pat) == PARALLEL)
4911 	    {
4912 	      /* This happens for the fldXs,mb patterns.  */
4913 	      pat = XVECEXP (pat, 0, 0);
4914 	    }
4915 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4916 	    /* If this happens, we have to extend this to schedule
4917 	       optimally.  Return 0 for now.  */
4918 	    return 0;
4919 
4920 	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4921 	    {
4922 	      if (! recog_memoized (dep_insn))
4923 		return 0;
4924 	      switch (get_attr_type (dep_insn))
4925 		{
4926 		case TYPE_FPALU:
4927 		case TYPE_FPMULSGL:
4928 		case TYPE_FPMULDBL:
4929 		case TYPE_FPDIVSGL:
4930 		case TYPE_FPDIVDBL:
4931 		case TYPE_FPSQRTSGL:
4932 		case TYPE_FPSQRTDBL:
4933 		  /* A fpload can't be issued until one cycle before a
4934 		     preceding arithmetic operation has finished if
4935 		     the target of the fpload is the destination of the
4936 		     arithmetic operation.
4937 
4938 		     Exception: For PA7100LC, PA7200 and PA7300, the cost
4939 		     is 3 cycles, unless they bundle together.   We also
4940 		     pay the penalty if the second insn is a fpload.  */
4941 		  return insn_default_latency (dep_insn) - 1;
4942 
4943 		default:
4944 		  return 0;
4945 		}
4946 	    }
4947 	}
4948       else if (attr_type == TYPE_FPALU)
4949 	{
4950 	  rtx pat = PATTERN (insn);
4951 	  rtx dep_pat = PATTERN (dep_insn);
4952 	  if (GET_CODE (pat) == PARALLEL)
4953 	    {
4954 	      /* This happens for the fldXs,mb patterns.  */
4955 	      pat = XVECEXP (pat, 0, 0);
4956 	    }
4957 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4958 	    /* If this happens, we have to extend this to schedule
4959 	       optimally.  Return 0 for now.  */
4960 	    return 0;
4961 
4962 	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4963 	    {
4964 	      if (! recog_memoized (dep_insn))
4965 		return 0;
4966 	      switch (get_attr_type (dep_insn))
4967 		{
4968 		case TYPE_FPDIVSGL:
4969 		case TYPE_FPDIVDBL:
4970 		case TYPE_FPSQRTSGL:
4971 		case TYPE_FPSQRTDBL:
4972 		  /* An ALU flop can't be issued until two cycles before a
4973 		     preceding divide or sqrt operation has finished if
4974 		     the target of the ALU flop is also the target of
4975 		     the divide or sqrt operation.  */
4976 		  return insn_default_latency (dep_insn) - 2;
4977 
4978 		default:
4979 		  return 0;
4980 		}
4981 	    }
4982 	}
4983 
4984       /* For other output dependencies, the cost is 0.  */
4985       return 0;
4986 
4987     default:
4988       gcc_unreachable ();
4989     }
4990 }
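
/* For example, under an anti dependence where an fpload's target
   overlaps a source of a preceding FP multiply, the code above yields
   insn_default_latency (multiply) - 1: the load may issue one cycle
   before the multiply completes, but no earlier.  */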
4991 
4992 /* The 700 can only issue a single insn at a time.
4993    The 7XXX processors can issue two insns at a time.
4994    The 8000 can issue 4 insns at a time.  */
4995 static int
4996 pa_issue_rate (void)
4997 {
4998   switch (pa_cpu)
4999     {
5000     case PROCESSOR_700:		return 1;
5001     case PROCESSOR_7100:	return 2;
5002     case PROCESSOR_7100LC:	return 2;
5003     case PROCESSOR_7200:	return 2;
5004     case PROCESSOR_7300:	return 2;
5005     case PROCESSOR_8000:	return 4;
5006 
5007     default:
5008       gcc_unreachable ();
5009     }
5010 }
5011 
5012 
5013 
5014 /* Return the length of INSN, which has already been computed as
5015    LENGTH, plus any adjustment needed.  Return LENGTH if no adjustment
5016    is necessary.
5017 
5018    Also compute the length of an inline block move here as it is too
5019    complicated to express as a length attribute in pa.md.  */
5020 int
5021 pa_adjust_insn_length (rtx_insn *insn, int length)
5022 {
5023   rtx pat = PATTERN (insn);
5024 
5025   /* If length is negative or undefined, provide initial length.  */
5026   if ((unsigned int) length >= INT_MAX)
5027     {
5028       if (GET_CODE (pat) == SEQUENCE)
5029 	insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
5030 
5031       switch (get_attr_type (insn))
5032 	{
5033 	case TYPE_MILLI:
5034 	  length = pa_attr_length_millicode_call (insn);
5035 	  break;
5036 	case TYPE_CALL:
5037 	  length = pa_attr_length_call (insn, 0);
5038 	  break;
5039 	case TYPE_SIBCALL:
5040 	  length = pa_attr_length_call (insn, 1);
5041 	  break;
5042 	case TYPE_DYNCALL:
5043 	  length = pa_attr_length_indirect_call (insn);
5044 	  break;
5045 	case TYPE_SH_FUNC_ADRS:
5046 	  length = pa_attr_length_millicode_call (insn) + 20;
5047 	  break;
5048 	default:
5049 	  gcc_unreachable ();
5050 	}
5051     }
5052 
5053   /* Block move pattern.  */
5054   if (NONJUMP_INSN_P (insn)
5055       && GET_CODE (pat) == PARALLEL
5056       && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5057       && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5058       && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
5059       && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5060       && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
5061     length += compute_cpymem_length (insn) - 4;
5062   /* Block clear pattern.  */
5063   else if (NONJUMP_INSN_P (insn)
5064 	   && GET_CODE (pat) == PARALLEL
5065 	   && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5066 	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5067 	   && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5068 	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
5069     length += compute_clrmem_length (insn) - 4;
5070   /* Conditional branch with an unfilled delay slot.  */
5071   else if (JUMP_P (insn) && ! simplejump_p (insn))
5072     {
5073       /* Adjust a short backwards conditional with an unfilled delay slot.  */
5074       if (GET_CODE (pat) == SET
5075 	  && length == 4
5076 	  && JUMP_LABEL (insn) != NULL_RTX
5077 	  && ! forward_branch_p (insn))
5078 	length += 4;
5079       else if (GET_CODE (pat) == PARALLEL
5080 	       && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5081 	       && length == 4)
5082 	length += 4;
5083       /* Adjust dbra insn with short backwards conditional branch with
5084 	 unfilled delay slot -- only for case where counter is in a
5085 	 general register.  */
5086       else if (GET_CODE (pat) == PARALLEL
5087 	       && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5088 	       && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5089  	       && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5090 	       && length == 4
5091 	       && ! forward_branch_p (insn))
5092 	length += 4;
5093     }
5094   return length;
5095 }
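
/* For example, a short backwards conditional branch with an unfilled
   delay slot enters the code above with LENGTH 4 and leaves with 8,
   leaving room for the nop that will fill the slot.  */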
5096 
5097 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook.  */
5098 
5099 static bool
5100 pa_print_operand_punct_valid_p (unsigned char code)
5101 {
5102   if (code == '@'
5103       || code == '#'
5104       || code == '*'
5105       || code == '^')
5106     return true;
5107 
5108   return false;
5109 }
5110 
5111 /* Print operand X (an rtx) in assembler syntax to file FILE.
5112    CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5113    For `%' followed by punctuation, CODE is the punctuation and X is null.  */
5114 
5115 void
5116 pa_print_operand (FILE *file, rtx x, int code)
5117 {
5118   switch (code)
5119     {
5120     case '#':
5121       /* Output a 'nop' if there's nothing for the delay slot.  */
5122       if (dbr_sequence_length () == 0)
5123 	fputs ("\n\tnop", file);
5124       return;
5125     case '*':
5126       /* Output a nullification completer if there's nothing for the
5127 	 delay slot or nullification is requested.  */
5128       if (dbr_sequence_length () == 0
5129 	  || (final_sequence
5130 	      && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5131         fputs (",n", file);
5132       return;
5133     case 'R':
5134       /* Print out the second register name of a register pair.
5135 	 I.e., R (6) => 7.  */
5136       fputs (reg_names[REGNO (x) + 1], file);
5137       return;
5138     case 'r':
5139       /* A register or zero.  */
5140       if (x == const0_rtx
5141 	  || (x == CONST0_RTX (DFmode))
5142 	  || (x == CONST0_RTX (SFmode)))
5143 	{
5144 	  fputs ("%r0", file);
5145 	  return;
5146 	}
5147       else
5148 	break;
5149     case 'f':
5150       /* A register or zero (floating point).  */
5151       if (x == const0_rtx
5152 	  || (x == CONST0_RTX (DFmode))
5153 	  || (x == CONST0_RTX (SFmode)))
5154 	{
5155 	  fputs ("%fr0", file);
5156 	  return;
5157 	}
5158       else
5159 	break;
5160     case 'A':
5161       {
5162 	rtx xoperands[2];
5163 
5164 	xoperands[0] = XEXP (XEXP (x, 0), 0);
5165 	xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5166 	pa_output_global_address (file, xoperands[1], 0);
5167         fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5168 	return;
5169       }
5170 
5171     case 'C':			/* Plain (C)ondition */
5172     case 'X':
5173       switch (GET_CODE (x))
5174 	{
5175 	case EQ:
5176 	  fputs ("=", file);  break;
5177 	case NE:
5178 	  fputs ("<>", file);  break;
5179 	case GT:
5180 	  fputs (">", file);  break;
5181 	case GE:
5182 	  fputs (">=", file);  break;
5183 	case GEU:
5184 	  fputs (">>=", file);  break;
5185 	case GTU:
5186 	  fputs (">>", file);  break;
5187 	case LT:
5188 	  fputs ("<", file);  break;
5189 	case LE:
5190 	  fputs ("<=", file);  break;
5191 	case LEU:
5192 	  fputs ("<<=", file);  break;
5193 	case LTU:
5194 	  fputs ("<<", file);  break;
5195 	default:
5196 	  gcc_unreachable ();
5197 	}
5198       return;
5199     case 'N':			/* Condition, (N)egated */
5200       switch (GET_CODE (x))
5201 	{
5202 	case EQ:
5203 	  fputs ("<>", file);  break;
5204 	case NE:
5205 	  fputs ("=", file);  break;
5206 	case GT:
5207 	  fputs ("<=", file);  break;
5208 	case GE:
5209 	  fputs ("<", file);  break;
5210 	case GEU:
5211 	  fputs ("<<", file);  break;
5212 	case GTU:
5213 	  fputs ("<<=", file);  break;
5214 	case LT:
5215 	  fputs (">=", file);  break;
5216 	case LE:
5217 	  fputs (">", file);  break;
5218 	case LEU:
5219 	  fputs (">>", file);  break;
5220 	case LTU:
5221 	  fputs (">>=", file);  break;
5222 	default:
5223 	  gcc_unreachable ();
5224 	}
5225       return;
5226     /* For floating point comparisons.  Note that the output
5227        predicates are the complement of the desired mode.  The
5228        conditions for GT, GE, LT, LE and LTGT cause an invalid
5229        operation exception if the result is unordered and this
5230        exception is enabled in the floating-point status register.  */
5231     case 'Y':
5232       switch (GET_CODE (x))
5233 	{
5234 	case EQ:
5235 	  fputs ("!=", file);  break;
5236 	case NE:
5237 	  fputs ("=", file);  break;
5238 	case GT:
5239 	  fputs ("!>", file);  break;
5240 	case GE:
5241 	  fputs ("!>=", file);  break;
5242 	case LT:
5243 	  fputs ("!<", file);  break;
5244 	case LE:
5245 	  fputs ("!<=", file);  break;
5246 	case LTGT:
5247 	  fputs ("!<>", file);  break;
5248 	case UNLE:
5249 	  fputs ("!?<=", file);  break;
5250 	case UNLT:
5251 	  fputs ("!?<", file);  break;
5252 	case UNGE:
5253 	  fputs ("!?>=", file);  break;
5254 	case UNGT:
5255 	  fputs ("!?>", file);  break;
5256 	case UNEQ:
5257 	  fputs ("!?=", file);  break;
5258 	case UNORDERED:
5259 	  fputs ("!?", file);  break;
5260 	case ORDERED:
5261 	  fputs ("?", file);  break;
5262 	default:
5263 	  gcc_unreachable ();
5264 	}
5265       return;
5266     case 'S':			/* Condition, operands are (S)wapped.  */
5267       switch (GET_CODE (x))
5268 	{
5269 	case EQ:
5270 	  fputs ("=", file);  break;
5271 	case NE:
5272 	  fputs ("<>", file);  break;
5273 	case GT:
5274 	  fputs ("<", file);  break;
5275 	case GE:
5276 	  fputs ("<=", file);  break;
5277 	case GEU:
5278 	  fputs ("<<=", file);  break;
5279 	case GTU:
5280 	  fputs ("<<", file);  break;
5281 	case LT:
5282 	  fputs (">", file);  break;
5283 	case LE:
5284 	  fputs (">=", file);  break;
5285 	case LEU:
5286 	  fputs (">>=", file);  break;
5287 	case LTU:
5288 	  fputs (">>", file);  break;
5289 	default:
5290 	  gcc_unreachable ();
5291 	}
5292       return;
5293     case 'B':			/* Condition, (B)oth swapped and negate.  */
5294       switch (GET_CODE (x))
5295 	{
5296 	case EQ:
5297 	  fputs ("<>", file);  break;
5298 	case NE:
5299 	  fputs ("=", file);  break;
5300 	case GT:
5301 	  fputs (">=", file);  break;
5302 	case GE:
5303 	  fputs (">", file);  break;
5304 	case GEU:
5305 	  fputs (">>", file);  break;
5306 	case GTU:
5307 	  fputs (">>=", file);  break;
5308 	case LT:
5309 	  fputs ("<=", file);  break;
5310 	case LE:
5311 	  fputs ("<", file);  break;
5312 	case LEU:
5313 	  fputs ("<<", file);  break;
5314 	case LTU:
5315 	  fputs ("<<=", file);  break;
5316 	default:
5317 	  gcc_unreachable ();
5318 	}
5319       return;
5320     case 'k':
5321       gcc_assert (GET_CODE (x) == CONST_INT);
5322       fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5323       return;
5324     case 'Q':
5325       gcc_assert (GET_CODE (x) == CONST_INT);
5326       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5327       return;
5328     case 'L':
5329       gcc_assert (GET_CODE (x) == CONST_INT);
5330       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5331       return;
5332     case 'o':
5333       gcc_assert (GET_CODE (x) == CONST_INT
5334 		  && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
5335       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5336       return;
5337     case 'O':
5338       gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5339       fprintf (file, "%d", exact_log2 (INTVAL (x)));
5340       return;
5341     case 'p':
5342       gcc_assert (GET_CODE (x) == CONST_INT);
5343       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5344       return;
5345     case 'P':
5346       gcc_assert (GET_CODE (x) == CONST_INT);
5347       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5348       return;
5349     case 'I':
5350       if (GET_CODE (x) == CONST_INT)
5351 	fputs ("i", file);
5352       return;
5353     case 'M':
5354     case 'F':
5355       switch (GET_CODE (XEXP (x, 0)))
5356 	{
5357 	case PRE_DEC:
5358 	case PRE_INC:
5359 	  if (ASSEMBLER_DIALECT == 0)
5360 	    fputs ("s,mb", file);
5361 	  else
5362 	    fputs (",mb", file);
5363 	  break;
5364 	case POST_DEC:
5365 	case POST_INC:
5366 	  if (ASSEMBLER_DIALECT == 0)
5367 	    fputs ("s,ma", file);
5368 	  else
5369 	    fputs (",ma", file);
5370 	  break;
5371 	case PLUS:
5372 	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5373 	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5374 	    {
5375 	      if (ASSEMBLER_DIALECT == 0)
5376 		fputs ("x", file);
5377 	    }
5378 	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5379 		   || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5380 	    {
5381 	      if (ASSEMBLER_DIALECT == 0)
5382 		fputs ("x,s", file);
5383 	      else
5384 		fputs (",s", file);
5385 	    }
5386 	  else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5387 	    fputs ("s", file);
5388 	  break;
5389 	default:
5390 	  if (code == 'F' && ASSEMBLER_DIALECT == 0)
5391 	    fputs ("s", file);
5392 	  break;
5393 	}
5394       return;
5395     case 'G':
5396       pa_output_global_address (file, x, 0);
5397       return;
5398     case 'H':
5399       pa_output_global_address (file, x, 1);
5400       return;
5401     case 0:			/* Don't do anything special */
5402       break;
5403     case 'Z':
5404       {
5405 	unsigned op[3];
5406 	compute_zdepwi_operands (INTVAL (x), op);
5407 	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5408 	return;
5409       }
5410     case 'z':
5411       {
5412 	unsigned op[3];
5413 	compute_zdepdi_operands (INTVAL (x), op);
5414 	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5415 	return;
5416       }
5417     case 'c':
5418       /* We can get here from a .vtable_inherit due to our
5419 	 CONSTANT_ADDRESS_P rejecting perfectly good constant
5420 	 addresses.  */
5421       break;
5422     default:
5423       gcc_unreachable ();
5424     }
5425   if (GET_CODE (x) == REG)
5426     {
5427       fputs (reg_names [REGNO (x)], file);
5428       if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5429 	{
5430 	  fputs ("R", file);
5431 	  return;
5432 	}
5433       if (FP_REG_P (x)
5434 	  && GET_MODE_SIZE (GET_MODE (x)) <= 4
5435 	  && (REGNO (x) & 1) == 0)
5436 	fputs ("L", file);
5437     }
5438   else if (GET_CODE (x) == MEM)
5439     {
5440       int size = GET_MODE_SIZE (GET_MODE (x));
5441       rtx base = NULL_RTX;
5442       switch (GET_CODE (XEXP (x, 0)))
5443 	{
5444 	case PRE_DEC:
5445 	case POST_DEC:
5446           base = XEXP (XEXP (x, 0), 0);
5447 	  fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5448 	  break;
5449 	case PRE_INC:
5450 	case POST_INC:
5451           base = XEXP (XEXP (x, 0), 0);
5452 	  fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5453 	  break;
5454 	case PLUS:
5455 	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5456 	    fprintf (file, "%s(%s)",
5457 		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5458 		     reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5459 	  else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5460 	    fprintf (file, "%s(%s)",
5461 		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5462 		     reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5463 	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5464 		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5465 	    {
5466 	      /* Because the REG_POINTER flag can get lost during reload,
5467 		 pa_legitimate_address_p canonicalizes the order of the
5468 		 index and base registers in the combined move patterns.  */
5469 	      rtx base = XEXP (XEXP (x, 0), 1);
5470 	      rtx index = XEXP (XEXP (x, 0), 0);
5471 
5472 	      fprintf (file, "%s(%s)",
5473 		       reg_names [REGNO (index)], reg_names [REGNO (base)]);
5474 	    }
5475 	  else
5476 	    output_address (GET_MODE (x), XEXP (x, 0));
5477 	  break;
5478 	default:
5479 	  output_address (GET_MODE (x), XEXP (x, 0));
5480 	  break;
5481 	}
5482     }
5483   else
5484     output_addr_const (file, x);
5485 }
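
/* Some worked examples of the operand codes above; the values follow
   directly from the arithmetic in pa_print_operand:

	%R on (reg %r6)      prints %r7  (second register of the pair)
	%k on (const_int 5)  prints -6   (~5)
	%Q on (const_int 5)  prints 59   (64 - (5 & 63))
	%L on (const_int 5)  prints 27   (32 - (5 & 31))
	%P on (const_int 5)  prints 26   (31 - (5 & 31))  */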
5486 
5487 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.  */
5488 
5489 void
5490 pa_output_global_address (FILE *file, rtx x, int round_constant)
5491 {
5492 
5493   /* Imagine  (high (const (plus ...))).  */
5494   if (GET_CODE (x) == HIGH)
5495     x = XEXP (x, 0);
5496 
5497   if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5498     output_addr_const (file, x);
5499   else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5500     {
5501       output_addr_const (file, x);
5502       fputs ("-$global$", file);
5503     }
5504   else if (GET_CODE (x) == CONST)
5505     {
5506       const char *sep = "";
5507       int offset = 0;		/* assembler wants -$global$ at end */
5508       rtx base = NULL_RTX;
5509 
5510       switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5511 	{
5512 	case LABEL_REF:
5513 	case SYMBOL_REF:
5514 	  base = XEXP (XEXP (x, 0), 0);
5515 	  output_addr_const (file, base);
5516 	  break;
5517 	case CONST_INT:
5518 	  offset = INTVAL (XEXP (XEXP (x, 0), 0));
5519 	  break;
5520 	default:
5521 	  gcc_unreachable ();
5522 	}
5523 
5524       switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5525 	{
5526 	case LABEL_REF:
5527 	case SYMBOL_REF:
5528 	  base = XEXP (XEXP (x, 0), 1);
5529 	  output_addr_const (file, base);
5530 	  break;
5531 	case CONST_INT:
5532 	  offset = INTVAL (XEXP (XEXP (x, 0), 1));
5533 	  break;
5534 	default:
5535 	  gcc_unreachable ();
5536 	}
5537 
5538       /* How bogus.  The compiler is apparently responsible for
5539 	 rounding the constant if it uses an LR field selector.
5540 
5541 	 The linker and/or assembler seem a better place since
5542 	 they have to do this kind of thing already.
5543 
5544 	 If we fail to do this, HP's optimizing linker may eliminate
5545 	 an addil, but not update the ldw/stw/ldo instruction that
5546 	 uses the result of the addil.  */
5547       if (round_constant)
5548 	offset = ((offset + 0x1000) & ~0x1fff);
5549 
5550       switch (GET_CODE (XEXP (x, 0)))
5551 	{
5552 	case PLUS:
5553 	  if (offset < 0)
5554 	    {
5555 	      offset = -offset;
5556 	      sep = "-";
5557 	    }
5558 	  else
5559 	    sep = "+";
5560 	  break;
5561 
5562 	case MINUS:
5563 	  gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5564 	  sep = "-";
5565 	  break;
5566 
5567 	default:
5568 	  gcc_unreachable ();
5569 	}
5570 
5571       if (!read_only_operand (base, VOIDmode) && !flag_pic)
5572 	fputs ("-$global$", file);
5573       if (offset)
5574 	fprintf (file, "%s%d", sep, offset);
5575     }
5576   else
5577     output_addr_const (file, x);
5578 }
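
/* For example, with flag_pic clear the expression
   (const (plus (symbol_ref "foo") (const_int 4))) is printed as
   "foo-$global$+4" when foo is writable, and as plain "foo+4" when
   foo is read-only (read_only_operand suppresses the -$global$).  */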
5579 
5580 /* Output boilerplate text to appear at the beginning of the file.
5581    There are several possible versions.  */
5582 #define aputs(x) fputs(x, asm_out_file)
5583 static inline void
5584 pa_file_start_level (void)
5585 {
5586   if (TARGET_64BIT)
5587     aputs ("\t.LEVEL 2.0w\n");
5588   else if (TARGET_PA_20)
5589     aputs ("\t.LEVEL 2.0\n");
5590   else if (TARGET_PA_11)
5591     aputs ("\t.LEVEL 1.1\n");
5592   else
5593     aputs ("\t.LEVEL 1.0\n");
5594 }
5595 
5596 static inline void
5597 pa_file_start_space (int sortspace)
5598 {
5599   aputs ("\t.SPACE $PRIVATE$");
5600   if (sortspace)
5601     aputs (",SORT=16");
5602   aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5603   if (flag_tm)
5604     aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5605   aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5606 	 "\n\t.SPACE $TEXT$");
5607   if (sortspace)
5608     aputs (",SORT=8");
5609   aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5610 	 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5611 }
5612 
5613 static inline void
5614 pa_file_start_file (int want_version)
5615 {
5616   if (write_symbols != NO_DEBUG)
5617     {
5618       output_file_directive (asm_out_file, main_input_filename);
5619       if (want_version)
5620 	aputs ("\t.version\t\"01.01\"\n");
5621     }
5622 }
5623 
5624 static inline void
5625 pa_file_start_mcount (const char *aswhat)
5626 {
5627   if (profile_flag)
5628     fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5629 }
5630 
5631 static void
5632 pa_elf_file_start (void)
5633 {
5634   pa_file_start_level ();
5635   pa_file_start_mcount ("ENTRY");
5636   pa_file_start_file (0);
5637 }
5638 
5639 static void
5640 pa_som_file_start (void)
5641 {
5642   pa_file_start_level ();
5643   pa_file_start_space (0);
5644   aputs ("\t.IMPORT $global$,DATA\n"
5645          "\t.IMPORT $$dyncall,MILLICODE\n");
5646   pa_file_start_mcount ("CODE");
5647   pa_file_start_file (0);
5648 }
5649 
5650 static void
5651 pa_linux_file_start (void)
5652 {
5653   pa_file_start_file (1);
5654   pa_file_start_level ();
5655   pa_file_start_mcount ("CODE");
5656 }
5657 
5658 static void
5659 pa_hpux64_gas_file_start (void)
5660 {
5661   pa_file_start_level ();
5662 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5663   if (profile_flag)
5664     ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5665 #endif
5666   pa_file_start_file (1);
5667 }
5668 
5669 static void
5670 pa_hpux64_hpas_file_start (void)
5671 {
5672   pa_file_start_level ();
5673   pa_file_start_space (1);
5674   pa_file_start_mcount ("CODE");
5675   pa_file_start_file (0);
5676 }
5677 #undef aputs
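
/* As an illustration, pa_som_file_start on a PA 1.1 target emits
   roughly the following preamble (assuming no profiling, no debug
   info and no -fgnu-tm):

	.LEVEL 1.1
	.SPACE $PRIVATE$
	.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31
	.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82
	.SPACE $TEXT$
	.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44
	.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY
	.IMPORT $global$,DATA
	.IMPORT $$dyncall,MILLICODE  */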
5678 
5679 /* Search the deferred plabel list for SYMBOL and return its internal
5680    label.  If an entry for SYMBOL is not found, a new entry is created.  */
5681 
5682 rtx
5683 pa_get_deferred_plabel (rtx symbol)
5684 {
5685   const char *fname = XSTR (symbol, 0);
5686   size_t i;
5687 
5688   /* See if we have already put this function on the list of deferred
5689      plabels.  This list is generally small, so a linear search is not
5690      too ugly.  If it proves too slow, replace it with something faster.  */
5691   for (i = 0; i < n_deferred_plabels; i++)
5692     if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5693       break;
5694 
5695   /* If the deferred plabel list is empty, or this entry was not found
5696      on the list, create a new entry on the list.  */
5697   if (deferred_plabels == NULL || i == n_deferred_plabels)
5698     {
5699       tree id;
5700 
5701       if (deferred_plabels == 0)
5702 	deferred_plabels =  ggc_alloc<deferred_plabel> ();
5703       else
5704         deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5705                                           deferred_plabels,
5706                                           n_deferred_plabels + 1);
5707 
5708       i = n_deferred_plabels++;
5709       deferred_plabels[i].internal_label = gen_label_rtx ();
5710       deferred_plabels[i].symbol = symbol;
5711 
5712       /* Gross.  We have just implicitly taken the address of this
5713 	 function.  Mark it in the same manner as assemble_name.  */
5714       id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5715       if (id)
5716 	mark_referenced (id);
5717     }
5718 
5719   return deferred_plabels[i].internal_label;
5720 }
5721 
5722 static void
5723 output_deferred_plabels (void)
5724 {
5725   size_t i;
5726 
5727   /* If we have some deferred plabels, then we need to switch into the
5728      data or readonly data section, and align it to a 4 byte boundary
5729      before outputting the deferred plabels.  */
5730   if (n_deferred_plabels)
5731     {
5732       switch_to_section (flag_pic ? data_section : readonly_data_section);
5733       ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5734     }
5735 
5736   /* Now output the deferred plabels.  */
5737   for (i = 0; i < n_deferred_plabels; i++)
5738     {
5739       targetm.asm_out.internal_label (asm_out_file, "L",
5740 		 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5741       assemble_integer (deferred_plabels[i].symbol,
5742 			TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5743     }
5744 }
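
/* Each deferred plabel thus becomes a word-sized pointer in the data
   (PIC) or read-only data section, roughly

	L$0042:
		.word foo

   in 32-bit code, where L$0042 stands for the internal label that
   pa_get_deferred_plabel handed out (the exact spelling depends on
   the label syntax of the target assembler).  */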
5745 
5746 /* Initialize optabs to point to emulation routines.  */
5747 
5748 static void
5749 pa_init_libfuncs (void)
5750 {
5751   if (HPUX_LONG_DOUBLE_LIBRARY)
5752     {
5753       set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5754       set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5755       set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5756       set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5757       set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5758       set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5759       set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5760       set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5761       set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5762 
5763       set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5764       set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5765       set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5766       set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5767       set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5768       set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5769       set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5770 
5771       set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5772       set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5773       set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5774       set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5775 
5776       set_conv_libfunc (sfix_optab, SImode, TFmode,
5777 			TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5778 				     : "_U_Qfcnvfxt_quad_to_sgl");
5779       set_conv_libfunc (sfix_optab, DImode, TFmode,
5780 			"_U_Qfcnvfxt_quad_to_dbl");
5781       set_conv_libfunc (ufix_optab, SImode, TFmode,
5782 			"_U_Qfcnvfxt_quad_to_usgl");
5783       set_conv_libfunc (ufix_optab, DImode, TFmode,
5784 			"_U_Qfcnvfxt_quad_to_udbl");
5785 
5786       set_conv_libfunc (sfloat_optab, TFmode, SImode,
5787 			"_U_Qfcnvxf_sgl_to_quad");
5788       set_conv_libfunc (sfloat_optab, TFmode, DImode,
5789 			"_U_Qfcnvxf_dbl_to_quad");
5790       set_conv_libfunc (ufloat_optab, TFmode, SImode,
5791 			"_U_Qfcnvxf_usgl_to_quad");
5792       set_conv_libfunc (ufloat_optab, TFmode, DImode,
5793 			"_U_Qfcnvxf_udbl_to_quad");
5794     }
5795 
5796   if (TARGET_SYNC_LIBCALL)
5797     init_sync_libfuncs (8);
5798 }
5799 
5800 /* HP's millicode routines mean something special to the assembler.
5801    Keep track of which ones we have used.  */
5802 
5803 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5804 static void import_milli (enum millicodes);
5805 static char imported[(int) end1000];
5806 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5807 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5808 #define MILLI_START 10
5809 
5810 static void
5811 import_milli (enum millicodes code)
5812 {
5813   char str[sizeof (import_string)];
5814 
5815   if (!imported[(int) code])
5816     {
5817       imported[(int) code] = 1;
5818       strcpy (str, import_string);
5819       strncpy (str + MILLI_START, milli_names[(int) code], 4);
5820       output_asm_insn (str, 0);
5821     }
5822 }
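
/* For instance, the first import_milli (mulI) outputs
   ".IMPORT $$mulI,MILLICODE"; later calls for the same code are
   no-ops thanks to the imported[] flags.  */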
5823 
5824 /* The register constraints have put the operands and return value in
5825    the proper registers.  */
5826 
5827 const char *
5828 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
5829 {
5830   import_milli (mulI);
5831   return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5832 }
5833 
5834 /* Emit the rtl for doing a division by a constant.  */
5835 
5836 /* Do magic division millicodes exist for this value? */
5837 const int pa_magic_milli[] = {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5838 
5839 /* We'll use an array to keep track of the magic millicodes and
5840    whether or not we've used them already. [n][0] is signed, [n][1] is
5841    unsigned.  */
5842 
5843 static int div_milli[16][2];
5844 
5845 int
5846 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5847 {
5848   if (GET_CODE (operands[2]) == CONST_INT
5849       && INTVAL (operands[2]) > 0
5850       && INTVAL (operands[2]) < 16
5851       && pa_magic_milli[INTVAL (operands[2])])
5852     {
5853       rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5854 
5855       emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5856       emit
5857 	(gen_rtx_PARALLEL
5858 	 (VOIDmode,
5859 	  gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
5860 				     gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5861 						     SImode,
5862 						     gen_rtx_REG (SImode, 26),
5863 						     operands[2])),
5864 		     gen_rtx_CLOBBER (VOIDmode, operands[4]),
5865 		     gen_rtx_CLOBBER (VOIDmode, operands[3]),
5866 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5867 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5868 		     gen_rtx_CLOBBER (VOIDmode, ret))));
5869       emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5870       return 1;
5871     }
5872   return 0;
5873 }
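
/* For example, a SImode division by 7 (pa_magic_milli[7] is nonzero)
   moves the dividend into %r26 and emits the PARALLEL above with the
   quotient in %r29 and with %r25, %r26 and the return pointer (%r31,
   or %r2 on the 64-bit runtime) clobbered; pa_output_div_insn later
   renders it as a call to the $$divI_7 millicode.  */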
5874 
5875 const char *
5876 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
5877 {
5878   int divisor;
5879 
5880   /* If the divisor is a constant, try to use one of the special
5881      opcodes.  */
5882   if (GET_CODE (operands[0]) == CONST_INT)
5883     {
5884       static char buf[100];
5885       divisor = INTVAL (operands[0]);
5886       if (!div_milli[divisor][unsignedp])
5887 	{
5888 	  div_milli[divisor][unsignedp] = 1;
5889 	  if (unsignedp)
5890 	    output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5891 	  else
5892 	    output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5893 	}
5894       if (unsignedp)
5895 	{
5896 	  sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5897 		   INTVAL (operands[0]));
5898 	  return pa_output_millicode_call (insn,
5899 					   gen_rtx_SYMBOL_REF (SImode, buf));
5900 	}
5901       else
5902 	{
5903 	  sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5904 		   INTVAL (operands[0]));
5905 	  return pa_output_millicode_call (insn,
5906 					   gen_rtx_SYMBOL_REF (SImode, buf));
5907 	}
5908     }
5909   /* Divisor isn't a special constant.  */
5910   else
5911     {
5912       if (unsignedp)
5913 	{
5914 	  import_milli (divU);
5915 	  return pa_output_millicode_call (insn,
5916 					gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5917 	}
5918       else
5919 	{
5920 	  import_milli (divI);
5921 	  return pa_output_millicode_call (insn,
5922 					gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5923 	}
5924     }
5925 }
5926 
5927 /* Output a $$rem millicode to do mod.  */
5928 
5929 const char *
5930 pa_output_mod_insn (int unsignedp, rtx_insn *insn)
5931 {
5932   if (unsignedp)
5933     {
5934       import_milli (remU);
5935       return pa_output_millicode_call (insn,
5936 				       gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5937     }
5938   else
5939     {
5940       import_milli (remI);
5941       return pa_output_millicode_call (insn,
5942 				       gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5943     }
5944 }
5945 
5946 void
5947 pa_output_arg_descriptor (rtx_insn *call_insn)
5948 {
5949   const char *arg_regs[4];
5950   machine_mode arg_mode;
5951   rtx link;
5952   int i, output_flag = 0;
5953   int regno;
5954 
5955   /* We neither need nor want argument location descriptors for the
5956      64-bit runtime environment or the ELF32 environment.  */
5957   if (TARGET_64BIT || TARGET_ELF32)
5958     return;
5959 
5960   for (i = 0; i < 4; i++)
5961     arg_regs[i] = 0;
5962 
5963   /* Specify explicitly that no argument relocations should take place
5964      if using the portable runtime calling conventions.  */
5965   if (TARGET_PORTABLE_RUNTIME)
5966     {
5967       fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5968 	     asm_out_file);
5969       return;
5970     }
5971 
5972   gcc_assert (CALL_P (call_insn));
5973   for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5974        link; link = XEXP (link, 1))
5975     {
5976       rtx use = XEXP (link, 0);
5977 
5978       if (! (GET_CODE (use) == USE
5979 	     && GET_CODE (XEXP (use, 0)) == REG
5980 	     && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5981 	continue;
5982 
5983       arg_mode = GET_MODE (XEXP (use, 0));
5984       regno = REGNO (XEXP (use, 0));
5985       if (regno >= 23 && regno <= 26)
5986 	{
5987 	  arg_regs[26 - regno] = "GR";
5988 	  if (arg_mode == DImode)
5989 	    arg_regs[25 - regno] = "GR";
5990 	}
5991       else if (regno >= 32 && regno <= 39)
5992 	{
5993 	  if (arg_mode == SFmode)
5994 	    arg_regs[(regno - 32) / 2] = "FR";
5995 	  else
5996 	    {
5997 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5998 	      arg_regs[(regno - 34) / 2] = "FR";
5999 	      arg_regs[(regno - 34) / 2 + 1] = "FU";
6000 #else
6001 	      arg_regs[(regno - 34) / 2] = "FU";
6002 	      arg_regs[(regno - 34) / 2 + 1] = "FR";
6003 #endif
6004 	    }
6005 	}
6006     }
6007   fputs ("\t.CALL ", asm_out_file);
6008   for (i = 0; i < 4; i++)
6009     {
6010       if (arg_regs[i])
6011 	{
6012 	  if (output_flag++)
6013 	    fputc (',', asm_out_file);
6014 	  fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
6015 	}
6016     }
6017   fputc ('\n', asm_out_file);
6018 }
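
/* For example, following the regno arithmetic above, a 32-bit SOM
   call with a DImode argument in %r25/%r26 gets the descriptor

	.CALL ARGW0=GR,ARGW1=GR

   while an SFmode argument in hard register 34 would yield ARGW1=FR.  */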
6019 
6020 /* Inform reload about cases where moving X with a mode MODE to or from
6021    a register in RCLASS requires an extra scratch or immediate register.
6022    Return the class needed for the immediate register.  */
6023 
6024 static reg_class_t
6025 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
6026 		     machine_mode mode, secondary_reload_info *sri)
6027 {
6028   int regno;
6029   enum reg_class rclass = (enum reg_class) rclass_i;
6030 
6031   /* Handle the easy stuff first.  */
6032   if (rclass == R1_REGS)
6033     return NO_REGS;
6034 
6035   if (REG_P (x))
6036     {
6037       regno = REGNO (x);
6038       if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
6039 	return NO_REGS;
6040     }
6041   else
6042     regno = -1;
6043 
6044   /* If we have something like (mem (mem (...))), we can safely assume the
6045      inner MEM will end up in a general register after reloading, so there's
6046      no need for a secondary reload.  */
6047   if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
6048     return NO_REGS;
6049 
6050   /* Trying to load a constant into a FP register during PIC code
6051      generation requires %r1 as a scratch register.  For float modes,
6052      the only legitimate constant is CONST0_RTX.  However, there are
6053      a few patterns that accept constant double operands.  */
6054   if (flag_pic
6055       && FP_REG_CLASS_P (rclass)
6056       && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
6057     {
6058       switch (mode)
6059 	{
6060 	case E_SImode:
6061 	  sri->icode = CODE_FOR_reload_insi_r1;
6062 	  break;
6063 
6064 	case E_DImode:
6065 	  sri->icode = CODE_FOR_reload_indi_r1;
6066 	  break;
6067 
6068 	case E_SFmode:
6069 	  sri->icode = CODE_FOR_reload_insf_r1;
6070 	  break;
6071 
6072 	case E_DFmode:
6073 	  sri->icode = CODE_FOR_reload_indf_r1;
6074 	  break;
6075 
6076 	default:
6077 	  gcc_unreachable ();
6078 	}
6079       return NO_REGS;
6080     }
6081 
6082   /* Secondary reloads of symbolic expressions require %r1 as a scratch
6083      register when we're generating PIC code or when the operand isn't
6084      readonly.  */
6085   if (pa_symbolic_expression_p (x))
6086     {
6087       if (GET_CODE (x) == HIGH)
6088 	x = XEXP (x, 0);
6089 
6090       if (flag_pic || !read_only_operand (x, VOIDmode))
6091 	{
6092 	  switch (mode)
6093 	    {
6094 	    case E_SImode:
6095 	      sri->icode = CODE_FOR_reload_insi_r1;
6096 	      break;
6097 
6098 	    case E_DImode:
6099 	      sri->icode = CODE_FOR_reload_indi_r1;
6100 	      break;
6101 
6102 	    default:
6103 	      gcc_unreachable ();
6104 	    }
6105 	  return NO_REGS;
6106 	}
6107     }
6108 
6109   /* Profiling showed the PA port spends about 1.3% of its compilation
6110      time in true_regnum from calls inside pa_secondary_reload_class.  */
6111   if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6112     regno = true_regnum (x);
6113 
6114   /* Handle reloads for floating point loads and stores.  */
6115   if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
6116       && FP_REG_CLASS_P (rclass))
6117     {
6118       if (MEM_P (x))
6119 	{
6120 	  x = XEXP (x, 0);
6121 
6122 	  /* We don't need a secondary reload for indexed memory addresses.
6123 
6124 	     When INT14_OK_STRICT is true, it might appear that we could
6125 	     directly allow register indirect memory addresses.  However,
6126 	     this doesn't work because we don't support SUBREGs in
6127 	     floating-point register copies and reload doesn't tell us
6128 	     when it's going to use a SUBREG.  */
6129 	  if (IS_INDEX_ADDR_P (x))
6130 	    return NO_REGS;
6131 	}
6132 
6133       /* Request a secondary reload with a general scratch register
6134 	 for everything else.  ??? Could symbolic operands be handled
6135 	 directly when generating non-pic PA 2.0 code?  */
6136       sri->icode = (in_p
6137 		    ? direct_optab_handler (reload_in_optab, mode)
6138 		    : direct_optab_handler (reload_out_optab, mode));
6139       return NO_REGS;
6140     }
6141 
6142   /* A SAR<->FP register copy requires an intermediate general register
6143      and secondary memory.  We need a secondary reload with a general
6144      scratch register for spills.  */
6145   if (rclass == SHIFT_REGS)
6146     {
6147       /* Handle spill.  */
6148       if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6149 	{
6150 	  sri->icode = (in_p
6151 			? direct_optab_handler (reload_in_optab, mode)
6152 			: direct_optab_handler (reload_out_optab, mode));
6153 	  return NO_REGS;
6154 	}
6155 
6156       /* Handle FP copy.  */
6157       if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6158 	return GENERAL_REGS;
6159     }
6160 
6161   if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6162       && REGNO_REG_CLASS (regno) == SHIFT_REGS
6163       && FP_REG_CLASS_P (rclass))
6164     return GENERAL_REGS;
6165 
6166   return NO_REGS;
6167 }
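
/* For instance, loading a constant into a floating-point register
   while generating PIC code is routed through CODE_FOR_reload_insi_r1
   (for SImode), which supplies %r1 as the scratch, whereas an FP load
   from an indexed address needs no secondary reload at all.  */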
6168 
6169 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.  */
6170 
6171 static bool
6172 pa_secondary_memory_needed (machine_mode mode ATTRIBUTE_UNUSED,
6173 			    reg_class_t class1 ATTRIBUTE_UNUSED,
6174 			    reg_class_t class2 ATTRIBUTE_UNUSED)
6175 {
6176 #ifdef PA_SECONDARY_MEMORY_NEEDED
6177   return PA_SECONDARY_MEMORY_NEEDED (mode, class1, class2);
6178 #else
6179   return false;
6180 #endif
6181 }
6182 
6183 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY.  The argument pointer
6184    is only marked as live on entry by df-scan when it is a fixed
6185    register.  It isn't a fixed register in the 64-bit runtime,
6186    so we need to mark it here.  */
6187 
6188 static void
6189 pa_extra_live_on_entry (bitmap regs)
6190 {
6191   if (TARGET_64BIT)
6192     bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6193 }
6194 
6195 /* Implement EH_RETURN_HANDLER_RTX.  The MEM needs to be volatile
6196    to prevent it from being deleted.  */
6197 
6198 rtx
6199 pa_eh_return_handler_rtx (void)
6200 {
6201   rtx tmp;
6202 
6203   tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6204 		      TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6205   tmp = gen_rtx_MEM (word_mode, tmp);
6206   tmp->volatil = 1;
6207   return tmp;
6208 }
6209 
6210 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6211    by invisible reference.  As a GCC extension, we also pass anything
6212    with a zero or variable size by reference.
6213 
6214    The 64-bit runtime does not describe passing any types by invisible
6215    reference.  The internals of GCC can't currently handle passing
6216    empty structures, and zero or variable length arrays when they are
6217    not passed entirely on the stack or by reference.  Thus, as a GCC
6218    extension, we pass these types by reference.  The HP compiler doesn't
6219    support these types, so hopefully there shouldn't be any compatibility
6220    issues.  This may have to be revisited when HP releases a C99 compiler
6221    or updates the ABI.  */
6222 
6223 static bool
6224 pa_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6225 {
6226   HOST_WIDE_INT size = arg.type_size_in_bytes ();
6227   if (TARGET_64BIT)
6228     return size <= 0;
6229   else
6230     return size <= 0 || size > 8;
6231 }
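
/* Thus, a 12-byte struct (size > 8) is passed by invisible reference
   in the 32-bit runtime but by value in the 64-bit runtime, while
   zero-sized and variable-sized objects go by reference in both.  */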
6232 
6233 /* Implement TARGET_FUNCTION_ARG_PADDING.  */
6234 
6235 static pad_direction
6236 pa_function_arg_padding (machine_mode mode, const_tree type)
6237 {
6238   if (mode == BLKmode
6239       || (TARGET_64BIT
6240 	  && type
6241 	  && (AGGREGATE_TYPE_P (type)
6242 	      || TREE_CODE (type) == COMPLEX_TYPE
6243 	      || TREE_CODE (type) == VECTOR_TYPE)))
6244     {
6245       /* Return PAD_NONE if justification is not required.  */
6246       if (type
6247 	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6248 	  && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6249 	return PAD_NONE;
6250 
6251       /* The directions set here are ignored when a BLKmode argument larger
6252 	 than a word is placed in a register.  Different code is used for
6253 	 the stack and registers.  This makes it difficult to have a
6254 	 consistent data representation for both the stack and registers.
6255 	 For both runtimes, the justification and padding for arguments on
6256 	 the stack and in registers should be identical.  */
6257       if (TARGET_64BIT)
6258 	/* The 64-bit runtime specifies left justification for aggregates.  */
6259 	return PAD_UPWARD;
6260       else
6261 	/* The 32-bit runtime architecture specifies right justification.
6262 	   When the argument is passed on the stack, the argument is padded
6263 	   with garbage on the left.  The HP compiler pads with zeros.  */
6264 	return PAD_DOWNWARD;
6265     }
6266 
6267   if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6268     return PAD_DOWNWARD;
6269   else
6270     return PAD_NONE;
6271 }
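
/* For example, a 3-byte BLKmode struct gets PAD_DOWNWARD
   (right-justified in its slot) in the 32-bit runtime and PAD_UPWARD
   (left-justified) in the 64-bit runtime, while a scalar whose mode
   already fills PARM_BOUNDARY bits gets PAD_NONE.  */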
6272 
6273 
6274 /* Do what is necessary for `va_start'.  We look at the current function
6275    to determine if stdargs or varargs is used and fill in an initial
6276    va_list.  A pointer to this constructor is returned.  */
6277 
6278 static rtx
6279 hppa_builtin_saveregs (void)
6280 {
6281   rtx offset, dest;
6282   tree fntype = TREE_TYPE (current_function_decl);
6283   int argadj = ((!stdarg_p (fntype))
6284 		? UNITS_PER_WORD : 0);
6285 
6286   if (argadj)
6287     offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6288   else
6289     offset = crtl->args.arg_offset_rtx;
6290 
6291   if (TARGET_64BIT)
6292     {
6293       int i, off;
6294 
6295       /* Adjust for varargs/stdarg differences.  */
6296       if (argadj)
6297 	offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6298       else
6299 	offset = crtl->args.arg_offset_rtx;
6300 
6301       /* We need to save %r26 .. %r19 inclusive starting at offset -64
6302 	 from the incoming arg pointer and growing to larger addresses.  */
6303       for (i = 26, off = -64; i >= 19; i--, off += 8)
6304 	emit_move_insn (gen_rtx_MEM (word_mode,
6305 				     plus_constant (Pmode,
6306 						    arg_pointer_rtx, off)),
6307 			gen_rtx_REG (word_mode, i));
6308 
6309       /* The incoming args pointer points just beyond the flushback area;
6310 	 normally this is not a serious concern.  However, when we are doing
6311 	 varargs/stdargs we want to make the arg pointer point to the start
6312 	 of the incoming argument area.  */
6313       emit_move_insn (virtual_incoming_args_rtx,
6314 		      plus_constant (Pmode, arg_pointer_rtx, -64));
6315 
6316       /* Now return a pointer to the first anonymous argument.  */
6317       return copy_to_reg (expand_binop (Pmode, add_optab,
6318 					virtual_incoming_args_rtx,
6319 					offset, 0, 0, OPTAB_LIB_WIDEN));
6320     }
6321 
6322   /* Store general registers on the stack.  */
6323   dest = gen_rtx_MEM (BLKmode,
6324 		      plus_constant (Pmode, crtl->args.internal_arg_pointer,
6325 				     -16));
6326   set_mem_alias_set (dest, get_varargs_alias_set ());
6327   set_mem_align (dest, BITS_PER_WORD);
6328   move_block_from_reg (23, dest, 4);
6329 
6330   /* move_block_from_reg will emit code to store the argument registers
6331      individually as scalar stores.
6332 
6333      However, other insns may later load from the same addresses for
6334      a structure load (passing a struct to a varargs routine).
6335 
6336      The alias code assumes that such aliasing can never happen, so we
6337      have to keep memory referencing insns from moving up beyond the
6338      last argument register store.  So we emit a blockage insn here.  */
6339   emit_insn (gen_blockage ());
6340 
6341   return copy_to_reg (expand_binop (Pmode, add_optab,
6342 				    crtl->args.internal_arg_pointer,
6343 				    offset, 0, 0, OPTAB_LIB_WIDEN));
6344 }
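
/* So in the 64-bit runtime the eight GR argument registers land in
   the caller's save area at arg_pointer - 64 through arg_pointer - 8:
   %r26 at -64, %r25 at -56, ..., %r19 at -8; the value returned above
   then points at the first anonymous argument.  */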
6345 
6346 static void
6347 hppa_va_start (tree valist, rtx nextarg)
6348 {
6349   nextarg = expand_builtin_saveregs ();
6350   std_expand_builtin_va_start (valist, nextarg);
6351 }
6352 
6353 static tree
6354 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6355 			   gimple_seq *post_p)
6356 {
6357   if (TARGET_64BIT)
6358     {
6359       /* Args grow upward.  We can use the generic routines.  */
6360       return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6361     }
6362   else /* !TARGET_64BIT */
6363     {
6364       tree ptr = build_pointer_type (type);
6365       tree valist_type;
6366       tree t, u;
6367       unsigned int size, ofs;
6368       bool indirect;
6369 
6370       indirect = pass_va_arg_by_reference (type);
6371       if (indirect)
6372 	{
6373 	  type = ptr;
6374 	  ptr = build_pointer_type (type);
6375 	}
6376       size = int_size_in_bytes (type);
6377       valist_type = TREE_TYPE (valist);
6378 
6379       /* Args grow down.  Not handled by generic routines.  */
6380 
6381       u = fold_convert (sizetype, size_in_bytes (type));
6382       u = fold_build1 (NEGATE_EXPR, sizetype, u);
6383       t = fold_build_pointer_plus (valist, u);
6384 
6385       /* Align to 4 or 8 byte boundary depending on argument size.  */
6386 
6387       u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6388       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6389       t = fold_convert (valist_type, t);
6390 
6391       t = build2 (MODIFY_EXPR, valist_type, valist, t);
6392 
6393       ofs = (8 - size) % 4;
6394       if (ofs != 0)
6395 	t = fold_build_pointer_plus_hwi (t, ofs);
6396 
6397       t = fold_convert (ptr, t);
6398       t = build_va_arg_indirect_ref (t);
6399 
6400       if (indirect)
6401 	t = build_va_arg_indirect_ref (t);
6402 
6403       return t;
6404     }
6405 }
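
/* A worked example of the 32-bit path above: for a 1-byte char,
   valist becomes (valist - 1) & -4 and the value is read at offset
   (8 - 1) % 4 == 3 from there, i.e. the byte sits right-justified in
   its 4-byte slot; for an 8-byte double the pointer becomes
   (valist - 8) & -8 with no extra offset.  */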
6406 
6407 /* True if MODE is valid for the target.  By "valid", we mean able to
6408    be manipulated in non-trivial ways.  In particular, this means all
6409    the arithmetic is supported.
6410 
6411    Currently, TImode is not valid as the HP 64-bit runtime documentation
6412    doesn't document the alignment and calling conventions for this type.
6413    Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6414    2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE.  */
6415 
6416 static bool
6417 pa_scalar_mode_supported_p (scalar_mode mode)
6418 {
6419   int precision = GET_MODE_PRECISION (mode);
6420 
6421   switch (GET_MODE_CLASS (mode))
6422     {
6423     case MODE_PARTIAL_INT:
6424     case MODE_INT:
6425       if (precision == CHAR_TYPE_SIZE)
6426 	return true;
6427       if (precision == SHORT_TYPE_SIZE)
6428 	return true;
6429       if (precision == INT_TYPE_SIZE)
6430 	return true;
6431       if (precision == LONG_TYPE_SIZE)
6432 	return true;
6433       if (precision == LONG_LONG_TYPE_SIZE)
6434 	return true;
6435       return false;
6436 
6437     case MODE_FLOAT:
6438       if (precision == FLOAT_TYPE_SIZE)
6439 	return true;
6440       if (precision == DOUBLE_TYPE_SIZE)
6441 	return true;
6442       if (precision == LONG_DOUBLE_TYPE_SIZE)
6443 	return true;
6444       return false;
6445 
6446     case MODE_DECIMAL_FLOAT:
6447       return false;
6448 
6449     default:
6450       gcc_unreachable ();
6451     }
6452 }
6453 
6454 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6455    it branches into the delay slot.  Otherwise, return FALSE.  */
6456 
6457 static bool
6458 branch_to_delay_slot_p (rtx_insn *insn)
6459 {
6460   rtx_insn *jump_insn;
6461 
6462   if (dbr_sequence_length ())
6463     return FALSE;
6464 
6465   jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6466   while (insn)
6467     {
6468       insn = next_active_insn (insn);
6469       if (jump_insn == insn)
6470 	return TRUE;
6471 
6472       /* We can't rely on the length of asms.  So, we return FALSE when
6473 	 the branch is followed by an asm.  */
6474       if (!insn
6475 	  || GET_CODE (PATTERN (insn)) == ASM_INPUT
6476 	  || asm_noperands (PATTERN (insn)) >= 0
6477 	  || get_attr_length (insn) > 0)
6478 	break;
6479     }
6480 
6481   return FALSE;
6482 }
6483 
6484 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6485 
6486    This occurs when INSN has an unfilled delay slot and is followed
6487    by an asm.  Disaster can occur if the asm is empty and the jump
6488    branches into the delay slot.  So, we add a nop in the delay slot
6489    when this occurs.  */
6490 
6491 static bool
6492 branch_needs_nop_p (rtx_insn *insn)
6493 {
6494   rtx_insn *jump_insn;
6495 
6496   if (dbr_sequence_length ())
6497     return FALSE;
6498 
6499   jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6500   while (insn)
6501     {
6502       insn = next_active_insn (insn);
6503       if (!insn || jump_insn == insn)
6504 	return TRUE;
6505 
6506       if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6507 	   || asm_noperands (PATTERN (insn)) >= 0)
6508 	  && get_attr_length (insn) > 0)
6509 	break;
6510     }
6511 
6512   return FALSE;
6513 }
6514 
6515 /* Return TRUE if INSN, a forward jump insn, can use nullification
6516    to skip the following instruction.  This avoids an extra cycle due
6517    to a mis-predicted branch when we fall through.  */
6518 
6519 static bool
6520 use_skip_p (rtx_insn *insn)
6521 {
6522   rtx_insn *jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6523 
6524   while (insn)
6525     {
6526       insn = next_active_insn (insn);
6527 
6528       /* We can't rely on the length of asms, so we can't skip asms.  */
6529       if (!insn
6530 	  || GET_CODE (PATTERN (insn)) == ASM_INPUT
6531 	  || asm_noperands (PATTERN (insn)) >= 0)
6532 	break;
6533       if (get_attr_length (insn) == 4
6534 	  && jump_insn == next_active_insn (insn))
6535 	return TRUE;
6536       if (get_attr_length (insn) > 0)
6537 	break;
6538     }
6539 
6540   return FALSE;
6541 }
6542 
6543 /* This routine handles all the normal conditional branch sequences we
6544    might need to generate.  It handles compare immediate vs compare
6545    register, nullification of delay slots, varying length branches,
6546    negated branches, and all combinations of the above.  It returns the
6547    output template appropriate for the branch described by the given
6548    parameters.  */
6549 
6550 const char *
6551 pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
6552 {
6553   static char buf[100];
6554   bool useskip;
6555   int nullify = INSN_ANNULLED_BRANCH_P (insn);
6556   int length = get_attr_length (insn);
6557   int xdelay;
6558 
6559   /* A conditional branch to the following instruction (e.g. the delay slot)
6560      is asking for a disaster.  This can happen when not optimizing and
6561      when jump optimization fails.
6562 
6563      While it is usually safe to emit nothing, this can fail if the
6564      preceding instruction is a nullified branch with an empty delay
6565      slot and the same branch target as this branch.  We could check
6566      for this but jump optimization should eliminate nop jumps.  It
6567      is always safe to emit a nop.  */
6568   if (branch_to_delay_slot_p (insn))
6569     return "nop";
6570 
6571   /* The doubleword form of the cmpib instruction doesn't have the LEU
6572      and GTU conditions while the cmpb instruction does.  Since we accept
6573      zero for cmpb, we must ensure that we use cmpb for the comparison.  */
6574   if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6575     operands[2] = gen_rtx_REG (DImode, 0);
6576   if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6577     operands[1] = gen_rtx_REG (DImode, 0);
6578 
6579   /* If this is a long branch with its delay slot unfilled, set `nullify'
6580      as it can nullify the delay slot and save a nop.  */
6581   if (length == 8 && dbr_sequence_length () == 0)
6582     nullify = 1;
6583 
6584   /* If this is a short forward conditional branch which did not get
6585      its delay slot filled, the delay slot can still be nullified.  */
6586   if (! nullify && length == 4 && dbr_sequence_length () == 0)
6587     nullify = forward_branch_p (insn);
6588 
6589   /* A forward branch over a single nullified insn can be done with a
6590      comclr instruction.  This avoids a single cycle penalty due to a
6591      mis-predicted branch if we fall through (branch not taken).  */
6592   useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6593 
6594   switch (length)
6595     {
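      /* In the templates below, the {old|new} construct selects between
	 the PA 1.x and PA 2.0 mnemonics according to ASSEMBLER_DIALECT.  */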
6596       /* All short conditional branches except backwards with an unfilled
6597 	 delay slot.  */
6598       case 4:
6599 	if (useskip)
6600 	  strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6601 	else
6602 	  strcpy (buf, "{com%I2b,|cmp%I2b,}");
6603 	if (GET_MODE (operands[1]) == DImode)
6604 	  strcat (buf, "*");
6605 	if (negated)
6606 	  strcat (buf, "%B3");
6607 	else
6608 	  strcat (buf, "%S3");
6609 	if (useskip)
6610 	  strcat (buf, " %2,%r1,%%r0");
6611 	else if (nullify)
6612 	  {
6613 	    if (branch_needs_nop_p (insn))
6614 	      strcat (buf, ",n %2,%r1,%0%#");
6615 	    else
6616 	      strcat (buf, ",n %2,%r1,%0");
6617 	  }
6618 	else
6619 	  strcat (buf, " %2,%r1,%0");
6620 	break;
6621 
6622      /* All long conditionals.  Note a short backward branch with an
6623 	unfilled delay slot is treated just like a long backward branch
6624 	with an unfilled delay slot.  */
6625       case 8:
6626 	/* Handle weird backwards branch with a filled delay slot
6627 	   which is nullified.  */
6628 	if (dbr_sequence_length () != 0
6629 	    && ! forward_branch_p (insn)
6630 	    && nullify)
6631 	  {
6632 	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
6633 	    if (GET_MODE (operands[1]) == DImode)
6634 	      strcat (buf, "*");
6635 	    if (negated)
6636 	      strcat (buf, "%S3");
6637 	    else
6638 	      strcat (buf, "%B3");
6639 	    strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6640 	  }
6641 	/* Handle short backwards branch with an unfilled delay slot.
6642 	   Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6643 	   taken and untaken branches.  */
6644 	else if (dbr_sequence_length () == 0
6645 		 && ! forward_branch_p (insn)
6646 		 && INSN_ADDRESSES_SET_P ()
6647 		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6648 				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6649 	  {
6650 	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
6651 	    if (GET_MODE (operands[1]) == DImode)
6652 	      strcat (buf, "*");
6653 	    if (negated)
6654 	      strcat (buf, "%B3 %2,%r1,%0%#");
6655 	    else
6656 	      strcat (buf, "%S3 %2,%r1,%0%#");
6657 	  }
6658 	else
6659 	  {
6660 	    strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6661 	    if (GET_MODE (operands[1]) == DImode)
6662 	      strcat (buf, "*");
6663 	    if (negated)
6664 	      strcat (buf, "%S3");
6665 	    else
6666 	      strcat (buf, "%B3");
6667 	    if (nullify)
6668 	      strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6669 	    else
6670 	      strcat (buf, " %2,%r1,%%r0\n\tb %0");
6671 	  }
6672 	break;
6673 
6674       default:
6675 	/* The reversed conditional branch must branch over one additional
6676 	   instruction if the delay slot is filled and needs to be extracted
6677 	   by pa_output_lbranch.  If the delay slot is empty or this is a
6678 	   nullified forward branch, the instruction after the reversed
6679 	   condition branch must be nullified.  */
6680 	if (dbr_sequence_length () == 0
6681 	    || (nullify && forward_branch_p (insn)))
6682 	  {
6683 	    nullify = 1;
6684 	    xdelay = 0;
6685 	    operands[4] = GEN_INT (length);
6686 	  }
6687 	else
6688 	  {
6689 	    xdelay = 1;
6690 	    operands[4] = GEN_INT (length + 4);
6691 	  }
6692 
6693 	/* Create a reversed conditional branch which branches around
6694 	   the following insns.  */
6695 	if (GET_MODE (operands[1]) != DImode)
6696 	  {
6697 	    if (nullify)
6698 	      {
6699 		if (negated)
6700 		  strcpy (buf,
6701 		    "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6702 		else
6703 		  strcpy (buf,
6704 		    "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6705 	      }
6706 	    else
6707 	      {
6708 		if (negated)
6709 		  strcpy (buf,
6710 		    "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6711 		else
6712 		  strcpy (buf,
6713 		    "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6714 	      }
6715 	  }
6716 	else
6717 	  {
6718 	    if (nullify)
6719 	      {
6720 		if (negated)
6721 		  strcpy (buf,
6722 		    "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6723 		else
6724 		  strcpy (buf,
6725 		    "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6726 	      }
6727 	    else
6728 	      {
6729 		if (negated)
6730 		  strcpy (buf,
6731 		    "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6732 		else
6733 		  strcpy (buf,
6734 		    "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6735 	      }
6736 	  }
6737 
6738 	output_asm_insn (buf, operands);
6739 	return pa_output_lbranch (operands[0], insn, xdelay);
6740     }
6741   return buf;
6742 }
6743 
6744 /* Output a PIC pc-relative instruction sequence to load the address of
6745    OPERANDS[0] to register OPERANDS[2].  OPERANDS[0] is a symbol ref
6746    or a code label.  OPERANDS[1] specifies the register to use to load
6747    the program counter.  OPERANDS[3] may be used for label generation.
6748    The sequence is always three instructions in length.  The program
6749    counter recorded for PA 1.X is eight bytes more than that for PA 2.0.
6750    Register %r1 is clobbered.  */
6751 
6752 static void
6753 pa_output_pic_pcrel_sequence (rtx *operands)
6754 {
6755   gcc_assert (SYMBOL_REF_P (operands[0]) || LABEL_P (operands[0]));
6756   if (TARGET_PA_20)
6757     {
6758       /* We can use mfia to determine the current program counter.  */
6759       if (TARGET_SOM || !TARGET_GAS)
6760 	{
6761 	  operands[3] = gen_label_rtx ();
6762 	  targetm.asm_out.internal_label (asm_out_file, "L",
6763 					  CODE_LABEL_NUMBER (operands[3]));
6764 	  output_asm_insn ("mfia %1", operands);
6765 	  output_asm_insn ("addil L'%0-%l3,%1", operands);
6766 	  output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6767 	}
6768       else
6769 	{
6770 	  output_asm_insn ("mfia %1", operands);
6771 	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands);
6772 	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands);
6773 	}
6774     }
6775   else
6776     {
6777       /* We need to use a branch to determine the current program counter.  */
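      /* The branch records its own address plus eight in %1, so the
	 $PIC_pcrel$0 offsets below are eight smaller than those in the
	 PA 2.0 mfia sequence above.  */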
6778       output_asm_insn ("{bl|b,l} .+8,%1", operands);
6779       if (TARGET_SOM || !TARGET_GAS)
6780 	{
6781 	  operands[3] = gen_label_rtx ();
6782 	  output_asm_insn ("addil L'%0-%l3,%1", operands);
6783 	  targetm.asm_out.internal_label (asm_out_file, "L",
6784 					  CODE_LABEL_NUMBER (operands[3]));
6785 	  output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6786 	}
6787       else
6788 	{
6789 	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands);
6790 	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands);
6791 	}
6792     }
6793 }
6794 
6795 /* This routine handles output of long unconditional branches that
6796    exceed the maximum range of a simple branch instruction.  Since
6797    we don't have a register available for the branch, we save register
6798    %r1 in the frame marker, load the branch destination DEST into %r1,
6799    execute the branch, and restore %r1 in the delay slot of the branch.
6800 
6801    Since long branches may have an insn in the delay slot and the
6802    delay slot is used to restore %r1, we in general need to extract
6803    this insn and execute it before the branch.  However, to facilitate
6804    use of this function by conditional branches, we also provide an
6805    option to not extract the delay insn so that it will be emitted
6806    after the long branch.  So, if there is an insn in the delay slot,
6807    it is extracted if XDELAY is nonzero.
6808 
6809    The lengths of the various long-branch sequences are 20, 16 and 24
6810    bytes for the portable runtime, non-PIC and PIC cases, respectively.  */
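
/* For illustration, the 16-byte non-PIC sequence emitted below is
   roughly the following, with `target' standing for the destination
   and assuming the return pointer slot in the frame marker is free:

	stw %r1,-20(%r30)	; save %r1
	ldil L'target,%r1	; left portion of the target address
	be R'target(%sr4,%r1)	; interspace branch to target
	ldw -20(%r30),%r1	; restore %r1 in the delay slot  */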
6811 
6812 const char *
6813 pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
6814 {
6815   rtx xoperands[4];
6816 
6817   xoperands[0] = dest;
6818 
6819   /* First, free up the delay slot.  */
6820   if (xdelay && dbr_sequence_length () != 0)
6821     {
6822       /* We can't handle a jump in the delay slot.  */
6823       gcc_assert (! JUMP_P (NEXT_INSN (insn)));
6824 
6825       final_scan_insn (NEXT_INSN (insn), asm_out_file,
6826 		       optimize, 0, NULL);
6827 
6828       /* Now delete the delay insn.  */
6829       SET_INSN_DELETED (NEXT_INSN (insn));
6830     }
6831 
6832   /* Output an insn to save %r1.  The runtime documentation doesn't
6833      specify whether the "Clean Up" slot in the caller's frame can
6834      be clobbered by the callee.  It isn't copied by HP's builtin
6835      alloca, so this suggests that it can be clobbered if necessary.
6836      The "Static Link" location is copied by HP builtin alloca, so
6837      we avoid using it.  Using the cleanup slot might be a problem
6838      if we have to interoperate with languages that pass cleanup
6839      information.  However, it should be possible to handle these
6840      situations with GCC's asm feature.
6841 
6842      The "Current RP" slot is reserved for the called procedure, so
6843      we try to use it when we don't have a frame of our own.  It's
6844      rather unlikely that we won't have a frame when we need to emit
6845      a very long branch.
6846 
6847      Really the way to go long term is a register scavenger; go to
6848      the target of the jump and find a register which we can use
6849      as a scratch to hold the value in %r1.  Then, we wouldn't have
6850      to free up the delay slot or clobber a slot that may be needed
6851      for other purposes.  */
6852   if (TARGET_64BIT)
6853     {
6854       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6855 	/* Use the return pointer slot in the frame marker.  */
6856 	output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6857       else
6858 	/* Use the slot at -40 in the frame marker since HP builtin
6859 	   alloca doesn't copy it.  */
6860 	output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6861     }
6862   else
6863     {
6864       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6865 	/* Use the return pointer slot in the frame marker.  */
6866 	output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6867       else
6868 	/* Use the "Clean Up" slot in the frame marker.  In GCC,
6869 	   the only other use of this location is for copying a
6870 	   floating point double argument from a floating-point
6871 	   register to two general registers.  The copy is done
6872 	   as an "atomic" operation when outputting a call, so it
6873 	   won't interfere with our using the location here.  */
6874 	output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6875     }
6876 
6877   if (TARGET_PORTABLE_RUNTIME)
6878     {
6879       output_asm_insn ("ldil L'%0,%%r1", xoperands);
6880       output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6881       output_asm_insn ("bv %%r0(%%r1)", xoperands);
6882     }
6883   else if (flag_pic)
6884     {
6885       xoperands[1] = gen_rtx_REG (Pmode, 1);
6886       xoperands[2] = xoperands[1];
6887       pa_output_pic_pcrel_sequence (xoperands);
6888       output_asm_insn ("bv %%r0(%%r1)", xoperands);
6889     }
6890   else
6891     /* Now output a very long branch to the original target.  */
6892     output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6893 
6894   /* Now restore the value of %r1 in the delay slot.  */
6895   if (TARGET_64BIT)
6896     {
6897       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6898 	return "ldd -16(%%r30),%%r1";
6899       else
6900 	return "ldd -40(%%r30),%%r1";
6901     }
6902   else
6903     {
6904       if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6905 	return "ldw -20(%%r30),%%r1";
6906       else
6907 	return "ldw -12(%%r30),%%r1";
6908     }
6909 }
6910 
6911 /* This routine handles all the branch-on-bit conditional branch sequences we
6912    might need to generate.  It handles nullification of delay slots,
6913    varying length branches, negated branches and all combinations of the
6914    above.  It returns the appropriate output template to emit the branch.  */
6915 
6916 const char *
6917 pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
6918 {
6919   static char buf[100];
6920   bool useskip;
6921   int nullify = INSN_ANNULLED_BRANCH_P (insn);
6922   int length = get_attr_length (insn);
6923   int xdelay;
6924 
6925   /* A conditional branch to the following instruction (e.g. the delay slot) is
6926      asking for a disaster.  I do not think this can happen as this pattern
6927      is only used when optimizing; jump optimization should eliminate the
6928      jump.  But be prepared just in case.  */
6929 
6930   if (branch_to_delay_slot_p (insn))
6931     return "nop";
6932 
6933   /* If this is a long branch with its delay slot unfilled, set `nullify'
6934      as it can nullify the delay slot and save a nop.  */
6935   if (length == 8 && dbr_sequence_length () == 0)
6936     nullify = 1;
6937 
6938   /* If this is a short forward conditional branch which did not get
6939      its delay slot filled, the delay slot can still be nullified.  */
6940   if (! nullify && length == 4 && dbr_sequence_length () == 0)
6941     nullify = forward_branch_p (insn);
6942 
6943   /* A forward branch over a single nullified insn can be done with an
6944      extrs instruction.  This avoids a single cycle penalty due to a
6945      mis-predicted branch if we fall through (branch not taken).  */
6946   useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6947 
6948   switch (length)
6949     {
6950 
6951       /* All short conditional branches except backwards with an unfilled
6952 	 delay slot.  */
6953       case 4:
6954 	if (useskip)
6955 	  strcpy (buf, "{extrs,|extrw,s,}");
6956 	else
6957 	  strcpy (buf, "bb,");
6958 	if (useskip && GET_MODE (operands[0]) == DImode)
6959 	  strcpy (buf, "extrd,s,*");
6960 	else if (GET_MODE (operands[0]) == DImode)
6961 	  strcpy (buf, "bb,*");
6962 	if ((which == 0 && negated)
6963 	     || (which == 1 && ! negated))
6964 	  strcat (buf, ">=");
6965 	else
6966 	  strcat (buf, "<");
6967 	if (useskip)
6968 	  strcat (buf, " %0,%1,1,%%r0");
6969 	else if (nullify && negated)
6970 	  {
6971 	    if (branch_needs_nop_p (insn))
6972 	      strcat (buf, ",n %0,%1,%3%#");
6973 	    else
6974 	      strcat (buf, ",n %0,%1,%3");
6975 	  }
6976 	else if (nullify && ! negated)
6977 	  {
6978 	    if (branch_needs_nop_p (insn))
6979 	      strcat (buf, ",n %0,%1,%2%#");
6980 	    else
6981 	      strcat (buf, ",n %0,%1,%2");
6982 	  }
6983 	else if (! nullify && negated)
6984 	  strcat (buf, " %0,%1,%3");
6985 	else if (! nullify && ! negated)
6986 	  strcat (buf, " %0,%1,%2");
6987 	break;
6988 
6989      /* All long conditionals.  Note a short backward branch with an
6990 	unfilled delay slot is treated just like a long backward branch
6991 	with an unfilled delay slot.  */
6992       case 8:
6993 	/* Handle weird backwards branch with a filled delay slot
6994 	   which is nullified.  */
6995 	if (dbr_sequence_length () != 0
6996 	    && ! forward_branch_p (insn)
6997 	    && nullify)
6998 	  {
6999 	    strcpy (buf, "bb,");
7000 	    if (GET_MODE (operands[0]) == DImode)
7001 	      strcat (buf, "*");
7002 	    if ((which == 0 && negated)
7003 		|| (which == 1 && ! negated))
7004 	      strcat (buf, "<");
7005 	    else
7006 	      strcat (buf, ">=");
7007 	    if (negated)
7008 	      strcat (buf, ",n %0,%1,.+12\n\tb %3");
7009 	    else
7010 	      strcat (buf, ",n %0,%1,.+12\n\tb %2");
7011 	  }
7012 	/* Handle short backwards branch with an unfilled delay slot.
7013 	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
7014 	   taken and untaken branches.  */
7015 	else if (dbr_sequence_length () == 0
7016 		 && ! forward_branch_p (insn)
7017 		 && INSN_ADDRESSES_SET_P ()
7018 		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7019 				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7020 	  {
7021 	    strcpy (buf, "bb,");
7022 	    if (GET_MODE (operands[0]) == DImode)
7023 	      strcat (buf, "*");
7024 	    if ((which == 0 && negated)
7025 		|| (which == 1 && ! negated))
7026 	      strcat (buf, ">=");
7027 	    else
7028 	      strcat (buf, "<");
7029 	    if (negated)
7030 	      strcat (buf, " %0,%1,%3%#");
7031 	    else
7032 	      strcat (buf, " %0,%1,%2%#");
7033 	  }
7034 	else
7035 	  {
7036 	    if (GET_MODE (operands[0]) == DImode)
7037 	      strcpy (buf, "extrd,s,*");
7038 	    else
7039 	      strcpy (buf, "{extrs,|extrw,s,}");
7040 	    if ((which == 0 && negated)
7041 		|| (which == 1 && ! negated))
7042 	      strcat (buf, "<");
7043 	    else
7044 	      strcat (buf, ">=");
7045 	    if (nullify && negated)
7046 	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
7047 	    else if (nullify && ! negated)
7048 	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
7049 	    else if (negated)
7050 	      strcat (buf, " %0,%1,1,%%r0\n\tb %3");
7051 	    else
7052 	      strcat (buf, " %0,%1,1,%%r0\n\tb %2");
7053 	  }
7054 	break;
7055 
7056       default:
7057 	/* The reversed conditional branch must branch over one additional
7058 	   instruction if the delay slot is filled and needs to be extracted
7059 	   by pa_output_lbranch.  If the delay slot is empty or this is a
7060 	   nullified forward branch, the instruction after the reversed
7061 	   condition branch must be nullified.  */
7062 	if (dbr_sequence_length () == 0
7063 	    || (nullify && forward_branch_p (insn)))
7064 	  {
7065 	    nullify = 1;
7066 	    xdelay = 0;
7067 	    operands[4] = GEN_INT (length);
7068 	  }
7069 	else
7070 	  {
7071 	    xdelay = 1;
7072 	    operands[4] = GEN_INT (length + 4);
7073 	  }
7074 
7075 	if (GET_MODE (operands[0]) == DImode)
7076 	  strcpy (buf, "bb,*");
7077 	else
7078 	  strcpy (buf, "bb,");
7079 	if ((which == 0 && negated)
7080 	    || (which == 1 && !negated))
7081 	  strcat (buf, "<");
7082 	else
7083 	  strcat (buf, ">=");
7084 	if (nullify)
7085 	  strcat (buf, ",n %0,%1,.+%4");
7086 	else
7087 	  strcat (buf, " %0,%1,.+%4");
7088 	output_asm_insn (buf, operands);
7089 	return pa_output_lbranch (negated ? operands[3] : operands[2],
7090 				  insn, xdelay);
7091     }
7092   return buf;
7093 }
7094 
7095 /* This routine handles all the branch-on-variable-bit conditional branch
7096    sequences we might need to generate.  It handles nullification of delay
7097    slots, varying length branches, negated branches and all combinations
7098    of the above.  It returns the appropriate output template to emit the
7099    branch.  */
7100 
7101 const char *
7102 pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
7103 	       int which)
7104 {
7105   static char buf[100];
7106   bool useskip;
7107   int nullify = INSN_ANNULLED_BRANCH_P (insn);
7108   int length = get_attr_length (insn);
7109   int xdelay;
7110 
7111   /* A conditional branch to the following instruction (e.g. the delay slot) is
7112      asking for a disaster.  I do not think this can happen as this pattern
7113      is only used when optimizing; jump optimization should eliminate the
7114      jump.  But be prepared just in case.  */
7115 
7116   if (branch_to_delay_slot_p (insn))
7117     return "nop";
7118 
7119   /* If this is a long branch with its delay slot unfilled, set `nullify'
7120      as it can nullify the delay slot and save a nop.  */
7121   if (length == 8 && dbr_sequence_length () == 0)
7122     nullify = 1;
7123 
7124   /* If this is a short forward conditional branch which did not get
7125      its delay slot filled, the delay slot can still be nullified.  */
7126   if (! nullify && length == 4 && dbr_sequence_length () == 0)
7127     nullify = forward_branch_p (insn);
7128 
7129   /* A forward branch over a single nullified insn can be done with an
7130      extrs instruction.  This avoids a single cycle penalty due to a
7131      mis-predicted branch if we fall through (branch not taken).  */
7132   useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7133 
7134   switch (length)
7135     {
7136 
7137       /* All short conditional branches except backwards with an unfilled
7138 	 delay slot.  */
7139       case 4:
7140 	if (useskip)
7141 	  strcpy (buf, "{vextrs,|extrw,s,}");
7142 	else
7143 	  strcpy (buf, "{bvb,|bb,}");
7144 	if (useskip && GET_MODE (operands[0]) == DImode)
7145 	  strcpy (buf, "extrd,s,*");
7146 	else if (GET_MODE (operands[0]) == DImode)
7147 	  strcpy (buf, "bb,*");
7148 	if ((which == 0 && negated)
7149 	     || (which == 1 && ! negated))
7150 	  strcat (buf, ">=");
7151 	else
7152 	  strcat (buf, "<");
7153 	if (useskip)
7154 	  strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7155 	else if (nullify && negated)
7156 	  {
7157 	    if (branch_needs_nop_p (insn))
7158 	      strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7159 	    else
7160 	      strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
7161 	  }
7162 	else if (nullify && ! negated)
7163 	  {
7164 	    if (branch_needs_nop_p (insn))
7165 	      strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7166 	    else
7167 	      strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
7168 	  }
7169 	else if (! nullify && negated)
7170 	  strcat (buf, "{ %0,%3| %0,%%sar,%3}");
7171 	else if (! nullify && ! negated)
7172 	  strcat (buf, "{ %0,%2| %0,%%sar,%2}");
7173 	break;
7174 
7175      /* All long conditionals.  Note a short backward branch with an
7176 	unfilled delay slot is treated just like a long backward branch
7177 	with an unfilled delay slot.  */
7178       case 8:
7179 	/* Handle weird backwards branch with a filled delay slot
7180 	   which is nullified.  */
7181 	if (dbr_sequence_length () != 0
7182 	    && ! forward_branch_p (insn)
7183 	    && nullify)
7184 	  {
7185 	    strcpy (buf, "{bvb,|bb,}");
7186 	    if (GET_MODE (operands[0]) == DImode)
7187 	      strcat (buf, "*");
7188 	    if ((which == 0 && negated)
7189 		|| (which == 1 && ! negated))
7190 	      strcat (buf, "<");
7191 	    else
7192 	      strcat (buf, ">=");
7193 	    if (negated)
7194 	      strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7195 	    else
7196 	      strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7197 	  }
7198 	/* Handle short backwards branch with an unfilled delay slot.
7199 	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
7200 	   taken and untaken branches.  */
7201 	else if (dbr_sequence_length () == 0
7202 		 && ! forward_branch_p (insn)
7203 		 && INSN_ADDRESSES_SET_P ()
7204 		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7205 				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7206 	  {
7207 	    strcpy (buf, "{bvb,|bb,}");
7208 	    if (GET_MODE (operands[0]) == DImode)
7209 	      strcat (buf, "*");
7210 	    if ((which == 0 && negated)
7211 		|| (which == 1 && ! negated))
7212 	      strcat (buf, ">=");
7213 	    else
7214 	      strcat (buf, "<");
7215 	    if (negated)
7216 	      strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
7217 	    else
7218 	      strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
7219 	  }
7220 	else
7221 	  {
7222 	    strcpy (buf, "{vextrs,|extrw,s,}");
7223 	    if (GET_MODE (operands[0]) == DImode)
7224 	      strcpy (buf, "extrd,s,*");
7225 	    if ((which == 0 && negated)
7226 		|| (which == 1 && ! negated))
7227 	      strcat (buf, "<");
7228 	    else
7229 	      strcat (buf, ">=");
7230 	    if (nullify && negated)
7231 	      strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7232 	    else if (nullify && ! negated)
7233 	      strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7234 	    else if (negated)
7235 	      strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7236 	    else
7237 	      strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7238 	  }
7239 	break;
7240 
7241       default:
7242 	/* The reversed conditional branch must branch over one additional
7243 	   instruction if the delay slot is filled and needs to be extracted
7244 	   by pa_output_lbranch.  If the delay slot is empty or this is a
7245 	   nullified forward branch, the instruction after the reversed
7246 	   condition branch must be nullified.  */
7247 	if (dbr_sequence_length () == 0
7248 	    || (nullify && forward_branch_p (insn)))
7249 	  {
7250 	    nullify = 1;
7251 	    xdelay = 0;
7252 	    operands[4] = GEN_INT (length);
7253 	  }
7254 	else
7255 	  {
7256 	    xdelay = 1;
7257 	    operands[4] = GEN_INT (length + 4);
7258 	  }
7259 
7260 	if (GET_MODE (operands[0]) == DImode)
7261 	  strcpy (buf, "bb,*");
7262 	else
7263 	  strcpy (buf, "{bvb,|bb,}");
7264 	if ((which == 0 && negated)
7265 	    || (which == 1 && !negated))
7266 	  strcat (buf, "<");
7267 	else
7268 	  strcat (buf, ">=");
7269 	if (nullify)
7270 	  strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
7271 	else
7272 	  strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
7273 	output_asm_insn (buf, operands);
7274 	return pa_output_lbranch (negated ? operands[3] : operands[2],
7275 				  insn, xdelay);
7276     }
7277   return buf;
7278 }
7279 
7280 /* Return the output template for emitting a dbra type insn.
7281 
7282    Note it may perform some output operations on its own before
7283    returning the final output string.  */
7284 const char *
7285 pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
7286 {
7287   int length = get_attr_length (insn);
7288 
7289   /* A conditional branch to the following instruction (e.g. the delay slot) is
7290      asking for a disaster.  Be prepared!  */
7291 
7292   if (branch_to_delay_slot_p (insn))
7293     {
7294       if (which_alternative == 0)
7295 	return "ldo %1(%0),%0";
7296       else if (which_alternative == 1)
7297 	{
7298 	  output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7299 	  output_asm_insn ("ldw -16(%%r30),%4", operands);
7300 	  output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7301 	  return "{fldws|fldw} -16(%%r30),%0";
7302 	}
7303       else
7304 	{
7305 	  output_asm_insn ("ldw %0,%4", operands);
7306 	  return "ldo %1(%4),%4\n\tstw %4,%0";
7307 	}
7308     }
7309 
7310   if (which_alternative == 0)
7311     {
7312       int nullify = INSN_ANNULLED_BRANCH_P (insn);
7313       int xdelay;
7314 
7315       /* If this is a long branch with its delay slot unfilled, set `nullify'
7316 	 as it can nullify the delay slot and save a nop.  */
7317       if (length == 8 && dbr_sequence_length () == 0)
7318 	nullify = 1;
7319 
7320       /* If this is a short forward conditional branch which did not get
7321 	 its delay slot filled, the delay slot can still be nullified.  */
7322       if (! nullify && length == 4 && dbr_sequence_length () == 0)
7323 	nullify = forward_branch_p (insn);
7324 
7325       switch (length)
7326 	{
7327 	case 4:
7328 	  if (nullify)
7329 	    {
7330 	      if (branch_needs_nop_p (insn))
7331 		return "addib,%C2,n %1,%0,%3%#";
7332 	      else
7333 		return "addib,%C2,n %1,%0,%3";
7334 	    }
7335 	  else
7336 	    return "addib,%C2 %1,%0,%3";
7337 
7338 	case 8:
7339 	  /* Handle weird backwards branch with a filled delay slot
7340 	     which is nullified.  */
7341 	  if (dbr_sequence_length () != 0
7342 	      && ! forward_branch_p (insn)
7343 	      && nullify)
7344 	    return "addib,%N2,n %1,%0,.+12\n\tb %3";
7345 	  /* Handle short backwards branch with an unfilled delay slot.
7346 	     Using an addb;nop rather than addi;bl saves 1 cycle for both
7347 	     taken and untaken branches.  */
7348 	  else if (dbr_sequence_length () == 0
7349 		   && ! forward_branch_p (insn)
7350 		   && INSN_ADDRESSES_SET_P ()
7351 		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7352 				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7353 	      return "addib,%C2 %1,%0,%3%#";
7354 
7355 	  /* Handle normal cases.  */
7356 	  if (nullify)
7357 	    return "addi,%N2 %1,%0,%0\n\tb,n %3";
7358 	  else
7359 	    return "addi,%N2 %1,%0,%0\n\tb %3";
7360 
7361 	default:
7362 	  /* The reversed conditional branch must branch over one additional
7363 	     instruction if the delay slot is filled and needs to be extracted
7364 	     by pa_output_lbranch.  If the delay slot is empty or this is a
7365 	     nullified forward branch, the instruction after the reversed
7366 	     condition branch must be nullified.  */
7367 	  if (dbr_sequence_length () == 0
7368 	      || (nullify && forward_branch_p (insn)))
7369 	    {
7370 	      nullify = 1;
7371 	      xdelay = 0;
7372 	      operands[4] = GEN_INT (length);
7373 	    }
7374 	  else
7375 	    {
7376 	      xdelay = 1;
7377 	      operands[4] = GEN_INT (length + 4);
7378 	    }
7379 
7380 	  if (nullify)
7381 	    output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7382 	  else
7383 	    output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7384 
7385 	  return pa_output_lbranch (operands[3], insn, xdelay);
7386 	}
7387 
7388     }
7389   /* Deal with gross reload from FP register case.  */
7390   else if (which_alternative == 1)
7391     {
7392       /* Move loop counter from FP register to MEM then into a GR,
7393 	 increment the GR, store the GR into MEM, and finally reload
7394 	 the FP register from MEM from within the branch's delay slot.  */
7395       output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7396 		       operands);
7397       output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7398       if (length == 24)
7399 	return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7400       else if (length == 28)
7401 	return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7402       else
7403 	{
7404 	  operands[5] = GEN_INT (length - 16);
7405 	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7406 	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7407 	  return pa_output_lbranch (operands[3], insn, 0);
7408 	}
7409     }
7410   /* Deal with gross reload from memory case.  */
7411   else
7412     {
7413       /* Reload the loop counter from memory; the store back to memory
7414 	 happens in the branch's delay slot.  */
7415       output_asm_insn ("ldw %0,%4", operands);
7416       if (length == 12)
7417 	return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7418       else if (length == 16)
7419 	return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7420       else
7421 	{
7422 	  operands[5] = GEN_INT (length - 4);
7423 	  output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7424 	  return pa_output_lbranch (operands[3], insn, 0);
7425 	}
7426     }
7427 }
7428 
7429 /* Return the output template for emitting a movb type insn.
7430 
7431    Note it may perform some output operations on its own before
7432    returning the final output string.  */
7433 const char *
7434 pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
7435 	     int reverse_comparison)
7436 {
7437   int length = get_attr_length (insn);
7438 
7439   /* A conditional branch to the following instruction (e.g. the delay slot) is
7440      asking for a disaster.  Be prepared!  */
7441 
7442   if (branch_to_delay_slot_p (insn))
7443     {
7444       if (which_alternative == 0)
7445 	return "copy %1,%0";
7446       else if (which_alternative == 1)
7447 	{
7448 	  output_asm_insn ("stw %1,-16(%%r30)", operands);
7449 	  return "{fldws|fldw} -16(%%r30),%0";
7450 	}
7451       else if (which_alternative == 2)
7452 	return "stw %1,%0";
7453       else
7454 	return "mtsar %r1";
7455     }
7456 
7457   /* Support the second variant.  */
7458   if (reverse_comparison)
7459     PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7460 
7461   if (which_alternative == 0)
7462     {
7463       int nullify = INSN_ANNULLED_BRANCH_P (insn);
7464       int xdelay;
7465 
7466       /* If this is a long branch with its delay slot unfilled, set `nullify'
7467 	 as it can nullify the delay slot and save a nop.  */
7468       if (length == 8 && dbr_sequence_length () == 0)
7469 	nullify = 1;
7470 
7471       /* If this is a short forward conditional branch which did not get
7472 	 its delay slot filled, the delay slot can still be nullified.  */
7473       if (! nullify && length == 4 && dbr_sequence_length () == 0)
7474 	nullify = forward_branch_p (insn);
7475 
7476       switch (length)
7477 	{
7478 	case 4:
7479 	  if (nullify)
7480 	    {
7481 	      if (branch_needs_nop_p (insn))
7482 		return "movb,%C2,n %1,%0,%3%#";
7483 	      else
7484 		return "movb,%C2,n %1,%0,%3";
7485 	    }
7486 	  else
7487 	    return "movb,%C2 %1,%0,%3";
7488 
7489 	case 8:
7490 	  /* Handle weird backwards branch with a filled delay slot
7491 	     which is nullified.  */
7492 	  if (dbr_sequence_length () != 0
7493 	      && ! forward_branch_p (insn)
7494 	      && nullify)
7495 	    return "movb,%N2,n %1,%0,.+12\n\tb %3";
7496 
7497 	  /* Handle short backwards branch with an unfilled delay slot.
7498 	     Using a movb;nop rather than or;bl saves 1 cycle for both
7499 	     taken and untaken branches.  */
7500 	  else if (dbr_sequence_length () == 0
7501 		   && ! forward_branch_p (insn)
7502 		   && INSN_ADDRESSES_SET_P ()
7503 		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7504 				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7505 	    return "movb,%C2 %1,%0,%3%#";
7506 	  /* Handle normal cases.  */
7507 	  if (nullify)
7508 	    return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7509 	  else
7510 	    return "or,%N2 %1,%%r0,%0\n\tb %3";
7511 
7512 	default:
7513 	  /* The reversed conditional branch must branch over one additional
7514 	     instruction if the delay slot is filled and needs to be extracted
7515 	     by pa_output_lbranch.  If the delay slot is empty or this is a
7516 	     nullified forward branch, the instruction after the reversed
7517 	     condition branch must be nullified.  */
7518 	  if (dbr_sequence_length () == 0
7519 	      || (nullify && forward_branch_p (insn)))
7520 	    {
7521 	      nullify = 1;
7522 	      xdelay = 0;
7523 	      operands[4] = GEN_INT (length);
7524 	    }
7525 	  else
7526 	    {
7527 	      xdelay = 1;
7528 	      operands[4] = GEN_INT (length + 4);
7529 	    }
7530 
7531 	  if (nullify)
7532 	    output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7533 	  else
7534 	    output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7535 
7536 	  return pa_output_lbranch (operands[3], insn, xdelay);
7537 	}
7538     }
7539   /* Deal with gross reload for FP destination register case.  */
7540   else if (which_alternative == 1)
7541     {
7542       /* Move source register to MEM, perform the branch test, then
7543 	 finally load the FP register from MEM from within the branch's
7544 	 delay slot.  */
7545       output_asm_insn ("stw %1,-16(%%r30)", operands);
7546       if (length == 12)
7547 	return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7548       else if (length == 16)
7549 	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7550       else
7551 	{
7552 	  operands[4] = GEN_INT (length - 4);
7553 	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7554 	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7555 	  return pa_output_lbranch (operands[3], insn, 0);
7556 	}
7557     }
7558   /* Deal with gross reload from memory case.  */
7559   else if (which_alternative == 2)
7560     {
7561       /* Reload the loop counter from memory; the store back to memory
7562 	 happens in the branch's delay slot.  */
7563       if (length == 8)
7564 	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7565       else if (length == 12)
7566 	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7567       else
7568 	{
7569 	  operands[4] = GEN_INT (length);
7570 	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7571 			   operands);
7572 	  return pa_output_lbranch (operands[3], insn, 0);
7573 	}
7574     }
7575   /* Handle SAR as a destination.  */
7576   else
7577     {
7578       if (length == 8)
7579 	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7580       else if (length == 12)
7581 	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7582       else
7583 	{
7584 	  operands[4] = GEN_INT (length);
7585 	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7586 			   operands);
7587 	  return pa_output_lbranch (operands[3], insn, 0);
7588 	}
7589     }
7590 }
7591 
7592 /* Copy any FP arguments in INSN into integer registers.  */
7593 static void
7594 copy_fp_args (rtx_insn *insn)
7595 {
7596   rtx link;
7597   rtx xoperands[2];
7598 
7599   for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7600     {
7601       int arg_mode, regno;
7602       rtx use = XEXP (link, 0);
7603 
7604       if (! (GET_CODE (use) == USE
7605 	  && GET_CODE (XEXP (use, 0)) == REG
7606 	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7607 	continue;
7608 
7609       arg_mode = GET_MODE (XEXP (use, 0));
7610       regno = REGNO (XEXP (use, 0));
7611 
7612       /* Is it a floating point register?  */
7613       if (regno >= 32 && regno <= 39)
7614 	{
7615 	  /* Copy the FP register into an integer register via memory.  */
7616 	  if (arg_mode == SFmode)
7617 	    {
7618 	      xoperands[0] = XEXP (use, 0);
7619 	      xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7620 	      output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7621 	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7622 	    }
7623 	  else
7624 	    {
7625 	      xoperands[0] = XEXP (use, 0);
7626 	      xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7627 	      output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7628 	      output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7629 	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7630 	    }
7631 	}
7632     }
7633 }
7634 
7635 /* Compute length of the FP argument copy sequence for INSN.  */
7636 static int
7637 length_fp_args (rtx_insn *insn)
7638 {
7639   int length = 0;
7640   rtx link;
7641 
7642   for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7643     {
7644       int arg_mode, regno;
7645       rtx use = XEXP (link, 0);
7646 
7647       if (! (GET_CODE (use) == USE
7648 	  && GET_CODE (XEXP (use, 0)) == REG
7649 	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7650 	continue;
7651 
7652       arg_mode = GET_MODE (XEXP (use, 0));
7653       regno = REGNO (XEXP (use, 0));
7654 
7655       /* Is it a floating point register?  */
7656       if (regno >= 32 && regno <= 39)
7657 	{
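	  /* These sizes mirror copy_fp_args above: fstw plus ldw for
	     SFmode (8 bytes), fstd plus two ldw's for DFmode (12 bytes).  */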
7658 	  if (arg_mode == SFmode)
7659 	    length += 8;
7660 	  else
7661 	    length += 12;
7662 	}
7663     }
7664 
7665   return length;
7666 }
7667 
7668 /* Return the attribute length for the millicode call instruction INSN.
7669    The length must match the code generated by pa_output_millicode_call.
7670    We include the delay slot in the returned length as it is better to
7671    overestimate the length than to underestimate it.  */
7672 
7673 int
7674 pa_attr_length_millicode_call (rtx_insn *insn)
7675 {
7676   unsigned long distance = -1;
7677   unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7678 
7679   if (INSN_ADDRESSES_SET_P ())
7680     {
7681       distance = (total + insn_current_reference_address (insn));
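      /* If the sum wrapped around, the distance is unknown, so assume
	 the maximum.  */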
7682       if (distance < total)
7683 	distance = -1;
7684     }
7685 
7686   if (TARGET_64BIT)
7687     {
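      /* A PA 2.0 pc-relative call has a 22-bit word displacement (about
	 +-8MB); the 7600000 limit presumably leaves slack for linker
	 stubs and code growth.  */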
7688       if (!TARGET_LONG_CALLS && distance < 7600000)
7689 	return 8;
7690 
7691       return 20;
7692     }
7693   else if (TARGET_PORTABLE_RUNTIME)
7694     return 24;
7695   else
7696     {
7697       if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
7698 	return 8;
7699 
7700       if (!flag_pic)
7701 	return 12;
7702 
7703       return 24;
7704     }
7705 }
7706 
7707 /* INSN is a function call.
7708 
7709    CALL_DEST is the routine we are calling.  */
7710 
7711 const char *
7712 pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
7713 {
7714   int attr_length = get_attr_length (insn);
7715   int seq_length = dbr_sequence_length ();
7716   rtx xoperands[4];
7717 
7718   xoperands[0] = call_dest;
7719 
7720   /* Handle the common case where we are sure that the branch will
7721      reach the beginning of the $CODE$ subspace.  The within reach
7722      form of the $$sh_func_adrs call has a length of 28.  Because it
7723      has an attribute type of sh_func_adrs, it never has a nonzero
7724      sequence length (i.e., the delay slot is never filled).  */
7725   if (!TARGET_LONG_CALLS
7726       && (attr_length == 8
7727 	  || (attr_length == 28
7728 	      && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
7729     {
7730       xoperands[1] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7731       output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7732     }
7733   else
7734     {
7735       if (TARGET_64BIT)
7736 	{
7737 	  /* It might seem that one insn could be saved by accessing
7738 	     the millicode function using the linkage table.  However,
7739 	     this doesn't work in shared libraries and other dynamically
7740 	     loaded objects.  Using a pc-relative sequence also avoids
7741 	     problems related to the implicit use of the gp register.  */
7742 	  xoperands[1] = gen_rtx_REG (Pmode, 1);
7743 	  xoperands[2] = xoperands[1];
7744 	  pa_output_pic_pcrel_sequence (xoperands);
7745 	  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7746 	}
7747       else if (TARGET_PORTABLE_RUNTIME)
7748 	{
7749 	  /* Pure portable runtime doesn't allow be/ble; we also don't
7750 	     have PIC support in the assembler/linker, so this sequence
7751 	     is needed.  */
7752 
7753 	  /* Get the address of our target into %r1.  */
7754 	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
7755 	  output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7756 
7757 	  /* Get our return address into %r31.  */
7758 	  output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7759 	  output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7760 
7761 	  /* Jump to our target address in %r1.  */
7762 	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
7763 	}
7764       else if (!flag_pic)
7765 	{
7766 	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
7767 	  if (TARGET_PA_20)
7768 	    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7769 	  else
7770 	    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7771 	}
7772       else
7773 	{
7774 	  xoperands[1] = gen_rtx_REG (Pmode, 31);
7775 	  xoperands[2] = gen_rtx_REG (Pmode, 1);
7776 	  pa_output_pic_pcrel_sequence (xoperands);
7777 
7778 	  /* Adjust return address.  */
7779 	  output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands);
7780 
7781 	  /* Jump to our target address in %r1.  */
7782 	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
7783 	}
7784     }
7785 
7786   if (seq_length == 0)
7787     output_asm_insn ("nop", xoperands);
7788 
7789   return "";
7790 }
7791 
7792 /* Return the attribute length of the call instruction INSN.  The SIBCALL
7793    flag indicates whether INSN is a regular call or a sibling call.  The
7794    length returned must be longer than the code actually generated by
7795    pa_output_call.  Since branch shortening is done before delay branch
7796    sequencing, there is no way to determine whether or not the delay
7797    slot will be filled during branch shortening.  Even when the delay
7798    slot is filled, we may have to add a nop if the delay slot contains
7799    a branch that can't reach its target.  Thus, we always have to include
7800    the delay slot in the length estimate.  This used to be done in
7801    pa_adjust_insn_length but we do it here now as some sequences always
7802    fill the delay slot and we can save four bytes in the estimate for
7803    these sequences.  */
7804 
7805 int
7806 pa_attr_length_call (rtx_insn *insn, int sibcall)
7807 {
7808   int local_call;
7809   rtx call, call_dest;
7810   tree call_decl;
7811   int length = 0;
7812   rtx pat = PATTERN (insn);
7813   unsigned long distance = -1;
7814 
7815   gcc_assert (CALL_P (insn));
7816 
7817   if (INSN_ADDRESSES_SET_P ())
7818     {
7819       unsigned long total;
7820 
7821       total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7822       distance = (total + insn_current_reference_address (insn));
7823       if (distance < total)
7824 	distance = -1;
7825     }
7826 
7827   gcc_assert (GET_CODE (pat) == PARALLEL);
7828 
7829   /* Get the call rtx.  */
7830   call = XVECEXP (pat, 0, 0);
7831   if (GET_CODE (call) == SET)
7832     call = SET_SRC (call);
7833 
7834   gcc_assert (GET_CODE (call) == CALL);
7835 
7836   /* Determine if this is a local call.  */
7837   call_dest = XEXP (XEXP (call, 0), 0);
7838   call_decl = SYMBOL_REF_DECL (call_dest);
7839   local_call = call_decl && targetm.binds_local_p (call_decl);
7840 
7841   /* pc-relative branch.  */
7842   if (!TARGET_LONG_CALLS
7843       && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7844 	  || distance < MAX_PCREL17F_OFFSET))
7845     length += 8;
7846 
7847   /* 64-bit plabel sequence.  */
7848   else if (TARGET_64BIT && !local_call)
7849     length += 24;
7850 
7851   /* non-pic long absolute branch sequence.  */
7852   else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7853     length += 12;
7854 
7855   /* long pc-relative branch sequence.  */
7856   else if (TARGET_LONG_PIC_SDIFF_CALL
7857 	   || (TARGET_GAS && !TARGET_SOM && local_call))
7858     {
7859       length += 20;
7860 
7861       if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7862 	length += 8;
7863     }
7864 
7865   /* 32-bit plabel sequence.  */
7866   else
7867     {
7868       length += 32;
7869 
7870       if (TARGET_SOM)
7871 	length += length_fp_args (insn);
7872 
7873       if (flag_pic)
7874 	length += 4;
7875 
7876       if (!TARGET_PA_20)
7877 	{
7878 	  if (!sibcall)
7879 	    length += 8;
7880 
7881 	  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7882 	    length += 8;
7883 	}
7884     }
7885 
7886   return length;
7887 }
7888 
7889 /* INSN is a function call.
7890 
7891    CALL_DEST is the routine we are calling.  */
7892 
7893 const char *
7894 pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
7895 {
7896   int seq_length = dbr_sequence_length ();
7897   tree call_decl = SYMBOL_REF_DECL (call_dest);
7898   int local_call = call_decl && targetm.binds_local_p (call_decl);
7899   rtx xoperands[4];
7900 
7901   xoperands[0] = call_dest;
7902 
7903   /* Handle the common case where we're sure that the branch will reach
7904      the beginning of the "$CODE$" subspace.  This is the beginning of
7905      the current function if we are in a named section.  */
7906   if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
7907     {
7908       xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7909       output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7910     }
7911   else
7912     {
7913       if (TARGET_64BIT && !local_call)
7914 	{
7915 	  /* ??? As far as I can tell, the HP linker doesn't support the
7916 	     long pc-relative sequence described in the 64-bit runtime
7917 	     architecture.  So, we use a slightly longer indirect call.  */
7918 	  xoperands[0] = pa_get_deferred_plabel (call_dest);
7919 	  xoperands[1] = gen_label_rtx ();
7920 
7921 	  /* Put the load of %r27 into the delay slot.  We don't need to
7922 	     do anything when generating fast indirect calls.  */
7923 	  if (seq_length != 0)
7924 	    {
7925 	      final_scan_insn (NEXT_INSN (insn), asm_out_file,
7926 			       optimize, 0, NULL);
7927 
7928 	      /* Now delete the delay insn.  */
7929 	      SET_INSN_DELETED (NEXT_INSN (insn));
7930 	    }
7931 
7932 	  output_asm_insn ("addil LT'%0,%%r27", xoperands);
7933 	  output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7934 	  output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7935 	  output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7936 	  output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7937 	  output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7938 	  seq_length = 1;
7939 	}
7940       else
7941 	{
7942 	  int indirect_call = 0;
7943 
7944 	  /* Emit a long call.  There are several different sequences
7945 	     of increasing length and complexity.  In most cases,
7946              they don't allow an instruction in the delay slot.  */
7947 	  if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7948 	      && !TARGET_LONG_PIC_SDIFF_CALL
7949 	      && !(TARGET_GAS && !TARGET_SOM && local_call)
7950 	      && !TARGET_64BIT)
7951 	    indirect_call = 1;
7952 
7953 	  if (seq_length != 0
7954 	      && !sibcall
7955 	      && (!TARGET_PA_20
7956 		  || indirect_call
7957 		  || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7958 	    {
7959 	      /* A non-jump insn in the delay slot.  By definition we can
7960 		 emit this insn before the call (and in fact before argument
7961 		 relocating).  */
7962 	      final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7963 			       NULL);
7964 
7965 	      /* Now delete the delay insn.  */
7966 	      SET_INSN_DELETED (NEXT_INSN (insn));
7967 	      seq_length = 0;
7968 	    }
7969 
7970 	  if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7971 	    {
7972 	      /* This is the best sequence for making long calls in
7973 		 non-pic code.  Unfortunately, GNU ld doesn't provide
7974 		 the stub needed for external calls, and GAS's support
7975 		 for this with the SOM linker is buggy.  It is safe
7976 		 to use this for local calls.  */
7977 	      output_asm_insn ("ldil L'%0,%%r1", xoperands);
7978 	      if (sibcall)
7979 		output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7980 	      else
7981 		{
7982 		  if (TARGET_PA_20)
7983 		    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7984 				     xoperands);
7985 		  else
7986 		    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7987 
7988 		  output_asm_insn ("copy %%r31,%%r2", xoperands);
7989 		  seq_length = 1;
7990 		}
7991 	    }
7992 	  else
7993 	    {
7994 	      /* The HP assembler and linker can handle relocations for
7995 		 the difference of two symbols.  The HP assembler
7996 		 recognizes the sequence as a pc-relative call and
7997 		 the linker provides stubs when needed.  */
7998 
7999 	      /* GAS currently can't generate the relocations that
8000 		 are needed for the SOM linker under HP-UX using this
8001 		 sequence.  The GNU linker doesn't generate the stubs
8002 		 that are needed for external calls on TARGET_ELF32
8003 		 with this sequence.  For now, we have to use a longer
8004 		 plabel sequence when using GAS for non-local calls.  */
8005 	      if (TARGET_LONG_PIC_SDIFF_CALL
8006 		  || (TARGET_GAS && !TARGET_SOM && local_call))
8007 		{
8008 		  xoperands[1] = gen_rtx_REG (Pmode, 1);
8009 		  xoperands[2] = xoperands[1];
8010 		  pa_output_pic_pcrel_sequence (xoperands);
8011 		}
8012 	      else
8013 		{
8014 		  /* Emit a long plabel-based call sequence.  This is
8015 		     essentially an inline implementation of $$dyncall.
8016 		     We don't actually try to call $$dyncall as this is
8017 		     as difficult as calling the function itself.  */
8018 		  xoperands[0] = pa_get_deferred_plabel (call_dest);
8019 		  xoperands[1] = gen_label_rtx ();
8020 
8021 		  /* Since the call is indirect, FP arguments in registers
8022 		     need to be copied to the general registers.  Then, the
8023 		     argument relocation stub will copy them back.  */
8024 		  if (TARGET_SOM)
8025 		    copy_fp_args (insn);
8026 
8027 		  if (flag_pic)
8028 		    {
8029 		      output_asm_insn ("addil LT'%0,%%r19", xoperands);
8030 		      output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
8031 		      output_asm_insn ("ldw 0(%%r1),%%r22", xoperands);
8032 		    }
8033 		  else
8034 		    {
8035 		      output_asm_insn ("addil LR'%0-$global$,%%r27",
8036 				       xoperands);
8037 		      output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r22",
8038 				       xoperands);
8039 		    }
8040 
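		  /* If bit 30 of %r22 is set, the "address" is really a
		     plabel; mask off the low two bits and load the actual
		     target and global pointer from the descriptor.  */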
		  output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
		  output_asm_insn ("depi 0,31,2,%%r22", xoperands);
		  /* Should this be an ordered load to ensure the target
		     address is loaded before the global pointer?  */
		  output_asm_insn ("ldw 0(%%r22),%%r1", xoperands);
		  output_asm_insn ("ldw 4(%%r22),%%r19", xoperands);

		  if (!sibcall && !TARGET_PA_20)
		    {
		      output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
			output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
		      else
			output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
		    }
		}

	      if (TARGET_PA_20)
		{
		  if (sibcall)
		    output_asm_insn ("bve (%%r1)", xoperands);
		  else
		    {
		      if (indirect_call)
			{
			  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
			  output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
			  seq_length = 1;
			}
		      else
			output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
		    }
		}
	      else
		{
		  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
		    output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
				     xoperands);

		  if (sibcall)
		    {
		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
			output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
		      else
			output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
		    }
		  else
		    {
		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
			output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
		      else
			output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);

		      if (indirect_call)
			output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
		      else
			output_asm_insn ("copy %%r31,%%r2", xoperands);
		      seq_length = 1;
		    }
		}
	    }
	}
    }

  if (seq_length == 0)
    output_asm_insn ("nop", xoperands);

  return "";
}

/* Return the attribute length of the indirect call instruction INSN.
   The length must match the code generated by pa_output_indirect_call.
   The returned length includes the delay slot.  Currently, the delay
   slot of an indirect call sequence is not exposed and it is used by
   the sequence itself.  */

int
pa_attr_length_indirect_call (rtx_insn *insn)
{
  unsigned long distance = -1;
  unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;

  if (INSN_ADDRESSES_SET_P ())
    {
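      /* The sum below can wrap when total_code_bytes is near the end
	 of the address space; presumably that is why wraparound is
	 treated as an unknown (maximum) distance.  */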
      distance = (total + insn_current_reference_address (insn));
      if (distance < total)
	distance = -1;
    }

  if (TARGET_64BIT)
    return 12;

  if (TARGET_FAST_INDIRECT_CALLS)
    return 8;

  if (TARGET_PORTABLE_RUNTIME)
    return 16;

  if (!TARGET_LONG_CALLS
      && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
	  || distance < MAX_PCREL17F_OFFSET))
    return 8;

  /* Out of reach, can use ble.  */
  if (!flag_pic)
    return 12;

  /* Inline versions of $$dyncall.  */
  if (!optimize_size)
    {
      if (TARGET_NO_SPACE_REGS)
	return 28;

      if (TARGET_PA_20)
	return 32;
    }

  /* Long PIC pc-relative call.  */
  return 20;
}

const char *
pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
{
  rtx xoperands[4];
  int length;

  if (TARGET_64BIT)
    {
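      /* In the 64-bit runtime, CALL_DEST points at a function
	 descriptor: the entry point is at offset 16 and the new
	 global pointer (%r27) is at offset 24.  */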
      xoperands[0] = call_dest;
      output_asm_insn ("ldd 16(%0),%%r2\n\t"
		       "bve,l (%%r2),%%r2\n\t"
		       "ldd 24(%0),%%r27", xoperands);
      return "";
    }

  /* First the special case for kernels, level 0 systems, etc.  */
  if (TARGET_FAST_INDIRECT_CALLS)
    {
      pa_output_arg_descriptor (insn);
      if (TARGET_PA_20)
	return "bve,l,n (%%r22),%%r2\n\tnop";
      return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
    }

  if (TARGET_PORTABLE_RUNTIME)
    {
      output_asm_insn ("ldil L'$$dyncall,%%r31\n\t"
		       "ldo R'$$dyncall(%%r31),%%r31", xoperands);
      pa_output_arg_descriptor (insn);
      return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
    }

  /* Now the normal case -- we can reach $$dyncall directly or
     we're sure that we can get there via a long-branch stub.

     No need to check target flags as the length uniquely identifies
     the remaining cases.  */
  length = pa_attr_length_indirect_call (insn);
  if (length == 8)
    {
      pa_output_arg_descriptor (insn);

      /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
	 $$dyncall.  Since BLE uses %r31 as the link register, the 22-bit
	 variant of the B,L instruction can't be used on the SOM target.  */
      if (TARGET_PA_20 && !TARGET_SOM)
	return "b,l,n $$dyncall,%%r2\n\tnop";
      else
	return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
    }

  /* Long millicode call, but we are not generating PIC or portable runtime
     code.  */
  if (length == 12)
    {
      output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands);
      pa_output_arg_descriptor (insn);
      return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
    }

  /* The long PIC pc-relative call sequence is five instructions.  So,
     let's use an inline version of $$dyncall when the calling sequence
     has a roughly similar number of instructions and we are not optimizing
     for size.  We need two instructions to load the return pointer plus
     the $$dyncall implementation.  */
  if (!optimize_size)
    {
      if (TARGET_NO_SPACE_REGS)
	{
	  pa_output_arg_descriptor (insn);
	  output_asm_insn ("bl .+8,%%r2\n\t"
			   "ldo 20(%%r2),%%r2\n\t"
			   "extru,<> %%r22,30,1,%%r0\n\t"
			   "bv,n %%r0(%%r22)\n\t"
			   "ldw -2(%%r22),%%r21\n\t"
			   "bv %%r0(%%r21)\n\t"
			   "ldw 2(%%r22),%%r19", xoperands);
	  return "";
	}
      if (TARGET_PA_20)
	{
	  pa_output_arg_descriptor (insn);
	  output_asm_insn ("bl .+8,%%r2\n\t"
			   "ldo 24(%%r2),%%r2\n\t"
			   "stw %%r2,-24(%%sp)\n\t"
			   "extru,<> %%r22,30,1,%%r0\n\t"
			   "bve,n (%%r22)\n\t"
			   "ldw -2(%%r22),%%r21\n\t"
			   "bve (%%r21)\n\t"
			   "ldw 2(%%r22),%%r19", xoperands);
	  return "";
	}
    }

  /* We need a long PIC call to $$dyncall.  */
  xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall");
  xoperands[1] = gen_rtx_REG (Pmode, 2);
  xoperands[2] = gen_rtx_REG (Pmode, 1);
  pa_output_pic_pcrel_sequence (xoperands);
  pa_output_arg_descriptor (insn);
  return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2";
}

/* In HPUX 8.0's shared library scheme, special relocations are needed
   for function labels if they might be passed to a function
   in a shared library (because shared libraries don't live in code
   space), and special magic is needed to construct their address.  */

void
pa_encode_label (rtx sym)
{
  const char *str = XSTR (sym, 0);
  int len = strlen (str) + 1;
  char *newstr, *p;

  p = newstr = XALLOCAVEC (char, len + 1);
  *p++ = '@';
  strcpy (p, str);

  XSTR (sym, 0) = ggc_alloc_string (newstr, len);
}

static void
pa_encode_section_info (tree decl, rtx rtl, int first)
{
  int old_referenced = 0;

  if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
    old_referenced
      = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;

  default_encode_section_info (decl, rtl, first);

  if (first && TEXT_SPACE_P (decl))
    {
      SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
      if (TREE_CODE (decl) == FUNCTION_DECL)
	pa_encode_label (XEXP (rtl, 0));
    }
  else if (old_referenced)
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
}

/* This is sort of inverse to pa_encode_section_info.  */

static const char *
pa_strip_name_encoding (const char *str)
{
  str += (*str == '@');
  str += (*str == '*');
  return str;
}
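
/* For example, pa_encode_label rewrites the assembler name "foo" as
   "@foo"; pa_strip_name_encoding removes the '@' again, as well as
   any '*' prefix marking a name that is to be emitted verbatim.  */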

/* Returns 1 if OP is a function label involved in a simple addition
   with a constant.  Used to keep certain patterns from matching
   during instruction combination.  */
int
pa_is_function_label_plus_const (rtx op)
{
  /* Strip off any CONST.  */
  if (GET_CODE (op) == CONST)
    op = XEXP (op, 0);

  return (GET_CODE (op) == PLUS
	  && function_label_operand (XEXP (op, 0), VOIDmode)
	  && GET_CODE (XEXP (op, 1)) == CONST_INT);
}
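
/* For example, a CONST of the form (plus (symbol_ref) (const_int))
   where the symbol is a function label satisfies this predicate.  */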

/* Output assembly code for a thunk to FUNCTION.  */

static void
pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
			HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
			tree function)
{
  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
  static unsigned int current_thunk_number;
  int val_14 = VAL_14_BITS_P (delta);
  unsigned int old_last_address = last_address, nbytes = 0;
  char label[17];
  rtx xoperands[4];

  xoperands[0] = XEXP (DECL_RTL (function), 0);
  xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
  xoperands[2] = GEN_INT (delta);

  assemble_start_function (thunk_fndecl, fnname);
  final_start_function (emit_barrier (), file, 1);

  /* Output the thunk.  We know that the function is in the same
     translation unit (i.e., the same space) as the thunk, and that
     thunks are output after their method.  Thus, we don't need an
     external branch to reach the function.  With SOM and GAS,
     functions and thunks are effectively in different sections.
     Thus, we can always use an IA-relative branch and the linker
     will add a long branch stub if necessary.

     However, we have to be careful when generating PIC code on the
     SOM port to ensure that the sequence does not transfer to an
     import stub for the target function as this could clobber the
     return value saved at SP-24.  This would also apply to the
     32-bit linux port if the multi-space model is implemented.  */
  if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
       && !(flag_pic && TREE_PUBLIC (function))
       && (TARGET_GAS || last_address < 262132))
      || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
	  && ((targetm_common.have_named_sections
	       && DECL_SECTION_NAME (thunk_fndecl) != NULL
	       /* The GNU 64-bit linker has rather poor stub management.
		  So, we use a long branch from thunks that aren't in
		  the same section as the target function.  */
	       && ((!TARGET_64BIT
		    && (DECL_SECTION_NAME (thunk_fndecl)
			!= DECL_SECTION_NAME (function)))
		   || ((DECL_SECTION_NAME (thunk_fndecl)
			== DECL_SECTION_NAME (function))
		       && last_address < 262132)))
	      /* In this case, we need to be able to reach the start of
		 the stub table even though the function is likely closer
		 and can be jumped to directly.  */
	      || (targetm_common.have_named_sections
		  && DECL_SECTION_NAME (thunk_fndecl) == NULL
		  && DECL_SECTION_NAME (function) == NULL
		  && total_code_bytes < MAX_PCREL17F_OFFSET)
	      /* Likewise.  */
	      || (!targetm_common.have_named_sections
		  && total_code_bytes < MAX_PCREL17F_OFFSET))))
    {
      if (!val_14)
	output_asm_insn ("addil L'%2,%%r26", xoperands);

      output_asm_insn ("b %0", xoperands);

      if (val_14)
	{
	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	  nbytes += 8;
	}
      else
	{
	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
	  nbytes += 12;
	}
    }
  else if (TARGET_64BIT)
    {
      rtx xop[4];

      /* We only have one call-clobbered scratch register, so we can't
         make use of the delay slot if delta doesn't fit in 14 bits.  */
      if (!val_14)
	{
	  output_asm_insn ("addil L'%2,%%r26", xoperands);
	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
	}

      /* Load function address into %r1.  */
      xop[0] = xoperands[0];
      xop[1] = gen_rtx_REG (Pmode, 1);
      xop[2] = xop[1];
      pa_output_pic_pcrel_sequence (xop);

      if (val_14)
	{
	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	  nbytes += 20;
	}
      else
	{
	  output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
	  nbytes += 24;
	}
    }
  else if (TARGET_PORTABLE_RUNTIME)
    {
      output_asm_insn ("ldil L'%0,%%r1", xoperands);
      output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);

      if (!val_14)
	output_asm_insn ("ldil L'%2,%%r26", xoperands);

      output_asm_insn ("bv %%r0(%%r22)", xoperands);

      if (val_14)
	{
	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	  nbytes += 16;
	}
      else
	{
	  output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands);
	  nbytes += 20;
	}
    }
  else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
    {
      /* The function is accessible from outside this module.  The only
	 way to avoid an import stub between the thunk and function is to
	 call the function directly with an indirect sequence similar to
	 that used by $$dyncall.  This is possible because $$dyncall acts
	 as the import stub in an indirect call.  */
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
      xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
      output_asm_insn ("addil LT'%3,%%r19", xoperands);
      output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
      output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
      output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
      output_asm_insn ("depi 0,31,2,%%r22", xoperands);
      output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
      output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);

      if (!val_14)
	{
	  output_asm_insn ("addil L'%2,%%r26", xoperands);
	  nbytes += 4;
	}

      if (TARGET_PA_20)
	{
	  output_asm_insn ("bve (%%r22)", xoperands);
	  nbytes += 36;
	}
      else if (TARGET_NO_SPACE_REGS)
	{
	  output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
	  nbytes += 36;
	}
      else
	{
	  output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
	  output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
	  output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
	  nbytes += 44;
	}

      if (val_14)
	output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
      else
	output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
    }
  else if (flag_pic)
    {
      rtx xop[4];

      /* Load function address into %r22.  */
      xop[0] = xoperands[0];
      xop[1] = gen_rtx_REG (Pmode, 1);
      xop[2] = gen_rtx_REG (Pmode, 22);
      pa_output_pic_pcrel_sequence (xop);

      if (!val_14)
	output_asm_insn ("addil L'%2,%%r26", xoperands);

      output_asm_insn ("bv %%r0(%%r22)", xoperands);

      if (val_14)
	{
	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	  nbytes += 20;
	}
      else
	{
	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
	  nbytes += 24;
	}
    }
  else
    {
      if (!val_14)
	output_asm_insn ("addil L'%2,%%r26", xoperands);

      output_asm_insn ("ldil L'%0,%%r22", xoperands);
      output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);

      if (val_14)
	{
	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	  nbytes += 12;
	}
      else
	{
	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
	  nbytes += 16;
	}
    }

  final_end_function ();

  if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
    {
      switch_to_section (data_section);
      output_asm_insn (".align 4", xoperands);
      ASM_OUTPUT_LABEL (file, label);
      output_asm_insn (".word P'%0", xoperands);
    }

  current_thunk_number++;
  nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
	    & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
  last_address += nbytes;
  if (old_last_address > last_address)
    last_address = UINT_MAX;
  update_total_code_bytes (nbytes);
  assemble_end_function (thunk_fndecl, fnname);
}

/* Only direct calls to static functions are allowed to be sibling (tail)
   call optimized.

   This restriction is necessary because some linker-generated stubs
   store the return pointer into rp', which might clobber a live value
   already in rp'.

   In a sibcall the current function and the target function share stack
   space.  Thus if the path to the current function and the path to the
   target function save a value in rp', they save the value into the
   same stack slot, which has undesirable consequences.

   Because of the deferred binding nature of shared libraries any function
   with external scope could be in a different load module and thus require
   rp' to be saved when calling that function.  So sibcall optimizations
   can only be safe for static functions.

   Note that GCC never needs return value relocations, so we don't have to
   worry about static calls with return value relocations (which require
   saving rp').

   It is safe to perform a sibcall optimization when the target function
   will never return.  */
static bool
pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  /* Sibcalls are not ok because the arg pointer register is not a fixed
     register.  This prevents the sibcall optimization from occurring.  In
     addition, there are problems with stub placement using GNU ld.  This
     is because a normal sibcall branch uses a 17-bit relocation while
     a regular call branch uses a 22-bit relocation.  As a result, more
     care needs to be taken in the placement of long-branch stubs.  */
  if (TARGET_64BIT)
    return false;

  if (TARGET_PORTABLE_RUNTIME)
    return false;

  /* Sibcalls are only ok within a translation unit.  */
  return decl && targetm.binds_local_p (decl);
}

/* ??? Addition is not commutative on the PA due to the weird implicit
   space register selection rules for memory addresses.  Therefore, we
   don't consider a + b == b + a, as this might be inside a MEM.  */
static bool
pa_commutative_p (const_rtx x, int outer_code)
{
  return (COMMUTATIVE_P (x)
	  && (TARGET_NO_SPACE_REGS
	      || (outer_code != UNKNOWN && outer_code != MEM)
	      || GET_CODE (x) != PLUS));
}
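
/* For example, in (mem (plus (reg A) (reg B))) the space register is
   selected from the base operand, so swapping A and B could change
   which space register the access uses.  */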

/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
   use in fmpyadd instructions.  */
int
pa_fmpyaddoperands (rtx *operands)
{
  machine_mode mode = GET_MODE (operands[0]);

  /* Must be a floating point mode.  */
  if (mode != SFmode && mode != DFmode)
    return 0;

  /* All modes must be the same.  */
  if (! (mode == GET_MODE (operands[1])
	 && mode == GET_MODE (operands[2])
	 && mode == GET_MODE (operands[3])
	 && mode == GET_MODE (operands[4])
	 && mode == GET_MODE (operands[5])))
    return 0;

  /* All operands must be registers.  */
  if (! (GET_CODE (operands[1]) == REG
	 && GET_CODE (operands[2]) == REG
	 && GET_CODE (operands[3]) == REG
	 && GET_CODE (operands[4]) == REG
	 && GET_CODE (operands[5]) == REG))
    return 0;

  /* Only 2 real operands to the addition.  One of the input operands must
     be the same as the output operand.  */
  if (! rtx_equal_p (operands[3], operands[4])
      && ! rtx_equal_p (operands[3], operands[5]))
    return 0;

  /* Inout operand of add cannot conflict with any operands from multiply.  */
  if (rtx_equal_p (operands[3], operands[0])
     || rtx_equal_p (operands[3], operands[1])
     || rtx_equal_p (operands[3], operands[2]))
    return 0;

  /* multiply cannot feed into addition operands.  */
  if (rtx_equal_p (operands[4], operands[0])
      || rtx_equal_p (operands[5], operands[0]))
    return 0;

  /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
  if (mode == SFmode
      && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
    return 0;

  /* Passed.  Operands are suitable for fmpyadd.  */
  return 1;
}

#if !defined(USE_COLLECT2)
static void
pa_asm_out_constructor (rtx symbol, int priority)
{
  if (!function_label_operand (symbol, VOIDmode))
    pa_encode_label (symbol);

#ifdef CTORS_SECTION_ASM_OP
  default_ctor_section_asm_out_constructor (symbol, priority);
#else
# ifdef TARGET_ASM_NAMED_SECTION
  default_named_section_asm_out_constructor (symbol, priority);
# else
  default_stabs_asm_out_constructor (symbol, priority);
# endif
#endif
}

static void
pa_asm_out_destructor (rtx symbol, int priority)
{
  if (!function_label_operand (symbol, VOIDmode))
    pa_encode_label (symbol);

#ifdef DTORS_SECTION_ASM_OP
  default_dtor_section_asm_out_destructor (symbol, priority);
#else
# ifdef TARGET_ASM_NAMED_SECTION
  default_named_section_asm_out_destructor (symbol, priority);
# else
  default_stabs_asm_out_destructor (symbol, priority);
# endif
#endif
}
#endif

/* This function places uninitialized global data in the bss section.
   The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
   function on the SOM port to prevent uninitialized global data from
   being placed in the data section.  */

void
pa_asm_output_aligned_bss (FILE *stream,
			   const char *name,
			   unsigned HOST_WIDE_INT size,
			   unsigned int align)
{
  switch_to_section (bss_section);

#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
  ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
#endif

#ifdef ASM_OUTPUT_SIZE_DIRECTIVE
  ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
#endif

  fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
  ASM_OUTPUT_LABEL (stream, name);
  fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
}

/* Both the HP and GNU assemblers under HP-UX provide a .comm directive
   that doesn't allow the alignment of global common storage to be directly
   specified.  The SOM linker aligns common storage based on the rounded
   value of the NUM_BYTES parameter in the .comm directive.  It's not
   possible to use the .align directive as it doesn't affect the alignment
   of the label associated with a .comm directive.  */
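/* For example, a 4-byte object requesting 8-byte alignment is emitted
   with ".comm 8"; reserving the rounded size is what gives the label
   the requested alignment.  */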

void
pa_asm_output_aligned_common (FILE *stream,
			      const char *name,
			      unsigned HOST_WIDE_INT size,
			      unsigned int align)
{
  unsigned int max_common_align;

  max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
  if (align > max_common_align)
    {
      warning (0, "alignment (%u) for %s exceeds maximum alignment "
	       "for global common data.  Using %u",
	       align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
      align = max_common_align;
    }

  switch_to_section (bss_section);

  assemble_name (stream, name);
  fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
           MAX (size, align / BITS_PER_UNIT));
}

/* We can't use .comm for local common storage as the SOM linker effectively
   treats the symbol as universal and uses the same storage for local symbols
   with the same name in different object files.  The .block directive
   reserves an uninitialized block of storage.  However, it's not common
   storage.  Fortunately, GCC never requests common storage with the same
   name in any given translation unit.  */

void
pa_asm_output_aligned_local (FILE *stream,
			     const char *name,
			     unsigned HOST_WIDE_INT size,
			     unsigned int align)
{
  switch_to_section (bss_section);
  fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);

#ifdef LOCAL_ASM_OP
  fprintf (stream, "%s", LOCAL_ASM_OP);
  assemble_name (stream, name);
  fprintf (stream, "\n");
#endif

  ASM_OUTPUT_LABEL (stream, name);
  fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
}

/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
   use in fmpysub instructions.  */
int
pa_fmpysuboperands (rtx *operands)
{
  machine_mode mode = GET_MODE (operands[0]);

  /* Must be a floating point mode.  */
  if (mode != SFmode && mode != DFmode)
    return 0;

  /* All modes must be the same.  */
  if (! (mode == GET_MODE (operands[1])
	 && mode == GET_MODE (operands[2])
	 && mode == GET_MODE (operands[3])
	 && mode == GET_MODE (operands[4])
	 && mode == GET_MODE (operands[5])))
    return 0;

  /* All operands must be registers.  */
  if (! (GET_CODE (operands[1]) == REG
	 && GET_CODE (operands[2]) == REG
	 && GET_CODE (operands[3]) == REG
	 && GET_CODE (operands[4]) == REG
	 && GET_CODE (operands[5]) == REG))
    return 0;

  /* Only 2 real operands to the subtraction.  Subtraction is not a
     commutative operation, so operands[4] must be the same as
     operands[3].  */
  if (! rtx_equal_p (operands[3], operands[4]))
    return 0;

  /* multiply cannot feed into subtraction.  */
  if (rtx_equal_p (operands[5], operands[0]))
    return 0;

  /* Inout operand of sub cannot conflict with any operands from multiply.  */
  if (rtx_equal_p (operands[3], operands[0])
     || rtx_equal_p (operands[3], operands[1])
     || rtx_equal_p (operands[3], operands[2]))
    return 0;

  /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
  if (mode == SFmode
      && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
    return 0;

  /* Passed.  Operands are suitable for fmpysub.  */
  return 1;
}

/* Return 1 if the given constant is 2, 4, or 8.  These are the valid
   constants for a MULT embedded inside a memory address.  */
int
pa_mem_shadd_constant_p (int val)
{
  if (val == 2 || val == 4 || val == 8)
    return 1;
  else
    return 0;
}

/* Return 1 if the given constant is 1, 2, or 3.  These are the valid
   shift counts for shadd instructions, and correspond to the scale
   factors accepted by pa_mem_shadd_constant_p (1 << val).  */
int
pa_shadd_constant_p (int val)
{
  if (val == 1 || val == 2 || val == 3)
    return 1;
  else
    return 0;
}

/* Return TRUE if INSN branches forward.  */

static bool
forward_branch_p (rtx_insn *insn)
{
  rtx lab = JUMP_LABEL (insn);

  /* The INSN must have a jump label.  */
  gcc_assert (lab != NULL_RTX);

  if (INSN_ADDRESSES_SET_P ())
    return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));

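  /* Insn addresses are not available; scan forward from INSN and call
     the branch forward if we reach LAB before the end of the chain.  */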
  while (insn)
    {
      if (insn == lab)
	return true;
      else
	insn = NEXT_INSN (insn);
    }

  return false;
}

/* Output an unconditional move and branch insn.  */

const char *
pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
{
  int length = get_attr_length (insn);

  /* These are the cases in which we win.  */
  if (length == 4)
    return "mov%I1b,tr %1,%0,%2";

  /* None of the following cases win, but they don't lose either.  */
  if (length == 8)
    {
      if (dbr_sequence_length () == 0)
	{
	  /* Nothing in the delay slot, fake it by putting the combined
	     insn (the copy or ldi) in the delay slot of a bl.  */
	  if (GET_CODE (operands[1]) == CONST_INT)
	    return "b %2\n\tldi %1,%0";
	  else
	    return "b %2\n\tcopy %1,%0";
	}
      else
	{
	  /* Something in the delay slot, but we've got a long branch.  */
	  if (GET_CODE (operands[1]) == CONST_INT)
	    return "ldi %1,%0\n\tb %2";
	  else
	    return "copy %1,%0\n\tb %2";
	}
    }

  if (GET_CODE (operands[1]) == CONST_INT)
    output_asm_insn ("ldi %1,%0", operands);
  else
    output_asm_insn ("copy %1,%0", operands);
  return pa_output_lbranch (operands[2], insn, 1);
}

/* Output an unconditional add and branch insn.  */

const char *
pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
{
  int length = get_attr_length (insn);

  /* To make life easy we want operand0 to be the shared input/output
     operand and operand1 to be the readonly operand.  */
  if (operands[0] == operands[1])
    operands[1] = operands[2];

  /* These are the cases in which we win.  */
  if (length == 4)
    return "add%I1b,tr %1,%0,%3";

  /* None of the following cases win, but they don't lose either.  */
  if (length == 8)
    {
      if (dbr_sequence_length () == 0)
	/* Nothing in the delay slot, fake it by putting the combined
	   insn (the add) in the delay slot of a bl.  */
	return "b %3\n\tadd%I1 %1,%0,%0";
      else
	/* Something in the delay slot, but we've got a long branch.  */
	return "add%I1 %1,%0,%0\n\tb %3";
    }

  output_asm_insn ("add%I1 %1,%0,%0", operands);
  return pa_output_lbranch (operands[3], insn, 1);
}

/* We use this hook to perform a PA specific optimization which is difficult
   to do in earlier passes.  */

static void
pa_reorg (void)
{
  remove_useless_addtr_insns (1);

  if (pa_cpu < PROCESSOR_8000)
    pa_combine_instructions ();
}

/* The PA has a number of odd instructions which can perform multiple
   tasks at once.  On first generation PA machines (PA1.0 and PA1.1)
   it may be profitable to combine two instructions into one instruction
   with two outputs.  It's not profitable on PA2.0 machines because the
   two outputs would take two slots in the reorder buffers.

   This routine finds instructions which can be combined and combines
   them.  We only support some of the potential combinations, and we
   only try common ways to find suitable instructions.

      * addb can add two registers or a register and a small integer
      and jump to a nearby (+-8k) location.  Normally the jump to the
      nearby location is conditional on the result of the add, but by
      using the "true" condition we can make the jump unconditional.
      Thus addb can perform two independent operations in one insn.

      * movb is similar to addb in that it can perform a reg->reg
      or small immediate->reg copy and jump to a nearby (+-8k) location.

      * fmpyadd and fmpysub can perform a FP multiply and either an
      FP add or FP sub if the operands of the multiply and add/sub are
      independent (there are other minor restrictions).  Note both
      the fmpy and fadd/fsub can in theory move to better spots according
      to data dependencies, but for now we require the fmpy stay at a
      fixed location.

      * Many of the memory operations can perform pre & post updates
      of index registers.  GCC's pre/post increment/decrement addressing
      is far too simple to take advantage of all the possibilities.  This
      pass may not be suitable since those insns may not be independent.

      * comclr can compare two registers or a register and an immediate,
      nullify the following instruction, and zero some other register.
      This is more difficult to use as it's harder to find an insn which
      will generate a comclr than finding something like an unconditional
      branch.  (conditional moves & long branches create comclr insns).

      * Most arithmetic operations can conditionally skip the next
      instruction.  They can be viewed as "perform this operation
      and conditionally jump to this nearby location" (where nearby
      is a few insns away).  These are difficult to use due to the
      branch length restrictions.  */

static void
pa_combine_instructions (void)
{
  rtx_insn *anchor;

  /* This can get expensive since the basic algorithm is on the
     order of O(n^2) (or worse).  Only do it for -O2 or higher
     levels of optimization.  */
  if (optimize < 2)
    return;

  /* Walk down the list of insns looking for "anchor" insns which
     may be combined with "floating" insns.  As the name implies,
     "anchor" instructions don't move, while "floating" insns may
     move around.  */
  rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
  rtx_insn *new_rtx = make_insn_raw (par);
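
  /* NEW_RTX is a scratch two-element PARALLEL that pa_can_combine_p
     fills in and tries to recognize for each candidate pairing; it is
     never emitted into the insn stream itself.  */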

  for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
    {
      enum attr_pa_combine_type anchor_attr;
      enum attr_pa_combine_type floater_attr;

      /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
	 Also ignore any special USE insns.  */
      if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
	  || GET_CODE (PATTERN (anchor)) == USE
	  || GET_CODE (PATTERN (anchor)) == CLOBBER)
	continue;

      anchor_attr = get_attr_pa_combine_type (anchor);
      /* See if anchor is an insn suitable for combination.  */
      if (anchor_attr == PA_COMBINE_TYPE_FMPY
	  || anchor_attr == PA_COMBINE_TYPE_FADDSUB
	  || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
	      && ! forward_branch_p (anchor)))
	{
	  rtx_insn *floater;

	  for (floater = PREV_INSN (anchor);
	       floater;
	       floater = PREV_INSN (floater))
	    {
	      if (NOTE_P (floater)
		  || (NONJUMP_INSN_P (floater)
		      && (GET_CODE (PATTERN (floater)) == USE
			  || GET_CODE (PATTERN (floater)) == CLOBBER)))
		continue;

	      /* Anything except a regular INSN will stop our search.  */
	      if (! NONJUMP_INSN_P (floater))
		{
		  floater = NULL;
		  break;
		}

	      /* See if FLOATER is suitable for combination with the
		 anchor.  */
	      floater_attr = get_attr_pa_combine_type (floater);
	      if ((anchor_attr == PA_COMBINE_TYPE_FMPY
		   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
		  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
		      && floater_attr == PA_COMBINE_TYPE_FMPY))
		{
		  /* If ANCHOR and FLOATER can be combined, then we're
		     done with this pass.  */
		  if (pa_can_combine_p (new_rtx, anchor, floater, 0,
					SET_DEST (PATTERN (floater)),
					XEXP (SET_SRC (PATTERN (floater)), 0),
					XEXP (SET_SRC (PATTERN (floater)), 1)))
		    break;
		}

	      else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
		       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
		{
		  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
		    {
		      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
					    SET_DEST (PATTERN (floater)),
					XEXP (SET_SRC (PATTERN (floater)), 0),
					XEXP (SET_SRC (PATTERN (floater)), 1)))
			break;
		    }
		  else
		    {
		      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
					    SET_DEST (PATTERN (floater)),
					    SET_SRC (PATTERN (floater)),
					    SET_SRC (PATTERN (floater))))
			break;
		    }
		}
	    }

	  /* If we didn't find anything on the backwards scan try forwards.  */
	  if (!floater
	      && (anchor_attr == PA_COMBINE_TYPE_FMPY
		  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
	    {
	      for (floater = anchor; floater; floater = NEXT_INSN (floater))
		{
		  if (NOTE_P (floater)
		      || (NONJUMP_INSN_P (floater)
			  && (GET_CODE (PATTERN (floater)) == USE
			      || GET_CODE (PATTERN (floater)) == CLOBBER)))
		    continue;

		  /* Anything except a regular INSN will stop our search.  */
		  if (! NONJUMP_INSN_P (floater))
		    {
		      floater = NULL;
		      break;
		    }

		  /* See if FLOATER is suitable for combination with the
		     anchor.  */
		  floater_attr = get_attr_pa_combine_type (floater);
		  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
		       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
		      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
			  && floater_attr == PA_COMBINE_TYPE_FMPY))
		    {
		      /* If ANCHOR and FLOATER can be combined, then we're
			 done with this pass.  */
		      if (pa_can_combine_p (new_rtx, anchor, floater, 1,
					    SET_DEST (PATTERN (floater)),
					    XEXP (SET_SRC (PATTERN (floater)),
						  0),
					    XEXP (SET_SRC (PATTERN (floater)),
						  1)))
			break;
		    }
		}
	    }

	  /* FLOATER will be nonzero if we found a suitable floating
	     insn for combination with ANCHOR.  */
	  if (floater
	      && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
		  || anchor_attr == PA_COMBINE_TYPE_FMPY))
	    {
	      /* Emit the new instruction and delete the old anchor.  */
	      rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
				       copy_rtx (PATTERN (floater)));
	      rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
	      emit_insn_before (temp, anchor);

	      SET_INSN_DELETED (anchor);

	      /* Emit a special USE insn for FLOATER, then delete
		 the floating insn.  */
	      temp = copy_rtx (PATTERN (floater));
	      emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
	      delete_insn (floater);

	      continue;
	    }
	  else if (floater
		   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
	    {
	      /* Emit the new_jump instruction and delete the old anchor.  */
	      rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
				       copy_rtx (PATTERN (floater)));
	      rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
	      temp = emit_jump_insn_before (temp, anchor);

	      JUMP_LABEL (temp) = JUMP_LABEL (anchor);
	      SET_INSN_DELETED (anchor);

	      /* Emit a special USE insn for FLOATER, then delete
		 the floating insn.  */
	      temp = copy_rtx (PATTERN (floater));
	      emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
	      delete_insn (floater);
	      continue;
	    }
	}
    }
}

static int
pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
		  int reversed, rtx dest,
		  rtx src1, rtx src2)
{
  int insn_code_number;
  rtx_insn *start, *end;

  /* Create a PARALLEL with the patterns of ANCHOR and
     FLOATER, try to recognize it, then test constraints
     for the resulting pattern.

     If the pattern doesn't match or the constraints
     aren't met keep searching for a suitable floater
     insn.  */
  XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
  XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
  INSN_CODE (new_rtx) = -1;
  insn_code_number = recog_memoized (new_rtx);
  basic_block bb = BLOCK_FOR_INSN (anchor);
  if (insn_code_number < 0
      || (extract_insn (new_rtx),
	  !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
    return 0;

  if (reversed)
    {
      start = anchor;
      end = floater;
    }
  else
    {
      start = floater;
      end = anchor;
    }

  /* There are up to three operands to consider.  One
     output and two inputs.

     The output must not be used between FLOATER & ANCHOR
     exclusive.  The inputs must not be set between
     FLOATER and ANCHOR exclusive.  */

  if (reg_used_between_p (dest, start, end))
    return 0;

  if (reg_set_between_p (src1, start, end))
    return 0;

  if (reg_set_between_p (src2, start, end))
    return 0;

  /* If we get here, then everything is good.  */
  return 1;
}

/* Return nonzero if references for INSN are delayed.

   Millicode insns are actually function calls with some special
   constraints on arguments and register usage.

   Millicode calls always expect their arguments in the integer argument
   registers, and always return their result in %r29 (ret1).  They
   are expected to clobber their arguments, %r1, %r29, and the return
   pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.

   This function tells reorg that the references to arguments and
   millicode calls do not appear to happen until after the millicode call.
   This allows reorg to put insns which set the argument registers into the
   delay slot of the millicode call -- thus they act more like traditional
   CALL_INSNs.

   Note we cannot consider side effects of the insn to be delayed because
   the branch and link insn will clobber the return pointer.  If we happened
   to use the return pointer in the delay slot of the call, then we lose.

   get_attr_type will try to recognize the given insn, so make sure to
   filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
   in particular.  */
int
pa_insn_refs_are_delayed (rtx_insn *insn)
{
  return ((NONJUMP_INSN_P (insn)
	   && GET_CODE (PATTERN (insn)) != SEQUENCE
	   && GET_CODE (PATTERN (insn)) != USE
	   && GET_CODE (PATTERN (insn)) != CLOBBER
	   && get_attr_type (insn) == TYPE_MILLI));
}

/* Promote the return value, but not the arguments.  */
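/* For example, a function returning a 16-bit value has the return
   value promoted to word_mode, while a 16-bit argument is left in
   HImode.  */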

static machine_mode
pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
                          machine_mode mode,
                          int *punsignedp ATTRIBUTE_UNUSED,
                          const_tree fntype ATTRIBUTE_UNUSED,
                          int for_return)
{
  if (for_return == 0)
    return mode;
  return promote_mode (type, mode, punsignedp);
}

/* On the HP-PA the value is found in register(s) 28(-29), unless
   the mode is SF or DF.  Then the value is returned in fr4 (32).

   This must perform the same promotions as PROMOTE_MODE, else promoting
   return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.

   Small structures must be returned in a PARALLEL on PA64 in order
   to match the HP Compiler ABI.  */

static rtx
pa_function_value (const_tree valtype,
                   const_tree func ATTRIBUTE_UNUSED,
                   bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode valmode;

  if (AGGREGATE_TYPE_P (valtype)
      || TREE_CODE (valtype) == COMPLEX_TYPE
      || TREE_CODE (valtype) == VECTOR_TYPE)
    {
      HOST_WIDE_INT valsize = int_size_in_bytes (valtype);

      /* Handle aggregates that fit exactly in a word or double word.  */
      if (valsize == UNITS_PER_WORD || valsize == 2 * UNITS_PER_WORD)
	return gen_rtx_REG (TYPE_MODE (valtype), 28);

      if (TARGET_64BIT)
	{
	  /* Aggregates with a size less than or equal to 128 bits are
	     returned in GR 28(-29).  They are left justified.  The pad
	     bits are undefined.  Larger aggregates are returned in
	     memory.  */
	  rtx loc[2];
	  int i, offset = 0;
	  int ub = valsize <= UNITS_PER_WORD ? 1 : 2;

	  for (i = 0; i < ub; i++)
	    {
	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode, 28 + i),
					  GEN_INT (offset));
	      offset += 8;
	    }

	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
	}
      else if (valsize > UNITS_PER_WORD)
	{
	  /* Aggregates 5 to 8 bytes in size are returned in general
	     registers r28-r29 in the same manner as other non
	     floating-point objects.  The data is right-justified and
	     zero-extended to 64 bits.  This is opposite to the normal
	     justification used on big endian targets and requires
	     special treatment.  */
	  rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
				       gen_rtx_REG (DImode, 28), const0_rtx);
	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
	}
    }

  if ((INTEGRAL_TYPE_P (valtype)
       && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
      || POINTER_TYPE_P (valtype))
    valmode = word_mode;
  else
    valmode = TYPE_MODE (valtype);

  if (TREE_CODE (valtype) == REAL_TYPE
      && !AGGREGATE_TYPE_P (valtype)
      && TYPE_MODE (valtype) != TFmode
      && !TARGET_SOFT_FLOAT)
    return gen_rtx_REG (valmode, 32);

  return gen_rtx_REG (valmode, 28);
}

/* Implement the TARGET_LIBCALL_VALUE hook.  */

static rtx
pa_libcall_value (machine_mode mode,
		  const_rtx fun ATTRIBUTE_UNUSED)
{
  if (! TARGET_SOFT_FLOAT
      && (mode == SFmode || mode == DFmode))
    return gen_rtx_REG (mode, 32);
  else
    return gen_rtx_REG (mode, 28);
}

/* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook.  */

static bool
pa_function_value_regno_p (const unsigned int regno)
{
  if (regno == 28
      || (! TARGET_SOFT_FLOAT && regno == 32))
    return true;

  return false;
}

/* Update the data in CUM to advance over argument ARG.  */

static void
pa_function_arg_advance (cumulative_args_t cum_v,
			 const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int arg_size = pa_function_arg_size (arg.mode, arg.type);

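  /* Arguments wider than one word are double-word aligned: when such
     an argument would otherwise start at an odd word offset, a pad
     word is skipped first.  Arguments without a type (libcall
     arguments) are not padded.  */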
  cum->nargs_prototype--;
  cum->words += (arg_size
		 + ((cum->words & 01)
		    && arg.type != NULL_TREE
		    && arg_size > 1));
}

/* Return the location of a parameter that is passed in a register or NULL
   if the parameter has any component that is passed in memory.

   This is new code and will be pushed into the net sources after
   further testing.

   ??? We might want to restructure this so that it looks more like other
   ports.  */
static rtx
pa_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  tree type = arg.type;
  machine_mode mode = arg.mode;
  int max_arg_words = (TARGET_64BIT ? 8 : 4);
  int alignment = 0;
  int arg_size;
  int fpr_reg_base;
  int gpr_reg_base;
  rtx retval;

  if (arg.end_marker_p ())
    return NULL_RTX;

  arg_size = pa_function_arg_size (mode, type);

  /* If this arg would be passed partially or totally on the stack, then
     this routine should return zero.  pa_arg_partial_bytes will
     handle arguments which are split between regs and stack slots if
     the ABI mandates split arguments.  */
  if (!TARGET_64BIT)
    {
      /* The 32-bit ABI does not split arguments.  */
      if (cum->words + arg_size > max_arg_words)
	return NULL_RTX;
    }
  else
    {
      if (arg_size > 1)
	alignment = cum->words & 1;
      if (cum->words + alignment >= max_arg_words)
	return NULL_RTX;
    }

  /* The 32bit ABIs and the 64bit ABIs are rather different,
     particularly in their handling of FP registers.  We might
     be able to cleverly share code between them, but I'm not
     going to bother in the hope that splitting them up results
     in code that is more easily understood.  */

  if (TARGET_64BIT)
    {
      /* Advance the base registers to their current locations.

         Remember, gprs grow towards smaller register numbers while
	 fprs grow to higher register numbers.  Also remember that
	 although FP regs are 32-bit addressable, we pretend that
	 the registers are 64-bits wide.  */
      gpr_reg_base = 26 - cum->words;
      fpr_reg_base = 32 + cum->words;

      /* Arguments wider than one word and small aggregates need special
	 treatment.  */
      if (arg_size > 1
	  || mode == BLKmode
	  || (type && (AGGREGATE_TYPE_P (type)
		       || TREE_CODE (type) == COMPLEX_TYPE
		       || TREE_CODE (type) == VECTOR_TYPE)))
	{
	  /* Double-extended precision (80-bit), quad-precision (128-bit)
	     and aggregates including complex numbers are aligned on
	     128-bit boundaries.  The first eight 64-bit argument slots
	     are associated one-to-one, with general registers r26
	     through r19, and also with floating-point registers fr4
	     through fr11.  Arguments larger than one word are always
	     passed in general registers.

	     Using a PARALLEL with a word mode register results in left
	     justified data on a big-endian target.  */

	  rtx loc[8];
	  int i, offset = 0, ub = arg_size;

	  /* Align the base register.  */
	  gpr_reg_base -= alignment;

	  ub = MIN (ub, max_arg_words - cum->words - alignment);
	  for (i = 0; i < ub; i++)
	    {
	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode, gpr_reg_base),
					  GEN_INT (offset));
	      gpr_reg_base -= 1;
	      offset += 8;
	    }

	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
	}
    }
  else
    {
      /* If the argument is larger than a word, then we know precisely
	 which registers we must use.  */
      if (arg_size > 1)
	{
	  if (cum->words)
	    {
	      gpr_reg_base = 23;
	      fpr_reg_base = 38;
	    }
	  else
	    {
	      gpr_reg_base = 25;
	      fpr_reg_base = 34;
	    }

	  /* Structures 5 to 8 bytes in size are passed in the general
	     registers in the same manner as other non floating-point
	     objects.  The data is right-justified and zero-extended
	     to 64 bits.  This is opposite to the normal justification
	     used on big endian targets and requires special treatment.
	     We now define BLOCK_REG_PADDING to pad these objects.
	     Aggregates, complex and vector types are passed in the same
	     manner as structures.  */
	  if (mode == BLKmode
	      || (type && (AGGREGATE_TYPE_P (type)
			   || TREE_CODE (type) == COMPLEX_TYPE
			   || TREE_CODE (type) == VECTOR_TYPE)))
	    {
	      rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (DImode, gpr_reg_base),
					   const0_rtx);
	      return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
	    }
	}
      else
	{
	  /* We have a single word (32 bits).  A simple computation
	     will get us the register #s we need.  */
	  gpr_reg_base = 26 - cum->words;
	  fpr_reg_base = 32 + 2 * cum->words;
	}
    }
9574 
9575   /* Determine if the argument needs to be passed in both general and
9576      floating point registers.  */
9577   if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9578        /* If we are doing soft-float with portable runtime, then there
9579 	  is no need to worry about FP regs.  */
9580        && !TARGET_SOFT_FLOAT
9581        /* The parameter must be some kind of scalar float, else we just
9582 	  pass it in integer registers.  */
9583        && GET_MODE_CLASS (mode) == MODE_FLOAT
9584        /* The target function must not have a prototype.  */
9585        && cum->nargs_prototype <= 0
9586        /* libcalls do not need to pass items in both FP and general
9587 	  registers.  */
9588        && type != NULL_TREE
9589        /* All this hair applies to "outgoing" args only.  This includes
9590 	  sibcall arguments set up with FUNCTION_INCOMING_ARG.  */
9591        && !cum->incoming)
9592       /* Also pass outgoing floating arguments in both registers in indirect
9593 	 calls with the 32-bit ABI and the HP assembler since there is no
9594 	 way to specify argument locations in static functions.  */
9595       || (!TARGET_64BIT
9596 	  && !TARGET_GAS
9597 	  && !cum->incoming
9598 	  && cum->indirect
9599 	  && GET_MODE_CLASS (mode) == MODE_FLOAT))
9600     {
9601       retval
9602 	= gen_rtx_PARALLEL
9603 	    (mode,
9604 	     gen_rtvec (2,
9605 			gen_rtx_EXPR_LIST (VOIDmode,
9606 					   gen_rtx_REG (mode, fpr_reg_base),
9607 					   const0_rtx),
9608 			gen_rtx_EXPR_LIST (VOIDmode,
9609 					   gen_rtx_REG (mode, gpr_reg_base),
9610 					   const0_rtx)));
9611     }
9612   else
9613     {
9614       /* See if we should pass this parameter in a general register.  */
9615       if (TARGET_SOFT_FLOAT
9616 	  /* Indirect calls in the normal 32-bit ABI require all arguments
9617 	     to be passed in general registers.  */
9618 	  || (!TARGET_PORTABLE_RUNTIME
9619 	      && !TARGET_64BIT
9620 	      && !TARGET_ELF32
9621 	      && cum->indirect)
9622 	  /* If the parameter is not a scalar floating-point parameter,
9623 	     then it belongs in GPRs.  */
9624 	  || GET_MODE_CLASS (mode) != MODE_FLOAT
9625 	  /* Structure with single SFmode field belongs in GPR.  */
9626 	  || (type && AGGREGATE_TYPE_P (type)))
9627 	retval = gen_rtx_REG (mode, gpr_reg_base);
9628       else
9629 	retval = gen_rtx_REG (mode, fpr_reg_base);
9630     }
9631   return retval;
9632 }
9633 
9634 /* Arguments larger than one word are double word aligned.  */
9635 
9636 static unsigned int
9637 pa_function_arg_boundary (machine_mode mode, const_tree type)
9638 {
9639   bool singleword = (type
9640 		     ? (integer_zerop (TYPE_SIZE (type))
9641 			|| !TREE_CONSTANT (TYPE_SIZE (type))
9642 			|| int_size_in_bytes (type) <= UNITS_PER_WORD)
9643 		     : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9644 
9645   return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
9646 }
9647 
9648 /* If this arg would be passed totally in registers or totally on the stack,
9649    then this routine should return zero.  */
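
/* For example (an illustrative case): in the 64-bit runtime, with
   cum->words == 6 and a four-word argument, two words fit in the
   remaining registers (r20 and r19) and two spill to the stack, so
   the function below returns (8 - 6) * UNITS_PER_WORD == 16.  */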
9650 
9651 static int
9652 pa_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
9653 {
9654   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9655   unsigned int max_arg_words = 8;
9656   unsigned int offset = 0;
9657 
9658   if (!TARGET_64BIT)
9659     return 0;
9660 
9661   if (pa_function_arg_size (arg.mode, arg.type) > 1 && (cum->words & 1))
9662     offset = 1;
9663 
9664   if (cum->words + offset + pa_function_arg_size (arg.mode, arg.type)
9665       <= max_arg_words)
9666     /* Arg fits fully into registers.  */
9667     return 0;
9668   else if (cum->words + offset >= max_arg_words)
9669     /* Arg fully on the stack.  */
9670     return 0;
9671   else
9672     /* Arg is split.  */
9673     return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
9674 }
9675 
9676 
9677 /* A get_unnamed_section callback for switching to the text section.
9678 
9679    This function is only used with SOM.  Because we don't support
9680    named subspaces, we can only create a new subspace or switch back
9681    to the default text subspace.  */
9682 
9683 static void
9684 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9685 {
9686   gcc_assert (TARGET_SOM);
9687   if (TARGET_GAS)
9688     {
9689       if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9690 	{
9691 	  /* We only want to emit a .nsubspa directive once at the
9692 	     start of the function.  */
9693 	  cfun->machine->in_nsubspa = 1;
9694 
9695 	  /* Create a new subspace for the text.  This provides
9696 	     better stub placement and one-only functions.  */
9697 	  if (cfun->decl
9698 	      && DECL_ONE_ONLY (cfun->decl)
9699 	      && !DECL_WEAK (cfun->decl))
9700 	    {
9701 	      output_section_asm_op ("\t.SPACE $TEXT$\n"
9702 				     "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9703 				     "ACCESS=44,SORT=24,COMDAT");
9704 	      return;
9705 	    }
9706 	}
9707       else
9708 	{
9709 	  /* There isn't a current function or the body of the current
9710 	     function has been completed.  So, we are changing to the
9711 	     text section to output debugging information.  Thus, we
9712 	     need to forget that we are in the text section so that
9713 	     varasm.c will call us when text_section is selected again.  */
9714 	  gcc_assert (!cfun || !cfun->machine
9715 		      || cfun->machine->in_nsubspa == 2);
9716 	  in_section = NULL;
9717 	}
9718       output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9719       return;
9720     }
9721   output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9722 }
9723 
9724 /* A get_unnamed_section callback for switching to comdat data
9725    sections.  This function is only used with SOM.  */
9726 
9727 static void
9728 som_output_comdat_data_section_asm_op (const void *data)
9729 {
9730   in_section = NULL;
9731   output_section_asm_op (data);
9732 }
9733 
9734 /* Implement TARGET_ASM_INIT_SECTIONS.  */
9735 
9736 static void
9737 pa_som_asm_init_sections (void)
9738 {
9739   text_section
9740     = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9741 
9742   /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9743      is not being generated.  */
9744   som_readonly_data_section
9745     = get_unnamed_section (0, output_section_asm_op,
9746 			   "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9747 
9748   /* When secondary definitions are not supported, SOM makes readonly
9749      data one-only by creating a new $LIT$ subspace in $TEXT$ with
9750      the comdat flag.  */
9751   som_one_only_readonly_data_section
9752     = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9753 			   "\t.SPACE $TEXT$\n"
9754 			   "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9755 			   "ACCESS=0x2c,SORT=16,COMDAT");
9756 
9757 
9758   /* When secondary definitions are not supported, SOM makes data one-only
9759      by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
9760   som_one_only_data_section
9761     = get_unnamed_section (SECTION_WRITE,
9762 			   som_output_comdat_data_section_asm_op,
9763 			   "\t.SPACE $PRIVATE$\n"
9764 			   "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9765 			   "ACCESS=31,SORT=24,COMDAT");
9766 
9767   if (flag_tm)
9768     som_tm_clone_table_section
9769       = get_unnamed_section (0, output_section_asm_op,
9770 			     "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
9771 
9772   /* HPUX ld generates incorrect GOT entries for "T" fixups which
9773      reference data within the $TEXT$ space (for example constant
9774      strings in the $LIT$ subspace).
9775 
9776      The assemblers (GAS and HP as) both have problems with handling
9777      the difference of two symbols.  This is the other correct way to
9778      reference constant data during PIC code generation.
9779 
9780      Thus, we can't put constant data needing relocation in the $TEXT$
9781      space during PIC generation.
9782 
9783      Previously, we placed all constant data into the $DATA$ subspace
9784      when generating PIC code.  This reduces sharing, but it works
9785      correctly.  Now we rely on pa_reloc_rw_mask() for section selection.
9786      This puts constant data not needing relocation into the $TEXT$ space.  */
9787   readonly_data_section = som_readonly_data_section;
9788 
9789   /* We must not have a reference to an external symbol defined in a
9790      shared library in a readonly section, else the SOM linker will
9791      complain.
9792 
9793      So, we force exception information into the data section.  */
9794   exception_section = data_section;
9795 }
9796 
9797 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION.  */
9798 
9799 static section *
9800 pa_som_tm_clone_table_section (void)
9801 {
9802   return som_tm_clone_table_section;
9803 }
9804 
9805 /* On hpux10, the linker will give an error if we have a reference
9806    in the read-only data section to a symbol defined in a shared
9807    library.  Therefore, expressions that might require a reloc
9808    cannot be placed in the read-only data section.  */
9809 
9810 static section *
9811 pa_select_section (tree exp, int reloc,
9812 		   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9813 {
9814   if (TREE_CODE (exp) == VAR_DECL
9815       && TREE_READONLY (exp)
9816       && !TREE_THIS_VOLATILE (exp)
9817       && DECL_INITIAL (exp)
9818       && (DECL_INITIAL (exp) == error_mark_node
9819           || TREE_CONSTANT (DECL_INITIAL (exp)))
9820       && !(reloc & pa_reloc_rw_mask ()))
9821     {
9822       if (TARGET_SOM
9823 	  && DECL_ONE_ONLY (exp)
9824 	  && !DECL_WEAK (exp))
9825 	return som_one_only_readonly_data_section;
9826       else
9827 	return readonly_data_section;
9828     }
9829   else if (CONSTANT_CLASS_P (exp)
9830 	   && !(reloc & pa_reloc_rw_mask ()))
9831     return readonly_data_section;
9832   else if (TARGET_SOM
9833 	   && TREE_CODE (exp) == VAR_DECL
9834 	   && DECL_ONE_ONLY (exp)
9835 	   && !DECL_WEAK (exp))
9836     return som_one_only_data_section;
9837   else
9838     return data_section;
9839 }
9840 
9841 /* Implement pa_elf_select_rtx_section.  If X is a function label operand
9842    and the function is in a COMDAT group, place the plabel reference in the
9843    .data.rel.ro.local section.  The linker ignores references to symbols in
9844    discarded sections from this section.  */
9845 
9846 static section *
9847 pa_elf_select_rtx_section (machine_mode mode, rtx x,
9848 			   unsigned HOST_WIDE_INT align)
9849 {
9850   if (function_label_operand (x, VOIDmode))
9851     {
9852       tree decl = SYMBOL_REF_DECL (x);
9853 
9854       if (!decl || (DECL_P (decl) && DECL_COMDAT_GROUP (decl)))
9855 	return get_named_section (NULL, ".data.rel.ro.local", 1);
9856     }
9857 
9858   return default_elf_select_rtx_section (mode, x, align);
9859 }
9860 
9861 /* Implement pa_reloc_rw_mask.  */
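
/* A note on the convention (matching default_reloc_rw_mask): the
   return value is a mask of relocation kinds that must be placed in
   writable sections.  Bit 1 covers relocations to global symbols and
   bit 0 the remaining local relocations, so 3 keeps all relocatable
   data out of read-only sections while 2 excludes only data with
   global relocations.  */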
9862 
9863 static int
9864 pa_reloc_rw_mask (void)
9865 {
9866   if (flag_pic || (TARGET_SOM && !TARGET_HPUX_11))
9867     return 3;
9868 
9869   /* HP linker does not support global relocs in readonly memory.  */
9870   return TARGET_SOM ? 2 : 0;
9871 }
9872 
9873 static void
9874 pa_globalize_label (FILE *stream, const char *name)
9875 {
9876   /* We only handle DATA objects here; functions are globalized in
9877      ASM_DECLARE_FUNCTION_NAME.  */
9878   if (! FUNCTION_NAME_P (name))
9879     {
9880       fputs ("\t.EXPORT ", stream);
9881       assemble_name (stream, name);
9882       fputs (",DATA\n", stream);
9883     }
9884 }
9885 
9886 /* Worker function for TARGET_STRUCT_VALUE_RTX.  */
9887 
9888 static rtx
9889 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9890 		     int incoming ATTRIBUTE_UNUSED)
9891 {
9892   return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9893 }
9894 
9895 /* Worker function for TARGET_RETURN_IN_MEMORY.  */
9896 
9897 bool
9898 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9899 {
9900   /* SOM ABI says that objects larger than 64 bits are returned in memory.
9901      PA64 ABI says that objects larger than 128 bits are returned in memory.
9902      Note, int_size_in_bytes can return -1 if the size of the object is
9903      variable or larger than the maximum value that can be expressed as
9904      a HOST_WIDE_INT.   It can also return zero for an empty type.  The
9905      simplest way to handle variable and empty types is to pass them in
9906      memory.  This avoids problems in defining the boundaries of argument
9907      slots, allocating registers, etc.  */
9908   return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9909 	  || int_size_in_bytes (type) <= 0);
9910 }
9911 
9912 /* Structure to hold declaration and name of external symbols that are
9913    emitted by GCC.  We generate a vector of these symbols and output them
9914    at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9915    This avoids putting out names that are never really used.  */
9916 
9917 typedef struct GTY(()) extern_symbol
9918 {
9919   tree decl;
9920   const char *name;
9921 } extern_symbol;
9922 
9923 /* Define gc'd vector type for extern_symbol.  */
9924 
9925 /* GC-rooted vector of extern_symbol entries.  */
9926 static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
9927 
9928 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9929 /* Mark DECL (name NAME) as an external reference (assembler output
9930    file FILE).  This saves the names to output at the end of the file
9931    if actually referenced.  */
9932 
9933 void
9934 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9935 {
9936   gcc_assert (file == asm_out_file);
9937   extern_symbol p = {decl, name};
9938   vec_safe_push (extern_symbols, p);
9939 }
9940 #endif
9941 
9942 /* Output text required at the end of an assembler file.
9943    This includes deferred plabels and .import directives for
9944    all external symbols that were actually referenced.  */
9945 
9946 static void
9947 pa_file_end (void)
9948 {
9949 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9950   unsigned int i;
9951   extern_symbol *p;
9952 
9953   if (!NO_DEFERRED_PROFILE_COUNTERS)
9954     output_deferred_profile_counters ();
9955 #endif
9956 
9957   output_deferred_plabels ();
9958 
9959 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9960   for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
9961     {
9962       tree decl = p->decl;
9963 
9964       if (!TREE_ASM_WRITTEN (decl)
9965 	  && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9966 	ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9967     }
9968 
9969   vec_free (extern_symbols);
9970 #endif
9971 
9972   if (NEED_INDICATE_EXEC_STACK)
9973     file_end_indicate_exec_stack ();
9974 }
9975 
9976 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
9977 
9978 static bool
9979 pa_can_change_mode_class (machine_mode from, machine_mode to,
9980 			  reg_class_t rclass)
9981 {
9982   if (from == to)
9983     return true;
9984 
9985   if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
9986     return true;
9987 
9988   /* Reject changes to/from modes with zero size.  */
9989   if (!GET_MODE_SIZE (from) || !GET_MODE_SIZE (to))
9990     return false;
9991 
9992   /* Reject changes to/from complex and vector modes.  */
9993   if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
9994       || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
9995     return false;
9996 
9997   /* There is no way to load QImode or HImode values directly from memory
9998      to a FP register.  SImode loads to the FP registers are not zero
9999      extended.  On the 64-bit target, this conflicts with the definition
10000      of LOAD_EXTEND_OP.  Thus, we reject all mode changes in the FP registers
10001      except for DImode to SImode on the 64-bit target.  It is handled by
10002      register renaming in pa_print_operand.  */
10003   if (MAYBE_FP_REG_CLASS_P (rclass))
10004     return TARGET_64BIT && from == DImode && to == SImode;
10005 
10006   /* TARGET_HARD_REGNO_MODE_OK places modes with sizes larger than a word
10007      in specific sets of registers.  Thus, we cannot allow changing
10008      to a larger mode when it's larger than a word.  */
10009   if (GET_MODE_SIZE (to) > UNITS_PER_WORD
10010       && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
10011     return false;
10012 
10013   return true;
10014 }
10015 
10016 /* Implement TARGET_MODES_TIEABLE_P.
10017 
10018    We should return FALSE for QImode and HImode because these modes
10019    are not ok in the floating-point registers.  However, this prevents
10020    tying these modes to SImode and DImode in the general registers.
10021    So, this isn't a good idea.  We rely on TARGET_HARD_REGNO_MODE_OK and
10022    TARGET_CAN_CHANGE_MODE_CLASS to prevent these modes from being used
10023    in the floating-point registers.  */
10024 
10025 static bool
10026 pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10027 {
10028   /* Don't tie modes in different classes.  */
10029   if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
10030     return false;
10031 
10032   return true;
10033 }
10034 
10035 
10036 /* Length in units of the trampoline instruction code.  */
10037 
10038 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 36 : 48))
10039 
10040 
10041 /* Output assembler code for a block containing the constant parts
10042    of a trampoline, leaving space for the variable parts.
10043 
10044    The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
10045    and then branches to the specified routine.
10046 
10047    This code template is copied from the text segment to a stack
10048    location, patched by pa_trampoline_init to contain valid values,
10049    and then entered as a subroutine.
10050 
10051    It is best to keep this as small as possible to avoid having to
10052    flush multiple lines in the cache.  */
10053 
10054 static void
10055 pa_asm_trampoline_template (FILE *f)
10056 {
10057   if (!TARGET_64BIT)
10058     {
10059       if (TARGET_PA_20)
10060 	{
10061 	  fputs ("\tmfia	%r20\n", f);
10062 	  fputs ("\tldw		48(%r20),%r22\n", f);
10063 	  fputs ("\tcopy	%r22,%r21\n", f);
10064 	  fputs ("\tbb,>=,n	%r22,30,.+16\n", f);
10065 	  fputs ("\tdepwi	0,31,2,%r22\n", f);
10066 	  fputs ("\tldw		0(%r22),%r21\n", f);
10067 	  fputs ("\tldw		4(%r22),%r19\n", f);
10068 	  fputs ("\tbve		(%r21)\n", f);
10069 	  fputs ("\tldw		52(%r1),%r29\n", f);
10070 	  fputs ("\t.word	0\n", f);
10071 	  fputs ("\t.word	0\n", f);
10072 	  fputs ("\t.word	0\n", f);
10073 	}
10074       else
10075 	{
10076 	  if (ASSEMBLER_DIALECT == 0)
10077 	    {
10078 	      fputs ("\tbl	.+8,%r20\n", f);
10079 	      fputs ("\tdepi	0,31,2,%r20\n", f);
10080 	    }
10081 	  else
10082 	    {
10083 	      fputs ("\tb,l	.+8,%r20\n", f);
10084 	      fputs ("\tdepwi	0,31,2,%r20\n", f);
10085 	    }
10086 	  fputs ("\tldw		40(%r20),%r22\n", f);
10087 	  fputs ("\tcopy	%r22,%r21\n", f);
10088 	  fputs ("\tbb,>=,n	%r22,30,.+16\n", f);
10089 	  if (ASSEMBLER_DIALECT == 0)
10090 	    fputs ("\tdepi	0,31,2,%r22\n", f);
10091 	  else
10092 	    fputs ("\tdepwi	0,31,2,%r22\n", f);
10093 	  fputs ("\tldw		0(%r22),%r21\n", f);
10094 	  fputs ("\tldw		4(%r22),%r19\n", f);
10095 	  fputs ("\tldsid	(%r21),%r1\n", f);
10096 	  fputs ("\tmtsp	%r1,%sr0\n", f);
10097 	  fputs ("\tbe		0(%sr0,%r21)\n", f);
10098 	  fputs ("\tldw		44(%r20),%r29\n", f);
10099 	}
10100       fputs ("\t.word	0\n", f);
10101       fputs ("\t.word	0\n", f);
10102       fputs ("\t.word	0\n", f);
10103       fputs ("\t.word	0\n", f);
10104     }
10105   else
10106     {
10107       fputs ("\t.dword 0\n", f);
10108       fputs ("\t.dword 0\n", f);
10109       fputs ("\t.dword 0\n", f);
10110       fputs ("\t.dword 0\n", f);
10111       fputs ("\tmfia	%r31\n", f);
10112       fputs ("\tldd	24(%r31),%r27\n", f);
10113       fputs ("\tldd	32(%r31),%r31\n", f);
10114       fputs ("\tldd	16(%r27),%r1\n", f);
10115       fputs ("\tbve	(%r1)\n", f);
10116       fputs ("\tldd	24(%r27),%r27\n", f);
10117       fputs ("\t.dword 0  ; fptr\n", f);
10118       fputs ("\t.dword 0  ; static link\n", f);
10119     }
10120 }
10121 
10122 /* Emit RTL insns to initialize the variable parts of a trampoline.
10123    FNADDR is an RTX for the address of the function's pure code.
10124    CXT is an RTX for the static chain value for the function.
10125 
10126    Move the function address to the trampoline template at offset 48.
10127    Move the static chain value to the trampoline template at offset 52.
10128    Move the trampoline address to the trampoline template at offset 56.
10129    Move r19 to the trampoline template at offset 60.  The latter two
10130    words create a plabel for the indirect call to the trampoline.
10131 
10132    A similar sequence is used for the 64-bit port but the plabel is
10133    at the beginning of the trampoline.
10134 
10135    Finally, the cache entries for the trampoline code are flushed.
10136    This is necessary to ensure that the trampoline instruction sequence
10137    is written to memory prior to any attempts at prefetching the code
10138    sequence.  */
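
/* A sketch of the 32-bit trampoline layout this produces (offsets in
   bytes, summarizing the description above):

     0 .. 47  instruction template and padding
     48       function address
     52       static chain value
     56       trampoline address  \ together a plabel for the
     60       gp (r19)            / indirect call  */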
10139 
10140 static void
10141 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10142 {
10143   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10144   rtx start_addr = gen_reg_rtx (Pmode);
10145   rtx end_addr = gen_reg_rtx (Pmode);
10146   rtx line_length = gen_reg_rtx (Pmode);
10147   rtx r_tramp, tmp;
10148 
10149   emit_block_move (m_tramp, assemble_trampoline_template (),
10150 		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10151   r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10152 
10153   if (!TARGET_64BIT)
10154     {
10155       tmp = adjust_address (m_tramp, Pmode, 48);
10156       emit_move_insn (tmp, fnaddr);
10157       tmp = adjust_address (m_tramp, Pmode, 52);
10158       emit_move_insn (tmp, chain_value);
10159 
10160       /* Create a fat pointer for the trampoline.  */
10161       tmp = adjust_address (m_tramp, Pmode, 56);
10162       emit_move_insn (tmp, r_tramp);
10163       tmp = adjust_address (m_tramp, Pmode, 60);
10164       emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10165 
10166      /* fdc and fic only use registers for the address to flush;
10167 	 they do not accept integer displacements.  We align the
10168 	 start and end addresses to the beginning of their respective
10169 	 cache lines to minimize the number of lines flushed.  */
10170       emit_insn (gen_andsi3 (start_addr, r_tramp,
10171 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10172       tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
10173 					     TRAMPOLINE_CODE_SIZE-1));
10174       emit_insn (gen_andsi3 (end_addr, tmp,
10175 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10176       emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10177       emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10178       emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10179 				    gen_reg_rtx (Pmode),
10180 				    gen_reg_rtx (Pmode)));
10181     }
10182   else
10183     {
10184       tmp = adjust_address (m_tramp, Pmode, 56);
10185       emit_move_insn (tmp, fnaddr);
10186       tmp = adjust_address (m_tramp, Pmode, 64);
10187       emit_move_insn (tmp, chain_value);
10188 
10189       /* Create a fat pointer for the trampoline.  */
10190       tmp = adjust_address (m_tramp, Pmode, 16);
10191       emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
10192 							    r_tramp, 32)));
10193       tmp = adjust_address (m_tramp, Pmode, 24);
10194       emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10195 
10196      /* fdc and fic only use registers for the address to flush;
10197 	 they do not accept integer displacements.  We align the
10198 	 start and end addresses to the beginning of their respective
10199 	 cache lines to minimize the number of lines flushed.  */
10200       tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
10201       emit_insn (gen_anddi3 (start_addr, tmp,
10202 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10203       tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
10204 					     TRAMPOLINE_CODE_SIZE - 1));
10205       emit_insn (gen_anddi3 (end_addr, tmp,
10206 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10207       emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10208       emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10209       emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10210 				    gen_reg_rtx (Pmode),
10211 				    gen_reg_rtx (Pmode)));
10212     }
10213 
10214 #ifdef HAVE_ENABLE_EXECUTE_STACK
10215   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10216 		     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10217 #endif
10218 }
10219 
10220 /* Perform any machine-specific adjustment in the address of the trampoline.
10221    ADDR contains the address that was passed to pa_trampoline_init.
10222    Adjust the trampoline address to point to the plabel at offset 56.
   The constant 58 below includes an extra 2 that sets bit 30, marking
   the pointer as a plabel so that $$dyncall dereferences it to obtain
   the code address and global pointer.  */
10223 
10224 static rtx
10225 pa_trampoline_adjust_address (rtx addr)
10226 {
10227   if (!TARGET_64BIT)
10228     addr = memory_address (Pmode, plus_constant (Pmode, addr, 58));
10229   return addr;
10230 }
10231 
10232 static rtx
10233 pa_delegitimize_address (rtx orig_x)
10234 {
10235   rtx x = delegitimize_mem_from_attrs (orig_x);
10236 
10237   if (GET_CODE (x) == LO_SUM
10238       && GET_CODE (XEXP (x, 1)) == UNSPEC
10239       && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10240     return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10241   return x;
10242 }
10243 
10244 static rtx
10245 pa_internal_arg_pointer (void)
10246 {
10247   /* The argument pointer and the hard frame pointer are the same in
10248      the 32-bit runtime, so we don't need a copy.  */
10249   if (TARGET_64BIT)
10250     return copy_to_reg (virtual_incoming_args_rtx);
10251   else
10252     return virtual_incoming_args_rtx;
10253 }
10254 
10255 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10256    Frame pointer elimination is automatically handled.  */
10257 
10258 static bool
10259 pa_can_eliminate (const int from, const int to)
10260 {
10261   /* The argument pointer cannot be eliminated in the 64-bit runtime.  */
10262   if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10263     return false;
10264 
10265   return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10266           ? ! frame_pointer_needed
10267           : true);
10268 }
10269 
10270 /* Define the offset between two registers, FROM to be eliminated and its
10271    replacement TO, at the start of a routine.  */
10272 HOST_WIDE_INT
10273 pa_initial_elimination_offset (int from, int to)
10274 {
10275   HOST_WIDE_INT offset;
10276 
10277   if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10278       && to == STACK_POINTER_REGNUM)
10279     offset = -pa_compute_frame_size (get_frame_size (), 0);
10280   else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10281     offset = 0;
10282   else
10283     gcc_unreachable ();
10284 
10285   return offset;
10286 }
10287 
10288 static void
10289 pa_conditional_register_usage (void)
10290 {
10291   int i;
10292 
10293   if (!TARGET_64BIT && !TARGET_PA_11)
10294     {
10295       for (i = 56; i <= FP_REG_LAST; i++)
10296 	fixed_regs[i] = call_used_regs[i] = 1;
10297       for (i = 33; i < 56; i += 2)
10298 	fixed_regs[i] = call_used_regs[i] = 1;
10299     }
10300   if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
10301     {
10302       for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10303 	fixed_regs[i] = call_used_regs[i] = 1;
10304     }
10305   if (flag_pic)
10306     fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10307 }
10308 
10309 /* Target hook for c_mode_for_suffix.  */
10310 
10311 static machine_mode
10312 pa_c_mode_for_suffix (char suffix)
10313 {
10314   if (HPUX_LONG_DOUBLE_LIBRARY)
10315     {
10316       if (suffix == 'q')
10317 	return TFmode;
10318     }
10319 
10320   return VOIDmode;
10321 }
10322 
10323 /* Target hook for function_section.  */
10324 
10325 static section *
10326 pa_function_section (tree decl, enum node_frequency freq,
10327 		     bool startup, bool exit)
10328 {
10329   /* Put functions in the text section if the target lacks named sections.  */
10330   if (!targetm_common.have_named_sections)
10331     return text_section;
10332 
10333   /* Force nested functions into the same section as the containing
10334      function.  */
10335   if (decl
10336       && DECL_SECTION_NAME (decl) == NULL
10337       && DECL_CONTEXT (decl) != NULL_TREE
10338       && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10339       && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
10340     return function_section (DECL_CONTEXT (decl));
10341 
10342   /* Otherwise, use the default function section.  */
10343   return default_function_section (decl, freq, startup, exit);
10344 }
10345 
10346 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10347 
10348    In 64-bit mode, we reject CONST_DOUBLES.  We also reject CONST_INTS
10349    that need more than three instructions to load prior to reload.  This
10350    limit is somewhat arbitrary.  It takes three instructions to load a
10351    CONST_INT from memory but two are memory accesses.  It may be better
10352    to increase the allowed range for CONST_INTS.  We may also be able
10353    to handle CONST_DOUBLES.  */
10354 
10355 static bool
10356 pa_legitimate_constant_p (machine_mode mode, rtx x)
10357 {
10358   if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10359     return false;
10360 
10361   if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10362     return false;
10363 
10364   /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10365      legitimate constants.  The other variants can't be handled by
10366      the move patterns after reload starts.  */
10367   if (tls_referenced_p (x))
10368     return false;
10369 
10370   if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10371     return false;
10372 
10373   if (TARGET_64BIT
10374       && HOST_BITS_PER_WIDE_INT > 32
10375       && GET_CODE (x) == CONST_INT
10376       && !reload_in_progress
10377       && !reload_completed
10378       && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
10379       && !pa_cint_ok_for_move (UINTVAL (x)))
10380     return false;
10381 
10382   if (function_label_operand (x, mode))
10383     return false;
10384 
10385   return true;
10386 }
10387 
10388 /* Implement TARGET_SECTION_TYPE_FLAGS.  */
10389 
10390 static unsigned int
10391 pa_section_type_flags (tree decl, const char *name, int reloc)
10392 {
10393   unsigned int flags;
10394 
10395   flags = default_section_type_flags (decl, name, reloc);
10396 
10397   /* Function labels are placed in the constant pool.  This can
10398      cause a section conflict if decls are put in ".data.rel.ro"
10399      or ".data.rel.ro.local" using the __attribute__ construct.  */
10400   if (strcmp (name, ".data.rel.ro") == 0
10401       || strcmp (name, ".data.rel.ro.local") == 0)
10402     flags |= SECTION_WRITE | SECTION_RELRO;
10403 
10404   return flags;
10405 }
10406 
10407 /* pa_legitimate_address_p recognizes an RTL expression that is a
10408    valid memory address for an instruction.  The MODE argument is the
10409    machine mode for the MEM expression that wants to use this address.
10410 
10411    On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10412    REG+REG, and REG+(REG*SCALE).  The indexed address forms are only
10413    available with floating point loads and stores, and integer loads.
10414    We get better code by allowing indexed addresses in the initial
10415    RTL generation.
10416 
10417    The acceptance of indexed addresses as legitimate implies that we
10418    must provide patterns for doing indexed integer stores, or the move
10419    expanders must force the address of an indexed store to a register.
10420    We have adopted the latter approach.
10421 
10422    Another function of pa_legitimate_address_p is to ensure that
10423    the base register is a valid pointer for indexed instructions.
10424    On targets that have non-equivalent space registers, we have to
10425    know at the time of assembler output which register in a REG+REG
10426    pair is the base register.  The REG_POINTER flag is sometimes lost
10427    in reload and the following passes, so it can't be relied on during
10428    code generation.  Thus, we either have to canonicalize the order
10429    of the registers in REG+REG indexed addresses, or treat REG+REG
10430    addresses separately and provide patterns for both permutations.
10431 
10432    The latter approach requires several hundred additional lines of
10433    code in pa.md.  The downside to canonicalizing is that a PLUS
10434    in the wrong order can't combine to form a scaled indexed
10435    memory operand.  As we won't need to canonicalize the operands if
10436    the REG_POINTER lossage can be fixed, it seems better to canonicalize.
10437 
10438    We initially break out scaled indexed addresses in canonical order
10439    in pa_emit_move_sequence.  LEGITIMIZE_ADDRESS also canonicalizes
10440    scaled indexed addresses during RTL generation.  However, fold_rtx
10441    has its own opinion on how the operands of a PLUS should be ordered.
10442    If one of the operands is equivalent to a constant, it will make
10443    that operand the second operand.  As the base register is likely to
10444    be equivalent to a SYMBOL_REF, we have made it the second operand.
10445 
10446    pa_legitimate_address_p accepts REG+REG as legitimate when the
10447    operands are in the order INDEX+BASE on targets with non-equivalent
10448    space registers, and in any order on targets with equivalent space
10449    registers.  It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10450 
10451    We treat a SYMBOL_REF as legitimate if it is part of the current
10452    function's constant-pool, because such addresses can actually be
10453    output as REG+SMALLINT.  */
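
/* Some accepted forms, shown as RTL (illustrative; all are subject
   to the mode and strictness checks below):

     (reg)                                    plain base register
     (plus (reg) (const_int 12))              REG+SMALLINT
     (plus (mult (reg) (const_int 4)) (reg))  scaled index, SImode
     (lo_sum (reg) (symbol_ref "x"))          literal reference  */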
10454 
10455 static bool
10456 pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
10457 {
10458   if ((REG_P (x)
10459        && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
10460 		  : REG_OK_FOR_BASE_P (x)))
10461       || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
10462 	   || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
10463 	  && REG_P (XEXP (x, 0))
10464 	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10465 		     : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
10466     return true;
10467 
10468   if (GET_CODE (x) == PLUS)
10469     {
10470       rtx base, index;
10471 
10472       /* For REG+REG, the base register should be in XEXP (x, 1),
10473 	 so check it first.  */
10474       if (REG_P (XEXP (x, 1))
10475 	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
10476 		     : REG_OK_FOR_BASE_P (XEXP (x, 1))))
10477 	base = XEXP (x, 1), index = XEXP (x, 0);
10478       else if (REG_P (XEXP (x, 0))
10479 	       && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10480 			  : REG_OK_FOR_BASE_P (XEXP (x, 0))))
10481 	base = XEXP (x, 0), index = XEXP (x, 1);
10482       else
10483 	return false;
10484 
10485       if (GET_CODE (index) == CONST_INT)
10486 	{
10487 	  if (INT_5_BITS (index))
10488 	    return true;
10489 
10490 	  /* When INT14_OK_STRICT is false, a secondary reload is needed
10491 	     to adjust the displacement of SImode and DImode floating point
10492 	     instructions but this may fail when the register also needs
10493 	     reloading.  So, we return false when STRICT is true.  We
10494 	     also reject long displacements for float mode addresses since
10495 	     the majority of accesses will use floating point instructions
10496 	     that don't support 14-bit offsets.  */
10497 	  if (!INT14_OK_STRICT
10498 	      && (strict || !(reload_in_progress || reload_completed))
10499 	      && mode != QImode
10500 	      && mode != HImode)
10501 	    return false;
10502 
10503 	  return base14_operand (index, mode);
10504 	}
10505 
10506       if (!TARGET_DISABLE_INDEXING
10507 	  /* Only accept the "canonical" INDEX+BASE operand order
10508 	     on targets with non-equivalent space registers.  */
10509 	  && (TARGET_NO_SPACE_REGS
10510 	      ? REG_P (index)
10511 	      : (base == XEXP (x, 1) && REG_P (index)
10512 		 && (reload_completed
10513 		     || (reload_in_progress && HARD_REGISTER_P (base))
10514 		     || REG_POINTER (base))
10515 		 && (reload_completed
10516 		     || (reload_in_progress && HARD_REGISTER_P (index))
10517 		     || !REG_POINTER (index))))
10518 	  && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
10519 	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
10520 		     : REG_OK_FOR_INDEX_P (index))
10521 	  && borx_reg_operand (base, Pmode)
10522 	  && borx_reg_operand (index, Pmode))
10523 	return true;
10524 
10525       if (!TARGET_DISABLE_INDEXING
10526 	  && GET_CODE (index) == MULT
10527 	  /* Only accept base operands with the REG_POINTER flag prior to
10528 	     reload on targets with non-equivalent space registers.  */
10529 	  && (TARGET_NO_SPACE_REGS
10530 	      || (base == XEXP (x, 1)
10531 		  && (reload_completed
10532 		      || (reload_in_progress && HARD_REGISTER_P (base))
10533 		      || REG_POINTER (base))))
10534 	  && REG_P (XEXP (index, 0))
10535 	  && GET_MODE (XEXP (index, 0)) == Pmode
10536 	  && MODE_OK_FOR_SCALED_INDEXING_P (mode)
10537 	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
10538 		     : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
10539 	  && GET_CODE (XEXP (index, 1)) == CONST_INT
10540 	  && INTVAL (XEXP (index, 1))
10541 	     == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
10542 	  && borx_reg_operand (base, Pmode))
10543 	return true;
10544 
10545       return false;
10546     }
10547 
10548   if (GET_CODE (x) == LO_SUM)
10549     {
10550       rtx y = XEXP (x, 0);
10551 
10552       if (GET_CODE (y) == SUBREG)
10553 	y = SUBREG_REG (y);
10554 
10555       if (REG_P (y)
10556 	  && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
10557 		     : REG_OK_FOR_BASE_P (y)))
10558 	{
10559 	  /* Needed for -fPIC */
10560 	  if (mode == Pmode
10561 	      && GET_CODE (XEXP (x, 1)) == UNSPEC)
10562 	    return true;
10563 
10564 	  if (!INT14_OK_STRICT
10565 	      && (strict || !(reload_in_progress || reload_completed))
10566 	      && mode != QImode
10567 	      && mode != HImode)
10568 	    return false;
10569 
10570 	  if (CONSTANT_P (XEXP (x, 1)))
10571 	    return true;
10572 	}
10573       return false;
10574     }
10575 
10576   if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
10577     return true;
10578 
10579   return false;
10580 }
10581 
10582 /* Look for machine dependent ways to make the invalid address AD a
10583    valid address.
10584 
10585    For the PA, transform:
10586 
10587         memory(X + <large int>)
10588 
10589    into:
10590 
10591         if (<large int> & mask) >= 16
10592           Y = (<large int> & ~mask) + mask + 1  Round up.
10593         else
10594           Y = (<large int> & ~mask)             Round down.
10595         Z = X + Y
10596         memory (Z + (<large int> - Y));
10597 
10598    This makes reload inheritance and reload_cse work better since Z
10599    can be reused.
10600 
10601    There may be more opportunities to improve code with this hook.  */
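
/* A worked example (illustrative): with a floating-point mode and
   !INT14_OK_STRICT, mask is 0x1f.  For X + 0x1234, the low bits 0x14
   are >= 0x10, so Y rounds up to 0x1240 and the address is rewritten
   as (X + 0x1240) + (-0xc).  The new base X + 0x1240 is reloaded into
   a register and the residual 5-bit displacement -0xc remains.  */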
10602 
10603 rtx
10604 pa_legitimize_reload_address (rtx ad, machine_mode mode,
10605 			      int opnum, int type,
10606 			      int ind_levels ATTRIBUTE_UNUSED)
10607 {
10608   long offset, newoffset, mask;
10609   rtx new_rtx, temp = NULL_RTX;
10610 
10611   mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
10612 	  && !INT14_OK_STRICT ? 0x1f : 0x3fff);
10613 
10614   if (optimize && GET_CODE (ad) == PLUS)
10615     temp = simplify_binary_operation (PLUS, Pmode,
10616 				      XEXP (ad, 0), XEXP (ad, 1));
10617 
10618   new_rtx = temp ? temp : ad;
10619 
10620   if (optimize
10621       && GET_CODE (new_rtx) == PLUS
10622       && GET_CODE (XEXP (new_rtx, 0)) == REG
10623       && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
10624     {
10625       offset = INTVAL (XEXP ((new_rtx), 1));
10626 
10627       /* Choose rounding direction.  Round up if we are >= halfway.  */
10628       if ((offset & mask) >= ((mask + 1) / 2))
10629 	newoffset = (offset & ~mask) + mask + 1;
10630       else
10631 	newoffset = offset & ~mask;
10632 
10633       /* Ensure that long displacements are aligned.  */
10634       if (mask == 0x3fff
10635 	  && (GET_MODE_CLASS (mode) == MODE_FLOAT
10636 	      || (TARGET_64BIT && (mode) == DImode)))
10637 	newoffset &= ~(GET_MODE_SIZE (mode) - 1);
10638 
10639       if (newoffset != 0 && VAL_14_BITS_P (newoffset))
10640 	{
10641 	  temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
10642 			       GEN_INT (newoffset));
10643 	  ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
10644 	  push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
10645 		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10646 		       opnum, (enum reload_type) type);
10647 	  return ad;
10648 	}
10649     }
10650 
10651   return NULL_RTX;
10652 }
10653 
10654 /* Output address vector.  */
10655 
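
/* For a three-entry table on an ELF target with GAS, the output is
   roughly (label numbers and spelling illustrative):

	.align 4
   L$0042:
	.begin_brtab
	.word L$0043
	.word L$0044
	.word L$0045
	.end_brtab  */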
10656 void
10657 pa_output_addr_vec (rtx lab, rtx body)
10658 {
10659   int idx, vlen = XVECLEN (body, 0);
10660 
10661   if (!TARGET_SOM)
10662     fputs ("\t.align 4\n", asm_out_file);
10663   targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10664   if (TARGET_GAS)
10665     fputs ("\t.begin_brtab\n", asm_out_file);
10666   for (idx = 0; idx < vlen; idx++)
10667     {
10668       ASM_OUTPUT_ADDR_VEC_ELT
10669 	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10670     }
10671   if (TARGET_GAS)
10672     fputs ("\t.end_brtab\n", asm_out_file);
10673 }
10674 
10675 /* Output address difference vector.  */
10676 
10677 void
10678 pa_output_addr_diff_vec (rtx lab, rtx body)
10679 {
10680   rtx base = XEXP (XEXP (body, 0), 0);
10681   int idx, vlen = XVECLEN (body, 1);
10682 
10683   targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10684   if (TARGET_GAS)
10685     fputs ("\t.begin_brtab\n", asm_out_file);
10686   for (idx = 0; idx < vlen; idx++)
10687     {
10688       ASM_OUTPUT_ADDR_DIFF_ELT
10689 	(asm_out_file,
10690 	 body,
10691 	 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10692 	 CODE_LABEL_NUMBER (base));
10693     }
10694   if (TARGET_GAS)
10695     fputs ("\t.end_brtab\n", asm_out_file);
10696 }
10697 
10698 /* This is a helper function for the other atomic operations.  This function
10699    emits a loop that contains SEQ that iterates until a compare-and-swap
10700    operation at the end succeeds.  MEM is the memory to be modified.  SEQ is
10701    a set of instructions that takes a value from OLD_REG as an input and
10702    produces a value in NEW_REG as an output.  Before SEQ, OLD_REG will be
10703    set to the current contents of MEM.  After SEQ, a compare-and-swap will
10704    attempt to update MEM with NEW_REG.  The function returns true when the
10705    loop was generated successfully.  */
10706 
10707 static bool
10708 pa_expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
10709 {
10710   machine_mode mode = GET_MODE (mem);
10711   rtx_code_label *label;
10712   rtx cmp_reg, success, oldval;
10713 
10714   /* The loop we want to generate looks like
10715 
10716         cmp_reg = mem;
10717       label:
10718         old_reg = cmp_reg;
10719         seq;
10720         (success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg)
10721         if (!success)
10722           goto label;
10723 
10724      Note that we only do the plain load from memory once.  Subsequent
10725      iterations use the value loaded by the compare-and-swap pattern.  */
10726 
10727   label = gen_label_rtx ();
10728   cmp_reg = gen_reg_rtx (mode);
10729 
10730   emit_move_insn (cmp_reg, mem);
10731   emit_label (label);
10732   emit_move_insn (old_reg, cmp_reg);
10733   if (seq)
10734     emit_insn (seq);
10735 
10736   success = NULL_RTX;
10737   oldval = cmp_reg;
10738   if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg,
10739                                        new_reg, false, MEMMODEL_SYNC_SEQ_CST,
10740                                        MEMMODEL_RELAXED))
10741     return false;
10742 
10743   if (oldval != cmp_reg)
10744     emit_move_insn (cmp_reg, oldval);
10745 
10746   /* Mark this jump predicted not taken.  */
10747   emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
10748                            GET_MODE (success), 1, label,
10749 			   profile_probability::guessed_never ());
10750   return true;
10751 }
10752 
10753 /* This function tries to implement an atomic exchange operation using a
10754    compare_and_swap loop. VAL is written to *MEM.  The previous contents of
10755    *MEM are returned, using TARGET if possible.  No memory model is required
10756    since a compare_and_swap loop is seq-cst.  */
10757 
10758 rtx
10759 pa_maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val)
10760 {
10761   machine_mode mode = GET_MODE (mem);
10762 
10763   if (can_compare_and_swap_p (mode, true))
10764     {
10765       if (!target || !register_operand (target, mode))
10766         target = gen_reg_rtx (mode);
10767       if (pa_expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
10768         return target;
10769     }
10770 
10771   return NULL_RTX;
10772 }
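
/* A sketch of typical use from an atomic expander (illustrative):

     rtx ret = pa_maybe_emit_compare_and_swap_exchange_loop (target,
							     mem, val);
     if (ret == NULL_RTX)
       FAIL;  */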
10773 
10774 /* Implement TARGET_CALLEE_COPIES.  The callee is responsible for copying
10775    arguments passed by hidden reference in the 32-bit HP runtime.  Users
10776    can override this behavior for better compatibility with openmp at the
10777    can override this behavior for better compatibility with OpenMP at the
10778    in the 64-bit HP runtime.  */
10779 
10780 static bool
10781 pa_callee_copies (cumulative_args_t, const function_arg_info &)
10782 {
10783   return !TARGET_CALLER_COPIES;
10784 }
10785 
10786 /* Implement TARGET_HARD_REGNO_NREGS.  */
10787 
10788 static unsigned int
10789 pa_hard_regno_nregs (unsigned int regno ATTRIBUTE_UNUSED, machine_mode mode)
10790 {
10791   return PA_HARD_REGNO_NREGS (regno, mode);
10792 }
10793 
10794 /* Implement TARGET_HARD_REGNO_MODE_OK.  */
10795 
10796 static bool
10797 pa_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10798 {
10799   return PA_HARD_REGNO_MODE_OK (regno, mode);
10800 }
10801 
10802 /* Implement TARGET_STARTING_FRAME_OFFSET.
10803 
10804    On the 32-bit ports, we reserve one slot for the previous frame
10805    pointer and one fill slot.  The fill slot is for compatibility
10806    with HP compiled programs.  On the 64-bit ports, we reserve one
10807    slot for the previous frame pointer.  */
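
/* Both layouts reserve 8 bytes: two 4-byte slots in the 32-bit
   runtime and a single 8-byte slot in the 64-bit runtime, so one
   constant serves both.  */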
10808 
10809 static HOST_WIDE_INT
10810 pa_starting_frame_offset (void)
10811 {
10812   return 8;
10813 }
10814 
10815 /* Figure out the size in words of the function argument.  The size
10816    returned by this function should always be greater than zero because
10817    we pass variable and zero-sized objects by reference.  */
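
/* For example (illustrative): a 9-byte BLKmode aggregate occupies
   CEIL (9, 4) == 3 words in the 32-bit runtime and CEIL (9, 8) == 2
   words in the 64-bit runtime.  */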
10818 
10819 HOST_WIDE_INT
10820 pa_function_arg_size (machine_mode mode, const_tree type)
10821 {
10822   HOST_WIDE_INT size;
10823 
10824   size = mode != BLKmode ? GET_MODE_SIZE (mode) : int_size_in_bytes (type);
10825   return CEIL (size, UNITS_PER_WORD);
10826 }
10827 
10828 #include "gt-pa.h"
10829