/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992-2018 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "varasm.h"
#include "calls.h"
#include "output.h"
#include "except.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "cfgrtl.h"
#include "opts.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"

/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
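/* The bypass only applies when the value OUT_INSN produces has the same
   size as the value IN_INSN stores; the mode-size comparison below
   enforces this.  */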
int
pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  machine_mode store_mode;
  machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
	  && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}


#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (machine_mode mode, reg_class_t,
				    reg_class_t);
static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static inline rtx force_mode (machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
			     rtx, rtx);
static bool forward_branch_p (rtx_insn *);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx_insn *);
static int compute_clrmem_length (rtx_insn *);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
static void pa_linux_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *);
static int pa_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int pa_issue_rate (void);
static int pa_reloc_rw_mask (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (scalar_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
static void pa_file_end (void);
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t, machine_mode,
				  const_tree, bool);
static int pa_arg_partial_bytes (cumulative_args_t, machine_mode,
				 tree, bool);
static void pa_function_arg_advance (cumulative_args_t, machine_mode,
				     const_tree, bool);
static rtx pa_function_arg (cumulative_args_t, machine_mode,
			    const_tree, bool);
static pad_direction pa_function_arg_padding (machine_mode, const_tree);
static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
					machine_mode,
					secondary_reload_info *);
static bool pa_secondary_memory_needed (machine_mode,
					reg_class_t, reg_class_t);
static void pa_extra_live_on_entry (bitmap);
static machine_mode pa_promote_function_mode (const_tree,
						   machine_mode, int *,
						   const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (machine_mode, rtx);
static bool pa_legitimate_constant_p (machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (machine_mode, rtx, bool);
static bool pa_callee_copies (cumulative_args_t, machine_mode,
			      const_tree, bool);
static unsigned int pa_hard_regno_nregs (unsigned int, machine_mode);
static bool pa_hard_regno_mode_ok (unsigned int, machine_mode);
static bool pa_modes_tieable_p (machine_mode, machine_mode);
static bool pa_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
static HOST_WIDE_INT pa_starting_frame_offset (void);

/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;

/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END pa_file_end

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES pa_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING pa_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED pa_secondary_memory_needed

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_false

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS pa_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK pa_hard_regno_mode_ok
#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P pa_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS pa_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings

#undef TARGET_STARTING_FRAME_OFFSET
#define TARGET_STARTING_FRAME_OFFSET pa_starting_frame_offset

struct gcc_target targetm = TARGET_INITIALIZER;

/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */
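  /* For example, an option such as -mfixed-range=fr4-fr31 would fix all
     of fr4 through fr31, and -mfixed-range=fr4-fr7,fr10-fr11 shows the
     comma-separated form; the exact register spellings are whatever
     decode_reg_name accepts.  */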
  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of -mfixed-range must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}

/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
	switch (opt->opt_index)
	  {
	  case OPT_mfixed_range_:
	    fix_range (opt->arg);
	    break;

	  default:
	    gcc_unreachable ();
	  }
      }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64-bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "-freorder-blocks-and-partition does not work "
              "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}

enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];

static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
		      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
						 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
				   PA_BUILTIN_FABSQ, BUILT_IN_MD,
				   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
				   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
				   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
				   PA_BUILTIN_INFQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
                                   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}

static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		   machine_mode mode ATTRIBUTE_UNUSED,
		   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
	machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = const_double_from_real_value (inf, target_mode);

	tmp = validize_mem (force_const_mem (target_mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (target_mode);

	emit_move_insn (target, tmp);
	return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}

/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
pa_symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return symbolic_operand (x, VOIDmode);
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi can be used.  */
  return (VAL_14_BITS_P (ival)
	  || pa_ldil_cint_p (ival)
	  || pa_zdepi_cint_p (ival));
}

/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
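/* For example, 0x12345000 is accepted: its low 11 bits are zero and
   bits 31 and above are all zero.  0x80000000 is rejected on a 64-bit
   HOST_WIDE_INT: bit 31 is set but the higher bits are clear, so the
   value would change sign when widened from 32 to 64 bits.  */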
int
pa_ldil_cint_p (unsigned HOST_WIDE_INT ival)
{
  unsigned HOST_WIDE_INT x;

  x = ival & (((unsigned HOST_WIDE_INT) -1 << 31) | 0x7ff);
  return x == 0 || x == ((unsigned HOST_WIDE_INT) -1 << 31);
}

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
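/* Worked example: for x = 0x1e0 (binary 111100000, a 4-bit field of
   ones shifted left by 5), lsb_mask = 0x20 and
   t = ((0x1e0 >> 4) + 0x20) & ~0x1f = 0x20, a power of two, so the
   value is accepted.  For x = 0x41 the same computation yields t = 5,
   not a power of two, so that value is rejected.  */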
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit patterns like these:
   0....01....1
   1....10....0
   1..10..01..1  */
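/* For instance, with mask = ...111100001111 (high bits all ones),
   ~mask = ...000011110000; adding its lowest set bit (0x10) collapses
   the run of ones into the single bit 0x100, a power of two, so the
   mask is accepted.  */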
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* True iff depi can be used to compute (reg | MASK).  */
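/* Here MASK must be a contiguous run of ones, e.g. 0x3e0: adding its
   lowest set bit (0x20) gives 0x400, a power of two, so that mask is
   accepted.  */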
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx_insn *insn;

      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
	 and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
	{
	  /* Extract LABEL_REF.  */
	  if (GET_CODE (orig) == CONST)
	    orig = XEXP (XEXP (orig, 0), 0);
	  /* Extract CODE_LABEL.  */
	  orig = XEXP (orig, 0);
	  add_reg_note (insn, REG_LABEL_OPERAND, orig);
	  /* Make sure we have a label and not a note.  */
	  if (LABEL_P (orig))
	    LABEL_NUSES (orig)++;
	}
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx_insn *insn;
      rtx tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
	 result.  This allows the sequence to be deleted when the final
	 result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
		 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
	{
	  /* Force function label into memory in word mode.  */
	  orig = XEXP (force_const_mem (word_mode, orig), 0);
	  /* Load plabel address from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	  emit_move_insn (reg, pic_ref);
	  /* Now load address of function descriptor.  */
	  pic_ref = gen_rtx_MEM (Pmode, reg);
	}
      else
	{
	  /* Load symbol reference from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	}

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant (Pmode, base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
			   LCT_CONST, Pmode, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, tmp, t1, t2, tp;
  rtx_insn *insn;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
      case TLS_MODEL_GLOBAL_DYNAMIC:
	tmp = gen_reg_rtx (Pmode);
	if (flag_pic)
	  emit_insn (gen_tgd_load_pic (tmp, addr));
	else
	  emit_insn (gen_tgd_load (tmp, addr));
	ret = hppa_tls_call (tmp);
	break;

      case TLS_MODEL_LOCAL_DYNAMIC:
	ret = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	start_sequence ();
	if (flag_pic)
	  emit_insn (gen_tld_load_pic (tmp, addr));
	else
	  emit_insn (gen_tld_load (tmp, addr));
	t1 = hppa_tls_call (tmp);
	insn = get_insns ();
	end_sequence ();
	t2 = gen_reg_rtx (Pmode);
	emit_libcall_block (insn, t2, t1,
			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					    UNSPEC_TLSLDBASE));
	emit_insn (gen_tld_offset_load (ret, addr, t2));
	break;

      case TLS_MODEL_INITIAL_EXEC:
	tp = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	if (flag_pic)
	  emit_insn (gen_tie_load_pic (tmp, addr));
	else
	  emit_insn (gen_tie_load (tmp, addr));
	emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
	break;

      case TLS_MODEL_LOCAL_EXEC:
	tp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	emit_insn (gen_tle_load (ret, addr, tp));
	break;

      default:
	gcc_unreachable ();
    }

  return ret;
}

/* Helper for hppa_legitimize_address.  Given X, return true if it
   is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.

   These respectively represent canonical shift-add rtxs or scaled
   memory addresses.  */
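/* For example, (ashift (reg) (const_int 3)) and (mult (reg) (const_int 8))
   both describe the multiply-by-8 that a shift-add instruction such as
   sh3add can perform.  */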
static bool
mem_shadd_or_shadd_rtx_p (rtx x)
{
  return ((GET_CODE (x) == ASHIFT
	   || GET_CODE (x) == MULT)
	  && GET_CODE (XEXP (x, 1)) == CONST_INT
	  && ((GET_CODE (x) == ASHIFT
	       && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
	      || (GET_CODE (x) == MULT
		  && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= (mask + 1) / 2
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)


   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Note that the addresses passed into hppa_legitimize_address always
   come from a MEM, so we only have to match the MULT form on incoming
   addresses.  But to be future proof we also match the ASHIFT form.

   However, this routine always places those shift-add sequences into
   registers, so we have to generate the ASHIFT form as our output.

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
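/* Worked example for a MODE_INT reference (mask 0x3fff):
   memory (X + 0x6008) has (0x6008 & 0x3fff) = 0x2008 >= 0x2000, so
   Y = 0x8000 and the reference becomes memory ((X + 0x8000) - 0x1ff8),
   where -0x1ff8 fits in a 14-bit displacement.  */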

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine can
	 not handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (mem_shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
      && (OBJECT_P (XEXP (x, 1))
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
      if (GET_CODE (XEXP (x, 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      rtx reg1, reg2;
      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg2,
						      GEN_INT (shift_val)),
				      reg1));
    }

  /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
      && (mode == SFmode || mode == DFmode))
    {
      int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));

      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      /* Try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_ASHIFT (Pmode,
					      XEXP (XEXP (XEXP (x, 0), 0), 0),
					      GEN_INT (shift_val)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  int val = INTVAL (XEXP (idx, 1));
	  val /= (1 << shift_val);

	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_ASHIFT (Pmode, reg1,
						    GEN_INT (shift_val)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode,
			    gen_rtx_PLUS (Pmode,
					  gen_rtx_ASHIFT (Pmode, reg2,
							  GEN_INT (shift_val)),
					  reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg1,
						      GEN_INT (shift_val)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));

    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (mem_shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is large but can be divided evenly by
	     shadd_const, it can instead be scaled down and added to
	     (reg).  This allows more scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
	        reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode,
							 reg2,
							 GEN_INT (shift_val)),
					 reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
	    {
	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode, regx2,
							 GEN_INT (shift_val)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}

/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			 reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
           || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}

/* For the HPPA, REG, REG+CONST and LO_SUM addresses cost 1, addresses
   involving symbolic constants (HIGH) cost 2, and everything else
   costs 4.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as pa_legitimate_address_p.  */

static int
hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
		   addr_space_t as ATTRIBUTE_UNUSED,
		   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
		int opno ATTRIBUTE_UNUSED,
		int *total, bool speed ATTRIBUTE_UNUSED)
{
  int factor;
  int code = GET_CODE (x);

  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
	*total = 0;
      else if (INT_14_BITS (x))
	*total = 1;
      else
	*total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
	  && outer_code != SET)
	*total = 0;
      else
        *total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (3);
	  return true;
	}

      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (mode) / 4;
      if (factor == 0)
	factor = 1;

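      /* E.g., a DImode multiply on a 32-bit target has factor 2 and is
	 costed at four times the SImode multiply.  */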
1547       if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1548 	*total = factor * factor * COSTS_N_INSNS (8);
1549       else
1550 	*total = factor * factor * COSTS_N_INSNS (20);
1551       return true;
1552 
1553     case DIV:
1554       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1555 	{
1556 	  *total = COSTS_N_INSNS (14);
1557 	  return true;
1558 	}
1559       /* FALLTHRU */
1560 
1561     case UDIV:
1562     case MOD:
1563     case UMOD:
1564       /* A mode size N times larger than SImode needs O(N*N) more insns.  */
1565       factor = GET_MODE_SIZE (mode) / 4;
1566       if (factor == 0)
1567 	factor = 1;
1568 
1569       *total = factor * factor * COSTS_N_INSNS (60);
1570       return true;
1571 
1572     case PLUS: /* this includes shNadd insns */
1573     case MINUS:
1574       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1575 	{
1576 	  *total = COSTS_N_INSNS (3);
1577 	  return true;
1578 	}
1579 
1580       /* A size N times larger than UNITS_PER_WORD needs N times as
1581 	 many insns, taking N times as long.  */
1582       factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
1583       if (factor == 0)
1584 	factor = 1;
1585       *total = factor * COSTS_N_INSNS (1);
1586       return true;
1587 
1588     case ASHIFT:
1589     case ASHIFTRT:
1590     case LSHIFTRT:
1591       *total = COSTS_N_INSNS (1);
1592       return true;
1593 
1594     default:
1595       return false;
1596     }
1597 }
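
/* A worked example of the quadratic MULT penalty above: a DImode
   multiply on a 32-bit target that can't use the FP multiplier has
   factor = 8 / 4 = 2, so the cost is 2 * 2 * COSTS_N_INSNS (20)
   = COSTS_N_INSNS (80), versus COSTS_N_INSNS (1) for a shift or a
   word-sized add.  */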
1598 
1599 /* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
1600    new rtx with the correct mode.  */
1601 static inline rtx
1602 force_mode (machine_mode mode, rtx orig)
1603 {
1604   if (mode == GET_MODE (orig))
1605     return orig;
1606 
1607   gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
1608 
1609   return gen_rtx_REG (mode, REGNO (orig));
1610 }
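
/* For example, force_mode (DImode, gen_rtx_REG (SImode, 28)) returns
   (reg:DI 28).  The assertion above restricts this to hard registers,
   where renaming the mode of the REG is safe.  */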
1611 
1612 /* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
1613 
1614 static bool
1615 pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1616 {
1617   return tls_referenced_p (x);
1618 }
1619 
1620 /* Emit insns to move operands[1] into operands[0].
1621 
1622    Return 1 if we have written out everything that needs to be done to
1623    do the move.  Otherwise, return 0 and the caller will emit the move
1624    normally.
1625 
1626    Note SCRATCH_REG may not be in the proper mode depending on how it
1627    will be used.  This routine is responsible for creating a new copy
1628    of SCRATCH_REG in the proper mode.  */
1629 
1630 int
1631 pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
1632 {
1633   register rtx operand0 = operands[0];
1634   register rtx operand1 = operands[1];
1635   register rtx tem;
1636 
1637   /* We can only handle indexed addresses in the destination operand
1638      of floating point stores.  Thus, we need to break out indexed
1639      addresses from the destination operand.  */
1640   if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1641     {
1642       gcc_assert (can_create_pseudo_p ());
1643 
1644       tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1645       operand0 = replace_equiv_address (operand0, tem);
1646     }
1647 
1648   /* On targets with non-equivalent space registers, break out unscaled
1649      indexed addresses from the source operand before the final CSE.
1650      We have to do this because the REG_POINTER flag is not correctly
1651      carried through various optimization passes and CSE may substitute
1652      a pseudo without the pointer set for one with the pointer set.  As
1653      a result, we lose various opportunities to create insns with
1654      unscaled indexed addresses.  */
1655   if (!TARGET_NO_SPACE_REGS
1656       && !cse_not_expected
1657       && GET_CODE (operand1) == MEM
1658       && GET_CODE (XEXP (operand1, 0)) == PLUS
1659       && REG_P (XEXP (XEXP (operand1, 0), 0))
1660       && REG_P (XEXP (XEXP (operand1, 0), 1)))
1661     operand1
1662       = replace_equiv_address (operand1,
1663 			       copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1664 
1665   if (scratch_reg
1666       && reload_in_progress && GET_CODE (operand0) == REG
1667       && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1668     operand0 = reg_equiv_mem (REGNO (operand0));
1669   else if (scratch_reg
1670 	   && reload_in_progress && GET_CODE (operand0) == SUBREG
1671 	   && GET_CODE (SUBREG_REG (operand0)) == REG
1672 	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1673     {
1674      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1675 	the code which tracks sets/uses for delete_output_reload.  */
1676       rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1677 				 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
1678 				 SUBREG_BYTE (operand0));
1679       operand0 = alter_subreg (&temp, true);
1680     }
1681 
1682   if (scratch_reg
1683       && reload_in_progress && GET_CODE (operand1) == REG
1684       && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1685     operand1 = reg_equiv_mem (REGNO (operand1));
1686   else if (scratch_reg
1687 	   && reload_in_progress && GET_CODE (operand1) == SUBREG
1688 	   && GET_CODE (SUBREG_REG (operand1)) == REG
1689 	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1690     {
1691      /* We must not alter SUBREG_BYTE (operand1) since that would confuse
1692 	the code which tracks sets/uses for delete_output_reload.  */
1693       rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1694 				 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
1695 				 SUBREG_BYTE (operand1));
1696       operand1 = alter_subreg (&temp, true);
1697     }
1698 
1699   if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1700       && ((tem = find_replacement (&XEXP (operand0, 0)))
1701 	  != XEXP (operand0, 0)))
1702     operand0 = replace_equiv_address (operand0, tem);
1703 
1704   if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1705       && ((tem = find_replacement (&XEXP (operand1, 0)))
1706 	  != XEXP (operand1, 0)))
1707     operand1 = replace_equiv_address (operand1, tem);
1708 
1709   /* Handle secondary reloads for loads/stores of FP registers from
1710      REG+D addresses where D does not fit in 5 or 14 bits, including
1711      (subreg (mem (addr))) cases, and reloads for other unsupported
1712      memory operands.  */
1713   if (scratch_reg
1714       && FP_REG_P (operand0)
1715       && (MEM_P (operand1)
1716 	  || (GET_CODE (operand1) == SUBREG
1717 	      && MEM_P (XEXP (operand1, 0)))))
1718     {
1719       rtx op1 = operand1;
1720 
1721       if (GET_CODE (op1) == SUBREG)
1722 	op1 = XEXP (op1, 0);
1723 
1724       if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
1725 	{
1726 	  if (!(TARGET_PA_20
1727 		&& !TARGET_ELF32
1728 		&& INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1729 	      && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
1730 	    {
1731 	      /* SCRATCH_REG will hold an address and maybe the actual data.
1732 		 We want it in WORD_MODE regardless of what mode it was
1733 		 originally given to us.  */
1734 	      scratch_reg = force_mode (word_mode, scratch_reg);
1735 
1736 	      /* D might not fit in 14 bits either; for such cases load D
1737 		 into scratch reg.  */
1738 	      if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1739 		{
1740 		  emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
1741 		  emit_move_insn (scratch_reg,
1742 				  gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
1743 						  Pmode,
1744 						  XEXP (XEXP (op1, 0), 0),
1745 						  scratch_reg));
1746 		}
1747 	      else
1748 		emit_move_insn (scratch_reg, XEXP (op1, 0));
1749 	      op1 = replace_equiv_address (op1, scratch_reg);
1750 	    }
1751 	}
1752       else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
1753 	       || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
1754 	       || IS_INDEX_ADDR_P (XEXP (op1, 0)))
1755 	{
1756 	  /* Load memory address into SCRATCH_REG.  */
1757 	  scratch_reg = force_mode (word_mode, scratch_reg);
1758 	  emit_move_insn (scratch_reg, XEXP (op1, 0));
1759 	  op1 = replace_equiv_address (op1, scratch_reg);
1760 	}
1761       emit_insn (gen_rtx_SET (operand0, op1));
1762       return 1;
1763     }
1764   else if (scratch_reg
1765 	   && FP_REG_P (operand1)
1766 	   && (MEM_P (operand0)
1767 	       || (GET_CODE (operand0) == SUBREG
1768 		   && MEM_P (XEXP (operand0, 0)))))
1769     {
1770       rtx op0 = operand0;
1771 
1772       if (GET_CODE (op0) == SUBREG)
1773 	op0 = XEXP (op0, 0);
1774 
1775       if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
1776 	{
1777 	  if (!(TARGET_PA_20
1778 		&& !TARGET_ELF32
1779 		&& INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1780 	      && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
1781 	    {
1782 	      /* SCRATCH_REG will hold an address and maybe the actual data.
1783 		 We want it in WORD_MODE regardless of what mode it was
1784 		 originally given to us.  */
1785 	      scratch_reg = force_mode (word_mode, scratch_reg);
1786 
1787 	      /* D might not fit in 14 bits either; for such cases load D
1788 		 into scratch reg.  */
1789 	      if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1790 		{
1791 		  emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
1792 		  emit_move_insn (scratch_reg,
1793 				  gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
1794 						  Pmode,
1795 						  XEXP (XEXP (op0, 0), 0),
1796 						  scratch_reg));
1797 		}
1798 	      else
1799 		emit_move_insn (scratch_reg, XEXP (op0, 0));
1800 	      op0 = replace_equiv_address (op0, scratch_reg);
1801 	    }
1802 	}
1803       else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
1804 	       || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
1805 	       || IS_INDEX_ADDR_P (XEXP (op0, 0)))
1806 	{
1807 	  /* Load memory address into SCRATCH_REG.  */
1808 	  scratch_reg = force_mode (word_mode, scratch_reg);
1809 	  emit_move_insn (scratch_reg, XEXP (op0, 0));
1810 	  op0 = replace_equiv_address (op0, scratch_reg);
1811 	}
1812       emit_insn (gen_rtx_SET (op0, operand1));
1813       return 1;
1814     }
1815   /* Handle secondary reloads for loads of FP registers from constant
1816      expressions by forcing the constant into memory.  For the most part,
1817      this is only necessary for SImode and DImode.
1818 
1819      Use scratch_reg to hold the address of the memory location.  */
1820   else if (scratch_reg
1821 	   && CONSTANT_P (operand1)
1822 	   && FP_REG_P (operand0))
1823     {
1824       rtx const_mem, xoperands[2];
1825 
1826       if (operand1 == CONST0_RTX (mode))
1827 	{
1828 	  emit_insn (gen_rtx_SET (operand0, operand1));
1829 	  return 1;
1830 	}
1831 
1832       /* SCRATCH_REG will hold an address and maybe the actual data.  We want
1833 	 it in WORD_MODE regardless of what mode it was originally given
1834 	 to us.  */
1835       scratch_reg = force_mode (word_mode, scratch_reg);
1836 
1837       /* Force the constant into memory and put the address of the
1838 	 memory location into scratch_reg.  */
1839       const_mem = force_const_mem (mode, operand1);
1840       xoperands[0] = scratch_reg;
1841       xoperands[1] = XEXP (const_mem, 0);
1842       pa_emit_move_sequence (xoperands, Pmode, 0);
1843 
1844       /* Now load the destination register.  */
1845       emit_insn (gen_rtx_SET (operand0,
1846 			      replace_equiv_address (const_mem, scratch_reg)));
1847       return 1;
1848     }
1849   /* Handle secondary reloads for SAR.  These occur when trying to load
1850      the SAR from memory or a constant.  */
1851   else if (scratch_reg
1852 	   && GET_CODE (operand0) == REG
1853 	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1854 	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1855 	   && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
1856     {
1857       /* D might not fit in 14 bits either; for such cases load D into
1858 	 scratch reg.  */
1859       if (GET_CODE (operand1) == MEM
1860 	  && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
1861 	{
1862 	  /* We are reloading the address into the scratch register, so we
1863 	     want to make sure the scratch register is a full register.  */
1864 	  scratch_reg = force_mode (word_mode, scratch_reg);
1865 
1866 	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1867 	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1868 								        0)),
1869 						       Pmode,
1870 						       XEXP (XEXP (operand1, 0),
1871 						       0),
1872 						       scratch_reg));
1873 
1874 	  /* Now we are going to load the scratch register from memory,
1875 	     we want to load it in the same width as the original MEM,
1876 	     which must be the same as the width of the ultimate destination,
1877 	     OPERAND0.  */
1878 	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1879 
1880 	  emit_move_insn (scratch_reg,
1881 			  replace_equiv_address (operand1, scratch_reg));
1882 	}
1883       else
1884 	{
1885 	  /* We want to load the scratch register using the same mode as
1886 	     the ultimate destination.  */
1887 	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1888 
1889 	  emit_move_insn (scratch_reg, operand1);
1890 	}
1891 
1892       /* And emit the insn to set the ultimate destination.  We know that
1893 	 the scratch register has the same mode as the destination at this
1894 	 point.  */
1895       emit_move_insn (operand0, scratch_reg);
1896       return 1;
1897     }
1898 
1899   /* Handle the most common case: storing into a register.  */
1900   if (register_operand (operand0, mode))
1901     {
1902       /* Legitimize TLS symbol references.  This happens for references
1903 	 that aren't a legitimate constant.  */
1904       if (PA_SYMBOL_REF_TLS_P (operand1))
1905 	operand1 = legitimize_tls_address (operand1);
1906 
1907       if (register_operand (operand1, mode)
1908 	  || (GET_CODE (operand1) == CONST_INT
1909 	      && pa_cint_ok_for_move (UINTVAL (operand1)))
1910 	  || (operand1 == CONST0_RTX (mode))
1911 	  || (GET_CODE (operand1) == HIGH
1912 	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1913 	  /* Only `general_operands' can come here, so MEM is ok.  */
1914 	  || GET_CODE (operand1) == MEM)
1915 	{
1916 	  /* Various sets are created during RTL generation which don't
1917 	     have the REG_POINTER flag correctly set.  After the CSE pass,
1918 	     instruction recognition can fail if we don't consistently
1919 	     set this flag when performing register copies.  This should
1920 	     also improve the opportunities for creating insns that use
1921 	     unscaled indexing.  */
1922 	  if (REG_P (operand0) && REG_P (operand1))
1923 	    {
1924 	      if (REG_POINTER (operand1)
1925 		  && !REG_POINTER (operand0)
1926 		  && !HARD_REGISTER_P (operand0))
1927 		copy_reg_pointer (operand0, operand1);
1928 	    }
1929 
1930 	  /* When MEMs are broken out, the REG_POINTER flag doesn't
1931 	     get set.  In some cases, we can set the REG_POINTER flag
1932 	     from the declaration for the MEM.  */
1933 	  if (REG_P (operand0)
1934 	      && GET_CODE (operand1) == MEM
1935 	      && !REG_POINTER (operand0))
1936 	    {
1937 	      tree decl = MEM_EXPR (operand1);
1938 
1939 	      /* Set the register pointer flag and register alignment
1940 		 if the declaration for this memory reference is a
1941 		 pointer type.  */
1942 	      if (decl)
1943 		{
1944 		  tree type;
1945 
1946 		  /* If this is a COMPONENT_REF, use the FIELD_DECL from
1947 		     tree operand 1.  */
1948 		  if (TREE_CODE (decl) == COMPONENT_REF)
1949 		    decl = TREE_OPERAND (decl, 1);
1950 
1951 		  type = TREE_TYPE (decl);
1952 		  type = strip_array_types (type);
1953 
1954 		  if (POINTER_TYPE_P (type))
1955 		    mark_reg_pointer (operand0, BITS_PER_UNIT);
1956 		}
1957 	    }
1958 
1959 	  emit_insn (gen_rtx_SET (operand0, operand1));
1960 	  return 1;
1961 	}
1962     }
1963   else if (GET_CODE (operand0) == MEM)
1964     {
1965       if (mode == DFmode && operand1 == CONST0_RTX (mode)
1966 	  && !(reload_in_progress || reload_completed))
1967 	{
1968 	  rtx temp = gen_reg_rtx (DFmode);
1969 
1970 	  emit_insn (gen_rtx_SET (temp, operand1));
1971 	  emit_insn (gen_rtx_SET (operand0, temp));
1972 	  return 1;
1973 	}
1974       if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1975 	{
1976 	  /* Run this case quickly.  */
1977 	  emit_insn (gen_rtx_SET (operand0, operand1));
1978 	  return 1;
1979 	}
1980       if (! (reload_in_progress || reload_completed))
1981 	{
1982 	  operands[0] = validize_mem (operand0);
1983 	  operands[1] = operand1 = force_reg (mode, operand1);
1984 	}
1985     }
1986 
1987   /* Simplify the source if we need to.
1988      Note we do have to handle function labels here, even though we do
1989      not consider them legitimate constants.  Loop optimizations can
1990      call the emit_move_* routines with one as a source.  */
1991   if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1992       || (GET_CODE (operand1) == HIGH
1993 	  && symbolic_operand (XEXP (operand1, 0), mode))
1994       || function_label_operand (operand1, VOIDmode)
1995       || tls_referenced_p (operand1))
1996     {
1997       int ishighonly = 0;
1998 
1999       if (GET_CODE (operand1) == HIGH)
2000 	{
2001 	  ishighonly = 1;
2002 	  operand1 = XEXP (operand1, 0);
2003 	}
2004       if (symbolic_operand (operand1, mode))
2005 	{
2006 	  /* Argh.  The assembler and linker can't handle arithmetic
2007 	     involving plabels.
2008 
2009 	     So we force the plabel into memory, load operand0 from
2010 	     the memory location, then add in the constant part.  */
2011 	  if ((GET_CODE (operand1) == CONST
2012 	       && GET_CODE (XEXP (operand1, 0)) == PLUS
2013 	       && function_label_operand (XEXP (XEXP (operand1, 0), 0),
2014 					  VOIDmode))
2015 	      || function_label_operand (operand1, VOIDmode))
2016 	    {
2017 	      rtx temp, const_part;
2018 
2019 	      /* Figure out what (if any) scratch register to use.  */
2020 	      if (reload_in_progress || reload_completed)
2021 		{
2022 		  scratch_reg = scratch_reg ? scratch_reg : operand0;
2023 		  /* SCRATCH_REG will hold an address and maybe the actual
2024 		     data.  We want it in WORD_MODE regardless of what mode it
2025 		     was originally given to us.  */
2026 		  scratch_reg = force_mode (word_mode, scratch_reg);
2027 		}
2028 	      else if (flag_pic)
2029 		scratch_reg = gen_reg_rtx (Pmode);
2030 
2031 	      if (GET_CODE (operand1) == CONST)
2032 		{
2033 		  /* Save away the constant part of the expression.  */
2034 		  const_part = XEXP (XEXP (operand1, 0), 1);
2035 		  gcc_assert (GET_CODE (const_part) == CONST_INT);
2036 
2037 		  /* Force the function label into memory.  */
2038 		  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2039 		}
2040 	      else
2041 		{
2042 		  /* No constant part.  */
2043 		  const_part = NULL_RTX;
2044 
2045 		  /* Force the function label into memory.  */
2046 		  temp = force_const_mem (mode, operand1);
2047 		}
2048 
2050 	      /* Get the address of the memory location.  PIC-ify it if
2051 		 necessary.  */
2052 	      temp = XEXP (temp, 0);
2053 	      if (flag_pic)
2054 		temp = legitimize_pic_address (temp, mode, scratch_reg);
2055 
2056 	      /* Put the address of the memory location into our destination
2057 		 register.  */
2058 	      operands[1] = temp;
2059 	      pa_emit_move_sequence (operands, mode, scratch_reg);
2060 
2061 	      /* Now load from the memory location into our destination
2062 		 register.  */
2063 	      operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2064 	      pa_emit_move_sequence (operands, mode, scratch_reg);
2065 
2066 	      /* And add back in the constant part.  */
2067 	      if (const_part != NULL_RTX)
2068 		expand_inc (operand0, const_part);
2069 
2070 	      return 1;
2071 	    }
2072 
2073 	  if (flag_pic)
2074 	    {
2075 	      rtx_insn *insn;
2076 	      rtx temp;
2077 
2078 	      if (reload_in_progress || reload_completed)
2079 		{
2080 		  temp = scratch_reg ? scratch_reg : operand0;
2081 		  /* TEMP will hold an address and maybe the actual
2082 		     data.  We want it in WORD_MODE regardless of what mode it
2083 		     was originally given to us.  */
2084 		  temp = force_mode (word_mode, temp);
2085 		}
2086 	      else
2087 		temp = gen_reg_rtx (Pmode);
2088 
2089 	      /* Force (const (plus (symbol) (const_int))) to memory
2090 	         if the const_int will not fit in 14 bits.  Although
2091 		 this requires a relocation, the instruction sequence
2092 		 needed to load the value is shorter.  */
2093 	      if (GET_CODE (operand1) == CONST
2094 		  && GET_CODE (XEXP (operand1, 0)) == PLUS
2095 		  && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2096 		  && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2097 		{
2098 		  rtx x, m = force_const_mem (mode, operand1);
2099 
2100 		  x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2101 		  x = replace_equiv_address (m, x);
2102 		  insn = emit_move_insn (operand0, x);
2103 		}
2104 	      else
2105 		{
2106 		  operands[1] = legitimize_pic_address (operand1, mode, temp);
2107 		  if (REG_P (operand0) && REG_P (operands[1]))
2108 		    copy_reg_pointer (operand0, operands[1]);
2109 		  insn = emit_move_insn (operand0, operands[1]);
2110 		}
2111 
2112 	      /* Put a REG_EQUAL note on this insn.  */
2113 	      set_unique_reg_note (insn, REG_EQUAL, operand1);
2114 	    }
2115 	  /* On the HPPA, references to data space are supposed to use dp,
2116 	     register 27, but showing it in the RTL inhibits various CSE
2117 	     and loop optimizations.  */
2118 	  else
2119 	    {
2120 	      rtx temp, set;
2121 
2122 	      if (reload_in_progress || reload_completed)
2123 		{
2124 		  temp = scratch_reg ? scratch_reg : operand0;
2125 		  /* TEMP will hold an address and maybe the actual
2126 		     data.  We want it in WORD_MODE regardless of what mode it
2127 		     was originally given to us.  */
2128 		  temp = force_mode (word_mode, temp);
2129 		}
2130 	      else
2131 		temp = gen_reg_rtx (mode);
2132 
2133 	      /* Loading a SYMBOL_REF into a register makes that register
2134 		 safe to be used as the base in an indexed address.
2135 
2136 		 Don't mark hard registers though.  That loses.  */
2137 	      if (GET_CODE (operand0) == REG
2138 		  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2139 		mark_reg_pointer (operand0, BITS_PER_UNIT);
2140 	      if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2141 		mark_reg_pointer (temp, BITS_PER_UNIT);
2142 
2143 	      if (ishighonly)
2144 		set = gen_rtx_SET (operand0, temp);
2145 	      else
2146 		set = gen_rtx_SET (operand0,
2147 				   gen_rtx_LO_SUM (mode, temp, operand1));
2148 
2149 	      emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2150 	      emit_insn (set);
2152 	    }
2153 	  return 1;
2154 	}
2155       else if (tls_referenced_p (operand1))
2156 	{
2157 	  rtx tmp = operand1;
2158 	  rtx addend = NULL;
2159 
2160 	  if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2161 	    {
2162 	      addend = XEXP (XEXP (tmp, 0), 1);
2163 	      tmp = XEXP (XEXP (tmp, 0), 0);
2164 	    }
2165 
2166 	  gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2167 	  tmp = legitimize_tls_address (tmp);
2168 	  if (addend)
2169 	    {
2170 	      tmp = gen_rtx_PLUS (mode, tmp, addend);
2171 	      tmp = force_operand (tmp, operands[0]);
2172 	    }
2173 	  operands[1] = tmp;
2174 	}
2175       else if (GET_CODE (operand1) != CONST_INT
2176 	       || !pa_cint_ok_for_move (UINTVAL (operand1)))
2177 	{
2178 	  rtx temp;
2179 	  rtx_insn *insn;
2180 	  rtx op1 = operand1;
2181 	  HOST_WIDE_INT value = 0;
2182 	  HOST_WIDE_INT insv = 0;
2183 	  int insert = 0;
2184 
2185 	  if (GET_CODE (operand1) == CONST_INT)
2186 	    value = INTVAL (operand1);
2187 
2188 	  if (TARGET_64BIT
2189 	      && GET_CODE (operand1) == CONST_INT
2190 	      && HOST_BITS_PER_WIDE_INT > 32
2191 	      && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2192 	    {
2193 	      HOST_WIDE_INT nval;
2194 
2195 	      /* Extract the low order 32 bits of the value and sign extend.
2196 		 If the new value is the same as the original value, we can
2197 		 use the original value as-is.  If the new value is
2198 		 different, we use it and insert the most-significant 32-bits
2199 		 of the original value into the final result.  */
2200 	      nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2201 		      ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2202 	      if (value != nval)
2203 		{
2204 #if HOST_BITS_PER_WIDE_INT > 32
2205 		  insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2206 #endif
2207 		  insert = 1;
2208 		  value = nval;
2209 		  operand1 = GEN_INT (nval);
2210 		}
2211 	    }
2212 
2213 	  if (reload_in_progress || reload_completed)
2214 	    temp = scratch_reg ? scratch_reg : operand0;
2215 	  else
2216 	    temp = gen_reg_rtx (mode);
2217 
2218 	  /* We don't directly split DImode constants on 32-bit targets
2219 	     because PLUS uses an 11-bit immediate and the insn sequence
2220 	     generated is not as efficient as the one using HIGH/LO_SUM.  */
2221 	  if (GET_CODE (operand1) == CONST_INT
2222 	      && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2223 	      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2224 	      && !insert)
2225 	    {
2226 	      /* Directly break constant into high and low parts.  This
2227 		 provides better optimization opportunities because various
2228 		 passes recognize constants split with PLUS but not LO_SUM.
2229 		 We use a 14-bit signed low part except when the addition
2230 		 of 0x4000 to the high part might change the sign of the
2231 		 high part.  */
2232 	      HOST_WIDE_INT low = value & 0x3fff;
2233 	      HOST_WIDE_INT high = value & ~ 0x3fff;
2234 
2235 	      if (low >= 0x2000)
2236 		{
2237 		  if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2238 		    high += 0x2000;
2239 		  else
2240 		    high += 0x4000;
2241 		}
2242 
2243 	      low = value - high;
2244 
2245 	      emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
2246 	      operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2247 	    }
2248 	  else
2249 	    {
2250 	      emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2251 	      operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2252 	    }
2253 
2254 	  insn = emit_move_insn (operands[0], operands[1]);
2255 
2256 	  /* Now insert the most significant 32 bits of the value
2257 	     into the register.  When we don't have a second register
2258 	     available, it could take up to nine instructions to load
2259 	     a 64-bit integer constant.  Prior to reload, we force
2260 	     constants that would take more than three instructions
2261 	     to load to the constant pool.  During and after reload,
2262 	     we have to handle all possible values.  */
2263 	  if (insert)
2264 	    {
2265 	      /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2266 		 register and the value to be inserted is outside the
2267 		 range that can be loaded with three depdi instructions.  */
2268 	      if (temp != operand0 && (insv >= 16384 || insv < -16384))
2269 		{
2270 		  operand1 = GEN_INT (insv);
2271 
2272 		  emit_insn (gen_rtx_SET (temp,
2273 					  gen_rtx_HIGH (mode, operand1)));
2274 		  emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2275 		  if (mode == DImode)
2276 		    insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2277 						  const0_rtx, temp));
2278 		  else
2279 		    insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2280 						  const0_rtx, temp));
2281 		}
2282 	      else
2283 		{
2284 		  int len = 5, pos = 27;
2285 
2286 		  /* Insert the bits using the depdi instruction.  */
2287 		  while (pos >= 0)
2288 		    {
2289 		      HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2290 		      HOST_WIDE_INT sign = v5 < 0;
2291 
2292 		      /* Left extend the insertion.  */
2293 		      insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2294 		      while (pos > 0 && (insv & 1) == sign)
2295 			{
2296 			  insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2297 			  len += 1;
2298 			  pos -= 1;
2299 			}
2300 
2301 		      if (mode == DImode)
2302 			insn = emit_insn (gen_insvdi (operand0,
2303 						      GEN_INT (len),
2304 						      GEN_INT (pos),
2305 						      GEN_INT (v5)));
2306 		      else
2307 			insn = emit_insn (gen_insvsi (operand0,
2308 						      GEN_INT (len),
2309 						      GEN_INT (pos),
2310 						      GEN_INT (v5)));
2311 
2312 		      len = pos > 0 && pos < 5 ? pos : 5;
2313 		      pos -= len;
2314 		    }
2315 		}
2316 	    }
2317 
2318 	  set_unique_reg_note (insn, REG_EQUAL, op1);
2319 
2320 	  return 1;
2321 	}
2322     }
2323   /* Now have insn-emit do whatever it normally does.  */
2324   return 0;
2325 }
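
/* A typical caller is a move expander in pa.md, roughly (a sketch,
   not the exact expander text):

     if (pa_emit_move_sequence (operands, SImode, 0))
       DONE;

   so the expander emits nothing itself when this routine has already
   written out the whole sequence.  */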
2326 
2327 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2328    it will need a link/runtime reloc).  */
2329 
2330 int
2331 pa_reloc_needed (tree exp)
2332 {
2333   int reloc = 0;
2334 
2335   switch (TREE_CODE (exp))
2336     {
2337     case ADDR_EXPR:
2338       return 1;
2339 
2340     case POINTER_PLUS_EXPR:
2341     case PLUS_EXPR:
2342     case MINUS_EXPR:
2343       reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2344       reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2345       break;
2346 
2347     CASE_CONVERT:
2348     case NON_LVALUE_EXPR:
2349       reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2350       break;
2351 
2352     case CONSTRUCTOR:
2353       {
2354 	tree value;
2355 	unsigned HOST_WIDE_INT ix;
2356 
2357 	FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2358 	  if (value)
2359 	    reloc |= pa_reloc_needed (value);
2360       }
2361       break;
2362 
2363     case ERROR_MARK:
2364       break;
2365 
2366     default:
2367       break;
2368     }
2369   return reloc;
2370 }
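
/* For example, an initializer such as "&x + 4" is a POINTER_PLUS_EXPR
   whose first operand is an ADDR_EXPR, so this returns 1; a plain
   INTEGER_CST falls through to the default case and returns 0.  */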
2371 
2372 
2373 /* Return the best assembler insn template
2374    for moving operands[1] into operands[0] as a fullword.  */
2375 const char *
2376 pa_singlemove_string (rtx *operands)
2377 {
2378   HOST_WIDE_INT intval;
2379 
2380   if (GET_CODE (operands[0]) == MEM)
2381     return "stw %r1,%0";
2382   if (GET_CODE (operands[1]) == MEM)
2383     return "ldw %1,%0";
2384   if (GET_CODE (operands[1]) == CONST_DOUBLE)
2385     {
2386       long i;
2387 
2388       gcc_assert (GET_MODE (operands[1]) == SFmode);
2389 
2390       /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2391 	 bit pattern.  */
2392       REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);
2393 
2394       operands[1] = GEN_INT (i);
2395       /* Fall through to CONST_INT case.  */
2396     }
2397   if (GET_CODE (operands[1]) == CONST_INT)
2398     {
2399       intval = INTVAL (operands[1]);
2400 
2401       if (VAL_14_BITS_P (intval))
2402 	return "ldi %1,%0";
2403       else if ((intval & 0x7ff) == 0)
2404 	return "ldil L'%1,%0";
2405       else if (pa_zdepi_cint_p (intval))
2406 	return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2407       else
2408 	return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2409     }
2410   return "copy %1,%0";
2411 }
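
/* Worked examples of the CONST_INT cases above: 5 fits in 14 bits and
   uses "ldi"; 0x55000 has all-zero low-order 11 bits and uses "ldil";
   0x3ff0 (ones in bits 4..13, so nonzero low-order bits) is a shifted,
   sign-extended 5-bit field and uses a single zdepi/depwi,z;
   0x12345678 needs the general two-insn ldil/ldo sequence.  */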
2412 
2413 
2414 /* Compute position (in OP[1]) and width (in OP[2])
2415    useful for copying IMM to a register using the zdepi
2416    instructions.  Store the immediate value to insert in OP[0].  */
2417 static void
2418 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2419 {
2420   int lsb, len;
2421 
2422   /* Find the least significant set bit in IMM.  */
2423   for (lsb = 0; lsb < 32; lsb++)
2424     {
2425       if ((imm & 1) != 0)
2426         break;
2427       imm >>= 1;
2428     }
2429 
2430   /* Choose variants based on *sign* of the 5-bit field.  */
2431   if ((imm & 0x10) == 0)
2432     len = (lsb <= 28) ? 4 : 32 - lsb;
2433   else
2434     {
2435       /* Find the width of the bitstring in IMM.  */
2436       for (len = 5; len < 32 - lsb; len++)
2437 	{
2438 	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2439 	    break;
2440 	}
2441 
2442       /* Sign extend IMM as a 5-bit value.  */
2443       imm = (imm & 0xf) - 0x10;
2444     }
2445 
2446   op[0] = imm;
2447   op[1] = 31 - lsb;
2448   op[2] = len;
2449 }
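
/* Worked example: IMM = 0x3ff0.  The least significant set bit is
   lsb = 4, and the shifted value 0x3ff has bit 4 set, so the bitstring
   width is len = 10 and the field sign-extends to imm = -1.  The
   result is op[] = { -1, 27, 10 }, i.e. "zdepi -1,27,10" deposits ten
   1s into bits 4..13.  */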
2450 
2451 /* Compute position (in OP[1]) and width (in OP[2])
2452    useful for copying IMM to a register using the depdi,z
2453    instructions.  Store the immediate value to insert in OP[0].  */
2454 
2455 static void
2456 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2457 {
2458   int lsb, len, maxlen;
2459 
2460   maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2461 
2462   /* Find the least significant set bit in IMM.  */
2463   for (lsb = 0; lsb < maxlen; lsb++)
2464     {
2465       if ((imm & 1) != 0)
2466         break;
2467       imm >>= 1;
2468     }
2469 
2470   /* Choose variants based on *sign* of the 5-bit field.  */
2471   if ((imm & 0x10) == 0)
2472     len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2473   else
2474     {
2475       /* Find the width of the bitstring in IMM.  */
2476       for (len = 5; len < maxlen - lsb; len++)
2477 	{
2478 	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2479 	    break;
2480 	}
2481 
2482       /* Extend length if host is narrow and IMM is negative.  */
2483       if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2484 	len += 32;
2485 
2486       /* Sign extend IMM as a 5-bit value.  */
2487       imm = (imm & 0xf) - 0x10;
2488     }
2489 
2490   op[0] = imm;
2491   op[1] = 63 - lsb;
2492   op[2] = len;
2493 }
2494 
2495 /* Output assembler code to perform a doubleword move insn
2496    with operands OPERANDS.  */
2497 
2498 const char *
2499 pa_output_move_double (rtx *operands)
2500 {
2501   enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2502   rtx latehalf[2];
2503   rtx addreg0 = 0, addreg1 = 0;
2504   int highonly = 0;
2505 
2506   /* First classify both operands.  */
2507 
2508   if (REG_P (operands[0]))
2509     optype0 = REGOP;
2510   else if (offsettable_memref_p (operands[0]))
2511     optype0 = OFFSOP;
2512   else if (GET_CODE (operands[0]) == MEM)
2513     optype0 = MEMOP;
2514   else
2515     optype0 = RNDOP;
2516 
2517   if (REG_P (operands[1]))
2518     optype1 = REGOP;
2519   else if (CONSTANT_P (operands[1]))
2520     optype1 = CNSTOP;
2521   else if (offsettable_memref_p (operands[1]))
2522     optype1 = OFFSOP;
2523   else if (GET_CODE (operands[1]) == MEM)
2524     optype1 = MEMOP;
2525   else
2526     optype1 = RNDOP;
2527 
2528   /* Check for the cases that the operand constraints are not
2529      supposed to allow.  */
2530   gcc_assert (optype0 == REGOP || optype1 == REGOP);
2531 
2532   /* Handle copies between general and floating registers.  */
2533 
2534   if (optype0 == REGOP && optype1 == REGOP
2535       && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2536     {
2537       if (FP_REG_P (operands[0]))
2538 	{
2539 	  output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2540 	  output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2541 	  return "{fldds|fldd} -16(%%sp),%0";
2542 	}
2543       else
2544 	{
2545 	  output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2546 	  output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2547 	  return "{ldws|ldw} -12(%%sp),%R0";
2548 	}
2549     }
2550 
2551    /* Handle auto decrementing and incrementing loads and stores
2552      specifically, since the structure of the function doesn't work
2553      for them without major modification.  Improve this once the
2554      port is taught about the general inc/dec addressing of the PA.
2555      (This was written by tege.  Chide him if it doesn't work.)  */
2556 
2557   if (optype0 == MEMOP)
2558     {
2559       /* We have to output the address syntax ourselves, since print_operand
2560 	 doesn't deal with the addresses we want to use.  Fix this later.  */
2561 
2562       rtx addr = XEXP (operands[0], 0);
2563       if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2564 	{
2565 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2566 
2567 	  operands[0] = XEXP (addr, 0);
2568 	  gcc_assert (GET_CODE (operands[1]) == REG
2569 		      && GET_CODE (operands[0]) == REG);
2570 
2571 	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2572 
2573 	  /* No overlap between high target register and address
2574 	     register.  (We do this in a non-obvious way to
2575 	     save a register file writeback)  */
2576 	  if (GET_CODE (addr) == POST_INC)
2577 	    return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2578 	  return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2579 	}
2580       else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2581 	{
2582 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2583 
2584 	  operands[0] = XEXP (addr, 0);
2585 	  gcc_assert (GET_CODE (operands[1]) == REG
2586 		      && GET_CODE (operands[0]) == REG);
2587 
2588 	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2589 	  /* No overlap between high target register and address
2590 	     register.  (We do this in a non-obvious way to save a
2591 	     register file writeback)  */
2592 	  if (GET_CODE (addr) == PRE_INC)
2593 	    return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2594 	  return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2595 	}
2596     }
2597   if (optype1 == MEMOP)
2598     {
2599       /* We have to output the address syntax ourselves, since print_operand
2600 	 doesn't deal with the addresses we want to use.  Fix this later.  */
2601 
2602       rtx addr = XEXP (operands[1], 0);
2603       if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2604 	{
2605 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2606 
2607 	  operands[1] = XEXP (addr, 0);
2608 	  gcc_assert (GET_CODE (operands[0]) == REG
2609 		      && GET_CODE (operands[1]) == REG);
2610 
2611 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2612 	    {
2613 	      /* No overlap between high target register and address
2614 		 register.  (We do this in a non-obvious way to
2615 		 save a register file writeback)  */
2616 	      if (GET_CODE (addr) == POST_INC)
2617 		return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2618 	      return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2619 	    }
2620 	  else
2621 	    {
2622 	      /* This is an undefined situation.  We should load into the
2623 		 address register *and* update that register.  Probably
2624 		 we don't need to handle this at all.  */
2625 	      if (GET_CODE (addr) == POST_INC)
2626 		return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2627 	      return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2628 	    }
2629 	}
2630       else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2631 	{
2632 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2633 
2634 	  operands[1] = XEXP (addr, 0);
2635 	  gcc_assert (GET_CODE (operands[0]) == REG
2636 		      && GET_CODE (operands[1]) == REG);
2637 
2638 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2639 	    {
2640 	      /* No overlap between high target register and address
2641 		 register.  (We do this in a non-obvious way to
2642 		 save a register file writeback)  */
2643 	      if (GET_CODE (addr) == PRE_INC)
2644 		return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2645 	      return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2646 	    }
2647 	  else
2648 	    {
2649 	      /* This is an undefined situation.  We should load into the
2650 		 address register *and* update that register.  Probably
2651 		 we don't need to handle this at all.  */
2652 	      if (GET_CODE (addr) == PRE_INC)
2653 		return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2654 	      return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2655 	    }
2656 	}
2657       else if (GET_CODE (addr) == PLUS
2658 	       && GET_CODE (XEXP (addr, 0)) == MULT)
2659 	{
2660 	  rtx xoperands[4];
2661 
2662 	  /* Load address into left half of destination register.  */
2663 	  xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2664 	  xoperands[1] = XEXP (addr, 1);
2665 	  xoperands[2] = XEXP (XEXP (addr, 0), 0);
2666 	  xoperands[3] = XEXP (XEXP (addr, 0), 1);
2667 	  output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2668 			   xoperands);
2669 	  return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2670 	}
2671       else if (GET_CODE (addr) == PLUS
2672 	       && REG_P (XEXP (addr, 0))
2673 	       && REG_P (XEXP (addr, 1)))
2674 	{
2675 	  rtx xoperands[3];
2676 
2677 	  /* Load address into left half of destination register.  */
2678 	  xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2679 	  xoperands[1] = XEXP (addr, 0);
2680 	  xoperands[2] = XEXP (addr, 1);
2681 	  output_asm_insn ("{addl|add,l} %1,%2,%0",
2682 			   xoperands);
2683 	  return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2684 	}
2685     }
2686 
2687   /* If an operand is an unoffsettable memory ref, find a register
2688      we can increment temporarily to make it refer to the second word.  */
2689 
2690   if (optype0 == MEMOP)
2691     addreg0 = find_addr_reg (XEXP (operands[0], 0));
2692 
2693   if (optype1 == MEMOP)
2694     addreg1 = find_addr_reg (XEXP (operands[1], 0));
2695 
2696   /* Ok, we can do one word at a time.
2697      Normally we do the low-numbered word first.
2698 
2699      In any case, set up in LATEHALF the operands to use
2700      for the high-numbered word and in some cases alter the
2701      operands in OPERANDS to be suitable for the low-numbered word.  */
2702 
2703   if (optype0 == REGOP)
2704     latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2705   else if (optype0 == OFFSOP)
2706     latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
2707   else
2708     latehalf[0] = operands[0];
2709 
2710   if (optype1 == REGOP)
2711     latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2712   else if (optype1 == OFFSOP)
2713     latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
2714   else if (optype1 == CNSTOP)
2715     {
2716       if (GET_CODE (operands[1]) == HIGH)
2717 	{
2718 	  operands[1] = XEXP (operands[1], 0);
2719 	  highonly = 1;
2720 	}
2721       split_double (operands[1], &operands[1], &latehalf[1]);
2722     }
2723   else
2724     latehalf[1] = operands[1];
2725 
2726   /* If the first move would clobber the source of the second one,
2727      do them in the other order.
2728 
2729      This can happen in two cases:
2730 
2731 	mem -> register where the first half of the destination register
2732  	is the same register used in the memory's address.  Reload
2733 	can create such insns.
2734 
2735 	mem in this case will be either register indirect or register
2736 	indirect plus a valid offset.
2737 
2738 	register -> register move where REGNO (dst) == REGNO (src) + 1;
2739 	someone (Tim/Tege?) claimed this can happen for parameter loads.
2740 
2741      Handle mem -> register case first.  */
2742   if (optype0 == REGOP
2743       && (optype1 == MEMOP || optype1 == OFFSOP)
2744       && refers_to_regno_p (REGNO (operands[0]), operands[1]))
2745     {
2746       /* Do the late half first.  */
2747       if (addreg1)
2748 	output_asm_insn ("ldo 4(%0),%0", &addreg1);
2749       output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2750 
2751       /* Then clobber.  */
2752       if (addreg1)
2753 	output_asm_insn ("ldo -4(%0),%0", &addreg1);
2754       return pa_singlemove_string (operands);
2755     }
2756 
2757   /* Now handle register -> register case.  */
2758   if (optype0 == REGOP && optype1 == REGOP
2759       && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2760     {
2761       output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2762       return pa_singlemove_string (operands);
2763     }
2764 
2765   /* Normal case: do the two words, low-numbered first.  */
2766 
2767   output_asm_insn (pa_singlemove_string (operands), operands);
2768 
2769   /* Make any unoffsettable addresses point at high-numbered word.  */
2770   if (addreg0)
2771     output_asm_insn ("ldo 4(%0),%0", &addreg0);
2772   if (addreg1)
2773     output_asm_insn ("ldo 4(%0),%0", &addreg1);
2774 
2775   /* Do high-numbered word.  */
2776   if (highonly)
2777     output_asm_insn ("ldil L'%1,%0", latehalf);
2778   else
2779     output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2780 
2781   /* Undo the adds we just did.  */
2782   if (addreg0)
2783     output_asm_insn ("ldo -4(%0),%0", &addreg0);
2784   if (addreg1)
2785     output_asm_insn ("ldo -4(%0),%0", &addreg1);
2786 
2787   return "";
2788 }
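
/* For example, for the common REGOP/OFFSOP case

     operands[0] = (reg:DI 4)
     operands[1] = (mem:DI (plus:SI (reg:SI 26) (const_int 8)))

   no halves overlap, so the normal path emits (modulo exact address
   syntax):

     ldw 8(%r26),%r4
     ldw 12(%r26),%r5  */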
2789 
2790 const char *
2791 pa_output_fp_move_double (rtx *operands)
2792 {
2793   if (FP_REG_P (operands[0]))
2794     {
2795       if (FP_REG_P (operands[1])
2796 	  || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2797 	output_asm_insn ("fcpy,dbl %f1,%0", operands);
2798       else
2799 	output_asm_insn ("fldd%F1 %1,%0", operands);
2800     }
2801   else if (FP_REG_P (operands[1]))
2802     {
2803       output_asm_insn ("fstd%F0 %1,%0", operands);
2804     }
2805   else
2806     {
2807       rtx xoperands[2];
2808 
2809       gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2810 
2811       /* This is a pain.  You have to be prepared to deal with an
2812 	 arbitrary address here, including pre/post increment/decrement,
2813 	 so avoid this in the MD.  */
2815       gcc_assert (GET_CODE (operands[0]) == REG);
2816 
2817       xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2818       xoperands[0] = operands[0];
2819       output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2820     }
2821   return "";
2822 }
2823 
2824 /* Return a REG that occurs in ADDR with coefficient 1.
2825    ADDR can be effectively incremented by incrementing REG.  */
2826 
2827 static rtx
2828 find_addr_reg (rtx addr)
2829 {
2830   while (GET_CODE (addr) == PLUS)
2831     {
2832       if (GET_CODE (XEXP (addr, 0)) == REG)
2833 	addr = XEXP (addr, 0);
2834       else if (GET_CODE (XEXP (addr, 1)) == REG)
2835 	addr = XEXP (addr, 1);
2836       else if (CONSTANT_P (XEXP (addr, 0)))
2837 	addr = XEXP (addr, 1);
2838       else if (CONSTANT_P (XEXP (addr, 1)))
2839 	addr = XEXP (addr, 0);
2840       else
2841 	gcc_unreachable ();
2842     }
2843   gcc_assert (GET_CODE (addr) == REG);
2844   return addr;
2845 }
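
/* For example, find_addr_reg on (plus:SI (reg:SI 26) (const_int 8))
   peels off the constant term and returns (reg:SI 26).  */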
2846 
2847 /* Emit code to perform a block move.
2848 
2849    OPERANDS[0] is the destination pointer as a REG, clobbered.
2850    OPERANDS[1] is the source pointer as a REG, clobbered.
2851    OPERANDS[2] is a register for temporary storage.
2852    OPERANDS[3] is a register for temporary storage.
2853    OPERANDS[4] is the size as a CONST_INT
2854    OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2855    OPERANDS[6] is another temporary register.  */
2856 
2857 const char *
2858 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2859 {
2860   int align = INTVAL (operands[5]);
2861   unsigned long n_bytes = INTVAL (operands[4]);
2862 
2863   /* We can't move more than a word at a time because the PA
2864      has no move insns for integers longer than a word.  (Could use fp mem ops?)  */
2865   if (align > (TARGET_64BIT ? 8 : 4))
2866     align = (TARGET_64BIT ? 8 : 4);
2867 
2868   /* Note that we know each loop below will execute at least twice
2869      (else we would have open-coded the copy).  */
2870   switch (align)
2871     {
2872       case 8:
2873 	/* Pre-adjust the loop counter.  */
2874 	operands[4] = GEN_INT (n_bytes - 16);
2875 	output_asm_insn ("ldi %4,%2", operands);
2876 
2877 	/* Copying loop.  */
2878 	output_asm_insn ("ldd,ma 8(%1),%3", operands);
2879 	output_asm_insn ("ldd,ma 8(%1),%6", operands);
2880 	output_asm_insn ("std,ma %3,8(%0)", operands);
2881 	output_asm_insn ("addib,>= -16,%2,.-12", operands);
2882 	output_asm_insn ("std,ma %6,8(%0)", operands);
2883 
2884 	/* Handle the residual.  There could be up to 15 bytes of
2885 	   residual to copy!  */
2886 	if (n_bytes % 16 != 0)
2887 	  {
2888 	    operands[4] = GEN_INT (n_bytes % 8);
2889 	    if (n_bytes % 16 >= 8)
2890 	      output_asm_insn ("ldd,ma 8(%1),%3", operands);
2891 	    if (n_bytes % 8 != 0)
2892 	      output_asm_insn ("ldd 0(%1),%6", operands);
2893 	    if (n_bytes % 16 >= 8)
2894 	      output_asm_insn ("std,ma %3,8(%0)", operands);
2895 	    if (n_bytes % 8 != 0)
2896 	      output_asm_insn ("stdby,e %6,%4(%0)", operands);
2897 	  }
2898 	return "";
2899 
2900       case 4:
2901 	/* Pre-adjust the loop counter.  */
2902 	operands[4] = GEN_INT (n_bytes - 8);
2903 	output_asm_insn ("ldi %4,%2", operands);
2904 
2905 	/* Copying loop.  */
2906 	output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2907 	output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2908 	output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2909 	output_asm_insn ("addib,>= -8,%2,.-12", operands);
2910 	output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2911 
2912 	/* Handle the residual.  There could be up to 7 bytes of
2913 	   residual to copy!  */
2914 	if (n_bytes % 8 != 0)
2915 	  {
2916 	    operands[4] = GEN_INT (n_bytes % 4);
2917 	    if (n_bytes % 8 >= 4)
2918 	      output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2919 	    if (n_bytes % 4 != 0)
2920 	      output_asm_insn ("ldw 0(%1),%6", operands);
2921 	    if (n_bytes % 8 >= 4)
2922 	      output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2923 	    if (n_bytes % 4 != 0)
2924 	      output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2925 	  }
2926 	return "";
2927 
2928       case 2:
2929 	/* Pre-adjust the loop counter.  */
2930 	operands[4] = GEN_INT (n_bytes - 4);
2931 	output_asm_insn ("ldi %4,%2", operands);
2932 
2933 	/* Copying loop.  */
2934 	output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2935 	output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2936 	output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2937 	output_asm_insn ("addib,>= -4,%2,.-12", operands);
2938 	output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2939 
2940 	/* Handle the residual.  */
2941 	if (n_bytes % 4 != 0)
2942 	  {
2943 	    if (n_bytes % 4 >= 2)
2944 	      output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2945 	    if (n_bytes % 2 != 0)
2946 	      output_asm_insn ("ldb 0(%1),%6", operands);
2947 	    if (n_bytes % 4 >= 2)
2948 	      output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2949 	    if (n_bytes % 2 != 0)
2950 	      output_asm_insn ("stb %6,0(%0)", operands);
2951 	  }
2952 	return "";
2953 
2954       case 1:
2955 	/* Pre-adjust the loop counter.  */
2956 	operands[4] = GEN_INT (n_bytes - 2);
2957 	output_asm_insn ("ldi %4,%2", operands);
2958 
2959 	/* Copying loop.  */
2960 	output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2961 	output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2962 	output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2963 	output_asm_insn ("addib,>= -2,%2,.-12", operands);
2964 	output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2965 
2966 	/* Handle the residual.  */
2967 	if (n_bytes % 2 != 0)
2968 	  {
2969 	    output_asm_insn ("ldb 0(%1),%3", operands);
2970 	    output_asm_insn ("stb %3,0(%0)", operands);
2971 	  }
2972 	return "";
2973 
2974       default:
2975 	gcc_unreachable ();
2976     }
2977 }
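
/* Worked example for the align == 4 case with n_bytes == 23: the
   counter is preloaded with 15, the ldw/stw loop moves 8 bytes per
   iteration (16 bytes in two passes), the residual copies one more
   word with ldw,ma/stw,ma, and a final ldw plus "stby,e" stores the
   last 3 bytes, for 23 bytes in all.  */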
2978 
2979 /* Count the number of insns necessary to handle this block move.
2980 
2981    Basic structure is the same as pa_output_block_move, except that we
2982    count insns rather than emit them.  */
2983 
2984 static int
2985 compute_movmem_length (rtx_insn *insn)
2986 {
2987   rtx pat = PATTERN (insn);
2988   unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2989   unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2990   unsigned int n_insns = 0;
2991 
2992   /* We can't move more than a word at a time because the PA has no
2993      move insns for integers longer than a word.  (Could use fp mem ops?)  */
2994   if (align > (TARGET_64BIT ? 8 : 4))
2995     align = (TARGET_64BIT ? 8 : 4);
2996 
2997   /* The basic copying loop.  */
2998   n_insns = 6;
2999 
3000   /* Residuals.  */
3001   if (n_bytes % (2 * align) != 0)
3002     {
3003       if ((n_bytes % (2 * align)) >= align)
3004 	n_insns += 2;
3005 
3006       if ((n_bytes % align) != 0)
3007 	n_insns += 2;
3008     }
3009 
3010   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
3011   return n_insns * 4;
3012 }
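
/* Worked example, matching the block move example above: for
   align == 4 and n_bytes == 23 the estimate is 6 loop insns, plus 2
   for the word residual (23 % 8 >= 4), plus 2 for the byte residual
   (23 % 4 != 0), i.e. 10 insns or 40 bytes.  */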
3013 
3014 /* Emit code to perform a block clear.
3015 
3016    OPERANDS[0] is the destination pointer as a REG, clobbered.
3017    OPERANDS[1] is a register for temporary storage.
3018    OPERANDS[2] is the size as a CONST_INT
3019    OPERANDS[3] is the alignment safe to use, as a CONST_INT.  */
3020 
3021 const char *
3022 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3023 {
3024   int align = INTVAL (operands[3]);
3025   unsigned long n_bytes = INTVAL (operands[2]);
3026 
3027   /* We can't clear more than a word at a time because the PA
3028      has no move insns for integers longer than a word.  */
3029   if (align > (TARGET_64BIT ? 8 : 4))
3030     align = (TARGET_64BIT ? 8 : 4);
3031 
3032   /* Note that we know each loop below will execute at least twice
3033      (else we would have open-coded the clear).  */
3034   switch (align)
3035     {
3036       case 8:
3037 	/* Pre-adjust the loop counter.  */
3038 	operands[2] = GEN_INT (n_bytes - 16);
3039 	output_asm_insn ("ldi %2,%1", operands);
3040 
3041 	/* Loop.  */
3042 	output_asm_insn ("std,ma %%r0,8(%0)", operands);
3043 	output_asm_insn ("addib,>= -16,%1,.-4", operands);
3044 	output_asm_insn ("std,ma %%r0,8(%0)", operands);
3045 
3046 	/* Handle the residual.  There could be up to 15 bytes of
3047 	   residual to clear!  */
3048 	if (n_bytes % 16 != 0)
3049 	  {
3050 	    operands[2] = GEN_INT (n_bytes % 8);
3051 	    if (n_bytes % 16 >= 8)
3052 	      output_asm_insn ("std,ma %%r0,8(%0)", operands);
3053 	    if (n_bytes % 8 != 0)
3054 	      output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3055 	  }
3056 	return "";
3057 
3058       case 4:
3059 	/* Pre-adjust the loop counter.  */
3060 	operands[2] = GEN_INT (n_bytes - 8);
3061 	output_asm_insn ("ldi %2,%1", operands);
3062 
3063 	/* Loop.  */
3064 	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3065 	output_asm_insn ("addib,>= -8,%1,.-4", operands);
3066 	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3067 
3068 	/* Handle the residual.  There could be up to 7 bytes of
3069 	   residual to clear!  */
3070 	if (n_bytes % 8 != 0)
3071 	  {
3072 	    operands[2] = GEN_INT (n_bytes % 4);
3073 	    if (n_bytes % 8 >= 4)
3074 	      output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3075 	    if (n_bytes % 4 != 0)
3076 	      output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3077 	  }
3078 	return "";
3079 
3080       case 2:
3081 	/* Pre-adjust the loop counter.  */
3082 	operands[2] = GEN_INT (n_bytes - 4);
3083 	output_asm_insn ("ldi %2,%1", operands);
3084 
3085 	/* Loop.  */
3086 	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3087 	output_asm_insn ("addib,>= -4,%1,.-4", operands);
3088 	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3089 
3090 	/* Handle the residual.  */
3091 	if (n_bytes % 4 != 0)
3092 	  {
3093 	    if (n_bytes % 4 >= 2)
3094 	      output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3095 	    if (n_bytes % 2 != 0)
3096 	      output_asm_insn ("stb %%r0,0(%0)", operands);
3097 	  }
3098 	return "";
3099 
3100       case 1:
3101 	/* Pre-adjust the loop counter.  */
3102 	operands[2] = GEN_INT (n_bytes - 2);
3103 	output_asm_insn ("ldi %2,%1", operands);
3104 
3105 	/* Loop.  */
3106 	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3107 	output_asm_insn ("addib,>= -2,%1,.-4", operands);
3108 	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3109 
3110 	/* Handle the residual.  */
3111 	if (n_bytes % 2 != 0)
3112 	  output_asm_insn ("stb %%r0,0(%0)", operands);
3113 
3114 	return "";
3115 
3116       default:
3117 	gcc_unreachable ();
3118     }
3119 }
3120 
3121 /* Count the number of insns necessary to handle this block clear.
3122 
3123    Basic structure is the same as pa_output_block_clear, except that
3124    we count insns rather than emit them.  */
3125 
3126 static int
3127 compute_clrmem_length (rtx_insn *insn)
3128 {
3129   rtx pat = PATTERN (insn);
3130   unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3131   unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3132   unsigned int n_insns = 0;
3133 
3134   /* We can't clear more than a word at a time because the PA
3135      has no move insns for integers longer than a word.  */
3136   if (align > (TARGET_64BIT ? 8 : 4))
3137     align = (TARGET_64BIT ? 8 : 4);
3138 
3139   /* The basic loop.  */
3140   n_insns = 4;
3141 
3142   /* Residuals.  */
3143   if (n_bytes % (2 * align) != 0)
3144     {
3145       if ((n_bytes % (2 * align)) >= align)
3146 	n_insns++;
3147 
3148       if ((n_bytes % align) != 0)
3149 	n_insns++;
3150     }
3151 
3152   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
3153   return n_insns * 4;
3154 }
3155 
3156 
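/* Return a string to perform a bitwise-and of operands[1] with
   operands[2], storing the result in operands[0].  */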
3157 const char *
3158 pa_output_and (rtx *operands)
3159 {
3160   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3161     {
3162       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3163       int ls0, ls1, ms0, p, len;
3164 
3165       for (ls0 = 0; ls0 < 32; ls0++)
3166 	if ((mask & (1 << ls0)) == 0)
3167 	  break;
3168 
3169       for (ls1 = ls0; ls1 < 32; ls1++)
3170 	if ((mask & (1 << ls1)) != 0)
3171 	  break;
3172 
3173       for (ms0 = ls1; ms0 < 32; ms0++)
3174 	if ((mask & (1 << ms0)) == 0)
3175 	  break;
3176 
3177       gcc_assert (ms0 == 32);
3178 
3179       if (ls1 == 32)
3180 	{
3181 	  len = ls0;
3182 
3183 	  gcc_assert (len);
3184 
3185 	  operands[2] = GEN_INT (len);
3186 	  return "{extru|extrw,u} %1,31,%2,%0";
3187 	}
3188       else
3189 	{
3190 	  /* We could use this `depi' for the case above as well, but `depi'
3191 	     requires one more register file access than an `extru'.  */
3192 
3193 	  p = 31 - ls0;
3194 	  len = ls1 - ls0;
3195 
3196 	  operands[2] = GEN_INT (p);
3197 	  operands[3] = GEN_INT (len);
3198 	  return "{depi|depwi} 0,%2,%3,%0";
3199 	}
3200     }
3201   else
3202     return "and %1,%2,%0";
3203 }
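
/* Editorial worked example, not from the original source: for the mask
   0xffff00ff we get ls0 == 8, ls1 == 16 and ms0 == 32, hence p == 23
   and len == 8, and the insn emitted is "{depi|depwi} 0,23,8,%0" (PA
   numbers the MSB as bit 0, so a length-8 field ending at position 23
   clears bits 8..15 in LSB terms).  For a low mask such as 0x000000ff,
   ls1 == 32 and the output is "{extru|extrw,u} %1,31,8,%0".  */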
3204 
3205 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3206    storing the result in operands[0].  */
3207 const char *
3208 pa_output_64bit_and (rtx *operands)
3209 {
3210   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3211     {
3212       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3213       int ls0, ls1, ms0, p, len;
3214 
3215       for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3216 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3217 	  break;
3218 
3219       for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3220 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3221 	  break;
3222 
3223       for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3224 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3225 	  break;
3226 
3227       gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3228 
3229       if (ls1 == HOST_BITS_PER_WIDE_INT)
3230 	{
3231 	  len = ls0;
3232 
3233 	  gcc_assert (len);
3234 
3235 	  operands[2] = GEN_INT (len);
3236 	  return "extrd,u %1,63,%2,%0";
3237 	}
3238       else
3239 	{
3240 	  /* We could use this `depdi' for the case above as well, but `depdi'
3241 	     requires one more register file access than an `extrd,u'.  */
3242 
3243 	  p = 63 - ls0;
3244 	  len = ls1 - ls0;
3245 
3246 	  operands[2] = GEN_INT (p);
3247 	  operands[3] = GEN_INT (len);
3248 	  return "depdi 0,%2,%3,%0";
3249 	}
3250     }
3251   else
3252     return "and %1,%2,%0";
3253 }
3254 
3255 const char *
3256 pa_output_ior (rtx *operands)
3257 {
3258   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3259   int bs0, bs1, p, len;
3260 
3261   if (INTVAL (operands[2]) == 0)
3262     return "copy %1,%0";
3263 
3264   for (bs0 = 0; bs0 < 32; bs0++)
3265     if ((mask & (1 << bs0)) != 0)
3266       break;
3267 
3268   for (bs1 = bs0; bs1 < 32; bs1++)
3269     if ((mask & (1 << bs1)) == 0)
3270       break;
3271 
3272   gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3273 
3274   p = 31 - bs0;
3275   len = bs1 - bs0;
3276 
3277   operands[2] = GEN_INT (p);
3278   operands[3] = GEN_INT (len);
3279   return "{depi|depwi} -1,%2,%3,%0";
3280 }
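
/* Editorial worked example: the mask 0x00000ff0 gives bs0 == 4 and
   bs1 == 12, so p == 27 and len == 8, and the insn emitted is
   "{depi|depwi} -1,27,8,%0", which sets LSB bits 4..11 in place (the
   insn pattern is expected to tie operands 0 and 1 together).  */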
3281 
3282 /* Return a string to perform a bitwise inclusive-or of operands[1] with
3283    operands[2], storing the result in operands[0].  */
3284 const char *
3285 pa_output_64bit_ior (rtx *operands)
3286 {
3287   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3288   int bs0, bs1, p, len;
3289 
3290   if (INTVAL (operands[2]) == 0)
3291     return "copy %1,%0";
3292 
3293   for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3294     if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3295       break;
3296 
3297   for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3298     if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3299       break;
3300 
3301   gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3302 	      || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3303 
3304   p = 63 - bs0;
3305   len = bs1 - bs0;
3306 
3307   operands[2] = GEN_INT (p);
3308   operands[3] = GEN_INT (len);
3309   return "depdi -1,%2,%3,%0";
3310 }
3311 
3312 /* Target hook for assembling integer objects.  This code handles
3313    aligned SI and DI integers specially since function references
3314    must be preceded by P%.  */
3315 
3316 static bool
3317 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3318 {
3319   bool result;
3320   tree decl = NULL;
3321 
3322   /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to
3323      call assemble_external and set the SYMBOL_REF_DECL to NULL before
3324      calling output_addr_const.  Otherwise, it may call assemble_external
3325      in the midst of outputting the assembler code for the SYMBOL_REF.
3326      We restore the SYMBOL_REF_DECL after the output is done.  */
3327   if (GET_CODE (x) == SYMBOL_REF)
3328     {
3329       decl = SYMBOL_REF_DECL (x);
3330       if (decl)
3331 	{
3332 	  assemble_external (decl);
3333 	  SET_SYMBOL_REF_DECL (x, NULL);
3334 	}
3335     }
3336 
3337   if (size == UNITS_PER_WORD
3338       && aligned_p
3339       && function_label_operand (x, VOIDmode))
3340     {
3341       fputs (size == 8 ? "\t.dword\t" : "\t.word\t", asm_out_file);
3342 
3343       /* We don't want an OPD when generating fast indirect calls.  */
3344       if (!TARGET_FAST_INDIRECT_CALLS)
3345 	fputs ("P%", asm_out_file);
3346 
3347       output_addr_const (asm_out_file, x);
3348       fputc ('\n', asm_out_file);
3349       result = true;
3350     }
3351   else
3352     result = default_assemble_integer (x, size, aligned_p);
3353 
3354   if (decl)
3355     SET_SYMBOL_REF_DECL (x, decl);
3356 
3357   return result;
3358 }
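
/* For illustration (editorial note): on a 32-bit target, an aligned
   word-sized reference to a function symbol foo is emitted as

	.word	P%foo

   With -mfast-indirect-calls the P% (procedure label) prefix is
   omitted, and anything else falls through to default_assemble_integer.  */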
3359 
3360 /* Output an ascii string.  */
3361 void
3362 pa_output_ascii (FILE *file, const char *p, int size)
3363 {
3364   int i;
3365   int chars_output;
3366   unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */
3367 
3368   /* The HP assembler can only take strings of 256 characters at one
3369      time.  This is a limitation on input line length, *not* the
3370      length of the string.  Sigh.  Even worse, it seems that the
3371      restriction is in number of input characters (see \xnn &
3372      \whatever).  So we have to do this very carefully.  */
3373 
3374   fputs ("\t.STRING \"", file);
3375 
3376   chars_output = 0;
3377   for (i = 0; i < size; i += 4)
3378     {
3379       int co = 0;
3380       int io = 0;
3381       for (io = 0, co = 0; io < MIN (4, size - i); io++)
3382 	{
3383 	  register unsigned int c = (unsigned char) p[i + io];
3384 
3385 	  if (c == '\"' || c == '\\')
3386 	    partial_output[co++] = '\\';
3387 	  if (c >= ' ' && c < 0177)
3388 	    partial_output[co++] = c;
3389 	  else
3390 	    {
3391 	      unsigned int hexd;
3392 	      partial_output[co++] = '\\';
3393 	      partial_output[co++] = 'x';
3394 	      hexd =  c  / 16 - 0 + '0';
3395 	      if (hexd > '9')
3396 		hexd -= '9' - 'a' + 1;
3397 	      partial_output[co++] = hexd;
3398 	      hexd =  c % 16 - 0 + '0';
3399 	      if (hexd > '9')
3400 		hexd -= '9' - 'a' + 1;
3401 	      partial_output[co++] = hexd;
3402 	    }
3403 	}
3404       if (chars_output + co > 243)
3405 	{
3406 	  fputs ("\"\n\t.STRING \"", file);
3407 	  chars_output = 0;
3408 	}
3409       fwrite (partial_output, 1, (size_t) co, file);
3410       chars_output += co;
3411       co = 0;
3412     }
3413   fputs ("\"\n", file);
3414 }
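
/* Editorial example of the output: for the three input bytes
   'H', '"' and 0x07 this routine emits

	.STRING "H\"\x07"

   The quote is escaped, and the unprintable 0x07 becomes a two-digit
   hex escape via the nibble arithmetic above.  */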
3415 
3416 /* Try to rewrite floating point comparisons & branches to avoid
3417    useless add,tr insns.
3418 
3419    CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3420    to see if FPCC is dead.  CHECK_NOTES is nonzero for the
3421    first attempt to remove useless add,tr insns.  It is zero
3422    for the second pass as reorg sometimes leaves bogus REG_DEAD
3423    notes lying around.
3424 
3425    When CHECK_NOTES is zero we can only eliminate add,tr insns
3426    when there's a 1:1 correspondence between fcmp and ftest/fbranch
3427    instructions.  */
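/* An editorial sketch of the RTL shapes involved, for orientation:

	(set (reg:CCFP 0) (lt:CCFP (reg:DF f4) (reg:DF f5)))
	...
	(set (pc) (if_then_else (ne (reg:CCFP 0) (const_int 0))
				(pc)
				(label_ref L)))

   The branch above is "reversed": it falls through when the test is
   true, an arrangement the PA implements at the cost of an extra
   add,tr insn.  Swapping the two arms of the if_then_else and
   replacing the fcmp code with reverse_condition_maybe_unordered of
   itself yields an ordinary forward branch that needs no add,tr.  */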
3428 static void
3429 remove_useless_addtr_insns (int check_notes)
3430 {
3431   rtx_insn *insn;
3432   static int pass = 0;
3433 
3434   /* This is fairly cheap, so always run it when optimizing.  */
3435   if (optimize > 0)
3436     {
3437       int fcmp_count = 0;
3438       int fbranch_count = 0;
3439 
3440       /* Walk all the insns in this function looking for fcmp & fbranch
3441 	 instructions.  Keep track of how many of each we find.  */
3442       for (insn = get_insns (); insn; insn = next_insn (insn))
3443 	{
3444 	  rtx tmp;
3445 
3446 	  /* Ignore anything that isn't an INSN or a JUMP_INSN.  */
3447 	  if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
3448 	    continue;
3449 
3450 	  tmp = PATTERN (insn);
3451 
3452 	  /* It must be a set.  */
3453 	  if (GET_CODE (tmp) != SET)
3454 	    continue;
3455 
3456 	  /* If the destination is CCFP, then we've found an fcmp insn.  */
3457 	  tmp = SET_DEST (tmp);
3458 	  if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3459 	    {
3460 	      fcmp_count++;
3461 	      continue;
3462 	    }
3463 
3464 	  tmp = PATTERN (insn);
3465 	  /* If this is an fbranch instruction, bump the fbranch counter.  */
3466 	  if (GET_CODE (tmp) == SET
3467 	      && SET_DEST (tmp) == pc_rtx
3468 	      && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3469 	      && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3470 	      && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3471 	      && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3472 	    {
3473 	      fbranch_count++;
3474 	      continue;
3475 	    }
3476 	}
3477 
3478 
3479       /* Find all floating point compare + branch insns.  If possible,
3480 	 reverse the comparison & the branch to avoid add,tr insns.  */
3481       for (insn = get_insns (); insn; insn = next_insn (insn))
3482 	{
3483 	  rtx tmp;
3484 	  rtx_insn *next;
3485 
3486 	  /* Ignore anything that isn't an INSN.  */
3487 	  if (! NONJUMP_INSN_P (insn))
3488 	    continue;
3489 
3490 	  tmp = PATTERN (insn);
3491 
3492 	  /* It must be a set.  */
3493 	  if (GET_CODE (tmp) != SET)
3494 	    continue;
3495 
3496 	  /* The destination must be CCFP, which is register zero.  */
3497 	  tmp = SET_DEST (tmp);
3498 	  if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3499 	    continue;
3500 
3501 	  /* INSN should be a set of CCFP.
3502 
3503 	     See if the result of this insn is used in a reversed FP
3504 	     conditional branch.  If so, reverse our condition and
3505 	     the branch.  Doing so avoids useless add,tr insns.  */
3506 	  next = next_insn (insn);
3507 	  while (next)
3508 	    {
3509 	      /* Jumps, calls and labels stop our search.  */
3510 	      if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
3511 		break;
3512 
3513 	      /* As does another fcmp insn.  */
3514 	      if (NONJUMP_INSN_P (next)
3515 		  && GET_CODE (PATTERN (next)) == SET
3516 		  && GET_CODE (SET_DEST (PATTERN (next))) == REG
3517 		  && REGNO (SET_DEST (PATTERN (next))) == 0)
3518 		break;
3519 
3520 	      next = next_insn (next);
3521 	    }
3522 
3523 	  /* Is NEXT a branch?  */
3524 	  if (next && JUMP_P (next))
3525 	    {
3526 	      rtx pattern = PATTERN (next);
3527 
3528 	      /* If it is a reversed fp conditional branch (e.g. uses add,tr)
3529 		 and CCFP dies, then reverse our conditional and the branch
3530 		 to avoid the add,tr.  */
3531 	      if (GET_CODE (pattern) == SET
3532 		  && SET_DEST (pattern) == pc_rtx
3533 		  && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3534 		  && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3535 		  && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3536 		  && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3537 		  && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3538 		  && (fcmp_count == fbranch_count
3539 		      || (check_notes
3540 			  && find_regno_note (next, REG_DEAD, 0))))
3541 		{
3542 		  /* Reverse the branch.  */
3543 		  tmp = XEXP (SET_SRC (pattern), 1);
3544 		  XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3545 		  XEXP (SET_SRC (pattern), 2) = tmp;
3546 		  INSN_CODE (next) = -1;
3547 
3548 		  /* Reverse our condition.  */
3549 		  tmp = PATTERN (insn);
3550 		  PUT_CODE (XEXP (tmp, 1),
3551 			    (reverse_condition_maybe_unordered
3552 			     (GET_CODE (XEXP (tmp, 1)))));
3553 		}
3554 	    }
3555 	}
3556     }
3557 
3558   pass = !pass;
3559 
3560 }
3561 
3562 /* You may have trouble believing this, but this is the 32 bit HP-PA
3563    stack layout.  Wow.
3564 
3565    Offset		Contents
3566 
3567    Variable arguments	(optional; any number may be allocated)
3568 
3569    SP-(4*(N+9))		arg word N
3570    	:		    :
3571       SP-56		arg word 5
3572       SP-52		arg word 4
3573 
3574    Fixed arguments	(must be allocated; may remain unused)
3575 
3576       SP-48		arg word 3
3577       SP-44		arg word 2
3578       SP-40		arg word 1
3579       SP-36		arg word 0
3580 
3581    Frame Marker
3582 
3583       SP-32		External Data Pointer (DP)
3584       SP-28		External sr4
3585       SP-24		External/stub RP (RP')
3586       SP-20		Current RP
3587       SP-16		Static Link
3588       SP-12		Clean up
3589       SP-8		Calling Stub RP (RP'')
3590       SP-4		Previous SP
3591 
3592    Top of Frame
3593 
3594       SP-0		Stack Pointer (points to next available address)
3595 
3596 */
3597 
3598 /* This function saves registers as follows.  Registers marked with ' are
3599    this function's registers (as opposed to the previous function's).
3600    If a frame_pointer isn't needed, r4 is saved as a general register;
3601    the space for the frame pointer is still allocated, though, to keep
3602    things simple.
3603 
3604 
3605    Top of Frame
3606 
3607        SP (FP')		Previous FP
3608        SP + 4		Alignment filler (sigh)
3609        SP + 8		Space for locals reserved here.
3610        .
3611        .
3612        .
3613        SP + n		All call-saved registers used.
3614        .
3615        .
3616        .
3617        SP + o		All call-saved fp registers used.
3618        .
3619        .
3620        .
3621        SP + p (SP')	points to next available address.
3622 
3623 */
3624 
3625 /* Global variables set by output_function_prologue().  */
3626 /* Size of frame.  Need to know this to emit return insns from
3627    leaf procedures.  */
3628 static HOST_WIDE_INT actual_fsize, local_fsize;
3629 static int save_fregs;
3630 
3631 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3632    Handle case where DISP > 8k by using the add_high_const patterns.
3633 
3634    Note in DISP > 8k case, we will leave the high part of the address
3635    in %r1.  There is code in pa_expand_{prologue,epilogue} that knows this.  */
3636 
3637 static void
3638 store_reg (int reg, HOST_WIDE_INT disp, int base)
3639 {
3640   rtx dest, src, basereg;
3641   rtx_insn *insn;
3642 
3643   src = gen_rtx_REG (word_mode, reg);
3644   basereg = gen_rtx_REG (Pmode, base);
3645   if (VAL_14_BITS_P (disp))
3646     {
3647       dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3648       insn = emit_move_insn (dest, src);
3649     }
3650   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3651     {
3652       rtx delta = GEN_INT (disp);
3653       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3654 
3655       emit_move_insn (tmpreg, delta);
3656       insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3657       if (DO_FRAME_NOTES)
3658 	{
3659 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3660 			gen_rtx_SET (tmpreg,
3661 				     gen_rtx_PLUS (Pmode, basereg, delta)));
3662 	  RTX_FRAME_RELATED_P (insn) = 1;
3663 	}
3664       dest = gen_rtx_MEM (word_mode, tmpreg);
3665       insn = emit_move_insn (dest, src);
3666     }
3667   else
3668     {
3669       rtx delta = GEN_INT (disp);
3670       rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3671       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3672 
3673       emit_move_insn (tmpreg, high);
3674       dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3675       insn = emit_move_insn (dest, src);
3676       if (DO_FRAME_NOTES)
3677 	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3678 		      gen_rtx_SET (gen_rtx_MEM (word_mode,
3679 						gen_rtx_PLUS (word_mode,
3680 							      basereg,
3681 							      delta)),
3682 				   src));
3683     }
3684 
3685   if (DO_FRAME_NOTES)
3686     RTX_FRAME_RELATED_P (insn) = 1;
3687 }
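
/* Illustrative assembly (editorial) for the cases above on a 32-bit
   target, storing %r3 relative to %r30:

	stw %r3,64(%r30)		; DISP fits in 14 bits

	addil L'20480,%r30		; otherwise: high part to %r1,
	stw %r3,R'20480(%r1)		; low part folded into the store

   The 64-bit !VAL_32_BITS_P path instead materializes the full
   displacement in %r1 and adds in the base register before storing.  */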
3688 
3689 /* Emit RTL to store REG at the memory location specified by BASE and then
3690    add MOD to BASE.  MOD must be <= 8k.  */
3691 
3692 static void
3693 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3694 {
3695   rtx basereg, srcreg, delta;
3696   rtx_insn *insn;
3697 
3698   gcc_assert (VAL_14_BITS_P (mod));
3699 
3700   basereg = gen_rtx_REG (Pmode, base);
3701   srcreg = gen_rtx_REG (word_mode, reg);
3702   delta = GEN_INT (mod);
3703 
3704   insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3705   if (DO_FRAME_NOTES)
3706     {
3707       RTX_FRAME_RELATED_P (insn) = 1;
3708 
3709       /* RTX_FRAME_RELATED_P must be set on each frame related set
3710 	 in a parallel with more than one element.  */
3711       RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3712       RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3713     }
3714 }
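
/* For example (editorial note), store_reg_modify (STACK_POINTER_REGNUM,
   1, 64) should emit the single instruction

	stwm %r1,64(%r30)

   on 32-bit targets: %r1 is stored at *%r30 and %r30 is incremented
   by 64 in one atomic operation.  */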
3715 
3716 /* Emit RTL to set REG to the value specified by BASE+DISP.  Handle case
3717    where DISP > 8k by using the add_high_const patterns.  NOTE indicates
3718    whether to add a frame note or not.
3719 
3720    In the DISP > 8k case, we leave the high part of the address in %r1.
3721    There is code in pa_expand_{prologue,epilogue} that knows about this.  */
3722 
3723 static void
3724 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3725 {
3726   rtx_insn *insn;
3727 
3728   if (VAL_14_BITS_P (disp))
3729     {
3730       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3731 			     plus_constant (Pmode,
3732 					    gen_rtx_REG (Pmode, base), disp));
3733     }
3734   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3735     {
3736       rtx basereg = gen_rtx_REG (Pmode, base);
3737       rtx delta = GEN_INT (disp);
3738       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3739 
3740       emit_move_insn (tmpreg, delta);
3741       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3742 			     gen_rtx_PLUS (Pmode, tmpreg, basereg));
3743       if (DO_FRAME_NOTES)
3744 	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3745 		      gen_rtx_SET (tmpreg,
3746 				   gen_rtx_PLUS (Pmode, basereg, delta)));
3747     }
3748   else
3749     {
3750       rtx basereg = gen_rtx_REG (Pmode, base);
3751       rtx delta = GEN_INT (disp);
3752       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3753 
3754       emit_move_insn (tmpreg,
3755 		      gen_rtx_PLUS (Pmode, basereg,
3756 				    gen_rtx_HIGH (Pmode, delta)));
3757       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3758 			     gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3759     }
3760 
3761   if (DO_FRAME_NOTES && note)
3762     RTX_FRAME_RELATED_P (insn) = 1;
3763 }
3764 
3765 HOST_WIDE_INT
3766 pa_compute_frame_size (poly_int64 size, int *fregs_live)
3767 {
3768   int freg_saved = 0;
3769   int i, j;
3770 
3771   /* The code in pa_expand_prologue and pa_expand_epilogue must
3772      be consistent with the rounding and size calculation done here.
3773      Change them at the same time.  */
3774 
3775   /* We do our own stack alignment.  First, round the size of the
3776      stack locals up to a word boundary.  */
3777   size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3778 
3779   /* Space for previous frame pointer + filler.  If any frame is
3780      allocated, we need to add in the TARGET_STARTING_FRAME_OFFSET.  We
3781      waste some space here for the sake of HP compatibility.  The
3782      first slot is only used when the frame pointer is needed.  */
3783   if (size || frame_pointer_needed)
3784     size += pa_starting_frame_offset ();
3785 
3786   /* If the current function calls __builtin_eh_return, then we need
3787      to allocate stack space for registers that will hold data for
3788      the exception handler.  */
3789   if (DO_FRAME_NOTES && crtl->calls_eh_return)
3790     {
3791       unsigned int i;
3792 
3793       for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3794 	continue;
3795       size += i * UNITS_PER_WORD;
3796     }
3797 
3798   /* Account for space used by the callee general register saves.  */
3799   for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3800     if (df_regs_ever_live_p (i))
3801       size += UNITS_PER_WORD;
3802 
3803   /* Account for space used by the callee floating point register saves.  */
3804   for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3805     if (df_regs_ever_live_p (i)
3806 	|| (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3807       {
3808 	freg_saved = 1;
3809 
3810 	/* We always save both halves of the FP register, so always
3811 	   increment the frame size by 8 bytes.  */
3812 	size += 8;
3813       }
3814 
3815   /* If any of the floating registers are saved, account for the
3816      alignment needed for the floating point register save block.  */
3817   if (freg_saved)
3818     {
3819       size = (size + 7) & ~7;
3820       if (fregs_live)
3821 	*fregs_live = 1;
3822     }
3823 
3824   /* The various ABIs include space for the outgoing parameters in the
3825      size of the current function's stack frame.  We don't need to align
3826      for the outgoing arguments as their alignment is set by the final
3827      rounding for the frame as a whole.  */
3828   size += crtl->outgoing_args_size;
3829 
3830   /* Allocate space for the fixed frame marker.  This space must be
3831      allocated for any function that makes calls or allocates
3832      stack space.  */
3833   if (!crtl->is_leaf || size)
3834     size += TARGET_64BIT ? 48 : 32;
3835 
3836   /* Finally, round to the preferred stack boundary.  */
3837   return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3838 	  & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3839 }
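
/* Editorial example of the computation above, assuming a 32-bit target
   with a starting frame offset of 8 bytes and a 64-byte preferred
   stack boundary: 10 bytes of locals round up to 12; adding the offset
   gives 20; two callee GR saves add 8 (28); 16 bytes of outgoing args
   make 44; the non-leaf frame marker adds 32 (76); and the final
   rounding returns 128.  */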
3840 
3841 /* Output function label, and associated .PROC and .CALLINFO statements.  */
3842 
3843 void
3844 pa_output_function_label (FILE *file)
3845 {
3846   /* The function's label and associated .PROC must never be
3847      separated and must be output *after* any profiling declarations
3848      to avoid changing spaces/subspaces within a procedure.  */
3849   ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3850   fputs ("\t.PROC\n", file);
3851 
3852   /* pa_expand_prologue does the dirty work now.  We just need
3853      to output the assembler directives which denote the start
3854      of a function.  */
3855   fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3856   if (crtl->is_leaf)
3857     fputs (",NO_CALLS", file);
3858   else
3859     fputs (",CALLS", file);
3860   if (rp_saved)
3861     fputs (",SAVE_RP", file);
3862 
3863   /* The SAVE_SP flag is used to indicate that register %r3 is stored
3864      at the beginning of the frame and that it is used as the frame
3865      pointer for the frame.  We do this because our current frame
3866      layout doesn't conform to that specified in the HP runtime
3867      documentation and we need a way to indicate to programs such as
3868      GDB where %r3 is saved.  The SAVE_SP flag was chosen because it
3869      isn't used by HP compilers but is supported by the assembler.
3870      However, SAVE_SP is supposed to indicate that the previous stack
3871      pointer has been saved in the frame marker.  */
3872   if (frame_pointer_needed)
3873     fputs (",SAVE_SP", file);
3874 
3875   /* Pass on information about the number of callee register saves
3876      performed in the prologue.
3877 
3878      The compiler is supposed to pass the highest register number
3879      saved, the assembler then has to adjust that number before
3880      entering it into the unwind descriptor (to account for any
3881      caller saved registers with lower register numbers than the
3882      first callee saved register).  */
3883   if (gr_saved)
3884     fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3885 
3886   if (fr_saved)
3887     fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3888 
3889   fputs ("\n\t.ENTRY\n", file);
3890 }
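
/* A representative directive sequence (editorial illustration) for a
   non-leaf function with a 128-byte frame, a saved RP, a frame pointer
   and three callee GR saves (gr_saved == 3, hence ENTRY_GR == 5),
   emitted just after the function label:

	.PROC
	.CALLINFO FRAME=128,CALLS,SAVE_RP,SAVE_SP,ENTRY_GR=5
	.ENTRY
*/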
3891 
3892 /* Output function prologue.  */
3893 
3894 static void
3895 pa_output_function_prologue (FILE *file)
3896 {
3897   pa_output_function_label (file);
3898   remove_useless_addtr_insns (0);
3899 }
3900 
3901 /* The label is output by ASM_DECLARE_FUNCTION_NAME on linux.  */
3902 
3903 static void
3904 pa_linux_output_function_prologue (FILE *file ATTRIBUTE_UNUSED)
3905 {
3906   remove_useless_addtr_insns (0);
3907 }
3908 
3909 void
3910 pa_expand_prologue (void)
3911 {
3912   int merge_sp_adjust_with_store = 0;
3913   HOST_WIDE_INT size = get_frame_size ();
3914   HOST_WIDE_INT offset;
3915   int i;
3916   rtx tmpreg;
3917   rtx_insn *insn;
3918 
3919   gr_saved = 0;
3920   fr_saved = 0;
3921   save_fregs = 0;
3922 
3923   /* Compute total size for frame pointer, filler, locals and rounding to
3924      the next word boundary.  Similar code appears in pa_compute_frame_size
3925      and must be changed in tandem with this code.  */
3926   local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3927   if (local_fsize || frame_pointer_needed)
3928     local_fsize += pa_starting_frame_offset ();
3929 
3930   actual_fsize = pa_compute_frame_size (size, &save_fregs);
3931   if (flag_stack_usage_info)
3932     current_function_static_stack_size = actual_fsize;
3933 
3934   /* Compute a few things we will use often.  */
3935   tmpreg = gen_rtx_REG (word_mode, 1);
3936 
3937   /* Save RP first.  The calling conventions manual states RP will
3938      always be stored into the caller's frame at sp - 20 or sp - 16
3939      depending on which ABI is in use.  */
3940   if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3941     {
3942       store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3943       rp_saved = true;
3944     }
3945   else
3946     rp_saved = false;
3947 
3948   /* Allocate the local frame and set up the frame pointer if needed.  */
3949   if (actual_fsize != 0)
3950     {
3951       if (frame_pointer_needed)
3952 	{
3953 	  /* Copy the old frame pointer temporarily into %r1.  Set up the
3954 	     new stack pointer, then store away the saved old frame pointer
3955 	     into the stack at sp and at the same time update the stack
3956 	     pointer by actual_fsize bytes.  Two versions, first
3957 	     handles small (<8k) frames.  The second handles large (>=8k)
3958 	     frames.  */
3959 	  insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
3960 	  if (DO_FRAME_NOTES)
3961 	    RTX_FRAME_RELATED_P (insn) = 1;
3962 
3963 	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3964 	  if (DO_FRAME_NOTES)
3965 	    RTX_FRAME_RELATED_P (insn) = 1;
3966 
3967 	  if (VAL_14_BITS_P (actual_fsize))
3968 	    store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3969 	  else
3970 	    {
3971 	      /* It is incorrect to store the saved frame pointer at *sp,
3972 		 then increment sp (writes beyond the current stack boundary).
3973 
3974 		 So instead use stwm to store at *sp and post-increment the
3975 		 stack pointer as an atomic operation.  Then increment sp to
3976 		 finish allocating the new frame.  */
3977 	      HOST_WIDE_INT adjust1 = 8192 - 64;
3978 	      HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
3979 
3980 	      store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3981 	      set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3982 			      adjust2, 1);
3983 	    }
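
	  /* Editorial example of the large-frame path above: with
	     actual_fsize == 20000, adjust1 is 8128 and adjust2 is
	     11872; the frame is then built with "stwm %r1,8128(%r30)"
	     followed by a plain 11872-byte adjustment of %r30.  */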
3984 
3985 	  /* We set SAVE_SP in frames that need a frame pointer.  Thus,
3986 	     we need to store the previous stack pointer (frame pointer)
3987 	     into the frame marker on targets that use the HP unwind
3988 	     library.  This allows the HP unwind library to be used to
3989 	     unwind GCC frames.  However, we are not fully compatible
3990 	     with the HP library because our frame layout differs from
3991 	     that specified in the HP runtime specification.
3992 
3993 	     We don't want a frame note on this instruction as the frame
3994 	     marker moves during dynamic stack allocation.
3995 
3996 	     This instruction also serves as a blockage to prevent
3997 	     register spills from being scheduled before the stack
3998 	     pointer is raised.  This is necessary as we store
3999 	     registers using the frame pointer as a base register,
4000 	     and the frame pointer is set before sp is raised.  */
4001 	  if (TARGET_HPUX_UNWIND_LIBRARY)
4002 	    {
4003 	      rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
4004 				       GEN_INT (TARGET_64BIT ? -8 : -4));
4005 
4006 	      emit_move_insn (gen_rtx_MEM (word_mode, addr),
4007 			      hard_frame_pointer_rtx);
4008 	    }
4009 	  else
4010 	    emit_insn (gen_blockage ());
4011 	}
4012       /* No frame pointer needed.  */
4013       else
4014 	{
4015 	  /* In some cases we can perform the first callee register save
4016 	     and allocate the stack frame at the same time.  If so, just
4017 	     make a note of it and defer allocating the frame until saving
4018 	     the callee registers.  */
4019 	  if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
4020 	    merge_sp_adjust_with_store = 1;
4021 	  /* Cannot optimize.  Adjust the stack frame by actual_fsize
4022 	     bytes.  */
4023 	  else
4024 	    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4025 			    actual_fsize, 1);
4026 	}
4027     }
4028 
4029   /* Normal register save.
4030 
4031      Do not save the frame pointer in the frame_pointer_needed case.  It
4032      was done earlier.  */
4033   if (frame_pointer_needed)
4034     {
4035       offset = local_fsize;
4036 
4037       /* Saving the EH return data registers in the frame is the simplest
4038 	 way to get the frame unwind information emitted.  We put them
4039 	 just before the general registers.  */
4040       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4041 	{
4042 	  unsigned int i, regno;
4043 
4044 	  for (i = 0; ; ++i)
4045 	    {
4046 	      regno = EH_RETURN_DATA_REGNO (i);
4047 	      if (regno == INVALID_REGNUM)
4048 		break;
4049 
4050 	      store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4051 	      offset += UNITS_PER_WORD;
4052 	    }
4053 	}
4054 
4055       for (i = 18; i >= 4; i--)
4056 	if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4057 	  {
4058 	    store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4059 	    offset += UNITS_PER_WORD;
4060 	    gr_saved++;
4061 	  }
4062       /* Account for %r3 which is saved in a special place.  */
4063       gr_saved++;
4064     }
4065   /* No frame pointer needed.  */
4066   else
4067     {
4068       offset = local_fsize - actual_fsize;
4069 
4070       /* Saving the EH return data registers in the frame is the simplest
4071          way to get the frame unwind information emitted.  */
4072       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4073 	{
4074 	  unsigned int i, regno;
4075 
4076 	  for (i = 0; ; ++i)
4077 	    {
4078 	      regno = EH_RETURN_DATA_REGNO (i);
4079 	      if (regno == INVALID_REGNUM)
4080 		break;
4081 
4082 	      /* If merge_sp_adjust_with_store is nonzero, then we can
4083 		 optimize the first save.  */
4084 	      if (merge_sp_adjust_with_store)
4085 		{
4086 		  store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4087 		  merge_sp_adjust_with_store = 0;
4088 		}
4089 	      else
4090 		store_reg (regno, offset, STACK_POINTER_REGNUM);
4091 	      offset += UNITS_PER_WORD;
4092 	    }
4093 	}
4094 
4095       for (i = 18; i >= 3; i--)
4096 	if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4097 	  {
4098 	    /* If merge_sp_adjust_with_store is nonzero, then we can
4099 	       optimize the first GR save.  */
4100 	    if (merge_sp_adjust_with_store)
4101 	      {
4102 		store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4103 		merge_sp_adjust_with_store = 0;
4104 	      }
4105 	    else
4106 	      store_reg (i, offset, STACK_POINTER_REGNUM);
4107 	    offset += UNITS_PER_WORD;
4108 	    gr_saved++;
4109 	  }
4110 
4111       /* If we wanted to merge the SP adjustment with a GR save, but we never
4112 	 did any GR saves, then just emit the adjustment here.  */
4113       if (merge_sp_adjust_with_store)
4114 	set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4115 			actual_fsize, 1);
4116     }
4117 
4118   /* The hppa calling conventions say that %r19, the pic offset
4119      register, is saved at sp - 32 (in this function's frame)
4120      when generating PIC code.  FIXME:  What is the correct thing
4121      to do for functions which make no calls and allocate no
4122      frame?  Do we need to allocate a frame, or can we just omit
4123      the save?   For now we'll just omit the save.
4124 
4125      We don't want a note on this insn as the frame marker can
4126      move if there is a dynamic stack allocation.  */
4127   if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4128     {
4129       rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4130 
4131       emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4132 
4133     }
4134 
4135   /* Align pointer properly (doubleword boundary).  */
4136   offset = (offset + 7) & ~7;
4137 
4138   /* Floating point register store.  */
4139   if (save_fregs)
4140     {
4141       rtx base;
4142 
4143       /* First get the frame or stack pointer to the start of the FP register
4144 	 save area.  */
4145       if (frame_pointer_needed)
4146 	{
4147 	  set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4148 	  base = hard_frame_pointer_rtx;
4149 	}
4150       else
4151 	{
4152 	  set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4153 	  base = stack_pointer_rtx;
4154 	}
4155 
4156       /* Now actually save the FP registers.  */
4157       for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4158 	{
4159 	  if (df_regs_ever_live_p (i)
4160 	      || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4161 	    {
4162 	      rtx addr, reg;
4163 	      rtx_insn *insn;
4164 	      addr = gen_rtx_MEM (DFmode,
4165 				  gen_rtx_POST_INC (word_mode, tmpreg));
4166 	      reg = gen_rtx_REG (DFmode, i);
4167 	      insn = emit_move_insn (addr, reg);
4168 	      if (DO_FRAME_NOTES)
4169 		{
4170 		  RTX_FRAME_RELATED_P (insn) = 1;
4171 		  if (TARGET_64BIT)
4172 		    {
4173 		      rtx mem = gen_rtx_MEM (DFmode,
4174 					     plus_constant (Pmode, base,
4175 							    offset));
4176 		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4177 				    gen_rtx_SET (mem, reg));
4178 		    }
4179 		  else
4180 		    {
4181 		      rtx meml = gen_rtx_MEM (SFmode,
4182 					      plus_constant (Pmode, base,
4183 							     offset));
4184 		      rtx memr = gen_rtx_MEM (SFmode,
4185 					      plus_constant (Pmode, base,
4186 							     offset + 4));
4187 		      rtx regl = gen_rtx_REG (SFmode, i);
4188 		      rtx regr = gen_rtx_REG (SFmode, i + 1);
4189 		      rtx setl = gen_rtx_SET (meml, regl);
4190 		      rtx setr = gen_rtx_SET (memr, regr);
4191 		      rtvec vec;
4192 
4193 		      RTX_FRAME_RELATED_P (setl) = 1;
4194 		      RTX_FRAME_RELATED_P (setr) = 1;
4195 		      vec = gen_rtvec (2, setl, setr);
4196 		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4197 				    gen_rtx_SEQUENCE (VOIDmode, vec));
4198 		    }
4199 		}
4200 	      offset += GET_MODE_SIZE (DFmode);
4201 	      fr_saved++;
4202 	    }
4203 	}
4204     }
4205 }
4206 
4207 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4208    Handle case where DISP > 8k by using the add_high_const patterns.  */
4209 
4210 static void
4211 load_reg (int reg, HOST_WIDE_INT disp, int base)
4212 {
4213   rtx dest = gen_rtx_REG (word_mode, reg);
4214   rtx basereg = gen_rtx_REG (Pmode, base);
4215   rtx src;
4216 
4217   if (VAL_14_BITS_P (disp))
4218     src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4219   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4220     {
4221       rtx delta = GEN_INT (disp);
4222       rtx tmpreg = gen_rtx_REG (Pmode, 1);
4223 
4224       emit_move_insn (tmpreg, delta);
4225       if (TARGET_DISABLE_INDEXING)
4226 	{
4227 	  emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4228 	  src = gen_rtx_MEM (word_mode, tmpreg);
4229 	}
4230       else
4231 	src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4232     }
4233   else
4234     {
4235       rtx delta = GEN_INT (disp);
4236       rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4237       rtx tmpreg = gen_rtx_REG (Pmode, 1);
4238 
4239       emit_move_insn (tmpreg, high);
4240       src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4241     }
4242 
4243   emit_move_insn (dest, src);
4244 }
4245 
4246 /* Update the total code bytes output to the text section.  */
4247 
4248 static void
4249 update_total_code_bytes (unsigned int nbytes)
4250 {
4251   if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4252       && !IN_NAMED_SECTION_P (cfun->decl))
4253     {
4254       unsigned int old_total = total_code_bytes;
4255 
4256       total_code_bytes += nbytes;
4257 
4258       /* Be prepared to handle overflows.  */
4259       if (old_total > total_code_bytes)
4260         total_code_bytes = UINT_MAX;
4261     }
4262 }
4263 
4264 /* This function generates the assembly code for function exit.
4265    Args are as for output_function_prologue ().
4266 
4267    The function epilogue should not depend on the current stack
4268    pointer!  It should use the frame pointer only.  This is mandatory
4269    because of alloca; we also take advantage of it to omit stack
4270    adjustments before returning.  */
4271 
4272 static void
4273 pa_output_function_epilogue (FILE *file)
4274 {
4275   rtx_insn *insn = get_last_insn ();
4276   bool extra_nop;
4277 
4278   /* pa_expand_epilogue does the dirty work now.  We just need
4279      to output the assembler directives which denote the end
4280      of a function.
4281 
4282      To make debuggers happy, emit a nop if the epilogue was completely
4283      eliminated due to a volatile call as the last insn in the
4284      current function.  That way the return address (in %r2) will
4285      always point to a valid instruction in the current function.  */
4286 
4287   /* Get the last real insn.  */
4288   if (NOTE_P (insn))
4289     insn = prev_real_insn (insn);
4290 
4291   /* If it is a sequence, then look inside.  */
4292   if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
4293     insn = as_a <rtx_sequence *> (PATTERN (insn))->insn (0);
4294 
4295   /* If insn is a CALL_INSN, then it must be a call to a volatile
4296      function (otherwise there would be epilogue insns).  */
4297   if (insn && CALL_P (insn))
4298     {
4299       fputs ("\tnop\n", file);
4300       extra_nop = true;
4301     }
4302   else
4303     extra_nop = false;
4304 
4305   fputs ("\t.EXIT\n\t.PROCEND\n", file);
4306 
4307   if (TARGET_SOM && TARGET_GAS)
4308     {
4309       /* We are done with this subspace except possibly for some additional
4310 	 debug information.  Forget that we are in this subspace to ensure
4311 	 that the next function is output in its own subspace.  */
4312       in_section = NULL;
4313       cfun->machine->in_nsubspa = 2;
4314     }
4315 
4316   /* Thunks do their own insn accounting.  */
4317   if (cfun->is_thunk)
4318     return;
4319 
4320   if (INSN_ADDRESSES_SET_P ())
4321     {
4322       last_address = extra_nop ? 4 : 0;
4323       insn = get_last_nonnote_insn ();
4324       if (insn)
4325 	{
4326 	  last_address += INSN_ADDRESSES (INSN_UID (insn));
4327 	  if (INSN_P (insn))
4328 	    last_address += insn_default_length (insn);
4329 	}
4330       last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4331 		      & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4332     }
4333   else
4334     last_address = UINT_MAX;
4335 
4336   /* Finally, update the total number of code bytes output so far.  */
4337   update_total_code_bytes (last_address);
4338 }
4339 
4340 void
4341 pa_expand_epilogue (void)
4342 {
4343   rtx tmpreg;
4344   HOST_WIDE_INT offset;
4345   HOST_WIDE_INT ret_off = 0;
4346   int i;
4347   int merge_sp_adjust_with_load = 0;
4348 
4349   /* We will use this often.  */
4350   tmpreg = gen_rtx_REG (word_mode, 1);
4351 
4352   /* Try to restore RP early to avoid load/use interlocks when
4353      RP gets used in the return (bv) instruction.  This appears to still
4354      be necessary even when we schedule the prologue and epilogue.  */
4355   if (rp_saved)
4356     {
4357       ret_off = TARGET_64BIT ? -16 : -20;
4358       if (frame_pointer_needed)
4359 	{
4360 	  load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4361 	  ret_off = 0;
4362 	}
4363       else
4364 	{
4365 	  /* No frame pointer, and stack is smaller than 8k.  */
4366 	  if (VAL_14_BITS_P (ret_off - actual_fsize))
4367 	    {
4368 	      load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4369 	      ret_off = 0;
4370 	    }
4371 	}
4372     }
4373 
4374   /* General register restores.  */
4375   if (frame_pointer_needed)
4376     {
4377       offset = local_fsize;
4378 
4379       /* If the current function calls __builtin_eh_return, then we need
4380          to restore the saved EH data registers.  */
4381       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4382 	{
4383 	  unsigned int i, regno;
4384 
4385 	  for (i = 0; ; ++i)
4386 	    {
4387 	      regno = EH_RETURN_DATA_REGNO (i);
4388 	      if (regno == INVALID_REGNUM)
4389 		break;
4390 
4391 	      load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4392 	      offset += UNITS_PER_WORD;
4393 	    }
4394 	}
4395 
4396       for (i = 18; i >= 4; i--)
4397 	if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4398 	  {
4399 	    load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4400 	    offset += UNITS_PER_WORD;
4401 	  }
4402     }
4403   else
4404     {
4405       offset = local_fsize - actual_fsize;
4406 
4407       /* If the current function calls __builtin_eh_return, then we need
4408          to restore the saved EH data registers.  */
4409       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4410 	{
4411 	  unsigned int i, regno;
4412 
4413 	  for (i = 0; ; ++i)
4414 	    {
4415 	      regno = EH_RETURN_DATA_REGNO (i);
4416 	      if (regno == INVALID_REGNUM)
4417 		break;
4418 
4419 	      /* Only for the first load.
4420 	         merge_sp_adjust_with_load holds the number of the register
4421 	         whose load we will merge with the sp adjustment.  */
4422 	      if (merge_sp_adjust_with_load == 0
4423 		  && local_fsize == 0
4424 		  && VAL_14_BITS_P (-actual_fsize))
4425 	        merge_sp_adjust_with_load = regno;
4426 	      else
4427 		load_reg (regno, offset, STACK_POINTER_REGNUM);
4428 	      offset += UNITS_PER_WORD;
4429 	    }
4430 	}
4431 
4432       for (i = 18; i >= 3; i--)
4433 	{
4434 	  if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4435 	    {
4436 	      /* Only for the first load.
4437 	         merge_sp_adjust_with_load holds the number of the register
4438 	         whose load we will merge with the sp adjustment.  */
4439 	      if (merge_sp_adjust_with_load == 0
4440 		  && local_fsize == 0
4441 		  && VAL_14_BITS_P (-actual_fsize))
4442 	        merge_sp_adjust_with_load = i;
4443 	      else
4444 		load_reg (i, offset, STACK_POINTER_REGNUM);
4445 	      offset += UNITS_PER_WORD;
4446 	    }
4447 	}
4448     }
4449 
4450   /* Align pointer properly (doubleword boundary).  */
4451   offset = (offset + 7) & ~7;
4452 
4453   /* FP register restores.  */
4454   if (save_fregs)
4455     {
4456       /* Adjust the register to index off of.  */
4457       if (frame_pointer_needed)
4458 	set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4459       else
4460 	set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4461 
4462       /* Actually do the restores now.  */
4463       for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4464 	if (df_regs_ever_live_p (i)
4465 	    || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4466 	  {
4467 	    rtx src = gen_rtx_MEM (DFmode,
4468 				   gen_rtx_POST_INC (word_mode, tmpreg));
4469 	    rtx dest = gen_rtx_REG (DFmode, i);
4470 	    emit_move_insn (dest, src);
4471 	  }
4472     }
4473 
4474   /* Emit a blockage insn here to keep these insns from being moved to
4475      an earlier spot in the epilogue, or into the main instruction stream.
4476 
4477      This is necessary as we must not cut the stack back before all the
4478      restores are finished.  */
4479   emit_insn (gen_blockage ());
4480 
4481   /* Reset stack pointer (and possibly frame pointer).  The stack
4482      pointer is initially set to fp + 64 to avoid a race condition.  */
4483   if (frame_pointer_needed)
4484     {
4485       rtx delta = GEN_INT (-64);
4486 
4487       set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4488       emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4489 			       stack_pointer_rtx, delta));
4490     }
4491   /* If we were deferring a callee register restore, do it now.  */
4492   else if (merge_sp_adjust_with_load)
4493     {
4494       rtx delta = GEN_INT (-actual_fsize);
4495       rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4496 
4497       emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4498     }
4499   else if (actual_fsize != 0)
4500     set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4501 		    - actual_fsize, 0);
4502 
4503   /* If we haven't restored %r2 yet (no frame pointer, and a stack
4504      frame greater than 8k), do so now.  */
4505   if (ret_off != 0)
4506     load_reg (2, ret_off, STACK_POINTER_REGNUM);
4507 
4508   if (DO_FRAME_NOTES && crtl->calls_eh_return)
4509     {
4510       rtx sa = EH_RETURN_STACKADJ_RTX;
4511 
4512       emit_insn (gen_blockage ());
4513       emit_insn (TARGET_64BIT
4514 		 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4515 		 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4516     }
4517 }
4518 
4519 bool
4520 pa_can_use_return_insn (void)
4521 {
4522   if (!reload_completed)
4523     return false;
4524 
4525   if (frame_pointer_needed)
4526     return false;
4527 
4528   if (df_regs_ever_live_p (2))
4529     return false;
4530 
4531   if (crtl->profile)
4532     return false;
4533 
4534   return pa_compute_frame_size (get_frame_size (), 0) == 0;
4535 }
4536 
4537 rtx
4538 hppa_pic_save_rtx (void)
4539 {
4540   return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4541 }
4542 
4543 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4544 #define NO_DEFERRED_PROFILE_COUNTERS 0
4545 #endif
4546 
4547 
4548 /* Vector of funcdef numbers.  */
4549 static vec<int> funcdef_nos;
4550 
4551 /* Output deferred profile counters.  */
4552 static void
4553 output_deferred_profile_counters (void)
4554 {
4555   unsigned int i;
4556   int align, n;
4557 
4558   if (funcdef_nos.is_empty ())
4559    return;
4560 
4561   switch_to_section (data_section);
4562   align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4563   ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4564 
4565   for (i = 0; funcdef_nos.iterate (i, &n); i++)
4566     {
4567       targetm.asm_out.internal_label (asm_out_file, "LP", n);
4568       assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4569     }
4570 
4571   funcdef_nos.release ();
4572 }
4573 
4574 void
4575 hppa_profile_hook (int label_no)
4576 {
4577   rtx_code_label *label_rtx = gen_label_rtx ();
4578   int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE);
4579   rtx arg_bytes, begin_label_rtx, mcount, sym;
4580   rtx_insn *call_insn;
4581   char begin_label_name[16];
4582   bool use_mcount_pcrel_call;
4583 
4584   /* Set up call destination.  */
4585   sym = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
4586   pa_encode_label (sym);
4587   mcount = gen_rtx_MEM (Pmode, sym);
4588 
4589   /* If we can reach _mcount with a pc-relative call, we can optimize
4590      loading the address of the current function.  This requires linker
4591      long branch stub support.  */
4592   if (!TARGET_PORTABLE_RUNTIME
4593       && !TARGET_LONG_CALLS
4594       && (TARGET_SOM || flag_function_sections))
4595     use_mcount_pcrel_call = TRUE;
4596   else
4597     use_mcount_pcrel_call = FALSE;
4598 
4599   ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4600 			       label_no);
4601   begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4602 
4603   emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4604 
4605   if (!use_mcount_pcrel_call)
4606     {
4607       /* The address of the function is loaded into %r25 with an instruction-
4608 	 relative sequence that avoids the use of relocations.  We use SImode
4609 	 for the address of the function in both 32 and 64-bit code to avoid
4610 	 having to provide DImode versions of the lcla2 pattern.  */
4611       if (TARGET_PA_20)
4612 	emit_insn (gen_lcla2 (gen_rtx_REG (SImode, 25), label_rtx));
4613       else
4614 	emit_insn (gen_lcla1 (gen_rtx_REG (SImode, 25), label_rtx));
4615     }
4616 
4617   if (!NO_DEFERRED_PROFILE_COUNTERS)
4618     {
4619       rtx count_label_rtx, addr, r24;
4620       char count_label_name[16];
4621 
4622       funcdef_nos.safe_push (label_no);
4623       ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4624       count_label_rtx = gen_rtx_SYMBOL_REF (Pmode,
4625 					    ggc_strdup (count_label_name));
4626 
4627       addr = force_reg (Pmode, count_label_rtx);
4628       r24 = gen_rtx_REG (Pmode, 24);
4629       emit_move_insn (r24, addr);
4630 
4631       arg_bytes = GEN_INT (TARGET_64BIT ? 24 : 12);
4632       if (use_mcount_pcrel_call)
4633 	call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4634 						     begin_label_rtx));
4635       else
4636 	call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4637 
4638       use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4639     }
4640   else
4641     {
4642       arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8);
4643       if (use_mcount_pcrel_call)
4644 	call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4645 						     begin_label_rtx));
4646       else
4647 	call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4648     }
4649 
4650   use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4651   use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4652 
4653   /* Indicate the _mcount call cannot throw, nor will it execute a
4654      non-local goto.  */
4655   make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4656 
4657   /* Allocate space for fixed arguments.  */
4658   if (reg_parm_stack_space > crtl->outgoing_args_size)
4659     crtl->outgoing_args_size = reg_parm_stack_space;
4660 }
4661 
4662 /* Fetch the return address for the frame COUNT steps up from
4663    the current frame, after the prologue.  FRAMEADDR is the
4664    frame pointer of the COUNT frame.
4665 
4666    We want to ignore any export stub remnants here.  To handle this,
4667    we examine the code at the return address, and if it is an export
4668    stub, we return a memory rtx for the stub return address stored
4669    at frame-24.
4670 
4671    The value returned is used in two different ways:
4672 
4673 	1. To find a function's caller.
4674 
4675 	2. To change the return address for a function.
4676 
4677    This function handles most instances of case 1; however, it will
4678    fail if there are two levels of stubs to execute on the return
4679    path.  The only way I believe that can happen is if the return value
4680    needs a parameter relocation, which never happens for C code.
4681 
4682    This function handles most instances of case 2; however, it will
4683    fail if we did not originally have stub code on the return path
4684    but will need stub code on the new return path.  This can happen if
4685    the caller & callee are both in the main program, but the new
4686    return location is in a shared library.  */
4687 
4688 rtx
4689 pa_return_addr_rtx (int count, rtx frameaddr)
4690 {
4691   rtx label;
4692   rtx rp;
4693   rtx saved_rp;
4694   rtx ins;
4695 
4696   /* The instruction stream at the return address of a PA1.X export stub is:
4697 
4698 	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
4699 	0x004010a1 | stub+12:  ldsid (sr0,rp),r1
4700 	0x00011820 | stub+16:  mtsp r1,sr0
4701 	0xe0400002 | stub+20:  be,n 0(sr0,rp)
4702 
4703      0xe0400002 must be specified as -532676606 so that it won't be
4704      rejected as an invalid immediate operand on 64-bit hosts.
4705 
4706      The instruction stream at the return address of a PA2.0 export stub is:
4707 
4708 	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
4709 	0xe840d002 | stub+12:  bve,n (rp)
4710   */
4711 
4712   HOST_WIDE_INT insns[4];
4713   int i, len;
4714 
4715   if (count != 0)
4716     return NULL_RTX;
4717 
4718   rp = get_hard_reg_initial_val (Pmode, 2);
4719 
4720   if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4721     return rp;
4722 
4723   /* If there is no export stub then just use the value saved from
4724      the return pointer register.  */
4725 
4726   saved_rp = gen_reg_rtx (Pmode);
4727   emit_move_insn (saved_rp, rp);
4728 
4729   /* Get pointer to the instruction stream.  We have to mask out the
4730      privilege level from the two low order bits of the return address
4731      pointer here so that ins will point to the start of the first
4732      instruction that would have been executed if we returned.  */
4733   ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4734   label = gen_label_rtx ();
4735 
4736   if (TARGET_PA_20)
4737     {
4738       insns[0] = 0x4bc23fd1;
4739       insns[1] = -398405630;	/* 0xe840d002 as a signed 32-bit value.  */
4740       len = 2;
4741     }
4742   else
4743     {
4744       insns[0] = 0x4bc23fd1;
4745       insns[1] = 0x004010a1;
4746       insns[2] = 0x00011820;
4747       insns[3] = -532676606;
4748       len = 4;
4749     }
4750 
4751   /* Check the instruction stream at the normal return address for the
4752      export stub.  If it is an export stub, then our return address is
4753      really in -24[frameaddr].  */
4754 
4755   for (i = 0; i < len; i++)
4756     {
4757       rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4758       rtx op1 = GEN_INT (insns[i]);
4759       emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4760     }
4761 
4762   /* Here we know that our return address points to an export
4763      stub.  We don't want to return the address of the export stub,
4764      but rather the return address of the export stub.  That return
4765      address is stored at -24[frameaddr].  */
4766 
4767   emit_move_insn (saved_rp,
4768 		  gen_rtx_MEM (Pmode,
4769 			       memory_address (Pmode,
4770 					       plus_constant (Pmode, frameaddr,
4771 							      -24))));
4772 
4773   emit_label (label);
4774 
4775   return saved_rp;
4776 }
4777 
4778 void
4779 pa_emit_bcond_fp (rtx operands[])
4780 {
4781   enum rtx_code code = GET_CODE (operands[0]);
4782   rtx operand0 = operands[1];
4783   rtx operand1 = operands[2];
4784   rtx label = operands[3];
4785 
4786   emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
4787 		          gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4788 
4789   emit_jump_insn (gen_rtx_SET (pc_rtx,
4790 			       gen_rtx_IF_THEN_ELSE (VOIDmode,
4791 						     gen_rtx_fmt_ee (NE,
4792 							      VOIDmode,
4793 							      gen_rtx_REG (CCFPmode, 0),
4794 							      const0_rtx),
4795 						     gen_rtx_LABEL_REF (VOIDmode, label),
4796 						     pc_rtx)));
4797 
4798 }
4799 
4800 /* Adjust the cost of a scheduling dependency.  Return the new cost of
4801    the DEP_TYPE dependency of INSN on DEP_INSN.  COST is the current cost.  */
4802 
4803 static int
4804 pa_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4805 		unsigned int)
4806 {
4807   enum attr_type attr_type;
4808 
4809   /* Don't adjust costs for a pa8000 chip; also do not adjust any
4810      true dependencies, as they are described with bypasses now.  */
4811   if (pa_cpu >= PROCESSOR_8000 || dep_type == 0)
4812     return cost;
4813 
4814   if (! recog_memoized (insn))
4815     return 0;
4816 
4817   attr_type = get_attr_type (insn);
4818 
4819   switch (dep_type)
4820     {
4821     case REG_DEP_ANTI:
4822       /* Anti dependency; DEP_INSN reads a register that INSN writes some
4823 	 cycles later.  */
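      /* An illustrative (not exhaustive) example of such a dependency:

	     fadd,dbl %fr5,%fr6,%fr7    ; DEP_INSN reads %fr5
	     fldd 0(%r26),%fr5          ; INSN overwrites %fr5

	 The load's write of %fr5 must not complete until the add has read
	 its sources, hence the latency-based adjustments below.  */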
4824 
4825       if (attr_type == TYPE_FPLOAD)
4826 	{
4827 	  rtx pat = PATTERN (insn);
4828 	  rtx dep_pat = PATTERN (dep_insn);
4829 	  if (GET_CODE (pat) == PARALLEL)
4830 	    {
4831 	      /* This happens for the fldXs,mb patterns.  */
4832 	      pat = XVECEXP (pat, 0, 0);
4833 	    }
4834 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4835 	    /* If this happens, we have to extend this to schedule
4836 	       optimally.  Return 0 for now.  */
4837 	    return 0;
4838 
4839 	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4840 	    {
4841 	      if (! recog_memoized (dep_insn))
4842 		return 0;
4843 	      switch (get_attr_type (dep_insn))
4844 		{
4845 		case TYPE_FPALU:
4846 		case TYPE_FPMULSGL:
4847 		case TYPE_FPMULDBL:
4848 		case TYPE_FPDIVSGL:
4849 		case TYPE_FPDIVDBL:
4850 		case TYPE_FPSQRTSGL:
4851 		case TYPE_FPSQRTDBL:
4852 		  /* A fpload can't be issued until one cycle before a
4853 		     preceding arithmetic operation has finished if
4854 		     the target of the fpload is any of the sources
4855 		     (or destination) of the arithmetic operation.  */
4856 		  return insn_default_latency (dep_insn) - 1;
4857 
4858 		default:
4859 		  return 0;
4860 		}
4861 	    }
4862 	}
4863       else if (attr_type == TYPE_FPALU)
4864 	{
4865 	  rtx pat = PATTERN (insn);
4866 	  rtx dep_pat = PATTERN (dep_insn);
4867 	  if (GET_CODE (pat) == PARALLEL)
4868 	    {
4869 	      /* This happens for the fldXs,mb patterns.  */
4870 	      pat = XVECEXP (pat, 0, 0);
4871 	    }
4872 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4873 	    /* If this happens, we have to extend this to schedule
4874 	       optimally.  Return 0 for now.  */
4875 	    return 0;
4876 
4877 	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4878 	    {
4879 	      if (! recog_memoized (dep_insn))
4880 		return 0;
4881 	      switch (get_attr_type (dep_insn))
4882 		{
4883 		case TYPE_FPDIVSGL:
4884 		case TYPE_FPDIVDBL:
4885 		case TYPE_FPSQRTSGL:
4886 		case TYPE_FPSQRTDBL:
4887 		  /* An ALU flop can't be issued until two cycles before a
4888 		     preceding divide or sqrt operation has finished if
4889 		     the target of the ALU flop is any of the sources
4890 		     (or destination) of the divide or sqrt operation.  */
4891 		  return insn_default_latency (dep_insn) - 2;
4892 
4893 		default:
4894 		  return 0;
4895 		}
4896 	    }
4897 	}
4898 
4899       /* For other anti dependencies, the cost is 0.  */
4900       return 0;
4901 
4902     case REG_DEP_OUTPUT:
4903       /* Output dependency; DEP_INSN writes a register that INSN writes some
4904 	 cycles later.  */
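      /* Illustratively, an fpload that rewrites the destination register
	 of an in-flight FP arithmetic operation (e.g., an fldd targeting
	 the result register of a preceding fadd) must wait for that
	 operation to retire, hence the adjustments below.  */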
4905       if (attr_type == TYPE_FPLOAD)
4906 	{
4907 	  rtx pat = PATTERN (insn);
4908 	  rtx dep_pat = PATTERN (dep_insn);
4909 	  if (GET_CODE (pat) == PARALLEL)
4910 	    {
4911 	      /* This happens for the fldXs,mb patterns.  */
4912 	      pat = XVECEXP (pat, 0, 0);
4913 	    }
4914 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4915 	    /* If this happens, we have to extend this to schedule
4916 	       optimally.  Return 0 for now.  */
4917 	    return 0;
4918 
4919 	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4920 	    {
4921 	      if (! recog_memoized (dep_insn))
4922 		return 0;
4923 	      switch (get_attr_type (dep_insn))
4924 		{
4925 		case TYPE_FPALU:
4926 		case TYPE_FPMULSGL:
4927 		case TYPE_FPMULDBL:
4928 		case TYPE_FPDIVSGL:
4929 		case TYPE_FPDIVDBL:
4930 		case TYPE_FPSQRTSGL:
4931 		case TYPE_FPSQRTDBL:
4932 		  /* A fpload can't be issued until one cycle before a
4933 		     preceding arithmetic operation has finished if
4934 		     the target of the fpload is the destination of the
4935 		     arithmetic operation.
4936 
4937 		     Exception: For PA7100LC, PA7200 and PA7300, the cost
4938 		     is 3 cycles, unless they bundle together.  We also
4939 		     pay the penalty if the second insn is a fpload.  */
4940 		  return insn_default_latency (dep_insn) - 1;
4941 
4942 		default:
4943 		  return 0;
4944 		}
4945 	    }
4946 	}
4947       else if (attr_type == TYPE_FPALU)
4948 	{
4949 	  rtx pat = PATTERN (insn);
4950 	  rtx dep_pat = PATTERN (dep_insn);
4951 	  if (GET_CODE (pat) == PARALLEL)
4952 	    {
4953 	      /* This happens for the fldXs,mb patterns.  */
4954 	      pat = XVECEXP (pat, 0, 0);
4955 	    }
4956 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4957 	    /* If this happens, we have to extend this to schedule
4958 	       optimally.  Return 0 for now.  */
4959 	    return 0;
4960 
4961 	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4962 	    {
4963 	      if (! recog_memoized (dep_insn))
4964 		return 0;
4965 	      switch (get_attr_type (dep_insn))
4966 		{
4967 		case TYPE_FPDIVSGL:
4968 		case TYPE_FPDIVDBL:
4969 		case TYPE_FPSQRTSGL:
4970 		case TYPE_FPSQRTDBL:
4971 		  /* An ALU flop can't be issued until two cycles before a
4972 		     preceding divide or sqrt operation has finished if
4973 		     the target of the ALU flop is also the target of
4974 		     the divide or sqrt operation.  */
4975 		  return insn_default_latency (dep_insn) - 2;
4976 
4977 		default:
4978 		  return 0;
4979 		}
4980 	    }
4981 	}
4982 
4983       /* For other output dependencies, the cost is 0.  */
4984       return 0;
4985 
4986     default:
4987       gcc_unreachable ();
4988     }
4989 }
4990 
4991 /* The 700 can only issue a single insn at a time.
4992    The 7XXX processors can issue two insns at a time.
4993    The 8000 can issue 4 insns at a time.  */
4994 static int
4995 pa_issue_rate (void)
4996 {
4997   switch (pa_cpu)
4998     {
4999     case PROCESSOR_700:		return 1;
5000     case PROCESSOR_7100:	return 2;
5001     case PROCESSOR_7100LC:	return 2;
5002     case PROCESSOR_7200:	return 2;
5003     case PROCESSOR_7300:	return 2;
5004     case PROCESSOR_8000:	return 4;
5005 
5006     default:
5007       gcc_unreachable ();
5008     }
5009 }
5010 
5011 
5012 
5013 /* Return any length plus adjustment needed by INSN which already has
5014    its length computed as LENGTH.  Return LENGTH if no adjustment is
5015    necessary.
5016 
5017    Also compute the length of an inline block move here as it is too
5018    complicated to express as a length attribute in pa.md.  */
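/* For example (illustrative): a short backwards conditional branch with
   an unfilled delay slot has a default length of 4, but a nop must fill
   the slot, so the code below grows its length from 4 to 8.  */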
5019 int
5020 pa_adjust_insn_length (rtx_insn *insn, int length)
5021 {
5022   rtx pat = PATTERN (insn);
5023 
5024   /* If length is negative or undefined, provide initial length.  */
5025   if ((unsigned int) length >= INT_MAX)
5026     {
5027       if (GET_CODE (pat) == SEQUENCE)
5028 	insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
5029 
5030       switch (get_attr_type (insn))
5031 	{
5032 	case TYPE_MILLI:
5033 	  length = pa_attr_length_millicode_call (insn);
5034 	  break;
5035 	case TYPE_CALL:
5036 	  length = pa_attr_length_call (insn, 0);
5037 	  break;
5038 	case TYPE_SIBCALL:
5039 	  length = pa_attr_length_call (insn, 1);
5040 	  break;
5041 	case TYPE_DYNCALL:
5042 	  length = pa_attr_length_indirect_call (insn);
5043 	  break;
5044 	case TYPE_SH_FUNC_ADRS:
5045 	  length = pa_attr_length_millicode_call (insn) + 20;
5046 	  break;
5047 	default:
5048 	  gcc_unreachable ();
5049 	}
5050     }
5051 
5052   /* Block move pattern.  */
5053   if (NONJUMP_INSN_P (insn)
5054       && GET_CODE (pat) == PARALLEL
5055       && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5056       && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5057       && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
5058       && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5059       && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
5060     length += compute_movmem_length (insn) - 4;
5061   /* Block clear pattern.  */
5062   else if (NONJUMP_INSN_P (insn)
5063 	   && GET_CODE (pat) == PARALLEL
5064 	   && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5065 	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5066 	   && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5067 	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
5068     length += compute_clrmem_length (insn) - 4;
5069   /* Conditional branch with an unfilled delay slot.  */
5070   else if (JUMP_P (insn) && ! simplejump_p (insn))
5071     {
5072       /* Adjust a short backwards conditional with an unfilled delay slot.  */
5073       if (GET_CODE (pat) == SET
5074 	  && length == 4
5075 	  && JUMP_LABEL (insn) != NULL_RTX
5076 	  && ! forward_branch_p (insn))
5077 	length += 4;
5078       else if (GET_CODE (pat) == PARALLEL
5079 	       && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5080 	       && length == 4)
5081 	length += 4;
5082       /* Adjust dbra insn with short backwards conditional branch with
5083 	 unfilled delay slot -- only for the case where the counter is in a
5084 	 general register.  */
5085       else if (GET_CODE (pat) == PARALLEL
5086 	       && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5087 	       && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5088  	       && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5089 	       && length == 4
5090 	       && ! forward_branch_p (insn))
5091 	length += 4;
5092     }
5093   return length;
5094 }
5095 
5096 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook.  */
5097 
5098 static bool
5099 pa_print_operand_punct_valid_p (unsigned char code)
5100 {
5101   if (code == '@'
5102       || code == '#'
5103       || code == '*'
5104       || code == '^')
5105     return true;
5106 
5107   return false;
5108 }
5109 
5110 /* Print operand X (an rtx) in assembler syntax to file FILE.
5111    CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5112    For `%' followed by punctuation, CODE is the punctuation and X is null.  */
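/* For instance, an insn template along the lines of "add%I2 %2,%1,%0"
   (illustrative of pa.md usage) relies on the 'I' case below to turn
   "add" into "addi" when operand 2 is a CONST_INT.  */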
5113 
5114 void
5115 pa_print_operand (FILE *file, rtx x, int code)
5116 {
5117   switch (code)
5118     {
5119     case '#':
5120       /* Output a 'nop' if there's nothing for the delay slot.  */
5121       if (dbr_sequence_length () == 0)
5122 	fputs ("\n\tnop", file);
5123       return;
5124     case '*':
5125       /* Output a nullification completer if there's nothing for the
5126 	 delay slot or nullification is requested.  */
5127       if (dbr_sequence_length () == 0
5128 	  || (final_sequence
5129 	      && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5130         fputs (",n", file);
5131       return;
5132     case 'R':
5133       /* Print out the second register name of a register pair.
5134 	 I.e., R (6) => 7.  */
5135       fputs (reg_names[REGNO (x) + 1], file);
5136       return;
5137     case 'r':
5138       /* A register or zero.  */
5139       if (x == const0_rtx
5140 	  || (x == CONST0_RTX (DFmode))
5141 	  || (x == CONST0_RTX (SFmode)))
5142 	{
5143 	  fputs ("%r0", file);
5144 	  return;
5145 	}
5146       else
5147 	break;
5148     case 'f':
5149       /* A register or zero (floating point).  */
5150       if (x == const0_rtx
5151 	  || (x == CONST0_RTX (DFmode))
5152 	  || (x == CONST0_RTX (SFmode)))
5153 	{
5154 	  fputs ("%fr0", file);
5155 	  return;
5156 	}
5157       else
5158 	break;
5159     case 'A':
5160       {
5161 	rtx xoperands[2];
5162 
5163 	xoperands[0] = XEXP (XEXP (x, 0), 0);
5164 	xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5165 	pa_output_global_address (file, xoperands[1], 0);
5166         fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5167 	return;
5168       }
5169 
5170     case 'C':			/* Plain (C)ondition */
5171     case 'X':
5172       switch (GET_CODE (x))
5173 	{
5174 	case EQ:
5175 	  fputs ("=", file);  break;
5176 	case NE:
5177 	  fputs ("<>", file);  break;
5178 	case GT:
5179 	  fputs (">", file);  break;
5180 	case GE:
5181 	  fputs (">=", file);  break;
5182 	case GEU:
5183 	  fputs (">>=", file);  break;
5184 	case GTU:
5185 	  fputs (">>", file);  break;
5186 	case LT:
5187 	  fputs ("<", file);  break;
5188 	case LE:
5189 	  fputs ("<=", file);  break;
5190 	case LEU:
5191 	  fputs ("<<=", file);  break;
5192 	case LTU:
5193 	  fputs ("<<", file);  break;
5194 	default:
5195 	  gcc_unreachable ();
5196 	}
5197       return;
5198     case 'N':			/* Condition, (N)egated */
5199       switch (GET_CODE (x))
5200 	{
5201 	case EQ:
5202 	  fputs ("<>", file);  break;
5203 	case NE:
5204 	  fputs ("=", file);  break;
5205 	case GT:
5206 	  fputs ("<=", file);  break;
5207 	case GE:
5208 	  fputs ("<", file);  break;
5209 	case GEU:
5210 	  fputs ("<<", file);  break;
5211 	case GTU:
5212 	  fputs ("<<=", file);  break;
5213 	case LT:
5214 	  fputs (">=", file);  break;
5215 	case LE:
5216 	  fputs (">", file);  break;
5217 	case LEU:
5218 	  fputs (">>", file);  break;
5219 	case LTU:
5220 	  fputs (">>=", file);  break;
5221 	default:
5222 	  gcc_unreachable ();
5223 	}
5224       return;
5225     /* For floating point comparisons.  Note that the output
5226        predicates are the complement of the desired mode.  The
5227        conditions for GT, GE, LT, LE and LTGT cause an invalid
5228        operation exception if the result is unordered and this
5229        exception is enabled in the floating-point status register.  */
5230     case 'Y':
5231       switch (GET_CODE (x))
5232 	{
5233 	case EQ:
5234 	  fputs ("!=", file);  break;
5235 	case NE:
5236 	  fputs ("=", file);  break;
5237 	case GT:
5238 	  fputs ("!>", file);  break;
5239 	case GE:
5240 	  fputs ("!>=", file);  break;
5241 	case LT:
5242 	  fputs ("!<", file);  break;
5243 	case LE:
5244 	  fputs ("!<=", file);  break;
5245 	case LTGT:
5246 	  fputs ("!<>", file);  break;
5247 	case UNLE:
5248 	  fputs ("!?<=", file);  break;
5249 	case UNLT:
5250 	  fputs ("!?<", file);  break;
5251 	case UNGE:
5252 	  fputs ("!?>=", file);  break;
5253 	case UNGT:
5254 	  fputs ("!?>", file);  break;
5255 	case UNEQ:
5256 	  fputs ("!?=", file);  break;
5257 	case UNORDERED:
5258 	  fputs ("!?", file);  break;
5259 	case ORDERED:
5260 	  fputs ("?", file);  break;
5261 	default:
5262 	  gcc_unreachable ();
5263 	}
5264       return;
5265     case 'S':			/* Condition, operands are (S)wapped.  */
5266       switch (GET_CODE (x))
5267 	{
5268 	case EQ:
5269 	  fputs ("=", file);  break;
5270 	case NE:
5271 	  fputs ("<>", file);  break;
5272 	case GT:
5273 	  fputs ("<", file);  break;
5274 	case GE:
5275 	  fputs ("<=", file);  break;
5276 	case GEU:
5277 	  fputs ("<<=", file);  break;
5278 	case GTU:
5279 	  fputs ("<<", file);  break;
5280 	case LT:
5281 	  fputs (">", file);  break;
5282 	case LE:
5283 	  fputs (">=", file);  break;
5284 	case LEU:
5285 	  fputs (">>=", file);  break;
5286 	case LTU:
5287 	  fputs (">>", file);  break;
5288 	default:
5289 	  gcc_unreachable ();
5290 	}
5291       return;
5292     case 'B':			/* Condition, (B)oth swapped and negate.  */
5293       switch (GET_CODE (x))
5294 	{
5295 	case EQ:
5296 	  fputs ("<>", file);  break;
5297 	case NE:
5298 	  fputs ("=", file);  break;
5299 	case GT:
5300 	  fputs (">=", file);  break;
5301 	case GE:
5302 	  fputs (">", file);  break;
5303 	case GEU:
5304 	  fputs (">>", file);  break;
5305 	case GTU:
5306 	  fputs (">>=", file);  break;
5307 	case LT:
5308 	  fputs ("<=", file);  break;
5309 	case LE:
5310 	  fputs ("<", file);  break;
5311 	case LEU:
5312 	  fputs ("<<", file);  break;
5313 	case LTU:
5314 	  fputs ("<<=", file);  break;
5315 	default:
5316 	  gcc_unreachable ();
5317 	}
5318       return;
5319     case 'k':
5320       gcc_assert (GET_CODE (x) == CONST_INT);
5321       fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5322       return;
5323     case 'Q':
5324       gcc_assert (GET_CODE (x) == CONST_INT);
5325       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5326       return;
5327     case 'L':
5328       gcc_assert (GET_CODE (x) == CONST_INT);
5329       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5330       return;
5331     case 'o':
5332       gcc_assert (GET_CODE (x) == CONST_INT
5333 		  && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
5334       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5335       return;
5336     case 'O':
5337       gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5338       fprintf (file, "%d", exact_log2 (INTVAL (x)));
5339       return;
5340     case 'p':
5341       gcc_assert (GET_CODE (x) == CONST_INT);
5342       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5343       return;
5344     case 'P':
5345       gcc_assert (GET_CODE (x) == CONST_INT);
5346       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5347       return;
5348     case 'I':
5349       if (GET_CODE (x) == CONST_INT)
5350 	fputs ("i", file);
5351       return;
5352     case 'M':
5353     case 'F':
5354       switch (GET_CODE (XEXP (x, 0)))
5355 	{
5356 	case PRE_DEC:
5357 	case PRE_INC:
5358 	  if (ASSEMBLER_DIALECT == 0)
5359 	    fputs ("s,mb", file);
5360 	  else
5361 	    fputs (",mb", file);
5362 	  break;
5363 	case POST_DEC:
5364 	case POST_INC:
5365 	  if (ASSEMBLER_DIALECT == 0)
5366 	    fputs ("s,ma", file);
5367 	  else
5368 	    fputs (",ma", file);
5369 	  break;
5370 	case PLUS:
5371 	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5372 	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5373 	    {
5374 	      if (ASSEMBLER_DIALECT == 0)
5375 		fputs ("x", file);
5376 	    }
5377 	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5378 		   || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5379 	    {
5380 	      if (ASSEMBLER_DIALECT == 0)
5381 		fputs ("x,s", file);
5382 	      else
5383 		fputs (",s", file);
5384 	    }
5385 	  else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5386 	    fputs ("s", file);
5387 	  break;
5388 	default:
5389 	  if (code == 'F' && ASSEMBLER_DIALECT == 0)
5390 	    fputs ("s", file);
5391 	  break;
5392 	}
5393       return;
5394     case 'G':
5395       pa_output_global_address (file, x, 0);
5396       return;
5397     case 'H':
5398       pa_output_global_address (file, x, 1);
5399       return;
5400     case 0:			/* Don't do anything special */
5401       break;
5402     case 'Z':
5403       {
5404 	unsigned op[3];
5405 	compute_zdepwi_operands (INTVAL (x), op);
5406 	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5407 	return;
5408       }
5409     case 'z':
5410       {
5411 	unsigned op[3];
5412 	compute_zdepdi_operands (INTVAL (x), op);
5413 	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5414 	return;
5415       }
5416     case 'c':
5417       /* We can get here from a .vtable_inherit due to our
5418 	 CONSTANT_ADDRESS_P rejecting perfectly good constant
5419 	 addresses.  */
5420       break;
5421     default:
5422       gcc_unreachable ();
5423     }
5424   if (GET_CODE (x) == REG)
5425     {
5426       fputs (reg_names [REGNO (x)], file);
5427       if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5428 	{
5429 	  fputs ("R", file);
5430 	  return;
5431 	}
5432       if (FP_REG_P (x)
5433 	  && GET_MODE_SIZE (GET_MODE (x)) <= 4
5434 	  && (REGNO (x) & 1) == 0)
5435 	fputs ("L", file);
5436     }
5437   else if (GET_CODE (x) == MEM)
5438     {
5439       int size = GET_MODE_SIZE (GET_MODE (x));
5440       rtx base = NULL_RTX;
5441       switch (GET_CODE (XEXP (x, 0)))
5442 	{
5443 	case PRE_DEC:
5444 	case POST_DEC:
5445           base = XEXP (XEXP (x, 0), 0);
5446 	  fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5447 	  break;
5448 	case PRE_INC:
5449 	case POST_INC:
5450           base = XEXP (XEXP (x, 0), 0);
5451 	  fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5452 	  break;
5453 	case PLUS:
5454 	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5455 	    fprintf (file, "%s(%s)",
5456 		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5457 		     reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5458 	  else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5459 	    fprintf (file, "%s(%s)",
5460 		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5461 		     reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5462 	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5463 		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5464 	    {
5465 	      /* Because the REG_POINTER flag can get lost during reload,
5466 		 pa_legitimate_address_p canonicalizes the order of the
5467 		 index and base registers in the combined move patterns.  */
5468 	      rtx base = XEXP (XEXP (x, 0), 1);
5469 	      rtx index = XEXP (XEXP (x, 0), 0);
5470 
5471 	      fprintf (file, "%s(%s)",
5472 		       reg_names [REGNO (index)], reg_names [REGNO (base)]);
5473 	    }
5474 	  else
5475 	    output_address (GET_MODE (x), XEXP (x, 0));
5476 	  break;
5477 	default:
5478 	  output_address (GET_MODE (x), XEXP (x, 0));
5479 	  break;
5480 	}
5481     }
5482   else
5483     output_addr_const (file, x);
5484 }
5485 
5486 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.  */
5487 
5488 void
5489 pa_output_global_address (FILE *file, rtx x, int round_constant)
5490 {
5491 
5492   /* Imagine  (high (const (plus ...))).  */
5493   if (GET_CODE (x) == HIGH)
5494     x = XEXP (x, 0);
5495 
5496   if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5497     output_addr_const (file, x);
5498   else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5499     {
5500       output_addr_const (file, x);
5501       fputs ("-$global$", file);
5502     }
5503   else if (GET_CODE (x) == CONST)
5504     {
5505       const char *sep = "";
5506       int offset = 0;		/* assembler wants -$global$ at end */
5507       rtx base = NULL_RTX;
5508 
5509       switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5510 	{
5511 	case LABEL_REF:
5512 	case SYMBOL_REF:
5513 	  base = XEXP (XEXP (x, 0), 0);
5514 	  output_addr_const (file, base);
5515 	  break;
5516 	case CONST_INT:
5517 	  offset = INTVAL (XEXP (XEXP (x, 0), 0));
5518 	  break;
5519 	default:
5520 	  gcc_unreachable ();
5521 	}
5522 
5523       switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5524 	{
5525 	case LABEL_REF:
5526 	case SYMBOL_REF:
5527 	  base = XEXP (XEXP (x, 0), 1);
5528 	  output_addr_const (file, base);
5529 	  break;
5530 	case CONST_INT:
5531 	  offset = INTVAL (XEXP (XEXP (x, 0), 1));
5532 	  break;
5533 	default:
5534 	  gcc_unreachable ();
5535 	}
5536 
5537       /* How bogus.  The compiler is apparently responsible for
5538 	 rounding the constant if it uses an LR field selector.
5539 
5540 	 The linker and/or assembler seem a better place since
5541 	 they have to do this kind of thing already.
5542 
5543 	 If we fail to do this, HP's optimizing linker may eliminate
5544 	 an addil, but not update the ldw/stw/ldo instruction that
5545 	 uses the result of the addil.  */
5546       if (round_constant)
5547 	offset = ((offset + 0x1000) & ~0x1fff);
5548 
5549       switch (GET_CODE (XEXP (x, 0)))
5550 	{
5551 	case PLUS:
5552 	  if (offset < 0)
5553 	    {
5554 	      offset = -offset;
5555 	      sep = "-";
5556 	    }
5557 	  else
5558 	    sep = "+";
5559 	  break;
5560 
5561 	case MINUS:
5562 	  gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5563 	  sep = "-";
5564 	  break;
5565 
5566 	default:
5567 	  gcc_unreachable ();
5568 	}
5569 
5570       if (!read_only_operand (base, VOIDmode) && !flag_pic)
5571 	fputs ("-$global$", file);
5572       if (offset)
5573 	fprintf (file, "%s%d", sep, offset);
5574     }
5575   else
5576     output_addr_const (file, x);
5577 }
5578 
5579 /* Output boilerplate text to appear at the beginning of the file.
5580    There are several possible versions.  */
5581 #define aputs(x) fputs(x, asm_out_file)
5582 static inline void
5583 pa_file_start_level (void)
5584 {
5585   if (TARGET_64BIT)
5586     aputs ("\t.LEVEL 2.0w\n");
5587   else if (TARGET_PA_20)
5588     aputs ("\t.LEVEL 2.0\n");
5589   else if (TARGET_PA_11)
5590     aputs ("\t.LEVEL 1.1\n");
5591   else
5592     aputs ("\t.LEVEL 1.0\n");
5593 }
5594 
5595 static inline void
5596 pa_file_start_space (int sortspace)
5597 {
5598   aputs ("\t.SPACE $PRIVATE$");
5599   if (sortspace)
5600     aputs (",SORT=16");
5601   aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5602   if (flag_tm)
5603     aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5604   aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5605 	 "\n\t.SPACE $TEXT$");
5606   if (sortspace)
5607     aputs (",SORT=8");
5608   aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5609 	 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5610 }
5611 
5612 static inline void
5613 pa_file_start_file (int want_version)
5614 {
5615   if (write_symbols != NO_DEBUG)
5616     {
5617       output_file_directive (asm_out_file, main_input_filename);
5618       if (want_version)
5619 	aputs ("\t.version\t\"01.01\"\n");
5620     }
5621 }
5622 
5623 static inline void
5624 pa_file_start_mcount (const char *aswhat)
5625 {
5626   if (profile_flag)
5627     fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5628 }
5629 
5630 static void
5631 pa_elf_file_start (void)
5632 {
5633   pa_file_start_level ();
5634   pa_file_start_mcount ("ENTRY");
5635   pa_file_start_file (0);
5636 }
5637 
5638 static void
5639 pa_som_file_start (void)
5640 {
5641   pa_file_start_level ();
5642   pa_file_start_space (0);
5643   aputs ("\t.IMPORT $global$,DATA\n"
5644          "\t.IMPORT $$dyncall,MILLICODE\n");
5645   pa_file_start_mcount ("CODE");
5646   pa_file_start_file (0);
5647 }
5648 
5649 static void
5650 pa_linux_file_start (void)
5651 {
5652   pa_file_start_file (1);
5653   pa_file_start_level ();
5654   pa_file_start_mcount ("CODE");
5655 }
5656 
5657 static void
5658 pa_hpux64_gas_file_start (void)
5659 {
5660   pa_file_start_level ();
5661 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5662   if (profile_flag)
5663     ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5664 #endif
5665   pa_file_start_file (1);
5666 }
5667 
5668 static void
5669 pa_hpux64_hpas_file_start (void)
5670 {
5671   pa_file_start_level ();
5672   pa_file_start_space (1);
5673   pa_file_start_mcount ("CODE");
5674   pa_file_start_file (0);
5675 }
5676 #undef aputs
5677 
5678 /* Search the deferred plabel list for SYMBOL and return its internal
5679    label.  If an entry for SYMBOL is not found, a new entry is created.  */
5680 
5681 rtx
5682 pa_get_deferred_plabel (rtx symbol)
5683 {
5684   const char *fname = XSTR (symbol, 0);
5685   size_t i;
5686 
5687   /* See if we have already put this function on the list of deferred
5688      plabels.  This list is generally small, so a linear search is not
5689      too ugly.  If it proves too slow, replace it with something faster.  */
5690   for (i = 0; i < n_deferred_plabels; i++)
5691     if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5692       break;
5693 
5694   /* If the deferred plabel list is empty, or this entry was not found
5695      on the list, create a new entry on the list.  */
5696   if (deferred_plabels == NULL || i == n_deferred_plabels)
5697     {
5698       tree id;
5699 
5700       if (deferred_plabels == NULL)
5701 	deferred_plabels = ggc_alloc<deferred_plabel> ();
5702       else
5703         deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5704                                           deferred_plabels,
5705                                           n_deferred_plabels + 1);
5706 
5707       i = n_deferred_plabels++;
5708       deferred_plabels[i].internal_label = gen_label_rtx ();
5709       deferred_plabels[i].symbol = symbol;
5710 
5711       /* Gross.  We have just implicitly taken the address of this
5712 	 function.  Mark it in the same manner as assemble_name.  */
5713       id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5714       if (id)
5715 	mark_referenced (id);
5716     }
5717 
5718   return deferred_plabels[i].internal_label;
5719 }
5720 
5721 static void
5722 output_deferred_plabels (void)
5723 {
5724   size_t i;
5725 
5726   /* If we have some deferred plabels, then we need to switch into the
5727      data or readonly data section, and align it to a 4 byte boundary
5728      before outputting the deferred plabels.  */
5729   if (n_deferred_plabels)
5730     {
5731       switch_to_section (flag_pic ? data_section : readonly_data_section);
5732       ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5733     }
5734 
5735   /* Now output the deferred plabels.  */
5736   for (i = 0; i < n_deferred_plabels; i++)
5737     {
5738       targetm.asm_out.internal_label (asm_out_file, "L",
5739 		 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5740       assemble_integer (deferred_plabels[i].symbol,
5741 			TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5742     }
5743 }
5744 
5745 /* Initialize optabs to point to emulation routines.  */
5746 
5747 static void
5748 pa_init_libfuncs (void)
5749 {
5750   if (HPUX_LONG_DOUBLE_LIBRARY)
5751     {
5752       set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5753       set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5754       set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5755       set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5756       set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5757       set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5758       set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5759       set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5760       set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5761 
5762       set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5763       set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5764       set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5765       set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5766       set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5767       set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5768       set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5769 
5770       set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5771       set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5772       set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5773       set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5774 
5775       set_conv_libfunc (sfix_optab, SImode, TFmode,
5776 			TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5777 				     : "_U_Qfcnvfxt_quad_to_sgl");
5778       set_conv_libfunc (sfix_optab, DImode, TFmode,
5779 			"_U_Qfcnvfxt_quad_to_dbl");
5780       set_conv_libfunc (ufix_optab, SImode, TFmode,
5781 			"_U_Qfcnvfxt_quad_to_usgl");
5782       set_conv_libfunc (ufix_optab, DImode, TFmode,
5783 			"_U_Qfcnvfxt_quad_to_udbl");
5784 
5785       set_conv_libfunc (sfloat_optab, TFmode, SImode,
5786 			"_U_Qfcnvxf_sgl_to_quad");
5787       set_conv_libfunc (sfloat_optab, TFmode, DImode,
5788 			"_U_Qfcnvxf_dbl_to_quad");
5789       set_conv_libfunc (ufloat_optab, TFmode, SImode,
5790 			"_U_Qfcnvxf_usgl_to_quad");
5791       set_conv_libfunc (ufloat_optab, TFmode, DImode,
5792 			"_U_Qfcnvxf_udbl_to_quad");
5793     }
5794 
5795   if (TARGET_SYNC_LIBCALL)
5796     init_sync_libfuncs (8);
5797 }
5798 
5799 /* HP's millicode routines mean something special to the assembler.
5800    Keep track of which ones we have used.  */
5801 
5802 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5803 static void import_milli (enum millicodes);
5804 static char imported[(int) end1000];
5805 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5806 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5807 #define MILLI_START 10
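/* MILLI_START is the offset of the "...." placeholder in import_string
   (strlen (".IMPORT $$") == 10).  For example, import_milli (mulI)
   rewrites the template to ".IMPORT $$mulI,MILLICODE" and emits it, and
   the imported[] flag ensures this happens only once per millicode.  */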
5808 
5809 static void
5810 import_milli (enum millicodes code)
5811 {
5812   char str[sizeof (import_string)];
5813 
5814   if (!imported[(int) code])
5815     {
5816       imported[(int) code] = 1;
5817       strcpy (str, import_string);
5818       strncpy (str + MILLI_START, milli_names[(int) code], 4);
5819       output_asm_insn (str, 0);
5820     }
5821 }
5822 
5823 /* The register constraints have put the operands and return value in
5824    the proper registers.  */
5825 
5826 const char *
5827 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
5828 {
5829   import_milli (mulI);
5830   return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5831 }
5832 
5833 /* Emit the rtl for doing a division by a constant.  */
5834 
5835 /* Do magic division millicodes exist for this value? */
5836 const int pa_magic_milli[] = {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5837 
5838 /* We'll use an array to keep track of the magic millicodes and
5839    whether or not we've used them already. [n][0] is signed, [n][1] is
5840    unsigned.  */
5841 
5842 static int div_milli[16][2];
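/* For example, the first signed division by 3 emits
   ".IMPORT $$divI_3,MILLICODE" (recording the fact in div_milli[3][0])
   and then calls the $$divI_3 millicode routine; later signed divisions
   by 3 skip the import.  */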
5843 
5844 int
5845 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5846 {
5847   if (GET_CODE (operands[2]) == CONST_INT
5848       && INTVAL (operands[2]) > 0
5849       && INTVAL (operands[2]) < 16
5850       && pa_magic_milli[INTVAL (operands[2])])
5851     {
5852       rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5853 
5854       emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5855       emit
5856 	(gen_rtx_PARALLEL
5857 	 (VOIDmode,
5858 	  gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
5859 				     gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5860 						     SImode,
5861 						     gen_rtx_REG (SImode, 26),
5862 						     operands[2])),
5863 		     gen_rtx_CLOBBER (VOIDmode, operands[4]),
5864 		     gen_rtx_CLOBBER (VOIDmode, operands[3]),
5865 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5866 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5867 		     gen_rtx_CLOBBER (VOIDmode, ret))));
5868       emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5869       return 1;
5870     }
5871   return 0;
5872 }
5873 
5874 const char *
5875 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
5876 {
5877   int divisor;
5878 
5879   /* If the divisor is a constant, try to use one of the special
5880      opcodes.  */
5881   if (GET_CODE (operands[0]) == CONST_INT)
5882     {
5883       static char buf[100];
5884       divisor = INTVAL (operands[0]);
5885       if (!div_milli[divisor][unsignedp])
5886 	{
5887 	  div_milli[divisor][unsignedp] = 1;
5888 	  if (unsignedp)
5889 	    output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5890 	  else
5891 	    output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5892 	}
5893       if (unsignedp)
5894 	{
5895 	  sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5896 		   INTVAL (operands[0]));
5897 	  return pa_output_millicode_call (insn,
5898 					   gen_rtx_SYMBOL_REF (SImode, buf));
5899 	}
5900       else
5901 	{
5902 	  sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5903 		   INTVAL (operands[0]));
5904 	  return pa_output_millicode_call (insn,
5905 					   gen_rtx_SYMBOL_REF (SImode, buf));
5906 	}
5907     }
5908   /* Divisor isn't a special constant.  */
5909   else
5910     {
5911       if (unsignedp)
5912 	{
5913 	  import_milli (divU);
5914 	  return pa_output_millicode_call (insn,
5915 					gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5916 	}
5917       else
5918 	{
5919 	  import_milli (divI);
5920 	  return pa_output_millicode_call (insn,
5921 					gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5922 	}
5923     }
5924 }
5925 
5926 /* Output a $$rem millicode to do mod.  */
5927 
5928 const char *
5929 pa_output_mod_insn (int unsignedp, rtx_insn *insn)
5930 {
5931   if (unsignedp)
5932     {
5933       import_milli (remU);
5934       return pa_output_millicode_call (insn,
5935 				       gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5936     }
5937   else
5938     {
5939       import_milli (remI);
5940       return pa_output_millicode_call (insn,
5941 				       gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5942     }
5943 }
5944 
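/* Output the .CALL pseudo-op describing where the argument words of
   CALL_INSN live, so that the linker can perform argument relocations.
   For example (illustrative), a call passing a single int in %r26
   produces ".CALL ARGW0=GR".  */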
5945 void
5946 pa_output_arg_descriptor (rtx_insn *call_insn)
5947 {
5948   const char *arg_regs[4];
5949   machine_mode arg_mode;
5950   rtx link;
5951   int i, output_flag = 0;
5952   int regno;
5953 
5954   /* We neither need nor want argument location descriptors for the
5955      64-bit runtime environment or the ELF32 environment.  */
5956   if (TARGET_64BIT || TARGET_ELF32)
5957     return;
5958 
5959   for (i = 0; i < 4; i++)
5960     arg_regs[i] = 0;
5961 
5962   /* Specify explicitly that no argument relocations should take place
5963      if using the portable runtime calling conventions.  */
5964   if (TARGET_PORTABLE_RUNTIME)
5965     {
5966       fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5967 	     asm_out_file);
5968       return;
5969     }
5970 
5971   gcc_assert (CALL_P (call_insn));
5972   for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5973        link; link = XEXP (link, 1))
5974     {
5975       rtx use = XEXP (link, 0);
5976 
5977       if (! (GET_CODE (use) == USE
5978 	     && GET_CODE (XEXP (use, 0)) == REG
5979 	     && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5980 	continue;
5981 
5982       arg_mode = GET_MODE (XEXP (use, 0));
5983       regno = REGNO (XEXP (use, 0));
5984       if (regno >= 23 && regno <= 26)
5985 	{
5986 	  arg_regs[26 - regno] = "GR";
5987 	  if (arg_mode == DImode)
5988 	    arg_regs[25 - regno] = "GR";
5989 	}
5990       else if (regno >= 32 && regno <= 39)
5991 	{
5992 	  if (arg_mode == SFmode)
5993 	    arg_regs[(regno - 32) / 2] = "FR";
5994 	  else
5995 	    {
5996 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5997 	      arg_regs[(regno - 34) / 2] = "FR";
5998 	      arg_regs[(regno - 34) / 2 + 1] = "FU";
5999 #else
6000 	      arg_regs[(regno - 34) / 2] = "FU";
6001 	      arg_regs[(regno - 34) / 2 + 1] = "FR";
6002 #endif
6003 	    }
6004 	}
6005     }
6006   fputs ("\t.CALL ", asm_out_file);
6007   for (i = 0; i < 4; i++)
6008     {
6009       if (arg_regs[i])
6010 	{
6011 	  if (output_flag++)
6012 	    fputc (',', asm_out_file);
6013 	  fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
6014 	}
6015     }
6016   fputc ('\n', asm_out_file);
6017 }
6018 
6019 /* Inform reload about cases where moving X with a mode MODE to or from
6020    a register in RCLASS requires an extra scratch or immediate register.
6021    Return the class needed for the immediate register.  */
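/* For example (per the cases below): loading a CONST_INT into a
   floating-point register while generating PIC code can't be done
   directly; for SImode we hand reload the CODE_FOR_reload_insi_r1
   pattern, which uses %r1 as the scratch register.  */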
6022 
6023 static reg_class_t
6024 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
6025 		     machine_mode mode, secondary_reload_info *sri)
6026 {
6027   int regno;
6028   enum reg_class rclass = (enum reg_class) rclass_i;
6029 
6030   /* Handle the easy stuff first.  */
6031   if (rclass == R1_REGS)
6032     return NO_REGS;
6033 
6034   if (REG_P (x))
6035     {
6036       regno = REGNO (x);
6037       if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
6038 	return NO_REGS;
6039     }
6040   else
6041     regno = -1;
6042 
6043   /* If we have something like (mem (mem (...))), we can safely assume the
6044      inner MEM will end up in a general register after reloading, so there's
6045      no need for a secondary reload.  */
6046   if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
6047     return NO_REGS;
6048 
6049   /* Trying to load a constant into a FP register during PIC code
6050      generation requires %r1 as a scratch register.  For float modes,
6051      the only legitimate constant is CONST0_RTX.  However, there are
6052      a few patterns that accept constant double operands.  */
6053   if (flag_pic
6054       && FP_REG_CLASS_P (rclass)
6055       && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
6056     {
6057       switch (mode)
6058 	{
6059 	case E_SImode:
6060 	  sri->icode = CODE_FOR_reload_insi_r1;
6061 	  break;
6062 
6063 	case E_DImode:
6064 	  sri->icode = CODE_FOR_reload_indi_r1;
6065 	  break;
6066 
6067 	case E_SFmode:
6068 	  sri->icode = CODE_FOR_reload_insf_r1;
6069 	  break;
6070 
6071 	case E_DFmode:
6072 	  sri->icode = CODE_FOR_reload_indf_r1;
6073 	  break;
6074 
6075 	default:
6076 	  gcc_unreachable ();
6077 	}
6078       return NO_REGS;
6079     }
6080 
6081   /* Secondary reloads of symbolic expressions require %r1 as a scratch
6082      register when we're generating PIC code or when the operand isn't
6083      readonly.  */
6084   if (pa_symbolic_expression_p (x))
6085     {
6086       if (GET_CODE (x) == HIGH)
6087 	x = XEXP (x, 0);
6088 
6089       if (flag_pic || !read_only_operand (x, VOIDmode))
6090 	{
6091 	  switch (mode)
6092 	    {
6093 	    case E_SImode:
6094 	      sri->icode = CODE_FOR_reload_insi_r1;
6095 	      break;
6096 
6097 	    case E_DImode:
6098 	      sri->icode = CODE_FOR_reload_indi_r1;
6099 	      break;
6100 
6101 	    default:
6102 	      gcc_unreachable ();
6103 	    }
6104 	  return NO_REGS;
6105 	}
6106     }
6107 
6108   /* Profiling showed the PA port spends about 1.3% of its compilation
6109      time in true_regnum from calls inside pa_secondary_reload_class.  */
6110   if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6111     regno = true_regnum (x);
6112 
6113   /* Handle reloads for floating point loads and stores.  */
6114   if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
6115       && FP_REG_CLASS_P (rclass))
6116     {
6117       if (MEM_P (x))
6118 	{
6119 	  x = XEXP (x, 0);
6120 
6121 	  /* We don't need a secondary reload for indexed memory addresses.
6122 
6123 	     When INT14_OK_STRICT is true, it might appear that we could
6124 	     directly allow register indirect memory addresses.  However,
6125 	     this doesn't work because we don't support SUBREGs in
6126 	     floating-point register copies and reload doesn't tell us
6127 	     when it's going to use a SUBREG.  */
6128 	  if (IS_INDEX_ADDR_P (x))
6129 	    return NO_REGS;
6130 	}
6131 
6132       /* Request a secondary reload with a general scratch register
6133 	 for everything else.  ??? Could symbolic operands be handled
6134 	 directly when generating non-pic PA 2.0 code?  */
6135       sri->icode = (in_p
6136 		    ? direct_optab_handler (reload_in_optab, mode)
6137 		    : direct_optab_handler (reload_out_optab, mode));
6138       return NO_REGS;
6139     }
6140 
6141   /* A SAR<->FP register copy requires an intermediate general register
6142      and secondary memory.  We need a secondary reload with a general
6143      scratch register for spills.  */
6144   if (rclass == SHIFT_REGS)
6145     {
6146       /* Handle spill.  */
6147       if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6148 	{
6149 	  sri->icode = (in_p
6150 			? direct_optab_handler (reload_in_optab, mode)
6151 			: direct_optab_handler (reload_out_optab, mode));
6152 	  return NO_REGS;
6153 	}
6154 
6155       /* Handle FP copy.  */
6156       if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6157 	return GENERAL_REGS;
6158     }
6159 
6160   if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6161       && REGNO_REG_CLASS (regno) == SHIFT_REGS
6162       && FP_REG_CLASS_P (rclass))
6163     return GENERAL_REGS;
6164 
6165   return NO_REGS;
6166 }
6167 
6168 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.  */
6169 
6170 static bool
6171 pa_secondary_memory_needed (machine_mode mode ATTRIBUTE_UNUSED,
6172 			    reg_class_t class1 ATTRIBUTE_UNUSED,
6173 			    reg_class_t class2 ATTRIBUTE_UNUSED)
6174 {
6175 #ifdef PA_SECONDARY_MEMORY_NEEDED
6176   return PA_SECONDARY_MEMORY_NEEDED (mode, class1, class2);
6177 #else
6178   return false;
6179 #endif
6180 }
6181 
6182 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY.  The argument pointer
6183    is only marked as live on entry by df-scan when it is a fixed
6184    register.  It isn't a fixed register in the 64-bit runtime,
6185    so we need to mark it here.  */
6186 
6187 static void
6188 pa_extra_live_on_entry (bitmap regs)
6189 {
6190   if (TARGET_64BIT)
6191     bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6192 }
6193 
6194 /* Implement EH_RETURN_HANDLER_RTX.  The MEM needs to be volatile
6195    to prevent it from being deleted.  */
6196 
6197 rtx
6198 pa_eh_return_handler_rtx (void)
6199 {
6200   rtx tmp;
6201 
6202   tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6203 		      TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6204   tmp = gen_rtx_MEM (word_mode, tmp);
6205   tmp->volatil = 1;
6206   return tmp;
6207 }
6208 
6209 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6210    by invisible reference.  As a GCC extension, we also pass anything
6211    with a zero or variable size by reference.
6212 
6213    The 64-bit runtime does not describe passing any types by invisible
6214    reference.  The internals of GCC can't currently handle passing
6215    empty structures, and zero or variable length arrays when they are
6216    not passed entirely on the stack or by reference.  Thus, as a GCC
6217    extension, we pass these types by reference.  The HP compiler doesn't
6218    support these types, so hopefully there shouldn't be any compatibility
6219    issues.  This may have to be revisited when HP releases a C99 compiler
6220    or updates the ABI.  */
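/* Thus, for instance, a 12-byte struct is passed by invisible reference
   in the 32-bit runtime (size > 8) but by value in the 64-bit runtime,
   while a zero-sized struct is passed by reference in both.  */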
6221 
6222 static bool
6223 pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
6224 		      machine_mode mode, const_tree type,
6225 		      bool named ATTRIBUTE_UNUSED)
6226 {
6227   HOST_WIDE_INT size;
6228 
6229   if (type)
6230     size = int_size_in_bytes (type);
6231   else
6232     size = GET_MODE_SIZE (mode);
6233 
6234   if (TARGET_64BIT)
6235     return size <= 0;
6236   else
6237     return size <= 0 || size > 8;
6238 }
6239 
6240 /* Implement TARGET_FUNCTION_ARG_PADDING.  */
6241 
6242 static pad_direction
6243 pa_function_arg_padding (machine_mode mode, const_tree type)
6244 {
6245   if (mode == BLKmode
6246       || (TARGET_64BIT
6247 	  && type
6248 	  && (AGGREGATE_TYPE_P (type)
6249 	      || TREE_CODE (type) == COMPLEX_TYPE
6250 	      || TREE_CODE (type) == VECTOR_TYPE)))
6251     {
6252       /* Return PAD_NONE if justification is not required.  */
6253       if (type
6254 	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6255 	  && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6256 	return PAD_NONE;
6257 
6258       /* The directions set here are ignored when a BLKmode argument larger
6259 	 than a word is placed in a register.  Different code is used for
6260 	 the stack and registers.  This makes it difficult to have a
6261 	 consistent data representation for both the stack and registers.
6262 	 For both runtimes, the justification and padding for arguments on
6263 	 the stack and in registers should be identical.  */
6264       if (TARGET_64BIT)
6265 	/* The 64-bit runtime specifies left justification for aggregates.  */
6266 	return PAD_UPWARD;
6267       else
6268 	/* The 32-bit runtime architecture specifies right justification.
6269 	   When the argument is passed on the stack, the argument is padded
6270 	   with garbage on the left.  The HP compiler pads with zeros.  */
6271 	return PAD_DOWNWARD;
6272     }
6273 
6274   if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6275     return PAD_DOWNWARD;
6276   else
6277     return PAD_NONE;
6278 }
6279 
6280 
6281 /* Do what is necessary for `va_start'.  We look at the current function
6282    to determine if stdargs or varargs is used and fill in an initial
6283    va_list.  A pointer to this constructor is returned.  */
6284 
6285 static rtx
6286 hppa_builtin_saveregs (void)
6287 {
6288   rtx offset, dest;
6289   tree fntype = TREE_TYPE (current_function_decl);
6290   int argadj = ((!stdarg_p (fntype))
6291 		? UNITS_PER_WORD : 0);
6292 
6293   if (argadj)
6294     offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6295   else
6296     offset = crtl->args.arg_offset_rtx;
6297 
6298   if (TARGET_64BIT)
6299     {
6300       int i, off;
6301 
6302       /* Adjust for varargs/stdarg differences.  */
6303       if (argadj)
6304 	offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6305       else
6306 	offset = crtl->args.arg_offset_rtx;
6307 
6308       /* We need to save %r26 .. %r19 inclusive starting at offset -64
6309 	 from the incoming arg pointer and growing to larger addresses.  */
6310       for (i = 26, off = -64; i >= 19; i--, off += 8)
6311 	emit_move_insn (gen_rtx_MEM (word_mode,
6312 				     plus_constant (Pmode,
6313 						    arg_pointer_rtx, off)),
6314 			gen_rtx_REG (word_mode, i));
6315 
6316       /* The incoming args pointer points just beyond the flushback area;
6317 	 normally this is not a serious concern.  However, when we are doing
6318 	 varargs/stdargs we want to make the arg pointer point to the start
6319 	 of the incoming argument area.  */
6320       emit_move_insn (virtual_incoming_args_rtx,
6321 		      plus_constant (Pmode, arg_pointer_rtx, -64));
6322 
6323       /* Now return a pointer to the first anonymous argument.  */
6324       return copy_to_reg (expand_binop (Pmode, add_optab,
6325 					virtual_incoming_args_rtx,
6326 					offset, 0, 0, OPTAB_LIB_WIDEN));
6327     }
6328 
6329   /* Store general registers on the stack.  */
6330   dest = gen_rtx_MEM (BLKmode,
6331 		      plus_constant (Pmode, crtl->args.internal_arg_pointer,
6332 				     -16));
6333   set_mem_alias_set (dest, get_varargs_alias_set ());
6334   set_mem_align (dest, BITS_PER_WORD);
6335   move_block_from_reg (23, dest, 4);
6336 
6337   /* move_block_from_reg will emit code to store the argument registers
6338      individually as scalar stores.
6339 
6340      However, other insns may later load from the same addresses for
6341      a structure load (passing a struct to a varargs routine).
6342 
6343      The alias code assumes that such aliasing can never happen, so we
6344      have to keep memory referencing insns from moving up beyond the
6345      last argument register store.  So we emit a blockage insn here.  */
6346   emit_insn (gen_blockage ());
6347 
6348   return copy_to_reg (expand_binop (Pmode, add_optab,
6349 				    crtl->args.internal_arg_pointer,
6350 				    offset, 0, 0, OPTAB_LIB_WIDEN));
6351 }
6352 
6353 static void
6354 hppa_va_start (tree valist, rtx nextarg)
6355 {
6356   nextarg = expand_builtin_saveregs ();
6357   std_expand_builtin_va_start (valist, nextarg);
6358 }
6359 
6360 static tree
6361 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6362 			   gimple_seq *post_p)
6363 {
6364   if (TARGET_64BIT)
6365     {
6366       /* Args grow upward.  We can use the generic routines.  */
6367       return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6368     }
6369   else /* !TARGET_64BIT */
6370     {
6371       tree ptr = build_pointer_type (type);
6372       tree valist_type;
6373       tree t, u;
6374       unsigned int size, ofs;
6375       bool indirect;
6376 
6377       indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6378       if (indirect)
6379 	{
6380 	  type = ptr;
6381 	  ptr = build_pointer_type (type);
6382 	}
6383       size = int_size_in_bytes (type);
6384       valist_type = TREE_TYPE (valist);
6385 
6386       /* Args grow down.  Not handled by generic routines.  */
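      /* Worked example (illustrative): fetching a 2-byte short when the
	 pointer is at address A yields t = (A - 2) & -4; then
	 ofs = (8 - 2) % 4 = 2 is added back, so the value is read from
	 the upper, right-justified half of its 4-byte slot, matching the
	 big-endian 32-bit runtime's right justification.  */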
6387 
6388       u = fold_convert (sizetype, size_in_bytes (type));
6389       u = fold_build1 (NEGATE_EXPR, sizetype, u);
6390       t = fold_build_pointer_plus (valist, u);
6391 
6392       /* Align to 4 or 8 byte boundary depending on argument size.  */
6393 
6394       u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6395       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6396       t = fold_convert (valist_type, t);
6397 
6398       t = build2 (MODIFY_EXPR, valist_type, valist, t);
6399 
6400       ofs = (8 - size) % 4;
6401       if (ofs != 0)
6402 	t = fold_build_pointer_plus_hwi (t, ofs);
6403 
6404       t = fold_convert (ptr, t);
6405       t = build_va_arg_indirect_ref (t);
6406 
6407       if (indirect)
6408 	t = build_va_arg_indirect_ref (t);
6409 
6410       return t;
6411     }
6412 }
6413 
6414 /* True if MODE is valid for the target.  By "valid", we mean able to
6415    be manipulated in non-trivial ways.  In particular, this means all
6416    the arithmetic is supported.
6417 
6418    Currently, TImode is not valid as the HP 64-bit runtime documentation
6419    doesn't document the alignment and calling conventions for this type.
6420    Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6421    2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE.  */
6422 
6423 static bool
6424 pa_scalar_mode_supported_p (scalar_mode mode)
6425 {
6426   int precision = GET_MODE_PRECISION (mode);
6427 
6428   switch (GET_MODE_CLASS (mode))
6429     {
6430     case MODE_PARTIAL_INT:
6431     case MODE_INT:
6432       if (precision == CHAR_TYPE_SIZE)
6433 	return true;
6434       if (precision == SHORT_TYPE_SIZE)
6435 	return true;
6436       if (precision == INT_TYPE_SIZE)
6437 	return true;
6438       if (precision == LONG_TYPE_SIZE)
6439 	return true;
6440       if (precision == LONG_LONG_TYPE_SIZE)
6441 	return true;
6442       return false;
6443 
6444     case MODE_FLOAT:
6445       if (precision == FLOAT_TYPE_SIZE)
6446 	return true;
6447       if (precision == DOUBLE_TYPE_SIZE)
6448 	return true;
6449       if (precision == LONG_DOUBLE_TYPE_SIZE)
6450 	return true;
6451       return false;
6452 
6453     case MODE_DECIMAL_FLOAT:
6454       return false;
6455 
6456     default:
6457       gcc_unreachable ();
6458     }
6459 }
6460 
6461 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6462    it branches into the delay slot.  Otherwise, return FALSE.  */
6463 
6464 static bool
6465 branch_to_delay_slot_p (rtx_insn *insn)
6466 {
6467   rtx_insn *jump_insn;
6468 
6469   if (dbr_sequence_length ())
6470     return FALSE;
6471 
6472   jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6473   while (insn)
6474     {
6475       insn = next_active_insn (insn);
6476       if (jump_insn == insn)
6477 	return TRUE;
6478 
6479       /* We can't rely on the length of asms.  So, we return FALSE when
6480 	 the branch is followed by an asm.  */
6481       if (!insn
6482 	  || GET_CODE (PATTERN (insn)) == ASM_INPUT
6483 	  || asm_noperands (PATTERN (insn)) >= 0
6484 	  || get_attr_length (insn) > 0)
6485 	break;
6486     }
6487 
6488   return FALSE;
6489 }

/* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.

   This occurs when INSN has an unfilled delay slot and is followed
   by an asm.  Disaster can occur if the asm is empty and the jump
   branches into the delay slot.  So, we add a nop in the delay slot
   when this occurs.  */

static bool
branch_needs_nop_p (rtx_insn *insn)
{
  rtx_insn *jump_insn;

  if (dbr_sequence_length ())
    return FALSE;

  jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
  while (insn)
    {
      insn = next_active_insn (insn);
      if (!insn || jump_insn == insn)
	return TRUE;

      if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
	   || asm_noperands (PATTERN (insn)) >= 0)
	  && get_attr_length (insn) > 0)
	break;
    }

  return FALSE;
}

/* Return TRUE if INSN, a forward jump insn, can use nullification
   to skip the following instruction.  This avoids an extra cycle due
   to a mis-predicted branch when we fall through.  */

static bool
use_skip_p (rtx_insn *insn)
{
  rtx_insn *jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));

  while (insn)
    {
      insn = next_active_insn (insn);

      /* We can't rely on the length of asms, so we can't skip asms.  */
      if (!insn
	  || GET_CODE (PATTERN (insn)) == ASM_INPUT
	  || asm_noperands (PATTERN (insn)) >= 0)
	break;
      if (get_attr_length (insn) == 4
	  && jump_insn == next_active_insn (insn))
	return TRUE;
      if (get_attr_length (insn) > 0)
	break;
    }

  return FALSE;
}

/* This routine handles all the normal conditional branch sequences we
   might need to generate.  It handles compare immediate vs compare
   register, nullification of delay slots, varying length branches,
   negated branches, and all combinations of the above.  It returns the
   output appropriate to emit the branch corresponding to all given
   parameters.  */

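/* As an illustration (not emitted verbatim anywhere), the 4-byte form
   below builds a template such as "cmp%I2b,%S3 %2,%r1,%0", which after
   operand substitution might read "cmpb,= %r4,%r5,L$0012"; the register
   numbers and label here are hypothetical.  */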
const char *
pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
{
  static char buf[100];
  bool useskip;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot)
     is asking for a disaster.  This can happen when not optimizing and
     when jump optimization fails.

     While it is usually safe to emit nothing, this can fail if the
     preceding instruction is a nullified branch with an empty delay
     slot and the same branch target as this branch.  We could check
     for this but jump optimization should eliminate nop jumps.  It
     is always safe to emit a nop.  */
  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* The doubleword form of the cmpib instruction doesn't have the LEU
     and GTU conditions while the cmpb instruction does.  Since we accept
     zero for cmpb, we must ensure that we use cmpb for the comparison.  */
  if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
    operands[2] = gen_rtx_REG (DImode, 0);
  if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
    operands[1] = gen_rtx_REG (DImode, 0);

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with a
     comclr instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;

  switch (length)
    {
      /* All short conditional branches except backwards with an unfilled
	 delay slot.  */
      case 4:
	if (useskip)
	  strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
	else
	  strcpy (buf, "{com%I2b,|cmp%I2b,}");
	if (GET_MODE (operands[1]) == DImode)
	  strcat (buf, "*");
	if (negated)
	  strcat (buf, "%B3");
	else
	  strcat (buf, "%S3");
	if (useskip)
	  strcat (buf, " %2,%r1,%%r0");
	else if (nullify)
	  {
	    if (branch_needs_nop_p (insn))
	      strcat (buf, ",n %2,%r1,%0%#");
	    else
	      strcat (buf, ",n %2,%r1,%0");
	  }
	else
	  strcat (buf, " %2,%r1,%0");
	break;

     /* All long conditionals.  Note a short backward branch with an
	unfilled delay slot is treated just like a long backward branch
	with an unfilled delay slot.  */
      case 8:
	/* Handle weird backwards branch with a filled delay slot
	   which is nullified.  */
	if (dbr_sequence_length () != 0
	    && ! forward_branch_p (insn)
	    && nullify)
	  {
	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
	    if (GET_MODE (operands[1]) == DImode)
	      strcat (buf, "*");
	    if (negated)
	      strcat (buf, "%S3");
	    else
	      strcat (buf, "%B3");
	    strcat (buf, ",n %2,%r1,.+12\n\tb %0");
	  }
	/* Handle short backwards branch with an unfilled delay slot.
	   Using a comb;nop rather than comiclr;bl saves 1 cycle for both
	   taken and untaken branches.  */
	else if (dbr_sequence_length () == 0
		 && ! forward_branch_p (insn)
		 && INSN_ADDRESSES_SET_P ()
		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	  {
	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
	    if (GET_MODE (operands[1]) == DImode)
	      strcat (buf, "*");
	    if (negated)
	      strcat (buf, "%B3 %2,%r1,%0%#");
	    else
	      strcat (buf, "%S3 %2,%r1,%0%#");
	  }
	else
	  {
	    strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
	    if (GET_MODE (operands[1]) == DImode)
	      strcat (buf, "*");
	    if (negated)
	      strcat (buf, "%S3");
	    else
	      strcat (buf, "%B3");
	    if (nullify)
	      strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
	    else
	      strcat (buf, " %2,%r1,%%r0\n\tb %0");
	  }
	break;

      default:
	/* The reversed conditional branch must branch over one additional
	   instruction if the delay slot is filled and needs to be extracted
	   by pa_output_lbranch.  If the delay slot is empty or this is a
	   nullified forward branch, the instruction after the reversed
	   conditional branch must be nullified.  */
	if (dbr_sequence_length () == 0
	    || (nullify && forward_branch_p (insn)))
	  {
	    nullify = 1;
	    xdelay = 0;
	    operands[4] = GEN_INT (length);
	  }
	else
	  {
	    xdelay = 1;
	    operands[4] = GEN_INT (length + 4);
	  }

	/* Create a reversed conditional branch which branches around
	   the following insns.  */
	if (GET_MODE (operands[1]) != DImode)
	  {
	    if (nullify)
	      {
		if (negated)
		  strcpy (buf,
		    "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
		else
		  strcpy (buf,
		    "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
	      }
	    else
	      {
		if (negated)
		  strcpy (buf,
		    "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
		else
		  strcpy (buf,
		    "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
	      }
	  }
	else
	  {
	    if (nullify)
	      {
		if (negated)
		  strcpy (buf,
		    "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
		else
		  strcpy (buf,
		    "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
	      }
	    else
	      {
		if (negated)
		  strcpy (buf,
		    "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
		else
		  strcpy (buf,
		    "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
	      }
	  }

	output_asm_insn (buf, operands);
	return pa_output_lbranch (operands[0], insn, xdelay);
    }
  return buf;
}

/* Output a PIC pc-relative instruction sequence to load the address of
   OPERANDS[0] to register OPERANDS[2].  OPERANDS[0] is a symbol ref
   or a code label.  OPERANDS[1] specifies the register to use to load
   the program counter.  OPERANDS[3] may be used for label generation.
   The sequence is always three instructions in length.  The program
   counter recorded for PA 1.X is eight bytes more than that for PA 2.0.
   Register %r1 is clobbered.  */

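/* As a concrete sketch, with TARGET_PA_20 and GAS (and not SOM) the
   code below emits

	mfia %1
	addil L'%0-$PIC_pcrel$0+12,%1
	ldo R'%0-$PIC_pcrel$0+16(%r1),%2

   with the %-escapes filled in from OPERANDS at output time.  */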
static void
pa_output_pic_pcrel_sequence (rtx *operands)
{
  gcc_assert (SYMBOL_REF_P (operands[0]) || LABEL_P (operands[0]));
  if (TARGET_PA_20)
    {
      /* We can use mfia to determine the current program counter.  */
      if (TARGET_SOM || !TARGET_GAS)
	{
	  operands[3] = gen_label_rtx ();
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (operands[3]));
	  output_asm_insn ("mfia %1", operands);
	  output_asm_insn ("addil L'%0-%l3,%1", operands);
	  output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
	}
      else
	{
	  output_asm_insn ("mfia %1", operands);
	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands);
	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands);
	}
    }
  else
    {
      /* We need to use a branch to determine the current program counter.  */
      output_asm_insn ("{bl|b,l} .+8,%1", operands);
      if (TARGET_SOM || !TARGET_GAS)
	{
	  operands[3] = gen_label_rtx ();
	  output_asm_insn ("addil L'%0-%l3,%1", operands);
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (operands[3]));
	  output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
	}
      else
	{
	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands);
	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands);
	}
    }
}

/* This routine handles output of long unconditional branches that
   exceed the maximum range of a simple branch instruction.  Since
   we don't have a register available for the branch, we save register
   %r1 in the frame marker, load the branch destination DEST into %r1,
   execute the branch, and restore %r1 in the delay slot of the branch.

   Since long branches may have an insn in the delay slot and the
   delay slot is used to restore %r1, we in general need to extract
   this insn and execute it before the branch.  However, to facilitate
   use of this function by conditional branches, we also provide an
   option to not extract the delay insn so that it will be emitted
   after the long branch.  So, if there is an insn in the delay slot,
   it is extracted if XDELAY is nonzero.

   The lengths of the various long-branch sequences are 20, 16 and 24
   bytes for the portable runtime, non-PIC and PIC cases, respectively.  */

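/* A minimal sketch of the 16-byte non-PIC case, assuming a 32-bit
   target without a frame of its own (so the "Current RP" slot is
   free; "dest" stands in for the branch target):

	stw %r1,-20(%r30)
	ldil L'dest,%r1
	be R'dest(%sr4,%r1)
	ldw -20(%r30),%r1	; restore %r1 in the delay slot  */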
const char *
pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
{
  rtx xoperands[4];

  xoperands[0] = dest;

  /* First, free up the delay slot.  */
  if (xdelay && dbr_sequence_length () != 0)
    {
      /* We can't handle a jump in the delay slot.  */
      gcc_assert (! JUMP_P (NEXT_INSN (insn)));

      final_scan_insn (NEXT_INSN (insn), asm_out_file,
		       optimize, 0, NULL);

      /* Now delete the delay insn.  */
      SET_INSN_DELETED (NEXT_INSN (insn));
    }

  /* Output an insn to save %r1.  The runtime documentation doesn't
     specify whether the "Clean Up" slot in the caller's frame can
     be clobbered by the callee.  It isn't copied by HP's builtin
     alloca, so this suggests that it can be clobbered if necessary.
     The "Static Link" location is copied by HP builtin alloca, so
     we avoid using it.  Using the cleanup slot might be a problem
     if we have to interoperate with languages that pass cleanup
     information.  However, it should be possible to handle these
     situations with GCC's asm feature.

     The "Current RP" slot is reserved for the called procedure, so
     we try to use it when we don't have a frame of our own.  It's
     rather unlikely that we won't have a frame when we need to emit
     a very long branch.

     Really the way to go long term is a register scavenger; go to
     the target of the jump and find a register which we can use
     as a scratch to hold the value in %r1.  Then, we wouldn't have
     to free up the delay slot or clobber a slot that may be needed
     for other purposes.  */
  if (TARGET_64BIT)
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
	/* Use the return pointer slot in the frame marker.  */
	output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
      else
	/* Use the slot at -40 in the frame marker since HP builtin
	   alloca doesn't copy it.  */
	output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
    }
  else
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
	/* Use the return pointer slot in the frame marker.  */
	output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
      else
	/* Use the "Clean Up" slot in the frame marker.  In GCC,
	   the only other use of this location is for copying a
	   floating point double argument from a floating-point
	   register to two general registers.  The copy is done
	   as an "atomic" operation when outputting a call, so it
	   won't interfere with our using the location here.  */
	output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
    }

  if (TARGET_PORTABLE_RUNTIME)
    {
      output_asm_insn ("ldil L'%0,%%r1", xoperands);
      output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
      output_asm_insn ("bv %%r0(%%r1)", xoperands);
    }
  else if (flag_pic)
    {
      xoperands[1] = gen_rtx_REG (Pmode, 1);
      xoperands[2] = xoperands[1];
      pa_output_pic_pcrel_sequence (xoperands);
      output_asm_insn ("bv %%r0(%%r1)", xoperands);
    }
  else
    /* Now output a very long branch to the original target.  */
    output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);

  /* Now restore the value of %r1 in the delay slot.  */
  if (TARGET_64BIT)
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
	return "ldd -16(%%r30),%%r1";
      else
	return "ldd -40(%%r30),%%r1";
    }
  else
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
	return "ldw -20(%%r30),%%r1";
      else
	return "ldw -12(%%r30),%%r1";
    }
}

/* This routine handles all the branch-on-bit conditional branch sequences we
   might need to generate.  It handles nullification of delay slots,
   varying length branches, negated branches and all combinations of the
   above.  It returns the appropriate output template to emit the branch.  */

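/* Illustrative only: the 4-byte non-skip form builds a template such as
   "bb,< %0,%1,%2"; after substitution this might read something like
   "bb,< %r4,5,L$0020" (purely hypothetical operands), branching on the
   state of bit 5 of %r4.  */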
const char *
pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
{
  static char buf[100];
  bool useskip;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  I do not think this can happen as this pattern
     is only used when optimizing; jump optimization should eliminate the
     jump.  But be prepared just in case.  */

  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with an
     extrs instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;

  switch (length)
    {

      /* All short conditional branches except backwards with an unfilled
	 delay slot.  */
      case 4:
	if (useskip)
	  strcpy (buf, "{extrs,|extrw,s,}");
	else
	  strcpy (buf, "bb,");
	if (useskip && GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "extrd,s,*");
	else if (GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "bb,*");
	if ((which == 0 && negated)
	     || (which == 1 && ! negated))
	  strcat (buf, ">=");
	else
	  strcat (buf, "<");
	if (useskip)
	  strcat (buf, " %0,%1,1,%%r0");
	else if (nullify && negated)
	  {
	    if (branch_needs_nop_p (insn))
	      strcat (buf, ",n %0,%1,%3%#");
	    else
	      strcat (buf, ",n %0,%1,%3");
	  }
	else if (nullify && ! negated)
	  {
	    if (branch_needs_nop_p (insn))
	      strcat (buf, ",n %0,%1,%2%#");
	    else
	      strcat (buf, ",n %0,%1,%2");
	  }
	else if (! nullify && negated)
	  strcat (buf, " %0,%1,%3");
	else if (! nullify && ! negated)
	  strcat (buf, " %0,%1,%2");
	break;

     /* All long conditionals.  Note a short backward branch with an
	unfilled delay slot is treated just like a long backward branch
	with an unfilled delay slot.  */
      case 8:
	/* Handle weird backwards branch with a filled delay slot
	   which is nullified.  */
	if (dbr_sequence_length () != 0
	    && ! forward_branch_p (insn)
	    && nullify)
	  {
	    strcpy (buf, "bb,");
	    if (GET_MODE (operands[0]) == DImode)
	      strcat (buf, "*");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, "<");
	    else
	      strcat (buf, ">=");
	    if (negated)
	      strcat (buf, ",n %0,%1,.+12\n\tb %3");
	    else
	      strcat (buf, ",n %0,%1,.+12\n\tb %2");
	  }
	/* Handle short backwards branch with an unfilled delay slot.
	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
	   taken and untaken branches.  */
	else if (dbr_sequence_length () == 0
		 && ! forward_branch_p (insn)
		 && INSN_ADDRESSES_SET_P ()
		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	  {
	    strcpy (buf, "bb,");
	    if (GET_MODE (operands[0]) == DImode)
	      strcat (buf, "*");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, ">=");
	    else
	      strcat (buf, "<");
	    if (negated)
	      strcat (buf, " %0,%1,%3%#");
	    else
	      strcat (buf, " %0,%1,%2%#");
	  }
	else
	  {
	    if (GET_MODE (operands[0]) == DImode)
	      strcpy (buf, "extrd,s,*");
	    else
	      strcpy (buf, "{extrs,|extrw,s,}");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, "<");
	    else
	      strcat (buf, ">=");
	    if (nullify && negated)
	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
	    else if (nullify && ! negated)
	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
	    else if (negated)
	      strcat (buf, " %0,%1,1,%%r0\n\tb %3");
	    else
	      strcat (buf, " %0,%1,1,%%r0\n\tb %2");
	  }
	break;

      default:
	/* The reversed conditional branch must branch over one additional
	   instruction if the delay slot is filled and needs to be extracted
	   by pa_output_lbranch.  If the delay slot is empty or this is a
	   nullified forward branch, the instruction after the reversed
	   conditional branch must be nullified.  */
	if (dbr_sequence_length () == 0
	    || (nullify && forward_branch_p (insn)))
	  {
	    nullify = 1;
	    xdelay = 0;
	    operands[4] = GEN_INT (length);
	  }
	else
	  {
	    xdelay = 1;
	    operands[4] = GEN_INT (length + 4);
	  }

	if (GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "bb,*");
	else
	  strcpy (buf, "bb,");
	if ((which == 0 && negated)
	    || (which == 1 && !negated))
	  strcat (buf, "<");
	else
	  strcat (buf, ">=");
	if (nullify)
	  strcat (buf, ",n %0,%1,.+%4");
	else
	  strcat (buf, " %0,%1,.+%4");
	output_asm_insn (buf, operands);
	return pa_output_lbranch (negated ? operands[3] : operands[2],
				  insn, xdelay);
    }
  return buf;
}

/* This routine handles all the branch-on-variable-bit conditional branch
   sequences we might need to generate.  It handles nullification of delay
   slots, varying length branches, negated branches and all combinations
   of the above.  It returns the appropriate output template to emit the
   branch.  */

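/* This mirrors pa_output_bb, but the bit position is taken from %sar;
   e.g. the 4-byte PA 1.x form builds a "bvb" template such as
   "bvb,< %0,%2" (operands illustrative).  */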
const char *
pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
	       int which)
{
  static char buf[100];
  bool useskip;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  I do not think this can happen as this pattern
     is only used when optimizing; jump optimization should eliminate the
     jump.  But be prepared just in case.  */

  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with an
     extrs instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;

  switch (length)
    {

      /* All short conditional branches except backwards with an unfilled
	 delay slot.  */
      case 4:
	if (useskip)
	  strcpy (buf, "{vextrs,|extrw,s,}");
	else
	  strcpy (buf, "{bvb,|bb,}");
	if (useskip && GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "extrd,s,*");
	else if (GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "bb,*");
	if ((which == 0 && negated)
	     || (which == 1 && ! negated))
	  strcat (buf, ">=");
	else
	  strcat (buf, "<");
	if (useskip)
	  strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
	else if (nullify && negated)
	  {
	    if (branch_needs_nop_p (insn))
	      strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
	    else
	      strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
	  }
	else if (nullify && ! negated)
	  {
	    if (branch_needs_nop_p (insn))
	      strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
	    else
	      strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
	  }
	else if (! nullify && negated)
	  strcat (buf, "{ %0,%3| %0,%%sar,%3}");
	else if (! nullify && ! negated)
	  strcat (buf, "{ %0,%2| %0,%%sar,%2}");
	break;

     /* All long conditionals.  Note a short backward branch with an
	unfilled delay slot is treated just like a long backward branch
	with an unfilled delay slot.  */
      case 8:
	/* Handle weird backwards branch with a filled delay slot
	   which is nullified.  */
	if (dbr_sequence_length () != 0
	    && ! forward_branch_p (insn)
	    && nullify)
	  {
	    strcpy (buf, "{bvb,|bb,}");
	    if (GET_MODE (operands[0]) == DImode)
	      strcat (buf, "*");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, "<");
	    else
	      strcat (buf, ">=");
	    if (negated)
	      strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
	    else
	      strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
	  }
	/* Handle short backwards branch with an unfilled delay slot.
	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
	   taken and untaken branches.  */
	else if (dbr_sequence_length () == 0
		 && ! forward_branch_p (insn)
		 && INSN_ADDRESSES_SET_P ()
		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	  {
	    strcpy (buf, "{bvb,|bb,}");
	    if (GET_MODE (operands[0]) == DImode)
	      strcat (buf, "*");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, ">=");
	    else
	      strcat (buf, "<");
	    if (negated)
	      strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
	    else
	      strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
	  }
	else
	  {
	    strcpy (buf, "{vextrs,|extrw,s,}");
	    if (GET_MODE (operands[0]) == DImode)
	      strcpy (buf, "extrd,s,*");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, "<");
	    else
	      strcat (buf, ">=");
	    if (nullify && negated)
	      strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
	    else if (nullify && ! negated)
	      strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
	    else if (negated)
	      strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
	    else
	      strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
	  }
	break;

      default:
	/* The reversed conditional branch must branch over one additional
	   instruction if the delay slot is filled and needs to be extracted
	   by pa_output_lbranch.  If the delay slot is empty or this is a
	   nullified forward branch, the instruction after the reversed
	   conditional branch must be nullified.  */
	if (dbr_sequence_length () == 0
	    || (nullify && forward_branch_p (insn)))
	  {
	    nullify = 1;
	    xdelay = 0;
	    operands[4] = GEN_INT (length);
	  }
	else
	  {
	    xdelay = 1;
	    operands[4] = GEN_INT (length + 4);
	  }

	if (GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "bb,*");
	else
	  strcpy (buf, "{bvb,|bb,}");
	if ((which == 0 && negated)
	    || (which == 1 && !negated))
	  strcat (buf, "<");
	else
	  strcat (buf, ">=");
	if (nullify)
	  strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
	else
	  strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
	output_asm_insn (buf, operands);
	return pa_output_lbranch (negated ? operands[3] : operands[2],
				  insn, xdelay);
    }
  return buf;
}

/* Return the output template for emitting a dbra type insn.

   Note it may perform some output operations on its own before
   returning the final output string.  */
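/* For instance, the short form for alternative 0 is the template
   "addib,%C2 %1,%0,%3", which might assemble to something like
   "addib,<> -1,%r3,L$loop", a decrement-and-branch (all operands
   here are hypothetical).  */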
const char *
pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
{
  int length = get_attr_length (insn);

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (branch_to_delay_slot_p (insn))
    {
      if (which_alternative == 0)
	return "ldo %1(%0),%0";
      else if (which_alternative == 1)
	{
	  output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
	  output_asm_insn ("ldw -16(%%r30),%4", operands);
	  output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
	  return "{fldws|fldw} -16(%%r30),%0";
	}
      else
	{
	  output_asm_insn ("ldw %0,%4", operands);
	  return "ldo %1(%4),%4\n\tstw %4,%0";
	}
    }

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int xdelay;

      /* If this is a long branch with its delay slot unfilled, set `nullify'
	 as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
	nullify = 1;

      /* If this is a short forward conditional branch which did not get
	 its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
	nullify = forward_branch_p (insn);

      switch (length)
	{
	case 4:
	  if (nullify)
	    {
	      if (branch_needs_nop_p (insn))
		return "addib,%C2,n %1,%0,%3%#";
	      else
		return "addib,%C2,n %1,%0,%3";
	    }
	  else
	    return "addib,%C2 %1,%0,%3";

	case 8:
	  /* Handle weird backwards branch with a filled delay slot
	     which is nullified.  */
	  if (dbr_sequence_length () != 0
	      && ! forward_branch_p (insn)
	      && nullify)
	    return "addib,%N2,n %1,%0,.+12\n\tb %3";
	  /* Handle short backwards branch with an unfilled delay slot.
	     Using an addb;nop rather than addi;bl saves 1 cycle for both
	     taken and untaken branches.  */
	  else if (dbr_sequence_length () == 0
		   && ! forward_branch_p (insn)
		   && INSN_ADDRESSES_SET_P ()
		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	      return "addib,%C2 %1,%0,%3%#";

	  /* Handle normal cases.  */
	  if (nullify)
	    return "addi,%N2 %1,%0,%0\n\tb,n %3";
	  else
	    return "addi,%N2 %1,%0,%0\n\tb %3";

	default:
	  /* The reversed conditional branch must branch over one additional
	     instruction if the delay slot is filled and needs to be extracted
	     by pa_output_lbranch.  If the delay slot is empty or this is a
	     nullified forward branch, the instruction after the reversed
	     conditional branch must be nullified.  */
	  if (dbr_sequence_length () == 0
	      || (nullify && forward_branch_p (insn)))
	    {
	      nullify = 1;
	      xdelay = 0;
	      operands[4] = GEN_INT (length);
	    }
	  else
	    {
	      xdelay = 1;
	      operands[4] = GEN_INT (length + 4);
	    }

	  if (nullify)
	    output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
	  else
	    output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);

	  return pa_output_lbranch (operands[3], insn, xdelay);
	}

    }
  /* Deal with gross reload from FP register case.  */
  else if (which_alternative == 1)
    {
      /* Move loop counter from FP register to MEM then into a GR,
	 increment the GR, store the GR into MEM, and finally reload
	 the FP register from MEM from within the branch's delay slot.  */
      output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
		       operands);
      output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
      if (length == 24)
	return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
      else if (length == 28)
	return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
      else
	{
	  operands[5] = GEN_INT (length - 16);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
  /* Deal with gross reload from memory case.  */
  else
    {
      /* Reload loop counter from memory, the store back to memory
	 happens in the branch's delay slot.  */
      output_asm_insn ("ldw %0,%4", operands);
      if (length == 12)
	return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
      else if (length == 16)
	return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
      else
	{
	  operands[5] = GEN_INT (length - 4);
	  output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
}

/* Return the output template for emitting a movb type insn.

   Note it may perform some output operations on its own before
   returning the final output string.  */
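/* For instance, the short form for the register-destination
   alternative is "movb,%C2 %1,%0,%3": copy %1 into %0 and branch on
   the condition, e.g. "movb,= %r5,%r3,L$0030" with purely
   illustrative operands.  */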
const char *
pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
	     int reverse_comparison)
{
  int length = get_attr_length (insn);

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (branch_to_delay_slot_p (insn))
    {
      if (which_alternative == 0)
	return "copy %1,%0";
      else if (which_alternative == 1)
	{
	  output_asm_insn ("stw %1,-16(%%r30)", operands);
	  return "{fldws|fldw} -16(%%r30),%0";
	}
      else if (which_alternative == 2)
	return "stw %1,%0";
      else
	return "mtsar %r1";
    }

  /* Support the second variant.  */
  if (reverse_comparison)
    PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int xdelay;

      /* If this is a long branch with its delay slot unfilled, set `nullify'
	 as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
	nullify = 1;

      /* If this is a short forward conditional branch which did not get
	 its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
	nullify = forward_branch_p (insn);

      switch (length)
	{
	case 4:
	  if (nullify)
	    {
	      if (branch_needs_nop_p (insn))
		return "movb,%C2,n %1,%0,%3%#";
	      else
		return "movb,%C2,n %1,%0,%3";
	    }
	  else
	    return "movb,%C2 %1,%0,%3";

	case 8:
	  /* Handle weird backwards branch with a filled delay slot
	     which is nullified.  */
	  if (dbr_sequence_length () != 0
	      && ! forward_branch_p (insn)
	      && nullify)
	    return "movb,%N2,n %1,%0,.+12\n\tb %3";

	  /* Handle short backwards branch with an unfilled delay slot.
	     Using a movb;nop rather than or;bl saves 1 cycle for both
	     taken and untaken branches.  */
	  else if (dbr_sequence_length () == 0
		   && ! forward_branch_p (insn)
		   && INSN_ADDRESSES_SET_P ()
		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	    return "movb,%C2 %1,%0,%3%#";
	  /* Handle normal cases.  */
	  if (nullify)
	    return "or,%N2 %1,%%r0,%0\n\tb,n %3";
	  else
	    return "or,%N2 %1,%%r0,%0\n\tb %3";

	default:
	  /* The reversed conditional branch must branch over one additional
	     instruction if the delay slot is filled and needs to be extracted
	     by pa_output_lbranch.  If the delay slot is empty or this is a
	     nullified forward branch, the instruction after the reversed
	     conditional branch must be nullified.  */
	  if (dbr_sequence_length () == 0
	      || (nullify && forward_branch_p (insn)))
	    {
	      nullify = 1;
	      xdelay = 0;
	      operands[4] = GEN_INT (length);
	    }
	  else
	    {
	      xdelay = 1;
	      operands[4] = GEN_INT (length + 4);
	    }

	  if (nullify)
	    output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
	  else
	    output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);

	  return pa_output_lbranch (operands[3], insn, xdelay);
	}
    }
  /* Deal with gross reload for FP destination register case.  */
  else if (which_alternative == 1)
    {
      /* Move source register to MEM, perform the branch test, then
	 finally load the FP register from MEM from within the branch's
	 delay slot.  */
      output_asm_insn ("stw %1,-16(%%r30)", operands);
      if (length == 12)
	return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
      else if (length == 16)
	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
      else
	{
	  operands[4] = GEN_INT (length - 4);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
  /* Deal with gross reload from memory case.  */
  else if (which_alternative == 2)
    {
      /* Reload loop counter from memory, the store back to memory
	 happens in the branch's delay slot.  */
      if (length == 8)
	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
      else if (length == 12)
	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
      else
	{
	  operands[4] = GEN_INT (length);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
			   operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
  /* Handle SAR as a destination.  */
  else
    {
      if (length == 8)
	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
      else if (length == 12)
	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
      else
	{
	  operands[4] = GEN_INT (length);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
			   operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
}

/* Copy any FP arguments in INSN into integer registers.  */
static void
copy_fp_args (rtx_insn *insn)
{
  rtx link;
  rtx xoperands[2];

  for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
    {
      int arg_mode, regno;
      rtx use = XEXP (link, 0);

      if (! (GET_CODE (use) == USE
	  && GET_CODE (XEXP (use, 0)) == REG
	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
	continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));

      /* Is it a floating point register?  */
      if (regno >= 32 && regno <= 39)
	{
	  /* Copy the FP register into an integer register via memory.  */
	  if (arg_mode == SFmode)
	    {
	      xoperands[0] = XEXP (use, 0);
	      xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
	      output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
	    }
	  else
	    {
	      xoperands[0] = XEXP (use, 0);
	      xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
	      output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
	      output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
	    }
	}
    }
}

/* Compute length of the FP argument copy sequence for INSN.  */
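/* Per copy_fp_args above, an SFmode argument takes two insns (fstw
   plus ldw, 8 bytes) and other FP arguments (the DFmode path) three
   insns (fstd plus two ldws, 12 bytes); the byte counts below follow
   from that.  */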
static int
length_fp_args (rtx_insn *insn)
{
  int length = 0;
  rtx link;

  for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
    {
      int arg_mode, regno;
      rtx use = XEXP (link, 0);

      if (! (GET_CODE (use) == USE
	  && GET_CODE (XEXP (use, 0)) == REG
	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
	continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));

      /* Is it a floating point register?  */
      if (regno >= 32 && regno <= 39)
	{
	  if (arg_mode == SFmode)
	    length += 8;
	  else
	    length += 12;
	}
    }

  return length;
}

/* Return the attribute length for the millicode call instruction INSN.
   The length must match the code generated by pa_output_millicode_call.
   We include the delay slot in the returned length as it is better to
   overestimate the length than to underestimate it.  */

int
pa_attr_length_millicode_call (rtx_insn *insn)
{
  unsigned long distance = -1;
  unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;

  if (INSN_ADDRESSES_SET_P ())
    {
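      /* TOTAL biases the reference address by the code emitted so far.
	 If this unsigned addition wraps, DISTANCE ends up smaller than
	 TOTAL and we fall back to "unknown" (all ones).  */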
      distance = (total + insn_current_reference_address (insn));
      if (distance < total)
	distance = -1;
    }

  if (TARGET_64BIT)
    {
      if (!TARGET_LONG_CALLS && distance < 7600000)
	return 8;

      return 20;
    }
  else if (TARGET_PORTABLE_RUNTIME)
    return 24;
  else
    {
      if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
	return 8;

      if (!flag_pic)
	return 12;

      return 24;
    }
}

/* INSN is a function call.

   CALL_DEST is the routine we are calling.  */

const char *
pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
{
  int attr_length = get_attr_length (insn);
  int seq_length = dbr_sequence_length ();
  rtx xoperands[4];

  xoperands[0] = call_dest;

  /* Handle the common case where we are sure that the branch will
     reach the beginning of the $CODE$ subspace.  The within-reach
     form of the $$sh_func_adrs call has a length of 28.  Because it
     has an attribute type of sh_func_adrs, it never has a nonzero
     sequence length (i.e., the delay slot is never filled).  */
  if (!TARGET_LONG_CALLS
      && (attr_length == 8
	  || (attr_length == 28
	      && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
    {
      xoperands[1] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
      output_asm_insn ("{bl|b,l} %0,%1", xoperands);
    }
  else
    {
      if (TARGET_64BIT)
	{
	  /* It might seem that one insn could be saved by accessing
	     the millicode function using the linkage table.  However,
	     this doesn't work in shared libraries and other dynamically
	     loaded objects.  Using a pc-relative sequence also avoids
	     problems related to the implicit use of the gp register.  */
	  xoperands[1] = gen_rtx_REG (Pmode, 1);
	  xoperands[2] = xoperands[1];
	  pa_output_pic_pcrel_sequence (xoperands);
	  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
	}
      else if (TARGET_PORTABLE_RUNTIME)
	{
	  /* Pure portable runtime doesn't allow be/ble; we also don't
	     have PIC support in the assembler/linker, so this sequence
	     is needed.  */

	  /* Get the address of our target into %r1.  */
	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
	  output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);

	  /* Get our return address into %r31.  */
	  output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
	  output_asm_insn ("addi 8,%%r31,%%r31", xoperands);

	  /* Jump to our target address in %r1.  */
	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
	}
      else if (!flag_pic)
	{
	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
	  if (TARGET_PA_20)
	    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
	  else
	    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
	}
      else
	{
	  xoperands[1] = gen_rtx_REG (Pmode, 31);
	  xoperands[2] = gen_rtx_REG (Pmode, 1);
	  pa_output_pic_pcrel_sequence (xoperands);

	  /* Adjust return address.  */
	  output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands);

	  /* Jump to our target address in %r1.  */
	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
	}
    }

  if (seq_length == 0)
    output_asm_insn ("nop", xoperands);

  return "";
}

/* Return the attribute length of the call instruction INSN.  The SIBCALL
   flag indicates whether INSN is a regular call or a sibling call.  The
   length returned must be longer than the code actually generated by
   pa_output_call.  Since branch shortening is done before delay branch
   sequencing, there is no way to determine whether or not the delay
   slot will be filled during branch shortening.  Even when the delay
   slot is filled, we may have to add a nop if the delay slot contains
   a branch that can't reach its target.  Thus, we always have to include
   the delay slot in the length estimate.  This used to be done in
   pa_adjust_insn_length but we do it here now as some sequences always
   fill the delay slot and we can save four bytes in the estimate for
   these sequences.  */

int
pa_attr_length_call (rtx_insn *insn, int sibcall)
{
  int local_call;
  rtx call, call_dest;
  tree call_decl;
  int length = 0;
  rtx pat = PATTERN (insn);
  unsigned long distance = -1;

  gcc_assert (CALL_P (insn));

  if (INSN_ADDRESSES_SET_P ())
    {
      unsigned long total;

      total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
      distance = (total + insn_current_reference_address (insn));
      if (distance < total)
	distance = -1;
    }

  gcc_assert (GET_CODE (pat) == PARALLEL);

  /* Get the call rtx.  */
  call = XVECEXP (pat, 0, 0);
  if (GET_CODE (call) == SET)
    call = SET_SRC (call);

  gcc_assert (GET_CODE (call) == CALL);

  /* Determine if this is a local call.  */
  call_dest = XEXP (XEXP (call, 0), 0);
  call_decl = SYMBOL_REF_DECL (call_dest);
  local_call = call_decl && targetm.binds_local_p (call_decl);

  /* pc-relative branch.  */
  if (!TARGET_LONG_CALLS
      && ((TARGET_PA_20 && !sibcall && distance < 7600000)
	  || distance < MAX_PCREL17F_OFFSET))
    length += 8;

  /* 64-bit plabel sequence.  */
  else if (TARGET_64BIT && !local_call)
    length += 24;

  /* non-pic long absolute branch sequence.  */
  else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
    length += 12;

  /* long pc-relative branch sequence.  */
  else if (TARGET_LONG_PIC_SDIFF_CALL
	   || (TARGET_GAS && !TARGET_SOM && local_call))
    {
      length += 20;

      if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
	length += 8;
    }

  /* 32-bit plabel sequence.  */
  else
    {
      length += 32;

      if (TARGET_SOM)
	length += length_fp_args (insn);

      if (flag_pic)
	length += 4;

      if (!TARGET_PA_20)
	{
	  if (!sibcall)
	    length += 8;

	  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
	    length += 8;
	}
    }

  return length;
}

/* INSN is a function call.

   CALL_DEST is the routine we are calling.  */

const char *
pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
{
  int seq_length = dbr_sequence_length ();
  tree call_decl = SYMBOL_REF_DECL (call_dest);
  int local_call = call_decl && targetm.binds_local_p (call_decl);
  rtx xoperands[4];

  xoperands[0] = call_dest;

  /* Handle the common case where we're sure that the branch will reach
     the beginning of the "$CODE$" subspace.  This is the beginning of
     the current function if we are in a named section.  */
  if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
    {
      xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
      output_asm_insn ("{bl|b,l} %0,%1", xoperands);
    }
  else
    {
      if (TARGET_64BIT && !local_call)
	{
	  /* ??? As far as I can tell, the HP linker doesn't support the
	     long pc-relative sequence described in the 64-bit runtime
	     architecture.  So, we use a slightly longer indirect call.  */
	  xoperands[0] = pa_get_deferred_plabel (call_dest);
	  xoperands[1] = gen_label_rtx ();

	  /* Put the load of %r27 into the delay slot.  We don't need to
	     do anything when generating fast indirect calls.  */
	  if (seq_length != 0)
	    {
	      final_scan_insn (NEXT_INSN (insn), asm_out_file,
			       optimize, 0, NULL);

	      /* Now delete the delay insn.  */
	      SET_INSN_DELETED (NEXT_INSN (insn));
	    }

	  output_asm_insn ("addil LT'%0,%%r27", xoperands);
	  output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
	  output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
	  output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
	  output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
	  output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
	  seq_length = 1;
	}
      else
	{
	  int indirect_call = 0;

	  /* Emit a long call.  There are several different sequences
	     of increasing length and complexity.  In most cases,
	     they don't allow an instruction in the delay slot.  */
	  if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
	      && !TARGET_LONG_PIC_SDIFF_CALL
	      && !(TARGET_GAS && !TARGET_SOM && local_call)
	      && !TARGET_64BIT)
	    indirect_call = 1;

	  if (seq_length != 0
	      && !sibcall
	      && (!TARGET_PA_20
		  || indirect_call
		  || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
	    {
	      /* A non-jump insn in the delay slot.  By definition we can
		 emit this insn before the call (and in fact before argument
		 relocation).  */
	      final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
			       NULL);

	      /* Now delete the delay insn.  */
	      SET_INSN_DELETED (NEXT_INSN (insn));
	      seq_length = 0;
	    }

	  if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
	    {
	      /* This is the best sequence for making long calls in
		 non-pic code.  Unfortunately, GNU ld doesn't provide
		 the stub needed for external calls, and GAS's support
		 for this with the SOM linker is buggy.  It is safe
		 to use this for local calls.  */
	      output_asm_insn ("ldil L'%0,%%r1", xoperands);
	      if (sibcall)
		output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
	      else
		{
		  if (TARGET_PA_20)
		    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
				     xoperands);
		  else
		    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);

		  output_asm_insn ("copy %%r31,%%r2", xoperands);
		  seq_length = 1;
		}
	    }
	  else
	    {
	      /* The HP assembler and linker can handle relocations for
		 the difference of two symbols.  The HP assembler
		 recognizes the sequence as a pc-relative call and
		 the linker provides stubs when needed.  */

	      /* GAS currently can't generate the relocations that
		 are needed for the SOM linker under HP-UX using this
		 sequence.  The GNU linker doesn't generate the stubs
		 that are needed for external calls on TARGET_ELF32
		 with this sequence.  For now, we have to use a longer
		 plabel sequence when using GAS for non-local calls.  */
	      if (TARGET_LONG_PIC_SDIFF_CALL
		  || (TARGET_GAS && !TARGET_SOM && local_call))
		{
		  xoperands[1] = gen_rtx_REG (Pmode, 1);
		  xoperands[2] = xoperands[1];
		  pa_output_pic_pcrel_sequence (xoperands);
		}
	      else
		{
		  /* Emit a long plabel-based call sequence.  This is
		     essentially an inline implementation of $$dyncall.
		     We don't actually try to call $$dyncall as this is
		     as difficult as calling the function itself.  */
		  xoperands[0] = pa_get_deferred_plabel (call_dest);
		  xoperands[1] = gen_label_rtx ();

		  /* Since the call is indirect, FP arguments in registers
		     need to be copied to the general registers.  Then, the
		     argument relocation stub will copy them back.  */
		  if (TARGET_SOM)
		    copy_fp_args (insn);

		  if (flag_pic)
		    {
		      output_asm_insn ("addil LT'%0,%%r19", xoperands);
		      output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
		      output_asm_insn ("ldw 0(%%r1),%%r22", xoperands);
		    }
		  else
		    {
		      output_asm_insn ("addil LR'%0-$global$,%%r27",
				       xoperands);
		      output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r22",
				       xoperands);
		    }

		  output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
		  output_asm_insn ("depi 0,31,2,%%r22", xoperands);
		  /* Should this be an ordered load to ensure the target
		     address is loaded before the global pointer?  */
		  output_asm_insn ("ldw 0(%%r22),%%r1", xoperands);
		  output_asm_insn ("ldw 4(%%r22),%%r19", xoperands);

		  if (!sibcall && !TARGET_PA_20)
		    {
		      output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
			output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
		      else
			output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
		    }
		}

	      if (TARGET_PA_20)
		{
		  if (sibcall)
		    output_asm_insn ("bve (%%r1)", xoperands);
		  else
		    {
		      if (indirect_call)
			{
			  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
			  output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
			  seq_length = 1;
			}
		      else
			output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
		    }
		}
	      else
		{
		  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
		    output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
				     xoperands);

		  if (sibcall)
		    {
		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
			output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
		      else
			output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
		    }
		  else
		    {
		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
			output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
		      else
			output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);

		      if (indirect_call)
			output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
		      else
			output_asm_insn ("copy %%r31,%%r2", xoperands);
		      seq_length = 1;
		    }
		}
	    }
	}
    }

  if (seq_length == 0)
    output_asm_insn ("nop", xoperands);

  return "";
}
8117 
8118 /* Return the attribute length of the indirect call instruction INSN.
8119    The length must match the code generated by pa_output_indirect_call.
8120    The returned length includes the delay slot.  Currently, the delay
8121    slot of an indirect call sequence is not exposed and it is used by
8122    the sequence itself.  */
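/* For example, the two-insn "bl $$dyncall" sequence used when $$dyncall
   is directly reachable is 8 bytes, while the long PIC pc-relative
   sequence is five instructions, or 20 bytes (see the corresponding
   cases in pa_output_indirect_call).  */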
8123 
8124 int
8125 pa_attr_length_indirect_call (rtx_insn *insn)
8126 {
8127   unsigned long distance = -1;
8128   unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8129 
8130   if (INSN_ADDRESSES_SET_P ())
8131     {
8132       distance = (total + insn_current_reference_address (insn));
8133       if (distance < total)
8134 	distance = -1;
8135     }
8136 
8137   if (TARGET_64BIT)
8138     return 12;
8139 
8140   if (TARGET_FAST_INDIRECT_CALLS)
8141     return 8;
8142 
8143   if (TARGET_PORTABLE_RUNTIME)
8144     return 16;
8145 
8146   if (!TARGET_LONG_CALLS
8147       && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8148 	  || distance < MAX_PCREL17F_OFFSET))
8149     return 8;
8150 
8151   /* Out of reach, can use ble.  */
8152   if (!flag_pic)
8153     return 12;
8154 
8155   /* Inline versions of $$dyncall.  */
8156   if (!optimize_size)
8157     {
8158       if (TARGET_NO_SPACE_REGS)
8159 	return 28;
8160 
8161       if (TARGET_PA_20)
8162 	return 32;
8163     }
8164 
8165   /* Long PIC pc-relative call.  */
8166   return 20;
8167 }
8168 
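/* Output the code for an indirect call of CALL_DEST.  The sequence
   emitted must have the length computed by pa_attr_length_indirect_call
   for INSN.  */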
8169 const char *
8170 pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
8171 {
8172   rtx xoperands[4];
8173   int length;
8174 
8175   if (TARGET_64BIT)
8176     {
8177       xoperands[0] = call_dest;
8178       output_asm_insn ("ldd 16(%0),%%r2\n\t"
8179 		       "bve,l (%%r2),%%r2\n\t"
8180 		       "ldd 24(%0),%%r27", xoperands);
8181       return "";
8182     }
8183 
8184   /* First the special case for kernels, level 0 systems, etc.  */
8185   if (TARGET_FAST_INDIRECT_CALLS)
8186     {
8187       pa_output_arg_descriptor (insn);
8188       if (TARGET_PA_20)
8189 	return "bve,l,n (%%r22),%%r2\n\tnop";
8190       return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8191     }
8192 
8193   if (TARGET_PORTABLE_RUNTIME)
8194     {
8195       output_asm_insn ("ldil L'$$dyncall,%%r31\n\t"
8196 		       "ldo R'$$dyncall(%%r31),%%r31", xoperands);
8197       pa_output_arg_descriptor (insn);
8198       return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8199     }
8200 
8201   /* Now the normal case -- we can reach $$dyncall directly or
8202      we're sure that we can get there via a long-branch stub.
8203 
8204      No need to check target flags as the length uniquely identifies
8205      the remaining cases.  */
8206   length = pa_attr_length_indirect_call (insn);
8207   if (length == 8)
8208     {
8209       pa_output_arg_descriptor (insn);
8210 
8211       /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8212 	 $$dyncall.  Since BLE uses %r31 as the link register, the 22-bit
8213 	 variant of the B,L instruction can't be used on the SOM target.  */
8214       if (TARGET_PA_20 && !TARGET_SOM)
8215 	return "b,l,n $$dyncall,%%r2\n\tnop";
8216       else
8217 	return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8218     }
8219 
8220   /* Long millicode call, but we are not generating PIC or portable runtime
8221      code.  */
8222   if (length == 12)
8223     {
8224       output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands);
8225       pa_output_arg_descriptor (insn);
8226       return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8227     }
8228 
8229   /* The long PIC pc-relative call sequence is five instructions.  So,
8230      let's use an inline version of $$dyncall when the calling sequence
8231      has a roughly similar number of instructions and we are not optimizing
8232      for size.  We need two instructions to load the return pointer plus
8233      the $$dyncall implementation.  */
8234   if (!optimize_size)
8235     {
8236       if (TARGET_NO_SPACE_REGS)
8237 	{
8238 	  pa_output_arg_descriptor (insn);
8239 	  output_asm_insn ("bl .+8,%%r2\n\t"
8240 			   "ldo 20(%%r2),%%r2\n\t"
8241 			   "extru,<> %%r22,30,1,%%r0\n\t"
8242 			   "bv,n %%r0(%%r22)\n\t"
8243 			   "ldw -2(%%r22),%%r21\n\t"
8244 			   "bv %%r0(%%r21)\n\t"
8245 			   "ldw 2(%%r22),%%r19", xoperands);
8246 	  return "";
8247 	}
8248       if (TARGET_PA_20)
8249 	{
8250 	  pa_output_arg_descriptor (insn);
8251 	  output_asm_insn ("bl .+8,%%r2\n\t"
8252 			   "ldo 24(%%r2),%%r2\n\t"
8253 			   "stw %%r2,-24(%%sp)\n\t"
8254 			   "extru,<> %%r22,30,1,%%r0\n\t"
8255 			   "bve,n (%%r22)\n\t"
8256 			   "ldw -2(%%r22),%%r21\n\t"
8257 			   "bve (%%r21)\n\t"
8258 			   "ldw 2(%%r22),%%r19", xoperands);
8259 	  return "";
8260 	}
8261     }
8262 
8263   /* We need a long PIC call to $$dyncall.  */
8264   xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall");
8265   xoperands[1] = gen_rtx_REG (Pmode, 2);
8266   xoperands[2] = gen_rtx_REG (Pmode, 1);
8267   pa_output_pic_pcrel_sequence (xoperands);
8268   pa_output_arg_descriptor (insn);
8269   return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2";
8270 }
8271 
8272 /* In HPUX 8.0's shared library scheme, special relocations are needed
8273    for function labels if they might be passed to a function
8274    in a shared library (because shared libraries don't live in code
8275    space), and special magic is needed to construct their address.  */
8276 
8277 void
8278 pa_encode_label (rtx sym)
8279 {
8280   const char *str = XSTR (sym, 0);
8281   int len = strlen (str) + 1;
8282   char *newstr, *p;
8283 
8284   p = newstr = XALLOCAVEC (char, len + 1);
8285   *p++ = '@';
8286   strcpy (p, str);
8287 
8288   XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8289 }
8290 
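/* Implement TARGET_ENCODE_SECTION_INFO.  On the first call for a symbol,
   mark text-space symbols with SYMBOL_REF_FLAG and add the '@' prefix to
   function labels via pa_encode_label; on later calls, just preserve the
   referenced flag computed earlier.  */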
8291 static void
8292 pa_encode_section_info (tree decl, rtx rtl, int first)
8293 {
8294   int old_referenced = 0;
8295 
8296   if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8297     old_referenced
8298       = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8299 
8300   default_encode_section_info (decl, rtl, first);
8301 
8302   if (first && TEXT_SPACE_P (decl))
8303     {
8304       SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8305       if (TREE_CODE (decl) == FUNCTION_DECL)
8306 	pa_encode_label (XEXP (rtl, 0));
8307     }
8308   else if (old_referenced)
8309     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8310 }
8311 
8312 /* This is sort of inverse to pa_encode_section_info.  */
8313 
8314 static const char *
8315 pa_strip_name_encoding (const char *str)
8316 {
8317   str += (*str == '@');
8318   str += (*str == '*');
8319   return str;
8320 }
8321 
8322 /* Returns 1 if OP is a function label involved in a simple addition
8323    with a constant.  Used to keep certain patterns from matching
8324    during instruction combination.  */
8325 int
8326 pa_is_function_label_plus_const (rtx op)
8327 {
8328   /* Strip off any CONST.  */
8329   if (GET_CODE (op) == CONST)
8330     op = XEXP (op, 0);
8331 
8332   return (GET_CODE (op) == PLUS
8333 	  && function_label_operand (XEXP (op, 0), VOIDmode)
8334 	  && GET_CODE (XEXP (op, 1)) == CONST_INT);
8335 }
8336 
8337 /* Output assembly code for a thunk to FUNCTION.  */
8338 
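/* The thunk adjusts the "this" pointer (the first argument, passed in
   %r26) by DELTA and then branches to FUNCTION.  VCALL_OFFSET is unused
   here.  */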
8339 static void
8340 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8341 			HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8342 			tree function)
8343 {
8344   static unsigned int current_thunk_number;
8345   int val_14 = VAL_14_BITS_P (delta);
8346   unsigned int old_last_address = last_address, nbytes = 0;
8347   char label[17];
8348   rtx xoperands[4];
8349 
8350   xoperands[0] = XEXP (DECL_RTL (function), 0);
8351   xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8352   xoperands[2] = GEN_INT (delta);
8353 
8354   final_start_function (emit_barrier (), file, 1);
8355 
8356   /* Output the thunk.  We know that the function is in the same
8357      translation unit (i.e., the same space) as the thunk, and that
8358      thunks are output after their method.  Thus, we don't need an
8359      external branch to reach the function.  With SOM and GAS,
8360      functions and thunks are effectively in different sections.
8361      Thus, we can always use an IA-relative branch and the linker
8362      will add a long branch stub if necessary.
8363 
8364      However, we have to be careful when generating PIC code on the
8365      SOM port to ensure that the sequence does not transfer to an
8366      import stub for the target function as this could clobber the
8367      return value saved at SP-24.  This would also apply to the
8368      32-bit linux port if the multi-space model is implemented.  */
8369   if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8370        && !(flag_pic && TREE_PUBLIC (function))
8371        && (TARGET_GAS || last_address < 262132))
8372       || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8373 	  && ((targetm_common.have_named_sections
8374 	       && DECL_SECTION_NAME (thunk_fndecl) != NULL
8375 	       /* The GNU 64-bit linker has rather poor stub management.
8376 		  So, we use a long branch from thunks that aren't in
8377 		  the same section as the target function.  */
8378 	       && ((!TARGET_64BIT
8379 		    && (DECL_SECTION_NAME (thunk_fndecl)
8380 			!= DECL_SECTION_NAME (function)))
8381 		   || ((DECL_SECTION_NAME (thunk_fndecl)
8382 			== DECL_SECTION_NAME (function))
8383 		       && last_address < 262132)))
8384 	      /* In this case, we need to be able to reach the start of
8385 		 the stub table even though the function is likely closer
8386 		 and can be jumped to directly.  */
8387 	      || (targetm_common.have_named_sections
8388 		  && DECL_SECTION_NAME (thunk_fndecl) == NULL
8389 		  && DECL_SECTION_NAME (function) == NULL
8390 		  && total_code_bytes < MAX_PCREL17F_OFFSET)
8391 	      /* Likewise.  */
8392 	      || (!targetm_common.have_named_sections
8393 		  && total_code_bytes < MAX_PCREL17F_OFFSET))))
8394     {
8395       if (!val_14)
8396 	output_asm_insn ("addil L'%2,%%r26", xoperands);
8397 
8398       output_asm_insn ("b %0", xoperands);
8399 
8400       if (val_14)
8401 	{
8402 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8403 	  nbytes += 8;
8404 	}
8405       else
8406 	{
8407 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8408 	  nbytes += 12;
8409 	}
8410     }
8411   else if (TARGET_64BIT)
8412     {
8413       rtx xop[4];
8414 
8415       /* We only have one call-clobbered scratch register, so we can't
8416          make use of the delay slot if delta doesn't fit in 14 bits.  */
8417       if (!val_14)
8418 	{
8419 	  output_asm_insn ("addil L'%2,%%r26", xoperands);
8420 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8421 	}
8422 
8423       /* Load function address into %r1.  */
8424       xop[0] = xoperands[0];
8425       xop[1] = gen_rtx_REG (Pmode, 1);
8426       xop[2] = xop[1];
8427       pa_output_pic_pcrel_sequence (xop);
8428 
8429       if (val_14)
8430 	{
8431 	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
8432 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8433 	  nbytes += 20;
8434 	}
8435       else
8436 	{
8437 	  output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8438 	  nbytes += 24;
8439 	}
8440     }
8441   else if (TARGET_PORTABLE_RUNTIME)
8442     {
8443       output_asm_insn ("ldil L'%0,%%r1", xoperands);
8444       output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8445 
8446       if (!val_14)
8447 	output_asm_insn ("ldil L'%2,%%r26", xoperands);
8448 
8449       output_asm_insn ("bv %%r0(%%r22)", xoperands);
8450 
8451       if (val_14)
8452 	{
8453 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8454 	  nbytes += 16;
8455 	}
8456       else
8457 	{
8458 	  output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands);
8459 	  nbytes += 20;
8460 	}
8461     }
8462   else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8463     {
8464       /* The function is accessible from outside this module.  The only
8465 	 way to avoid an import stub between the thunk and function is to
8466 	 call the function directly with an indirect sequence similar to
8467 	 that used by $$dyncall.  This is possible because $$dyncall acts
8468 	 as the import stub in an indirect call.  */
8469       ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8470       xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8471       output_asm_insn ("addil LT'%3,%%r19", xoperands);
8472       output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8473       output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8474       output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8475       output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8476       output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8477       output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8478 
8479       if (!val_14)
8480 	{
8481 	  output_asm_insn ("addil L'%2,%%r26", xoperands);
8482 	  nbytes += 4;
8483 	}
8484 
8485       if (TARGET_PA_20)
8486 	{
8487 	  output_asm_insn ("bve (%%r22)", xoperands);
8488 	  nbytes += 36;
8489 	}
8490       else if (TARGET_NO_SPACE_REGS)
8491 	{
8492 	  output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8493 	  nbytes += 36;
8494 	}
8495       else
8496 	{
8497 	  output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8498 	  output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8499 	  output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8500 	  nbytes += 44;
8501 	}
8502 
8503       if (val_14)
8504 	output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8505       else
8506 	output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8507     }
8508   else if (flag_pic)
8509     {
8510       rtx xop[4];
8511 
8512       /* Load function address into %r22.  */
8513       xop[0] = xoperands[0];
8514       xop[1] = gen_rtx_REG (Pmode, 1);
8515       xop[2] = gen_rtx_REG (Pmode, 22);
8516       pa_output_pic_pcrel_sequence (xop);
8517 
8518       if (!val_14)
8519 	output_asm_insn ("addil L'%2,%%r26", xoperands);
8520 
8521       output_asm_insn ("bv %%r0(%%r22)", xoperands);
8522 
8523       if (val_14)
8524 	{
8525 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8526 	  nbytes += 20;
8527 	}
8528       else
8529 	{
8530 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8531 	  nbytes += 24;
8532 	}
8533     }
8534   else
8535     {
8536       if (!val_14)
8537 	output_asm_insn ("addil L'%2,%%r26", xoperands);
8538 
8539       output_asm_insn ("ldil L'%0,%%r22", xoperands);
8540       output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8541 
8542       if (val_14)
8543 	{
8544 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8545 	  nbytes += 12;
8546 	}
8547       else
8548 	{
8549 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8550 	  nbytes += 16;
8551 	}
8552     }
8553 
8554   final_end_function ();
8555 
8556   if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8557     {
8558       switch_to_section (data_section);
8559       output_asm_insn (".align 4", xoperands);
8560       ASM_OUTPUT_LABEL (file, label);
8561       output_asm_insn (".word P'%0", xoperands);
8562     }
8563 
8564   current_thunk_number++;
8565   nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8566 	    & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8567   last_address += nbytes;
8568   if (old_last_address > last_address)
8569     last_address = UINT_MAX;
8570   update_total_code_bytes (nbytes);
8571 }
8572 
8573 /* Only direct calls to static functions are allowed to be sibling (tail)
8574    call optimized.
8575 
8576    This restriction is necessary because some linker generated stubs
8577    store the return pointer into rp', which in some cases might clobber
8578    a live value already in rp'.
8579 
8580    In a sibcall the current function and the target function share stack
8581    space.  Thus if the path to the current function and the path to the
8582    target function save a value in rp', they save the value into the
8583    same stack slot, which has undesirable consequences.
8584 
8585    Because of the deferred binding nature of shared libraries any function
8586    with external scope could be in a different load module and thus require
8587    rp' to be saved when calling that function.  So sibcall optimizations
8588    can only be safe for static functions.
8589 
8590    Note that GCC never needs return value relocations, so we don't have to
8591    worry about static calls with return value relocations (which require
8592    saving rp').
8593 
8594    It is safe to perform a sibcall optimization when the target function
8595    will never return.  */
8596 static bool
8597 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8598 {
8599   /* Sibcalls are not ok because the arg pointer register is not a fixed
8600      register.  This prevents the sibcall optimization from occurring.  In
8601      addition, there are problems with stub placement using GNU ld.  This
8602      is because a normal sibcall branch uses a 17-bit relocation while
8603      a regular call branch uses a 22-bit relocation.  As a result, more
8604      care needs to be taken in the placement of long-branch stubs.  */
8605   if (TARGET_64BIT)
8606     return false;
8607 
8608   if (TARGET_PORTABLE_RUNTIME)
8609     return false;
8610 
8611   /* Sibcalls are only ok within a translation unit.  */
8612   return decl && targetm.binds_local_p (decl);
8613 }
8614 
8615 /* ??? Addition is not commutative on the PA due to the weird implicit
8616    space register selection rules for memory addresses.  Therefore, we
8617    don't consider a + b == b + a, as this might be inside a MEM.  */
8618 static bool
8619 pa_commutative_p (const_rtx x, int outer_code)
8620 {
8621   return (COMMUTATIVE_P (x)
8622 	  && (TARGET_NO_SPACE_REGS
8623 	      || (outer_code != UNKNOWN && outer_code != MEM)
8624 	      || GET_CODE (x) != PLUS));
8625 }
8626 
8627 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8628    use in fmpyadd instructions.  */
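/* The checks below assume operands 0 to 2 form the multiply (dest, src,
   src) and operands 3 to 5 form the add (inout dest, src, src); one of
   the add sources must be the same register as the add destination.  */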
8629 int
8630 pa_fmpyaddoperands (rtx *operands)
8631 {
8632   machine_mode mode = GET_MODE (operands[0]);
8633 
8634   /* Must be a floating point mode.  */
8635   if (mode != SFmode && mode != DFmode)
8636     return 0;
8637 
8638   /* All modes must be the same.  */
8639   if (! (mode == GET_MODE (operands[1])
8640 	 && mode == GET_MODE (operands[2])
8641 	 && mode == GET_MODE (operands[3])
8642 	 && mode == GET_MODE (operands[4])
8643 	 && mode == GET_MODE (operands[5])))
8644     return 0;
8645 
8646   /* All operands must be registers.  */
8647   if (! (GET_CODE (operands[1]) == REG
8648 	 && GET_CODE (operands[2]) == REG
8649 	 && GET_CODE (operands[3]) == REG
8650 	 && GET_CODE (operands[4]) == REG
8651 	 && GET_CODE (operands[5]) == REG))
8652     return 0;
8653 
8654   /* Only 2 real operands to the addition.  One of the input operands must
8655      be the same as the output operand.  */
8656   if (! rtx_equal_p (operands[3], operands[4])
8657       && ! rtx_equal_p (operands[3], operands[5]))
8658     return 0;
8659 
8660   /* Inout operand of add cannot conflict with any operands from multiply.  */
8661   if (rtx_equal_p (operands[3], operands[0])
8662      || rtx_equal_p (operands[3], operands[1])
8663      || rtx_equal_p (operands[3], operands[2]))
8664     return 0;
8665 
8666   /* multiply cannot feed into addition operands.  */
8667   if (rtx_equal_p (operands[4], operands[0])
8668       || rtx_equal_p (operands[5], operands[0]))
8669     return 0;
8670 
8671   /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
8672   if (mode == SFmode
8673       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8674 	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8675 	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8676 	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8677 	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8678 	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8679     return 0;
8680 
8681   /* Passed.  Operands are suitable for fmpyadd.  */
8682   return 1;
8683 }
8684 
8685 #if !defined(USE_COLLECT2)
8686 static void
8687 pa_asm_out_constructor (rtx symbol, int priority)
8688 {
8689   if (!function_label_operand (symbol, VOIDmode))
8690     pa_encode_label (symbol);
8691 
8692 #ifdef CTORS_SECTION_ASM_OP
8693   default_ctor_section_asm_out_constructor (symbol, priority);
8694 #else
8695 # ifdef TARGET_ASM_NAMED_SECTION
8696   default_named_section_asm_out_constructor (symbol, priority);
8697 # else
8698   default_stabs_asm_out_constructor (symbol, priority);
8699 # endif
8700 #endif
8701 }
8702 
8703 static void
8704 pa_asm_out_destructor (rtx symbol, int priority)
8705 {
8706   if (!function_label_operand (symbol, VOIDmode))
8707     pa_encode_label (symbol);
8708 
8709 #ifdef DTORS_SECTION_ASM_OP
8710   default_dtor_section_asm_out_destructor (symbol, priority);
8711 #else
8712 # ifdef TARGET_ASM_NAMED_SECTION
8713   default_named_section_asm_out_destructor (symbol, priority);
8714 # else
8715   default_stabs_asm_out_destructor (symbol, priority);
8716 # endif
8717 #endif
8718 }
8719 #endif
8720 
8721 /* This function places uninitialized global data in the bss section.
8722    The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8723    function on the SOM port to prevent uninitialized global data from
8724    being placed in the data section.  */
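/* For example, with SIZE of 16 bytes and ALIGN of 64 bits, the output
   is roughly (.type and .size directives may appear in between):

	.align 8
   name:
	.block 16  */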
8725 
8726 void
8727 pa_asm_output_aligned_bss (FILE *stream,
8728 			   const char *name,
8729 			   unsigned HOST_WIDE_INT size,
8730 			   unsigned int align)
8731 {
8732   switch_to_section (bss_section);
8733   fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8734 
8735 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8736   ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8737 #endif
8738 
8739 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8740   ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8741 #endif
8742 
8743   fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8744   ASM_OUTPUT_LABEL (stream, name);
8745   fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8746 }
8747 
8748 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8749    that doesn't allow the alignment of global common storage to be directly
8750    specified.  The SOM linker aligns common storage based on the rounded
8751    value of the NUM_BYTES parameter in the .comm directive.  It's not
8752    possible to use the .align directive as it doesn't affect the alignment
8753    of the label associated with a .comm directive.  */
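/* For example, a 32-byte object requiring 64-byte alignment is emitted
   as "name\t.comm 64"; rounding NUM_BYTES up to the requested alignment
   causes the SOM linker to align the storage accordingly.  */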
8754 
8755 void
8756 pa_asm_output_aligned_common (FILE *stream,
8757 			      const char *name,
8758 			      unsigned HOST_WIDE_INT size,
8759 			      unsigned int align)
8760 {
8761   unsigned int max_common_align;
8762 
8763   max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8764   if (align > max_common_align)
8765     {
8766       warning (0, "alignment (%u) for %s exceeds maximum alignment "
8767 	       "for global common data.  Using %u",
8768 	       align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8769       align = max_common_align;
8770     }
8771 
8772   switch_to_section (bss_section);
8773 
8774   assemble_name (stream, name);
8775   fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8776            MAX (size, align / BITS_PER_UNIT));
8777 }
8778 
8779 /* We can't use .comm for local common storage as the SOM linker effectively
8780    treats the symbol as universal and uses the same storage for local symbols
8781    with the same name in different object files.  The .block directive
8782    reserves an uninitialized block of storage.  However, it's not common
8783    storage.  Fortunately, GCC never requests common storage with the same
8784    name in any given translation unit.  */
8785 
8786 void
8787 pa_asm_output_aligned_local (FILE *stream,
8788 			     const char *name,
8789 			     unsigned HOST_WIDE_INT size,
8790 			     unsigned int align)
8791 {
8792   switch_to_section (bss_section);
8793   fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8794 
8795 #ifdef LOCAL_ASM_OP
8796   fprintf (stream, "%s", LOCAL_ASM_OP);
8797   assemble_name (stream, name);
8798   fprintf (stream, "\n");
8799 #endif
8800 
8801   ASM_OUTPUT_LABEL (stream, name);
8802   fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8803 }
8804 
8805 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8806    use in fmpysub instructions.  */
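/* As in pa_fmpyaddoperands, operands 0 to 2 form the multiply and
   operands 3 to 5 the subtraction, but here operands[4] must equal the
   inout operand operands[3] since subtraction is not commutative.  */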
8807 int
8808 pa_fmpysuboperands (rtx *operands)
8809 {
8810   machine_mode mode = GET_MODE (operands[0]);
8811 
8812   /* Must be a floating point mode.  */
8813   if (mode != SFmode && mode != DFmode)
8814     return 0;
8815 
8816   /* All modes must be the same.  */
8817   if (! (mode == GET_MODE (operands[1])
8818 	 && mode == GET_MODE (operands[2])
8819 	 && mode == GET_MODE (operands[3])
8820 	 && mode == GET_MODE (operands[4])
8821 	 && mode == GET_MODE (operands[5])))
8822     return 0;
8823 
8824   /* All operands must be registers.  */
8825   if (! (GET_CODE (operands[1]) == REG
8826 	 && GET_CODE (operands[2]) == REG
8827 	 && GET_CODE (operands[3]) == REG
8828 	 && GET_CODE (operands[4]) == REG
8829 	 && GET_CODE (operands[5]) == REG))
8830     return 0;
8831 
8832   /* Only 2 real operands to the subtraction.  Subtraction is not a commutative
8833      operation, so operands[4] must be the same as operands[3].  */
8834   if (! rtx_equal_p (operands[3], operands[4]))
8835     return 0;
8836 
8837   /* multiply cannot feed into subtraction.  */
8838   if (rtx_equal_p (operands[5], operands[0]))
8839     return 0;
8840 
8841   /* Inout operand of sub cannot conflict with any operands from multiply.  */
8842   if (rtx_equal_p (operands[3], operands[0])
8843      || rtx_equal_p (operands[3], operands[1])
8844      || rtx_equal_p (operands[3], operands[2]))
8845     return 0;
8846 
8847   /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
8848   if (mode == SFmode
8849       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8850 	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8851 	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8852 	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8853 	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8854 	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8855     return 0;
8856 
8857   /* Passed.  Operands are suitable for fmpysub.  */
8858   return 1;
8859 }
8860 
8861 /* Return 1 if the given constant is 2, 4, or 8.  These are the valid
8862    constants for a MULT embedded inside a memory address.  */
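/* For example, the MULT in (mem (plus (mult (reg) (const_int 8)) (reg)))
   satisfies this predicate and can be scaled directly in the address.  */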
8863 int
8864 pa_mem_shadd_constant_p (int val)
8865 {
8866   if (val == 2 || val == 4 || val == 8)
8867     return 1;
8868   else
8869     return 0;
8870 }
8871 
8872 /* Return 1 if the given constant is 1, 2, or 3.  These are the valid
8873    constants for shadd instructions.  */
8874 int
8875 pa_shadd_constant_p (int val)
8876 {
8877   if (val == 1 || val == 2 || val == 3)
8878     return 1;
8879   else
8880     return 0;
8881 }
8882 
8883 /* Return TRUE if INSN branches forward.  */
8884 
8885 static bool
8886 forward_branch_p (rtx_insn *insn)
8887 {
8888   rtx lab = JUMP_LABEL (insn);
8889 
8890   /* The INSN must have a jump label.  */
8891   gcc_assert (lab != NULL_RTX);
8892 
8893   if (INSN_ADDRESSES_SET_P ())
8894     return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8895 
8896   while (insn)
8897     {
8898       if (insn == lab)
8899 	return true;
8900       else
8901 	insn = NEXT_INSN (insn);
8902     }
8903 
8904   return false;
8905 }
8906 
8907 /* Output an unconditional move and branch insn.  */
8908 
8909 const char *
8910 pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
8911 {
8912   int length = get_attr_length (insn);
8913 
8914   /* These are the cases in which we win.  */
8915   if (length == 4)
8916     return "mov%I1b,tr %1,%0,%2";
8917 
8918   /* None of the following cases win, but they don't lose either.  */
8919   if (length == 8)
8920     {
8921       if (dbr_sequence_length () == 0)
8922 	{
8923 	  /* Nothing in the delay slot, fake it by putting the combined
8924 	     insn (the copy or add) in the delay slot of a bl.  */
8925 	  if (GET_CODE (operands[1]) == CONST_INT)
8926 	    return "b %2\n\tldi %1,%0";
8927 	  else
8928 	    return "b %2\n\tcopy %1,%0";
8929 	}
8930       else
8931 	{
8932 	  /* Something in the delay slot, but we've got a long branch.  */
8933 	  if (GET_CODE (operands[1]) == CONST_INT)
8934 	    return "ldi %1,%0\n\tb %2";
8935 	  else
8936 	    return "copy %1,%0\n\tb %2";
8937 	}
8938     }
8939 
8940   if (GET_CODE (operands[1]) == CONST_INT)
8941     output_asm_insn ("ldi %1,%0", operands);
8942   else
8943     output_asm_insn ("copy %1,%0", operands);
8944   return pa_output_lbranch (operands[2], insn, 1);
8945 }
8946 
8947 /* Output an unconditional add and branch insn.  */
8948 
8949 const char *
8950 pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
8951 {
8952   int length = get_attr_length (insn);
8953 
8954   /* To make life easy we want operand0 to be the shared input/output
8955      operand and operand1 to be the readonly operand.  */
8956   if (operands[0] == operands[1])
8957     operands[1] = operands[2];
8958 
8959   /* These are the cases in which we win.  */
8960   if (length == 4)
8961     return "add%I1b,tr %1,%0,%3";
8962 
8963   /* None of the following cases win, but they don't lose either.  */
8964   if (length == 8)
8965     {
8966       if (dbr_sequence_length () == 0)
8967 	/* Nothing in the delay slot, fake it by putting the combined
8968 	   insn (the copy or add) in the delay slot of a bl.  */
8969 	return "b %3\n\tadd%I1 %1,%0,%0";
8970       else
8971 	/* Something in the delay slot, but we've got a long branch.  */
8972 	return "add%I1 %1,%0,%0\n\tb %3";
8973     }
8974 
8975   output_asm_insn ("add%I1 %1,%0,%0", operands);
8976   return pa_output_lbranch (operands[3], insn, 1);
8977 }
8978 
8979 /* We use this hook to perform a PA specific optimization which is difficult
8980    to do in earlier passes.  */
8981 
8982 static void
8983 pa_reorg (void)
8984 {
8985   remove_useless_addtr_insns (1);
8986 
8987   if (pa_cpu < PROCESSOR_8000)
8988     pa_combine_instructions ();
8989 }
8990 
8991 /* The PA has a number of odd instructions which can perform multiple
8992    tasks at once.  On first generation PA machines (PA1.0 and PA1.1)
8993    it may be profitable to combine two instructions into one instruction
8994    with two outputs.  It's not profitable on PA2.0 machines because the
8995    two outputs would take two slots in the reorder buffers.
8996 
8997    This routine finds instructions which can be combined and combines
8998    them.  We only support some of the potential combinations, and we
8999    only try common ways to find suitable instructions.
9000 
9001       * addb can add two registers or a register and a small integer
9002       and jump to a nearby (+-8k) location.  Normally the jump to the
9003       nearby location is conditional on the result of the add, but by
9004       using the "true" condition we can make the jump unconditional.
9005       Thus addb can perform two independent operations in one insn.
9006 
9007       * movb is similar to addb in that it can perform a reg->reg
9008       or small immediate->reg copy and jump to a nearby (+-8k) location.
9009 
9010       * fmpyadd and fmpysub can perform a FP multiply and either an
9011       FP add or FP sub if the operands of the multiply and add/sub are
9012       independent (there are other minor restrictions).  Note both
9013       the fmpy and fadd/fsub can in theory move to better spots according
9014       to data dependencies, but for now we require the fmpy stay at a
9015       fixed location.
9016 
9017       * Many of the memory operations can perform pre & post updates
9018       of index registers.  GCC's pre/post increment/decrement addressing
9019       is far too simple to take advantage of all the possibilities.  This
9020       pass may not be suitable since those insns may not be independent.
9021 
9022       * comclr can compare two ints or an int and a register, nullify
9023       the following instruction and zero some other register.  This
9024       is more difficult to use as it's harder to find an insn which
9025       will generate a comclr than finding something like an unconditional
9026       branch.  (conditional moves & long branches create comclr insns).
9027 
9028       * Most arithmetic operations can conditionally skip the next
9029       instruction.  They can be viewed as "perform this operation
9030       and conditionally jump to this nearby location" (where nearby
9031       is an insn away).  These are difficult to use due to the
9032       branch length restrictions.  */
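/* For example, a register copy followed by an unconditional backward
   branch can be rewritten as a single "movb,tr" insn, the always-true
   condition making the branch unconditional (see
   pa_output_parallel_movb).  */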
9033 
9034 static void
9035 pa_combine_instructions (void)
9036 {
9037   rtx_insn *anchor;
9038 
9039   /* This can get expensive since the basic algorithm is on the
9040      order of O(n^2) (or worse).  Only do it for -O2 or higher
9041      levels of optimization.  */
9042   if (optimize < 2)
9043     return;
9044 
9045   /* Walk down the list of insns looking for "anchor" insns which
9046      may be combined with "floating" insns.  As the name implies,
9047      "anchor" instructions don't move, while "floating" insns may
9048      move around.  */
9049   rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9050   rtx_insn *new_rtx = make_insn_raw (par);
9051 
9052   for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9053     {
9054       enum attr_pa_combine_type anchor_attr;
9055       enum attr_pa_combine_type floater_attr;
9056 
9057       /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9058 	 Also ignore any special USE insns.  */
9059       if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
9060 	  || GET_CODE (PATTERN (anchor)) == USE
9061 	  || GET_CODE (PATTERN (anchor)) == CLOBBER)
9062 	continue;
9063 
9064       anchor_attr = get_attr_pa_combine_type (anchor);
9065       /* See if anchor is an insn suitable for combination.  */
9066       if (anchor_attr == PA_COMBINE_TYPE_FMPY
9067 	  || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9068 	  || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9069 	      && ! forward_branch_p (anchor)))
9070 	{
9071 	  rtx_insn *floater;
9072 
9073 	  for (floater = PREV_INSN (anchor);
9074 	       floater;
9075 	       floater = PREV_INSN (floater))
9076 	    {
9077 	      if (NOTE_P (floater)
9078 		  || (NONJUMP_INSN_P (floater)
9079 		      && (GET_CODE (PATTERN (floater)) == USE
9080 			  || GET_CODE (PATTERN (floater)) == CLOBBER)))
9081 		continue;
9082 
9083 	      /* Anything except a regular INSN will stop our search.  */
9084 	      if (! NONJUMP_INSN_P (floater))
9085 		{
9086 		  floater = NULL;
9087 		  break;
9088 		}
9089 
9090 	      /* See if FLOATER is suitable for combination with the
9091 		 anchor.  */
9092 	      floater_attr = get_attr_pa_combine_type (floater);
9093 	      if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9094 		   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9095 		  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9096 		      && floater_attr == PA_COMBINE_TYPE_FMPY))
9097 		{
9098 		  /* If ANCHOR and FLOATER can be combined, then we're
9099 		     done with this pass.  */
9100 		  if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9101 					SET_DEST (PATTERN (floater)),
9102 					XEXP (SET_SRC (PATTERN (floater)), 0),
9103 					XEXP (SET_SRC (PATTERN (floater)), 1)))
9104 		    break;
9105 		}
9106 
9107 	      else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9108 		       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9109 		{
9110 		  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9111 		    {
9112 		      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9113 					    SET_DEST (PATTERN (floater)),
9114 					XEXP (SET_SRC (PATTERN (floater)), 0),
9115 					XEXP (SET_SRC (PATTERN (floater)), 1)))
9116 			break;
9117 		    }
9118 		  else
9119 		    {
9120 		      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9121 					    SET_DEST (PATTERN (floater)),
9122 					    SET_SRC (PATTERN (floater)),
9123 					    SET_SRC (PATTERN (floater))))
9124 			break;
9125 		    }
9126 		}
9127 	    }
9128 
9129 	  /* If we didn't find anything on the backwards scan try forwards.  */
9130 	  if (!floater
9131 	      && (anchor_attr == PA_COMBINE_TYPE_FMPY
9132 		  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9133 	    {
9134 	      for (floater = anchor; floater; floater = NEXT_INSN (floater))
9135 		{
9136 		  if (NOTE_P (floater)
9137 		      || (NONJUMP_INSN_P (floater)
9138 			  && (GET_CODE (PATTERN (floater)) == USE
9139 			      || GET_CODE (PATTERN (floater)) == CLOBBER)))
9141 		    continue;
9142 
9143 		  /* Anything except a regular INSN will stop our search.  */
9144 		  if (! NONJUMP_INSN_P (floater))
9145 		    {
9146 		      floater = NULL;
9147 		      break;
9148 		    }
9149 
9150 		  /* See if FLOATER is suitable for combination with the
9151 		     anchor.  */
9152 		  floater_attr = get_attr_pa_combine_type (floater);
9153 		  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9154 		       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9155 		      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9156 			  && floater_attr == PA_COMBINE_TYPE_FMPY))
9157 		    {
9158 		      /* If ANCHOR and FLOATER can be combined, then we're
9159 			 done with this pass.  */
9160 		      if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9161 					    SET_DEST (PATTERN (floater)),
9162 					    XEXP (SET_SRC (PATTERN (floater)),
9163 						  0),
9164 					    XEXP (SET_SRC (PATTERN (floater)),
9165 						  1)))
9166 			break;
9167 		    }
9168 		}
9169 	    }
9170 
9171 	  /* FLOATER will be nonzero if we found a suitable floating
9172 	     insn for combination with ANCHOR.  */
9173 	  if (floater
9174 	      && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9175 		  || anchor_attr == PA_COMBINE_TYPE_FMPY))
9176 	    {
9177 	      /* Emit the new instruction and delete the old anchor.  */
9178 	      rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9179 				       copy_rtx (PATTERN (floater)));
9180 	      rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9181 	      emit_insn_before (temp, anchor);
9182 
9183 	      SET_INSN_DELETED (anchor);
9184 
9185 	      /* Emit a special USE insn for FLOATER, then delete
9186 		 the floating insn.  */
9187 	      temp = copy_rtx (PATTERN (floater));
9188 	      emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9189 	      delete_insn (floater);
9190 
9191 	      continue;
9192 	    }
9193 	  else if (floater
9194 		   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9195 	    {
9196 	      /* Emit the new_jump instruction and delete the old anchor.  */
9197 	      rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9198 				       copy_rtx (PATTERN (floater)));
9199 	      rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9200 	      temp = emit_jump_insn_before (temp, anchor);
9201 
9202 	      JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9203 	      SET_INSN_DELETED (anchor);
9204 
9205 	      /* Emit a special USE insn for FLOATER, then delete
9206 		 the floating insn.  */
9207 	      temp = copy_rtx (PATTERN (floater));
9208 	      emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9209 	      delete_insn (floater);
9210 	      continue;
9211 	    }
9212 	}
9213     }
9214 }
9215 
9216 static int
9217 pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
9218 		  int reversed, rtx dest,
9219 		  rtx src1, rtx src2)
9220 {
9221   int insn_code_number;
9222   rtx_insn *start, *end;
9223 
9224   /* Create a PARALLEL with the patterns of ANCHOR and
9225      FLOATER, try to recognize it, then test constraints
9226      for the resulting pattern.
9227 
9228      If the pattern doesn't match or the constraints
9229      aren't met keep searching for a suitable floater
9230      insn.  */
9231   XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9232   XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9233   INSN_CODE (new_rtx) = -1;
9234   insn_code_number = recog_memoized (new_rtx);
9235   basic_block bb = BLOCK_FOR_INSN (anchor);
9236   if (insn_code_number < 0
9237       || (extract_insn (new_rtx),
9238 	  !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
9239     return 0;
9240 
9241   if (reversed)
9242     {
9243       start = anchor;
9244       end = floater;
9245     }
9246   else
9247     {
9248       start = floater;
9249       end = anchor;
9250     }
9251 
9252   /* There are up to three operands to consider.  One
9253      output and two inputs.
9254 
9255      The output must not be used between FLOATER & ANCHOR
9256      exclusive.  The inputs must not be set between
9257      FLOATER and ANCHOR exclusive.  */
9258 
9259   if (reg_used_between_p (dest, start, end))
9260     return 0;
9261 
9262   if (reg_set_between_p (src1, start, end))
9263     return 0;
9264 
9265   if (reg_set_between_p (src2, start, end))
9266     return 0;
9267 
9268   /* If we get here, then everything is good.  */
9269   return 1;
9270 }
9271 
9272 /* Return nonzero if references for INSN are delayed.
9273 
9274    Millicode insns are actually function calls with some special
9275    constraints on arguments and register usage.
9276 
9277    Millicode calls always expect their arguments in the integer argument
9278    registers, and always return their result in %r29 (ret1).  They
9279    are expected to clobber their arguments, %r1, %r29, and the return
9280    pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9281 
9282    This function tells reorg that the references to arguments and
9283    millicode calls do not appear to happen until after the millicode call.
9284    This allows reorg to put insns which set the argument registers into the
9285    delay slot of the millicode call -- thus they act more like traditional
9286    CALL_INSNs.
9287 
9288    Note we cannot consider side effects of the insn to be delayed because
9289    the branch and link insn will clobber the return pointer.  If we happened
9290    to use the return pointer in the delay slot of the call, then we lose.
9291 
9292    get_attr_type will try to recognize the given insn, so make sure to
9293    filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9294    in particular.  */
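/* (On the PA, millicode routines such as $$mulI and $$divI supply
   integer multiply and divide support.)  */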
9295 int
9296 pa_insn_refs_are_delayed (rtx_insn *insn)
9297 {
9298   return ((NONJUMP_INSN_P (insn)
9299 	   && GET_CODE (PATTERN (insn)) != SEQUENCE
9300 	   && GET_CODE (PATTERN (insn)) != USE
9301 	   && GET_CODE (PATTERN (insn)) != CLOBBER
9302 	   && get_attr_type (insn) == TYPE_MILLI));
9303 }
9304 
9305 /* Promote the return value, but not the arguments.  */
9306 
9307 static machine_mode
9308 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9309                           machine_mode mode,
9310                           int *punsignedp ATTRIBUTE_UNUSED,
9311                           const_tree fntype ATTRIBUTE_UNUSED,
9312                           int for_return)
9313 {
9314   if (for_return == 0)
9315     return mode;
9316   return promote_mode (type, mode, punsignedp);
9317 }
9318 
9319 /* On the HP-PA the value is found in register(s) 28(-29), unless
9320    the mode is SF or DF. Then the value is returned in fr4 (32).
9321 
9322    This must perform the same promotions as PROMOTE_MODE, else promoting
9323    return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9324 
9325    Small structures must be returned in a PARALLEL on PA64 in order
9326    to match the HP Compiler ABI.  */
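/* For example, a 'short' is promoted and returned in word_mode in r28,
   a 'double' is returned in fr4, and an 8-byte aggregate on the 32-bit
   port comes back in r28-r29.  */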
9327 
9328 static rtx
9329 pa_function_value (const_tree valtype,
9330                    const_tree func ATTRIBUTE_UNUSED,
9331                    bool outgoing ATTRIBUTE_UNUSED)
9332 {
9333   machine_mode valmode;
9334 
9335   if (AGGREGATE_TYPE_P (valtype)
9336       || TREE_CODE (valtype) == COMPLEX_TYPE
9337       || TREE_CODE (valtype) == VECTOR_TYPE)
9338     {
9339       HOST_WIDE_INT valsize = int_size_in_bytes (valtype);
9340 
9341       /* Handle aggregates that fit exactly in a word or double word.  */
9342       if (valsize == UNITS_PER_WORD || valsize == 2 * UNITS_PER_WORD)
9343 	return gen_rtx_REG (TYPE_MODE (valtype), 28);
9344 
9345       if (TARGET_64BIT)
9346 	{
9347           /* Aggregates with a size less than or equal to 128 bits are
9348 	     returned in GR 28(-29).  They are left justified.  The pad
9349 	     bits are undefined.  Larger aggregates are returned in
9350 	     memory.  */
9351 	  rtx loc[2];
9352 	  int i, offset = 0;
9353 	  int ub = valsize <= UNITS_PER_WORD ? 1 : 2;
9354 
9355 	  for (i = 0; i < ub; i++)
9356 	    {
9357 	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9358 					  gen_rtx_REG (DImode, 28 + i),
9359 					  GEN_INT (offset));
9360 	      offset += 8;
9361 	    }
9362 
9363 	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9364 	}
9365       else if (valsize > UNITS_PER_WORD)
9366 	{
9367 	  /* Aggregates 5 to 8 bytes in size are returned in general
9368 	     registers r28-r29 in the same manner as other non
9369 	     floating-point objects.  The data is right-justified and
9370 	     zero-extended to 64 bits.  This is opposite to the normal
9371 	     justification used on big endian targets and requires
9372 	     special treatment.  */
9373 	  rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9374 				       gen_rtx_REG (DImode, 28), const0_rtx);
9375 	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9376 	}
9377     }
9378 
9379   if ((INTEGRAL_TYPE_P (valtype)
9380        && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9381       || POINTER_TYPE_P (valtype))
9382     valmode = word_mode;
9383   else
9384     valmode = TYPE_MODE (valtype);
9385 
9386   if (TREE_CODE (valtype) == REAL_TYPE
9387       && !AGGREGATE_TYPE_P (valtype)
9388       && TYPE_MODE (valtype) != TFmode
9389       && !TARGET_SOFT_FLOAT)
9390     return gen_rtx_REG (valmode, 32);
9391 
9392   return gen_rtx_REG (valmode, 28);
9393 }
9394 
9395 /* Implement the TARGET_LIBCALL_VALUE hook.  */
9396 
9397 static rtx
9398 pa_libcall_value (machine_mode mode,
9399 		  const_rtx fun ATTRIBUTE_UNUSED)
9400 {
9401   if (! TARGET_SOFT_FLOAT
9402       && (mode == SFmode || mode == DFmode))
9403     return  gen_rtx_REG (mode, 32);
9404   else
9405     return  gen_rtx_REG (mode, 28);
9406 }
9407 
9408 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook.  */
9409 
9410 static bool
9411 pa_function_value_regno_p (const unsigned int regno)
9412 {
9413   if (regno == 28
9414       || (! TARGET_SOFT_FLOAT &&  regno == 32))
9415     return true;
9416 
9417   return false;
9418 }
9419 
9420 /* Update the data in CUM to advance over an argument
9421    of mode MODE and data type TYPE.
9422    (TYPE is null for libcalls where that information may not be available.)  */
9423 
9424 static void
9425 pa_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
9426 			 const_tree type, bool named ATTRIBUTE_UNUSED)
9427 {
9428   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9429   int arg_size = pa_function_arg_size (mode, type);
9430 
9431   cum->nargs_prototype--;
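  /* Multi-word arguments are double-word aligned, so an extra padding
     word is consumed when CUM->words is odd.  */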
9432   cum->words += (arg_size
9433 		 + ((cum->words & 01)
9434 		    && type != NULL_TREE
9435 		    && arg_size > 1));
9436 }
9437 
9438 /* Return the location of a parameter that is passed in a register or NULL
9439    if the parameter has any component that is passed in memory.
9440 
9441    This is new code and will be pushed into the net sources after
9442    further testing.
9443 
9444    ??? We might want to restructure this so that it looks more like other
9445    ports.  */
9446 static rtx
9447 pa_function_arg (cumulative_args_t cum_v, machine_mode mode,
9448 		 const_tree type, bool named ATTRIBUTE_UNUSED)
9449 {
9450   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9451   int max_arg_words = (TARGET_64BIT ? 8 : 4);
9452   int alignment = 0;
9453   int arg_size;
9454   int fpr_reg_base;
9455   int gpr_reg_base;
9456   rtx retval;
9457 
9458   if (mode == VOIDmode)
9459     return NULL_RTX;
9460 
9461   arg_size = pa_function_arg_size (mode, type);
9462 
9463   /* If this arg would be passed partially or totally on the stack, then
9464      this routine should return zero.  pa_arg_partial_bytes will
9465      handle arguments which are split between regs and stack slots if
9466      the ABI mandates split arguments.  */
9467   if (!TARGET_64BIT)
9468     {
9469       /* The 32-bit ABI does not split arguments.  */
9470       if (cum->words + arg_size > max_arg_words)
9471 	return NULL_RTX;
9472     }
9473   else
9474     {
9475       if (arg_size > 1)
9476 	alignment = cum->words & 1;
9477       if (cum->words + alignment >= max_arg_words)
9478 	return NULL_RTX;
9479     }
9480 
9481   /* The 32bit ABIs and the 64bit ABIs are rather different,
9482      particularly in their handling of FP registers.  We might
9483      be able to cleverly share code between them, but I'm not
9484      going to bother in the hope that splitting them up results
9485      in code that is more easily understood.  */
9486 
9487   if (TARGET_64BIT)
9488     {
9489       /* Advance the base registers to their current locations.
9490 
9491          Remember, gprs grow towards smaller register numbers while
9492 	 fprs grow to higher register numbers.  Also remember that
9493 	 although FP regs are 32-bit addressable, we pretend that
9494 	 the registers are 64-bits wide.  */
9495       gpr_reg_base = 26 - cum->words;
9496       fpr_reg_base = 32 + cum->words;
9497 
9498       /* Arguments wider than one word and small aggregates need special
9499 	 treatment.  */
9500       if (arg_size > 1
9501 	  || mode == BLKmode
9502 	  || (type && (AGGREGATE_TYPE_P (type)
9503 		       || TREE_CODE (type) == COMPLEX_TYPE
9504 		       || TREE_CODE (type) == VECTOR_TYPE)))
9505 	{
9506 	  /* Double-extended precision (80-bit), quad-precision (128-bit)
9507 	     and aggregates including complex numbers are aligned on
9508 	     128-bit boundaries.  The first eight 64-bit argument slots
9509 	     are associated one-to-one, with general registers r26
9510 	     through r19, and also with floating-point registers fr4
9511 	     through fr11.  Arguments larger than one word are always
9512 	     passed in general registers.
9513 
9514 	     Using a PARALLEL with a word mode register results in left
9515 	     justified data on a big-endian target.  */
9516 
9517 	  rtx loc[8];
9518 	  int i, offset = 0, ub = arg_size;
9519 
9520 	  /* Align the base register.  */
9521 	  gpr_reg_base -= alignment;
9522 
9523 	  ub = MIN (ub, max_arg_words - cum->words - alignment);
9524 	  for (i = 0; i < ub; i++)
9525 	    {
9526 	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9527 					  gen_rtx_REG (DImode, gpr_reg_base),
9528 					  GEN_INT (offset));
9529 	      gpr_reg_base -= 1;
9530 	      offset += 8;
9531 	    }
9532 
9533 	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9534 	}
9535      }
9536   else
9537     {
9538       /* If the argument is larger than a word, then we know precisely
9539 	 which registers we must use.  */
9540       if (arg_size > 1)
9541 	{
9542 	  if (cum->words)
9543 	    {
9544 	      gpr_reg_base = 23;
9545 	      fpr_reg_base = 38;
9546 	    }
9547 	  else
9548 	    {
9549 	      gpr_reg_base = 25;
9550 	      fpr_reg_base = 34;
9551 	    }
9552 
9553 	  /* Structures 5 to 8 bytes in size are passed in the general
9554 	     registers in the same manner as other non floating-point
9555 	     objects.  The data is right-justified and zero-extended
9556 	     to 64 bits.  This is opposite to the normal justification
9557 	     used on big endian targets and requires special treatment.
9558 	     We now define BLOCK_REG_PADDING to pad these objects.
9559 	     Aggregates, complex and vector types are passed in the same
9560 	     manner as structures.  */
9561 	  if (mode == BLKmode
9562 	      || (type && (AGGREGATE_TYPE_P (type)
9563 			   || TREE_CODE (type) == COMPLEX_TYPE
9564 			   || TREE_CODE (type) == VECTOR_TYPE)))
9565 	    {
9566 	      rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9567 					   gen_rtx_REG (DImode, gpr_reg_base),
9568 					   const0_rtx);
9569 	      return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9570 	    }
9571 	}
9572       else
9573         {
9574 	   /* We have a single word (32 bits).  A simple computation
9575 	      will get us the register #s we need.  */
9576 	   gpr_reg_base = 26 - cum->words;
9577 	   fpr_reg_base = 32 + 2 * cum->words;
9578 	}
9579     }
9580 
9581   /* Determine if the argument needs to be passed in both general and
9582      floating point registers.  */
9583   if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9584        /* If we are doing soft-float with portable runtime, then there
9585 	  is no need to worry about FP regs.  */
9586        && !TARGET_SOFT_FLOAT
9587        /* The parameter must be some kind of scalar float, else we just
9588 	  pass it in integer registers.  */
9589        && GET_MODE_CLASS (mode) == MODE_FLOAT
9590        /* The target function must not have a prototype.  */
9591        && cum->nargs_prototype <= 0
9592        /* libcalls do not need to pass items in both FP and general
9593 	  registers.  */
9594        && type != NULL_TREE
9595        /* All this hair applies to "outgoing" args only.  This includes
9596 	  sibcall arguments set up with FUNCTION_INCOMING_ARG.  */
9597        && !cum->incoming)
9598       /* Also pass outgoing floating arguments in both registers in indirect
9599 	 calls with the 32-bit ABI and the HP assembler since there is no
9600 	 way to specify the argument locations in static functions.  */
9601       || (!TARGET_64BIT
9602 	  && !TARGET_GAS
9603 	  && !cum->incoming
9604 	  && cum->indirect
9605 	  && GET_MODE_CLASS (mode) == MODE_FLOAT))
9606     {
9607       retval
9608 	= gen_rtx_PARALLEL
9609 	    (mode,
9610 	     gen_rtvec (2,
9611 			gen_rtx_EXPR_LIST (VOIDmode,
9612 					   gen_rtx_REG (mode, fpr_reg_base),
9613 					   const0_rtx),
9614 			gen_rtx_EXPR_LIST (VOIDmode,
9615 					   gen_rtx_REG (mode, gpr_reg_base),
9616 					   const0_rtx)));
9617     }
9618   else
9619     {
9620       /* See if we should pass this parameter in a general register.  */
9621       if (TARGET_SOFT_FLOAT
9622 	  /* Indirect calls in the normal 32-bit ABI require all arguments
9623 	     to be passed in general registers.  */
9624 	  || (!TARGET_PORTABLE_RUNTIME
9625 	      && !TARGET_64BIT
9626 	      && !TARGET_ELF32
9627 	      && cum->indirect)
9628 	  /* If the parameter is not a scalar floating-point parameter,
9629 	     then it belongs in GPRs.  */
9630 	  || GET_MODE_CLASS (mode) != MODE_FLOAT
9631 	  /* Structure with single SFmode field belongs in GPR.  */
9632 	  || (type && AGGREGATE_TYPE_P (type)))
9633 	retval = gen_rtx_REG (mode, gpr_reg_base);
9634       else
9635 	retval = gen_rtx_REG (mode, fpr_reg_base);
9636     }
9637   return retval;
9638 }
9639 
9640 /* Arguments larger than one word are double word aligned.  */
9641 
9642 static unsigned int
9643 pa_function_arg_boundary (machine_mode mode, const_tree type)
9644 {
9645   bool singleword = (type
9646 		     ? (integer_zerop (TYPE_SIZE (type))
9647 			|| !TREE_CONSTANT (TYPE_SIZE (type))
9648 			|| int_size_in_bytes (type) <= UNITS_PER_WORD)
9649 		     : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9650 
9651   return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
9652 }
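
/* For example, on the 32-bit ports an SImode argument keeps
   PARM_BOUNDARY while a DImode argument or an 8-byte aggregate gets
   MAX_PARM_BOUNDARY.  Zero-sized and variable-sized types count as
   single words because such objects are passed by reference.  */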
9653 
9654 /* If this arg would be passed totally in registers or totally on the stack,
9655    then this routine should return zero.  */
9656 
9657 static int
9658 pa_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
9659 		      tree type, bool named ATTRIBUTE_UNUSED)
9660 {
9661   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9662   unsigned int max_arg_words = 8;
9663   unsigned int offset = 0;
9664 
9665   if (!TARGET_64BIT)
9666     return 0;
9667 
9668   if (pa_function_arg_size (mode, type) > 1 && (cum->words & 1))
9669     offset = 1;
9670 
9671   if (cum->words + offset + pa_function_arg_size (mode, type) <= max_arg_words)
9672     /* Arg fits fully into registers.  */
9673     return 0;
9674   else if (cum->words + offset >= max_arg_words)
9675     /* Arg fully on the stack.  */
9676     return 0;
9677   else
9678     /* Arg is split.  */
9679     return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
9680 }
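
/* For example, on the 64-bit target a three-word argument starting at
   cum->words == 6 neither fits entirely in registers (6 + 3 > 8) nor
   starts on the stack (6 < 8), so the code above reports
   (8 - 6) * 8 == 16 bytes passed in registers, with the final word
   passed on the stack.  */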
9681 
9682 
9683 /* A get_unnamed_section callback for switching to the text section.
9684 
9685    This function is only used with SOM.  Because we don't support
9686    named subspaces, we can only create a new subspace or switch back
9687    to the default text subspace.  */
9688 
9689 static void
9690 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9691 {
9692   gcc_assert (TARGET_SOM);
9693   if (TARGET_GAS)
9694     {
9695       if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9696 	{
9697 	  /* We only want to emit a .nsubspa directive once at the
9698 	     start of the function.  */
9699 	  cfun->machine->in_nsubspa = 1;
9700 
9701 	  /* Create a new subspace for the text.  This provides
9702 	     better stub placement and one-only functions.  */
9703 	  if (cfun->decl
9704 	      && DECL_ONE_ONLY (cfun->decl)
9705 	      && !DECL_WEAK (cfun->decl))
9706 	    {
9707 	      output_section_asm_op ("\t.SPACE $TEXT$\n"
9708 				     "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9709 				     "ACCESS=44,SORT=24,COMDAT");
9710 	      return;
9711 	    }
9712 	}
9713       else
9714 	{
9715 	  /* There isn't a current function or the body of the current
9716 	     function has been completed.  So, we are changing to the
9717 	     text section to output debugging information.  Thus, we
9718 	     need to forget that we are in the text section so that
9719 	     varasm.c will call us when text_section is selected again.  */
9720 	  gcc_assert (!cfun || !cfun->machine
9721 		      || cfun->machine->in_nsubspa == 2);
9722 	  in_section = NULL;
9723 	}
9724       output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9725       return;
9726     }
9727   output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9728 }
9729 
9730 /* A get_unnamed_section callback for switching to comdat data
9731    sections.  This function is only used with SOM.  */
9732 
9733 static void
9734 som_output_comdat_data_section_asm_op (const void *data)
9735 {
9736   in_section = NULL;
9737   output_section_asm_op (data);
9738 }
9739 
9740 /* Implement TARGET_ASM_INIT_SECTIONS.  */
9741 
9742 static void
9743 pa_som_asm_init_sections (void)
9744 {
9745   text_section
9746     = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9747 
9748   /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9749      is not being generated.  */
9750   som_readonly_data_section
9751     = get_unnamed_section (0, output_section_asm_op,
9752 			   "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9753 
9754   /* When secondary definitions are not supported, SOM makes readonly
9755      data one-only by creating a new $LIT$ subspace in $TEXT$ with
9756      the comdat flag.  */
9757   som_one_only_readonly_data_section
9758     = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9759 			   "\t.SPACE $TEXT$\n"
9760 			   "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9761 			   "ACCESS=0x2c,SORT=16,COMDAT");
9762 
9763 
9764   /* When secondary definitions are not supported, SOM makes data one-only
9765      by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
9766   som_one_only_data_section
9767     = get_unnamed_section (SECTION_WRITE,
9768 			   som_output_comdat_data_section_asm_op,
9769 			   "\t.SPACE $PRIVATE$\n"
9770 			   "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9771 			   "ACCESS=31,SORT=24,COMDAT");
9772 
9773   if (flag_tm)
9774     som_tm_clone_table_section
9775       = get_unnamed_section (0, output_section_asm_op,
9776 			     "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
9777 
9778   /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9779      which reference data within the $TEXT$ space (for example constant
9780      strings in the $LIT$ subspace).
9781 
9782      The assemblers (GAS and HP as) both have problems with handling
9783      the difference of two symbols which is the other correct way to
9784      reference constant data during PIC code generation.
9785 
9786      So, there's no way to reference constant data which is in the
9787      $TEXT$ space during PIC generation.  Instead place all constant
9788      data into the $PRIVATE$ subspace (this reduces sharing, but it
9789      works correctly).  */
9790   readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9791 
9792   /* We must not have a reference to an external symbol defined in a
9793      shared library in a readonly section, else the SOM linker will
9794      complain.
9795 
9796      So, we force exception information into the data section.  */
9797   exception_section = data_section;
9798 }
9799 
9800 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION.  */
9801 
9802 static section *
9803 pa_som_tm_clone_table_section (void)
9804 {
9805   return som_tm_clone_table_section;
9806 }
9807 
9808 /* On hpux10, the linker will give an error if we have a reference
9809    in the read-only data section to a symbol defined in a shared
9810    library.  Therefore, expressions that might require a reloc can
9811    not be placed in the read-only data section.  */
9812 
9813 static section *
9814 pa_select_section (tree exp, int reloc,
9815 		   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9816 {
9817   if (TREE_CODE (exp) == VAR_DECL
9818       && TREE_READONLY (exp)
9819       && !TREE_THIS_VOLATILE (exp)
9820       && DECL_INITIAL (exp)
9821       && (DECL_INITIAL (exp) == error_mark_node
9822           || TREE_CONSTANT (DECL_INITIAL (exp)))
9823       && !reloc)
9824     {
9825       if (TARGET_SOM
9826 	  && DECL_ONE_ONLY (exp)
9827 	  && !DECL_WEAK (exp))
9828 	return som_one_only_readonly_data_section;
9829       else
9830 	return readonly_data_section;
9831     }
9832   else if (CONSTANT_CLASS_P (exp) && !reloc)
9833     return readonly_data_section;
9834   else if (TARGET_SOM
9835 	   && TREE_CODE (exp) == VAR_DECL
9836 	   && DECL_ONE_ONLY (exp)
9837 	   && !DECL_WEAK (exp))
9838     return som_one_only_data_section;
9839   else
9840     return data_section;
9841 }
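
/* For example, an initialized "static const" object with no relocations
   goes to readonly_data_section, but if it is a one-only (COMDAT),
   non-weak definition on SOM it instead goes to the comdat subspace
   created in pa_som_asm_init_sections above.  */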
9842 
9843 /* Implement pa_reloc_rw_mask.  */
9844 
9845 static int
9846 pa_reloc_rw_mask (void)
9847 {
9848   /* We force (const (plus (symbol) (const_int))) to memory when the
9849      const_int doesn't fit in a 14-bit integer.  The SOM linker can't
9850      handle this construct in read-only memory and we want to avoid
9851      this for ELF.  So, we always force an RTX needing relocation to
9852      the data section.  */
9853   return 3;
9854 }
9855 
9856 static void
9857 pa_globalize_label (FILE *stream, const char *name)
9858 {
9859   /* We only handle DATA objects here, functions are globalized in
9860      ASM_DECLARE_FUNCTION_NAME.  */
9861   if (! FUNCTION_NAME_P (name))
9862     {
9863       fputs ("\t.EXPORT ", stream);
9864       assemble_name (stream, name);
9865       fputs (",DATA\n", stream);
9866     }
9867 }
9868 
9869 /* Worker function for TARGET_STRUCT_VALUE_RTX.  */
9870 
9871 static rtx
9872 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9873 		     int incoming ATTRIBUTE_UNUSED)
9874 {
9875   return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9876 }
9877 
9878 /* Worker function for TARGET_RETURN_IN_MEMORY.  */
9879 
9880 bool
9881 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9882 {
9883   /* SOM ABI says that objects larger than 64 bits are returned in memory.
9884      PA64 ABI says that objects larger than 128 bits are returned in memory.
9885      Note, int_size_in_bytes can return -1 if the size of the object is
9886      variable or larger than the maximum value that can be expressed as
9887      a HOST_WIDE_INT.   It can also return zero for an empty type.  The
9888      simplest way to handle variable and empty types is to pass them in
9889      memory.  This avoids problems in defining the boundaries of argument
9890      slots, allocating registers, etc.  */
9891   return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9892 	  || int_size_in_bytes (type) <= 0);
9893 }
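
/* For example, a 12-byte struct is returned in memory on the 32-bit
   ports (12 > 8) but in registers on the 64-bit port (12 <= 16), and
   an empty struct (size 0) is returned in memory everywhere.  */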
9894 
9895 /* Structure to hold declaration and name of external symbols that are
9896    emitted by GCC.  We generate a vector of these symbols and output them
9897    at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9898    This avoids putting out names that are never really used.  */
9899 
9900 typedef struct GTY(()) extern_symbol
9901 {
9902   tree decl;
9903   const char *name;
9904 } extern_symbol;
9905 
9906 /* Define gc'd vector type for extern_symbol.  */
9907 
9908 /* Vector of extern_symbol pointers.  */
9909 static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
9910 
9911 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9912 /* Mark DECL (name NAME) as an external reference (assembler output
9913    file FILE).  This saves the names to output at the end of the file
9914    if actually referenced.  */
9915 
9916 void
9917 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9918 {
9919   gcc_assert (file == asm_out_file);
9920   extern_symbol p = {decl, name};
9921   vec_safe_push (extern_symbols, p);
9922 }
9923 #endif
9924 
9925 /* Output text required at the end of an assembler file.
9926    This includes deferred plabels and .import directives for
9927    all external symbols that were actually referenced.  */
9928 
9929 static void
9930 pa_file_end (void)
9931 {
9932 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9933   unsigned int i;
9934   extern_symbol *p;
9935 
9936   if (!NO_DEFERRED_PROFILE_COUNTERS)
9937     output_deferred_profile_counters ();
9938 #endif
9939 
9940   output_deferred_plabels ();
9941 
9942 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9943   for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
9944     {
9945       tree decl = p->decl;
9946 
9947       if (!TREE_ASM_WRITTEN (decl)
9948 	  && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9949 	ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9950     }
9951 
9952   vec_free (extern_symbols);
9953 #endif
9954 
9955   if (NEED_INDICATE_EXEC_STACK)
9956     file_end_indicate_exec_stack ();
9957 }
9958 
9959 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
9960 
9961 static bool
9962 pa_can_change_mode_class (machine_mode from, machine_mode to,
9963 			  reg_class_t rclass)
9964 {
9965   if (from == to)
9966     return true;
9967 
9968   if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
9969     return true;
9970 
9971   /* Reject changes to/from modes with zero size.  */
9972   if (!GET_MODE_SIZE (from) || !GET_MODE_SIZE (to))
9973     return false;
9974 
9975   /* Reject changes to/from complex and vector modes.  */
9976   if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
9977       || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
9978     return false;
9979 
9980   /* There is no way to load QImode or HImode values directly from memory
9981      to a FP register.  SImode loads to the FP registers are not zero
9982      extended.  On the 64-bit target, this conflicts with the definition
9983      of LOAD_EXTEND_OP.  Thus, we reject all mode changes in the FP registers
9984      except for DImode to SImode on the 64-bit target.  It is handled by
9985      register renaming in pa_print_operand.  */
9986   if (MAYBE_FP_REG_CLASS_P (rclass))
9987     return TARGET_64BIT && from == DImode && to == SImode;
9988 
9989   /* TARGET_HARD_REGNO_MODE_OK places modes with sizes larger than a word
9990      in specific sets of registers.  Thus, we cannot allow changing
9991      to a larger mode when it's larger than a word.  */
9992   if (GET_MODE_SIZE (to) > UNITS_PER_WORD
9993       && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
9994     return false;
9995 
9996   return true;
9997 }
9998 
9999 /* Implement TARGET_MODES_TIEABLE_P.
10000 
10001    We should return FALSE for QImode and HImode because these modes
10002    are not ok in the floating-point registers.  However, this prevents
10003    tying these modes to SImode and DImode in the general registers.
10004    So, this isn't a good idea.  We rely on TARGET_HARD_REGNO_MODE_OK and
10005    TARGET_CAN_CHANGE_MODE_CLASS to prevent these modes from being used
10006    in the floating-point registers.  */
10007 
10008 static bool
10009 pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10010 {
10011   /* Don't tie modes in different classes.  */
10012   if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
10013     return false;
10014 
10015   return true;
10016 }
10017 
10018 
10019 /* Length in units of the trampoline instruction code.  */
10020 
10021 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 36 : 48))
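
/* These sizes correspond to the instruction counts in the templates
   below: 6 words (24 bytes) for the 64-bit template, 9 (36 bytes) for
   PA 2.0 and 12 (48 bytes) for PA 1.X.  */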
10022 
10023 
10024 /* Output assembler code for a block containing the constant parts
10025    of a trampoline, leaving space for the variable parts.
10026 
10027    The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
10028    and then branches to the specified routine.
10029 
10030    The code template is copied from the text segment to a stack
10031    location by pa_trampoline_init, which patches it with valid
10032    values; the trampoline is then entered as a subroutine.
10033 
10034    It is best to keep this as small as possible to avoid having to
10035    flush multiple lines in the cache.  */
10036 
10037 static void
10038 pa_asm_trampoline_template (FILE *f)
10039 {
10040   if (!TARGET_64BIT)
10041     {
10042       if (TARGET_PA_20)
10043 	{
10044 	  fputs ("\tmfia	%r20\n", f);
10045 	  fputs ("\tldw		48(%r20),%r22\n", f);
10046 	  fputs ("\tcopy	%r22,%r21\n", f);
10047 	  fputs ("\tbb,>=,n	%r22,30,.+16\n", f);
10048 	  fputs ("\tdepwi	0,31,2,%r22\n", f);
10049 	  fputs ("\tldw		0(%r22),%r21\n", f);
10050 	  fputs ("\tldw		4(%r22),%r19\n", f);
10051 	  fputs ("\tbve		(%r21)\n", f);
10052 	  fputs ("\tldw		52(%r20),%r29\n", f);
10053 	  fputs ("\t.word	0\n", f);
10054 	  fputs ("\t.word	0\n", f);
10055 	  fputs ("\t.word	0\n", f);
10056 	}
10057       else
10058 	{
10059 	  if (ASSEMBLER_DIALECT == 0)
10060 	    {
10061 	      fputs ("\tbl	.+8,%r20\n", f);
10062 	      fputs ("\tdepi	0,31,2,%r20\n", f);
10063 	    }
10064 	  else
10065 	    {
10066 	      fputs ("\tb,l	.+8,%r20\n", f);
10067 	      fputs ("\tdepwi	0,31,2,%r20\n", f);
10068 	    }
10069 	  fputs ("\tldw		40(%r20),%r22\n", f);
10070 	  fputs ("\tcopy	%r22,%r21\n", f);
10071 	  fputs ("\tbb,>=,n	%r22,30,.+16\n", f);
10072 	  if (ASSEMBLER_DIALECT == 0)
10073 	    fputs ("\tdepi	0,31,2,%r22\n", f);
10074 	  else
10075 	    fputs ("\tdepwi	0,31,2,%r22\n", f);
10076 	  fputs ("\tldw		0(%r22),%r21\n", f);
10077 	  fputs ("\tldw		4(%r22),%r19\n", f);
10078 	  fputs ("\tldsid	(%r21),%r1\n", f);
10079 	  fputs ("\tmtsp	%r1,%sr0\n", f);
10080 	  fputs ("\tbe		0(%sr0,%r21)\n", f);
10081 	  fputs ("\tldw		44(%r20),%r29\n", f);
10082 	}
10083       fputs ("\t.word	0\n", f);
10084       fputs ("\t.word	0\n", f);
10085       fputs ("\t.word	0\n", f);
10086       fputs ("\t.word	0\n", f);
10087     }
10088   else
10089     {
10090       fputs ("\t.dword 0\n", f);
10091       fputs ("\t.dword 0\n", f);
10092       fputs ("\t.dword 0\n", f);
10093       fputs ("\t.dword 0\n", f);
10094       fputs ("\tmfia	%r31\n", f);
10095       fputs ("\tldd	24(%r31),%r27\n", f);
10096       fputs ("\tldd	32(%r31),%r31\n", f);
10097       fputs ("\tldd	16(%r27),%r1\n", f);
10098       fputs ("\tbve	(%r1)\n", f);
10099       fputs ("\tldd	24(%r27),%r27\n", f);
10100       fputs ("\t.dword 0  ; fptr\n", f);
10101       fputs ("\t.dword 0  ; static link\n", f);
10102     }
10103 }
10104 
10105 /* Emit RTL insns to initialize the variable parts of a trampoline.
10106    FNADDR is an RTX for the address of the function's pure code.
10107    CXT is an RTX for the static chain value for the function.
10108 
10109    Move the function address to the trampoline template at offset 48.
10110    Move the static chain value to trampoline template at offset 52.
10111    Move the trampoline address to trampoline template at offset 56.
10112    Move r19 to trampoline template at offset 60.  The latter two
10113    words create a plabel for the indirect call to the trampoline.
10114 
10115    A similar sequence is used for the 64-bit port but the plabel is
10116    at the beginning of the trampoline.
10117 
10118    Finally, the cache entries for the trampoline code are flushed.
10119    This is necessary to ensure that the trampoline instruction sequence
10120    is written to memory prior to any attempts at prefetching the code
10121    sequence.  */
10122 
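/* A sketch of the initialized 32-bit trampoline (byte offsets):

     0 .. 47   instruction template from pa_asm_trampoline_template
                 (padded with zero words on PA 2.0)
     48        function address (fnaddr)
     52        static chain value
     56        trampoline address  \  together, the plabel used for
     60        global pointer %r19 /  indirect calls

   In the 64-bit layout the plabel instead sits at offsets 16 and 24,
   the code starts at offset 32, and the function address and static
   chain are stored at offsets 56 and 64.  */
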
10123 static void
10124 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10125 {
10126   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10127   rtx start_addr = gen_reg_rtx (Pmode);
10128   rtx end_addr = gen_reg_rtx (Pmode);
10129   rtx line_length = gen_reg_rtx (Pmode);
10130   rtx r_tramp, tmp;
10131 
10132   emit_block_move (m_tramp, assemble_trampoline_template (),
10133 		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10134   r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10135 
10136   if (!TARGET_64BIT)
10137     {
10138       tmp = adjust_address (m_tramp, Pmode, 48);
10139       emit_move_insn (tmp, fnaddr);
10140       tmp = adjust_address (m_tramp, Pmode, 52);
10141       emit_move_insn (tmp, chain_value);
10142 
10143       /* Create a fat pointer for the trampoline.  */
10144       tmp = adjust_address (m_tramp, Pmode, 56);
10145       emit_move_insn (tmp, r_tramp);
10146       tmp = adjust_address (m_tramp, Pmode, 60);
10147       emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10148 
10149       /* fdc and fic only use registers for the address to flush,
10150 	 they do not accept integer displacements.  We align the
10151 	 start and end addresses to the beginning of their respective
10152 	 cache lines to minimize the number of lines flushed.  */
10153       emit_insn (gen_andsi3 (start_addr, r_tramp,
10154 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10155       tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
10156 					     TRAMPOLINE_CODE_SIZE-1));
10157       emit_insn (gen_andsi3 (end_addr, tmp,
10158 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10159       emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10160       emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10161       emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10162 				    gen_reg_rtx (Pmode),
10163 				    gen_reg_rtx (Pmode)));
10164     }
10165   else
10166     {
10167       tmp = adjust_address (m_tramp, Pmode, 56);
10168       emit_move_insn (tmp, fnaddr);
10169       tmp = adjust_address (m_tramp, Pmode, 64);
10170       emit_move_insn (tmp, chain_value);
10171 
10172       /* Create a fat pointer for the trampoline.  */
10173       tmp = adjust_address (m_tramp, Pmode, 16);
10174       emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
10175 							    r_tramp, 32)));
10176       tmp = adjust_address (m_tramp, Pmode, 24);
10177       emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10178 
10179       /* fdc and fic only use registers for the address to flush,
10180 	 they do not accept integer displacements.  We align the
10181 	 start and end addresses to the beginning of their respective
10182 	 cache lines to minimize the number of lines flushed.  */
10183       tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
10184       emit_insn (gen_anddi3 (start_addr, tmp,
10185 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10186       tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
10187 					     TRAMPOLINE_CODE_SIZE - 1));
10188       emit_insn (gen_anddi3 (end_addr, tmp,
10189 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10190       emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10191       emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10192       emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10193 				    gen_reg_rtx (Pmode),
10194 				    gen_reg_rtx (Pmode)));
10195     }
10196 
10197 #ifdef HAVE_ENABLE_EXECUTE_STACK
10198   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10199 		     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10200 #endif
10201 }
10202 
10203 /* Perform any machine-specific adjustment in the address of the trampoline.
10204    ADDR contains the address that was passed to pa_trampoline_init.
10205    Adjust the trampoline address to point to the plabel at offset 56.  */
10206 
10207 static rtx
10208 pa_trampoline_adjust_address (rtx addr)
10209 {
10210   if (!TARGET_64BIT)
10211     addr = memory_address (Pmode, plus_constant (Pmode, addr, 58));
10212   return addr;
10213 }
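
/* Note the adjustment above is 58, not 56: offset 56 reaches the plabel
   and the extra 2 sets bit 30 of the resulting function pointer, marking
   it as a plabel.  Indirect-call code tests that bit (as the template's
   own "bb,>=,n %r22,30" does) and masks off the low two bits before
   using the address.  */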
10214 
10215 static rtx
10216 pa_delegitimize_address (rtx orig_x)
10217 {
10218   rtx x = delegitimize_mem_from_attrs (orig_x);
10219 
10220   if (GET_CODE (x) == LO_SUM
10221       && GET_CODE (XEXP (x, 1)) == UNSPEC
10222       && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10223     return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10224   return x;
10225 }
10226 
10227 static rtx
10228 pa_internal_arg_pointer (void)
10229 {
10230   /* The argument pointer and the hard frame pointer are the same in
10231      the 32-bit runtime, so we don't need a copy.  */
10232   if (TARGET_64BIT)
10233     return copy_to_reg (virtual_incoming_args_rtx);
10234   else
10235     return virtual_incoming_args_rtx;
10236 }
10237 
10238 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10239    Frame pointer elimination is automatically handled.  */
10240 
10241 static bool
10242 pa_can_eliminate (const int from, const int to)
10243 {
10244   /* The argument cannot be eliminated in the 64-bit runtime.  */
10245   if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10246     return false;
10247 
10248   return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10249           ? ! frame_pointer_needed
10250           : true);
10251 }
10252 
10253 /* Define the offset between two registers, FROM to be eliminated and its
10254    replacement TO, at the start of a routine.  */
10255 HOST_WIDE_INT
10256 pa_initial_elimination_offset (int from, int to)
10257 {
10258   HOST_WIDE_INT offset;
10259 
10260   if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10261       && to == STACK_POINTER_REGNUM)
10262     offset = -pa_compute_frame_size (get_frame_size (), 0);
10263   else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10264     offset = 0;
10265   else
10266     gcc_unreachable ();
10267 
10268   return offset;
10269 }
10270 
10271 static void
10272 pa_conditional_register_usage (void)
10273 {
10274   int i;
10275 
10276   if (!TARGET_64BIT && !TARGET_PA_11)
10277     {
10278       for (i = 56; i <= FP_REG_LAST; i++)
10279 	fixed_regs[i] = call_used_regs[i] = 1;
10280       for (i = 33; i < 56; i += 2)
10281 	fixed_regs[i] = call_used_regs[i] = 1;
10282     }
10283   if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
10284     {
10285       for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10286 	fixed_regs[i] = call_used_regs[i] = 1;
10287     }
10288   if (flag_pic)
10289     fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10290 }
10291 
10292 /* Target hook for c_mode_for_suffix.  */
10293 
10294 static machine_mode
10295 pa_c_mode_for_suffix (char suffix)
10296 {
10297   if (HPUX_LONG_DOUBLE_LIBRARY)
10298     {
10299       if (suffix == 'q')
10300 	return TFmode;
10301     }
10302 
10303   return VOIDmode;
10304 }
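
/* With HPUX_LONG_DOUBLE_LIBRARY this accepts C constants such as 1.5q,
   giving them TFmode (the 128-bit long-double format); on other
   configurations the suffix is rejected.  */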
10305 
10306 /* Target hook for function_section.  */
10307 
10308 static section *
10309 pa_function_section (tree decl, enum node_frequency freq,
10310 		     bool startup, bool exit)
10311 {
10312   /* Put functions in text section if target doesn't have named sections.  */
10313   if (!targetm_common.have_named_sections)
10314     return text_section;
10315 
10316   /* Force nested functions into the same section as the containing
10317      function.  */
10318   if (decl
10319       && DECL_SECTION_NAME (decl) == NULL
10320       && DECL_CONTEXT (decl) != NULL_TREE
10321       && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10322       && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
10323     return function_section (DECL_CONTEXT (decl));
10324 
10325   /* Otherwise, use the default function section.  */
10326   return default_function_section (decl, freq, startup, exit);
10327 }
10328 
10329 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10330 
10331    In 64-bit mode, we reject CONST_DOUBLES.  We also reject CONST_INTS
10332    that need more than three instructions to load prior to reload.  This
10333    limit is somewhat arbitrary.  It takes three instructions to load a
10334    CONST_INT from memory but two are memory accesses.  It may be better
10335    to increase the allowed range for CONST_INTS.  We may also be able
10336    to handle CONST_DOUBLES.  */
10337 
10338 static bool
10339 pa_legitimate_constant_p (machine_mode mode, rtx x)
10340 {
10341   if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10342     return false;
10343 
10344   if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10345     return false;
10346 
10347   /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10348      legitimate constants.  The other variants can't be handled by
10349      the move patterns after reload starts.  */
10350   if (tls_referenced_p (x))
10351     return false;
10352 
10353   if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10354     return false;
10355 
10356   if (TARGET_64BIT
10357       && HOST_BITS_PER_WIDE_INT > 32
10358       && GET_CODE (x) == CONST_INT
10359       && !reload_in_progress
10360       && !reload_completed
10361       && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
10362       && !pa_cint_ok_for_move (UINTVAL (x)))
10363     return false;
10364 
10365   if (function_label_operand (x, mode))
10366     return false;
10367 
10368   return true;
10369 }
10370 
10371 /* Implement TARGET_SECTION_TYPE_FLAGS.  */
10372 
10373 static unsigned int
10374 pa_section_type_flags (tree decl, const char *name, int reloc)
10375 {
10376   unsigned int flags;
10377 
10378   flags = default_section_type_flags (decl, name, reloc);
10379 
10380   /* Function labels are placed in the constant pool.  This can
10381      cause a section conflict if decls are put in ".data.rel.ro"
10382      or ".data.rel.ro.local" using the __attribute__ construct.  */
10383   if (strcmp (name, ".data.rel.ro") == 0
10384       || strcmp (name, ".data.rel.ro.local") == 0)
10385     flags |= SECTION_WRITE | SECTION_RELRO;
10386 
10387   return flags;
10388 }
10389 
10390 /* pa_legitimate_address_p recognizes an RTL expression that is a
10391    valid memory address for an instruction.  The MODE argument is the
10392    machine mode for the MEM expression that wants to use this address.
10393 
10394    On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10395    REG+REG, and REG+(REG*SCALE).  The indexed address forms are only
10396    available with floating point loads and stores, and integer loads.
10397    We get better code by allowing indexed addresses in the initial
10398    RTL generation.
10399 
10400    The acceptance of indexed addresses as legitimate implies that we
10401    must provide patterns for doing indexed integer stores, or the move
10402    expanders must force the address of an indexed store to a register.
10403    We have adopted the latter approach.
10404 
10405    Another function of pa_legitimate_address_p is to ensure that
10406    the base register is a valid pointer for indexed instructions.
10407    On targets that have non-equivalent space registers, we have to
10408    know at the time of assembler output which register in a REG+REG
10409    pair is the base register.  The REG_POINTER flag is sometimes lost
10410    in reload and the following passes, so it can't be relied on during
10411    code generation.  Thus, we either have to canonicalize the order
10412    of the registers in REG+REG indexed addresses, or treat REG+REG
10413    addresses separately and provide patterns for both permutations.
10414 
10415    The latter approach requires several hundred additional lines of
10416    code in pa.md.  The downside to canonicalizing is that a PLUS
10417    in the wrong order can't combine to form a scaled indexed
10418    memory operand.  As we won't need to canonicalize the operands if
10419    the REG_POINTER lossage can be fixed, it seems better to canonicalize.
10420 
10421    We initially break out scaled indexed addresses in canonical order
10422    in pa_emit_move_sequence.  LEGITIMIZE_ADDRESS also canonicalizes
10423    scaled indexed addresses during RTL generation.  However, fold_rtx
10424    has its own opinion on how the operands of a PLUS should be ordered.
10425    If one of the operands is equivalent to a constant, it will make
10426    that operand the second operand.  As the base register is likely to
10427    be equivalent to a SYMBOL_REF, we have made it the second operand.
10428 
10429    pa_legitimate_address_p accepts REG+REG as legitimate when the
10430    operands are in the order INDEX+BASE on targets with non-equivalent
10431    space registers, and in any order on targets with equivalent space
10432    registers.  It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10433 
10434    We treat a SYMBOL_REF as legitimate if it is part of the current
10435    function's constant-pool, because such addresses can actually be
10436    output as REG+SMALLINT.  */
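
/* For example, on targets with non-equivalent space registers the
   following forms are accepted for a DFmode access:

     (plus (reg index) (reg/f base))                       ; REG+REG
     (plus (mult (reg index) (const_int 8)) (reg/f base))  ; scaled index
     (plus (reg/f base) (const_int 12))                    ; REG+SMALLINT

   whereas (plus (reg/f base) (reg index)) is rejected before reload
   because the base register must appear as the second operand.  */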
10437 
10438 static bool
10439 pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
10440 {
10441   if ((REG_P (x)
10442        && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
10443 		  : REG_OK_FOR_BASE_P (x)))
10444       || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
10445 	   || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
10446 	  && REG_P (XEXP (x, 0))
10447 	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10448 		     : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
10449     return true;
10450 
10451   if (GET_CODE (x) == PLUS)
10452     {
10453       rtx base, index;
10454 
10455       /* For REG+REG, the base register should be in XEXP (x, 1),
10456 	 so check it first.  */
10457       if (REG_P (XEXP (x, 1))
10458 	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
10459 		     : REG_OK_FOR_BASE_P (XEXP (x, 1))))
10460 	base = XEXP (x, 1), index = XEXP (x, 0);
10461       else if (REG_P (XEXP (x, 0))
10462 	       && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10463 			  : REG_OK_FOR_BASE_P (XEXP (x, 0))))
10464 	base = XEXP (x, 0), index = XEXP (x, 1);
10465       else
10466 	return false;
10467 
10468       if (GET_CODE (index) == CONST_INT)
10469 	{
10470 	  if (INT_5_BITS (index))
10471 	    return true;
10472 
10473 	  /* When INT14_OK_STRICT is false, a secondary reload is needed
10474 	     to adjust the displacement of SImode and DImode floating point
10475 	     instructions but this may fail when the register also needs
10476 	     reloading.  So, we return false when STRICT is true.  We
10477 	     also reject long displacements for float mode addresses since
10478 	     the majority of accesses will use floating point instructions
10479 	     that don't support 14-bit offsets.  */
10480 	  if (!INT14_OK_STRICT
10481 	      && (strict || !(reload_in_progress || reload_completed))
10482 	      && mode != QImode
10483 	      && mode != HImode)
10484 	    return false;
10485 
10486 	  return base14_operand (index, mode);
10487 	}
10488 
10489       if (!TARGET_DISABLE_INDEXING
10490 	  /* Only accept the "canonical" INDEX+BASE operand order
10491 	     on targets with non-equivalent space registers.  */
10492 	  && (TARGET_NO_SPACE_REGS
10493 	      ? REG_P (index)
10494 	      : (base == XEXP (x, 1) && REG_P (index)
10495 		 && (reload_completed
10496 		     || (reload_in_progress && HARD_REGISTER_P (base))
10497 		     || REG_POINTER (base))
10498 		 && (reload_completed
10499 		     || (reload_in_progress && HARD_REGISTER_P (index))
10500 		     || !REG_POINTER (index))))
10501 	  && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
10502 	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
10503 		     : REG_OK_FOR_INDEX_P (index))
10504 	  && borx_reg_operand (base, Pmode)
10505 	  && borx_reg_operand (index, Pmode))
10506 	return true;
10507 
10508       if (!TARGET_DISABLE_INDEXING
10509 	  && GET_CODE (index) == MULT
10510 	  /* Only accept base operands with the REG_POINTER flag prior to
10511 	     reload on targets with non-equivalent space registers.  */
10512 	  && (TARGET_NO_SPACE_REGS
10513 	      || (base == XEXP (x, 1)
10514 		  && (reload_completed
10515 		      || (reload_in_progress && HARD_REGISTER_P (base))
10516 		      || REG_POINTER (base))))
10517 	  && REG_P (XEXP (index, 0))
10518 	  && GET_MODE (XEXP (index, 0)) == Pmode
10519 	  && MODE_OK_FOR_SCALED_INDEXING_P (mode)
10520 	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
10521 		     : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
10522 	  && GET_CODE (XEXP (index, 1)) == CONST_INT
10523 	  && INTVAL (XEXP (index, 1))
10524 	     == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
10525 	  && borx_reg_operand (base, Pmode))
10526 	return true;
10527 
10528       return false;
10529     }
10530 
10531   if (GET_CODE (x) == LO_SUM)
10532     {
10533       rtx y = XEXP (x, 0);
10534 
10535       if (GET_CODE (y) == SUBREG)
10536 	y = SUBREG_REG (y);
10537 
10538       if (REG_P (y)
10539 	  && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
10540 		     : REG_OK_FOR_BASE_P (y)))
10541 	{
10542 	  /* Needed for -fPIC */
10543 	  if (mode == Pmode
10544 	      && GET_CODE (XEXP (x, 1)) == UNSPEC)
10545 	    return true;
10546 
10547 	  if (!INT14_OK_STRICT
10548 	      && (strict || !(reload_in_progress || reload_completed))
10549 	      && mode != QImode
10550 	      && mode != HImode)
10551 	    return false;
10552 
10553 	  if (CONSTANT_P (XEXP (x, 1)))
10554 	    return true;
10555 	}
10556       return false;
10557     }
10558 
10559   if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
10560     return true;
10561 
10562   return false;
10563 }
10564 
10565 /* Look for machine dependent ways to make the invalid address AD a
10566    valid address.
10567 
10568    For the PA, transform:
10569 
10570         memory(X + <large int>)
10571 
10572    into:
10573 
10574         if (<large int> & mask) >= 16
10575           Y = (<large int> & ~mask) + mask + 1  Round up.
10576         else
10577           Y = (<large int> & ~mask)             Round down.
10578         Z = X + Y
10579         memory (Z + (<large int> - Y));
10580 
10581    This makes reload inheritance and reload_cse work better since Z
10582    can be reused.
10583 
10584    There may be more opportunities to improve code with this hook.  */
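
/* A worked example for the floating-point case, where MASK is 0x1f:
   reloading memory (X + 100) rounds the displacement down (100 & 0x1f
   == 4, which is less than 16) to Y == 96, producing

        memory ((X + 96) + 4)

   The inner PLUS is pushed as a reload, the residual displacement 4
   fits the short FP load/store format, and the new base X + 96 can be
   inherited by nearby accesses such as X + 104.  */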
10585 
10586 rtx
10587 pa_legitimize_reload_address (rtx ad, machine_mode mode,
10588 			      int opnum, int type,
10589 			      int ind_levels ATTRIBUTE_UNUSED)
10590 {
10591   long offset, newoffset, mask;
10592   rtx new_rtx, temp = NULL_RTX;
10593 
10594   mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
10595 	  && !INT14_OK_STRICT ? 0x1f : 0x3fff);
10596 
10597   if (optimize && GET_CODE (ad) == PLUS)
10598     temp = simplify_binary_operation (PLUS, Pmode,
10599 				      XEXP (ad, 0), XEXP (ad, 1));
10600 
10601   new_rtx = temp ? temp : ad;
10602 
10603   if (optimize
10604       && GET_CODE (new_rtx) == PLUS
10605       && GET_CODE (XEXP (new_rtx, 0)) == REG
10606       && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
10607     {
10608       offset = INTVAL (XEXP ((new_rtx), 1));
10609 
10610       /* Choose rounding direction.  Round up if we are >= halfway.  */
10611       if ((offset & mask) >= ((mask + 1) / 2))
10612 	newoffset = (offset & ~mask) + mask + 1;
10613       else
10614 	newoffset = offset & ~mask;
10615 
10616       /* Ensure that long displacements are aligned.  */
10617       if (mask == 0x3fff
10618 	  && (GET_MODE_CLASS (mode) == MODE_FLOAT
10619 	      || (TARGET_64BIT && (mode) == DImode)))
10620 	newoffset &= ~(GET_MODE_SIZE (mode) - 1);
10621 
10622       if (newoffset != 0 && VAL_14_BITS_P (newoffset))
10623 	{
10624 	  temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
10625 			       GEN_INT (newoffset));
10626 	  ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
10627 	  push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
10628 		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10629 		       opnum, (enum reload_type) type);
10630 	  return ad;
10631 	}
10632     }
10633 
10634   return NULL_RTX;
10635 }
10636 
10637 /* Output address vector.  */
10638 
10639 void
10640 pa_output_addr_vec (rtx lab, rtx body)
10641 {
10642   int idx, vlen = XVECLEN (body, 0);
10643 
10644   if (!TARGET_SOM)
10645     fputs ("\t.align 4\n", asm_out_file);
10646   targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10647   if (TARGET_GAS)
10648     fputs ("\t.begin_brtab\n", asm_out_file);
10649   for (idx = 0; idx < vlen; idx++)
10650     {
10651       ASM_OUTPUT_ADDR_VEC_ELT
10652 	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10653     }
10654   if (TARGET_GAS)
10655     fputs ("\t.end_brtab\n", asm_out_file);
10656 }
10657 
10658 /* Output address difference vector.  */
10659 
10660 void
10661 pa_output_addr_diff_vec (rtx lab, rtx body)
10662 {
10663   rtx base = XEXP (XEXP (body, 0), 0);
10664   int idx, vlen = XVECLEN (body, 1);
10665 
10666   targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10667   if (TARGET_GAS)
10668     fputs ("\t.begin_brtab\n", asm_out_file);
10669   for (idx = 0; idx < vlen; idx++)
10670     {
10671       ASM_OUTPUT_ADDR_DIFF_ELT
10672 	(asm_out_file,
10673 	 body,
10674 	 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10675 	 CODE_LABEL_NUMBER (base));
10676     }
10677   if (TARGET_GAS)
10678     fputs ("\t.end_brtab\n", asm_out_file);
10679 }
10680 
10681 /* This is a helper function for the other atomic operations.  This function
10682    emits a loop that contains SEQ that iterates until a compare-and-swap
10683    operation at the end succeeds.  MEM is the memory to be modified.  SEQ is
10684    a set of instructions that takes a value from OLD_REG as an input and
10685    produces a value in NEW_REG as an output.  Before SEQ, OLD_REG will be
10686    set to the current contents of MEM.  After SEQ, a compare-and-swap will
10687    attempt to update MEM with NEW_REG.  The function returns true when the
10688    loop was generated successfully.  */
10689 
10690 static bool
10691 pa_expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
10692 {
10693   machine_mode mode = GET_MODE (mem);
10694   rtx_code_label *label;
10695   rtx cmp_reg, success, oldval;
10696 
10697   /* The loop we want to generate looks like
10698 
10699         cmp_reg = mem;
10700       label:
10701         old_reg = cmp_reg;
10702         seq;
10703         (success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg)
10704         if (success)
10705           goto label;
10706 
10707      Note that we only do the plain load from memory once.  Subsequent
10708      iterations use the value loaded by the compare-and-swap pattern.  */
10709 
10710   label = gen_label_rtx ();
10711   cmp_reg = gen_reg_rtx (mode);
10712 
10713   emit_move_insn (cmp_reg, mem);
10714   emit_label (label);
10715   emit_move_insn (old_reg, cmp_reg);
10716   if (seq)
10717     emit_insn (seq);
10718 
10719   success = NULL_RTX;
10720   oldval = cmp_reg;
10721   if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg,
10722                                        new_reg, false, MEMMODEL_SYNC_SEQ_CST,
10723                                        MEMMODEL_RELAXED))
10724     return false;
10725 
10726   if (oldval != cmp_reg)
10727     emit_move_insn (cmp_reg, oldval);
10728 
10729   /* Mark this jump predicted not taken.  */
10730   emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
10731                            GET_MODE (success), 1, label,
10732 			   profile_probability::guessed_never ());
10733   return true;
10734 }
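
/* For example, an atomic fetch-and-add could be built on this helper
   (a hypothetical sketch; the port currently only uses it for the
   exchange loop below):

     rtx old_reg = gen_reg_rtx (mode);
     rtx new_reg = gen_reg_rtx (mode);
     start_sequence ();
     emit_insn (gen_add3_insn (new_reg, old_reg, val));
     rtx seq = get_insns ();
     end_sequence ();
     ok = pa_expand_compare_and_swap_loop (mem, old_reg, new_reg, seq);  */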
10735 
10736 /* This function tries to implement an atomic exchange operation using a
10737    compare_and_swap loop. VAL is written to *MEM.  The previous contents of
10738    *MEM are returned, using TARGET if possible.  No memory model is required
10739    since a compare_and_swap loop is seq-cst.  */
10740 
10741 rtx
10742 pa_maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val)
10743 {
10744   machine_mode mode = GET_MODE (mem);
10745 
10746   if (can_compare_and_swap_p (mode, true))
10747     {
10748       if (!target || !register_operand (target, mode))
10749         target = gen_reg_rtx (mode);
10750       if (pa_expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
10751         return target;
10752     }
10753 
10754   return NULL_RTX;
10755 }
10756 
10757 /* Implement TARGET_CALLEE_COPIES.  The callee is responsible for copying
10758    arguments passed by hidden reference in the 32-bit HP runtime.  Users
10759    can override this behavior for better compatibility with OpenMP at the
10760    risk of library incompatibilities.  Arguments are always passed by value
10761    in the 64-bit HP runtime.  */
10762 
10763 static bool
10764 pa_callee_copies (cumulative_args_t cum ATTRIBUTE_UNUSED,
10765 		  machine_mode mode ATTRIBUTE_UNUSED,
10766 		  const_tree type ATTRIBUTE_UNUSED,
10767 		  bool named ATTRIBUTE_UNUSED)
10768 {
10769   return !TARGET_CALLER_COPIES;
10770 }
10771 
10772 /* Implement TARGET_HARD_REGNO_NREGS.  */
10773 
10774 static unsigned int
10775 pa_hard_regno_nregs (unsigned int regno ATTRIBUTE_UNUSED, machine_mode mode)
10776 {
10777   return PA_HARD_REGNO_NREGS (regno, mode);
10778 }
10779 
10780 /* Implement TARGET_HARD_REGNO_MODE_OK.  */
10781 
10782 static bool
10783 pa_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10784 {
10785   return PA_HARD_REGNO_MODE_OK (regno, mode);
10786 }
10787 
10788 /* Implement TARGET_STARTING_FRAME_OFFSET.
10789 
10790    On the 32-bit ports, we reserve one slot for the previous frame
10791    pointer and one fill slot.  The fill slot is for compatibility
10792    with HP compiled programs.  On the 64-bit ports, we reserve one
10793    slot for the previous frame pointer.  */
10794 
10795 static HOST_WIDE_INT
10796 pa_starting_frame_offset (void)
10797 {
10798   return 8;
10799 }
10800 
10801 /* Figure out the size in words of the function argument.  The size
10802    returned by this function should always be greater than zero because
10803    we pass variable and zero sized objects by reference.  */
10804 
10805 HOST_WIDE_INT
10806 pa_function_arg_size (machine_mode mode, const_tree type)
10807 {
10808   HOST_WIDE_INT size;
10809 
10810   size = mode != BLKmode ? GET_MODE_SIZE (mode) : int_size_in_bytes (type);
10811   return CEIL (size, UNITS_PER_WORD);
10812 }
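
/* For example, a 9-byte BLKmode struct occupies CEIL (9, 4) == 3 words
   on the 32-bit ports and CEIL (9, 8) == 2 words on the 64-bit port.  */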
10813 
10814 #include "gt-pa.h"
10815