/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992-2016 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "varasm.h"
#include "calls.h"
#include "output.h"
#include "except.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "cfgrtl.h"
#include "opts.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"

/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  machine_mode store_mode;
  machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
	  && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}


#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (machine_mode mode, reg_class_t,
				    reg_class_t);
static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static inline rtx force_mode (machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
			     rtx, rtx);
static bool forward_branch_p (rtx_insn *);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx_insn *);
static int compute_clrmem_length (rtx_insn *);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int pa_adjust_priority (rtx_insn *, int);
static int pa_issue_rate (void);
static int pa_reloc_rw_mask (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (machine_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t, machine_mode,
				  const_tree, bool);
static int pa_arg_partial_bytes (cumulative_args_t, machine_mode,
				 tree, bool);
static void pa_function_arg_advance (cumulative_args_t, machine_mode,
				     const_tree, bool);
static rtx pa_function_arg (cumulative_args_t, machine_mode,
			    const_tree, bool);
static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
					machine_mode,
					secondary_reload_info *);
static void pa_extra_live_on_entry (bitmap);
static machine_mode pa_promote_function_mode (const_tree,
						   machine_mode, int *,
						   const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (machine_mode, rtx);
static bool pa_legitimate_constant_p (machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (machine_mode, rtx, bool);
static bool pa_callee_copies (cumulative_args_t, machine_mode,
			      const_tree, bool);

/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;

/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES pa_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p

struct gcc_target targetm = TARGET_INITIALIZER;

/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2}*.  REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */
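
  /* Illustrative example (the exact register-name spelling depends on
     what decode_reg_name accepts): an option such as
     -mfixed-range=fr4-fr31 would mark fr4 through fr31 as fixed.  */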

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of -mfixed-range must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}

/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
	switch (opt->opt_index)
	  {
	  case OPT_mfixed_range_:
	    fix_range (opt->arg);
	    break;

	  default:
	    gcc_unreachable ();
	  }
      }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
   {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
   }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "-freorder-blocks-and-partition does not work "
              "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}

enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];

static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
		      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
						 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
				   PA_BUILTIN_FABSQ, BUILT_IN_MD,
				   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
				   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
				   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
				   PA_BUILTIN_INFQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
                                   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}

static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		   machine_mode mode ATTRIBUTE_UNUSED,
		   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
	machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = const_double_from_real_value (inf, target_mode);

	tmp = validize_mem (force_const_mem (target_mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (target_mode);

	emit_move_insn (target, tmp);
	return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}

/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
pa_symbolic_expression_p (rtx x)
{

  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return symbolic_operand (x, VOIDmode);
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
	  || pa_ldil_cint_p (ival)
	  || pa_zdepi_cint_p (ival));
}

/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
pa_ldil_cint_p (unsigned HOST_WIDE_INT ival)
{
  unsigned HOST_WIDE_INT x;

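  /* Illustrative example (not from the original source): for
     IVAL = 0x12345800, the low 11 bits and bits 31 and up are all
     zero, so X below is zero and ldil can load the value.  Something
     like 0x80000800 fails on a 64-bit host because bits 31..63 are
     neither all zeros nor all ones.  */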
  x = ival & (((unsigned HOST_WIDE_INT) -1 << 31) | 0x7ff);
  return x == 0 || x == ((unsigned HOST_WIDE_INT) -1 << 31);
}

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
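  /* Hedged worked example: for X = 0x380 (three contiguous set bits),
     lsb_mask below is 0x80 and t = ((0x380 >> 4) + 0x80) & ~0x7f = 0x80,
     a power of two, so the value is accepted.  */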
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
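  /* Example (illustrative): MASK = 0x7f complements to ~0x7f; adding
     its lowest set bit (0x80) carries all the way out and leaves zero,
     which passes the power-of-two test below.  */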
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* True iff depi can be used to compute (reg | MASK).  */
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
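  /* Same trick as pa_and_mask_p, applied to MASK directly: adding the
     lowest set bit collapses a single contiguous run of ones.  E.g.,
     MASK = 0x1e gives 0x1e + 0x2 = 0x20, a power of two, so 0x1e is
     accepted (illustrative example).  */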
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx_insn *insn;

      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
	 and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
	{
	  /* Extract LABEL_REF.  */
	  if (GET_CODE (orig) == CONST)
	    orig = XEXP (XEXP (orig, 0), 0);
	  /* Extract CODE_LABEL.  */
	  orig = XEXP (orig, 0);
	  add_reg_note (insn, REG_LABEL_OPERAND, orig);
	  /* Make sure we have label and not a note.  */
	  if (LABEL_P (orig))
	    LABEL_NUSES (orig)++;
	}
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx_insn *insn;
      rtx tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
	 result.  This allows the sequence to be deleted when the final
	 result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
		 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
	{
	  /* Force function label into memory in word mode.  */
	  orig = XEXP (force_const_mem (word_mode, orig), 0);
	  /* Load plabel address from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
						         gen_rtvec (1, orig),
						         UNSPEC_DLTIND14R)));
	  emit_move_insn (reg, pic_ref);
	  /* Now load address of function descriptor.  */
	  pic_ref = gen_rtx_MEM (Pmode, reg);
	}
      else
	{
	  /* Load symbol reference from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
						         gen_rtvec (1, orig),
						         UNSPEC_DLTIND14R)));
	}

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
	{
	  if (INT_14_BITS (orig))
	    return plus_constant (Pmode, base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
		  	   LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, tmp, t1, t2, tp;
  rtx_insn *insn;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

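  /* Each TLS model below gets its own access sequence: the two dynamic
     models call __tls_get_addr, while the two exec models add an
     offset to the thread pointer loaded by gen_tp_load.  */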
  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
      case TLS_MODEL_GLOBAL_DYNAMIC:
	tmp = gen_reg_rtx (Pmode);
	if (flag_pic)
	  emit_insn (gen_tgd_load_pic (tmp, addr));
	else
	  emit_insn (gen_tgd_load (tmp, addr));
	ret = hppa_tls_call (tmp);
	break;

      case TLS_MODEL_LOCAL_DYNAMIC:
	ret = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	start_sequence ();
	if (flag_pic)
	  emit_insn (gen_tld_load_pic (tmp, addr));
	else
	  emit_insn (gen_tld_load (tmp, addr));
	t1 = hppa_tls_call (tmp);
	insn = get_insns ();
	end_sequence ();
	t2 = gen_reg_rtx (Pmode);
	emit_libcall_block (insn, t2, t1,
			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				            UNSPEC_TLSLDBASE));
	emit_insn (gen_tld_offset_load (ret, addr, t2));
	break;

      case TLS_MODEL_INITIAL_EXEC:
	tp = gen_reg_rtx (Pmode);
	tmp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	if (flag_pic)
	  emit_insn (gen_tie_load_pic (tmp, addr));
	else
	  emit_insn (gen_tie_load (tmp, addr));
	emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
	break;

      case TLS_MODEL_LOCAL_EXEC:
	tp = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	emit_insn (gen_tp_load (tp));
	emit_insn (gen_tle_load (ret, addr, tp));
	break;

      default:
	gcc_unreachable ();
    }

  return ret;
}

/* Helper for hppa_legitimize_address.  Given X, return true if it
   is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.

   These respectively represent canonical shift-add rtxs or scaled
   memory addresses.  */
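/* For instance, (ashift (reg) (const_int 3)) and (mult (reg) (const_int 8))
   both describe the scaling performed by an sh3add instruction.  */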
static bool
mem_shadd_or_shadd_rtx_p (rtx x)
{
  return ((GET_CODE (x) == ASHIFT
	   || GET_CODE (x) == MULT)
	  && GET_CODE (XEXP (x, 1)) == CONST_INT
	  && ((GET_CODE (x) == ASHIFT
	       && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
	      || (GET_CODE (x) == MULT
		  && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

	memory(X + <large int>)

   into:

	if (<large int> & mask) >= 16
	  Y = (<large int> & ~mask) + mask + 1	Round up.
	else
	  Y = (<large int> & ~mask)		Round down.
	Z = X + Y
	memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)


   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Note that the addresses passed into hppa_legitimize_address always
   come from a MEM, so we only have to match the MULT form on incoming
   addresses.  But to be future proof we also match the ASHIFT form.

   However, this routine always places those shift-add sequences into
   registers, so we have to generate the ASHIFT form as our output.

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
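
/* A small worked example of the transformation above, with illustrative
   numbers: for memory (X + 0x5678) in an integer mode, MASK is 0x3fff;
   0x5678 & 0x3fff = 0x1678 is below the 0x2000 midpoint, so we round
   down to Y = 0x4000, emit Z = X + 0x4000, and rewrite the reference
   as memory (Z + 0x1678).  */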

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine can
	 not handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (mem_shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
      && (OBJECT_P (XEXP (x, 1))
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
      if (GET_CODE (XEXP (x, 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      rtx reg1, reg2;
      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg2,
						      GEN_INT (shift_val)),
				      reg1));
    }

  /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
      && (mode == SFmode || mode == DFmode))
    {
      int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));

      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      /* Try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_ASHIFT (Pmode,
					      XEXP (XEXP (XEXP (x, 0), 0), 0),
					      GEN_INT (shift_val)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  idx = XEXP (x, 0);
	}

      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  int val = INTVAL (XEXP (idx, 1));
	  val /= (1 << shift_val);

	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	      (Pmode, gen_rtx_PLUS (Pmode,
				    gen_rtx_ASHIFT (Pmode, reg1,
						    GEN_INT (shift_val)),
				    base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode,
			    gen_rtx_PLUS (Pmode,
					  gen_rtx_ASHIFT (Pmode, reg2,
							  GEN_INT (shift_val)),
					  reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg1,
						      GEN_INT (shift_val)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));

    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange
     the terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (mem_shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big, but can be divided evenly by shadd_const
	     and added to (reg).  This allows more scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
	        reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode,
							 reg2,
							 GEN_INT (shift_val)),
					 reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
	    {
	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode, regx2,
						         GEN_INT (shift_val)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}

/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			 reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
           || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}

/* For the HPPA, REG, REG+CONST and LO_SUM addresses are the cheapest
   (cost 1), addresses involving a HIGH cost 2, and everything else
   costs 4.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as pa_legitimate_address_p.  */
1450 
1451 static int
hppa_address_cost(rtx X,machine_mode mode ATTRIBUTE_UNUSED,addr_space_t as ATTRIBUTE_UNUSED,bool speed ATTRIBUTE_UNUSED)1452 hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
1453 		   addr_space_t as ATTRIBUTE_UNUSED,
1454 		   bool speed ATTRIBUTE_UNUSED)
1455 {
1456   switch (GET_CODE (X))
1457     {
1458     case REG:
1459     case PLUS:
1460     case LO_SUM:
1461       return 1;
1462     case HIGH:
1463       return 2;
1464     default:
1465       return 4;
1466     }
1467 }
1468 
1469 /* Compute a (partial) cost for rtx X.  Return true if the complete
1470    cost has been computed, and false if subexpressions should be
1471    scanned.  In either case, *TOTAL contains the cost result.  */
1472 
1473 static bool
hppa_rtx_costs(rtx x,machine_mode mode,int outer_code,int opno ATTRIBUTE_UNUSED,int * total,bool speed ATTRIBUTE_UNUSED)1474 hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
1475 		int opno ATTRIBUTE_UNUSED,
1476 		int *total, bool speed ATTRIBUTE_UNUSED)
1477 {
1478   int factor;
1479   int code = GET_CODE (x);
1480 
1481   switch (code)
1482     {
1483     case CONST_INT:
1484       if (INTVAL (x) == 0)
1485 	*total = 0;
1486       else if (INT_14_BITS (x))
1487 	*total = 1;
1488       else
1489 	*total = 2;
1490       return true;
1491 
1492     case HIGH:
1493       *total = 2;
1494       return true;
1495 
1496     case CONST:
1497     case LABEL_REF:
1498     case SYMBOL_REF:
1499       *total = 4;
1500       return true;
1501 
1502     case CONST_DOUBLE:
1503       if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1504 	  && outer_code != SET)
1505 	*total = 0;
1506       else
1507         *total = 8;
1508       return true;
1509 
1510     case MULT:
1511       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1512 	{
1513 	  *total = COSTS_N_INSNS (3);
1514 	  return true;
1515 	}
1516 
1517       /* A mode size N times larger than SImode needs O(N*N) more insns.  */
1518       factor = GET_MODE_SIZE (mode) / 4;
1519       if (factor == 0)
1520 	factor = 1;
1521 
1522       if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1523 	*total = factor * factor * COSTS_N_INSNS (8);
1524       else
1525 	*total = factor * factor * COSTS_N_INSNS (20);
1526       return true;
1527 
1528     case DIV:
1529       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1530 	{
1531 	  *total = COSTS_N_INSNS (14);
1532 	  return true;
1533 	}
1534       /* FALLTHRU */
1535 
1536     case UDIV:
1537     case MOD:
1538     case UMOD:
1539       /* A mode size N times larger than SImode needs O(N*N) more insns.  */
1540       factor = GET_MODE_SIZE (mode) / 4;
1541       if (factor == 0)
1542 	factor = 1;
1543 
1544       *total = factor * factor * COSTS_N_INSNS (60);
1545       return true;
1546 
1547     case PLUS: /* this includes shNadd insns */
1548     case MINUS:
1549       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1550 	{
1551 	  *total = COSTS_N_INSNS (3);
1552 	  return true;
1553 	}
1554 
1555       /* A size N times larger than UNITS_PER_WORD needs N times as
1556 	 many insns, taking N times as long.  */
1557       factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
1558       if (factor == 0)
1559 	factor = 1;
1560       *total = factor * COSTS_N_INSNS (1);
1561       return true;
1562 
1563     case ASHIFT:
1564     case ASHIFTRT:
1565     case LSHIFTRT:
1566       *total = COSTS_N_INSNS (1);
1567       return true;
1568 
1569     default:
1570       return false;
1571     }
1572 }
1573 
1574 /* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
1575    new rtx with the correct mode.  */
1576 static inline rtx
force_mode(machine_mode mode,rtx orig)1577 force_mode (machine_mode mode, rtx orig)
1578 {
1579   if (mode == GET_MODE (orig))
1580     return orig;
1581 
1582   gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
1583 
1584   return gen_rtx_REG (mode, REGNO (orig));
1585 }
1586 
1587 /* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
1588 
1589 static bool
pa_cannot_force_const_mem(machine_mode mode ATTRIBUTE_UNUSED,rtx x)1590 pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1591 {
1592   return tls_referenced_p (x);
1593 }
1594 
1595 /* Emit insns to move operands[1] into operands[0].
1596 
1597    Return 1 if we have written out everything that needs to be done to
1598    do the move.  Otherwise, return 0 and the caller will emit the move
1599    normally.
1600 
1601    Note SCRATCH_REG may not be in the proper mode depending on how it
1602    will be used.  This routine is responsible for creating a new copy
1603    of SCRATCH_REG in the proper mode.  */
1604 
1605 int
1606 pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
1607 {
1608   register rtx operand0 = operands[0];
1609   register rtx operand1 = operands[1];
1610   register rtx tem;
1611 
1612   /* We can only handle indexed addresses in the destination operand
1613      of floating point stores.  Thus, we need to break out indexed
1614      addresses from the destination operand.  */
1615   if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1616     {
1617       gcc_assert (can_create_pseudo_p ());
1618 
1619       tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1620       operand0 = replace_equiv_address (operand0, tem);
1621     }
1622 
1623   /* On targets with non-equivalent space registers, break out unscaled
1624      indexed addresses from the source operand before the final CSE.
1625      We have to do this because the REG_POINTER flag is not correctly
1626      carried through various optimization passes and CSE may substitute
1627      a pseudo without the pointer set for one with the pointer set.  As
1628      a result, we lose various opportunities to create insns with
1629      unscaled indexed addresses.  */
1630   if (!TARGET_NO_SPACE_REGS
1631       && !cse_not_expected
1632       && GET_CODE (operand1) == MEM
1633       && GET_CODE (XEXP (operand1, 0)) == PLUS
1634       && REG_P (XEXP (XEXP (operand1, 0), 0))
1635       && REG_P (XEXP (XEXP (operand1, 0), 1)))
1636     operand1
1637       = replace_equiv_address (operand1,
1638 			       copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1639 
1640   if (scratch_reg
1641       && reload_in_progress && GET_CODE (operand0) == REG
1642       && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1643     operand0 = reg_equiv_mem (REGNO (operand0));
1644   else if (scratch_reg
1645 	   && reload_in_progress && GET_CODE (operand0) == SUBREG
1646 	   && GET_CODE (SUBREG_REG (operand0)) == REG
1647 	   && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1648     {
1649      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1650 	the code which tracks sets/uses for delete_output_reload.  */
1651       rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1652 				 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
1653 				 SUBREG_BYTE (operand0));
1654       operand0 = alter_subreg (&temp, true);
1655     }
1656 
1657   if (scratch_reg
1658       && reload_in_progress && GET_CODE (operand1) == REG
1659       && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1660     operand1 = reg_equiv_mem (REGNO (operand1));
1661   else if (scratch_reg
1662 	   && reload_in_progress && GET_CODE (operand1) == SUBREG
1663 	   && GET_CODE (SUBREG_REG (operand1)) == REG
1664 	   && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1665     {
1666      /* We must not alter SUBREG_BYTE (operand1) since that would confuse
1667 	the code which tracks sets/uses for delete_output_reload.  */
1668       rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1669 				 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
1670 				 SUBREG_BYTE (operand1));
1671       operand1 = alter_subreg (&temp, true);
1672     }
1673 
1674   if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1675       && ((tem = find_replacement (&XEXP (operand0, 0)))
1676 	  != XEXP (operand0, 0)))
1677     operand0 = replace_equiv_address (operand0, tem);
1678 
1679   if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1680       && ((tem = find_replacement (&XEXP (operand1, 0)))
1681 	  != XEXP (operand1, 0)))
1682     operand1 = replace_equiv_address (operand1, tem);
1683 
1684   /* Handle secondary reloads for loads/stores of FP registers from
1685      REG+D addresses where D does not fit in 5 or 14 bits, including
1686      (subreg (mem (addr))) cases, and reloads for other unsupported
1687      memory operands.  */
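  /* For example (an illustrative sketch, not the only possible
     sequence): reloading (mem:DF (plus (reg %r3) (const_int 8192)))
     into an FP register can use neither the 5-bit nor the 14-bit
     displacement form, since 8192 fits in neither field.  With %r1
     as SCRATCH_REG, the code below would emit roughly

	ldil L'8192,%r1			; load D into the scratch
	{addl|add,l} %r3,%r1,%r1	; form the full address
	{fldds|fldd} 0(%r1),%fr22	; FP load via the scratch

     where %fr22 stands for whatever FP register is being reloaded.  */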
1688   if (scratch_reg
1689       && FP_REG_P (operand0)
1690       && (MEM_P (operand1)
1691 	  || (GET_CODE (operand1) == SUBREG
1692 	      && MEM_P (XEXP (operand1, 0)))))
1693     {
1694       rtx op1 = operand1;
1695 
1696       if (GET_CODE (op1) == SUBREG)
1697 	op1 = XEXP (op1, 0);
1698 
1699       if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
1700 	{
1701 	  if (!(TARGET_PA_20
1702 		&& !TARGET_ELF32
1703 		&& INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1704 	      && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
1705 	    {
1706 	      /* SCRATCH_REG will hold an address and maybe the actual data.
1707 		 We want it in WORD_MODE regardless of what mode it was
1708 		 originally given to us.  */
1709 	      scratch_reg = force_mode (word_mode, scratch_reg);
1710 
1711 	      /* D might not fit in 14 bits either; for such cases load D
1712 		 into scratch reg.  */
1713 	      if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1714 		{
1715 		  emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
1716 		  emit_move_insn (scratch_reg,
1717 				  gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
1718 						  Pmode,
1719 						  XEXP (XEXP (op1, 0), 0),
1720 						  scratch_reg));
1721 		}
1722 	      else
1723 		emit_move_insn (scratch_reg, XEXP (op1, 0));
1724 	      op1 = replace_equiv_address (op1, scratch_reg);
1725 	    }
1726 	}
1727       else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
1728 	       || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
1729 	       || IS_INDEX_ADDR_P (XEXP (op1, 0)))
1730 	{
1731 	  /* Load memory address into SCRATCH_REG.  */
1732 	  scratch_reg = force_mode (word_mode, scratch_reg);
1733 	  emit_move_insn (scratch_reg, XEXP (op1, 0));
1734 	  op1 = replace_equiv_address (op1, scratch_reg);
1735 	}
1736       emit_insn (gen_rtx_SET (operand0, op1));
1737       return 1;
1738     }
1739   else if (scratch_reg
1740 	   && FP_REG_P (operand1)
1741 	   && (MEM_P (operand0)
1742 	       || (GET_CODE (operand0) == SUBREG
1743 		   && MEM_P (XEXP (operand0, 0)))))
1744     {
1745       rtx op0 = operand0;
1746 
1747       if (GET_CODE (op0) == SUBREG)
1748 	op0 = XEXP (op0, 0);
1749 
1750       if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
1751 	{
1752 	  if (!(TARGET_PA_20
1753 		&& !TARGET_ELF32
1754 		&& INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1755 	      && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
1756 	    {
1757 	      /* SCRATCH_REG will hold an address and maybe the actual data.
1758 		 We want it in WORD_MODE regardless of what mode it was
1759 		 originally given to us.  */
1760 	      scratch_reg = force_mode (word_mode, scratch_reg);
1761 
1762 	      /* D might not fit in 14 bits either; for such cases load D
1763 		 into scratch reg.  */
1764 	      if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1765 		{
1766 		  emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
1767 		  emit_move_insn (scratch_reg,
1768 				  gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
1769 						  Pmode,
1770 						  XEXP (XEXP (op0, 0), 0),
1771 						  scratch_reg));
1772 		}
1773 	      else
1774 		emit_move_insn (scratch_reg, XEXP (op0, 0));
1775 	      op0 = replace_equiv_address (op0, scratch_reg);
1776 	    }
1777 	}
1778       else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
1779 	       || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
1780 	       || IS_INDEX_ADDR_P (XEXP (op0, 0)))
1781 	{
1782 	  /* Load memory address into SCRATCH_REG.  */
1783 	  scratch_reg = force_mode (word_mode, scratch_reg);
1784 	  emit_move_insn (scratch_reg, XEXP (op0, 0));
1785 	  op0 = replace_equiv_address (op0, scratch_reg);
1786 	}
1787       emit_insn (gen_rtx_SET (op0, operand1));
1788       return 1;
1789     }
1790   /* Handle secondary reloads for loads of FP registers from constant
1791      expressions by forcing the constant into memory.  For the most part,
1792      this is only necessary for SImode and DImode.
1793 
1794      Use scratch_reg to hold the address of the memory location.  */
1795   else if (scratch_reg
1796 	   && CONSTANT_P (operand1)
1797 	   && FP_REG_P (operand0))
1798     {
1799       rtx const_mem, xoperands[2];
1800 
1801       if (operand1 == CONST0_RTX (mode))
1802 	{
1803 	  emit_insn (gen_rtx_SET (operand0, operand1));
1804 	  return 1;
1805 	}
1806 
1807       /* SCRATCH_REG will hold an address and maybe the actual data.  We want
1808 	 it in WORD_MODE regardless of what mode it was originally given
1809 	 to us.  */
1810       scratch_reg = force_mode (word_mode, scratch_reg);
1811 
1812       /* Force the constant into memory and put the address of the
1813 	 memory location into scratch_reg.  */
1814       const_mem = force_const_mem (mode, operand1);
1815       xoperands[0] = scratch_reg;
1816       xoperands[1] = XEXP (const_mem, 0);
1817       pa_emit_move_sequence (xoperands, Pmode, 0);
1818 
1819       /* Now load the destination register.  */
1820       emit_insn (gen_rtx_SET (operand0,
1821 			      replace_equiv_address (const_mem, scratch_reg)));
1822       return 1;
1823     }
1824   /* Handle secondary reloads for SAR.  These occur when trying to load
1825      the SAR from memory or a constant.  */
1826   else if (scratch_reg
1827 	   && GET_CODE (operand0) == REG
1828 	   && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1829 	   && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1830 	   && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
1831     {
1832       /* D might not fit in 14 bits either; for such cases load D into
1833 	 scratch reg.  */
1834       if (GET_CODE (operand1) == MEM
1835 	  && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
1836 	{
1837 	  /* We are reloading the address into the scratch register, so we
1838 	     want to make sure the scratch register is a full register.  */
1839 	  scratch_reg = force_mode (word_mode, scratch_reg);
1840 
1841 	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1842 	  emit_move_insn (scratch_reg,
1843 			  gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1844 					  Pmode,
1845 					  XEXP (XEXP (operand1, 0), 0),
1846 					  scratch_reg));
1848 
1849 	  /* Now we are going to load the scratch register from memory;
1850 	     we want to load it in the same width as the original MEM,
1851 	     which must be the same as the width of the ultimate destination,
1852 	     OPERAND0.  */
1853 	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1854 
1855 	  emit_move_insn (scratch_reg,
1856 			  replace_equiv_address (operand1, scratch_reg));
1857 	}
1858       else
1859 	{
1860 	  /* We want to load the scratch register using the same mode as
1861 	     the ultimate destination.  */
1862 	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1863 
1864 	  emit_move_insn (scratch_reg, operand1);
1865 	}
1866 
1867       /* And emit the insn to set the ultimate destination.  We know that
1868 	 the scratch register has the same mode as the destination at this
1869 	 point.  */
1870       emit_move_insn (operand0, scratch_reg);
1871       return 1;
1872     }
1873 
1874   /* Handle the most common case: storing into a register.  */
1875   if (register_operand (operand0, mode))
1876     {
1877       /* Legitimize TLS symbol references.  This happens for references
1878 	 that aren't a legitimate constant.  */
1879       if (PA_SYMBOL_REF_TLS_P (operand1))
1880 	operand1 = legitimize_tls_address (operand1);
1881 
1882       if (register_operand (operand1, mode)
1883 	  || (GET_CODE (operand1) == CONST_INT
1884 	      && pa_cint_ok_for_move (UINTVAL (operand1)))
1885 	  || (operand1 == CONST0_RTX (mode))
1886 	  || (GET_CODE (operand1) == HIGH
1887 	      && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1888 	  /* Only `general_operands' can come here, so MEM is ok.  */
1889 	  || GET_CODE (operand1) == MEM)
1890 	{
1891 	  /* Various sets are created during RTL generation which don't
1892 	     have the REG_POINTER flag correctly set.  After the CSE pass,
1893 	     instruction recognition can fail if we don't consistently
1894 	     set this flag when performing register copies.  This should
1895 	     also improve the opportunities for creating insns that use
1896 	     unscaled indexing.  */
1897 	  if (REG_P (operand0) && REG_P (operand1))
1898 	    {
1899 	      if (REG_POINTER (operand1)
1900 		  && !REG_POINTER (operand0)
1901 		  && !HARD_REGISTER_P (operand0))
1902 		copy_reg_pointer (operand0, operand1);
1903 	    }
1904 
1905 	  /* When MEMs are broken out, the REG_POINTER flag doesn't
1906 	     get set.  In some cases, we can set the REG_POINTER flag
1907 	     from the declaration for the MEM.  */
1908 	  if (REG_P (operand0)
1909 	      && GET_CODE (operand1) == MEM
1910 	      && !REG_POINTER (operand0))
1911 	    {
1912 	      tree decl = MEM_EXPR (operand1);
1913 
1914 	      /* Set the register pointer flag and register alignment
1915 		 if the declaration for this memory reference is a
1916 		 pointer type.  */
1917 	      if (decl)
1918 		{
1919 		  tree type;
1920 
1921 		  /* If this is a COMPONENT_REF, use the FIELD_DECL from
1922 		     tree operand 1.  */
1923 		  if (TREE_CODE (decl) == COMPONENT_REF)
1924 		    decl = TREE_OPERAND (decl, 1);
1925 
1926 		  type = TREE_TYPE (decl);
1927 		  type = strip_array_types (type);
1928 
1929 		  if (POINTER_TYPE_P (type))
1930 		    {
1931 		      int align;
1932 
1933 		      type = TREE_TYPE (type);
1934 		      /* Using TYPE_ALIGN_OK is rather conservative as
1935 			 only the Ada front end actually sets it.  */
1936 		      align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1937 			       : BITS_PER_UNIT);
1938 		      mark_reg_pointer (operand0, align);
1939 		    }
1940 		}
1941 	    }
1942 
1943 	  emit_insn (gen_rtx_SET (operand0, operand1));
1944 	  return 1;
1945 	}
1946     }
1947   else if (GET_CODE (operand0) == MEM)
1948     {
1949       if (mode == DFmode && operand1 == CONST0_RTX (mode)
1950 	  && !(reload_in_progress || reload_completed))
1951 	{
1952 	  rtx temp = gen_reg_rtx (DFmode);
1953 
1954 	  emit_insn (gen_rtx_SET (temp, operand1));
1955 	  emit_insn (gen_rtx_SET (operand0, temp));
1956 	  return 1;
1957 	}
1958       if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1959 	{
1960 	  /* Run this case quickly.  */
1961 	  emit_insn (gen_rtx_SET (operand0, operand1));
1962 	  return 1;
1963 	}
1964       if (! (reload_in_progress || reload_completed))
1965 	{
1966 	  operands[0] = validize_mem (operand0);
1967 	  operands[1] = operand1 = force_reg (mode, operand1);
1968 	}
1969     }
1970 
1971   /* Simplify the source if we need to.
1972      Note we do have to handle function labels here, even though we do
1973      not consider them legitimate constants.  Loop optimizations can
1974      call the emit_move_* routines with one as a source.  */
1975   if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1976       || (GET_CODE (operand1) == HIGH
1977 	  && symbolic_operand (XEXP (operand1, 0), mode))
1978       || function_label_operand (operand1, VOIDmode)
1979       || tls_referenced_p (operand1))
1980     {
1981       int ishighonly = 0;
1982 
1983       if (GET_CODE (operand1) == HIGH)
1984 	{
1985 	  ishighonly = 1;
1986 	  operand1 = XEXP (operand1, 0);
1987 	}
1988       if (symbolic_operand (operand1, mode))
1989 	{
1990 	  /* Argh.  The assembler and linker can't handle arithmetic
1991 	     involving plabels.
1992 
1993 	     So we force the plabel into memory, load operand0 from
1994 	     the memory location, then add in the constant part.  */
1995 	  if ((GET_CODE (operand1) == CONST
1996 	       && GET_CODE (XEXP (operand1, 0)) == PLUS
1997 	       && function_label_operand (XEXP (XEXP (operand1, 0), 0),
1998 					  VOIDmode))
1999 	      || function_label_operand (operand1, VOIDmode))
2000 	    {
2001 	      rtx temp, const_part;
2002 
2003 	      /* Figure out what (if any) scratch register to use.  */
2004 	      if (reload_in_progress || reload_completed)
2005 		{
2006 		  scratch_reg = scratch_reg ? scratch_reg : operand0;
2007 		  /* SCRATCH_REG will hold an address and maybe the actual
2008 		     data.  We want it in WORD_MODE regardless of what mode it
2009 		     was originally given to us.  */
2010 		  scratch_reg = force_mode (word_mode, scratch_reg);
2011 		}
2012 	      else if (flag_pic)
2013 		scratch_reg = gen_reg_rtx (Pmode);
2014 
2015 	      if (GET_CODE (operand1) == CONST)
2016 		{
2017 		  /* Save away the constant part of the expression.  */
2018 		  const_part = XEXP (XEXP (operand1, 0), 1);
2019 		  gcc_assert (GET_CODE (const_part) == CONST_INT);
2020 
2021 		  /* Force the function label into memory.  */
2022 		  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2023 		}
2024 	      else
2025 		{
2026 		  /* No constant part.  */
2027 		  const_part = NULL_RTX;
2028 
2029 		  /* Force the function label into memory.  */
2030 		  temp = force_const_mem (mode, operand1);
2031 		}
2032 
2034 	      /* Get the address of the memory location.  PIC-ify it if
2035 		 necessary.  */
2036 	      temp = XEXP (temp, 0);
2037 	      if (flag_pic)
2038 		temp = legitimize_pic_address (temp, mode, scratch_reg);
2039 
2040 	      /* Put the address of the memory location into our destination
2041 		 register.  */
2042 	      operands[1] = temp;
2043 	      pa_emit_move_sequence (operands, mode, scratch_reg);
2044 
2045 	      /* Now load from the memory location into our destination
2046 		 register.  */
2047 	      operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2048 	      pa_emit_move_sequence (operands, mode, scratch_reg);
2049 
2050 	      /* And add back in the constant part.  */
2051 	      if (const_part != NULL_RTX)
2052 		expand_inc (operand0, const_part);
2053 
2054 	      return 1;
2055 	    }
2056 
2057 	  if (flag_pic)
2058 	    {
2059 	      rtx_insn *insn;
2060 	      rtx temp;
2061 
2062 	      if (reload_in_progress || reload_completed)
2063 		{
2064 		  temp = scratch_reg ? scratch_reg : operand0;
2065 		  /* TEMP will hold an address and maybe the actual
2066 		     data.  We want it in WORD_MODE regardless of what mode it
2067 		     was originally given to us.  */
2068 		  temp = force_mode (word_mode, temp);
2069 		}
2070 	      else
2071 		temp = gen_reg_rtx (Pmode);
2072 
2073 	      /* Force (const (plus (symbol) (const_int))) to memory
2074 	         if the const_int will not fit in 14 bits.  Although
2075 		 this requires a relocation, the instruction sequence
2076 		 needed to load the value is shorter.  */
2077 	      if (GET_CODE (operand1) == CONST
2078 		  && GET_CODE (XEXP (operand1, 0)) == PLUS
2079 		  && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2080 		  && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2081 		{
2082 		  rtx x, m = force_const_mem (mode, operand1);
2083 
2084 		  x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2085 		  x = replace_equiv_address (m, x);
2086 		  insn = emit_move_insn (operand0, x);
2087 		}
2088 	      else
2089 		{
2090 		  operands[1] = legitimize_pic_address (operand1, mode, temp);
2091 		  if (REG_P (operand0) && REG_P (operands[1]))
2092 		    copy_reg_pointer (operand0, operands[1]);
2093 		  insn = emit_move_insn (operand0, operands[1]);
2094 		}
2095 
2096 	      /* Put a REG_EQUAL note on this insn.  */
2097 	      set_unique_reg_note (insn, REG_EQUAL, operand1);
2098 	    }
2099 	  /* On the HPPA, references to data space are supposed to use dp,
2100 	     register 27, but showing it in the RTL inhibits various cse
2101 	     and loop optimizations.  */
2102 	  else
2103 	    {
2104 	      rtx temp, set;
2105 
2106 	      if (reload_in_progress || reload_completed)
2107 		{
2108 		  temp = scratch_reg ? scratch_reg : operand0;
2109 		  /* TEMP will hold an address and maybe the actual
2110 		     data.  We want it in WORD_MODE regardless of what mode it
2111 		     was originally given to us.  */
2112 		  temp = force_mode (word_mode, temp);
2113 		}
2114 	      else
2115 		temp = gen_reg_rtx (mode);
2116 
2117 	      /* Loading a SYMBOL_REF into a register makes that register
2118 		 safe to be used as the base in an indexed address.
2119 
2120 		 Don't mark hard registers though.  That loses.  */
2121 	      if (GET_CODE (operand0) == REG
2122 		  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2123 		mark_reg_pointer (operand0, BITS_PER_UNIT);
2124 	      if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2125 		mark_reg_pointer (temp, BITS_PER_UNIT);
2126 
2127 	      if (ishighonly)
2128 		set = gen_rtx_SET (operand0, temp);
2129 	      else
2130 		set = gen_rtx_SET (operand0,
2131 				   gen_rtx_LO_SUM (mode, temp, operand1));
2132 
2133 	      emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2134 	      emit_insn (set);
2135 
2136 	    }
2137 	  return 1;
2138 	}
2139       else if (tls_referenced_p (operand1))
2140 	{
2141 	  rtx tmp = operand1;
2142 	  rtx addend = NULL;
2143 
2144 	  if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2145 	    {
2146 	      addend = XEXP (XEXP (tmp, 0), 1);
2147 	      tmp = XEXP (XEXP (tmp, 0), 0);
2148 	    }
2149 
2150 	  gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2151 	  tmp = legitimize_tls_address (tmp);
2152 	  if (addend)
2153 	    {
2154 	      tmp = gen_rtx_PLUS (mode, tmp, addend);
2155 	      tmp = force_operand (tmp, operands[0]);
2156 	    }
2157 	  operands[1] = tmp;
2158 	}
2159       else if (GET_CODE (operand1) != CONST_INT
2160 	       || !pa_cint_ok_for_move (UINTVAL (operand1)))
2161 	{
2162 	  rtx temp;
2163 	  rtx_insn *insn;
2164 	  rtx op1 = operand1;
2165 	  HOST_WIDE_INT value = 0;
2166 	  HOST_WIDE_INT insv = 0;
2167 	  int insert = 0;
2168 
2169 	  if (GET_CODE (operand1) == CONST_INT)
2170 	    value = INTVAL (operand1);
2171 
2172 	  if (TARGET_64BIT
2173 	      && GET_CODE (operand1) == CONST_INT
2174 	      && HOST_BITS_PER_WIDE_INT > 32
2175 	      && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2176 	    {
2177 	      HOST_WIDE_INT nval;
2178 
2179 	      /* Extract the low order 32 bits of the value and sign extend.
2180 		 If the new value is the same as the original value, we can
2181 		 use the original value as-is.  If the new value is
2182 		 different, we use it and insert the most-significant 32-bits
2183 		 of the original value into the final result.  */
2184 	      nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2185 		      ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2186 	      if (value != nval)
2187 		{
2188 #if HOST_BITS_PER_WIDE_INT > 32
2189 		  insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2190 #endif
2191 		  insert = 1;
2192 		  value = nval;
2193 		  operand1 = GEN_INT (nval);
2194 		}
2195 	    }
2196 
2197 	  if (reload_in_progress || reload_completed)
2198 	    temp = scratch_reg ? scratch_reg : operand0;
2199 	  else
2200 	    temp = gen_reg_rtx (mode);
2201 
2202 	  /* We don't directly split DImode constants on 32-bit targets
2203 	     because PLUS uses an 11-bit immediate and the insn sequence
2204 	     generated is not as efficient as the one using HIGH/LO_SUM.  */
2205 	  if (GET_CODE (operand1) == CONST_INT
2206 	      && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2207 	      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2208 	      && !insert)
2209 	    {
2210 	      /* Directly break constant into high and low parts.  This
2211 		 provides better optimization opportunities because various
2212 		 passes recognize constants split with PLUS but not LO_SUM.
2213 		 We use a 14-bit signed low part except when the addition
2214 		 of 0x4000 to the high part might change the sign of the
2215 		 high part.  */
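	      /* Worked example (illustrative): value = 0x12343fff has
		 low = 0x3fff and high = 0x12340000; low is >= 0x2000,
		 so high is bumped to 0x12344000 and low recomputed as
		 -1, yielding "ldil L'0x12344000,temp" followed by
		 "ldo -1(temp),dest", with the low part safely inside
		 the signed 14-bit range.  */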
2216 	      HOST_WIDE_INT low = value & 0x3fff;
2217 	      HOST_WIDE_INT high = value & ~ 0x3fff;
2218 
2219 	      if (low >= 0x2000)
2220 		{
2221 		  if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2222 		    high += 0x2000;
2223 		  else
2224 		    high += 0x4000;
2225 		}
2226 
2227 	      low = value - high;
2228 
2229 	      emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
2230 	      operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2231 	    }
2232 	  else
2233 	    {
2234 	      emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2235 	      operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2236 	    }
2237 
2238 	  insn = emit_move_insn (operands[0], operands[1]);
2239 
2240 	  /* Now insert the most significant 32 bits of the value
2241 	     into the register.  When we don't have a second register
2242 	     available, it could take up to nine instructions to load
2243 	     a 64-bit integer constant.  Prior to reload, we force
2244 	     constants that would take more than three instructions
2245 	     to load to the constant pool.  During and after reload,
2246 	     we have to handle all possible values.  */
2247 	  if (insert)
2248 	    {
2249 	      /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2250 		 register and the value to be inserted is outside the
2251 		 range that can be loaded with three depdi instructions.  */
2252 	      if (temp != operand0 && (insv >= 16384 || insv < -16384))
2253 		{
2254 		  operand1 = GEN_INT (insv);
2255 
2256 		  emit_insn (gen_rtx_SET (temp,
2257 					  gen_rtx_HIGH (mode, operand1)));
2258 		  emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2259 		  if (mode == DImode)
2260 		    insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2261 						  const0_rtx, temp));
2262 		  else
2263 		    insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2264 						  const0_rtx, temp));
2265 		}
2266 	      else
2267 		{
2268 		  int len = 5, pos = 27;
2269 
2270 		  /* Insert the bits using the depdi instruction.  */
2271 		  while (pos >= 0)
2272 		    {
2273 		      HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2274 		      HOST_WIDE_INT sign = v5 < 0;
2275 
2276 		      /* Left extend the insertion.  */
2277 		      insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2278 		      while (pos > 0 && (insv & 1) == sign)
2279 			{
2280 			  insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2281 			  len += 1;
2282 			  pos -= 1;
2283 			}
2284 
2285 		      if (mode == DImode)
2286 			insn = emit_insn (gen_insvdi (operand0,
2287 						      GEN_INT (len),
2288 						      GEN_INT (pos),
2289 						      GEN_INT (v5)));
2290 		      else
2291 			insn = emit_insn (gen_insvsi (operand0,
2292 						      GEN_INT (len),
2293 						      GEN_INT (pos),
2294 						      GEN_INT (v5)));
2295 
2296 		      len = pos > 0 && pos < 5 ? pos : 5;
2297 		      pos -= len;
2298 		    }
2299 		}
2300 	    }
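	  /* A cross-check on the nine-insn bound above (illustrative):
	     the low 32 bits cost at most two insns (HIGH/LO_SUM), and
	     the worst-case insertion loop deposits fields at positions
	     27, 22, 17, 12, 7, 2 and 0, i.e. at most seven depdi
	     insns, nine in total.  */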
2301 
2302 	  set_unique_reg_note (insn, REG_EQUAL, op1);
2303 
2304 	  return 1;
2305 	}
2306     }
2307   /* Now have insn-emit do whatever it normally does.  */
2308   return 0;
2309 }
2310 
2311 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2312    it will need a link/runtime reloc).  */
2313 
2314 int
2315 pa_reloc_needed (tree exp)
2316 {
2317   int reloc = 0;
2318 
2319   switch (TREE_CODE (exp))
2320     {
2321     case ADDR_EXPR:
2322       return 1;
2323 
2324     case POINTER_PLUS_EXPR:
2325     case PLUS_EXPR:
2326     case MINUS_EXPR:
2327       reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2328       reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2329       break;
2330 
2331     CASE_CONVERT:
2332     case NON_LVALUE_EXPR:
2333       reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2334       break;
2335 
2336     case CONSTRUCTOR:
2337       {
2338 	tree value;
2339 	unsigned HOST_WIDE_INT ix;
2340 
2341 	FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2342 	  if (value)
2343 	    reloc |= pa_reloc_needed (value);
2344       }
2345       break;
2346 
2347     case ERROR_MARK:
2348       break;
2349 
2350     default:
2351       break;
2352     }
2353   return reloc;
2354 }
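/* For example, in

	static int i;
	static int *p = &i;

   the initializer of "p" contains an ADDR_EXPR for "&i" and thus
   needs a relocation, whereas "static int j = 42;" does not.  */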
2355 
2356 
2357 /* Return the best assembler insn template
2358    for moving operands[1] into operands[0] as a fullword.  */
2359 const char *
2360 pa_singlemove_string (rtx *operands)
2361 {
2362   HOST_WIDE_INT intval;
2363 
2364   if (GET_CODE (operands[0]) == MEM)
2365     return "stw %r1,%0";
2366   if (GET_CODE (operands[1]) == MEM)
2367     return "ldw %1,%0";
2368   if (GET_CODE (operands[1]) == CONST_DOUBLE)
2369     {
2370       long i;
2371 
2372       gcc_assert (GET_MODE (operands[1]) == SFmode);
2373 
2374       /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2375 	 bit pattern.  */
2376       REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);
2377 
2378       operands[1] = GEN_INT (i);
2379       /* Fall through to CONST_INT case.  */
2380     }
2381   if (GET_CODE (operands[1]) == CONST_INT)
2382     {
2383       intval = INTVAL (operands[1]);
2384 
2385       if (VAL_14_BITS_P (intval))
2386 	return "ldi %1,%0";
2387       else if ((intval & 0x7ff) == 0)
2388 	return "ldil L'%1,%0";
2389       else if (pa_zdepi_cint_p (intval))
2390 	return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2391       else
2392 	return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2393     }
2394   return "copy %1,%0";
2395 }
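/* Examples (illustrative): 42 fits in 14 bits and gives "ldi 42,%0";
   0x12345800 has its low 11 bits clear and gives the single ldil
   form; 0x00f00000 is a short bit string accepted by
   pa_zdepi_cint_p and takes a single zdepi/depwi; anything else
   needs the two-insn ldil/ldo pair.  */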
2396 
2397 
2398 /* Compute position (in OP[1]) and width (in OP[2])
2399    useful for copying IMM to a register using the zdepi
2400    instructions.  Store the immediate value to insert in OP[0].  */
2401 static void
2402 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2403 {
2404   int lsb, len;
2405 
2406   /* Find the least significant set bit in IMM.  */
2407   for (lsb = 0; lsb < 32; lsb++)
2408     {
2409       if ((imm & 1) != 0)
2410         break;
2411       imm >>= 1;
2412     }
2413 
2414   /* Choose variants based on *sign* of the 5-bit field.  */
2415   if ((imm & 0x10) == 0)
2416     len = (lsb <= 28) ? 4 : 32 - lsb;
2417   else
2418     {
2419       /* Find the width of the bitstring in IMM.  */
2420       for (len = 5; len < 32 - lsb; len++)
2421 	{
2422 	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2423 	    break;
2424 	}
2425 
2426       /* Sign extend IMM as a 5-bit value.  */
2427       imm = (imm & 0xf) - 0x10;
2428     }
2429 
2430   op[0] = imm;
2431   op[1] = 31 - lsb;
2432   op[2] = len;
2433 }
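/* Worked example: IMM = 0x00f00000 has its least significant set bit
   at 20, and shifting leaves 0xf.  Bit 4 of the shifted value is
   clear, so len = 4, giving op[0] = 15, op[1] = 31 - 20 = 11 and
   op[2] = 4, i.e. deposit the 4-bit value 15 with its low-order bit
   landing at bit 20.  */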
2434 
2435 /* Compute position (in OP[1]) and width (in OP[2])
2436    useful for copying IMM to a register using the depdi,z
2437    instructions.  Store the immediate value to insert in OP[0].  */
2438 
2439 static void
2440 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2441 {
2442   int lsb, len, maxlen;
2443 
2444   maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2445 
2446   /* Find the least significant set bit in IMM.  */
2447   for (lsb = 0; lsb < maxlen; lsb++)
2448     {
2449       if ((imm & 1) != 0)
2450         break;
2451       imm >>= 1;
2452     }
2453 
2454   /* Choose variants based on *sign* of the 5-bit field.  */
2455   if ((imm & 0x10) == 0)
2456     len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2457   else
2458     {
2459       /* Find the width of the bitstring in IMM.  */
2460       for (len = 5; len < maxlen - lsb; len++)
2461 	{
2462 	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2463 	    break;
2464 	}
2465 
2466       /* Extend length if host is narrow and IMM is negative.  */
2467       if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2468 	len += 32;
2469 
2470       /* Sign extend IMM as a 5-bit value.  */
2471       imm = (imm & 0xf) - 0x10;
2472     }
2473 
2474   op[0] = imm;
2475   op[1] = 63 - lsb;
2476   op[2] = len;
2477 }
2478 
2479 /* Output assembler code to perform a doubleword move insn
2480    with operands OPERANDS.  */
2481 
2482 const char *
2483 pa_output_move_double (rtx *operands)
2484 {
2485   enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2486   rtx latehalf[2];
2487   rtx addreg0 = 0, addreg1 = 0;
2488   int highonly = 0;
2489 
2490   /* First classify both operands.  */
2491 
2492   if (REG_P (operands[0]))
2493     optype0 = REGOP;
2494   else if (offsettable_memref_p (operands[0]))
2495     optype0 = OFFSOP;
2496   else if (GET_CODE (operands[0]) == MEM)
2497     optype0 = MEMOP;
2498   else
2499     optype0 = RNDOP;
2500 
2501   if (REG_P (operands[1]))
2502     optype1 = REGOP;
2503   else if (CONSTANT_P (operands[1]))
2504     optype1 = CNSTOP;
2505   else if (offsettable_memref_p (operands[1]))
2506     optype1 = OFFSOP;
2507   else if (GET_CODE (operands[1]) == MEM)
2508     optype1 = MEMOP;
2509   else
2510     optype1 = RNDOP;
2511 
2512   /* Check for cases that the operand constraints are not supposed
2513      to allow.  */
2514   gcc_assert (optype0 == REGOP || optype1 == REGOP);
2515 
2516   /* Handle copies between general and floating registers.  */
2517 
2518   if (optype0 == REGOP && optype1 == REGOP
2519       && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2520     {
2521       if (FP_REG_P (operands[0]))
2522 	{
2523 	  output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2524 	  output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2525 	  return "{fldds|fldd} -16(%%sp),%0";
2526 	}
2527       else
2528 	{
2529 	  output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2530 	  output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2531 	  return "{ldws|ldw} -12(%%sp),%R0";
2532 	}
2533     }
2534 
2535   /* Handle auto-decrementing and auto-incrementing loads and stores
2536      specifically, since the structure of the function doesn't work
2537      for them without major modification.  Improve this once this
2538      port knows about the general inc/dec addressing of the PA.
2539      (This was written by tege.  Chide him if it doesn't work.)  */
2540 
2541   if (optype0 == MEMOP)
2542     {
2543       /* We have to output the address syntax ourselves, since print_operand
2544 	 doesn't deal with the addresses we want to use.  Fix this later.  */
2545 
2546       rtx addr = XEXP (operands[0], 0);
2547       if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2548 	{
2549 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2550 
2551 	  operands[0] = XEXP (addr, 0);
2552 	  gcc_assert (GET_CODE (operands[1]) == REG
2553 		      && GET_CODE (operands[0]) == REG);
2554 
2555 	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2556 
2557 	  /* No overlap between high target register and address
2558 	     register.  (We do this in a non-obvious way to
2559 	     save a register file writeback)  */
2560 	  if (GET_CODE (addr) == POST_INC)
2561 	    return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2562 	  return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2563 	}
2564       else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2565 	{
2566 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2567 
2568 	  operands[0] = XEXP (addr, 0);
2569 	  gcc_assert (GET_CODE (operands[1]) == REG
2570 		      && GET_CODE (operands[0]) == REG);
2571 
2572 	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2573 	  /* No overlap between high target register and address
2574 	     register.  (We do this in a non-obvious way to save a
2575 	     register file writeback)  */
2576 	  if (GET_CODE (addr) == PRE_INC)
2577 	    return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2578 	  return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2579 	}
2580     }
2581   if (optype1 == MEMOP)
2582     {
2583       /* We have to output the address syntax ourselves, since print_operand
2584 	 doesn't deal with the addresses we want to use.  Fix this later.  */
2585 
2586       rtx addr = XEXP (operands[1], 0);
2587       if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2588 	{
2589 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2590 
2591 	  operands[1] = XEXP (addr, 0);
2592 	  gcc_assert (GET_CODE (operands[0]) == REG
2593 		      && GET_CODE (operands[1]) == REG);
2594 
2595 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2596 	    {
2597 	      /* No overlap between high target register and address
2598 		 register.  (We do this in a non-obvious way to
2599 		 save a register file writeback)  */
2600 	      if (GET_CODE (addr) == POST_INC)
2601 		return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2602 	      return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2603 	    }
2604 	  else
2605 	    {
2606 	      /* This is an undefined situation.  We should load into the
2607 		 address register *and* update that register.  Probably
2608 		 we don't need to handle this at all.  */
2609 	      if (GET_CODE (addr) == POST_INC)
2610 		return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2611 	      return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2612 	    }
2613 	}
2614       else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2615 	{
2616 	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2617 
2618 	  operands[1] = XEXP (addr, 0);
2619 	  gcc_assert (GET_CODE (operands[0]) == REG
2620 		      && GET_CODE (operands[1]) == REG);
2621 
2622 	  if (!reg_overlap_mentioned_p (high_reg, addr))
2623 	    {
2624 	      /* No overlap between high target register and address
2625 		 register.  (We do this in a non-obvious way to
2626 		 save a register file writeback)  */
2627 	      if (GET_CODE (addr) == PRE_INC)
2628 		return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2629 	      return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2630 	    }
2631 	  else
2632 	    {
2633 	      /* This is an undefined situation.  We should load into the
2634 		 address register *and* update that register.  Probably
2635 		 we don't need to handle this at all.  */
2636 	      if (GET_CODE (addr) == PRE_INC)
2637 		return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2638 	      return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2639 	    }
2640 	}
2641       else if (GET_CODE (addr) == PLUS
2642 	       && GET_CODE (XEXP (addr, 0)) == MULT)
2643 	{
2644 	  rtx xoperands[4];
2645 
2646 	  /* Load address into left half of destination register.  */
2647 	  xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2648 	  xoperands[1] = XEXP (addr, 1);
2649 	  xoperands[2] = XEXP (XEXP (addr, 0), 0);
2650 	  xoperands[3] = XEXP (XEXP (addr, 0), 1);
2651 	  output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2652 			   xoperands);
2653 	  return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2654 	}
2655       else if (GET_CODE (addr) == PLUS
2656 	       && REG_P (XEXP (addr, 0))
2657 	       && REG_P (XEXP (addr, 1)))
2658 	{
2659 	  rtx xoperands[3];
2660 
2661 	  /* Load address into left half of destination register.  */
2662 	  xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2663 	  xoperands[1] = XEXP (addr, 0);
2664 	  xoperands[2] = XEXP (addr, 1);
2665 	  output_asm_insn ("{addl|add,l} %1,%2,%0",
2666 			   xoperands);
2667 	  return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2668 	}
2669     }
2670 
2671   /* If an operand is an unoffsettable memory ref, find a register
2672      we can increment temporarily to make it refer to the second word.  */
2673 
2674   if (optype0 == MEMOP)
2675     addreg0 = find_addr_reg (XEXP (operands[0], 0));
2676 
2677   if (optype1 == MEMOP)
2678     addreg1 = find_addr_reg (XEXP (operands[1], 0));
2679 
2680   /* Ok, we can do one word at a time.
2681      Normally we do the low-numbered word first.
2682 
2683      In either case, set up in LATEHALF the operands to use
2684      for the high-numbered word and in some cases alter the
2685      operands in OPERANDS to be suitable for the low-numbered word.  */
2686 
2687   if (optype0 == REGOP)
2688     latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2689   else if (optype0 == OFFSOP)
2690     latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
2691   else
2692     latehalf[0] = operands[0];
2693 
2694   if (optype1 == REGOP)
2695     latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2696   else if (optype1 == OFFSOP)
2697     latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
2698   else if (optype1 == CNSTOP)
2699     {
2700       if (GET_CODE (operands[1]) == HIGH)
2701 	{
2702 	  operands[1] = XEXP (operands[1], 0);
2703 	  highonly = 1;
2704 	}
2705       split_double (operands[1], &operands[1], &latehalf[1]);
2706     }
2707   else
2708     latehalf[1] = operands[1];
2709 
2710   /* If the first move would clobber the source of the second one,
2711      do them in the other order.
2712 
2713      This can happen in two cases:
2714 
2715 	mem -> register where the first half of the destination register
2716  	is the same register used in the memory's address.  Reload
2717 	can create such insns.
2718 
2719 	mem in this case will be either register indirect or register
2720 	indirect plus a valid offset.
2721 
2722 	register -> register move where REGNO(dst) == REGNO(src) + 1;
2723 	someone (Tim/Tege?) claimed this can happen for parameter loads.
2724 
2725      Handle mem -> register case first.  */
2726   if (optype0 == REGOP
2727       && (optype1 == MEMOP || optype1 == OFFSOP)
2728       && refers_to_regno_p (REGNO (operands[0]), operands[1]))
2729     {
2730       /* Do the late half first.  */
2731       if (addreg1)
2732 	output_asm_insn ("ldo 4(%0),%0", &addreg1);
2733       output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2734 
2735       /* Then clobber.  */
2736       if (addreg1)
2737 	output_asm_insn ("ldo -4(%0),%0", &addreg1);
2738       return pa_singlemove_string (operands);
2739     }
2740 
2741   /* Now handle register -> register case.  */
2742   if (optype0 == REGOP && optype1 == REGOP
2743       && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2744     {
2745       output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2746       return pa_singlemove_string (operands);
2747     }
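  /* Example (illustrative): for a DImode copy of (%r4,%r5) into
     (%r5,%r6) we have REGNO (dst) == REGNO (src) + 1, so emitting
     "copy %r4,%r5" first would clobber the source of the second
     word; emitting the late half "copy %r5,%r6" first is safe.  */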
2748 
2749   /* Normal case: do the two words, low-numbered first.  */
2750 
2751   output_asm_insn (pa_singlemove_string (operands), operands);
2752 
2753   /* Make any unoffsettable addresses point at high-numbered word.  */
2754   if (addreg0)
2755     output_asm_insn ("ldo 4(%0),%0", &addreg0);
2756   if (addreg1)
2757     output_asm_insn ("ldo 4(%0),%0", &addreg1);
2758 
2759   /* Do high-numbered word.  */
2760   if (highonly)
2761     output_asm_insn ("ldil L'%1,%0", latehalf);
2762   else
2763     output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2764 
2765   /* Undo the adds we just did.  */
2766   if (addreg0)
2767     output_asm_insn ("ldo -4(%0),%0", &addreg0);
2768   if (addreg1)
2769     output_asm_insn ("ldo -4(%0),%0", &addreg1);
2770 
2771   return "";
2772 }
2773 
2774 const char *
2775 pa_output_fp_move_double (rtx *operands)
2776 {
2777   if (FP_REG_P (operands[0]))
2778     {
2779       if (FP_REG_P (operands[1])
2780 	  || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2781 	output_asm_insn ("fcpy,dbl %f1,%0", operands);
2782       else
2783 	output_asm_insn ("fldd%F1 %1,%0", operands);
2784     }
2785   else if (FP_REG_P (operands[1]))
2786     {
2787       output_asm_insn ("fstd%F0 %1,%0", operands);
2788     }
2789   else
2790     {
2791       rtx xoperands[2];
2792 
2793       gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2794 
2795       /* This is a pain.  You have to be prepared to deal with an
2796 	 arbitrary address here, including pre/post increment/decrement,
2797 	 so we avoid generating such addresses in the MD.  */
2799       gcc_assert (GET_CODE (operands[0]) == REG);
2800 
2801       xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2802       xoperands[0] = operands[0];
2803       output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2804     }
2805   return "";
2806 }
2807 
2808 /* Return a REG that occurs in ADDR with coefficient 1.
2809    ADDR can be effectively incremented by incrementing REG.  */
2810 
2811 static rtx
2812 find_addr_reg (rtx addr)
2813 {
2814   while (GET_CODE (addr) == PLUS)
2815     {
2816       if (GET_CODE (XEXP (addr, 0)) == REG)
2817 	addr = XEXP (addr, 0);
2818       else if (GET_CODE (XEXP (addr, 1)) == REG)
2819 	addr = XEXP (addr, 1);
2820       else if (CONSTANT_P (XEXP (addr, 0)))
2821 	addr = XEXP (addr, 1);
2822       else if (CONSTANT_P (XEXP (addr, 1)))
2823 	addr = XEXP (addr, 0);
2824       else
2825 	gcc_unreachable ();
2826     }
2827   gcc_assert (GET_CODE (addr) == REG);
2828   return addr;
2829 }
2830 
2831 /* Emit code to perform a block move.
2832 
2833    OPERANDS[0] is the destination pointer as a REG, clobbered.
2834    OPERANDS[1] is the source pointer as a REG, clobbered.
2835    OPERANDS[2] is a register for temporary storage.
2836    OPERANDS[3] is a register for temporary storage.
2837    OPERANDS[4] is the size as a CONST_INT
2838    OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2839    OPERANDS[6] is another temporary register.  */
2840 
2841 const char *
2842 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2843 {
2844   int align = INTVAL (operands[5]);
2845   unsigned long n_bytes = INTVAL (operands[4]);
2846 
2847   /* We can't move more than a word at a time because the PA
2848      has no integer move insns longer than a word.  (Could use fp mem ops?)  */
2849   if (align > (TARGET_64BIT ? 8 : 4))
2850     align = (TARGET_64BIT ? 8 : 4);
2851 
2852   /* Note that we know each loop below will execute at least twice
2853      (else we would have open-coded the copy).  */
2854   switch (align)
2855     {
2856       case 8:
2857 	/* Pre-adjust the loop counter.  */
2858 	operands[4] = GEN_INT (n_bytes - 16);
2859 	output_asm_insn ("ldi %4,%2", operands);
2860 
2861 	/* Copying loop.  */
2862 	output_asm_insn ("ldd,ma 8(%1),%3", operands);
2863 	output_asm_insn ("ldd,ma 8(%1),%6", operands);
2864 	output_asm_insn ("std,ma %3,8(%0)", operands);
2865 	output_asm_insn ("addib,>= -16,%2,.-12", operands);
2866 	output_asm_insn ("std,ma %6,8(%0)", operands);
2867 
2868 	/* Handle the residual.  There could be up to 15 bytes of
2869 	   residual to copy!  */
2870 	if (n_bytes % 16 != 0)
2871 	  {
2872 	    operands[4] = GEN_INT (n_bytes % 8);
2873 	    if (n_bytes % 16 >= 8)
2874 	      output_asm_insn ("ldd,ma 8(%1),%3", operands);
2875 	    if (n_bytes % 8 != 0)
2876 	      output_asm_insn ("ldd 0(%1),%6", operands);
2877 	    if (n_bytes % 16 >= 8)
2878 	      output_asm_insn ("std,ma %3,8(%0)", operands);
2879 	    if (n_bytes % 8 != 0)
2880 	      output_asm_insn ("stdby,e %6,%4(%0)", operands);
2881 	  }
2882 	return "";
2883 
2884       case 4:
2885 	/* Pre-adjust the loop counter.  */
2886 	operands[4] = GEN_INT (n_bytes - 8);
2887 	output_asm_insn ("ldi %4,%2", operands);
2888 
2889 	/* Copying loop.  */
2890 	output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2891 	output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2892 	output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2893 	output_asm_insn ("addib,>= -8,%2,.-12", operands);
2894 	output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2895 
2896 	/* Handle the residual.  There could be up to 7 bytes of
2897 	   residual to copy!  */
2898 	if (n_bytes % 8 != 0)
2899 	  {
2900 	    operands[4] = GEN_INT (n_bytes % 4);
2901 	    if (n_bytes % 8 >= 4)
2902 	      output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2903 	    if (n_bytes % 4 != 0)
2904 	      output_asm_insn ("ldw 0(%1),%6", operands);
2905 	    if (n_bytes % 8 >= 4)
2906 	      output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2907 	    if (n_bytes % 4 != 0)
2908 	      output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2909 	  }
2910 	return "";
2911 
2912       case 2:
2913 	/* Pre-adjust the loop counter.  */
2914 	operands[4] = GEN_INT (n_bytes - 4);
2915 	output_asm_insn ("ldi %4,%2", operands);
2916 
2917 	/* Copying loop.  */
2918 	output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2919 	output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2920 	output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2921 	output_asm_insn ("addib,>= -4,%2,.-12", operands);
2922 	output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2923 
2924 	/* Handle the residual.  */
2925 	if (n_bytes % 4 != 0)
2926 	  {
2927 	    if (n_bytes % 4 >= 2)
2928 	      output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2929 	    if (n_bytes % 2 != 0)
2930 	      output_asm_insn ("ldb 0(%1),%6", operands);
2931 	    if (n_bytes % 4 >= 2)
2932 	      output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2933 	    if (n_bytes % 2 != 0)
2934 	      output_asm_insn ("stb %6,0(%0)", operands);
2935 	  }
2936 	return "";
2937 
2938       case 1:
2939 	/* Pre-adjust the loop counter.  */
2940 	operands[4] = GEN_INT (n_bytes - 2);
2941 	output_asm_insn ("ldi %4,%2", operands);
2942 
2943 	/* Copying loop.  */
2944 	output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2945 	output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2946 	output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2947 	output_asm_insn ("addib,>= -2,%2,.-12", operands);
2948 	output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2949 
2950 	/* Handle the residual.  */
2951 	if (n_bytes % 2 != 0)
2952 	  {
2953 	    output_asm_insn ("ldb 0(%1),%3", operands);
2954 	    output_asm_insn ("stb %3,0(%0)", operands);
2955 	  }
2956 	return "";
2957 
2958       default:
2959 	gcc_unreachable ();
2960     }
2961 }
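/* Example (illustrative): a word-aligned copy of 19 bytes presets the
   counter to 11 and runs the loop twice (16 bytes); the residual code
   then emits "ldw 0(%1),%6" followed by "{stbys|stby},e %6,3(%0)",
   which stores just the remaining 3 high-order bytes.  */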
2962 
2963 /* Count the number of insns necessary to handle this block move.
2964 
2965    Basic structure is the same as pa_output_block_move, except that we
2966    count insns rather than emit them.  */
2967 
2968 static int
2969 compute_movmem_length (rtx_insn *insn)
2970 {
2971   rtx pat = PATTERN (insn);
2972   unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2973   unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2974   unsigned int n_insns = 0;
2975 
2976   /* We can't move more than a word at a time because the PA
2977      has no integer move insns longer than a word.  (Could use fp mem ops?)  */
2978   if (align > (TARGET_64BIT ? 8 : 4))
2979     align = (TARGET_64BIT ? 8 : 4);
2980 
2981   /* The basic copying loop.  */
2982   n_insns = 6;
2983 
2984   /* Residuals.  */
2985   if (n_bytes % (2 * align) != 0)
2986     {
2987       if ((n_bytes % (2 * align)) >= align)
2988 	n_insns += 2;
2989 
2990       if ((n_bytes % align) != 0)
2991 	n_insns += 2;
2992     }
2993 
2994   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
2995   return n_insns * 4;
2996 }
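/* For instance, the 19-byte word-aligned copy described above costs
   the 6-insn loop plus 2 insns for the sub-word residual
   (19 % 8 == 3), i.e. 8 insns or 32 bytes.  */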
2997 
2998 /* Emit code to perform a block clear.
2999 
3000    OPERANDS[0] is the destination pointer as a REG, clobbered.
3001    OPERANDS[1] is a register for temporary storage.
3002    OPERANDS[2] is the size as a CONST_INT
3003    OPERANDS[3] is the alignment safe to use, as a CONST_INT.  */
3004 
3005 const char *
3006 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3007 {
3008   int align = INTVAL (operands[3]);
3009   unsigned long n_bytes = INTVAL (operands[2]);
3010 
3011   /* We can't clear more than a word at a time because the PA
3012      has no integer move insns longer than a word.  */
3013   if (align > (TARGET_64BIT ? 8 : 4))
3014     align = (TARGET_64BIT ? 8 : 4);
3015 
3016   /* Note that we know each loop below will execute at least twice
3017      (else we would have open-coded the clear).  */
3018   switch (align)
3019     {
3020       case 8:
3021 	/* Pre-adjust the loop counter.  */
3022 	operands[2] = GEN_INT (n_bytes - 16);
3023 	output_asm_insn ("ldi %2,%1", operands);
3024 
3025 	/* Loop.  */
3026 	output_asm_insn ("std,ma %%r0,8(%0)", operands);
3027 	output_asm_insn ("addib,>= -16,%1,.-4", operands);
3028 	output_asm_insn ("std,ma %%r0,8(%0)", operands);
3029 
3030 	/* Handle the residual.  There could be up to 15 bytes of
3031 	   residual to clear!  */
3032 	if (n_bytes % 16 != 0)
3033 	  {
3034 	    operands[2] = GEN_INT (n_bytes % 8);
3035 	    if (n_bytes % 16 >= 8)
3036 	      output_asm_insn ("std,ma %%r0,8(%0)", operands);
3037 	    if (n_bytes % 8 != 0)
3038 	      output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3039 	  }
3040 	return "";
3041 
3042       case 4:
3043 	/* Pre-adjust the loop counter.  */
3044 	operands[2] = GEN_INT (n_bytes - 8);
3045 	output_asm_insn ("ldi %2,%1", operands);
3046 
3047 	/* Loop.  */
3048 	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3049 	output_asm_insn ("addib,>= -8,%1,.-4", operands);
3050 	output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3051 
3052 	/* Handle the residual.  There could be up to 7 bytes of
3053 	   residual to clear!  */
3054 	if (n_bytes % 8 != 0)
3055 	  {
3056 	    operands[2] = GEN_INT (n_bytes % 4);
3057 	    if (n_bytes % 8 >= 4)
3058 	      output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3059 	    if (n_bytes % 4 != 0)
3060 	      output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3061 	  }
3062 	return "";
3063 
3064       case 2:
3065 	/* Pre-adjust the loop counter.  */
3066 	operands[2] = GEN_INT (n_bytes - 4);
3067 	output_asm_insn ("ldi %2,%1", operands);
3068 
3069 	/* Loop.  */
3070 	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3071 	output_asm_insn ("addib,>= -4,%1,.-4", operands);
3072 	output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3073 
3074 	/* Handle the residual.  */
3075 	if (n_bytes % 4 != 0)
3076 	  {
3077 	    if (n_bytes % 4 >= 2)
3078 	      output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3079 	    if (n_bytes % 2 != 0)
3080 	      output_asm_insn ("stb %%r0,0(%0)", operands);
3081 	  }
3082 	return "";
3083 
3084       case 1:
3085 	/* Pre-adjust the loop counter.  */
3086 	operands[2] = GEN_INT (n_bytes - 2);
3087 	output_asm_insn ("ldi %2,%1", operands);
3088 
3089 	/* Loop.  */
3090 	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3091 	output_asm_insn ("addib,>= -2,%1,.-4", operands);
3092 	output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3093 
3094 	/* Handle the residual.  */
3095 	if (n_bytes % 2 != 0)
3096 	  output_asm_insn ("stb %%r0,0(%0)", operands);
3097 
3098 	return "";
3099 
3100       default:
3101 	gcc_unreachable ();
3102     }
3103 }
3104 
3105 /* Count the number of insns necessary to handle this block clear.
3106 
3107    Basic structure is the same as pa_output_block_clear, except that we
3108    count insns rather than emit them.  */
3109 
3110 static int
3111 compute_clrmem_length (rtx_insn *insn)
3112 {
3113   rtx pat = PATTERN (insn);
3114   unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3115   unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3116   unsigned int n_insns = 0;
3117 
3118   /* We can't clear more than a word at a time because the PA
3119      has no integer move insns longer than a word.  */
3120   if (align > (TARGET_64BIT ? 8 : 4))
3121     align = (TARGET_64BIT ? 8 : 4);
3122 
3123   /* The basic loop.  */
3124   n_insns = 4;
3125 
3126   /* Residuals.  */
3127   if (n_bytes % (2 * align) != 0)
3128     {
3129       if ((n_bytes % (2 * align)) >= align)
3130 	n_insns++;
3131 
3132       if ((n_bytes % align) != 0)
3133 	n_insns++;
3134     }
3135 
3136   /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
3137   return n_insns * 4;
3138 }
3139 
3140 
3141 const char *
3142 pa_output_and (rtx *operands)
3143 {
3144   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3145     {
3146       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3147       int ls0, ls1, ms0, p, len;
3148 
3149       for (ls0 = 0; ls0 < 32; ls0++)
3150 	if ((mask & (1 << ls0)) == 0)
3151 	  break;
3152 
3153       for (ls1 = ls0; ls1 < 32; ls1++)
3154 	if ((mask & (1 << ls1)) != 0)
3155 	  break;
3156 
3157       for (ms0 = ls1; ms0 < 32; ms0++)
3158 	if ((mask & (1 << ms0)) == 0)
3159 	  break;
3160 
3161       gcc_assert (ms0 == 32);
3162 
3163       if (ls1 == 32)
3164 	{
3165 	  len = ls0;
3166 
3167 	  gcc_assert (len);
3168 
3169 	  operands[2] = GEN_INT (len);
3170 	  return "{extru|extrw,u} %1,31,%2,%0";
3171 	}
3172       else
3173 	{
3174 	  /* We could use this `depi' for the case above as well, but `depi'
3175 	     requires one more register file access than an `extru'.  */
3176 
3177 	  p = 31 - ls0;
3178 	  len = ls1 - ls0;
3179 
3180 	  operands[2] = GEN_INT (p);
3181 	  operands[3] = GEN_INT (len);
3182 	  return "{depi|depwi} 0,%2,%3,%0";
3183 	}
3184     }
3185   else
3186     return "and %1,%2,%0";
3187 }
3188 
3189 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3190    storing the result in operands[0].  This is the 64-bit version.  */
3191 const char *
3192 pa_output_64bit_and (rtx *operands)
3193 {
3194   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3195     {
3196       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3197       int ls0, ls1, ms0, p, len;
3198 
3199       for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3200 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3201 	  break;
3202 
3203       for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3204 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3205 	  break;
3206 
3207       for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3208 	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3209 	  break;
3210 
3211       gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3212 
3213       if (ls1 == HOST_BITS_PER_WIDE_INT)
3214 	{
3215 	  len = ls0;
3216 
3217 	  gcc_assert (len);
3218 
3219 	  operands[2] = GEN_INT (len);
3220 	  return "extrd,u %1,63,%2,%0";
3221 	}
3222       else
3223 	{
3224 	  /* We could use `depdi' for the case above as well, but `depdi'
3225 	     requires one more register file access than an `extrd,u'.  */
3226 
3227 	  p = 63 - ls0;
3228 	  len = ls1 - ls0;
3229 
3230 	  operands[2] = GEN_INT (p);
3231 	  operands[3] = GEN_INT (len);
3232 	  return "depdi 0,%2,%3,%0";
3233 	}
3234     }
3235   else
3236     return "and %1,%2,%0";
3237 }
3238 
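/* Return a string to perform a bitwise-or of operands[1] with operands[2],
   storing the result in operands[0].  A zero operand is a plain copy;
   otherwise the constant mask must be a single contiguous run of one bits
   (checked by the assert below), which is set with a deposit-immediate
   insn.  */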
3239 const char *
3240 pa_output_ior (rtx *operands)
3241 {
3242   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3243   int bs0, bs1, p, len;
3244 
3245   if (INTVAL (operands[2]) == 0)
3246     return "copy %1,%0";
3247 
3248   for (bs0 = 0; bs0 < 32; bs0++)
3249     if ((mask & (1 << bs0)) != 0)
3250       break;
3251 
3252   for (bs1 = bs0; bs1 < 32; bs1++)
3253     if ((mask & (1 << bs1)) == 0)
3254       break;
3255 
3256   gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3257 
3258   p = 31 - bs0;
3259   len = bs1 - bs0;
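
  /* For example (an illustrative case): a mask of 0x00000ff0 gives
     bs0 = 4 and bs1 = 12, so p = 27 and len = 8, and we emit
     "depwi -1,27,8,%0" to set bits 4..11.  */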
3260 
3261   operands[2] = GEN_INT (p);
3262   operands[3] = GEN_INT (len);
3263   return "{depi|depwi} -1,%2,%3,%0";
3264 }
3265 
3266 /* Return a string to perform a bitwise-or of operands[1] with operands[2]
3267    storing the result in operands[0].  This is the 64-bit version.  */
3268 const char *
3269 pa_output_64bit_ior (rtx *operands)
3270 {
3271   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3272   int bs0, bs1, p, len;
3273 
3274   if (INTVAL (operands[2]) == 0)
3275     return "copy %1,%0";
3276 
3277   for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3278     if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3279       break;
3280 
3281   for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3282     if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3283       break;
3284 
3285   gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3286 	      || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3287 
3288   p = 63 - bs0;
3289   len = bs1 - bs0;
3290 
3291   operands[2] = GEN_INT (p);
3292   operands[3] = GEN_INT (len);
3293   return "depdi -1,%2,%3,%0";
3294 }
3295 
3296 /* Target hook for assembling integer objects.  This code handles
3297    aligned SI and DI integers specially since function references
3298    must be preceded by P%.  */
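
/* For example, a word-aligned reference to function `foo' would be
   emitted as ".word P%foo" (".dword" in 64-bit mode), with the P%
   prefix omitted when fast indirect calls are enabled.  */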
3299 
3300 static bool
3301 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3302 {
3303   bool result;
3304   tree decl = NULL;
3305 
3306   /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to call
3307      assemble_external and set the SYMBOL_REF_DECL to NULL before
3308      calling output_addr_const.  Otherwise, it may call assemble_external
3309      in the midst of outputting the assembler code for the SYMBOL_REF.
3310      We restore the SYMBOL_REF_DECL after the output is done.  */
3311   if (GET_CODE (x) == SYMBOL_REF)
3312     {
3313       decl = SYMBOL_REF_DECL (x);
3314       if (decl)
3315 	{
3316 	  assemble_external (decl);
3317 	  SET_SYMBOL_REF_DECL (x, NULL);
3318 	}
3319     }
3320 
3321   if (size == UNITS_PER_WORD
3322       && aligned_p
3323       && function_label_operand (x, VOIDmode))
3324     {
3325       fputs (size == 8 ? "\t.dword\t" : "\t.word\t", asm_out_file);
3326 
3327       /* We don't want an OPD when generating fast indirect calls.  */
3328       if (!TARGET_FAST_INDIRECT_CALLS)
3329 	fputs ("P%", asm_out_file);
3330 
3331       output_addr_const (asm_out_file, x);
3332       fputc ('\n', asm_out_file);
3333       result = true;
3334     }
3335   else
3336     result = default_assemble_integer (x, size, aligned_p);
3337 
3338   if (decl)
3339     SET_SYMBOL_REF_DECL (x, decl);
3340 
3341   return result;
3342 }
3343 
3344 /* Output an ascii string.  */
3345 void
3346 pa_output_ascii (FILE *file, const char *p, int size)
3347 {
3348   int i;
3349   int chars_output;
3350   unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */
3351 
3352   /* The HP assembler can only take strings of 256 characters at one
3353      time.  This is a limitation on input line length, *not* the
3354      length of the string.  Sigh.  Even worse, it seems that the
3355      restriction is in number of input characters (see \xnn &
3356      \whatever).  So we have to do this very carefully.  */
3357 
3358   fputs ("\t.STRING \"", file);
3359 
3360   chars_output = 0;
3361   for (i = 0; i < size; i += 4)
3362     {
3363       int co = 0;
3364       int io = 0;
3365       for (io = 0, co = 0; io < MIN (4, size - i); io++)
3366 	{
3367 	  register unsigned int c = (unsigned char) p[i + io];
3368 
3369 	  if (c == '\"' || c == '\\')
3370 	    partial_output[co++] = '\\';
3371 	  if (c >= ' ' && c < 0177)
3372 	    partial_output[co++] = c;
3373 	  else
3374 	    {
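	      /* Emit the byte as a two-digit lowercase hex escape;
		 e.g., a 0x1b byte is output as "\x1b".  */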
3375 	      unsigned int hexd;
3376 	      partial_output[co++] = '\\';
3377 	      partial_output[co++] = 'x';
3378 	      hexd =  c  / 16 - 0 + '0';
3379 	      if (hexd > '9')
3380 		hexd -= '9' - 'a' + 1;
3381 	      partial_output[co++] = hexd;
3382 	      hexd =  c % 16 - 0 + '0';
3383 	      if (hexd > '9')
3384 		hexd -= '9' - 'a' + 1;
3385 	      partial_output[co++] = hexd;
3386 	    }
3387 	}
3388       if (chars_output + co > 243)
3389 	{
3390 	  fputs ("\"\n\t.STRING \"", file);
3391 	  chars_output = 0;
3392 	}
3393       fwrite (partial_output, 1, (size_t) co, file);
3394       chars_output += co;
3395       co = 0;
3396     }
3397   fputs ("\"\n", file);
3398 }
3399 
3400 /* Try to rewrite floating point comparisons & branches to avoid
3401    useless add,tr insns.
3402 
3403    CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3404    to see if FPCC is dead.  CHECK_NOTES is nonzero for the
3405    first attempt to remove useless add,tr insns.  It is zero
3406    for the second pass as reorg sometimes leaves bogus REG_DEAD
3407    notes lying around.
3408 
3409    When CHECK_NOTES is zero we can only eliminate add,tr insns
3410    when there's a 1:1 correspondence between fcmp and ftest/fbranch
3411    instructions.  */
3412 static void
3413 remove_useless_addtr_insns (int check_notes)
3414 {
3415   rtx_insn *insn;
3416   static int pass = 0;
3417 
3418   /* This is fairly cheap, so always run it when optimizing.  */
3419   if (optimize > 0)
3420     {
3421       int fcmp_count = 0;
3422       int fbranch_count = 0;
3423 
3424       /* Walk all the insns in this function looking for fcmp & fbranch
3425 	 instructions.  Keep track of how many of each we find.  */
3426       for (insn = get_insns (); insn; insn = next_insn (insn))
3427 	{
3428 	  rtx tmp;
3429 
3430 	  /* Ignore anything that isn't an INSN or a JUMP_INSN.  */
3431 	  if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
3432 	    continue;
3433 
3434 	  tmp = PATTERN (insn);
3435 
3436 	  /* It must be a set.  */
3437 	  if (GET_CODE (tmp) != SET)
3438 	    continue;
3439 
3440 	  /* If the destination is CCFP, then we've found an fcmp insn.  */
3441 	  tmp = SET_DEST (tmp);
3442 	  if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3443 	    {
3444 	      fcmp_count++;
3445 	      continue;
3446 	    }
3447 
3448 	  tmp = PATTERN (insn);
3449 	  /* If this is an fbranch instruction, bump the fbranch counter.  */
3450 	  if (GET_CODE (tmp) == SET
3451 	      && SET_DEST (tmp) == pc_rtx
3452 	      && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3453 	      && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3454 	      && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3455 	      && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3456 	    {
3457 	      fbranch_count++;
3458 	      continue;
3459 	    }
3460 	}
3461 
3462 
3463       /* Find all floating point compare + branch insns.  If possible,
3464 	 reverse the comparison & the branch to avoid add,tr insns.  */
3465       for (insn = get_insns (); insn; insn = next_insn (insn))
3466 	{
3467 	  rtx tmp;
3468 	  rtx_insn *next;
3469 
3470 	  /* Ignore anything that isn't an INSN.  */
3471 	  if (! NONJUMP_INSN_P (insn))
3472 	    continue;
3473 
3474 	  tmp = PATTERN (insn);
3475 
3476 	  /* It must be a set.  */
3477 	  if (GET_CODE (tmp) != SET)
3478 	    continue;
3479 
3480 	  /* The destination must be CCFP, which is register zero.  */
3481 	  tmp = SET_DEST (tmp);
3482 	  if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3483 	    continue;
3484 
3485 	  /* INSN should be a set of CCFP.
3486 
3487 	     See if the result of this insn is used in a reversed FP
3488 	     conditional branch.  If so, reverse our condition and
3489 	     the branch.  Doing so avoids useless add,tr insns.  */
3490 	  next = next_insn (insn);
3491 	  while (next)
3492 	    {
3493 	      /* Jumps, calls and labels stop our search.  */
3494 	      if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
3495 		break;
3496 
3497 	      /* As does another fcmp insn.  */
3498 	      if (NONJUMP_INSN_P (next)
3499 		  && GET_CODE (PATTERN (next)) == SET
3500 		  && GET_CODE (SET_DEST (PATTERN (next))) == REG
3501 		  && REGNO (SET_DEST (PATTERN (next))) == 0)
3502 		break;
3503 
3504 	      next = next_insn (next);
3505 	    }
3506 
3507 	  /* Is NEXT_INSN a branch?  */
3508 	  if (next && JUMP_P (next))
3509 	    {
3510 	      rtx pattern = PATTERN (next);
3511 
3512 	      /* If it is a reversed fp conditional branch (e.g. uses add,tr)
3513 		 and CCFP dies, then reverse our conditional and the branch
3514 		 to avoid the add,tr.  */
3515 	      if (GET_CODE (pattern) == SET
3516 		  && SET_DEST (pattern) == pc_rtx
3517 		  && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3518 		  && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3519 		  && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3520 		  && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3521 		  && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3522 		  && (fcmp_count == fbranch_count
3523 		      || (check_notes
3524 			  && find_regno_note (next, REG_DEAD, 0))))
3525 		{
3526 		  /* Reverse the branch.  */
3527 		  tmp = XEXP (SET_SRC (pattern), 1);
3528 		  XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3529 		  XEXP (SET_SRC (pattern), 2) = tmp;
3530 		  INSN_CODE (next) = -1;
3531 
3532 		  /* Reverse our condition.  */
3533 		  tmp = PATTERN (insn);
3534 		  PUT_CODE (XEXP (tmp, 1),
3535 			    (reverse_condition_maybe_unordered
3536 			     (GET_CODE (XEXP (tmp, 1)))));
3537 		}
3538 	    }
3539 	}
3540     }
3541 
3542   pass = !pass;
3543 
3544 }
3545 
3546 /* You may have trouble believing this, but this is the 32 bit HP-PA
3547    stack layout.  Wow.
3548 
3549    Offset		Contents
3550 
3551    Variable arguments	(optional; any number may be allocated)
3552 
3553    SP-(4*(N+9))		arg word N
3554    	:		    :
3555       SP-56		arg word 5
3556       SP-52		arg word 4
3557 
3558    Fixed arguments	(must be allocated; may remain unused)
3559 
3560       SP-48		arg word 3
3561       SP-44		arg word 2
3562       SP-40		arg word 1
3563       SP-36		arg word 0
3564 
3565    Frame Marker
3566 
3567       SP-32		External Data Pointer (DP)
3568       SP-28		External sr4
3569       SP-24		External/stub RP (RP')
3570       SP-20		Current RP
3571       SP-16		Static Link
3572       SP-12		Clean up
3573       SP-8		Calling Stub RP (RP'')
3574       SP-4		Previous SP
3575 
3576    Top of Frame
3577 
3578       SP-0		Stack Pointer (points to next available address)
3579 
3580 */
3581 
3582 /* This function saves registers as follows.  Registers marked with ' are
3583    this function's registers (as opposed to the previous function's).
3584    If a frame_pointer isn't needed, r4 is saved as a general register;
3585    the space for the frame pointer is still allocated, though, to keep
3586    things simple.
3587 
3588 
3589    Top of Frame
3590 
3591        SP (FP')		Previous FP
3592        SP + 4		Alignment filler (sigh)
3593        SP + 8		Space for locals reserved here.
3594        .
3595        .
3596        .
3597        SP + n		All call saved registers used.
3598        .
3599        .
3600        .
3601        SP + o		All call saved fp registers used.
3602        .
3603        .
3604        .
3605        SP + p (SP')	points to next available address.
3606 
3607 */
3608 
3609 /* Global variables set by output_function_prologue().  */
3610 /* Size of frame.  Need to know this to emit return insns from
3611    leaf procedures.  */
3612 static HOST_WIDE_INT actual_fsize, local_fsize;
3613 static int save_fregs;
3614 
3615 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3616    Handle case where DISP > 8k by using the add_high_const patterns.
3617 
3618    Note that in the DISP > 8k case, we leave the high part of the address
3619    in %r1.  There is code in pa_expand_{prologue,epilogue} that knows this.  */
3620 
3621 static void
3622 store_reg (int reg, HOST_WIDE_INT disp, int base)
3623 {
3624   rtx dest, src, basereg;
3625   rtx_insn *insn;
3626 
3627   src = gen_rtx_REG (word_mode, reg);
3628   basereg = gen_rtx_REG (Pmode, base);
3629   if (VAL_14_BITS_P (disp))
3630     {
3631       dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3632       insn = emit_move_insn (dest, src);
3633     }
3634   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3635     {
3636       rtx delta = GEN_INT (disp);
3637       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3638 
3639       emit_move_insn (tmpreg, delta);
3640       insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3641       if (DO_FRAME_NOTES)
3642 	{
3643 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3644 			gen_rtx_SET (tmpreg,
3645 				     gen_rtx_PLUS (Pmode, basereg, delta)));
3646 	  RTX_FRAME_RELATED_P (insn) = 1;
3647 	}
3648       dest = gen_rtx_MEM (word_mode, tmpreg);
3649       insn = emit_move_insn (dest, src);
3650     }
3651   else
3652     {
3653       rtx delta = GEN_INT (disp);
3654       rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3655       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3656 
3657       emit_move_insn (tmpreg, high);
3658       dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3659       insn = emit_move_insn (dest, src);
3660       if (DO_FRAME_NOTES)
3661 	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3662 		      gen_rtx_SET (gen_rtx_MEM (word_mode,
3663 						gen_rtx_PLUS (word_mode,
3664 							      basereg,
3665 							      delta)),
3666 				   src));
3667     }
3668 
3669   if (DO_FRAME_NOTES)
3670     RTX_FRAME_RELATED_P (insn) = 1;
3671 }
3672 
3673 /* Emit RTL to store REG at the memory location specified by BASE and then
3674    add MOD to BASE.  MOD must be <= 8k.  */
3675 
3676 static void
3677 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3678 {
3679   rtx basereg, srcreg, delta;
3680   rtx_insn *insn;
3681 
3682   gcc_assert (VAL_14_BITS_P (mod));
3683 
3684   basereg = gen_rtx_REG (Pmode, base);
3685   srcreg = gen_rtx_REG (word_mode, reg);
3686   delta = GEN_INT (mod);
3687 
3688   insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3689   if (DO_FRAME_NOTES)
3690     {
3691       RTX_FRAME_RELATED_P (insn) = 1;
3692 
3693       /* RTX_FRAME_RELATED_P must be set on each frame related set
3694 	 in a parallel with more than one element.  */
3695       RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3696       RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3697     }
3698 }
3699 
3700 /* Emit RTL to set REG to the value specified by BASE+DISP.  Handle case
3701    where DISP > 8k by using the add_high_const patterns.  NOTE indicates
3702    whether to add a frame note or not.
3703 
3704    In the DISP > 8k case, we leave the high part of the address in %r1.
3705    There is code in pa_expand_{prologue,epilogue} that knows about this.  */
3706 
3707 static void
3708 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3709 {
3710   rtx_insn *insn;
3711 
3712   if (VAL_14_BITS_P (disp))
3713     {
3714       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3715 			     plus_constant (Pmode,
3716 					    gen_rtx_REG (Pmode, base), disp));
3717     }
3718   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3719     {
3720       rtx basereg = gen_rtx_REG (Pmode, base);
3721       rtx delta = GEN_INT (disp);
3722       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3723 
3724       emit_move_insn (tmpreg, delta);
3725       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3726 			     gen_rtx_PLUS (Pmode, tmpreg, basereg));
3727       if (DO_FRAME_NOTES)
3728 	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3729 		      gen_rtx_SET (tmpreg,
3730 				   gen_rtx_PLUS (Pmode, basereg, delta)));
3731     }
3732   else
3733     {
3734       rtx basereg = gen_rtx_REG (Pmode, base);
3735       rtx delta = GEN_INT (disp);
3736       rtx tmpreg = gen_rtx_REG (Pmode, 1);
3737 
3738       emit_move_insn (tmpreg,
3739 		      gen_rtx_PLUS (Pmode, basereg,
3740 				    gen_rtx_HIGH (Pmode, delta)));
3741       insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3742 			     gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3743     }
3744 
3745   if (DO_FRAME_NOTES && note)
3746     RTX_FRAME_RELATED_P (insn) = 1;
3747 }
3748 
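/* Compute the size in bytes of the current function's stack frame,
   including space for the callee register saves, the fixed frame marker
   and the outgoing argument area.  If FREGS_LIVE is nonnull, set
   *FREGS_LIVE to 1 when any floating point register must be saved.  */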
3749 HOST_WIDE_INT
3750 pa_compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
3751 {
3752   int freg_saved = 0;
3753   int i, j;
3754 
3755   /* The code in pa_expand_prologue and pa_expand_epilogue must
3756      be consistent with the rounding and size calculation done here.
3757      Change them at the same time.  */
3758 
3759   /* We do our own stack alignment.  First, round the size of the
3760      stack locals up to a word boundary.  */
3761   size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3762 
3763   /* Space for previous frame pointer + filler.  If any frame is
3764      allocated, we need to add in the STARTING_FRAME_OFFSET.  We
3765      waste some space here for the sake of HP compatibility.  The
3766      first slot is only used when the frame pointer is needed.  */
3767   if (size || frame_pointer_needed)
3768     size += STARTING_FRAME_OFFSET;
3769 
3770   /* If the current function calls __builtin_eh_return, then we need
3771      to allocate stack space for registers that will hold data for
3772      the exception handler.  */
3773   if (DO_FRAME_NOTES && crtl->calls_eh_return)
3774     {
3775       unsigned int i;
3776 
3777       for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3778 	continue;
3779       size += i * UNITS_PER_WORD;
3780     }
3781 
3782   /* Account for space used by the callee general register saves.  */
3783   for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3784     if (df_regs_ever_live_p (i))
3785       size += UNITS_PER_WORD;
3786 
3787   /* Account for space used by the callee floating point register saves.  */
3788   for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3789     if (df_regs_ever_live_p (i)
3790 	|| (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3791       {
3792 	freg_saved = 1;
3793 
3794 	/* We always save both halves of the FP register, so always
3795 	   increment the frame size by 8 bytes.  */
3796 	size += 8;
3797       }
3798 
3799   /* If any of the floating registers are saved, account for the
3800      alignment needed for the floating point register save block.  */
3801   if (freg_saved)
3802     {
3803       size = (size + 7) & ~7;
3804       if (fregs_live)
3805 	*fregs_live = 1;
3806     }
3807 
3808   /* The various ABIs include space for the outgoing parameters in the
3809      size of the current function's stack frame.  We don't need to align
3810      for the outgoing arguments as their alignment is set by the final
3811      rounding for the frame as a whole.  */
3812   size += crtl->outgoing_args_size;
3813 
3814   /* Allocate space for the fixed frame marker.  This space must be
3815      allocated for any function that makes calls or allocates
3816      stack space.  */
3817   if (!crtl->is_leaf || size)
3818     size += TARGET_64BIT ? 48 : 32;
3819 
3820   /* Finally, round to the preferred stack boundary.  */
3821   return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3822 	  & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3823 }
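
/* For example (an illustrative sketch of the computation above, assuming
   the usual 32-bit values of 8 for STARTING_FRAME_OFFSET and a 64-byte
   PREFERRED_STACK_BOUNDARY): a non-leaf function with 40 bytes of locals
   and 16 bytes of outgoing arguments gets 40 + 8 = 48 bytes for locals
   and filler, 16 bytes of argument space and the 32-byte frame marker,
   and the final rounding raises the 96-byte total to a 128-byte frame.  */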
3824 
3825 /* Generate the assembly code for function entry.  FILE is a stdio
3826    stream to output the code to.  SIZE is an int: how many units of
3827    temporary storage to allocate.
3828 
3829    Refer to the array `regs_ever_live' to determine which registers to
3830    save; `regs_ever_live[I]' is nonzero if register number I is ever
3831    used in the function.  This function is responsible for knowing
3832    which registers should not be saved even if used.  */
3833 
3834 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3835    of memory.  If any fpu reg is used in the function, we allocate
3836    such a block here, at the bottom of the frame, just in case it's needed.
3837 
3838    If this function is a leaf procedure, then we may choose not
3839    to do a "save" insn.  The decision about whether or not
3840    to do this is made in regclass.c.  */
3841 
3842 static void
3843 pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3844 {
3845   /* The function's label and associated .PROC must never be
3846      separated and must be output *after* any profiling declarations
3847      to avoid changing spaces/subspaces within a procedure.  */
3848   ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3849   fputs ("\t.PROC\n", file);
3850 
3851   /* pa_expand_prologue does the dirty work now.  We just need
3852      to output the assembler directives which denote the start
3853      of a function.  */
3854   fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3855   if (crtl->is_leaf)
3856     fputs (",NO_CALLS", file);
3857   else
3858     fputs (",CALLS", file);
3859   if (rp_saved)
3860     fputs (",SAVE_RP", file);
3861 
3862   /* The SAVE_SP flag is used to indicate that register %r3 is stored
3863      at the beginning of the frame and that it is used as the frame
3864      pointer for the frame.  We do this because our current frame
3865      layout doesn't conform to that specified in the HP runtime
3866      documentation and we need a way to indicate to programs such as
3867      GDB where %r3 is saved.  The SAVE_SP flag was chosen because it
3868      isn't used by HP compilers but is supported by the assembler.
3869      However, SAVE_SP is supposed to indicate that the previous stack
3870      pointer has been saved in the frame marker.  */
3871   if (frame_pointer_needed)
3872     fputs (",SAVE_SP", file);
3873 
3874   /* Pass on information about the number of callee register saves
3875      performed in the prologue.
3876 
3877      The compiler is supposed to pass the highest register number
3878      saved, the assembler then has to adjust that number before
3879      entering it into the unwind descriptor (to account for any
3880      caller saved registers with lower register numbers than the
3881      first callee saved register).  */
3882   if (gr_saved)
3883     fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3884 
3885   if (fr_saved)
3886     fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3887 
3888   fputs ("\n\t.ENTRY\n", file);
3889 
3890   remove_useless_addtr_insns (0);
3891 }
3892 
3893 void
3894 pa_expand_prologue (void)
3895 {
3896   int merge_sp_adjust_with_store = 0;
3897   HOST_WIDE_INT size = get_frame_size ();
3898   HOST_WIDE_INT offset;
3899   int i;
3900   rtx tmpreg;
3901   rtx_insn *insn;
3902 
3903   gr_saved = 0;
3904   fr_saved = 0;
3905   save_fregs = 0;
3906 
3907   /* Compute total size for frame pointer, filler, locals and rounding to
3908      the next word boundary.  Similar code appears in pa_compute_frame_size
3909      and must be changed in tandem with this code.  */
3910   local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3911   if (local_fsize || frame_pointer_needed)
3912     local_fsize += STARTING_FRAME_OFFSET;
3913 
3914   actual_fsize = pa_compute_frame_size (size, &save_fregs);
3915   if (flag_stack_usage_info)
3916     current_function_static_stack_size = actual_fsize;
3917 
3918   /* Compute a few things we will use often.  */
3919   tmpreg = gen_rtx_REG (word_mode, 1);
3920 
3921   /* Save RP first.  The calling conventions manual states RP will
3922      always be stored into the caller's frame at sp - 20 or sp - 16
3923      depending on which ABI is in use.  */
3924   if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3925     {
3926       store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3927       rp_saved = true;
3928     }
3929   else
3930     rp_saved = false;
3931 
3932   /* Allocate the local frame and set up the frame pointer if needed.  */
3933   if (actual_fsize != 0)
3934     {
3935       if (frame_pointer_needed)
3936 	{
3937 	  /* Copy the old frame pointer temporarily into %r1.  Set up the
3938 	     new stack pointer, then store away the saved old frame pointer
3939 	     into the stack at sp and at the same time update the stack
3940 	     pointer by actual_fsize bytes.  There are two versions: the
3941 	     first handles small (<8k) frames, the second handles large
3942 	     (>=8k) frames.  */
3943 	  insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
3944 	  if (DO_FRAME_NOTES)
3945 	    RTX_FRAME_RELATED_P (insn) = 1;
3946 
3947 	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3948 	  if (DO_FRAME_NOTES)
3949 	    RTX_FRAME_RELATED_P (insn) = 1;
3950 
3951 	  if (VAL_14_BITS_P (actual_fsize))
3952 	    store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3953 	  else
3954 	    {
3955 	      /* It is incorrect to store the saved frame pointer at *sp,
3956 		 then increment sp (writes beyond the current stack boundary).
3957 
3958 		 So instead use stwm to store at *sp and post-increment the
3959 		 stack pointer as an atomic operation.  Then increment sp to
3960 		 finish allocating the new frame.  */
3961 	      HOST_WIDE_INT adjust1 = 8192 - 64;
3962 	      HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
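
	      /* For example (illustrative): a 10000-byte frame stores
		 with a post-modify of adjust1 = 8128 bytes and then
		 adds adjust2 = 1872 to finish the allocation.  */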
3963 
3964 	      store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3965 	      set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3966 			      adjust2, 1);
3967 	    }
3968 
3969 	  /* We set SAVE_SP in frames that need a frame pointer.  Thus,
3970 	     we need to store the previous stack pointer (frame pointer)
3971 	     into the frame marker on targets that use the HP unwind
3972 	     library.  This allows the HP unwind library to be used to
3973 	     unwind GCC frames.  However, we are not fully compatible
3974 	     with the HP library because our frame layout differs from
3975 	     that specified in the HP runtime specification.
3976 
3977 	     We don't want a frame note on this instruction as the frame
3978 	     marker moves during dynamic stack allocation.
3979 
3980 	     This instruction also serves as a blockage to prevent
3981 	     register spills from being scheduled before the stack
3982 	     pointer is raised.  This is necessary as we store
3983 	     registers using the frame pointer as a base register,
3984 	     and the frame pointer is set before sp is raised.  */
3985 	  if (TARGET_HPUX_UNWIND_LIBRARY)
3986 	    {
3987 	      rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3988 				       GEN_INT (TARGET_64BIT ? -8 : -4));
3989 
3990 	      emit_move_insn (gen_rtx_MEM (word_mode, addr),
3991 			      hard_frame_pointer_rtx);
3992 	    }
3993 	  else
3994 	    emit_insn (gen_blockage ());
3995 	}
3996       /* no frame pointer needed.  */
3997       else
3998 	{
3999 	  /* In some cases we can perform the first callee register save
4000 	     and allocating the stack frame at the same time.   If so, just
4001 	     make a note of it and defer allocating the frame until saving
4002 	     the callee registers.  */
4003 	  if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
4004 	    merge_sp_adjust_with_store = 1;
4005 	  /* Cannot optimize.  Adjust the stack frame by actual_fsize
4006 	     bytes.  */
4007 	  else
4008 	    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4009 			    actual_fsize, 1);
4010 	}
4011     }
4012 
4013   /* Normal register save.
4014 
4015      Do not save the frame pointer in the frame_pointer_needed case.  It
4016      was done earlier.  */
4017   if (frame_pointer_needed)
4018     {
4019       offset = local_fsize;
4020 
4021       /* Saving the EH return data registers in the frame is the simplest
4022 	 way to get the frame unwind information emitted.  We put them
4023 	 just before the general registers.  */
4024       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4025 	{
4026 	  unsigned int i, regno;
4027 
4028 	  for (i = 0; ; ++i)
4029 	    {
4030 	      regno = EH_RETURN_DATA_REGNO (i);
4031 	      if (regno == INVALID_REGNUM)
4032 		break;
4033 
4034 	      store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4035 	      offset += UNITS_PER_WORD;
4036 	    }
4037 	}
4038 
4039       for (i = 18; i >= 4; i--)
4040 	if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4041 	  {
4042 	    store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4043 	    offset += UNITS_PER_WORD;
4044 	    gr_saved++;
4045 	  }
4046       /* Account for %r3 which is saved in a special place.  */
4047       gr_saved++;
4048     }
4049   /* No frame pointer needed.  */
4050   else
4051     {
4052       offset = local_fsize - actual_fsize;
4053 
4054       /* Saving the EH return data registers in the frame is the simplest
4055          way to get the frame unwind information emitted.  */
4056       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4057 	{
4058 	  unsigned int i, regno;
4059 
4060 	  for (i = 0; ; ++i)
4061 	    {
4062 	      regno = EH_RETURN_DATA_REGNO (i);
4063 	      if (regno == INVALID_REGNUM)
4064 		break;
4065 
4066 	      /* If merge_sp_adjust_with_store is nonzero, then we can
4067 		 optimize the first save.  */
4068 	      if (merge_sp_adjust_with_store)
4069 		{
4070 		  store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4071 		  merge_sp_adjust_with_store = 0;
4072 		}
4073 	      else
4074 		store_reg (regno, offset, STACK_POINTER_REGNUM);
4075 	      offset += UNITS_PER_WORD;
4076 	    }
4077 	}
4078 
4079       for (i = 18; i >= 3; i--)
4080       	if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4081 	  {
4082 	    /* If merge_sp_adjust_with_store is nonzero, then we can
4083 	       optimize the first GR save.  */
4084 	    if (merge_sp_adjust_with_store)
4085 	      {
4086 		store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4087 		merge_sp_adjust_with_store = 0;
4088 	      }
4089 	    else
4090 	      store_reg (i, offset, STACK_POINTER_REGNUM);
4091 	    offset += UNITS_PER_WORD;
4092 	    gr_saved++;
4093 	  }
4094 
4095       /* If we wanted to merge the SP adjustment with a GR save, but we never
4096 	 did any GR saves, then just emit the adjustment here.  */
4097       if (merge_sp_adjust_with_store)
4098 	set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4099 			actual_fsize, 1);
4100     }
4101 
4102   /* The hppa calling conventions say that %r19, the pic offset
4103      register, is saved at sp - 32 (in this function's frame)
4104      when generating PIC code.  FIXME:  What is the correct thing
4105      to do for functions which make no calls and allocate no
4106      frame?  Do we need to allocate a frame, or can we just omit
4107      the save?   For now we'll just omit the save.
4108 
4109      We don't want a note on this insn as the frame marker can
4110      move if there is a dynamic stack allocation.  */
4111   if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4112     {
4113       rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4114 
4115       emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4116 
4117     }
4118 
4119   /* Align pointer properly (doubleword boundary).  */
4120   offset = (offset + 7) & ~7;
4121 
4122   /* Floating point register store.  */
4123   if (save_fregs)
4124     {
4125       rtx base;
4126 
4127       /* First get the frame or stack pointer to the start of the FP register
4128 	 save area.  */
4129       if (frame_pointer_needed)
4130 	{
4131 	  set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4132 	  base = hard_frame_pointer_rtx;
4133 	}
4134       else
4135 	{
4136 	  set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4137 	  base = stack_pointer_rtx;
4138 	}
4139 
4140       /* Now actually save the FP registers.  */
4141       for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4142 	{
4143 	  if (df_regs_ever_live_p (i)
4144 	      || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4145 	    {
4146 	      rtx addr, reg;
4147 	      rtx_insn *insn;
4148 	      addr = gen_rtx_MEM (DFmode,
4149 				  gen_rtx_POST_INC (word_mode, tmpreg));
4150 	      reg = gen_rtx_REG (DFmode, i);
4151 	      insn = emit_move_insn (addr, reg);
4152 	      if (DO_FRAME_NOTES)
4153 		{
4154 		  RTX_FRAME_RELATED_P (insn) = 1;
4155 		  if (TARGET_64BIT)
4156 		    {
4157 		      rtx mem = gen_rtx_MEM (DFmode,
4158 					     plus_constant (Pmode, base,
4159 							    offset));
4160 		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4161 				    gen_rtx_SET (mem, reg));
4162 		    }
4163 		  else
4164 		    {
4165 		      rtx meml = gen_rtx_MEM (SFmode,
4166 					      plus_constant (Pmode, base,
4167 							     offset));
4168 		      rtx memr = gen_rtx_MEM (SFmode,
4169 					      plus_constant (Pmode, base,
4170 							     offset + 4));
4171 		      rtx regl = gen_rtx_REG (SFmode, i);
4172 		      rtx regr = gen_rtx_REG (SFmode, i + 1);
4173 		      rtx setl = gen_rtx_SET (meml, regl);
4174 		      rtx setr = gen_rtx_SET (memr, regr);
4175 		      rtvec vec;
4176 
4177 		      RTX_FRAME_RELATED_P (setl) = 1;
4178 		      RTX_FRAME_RELATED_P (setr) = 1;
4179 		      vec = gen_rtvec (2, setl, setr);
4180 		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4181 				    gen_rtx_SEQUENCE (VOIDmode, vec));
4182 		    }
4183 		}
4184 	      offset += GET_MODE_SIZE (DFmode);
4185 	      fr_saved++;
4186 	    }
4187 	}
4188     }
4189 }
4190 
4191 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4192    Handle case where DISP > 8k by using the add_high_const patterns.  */
4193 
4194 static void
4195 load_reg (int reg, HOST_WIDE_INT disp, int base)
4196 {
4197   rtx dest = gen_rtx_REG (word_mode, reg);
4198   rtx basereg = gen_rtx_REG (Pmode, base);
4199   rtx src;
4200 
4201   if (VAL_14_BITS_P (disp))
4202     src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4203   else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4204     {
4205       rtx delta = GEN_INT (disp);
4206       rtx tmpreg = gen_rtx_REG (Pmode, 1);
4207 
4208       emit_move_insn (tmpreg, delta);
4209       if (TARGET_DISABLE_INDEXING)
4210 	{
4211 	  emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4212 	  src = gen_rtx_MEM (word_mode, tmpreg);
4213 	}
4214       else
4215 	src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4216     }
4217   else
4218     {
4219       rtx delta = GEN_INT (disp);
4220       rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4221       rtx tmpreg = gen_rtx_REG (Pmode, 1);
4222 
4223       emit_move_insn (tmpreg, high);
4224       src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4225     }
4226 
4227   emit_move_insn (dest, src);
4228 }
4229 
4230 /* Update the total code bytes output to the text section.  */
4231 
4232 static void
4233 update_total_code_bytes (unsigned int nbytes)
4234 {
4235   if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4236       && !IN_NAMED_SECTION_P (cfun->decl))
4237     {
4238       unsigned int old_total = total_code_bytes;
4239 
4240       total_code_bytes += nbytes;
4241 
4242       /* Be prepared to handle overflows.  */
4243       if (old_total > total_code_bytes)
4244         total_code_bytes = UINT_MAX;
4245     }
4246 }
4247 
4248 /* This function generates the assembly code for function exit.
4249    Args are as for output_function_prologue ().
4250 
4251    The function epilogue should not depend on the current stack
4252    pointer!  It should use the frame pointer only.  This is mandatory
4253    because of alloca; we also take advantage of it to omit stack
4254    adjustments before returning.  */
4255 
4256 static void
4257 pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4258 {
4259   rtx_insn *insn = get_last_insn ();
4260   bool extra_nop;
4261 
4262   /* pa_expand_epilogue does the dirty work now.  We just need
4263      to output the assembler directives which denote the end
4264      of a function.
4265 
4266      To make debuggers happy, emit a nop if the epilogue was completely
4267      eliminated due to a volatile call as the last insn in the
4268      current function.  That way the return address (in %r2) will
4269      always point to a valid instruction in the current function.  */
4270 
4271   /* Get the last real insn.  */
4272   if (NOTE_P (insn))
4273     insn = prev_real_insn (insn);
4274 
4275   /* If it is a sequence, then look inside.  */
4276   if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
4277     insn = as_a <rtx_sequence *> (PATTERN (insn))->insn (0);
4278 
4279   /* If insn is a CALL_INSN, then it must be a call to a volatile
4280      function (otherwise there would be epilogue insns).  */
4281   if (insn && CALL_P (insn))
4282     {
4283       fputs ("\tnop\n", file);
4284       extra_nop = true;
4285     }
4286   else
4287     extra_nop = false;
4288 
4289   fputs ("\t.EXIT\n\t.PROCEND\n", file);
4290 
4291   if (TARGET_SOM && TARGET_GAS)
4292     {
4293       /* We are done with this subspace except possibly for some additional
4294 	 debug information.  Forget that we are in this subspace to ensure
4295 	 that the next function is output in its own subspace.  */
4296       in_section = NULL;
4297       cfun->machine->in_nsubspa = 2;
4298     }
4299 
4300   /* Thunks do their own insn accounting.  */
4301   if (cfun->is_thunk)
4302     return;
4303 
4304   if (INSN_ADDRESSES_SET_P ())
4305     {
4306       last_address = extra_nop ? 4 : 0;
4307       insn = get_last_nonnote_insn ();
4308       if (insn)
4309 	{
4310 	  last_address += INSN_ADDRESSES (INSN_UID (insn));
4311 	  if (INSN_P (insn))
4312 	    last_address += insn_default_length (insn);
4313 	}
4314       last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4315 		      & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4316     }
4317   else
4318     last_address = UINT_MAX;
4319 
4320   /* Finally, update the total number of code bytes output so far.  */
4321   update_total_code_bytes (last_address);
4322 }
4323 
4324 void
4325 pa_expand_epilogue (void)
4326 {
4327   rtx tmpreg;
4328   HOST_WIDE_INT offset;
4329   HOST_WIDE_INT ret_off = 0;
4330   int i;
4331   int merge_sp_adjust_with_load = 0;
4332 
4333   /* We will use this often.  */
4334   tmpreg = gen_rtx_REG (word_mode, 1);
4335 
4336   /* Try to restore RP early to avoid load/use interlocks when
4337      RP gets used in the return (bv) instruction.  This appears to still
4338      be necessary even when we schedule the prologue and epilogue.  */
4339   if (rp_saved)
4340     {
4341       ret_off = TARGET_64BIT ? -16 : -20;
4342       if (frame_pointer_needed)
4343 	{
4344 	  load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4345 	  ret_off = 0;
4346 	}
4347       else
4348 	{
4349 	  /* No frame pointer, and stack is smaller than 8k.  */
4350 	  if (VAL_14_BITS_P (ret_off - actual_fsize))
4351 	    {
4352 	      load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4353 	      ret_off = 0;
4354 	    }
4355 	}
4356     }
4357 
4358   /* General register restores.  */
4359   if (frame_pointer_needed)
4360     {
4361       offset = local_fsize;
4362 
4363       /* If the current function calls __builtin_eh_return, then we need
4364          to restore the saved EH data registers.  */
4365       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4366 	{
4367 	  unsigned int i, regno;
4368 
4369 	  for (i = 0; ; ++i)
4370 	    {
4371 	      regno = EH_RETURN_DATA_REGNO (i);
4372 	      if (regno == INVALID_REGNUM)
4373 		break;
4374 
4375 	      load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4376 	      offset += UNITS_PER_WORD;
4377 	    }
4378 	}
4379 
4380       for (i = 18; i >= 4; i--)
4381 	if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4382 	  {
4383 	    load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4384 	    offset += UNITS_PER_WORD;
4385 	  }
4386     }
4387   else
4388     {
4389       offset = local_fsize - actual_fsize;
4390 
4391       /* If the current function calls __builtin_eh_return, then we need
4392          to restore the saved EH data registers.  */
4393       if (DO_FRAME_NOTES && crtl->calls_eh_return)
4394 	{
4395 	  unsigned int i, regno;
4396 
4397 	  for (i = 0; ; ++i)
4398 	    {
4399 	      regno = EH_RETURN_DATA_REGNO (i);
4400 	      if (regno == INVALID_REGNUM)
4401 		break;
4402 
4403 	      /* Only for the first load.
4404 	         merge_sp_adjust_with_load holds the register number of the
4405 	         load with which we will merge the sp adjustment.  */
4406 	      if (merge_sp_adjust_with_load == 0
4407 		  && local_fsize == 0
4408 		  && VAL_14_BITS_P (-actual_fsize))
4409 	        merge_sp_adjust_with_load = regno;
4410 	      else
4411 		load_reg (regno, offset, STACK_POINTER_REGNUM);
4412 	      offset += UNITS_PER_WORD;
4413 	    }
4414 	}
4415 
4416       for (i = 18; i >= 3; i--)
4417 	{
4418 	  if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4419 	    {
4420 	      /* Only for the first load.
4421 	         merge_sp_adjust_with_load holds the register number of the
4422 	         load with which we will merge the sp adjustment.  */
4423 	      if (merge_sp_adjust_with_load == 0
4424 		  && local_fsize == 0
4425 		  && VAL_14_BITS_P (-actual_fsize))
4426 	        merge_sp_adjust_with_load = i;
4427 	      else
4428 		load_reg (i, offset, STACK_POINTER_REGNUM);
4429 	      offset += UNITS_PER_WORD;
4430 	    }
4431 	}
4432     }
4433 
4434   /* Align pointer properly (doubleword boundary).  */
4435   offset = (offset + 7) & ~7;
4436 
4437   /* FP register restores.  */
4438   if (save_fregs)
4439     {
4440       /* Adjust the register to index off of.  */
4441       if (frame_pointer_needed)
4442 	set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4443       else
4444 	set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4445 
4446       /* Actually do the restores now.  */
4447       for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4448 	if (df_regs_ever_live_p (i)
4449 	    || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4450 	  {
4451 	    rtx src = gen_rtx_MEM (DFmode,
4452 				   gen_rtx_POST_INC (word_mode, tmpreg));
4453 	    rtx dest = gen_rtx_REG (DFmode, i);
4454 	    emit_move_insn (dest, src);
4455 	  }
4456     }
4457 
4458   /* Emit a blockage insn here to keep these insns from being moved to
4459      an earlier spot in the epilogue, or into the main instruction stream.
4460 
4461      This is necessary as we must not cut the stack back before all the
4462      restores are finished.  */
4463   emit_insn (gen_blockage ());
4464 
4465   /* Reset stack pointer (and possibly frame pointer).  The stack
4466      pointer is initially set to fp + 64 to avoid a race condition.  */
4467   if (frame_pointer_needed)
4468     {
4469       rtx delta = GEN_INT (-64);
4470 
4471       set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4472       emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4473 			       stack_pointer_rtx, delta));
4474     }
4475   /* If we were deferring a callee register restore, do it now.  */
4476   else if (merge_sp_adjust_with_load)
4477     {
4478       rtx delta = GEN_INT (-actual_fsize);
4479       rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4480 
4481       emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4482     }
4483   else if (actual_fsize != 0)
4484     set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4485 		    - actual_fsize, 0);
4486 
4487   /* If we haven't restored %r2 yet (no frame pointer, and a stack
4488      frame greater than 8k), do so now.  */
4489   if (ret_off != 0)
4490     load_reg (2, ret_off, STACK_POINTER_REGNUM);
4491 
4492   if (DO_FRAME_NOTES && crtl->calls_eh_return)
4493     {
4494       rtx sa = EH_RETURN_STACKADJ_RTX;
4495 
4496       emit_insn (gen_blockage ());
4497       emit_insn (TARGET_64BIT
4498 		 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4499 		 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4500     }
4501 }
4502 
4503 bool
4504 pa_can_use_return_insn (void)
4505 {
4506   if (!reload_completed)
4507     return false;
4508 
4509   if (frame_pointer_needed)
4510     return false;
4511 
4512   if (df_regs_ever_live_p (2))
4513     return false;
4514 
4515   if (crtl->profile)
4516     return false;
4517 
4518   return pa_compute_frame_size (get_frame_size (), 0) == 0;
4519 }
4520 
4521 rtx
4522 hppa_pic_save_rtx (void)
4523 {
4524   return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4525 }
4526 
4527 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4528 #define NO_DEFERRED_PROFILE_COUNTERS 0
4529 #endif
4530 
4531 
4532 /* Vector of funcdef numbers.  */
4533 static vec<int> funcdef_nos;
4534 
4535 /* Output deferred profile counters.  */
4536 static void
4537 output_deferred_profile_counters (void)
4538 {
4539   unsigned int i;
4540   int align, n;
4541 
4542   if (funcdef_nos.is_empty ())
4543    return;
4544 
4545   switch_to_section (data_section);
4546   align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4547   ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4548 
4549   for (i = 0; funcdef_nos.iterate (i, &n); i++)
4550     {
4551       targetm.asm_out.internal_label (asm_out_file, "LP", n);
4552       assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4553     }
4554 
4555   funcdef_nos.release ();
4556 }
4557 
4558 void
4559 hppa_profile_hook (int label_no)
4560 {
4561   /* We use SImode for the address of the function in both 32 and
4562      64-bit code to avoid having to provide DImode versions of the
4563      lcla2 and load_offset_label_address insn patterns.  */
4564   rtx reg = gen_reg_rtx (SImode);
4565   rtx_code_label *label_rtx = gen_label_rtx ();
4566   int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE);
4567   rtx arg_bytes, begin_label_rtx, mcount, sym;
4568   rtx_insn *call_insn;
4569   char begin_label_name[16];
4570   bool use_mcount_pcrel_call;
4571 
4572   /* Set up call destination.  */
4573   sym = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
4574   pa_encode_label (sym);
4575   mcount = gen_rtx_MEM (Pmode, sym);
4576 
4577   /* If we can reach _mcount with a pc-relative call, we can optimize
4578      loading the address of the current function.  This requires linker
4579      long branch stub support.  */
4580   if (!TARGET_PORTABLE_RUNTIME
4581       && !TARGET_LONG_CALLS
4582       && (TARGET_SOM || flag_function_sections))
4583     use_mcount_pcrel_call = true;
4584   else
4585     use_mcount_pcrel_call = false;
4586 
4587   ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4588 			       label_no);
4589   begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4590 
4591   emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4592 
4593   if (!use_mcount_pcrel_call)
4594     {
4595       /* The address of the function is loaded into %r25 with an instruction-
4596 	 relative sequence that avoids the use of relocations.  The sequence
4597 	 is split so that the load_offset_label_address instruction can
4598 	 occupy the delay slot of the call to _mcount.  */
4599       if (TARGET_PA_20)
4600 	emit_insn (gen_lcla2 (reg, label_rtx));
4601       else
4602 	emit_insn (gen_lcla1 (reg, label_rtx));
4603 
4604       emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4605 						reg,
4606 						begin_label_rtx,
4607 						label_rtx));
4608     }
4609 
4610   if (!NO_DEFERRED_PROFILE_COUNTERS)
4611     {
4612       rtx count_label_rtx, addr, r24;
4613       char count_label_name[16];
4614 
4615       funcdef_nos.safe_push (label_no);
4616       ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4617       count_label_rtx = gen_rtx_SYMBOL_REF (Pmode,
4618 					    ggc_strdup (count_label_name));
4619 
4620       addr = force_reg (Pmode, count_label_rtx);
4621       r24 = gen_rtx_REG (Pmode, 24);
4622       emit_move_insn (r24, addr);
4623 
4624       arg_bytes = GEN_INT (TARGET_64BIT ? 24 : 12);
4625       if (use_mcount_pcrel_call)
4626 	call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4627 						     begin_label_rtx));
4628       else
4629 	call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4630 
4631       use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4632     }
4633   else
4634     {
4635       arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8);
4636       if (use_mcount_pcrel_call)
4637 	call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4638 						     begin_label_rtx));
4639       else
4640 	call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4641     }
4642 
4643   use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4644   use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4645 
4646   /* Indicate the _mcount call cannot throw, nor will it execute a
4647      non-local goto.  */
4648   make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4649 
4650   /* Allocate space for fixed arguments.  */
4651   if (reg_parm_stack_space > crtl->outgoing_args_size)
4652     crtl->outgoing_args_size = reg_parm_stack_space;
4653 }
4654 
4655 /* Fetch the return address for the frame COUNT steps up from
4656    the current frame, after the prologue.  FRAMEADDR is the
4657    frame pointer of the COUNT frame.
4658 
4659    We want to ignore any export stub remnants here.  To handle this,
4660    we examine the code at the return address, and if it is an export
4661    stub, we return a memory rtx for the stub return address stored
4662    at frame-24.
4663 
4664    The value returned is used in two different ways:
4665 
4666 	1. To find a function's caller.
4667 
4668 	2. To change the return address for a function.
4669 
4670    This function handles most instances of case 1; however, it will
4671    fail if there are two levels of stubs to execute on the return
4672    path.  The only way I believe that can happen is if the return value
4673    needs a parameter relocation, which never happens for C code.
4674 
4675    This function handles most instances of case 2; however, it will
4676    fail if we did not originally have stub code on the return path
4677    but will need stub code on the new return path.  This can happen if
4678    the caller & callee are both in the main program, but the new
4679    return location is in a shared library.  */
4680 
4681 rtx
4682 pa_return_addr_rtx (int count, rtx frameaddr)
4683 {
4684   rtx label;
4685   rtx rp;
4686   rtx saved_rp;
4687   rtx ins;
4688 
4689   /* The instruction stream at the return address of a PA1.X export stub is:
4690 
4691 	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
4692 	0x004010a1 | stub+12:  ldsid (sr0,rp),r1
4693 	0x00011820 | stub+16:  mtsp r1,sr0
4694 	0xe0400002 | stub+20:  be,n 0(sr0,rp)
4695 
4696      0xe0400002 must be specified as -532676606 so that it won't be
4697      rejected as an invalid immediate operand on 64-bit hosts.
4698 
4699      The instruction stream at the return address of a PA2.0 export stub is:
4700 
4701 	0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
4702 	0xe840d002 | stub+12:  bve,n (rp)
4703 
     Likewise, 0xe840d002 is specified as -398405630 below.  */
4704 
4705   HOST_WIDE_INT insns[4];
4706   int i, len;
4707 
4708   if (count != 0)
4709     return NULL_RTX;
4710 
4711   rp = get_hard_reg_initial_val (Pmode, 2);
4712 
4713   if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4714     return rp;
4715 
4716   /* If there is no export stub then just use the value saved from
4717      the return pointer register.  */
4718 
4719   saved_rp = gen_reg_rtx (Pmode);
4720   emit_move_insn (saved_rp, rp);
4721 
4722   /* Get pointer to the instruction stream.  We have to mask out the
4723      privilege level from the two low order bits of the return address
4724      pointer here so that ins will point to the start of the first
4725      instruction that would have been executed if we returned.  */
4726   ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4727   label = gen_label_rtx ();
4728 
4729   if (TARGET_PA_20)
4730     {
4731       insns[0] = 0x4bc23fd1;
4732       insns[1] = -398405630;
4733       len = 2;
4734     }
4735   else
4736     {
4737       insns[0] = 0x4bc23fd1;
4738       insns[1] = 0x004010a1;
4739       insns[2] = 0x00011820;
4740       insns[3] = -532676606;
4741       len = 4;
4742     }
4743 
4744   /* Check the instruction stream at the normal return address for the
4745      export stub.  If it is an export stub, then our return address is
4746      really in -24[frameaddr].  */
4747 
4748   for (i = 0; i < len; i++)
4749     {
4750       rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4751       rtx op1 = GEN_INT (insns[i]);
4752       emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4753     }
4754 
4755   /* Here we know that our return address points to an export
4756      stub.  We don't want to return the address of the export stub,
4757      but rather the return address of the export stub.  That return
4758      address is stored at -24[frameaddr].  */
4759 
4760   emit_move_insn (saved_rp,
4761 		  gen_rtx_MEM (Pmode,
4762 			       memory_address (Pmode,
4763 					       plus_constant (Pmode, frameaddr,
4764 							      -24))));
4765 
4766   emit_label (label);
4767 
4768   return saved_rp;
4769 }
4770 
4771 void
4772 pa_emit_bcond_fp (rtx operands[])
4773 {
4774   enum rtx_code code = GET_CODE (operands[0]);
4775   rtx operand0 = operands[1];
4776   rtx operand1 = operands[2];
4777   rtx label = operands[3];
4778 
4779   emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
4780 		          gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4781 
4782   emit_jump_insn (gen_rtx_SET (pc_rtx,
4783 			       gen_rtx_IF_THEN_ELSE (VOIDmode,
4784 						     gen_rtx_fmt_ee (NE,
4785 							      VOIDmode,
4786 							      gen_rtx_REG (CCFPmode, 0),
4787 							      const0_rtx),
4788 						     gen_rtx_LABEL_REF (VOIDmode, label),
4789 						     pc_rtx)));
4790 
4791 }
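
/* As a sketch, a comparison (lt:DF x y) with branch target LAB expands
   to RTL of roughly this shape (modes abbreviated):

	(set (reg:CCFP 0) (lt (reg x) (reg y)))
	(set (pc) (if_then_else (ne (reg:CCFP 0) (const_int 0))
				(label_ref LAB)
				(pc)))  */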
4792 
4793 /* Adjust the cost of a scheduling dependency.  Return the new cost of
4794    a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
4795 
4796 static int
4797 pa_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
4798 {
4799   enum attr_type attr_type;
4800 
4801   /* Don't adjust costs for a pa8000 chip, also do not adjust any
4802      true dependencies as they are described with bypasses now.  */
4803   if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4804     return cost;
4805 
4806   if (! recog_memoized (insn))
4807     return 0;
4808 
4809   attr_type = get_attr_type (insn);
4810 
4811   switch (REG_NOTE_KIND (link))
4812     {
4813     case REG_DEP_ANTI:
4814       /* Anti dependency; DEP_INSN reads a register that INSN writes some
4815 	 cycles later.  */
4816 
4817       if (attr_type == TYPE_FPLOAD)
4818 	{
4819 	  rtx pat = PATTERN (insn);
4820 	  rtx dep_pat = PATTERN (dep_insn);
4821 	  if (GET_CODE (pat) == PARALLEL)
4822 	    {
4823 	      /* This happens for the fldXs,mb patterns.  */
4824 	      pat = XVECEXP (pat, 0, 0);
4825 	    }
4826 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4827 	    /* If this happens, we have to extend this to schedule
4828 	       optimally.  Return 0 for now.  */
4829 	    return 0;
4830 
4831 	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4832 	    {
4833 	      if (! recog_memoized (dep_insn))
4834 		return 0;
4835 	      switch (get_attr_type (dep_insn))
4836 		{
4837 		case TYPE_FPALU:
4838 		case TYPE_FPMULSGL:
4839 		case TYPE_FPMULDBL:
4840 		case TYPE_FPDIVSGL:
4841 		case TYPE_FPDIVDBL:
4842 		case TYPE_FPSQRTSGL:
4843 		case TYPE_FPSQRTDBL:
4844 		  /* A fpload can't be issued until one cycle before a
4845 		     preceding arithmetic operation has finished if
4846 		     the target of the fpload is any of the sources
4847 		     (or destination) of the arithmetic operation.  */
4848 		  return insn_default_latency (dep_insn) - 1;
4849 
4850 		default:
4851 		  return 0;
4852 		}
4853 	    }
4854 	}
4855       else if (attr_type == TYPE_FPALU)
4856 	{
4857 	  rtx pat = PATTERN (insn);
4858 	  rtx dep_pat = PATTERN (dep_insn);
4859 	  if (GET_CODE (pat) == PARALLEL)
4860 	    {
4861 	      /* This happens for the fldXs,mb patterns.  */
4862 	      pat = XVECEXP (pat, 0, 0);
4863 	    }
4864 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4865 	    /* If this happens, we have to extend this to schedule
4866 	       optimally.  Return 0 for now.  */
4867 	    return 0;
4868 
4869 	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4870 	    {
4871 	      if (! recog_memoized (dep_insn))
4872 		return 0;
4873 	      switch (get_attr_type (dep_insn))
4874 		{
4875 		case TYPE_FPDIVSGL:
4876 		case TYPE_FPDIVDBL:
4877 		case TYPE_FPSQRTSGL:
4878 		case TYPE_FPSQRTDBL:
4879 		  /* An ALU flop can't be issued until two cycles before a
4880 		     preceding divide or sqrt operation has finished if
4881 		     the target of the ALU flop is any of the sources
4882 		     (or destination) of the divide or sqrt operation.  */
4883 		  return insn_default_latency (dep_insn) - 2;
4884 
4885 		default:
4886 		  return 0;
4887 		}
4888 	    }
4889 	}
4890 
4891       /* For other anti dependencies, the cost is 0.  */
4892       return 0;
4893 
4894     case REG_DEP_OUTPUT:
4895       /* Output dependency; DEP_INSN writes a register that INSN writes some
4896 	 cycles later.  */
4897       if (attr_type == TYPE_FPLOAD)
4898 	{
4899 	  rtx pat = PATTERN (insn);
4900 	  rtx dep_pat = PATTERN (dep_insn);
4901 	  if (GET_CODE (pat) == PARALLEL)
4902 	    {
4903 	      /* This happens for the fldXs,mb patterns.  */
4904 	      pat = XVECEXP (pat, 0, 0);
4905 	    }
4906 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4907 	    /* If this happens, we have to extend this to schedule
4908 	       optimally.  Return 0 for now.  */
4909 	    return 0;
4910 
4911 	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4912 	    {
4913 	      if (! recog_memoized (dep_insn))
4914 		return 0;
4915 	      switch (get_attr_type (dep_insn))
4916 		{
4917 		case TYPE_FPALU:
4918 		case TYPE_FPMULSGL:
4919 		case TYPE_FPMULDBL:
4920 		case TYPE_FPDIVSGL:
4921 		case TYPE_FPDIVDBL:
4922 		case TYPE_FPSQRTSGL:
4923 		case TYPE_FPSQRTDBL:
4924 		  /* A fpload can't be issued until one cycle before a
4925 		     preceding arithmetic operation has finished if
4926 		     the target of the fpload is the destination of the
4927 		     arithmetic operation.
4928 
4929 		     Exception: For PA7100LC, PA7200 and PA7300, the cost
4930 		     is 3 cycles, unless they bundle together.   We also
4931 		     pay the penalty if the second insn is a fpload.  */
4932 		  return insn_default_latency (dep_insn) - 1;
4933 
4934 		default:
4935 		  return 0;
4936 		}
4937 	    }
4938 	}
4939       else if (attr_type == TYPE_FPALU)
4940 	{
4941 	  rtx pat = PATTERN (insn);
4942 	  rtx dep_pat = PATTERN (dep_insn);
4943 	  if (GET_CODE (pat) == PARALLEL)
4944 	    {
4945 	      /* This happens for the fldXs,mb patterns.  */
4946 	      pat = XVECEXP (pat, 0, 0);
4947 	    }
4948 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4949 	    /* If this happens, we have to extend this to schedule
4950 	       optimally.  Return 0 for now.  */
4951 	    return 0;
4952 
4953 	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4954 	    {
4955 	      if (! recog_memoized (dep_insn))
4956 		return 0;
4957 	      switch (get_attr_type (dep_insn))
4958 		{
4959 		case TYPE_FPDIVSGL:
4960 		case TYPE_FPDIVDBL:
4961 		case TYPE_FPSQRTSGL:
4962 		case TYPE_FPSQRTDBL:
4963 		  /* An ALU flop can't be issued until two cycles before a
4964 		     preceding divide or sqrt operation has finished if
4965 		     the target of the ALU flop is also the target of
4966 		     the divide or sqrt operation.  */
4967 		  return insn_default_latency (dep_insn) - 2;
4968 
4969 		default:
4970 		  return 0;
4971 		}
4972 	    }
4973 	}
4974 
4975       /* For other output dependencies, the cost is 0.  */
4976       return 0;
4977 
4978     default:
4979       gcc_unreachable ();
4980     }
4981 }
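
/* A worked example of the adjustments above: if an fpload is
   anti-dependent on an FP multiply whose default latency is, say,
   three cycles, the cost returned is 3 - 1 = 2, letting the load issue
   one cycle before the multiply completes.  The latency figure here is
   hypothetical; the real values come from the DFA descriptions in
   pa.md.  */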
4982 
4983 /* Adjust scheduling priorities.  We use this to try and keep addil
4984    and the next use of %r1 close together.  */
4985 static int
4986 pa_adjust_priority (rtx_insn *insn, int priority)
4987 {
4988   rtx set = single_set (insn);
4989   rtx src, dest;
4990   if (set)
4991     {
4992       src = SET_SRC (set);
4993       dest = SET_DEST (set);
4994       if (GET_CODE (src) == LO_SUM
4995 	  && symbolic_operand (XEXP (src, 1), VOIDmode)
4996 	  && ! read_only_operand (XEXP (src, 1), VOIDmode))
4997 	priority >>= 3;
4998 
4999       else if (GET_CODE (src) == MEM
5000 	       && GET_CODE (XEXP (src, 0)) == LO_SUM
5001 	       && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
5002 	       && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
5003 	priority >>= 1;
5004 
5005       else if (GET_CODE (dest) == MEM
5006 	       && GET_CODE (XEXP (dest, 0)) == LO_SUM
5007 	       && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
5008 	       && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
5009 	priority >>= 3;
5010     }
5011   return priority;
5012 }
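
/* A sketch of the kind of pair this keeps together (hypothetical
   symbol "sym"; the exact field selectors depend on the target):

	addil	LR'sym-$global$,%r27	; implicitly sets %r1
	stw	%r26,RR'sym-$global$(%r1)

   The store is the LO_SUM use of %r1 matched above; shrinking its
   priority helps keep it from drifting away from the addil.  */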
5013 
5014 /* The 700 can only issue a single insn at a time.
5015    The 7XXX processors can issue two insns at a time.
5016    The 8000 can issue 4 insns at a time.  */
5017 static int
5018 pa_issue_rate (void)
5019 {
5020   switch (pa_cpu)
5021     {
5022     case PROCESSOR_700:		return 1;
5023     case PROCESSOR_7100:	return 2;
5024     case PROCESSOR_7100LC:	return 2;
5025     case PROCESSOR_7200:	return 2;
5026     case PROCESSOR_7300:	return 2;
5027     case PROCESSOR_8000:	return 4;
5028 
5029     default:
5030       gcc_unreachable ();
5031     }
5032 }
5033 
5034 
5035 
5036 /* Return LENGTH plus any adjustment needed by INSN, whose length has
5037    already been computed as LENGTH.  Return LENGTH if no adjustment is
5038    necessary.
5039 
5040    Also compute the length of an inline block move here as it is too
5041    complicated to express as a length attribute in pa.md.  */
5042 int
5043 pa_adjust_insn_length (rtx_insn *insn, int length)
5044 {
5045   rtx pat = PATTERN (insn);
5046 
5047   /* If length is negative or undefined, provide initial length.  */
5048   if ((unsigned int) length >= INT_MAX)
5049     {
5050       if (GET_CODE (pat) == SEQUENCE)
5051 	insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
5052 
5053       switch (get_attr_type (insn))
5054 	{
5055 	case TYPE_MILLI:
5056 	  length = pa_attr_length_millicode_call (insn);
5057 	  break;
5058 	case TYPE_CALL:
5059 	  length = pa_attr_length_call (insn, 0);
5060 	  break;
5061 	case TYPE_SIBCALL:
5062 	  length = pa_attr_length_call (insn, 1);
5063 	  break;
5064 	case TYPE_DYNCALL:
5065 	  length = pa_attr_length_indirect_call (insn);
5066 	  break;
5067 	case TYPE_SH_FUNC_ADRS:
5068 	  length = pa_attr_length_millicode_call (insn) + 20;
5069 	  break;
5070 	default:
5071 	  gcc_unreachable ();
5072 	}
5073     }
5074 
5075   /* Block move pattern.  */
5076   if (NONJUMP_INSN_P (insn)
5077       && GET_CODE (pat) == PARALLEL
5078       && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5079       && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5080       && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
5081       && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5082       && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
5083     length += compute_movmem_length (insn) - 4;
5084   /* Block clear pattern.  */
5085   else if (NONJUMP_INSN_P (insn)
5086 	   && GET_CODE (pat) == PARALLEL
5087 	   && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5088 	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5089 	   && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5090 	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
5091     length += compute_clrmem_length (insn) - 4;
5092   /* Conditional branch with an unfilled delay slot.  */
5093   else if (JUMP_P (insn) && ! simplejump_p (insn))
5094     {
5095       /* Adjust a short backwards conditional with an unfilled delay slot.  */
5096       if (GET_CODE (pat) == SET
5097 	  && length == 4
5098 	  && JUMP_LABEL (insn) != NULL_RTX
5099 	  && ! forward_branch_p (insn))
5100 	length += 4;
5101       else if (GET_CODE (pat) == PARALLEL
5102 	       && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5103 	       && length == 4)
5104 	length += 4;
5105       /* Adjust dbra insn with short backwards conditional branch with
5106 	 unfilled delay slot -- only for case where counter is in a
5107 	 general register.  */
5108       else if (GET_CODE (pat) == PARALLEL
5109 	       && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5110 	       && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5111 	       && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5112 	       && length == 4
5113 	       && ! forward_branch_p (insn))
5114 	length += 4;
5115     }
5116   return length;
5117 }
5118 
5119 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook.  */
5120 
5121 static bool
5122 pa_print_operand_punct_valid_p (unsigned char code)
5123 {
5124   if (code == '@'
5125       || code == '#'
5126       || code == '*'
5127       || code == '^')
5128     return true;
5129 
5130   return false;
5131 }
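
/* These punctuation codes show up at the end of many pa.md templates.
   A simplified, hypothetical example of their use:

	"cmpb,%S3 %2,%1,%0%#"

   where %S prints the swapped condition and %# appends "\n\tnop" when
   nothing could be found for the branch's delay slot.  */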
5132 
5133 /* Print operand X (an rtx) in assembler syntax to file FILE.
5134    CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5135    For `%' followed by punctuation, CODE is the punctuation and X is null.  */
5136 
5137 void
5138 pa_print_operand (FILE *file, rtx x, int code)
5139 {
5140   switch (code)
5141     {
5142     case '#':
5143       /* Output a 'nop' if there's nothing for the delay slot.  */
5144       if (dbr_sequence_length () == 0)
5145 	fputs ("\n\tnop", file);
5146       return;
5147     case '*':
5148       /* Output a nullification completer if there's nothing for the
5149 	 delay slot or nullification is requested.  */
5150       if (dbr_sequence_length () == 0
5151 	  || (final_sequence
5152 	      && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5153         fputs (",n", file);
5154       return;
5155     case 'R':
5156       /* Print out the second register name of a register pair.
5157 	 I.e., R (6) => 7.  */
5158       fputs (reg_names[REGNO (x) + 1], file);
5159       return;
5160     case 'r':
5161       /* A register or zero.  */
5162       if (x == const0_rtx
5163 	  || (x == CONST0_RTX (DFmode))
5164 	  || (x == CONST0_RTX (SFmode)))
5165 	{
5166 	  fputs ("%r0", file);
5167 	  return;
5168 	}
5169       else
5170 	break;
5171     case 'f':
5172       /* A register or zero (floating point).  */
5173       if (x == const0_rtx
5174 	  || (x == CONST0_RTX (DFmode))
5175 	  || (x == CONST0_RTX (SFmode)))
5176 	{
5177 	  fputs ("%fr0", file);
5178 	  return;
5179 	}
5180       else
5181 	break;
5182     case 'A':
5183       {
5184 	rtx xoperands[2];
5185 
5186 	xoperands[0] = XEXP (XEXP (x, 0), 0);
5187 	xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5188 	pa_output_global_address (file, xoperands[1], 0);
5189         fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5190 	return;
5191       }
5192 
5193     case 'C':			/* Plain (C)ondition */
5194     case 'X':
5195       switch (GET_CODE (x))
5196 	{
5197 	case EQ:
5198 	  fputs ("=", file);  break;
5199 	case NE:
5200 	  fputs ("<>", file);  break;
5201 	case GT:
5202 	  fputs (">", file);  break;
5203 	case GE:
5204 	  fputs (">=", file);  break;
5205 	case GEU:
5206 	  fputs (">>=", file);  break;
5207 	case GTU:
5208 	  fputs (">>", file);  break;
5209 	case LT:
5210 	  fputs ("<", file);  break;
5211 	case LE:
5212 	  fputs ("<=", file);  break;
5213 	case LEU:
5214 	  fputs ("<<=", file);  break;
5215 	case LTU:
5216 	  fputs ("<<", file);  break;
5217 	default:
5218 	  gcc_unreachable ();
5219 	}
5220       return;
5221     case 'N':			/* Condition, (N)egated */
5222       switch (GET_CODE (x))
5223 	{
5224 	case EQ:
5225 	  fputs ("<>", file);  break;
5226 	case NE:
5227 	  fputs ("=", file);  break;
5228 	case GT:
5229 	  fputs ("<=", file);  break;
5230 	case GE:
5231 	  fputs ("<", file);  break;
5232 	case GEU:
5233 	  fputs ("<<", file);  break;
5234 	case GTU:
5235 	  fputs ("<<=", file);  break;
5236 	case LT:
5237 	  fputs (">=", file);  break;
5238 	case LE:
5239 	  fputs (">", file);  break;
5240 	case LEU:
5241 	  fputs (">>", file);  break;
5242 	case LTU:
5243 	  fputs (">>=", file);  break;
5244 	default:
5245 	  gcc_unreachable ();
5246 	}
5247       return;
5248     /* For floating point comparisons.  Note that the output
5249        predicates are the complement of the desired mode.  The
5250        conditions for GT, GE, LT, LE and LTGT cause an invalid
5251        operation exception if the result is unordered and this
5252        exception is enabled in the floating-point status register.  */
5253     case 'Y':
5254       switch (GET_CODE (x))
5255 	{
5256 	case EQ:
5257 	  fputs ("!=", file);  break;
5258 	case NE:
5259 	  fputs ("=", file);  break;
5260 	case GT:
5261 	  fputs ("!>", file);  break;
5262 	case GE:
5263 	  fputs ("!>=", file);  break;
5264 	case LT:
5265 	  fputs ("!<", file);  break;
5266 	case LE:
5267 	  fputs ("!<=", file);  break;
5268 	case LTGT:
5269 	  fputs ("!<>", file);  break;
5270 	case UNLE:
5271 	  fputs ("!?<=", file);  break;
5272 	case UNLT:
5273 	  fputs ("!?<", file);  break;
5274 	case UNGE:
5275 	  fputs ("!?>=", file);  break;
5276 	case UNGT:
5277 	  fputs ("!?>", file);  break;
5278 	case UNEQ:
5279 	  fputs ("!?=", file);  break;
5280 	case UNORDERED:
5281 	  fputs ("!?", file);  break;
5282 	case ORDERED:
5283 	  fputs ("?", file);  break;
5284 	default:
5285 	  gcc_unreachable ();
5286 	}
5287       return;
5288     case 'S':			/* Condition, operands are (S)wapped.  */
5289       switch (GET_CODE (x))
5290 	{
5291 	case EQ:
5292 	  fputs ("=", file);  break;
5293 	case NE:
5294 	  fputs ("<>", file);  break;
5295 	case GT:
5296 	  fputs ("<", file);  break;
5297 	case GE:
5298 	  fputs ("<=", file);  break;
5299 	case GEU:
5300 	  fputs ("<<=", file);  break;
5301 	case GTU:
5302 	  fputs ("<<", file);  break;
5303 	case LT:
5304 	  fputs (">", file);  break;
5305 	case LE:
5306 	  fputs (">=", file);  break;
5307 	case LEU:
5308 	  fputs (">>=", file);  break;
5309 	case LTU:
5310 	  fputs (">>", file);  break;
5311 	default:
5312 	  gcc_unreachable ();
5313 	}
5314       return;
5315     case 'B':			/* Condition, (B)oth swapped and negated.  */
5316       switch (GET_CODE (x))
5317 	{
5318 	case EQ:
5319 	  fputs ("<>", file);  break;
5320 	case NE:
5321 	  fputs ("=", file);  break;
5322 	case GT:
5323 	  fputs (">=", file);  break;
5324 	case GE:
5325 	  fputs (">", file);  break;
5326 	case GEU:
5327 	  fputs (">>", file);  break;
5328 	case GTU:
5329 	  fputs (">>=", file);  break;
5330 	case LT:
5331 	  fputs ("<=", file);  break;
5332 	case LE:
5333 	  fputs ("<", file);  break;
5334 	case LEU:
5335 	  fputs ("<<", file);  break;
5336 	case LTU:
5337 	  fputs ("<<=", file);  break;
5338 	default:
5339 	  gcc_unreachable ();
5340 	}
5341       return;
5342     case 'k':
5343       gcc_assert (GET_CODE (x) == CONST_INT);
5344       fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5345       return;
5346     case 'Q':
5347       gcc_assert (GET_CODE (x) == CONST_INT);
5348       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5349       return;
5350     case 'L':
5351       gcc_assert (GET_CODE (x) == CONST_INT);
5352       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5353       return;
5354     case 'o':
5355       gcc_assert (GET_CODE (x) == CONST_INT
5356 		  && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
5357       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5358       return;
5359     case 'O':
5360       gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5361       fprintf (file, "%d", exact_log2 (INTVAL (x)));
5362       return;
5363     case 'p':
5364       gcc_assert (GET_CODE (x) == CONST_INT);
5365       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5366       return;
5367     case 'P':
5368       gcc_assert (GET_CODE (x) == CONST_INT);
5369       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5370       return;
5371     case 'I':
5372       if (GET_CODE (x) == CONST_INT)
5373 	fputs ("i", file);
5374       return;
5375     case 'M':
5376     case 'F':
5377       switch (GET_CODE (XEXP (x, 0)))
5378 	{
5379 	case PRE_DEC:
5380 	case PRE_INC:
5381 	  if (ASSEMBLER_DIALECT == 0)
5382 	    fputs ("s,mb", file);
5383 	  else
5384 	    fputs (",mb", file);
5385 	  break;
5386 	case POST_DEC:
5387 	case POST_INC:
5388 	  if (ASSEMBLER_DIALECT == 0)
5389 	    fputs ("s,ma", file);
5390 	  else
5391 	    fputs (",ma", file);
5392 	  break;
5393 	case PLUS:
5394 	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5395 	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5396 	    {
5397 	      if (ASSEMBLER_DIALECT == 0)
5398 		fputs ("x", file);
5399 	    }
5400 	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5401 		   || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5402 	    {
5403 	      if (ASSEMBLER_DIALECT == 0)
5404 		fputs ("x,s", file);
5405 	      else
5406 		fputs (",s", file);
5407 	    }
5408 	  else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5409 	    fputs ("s", file);
5410 	  break;
5411 	default:
5412 	  if (code == 'F' && ASSEMBLER_DIALECT == 0)
5413 	    fputs ("s", file);
5414 	  break;
5415 	}
5416       return;
5417     case 'G':
5418       pa_output_global_address (file, x, 0);
5419       return;
5420     case 'H':
5421       pa_output_global_address (file, x, 1);
5422       return;
5423     case 0:			/* Don't do anything special */
5424       break;
5425     case 'Z':
5426       {
5427 	unsigned op[3];
5428 	compute_zdepwi_operands (INTVAL (x), op);
5429 	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5430 	return;
5431       }
5432     case 'z':
5433       {
5434 	unsigned op[3];
5435 	compute_zdepdi_operands (INTVAL (x), op);
5436 	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5437 	return;
5438       }
5439     case 'c':
5440       /* We can get here from a .vtable_inherit due to our
5441 	 CONSTANT_ADDRESS_P rejecting perfectly good constant
5442 	 addresses.  */
5443       break;
5444     default:
5445       gcc_unreachable ();
5446     }
5447   if (GET_CODE (x) == REG)
5448     {
5449       fputs (reg_names [REGNO (x)], file);
5450       if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5451 	{
5452 	  fputs ("R", file);
5453 	  return;
5454 	}
5455       if (FP_REG_P (x)
5456 	  && GET_MODE_SIZE (GET_MODE (x)) <= 4
5457 	  && (REGNO (x) & 1) == 0)
5458 	fputs ("L", file);
5459     }
5460   else if (GET_CODE (x) == MEM)
5461     {
5462       int size = GET_MODE_SIZE (GET_MODE (x));
5463       rtx base = NULL_RTX;
5464       switch (GET_CODE (XEXP (x, 0)))
5465 	{
5466 	case PRE_DEC:
5467 	case POST_DEC:
5468           base = XEXP (XEXP (x, 0), 0);
5469 	  fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5470 	  break;
5471 	case PRE_INC:
5472 	case POST_INC:
5473           base = XEXP (XEXP (x, 0), 0);
5474 	  fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5475 	  break;
5476 	case PLUS:
5477 	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5478 	    fprintf (file, "%s(%s)",
5479 		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5480 		     reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5481 	  else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5482 	    fprintf (file, "%s(%s)",
5483 		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5484 		     reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5485 	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5486 		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5487 	    {
5488 	      /* Because the REG_POINTER flag can get lost during reload,
5489 		 pa_legitimate_address_p canonicalizes the order of the
5490 		 index and base registers in the combined move patterns.  */
5491 	      rtx base = XEXP (XEXP (x, 0), 1);
5492 	      rtx index = XEXP (XEXP (x, 0), 0);
5493 
5494 	      fprintf (file, "%s(%s)",
5495 		       reg_names [REGNO (index)], reg_names [REGNO (base)]);
5496 	    }
5497 	  else
5498 	    output_address (GET_MODE (x), XEXP (x, 0));
5499 	  break;
5500 	default:
5501 	  output_address (GET_MODE (x), XEXP (x, 0));
5502 	  break;
5503 	}
5504     }
5505   else
5506     output_addr_const (file, x);
5507 }
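
/* Register-name example (illustrative): a 4-byte value in an
   even-numbered FP register, say %fr4, prints as "%fr4L", the left
   half of the 64-bit register; under TARGET_64BIT the right half is
   used instead and "R" is appended.  */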
5508 
5509 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.  */
5510 
5511 void
5512 pa_output_global_address (FILE *file, rtx x, int round_constant)
5513 {
5514 
5515   /* Imagine  (high (const (plus ...))).  */
5516   if (GET_CODE (x) == HIGH)
5517     x = XEXP (x, 0);
5518 
5519   if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5520     output_addr_const (file, x);
5521   else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5522     {
5523       output_addr_const (file, x);
5524       fputs ("-$global$", file);
5525     }
5526   else if (GET_CODE (x) == CONST)
5527     {
5528       const char *sep = "";
5529       int offset = 0;		/* assembler wants -$global$ at end */
5530       rtx base = NULL_RTX;
5531 
5532       switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5533 	{
5534 	case LABEL_REF:
5535 	case SYMBOL_REF:
5536 	  base = XEXP (XEXP (x, 0), 0);
5537 	  output_addr_const (file, base);
5538 	  break;
5539 	case CONST_INT:
5540 	  offset = INTVAL (XEXP (XEXP (x, 0), 0));
5541 	  break;
5542 	default:
5543 	  gcc_unreachable ();
5544 	}
5545 
5546       switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5547 	{
5548 	case LABEL_REF:
5549 	case SYMBOL_REF:
5550 	  base = XEXP (XEXP (x, 0), 1);
5551 	  output_addr_const (file, base);
5552 	  break;
5553 	case CONST_INT:
5554 	  offset = INTVAL (XEXP (XEXP (x, 0), 1));
5555 	  break;
5556 	default:
5557 	  gcc_unreachable ();
5558 	}
5559 
5560       /* How bogus.  The compiler is apparently responsible for
5561 	 rounding the constant if it uses an LR field selector.
5562 
5563 	 The linker and/or assembler seem a better place since
5564 	 they have to do this kind of thing already.
5565 
5566 	 If we fail to do this, HP's optimizing linker may eliminate
5567 	 an addil, but not update the ldw/stw/ldo instruction that
5568 	 uses the result of the addil.  */
5569       if (round_constant)
5570 	offset = ((offset + 0x1000) & ~0x1fff);
5571 
5572       switch (GET_CODE (XEXP (x, 0)))
5573 	{
5574 	case PLUS:
5575 	  if (offset < 0)
5576 	    {
5577 	      offset = -offset;
5578 	      sep = "-";
5579 	    }
5580 	  else
5581 	    sep = "+";
5582 	  break;
5583 
5584 	case MINUS:
5585 	  gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5586 	  sep = "-";
5587 	  break;
5588 
5589 	default:
5590 	  gcc_unreachable ();
5591 	}
5592 
5593       if (!read_only_operand (base, VOIDmode) && !flag_pic)
5594 	fputs ("-$global$", file);
5595       if (offset)
5596 	fprintf (file, "%s%d", sep, offset);
5597     }
5598   else
5599     output_addr_const (file, x);
5600 }
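
/* Output examples for the CONST case (sketch; "foo" is a hypothetical
   symbol that is neither read-only nor referenced from PIC code):

	(const (plus (symbol_ref "foo") (const_int 4)))  =>  foo-$global$+4

   With ROUND_CONSTANT set, an offset of 5000 becomes
   (5000 + 0x1000) & ~0x1fff = 8192 before being printed.  */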
5601 
5602 /* Output boilerplate text to appear at the beginning of the file.
5603    There are several possible versions.  */
5604 #define aputs(x) fputs(x, asm_out_file)
5605 static inline void
5606 pa_file_start_level (void)
5607 {
5608   if (TARGET_64BIT)
5609     aputs ("\t.LEVEL 2.0w\n");
5610   else if (TARGET_PA_20)
5611     aputs ("\t.LEVEL 2.0\n");
5612   else if (TARGET_PA_11)
5613     aputs ("\t.LEVEL 1.1\n");
5614   else
5615     aputs ("\t.LEVEL 1.0\n");
5616 }
5617 
5618 static inline void
5619 pa_file_start_space (int sortspace)
5620 {
5621   aputs ("\t.SPACE $PRIVATE$");
5622   if (sortspace)
5623     aputs (",SORT=16");
5624   aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5625   if (flag_tm)
5626     aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5627   aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5628 	 "\n\t.SPACE $TEXT$");
5629   if (sortspace)
5630     aputs (",SORT=8");
5631   aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5632 	 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5633 }
5634 
5635 static inline void
5636 pa_file_start_file (int want_version)
5637 {
5638   if (write_symbols != NO_DEBUG)
5639     {
5640       output_file_directive (asm_out_file, main_input_filename);
5641       if (want_version)
5642 	aputs ("\t.version\t\"01.01\"\n");
5643     }
5644 }
5645 
5646 static inline void
5647 pa_file_start_mcount (const char *aswhat)
5648 {
5649   if (profile_flag)
5650     fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5651 }
5652 
5653 static void
5654 pa_elf_file_start (void)
5655 {
5656   pa_file_start_level ();
5657   pa_file_start_mcount ("ENTRY");
5658   pa_file_start_file (0);
5659 }
5660 
5661 static void
5662 pa_som_file_start (void)
5663 {
5664   pa_file_start_level ();
5665   pa_file_start_space (0);
5666   aputs ("\t.IMPORT $global$,DATA\n"
5667          "\t.IMPORT $$dyncall,MILLICODE\n");
5668   pa_file_start_mcount ("CODE");
5669   pa_file_start_file (0);
5670 }
5671 
5672 static void
5673 pa_linux_file_start (void)
5674 {
5675   pa_file_start_file (1);
5676   pa_file_start_level ();
5677   pa_file_start_mcount ("CODE");
5678 }
5679 
5680 static void
5681 pa_hpux64_gas_file_start (void)
5682 {
5683   pa_file_start_level ();
5684 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5685   if (profile_flag)
5686     ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5687 #endif
5688   pa_file_start_file (1);
5689 }
5690 
5691 static void
5692 pa_hpux64_hpas_file_start (void)
5693 {
5694   pa_file_start_level ();
5695   pa_file_start_space (1);
5696   pa_file_start_mcount ("CODE");
5697   pa_file_start_file (0);
5698 }
5699 #undef aputs
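
/* For reference, a sketch of what pa_som_file_start emits for a plain
   PA 1.1 target (some operands elided):

	.LEVEL 1.1
	.SPACE $PRIVATE$
	.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31
	...
	.SPACE $TEXT$
	.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY
	.IMPORT $global$,DATA
	.IMPORT $$dyncall,MILLICODE  */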
5700 
5701 /* Search the deferred plabel list for SYMBOL and return its internal
5702    label.  If an entry for SYMBOL is not found, a new entry is created.  */
5703 
5704 rtx
5705 pa_get_deferred_plabel (rtx symbol)
5706 {
5707   const char *fname = XSTR (symbol, 0);
5708   size_t i;
5709 
5710   /* See if we have already put this function on the list of deferred
5711      plabels.  This list is generally small, so a linear search is not
5712      too ugly.  If it proves too slow, replace it with something faster.  */
5713   for (i = 0; i < n_deferred_plabels; i++)
5714     if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5715       break;
5716 
5717   /* If the deferred plabel list is empty, or this entry was not found
5718      on the list, create a new entry on the list.  */
5719   if (deferred_plabels == NULL || i == n_deferred_plabels)
5720     {
5721       tree id;
5722 
5723       if (deferred_plabels == 0)
5724 	deferred_plabels =  ggc_alloc<deferred_plabel> ();
5725       else
5726         deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5727                                           deferred_plabels,
5728                                           n_deferred_plabels + 1);
5729 
5730       i = n_deferred_plabels++;
5731       deferred_plabels[i].internal_label = gen_label_rtx ();
5732       deferred_plabels[i].symbol = symbol;
5733 
5734       /* Gross.  We have just implicitly taken the address of this
5735 	 function.  Mark it in the same manner as assemble_name.  */
5736       id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5737       if (id)
5738 	mark_referenced (id);
5739     }
5740 
5741   return deferred_plabels[i].internal_label;
5742 }
5743 
5744 static void
5745 output_deferred_plabels (void)
5746 {
5747   size_t i;
5748 
5749   /* If we have some deferred plabels, then we need to switch into the
5750      data or readonly data section, and align it to a 4-byte boundary
5751      (8-byte for TARGET_64BIT) before outputting the deferred plabels.  */
5752   if (n_deferred_plabels)
5753     {
5754       switch_to_section (flag_pic ? data_section : readonly_data_section);
5755       ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5756     }
5757 
5758   /* Now output the deferred plabels.  */
5759   for (i = 0; i < n_deferred_plabels; i++)
5760     {
5761       targetm.asm_out.internal_label (asm_out_file, "L",
5762 		 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5763       assemble_integer (deferred_plabels[i].symbol,
5764 			TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5765     }
5766 }
5767 
5768 /* Initialize optabs to point to emulation routines.  */
5769 
5770 static void
5771 pa_init_libfuncs (void)
5772 {
5773   if (HPUX_LONG_DOUBLE_LIBRARY)
5774     {
5775       set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5776       set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5777       set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5778       set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5779       set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5780       set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5781       set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5782       set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5783       set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5784 
5785       set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5786       set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5787       set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5788       set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5789       set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5790       set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5791       set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5792 
5793       set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5794       set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5795       set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5796       set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5797 
5798       set_conv_libfunc (sfix_optab, SImode, TFmode,
5799 			TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5800 				     : "_U_Qfcnvfxt_quad_to_sgl");
5801       set_conv_libfunc (sfix_optab, DImode, TFmode,
5802 			"_U_Qfcnvfxt_quad_to_dbl");
5803       set_conv_libfunc (ufix_optab, SImode, TFmode,
5804 			"_U_Qfcnvfxt_quad_to_usgl");
5805       set_conv_libfunc (ufix_optab, DImode, TFmode,
5806 			"_U_Qfcnvfxt_quad_to_udbl");
5807 
5808       set_conv_libfunc (sfloat_optab, TFmode, SImode,
5809 			"_U_Qfcnvxf_sgl_to_quad");
5810       set_conv_libfunc (sfloat_optab, TFmode, DImode,
5811 			"_U_Qfcnvxf_dbl_to_quad");
5812       set_conv_libfunc (ufloat_optab, TFmode, SImode,
5813 			"_U_Qfcnvxf_usgl_to_quad");
5814       set_conv_libfunc (ufloat_optab, TFmode, DImode,
5815 			"_U_Qfcnvxf_udbl_to_quad");
5816     }
5817 
5818   if (TARGET_SYNC_LIBCALL)
5819     init_sync_libfuncs (8);
5820 }
5821 
5822 /* HP's millicode routines mean something special to the assembler.
5823    Keep track of which ones we have used.  */
5824 
5825 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5826 static void import_milli (enum millicodes);
5827 static char imported[(int) end1000];
5828 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5829 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5830 #define MILLI_START 10
5831 
5832 static void
5833 import_milli (enum millicodes code)
5834 {
5835   char str[sizeof (import_string)];
5836 
5837   if (!imported[(int) code])
5838     {
5839       imported[(int) code] = 1;
5840       strcpy (str, import_string);
5841       strncpy (str + MILLI_START, milli_names[(int) code], 4);
5842       output_asm_insn (str, 0);
5843     }
5844 }
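
/* Example: the first import_milli (mulI) call emits

	.IMPORT $$mulI,MILLICODE

   into the assembly output; subsequent calls for the same code are
   no-ops thanks to the imported[] flags.  */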
5845 
5846 /* The register constraints have put the operands and return value in
5847    the proper registers.  */
5848 
5849 const char *
5850 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
5851 {
5852   import_milli (mulI);
5853   return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5854 }
5855 
5856 /* Emit the rtl for doing a division by a constant.  */
5857 
5858 /* Do magic division millicodes exist for this value? */
5859 const int pa_magic_milli[] = {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5860 
5861 /* We'll use an array to keep track of the magic millicodes and
5862    whether or not we've used them already. [n][0] is signed, [n][1] is
5863    unsigned.  */
5864 
5865 static int div_milli[16][2];
5866 
5867 int
5868 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5869 {
5870   if (GET_CODE (operands[2]) == CONST_INT
5871       && INTVAL (operands[2]) > 0
5872       && INTVAL (operands[2]) < 16
5873       && pa_magic_milli[INTVAL (operands[2])])
5874     {
5875       rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5876 
5877       emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5878       emit
5879 	(gen_rtx_PARALLEL
5880 	 (VOIDmode,
5881 	  gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
5882 				     gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5883 						     SImode,
5884 						     gen_rtx_REG (SImode, 26),
5885 						     operands[2])),
5886 		     gen_rtx_CLOBBER (VOIDmode, operands[4]),
5887 		     gen_rtx_CLOBBER (VOIDmode, operands[3]),
5888 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5889 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5890 		     gen_rtx_CLOBBER (VOIDmode, ret))));
5891       emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5892       return 1;
5893     }
5894   return 0;
5895 }
5896 
5897 const char *
5898 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
5899 {
5900   int divisor;
5901 
5902   /* If the divisor is a constant, try to use one of the special
5903      opcodes.  */
5904   if (GET_CODE (operands[0]) == CONST_INT)
5905     {
5906       static char buf[100];
5907       divisor = INTVAL (operands[0]);
5908       if (!div_milli[divisor][unsignedp])
5909 	{
5910 	  div_milli[divisor][unsignedp] = 1;
5911 	  if (unsignedp)
5912 	    output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5913 	  else
5914 	    output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5915 	}
5916       if (unsignedp)
5917 	{
5918 	  sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5919 		   INTVAL (operands[0]));
5920 	  return pa_output_millicode_call (insn,
5921 					   gen_rtx_SYMBOL_REF (SImode, buf));
5922 	}
5923       else
5924 	{
5925 	  sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5926 		   INTVAL (operands[0]));
5927 	  return pa_output_millicode_call (insn,
5928 					   gen_rtx_SYMBOL_REF (SImode, buf));
5929 	}
5930     }
5931   /* Divisor isn't a special constant.  */
5932   else
5933     {
5934       if (unsignedp)
5935 	{
5936 	  import_milli (divU);
5937 	  return pa_output_millicode_call (insn,
5938 					gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5939 	}
5940       else
5941 	{
5942 	  import_milli (divI);
5943 	  return pa_output_millicode_call (insn,
5944 					gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5945 	}
5946     }
5947 }
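
/* Example (sketch): for a division by the magic constant 7, the code
   above constructs the symbol "$$divI_7", imports it on first use, and
   hands pa_output_millicode_call a SYMBOL_REF for it, so the actual
   branch sequence is whatever the millicode call machinery decides is
   appropriate for the target.  */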
5948 
5949 /* Output a $$rem millicode to do mod.  */
5950 
5951 const char *
5952 pa_output_mod_insn (int unsignedp, rtx_insn *insn)
5953 {
5954   if (unsignedp)
5955     {
5956       import_milli (remU);
5957       return pa_output_millicode_call (insn,
5958 				       gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5959     }
5960   else
5961     {
5962       import_milli (remI);
5963       return pa_output_millicode_call (insn,
5964 				       gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5965     }
5966 }
5967 
5968 void
5969 pa_output_arg_descriptor (rtx_insn *call_insn)
5970 {
5971   const char *arg_regs[4];
5972   machine_mode arg_mode;
5973   rtx link;
5974   int i, output_flag = 0;
5975   int regno;
5976 
5977   /* We neither need nor want argument location descriptors for the
5978      64-bit runtime environment or the ELF32 environment.  */
5979   if (TARGET_64BIT || TARGET_ELF32)
5980     return;
5981 
5982   for (i = 0; i < 4; i++)
5983     arg_regs[i] = 0;
5984 
5985   /* Specify explicitly that no argument relocations should take place
5986      if using the portable runtime calling conventions.  */
5987   if (TARGET_PORTABLE_RUNTIME)
5988     {
5989       fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5990 	     asm_out_file);
5991       return;
5992     }
5993 
5994   gcc_assert (CALL_P (call_insn));
5995   for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5996        link; link = XEXP (link, 1))
5997     {
5998       rtx use = XEXP (link, 0);
5999 
6000       if (! (GET_CODE (use) == USE
6001 	     && GET_CODE (XEXP (use, 0)) == REG
6002 	     && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6003 	continue;
6004 
6005       arg_mode = GET_MODE (XEXP (use, 0));
6006       regno = REGNO (XEXP (use, 0));
6007       if (regno >= 23 && regno <= 26)
6008 	{
6009 	  arg_regs[26 - regno] = "GR";
6010 	  if (arg_mode == DImode)
6011 	    arg_regs[25 - regno] = "GR";
6012 	}
6013       else if (regno >= 32 && regno <= 39)
6014 	{
6015 	  if (arg_mode == SFmode)
6016 	    arg_regs[(regno - 32) / 2] = "FR";
6017 	  else
6018 	    {
6019 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
6020 	      arg_regs[(regno - 34) / 2] = "FR";
6021 	      arg_regs[(regno - 34) / 2 + 1] = "FU";
6022 #else
6023 	      arg_regs[(regno - 34) / 2] = "FU";
6024 	      arg_regs[(regno - 34) / 2 + 1] = "FR";
6025 #endif
6026 	    }
6027 	}
6028     }
6029   fputs ("\t.CALL ", asm_out_file);
6030   for (i = 0; i < 4; i++)
6031     {
6032       if (arg_regs[i])
6033 	{
6034 	  if (output_flag++)
6035 	    fputc (',', asm_out_file);
6036 	  fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
6037 	}
6038     }
6039   fputc ('\n', asm_out_file);
6040 }
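
/* Example: a call passing four word-sized integer arguments in
   %r26..%r23 yields

	.CALL ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR

   while a portable-runtime call always gets the all-NO descriptor
   emitted above.  */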
6041 
6042 /* Inform reload about cases where moving X with a mode MODE to or from
6043    a register in RCLASS requires an extra scratch or immediate register.
6044    Return the class needed for the immediate register.  */
6045 
6046 static reg_class_t
6047 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
6048 		     machine_mode mode, secondary_reload_info *sri)
6049 {
6050   int regno;
6051   enum reg_class rclass = (enum reg_class) rclass_i;
6052 
6053   /* Handle the easy stuff first.  */
6054   if (rclass == R1_REGS)
6055     return NO_REGS;
6056 
6057   if (REG_P (x))
6058     {
6059       regno = REGNO (x);
6060       if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
6061 	return NO_REGS;
6062     }
6063   else
6064     regno = -1;
6065 
6066   /* If we have something like (mem (mem (...))), we can safely assume the
6067      inner MEM will end up in a general register after reloading, so there's
6068      no need for a secondary reload.  */
6069   if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
6070     return NO_REGS;
6071 
6072   /* Trying to load a constant into a FP register during PIC code
6073      generation requires %r1 as a scratch register.  For float modes,
6074      the only legitimate constant is CONST0_RTX.  However, there are
6075      a few patterns that accept constant double operands.  */
6076   if (flag_pic
6077       && FP_REG_CLASS_P (rclass)
6078       && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
6079     {
6080       switch (mode)
6081 	{
6082 	case SImode:
6083 	  sri->icode = CODE_FOR_reload_insi_r1;
6084 	  break;
6085 
6086 	case DImode:
6087 	  sri->icode = CODE_FOR_reload_indi_r1;
6088 	  break;
6089 
6090 	case SFmode:
6091 	  sri->icode = CODE_FOR_reload_insf_r1;
6092 	  break;
6093 
6094 	case DFmode:
6095 	  sri->icode = CODE_FOR_reload_indf_r1;
6096 	  break;
6097 
6098 	default:
6099 	  gcc_unreachable ();
6100 	}
6101       return NO_REGS;
6102     }
6103 
6104   /* Secondary reloads of symbolic expressions require %r1 as a scratch
6105      register when we're generating PIC code or when the operand isn't
6106      readonly.  */
6107   if (pa_symbolic_expression_p (x))
6108     {
6109       if (GET_CODE (x) == HIGH)
6110 	x = XEXP (x, 0);
6111 
6112       if (flag_pic || !read_only_operand (x, VOIDmode))
6113 	{
6114 	  switch (mode)
6115 	    {
6116 	    case SImode:
6117 	      sri->icode = CODE_FOR_reload_insi_r1;
6118 	      break;
6119 
6120 	    case DImode:
6121 	      sri->icode = CODE_FOR_reload_indi_r1;
6122 	      break;
6123 
6124 	    default:
6125 	      gcc_unreachable ();
6126 	    }
6127 	  return NO_REGS;
6128 	}
6129     }
6130 
6131   /* Profiling showed the PA port spends about 1.3% of its compilation
6132      time in true_regnum from calls inside pa_secondary_reload_class.  */
6133   if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6134     regno = true_regnum (x);
6135 
6136   /* Handle reloads for floating point loads and stores.  */
6137   if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
6138       && FP_REG_CLASS_P (rclass))
6139     {
6140       if (MEM_P (x))
6141 	{
6142 	  x = XEXP (x, 0);
6143 
6144 	  /* We don't need a secondary reload for indexed memory addresses.
6145 
6146 	     When INT14_OK_STRICT is true, it might appear that we could
6147 	     directly allow register indirect memory addresses.  However,
6148 	     this doesn't work because we don't support SUBREGs in
6149 	     floating-point register copies and reload doesn't tell us
6150 	     when it's going to use a SUBREG.  */
6151 	  if (IS_INDEX_ADDR_P (x))
6152 	    return NO_REGS;
6153 	}
6154 
6155       /* Request a secondary reload with a general scratch register
6156 	 for everything else.  ??? Could symbolic operands be handled
6157 	 directly when generating non-pic PA 2.0 code?  */
6158       sri->icode = (in_p
6159 		    ? direct_optab_handler (reload_in_optab, mode)
6160 		    : direct_optab_handler (reload_out_optab, mode));
6161       return NO_REGS;
6162     }
6163 
6164   /* A SAR<->FP register copy requires an intermediate general register
6165      and secondary memory.  We need a secondary reload with a general
6166      scratch register for spills.  */
6167   if (rclass == SHIFT_REGS)
6168     {
6169       /* Handle spill.  */
6170       if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6171 	{
6172 	  sri->icode = (in_p
6173 			? direct_optab_handler (reload_in_optab, mode)
6174 			: direct_optab_handler (reload_out_optab, mode));
6175 	  return NO_REGS;
6176 	}
6177 
6178       /* Handle FP copy.  */
6179       if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6180 	return GENERAL_REGS;
6181     }
6182 
6183   if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6184       && REGNO_REG_CLASS (regno) == SHIFT_REGS
6185       && FP_REG_CLASS_P (rclass))
6186     return GENERAL_REGS;
6187 
6188   return NO_REGS;
6189 }
6190 
6191 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY.  The argument pointer
6192    is only marked as live on entry by df-scan when it is a fixed
6193    register.  It isn't a fixed register in the 64-bit runtime,
6194    so we need to mark it here.  */
6195 
6196 static void
6197 pa_extra_live_on_entry (bitmap regs)
6198 {
6199   if (TARGET_64BIT)
6200     bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6201 }
6202 
6203 /* Implement EH_RETURN_HANDLER_RTX.  The MEM needs to be volatile
6204    to prevent it from being deleted.  */
6205 
6206 rtx
6207 pa_eh_return_handler_rtx (void)
6208 {
6209   rtx tmp;
6210 
6211   tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6212 		      TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6213   tmp = gen_rtx_MEM (word_mode, tmp);
6214   tmp->volatil = 1;
6215   return tmp;
6216 }
6217 
6218 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6219    by invisible reference.  As a GCC extension, we also pass anything
6220    with a zero or variable size by reference.
6221 
6222    The 64-bit runtime does not describe passing any types by invisible
6223    reference.  The internals of GCC can't currently handle passing
6224    empty structures, and zero or variable length arrays when they are
6225    not passed entirely on the stack or by reference.  Thus, as a GCC
6226    extension, we pass these types by reference.  The HP compiler doesn't
6227    support these types, so hopefully there shouldn't be any compatibility
6228    issues.  This may have to be revisited when HP releases a C99 compiler
6229    or updates the ABI.  */
6230 
6231 static bool
6232 pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
6233 		      machine_mode mode, const_tree type,
6234 		      bool named ATTRIBUTE_UNUSED)
6235 {
6236   HOST_WIDE_INT size;
6237 
6238   if (type)
6239     size = int_size_in_bytes (type);
6240   else
6241     size = GET_MODE_SIZE (mode);
6242 
6243   if (TARGET_64BIT)
6244     return size <= 0;
6245   else
6246     return size <= 0 || size > 8;
6247 }
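
/* Examples: under the 32-bit runtime an 8-byte struct is passed by
   value but a 12-byte struct goes by invisible reference; under
   TARGET_64BIT only zero-sized and variable-sized objects take the
   reference path.  */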
6248 
6249 enum direction
6250 pa_function_arg_padding (machine_mode mode, const_tree type)
6251 {
6252   if (mode == BLKmode
6253       || (TARGET_64BIT
6254 	  && type
6255 	  && (AGGREGATE_TYPE_P (type)
6256 	      || TREE_CODE (type) == COMPLEX_TYPE
6257 	      || TREE_CODE (type) == VECTOR_TYPE)))
6258     {
6259       /* Return none if justification is not required.  */
6260       if (type
6261 	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6262 	  && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6263 	return none;
6264 
6265       /* The directions set here are ignored when a BLKmode argument larger
6266 	 than a word is placed in a register.  Different code is used for
6267 	 the stack and registers.  This makes it difficult to have a
6268 	 consistent data representation for both the stack and registers.
6269 	 For both runtimes, the justification and padding for arguments on
6270 	 the stack and in registers should be identical.  */
6271       if (TARGET_64BIT)
6272 	/* The 64-bit runtime specifies left justification for aggregates.  */
6273         return upward;
6274       else
6275 	/* The 32-bit runtime architecture specifies right justification.
6276 	   When the argument is passed on the stack, the argument is padded
6277 	   with garbage on the left.  The HP compiler pads with zeros.  */
6278 	return downward;
6279     }
6280 
6281   if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6282     return downward;
6283   else
6284     return none;
6285 }
6286 
6287 
6288 /* Do what is necessary for `va_start'.  We look at the current function
6289    to determine if stdargs or varargs is used and fill in an initial
6290    va_list.  A pointer to this constructor is returned.  */
6291 
6292 static rtx
6293 hppa_builtin_saveregs (void)
6294 {
6295   rtx offset, dest;
6296   tree fntype = TREE_TYPE (current_function_decl);
6297   int argadj = ((!stdarg_p (fntype))
6298 		? UNITS_PER_WORD : 0);
6299 
6300   if (argadj)
6301     offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6302   else
6303     offset = crtl->args.arg_offset_rtx;
6304 
6305   if (TARGET_64BIT)
6306     {
6307       int i, off;
6308 
6309       /* Adjust for varargs/stdarg differences.  */
6310       if (argadj)
6311 	offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6312       else
6313 	offset = crtl->args.arg_offset_rtx;
6314 
6315       /* We need to save %r26 .. %r19 inclusive starting at offset -64
6316 	 from the incoming arg pointer and growing to larger addresses.  */
6317       for (i = 26, off = -64; i >= 19; i--, off += 8)
6318 	emit_move_insn (gen_rtx_MEM (word_mode,
6319 				     plus_constant (Pmode,
6320 						    arg_pointer_rtx, off)),
6321 			gen_rtx_REG (word_mode, i));
6322 
6323       /* The incoming args pointer points just beyond the flushback area;
6324 	 normally this is not a serious concern.  However, when we are doing
6325 	 varargs/stdargs we want to make the arg pointer point to the start
6326 	 of the incoming argument area.  */
6327       emit_move_insn (virtual_incoming_args_rtx,
6328 		      plus_constant (Pmode, arg_pointer_rtx, -64));
6329 
6330       /* Now return a pointer to the first anonymous argument.  */
6331       return copy_to_reg (expand_binop (Pmode, add_optab,
6332 					virtual_incoming_args_rtx,
6333 					offset, 0, 0, OPTAB_LIB_WIDEN));
6334     }
6335 
6336   /* Store general registers on the stack.  */
6337   dest = gen_rtx_MEM (BLKmode,
6338 		      plus_constant (Pmode, crtl->args.internal_arg_pointer,
6339 				     -16));
6340   set_mem_alias_set (dest, get_varargs_alias_set ());
6341   set_mem_align (dest, BITS_PER_WORD);
6342   move_block_from_reg (23, dest, 4);
6343 
6344   /* move_block_from_reg will emit code to store the argument registers
6345      individually as scalar stores.
6346 
6347      However, other insns may later load from the same addresses for
6348      a structure load (passing a struct to a varargs routine).
6349 
6350      The alias code assumes that such aliasing can never happen, so we
6351      have to keep memory referencing insns from moving up beyond the
6352      last argument register store.  So we emit a blockage insn here.  */
6353   emit_insn (gen_blockage ());
6354 
6355   return copy_to_reg (expand_binop (Pmode, add_optab,
6356 				    crtl->args.internal_arg_pointer,
6357 				    offset, 0, 0, OPTAB_LIB_WIDEN));
6358 }
6359 
6360 static void
6361 hppa_va_start (tree valist, rtx nextarg)
6362 {
6363   nextarg = expand_builtin_saveregs ();
6364   std_expand_builtin_va_start (valist, nextarg);
6365 }
6366 
6367 static tree
6368 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6369 			   gimple_seq *post_p)
6370 {
6371   if (TARGET_64BIT)
6372     {
6373       /* Args grow upward.  We can use the generic routines.  */
6374       return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6375     }
6376   else /* !TARGET_64BIT */
6377     {
6378       tree ptr = build_pointer_type (type);
6379       tree valist_type;
6380       tree t, u;
6381       unsigned int size, ofs;
6382       bool indirect;
6383 
6384       indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6385       if (indirect)
6386 	{
6387 	  type = ptr;
6388 	  ptr = build_pointer_type (type);
6389 	}
6390       size = int_size_in_bytes (type);
6391       valist_type = TREE_TYPE (valist);
6392 
6393       /* Args grow down.  Not handled by generic routines.  */
6394 
6395       u = fold_convert (sizetype, size_in_bytes (type));
6396       u = fold_build1 (NEGATE_EXPR, sizetype, u);
6397       t = fold_build_pointer_plus (valist, u);
6398 
6399       /* Align to 4 or 8 byte boundary depending on argument size.  */
6400 
6401       u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6402       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6403       t = fold_convert (valist_type, t);
6404 
6405       t = build2 (MODIFY_EXPR, valist_type, valist, t);
6406 
6407       ofs = (8 - size) % 4;
6408       if (ofs != 0)
6409 	t = fold_build_pointer_plus_hwi (t, ofs);
6410 
6411       t = fold_convert (ptr, t);
6412       t = build_va_arg_indirect_ref (t);
6413 
6414       if (indirect)
6415 	t = build_va_arg_indirect_ref (t);
6416 
6417       return t;
6418     }
6419 }
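
/* A worked example of the 32-bit path above: for a 6-byte argument,
   valist is first decremented by 6, then masked with -8 (size > 4
   selects the 8-byte boundary), and finally ofs = (8 - 6) % 4 = 2 is
   added back so the value is read from its right-justified position
   within the slot.  */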
6420 
6421 /* True if MODE is valid for the target.  By "valid", we mean able to
6422    be manipulated in non-trivial ways.  In particular, this means all
6423    the arithmetic is supported.
6424 
6425    Currently, TImode is not valid as the HP 64-bit runtime documentation
6426    doesn't specify the alignment and calling conventions for this type.
6427    Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6428    2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE.  */
6429 
6430 static bool
6431 pa_scalar_mode_supported_p (machine_mode mode)
6432 {
6433   int precision = GET_MODE_PRECISION (mode);
6434 
6435   switch (GET_MODE_CLASS (mode))
6436     {
6437     case MODE_PARTIAL_INT:
6438     case MODE_INT:
6439       if (precision == CHAR_TYPE_SIZE)
6440 	return true;
6441       if (precision == SHORT_TYPE_SIZE)
6442 	return true;
6443       if (precision == INT_TYPE_SIZE)
6444 	return true;
6445       if (precision == LONG_TYPE_SIZE)
6446 	return true;
6447       if (precision == LONG_LONG_TYPE_SIZE)
6448 	return true;
6449       return false;
6450 
6451     case MODE_FLOAT:
6452       if (precision == FLOAT_TYPE_SIZE)
6453 	return true;
6454       if (precision == DOUBLE_TYPE_SIZE)
6455 	return true;
6456       if (precision == LONG_DOUBLE_TYPE_SIZE)
6457 	return true;
6458       return false;
6459 
6460     case MODE_DECIMAL_FLOAT:
6461       return false;
6462 
6463     default:
6464       gcc_unreachable ();
6465     }
6466 }
6467 
6468 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6469    it branches into the delay slot.  Otherwise, return FALSE.  */
6470 
6471 static bool
6472 branch_to_delay_slot_p (rtx_insn *insn)
6473 {
6474   rtx_insn *jump_insn;
6475 
6476   if (dbr_sequence_length ())
6477     return FALSE;
6478 
6479   jump_insn = next_active_insn (JUMP_LABEL (insn));
6480   while (insn)
6481     {
6482       insn = next_active_insn (insn);
6483       if (jump_insn == insn)
6484 	return TRUE;
6485 
6486       /* We can't rely on the length of asms.  So, we return FALSE when
6487 	 the branch is followed by an asm.  */
6488       if (!insn
6489 	  || GET_CODE (PATTERN (insn)) == ASM_INPUT
6490 	  || extract_asm_operands (PATTERN (insn)) != NULL_RTX
6491 	  || get_attr_length (insn) > 0)
6492 	break;
6493     }
6494 
6495   return FALSE;
6496 }
6497 
/* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.

   This occurs when INSN has an unfilled delay slot and is followed
   by an asm.  Disaster can occur if the asm is empty and the jump
   branches into the delay slot.  So, we add a nop in the delay slot
   when this occurs.  */

static bool
branch_needs_nop_p (rtx_insn *insn)
{
  rtx_insn *jump_insn;

  if (dbr_sequence_length ())
    return FALSE;

  jump_insn = next_active_insn (JUMP_LABEL (insn));
  while (insn)
    {
      insn = next_active_insn (insn);
      if (!insn || jump_insn == insn)
	return TRUE;

      if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
	   || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
	  && get_attr_length (insn) > 0)
	break;
    }

  return FALSE;
}

/* Return TRUE if INSN, a forward jump insn, can use nullification
   to skip the following instruction.  This avoids an extra cycle due
   to a mis-predicted branch when we fall through.  */

static bool
use_skip_p (rtx_insn *insn)
{
  rtx_insn *jump_insn = next_active_insn (JUMP_LABEL (insn));

  while (insn)
    {
      insn = next_active_insn (insn);

      /* We can't rely on the length of asms, so we can't skip asms.  */
      if (!insn
	  || GET_CODE (PATTERN (insn)) == ASM_INPUT
	  || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
	break;
      if (get_attr_length (insn) == 4
	  && jump_insn == next_active_insn (insn))
	return TRUE;
      if (get_attr_length (insn) > 0)
	break;
    }

  return FALSE;
}
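
/* An illustrative example (not from the original commentary): a forward
   branch over a single 4-byte insn,

	comb,=	%r3,%r4,L$0
	nop
	add	%r5,%r6,%r7
   L$0:

   can instead be emitted as a compare-and-clear that nullifies the
   following insn when the condition holds,

	comclr,=	%r3,%r4,%r0
	add	%r5,%r6,%r7

   so the mis-prediction penalty of the branch is avoided entirely.  */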

/* This routine handles all the normal conditional branch sequences we
   might need to generate.  It handles compare immediate vs compare
   register, nullification of delay slots, varying length branches,
   negated branches, and all combinations of the above.  It returns the
   output appropriate to emit the branch corresponding to all given
   parameters.  */
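
/* Note on the output templates below: the "{a|b}" constructs are
   assembler dialect alternatives expanded by output_asm_insn.  The
   first alternative is used for the PA 1.x mnemonics (e.g. comb,
   comclr) and the second for the PA 2.0 mnemonics (e.g. cmpb,
   cmpclr).  */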

const char *
pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
{
  static char buf[100];
  bool useskip;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot)
     is asking for a disaster.  This can happen when not optimizing and
     when jump optimization fails.

     While it is usually safe to emit nothing, this can fail if the
     preceding instruction is a nullified branch with an empty delay
     slot and the same branch target as this branch.  We could check
     for this but jump optimization should eliminate nop jumps.  It
     is always safe to emit a nop.  */
  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* The doubleword form of the cmpib instruction doesn't have the LEU
     and GTU conditions while the cmpb instruction does.  Since we accept
     zero for cmpb, we must ensure that we use cmpb for the comparison.  */
  if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
    operands[2] = gen_rtx_REG (DImode, 0);
  if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
    operands[1] = gen_rtx_REG (DImode, 0);

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with a
     comclr instruction.  This avoids a single cycle penalty due to a
     mis-predicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;

  switch (length)
    {
      /* All short conditional branches except backwards with an unfilled
	 delay slot.  */
      case 4:
	if (useskip)
	  strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
	else
	  strcpy (buf, "{com%I2b,|cmp%I2b,}");
	if (GET_MODE (operands[1]) == DImode)
	  strcat (buf, "*");
	if (negated)
	  strcat (buf, "%B3");
	else
	  strcat (buf, "%S3");
	if (useskip)
	  strcat (buf, " %2,%r1,%%r0");
	else if (nullify)
	  {
	    if (branch_needs_nop_p (insn))
	      strcat (buf, ",n %2,%r1,%0%#");
	    else
	      strcat (buf, ",n %2,%r1,%0");
	  }
	else
	  strcat (buf, " %2,%r1,%0");
	break;

     /* All long conditionals.  Note a short backward branch with an
	unfilled delay slot is treated just like a long backward branch
	with an unfilled delay slot.  */
      case 8:
	/* Handle weird backwards branch with a filled delay slot
	   which is nullified.  */
	if (dbr_sequence_length () != 0
	    && ! forward_branch_p (insn)
	    && nullify)
	  {
	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
	    if (GET_MODE (operands[1]) == DImode)
	      strcat (buf, "*");
	    if (negated)
	      strcat (buf, "%S3");
	    else
	      strcat (buf, "%B3");
	    strcat (buf, ",n %2,%r1,.+12\n\tb %0");
	  }
	/* Handle short backwards branch with an unfilled delay slot.
	   Using a comb;nop rather than comiclr;bl saves 1 cycle for both
	   taken and untaken branches.  */
	else if (dbr_sequence_length () == 0
		 && ! forward_branch_p (insn)
		 && INSN_ADDRESSES_SET_P ()
		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	  {
	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
	    if (GET_MODE (operands[1]) == DImode)
	      strcat (buf, "*");
	    if (negated)
	      strcat (buf, "%B3 %2,%r1,%0%#");
	    else
	      strcat (buf, "%S3 %2,%r1,%0%#");
	  }
	else
	  {
	    strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
	    if (GET_MODE (operands[1]) == DImode)
	      strcat (buf, "*");
	    if (negated)
	      strcat (buf, "%S3");
	    else
	      strcat (buf, "%B3");
	    if (nullify)
	      strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
	    else
	      strcat (buf, " %2,%r1,%%r0\n\tb %0");
	  }
	break;

      default:
	/* The reversed conditional branch must branch over one additional
	   instruction if the delay slot is filled and needs to be extracted
	   by pa_output_lbranch.  If the delay slot is empty or this is a
	   nullified forward branch, the instruction after the reversed
	   conditional branch must be nullified.  */
	if (dbr_sequence_length () == 0
	    || (nullify && forward_branch_p (insn)))
	  {
	    nullify = 1;
	    xdelay = 0;
	    operands[4] = GEN_INT (length);
	  }
	else
	  {
	    xdelay = 1;
	    operands[4] = GEN_INT (length + 4);
	  }

	/* Create a reversed conditional branch which branches around
	   the following insns.  */
	if (GET_MODE (operands[1]) != DImode)
	  {
	    if (nullify)
	      {
		if (negated)
		  strcpy (buf,
		    "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
		else
		  strcpy (buf,
		    "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
	      }
	    else
	      {
		if (negated)
		  strcpy (buf,
		    "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
		else
		  strcpy (buf,
		    "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
	      }
	  }
	else
	  {
	    if (nullify)
	      {
		if (negated)
		  strcpy (buf,
		    "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
		else
		  strcpy (buf,
		    "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
	      }
	    else
	      {
		if (negated)
		  strcpy (buf,
		    "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
		else
		  strcpy (buf,
		    "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
	      }
	  }

	output_asm_insn (buf, operands);
	return pa_output_lbranch (operands[0], insn, xdelay);
    }
  return buf;
}
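
/* For example (illustrative), a length-8 DImode comparison with an
   unfilled, nullified delay slot takes the final else arm above and
   comes out as

	cmpclr,*<>	%r5,%r4,%r0
	b,n	L$7

   i.e. a compare-and-clear with the reversed condition that nullifies
   the unconditional branch when the branch should not be taken.  */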

/* This routine handles output of long unconditional branches that
   exceed the maximum range of a simple branch instruction.  Since
   we don't have a register available for the branch, we save register
   %r1 in the frame marker, load the branch destination DEST into %r1,
   execute the branch, and restore %r1 in the delay slot of the branch.

   Since long branches may have an insn in the delay slot and the
   delay slot is used to restore %r1, we in general need to extract
   this insn and execute it before the branch.  However, to facilitate
   use of this function by conditional branches, we also provide an
   option to not extract the delay insn so that it will be emitted
   after the long branch.  So, if there is an insn in the delay slot,
   it is extracted if XDELAY is nonzero.

   The lengths of the various long-branch sequences are 20, 16 and 24
   bytes for the portable runtime, non-PIC and PIC cases, respectively.  */

const char *
pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
{
  rtx xoperands[2];

  xoperands[0] = dest;

  /* First, free up the delay slot.  */
  if (xdelay && dbr_sequence_length () != 0)
    {
      /* We can't handle a jump in the delay slot.  */
      gcc_assert (! JUMP_P (NEXT_INSN (insn)));

      final_scan_insn (NEXT_INSN (insn), asm_out_file,
		       optimize, 0, NULL);

      /* Now delete the delay insn.  */
      SET_INSN_DELETED (NEXT_INSN (insn));
    }

  /* Output an insn to save %r1.  The runtime documentation doesn't
     specify whether the "Clean Up" slot in the caller's frame can
     be clobbered by the callee.  It isn't copied by HP's builtin
     alloca, so this suggests that it can be clobbered if necessary.
     The "Static Link" location is copied by HP builtin alloca, so
     we avoid using it.  Using the cleanup slot might be a problem
     if we have to interoperate with languages that pass cleanup
     information.  However, it should be possible to handle these
     situations with GCC's asm feature.

     The "Current RP" slot is reserved for the called procedure, so
     we try to use it when we don't have a frame of our own.  It's
     rather unlikely that we won't have a frame when we need to emit
     a very long branch.

     Really the way to go long term is a register scavenger; go to
     the target of the jump and find a register which we can use
     as a scratch to hold the value in %r1.  Then, we wouldn't have
     to free up the delay slot or clobber a slot that may be needed
     for other purposes.  */
  if (TARGET_64BIT)
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
	/* Use the return pointer slot in the frame marker.  */
	output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
      else
	/* Use the slot at -40 in the frame marker since HP builtin
	   alloca doesn't copy it.  */
	output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
    }
  else
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
	/* Use the return pointer slot in the frame marker.  */
	output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
      else
	/* Use the "Clean Up" slot in the frame marker.  In GCC,
	   the only other use of this location is for copying a
	   floating point double argument from a floating-point
	   register to two general registers.  The copy is done
	   as an "atomic" operation when outputting a call, so it
	   won't interfere with our using the location here.  */
	output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
    }

  if (TARGET_PORTABLE_RUNTIME)
    {
      output_asm_insn ("ldil L'%0,%%r1", xoperands);
      output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
      output_asm_insn ("bv %%r0(%%r1)", xoperands);
    }
  else if (flag_pic)
    {
      output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
      if (TARGET_SOM || !TARGET_GAS)
	{
	  xoperands[1] = gen_label_rtx ();
	  output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (xoperands[1]));
	  output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
	}
      else
	{
	  output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
	  output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
	}
      output_asm_insn ("bv %%r0(%%r1)", xoperands);
    }
  else
    /* Now output a very long branch to the original target.  */
    output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);

  /* Now restore the value of %r1 in the delay slot.  */
  if (TARGET_64BIT)
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
	return "ldd -16(%%r30),%%r1";
      else
	return "ldd -40(%%r30),%%r1";
    }
  else
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
	return "ldw -20(%%r30),%%r1";
      else
	return "ldw -12(%%r30),%%r1";
    }
}
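
/* For example (illustrative), in 32-bit non-PIC code within a function
   that has a frame, the routine above emits

	stw	%r1,-12(%r30)		; save %r1 in the frame marker
	ldil	L'target,%r1		; high part of the target address
	be	R'target(%sr4,%r1)	; interspace branch to the target
	ldw	-12(%r30),%r1		; delay slot: restore %r1

   which is the 16-byte non-PIC case mentioned above.  */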

/* This routine handles all the branch-on-bit conditional branch sequences we
   might need to generate.  It handles nullification of delay slots,
   varying length branches, negated branches and all combinations of the
   above.  It returns the appropriate output template to emit the branch.  */

const char *
pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
{
  static char buf[100];
  bool useskip;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  I do not think this can happen as this pattern
     is only used when optimizing; jump optimization should eliminate the
     jump.  But be prepared just in case.  */

  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with an
     extrs instruction.  This avoids a single cycle penalty due to a
     mis-predicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;

  switch (length)
    {

      /* All short conditional branches except backwards with an unfilled
	 delay slot.  */
      case 4:
	if (useskip)
	  strcpy (buf, "{extrs,|extrw,s,}");
	else
	  strcpy (buf, "bb,");
	if (useskip && GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "extrd,s,*");
	else if (GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "bb,*");
	if ((which == 0 && negated)
	     || (which == 1 && ! negated))
	  strcat (buf, ">=");
	else
	  strcat (buf, "<");
	if (useskip)
	  strcat (buf, " %0,%1,1,%%r0");
	else if (nullify && negated)
	  {
	    if (branch_needs_nop_p (insn))
	      strcat (buf, ",n %0,%1,%3%#");
	    else
	      strcat (buf, ",n %0,%1,%3");
	  }
	else if (nullify && ! negated)
	  {
	    if (branch_needs_nop_p (insn))
	      strcat (buf, ",n %0,%1,%2%#");
	    else
	      strcat (buf, ",n %0,%1,%2");
	  }
	else if (! nullify && negated)
	  strcat (buf, " %0,%1,%3");
	else if (! nullify && ! negated)
	  strcat (buf, " %0,%1,%2");
	break;

     /* All long conditionals.  Note a short backward branch with an
	unfilled delay slot is treated just like a long backward branch
	with an unfilled delay slot.  */
      case 8:
	/* Handle weird backwards branch with a filled delay slot
	   which is nullified.  */
	if (dbr_sequence_length () != 0
	    && ! forward_branch_p (insn)
	    && nullify)
	  {
	    strcpy (buf, "bb,");
	    if (GET_MODE (operands[0]) == DImode)
	      strcat (buf, "*");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, "<");
	    else
	      strcat (buf, ">=");
	    if (negated)
	      strcat (buf, ",n %0,%1,.+12\n\tb %3");
	    else
	      strcat (buf, ",n %0,%1,.+12\n\tb %2");
	  }
	/* Handle short backwards branch with an unfilled delay slot.
	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
	   taken and untaken branches.  */
	else if (dbr_sequence_length () == 0
		 && ! forward_branch_p (insn)
		 && INSN_ADDRESSES_SET_P ()
		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	  {
	    strcpy (buf, "bb,");
	    if (GET_MODE (operands[0]) == DImode)
	      strcat (buf, "*");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, ">=");
	    else
	      strcat (buf, "<");
	    if (negated)
	      strcat (buf, " %0,%1,%3%#");
	    else
	      strcat (buf, " %0,%1,%2%#");
	  }
	else
	  {
	    if (GET_MODE (operands[0]) == DImode)
	      strcpy (buf, "extrd,s,*");
	    else
	      strcpy (buf, "{extrs,|extrw,s,}");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, "<");
	    else
	      strcat (buf, ">=");
	    if (nullify && negated)
	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
	    else if (nullify && ! negated)
	      strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
	    else if (negated)
	      strcat (buf, " %0,%1,1,%%r0\n\tb %3");
	    else
	      strcat (buf, " %0,%1,1,%%r0\n\tb %2");
	  }
	break;

      default:
	/* The reversed conditional branch must branch over one additional
	   instruction if the delay slot is filled and needs to be extracted
	   by pa_output_lbranch.  If the delay slot is empty or this is a
	   nullified forward branch, the instruction after the reversed
	   conditional branch must be nullified.  */
	if (dbr_sequence_length () == 0
	    || (nullify && forward_branch_p (insn)))
	  {
	    nullify = 1;
	    xdelay = 0;
	    operands[4] = GEN_INT (length);
	  }
	else
	  {
	    xdelay = 1;
	    operands[4] = GEN_INT (length + 4);
	  }

	if (GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "bb,*");
	else
	  strcpy (buf, "bb,");
	if ((which == 0 && negated)
	    || (which == 1 && !negated))
	  strcat (buf, "<");
	else
	  strcat (buf, ">=");
	if (nullify)
	  strcat (buf, ",n %0,%1,.+%4");
	else
	  strcat (buf, " %0,%1,.+%4");
	output_asm_insn (buf, operands);
	return pa_output_lbranch (negated ? operands[3] : operands[2],
				  insn, xdelay);
    }
  return buf;
}
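
/* For example (illustrative), "bb,< %r4,5,L$3" branches to L$3 when bit 5
   of %r4 (bits are numbered from the most significant bit) is 1, while
   "bb,>=" branches when the bit is 0.  The branch-on-variable-bit form
   handled below tests the bit selected by the %sar register instead of a
   fixed position.  */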

/* This routine handles all the branch-on-variable-bit conditional branch
   sequences we might need to generate.  It handles nullification of delay
   slots, varying length branches, negated branches and all combinations
   of the above.  It returns the appropriate output template to emit the
   branch.  */

const char *
pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
	       int which)
{
  static char buf[100];
  bool useskip;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  I do not think this can happen as this pattern
     is only used when optimizing; jump optimization should eliminate the
     jump.  But be prepared just in case.  */

  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with an
     extrs instruction.  This avoids a single cycle penalty due to a
     mis-predicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;

  switch (length)
    {

      /* All short conditional branches except backwards with an unfilled
	 delay slot.  */
      case 4:
	if (useskip)
	  strcpy (buf, "{vextrs,|extrw,s,}");
	else
	  strcpy (buf, "{bvb,|bb,}");
	if (useskip && GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "extrd,s,*");
	else if (GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "bb,*");
	if ((which == 0 && negated)
	     || (which == 1 && ! negated))
	  strcat (buf, ">=");
	else
	  strcat (buf, "<");
	if (useskip)
	  strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
	else if (nullify && negated)
	  {
	    if (branch_needs_nop_p (insn))
	      strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
	    else
	      strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
	  }
	else if (nullify && ! negated)
	  {
	    if (branch_needs_nop_p (insn))
	      strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
	    else
	      strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
	  }
	else if (! nullify && negated)
	  strcat (buf, "{ %0,%3| %0,%%sar,%3}");
	else if (! nullify && ! negated)
	  strcat (buf, "{ %0,%2| %0,%%sar,%2}");
	break;

     /* All long conditionals.  Note a short backward branch with an
	unfilled delay slot is treated just like a long backward branch
	with an unfilled delay slot.  */
      case 8:
	/* Handle weird backwards branch with a filled delay slot
	   which is nullified.  */
	if (dbr_sequence_length () != 0
	    && ! forward_branch_p (insn)
	    && nullify)
	  {
	    strcpy (buf, "{bvb,|bb,}");
	    if (GET_MODE (operands[0]) == DImode)
	      strcat (buf, "*");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, "<");
	    else
	      strcat (buf, ">=");
	    if (negated)
	      strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
	    else
	      strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
	  }
	/* Handle short backwards branch with an unfilled delay slot.
	   Using a bb;nop rather than extrs;bl saves 1 cycle for both
	   taken and untaken branches.  */
	else if (dbr_sequence_length () == 0
		 && ! forward_branch_p (insn)
		 && INSN_ADDRESSES_SET_P ()
		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	  {
	    strcpy (buf, "{bvb,|bb,}");
	    if (GET_MODE (operands[0]) == DImode)
	      strcat (buf, "*");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, ">=");
	    else
	      strcat (buf, "<");
	    if (negated)
	      strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
	    else
	      strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
	  }
	else
	  {
	    strcpy (buf, "{vextrs,|extrw,s,}");
	    if (GET_MODE (operands[0]) == DImode)
	      strcpy (buf, "extrd,s,*");
	    if ((which == 0 && negated)
		|| (which == 1 && ! negated))
	      strcat (buf, "<");
	    else
	      strcat (buf, ">=");
	    if (nullify && negated)
	      strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
	    else if (nullify && ! negated)
	      strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
	    else if (negated)
	      strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
	    else
	      strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
	  }
	break;

      default:
	/* The reversed conditional branch must branch over one additional
	   instruction if the delay slot is filled and needs to be extracted
	   by pa_output_lbranch.  If the delay slot is empty or this is a
	   nullified forward branch, the instruction after the reversed
	   conditional branch must be nullified.  */
	if (dbr_sequence_length () == 0
	    || (nullify && forward_branch_p (insn)))
	  {
	    nullify = 1;
	    xdelay = 0;
	    operands[4] = GEN_INT (length);
	  }
	else
	  {
	    xdelay = 1;
	    operands[4] = GEN_INT (length + 4);
	  }

	if (GET_MODE (operands[0]) == DImode)
	  strcpy (buf, "bb,*");
	else
	  strcpy (buf, "{bvb,|bb,}");
	if ((which == 0 && negated)
	    || (which == 1 && !negated))
	  strcat (buf, "<");
	else
	  strcat (buf, ">=");
	if (nullify)
	  strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
	else
	  strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
	output_asm_insn (buf, operands);
	return pa_output_lbranch (negated ? operands[3] : operands[2],
				  insn, xdelay);
    }
  return buf;
}

/* Return the output template for emitting a dbra type insn.

   Note it may perform some output operations on its own before
   returning the final output string.  */
const char *
pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
{
  int length = get_attr_length (insn);

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (branch_to_delay_slot_p (insn))
    {
      if (which_alternative == 0)
	return "ldo %1(%0),%0";
      else if (which_alternative == 1)
	{
	  output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
	  output_asm_insn ("ldw -16(%%r30),%4", operands);
	  output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
	  return "{fldws|fldw} -16(%%r30),%0";
	}
      else
	{
	  output_asm_insn ("ldw %0,%4", operands);
	  return "ldo %1(%4),%4\n\tstw %4,%0";
	}
    }

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int xdelay;

      /* If this is a long branch with its delay slot unfilled, set `nullify'
	 as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
	nullify = 1;

      /* If this is a short forward conditional branch which did not get
	 its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
	nullify = forward_branch_p (insn);

      switch (length)
	{
	case 4:
	  if (nullify)
	    {
	      if (branch_needs_nop_p (insn))
		return "addib,%C2,n %1,%0,%3%#";
	      else
		return "addib,%C2,n %1,%0,%3";
	    }
	  else
	    return "addib,%C2 %1,%0,%3";

	case 8:
	  /* Handle weird backwards branch with a filled delay slot
	     which is nullified.  */
	  if (dbr_sequence_length () != 0
	      && ! forward_branch_p (insn)
	      && nullify)
	    return "addib,%N2,n %1,%0,.+12\n\tb %3";
	  /* Handle short backwards branch with an unfilled delay slot.
	     Using an addb;nop rather than addi;bl saves 1 cycle for both
	     taken and untaken branches.  */
	  else if (dbr_sequence_length () == 0
		   && ! forward_branch_p (insn)
		   && INSN_ADDRESSES_SET_P ()
		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	      return "addib,%C2 %1,%0,%3%#";

	  /* Handle normal cases.  */
	  if (nullify)
	    return "addi,%N2 %1,%0,%0\n\tb,n %3";
	  else
	    return "addi,%N2 %1,%0,%0\n\tb %3";

	default:
	  /* The reversed conditional branch must branch over one additional
	     instruction if the delay slot is filled and needs to be extracted
	     by pa_output_lbranch.  If the delay slot is empty or this is a
	     nullified forward branch, the instruction after the reversed
	     conditional branch must be nullified.  */
	  if (dbr_sequence_length () == 0
	      || (nullify && forward_branch_p (insn)))
	    {
	      nullify = 1;
	      xdelay = 0;
	      operands[4] = GEN_INT (length);
	    }
	  else
	    {
	      xdelay = 1;
	      operands[4] = GEN_INT (length + 4);
	    }

	  if (nullify)
	    output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
	  else
	    output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);

	  return pa_output_lbranch (operands[3], insn, xdelay);
	}

    }
  /* Deal with gross reload from FP register case.  */
  else if (which_alternative == 1)
    {
      /* Move loop counter from FP register to MEM then into a GR,
	 increment the GR, store the GR into MEM, and finally reload
	 the FP register from MEM from within the branch's delay slot.  */
      output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
		       operands);
      output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
      if (length == 24)
	return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
      else if (length == 28)
	return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
      else
	{
	  operands[5] = GEN_INT (length - 16);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
  /* Deal with gross reload from memory case.  */
  else
    {
      /* Reload loop counter from memory, the store back to memory
	 happens in the branch's delay slot.  */
      output_asm_insn ("ldw %0,%4", operands);
      if (length == 12)
	return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
      else if (length == 16)
	return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
      else
	{
	  operands[5] = GEN_INT (length - 4);
	  output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
}
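
/* For example (illustrative), the common GR case above emits a single
   "addib,> -1,%r3,L$loop": add the immediate -1 to the counter in %r3
   and branch back to L$loop while the result satisfies the condition,
   typically with the loop's final insn sitting in the delay slot.  */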

/* Return the output template for emitting a movb type insn.

   Note it may perform some output operations on its own before
   returning the final output string.  */
const char *
pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
	     int reverse_comparison)
{
  int length = get_attr_length (insn);

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (branch_to_delay_slot_p (insn))
    {
      if (which_alternative == 0)
	return "copy %1,%0";
      else if (which_alternative == 1)
	{
	  output_asm_insn ("stw %1,-16(%%r30)", operands);
	  return "{fldws|fldw} -16(%%r30),%0";
	}
      else if (which_alternative == 2)
	return "stw %1,%0";
      else
	return "mtsar %r1";
    }

  /* Support the second variant.  */
  if (reverse_comparison)
    PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int xdelay;

      /* If this is a long branch with its delay slot unfilled, set `nullify'
	 as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
	nullify = 1;

      /* If this is a short forward conditional branch which did not get
	 its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
	nullify = forward_branch_p (insn);

      switch (length)
	{
	case 4:
	  if (nullify)
	    {
	      if (branch_needs_nop_p (insn))
		return "movb,%C2,n %1,%0,%3%#";
	      else
		return "movb,%C2,n %1,%0,%3";
	    }
	  else
	    return "movb,%C2 %1,%0,%3";

	case 8:
	  /* Handle weird backwards branch with a filled delay slot
	     which is nullified.  */
	  if (dbr_sequence_length () != 0
	      && ! forward_branch_p (insn)
	      && nullify)
	    return "movb,%N2,n %1,%0,.+12\n\tb %3";

	  /* Handle short backwards branch with an unfilled delay slot.
	     Using a movb;nop rather than or;bl saves 1 cycle for both
	     taken and untaken branches.  */
	  else if (dbr_sequence_length () == 0
		   && ! forward_branch_p (insn)
		   && INSN_ADDRESSES_SET_P ()
		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	    return "movb,%C2 %1,%0,%3%#";
	  /* Handle normal cases.  */
	  if (nullify)
	    return "or,%N2 %1,%%r0,%0\n\tb,n %3";
	  else
	    return "or,%N2 %1,%%r0,%0\n\tb %3";

	default:
	  /* The reversed conditional branch must branch over one additional
	     instruction if the delay slot is filled and needs to be extracted
	     by pa_output_lbranch.  If the delay slot is empty or this is a
	     nullified forward branch, the instruction after the reversed
	     conditional branch must be nullified.  */
	  if (dbr_sequence_length () == 0
	      || (nullify && forward_branch_p (insn)))
	    {
	      nullify = 1;
	      xdelay = 0;
	      operands[4] = GEN_INT (length);
	    }
	  else
	    {
	      xdelay = 1;
	      operands[4] = GEN_INT (length + 4);
	    }

	  if (nullify)
	    output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
	  else
	    output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);

	  return pa_output_lbranch (operands[3], insn, xdelay);
	}
    }
  /* Deal with gross reload for FP destination register case.  */
  else if (which_alternative == 1)
    {
      /* Move source register to MEM, perform the branch test, then
	 finally load the FP register from MEM from within the branch's
	 delay slot.  */
      output_asm_insn ("stw %1,-16(%%r30)", operands);
      if (length == 12)
	return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
      else if (length == 16)
	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
      else
	{
	  operands[4] = GEN_INT (length - 4);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
  /* Deal with gross reload from memory case.  */
  else if (which_alternative == 2)
    {
      /* Reload loop counter from memory, the store back to memory
	 happens in the branch's delay slot.  */
      if (length == 8)
	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
      else if (length == 12)
	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
      else
	{
	  operands[4] = GEN_INT (length);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
			   operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
  /* Handle SAR as a destination.  */
  else
    {
      if (length == 8)
	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
      else if (length == 12)
	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
      else
	{
	  operands[4] = GEN_INT (length);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
			   operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
}
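
/* For example (illustrative), "movb,= %r4,%r5,L$2" copies %r4 into %r5
   and branches to L$2 when the moved value is zero, combining the copy
   and the conditional branch into one instruction.  */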

/* Copy any FP arguments in INSN into integer registers.  */
static void
copy_fp_args (rtx_insn *insn)
{
  rtx link;
  rtx xoperands[2];

  for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
    {
      int arg_mode, regno;
      rtx use = XEXP (link, 0);

      if (! (GET_CODE (use) == USE
	  && GET_CODE (XEXP (use, 0)) == REG
	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
	continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));

      /* Is it a floating point register?  */
      if (regno >= 32 && regno <= 39)
	{
	  /* Copy the FP register into an integer register via memory.  */
	  if (arg_mode == SFmode)
	    {
	      xoperands[0] = XEXP (use, 0);
	      xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
	      output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
	    }
	  else
	    {
	      xoperands[0] = XEXP (use, 0);
	      xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
	      output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
	      output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
	      output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
	    }
	}
    }
}
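
/* For example (illustrative), an SFmode argument in a floating-point
   argument register is bounced through the -16 frame-marker slot:

	fstws	%frX,-16(%sr0,%r30)
	ldw	-16(%sr0,%r30),%rY

   where %frX and %rY stand for the FP argument register and the
   corresponding general argument register.  */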

/* Compute length of the FP argument copy sequence for INSN.  */
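/* The 8- and 12-byte contributions below correspond to the two-insn
   (SFmode) and three-insn (DFmode) sequences emitted by copy_fp_args
   above.  */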
static int
length_fp_args (rtx_insn *insn)
{
  int length = 0;
  rtx link;

  for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
    {
      int arg_mode, regno;
      rtx use = XEXP (link, 0);

      if (! (GET_CODE (use) == USE
	  && GET_CODE (XEXP (use, 0)) == REG
	  && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
	continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));

      /* Is it a floating point register?  */
      if (regno >= 32 && regno <= 39)
	{
	  if (arg_mode == SFmode)
	    length += 8;
	  else
	    length += 12;
	}
    }

  return length;
}

/* Return the attribute length for the millicode call instruction INSN.
   The length must match the code generated by pa_output_millicode_call.
   We include the delay slot in the returned length as it is better to
   overestimate the length than to underestimate it.  */

int
pa_attr_length_millicode_call (rtx_insn *insn)
{
  unsigned long distance = -1;
  unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;

  if (INSN_ADDRESSES_SET_P ())
    {
      distance = (total + insn_current_reference_address (insn));
      if (distance < total)
	distance = -1;
    }

  if (TARGET_64BIT)
    {
      if (!TARGET_LONG_CALLS && distance < 7600000)
	return 8;

      return 20;
    }
  else if (TARGET_PORTABLE_RUNTIME)
    return 24;
  else
    {
      if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
	return 8;

      if (!flag_pic)
	return 12;

      return 24;
    }
}

/* INSN is a function call.

   CALL_DEST is the routine we are calling.  */

const char *
pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
{
  int attr_length = get_attr_length (insn);
  int seq_length = dbr_sequence_length ();
  rtx xoperands[3];

  xoperands[0] = call_dest;
  xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);

  /* Handle the common case where we are sure that the branch will
     reach the beginning of the $CODE$ subspace.  The within reach
     form of the $$sh_func_adrs call has a length of 28.  Because it
     has an attribute type of sh_func_adrs, it never has a nonzero
     sequence length (i.e., the delay slot is never filled).  */
  if (!TARGET_LONG_CALLS
      && (attr_length == 8
	  || (attr_length == 28
	      && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
    {
      output_asm_insn ("{bl|b,l} %0,%2", xoperands);
    }
  else
    {
      if (TARGET_64BIT)
	{
	  /* It might seem that one insn could be saved by accessing
	     the millicode function using the linkage table.  However,
	     this doesn't work in shared libraries and other dynamically
	     loaded objects.  Using a pc-relative sequence also avoids
	     problems related to the implicit use of the gp register.  */
	  output_asm_insn ("b,l .+8,%%r1", xoperands);

	  if (TARGET_GAS)
	    {
	      output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
	      output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
	    }
	  else
	    {
	      xoperands[1] = gen_label_rtx ();
	      output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
	      targetm.asm_out.internal_label (asm_out_file, "L",
					 CODE_LABEL_NUMBER (xoperands[1]));
	      output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
	    }

	  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
	}
      else if (TARGET_PORTABLE_RUNTIME)
	{
	  /* Pure portable runtime doesn't allow be/ble; we also don't
	     have PIC support in the assembler/linker, so this sequence
	     is needed.  */

	  /* Get the address of our target into %r1.  */
	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
	  output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);

	  /* Get our return address into %r31.  */
	  output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
	  output_asm_insn ("addi 8,%%r31,%%r31", xoperands);

	  /* Jump to our target address in %r1.  */
	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
	}
      else if (!flag_pic)
	{
	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
	  if (TARGET_PA_20)
	    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
	  else
	    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
	}
      else
	{
	  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
	  output_asm_insn ("addi 16,%%r1,%%r31", xoperands);

	  if (TARGET_SOM || !TARGET_GAS)
	    {
	      /* The HP assembler can generate relocations for the
		 difference of two symbols.  GAS can do this for a
		 millicode symbol but not an arbitrary external
		 symbol when generating SOM output.  */
	      xoperands[1] = gen_label_rtx ();
	      targetm.asm_out.internal_label (asm_out_file, "L",
					 CODE_LABEL_NUMBER (xoperands[1]));
	      output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
	      output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
	    }
	  else
	    {
	      output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
	      output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
			       xoperands);
	    }

	  /* Jump to our target address in %r1.  */
	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
	}
    }

  if (seq_length == 0)
    output_asm_insn ("nop", xoperands);

  return "";
}
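
/* For example (illustrative), a short in-reach 32-bit millicode call
   such as a call to $$mulI comes out as

	bl	$$mulI,%r31
	nop

   with the return address in %r31 per the millicode calling
   convention.  */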

/* Return the attribute length of the call instruction INSN.  The SIBCALL
   flag indicates whether INSN is a regular call or a sibling call.  The
   length returned must be longer than the code actually generated by
   pa_output_call.  Since branch shortening is done before delay branch
   sequencing, there is no way to determine whether or not the delay
   slot will be filled during branch shortening.  Even when the delay
   slot is filled, we may have to add a nop if the delay slot contains
   a branch that can't reach its target.  Thus, we always have to include
   the delay slot in the length estimate.  This used to be done in
   pa_adjust_insn_length but we do it here now as some sequences always
   fill the delay slot and we can save four bytes in the estimate for
   these sequences.  */

int
pa_attr_length_call (rtx_insn *insn, int sibcall)
{
  int local_call;
  rtx call, call_dest;
  tree call_decl;
  int length = 0;
  rtx pat = PATTERN (insn);
  unsigned long distance = -1;

  gcc_assert (CALL_P (insn));

  if (INSN_ADDRESSES_SET_P ())
    {
      unsigned long total;

      total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
      distance = (total + insn_current_reference_address (insn));
      if (distance < total)
	distance = -1;
    }

  gcc_assert (GET_CODE (pat) == PARALLEL);

  /* Get the call rtx.  */
  call = XVECEXP (pat, 0, 0);
  if (GET_CODE (call) == SET)
    call = SET_SRC (call);

  gcc_assert (GET_CODE (call) == CALL);

  /* Determine if this is a local call.  */
  call_dest = XEXP (XEXP (call, 0), 0);
  call_decl = SYMBOL_REF_DECL (call_dest);
  local_call = call_decl && targetm.binds_local_p (call_decl);

  /* pc-relative branch.  */
  if (!TARGET_LONG_CALLS
      && ((TARGET_PA_20 && !sibcall && distance < 7600000)
	  || distance < MAX_PCREL17F_OFFSET))
    length += 8;

  /* 64-bit plabel sequence.  */
  else if (TARGET_64BIT && !local_call)
    length += sibcall ? 28 : 24;

  /* non-pic long absolute branch sequence.  */
  else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
    length += 12;

  /* long pc-relative branch sequence.  */
  else if (TARGET_LONG_PIC_SDIFF_CALL
	   || (TARGET_GAS && !TARGET_SOM
	       && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
    {
      length += 20;

      if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
	length += 8;
    }

  /* 32-bit plabel sequence.  */
  else
    {
      length += 32;

      if (TARGET_SOM)
	length += length_fp_args (insn);

      if (flag_pic)
	length += 4;

      if (!TARGET_PA_20)
	{
	  if (!sibcall)
	    length += 8;

	  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
	    length += 8;
	}
    }

  return length;
}

/* INSN is a function call.

   CALL_DEST is the routine we are calling.  */

const char *
pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
{
  int seq_length = dbr_sequence_length ();
  tree call_decl = SYMBOL_REF_DECL (call_dest);
  int local_call = call_decl && targetm.binds_local_p (call_decl);
  rtx xoperands[2];

  xoperands[0] = call_dest;

  /* Handle the common case where we're sure that the branch will reach
     the beginning of the "$CODE$" subspace.  This is the beginning of
     the current function if we are in a named section.  */
  if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
    {
      xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
      output_asm_insn ("{bl|b,l} %0,%1", xoperands);
    }
  else
    {
      if (TARGET_64BIT && !local_call)
	{
	  /* ??? As far as I can tell, the HP linker doesn't support the
	     long pc-relative sequence described in the 64-bit runtime
	     architecture.  So, we use a slightly longer indirect call.  */
	  xoperands[0] = pa_get_deferred_plabel (call_dest);
	  xoperands[1] = gen_label_rtx ();

	  /* If this isn't a sibcall, we put the load of %r27 into the
	     delay slot.  We can't do this in a sibcall as we don't
	     have a second call-clobbered scratch register available.
	     We don't need to do anything when generating fast indirect
	     calls.  */
	  if (seq_length != 0 && !sibcall)
	    {
	      final_scan_insn (NEXT_INSN (insn), asm_out_file,
			       optimize, 0, NULL);

	      /* Now delete the delay insn.  */
	      SET_INSN_DELETED (NEXT_INSN (insn));
	      seq_length = 0;
	    }

	  output_asm_insn ("addil LT'%0,%%r27", xoperands);
	  output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
	  output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);

	  if (sibcall)
	    {
	      output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
	      output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
	      output_asm_insn ("bve (%%r1)", xoperands);
	    }
	  else
	    {
	      output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
	      output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
	      output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
	      seq_length = 1;
	    }
	}
      else
	{
	  int indirect_call = 0;

	  /* Emit a long call.  There are several different sequences
	     of increasing length and complexity.  In most cases,
	     they don't allow an instruction in the delay slot.  */
	  if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
	      && !TARGET_LONG_PIC_SDIFF_CALL
	      && !(TARGET_GAS && !TARGET_SOM
		   && (TARGET_LONG_PIC_PCREL_CALL || local_call))
	      && !TARGET_64BIT)
	    indirect_call = 1;

	  if (seq_length != 0
	      && !sibcall
	      && (!TARGET_PA_20
		  || indirect_call
		  || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
	    {
	      /* A non-jump insn in the delay slot.  By definition we can
		 emit this insn before the call (and in fact before argument
		 relocating).  */
	      final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
			       NULL);

	      /* Now delete the delay insn.  */
	      SET_INSN_DELETED (NEXT_INSN (insn));
	      seq_length = 0;
	    }

	  if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
	    {
	      /* This is the best sequence for making long calls in
		 non-pic code.  Unfortunately, GNU ld doesn't provide
		 the stub needed for external calls, and GAS's support
		 for this with the SOM linker is buggy.  It is safe
		 to use this for local calls.  */
	      output_asm_insn ("ldil L'%0,%%r1", xoperands);
	      if (sibcall)
		output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
	      else
		{
		  if (TARGET_PA_20)
		    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
				     xoperands);
		  else
		    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);

		  output_asm_insn ("copy %%r31,%%r2", xoperands);
		  seq_length = 1;
		}
	    }
	  else
	    {
	      if (TARGET_LONG_PIC_SDIFF_CALL)
		{
		  /* The HP assembler and linker can handle relocations
		     for the difference of two symbols.  The HP assembler
		     recognizes the sequence as a pc-relative call and
		     the linker provides stubs when needed.  */
		  xoperands[1] = gen_label_rtx ();
		  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
		  output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
		  targetm.asm_out.internal_label (asm_out_file, "L",
					     CODE_LABEL_NUMBER (xoperands[1]));
		  output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
		}
	      else if (TARGET_GAS && !TARGET_SOM
		       && (TARGET_LONG_PIC_PCREL_CALL || local_call))
		{
		  /*  GAS currently can't generate the relocations that
		      are needed for the SOM linker under HP-UX using this
		      sequence.  The GNU linker doesn't generate the stubs
		      that are needed for external calls on TARGET_ELF32
		      with this sequence.  For now, we have to use a
		      longer plabel sequence when using GAS.  */
		  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
		  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
				   xoperands);
		  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
				   xoperands);
		}
	      else
		{
		  /* Emit a long plabel-based call sequence.  This is
		     essentially an inline implementation of $$dyncall.
		     We don't actually try to call $$dyncall as this is
		     as difficult as calling the function itself.  */
		  xoperands[0] = pa_get_deferred_plabel (call_dest);
		  xoperands[1] = gen_label_rtx ();

		  /* Since the call is indirect, FP arguments in registers
		     need to be copied to the general registers.  Then, the
		     argument relocation stub will copy them back.  */
		  if (TARGET_SOM)
		    copy_fp_args (insn);

		  if (flag_pic)
		    {
		      output_asm_insn ("addil LT'%0,%%r19", xoperands);
		      output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
		      output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
		    }
		  else
		    {
		      output_asm_insn ("addil LR'%0-$global$,%%r27",
				       xoperands);
		      output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
				       xoperands);
		    }

		  output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
		  output_asm_insn ("depi 0,31,2,%%r1", xoperands);
		  output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
		  output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);

		  if (!sibcall && !TARGET_PA_20)
		    {
		      output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
			output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
		      else
			output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
		    }
		}

	      if (TARGET_PA_20)
		{
		  if (sibcall)
		    output_asm_insn ("bve (%%r1)", xoperands);
		  else
		    {
		      if (indirect_call)
			{
			  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
			  output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
			  seq_length = 1;
			}
		      else
			output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
		    }
		}
	      else
		{
		  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
		    output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
				     xoperands);

		  if (sibcall)
		    {
		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
			output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
		      else
			output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
		    }
		  else
		    {
		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
			output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
		      else
			output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);

		      if (indirect_call)
			output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
		      else
			output_asm_insn ("copy %%r31,%%r2", xoperands);
		      seq_length = 1;
		    }
		}
	    }
	}
    }

  if (seq_length == 0)
    output_asm_insn ("nop", xoperands);

  return "";
}
8136 
8137 /* Return the attribute length of the indirect call instruction INSN.
8138    The length must match the code generated by pa_output_indirect_call.
8139    The returned length includes the delay slot.  Currently, the delay
8140    slot of an indirect call sequence is not exposed and it is used by
8141    the sequence itself.  */
8142 
8143 int
8144 pa_attr_length_indirect_call (rtx_insn *insn)
8145 {
8146   unsigned long distance = -1;
8147   unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8148 
8149   if (INSN_ADDRESSES_SET_P ())
8150     {
8151       distance = (total + insn_current_reference_address (insn));
8152       if (distance < total)
8153 	distance = -1;
8154     }
8155 
8156   if (TARGET_64BIT)
8157     return 12;
8158 
8159   if (TARGET_FAST_INDIRECT_CALLS
8160       || (!TARGET_LONG_CALLS
8161 	  && !TARGET_PORTABLE_RUNTIME
8162 	  && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8163 	      || distance < MAX_PCREL17F_OFFSET)))
8164     return 8;
8165 
8166   if (flag_pic)
8167     return 20;
8168 
8169   if (TARGET_PORTABLE_RUNTIME)
8170     return 16;
8171 
8172   /* Out of reach, can use ble.  */
8173   return 12;
8174 }
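
/* Editorial summary (not in the original sources): the lengths above
   pair with the sequences emitted by pa_output_indirect_call below,
   each count including the delay slot:

       8   bl/b,l to $$dyncall, with a register copy in the delay slot
      12   ldil/ble to $$dyncall (or, on TARGET_64BIT, ldd/bve,l/ldd)
      16   portable runtime: ldil/ldo/blr/bv,n
      20   long pc-relative PIC sequence ending in bv/ldo  */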
8175 
8176 const char *
8177 pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
8178 {
8179   rtx xoperands[1];
8180 
8181   if (TARGET_64BIT)
8182     {
8183       xoperands[0] = call_dest;
8184       output_asm_insn ("ldd 16(%0),%%r2", xoperands);
8185       output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
8186       return "";
8187     }
8188 
8189   /* First the special case for kernels, level 0 systems, etc.  */
8190   if (TARGET_FAST_INDIRECT_CALLS)
8191     return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8192 
8193   /* Now the normal case -- we can reach $$dyncall directly or
8194      we're sure that we can get there via a long-branch stub.
8195 
8196      No need to check target flags as the length uniquely identifies
8197      the remaining cases.  */
8198   if (pa_attr_length_indirect_call (insn) == 8)
8199     {
8200       /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8201 	 $$dyncall.  Since BLE uses %r31 as the link register, the 22-bit
8202 	 variant of the B,L instruction can't be used on the SOM target.  */
8203       if (TARGET_PA_20 && !TARGET_SOM)
8204 	return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
8205       else
8206 	return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8207     }
8208 
8209   /* Long millicode call, but we are not generating PIC or portable runtime
8210      code.  */
8211   if (pa_attr_length_indirect_call (insn) == 12)
8212     return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8213 
8214   /* Long millicode call for portable runtime.  */
8215   if (pa_attr_length_indirect_call (insn) == 16)
8216     return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8217 
8218   /* We need a long PIC call to $$dyncall.  */
8219   xoperands[0] = NULL_RTX;
8220   output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8221   if (TARGET_SOM || !TARGET_GAS)
8222     {
8223       xoperands[0] = gen_label_rtx ();
8224       output_asm_insn ("addil L'$$dyncall-%0,%%r2", xoperands);
8225       targetm.asm_out.internal_label (asm_out_file, "L",
8226 				      CODE_LABEL_NUMBER (xoperands[0]));
8227       output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
8228     }
8229   else
8230     {
8231       output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r2", xoperands);
8232       output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
8233 		       xoperands);
8234     }
8235   output_asm_insn ("bv %%r0(%%r1)", xoperands);
8236   output_asm_insn ("ldo 12(%%r2),%%r2", xoperands);
8237   return "";
8238 }
8239 
8240 /* In HPUX 8.0's shared library scheme, special relocations are needed
8241    for function labels if they might be passed to a function
8242    in a shared library (because shared libraries don't live in code
8243    space), and special magic is needed to construct their address.  */
8244 
8245 void
8246 pa_encode_label (rtx sym)
8247 {
8248   const char *str = XSTR (sym, 0);
8249   int len = strlen (str) + 1;
8250   char *newstr, *p;
8251 
8252   p = newstr = XALLOCAVEC (char, len + 1);
8253   *p++ = '@';
8254   strcpy (p, str);
8255 
8256   XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8257 }
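
/* A minimal sketch (editorial illustration, not part of the build) of
   the encoding round trip; pa_strip_name_encoding below is the
   inverse:

     rtx sym = gen_rtx_SYMBOL_REF (Pmode, "foo");
     pa_encode_label (sym);
     // XSTR (sym, 0) is now "@foo".
     const char *plain = pa_strip_name_encoding (XSTR (sym, 0));
     // plain points at "foo" again.  */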
8258 
8259 static void
8260 pa_encode_section_info (tree decl, rtx rtl, int first)
8261 {
8262   int old_referenced = 0;
8263 
8264   if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8265     old_referenced
8266       = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8267 
8268   default_encode_section_info (decl, rtl, first);
8269 
8270   if (first && TEXT_SPACE_P (decl))
8271     {
8272       SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8273       if (TREE_CODE (decl) == FUNCTION_DECL)
8274 	pa_encode_label (XEXP (rtl, 0));
8275     }
8276   else if (old_referenced)
8277     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8278 }
8279 
8280 /* This is sort of inverse to pa_encode_section_info.  */
8281 
8282 static const char *
8283 pa_strip_name_encoding (const char *str)
8284 {
8285   str += (*str == '@');
8286   str += (*str == '*');
8287   return str;
8288 }
8289 
8290 /* Returns 1 if OP is a function label involved in a simple addition
8291    with a constant.  Used to keep certain patterns from matching
8292    during instruction combination.  */
8293 int
8294 pa_is_function_label_plus_const (rtx op)
8295 {
8296   /* Strip off any CONST.  */
8297   if (GET_CODE (op) == CONST)
8298     op = XEXP (op, 0);
8299 
8300   return (GET_CODE (op) == PLUS
8301 	  && function_label_operand (XEXP (op, 0), VOIDmode)
8302 	  && GET_CODE (XEXP (op, 1)) == CONST_INT);
8303 }
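
/* Example (editorial note): with the '@' encoding above, an address
   such as (const (plus (symbol_ref "@foo") (const_int 4))) satisfies
   this predicate; a PLUS whose first operand is not a function label
   does not.  */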
8304 
8305 /* Output assembly code for a thunk to FUNCTION.  */
8306 
8307 static void
8308 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8309 			HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8310 			tree function)
8311 {
8312   static unsigned int current_thunk_number;
8313   int val_14 = VAL_14_BITS_P (delta);
8314   unsigned int old_last_address = last_address, nbytes = 0;
8315   char label[16];
8316   rtx xoperands[4];
8317 
8318   xoperands[0] = XEXP (DECL_RTL (function), 0);
8319   xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8320   xoperands[2] = GEN_INT (delta);
8321 
8322   final_start_function (emit_barrier (), file, 1);
8323 
8324   /* Output the thunk.  We know that the function is in the same
8325      translation unit (i.e., the same space) as the thunk, and that
8326      thunks are output after their method.  Thus, we don't need an
8327      external branch to reach the function.  With SOM and GAS,
8328      functions and thunks are effectively in different sections.
8329      Thus, we can always use an IA-relative branch and the linker
8330      will add a long branch stub if necessary.
8331 
8332      However, we have to be careful when generating PIC code on the
8333      SOM port to ensure that the sequence does not transfer to an
8334      import stub for the target function as this could clobber the
8335      return value saved at SP-24.  This would also apply to the
8336      32-bit linux port if the multi-space model is implemented.  */
8337   if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8338        && !(flag_pic && TREE_PUBLIC (function))
8339        && (TARGET_GAS || last_address < 262132))
8340       || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8341 	  && ((targetm_common.have_named_sections
8342 	       && DECL_SECTION_NAME (thunk_fndecl) != NULL
8343 	       /* The GNU 64-bit linker has rather poor stub management.
8344 		  So, we use a long branch from thunks that aren't in
8345 		  the same section as the target function.  */
8346 	       && ((!TARGET_64BIT
8347 		    && (DECL_SECTION_NAME (thunk_fndecl)
8348 			!= DECL_SECTION_NAME (function)))
8349 		   || ((DECL_SECTION_NAME (thunk_fndecl)
8350 			== DECL_SECTION_NAME (function))
8351 		       && last_address < 262132)))
8352 	      /* In this case, we need to be able to reach the start of
8353 		 the stub table even though the function is likely closer
8354 		 and can be jumped to directly.  */
8355 	      || (targetm_common.have_named_sections
8356 		  && DECL_SECTION_NAME (thunk_fndecl) == NULL
8357 		  && DECL_SECTION_NAME (function) == NULL
8358 		  && total_code_bytes < MAX_PCREL17F_OFFSET)
8359 	      /* Likewise.  */
8360 	      || (!targetm_common.have_named_sections
8361 		  && total_code_bytes < MAX_PCREL17F_OFFSET))))
8362     {
8363       if (!val_14)
8364 	output_asm_insn ("addil L'%2,%%r26", xoperands);
8365 
8366       output_asm_insn ("b %0", xoperands);
8367 
8368       if (val_14)
8369 	{
8370 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8371 	  nbytes += 8;
8372 	}
8373       else
8374 	{
8375 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8376 	  nbytes += 12;
8377 	}
8378     }
8379   else if (TARGET_64BIT)
8380     {
8381       /* We only have one call-clobbered scratch register, so we can't
8382          make use of the delay slot if delta doesn't fit in 14 bits.  */
8383       if (!val_14)
8384 	{
8385 	  output_asm_insn ("addil L'%2,%%r26", xoperands);
8386 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8387 	}
8388 
8389       output_asm_insn ("b,l .+8,%%r1", xoperands);
8390 
8391       if (TARGET_GAS)
8392 	{
8393 	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8394 	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
8395 	}
8396       else
8397 	{
8398 	  xoperands[3] = GEN_INT (val_14 ? 8 : 16);
8399 	  output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
8400 	}
8401 
8402       if (val_14)
8403 	{
8404 	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
8405 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8406 	  nbytes += 20;
8407 	}
8408       else
8409 	{
8410 	  output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8411 	  nbytes += 24;
8412 	}
8413     }
8414   else if (TARGET_PORTABLE_RUNTIME)
8415     {
8416       output_asm_insn ("ldil L'%0,%%r1", xoperands);
8417       output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8418 
8419       if (!val_14)
8420 	output_asm_insn ("addil L'%2,%%r26", xoperands);
8421 
8422       output_asm_insn ("bv %%r0(%%r22)", xoperands);
8423 
8424       if (val_14)
8425 	{
8426 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8427 	  nbytes += 16;
8428 	}
8429       else
8430 	{
8431 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8432 	  nbytes += 20;
8433 	}
8434     }
8435   else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8436     {
8437       /* The function is accessible from outside this module.  The only
8438 	 way to avoid an import stub between the thunk and function is to
8439 	 call the function directly with an indirect sequence similar to
8440 	 that used by $$dyncall.  This is possible because $$dyncall acts
8441 	 as the import stub in an indirect call.  */
8442       ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8443       xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8444       output_asm_insn ("addil LT'%3,%%r19", xoperands);
8445       output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8446       output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8447       output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8448       output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8449       output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8450       output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8451 
8452       if (!val_14)
8453 	{
8454 	  output_asm_insn ("addil L'%2,%%r26", xoperands);
8455 	  nbytes += 4;
8456 	}
8457 
8458       if (TARGET_PA_20)
8459 	{
8460 	  output_asm_insn ("bve (%%r22)", xoperands);
8461 	  nbytes += 36;
8462 	}
8463       else if (TARGET_NO_SPACE_REGS)
8464 	{
8465 	  output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8466 	  nbytes += 36;
8467 	}
8468       else
8469 	{
8470 	  output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8471 	  output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8472 	  output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8473 	  nbytes += 44;
8474 	}
8475 
8476       if (val_14)
8477 	output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8478       else
8479 	output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8480     }
8481   else if (flag_pic)
8482     {
8483       output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8484 
8485       if (TARGET_SOM || !TARGET_GAS)
8486 	{
8487 	  output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
8488 	  output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
8489 	}
8490       else
8491 	{
8492 	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8493 	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
8494 	}
8495 
8496       if (!val_14)
8497 	output_asm_insn ("addil L'%2,%%r26", xoperands);
8498 
8499       output_asm_insn ("bv %%r0(%%r22)", xoperands);
8500 
8501       if (val_14)
8502 	{
8503 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8504 	  nbytes += 20;
8505 	}
8506       else
8507 	{
8508 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8509 	  nbytes += 24;
8510 	}
8511     }
8512   else
8513     {
8514       if (!val_14)
8515 	output_asm_insn ("addil L'%2,%%r26", xoperands);
8516 
8517       output_asm_insn ("ldil L'%0,%%r22", xoperands);
8518       output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8519 
8520       if (val_14)
8521 	{
8522 	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8523 	  nbytes += 12;
8524 	}
8525       else
8526 	{
8527 	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8528 	  nbytes += 16;
8529 	}
8530     }
8531 
8532   final_end_function ();
8533 
8534   if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8535     {
8536       switch_to_section (data_section);
8537       output_asm_insn (".align 4", xoperands);
8538       ASM_OUTPUT_LABEL (file, label);
8539       output_asm_insn (".word P'%0", xoperands);
8540     }
8541 
8542   current_thunk_number++;
8543   nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8544 	    & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8545   last_address += nbytes;
8546   if (old_last_address > last_address)
8547     last_address = UINT_MAX;
8548   update_total_code_bytes (nbytes);
8549 }
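
/* Worked example (editorial sketch) of the rounding at the end of
   pa_asm_output_mi_thunk, assuming FUNCTION_BOUNDARY / BITS_PER_UNIT
   is 8:

     nbytes = 20;
     nbytes = (20 + 8 - 1) & ~(8 - 1);	// (27 & ~7) == 24

   so a 20-byte thunk is accounted as 24 bytes of code.  */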
8550 
8551 /* Only direct calls to static functions are allowed to be sibling (tail)
8552    call optimized.
8553 
8554    This restriction is necessary because some linker-generated stubs
8555    store return pointers into rp', which in some cases might clobber a
8556    live value already in rp'.
8557 
8558    In a sibcall the current function and the target function share stack
8559    space.  Thus if the path to the current function and the path to the
8560    target function save a value in rp', they save the value into the
8561    same stack slot, which has undesirable consequences.
8562 
8563    Because of the deferred binding nature of shared libraries any function
8564    with external scope could be in a different load module and thus require
8565    rp' to be saved when calling that function.  So sibcall optimizations
8566    can only be safe for static functions.
8567 
8568    Note that GCC never needs return value relocations, so we don't have to
8569    worry about static calls with return value relocations (which require
8570    saving rp').
8571 
8572    It is safe to perform a sibcall optimization when the target function
8573    will never return.  */
8574 static bool
8575 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8576 {
8577   if (TARGET_PORTABLE_RUNTIME)
8578     return false;
8579 
8580   /* Sibcalls are not ok because the arg pointer register is not a fixed
8581      register.  This prevents the sibcall optimization from occurring.  In
8582      addition, there are problems with stub placement using GNU ld.  This
8583      is because a normal sibcall branch uses a 17-bit relocation while
8584      a regular call branch uses a 22-bit relocation.  As a result, more
8585      care needs to be taken in the placement of long-branch stubs.  */
8586   if (TARGET_64BIT)
8587     return false;
8588 
8589   /* Sibcalls are only ok within a translation unit.  */
8590   return (decl && !TREE_PUBLIC (decl));
8591 }
8592 
8593 /* ??? Addition is not commutative on the PA due to the weird implicit
8594    space register selection rules for memory addresses.  Therefore, we
8595    don't consider a + b == b + a, as this might be inside a MEM.  */
8596 static bool
8597 pa_commutative_p (const_rtx x, int outer_code)
8598 {
8599   return (COMMUTATIVE_P (x)
8600 	  && (TARGET_NO_SPACE_REGS
8601 	      || (outer_code != UNKNOWN && outer_code != MEM)
8602 	      || GET_CODE (x) != PLUS));
8603 }
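
/* Example (editorial note): (plus (reg) (reg)) inside a MEM is not
   treated as commutative here, because swapping the operands could
   select a different implicit space register for the access; the same
   PLUS outside a MEM, or any PLUS when TARGET_NO_SPACE_REGS, remains
   commutative.  */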
8604 
8605 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8606    use in fmpyadd instructions.  */
8607 int
8608 pa_fmpyaddoperands (rtx *operands)
8609 {
8610   machine_mode mode = GET_MODE (operands[0]);
8611 
8612   /* Must be a floating point mode.  */
8613   if (mode != SFmode && mode != DFmode)
8614     return 0;
8615 
8616   /* All modes must be the same.  */
8617   if (! (mode == GET_MODE (operands[1])
8618 	 && mode == GET_MODE (operands[2])
8619 	 && mode == GET_MODE (operands[3])
8620 	 && mode == GET_MODE (operands[4])
8621 	 && mode == GET_MODE (operands[5])))
8622     return 0;
8623 
8624   /* All operands must be registers.  */
8625   if (! (GET_CODE (operands[1]) == REG
8626 	 && GET_CODE (operands[2]) == REG
8627 	 && GET_CODE (operands[3]) == REG
8628 	 && GET_CODE (operands[4]) == REG
8629 	 && GET_CODE (operands[5]) == REG))
8630     return 0;
8631 
8632   /* Only 2 real operands to the addition.  One of the input operands must
8633      be the same as the output operand.  */
8634   if (! rtx_equal_p (operands[3], operands[4])
8635       && ! rtx_equal_p (operands[3], operands[5]))
8636     return 0;
8637 
8638   /* Inout operand of add cannot conflict with any operands from multiply.  */
8639   if (rtx_equal_p (operands[3], operands[0])
8640      || rtx_equal_p (operands[3], operands[1])
8641      || rtx_equal_p (operands[3], operands[2]))
8642     return 0;
8643 
8644   /* multiply cannot feed into addition operands.  */
8645   if (rtx_equal_p (operands[4], operands[0])
8646       || rtx_equal_p (operands[5], operands[0]))
8647     return 0;
8648 
8649   /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
8650   if (mode == SFmode
8651       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8652 	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8653 	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8654 	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8655 	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8656 	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8657     return 0;
8658 
8659   /* Passed.  Operands are suitable for fmpyadd.  */
8660   return 1;
8661 }
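
/* Illustrative operand assignment (editorial sketch, DFmode, register
   choices hypothetical) that passes the checks above, where operands
   0-2 form the multiply (op0 = op1 * op2) and operands 3-5 the add
   (op3 = op4 + op5):

     op0 = fr4, op1 = fr5, op2 = fr6	multiply
     op3 = fr7, op4 = fr7, op5 = fr8	add; op3 == op4 (inout)

   The add reuses its destination as an input, and neither fr7 nor
   fr8 overlaps the multiply's operands or its destination.  */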
8662 
8663 #if !defined(USE_COLLECT2)
8664 static void
8665 pa_asm_out_constructor (rtx symbol, int priority)
8666 {
8667   if (!function_label_operand (symbol, VOIDmode))
8668     pa_encode_label (symbol);
8669 
8670 #ifdef CTORS_SECTION_ASM_OP
8671   default_ctor_section_asm_out_constructor (symbol, priority);
8672 #else
8673 # ifdef TARGET_ASM_NAMED_SECTION
8674   default_named_section_asm_out_constructor (symbol, priority);
8675 # else
8676   default_stabs_asm_out_constructor (symbol, priority);
8677 # endif
8678 #endif
8679 }
8680 
8681 static void
8682 pa_asm_out_destructor (rtx symbol, int priority)
8683 {
8684   if (!function_label_operand (symbol, VOIDmode))
8685     pa_encode_label (symbol);
8686 
8687 #ifdef DTORS_SECTION_ASM_OP
8688   default_dtor_section_asm_out_destructor (symbol, priority);
8689 #else
8690 # ifdef TARGET_ASM_NAMED_SECTION
8691   default_named_section_asm_out_destructor (symbol, priority);
8692 # else
8693   default_stabs_asm_out_destructor (symbol, priority);
8694 # endif
8695 #endif
8696 }
8697 #endif
8698 
8699 /* This function places uninitialized global data in the bss section.
8700    The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8701    function on the SOM port to prevent uninitialized global data from
8702    being placed in the data section.  */
8703 
8704 void
8705 pa_asm_output_aligned_bss (FILE *stream,
8706 			   const char *name,
8707 			   unsigned HOST_WIDE_INT size,
8708 			   unsigned int align)
8709 {
8710   switch_to_section (bss_section);
8711   fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8712 
8713 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8714   ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8715 #endif
8716 
8717 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8718   ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8719 #endif
8720 
8721   fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8722   ASM_OUTPUT_LABEL (stream, name);
8723   fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8724 }
8725 
8726 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8727    that doesn't allow the alignment of global common storage to be directly
8728    specified.  The SOM linker aligns common storage based on the rounded
8729    value of the NUM_BYTES parameter in the .comm directive.  It's not
8730    possible to use the .align directive as it doesn't affect the alignment
8731    of the label associated with a .comm directive.  */
8732 
8733 void
8734 pa_asm_output_aligned_common (FILE *stream,
8735 			      const char *name,
8736 			      unsigned HOST_WIDE_INT size,
8737 			      unsigned int align)
8738 {
8739   unsigned int max_common_align;
8740 
8741   max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8742   if (align > max_common_align)
8743     {
8744       warning (0, "alignment (%u) for %s exceeds maximum alignment "
8745 	       "for global common data.  Using %u",
8746 	       align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8747       align = max_common_align;
8748     }
8749 
8750   switch_to_section (bss_section);
8751 
8752   assemble_name (stream, name);
8753   fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8754            MAX (size, align / BITS_PER_UNIT));
8755 }
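
/* Example (editorial sketch) of the output above: for a 4-byte object
   "foo" with 64-bit alignment, NUM_BYTES is MAX (4, 64 / 8) == 8, so
   the line emitted is "foo	.comm 8" and the SOM linker derives the
   alignment from the rounded size.  */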
8756 
8757 /* We can't use .comm for local common storage as the SOM linker effectively
8758    treats the symbol as universal and uses the same storage for local symbols
8759    with the same name in different object files.  The .block directive
8760    reserves an uninitialized block of storage.  However, it's not common
8761    storage.  Fortunately, GCC never requests common storage with the same
8762    name in any given translation unit.  */
8763 
8764 void
8765 pa_asm_output_aligned_local (FILE *stream,
8766 			     const char *name,
8767 			     unsigned HOST_WIDE_INT size,
8768 			     unsigned int align)
8769 {
8770   switch_to_section (bss_section);
8771   fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8772 
8773 #ifdef LOCAL_ASM_OP
8774   fprintf (stream, "%s", LOCAL_ASM_OP);
8775   assemble_name (stream, name);
8776   fprintf (stream, "\n");
8777 #endif
8778 
8779   ASM_OUTPUT_LABEL (stream, name);
8780   fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8781 }
8782 
8783 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8784    use in fmpysub instructions.  */
8785 int
8786 pa_fmpysuboperands (rtx *operands)
8787 {
8788   machine_mode mode = GET_MODE (operands[0]);
8789 
8790   /* Must be a floating point mode.  */
8791   if (mode != SFmode && mode != DFmode)
8792     return 0;
8793 
8794   /* All modes must be the same.  */
8795   if (! (mode == GET_MODE (operands[1])
8796 	 && mode == GET_MODE (operands[2])
8797 	 && mode == GET_MODE (operands[3])
8798 	 && mode == GET_MODE (operands[4])
8799 	 && mode == GET_MODE (operands[5])))
8800     return 0;
8801 
8802   /* All operands must be registers.  */
8803   if (! (GET_CODE (operands[1]) == REG
8804 	 && GET_CODE (operands[2]) == REG
8805 	 && GET_CODE (operands[3]) == REG
8806 	 && GET_CODE (operands[4]) == REG
8807 	 && GET_CODE (operands[5]) == REG))
8808     return 0;
8809 
8810   /* Only 2 real operands to the subtraction.  Subtraction is not a commutative
8811      operation, so operands[4] must be the same as operands[3].  */
8812   if (! rtx_equal_p (operands[3], operands[4]))
8813     return 0;
8814 
8815   /* multiply cannot feed into subtraction.  */
8816   if (rtx_equal_p (operands[5], operands[0]))
8817     return 0;
8818 
8819   /* Inout operand of sub cannot conflict with any operands from multiply.  */
8820   if (rtx_equal_p (operands[3], operands[0])
8821      || rtx_equal_p (operands[3], operands[1])
8822      || rtx_equal_p (operands[3], operands[2]))
8823     return 0;
8824 
8825   /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
8826   if (mode == SFmode
8827       && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8828 	  || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8829 	  || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8830 	  || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8831 	  || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8832 	  || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8833     return 0;
8834 
8835   /* Passed.  Operands are suitable for fmpysub.  */
8836   return 1;
8837 }
8838 
8839 /* Return 1 if the given constant is 2, 4, or 8.  These are the valid
8840    constants for a MULT embedded inside a memory address.  */
8841 int
8842 pa_mem_shadd_constant_p (int val)
8843 {
8844   if (val == 2 || val == 4 || val == 8)
8845     return 1;
8846   else
8847     return 0;
8848 }
8849 
8850 /* Return 1 if the given constant is 1, 2, or 3.  These are the valid
8851    constants for shadd instructions.  */
8852 int
8853 pa_shadd_constant_p (int val)
8854 {
8855   if (val == 1 || val == 2 || val == 3)
8856     return 1;
8857   else
8858     return 0;
8859 }
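
/* Example (editorial note) relating the two predicates above: the
   shadd shift counts 1, 2 and 3 correspond to the mem_shadd
   multipliers 2, 4 and 8 (1 << shift).  An address like a + 4*i can
   thus use a shift-and-add (shift count 2), which is why a MULT by
   2, 4 or 8 is acceptable inside a memory address.  */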
8860 
8861 /* Return TRUE if INSN branches forward.  */
8862 
8863 static bool
8864 forward_branch_p (rtx_insn *insn)
8865 {
8866   rtx lab = JUMP_LABEL (insn);
8867 
8868   /* The INSN must have a jump label.  */
8869   gcc_assert (lab != NULL_RTX);
8870 
8871   if (INSN_ADDRESSES_SET_P ())
8872     return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8873 
8874   while (insn)
8875     {
8876       if (insn == lab)
8877 	return true;
8878       else
8879 	insn = NEXT_INSN (insn);
8880     }
8881 
8882   return false;
8883 }
8884 
8885 /* Output an unconditional move and branch insn.  */
8886 
8887 const char *
8888 pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
8889 {
8890   int length = get_attr_length (insn);
8891 
8892   /* These are the cases in which we win.  */
8893   if (length == 4)
8894     return "mov%I1b,tr %1,%0,%2";
8895 
8896   /* None of the following cases win, but they don't lose either.  */
8897   if (length == 8)
8898     {
8899       if (dbr_sequence_length () == 0)
8900 	{
8901 	  /* Nothing in the delay slot, fake it by putting the combined
8902 	     insn (the copy or add) in the delay slot of a bl.  */
8903 	  if (GET_CODE (operands[1]) == CONST_INT)
8904 	    return "b %2\n\tldi %1,%0";
8905 	  else
8906 	    return "b %2\n\tcopy %1,%0";
8907 	}
8908       else
8909 	{
8910 	  /* Something in the delay slot, but we've got a long branch.  */
8911 	  if (GET_CODE (operands[1]) == CONST_INT)
8912 	    return "ldi %1,%0\n\tb %2";
8913 	  else
8914 	    return "copy %1,%0\n\tb %2";
8915 	}
8916     }
8917 
8918   if (GET_CODE (operands[1]) == CONST_INT)
8919     output_asm_insn ("ldi %1,%0", operands);
8920   else
8921     output_asm_insn ("copy %1,%0", operands);
8922   return pa_output_lbranch (operands[2], insn, 1);
8923 }
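
/* Illustrative expansion (editorial sketch, label name hypothetical)
   of the length == 4 case above: a register copy combined with a
   backward unconditional branch becomes a single movb using the
   "true" condition, e.g.

     movb,tr %r4,%r3,L$0002

   which copies %r4 into %r3 and branches to L$0002 in one insn.  */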
8924 
8925 /* Output an unconditional add and branch insn.  */
8926 
8927 const char *
8928 pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
8929 {
8930   int length = get_attr_length (insn);
8931 
8932   /* To make life easy we want operand0 to be the shared input/output
8933      operand and operand1 to be the readonly operand.  */
8934   if (operands[0] == operands[1])
8935     operands[1] = operands[2];
8936 
8937   /* These are the cases in which we win.  */
8938   if (length == 4)
8939     return "add%I1b,tr %1,%0,%3";
8940 
8941   /* None of the following cases win, but they don't lose either.  */
8942   if (length == 8)
8943     {
8944       if (dbr_sequence_length () == 0)
8945 	/* Nothing in the delay slot, fake it by putting the combined
8946 	   insn (the copy or add) in the delay slot of a bl.  */
8947 	return "b %3\n\tadd%I1 %1,%0,%0";
8948       else
8949 	/* Something in the delay slot, but we've got a long branch.  */
8950 	return "add%I1 %1,%0,%0\n\tb %3";
8951     }
8952 
8953   output_asm_insn ("add%I1 %1,%0,%0", operands);
8954   return pa_output_lbranch (operands[3], insn, 1);
8955 }
8956 
8957 /* We use this hook to perform a PA specific optimization which is difficult
8958    to do in earlier passes.  */
8959 
8960 static void
8961 pa_reorg (void)
8962 {
8963   remove_useless_addtr_insns (1);
8964 
8965   if (pa_cpu < PROCESSOR_8000)
8966     pa_combine_instructions ();
8967 }
8968 
8969 /* The PA has a number of odd instructions which can perform multiple
8970    tasks at once.  On first generation PA machines (PA1.0 and PA1.1)
8971    it may be profitable to combine two instructions into one instruction
8972    with two outputs.  It's not profitable on PA2.0 machines because the
8973    two outputs would take two slots in the reorder buffers.
8974 
8975    This routine finds instructions which can be combined and combines
8976    them.  We only support some of the potential combinations, and we
8977    only try common ways to find suitable instructions.
8978 
8979       * addb can add two registers or a register and a small integer
8980       and jump to a nearby (+-8k) location.  Normally the jump to the
8981       nearby location is conditional on the result of the add, but by
8982       using the "true" condition we can make the jump unconditional.
8983       Thus addb can perform two independent operations in one insn.
8984 
8985       * movb is similar to addb in that it can perform a reg->reg
8986       or small immediate->reg copy and jump to a nearby (+-8k) location.
8987 
8988       * fmpyadd and fmpysub can perform a FP multiply and either an
8989       FP add or FP sub if the operands of the multiply and add/sub are
8990       independent (there are other minor restrictions).  Note both
8991       the fmpy and fadd/fsub can in theory move to better spots according
8992       to data dependencies, but for now we require the fmpy stay at a
8993       fixed location.
8994 
8995       * Many of the memory operations can perform pre & post updates
8996       of index registers.  GCC's pre/post increment/decrement addressing
8997       is far too simple to take advantage of all the possibilities.  This
8998       pass may not be suitable since those insns may not be independent.
8999 
9000       * comclr can compare two ints or an int and a register, nullify
9001       the following instruction and zero some other register.  This
9002       is more difficult to use as it's harder to find an insn which
9003       will generate a comclr than finding something like an unconditional
9004       branch.  (conditional moves & long branches create comclr insns).
9005 
9006       * Most arithmetic operations can conditionally skip the next
9007       instruction.  They can be viewed as "perform this operation
9008       and conditionally jump to this nearby location" (where nearby
9009       is one insn away).  These are difficult to use due to the
9010       branch length restrictions.  */
9011 
9012 static void
9013 pa_combine_instructions (void)
9014 {
9015   rtx_insn *anchor;
9016 
9017   /* This can get expensive since the basic algorithm is on the
9018      order of O(n^2) (or worse).  Only do it for -O2 or higher
9019      levels of optimization.  */
9020   if (optimize < 2)
9021     return;
9022 
9023   /* Walk down the list of insns looking for "anchor" insns which
9024      may be combined with "floating" insns.  As the name implies,
9025      "anchor" instructions don't move, while "floating" insns may
9026      move around.  */
9027   rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9028   rtx_insn *new_rtx = make_insn_raw (par);
9029 
9030   for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9031     {
9032       enum attr_pa_combine_type anchor_attr;
9033       enum attr_pa_combine_type floater_attr;
9034 
9035       /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9036 	 Also ignore any special USE insns.  */
9037       if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
9038 	  || GET_CODE (PATTERN (anchor)) == USE
9039 	  || GET_CODE (PATTERN (anchor)) == CLOBBER)
9040 	continue;
9041 
9042       anchor_attr = get_attr_pa_combine_type (anchor);
9043       /* See if anchor is an insn suitable for combination.  */
9044       if (anchor_attr == PA_COMBINE_TYPE_FMPY
9045 	  || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9046 	  || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9047 	      && ! forward_branch_p (anchor)))
9048 	{
9049 	  rtx_insn *floater;
9050 
9051 	  for (floater = PREV_INSN (anchor);
9052 	       floater;
9053 	       floater = PREV_INSN (floater))
9054 	    {
9055 	      if (NOTE_P (floater)
9056 		  || (NONJUMP_INSN_P (floater)
9057 		      && (GET_CODE (PATTERN (floater)) == USE
9058 			  || GET_CODE (PATTERN (floater)) == CLOBBER)))
9059 		continue;
9060 
9061 	      /* Anything except a regular INSN will stop our search.  */
9062 	      if (! NONJUMP_INSN_P (floater))
9063 		{
9064 		  floater = NULL;
9065 		  break;
9066 		}
9067 
9068 	      /* See if FLOATER is suitable for combination with the
9069 		 anchor.  */
9070 	      floater_attr = get_attr_pa_combine_type (floater);
9071 	      if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9072 		   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9073 		  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9074 		      && floater_attr == PA_COMBINE_TYPE_FMPY))
9075 		{
9076 		  /* If ANCHOR and FLOATER can be combined, then we're
9077 		     done with this pass.  */
9078 		  if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9079 					SET_DEST (PATTERN (floater)),
9080 					XEXP (SET_SRC (PATTERN (floater)), 0),
9081 					XEXP (SET_SRC (PATTERN (floater)), 1)))
9082 		    break;
9083 		}
9084 
9085 	      else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9086 		       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9087 		{
9088 		  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9089 		    {
9090 		      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9091 					    SET_DEST (PATTERN (floater)),
9092 					XEXP (SET_SRC (PATTERN (floater)), 0),
9093 					XEXP (SET_SRC (PATTERN (floater)), 1)))
9094 			break;
9095 		    }
9096 		  else
9097 		    {
9098 		      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9099 					    SET_DEST (PATTERN (floater)),
9100 					    SET_SRC (PATTERN (floater)),
9101 					    SET_SRC (PATTERN (floater))))
9102 			break;
9103 		    }
9104 		}
9105 	    }
9106 
9107 	  /* If we didn't find anything on the backwards scan try forwards.  */
9108 	  if (!floater
9109 	      && (anchor_attr == PA_COMBINE_TYPE_FMPY
9110 		  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9111 	    {
9112 	      for (floater = anchor; floater; floater = NEXT_INSN (floater))
9113 		{
9114 		  if (NOTE_P (floater)
9115 		      || (NONJUMP_INSN_P (floater)
9116 			  && (GET_CODE (PATTERN (floater)) == USE
9117 			      || GET_CODE (PATTERN (floater)) == CLOBBER)))
9118 
9119 		    continue;
9120 
9121 		  /* Anything except a regular INSN will stop our search.  */
9122 		  if (! NONJUMP_INSN_P (floater))
9123 		    {
9124 		      floater = NULL;
9125 		      break;
9126 		    }
9127 
9128 		  /* See if FLOATER is suitable for combination with the
9129 		     anchor.  */
9130 		  floater_attr = get_attr_pa_combine_type (floater);
9131 		  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9132 		       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9133 		      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9134 			  && floater_attr == PA_COMBINE_TYPE_FMPY))
9135 		    {
9136 		      /* If ANCHOR and FLOATER can be combined, then we're
9137 			 done with this pass.  */
9138 		      if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9139 					    SET_DEST (PATTERN (floater)),
9140 					    XEXP (SET_SRC (PATTERN (floater)),
9141 						  0),
9142 					    XEXP (SET_SRC (PATTERN (floater)),
9143 						  1)))
9144 			break;
9145 		    }
9146 		}
9147 	    }
9148 
9149 	  /* FLOATER will be nonzero if we found a suitable floating
9150 	     insn for combination with ANCHOR.  */
9151 	  if (floater
9152 	      && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9153 		  || anchor_attr == PA_COMBINE_TYPE_FMPY))
9154 	    {
9155 	      /* Emit the new instruction and delete the old anchor.  */
9156 	      emit_insn_before (gen_rtx_PARALLEL
9157 				(VOIDmode,
9158 				 gen_rtvec (2, PATTERN (anchor),
9159 					    PATTERN (floater))),
9160 				anchor);
9161 
9162 	      SET_INSN_DELETED (anchor);
9163 
9164 	      /* Emit a special USE insn for FLOATER, then delete
9165 		 the floating insn.  */
9166 	      emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9167 	      delete_insn (floater);
9168 
9169 	      continue;
9170 	    }
9171 	  else if (floater
9172 		   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9173 	    {
9174 	      rtx temp;
9175 	      /* Emit the new jump instruction and delete the old anchor.  */
9176 	      temp
9177 		= emit_jump_insn_before (gen_rtx_PARALLEL
9178 					 (VOIDmode,
9179 					  gen_rtvec (2, PATTERN (anchor),
9180 						     PATTERN (floater))),
9181 					 anchor);
9182 
9183 	      JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9184 	      SET_INSN_DELETED (anchor);
9185 
9186 	      /* Emit a special USE insn for FLOATER, then delete
9187 		 the floating insn.  */
9188 	      emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
9189 	      delete_insn (floater);
9190 	      continue;
9191 	    }
9192 	}
9193     }
9194 }
9195 
9196 static int
9197 pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
9198 		  int reversed, rtx dest,
9199 		  rtx src1, rtx src2)
9200 {
9201   int insn_code_number;
9202   rtx_insn *start, *end;
9203 
9204   /* Create a PARALLEL with the patterns of ANCHOR and
9205      FLOATER, try to recognize it, then test constraints
9206      for the resulting pattern.
9207 
9208      If the pattern doesn't match or the constraints
9209      aren't met keep searching for a suitable floater
9210      insn.  */
9211   XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9212   XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9213   INSN_CODE (new_rtx) = -1;
9214   insn_code_number = recog_memoized (new_rtx);
9215   basic_block bb = BLOCK_FOR_INSN (anchor);
9216   if (insn_code_number < 0
9217       || (extract_insn (new_rtx),
9218 	  !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
9219     return 0;
9220 
9221   if (reversed)
9222     {
9223       start = anchor;
9224       end = floater;
9225     }
9226   else
9227     {
9228       start = floater;
9229       end = anchor;
9230     }
9231 
9232   /* There are up to three operands to consider: one
9233      output and two inputs.
9234 
9235      The output must not be used between FLOATER & ANCHOR
9236      exclusive.  The inputs must not be set between
9237      FLOATER and ANCHOR exclusive.  */
9238 
9239   if (reg_used_between_p (dest, start, end))
9240     return 0;
9241 
9242   if (reg_set_between_p (src1, start, end))
9243     return 0;
9244 
9245   if (reg_set_between_p (src2, start, end))
9246     return 0;
9247 
9248   /* If we get here, then everything is good.  */
9249   return 1;
9250 }
9251 
9252 /* Return nonzero if references for INSN are delayed.
9253 
9254    Millicode insns are actually function calls with some special
9255    constraints on arguments and register usage.
9256 
9257    Millicode calls always expect their arguments in the integer argument
9258    registers, and always return their result in %r29 (ret1).  They
9259    are expected to clobber their arguments, %r1, %r29, and the return
9260    pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9261 
9262    This function tells reorg that the references to arguments and
9263    millicode calls do not appear to happen until after the millicode call.
9264    This allows reorg to put insns which set the argument registers into the
9265    delay slot of the millicode call -- thus they act more like traditional
9266    CALL_INSNs.
9267 
9268    Note we cannot consider side effects of the insn to be delayed because
9269    the branch and link insn will clobber the return pointer.  If we happened
9270    to use the return pointer in the delay slot of the call, then we lose.
9271 
9272    get_attr_type will try to recognize the given insn, so make sure to
9273    filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9274    in particular.  */
9275 int
9276 pa_insn_refs_are_delayed (rtx_insn *insn)
9277 {
9278   return ((NONJUMP_INSN_P (insn)
9279 	   && GET_CODE (PATTERN (insn)) != SEQUENCE
9280 	   && GET_CODE (PATTERN (insn)) != USE
9281 	   && GET_CODE (PATTERN (insn)) != CLOBBER
9282 	   && get_attr_type (insn) == TYPE_MILLI));
9283 }
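
/* Example (editorial note, register use illustrative): for a
   millicode call such as "bl $$mulI,%r31", this hook lets reorg move
   an insn that sets an argument register, e.g. "ldi 10,%r26", into
   the call's delay slot, since the argument references are reported
   as delayed.  */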
9284 
9285 /* Promote the return value, but not the arguments.  */
9286 
9287 static machine_mode
9288 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9289                           machine_mode mode,
9290                           int *punsignedp ATTRIBUTE_UNUSED,
9291                           const_tree fntype ATTRIBUTE_UNUSED,
9292                           int for_return)
9293 {
9294   if (for_return == 0)
9295     return mode;
9296   return promote_mode (type, mode, punsignedp);
9297 }
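
/* Example (editorial sketch): for a function returning "short", this
   hook promotes the return value via promote_mode (to word_mode under
   PA's PROMOTE_MODE), while a "short" argument (for_return == 0)
   keeps HImode.  */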
9298 
9299 /* On the HP-PA the value is found in register(s) 28(-29), unless
9300    the mode is SF or DF. Then the value is returned in fr4 (32).
9301 
9302    This must perform the same promotions as PROMOTE_MODE, else promoting
9303    return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9304 
9305    Small structures must be returned in a PARALLEL on PA64 in order
9306    to match the HP Compiler ABI.  */
9307 
9308 static rtx
9309 pa_function_value (const_tree valtype,
9310                    const_tree func ATTRIBUTE_UNUSED,
9311                    bool outgoing ATTRIBUTE_UNUSED)
9312 {
9313   machine_mode valmode;
9314 
9315   if (AGGREGATE_TYPE_P (valtype)
9316       || TREE_CODE (valtype) == COMPLEX_TYPE
9317       || TREE_CODE (valtype) == VECTOR_TYPE)
9318     {
9319       HOST_WIDE_INT valsize = int_size_in_bytes (valtype);
9320 
9321       /* Handle aggregates that fit exactly in a word or double word.  */
9322       if ((valsize & (UNITS_PER_WORD - 1)) == 0)
9323 	return gen_rtx_REG (TYPE_MODE (valtype), 28);
9324 
9325       if (TARGET_64BIT)
9326 	{
9327           /* Aggregates with a size less than or equal to 128 bits are
9328 	     returned in GR 28(-29).  They are left justified.  The pad
9329 	     bits are undefined.  Larger aggregates are returned in
9330 	     memory.  */
9331 	  rtx loc[2];
9332 	  int i, offset = 0;
9333 	  int ub = valsize <= UNITS_PER_WORD ? 1 : 2;
9334 
9335 	  for (i = 0; i < ub; i++)
9336 	    {
9337 	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9338 					  gen_rtx_REG (DImode, 28 + i),
9339 					  GEN_INT (offset));
9340 	      offset += 8;
9341 	    }
9342 
9343 	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9344 	}
9345       else if (valsize > UNITS_PER_WORD)
9346 	{
9347 	  /* Aggregates 5 to 8 bytes in size are returned in general
9348 	     registers r28-r29 in the same manner as other non
9349 	     floating-point objects.  The data is right-justified and
9350 	     zero-extended to 64 bits.  This is opposite to the normal
9351 	     justification used on big endian targets and requires
9352 	     special treatment.  */
9353 	  rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9354 				       gen_rtx_REG (DImode, 28), const0_rtx);
9355 	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9356 	}
9357     }
9358 
9359   if ((INTEGRAL_TYPE_P (valtype)
9360        && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9361       || POINTER_TYPE_P (valtype))
9362     valmode = word_mode;
9363   else
9364     valmode = TYPE_MODE (valtype);
9365 
9366   if (TREE_CODE (valtype) == REAL_TYPE
9367       && !AGGREGATE_TYPE_P (valtype)
9368       && TYPE_MODE (valtype) != TFmode
9369       && !TARGET_SOFT_FLOAT)
9370     return gen_rtx_REG (valmode, 32);
9371 
9372   return gen_rtx_REG (valmode, 28);
9373 }
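
/* Example (editorial note): on the 32-bit ABI, a 6-byte aggregate
   takes the valsize > UNITS_PER_WORD path above and is returned in
   r28-r29 as a one-element DImode PARALLEL, i.e. right-justified and
   zero-extended to 64 bits, while a 4-byte aggregate is returned
   directly in r28 in its own mode.  */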
9374 
9375 /* Implement the TARGET_LIBCALL_VALUE hook.  */
9376 
9377 static rtx
9378 pa_libcall_value (machine_mode mode,
9379 		  const_rtx fun ATTRIBUTE_UNUSED)
9380 {
9381   if (! TARGET_SOFT_FLOAT
9382       && (mode == SFmode || mode == DFmode))
9383     return  gen_rtx_REG (mode, 32);
9384   else
9385     return  gen_rtx_REG (mode, 28);
9386 }
9387 
9388 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook.  */
9389 
9390 static bool
9391 pa_function_value_regno_p (const unsigned int regno)
9392 {
9393   if (regno == 28
9394       || (! TARGET_SOFT_FLOAT &&  regno == 32))
9395     return true;
9396 
9397   return false;
9398 }
9399 
9400 /* Update the data in CUM to advance over an argument
9401    of mode MODE and data type TYPE.
9402    (TYPE is null for libcalls where that information may not be available.)  */
9403 
9404 static void
9405 pa_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
9406 			 const_tree type, bool named ATTRIBUTE_UNUSED)
9407 {
9408   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9409   int arg_size = FUNCTION_ARG_SIZE (mode, type);
9410 
9411   cum->nargs_prototype--;
9412   cum->words += (arg_size
9413 		 + ((cum->words & 01)
9414 		    && type != NULL_TREE
9415 		    && arg_size > 1));
9416 }
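
/* Worked example (editorial sketch) of the advance above: with
   cum->words == 1 (odd), a non-null TYPE and a two-word argument
   (arg_size == 2), cum->words advances by 2 + 1 == 3; the extra slot
   is the padding that keeps multiword arguments double-word
   aligned.  */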
9417 
9418 /* Return the location of a parameter that is passed in a register or NULL
9419    if the parameter has any component that is passed in memory.
9420 
9421    This is new code and will be pushed into the net sources after
9422    further testing.
9423 
9424    ??? We might want to restructure this so that it looks more like other
9425    ports.  */
9426 static rtx
9427 pa_function_arg (cumulative_args_t cum_v, machine_mode mode,
9428 		 const_tree type, bool named ATTRIBUTE_UNUSED)
9429 {
9430   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9431   int max_arg_words = (TARGET_64BIT ? 8 : 4);
9432   int alignment = 0;
9433   int arg_size;
9434   int fpr_reg_base;
9435   int gpr_reg_base;
9436   rtx retval;
9437 
9438   if (mode == VOIDmode)
9439     return NULL_RTX;
9440 
9441   arg_size = FUNCTION_ARG_SIZE (mode, type);
9442 
9443   /* If this arg would be passed partially or totally on the stack, then
9444      this routine should return zero.  pa_arg_partial_bytes will
9445      handle arguments which are split between regs and stack slots if
9446      the ABI mandates split arguments.  */
9447   if (!TARGET_64BIT)
9448     {
9449       /* The 32-bit ABI does not split arguments.  */
9450       if (cum->words + arg_size > max_arg_words)
9451 	return NULL_RTX;
9452     }
9453   else
9454     {
9455       if (arg_size > 1)
9456 	alignment = cum->words & 1;
9457       if (cum->words + alignment >= max_arg_words)
9458 	return NULL_RTX;
9459     }
9460 
9461   /* The 32bit ABIs and the 64bit ABIs are rather different,
9462      particularly in their handling of FP registers.  We might
9463      be able to cleverly share code between them, but I'm not
9464      going to bother in the hope that splitting them up results
9465      in code that is more easily understood.  */
9466 
9467   if (TARGET_64BIT)
9468     {
9469       /* Advance the base registers to their current locations.
9470 
9471          Remember, gprs grow towards smaller register numbers while
9472 	 fprs grow towards higher register numbers.  Also remember that
9473 	 although FP regs are 32-bit addressable, we pretend that
9474 	 the registers are 64-bits wide.  */
9475       gpr_reg_base = 26 - cum->words;
9476       fpr_reg_base = 32 + cum->words;
9477 
9478       /* Arguments wider than one word and small aggregates need special
9479 	 treatment.  */
9480       if (arg_size > 1
9481 	  || mode == BLKmode
9482 	  || (type && (AGGREGATE_TYPE_P (type)
9483 		       || TREE_CODE (type) == COMPLEX_TYPE
9484 		       || TREE_CODE (type) == VECTOR_TYPE)))
9485 	{
9486 	  /* Double-extended precision (80-bit), quad-precision (128-bit)
9487 	     and aggregates including complex numbers are aligned on
9488 	     128-bit boundaries.  The first eight 64-bit argument slots
9489 	     are associated one-to-one, with general registers r26
9490 	     through r19, and also with floating-point registers fr4
9491 	     through fr11.  Arguments larger than one word are always
9492 	     passed in general registers.
9493 
9494 	     Using a PARALLEL with a word mode register results in left
9495 	     justified data on a big-endian target.  */
9496 
9497 	  rtx loc[8];
9498 	  int i, offset = 0, ub = arg_size;
9499 
9500 	  /* Align the base register.  */
9501 	  gpr_reg_base -= alignment;
9502 
9503 	  ub = MIN (ub, max_arg_words - cum->words - alignment);
9504 	  for (i = 0; i < ub; i++)
9505 	    {
9506 	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9507 					  gen_rtx_REG (DImode, gpr_reg_base),
9508 					  GEN_INT (offset));
9509 	      gpr_reg_base -= 1;
9510 	      offset += 8;
9511 	    }
9512 
9513 	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9514 	}
9515      }
9516   else
9517     {
9518       /* If the argument is larger than a word, then we know precisely
9519 	 which registers we must use.  */
9520       if (arg_size > 1)
9521 	{
9522 	  if (cum->words)
9523 	    {
9524 	      gpr_reg_base = 23;
9525 	      fpr_reg_base = 38;
9526 	    }
9527 	  else
9528 	    {
9529 	      gpr_reg_base = 25;
9530 	      fpr_reg_base = 34;
9531 	    }
9532 
9533 	  /* Structures 5 to 8 bytes in size are passed in the general
9534 	     registers in the same manner as other non floating-point
9535 	     objects.  The data is right-justified and zero-extended
9536 	     to 64 bits.  This is opposite to the normal justification
9537 	     used on big endian targets and requires special treatment.
9538 	     We now define BLOCK_REG_PADDING to pad these objects.
9539 	     Aggregates, complex and vector types are passed in the same
9540 	     manner as structures.  */
9541 	  if (mode == BLKmode
9542 	      || (type && (AGGREGATE_TYPE_P (type)
9543 			   || TREE_CODE (type) == COMPLEX_TYPE
9544 			   || TREE_CODE (type) == VECTOR_TYPE)))
9545 	    {
9546 	      rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9547 					   gen_rtx_REG (DImode, gpr_reg_base),
9548 					   const0_rtx);
9549 	      return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9550 	    }
9551 	}
9552       else
9553         {
9554 	   /* We have a single word (32 bits).  A simple computation
9555 	      will get us the register #s we need.  */
9556 	   gpr_reg_base = 26 - cum->words;
9557 	   fpr_reg_base = 32 + 2 * cum->words;
9558 	}
9559     }
9560 
9561   /* Determine if the argument needs to be passed in both general and
9562      floating point registers.  */
9563   if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9564        /* If we are doing soft-float with portable runtime, then there
9565 	  is no need to worry about FP regs.  */
9566        && !TARGET_SOFT_FLOAT
9567        /* The parameter must be some kind of scalar float, else we just
9568 	  pass it in integer registers.  */
9569        && GET_MODE_CLASS (mode) == MODE_FLOAT
9570        /* The target function must not have a prototype.  */
9571        && cum->nargs_prototype <= 0
9572        /* libcalls do not need to pass items in both FP and general
9573 	  registers.  */
9574        && type != NULL_TREE
9575        /* All this hair applies to "outgoing" args only.  This includes
9576 	  sibcall arguments set up with FUNCTION_INCOMING_ARG.  */
9577        && !cum->incoming)
9578       /* Also pass outgoing floating arguments in both registers in indirect
9579 	 calls with the 32-bit ABI and the HP assembler since there is no
9580 	 way to specify argument locations in static functions.  */
9581       || (!TARGET_64BIT
9582 	  && !TARGET_GAS
9583 	  && !cum->incoming
9584 	  && cum->indirect
9585 	  && GET_MODE_CLASS (mode) == MODE_FLOAT))
9586     {
9587       retval
9588 	= gen_rtx_PARALLEL
9589 	    (mode,
9590 	     gen_rtvec (2,
9591 			gen_rtx_EXPR_LIST (VOIDmode,
9592 					   gen_rtx_REG (mode, fpr_reg_base),
9593 					   const0_rtx),
9594 			gen_rtx_EXPR_LIST (VOIDmode,
9595 					   gen_rtx_REG (mode, gpr_reg_base),
9596 					   const0_rtx)));
9597     }
9598   else
9599     {
9600       /* See if we should pass this parameter in a general register.  */
9601       if (TARGET_SOFT_FLOAT
9602 	  /* Indirect calls in the normal 32-bit ABI require all arguments
9603 	     to be passed in general registers.  */
9604 	  || (!TARGET_PORTABLE_RUNTIME
9605 	      && !TARGET_64BIT
9606 	      && !TARGET_ELF32
9607 	      && cum->indirect)
9608 	  /* If the parameter is not a scalar floating-point parameter,
9609 	     then it belongs in GPRs.  */
9610 	  || GET_MODE_CLASS (mode) != MODE_FLOAT
9611 	  /* Structure with single SFmode field belongs in GPR.  */
9612 	  || (type && AGGREGATE_TYPE_P (type)))
9613 	retval = gen_rtx_REG (mode, gpr_reg_base);
9614       else
9615 	retval = gen_rtx_REG (mode, fpr_reg_base);
9616     }
9617   return retval;
9618 }
9619 
9620 /* Arguments larger than one word are double word aligned.  */
9621 
9622 static unsigned int
9623 pa_function_arg_boundary (machine_mode mode, const_tree type)
9624 {
9625   bool singleword = (type
9626 		     ? (integer_zerop (TYPE_SIZE (type))
9627 			|| !TREE_CONSTANT (TYPE_SIZE (type))
9628 			|| int_size_in_bytes (type) <= UNITS_PER_WORD)
9629 		     : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9630 
9631   return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
9632 }
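
/* For example, a DImode argument in the 32-bit runtime is larger than
   UNITS_PER_WORD and therefore gets MAX_PARM_BOUNDARY (double-word)
   alignment, while an SImode argument only needs PARM_BOUNDARY.  */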
9633 
9634 /* If this arg would be passed totally in registers or totally on the stack,
9635    then this routine should return zero.  */
9636 
9637 static int
9638 pa_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
9639 		      tree type, bool named ATTRIBUTE_UNUSED)
9640 {
9641   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9642   unsigned int max_arg_words = 8;
9643   unsigned int offset = 0;
9644 
9645   if (!TARGET_64BIT)
9646     return 0;
9647 
9648   if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
9649     offset = 1;
9650 
9651   if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
9652     /* Arg fits fully into registers.  */
9653     return 0;
9654   else if (cum->words + offset >= max_arg_words)
9655     /* Arg fully on the stack.  */
9656     return 0;
9657   else
9658     /* Arg is split.  */
9659     return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
9660 }
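
/* For example, in the 64-bit runtime with cum->words == 6, a four-word
   argument neither fits entirely in the two remaining register slots
   nor starts on the stack, so pa_arg_partial_bytes returns
   (8 - 6) * UNITS_PER_WORD == 16 bytes passed in registers, with the
   remainder passed on the stack.  */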
9661 
9662 
9663 /* A get_unnamed_section callback for switching to the text section.
9664 
9665    This function is only used with SOM.  Because we don't support
9666    named subspaces, we can only create a new subspace or switch back
9667    to the default text subspace.  */
9668 
9669 static void
9670 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9671 {
9672   gcc_assert (TARGET_SOM);
9673   if (TARGET_GAS)
9674     {
9675       if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9676 	{
9677 	  /* We only want to emit a .nsubspa directive once at the
9678 	     start of the function.  */
9679 	  cfun->machine->in_nsubspa = 1;
9680 
9681 	  /* Create a new subspace for the text.  This provides
9682 	     better stub placement and one-only functions.  */
9683 	  if (cfun->decl
9684 	      && DECL_ONE_ONLY (cfun->decl)
9685 	      && !DECL_WEAK (cfun->decl))
9686 	    {
9687 	      output_section_asm_op ("\t.SPACE $TEXT$\n"
9688 				     "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9689 				     "ACCESS=44,SORT=24,COMDAT");
9690 	      return;
9691 	    }
9692 	}
9693       else
9694 	{
9695 	  /* There isn't a current function or the body of the current
9696 	     function has been completed.  So, we are changing to the
9697 	     text section to output debugging information.  Thus, we
9698 	     need to forget that we are in the text section so that
9699 	     varasm.c will call us when text_section is selected again.  */
9700 	  gcc_assert (!cfun || !cfun->machine
9701 		      || cfun->machine->in_nsubspa == 2);
9702 	  in_section = NULL;
9703 	}
9704       output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9705       return;
9706     }
9707   output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9708 }
9709 
9710 /* A get_unnamed_section callback for switching to comdat data
9711    sections.  This function is only used with SOM.  */
9712 
9713 static void
9714 som_output_comdat_data_section_asm_op (const void *data)
9715 {
9716   in_section = NULL;
9717   output_section_asm_op (data);
9718 }
9719 
9720 /* Implement TARGET_ASM_INITIALIZE_SECTIONS  */
9721 
9722 static void
9723 pa_som_asm_init_sections (void)
9724 {
9725   text_section
9726     = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9727 
9728   /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9729      is not being generated.  */
9730   som_readonly_data_section
9731     = get_unnamed_section (0, output_section_asm_op,
9732 			   "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9733 
9734   /* When secondary definitions are not supported, SOM makes readonly
9735      data one-only by creating a new $LIT$ subspace in $TEXT$ with
9736      the comdat flag.  */
9737   som_one_only_readonly_data_section
9738     = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9739 			   "\t.SPACE $TEXT$\n"
9740 			   "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9741 			   "ACCESS=0x2c,SORT=16,COMDAT");
9742 
9743 
9744   /* When secondary definitions are not supported, SOM makes data one-only
9745      by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
9746   som_one_only_data_section
9747     = get_unnamed_section (SECTION_WRITE,
9748 			   som_output_comdat_data_section_asm_op,
9749 			   "\t.SPACE $PRIVATE$\n"
9750 			   "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9751 			   "ACCESS=31,SORT=24,COMDAT");
9752 
9753   if (flag_tm)
9754     som_tm_clone_table_section
9755       = get_unnamed_section (0, output_section_asm_op,
9756 			     "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
9757 
9758   /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9759      which reference data within the $TEXT$ space (for example constant
9760      strings in the $LIT$ subspace).
9761 
9762      The assemblers (GAS and HP as) both have problems with handling
9763      the difference of two symbols which is the other correct way to
9764      reference constant data during PIC code generation.
9765 
9766      So, there's no way to reference constant data which is in the
9767      $TEXT$ space during PIC generation.  Instead place all constant
9768      data into the $PRIVATE$ subspace (this reduces sharing, but it
9769      works correctly).  */
9770   readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9771 
9772   /* We must not have a reference to an external symbol defined in a
9773      shared library in a readonly section, else the SOM linker will
9774      complain.
9775 
9776      So, we force exception information into the data section.  */
9777   exception_section = data_section;
9778 }
9779 
9780 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION.  */
9781 
9782 static section *
pa_som_tm_clone_table_section(void)9783 pa_som_tm_clone_table_section (void)
9784 {
9785   return som_tm_clone_table_section;
9786 }
9787 
9788 /* On hpux10, the linker will give an error if we have a reference
9789    in the read-only data section to a symbol defined in a shared
9790    library.  Therefore, expressions that might require a reloc can
9791    not be placed in the read-only data section.  */
9792 
9793 static section *
9794 pa_select_section (tree exp, int reloc,
9795 		   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9796 {
9797   if (TREE_CODE (exp) == VAR_DECL
9798       && TREE_READONLY (exp)
9799       && !TREE_THIS_VOLATILE (exp)
9800       && DECL_INITIAL (exp)
9801       && (DECL_INITIAL (exp) == error_mark_node
9802           || TREE_CONSTANT (DECL_INITIAL (exp)))
9803       && !reloc)
9804     {
9805       if (TARGET_SOM
9806 	  && DECL_ONE_ONLY (exp)
9807 	  && !DECL_WEAK (exp))
9808 	return som_one_only_readonly_data_section;
9809       else
9810 	return readonly_data_section;
9811     }
9812   else if (CONSTANT_CLASS_P (exp) && !reloc)
9813     return readonly_data_section;
9814   else if (TARGET_SOM
9815 	   && TREE_CODE (exp) == VAR_DECL
9816 	   && DECL_ONE_ONLY (exp)
9817 	   && !DECL_WEAK (exp))
9818     return som_one_only_data_section;
9819   else
9820     return data_section;
9821 }
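
/* For example, "static const int x = 42;" is placed in the read-only
   data section, while "static const char *const p = str;" requires a
   relocation (RELOC is nonzero) and falls through to the data
   section.  */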
9822 
9823 /* Implement pa_reloc_rw_mask.  */
9824 
9825 static int
9826 pa_reloc_rw_mask (void)
9827 {
9828   /* We force (const (plus (symbol) (const_int))) to memory when the
9829      const_int doesn't fit in a 14-bit integer.  The SOM linker can't
9830      handle this construct in read-only memory and we want to avoid
9831      this for ELF.  So, we always force an RTX needing relocation to
9832      the data section.  */
9833   return 3;
9834 }
9835 
9836 static void
9837 pa_globalize_label (FILE *stream, const char *name)
9838 {
9839   /* We only handle DATA objects here, functions are globalized in
9840      ASM_DECLARE_FUNCTION_NAME.  */
9841   if (! FUNCTION_NAME_P (name))
9842   {
9843     fputs ("\t.EXPORT ", stream);
9844     assemble_name (stream, name);
9845     fputs (",DATA\n", stream);
9846   }
9847 }
9848 
9849 /* Worker function for TARGET_STRUCT_VALUE_RTX.  */
9850 
9851 static rtx
9852 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9853 		     int incoming ATTRIBUTE_UNUSED)
9854 {
9855   return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9856 }
9857 
9858 /* Worker function for TARGET_RETURN_IN_MEMORY.  */
9859 
9860 bool
9861 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9862 {
9863   /* SOM ABI says that objects larger than 64 bits are returned in memory.
9864      PA64 ABI says that objects larger than 128 bits are returned in memory.
9865      Note, int_size_in_bytes can return -1 if the size of the object is
9866      variable or larger than the maximum value that can be expressed as
9867      a HOST_WIDE_INT.   It can also return zero for an empty type.  The
9868      simplest way to handle variable and empty types is to pass them in
9869      memory.  This avoids problems in defining the boundaries of argument
9870      slots, allocating registers, etc.  */
9871   return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9872 	  || int_size_in_bytes (type) <= 0);
9873 }
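
/* For example, a 12-byte structure is returned in memory under SOM but
   in registers under the 64-bit ABI, while a 24-byte structure is
   returned in memory in both runtimes.  */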
9874 
9875 /* Structure to hold declaration and name of external symbols that are
9876    emitted by GCC.  We generate a vector of these symbols and output them
9877    at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9878    This avoids putting out names that are never really used.  */
9879 
9880 typedef struct GTY(()) extern_symbol
9881 {
9882   tree decl;
9883   const char *name;
9884 } extern_symbol;
9885 
9886 /* Define gc'd vector type for extern_symbol.  */
9887 
9888 /* Vector of extern_symbol entries.  */
9889 static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
9890 
9891 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9892 /* Mark DECL (name NAME) as an external reference (assembler output
9893    file FILE).  This saves the names to output at the end of the file
9894    if actually referenced.  */
9895 
9896 void
9897 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9898 {
9899   gcc_assert (file == asm_out_file);
9900   extern_symbol p = {decl, name};
9901   vec_safe_push (extern_symbols, p);
9902 }
9903 
9904 /* Output text required at the end of an assembler file.
9905    This includes deferred plabels and .import directives for
9906    all external symbols that were actually referenced.  */
9907 
9908 static void
9909 pa_hpux_file_end (void)
9910 {
9911   unsigned int i;
9912   extern_symbol *p;
9913 
9914   if (!NO_DEFERRED_PROFILE_COUNTERS)
9915     output_deferred_profile_counters ();
9916 
9917   output_deferred_plabels ();
9918 
9919   for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
9920     {
9921       tree decl = p->decl;
9922 
9923       if (!TREE_ASM_WRITTEN (decl)
9924 	  && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9925 	ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9926     }
9927 
9928   vec_free (extern_symbols);
9929 }
9930 #endif
9931 
9932 /* Return true if a change from mode FROM to mode TO for a register
9933    in register class RCLASS is invalid.  */
9934 
9935 bool
9936 pa_cannot_change_mode_class (machine_mode from, machine_mode to,
9937 			     enum reg_class rclass)
9938 {
9939   if (from == to)
9940     return false;
9941 
9942   if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
9943     return false;
9944 
9945   /* Reject changes to/from modes with zero size.  */
9946   if (!GET_MODE_SIZE (from) || !GET_MODE_SIZE (to))
9947     return true;
9948 
9949   /* Reject changes to/from complex and vector modes.  */
9950   if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
9951       || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
9952     return true;
9953 
9954   /* There is no way to load QImode or HImode values directly from memory
9955      to a FP register.  SImode loads to the FP registers are not zero
9956      extended.  On the 64-bit target, this conflicts with the definition
9957      of LOAD_EXTEND_OP.  Thus, we can't allow changing between modes with
9958      different sizes in the floating-point registers.  */
9959   if (MAYBE_FP_REG_CLASS_P (rclass))
9960     return true;
9961 
9962   /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
9963      in specific sets of registers.  Thus, we cannot allow changing
9964      to a larger mode when it's larger than a word.  */
9965   if (GET_MODE_SIZE (to) > UNITS_PER_WORD
9966       && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
9967     return true;
9968 
9969   return false;
9970 }
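
/* For example, changing a DFmode value held in a floating-point
   register to SImode is rejected above: the sizes differ and SImode
   loads to the FP registers are not zero extended.  */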
9971 
9972 /* Returns TRUE if it is a good idea to tie two pseudo registers
9973    when one has mode MODE1 and one has mode MODE2.
9974    If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
9975    for any hard reg, then this must be FALSE for correct output.
9976 
9977    We should return FALSE for QImode and HImode because these modes
9978    are not ok in the floating-point registers.  However, this prevents
9979    tying these modes to SImode and DImode in the general registers.
9980    So, this isn't a good idea.  We rely on HARD_REGNO_MODE_OK and
9981    CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
9982    in the floating-point registers.  */
9983 
9984 bool
9985 pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
9986 {
9987   /* Don't tie modes in different classes.  */
9988   if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
9989     return false;
9990 
9991   return true;
9992 }
9993 
9994 
9995 /* Length in units of the trampoline instruction code.  */
9996 
9997 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
9998 
9999 
10000 /* Output assembler code for a block containing the constant parts
10001    of a trampoline, leaving space for the variable parts.
10002 
10003    The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
10004    and then branches to the specified routine.
10005 
10006    This code template is copied from the text segment to a stack
10007    location, patched by pa_trampoline_init to contain valid values,
10008    and then entered as a subroutine.
10009 
10010    It is best to keep this as small as possible to avoid having to
10011    flush multiple lines in the cache.  */
10012 
10013 static void
10014 pa_asm_trampoline_template (FILE *f)
10015 {
10016   if (!TARGET_64BIT)
10017     {
10018       fputs ("\tldw	36(%r22),%r21\n", f);
10019       fputs ("\tbb,>=,n	%r21,30,.+16\n", f);
10020       if (ASSEMBLER_DIALECT == 0)
10021 	fputs ("\tdepi	0,31,2,%r21\n", f);
10022       else
10023 	fputs ("\tdepwi	0,31,2,%r21\n", f);
10024       fputs ("\tldw	4(%r21),%r19\n", f);
10025       fputs ("\tldw	0(%r21),%r21\n", f);
10026       if (TARGET_PA_20)
10027 	{
10028 	  fputs ("\tbve	(%r21)\n", f);
10029 	  fputs ("\tldw	40(%r22),%r29\n", f);
10030 	  fputs ("\t.word	0\n", f);
10031 	  fputs ("\t.word	0\n", f);
10032 	}
10033       else
10034 	{
10035 	  fputs ("\tldsid	(%r21),%r1\n", f);
10036 	  fputs ("\tmtsp	%r1,%sr0\n", f);
10037 	  fputs ("\tbe	0(%sr0,%r21)\n", f);
10038 	  fputs ("\tldw	40(%r22),%r29\n", f);
10039 	}
10040       fputs ("\t.word	0\n", f);
10041       fputs ("\t.word	0\n", f);
10042       fputs ("\t.word	0\n", f);
10043       fputs ("\t.word	0\n", f);
10044     }
10045   else
10046     {
10047       fputs ("\t.dword 0\n", f);
10048       fputs ("\t.dword 0\n", f);
10049       fputs ("\t.dword 0\n", f);
10050       fputs ("\t.dword 0\n", f);
10051       fputs ("\tmfia	%r31\n", f);
10052       fputs ("\tldd	24(%r31),%r1\n", f);
10053       fputs ("\tldd	24(%r1),%r27\n", f);
10054       fputs ("\tldd	16(%r1),%r1\n", f);
10055       fputs ("\tbve	(%r1)\n", f);
10056       fputs ("\tldd	32(%r31),%r31\n", f);
10057       fputs ("\t.dword 0  ; fptr\n", f);
10058       fputs ("\t.dword 0  ; static link\n", f);
10059     }
10060 }
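
/* Trampolines are instantiated when the address of a nested function
   escapes, as in this illustrative fragment using the GNU C
   nested-function extension:

     int outer (int x)
     {
       int inner (int y) { return x + y; }
       int (*fp) (int) = inner;
       return fp (1);
     }

   Taking the address of INNER forces a trampoline because INNER needs
   the static chain to reach X.  */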
10061 
10062 /* Emit RTL insns to initialize the variable parts of a trampoline.
10063    FNADDR is an RTX for the address of the function's pure code.
10064    CXT is an RTX for the static chain value for the function.
10065 
10066    Move the function address to the trampoline template at offset 36.
10067    Move the static chain value to trampoline template at offset 40.
10068    Move the trampoline address to trampoline template at offset 44.
10069    Move r19 to trampoline template at offset 48.  The latter two
10070    words create a plabel for the indirect call to the trampoline.
10071 
10072    A similar sequence is used for the 64-bit port but the plabel is
10073    at the beginning of the trampoline.
10074 
10075    Finally, the cache entries for the trampoline code are flushed.
10076    This is necessary to ensure that the trampoline instruction sequence
10077    is written to memory prior to any attempts at prefetching the code
10078    sequence.  */
10079 
10080 static void
10081 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10082 {
10083   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10084   rtx start_addr = gen_reg_rtx (Pmode);
10085   rtx end_addr = gen_reg_rtx (Pmode);
10086   rtx line_length = gen_reg_rtx (Pmode);
10087   rtx r_tramp, tmp;
10088 
10089   emit_block_move (m_tramp, assemble_trampoline_template (),
10090 		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10091   r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10092 
10093   if (!TARGET_64BIT)
10094     {
10095       tmp = adjust_address (m_tramp, Pmode, 36);
10096       emit_move_insn (tmp, fnaddr);
10097       tmp = adjust_address (m_tramp, Pmode, 40);
10098       emit_move_insn (tmp, chain_value);
10099 
10100       /* Create a fat pointer for the trampoline.  */
10101       tmp = adjust_address (m_tramp, Pmode, 44);
10102       emit_move_insn (tmp, r_tramp);
10103       tmp = adjust_address (m_tramp, Pmode, 48);
10104       emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10105 
10106       /* fdc and fic only use registers for the address to flush,
10107 	 they do not accept integer displacements.  We align the
10108 	 start and end addresses to the beginning of their respective
10109 	 cache lines to minimize the number of lines flushed.  */
10110       emit_insn (gen_andsi3 (start_addr, r_tramp,
10111 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10112       tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
10113 					     TRAMPOLINE_CODE_SIZE-1));
10114       emit_insn (gen_andsi3 (end_addr, tmp,
10115 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10116       emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10117       emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10118       emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10119 				    gen_reg_rtx (Pmode),
10120 				    gen_reg_rtx (Pmode)));
10121     }
10122   else
10123     {
10124       tmp = adjust_address (m_tramp, Pmode, 56);
10125       emit_move_insn (tmp, fnaddr);
10126       tmp = adjust_address (m_tramp, Pmode, 64);
10127       emit_move_insn (tmp, chain_value);
10128 
10129       /* Create a fat pointer for the trampoline.  */
10130       tmp = adjust_address (m_tramp, Pmode, 16);
10131       emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
10132 							    r_tramp, 32)));
10133       tmp = adjust_address (m_tramp, Pmode, 24);
10134       emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10135 
10136       /* fdc and fic only use registers for the address to flush,
10137 	 they do not accept integer displacements.  We align the
10138 	 start and end addresses to the beginning of their respective
10139 	 cache lines to minimize the number of lines flushed.  */
10140       tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
10141       emit_insn (gen_anddi3 (start_addr, tmp,
10142 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10143       tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
10144 					     TRAMPOLINE_CODE_SIZE - 1));
10145       emit_insn (gen_anddi3 (end_addr, tmp,
10146 			     GEN_INT (-MIN_CACHELINE_SIZE)));
10147       emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10148       emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10149       emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10150 				    gen_reg_rtx (Pmode),
10151 				    gen_reg_rtx (Pmode)));
10152     }
10153 
10154 #ifdef HAVE_ENABLE_EXECUTE_STACK
10155   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10156 		     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
10157 #endif
10158 }
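
/* A sketch of the flush range computation above, assuming a 32-byte
   MIN_CACHELINE_SIZE and the 40-byte 32-bit code template: for a
   trampoline at address 0x1234, start_addr = 0x1234 & -32 = 0x1220 and
   end_addr = (0x1234 + 39) & -32 = 0x1240, so exactly the two cache
   lines containing the code sequence are flushed.  */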
10159 
10160 /* Perform any machine-specific adjustment in the address of the trampoline.
10161    ADDR contains the address that was passed to pa_trampoline_init.
10162    Adjust the trampoline address to point to the plabel at offset 44.
   The offset of 46 used below (44 plus 2) also sets the low-order flag
   bit that marks the address as a plabel pointer in the 32-bit
   runtime.  */
10163 
10164 static rtx
10165 pa_trampoline_adjust_address (rtx addr)
10166 {
10167   if (!TARGET_64BIT)
10168     addr = memory_address (Pmode, plus_constant (Pmode, addr, 46));
10169   return addr;
10170 }
10171 
10172 static rtx
10173 pa_delegitimize_address (rtx orig_x)
10174 {
10175   rtx x = delegitimize_mem_from_attrs (orig_x);
10176 
10177   if (GET_CODE (x) == LO_SUM
10178       && GET_CODE (XEXP (x, 1)) == UNSPEC
10179       && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10180     return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10181   return x;
10182 }
10183 
10184 static rtx
10185 pa_internal_arg_pointer (void)
10186 {
10187   /* The argument pointer and the hard frame pointer are the same in
10188      the 32-bit runtime, so we don't need a copy.  */
10189   if (TARGET_64BIT)
10190     return copy_to_reg (virtual_incoming_args_rtx);
10191   else
10192     return virtual_incoming_args_rtx;
10193 }
10194 
10195 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10196    Frame pointer elimination is automatically handled.  */
10197 
10198 static bool
10199 pa_can_eliminate (const int from, const int to)
10200 {
10201   /* The argument cannot be eliminated in the 64-bit runtime.  */
10202   if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10203     return false;
10204 
10205   return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10206           ? ! frame_pointer_needed
10207           : true);
10208 }
10209 
10210 /* Define the offset between two registers, FROM to be eliminated and its
10211    replacement TO, at the start of a routine.  */
10212 HOST_WIDE_INT
10213 pa_initial_elimination_offset (int from, int to)
10214 {
10215   HOST_WIDE_INT offset;
10216 
10217   if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10218       && to == STACK_POINTER_REGNUM)
10219     offset = -pa_compute_frame_size (get_frame_size (), 0);
10220   else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10221     offset = 0;
10222   else
10223     gcc_unreachable ();
10224 
10225   return offset;
10226 }
10227 
10228 static void
10229 pa_conditional_register_usage (void)
10230 {
10231   int i;
10232 
10233   if (!TARGET_64BIT && !TARGET_PA_11)
10234     {
10235       for (i = 56; i <= FP_REG_LAST; i++)
10236 	fixed_regs[i] = call_used_regs[i] = 1;
10237       for (i = 33; i < 56; i += 2)
10238 	fixed_regs[i] = call_used_regs[i] = 1;
10239     }
10240   if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
10241     {
10242       for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10243 	fixed_regs[i] = call_used_regs[i] = 1;
10244     }
10245   if (flag_pic)
10246     fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10247 }
10248 
10249 /* Target hook for c_mode_for_suffix.  */
10250 
10251 static machine_mode
10252 pa_c_mode_for_suffix (char suffix)
10253 {
10254   if (HPUX_LONG_DOUBLE_LIBRARY)
10255     {
10256       if (suffix == 'q')
10257 	return TFmode;
10258     }
10259 
10260   return VOIDmode;
10261 }
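
/* For example, with an HP-UX long double library a literal written as
   1.0q is given TFmode, the 128-bit floating type; on other
   configurations the suffix is rejected because VOIDmode is
   returned.  */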
10262 
10263 /* Target hook for function_section.  */
10264 
10265 static section *
10266 pa_function_section (tree decl, enum node_frequency freq,
10267 		     bool startup, bool exit)
10268 {
10269   /* Put functions in text section if target doesn't have named sections.  */
10270   if (!targetm_common.have_named_sections)
10271     return text_section;
10272 
10273   /* Force nested functions into the same section as the containing
10274      function.  */
10275   if (decl
10276       && DECL_SECTION_NAME (decl) == NULL
10277       && DECL_CONTEXT (decl) != NULL_TREE
10278       && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10279       && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
10280     return function_section (DECL_CONTEXT (decl));
10281 
10282   /* Otherwise, use the default function section.  */
10283   return default_function_section (decl, freq, startup, exit);
10284 }
10285 
10286 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10287 
10288    In 64-bit mode, we reject CONST_DOUBLES.  We also reject CONST_INTS
10289    that need more than three instructions to load prior to reload.  This
10290    limit is somewhat arbitrary.  It takes three instructions to load a
10291    CONST_INT from memory but two are memory accesses.  It may be better
10292    to increase the allowed range for CONST_INTS.  We may also be able
10293    to handle CONST_DOUBLES.  */
10294 
10295 static bool
10296 pa_legitimate_constant_p (machine_mode mode, rtx x)
10297 {
10298   if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10299     return false;
10300 
10301   if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10302     return false;
10303 
10304   /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10305      legitimate constants.  The other variants can't be handled by
10306      the move patterns after reload starts.  */
10307   if (tls_referenced_p (x))
10308     return false;
10309 
10310   if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10311     return false;
10312 
10313   if (TARGET_64BIT
10314       && HOST_BITS_PER_WIDE_INT > 32
10315       && GET_CODE (x) == CONST_INT
10316       && !reload_in_progress
10317       && !reload_completed
10318       && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
10319       && !pa_cint_ok_for_move (UINTVAL (x)))
10320     return false;
10321 
10322   if (function_label_operand (x, mode))
10323     return false;
10324 
10325   return true;
10326 }
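
/* For example, in the 64-bit runtime a CONST_INT whose value cannot be
   synthesized in a short ldil/ldo style sequence fails
   pa_cint_ok_for_move before reload and is forced to memory, while
   small values such as 0x1234 remain legitimate.  */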
10327 
10328 /* Implement TARGET_SECTION_TYPE_FLAGS.  */
10329 
10330 static unsigned int
10331 pa_section_type_flags (tree decl, const char *name, int reloc)
10332 {
10333   unsigned int flags;
10334 
10335   flags = default_section_type_flags (decl, name, reloc);
10336 
10337   /* Function labels are placed in the constant pool.  This can
10338      cause a section conflict if decls are put in ".data.rel.ro"
10339      or ".data.rel.ro.local" using the __attribute__ construct.  */
10340   if (strcmp (name, ".data.rel.ro") == 0
10341       || strcmp (name, ".data.rel.ro.local") == 0)
10342     flags |= SECTION_WRITE | SECTION_RELRO;
10343 
10344   return flags;
10345 }
10346 
10347 /* pa_legitimate_address_p recognizes an RTL expression that is a
10348    valid memory address for an instruction.  The MODE argument is the
10349    machine mode for the MEM expression that wants to use this address.
10350 
10351    On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10352    REG+REG, and REG+(REG*SCALE).  The indexed address forms are only
10353    available with floating point loads and stores, and integer loads.
10354    We get better code by allowing indexed addresses in the initial
10355    RTL generation.
10356 
10357    The acceptance of indexed addresses as legitimate implies that we
10358    must provide patterns for doing indexed integer stores, or the move
10359    expanders must force the address of an indexed store to a register.
10360    We have adopted the latter approach.
10361 
10362    Another function of pa_legitimate_address_p is to ensure that
10363    the base register is a valid pointer for indexed instructions.
10364    On targets that have non-equivalent space registers, we have to
10365    know at the time of assembler output which register in a REG+REG
10366    pair is the base register.  The REG_POINTER flag is sometimes lost
10367    in reload and the following passes, so it can't be relied on during
10368    code generation.  Thus, we either have to canonicalize the order
10369    of the registers in REG+REG indexed addresses, or treat REG+REG
10370    addresses separately and provide patterns for both permutations.
10371 
10372    The latter approach requires several hundred additional lines of
10373    code in pa.md.  The downside to canonicalizing is that a PLUS
10374    in the wrong order can't combine to form a scaled indexed
10375    memory operand.  As we won't need to canonicalize the operands if
10376    the REG_POINTER lossage can be fixed, it seems better to canonicalize.
10377 
10378    We initially break out scaled indexed addresses in canonical order
10379    in pa_emit_move_sequence.  LEGITIMIZE_ADDRESS also canonicalizes
10380    scaled indexed addresses during RTL generation.  However, fold_rtx
10381    has its own opinion on how the operands of a PLUS should be ordered.
10382    If one of the operands is equivalent to a constant, it will make
10383    that operand the second operand.  As the base register is likely to
10384    be equivalent to a SYMBOL_REF, we have made it the second operand.
10385 
10386    pa_legitimate_address_p accepts REG+REG as legitimate when the
10387    operands are in the order INDEX+BASE on targets with non-equivalent
10388    space registers, and in any order on targets with equivalent space
10389    registers.  It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10390 
10391    We treat a SYMBOL_REF as legitimate if it is part of the current
10392    function's constant-pool, because such addresses can actually be
10393    output as REG+SMALLINT.  */
10394 
10395 static bool
10396 pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
10397 {
10398   if ((REG_P (x)
10399        && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
10400 		  : REG_OK_FOR_BASE_P (x)))
10401       || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
10402 	   || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
10403 	  && REG_P (XEXP (x, 0))
10404 	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10405 		     : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
10406     return true;
10407 
10408   if (GET_CODE (x) == PLUS)
10409     {
10410       rtx base, index;
10411 
10412       /* For REG+REG, the base register should be in XEXP (x, 1),
10413 	 so check it first.  */
10414       if (REG_P (XEXP (x, 1))
10415 	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
10416 		     : REG_OK_FOR_BASE_P (XEXP (x, 1))))
10417 	base = XEXP (x, 1), index = XEXP (x, 0);
10418       else if (REG_P (XEXP (x, 0))
10419 	       && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10420 			  : REG_OK_FOR_BASE_P (XEXP (x, 0))))
10421 	base = XEXP (x, 0), index = XEXP (x, 1);
10422       else
10423 	return false;
10424 
10425       if (GET_CODE (index) == CONST_INT)
10426 	{
10427 	  if (INT_5_BITS (index))
10428 	    return true;
10429 
10430 	  /* When INT14_OK_STRICT is false, a secondary reload is needed
10431 	     to adjust the displacement of SImode and DImode floating point
10432 	     instructions but this may fail when the register also needs
10433 	     reloading.  So, we return false when STRICT is true.  We
10434 	     also reject long displacements for float mode addresses since
10435 	     the majority of accesses will use floating point instructions
10436 	     that don't support 14-bit offsets.  */
10437 	  if (!INT14_OK_STRICT
10438 	      && (strict || !(reload_in_progress || reload_completed))
10439 	      && mode != QImode
10440 	      && mode != HImode)
10441 	    return false;
10442 
10443 	  return base14_operand (index, mode);
10444 	}
10445 
10446       if (!TARGET_DISABLE_INDEXING
10447 	  /* Only accept the "canonical" INDEX+BASE operand order
10448 	     on targets with non-equivalent space registers.  */
10449 	  && (TARGET_NO_SPACE_REGS
10450 	      ? REG_P (index)
10451 	      : (base == XEXP (x, 1) && REG_P (index)
10452 		 && (reload_completed
10453 		     || (reload_in_progress && HARD_REGISTER_P (base))
10454 		     || REG_POINTER (base))
10455 		 && (reload_completed
10456 		     || (reload_in_progress && HARD_REGISTER_P (index))
10457 		     || !REG_POINTER (index))))
10458 	  && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
10459 	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
10460 		     : REG_OK_FOR_INDEX_P (index))
10461 	  && borx_reg_operand (base, Pmode)
10462 	  && borx_reg_operand (index, Pmode))
10463 	return true;
10464 
10465       if (!TARGET_DISABLE_INDEXING
10466 	  && GET_CODE (index) == MULT
10467 	  /* Only accept base operands with the REG_POINTER flag prior to
10468 	     reload on targets with non-equivalent space registers.  */
10469 	  && (TARGET_NO_SPACE_REGS
10470 	      || (base == XEXP (x, 1)
10471 		  && (reload_completed
10472 		      || (reload_in_progress && HARD_REGISTER_P (base))
10473 		      || REG_POINTER (base))))
10474 	  && REG_P (XEXP (index, 0))
10475 	  && GET_MODE (XEXP (index, 0)) == Pmode
10476 	  && MODE_OK_FOR_SCALED_INDEXING_P (mode)
10477 	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
10478 		     : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
10479 	  && GET_CODE (XEXP (index, 1)) == CONST_INT
10480 	  && INTVAL (XEXP (index, 1))
10481 	     == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
10482 	  && borx_reg_operand (base, Pmode))
10483 	return true;
10484 
10485       return false;
10486     }
10487 
10488   if (GET_CODE (x) == LO_SUM)
10489     {
10490       rtx y = XEXP (x, 0);
10491 
10492       if (GET_CODE (y) == SUBREG)
10493 	y = SUBREG_REG (y);
10494 
10495       if (REG_P (y)
10496 	  && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
10497 		     : REG_OK_FOR_BASE_P (y)))
10498 	{
10499 	  /* Needed for -fPIC */
10500 	  if (mode == Pmode
10501 	      && GET_CODE (XEXP (x, 1)) == UNSPEC)
10502 	    return true;
10503 
10504 	  if (!INT14_OK_STRICT
10505 	      && (strict || !(reload_in_progress || reload_completed))
10506 	      && mode != QImode
10507 	      && mode != HImode)
10508 	    return false;
10509 
10510 	  if (CONSTANT_P (XEXP (x, 1)))
10511 	    return true;
10512 	}
10513       return false;
10514     }
10515 
10516   if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
10517     return true;
10518 
10519   return false;
10520 }
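
/* Some addresses accepted above, written as illustrative RTL:

     (plus (reg %r26) (const_int 12))        REG+SMALLINT
     (plus (reg %r25) (reg %r26))            unscaled INDEX+BASE
     (plus (mult (reg %r25) (const_int 4))
	   (reg %r26))                       scaled indexing for SImode

   each subject to the mode, strictness and space-register conditions
   tested above.  */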
10521 
10522 /* Look for machine dependent ways to make the invalid address AD a
10523    valid address.
10524 
10525    For the PA, transform:
10526 
10527         memory(X + <large int>)
10528 
10529    into:
10530 
10531         if (<large int> & mask) >= (mask + 1) / 2
10532           Y = (<large int> & ~mask) + mask + 1  Round up.
10533         else
10534           Y = (<large int> & ~mask)             Round down.
10535         Z = X + Y
10536         memory (Z + (<large int> - Y));
10537 
10538    This makes reload inheritance and reload_cse work better since Z
10539    can be reused.
10540 
10541    There may be more opportunities to improve code with this hook.  */
10542 
10543 rtx
10544 pa_legitimize_reload_address (rtx ad, machine_mode mode,
10545 			      int opnum, int type,
10546 			      int ind_levels ATTRIBUTE_UNUSED)
10547 {
10548   long offset, newoffset, mask;
10549   rtx new_rtx, temp = NULL_RTX;
10550 
10551   mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
10552 	  && !INT14_OK_STRICT ? 0x1f : 0x3fff);
10553 
10554   if (optimize && GET_CODE (ad) == PLUS)
10555     temp = simplify_binary_operation (PLUS, Pmode,
10556 				      XEXP (ad, 0), XEXP (ad, 1));
10557 
10558   new_rtx = temp ? temp : ad;
10559 
10560   if (optimize
10561       && GET_CODE (new_rtx) == PLUS
10562       && GET_CODE (XEXP (new_rtx, 0)) == REG
10563       && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
10564     {
10565       offset = INTVAL (XEXP ((new_rtx), 1));
10566 
10567       /* Choose rounding direction.  Round up if we are >= halfway.  */
10568       if ((offset & mask) >= ((mask + 1) / 2))
10569 	newoffset = (offset & ~mask) + mask + 1;
10570       else
10571 	newoffset = offset & ~mask;
10572 
10573       /* Ensure that long displacements are aligned.  */
10574       if (mask == 0x3fff
10575 	  && (GET_MODE_CLASS (mode) == MODE_FLOAT
10576 	      || (TARGET_64BIT && (mode) == DImode)))
10577 	newoffset &= ~(GET_MODE_SIZE (mode) - 1);
10578 
10579       if (newoffset != 0 && VAL_14_BITS_P (newoffset))
10580 	{
10581 	  temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
10582 			       GEN_INT (newoffset));
10583 	  ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
10584 	  push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
10585 		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10586 		       opnum, (enum reload_type) type);
10587 	  return ad;
10588 	}
10589     }
10590 
10591   return NULL_RTX;
10592 }
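
/* A worked instance of the rounding above with the 5-bit mask 0x1f
   used for floating-point modes when INT14_OK_STRICT is false: for
   memory (X + 123), 123 & 0x1f == 27 >= 16, so we round up to
   Y = (123 & ~0x1f) + 32 == 128 and emit memory ((X + 128) + -5).
   The inner sum X + 128 is reloaded into a base register that later
   reloads can inherit, and -5 is a valid 5-bit displacement.  */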
10593 
10594 /* Output address vector.  */
10595 
10596 void
10597 pa_output_addr_vec (rtx lab, rtx body)
10598 {
10599   int idx, vlen = XVECLEN (body, 0);
10600 
10601   targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10602   if (TARGET_GAS)
10603     fputs ("\t.begin_brtab\n", asm_out_file);
10604   for (idx = 0; idx < vlen; idx++)
10605     {
10606       ASM_OUTPUT_ADDR_VEC_ELT
10607 	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10608     }
10609   if (TARGET_GAS)
10610     fputs ("\t.end_brtab\n", asm_out_file);
10611 }
10612 
10613 /* Output address difference vector.  */
10614 
10615 void
10616 pa_output_addr_diff_vec (rtx lab, rtx body)
10617 {
10618   rtx base = XEXP (XEXP (body, 0), 0);
10619   int idx, vlen = XVECLEN (body, 1);
10620 
10621   targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10622   if (TARGET_GAS)
10623     fputs ("\t.begin_brtab\n", asm_out_file);
10624   for (idx = 0; idx < vlen; idx++)
10625     {
10626       ASM_OUTPUT_ADDR_DIFF_ELT
10627 	(asm_out_file,
10628 	 body,
10629 	 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10630 	 CODE_LABEL_NUMBER (base));
10631     }
10632   if (TARGET_GAS)
10633     fputs ("\t.end_brtab\n", asm_out_file);
10634 }
10635 
10636 /* This is a helper function for the other atomic operations.  This function
10637    emits a loop that contains SEQ that iterates until a compare-and-swap
10638    operation at the end succeeds.  MEM is the memory to be modified.  SEQ is
10639    a set of instructions that takes a value from OLD_REG as an input and
10640    produces a value in NEW_REG as an output.  Before SEQ, OLD_REG will be
10641    set to the current contents of MEM.  After SEQ, a compare-and-swap will
10642    attempt to update MEM with NEW_REG.  The function returns true when the
10643    loop was generated successfully.  */
10644 
10645 static bool
10646 pa_expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
10647 {
10648   machine_mode mode = GET_MODE (mem);
10649   rtx_code_label *label;
10650   rtx cmp_reg, success, oldval;
10651 
10652   /* The loop we want to generate looks like
10653 
10654         cmp_reg = mem;
10655       label:
10656         old_reg = cmp_reg;
10657         seq;
10658         (success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg)
10659         if (!success)
10660           goto label;
10661 
10662      Note that we only do the plain load from memory once.  Subsequent
10663      iterations use the value loaded by the compare-and-swap pattern.  */
10664 
10665   label = gen_label_rtx ();
10666   cmp_reg = gen_reg_rtx (mode);
10667 
10668   emit_move_insn (cmp_reg, mem);
10669   emit_label (label);
10670   emit_move_insn (old_reg, cmp_reg);
10671   if (seq)
10672     emit_insn (seq);
10673 
10674   success = NULL_RTX;
10675   oldval = cmp_reg;
10676   if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg,
10677                                        new_reg, false, MEMMODEL_SYNC_SEQ_CST,
10678                                        MEMMODEL_RELAXED))
10679     return false;
10680 
10681   if (oldval != cmp_reg)
10682     emit_move_insn (cmp_reg, oldval);
10683 
10684   /* Mark this jump predicted not taken.  */
10685   emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
10686                            GET_MODE (success), 1, label, 0);
10687   return true;
10688 }
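
/* For example, an atomic fetch-and-add can be built on this loop by
   passing a SEQ that computes NEW_REG = OLD_REG + VAL; each iteration
   recomputes the sum from the freshly observed memory value until the
   compare-and-swap succeeds.  */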
10689 
10690 /* This function tries to implement an atomic exchange operation using a
10691    compare_and_swap loop. VAL is written to *MEM.  The previous contents of
10692    *MEM are returned, using TARGET if possible.  No memory model is required
10693    since a compare_and_swap loop is seq-cst.  */
10694 
10695 rtx
10696 pa_maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val)
10697 {
10698   machine_mode mode = GET_MODE (mem);
10699 
10700   if (can_compare_and_swap_p (mode, true))
10701     {
10702       if (!target || !register_operand (target, mode))
10703         target = gen_reg_rtx (mode);
10704       if (pa_expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
10705         return target;
10706     }
10707 
10708   return NULL_RTX;
10709 }
10710 
10711 /* Implement TARGET_CALLEE_COPIES.  The callee is responsible for copying
10712    arguments passed by hidden reference in the 32-bit HP runtime.  Users
10713    can override this behavior for better compatibility with OpenMP at the
10714    risk of library incompatibilities.  Arguments are always passed by value
10715    in the 64-bit HP runtime.  */
10716 
10717 static bool
10718 pa_callee_copies (cumulative_args_t cum ATTRIBUTE_UNUSED,
10719 		  machine_mode mode ATTRIBUTE_UNUSED,
10720 		  const_tree type ATTRIBUTE_UNUSED,
10721 		  bool named ATTRIBUTE_UNUSED)
10722 {
10723   return !TARGET_CALLER_COPIES;
10724 }
10725 
10726 #include "gt-pa.h"
10727