1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992-2021 Free Software Foundation, Inc.
3 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #define IN_TARGET_CODE 1
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "df.h"
32 #include "tm_p.h"
33 #include "stringpool.h"
34 #include "attribs.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "diagnostic-core.h"
40 #include "insn-attr.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
44 #include "varasm.h"
45 #include "calls.h"
46 #include "output.h"
47 #include "except.h"
48 #include "explow.h"
49 #include "expr.h"
50 #include "reload.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "cfgrtl.h"
54 #include "opts.h"
55 #include "builtins.h"
56
57 /* This file should be included last. */
58 #include "target-def.h"
59
60 /* Return nonzero if there is a bypass for the output of
61 OUT_INSN and the fp store IN_INSN. */
62 int
pa_fpstore_bypass_p(rtx_insn * out_insn,rtx_insn * in_insn)63 pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
64 {
65 machine_mode store_mode;
66 machine_mode other_mode;
67 rtx set;
68
69 if (recog_memoized (in_insn) < 0
70 || (get_attr_type (in_insn) != TYPE_FPSTORE
71 && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
72 || recog_memoized (out_insn) < 0)
73 return 0;
74
75 store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
76
77 set = single_set (out_insn);
78 if (!set)
79 return 0;
80
81 other_mode = GET_MODE (SET_SRC (set));
82
83 return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
84 }
85
86
87 #ifndef DO_FRAME_NOTES
88 #ifdef INCOMING_RETURN_ADDR_RTX
89 #define DO_FRAME_NOTES 1
90 #else
91 #define DO_FRAME_NOTES 0
92 #endif
93 #endif
94
95 static void pa_option_override (void);
96 static void copy_reg_pointer (rtx, rtx);
97 static void fix_range (const char *);
98 static int hppa_register_move_cost (machine_mode mode, reg_class_t,
99 reg_class_t);
100 static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
101 static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool);
102 static inline rtx force_mode (machine_mode, rtx);
103 static void pa_reorg (void);
104 static void pa_combine_instructions (void);
105 static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
106 rtx, rtx);
107 static bool forward_branch_p (rtx_insn *);
108 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
109 static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
110 static int compute_cpymem_length (rtx_insn *);
111 static int compute_clrmem_length (rtx_insn *);
112 static bool pa_assemble_integer (rtx, unsigned int, int);
113 static void remove_useless_addtr_insns (int);
114 static void store_reg (int, HOST_WIDE_INT, int);
115 static void store_reg_modify (int, int, HOST_WIDE_INT);
116 static void load_reg (int, HOST_WIDE_INT, int);
117 static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
118 static rtx pa_function_value (const_tree, const_tree, bool);
119 static rtx pa_libcall_value (machine_mode, const_rtx);
120 static bool pa_function_value_regno_p (const unsigned int);
121 static void pa_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
122 static void pa_linux_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
123 static void update_total_code_bytes (unsigned int);
124 static void pa_output_function_epilogue (FILE *);
125 static int pa_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
126 static int pa_issue_rate (void);
127 static int pa_reloc_rw_mask (void);
128 static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
129 static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
130 static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
131 ATTRIBUTE_UNUSED;
132 static void pa_encode_section_info (tree, rtx, int);
133 static const char *pa_strip_name_encoding (const char *);
134 static bool pa_function_ok_for_sibcall (tree, tree);
135 static void pa_globalize_label (FILE *, const char *)
136 ATTRIBUTE_UNUSED;
137 static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
138 HOST_WIDE_INT, tree);
139 #if !defined(USE_COLLECT2)
140 static void pa_asm_out_constructor (rtx, int);
141 static void pa_asm_out_destructor (rtx, int);
142 #endif
143 static void pa_init_builtins (void);
144 static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
145 static rtx hppa_builtin_saveregs (void);
146 static void hppa_va_start (tree, rtx);
147 static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
148 static bool pa_scalar_mode_supported_p (scalar_mode);
149 static bool pa_commutative_p (const_rtx x, int outer_code);
150 static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
151 static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
152 static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
153 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
154 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
155 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
156 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
157 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
158 static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
159 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
160 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
161 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
162 static void output_deferred_plabels (void);
163 static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
164 static void pa_file_end (void);
165 static void pa_init_libfuncs (void);
166 static rtx pa_struct_value_rtx (tree, int);
167 static bool pa_pass_by_reference (cumulative_args_t,
168 const function_arg_info &);
169 static int pa_arg_partial_bytes (cumulative_args_t, const function_arg_info &);
170 static void pa_function_arg_advance (cumulative_args_t,
171 const function_arg_info &);
172 static rtx pa_function_arg (cumulative_args_t, const function_arg_info &);
173 static pad_direction pa_function_arg_padding (machine_mode, const_tree);
174 static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
175 static struct machine_function * pa_init_machine_status (void);
176 static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
177 machine_mode,
178 secondary_reload_info *);
179 static bool pa_secondary_memory_needed (machine_mode,
180 reg_class_t, reg_class_t);
181 static void pa_extra_live_on_entry (bitmap);
182 static machine_mode pa_promote_function_mode (const_tree,
183 machine_mode, int *,
184 const_tree, int);
185
186 static void pa_asm_trampoline_template (FILE *);
187 static void pa_trampoline_init (rtx, tree, rtx);
188 static rtx pa_trampoline_adjust_address (rtx);
189 static rtx pa_delegitimize_address (rtx);
190 static bool pa_print_operand_punct_valid_p (unsigned char);
191 static rtx pa_internal_arg_pointer (void);
192 static bool pa_can_eliminate (const int, const int);
193 static void pa_conditional_register_usage (void);
194 static machine_mode pa_c_mode_for_suffix (char);
195 static section *pa_function_section (tree, enum node_frequency, bool, bool);
196 static bool pa_cannot_force_const_mem (machine_mode, rtx);
197 static bool pa_legitimate_constant_p (machine_mode, rtx);
198 static unsigned int pa_section_type_flags (tree, const char *, int);
199 static bool pa_legitimate_address_p (machine_mode, rtx, bool);
200 static bool pa_callee_copies (cumulative_args_t, const function_arg_info &);
201 static unsigned int pa_hard_regno_nregs (unsigned int, machine_mode);
202 static bool pa_hard_regno_mode_ok (unsigned int, machine_mode);
203 static bool pa_modes_tieable_p (machine_mode, machine_mode);
204 static bool pa_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
205 static HOST_WIDE_INT pa_starting_frame_offset (void);
206 static section* pa_elf_select_rtx_section(machine_mode, rtx, unsigned HOST_WIDE_INT) ATTRIBUTE_UNUSED;
207
208 /* The following extra sections are only used for SOM. */
209 static GTY(()) section *som_readonly_data_section;
210 static GTY(()) section *som_one_only_readonly_data_section;
211 static GTY(()) section *som_one_only_data_section;
212 static GTY(()) section *som_tm_clone_table_section;
213
214 /* Counts for the number of callee-saved general and floating point
215 registers which were saved by the current function's prologue. */
216 static int gr_saved, fr_saved;
217
218 /* Boolean indicating whether the return pointer was saved by the
219 current function's prologue. */
220 static bool rp_saved;
221
222 static rtx find_addr_reg (rtx);
223
224 /* Keep track of the number of bytes we have output in the CODE subspace
225 during this compilation so we'll know when to emit inline long-calls. */
226 unsigned long total_code_bytes;
227
228 /* The last address of the previous function plus the number of bytes in
229 associated thunks that have been output. This is used to determine if
230 a thunk can use an IA-relative branch to reach its target function. */
231 static unsigned int last_address;
232
233 /* Variables to handle plabels that we discover are necessary at assembly
234 output time. They are output after the current function. */
/* One deferred plabel entry.  NOTE(review): based on the declarations
   visible here, INTERNAL_LABEL appears to be the label under which the
   plabel is emitted and SYMBOL the function it refers to — confirm
   against output_deferred_plabels.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
/* Growable array of deferred plabels, emitted at end of compilation;
   N_DEFERRED_PLABELS is the number of valid entries.  */
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;
243
244 /* Initialize the GCC target structure. */
245
246 #undef TARGET_OPTION_OVERRIDE
247 #define TARGET_OPTION_OVERRIDE pa_option_override
248
249 #undef TARGET_ASM_ALIGNED_HI_OP
250 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
251 #undef TARGET_ASM_ALIGNED_SI_OP
252 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
253 #undef TARGET_ASM_ALIGNED_DI_OP
254 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
255 #undef TARGET_ASM_UNALIGNED_HI_OP
256 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
257 #undef TARGET_ASM_UNALIGNED_SI_OP
258 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
259 #undef TARGET_ASM_UNALIGNED_DI_OP
260 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
261 #undef TARGET_ASM_INTEGER
262 #define TARGET_ASM_INTEGER pa_assemble_integer
263
264 #undef TARGET_ASM_FUNCTION_EPILOGUE
265 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
266
267 #undef TARGET_FUNCTION_VALUE
268 #define TARGET_FUNCTION_VALUE pa_function_value
269 #undef TARGET_LIBCALL_VALUE
270 #define TARGET_LIBCALL_VALUE pa_libcall_value
271 #undef TARGET_FUNCTION_VALUE_REGNO_P
272 #define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p
273
274 #undef TARGET_LEGITIMIZE_ADDRESS
275 #define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address
276
277 #undef TARGET_SCHED_ADJUST_COST
278 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
279 #undef TARGET_SCHED_ISSUE_RATE
280 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
281
282 #undef TARGET_ENCODE_SECTION_INFO
283 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
284 #undef TARGET_STRIP_NAME_ENCODING
285 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
286
287 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
288 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
289
290 #undef TARGET_COMMUTATIVE_P
291 #define TARGET_COMMUTATIVE_P pa_commutative_p
292
293 #undef TARGET_ASM_OUTPUT_MI_THUNK
294 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
295 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
296 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
297
298 #undef TARGET_ASM_FILE_END
299 #define TARGET_ASM_FILE_END pa_file_end
300
301 #undef TARGET_ASM_RELOC_RW_MASK
302 #define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask
303
304 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
305 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p
306
307 #if !defined(USE_COLLECT2)
308 #undef TARGET_ASM_CONSTRUCTOR
309 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
310 #undef TARGET_ASM_DESTRUCTOR
311 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
312 #endif
313
314 #undef TARGET_INIT_BUILTINS
315 #define TARGET_INIT_BUILTINS pa_init_builtins
316
317 #undef TARGET_EXPAND_BUILTIN
318 #define TARGET_EXPAND_BUILTIN pa_expand_builtin
319
320 #undef TARGET_REGISTER_MOVE_COST
321 #define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
322 #undef TARGET_RTX_COSTS
323 #define TARGET_RTX_COSTS hppa_rtx_costs
324 #undef TARGET_ADDRESS_COST
325 #define TARGET_ADDRESS_COST hppa_address_cost
326
327 #undef TARGET_MACHINE_DEPENDENT_REORG
328 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
329
330 #undef TARGET_INIT_LIBFUNCS
331 #define TARGET_INIT_LIBFUNCS pa_init_libfuncs
332
333 #undef TARGET_PROMOTE_FUNCTION_MODE
334 #define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
335 #undef TARGET_PROMOTE_PROTOTYPES
336 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
337
338 #undef TARGET_STRUCT_VALUE_RTX
339 #define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
340 #undef TARGET_RETURN_IN_MEMORY
341 #define TARGET_RETURN_IN_MEMORY pa_return_in_memory
342 #undef TARGET_MUST_PASS_IN_STACK
343 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
344 #undef TARGET_PASS_BY_REFERENCE
345 #define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
346 #undef TARGET_CALLEE_COPIES
347 #define TARGET_CALLEE_COPIES pa_callee_copies
348 #undef TARGET_ARG_PARTIAL_BYTES
349 #define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
350 #undef TARGET_FUNCTION_ARG
351 #define TARGET_FUNCTION_ARG pa_function_arg
352 #undef TARGET_FUNCTION_ARG_ADVANCE
353 #define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
354 #undef TARGET_FUNCTION_ARG_PADDING
355 #define TARGET_FUNCTION_ARG_PADDING pa_function_arg_padding
356 #undef TARGET_FUNCTION_ARG_BOUNDARY
357 #define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary
358
359 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
360 #define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
361 #undef TARGET_EXPAND_BUILTIN_VA_START
362 #define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
363 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
364 #define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
365
366 #undef TARGET_SCALAR_MODE_SUPPORTED_P
367 #define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
368
369 #undef TARGET_CANNOT_FORCE_CONST_MEM
370 #define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem
371
372 #undef TARGET_SECONDARY_RELOAD
373 #define TARGET_SECONDARY_RELOAD pa_secondary_reload
374 #undef TARGET_SECONDARY_MEMORY_NEEDED
375 #define TARGET_SECONDARY_MEMORY_NEEDED pa_secondary_memory_needed
376
377 #undef TARGET_EXTRA_LIVE_ON_ENTRY
378 #define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry
379
380 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
381 #define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
382 #undef TARGET_TRAMPOLINE_INIT
383 #define TARGET_TRAMPOLINE_INIT pa_trampoline_init
384 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
385 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
386 #undef TARGET_DELEGITIMIZE_ADDRESS
387 #define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
388 #undef TARGET_INTERNAL_ARG_POINTER
389 #define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
390 #undef TARGET_CAN_ELIMINATE
391 #define TARGET_CAN_ELIMINATE pa_can_eliminate
392 #undef TARGET_CONDITIONAL_REGISTER_USAGE
393 #define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
394 #undef TARGET_C_MODE_FOR_SUFFIX
395 #define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
396 #undef TARGET_ASM_FUNCTION_SECTION
397 #define TARGET_ASM_FUNCTION_SECTION pa_function_section
398
399 #undef TARGET_LEGITIMATE_CONSTANT_P
400 #define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
401 #undef TARGET_SECTION_TYPE_FLAGS
402 #define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
403 #undef TARGET_LEGITIMATE_ADDRESS_P
404 #define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p
405
406 #undef TARGET_LRA_P
407 #define TARGET_LRA_P hook_bool_void_false
408
409 #undef TARGET_HARD_REGNO_NREGS
410 #define TARGET_HARD_REGNO_NREGS pa_hard_regno_nregs
411 #undef TARGET_HARD_REGNO_MODE_OK
412 #define TARGET_HARD_REGNO_MODE_OK pa_hard_regno_mode_ok
413 #undef TARGET_MODES_TIEABLE_P
414 #define TARGET_MODES_TIEABLE_P pa_modes_tieable_p
415
416 #undef TARGET_CAN_CHANGE_MODE_CLASS
417 #define TARGET_CAN_CHANGE_MODE_CLASS pa_can_change_mode_class
418
419 #undef TARGET_CONSTANT_ALIGNMENT
420 #define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings
421
422 #undef TARGET_STARTING_FRAME_OFFSET
423 #define TARGET_STARTING_FRAME_OFFSET pa_starting_frame_offset
424
425 #undef TARGET_HAVE_SPECULATION_SAFE_VALUE
426 #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
427
428 struct gcc_target targetm = TARGET_INITIALIZER;
429
/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  /* Work on a writable copy, since the parsing below temporarily
     overwrites separators with NUL bytes.  */
  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  /* Each iteration handles one comma-separated REG1-REG2 range.  */
  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
	  return;
	}
      /* Split the string at the dash so STR names the first register.  */
      *dash = '\0';

      /* Also terminate at the next comma, if any, so DASH + 1 names
	 only the second register.  */
      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      /* Restore the dash so the full range can appear in diagnostics.  */
      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      /* Mark every register in the range as fixed and call-used.  */
      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      /* Restore the comma and continue with the next range.  */
      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  /* If no FP register remains usable, fall back to soft float.  */
  if (i > FP_REG_LAST)
    target_flags |= MASK_SOFT_FLOAT;
}
502
/* Implement the TARGET_OPTION_OVERRIDE hook.  Validate and adjust the
   option state after all command-line options have been processed.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  /* Handle options deferred by the generic option machinery; only
     -mfixed-range= is deferred for this target.  */
  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
	switch (opt->opt_index)
	  {
	  case OPT_mfixed_range_:
	    fix_range (opt->arg);
	    break;

	  default:
	    gcc_unreachable ();
	  }
      }

  /* Diagnose option combinations this target cannot honor.  */
  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  /* Debug info requires GAS; silently producing bad tables would be
     worse than disabling -g.  */
  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "%<-g%> is only supported when using GAS on this processor");
      warning (0, "%<-g%> option disabled");
      write_symbols = NO_DEBUG;
    }

  if (TARGET_64BIT && TARGET_HPUX)
    {
      /* DWARF5 is not supported by gdb.  Don't emit DWARF5 unless
	 specifically selected.  */
      if (!OPTION_SET_P (dwarf_strict))
	dwarf_strict = 1;
      if (!OPTION_SET_P (dwarf_version))
	dwarf_version = 4;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "%<-freorder-blocks-and-partition%> does not work "
	      "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  /* Install the per-function machine_function allocator.  */
  init_machine_status = pa_init_machine_status;
}
584
/* Codes for the PA machine-specific builtins; used as indices into
   the pa_builtins decl array below and as MD function codes.  */
enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,		/* __builtin_copysignq  */
  PA_BUILTIN_FABSQ,		/* __builtin_fabsq  */
  PA_BUILTIN_INFQ,		/* __builtin_infq  */
  PA_BUILTIN_HUGE_VALQ,		/* __builtin_huge_valq  */
  PA_BUILTIN_max		/* Sentinel: number of PA builtins.  */
};

/* FUNCTION_DECLs for the builtins above, filled in by pa_init_builtins.  */
static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];
595
/* Implement TARGET_INIT_BUILTINS.  Register the PA-specific builtin
   functions and retarget a few standard builtins for HP-UX.  */

static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    /* Substitute putc_unlocked for fputc_unlocked when the latter is
       unavailable on the target.  */
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
		      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    /* HP-UX 11 libm uses different assembler names for these.  */
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
						 "__float128");

      /* TFmode support builtins.  */
      /* long double __builtin_fabsq (long double), implemented by the
	 _U_Qfabs library routine.  */
      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
				   PA_BUILTIN_FABSQ, BUILT_IN_MD,
				   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      /* long double __builtin_copysignq (long double, long double),
	 implemented by _U_Qfcopysign.  */
      ftype = build_function_type_list (long_double_type_node,
					long_double_type_node,
					long_double_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
				   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
				   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      /* __builtin_infq and __builtin_huge_valq share the same
	 zero-argument function type and have no library fallback.  */
      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
				   PA_BUILTIN_INFQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
				   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}
657
658 static rtx
pa_expand_builtin(tree exp,rtx target,rtx subtarget ATTRIBUTE_UNUSED,machine_mode mode ATTRIBUTE_UNUSED,int ignore ATTRIBUTE_UNUSED)659 pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
660 machine_mode mode ATTRIBUTE_UNUSED,
661 int ignore ATTRIBUTE_UNUSED)
662 {
663 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
664 unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
665
666 switch (fcode)
667 {
668 case PA_BUILTIN_FABSQ:
669 case PA_BUILTIN_COPYSIGNQ:
670 return expand_call (exp, target, ignore);
671
672 case PA_BUILTIN_INFQ:
673 case PA_BUILTIN_HUGE_VALQ:
674 {
675 machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
676 REAL_VALUE_TYPE inf;
677 rtx tmp;
678
679 real_inf (&inf);
680 tmp = const_double_from_real_value (inf, target_mode);
681
682 tmp = validize_mem (force_const_mem (target_mode, tmp));
683
684 if (target == 0)
685 target = gen_reg_rtx (target_mode);
686
687 emit_move_insn (target, tmp);
688 return target;
689 }
690
691 default:
692 gcc_unreachable ();
693 }
694
695 return NULL_RTX;
696 }
697
/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  /* GC-allocate a zero-initialized per-function machine_function.  */
  return ggc_cleared_alloc<machine_function> ();
}
707
708 /* If FROM is a probable pointer register, mark TO as a probable
709 pointer register with the same pointer alignment as FROM. */
710
711 static void
copy_reg_pointer(rtx to,rtx from)712 copy_reg_pointer (rtx to, rtx from)
713 {
714 if (REG_POINTER (from))
715 mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
716 }
717
718 /* Return 1 if X contains a symbolic expression. We know these
719 expressions will have one of a few well defined forms, so
720 we need only check those forms. */
721 int
pa_symbolic_expression_p(rtx x)722 pa_symbolic_expression_p (rtx x)
723 {
724
725 /* Strip off any HIGH. */
726 if (GET_CODE (x) == HIGH)
727 x = XEXP (x, 0);
728
729 return symbolic_operand (x, VOIDmode);
730 }
731
732 /* Accept any constant that can be moved in one instruction into a
733 general register. */
734 int
pa_cint_ok_for_move(unsigned HOST_WIDE_INT ival)735 pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival)
736 {
737 /* OK if ldo, ldil, or zdepi, can be used. */
738 return (VAL_14_BITS_P (ival)
739 || pa_ldil_cint_p (ival)
740 || pa_zdepi_cint_p (ival));
741 }
742
743 /* True iff ldil can be used to load this CONST_INT. The least
744 significant 11 bits of the value must be zero and the value must
745 not change sign when extended from 32 to 64 bits. */
746 int
pa_ldil_cint_p(unsigned HOST_WIDE_INT ival)747 pa_ldil_cint_p (unsigned HOST_WIDE_INT ival)
748 {
749 unsigned HOST_WIDE_INT x;
750
751 x = ival & (((unsigned HOST_WIDE_INT) -1 << 31) | 0x7ff);
752 return x == 0 || x == ((unsigned HOST_WIDE_INT) -1 << 31);
753 }
754
755 /* True iff zdepi can be used to generate this CONST_INT.
756 zdepi first sign extends a 5-bit signed number to a given field
757 length, then places this field anywhere in a zero. */
758 int
pa_zdepi_cint_p(unsigned HOST_WIDE_INT x)759 pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
760 {
761 unsigned HOST_WIDE_INT lsb_mask, t;
762
763 /* This might not be obvious, but it's at least fast.
764 This function is critical; we don't have the time loops would take. */
765 lsb_mask = x & -x;
766 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
767 /* Return true iff t is a power of two. */
768 return ((t & (t - 1)) == 0);
769 }
770
771 /* True iff depi or extru can be used to compute (reg & mask).
772 Accept bit pattern like these:
773 0....01....1
774 1....10....0
775 1..10..01..1 */
776 int
pa_and_mask_p(unsigned HOST_WIDE_INT mask)777 pa_and_mask_p (unsigned HOST_WIDE_INT mask)
778 {
779 mask = ~mask;
780 mask += mask & -mask;
781 return (mask & (mask - 1)) == 0;
782 }
783
784 /* True iff depi can be used to compute (reg | MASK). */
785 int
pa_ior_mask_p(unsigned HOST_WIDE_INT mask)786 pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
787 {
788 mask += mask & -mask;
789 return (mask & (mask - 1)) == 0;
790 }
791
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  MODE is the mode of the address
   being legitimized.  */

static rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  /* TLS symbols must go through legitimize_tls_address instead.  */
  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx_insn *insn;

      /* We do not want to go through the movXX expanders here since that
	 would create recursion.

	 Nor do we really want to call a generator for a named pattern
	 since that requires multiple patterns if we want to support
	 multiple word sizes.

	 So instead we just emit the raw set, which avoids the movXX
	 expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
	 and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
	{
	  /* Extract LABEL_REF.  */
	  if (GET_CODE (orig) == CONST)
	    orig = XEXP (XEXP (orig, 0), 0);
	  /* Extract CODE_LABEL.  */
	  orig = XEXP (orig, 0);
	  add_reg_note (insn, REG_LABEL_OPERAND, orig);
	  /* Make sure we have label and not a note.  */
	  if (LABEL_P (orig))
	    LABEL_NUSES (orig)++;
	}
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx_insn *insn;
      rtx tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
	 result.  This allows the sequence to be deleted when the final
	 result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
		 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
	{
	  /* Force function label into memory in word mode.  */
	  orig = XEXP (force_const_mem (word_mode, orig), 0);
	  /* Load plabel address from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	  emit_move_insn (reg, pic_ref);
	  /* Now load address of function descriptor.  */
	  pic_ref = gen_rtx_MEM (Pmode, reg);
	}
      else
	{
	  /* Load symbol reference from DLT.  */
	  emit_move_insn (tmp_reg,
			  gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
					gen_rtx_HIGH (word_mode, orig)));
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_LO_SUM (Pmode, tmp_reg,
					     gen_rtx_UNSPEC (Pmode,
							     gen_rtvec (1, orig),
							     UNSPEC_DLTIND14R)));
	}

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      /* Already legitimate if it is an offset from the PIC register.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      /* Legitimize both operands of the PLUS; the second may only use
	 REG when the first did not consume it.  */
      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
	{
	  /* A small offset can be folded directly into the address.  */
	  if (INT_14_BITS (orig))
	    return plus_constant (Pmode, base, INTVAL (orig));
	  orig = force_reg (Pmode, orig);
	}
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}
922
/* Cached rtx for the "__tls_get_addr" libfunc; created lazily by
   gen_tls_get_addr and preserved across GC by the GTY marker.  */
static GTY(()) rtx gen_tls_tga;
924
925 static rtx
gen_tls_get_addr(void)926 gen_tls_get_addr (void)
927 {
928 if (!gen_tls_tga)
929 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
930 return gen_tls_tga;
931 }
932
933 static rtx
hppa_tls_call(rtx arg)934 hppa_tls_call (rtx arg)
935 {
936 rtx ret;
937
938 ret = gen_reg_rtx (Pmode);
939 emit_library_call_value (gen_tls_get_addr (), ret,
940 LCT_CONST, Pmode, arg, Pmode);
941
942 return ret;
943 }
944
/* Expand a reference to TLS symbol ADDR into RTL implementing the
   access model recorded on the symbol, returning a register holding
   the address.  Anything other than a SYMBOL_REF is returned
   unchanged.  */

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, tmp, t1, t2, tp;
  rtx_insn *insn;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      /* Load the GD argument and let __tls_get_addr compute the
	 address at run time.  */
      tmp = gen_reg_rtx (Pmode);
      if (flag_pic)
	emit_insn (gen_tgd_load_pic (tmp, addr));
      else
	emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      start_sequence ();
      if (flag_pic)
	emit_insn (gen_tld_load_pic (tmp, addr));
      else
	emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      insn = get_insns ();
      end_sequence ();
      t2 = gen_reg_rtx (Pmode);
      /* Wrap the call in a libcall block equivalent to the
	 UNSPEC_TLSLDBASE value so the module base can be CSEd
	 across multiple local-dynamic accesses.  */
      emit_libcall_block (insn, t2, t1,
			  gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					  UNSPEC_TLSLDBASE));
      emit_insn (gen_tld_offset_load (ret, addr, t2));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      /* Thread pointer plus an offset loaded indirectly.  */
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      if (flag_pic)
	emit_insn (gen_tie_load_pic (tmp, addr));
      else
	emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      /* Thread pointer plus a link-time constant offset.  */
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}
1009
1010 /* Helper for hppa_legitimize_address. Given X, return true if it
1011 is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.
1012
1013 This respectively represent canonical shift-add rtxs or scaled
1014 memory addresses. */
1015 static bool
mem_shadd_or_shadd_rtx_p(rtx x)1016 mem_shadd_or_shadd_rtx_p (rtx x)
1017 {
1018 return ((GET_CODE (x) == ASHIFT
1019 || GET_CODE (x) == MULT)
1020 && GET_CODE (XEXP (x, 1)) == CONST_INT
1021 && ((GET_CODE (x) == ASHIFT
1022 && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
1023 || (GET_CODE (x) == MULT
1024 && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
1025 }
1026
1027 /* Try machine-dependent ways of modifying an illegitimate address
1028 to be legitimate. If we find one, return the new, valid address.
1029 This macro is used in only one place: `memory_address' in explow.c.
1030
1031 OLDX is the address as it was before break_out_memory_refs was called.
1032 In some cases it is useful to look at this to decide what needs to be done.
1033
1034 It is always safe for this macro to do nothing. It exists to recognize
1035 opportunities to optimize the output.
1036
1037 For the PA, transform:
1038
1039 memory(X + <large int>)
1040
1041 into:
1042
1043 if (<large int> & mask) >= 16
1044 Y = (<large int> & ~mask) + mask + 1 Round up.
1045 else
1046 Y = (<large int> & ~mask) Round down.
1047 Z = X + Y
1048 memory (Z + (<large int> - Y));
1049
1050 This is for CSE to find several similar references, and only use one Z.
1051
1052 X can either be a SYMBOL_REF or REG, but because combine cannot
1053 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
1054 D will not fit in 14 bits.
1055
1056 MODE_FLOAT references allow displacements which fit in 5 bits, so use
1057 0x1f as the mask.
1058
1059 MODE_INT references allow displacements which fit in 14 bits, so use
1060 0x3fff as the mask.
1061
1062 This relies on the fact that most mode MODE_FLOAT references will use FP
1063 registers and most mode MODE_INT references will use integer registers.
1064 (In the rare case of an FP register used in an integer MODE, we depend
1065 on secondary reloads to clean things up.)
1066
1067
1068 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
1069 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
1070 addressing modes to be used).
1071
1072 Note that the addresses passed into hppa_legitimize_address always
1073 come from a MEM, so we only have to match the MULT form on incoming
1074 addresses. But to be future proof we also match the ASHIFT form.
1075
1076 However, this routine always places those shift-add sequences into
1077 registers, so we have to generate the ASHIFT form as our output.
1078
1079 Put X and Z into registers. Then put the entire expression into
1080 a register. */
1081
rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 machine_mode mode)
{
  /* Keep the incoming address so the untransformed form can be
     returned when no rewrite applies.  */
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  /* TLS and PIC references each have their own legitimizers.  */
  if (tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST. */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	   && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
	  || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      /* 5-bit displacements for FP references unless 14-bit ones are
	 known safe; 14-bit displacements otherwise (see the function
	 header comment).  */
      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	      && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
	 are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~ mask) + mask + 1;
      else
	newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
	 handling this would take 4 or 5 instructions (2 to load
	 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
	 add the new offset and the SYMBOL_REF.)  Combine cannot
	 handle 4->2 or 5->2 combinations, so do not create
	 them.  */
      if (! VAL_14_BITS_P (newoffset)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
	{
	  rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
	  rtx tmp_reg
	    = force_reg (Pmode,
			 gen_rtx_HIGH (Pmode, const_part));
	  ptr_reg
	    = force_reg (Pmode,
			 gen_rtx_LO_SUM (Pmode,
					 tmp_reg, const_part));
	}
      else
	{
	  if (! VAL_14_BITS_P (newoffset))
	    int_part = force_reg (Pmode, GEN_INT (newoffset));
	  else
	    int_part = GEN_INT (newoffset);

	  ptr_reg = force_reg (Pmode,
			       gen_rtx_PLUS (Pmode,
					     force_reg (Pmode, XEXP (x, 0)),
					     int_part));
	}
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (mem_shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
      && (OBJECT_P (XEXP (x, 1))
	  || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
      if (GET_CODE (XEXP (x, 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      rtx reg1, reg2;
      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg2,
						      GEN_INT (shift_val)),
				      reg1));
    }

  /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
      && (mode == SFmode || mode == DFmode))
    {
      int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));

      /* If we were given a MULT, we must fix the constant
	 as we're going to create the ASHIFT form.  */
      if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
	shift_val = exact_log2 (shift_val);

      /* Try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
	 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
	 it's a base register below.  */
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
	  && REG_POINTER (reg1))
	{
	  base = reg1;
	  idx = gen_rtx_PLUS (Pmode,
			      gen_rtx_ASHIFT (Pmode,
					      XEXP (XEXP (XEXP (x, 0), 0), 0),
					      GEN_INT (shift_val)),
			      XEXP (x, 1));
	}
      else if (GET_CODE (reg2) == REG
	       && REG_POINTER (reg2))
	{
	  base = reg2;
	  idx = XEXP (x, 0);
	}

      /* Without a recognizable base register, leave the address alone.  */
      if (base == 0)
	return orig;

      /* If the index adds a large constant, try to scale the
	 constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
			    / INTVAL (XEXP (XEXP (idx, 0), 1)))
	  && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
	{
	  /* Divide the CONST_INT by the scale factor, then add it to A.  */
	  int val = INTVAL (XEXP (idx, 1));
	  val /= (1 << shift_val);

	  reg1 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg1) != REG)
	    reg1 = force_reg (Pmode, force_operand (reg1, 0));

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

	  /* We can now generate a simple scaled indexed address.  */
	  return
	    force_reg
	    (Pmode, gen_rtx_PLUS (Pmode,
				  gen_rtx_ASHIFT (Pmode, reg1,
						  GEN_INT (shift_val)),
				  base));
	}

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
	  && INTVAL (XEXP (idx, 1)) <= 4096
	  && INTVAL (XEXP (idx, 1)) >= -4096)
	{
	  rtx reg1, reg2;

	  reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

	  reg2 = XEXP (XEXP (idx, 0), 0);
	  if (GET_CODE (reg2) != CONST_INT)
	    reg2 = force_reg (Pmode, force_operand (reg2, 0));

	  return force_reg (Pmode,
			    gen_rtx_PLUS (Pmode,
					  gen_rtx_ASHIFT (Pmode, reg2,
							  GEN_INT (shift_val)),
					  reg1));
	}

      /* Get the index into a register, then add the base + index and
	 return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
	reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
	reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
			gen_rtx_PLUS (Pmode,
				      gen_rtx_ASHIFT (Pmode, reg1,
						      GEN_INT (shift_val)),
				      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));

    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floatint point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
	 by the index expression is computed first, then added to x to form
	 the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
	y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
	{
	  /* See if this looks like
		(plus (mult (reg) (mem_shadd_const))
		      (const (plus (symbol_ref) (const_int))))

	     Where const_int is small.  In that case the const
	     expression is a valid pointer for indexing.

	     If const_int is big, but can be divided evenly by shadd_const
	     and added to (reg).  This allows more scaled indexed addresses.  */
	  if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
	      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
	      && GET_CODE (XEXP (y, 1)) == CONST_INT
	      && INTVAL (XEXP (y, 1)) >= -4096
	      && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      rtx reg1, reg2;

	      reg1 = XEXP (x, 1);
	      if (GET_CODE (reg1) != REG)
		reg1 = force_reg (Pmode, force_operand (reg1, 0));

	      reg2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (reg2) != REG)
		reg2 = force_reg (Pmode, force_operand (reg2, 0));

	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode,
							 reg2,
							 GEN_INT (shift_val)),
					 reg1));
	    }
	  else if ((mode == DFmode || mode == SFmode)
		   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
		   && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
		   && GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
	    {
	      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

	      /* If we were given a MULT, we must fix the constant
		 as we're going to create the ASHIFT form.  */
	      if (GET_CODE (XEXP (x, 0)) == MULT)
		shift_val = exact_log2 (shift_val);

	      regx1
		= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
					     / INTVAL (XEXP (XEXP (x, 0), 1))));
	      regx2 = XEXP (XEXP (x, 0), 0);
	      if (GET_CODE (regx2) != REG)
		regx2 = force_reg (Pmode, force_operand (regx2, 0));
	      regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
							regx2, regx1));
	      return
		force_reg (Pmode,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_ASHIFT (Pmode, regx2,
							 GEN_INT (shift_val)),
					 force_reg (Pmode, XEXP (y, 0))));
	    }
	  else if (GET_CODE (XEXP (y, 1)) == CONST_INT
		   && INTVAL (XEXP (y, 1)) >= -4096
		   && INTVAL (XEXP (y, 1)) <= 4095)
	    {
	      /* This is safe because of the guard page at the
		 beginning and end of the data space.  Just
		 return the original address.  */
	      return orig;
	    }
	  else
	    {
	      /* Doesn't look like one we can optimize.  */
	      regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
	      regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
	      regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
	      regx1 = force_reg (Pmode,
				 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
						 regx1, regy2));
	      return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
	    }
	}
    }

  return orig;
}
1450
1451 /* Implement the TARGET_REGISTER_MOVE_COST hook.
1452
1453 Compute extra cost of moving data between one register class
1454 and another.
1455
1456 Make moves from SAR so expensive they should never happen. We used to
1457 have 0xffff here, but that generates overflow in rare cases.
1458
1459 Copies involving a FP register and a non-FP register are relatively
1460 expensive because they must go through memory.
1461
1462 Other copies are reasonably cheap. */
1463
1464 static int
hppa_register_move_cost(machine_mode mode ATTRIBUTE_UNUSED,reg_class_t from,reg_class_t to)1465 hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
1466 reg_class_t from, reg_class_t to)
1467 {
1468 if (from == SHIFT_REGS)
1469 return 0x100;
1470 else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
1471 return 18;
1472 else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
1473 || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
1474 return 16;
1475 else
1476 return 2;
1477 }
1478
1479 /* For the HPPA, REG and REG+CONST is cost 0
1480 and addresses involving symbolic constants are cost 2.
1481
1482 PIC addresses are very expensive.
1483
1484 It is no coincidence that this has the same structure
1485 as pa_legitimate_address_p. */
1486
1487 static int
hppa_address_cost(rtx X,machine_mode mode ATTRIBUTE_UNUSED,addr_space_t as ATTRIBUTE_UNUSED,bool speed ATTRIBUTE_UNUSED)1488 hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
1489 addr_space_t as ATTRIBUTE_UNUSED,
1490 bool speed ATTRIBUTE_UNUSED)
1491 {
1492 switch (GET_CODE (X))
1493 {
1494 case REG:
1495 case PLUS:
1496 case LO_SUM:
1497 return 1;
1498 case HIGH:
1499 return 2;
1500 default:
1501 return 4;
1502 }
1503 }
1504
1505 /* Return true if X represents a (possibly non-canonical) shNadd pattern.
1506 The machine mode of X is known to be SImode or DImode. */
1507
1508 static bool
hppa_rtx_costs_shadd_p(rtx x)1509 hppa_rtx_costs_shadd_p (rtx x)
1510 {
1511 if (GET_CODE (x) != PLUS
1512 || !REG_P (XEXP (x, 1)))
1513 return false;
1514 rtx op0 = XEXP (x, 0);
1515 if (GET_CODE (op0) == ASHIFT
1516 && CONST_INT_P (XEXP (op0, 1))
1517 && REG_P (XEXP (op0, 0)))
1518 {
1519 unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1));
1520 return x == 1 || x == 2 || x == 3;
1521 }
1522 if (GET_CODE (op0) == MULT
1523 && CONST_INT_P (XEXP (op0, 1))
1524 && REG_P (XEXP (op0, 0)))
1525 {
1526 unsigned HOST_WIDE_INT x = UINTVAL (XEXP (op0, 1));
1527 return x == 2 || x == 4 || x == 8;
1528 }
1529 return false;
1530 }
1531
1532 /* Compute a (partial) cost for rtx X. Return true if the complete
1533 cost has been computed, and false if subexpressions should be
1534 scanned. In either case, *TOTAL contains the cost result. */
1535
static bool
hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
		int opno ATTRIBUTE_UNUSED,
		int *total, bool speed)
{
  int code = GET_CODE (x);

  switch (code)
    {
    case CONST_INT:
      /* Zero is free in most contexts; a 14-bit immediate fits a
	 single instruction's displacement field.  */
      if (outer_code == SET)
	*total = COSTS_N_INSNS (1);
      else if (INTVAL (x) == 0)
	*total = 0;
      else if (INT_14_BITS (x))
	*total = 1;
      else
	*total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      /* FP zero can often be generated without a load.  */
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
	  && outer_code != SET)
	*total = 0;
      else
	*total = 8;
      return true;

    case MULT:
      /* FP multiply is cheap; integer multiply either goes through the
	 FP unit (PA 1.1 and up) or a millicode/library call.  */
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (3);
	}
      else if (mode == DImode)
	{
	  if (TARGET_PA_11 && !TARGET_SOFT_FLOAT && !TARGET_SOFT_MULT)
	    *total = COSTS_N_INSNS (25);
	  else
	    *total = COSTS_N_INSNS (80);
	}
      else
	{
	  if (TARGET_PA_11 && !TARGET_SOFT_FLOAT && !TARGET_SOFT_MULT)
	    *total = COSTS_N_INSNS (8);
	  else
	    *total = COSTS_N_INSNS (20);
	}
      /* Only a REG x REG multiply is fully costed here; otherwise let
	 the subexpressions be scanned too.  */
      return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1));

    case DIV:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *total = COSTS_N_INSNS (14);
	  return false;
	}
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      if (mode == DImode)
	*total = COSTS_N_INSNS (240);
      else
	*total = COSTS_N_INSNS (60);
      return REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1));

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	*total = COSTS_N_INSNS (3);
      else if (mode == DImode)
	{
	  if (TARGET_64BIT)
	    {
	      *total = COSTS_N_INSNS (1);
	      /* Handle shladd,l instructions.  */
	      if (hppa_rtx_costs_shadd_p (x))
		return true;
	    }
	  else
	    *total = COSTS_N_INSNS (2);
	}
      else
	{
	  *total = COSTS_N_INSNS (1);
	  /* Handle shNadd instructions.  */
	  if (hppa_rtx_costs_shadd_p (x))
	    return true;
	}
      return REG_P (XEXP (x, 0))
	     && (REG_P (XEXP (x, 1))
		 || CONST_INT_P (XEXP (x, 1)));

    case ASHIFT:
      /* Constant-count shifts are single (or double, without 64-bit
	 support) instructions; variable DImode shifts on 32-bit
	 targets expand to a long sequence.  */
      if (mode == DImode)
	{
	  if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
	    {
	      if (TARGET_64BIT)
		*total = COSTS_N_INSNS (1);
	      else
		*total = COSTS_N_INSNS (2);
	      return true;
	    }
	  else if (TARGET_64BIT)
	    *total = COSTS_N_INSNS (3);
	  else if (speed)
	    *total = COSTS_N_INSNS (13);
	  else
	    *total = COSTS_N_INSNS (18);
	}
      else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
	{
	  if (TARGET_64BIT)
	    *total = COSTS_N_INSNS (2);
	  else
	    *total = COSTS_N_INSNS (1);
	  return true;
	}
      else if (TARGET_64BIT)
	*total = COSTS_N_INSNS (4);
      else
	*total = COSTS_N_INSNS (2);
      return REG_P (XEXP (x, 0))
	     && (REG_P (XEXP (x, 1))
		 || CONST_INT_P (XEXP (x, 1)));

    case ASHIFTRT:
      /* Same shape as ASHIFT, with slightly different sequence
	 lengths for variable DImode shifts.  */
      if (mode == DImode)
	{
	  if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
	    {
	      if (TARGET_64BIT)
		*total = COSTS_N_INSNS (1);
	      else
		*total = COSTS_N_INSNS (2);
	      return true;
	    }
	  else if (TARGET_64BIT)
	    *total = COSTS_N_INSNS (3);
	  else if (speed)
	    *total = COSTS_N_INSNS (14);
	  else
	    *total = COSTS_N_INSNS (19);
	}
      else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
	{
	  if (TARGET_64BIT)
	    *total = COSTS_N_INSNS (2);
	  else
	    *total = COSTS_N_INSNS (1);
	  return true;
	}
      else if (TARGET_64BIT)
	*total = COSTS_N_INSNS (4);
      else
	*total = COSTS_N_INSNS (2);
      return REG_P (XEXP (x, 0))
	     && (REG_P (XEXP (x, 1))
		 || CONST_INT_P (XEXP (x, 1)));

    case LSHIFTRT:
      if (mode == DImode)
	{
	  if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
	    {
	      if (TARGET_64BIT)
		*total = COSTS_N_INSNS (1);
	      else
		*total = COSTS_N_INSNS (2);
	      return true;
	    }
	  else if (TARGET_64BIT)
	    *total = COSTS_N_INSNS (2);
	  else if (speed)
	    *total = COSTS_N_INSNS (12);
	  else
	    *total = COSTS_N_INSNS (15);
	}
      else if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      else if (TARGET_64BIT)
	*total = COSTS_N_INSNS (3);
      else
	*total = COSTS_N_INSNS (2);
      return REG_P (XEXP (x, 0))
	     && (REG_P (XEXP (x, 1))
		 || CONST_INT_P (XEXP (x, 1)));

    default:
      return false;
    }
}
1743
1744 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1745 new rtx with the correct mode. */
1746 static inline rtx
force_mode(machine_mode mode,rtx orig)1747 force_mode (machine_mode mode, rtx orig)
1748 {
1749 if (mode == GET_MODE (orig))
1750 return orig;
1751
1752 gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
1753
1754 return gen_rtx_REG (mode, REGNO (orig));
1755 }
1756
1757 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
1758
1759 static bool
pa_cannot_force_const_mem(machine_mode mode ATTRIBUTE_UNUSED,rtx x)1760 pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1761 {
1762 return tls_referenced_p (x);
1763 }
1764
1765 /* Emit insns to move operands[1] into operands[0].
1766
1767 Return 1 if we have written out everything that needs to be done to
1768 do the move. Otherwise, return 0 and the caller will emit the move
1769 normally.
1770
1771 Note SCRATCH_REG may not be in the proper mode depending on how it
1772 will be used. This routine is responsible for creating a new copy
1773 of SCRATCH_REG in the proper mode. */
1774
1775 int
pa_emit_move_sequence(rtx * operands,machine_mode mode,rtx scratch_reg)1776 pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
1777 {
1778 rtx operand0 = operands[0];
1779 rtx operand1 = operands[1];
1780 rtx tem;
1781
1782 /* We can only handle indexed addresses in the destination operand
1783 of floating point stores. Thus, we need to break out indexed
1784 addresses from the destination operand. */
1785 if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1786 {
1787 gcc_assert (can_create_pseudo_p ());
1788
1789 tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1790 operand0 = replace_equiv_address (operand0, tem);
1791 }
1792
1793 /* On targets with non-equivalent space registers, break out unscaled
1794 indexed addresses from the source operand before the final CSE.
1795 We have to do this because the REG_POINTER flag is not correctly
1796 carried through various optimization passes and CSE may substitute
1797 a pseudo without the pointer set for one with the pointer set. As
1798 a result, we loose various opportunities to create insns with
1799 unscaled indexed addresses. */
1800 if (!TARGET_NO_SPACE_REGS
1801 && !cse_not_expected
1802 && GET_CODE (operand1) == MEM
1803 && GET_CODE (XEXP (operand1, 0)) == PLUS
1804 && REG_P (XEXP (XEXP (operand1, 0), 0))
1805 && REG_P (XEXP (XEXP (operand1, 0), 1)))
1806 operand1
1807 = replace_equiv_address (operand1,
1808 copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1809
1810 if (scratch_reg
1811 && reload_in_progress && GET_CODE (operand0) == REG
1812 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1813 operand0 = reg_equiv_mem (REGNO (operand0));
1814 else if (scratch_reg
1815 && reload_in_progress && GET_CODE (operand0) == SUBREG
1816 && GET_CODE (SUBREG_REG (operand0)) == REG
1817 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1818 {
1819 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1820 the code which tracks sets/uses for delete_output_reload. */
1821 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1822 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
1823 SUBREG_BYTE (operand0));
1824 operand0 = alter_subreg (&temp, true);
1825 }
1826
1827 if (scratch_reg
1828 && reload_in_progress && GET_CODE (operand1) == REG
1829 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1830 operand1 = reg_equiv_mem (REGNO (operand1));
1831 else if (scratch_reg
1832 && reload_in_progress && GET_CODE (operand1) == SUBREG
1833 && GET_CODE (SUBREG_REG (operand1)) == REG
1834 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1835 {
1836 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1837 the code which tracks sets/uses for delete_output_reload. */
1838 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1839 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
1840 SUBREG_BYTE (operand1));
1841 operand1 = alter_subreg (&temp, true);
1842 }
1843
1844 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1845 && ((tem = find_replacement (&XEXP (operand0, 0)))
1846 != XEXP (operand0, 0)))
1847 operand0 = replace_equiv_address (operand0, tem);
1848
1849 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1850 && ((tem = find_replacement (&XEXP (operand1, 0)))
1851 != XEXP (operand1, 0)))
1852 operand1 = replace_equiv_address (operand1, tem);
1853
1854 /* Handle secondary reloads for loads/stores of FP registers from
1855 REG+D addresses where D does not fit in 5 or 14 bits, including
1856 (subreg (mem (addr))) cases, and reloads for other unsupported
1857 memory operands. */
1858 if (scratch_reg
1859 && FP_REG_P (operand0)
1860 && (MEM_P (operand1)
1861 || (GET_CODE (operand1) == SUBREG
1862 && MEM_P (XEXP (operand1, 0)))))
1863 {
1864 rtx op1 = operand1;
1865
1866 if (GET_CODE (op1) == SUBREG)
1867 op1 = XEXP (op1, 0);
1868
1869 if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
1870 {
1871 if (!(TARGET_PA_20
1872 && !TARGET_ELF32
1873 && INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1874 && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
1875 {
1876 /* SCRATCH_REG will hold an address and maybe the actual data.
1877 We want it in WORD_MODE regardless of what mode it was
1878 originally given to us. */
1879 scratch_reg = force_mode (word_mode, scratch_reg);
1880
1881 /* D might not fit in 14 bits either; for such cases load D
1882 into scratch reg. */
1883 if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1884 {
1885 emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
1886 emit_move_insn (scratch_reg,
1887 gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
1888 Pmode,
1889 XEXP (XEXP (op1, 0), 0),
1890 scratch_reg));
1891 }
1892 else
1893 emit_move_insn (scratch_reg, XEXP (op1, 0));
1894 op1 = replace_equiv_address (op1, scratch_reg);
1895 }
1896 }
1897 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
1898 || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
1899 || IS_INDEX_ADDR_P (XEXP (op1, 0)))
1900 {
1901 /* Load memory address into SCRATCH_REG. */
1902 scratch_reg = force_mode (word_mode, scratch_reg);
1903 emit_move_insn (scratch_reg, XEXP (op1, 0));
1904 op1 = replace_equiv_address (op1, scratch_reg);
1905 }
1906 emit_insn (gen_rtx_SET (operand0, op1));
1907 return 1;
1908 }
1909 else if (scratch_reg
1910 && FP_REG_P (operand1)
1911 && (MEM_P (operand0)
1912 || (GET_CODE (operand0) == SUBREG
1913 && MEM_P (XEXP (operand0, 0)))))
1914 {
1915 rtx op0 = operand0;
1916
1917 if (GET_CODE (op0) == SUBREG)
1918 op0 = XEXP (op0, 0);
1919
1920 if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
1921 {
1922 if (!(TARGET_PA_20
1923 && !TARGET_ELF32
1924 && INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1925 && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
1926 {
1927 /* SCRATCH_REG will hold an address and maybe the actual data.
1928 We want it in WORD_MODE regardless of what mode it was
1929 originally given to us. */
1930 scratch_reg = force_mode (word_mode, scratch_reg);
1931
1932 /* D might not fit in 14 bits either; for such cases load D
1933 into scratch reg. */
1934 if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1935 {
1936 emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
1937 emit_move_insn (scratch_reg,
1938 gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
1939 Pmode,
1940 XEXP (XEXP (op0, 0), 0),
1941 scratch_reg));
1942 }
1943 else
1944 emit_move_insn (scratch_reg, XEXP (op0, 0));
1945 op0 = replace_equiv_address (op0, scratch_reg);
1946 }
1947 }
1948 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
1949 || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
1950 || IS_INDEX_ADDR_P (XEXP (op0, 0)))
1951 {
1952 /* Load memory address into SCRATCH_REG. */
1953 scratch_reg = force_mode (word_mode, scratch_reg);
1954 emit_move_insn (scratch_reg, XEXP (op0, 0));
1955 op0 = replace_equiv_address (op0, scratch_reg);
1956 }
1957 emit_insn (gen_rtx_SET (op0, operand1));
1958 return 1;
1959 }
1960 /* Handle secondary reloads for loads of FP registers from constant
1961 expressions by forcing the constant into memory. For the most part,
1962 this is only necessary for SImode and DImode.
1963
1964 Use scratch_reg to hold the address of the memory location. */
1965 else if (scratch_reg
1966 && CONSTANT_P (operand1)
1967 && FP_REG_P (operand0))
1968 {
1969 rtx const_mem, xoperands[2];
1970
1971 if (operand1 == CONST0_RTX (mode))
1972 {
1973 emit_insn (gen_rtx_SET (operand0, operand1));
1974 return 1;
1975 }
1976
1977 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1978 it in WORD_MODE regardless of what mode it was originally given
1979 to us. */
1980 scratch_reg = force_mode (word_mode, scratch_reg);
1981
1982 /* Force the constant into memory and put the address of the
1983 memory location into scratch_reg. */
1984 const_mem = force_const_mem (mode, operand1);
1985 xoperands[0] = scratch_reg;
1986 xoperands[1] = XEXP (const_mem, 0);
1987 pa_emit_move_sequence (xoperands, Pmode, 0);
1988
1989 /* Now load the destination register. */
1990 emit_insn (gen_rtx_SET (operand0,
1991 replace_equiv_address (const_mem, scratch_reg)));
1992 return 1;
1993 }
1994 /* Handle secondary reloads for SAR. These occur when trying to load
1995 the SAR from memory or a constant. */
1996 else if (scratch_reg
1997 && GET_CODE (operand0) == REG
1998 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1999 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
2000 && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
2001 {
2002 /* D might not fit in 14 bits either; for such cases load D into
2003 scratch reg. */
2004 if (GET_CODE (operand1) == MEM
2005 && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
2006 {
2007 /* We are reloading the address into the scratch register, so we
2008 want to make sure the scratch register is a full register. */
2009 scratch_reg = force_mode (word_mode, scratch_reg);
2010
2011 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
2012 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
2013 0)),
2014 Pmode,
2015 XEXP (XEXP (operand1, 0),
2016 0),
2017 scratch_reg));
2018
2019 /* Now we are going to load the scratch register from memory,
2020 we want to load it in the same width as the original MEM,
2021 which must be the same as the width of the ultimate destination,
2022 OPERAND0. */
2023 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
2024
2025 emit_move_insn (scratch_reg,
2026 replace_equiv_address (operand1, scratch_reg));
2027 }
2028 else
2029 {
2030 /* We want to load the scratch register using the same mode as
2031 the ultimate destination. */
2032 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
2033
2034 emit_move_insn (scratch_reg, operand1);
2035 }
2036
2037 /* And emit the insn to set the ultimate destination. We know that
2038 the scratch register has the same mode as the destination at this
2039 point. */
2040 emit_move_insn (operand0, scratch_reg);
2041 return 1;
2042 }
2043
2044 /* Handle the most common case: storing into a register. */
2045 if (register_operand (operand0, mode))
2046 {
2047 /* Legitimize TLS symbol references. This happens for references
2048 that aren't a legitimate constant. */
2049 if (PA_SYMBOL_REF_TLS_P (operand1))
2050 operand1 = legitimize_tls_address (operand1);
2051
2052 if (register_operand (operand1, mode)
2053 || (GET_CODE (operand1) == CONST_INT
2054 && pa_cint_ok_for_move (UINTVAL (operand1)))
2055 || (operand1 == CONST0_RTX (mode))
2056 || (GET_CODE (operand1) == HIGH
2057 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
2058 /* Only `general_operands' can come here, so MEM is ok. */
2059 || GET_CODE (operand1) == MEM)
2060 {
2061 /* Various sets are created during RTL generation which don't
2062 have the REG_POINTER flag correctly set. After the CSE pass,
2063 instruction recognition can fail if we don't consistently
2064 set this flag when performing register copies. This should
2065 also improve the opportunities for creating insns that use
2066 unscaled indexing. */
2067 if (REG_P (operand0) && REG_P (operand1))
2068 {
2069 if (REG_POINTER (operand1)
2070 && !REG_POINTER (operand0)
2071 && !HARD_REGISTER_P (operand0))
2072 copy_reg_pointer (operand0, operand1);
2073 }
2074
2075 /* When MEMs are broken out, the REG_POINTER flag doesn't
2076 get set. In some cases, we can set the REG_POINTER flag
2077 from the declaration for the MEM. */
2078 if (REG_P (operand0)
2079 && GET_CODE (operand1) == MEM
2080 && !REG_POINTER (operand0))
2081 {
2082 tree decl = MEM_EXPR (operand1);
2083
2084 /* Set the register pointer flag and register alignment
2085 if the declaration for this memory reference is a
2086 pointer type. */
2087 if (decl)
2088 {
2089 tree type;
2090
2091 /* If this is a COMPONENT_REF, use the FIELD_DECL from
2092 tree operand 1. */
2093 if (TREE_CODE (decl) == COMPONENT_REF)
2094 decl = TREE_OPERAND (decl, 1);
2095
2096 type = TREE_TYPE (decl);
2097 type = strip_array_types (type);
2098
2099 if (POINTER_TYPE_P (type))
2100 mark_reg_pointer (operand0, BITS_PER_UNIT);
2101 }
2102 }
2103
2104 emit_insn (gen_rtx_SET (operand0, operand1));
2105 return 1;
2106 }
2107 }
2108 else if (GET_CODE (operand0) == MEM)
2109 {
2110 if (mode == DFmode && operand1 == CONST0_RTX (mode)
2111 && !(reload_in_progress || reload_completed))
2112 {
2113 rtx temp = gen_reg_rtx (DFmode);
2114
2115 emit_insn (gen_rtx_SET (temp, operand1));
2116 emit_insn (gen_rtx_SET (operand0, temp));
2117 return 1;
2118 }
2119 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
2120 {
2121 /* Run this case quickly. */
2122 emit_insn (gen_rtx_SET (operand0, operand1));
2123 return 1;
2124 }
2125 if (! (reload_in_progress || reload_completed))
2126 {
2127 operands[0] = validize_mem (operand0);
2128 operands[1] = operand1 = force_reg (mode, operand1);
2129 }
2130 }
2131
2132 /* Simplify the source if we need to.
2133 Note we do have to handle function labels here, even though we do
2134 not consider them legitimate constants. Loop optimizations can
2135 call the emit_move_xxx with one as a source. */
2136 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
2137 || (GET_CODE (operand1) == HIGH
2138 && symbolic_operand (XEXP (operand1, 0), mode))
2139 || function_label_operand (operand1, VOIDmode)
2140 || tls_referenced_p (operand1))
2141 {
2142 int ishighonly = 0;
2143
2144 if (GET_CODE (operand1) == HIGH)
2145 {
2146 ishighonly = 1;
2147 operand1 = XEXP (operand1, 0);
2148 }
2149 if (symbolic_operand (operand1, mode))
2150 {
2151 /* Argh. The assembler and linker can't handle arithmetic
2152 involving plabels.
2153
2154 So we force the plabel into memory, load operand0 from
2155 the memory location, then add in the constant part. */
2156 if ((GET_CODE (operand1) == CONST
2157 && GET_CODE (XEXP (operand1, 0)) == PLUS
2158 && function_label_operand (XEXP (XEXP (operand1, 0), 0),
2159 VOIDmode))
2160 || function_label_operand (operand1, VOIDmode))
2161 {
2162 rtx temp, const_part;
2163
2164 /* Figure out what (if any) scratch register to use. */
2165 if (reload_in_progress || reload_completed)
2166 {
2167 scratch_reg = scratch_reg ? scratch_reg : operand0;
2168 /* SCRATCH_REG will hold an address and maybe the actual
2169 data. We want it in WORD_MODE regardless of what mode it
2170 was originally given to us. */
2171 scratch_reg = force_mode (word_mode, scratch_reg);
2172 }
2173 else if (flag_pic)
2174 scratch_reg = gen_reg_rtx (Pmode);
2175
2176 if (GET_CODE (operand1) == CONST)
2177 {
2178 /* Save away the constant part of the expression. */
2179 const_part = XEXP (XEXP (operand1, 0), 1);
2180 gcc_assert (GET_CODE (const_part) == CONST_INT);
2181
2182 /* Force the function label into memory. */
2183 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2184 }
2185 else
2186 {
2187 /* No constant part. */
2188 const_part = NULL_RTX;
2189
2190 /* Force the function label into memory. */
2191 temp = force_const_mem (mode, operand1);
2192 }
2193
2194
2195 /* Get the address of the memory location. PIC-ify it if
2196 necessary. */
2197 temp = XEXP (temp, 0);
2198 if (flag_pic)
2199 temp = legitimize_pic_address (temp, mode, scratch_reg);
2200
2201 /* Put the address of the memory location into our destination
2202 register. */
2203 operands[1] = temp;
2204 pa_emit_move_sequence (operands, mode, scratch_reg);
2205
2206 /* Now load from the memory location into our destination
2207 register. */
2208 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2209 pa_emit_move_sequence (operands, mode, scratch_reg);
2210
2211 /* And add back in the constant part. */
2212 if (const_part != NULL_RTX)
2213 expand_inc (operand0, const_part);
2214
2215 return 1;
2216 }
2217
2218 if (flag_pic)
2219 {
2220 rtx_insn *insn;
2221 rtx temp;
2222
2223 if (reload_in_progress || reload_completed)
2224 {
2225 temp = scratch_reg ? scratch_reg : operand0;
2226 /* TEMP will hold an address and maybe the actual
2227 data. We want it in WORD_MODE regardless of what mode it
2228 was originally given to us. */
2229 temp = force_mode (word_mode, temp);
2230 }
2231 else
2232 temp = gen_reg_rtx (Pmode);
2233
2234 /* Force (const (plus (symbol) (const_int))) to memory
2235 if the const_int will not fit in 14 bits. Although
2236 this requires a relocation, the instruction sequence
2237 needed to load the value is shorter. */
2238 if (GET_CODE (operand1) == CONST
2239 && GET_CODE (XEXP (operand1, 0)) == PLUS
2240 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2241 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2242 {
2243 rtx x, m = force_const_mem (mode, operand1);
2244
2245 x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2246 x = replace_equiv_address (m, x);
2247 insn = emit_move_insn (operand0, x);
2248 }
2249 else
2250 {
2251 operands[1] = legitimize_pic_address (operand1, mode, temp);
2252 if (REG_P (operand0) && REG_P (operands[1]))
2253 copy_reg_pointer (operand0, operands[1]);
2254 insn = emit_move_insn (operand0, operands[1]);
2255 }
2256
2257 /* Put a REG_EQUAL note on this insn. */
2258 set_unique_reg_note (insn, REG_EQUAL, operand1);
2259 }
2260 /* On the HPPA, references to data space are supposed to use dp,
2261 register 27, but showing it in the RTL inhibits various cse
2262 and loop optimizations. */
2263 else
2264 {
2265 rtx temp, set;
2266
2267 if (reload_in_progress || reload_completed)
2268 {
2269 temp = scratch_reg ? scratch_reg : operand0;
2270 /* TEMP will hold an address and maybe the actual
2271 data. We want it in WORD_MODE regardless of what mode it
2272 was originally given to us. */
2273 temp = force_mode (word_mode, temp);
2274 }
2275 else
2276 temp = gen_reg_rtx (mode);
2277
2278 /* Loading a SYMBOL_REF into a register makes that register
2279 safe to be used as the base in an indexed address.
2280
2281 Don't mark hard registers though. That loses. */
2282 if (GET_CODE (operand0) == REG
2283 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2284 mark_reg_pointer (operand0, BITS_PER_UNIT);
2285 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2286 mark_reg_pointer (temp, BITS_PER_UNIT);
2287
2288 if (ishighonly)
2289 set = gen_rtx_SET (operand0, temp);
2290 else
2291 set = gen_rtx_SET (operand0,
2292 gen_rtx_LO_SUM (mode, temp, operand1));
2293
2294 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2295 emit_insn (set);
2296
2297 }
2298 return 1;
2299 }
2300 else if (tls_referenced_p (operand1))
2301 {
2302 rtx tmp = operand1;
2303 rtx addend = NULL;
2304
2305 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2306 {
2307 addend = XEXP (XEXP (tmp, 0), 1);
2308 tmp = XEXP (XEXP (tmp, 0), 0);
2309 }
2310
2311 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2312 tmp = legitimize_tls_address (tmp);
2313 if (addend)
2314 {
2315 tmp = gen_rtx_PLUS (mode, tmp, addend);
2316 tmp = force_operand (tmp, operands[0]);
2317 }
2318 operands[1] = tmp;
2319 }
2320 else if (GET_CODE (operand1) != CONST_INT
2321 || !pa_cint_ok_for_move (UINTVAL (operand1)))
2322 {
2323 rtx temp;
2324 rtx_insn *insn;
2325 rtx op1 = operand1;
2326 HOST_WIDE_INT value = 0;
2327 HOST_WIDE_INT insv = 0;
2328 int insert = 0;
2329
2330 if (GET_CODE (operand1) == CONST_INT)
2331 value = INTVAL (operand1);
2332
2333 if (TARGET_64BIT
2334 && GET_CODE (operand1) == CONST_INT
2335 && HOST_BITS_PER_WIDE_INT > 32
2336 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2337 {
2338 HOST_WIDE_INT nval;
2339
2340 /* Extract the low order 32 bits of the value and sign extend.
2341 If the new value is the same as the original value, we can
2342 can use the original value as-is. If the new value is
2343 different, we use it and insert the most-significant 32-bits
2344 of the original value into the final result. */
2345 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2346 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2347 if (value != nval)
2348 {
2349 #if HOST_BITS_PER_WIDE_INT > 32
2350 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2351 #endif
2352 insert = 1;
2353 value = nval;
2354 operand1 = GEN_INT (nval);
2355 }
2356 }
2357
2358 if (reload_in_progress || reload_completed)
2359 temp = scratch_reg ? scratch_reg : operand0;
2360 else
2361 temp = gen_reg_rtx (mode);
2362
2363 /* We don't directly split DImode constants on 32-bit targets
2364 because PLUS uses an 11-bit immediate and the insn sequence
2365 generated is not as efficient as the one using HIGH/LO_SUM. */
2366 if (GET_CODE (operand1) == CONST_INT
2367 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2368 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2369 && !insert)
2370 {
2371 /* Directly break constant into high and low parts. This
2372 provides better optimization opportunities because various
2373 passes recognize constants split with PLUS but not LO_SUM.
2374 We use a 14-bit signed low part except when the addition
2375 of 0x4000 to the high part might change the sign of the
2376 high part. */
2377 HOST_WIDE_INT low = value & 0x3fff;
2378 HOST_WIDE_INT high = value & ~ 0x3fff;
2379
2380 if (low >= 0x2000)
2381 {
2382 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2383 high += 0x2000;
2384 else
2385 high += 0x4000;
2386 }
2387
2388 low = value - high;
2389
2390 emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
2391 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2392 }
2393 else
2394 {
2395 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2396 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2397 }
2398
2399 insn = emit_move_insn (operands[0], operands[1]);
2400
2401 /* Now insert the most significant 32 bits of the value
2402 into the register. When we don't have a second register
2403 available, it could take up to nine instructions to load
2404 a 64-bit integer constant. Prior to reload, we force
2405 constants that would take more than three instructions
2406 to load to the constant pool. During and after reload,
2407 we have to handle all possible values. */
2408 if (insert)
2409 {
2410 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2411 register and the value to be inserted is outside the
2412 range that can be loaded with three depdi instructions. */
2413 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2414 {
2415 operand1 = GEN_INT (insv);
2416
2417 emit_insn (gen_rtx_SET (temp,
2418 gen_rtx_HIGH (mode, operand1)));
2419 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2420 if (mode == DImode)
2421 insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2422 const0_rtx, temp));
2423 else
2424 insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2425 const0_rtx, temp));
2426 }
2427 else
2428 {
2429 int len = 5, pos = 27;
2430
2431 /* Insert the bits using the depdi instruction. */
2432 while (pos >= 0)
2433 {
2434 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2435 HOST_WIDE_INT sign = v5 < 0;
2436
2437 /* Left extend the insertion. */
2438 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2439 while (pos > 0 && (insv & 1) == sign)
2440 {
2441 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2442 len += 1;
2443 pos -= 1;
2444 }
2445
2446 if (mode == DImode)
2447 insn = emit_insn (gen_insvdi (operand0,
2448 GEN_INT (len),
2449 GEN_INT (pos),
2450 GEN_INT (v5)));
2451 else
2452 insn = emit_insn (gen_insvsi (operand0,
2453 GEN_INT (len),
2454 GEN_INT (pos),
2455 GEN_INT (v5)));
2456
2457 len = pos > 0 && pos < 5 ? pos : 5;
2458 pos -= len;
2459 }
2460 }
2461 }
2462
2463 set_unique_reg_note (insn, REG_EQUAL, op1);
2464
2465 return 1;
2466 }
2467 }
2468 /* Now have insn-emit do whatever it normally does. */
2469 return 0;
2470 }
2471
2472 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2473 it will need a link/runtime reloc). */
2474
2475 int
pa_reloc_needed(tree exp)2476 pa_reloc_needed (tree exp)
2477 {
2478 int reloc = 0;
2479
2480 switch (TREE_CODE (exp))
2481 {
2482 case ADDR_EXPR:
2483 return 1;
2484
2485 case POINTER_PLUS_EXPR:
2486 case PLUS_EXPR:
2487 case MINUS_EXPR:
2488 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2489 reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2490 break;
2491
2492 CASE_CONVERT:
2493 case NON_LVALUE_EXPR:
2494 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2495 break;
2496
2497 case CONSTRUCTOR:
2498 {
2499 tree value;
2500 unsigned HOST_WIDE_INT ix;
2501
2502 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2503 if (value)
2504 reloc |= pa_reloc_needed (value);
2505 }
2506 break;
2507
2508 case ERROR_MARK:
2509 break;
2510
2511 default:
2512 break;
2513 }
2514 return reloc;
2515 }
2516
2517
2518 /* Return the best assembler insn template
2519 for moving operands[1] into operands[0] as a fullword. */
2520 const char *
pa_singlemove_string(rtx * operands)2521 pa_singlemove_string (rtx *operands)
2522 {
2523 HOST_WIDE_INT intval;
2524
2525 if (GET_CODE (operands[0]) == MEM)
2526 return "stw %r1,%0";
2527 if (GET_CODE (operands[1]) == MEM)
2528 return "ldw %1,%0";
2529 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2530 {
2531 long i;
2532
2533 gcc_assert (GET_MODE (operands[1]) == SFmode);
2534
2535 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2536 bit pattern. */
2537 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);
2538
2539 operands[1] = GEN_INT (i);
2540 /* Fall through to CONST_INT case. */
2541 }
2542 if (GET_CODE (operands[1]) == CONST_INT)
2543 {
2544 intval = INTVAL (operands[1]);
2545
2546 if (VAL_14_BITS_P (intval))
2547 return "ldi %1,%0";
2548 else if ((intval & 0x7ff) == 0)
2549 return "ldil L'%1,%0";
2550 else if (pa_zdepi_cint_p (intval))
2551 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2552 else
2553 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2554 }
2555 return "copy %1,%0";
2556 }
2557
2558
2559 /* Compute position (in OP[1]) and width (in OP[2])
2560 useful for copying IMM to a register using the zdepi
2561 instructions. Store the immediate value to insert in OP[0]. */
2562 static void
compute_zdepwi_operands(unsigned HOST_WIDE_INT imm,unsigned * op)2563 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2564 {
2565 int lsb, len;
2566
2567 /* Find the least significant set bit in IMM. */
2568 for (lsb = 0; lsb < 32; lsb++)
2569 {
2570 if ((imm & 1) != 0)
2571 break;
2572 imm >>= 1;
2573 }
2574
2575 /* Choose variants based on *sign* of the 5-bit field. */
2576 if ((imm & 0x10) == 0)
2577 len = (lsb <= 28) ? 4 : 32 - lsb;
2578 else
2579 {
2580 /* Find the width of the bitstring in IMM. */
2581 for (len = 5; len < 32 - lsb; len++)
2582 {
2583 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2584 break;
2585 }
2586
2587 /* Sign extend IMM as a 5-bit value. */
2588 imm = (imm & 0xf) - 0x10;
2589 }
2590
2591 op[0] = imm;
2592 op[1] = 31 - lsb;
2593 op[2] = len;
2594 }
2595
2596 /* Compute position (in OP[1]) and width (in OP[2])
2597 useful for copying IMM to a register using the depdi,z
2598 instructions. Store the immediate value to insert in OP[0]. */
2599
2600 static void
compute_zdepdi_operands(unsigned HOST_WIDE_INT imm,unsigned * op)2601 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2602 {
2603 int lsb, len, maxlen;
2604
2605 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2606
2607 /* Find the least significant set bit in IMM. */
2608 for (lsb = 0; lsb < maxlen; lsb++)
2609 {
2610 if ((imm & 1) != 0)
2611 break;
2612 imm >>= 1;
2613 }
2614
2615 /* Choose variants based on *sign* of the 5-bit field. */
2616 if ((imm & 0x10) == 0)
2617 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2618 else
2619 {
2620 /* Find the width of the bitstring in IMM. */
2621 for (len = 5; len < maxlen - lsb; len++)
2622 {
2623 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2624 break;
2625 }
2626
2627 /* Extend length if host is narrow and IMM is negative. */
2628 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2629 len += 32;
2630
2631 /* Sign extend IMM as a 5-bit value. */
2632 imm = (imm & 0xf) - 0x10;
2633 }
2634
2635 op[0] = imm;
2636 op[1] = 63 - lsb;
2637 op[2] = len;
2638 }
2639
2640 /* Output assembler code to perform a doubleword move insn
2641 with operands OPERANDS. */
2642
const char *
pa_output_move_double (rtx *operands)
{
  enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
  /* LATEHALF holds the operands for the high-numbered (second) word;
     ADDREG0/ADDREG1 are address registers we temporarily bump by 4 to
     reach the second word of an unoffsettable MEM; HIGHONLY is set when
     the source was a HIGH and only an ldil is wanted for the late half.  */
  rtx latehalf[2];
  rtx addreg0 = 0, addreg1 = 0;
  int highonly = 0;

  /* First classify both operands: register, offsettable memory,
     other memory, constant, or random (unsupported).  */

  if (REG_P (operands[0]))
    optype0 = REGOP;
  else if (offsettable_memref_p (operands[0]))
    optype0 = OFFSOP;
  else if (GET_CODE (operands[0]) == MEM)
    optype0 = MEMOP;
  else
    optype0 = RNDOP;

  if (REG_P (operands[1]))
    optype1 = REGOP;
  else if (CONSTANT_P (operands[1]))
    optype1 = CNSTOP;
  else if (offsettable_memref_p (operands[1]))
    optype1 = OFFSOP;
  else if (GET_CODE (operands[1]) == MEM)
    optype1 = MEMOP;
  else
    optype1 = RNDOP;

  /* Check for the cases that the operand constraints are not
     supposed to allow to happen.  */
  gcc_assert (optype0 == REGOP || optype1 == REGOP);

  /* Handle copies between general and floating registers.  These go
     through a staging slot at -16(%sp) since there is no direct path.  */

  if (optype0 == REGOP && optype1 == REGOP
      && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
    {
      if (FP_REG_P (operands[0]))
	{
	  output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
	  output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
	  return "{fldds|fldd} -16(%%sp),%0";
	}
      else
	{
	  output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
	  output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
	  return "{ldws|ldw} -12(%%sp),%R0";
	}
    }

  /* Handle auto decrementing and incrementing loads and stores
     specifically, since the structure of the function doesn't work
     for them without major modification.  Do it better when we learn
     this port about the general inc/dec addressing of PA.
     (This was written by tege.  Chide him if it doesn't work.)  */

  if (optype0 == MEMOP)
    {
      /* We have to output the address syntax ourselves, since print_operand
	 doesn't deal with the addresses we want to use.  Fix this later.  */

      rtx addr = XEXP (operands[0], 0);
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);

	  operands[0] = XEXP (addr, 0);
	  gcc_assert (GET_CODE (operands[1]) == REG
		      && GET_CODE (operands[0]) == REG);

	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));

	  /* No overlap between high target register and address
	     register.  (We do this in a non-obvious way to
	     save a register file writeback)  */
	  if (GET_CODE (addr) == POST_INC)
	    return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
	  return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
	}
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);

	  operands[0] = XEXP (addr, 0);
	  gcc_assert (GET_CODE (operands[1]) == REG
		      && GET_CODE (operands[0]) == REG);

	  gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
	  /* No overlap between high target register and address
	     register.  (We do this in a non-obvious way to save a
	     register file writeback)  */
	  if (GET_CODE (addr) == PRE_INC)
	    return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
	  return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
	}
    }
  if (optype1 == MEMOP)
    {
      /* We have to output the address syntax ourselves, since print_operand
	 doesn't deal with the addresses we want to use.  Fix this later.  */

      rtx addr = XEXP (operands[1], 0);
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);

	  operands[1] = XEXP (addr, 0);
	  gcc_assert (GET_CODE (operands[0]) == REG
		      && GET_CODE (operands[1]) == REG);

	  if (!reg_overlap_mentioned_p (high_reg, addr))
	    {
	      /* No overlap between high target register and address
		 register.  (We do this in a non-obvious way to
		 save a register file writeback)  */
	      if (GET_CODE (addr) == POST_INC)
		return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
	      return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
	    }
	  else
	    {
	      /* This is an undefined situation.  We should load into the
		 address register *and* update that register.  Probably
		 we don't need to handle this at all.  */
	      if (GET_CODE (addr) == POST_INC)
		return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
	      return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
	    }
	}
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
	{
	  rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);

	  operands[1] = XEXP (addr, 0);
	  gcc_assert (GET_CODE (operands[0]) == REG
		      && GET_CODE (operands[1]) == REG);

	  if (!reg_overlap_mentioned_p (high_reg, addr))
	    {
	      /* No overlap between high target register and address
		 register.  (We do this in a non-obvious way to
		 save a register file writeback)  */
	      if (GET_CODE (addr) == PRE_INC)
		return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
	      return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
	    }
	  else
	    {
	      /* This is an undefined situation.  We should load into the
		 address register *and* update that register.  Probably
		 we don't need to handle this at all.  */
	      if (GET_CODE (addr) == PRE_INC)
		return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
	      return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
	    }
	}
      else if (GET_CODE (addr) == PLUS
	       && GET_CODE (XEXP (addr, 0)) == MULT)
	{
	  rtx xoperands[4];

	  /* Scaled-index address: compute it into the left half of the
	     destination register, then load both words through it.  */
	  xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
	  xoperands[1] = XEXP (addr, 1);
	  xoperands[2] = XEXP (XEXP (addr, 0), 0);
	  xoperands[3] = XEXP (XEXP (addr, 0), 1);
	  output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
			   xoperands);
	  return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
	}
      else if (GET_CODE (addr) == PLUS
	       && REG_P (XEXP (addr, 0))
	       && REG_P (XEXP (addr, 1)))
	{
	  rtx xoperands[3];

	  /* Reg+reg address: compute it into the left half of the
	     destination register, then load both words through it.  */
	  xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
	  xoperands[1] = XEXP (addr, 0);
	  xoperands[2] = XEXP (addr, 1);
	  output_asm_insn ("{addl|add,l} %1,%2,%0",
			   xoperands);
	  return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
	}
    }

  /* If an operand is an unoffsettable memory ref, find a register
     we can increment temporarily to make it refer to the second word.  */

  if (optype0 == MEMOP)
    addreg0 = find_addr_reg (XEXP (operands[0], 0));

  if (optype1 == MEMOP)
    addreg1 = find_addr_reg (XEXP (operands[1], 0));

  /* Ok, we can do one word at a time.
     Normally we do the low-numbered word first.

     In either case, set up in LATEHALF the operands to use
     for the high-numbered word and in some cases alter the
     operands in OPERANDS to be suitable for the low-numbered word.  */

  if (optype0 == REGOP)
    latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
  else if (optype0 == OFFSOP)
    latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
  else
    latehalf[0] = operands[0];

  if (optype1 == REGOP)
    latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
  else if (optype1 == OFFSOP)
    latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
  else if (optype1 == CNSTOP)
    {
      if (GET_CODE (operands[1]) == HIGH)
	{
	  /* Strip the HIGH and remember that only the high parts
	     of the two words should be emitted (via ldil below).  */
	  operands[1] = XEXP (operands[1], 0);
	  highonly = 1;
	}
      split_double (operands[1], &operands[1], &latehalf[1]);
    }
  else
    latehalf[1] = operands[1];

  /* If the first move would clobber the source of the second one,
     do them in the other order.

     This can happen in two cases:

	mem -> register where the first half of the destination register
	is the same register used in the memory's address.  Reload
	can create such insns.

	mem in this case will be either register indirect or register
	indirect plus a valid offset.

	register -> register move where REGNO(dst) == REGNO(src + 1)
	someone (Tim/Tege?) claimed this can happen for parameter loads.

     Handle mem -> register case first.  */
  if (optype0 == REGOP
      && (optype1 == MEMOP || optype1 == OFFSOP)
      && refers_to_regno_p (REGNO (operands[0]), operands[1]))
    {
      /* Do the late half first.  */
      if (addreg1)
	output_asm_insn ("ldo 4(%0),%0", &addreg1);
      output_asm_insn (pa_singlemove_string (latehalf), latehalf);

      /* Then clobber.  */
      if (addreg1)
	output_asm_insn ("ldo -4(%0),%0", &addreg1);
      return pa_singlemove_string (operands);
    }

  /* Now handle register -> register case.  */
  if (optype0 == REGOP && optype1 == REGOP
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    {
      output_asm_insn (pa_singlemove_string (latehalf), latehalf);
      return pa_singlemove_string (operands);
    }

  /* Normal case: do the two words, low-numbered first.  */

  output_asm_insn (pa_singlemove_string (operands), operands);

  /* Make any unoffsettable addresses point at high-numbered word.  */
  if (addreg0)
    output_asm_insn ("ldo 4(%0),%0", &addreg0);
  if (addreg1)
    output_asm_insn ("ldo 4(%0),%0", &addreg1);

  /* Do high-numbered word.  */
  if (highonly)
    output_asm_insn ("ldil L'%1,%0", latehalf);
  else
    output_asm_insn (pa_singlemove_string (latehalf), latehalf);

  /* Undo the adds we just did.  */
  if (addreg0)
    output_asm_insn ("ldo -4(%0),%0", &addreg0);
  if (addreg1)
    output_asm_insn ("ldo -4(%0),%0", &addreg1);

  return "";
}
2934
/* Output the assembly for a doubleword move where at least one operand
   involves a floating point register, or where a general register pair
   is being zeroed.  OPERANDS[0] is the destination, OPERANDS[1] the
   source.  Instructions are emitted directly; returns "".  */
const char *
pa_output_fp_move_double (rtx *operands)
{
  if (FP_REG_P (operands[0]))
    {
      /* fp <- fp (or fp <- zero) is a register copy; otherwise load
	 from memory.  */
      if (FP_REG_P (operands[1])
	  || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
	output_asm_insn ("fcpy,dbl %f1,%0", operands);
      else
	output_asm_insn ("fldd%F1 %1,%0", operands);
    }
  else if (FP_REG_P (operands[1]))
    {
      /* mem <- fp store.  */
      output_asm_insn ("fstd%F0 %1,%0", operands);
    }
  else
    {
      rtx xoperands[2];

      /* Neither operand is an FP register; only a zero source is
	 expected to reach this path.  */
      gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));

      /* This is a pain.  You have to be prepared to deal with an
	 arbitrary address here including pre/post increment/decrement.

	 so avoid this in the MD.  */
      gcc_assert (GET_CODE (operands[0]) == REG);

      /* Zero both halves of the general-register pair.  */
      xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
      xoperands[0] = operands[0];
      output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
    }
  return "";
}
2968
2969 /* Return a REG that occurs in ADDR with coefficient 1.
2970 ADDR can be effectively incremented by incrementing REG. */
2971
2972 static rtx
find_addr_reg(rtx addr)2973 find_addr_reg (rtx addr)
2974 {
2975 while (GET_CODE (addr) == PLUS)
2976 {
2977 if (GET_CODE (XEXP (addr, 0)) == REG)
2978 addr = XEXP (addr, 0);
2979 else if (GET_CODE (XEXP (addr, 1)) == REG)
2980 addr = XEXP (addr, 1);
2981 else if (CONSTANT_P (XEXP (addr, 0)))
2982 addr = XEXP (addr, 1);
2983 else if (CONSTANT_P (XEXP (addr, 1)))
2984 addr = XEXP (addr, 0);
2985 else
2986 gcc_unreachable ();
2987 }
2988 gcc_assert (GET_CODE (addr) == REG);
2989 return addr;
2990 }
2991
2992 /* Emit code to perform a block move.
2993
2994 OPERANDS[0] is the destination pointer as a REG, clobbered.
2995 OPERANDS[1] is the source pointer as a REG, clobbered.
2996 OPERANDS[2] is a register for temporary storage.
2997 OPERANDS[3] is a register for temporary storage.
2998 OPERANDS[4] is the size as a CONST_INT
2999 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
3000 OPERANDS[6] is another temporary register. */
3001
const char *
pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
{
  int align = INTVAL (operands[5]);
  unsigned long n_bytes = INTVAL (operands[4]);

  /* We can't move more than a word at a time because the PA
     has no longer integer move insns.  (Could use fp mem ops?)
     A word is 8 bytes on TARGET_64BIT, 4 otherwise.  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);

  /* Note that we know each loop below will execute at least twice
     (else we would have open-coded the copy).  */
  switch (align)
    {
    case 8:
      /* Pre-adjust the loop counter.  */
      operands[4] = GEN_INT (n_bytes - 16);
      output_asm_insn ("ldi %4,%2", operands);

      /* Copying loop: two doublewords per iteration.  The addib
	 branches back three insns (.-12) with the second store in
	 its delay slot.  */
      output_asm_insn ("ldd,ma 8(%1),%3", operands);
      output_asm_insn ("ldd,ma 8(%1),%6", operands);
      output_asm_insn ("std,ma %3,8(%0)", operands);
      output_asm_insn ("addib,>= -16,%2,.-12", operands);
      output_asm_insn ("std,ma %6,8(%0)", operands);

      /* Handle the residual.  There could be up to 7 bytes of
	 residual to copy!  */
      if (n_bytes % 16 != 0)
	{
	  operands[4] = GEN_INT (n_bytes % 8);
	  if (n_bytes % 16 >= 8)
	    output_asm_insn ("ldd,ma 8(%1),%3", operands);
	  if (n_bytes % 8 != 0)
	    output_asm_insn ("ldd 0(%1),%6", operands);
	  if (n_bytes % 16 >= 8)
	    output_asm_insn ("std,ma %3,8(%0)", operands);
	  if (n_bytes % 8 != 0)
	    /* stdby,e handles the final partial doubleword.  */
	    output_asm_insn ("stdby,e %6,%4(%0)", operands);
	}
      return "";

    case 4:
      /* Pre-adjust the loop counter.  */
      operands[4] = GEN_INT (n_bytes - 8);
      output_asm_insn ("ldi %4,%2", operands);

      /* Copying loop: two words per iteration.  */
      output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
      output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
      output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
      output_asm_insn ("addib,>= -8,%2,.-12", operands);
      output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);

      /* Handle the residual.  There could be up to 7 bytes of
	 residual to copy!  */
      if (n_bytes % 8 != 0)
	{
	  operands[4] = GEN_INT (n_bytes % 4);
	  if (n_bytes % 8 >= 4)
	    output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
	  if (n_bytes % 4 != 0)
	    output_asm_insn ("ldw 0(%1),%6", operands);
	  if (n_bytes % 8 >= 4)
	    output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
	  if (n_bytes % 4 != 0)
	    /* stby,e handles the final partial word.  */
	    output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
	}
      return "";

    case 2:
      /* Pre-adjust the loop counter.  */
      operands[4] = GEN_INT (n_bytes - 4);
      output_asm_insn ("ldi %4,%2", operands);

      /* Copying loop: two halfwords per iteration.  */
      output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
      output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
      output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
      output_asm_insn ("addib,>= -4,%2,.-12", operands);
      output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);

      /* Handle the residual.  */
      if (n_bytes % 4 != 0)
	{
	  if (n_bytes % 4 >= 2)
	    output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
	  if (n_bytes % 2 != 0)
	    output_asm_insn ("ldb 0(%1),%6", operands);
	  if (n_bytes % 4 >= 2)
	    output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
	  if (n_bytes % 2 != 0)
	    output_asm_insn ("stb %6,0(%0)", operands);
	}
      return "";

    case 1:
      /* Pre-adjust the loop counter.  */
      operands[4] = GEN_INT (n_bytes - 2);
      output_asm_insn ("ldi %4,%2", operands);

      /* Copying loop: two bytes per iteration.  */
      output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
      output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
      output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
      output_asm_insn ("addib,>= -2,%2,.-12", operands);
      output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);

      /* Handle the residual.  */
      if (n_bytes % 2 != 0)
	{
	  output_asm_insn ("ldb 0(%1),%3", operands);
	  output_asm_insn ("stb %3,0(%0)", operands);
	}
      return "";

    default:
      gcc_unreachable ();
    }
}
3123
3124 /* Count the number of insns necessary to handle this block move.
3125
3126 Basic structure is the same as emit_block_move, except that we
3127 count insns rather than emit them. */
3128
static int
compute_cpymem_length (rtx_insn *insn)
{
  rtx pat = PATTERN (insn);
  unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
  unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
  unsigned int n_insns = 0;

  /* We can't move more than a word (8 bytes on TARGET_64BIT, else 4)
     at a time because the PA has no longer integer move insns.
     (Could use fp mem ops?)  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);

  /* The basic copying loop.  */
  n_insns = 6;

  /* Residuals.  This must mirror the residual handling in
     pa_output_block_move: a load/store pair for a leftover full unit,
     and another pair for a final partial unit.  */
  if (n_bytes % (2 * align) != 0)
    {
      if ((n_bytes % (2 * align)) >= align)
	n_insns += 2;

      if ((n_bytes % align) != 0)
	n_insns += 2;
    }

  /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
  return n_insns * 4;
}
3158
3159 /* Emit code to perform a block clear.
3160
3161 OPERANDS[0] is the destination pointer as a REG, clobbered.
3162 OPERANDS[1] is a register for temporary storage.
3163 OPERANDS[2] is the size as a CONST_INT
3164 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
3165
const char *
pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
{
  int align = INTVAL (operands[3]);
  unsigned long n_bytes = INTVAL (operands[2]);

  /* We can't clear more than a word at a time because the PA
     has no longer integer move insns.  A word is 8 bytes on
     TARGET_64BIT, 4 otherwise.  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);

  /* Note that we know each loop below will execute at least twice
     (else we would have open-coded the copy).  */
  switch (align)
    {
    case 8:
      /* Pre-adjust the loop counter.  */
      operands[2] = GEN_INT (n_bytes - 16);
      output_asm_insn ("ldi %2,%1", operands);

      /* Loop: two doubleword stores of %r0 per iteration; the addib
	 branches back one insn (.-4) with the second store in its
	 delay slot.  */
      output_asm_insn ("std,ma %%r0,8(%0)", operands);
      output_asm_insn ("addib,>= -16,%1,.-4", operands);
      output_asm_insn ("std,ma %%r0,8(%0)", operands);

      /* Handle the residual.  There could be up to 7 bytes of
	 residual to copy!  */
      if (n_bytes % 16 != 0)
	{
	  operands[2] = GEN_INT (n_bytes % 8);
	  if (n_bytes % 16 >= 8)
	    output_asm_insn ("std,ma %%r0,8(%0)", operands);
	  if (n_bytes % 8 != 0)
	    /* stdby,e handles the final partial doubleword.  */
	    output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
	}
      return "";

    case 4:
      /* Pre-adjust the loop counter.  */
      operands[2] = GEN_INT (n_bytes - 8);
      output_asm_insn ("ldi %2,%1", operands);

      /* Loop: two word stores of %r0 per iteration.  */
      output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
      output_asm_insn ("addib,>= -8,%1,.-4", operands);
      output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);

      /* Handle the residual.  There could be up to 7 bytes of
	 residual to copy!  */
      if (n_bytes % 8 != 0)
	{
	  operands[2] = GEN_INT (n_bytes % 4);
	  if (n_bytes % 8 >= 4)
	    output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
	  if (n_bytes % 4 != 0)
	    /* stby,e handles the final partial word.  */
	    output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
	}
      return "";

    case 2:
      /* Pre-adjust the loop counter.  */
      operands[2] = GEN_INT (n_bytes - 4);
      output_asm_insn ("ldi %2,%1", operands);

      /* Loop: two halfword stores of %r0 per iteration.  */
      output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
      output_asm_insn ("addib,>= -4,%1,.-4", operands);
      output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);

      /* Handle the residual.  */
      if (n_bytes % 4 != 0)
	{
	  if (n_bytes % 4 >= 2)
	    output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
	  if (n_bytes % 2 != 0)
	    output_asm_insn ("stb %%r0,0(%0)", operands);
	}
      return "";

    case 1:
      /* Pre-adjust the loop counter.  */
      operands[2] = GEN_INT (n_bytes - 2);
      output_asm_insn ("ldi %2,%1", operands);

      /* Loop: two byte stores of %r0 per iteration.  */
      output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
      output_asm_insn ("addib,>= -2,%1,.-4", operands);
      output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);

      /* Handle the residual.  */
      if (n_bytes % 2 != 0)
	output_asm_insn ("stb %%r0,0(%0)", operands);

      return "";

    default:
      gcc_unreachable ();
    }
}
3265
/* Count the number of insns necessary to handle this block clear.

   Basic structure is the same as pa_output_block_clear, except that we
   count insns rather than emit them.  */
3270
3271 static int
compute_clrmem_length(rtx_insn * insn)3272 compute_clrmem_length (rtx_insn *insn)
3273 {
3274 rtx pat = PATTERN (insn);
3275 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3276 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3277 unsigned int n_insns = 0;
3278
3279 /* We can't clear more than a word at a time because the PA
3280 has no longer integer move insns. */
3281 if (align > (TARGET_64BIT ? 8 : 4))
3282 align = (TARGET_64BIT ? 8 : 4);
3283
3284 /* The basic loop. */
3285 n_insns = 4;
3286
3287 /* Residuals. */
3288 if (n_bytes % (2 * align) != 0)
3289 {
3290 if ((n_bytes % (2 * align)) >= align)
3291 n_insns++;
3292
3293 if ((n_bytes % align) != 0)
3294 n_insns++;
3295 }
3296
3297 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3298 return n_insns * 4;
3299 }
3300
3301
3302 const char *
pa_output_and(rtx * operands)3303 pa_output_and (rtx *operands)
3304 {
3305 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3306 {
3307 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3308 int ls0, ls1, ms0, p, len;
3309
3310 for (ls0 = 0; ls0 < 32; ls0++)
3311 if ((mask & (1 << ls0)) == 0)
3312 break;
3313
3314 for (ls1 = ls0; ls1 < 32; ls1++)
3315 if ((mask & (1 << ls1)) != 0)
3316 break;
3317
3318 for (ms0 = ls1; ms0 < 32; ms0++)
3319 if ((mask & (1 << ms0)) == 0)
3320 break;
3321
3322 gcc_assert (ms0 == 32);
3323
3324 if (ls1 == 32)
3325 {
3326 len = ls0;
3327
3328 gcc_assert (len);
3329
3330 operands[2] = GEN_INT (len);
3331 return "{extru|extrw,u} %1,31,%2,%0";
3332 }
3333 else
3334 {
3335 /* We could use this `depi' for the case above as well, but `depi'
3336 requires one more register file access than an `extru'. */
3337
3338 p = 31 - ls0;
3339 len = ls1 - ls0;
3340
3341 operands[2] = GEN_INT (p);
3342 operands[3] = GEN_INT (len);
3343 return "{depi|depwi} 0,%2,%3,%0";
3344 }
3345 }
3346 else
3347 return "and %1,%2,%0";
3348 }
3349
3350 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3351 storing the result in operands[0]. */
3352 const char *
pa_output_64bit_and(rtx * operands)3353 pa_output_64bit_and (rtx *operands)
3354 {
3355 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3356 {
3357 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3358 int ls0, ls1, ms0, p, len;
3359
3360 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3361 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3362 break;
3363
3364 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3365 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3366 break;
3367
3368 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3369 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3370 break;
3371
3372 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3373
3374 if (ls1 == HOST_BITS_PER_WIDE_INT)
3375 {
3376 len = ls0;
3377
3378 gcc_assert (len);
3379
3380 operands[2] = GEN_INT (len);
3381 return "extrd,u %1,63,%2,%0";
3382 }
3383 else
3384 {
3385 /* We could use this `depi' for the case above as well, but `depi'
3386 requires one more register file access than an `extru'. */
3387
3388 p = 63 - ls0;
3389 len = ls1 - ls0;
3390
3391 operands[2] = GEN_INT (p);
3392 operands[3] = GEN_INT (len);
3393 return "depdi 0,%2,%3,%0";
3394 }
3395 }
3396 else
3397 return "and %1,%2,%0";
3398 }
3399
3400 const char *
pa_output_ior(rtx * operands)3401 pa_output_ior (rtx *operands)
3402 {
3403 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3404 int bs0, bs1, p, len;
3405
3406 if (INTVAL (operands[2]) == 0)
3407 return "copy %1,%0";
3408
3409 for (bs0 = 0; bs0 < 32; bs0++)
3410 if ((mask & (1 << bs0)) != 0)
3411 break;
3412
3413 for (bs1 = bs0; bs1 < 32; bs1++)
3414 if ((mask & (1 << bs1)) == 0)
3415 break;
3416
3417 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3418
3419 p = 31 - bs0;
3420 len = bs1 - bs0;
3421
3422 operands[2] = GEN_INT (p);
3423 operands[3] = GEN_INT (len);
3424 return "{depi|depwi} -1,%2,%3,%0";
3425 }
3426
/* Return a string to perform a bitwise inclusive-or of operands[1] with
   operands[2], storing the result in operands[0].  */
3429 const char *
pa_output_64bit_ior(rtx * operands)3430 pa_output_64bit_ior (rtx *operands)
3431 {
3432 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3433 int bs0, bs1, p, len;
3434
3435 if (INTVAL (operands[2]) == 0)
3436 return "copy %1,%0";
3437
3438 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3439 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3440 break;
3441
3442 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3443 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3444 break;
3445
3446 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3447 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3448
3449 p = 63 - bs0;
3450 len = bs1 - bs0;
3451
3452 operands[2] = GEN_INT (p);
3453 operands[3] = GEN_INT (len);
3454 return "depdi -1,%2,%3,%0";
3455 }
3456
3457 /* Target hook for assembling integer objects. This code handles
3458 aligned SI and DI integers specially since function references
3459 must be preceded by P%. */
3460
static bool
pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  bool result;
  tree decl = NULL;

  /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to call
     assemble_external and set the SYMBOL_REF_DECL to NULL before
     calling output_addr_const.  Otherwise, it may call assemble_external
     in the midst of outputting the assembler code for the SYMBOL_REF.
     We restore the SYMBOL_REF_DECL after the output is done.  */
  if (GET_CODE (x) == SYMBOL_REF)
    {
      decl = SYMBOL_REF_DECL (x);
      if (decl)
	{
	  assemble_external (decl);
	  SET_SYMBOL_REF_DECL (x, NULL);
	}
    }

  /* Aligned word-sized function references need the P% prefix so the
     linker builds an official procedure descriptor (OPD).  */
  if (size == UNITS_PER_WORD
      && aligned_p
      && function_label_operand (x, VOIDmode))
    {
      fputs (size == 8? "\t.dword\t" : "\t.word\t", asm_out_file);

      /* We don't want an OPD when generating fast indirect calls.  */
      if (!TARGET_FAST_INDIRECT_CALLS)
	fputs ("P%", asm_out_file);

      output_addr_const (asm_out_file, x);
      fputc ('\n', asm_out_file);
      result = true;
    }
  else
    result = default_assemble_integer (x, size, aligned_p);

  /* Restore the decl hidden above.  */
  if (decl)
    SET_SYMBOL_REF_DECL (x, decl);

  return result;
}
3504
3505 /* Output an ascii string. */
void
pa_output_ascii (FILE *file, const char *p, int size)
{
  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string, and the limit counts source characters
     (so \xnn costs four).  The string is therefore emitted in small
     chunks, starting a fresh .STRING directive whenever a chunk
     would push the current line past 243 characters.  */
  static const char hexdig[] = "0123456789abcdef";
  unsigned char chunk[16];	/* Max space 4 chars can occupy.  */
  int line_len = 0;
  int i;

  fputs ("\t.STRING \"", file);

  for (i = 0; i < size; i += 4)
    {
      int out = 0;
      int lim = size - i;
      int k;

      if (lim > 4)
	lim = 4;

      /* Escape up to four characters into CHUNK.  */
      for (k = 0; k < lim; k++)
	{
	  unsigned int c = (unsigned char) p[i + k];

	  if (c == '\"' || c == '\\')
	    chunk[out++] = '\\';
	  if (c >= ' ' && c < 0177)
	    chunk[out++] = c;
	  else
	    {
	      /* Non-printable: emit a two-digit lowercase hex escape.  */
	      chunk[out++] = '\\';
	      chunk[out++] = 'x';
	      chunk[out++] = hexdig[c / 16];
	      chunk[out++] = hexdig[c % 16];
	    }
	}

      /* Break the line before it gets too long for the assembler.  */
      if (line_len + out > 243)
	{
	  fputs ("\"\n\t.STRING \"", file);
	  line_len = 0;
	}
      fwrite (chunk, 1, (size_t) out, file);
      line_len += out;
    }
  fputs ("\"\n", file);
}
3560
3561 /* Try to rewrite floating point comparisons & branches to avoid
3562 useless add,tr insns.
3563
3564 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3565 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3566 first attempt to remove useless add,tr insns. It is zero
3567 for the second pass as reorg sometimes leaves bogus REG_DEAD
3568 notes lying around.
3569
3570 When CHECK_NOTES is zero we can only eliminate add,tr insns
3571 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3572 instructions. */
static void
remove_useless_addtr_insns (int check_notes)
{
  rtx_insn *insn;
  /* Toggled at the end of every call; not otherwise read here.  */
  static int pass = 0;

  /* This is fairly cheap, so always run it when optimizing.  */
  if (optimize > 0)
    {
      int fcmp_count = 0;
      int fbranch_count = 0;

      /* Walk all the insns in this function looking for fcmp & fbranch
	 instructions.  Keep track of how many of each we find.  */
      for (insn = get_insns (); insn; insn = next_insn (insn))
	{
	  rtx tmp;

	  /* Ignore anything that isn't an INSN or a JUMP_INSN.  */
	  if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
	    continue;

	  tmp = PATTERN (insn);

	  /* It must be a set.  */
	  if (GET_CODE (tmp) != SET)
	    continue;

	  /* If the destination is CCFP (register zero), then we've
	     found an fcmp insn.  */
	  tmp = SET_DEST (tmp);
	  if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
	    {
	      fcmp_count++;
	      continue;
	    }

	  tmp = PATTERN (insn);
	  /* If this is an fbranch instruction (a conditional jump on
	     CCFP != 0), bump the fbranch counter.  */
	  if (GET_CODE (tmp) == SET
	      && SET_DEST (tmp) == pc_rtx
	      && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
	      && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
	      && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
	      && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
	    {
	      fbranch_count++;
	      continue;
	    }
	}


      /* Find all floating point compare + branch insns.  If possible,
	 reverse the comparison & the branch to avoid add,tr insns.  */
      for (insn = get_insns (); insn; insn = next_insn (insn))
	{
	  rtx tmp;
	  rtx_insn *next;

	  /* Ignore anything that isn't an INSN.  */
	  if (! NONJUMP_INSN_P (insn))
	    continue;

	  tmp = PATTERN (insn);

	  /* It must be a set.  */
	  if (GET_CODE (tmp) != SET)
	    continue;

	  /* The destination must be CCFP, which is register zero.  */
	  tmp = SET_DEST (tmp);
	  if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
	    continue;

	  /* INSN should be a set of CCFP.

	     See if the result of this insn is used in a reversed FP
	     conditional branch.  If so, reverse our condition and
	     the branch.  Doing so avoids useless add,tr insns.  */
	  next = next_insn (insn);
	  while (next)
	    {
	      /* Jumps, calls and labels stop our search.  */
	      if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
		break;

	      /* As does another fcmp insn.  */
	      if (NONJUMP_INSN_P (next)
		  && GET_CODE (PATTERN (next)) == SET
		  && GET_CODE (SET_DEST (PATTERN (next))) == REG
		  && REGNO (SET_DEST (PATTERN (next))) == 0)
		break;

	      next = next_insn (next);
	    }

	  /* Is NEXT_INSN a branch?  */
	  if (next && JUMP_P (next))
	    {
	      rtx pattern = PATTERN (next);

	      /* If it a reversed fp conditional branch (e.g. uses add,tr)
		 and CCFP dies, then reverse our conditional and the branch
		 to avoid the add,tr.  Only safe when fcmp and fbranch
		 insns pair 1:1, or when CHECK_NOTES lets a REG_DEAD note
		 prove CCFP is dead after the branch.  */
	      if (GET_CODE (pattern) == SET
		  && SET_DEST (pattern) == pc_rtx
		  && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
		  && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
		  && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
		  && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
		  && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
		  && (fcmp_count == fbranch_count
		      || (check_notes
			  && find_regno_note (next, REG_DEAD, 0))))
		{
		  /* Reverse the branch: swap the THEN and ELSE arms and
		     force re-recognition of the insn.  */
		  tmp = XEXP (SET_SRC (pattern), 1);
		  XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
		  XEXP (SET_SRC (pattern), 2) = tmp;
		  INSN_CODE (next) = -1;

		  /* Reverse our condition.  */
		  tmp = PATTERN (insn);
		  PUT_CODE (XEXP (tmp, 1),
			    (reverse_condition_maybe_unordered
			     (GET_CODE (XEXP (tmp, 1)))));
		}
	    }
	}
    }

  pass = !pass;

}
3706
3707 /* You may have trouble believing this, but this is the 32 bit HP-PA
3708 stack layout. Wow.
3709
3710 Offset Contents
3711
3712 Variable arguments (optional; any number may be allocated)
3713
3714 SP-(4*(N+9)) arg word N
3715 : :
3716 SP-56 arg word 5
3717 SP-52 arg word 4
3718
3719 Fixed arguments (must be allocated; may remain unused)
3720
3721 SP-48 arg word 3
3722 SP-44 arg word 2
3723 SP-40 arg word 1
3724 SP-36 arg word 0
3725
3726 Frame Marker
3727
3728 SP-32 External Data Pointer (DP)
3729 SP-28 External sr4
3730 SP-24 External/stub RP (RP')
3731 SP-20 Current RP
3732 SP-16 Static Link
3733 SP-12 Clean up
3734 SP-8 Calling Stub RP (RP'')
3735 SP-4 Previous SP
3736
3737 Top of Frame
3738
3739 SP-0 Stack Pointer (points to next available address)
3740
3741 */
3742
3743 /* This function saves registers as follows. Registers marked with ' are
3744 this function's registers (as opposed to the previous function's).
3745 If a frame_pointer isn't needed, r4 is saved as a general register;
3746 the space for the frame pointer is still allocated, though, to keep
3747 things simple.
3748
3749
3750 Top of Frame
3751
3752 SP (FP') Previous FP
3753 SP + 4 Alignment filler (sigh)
3754 SP + 8 Space for locals reserved here.
3755 .
3756 .
3757 .
3758 SP + n All call saved register used.
3759 .
3760 .
3761 .
3762 SP + o All call saved fp registers used.
3763 .
3764 .
3765 .
3766 SP + p (SP') points to next available address.
3767
3768 */
3769
3770 /* Global variables set by output_function_prologue(). */
3771 /* Size of frame. Need to know this to emit return insns from
3772 leaf procedures. */
3773 static HOST_WIDE_INT actual_fsize, local_fsize;
3774 static int save_fregs;
3775
3776 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3777 Handle case where DISP > 8k by using the add_high_const patterns.
3778
3779 Note in DISP > 8k case, we will leave the high part of the address
3780 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
3781
static void
store_reg (int reg, HOST_WIDE_INT disp, int base)
{
  rtx dest, src, basereg;
  rtx_insn *insn;

  src = gen_rtx_REG (word_mode, reg);
  basereg = gen_rtx_REG (Pmode, base);
  if (VAL_14_BITS_P (disp))
    {
      /* The displacement fits in 14 bits: a single reg+d store.  */
      dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
      insn = emit_move_insn (dest, src);
    }
  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
    {
      /* 64-bit displacement that doesn't fit in 32 bits: materialize
	 the full address in %r1 and store through it.  */
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, delta);
      insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
      if (DO_FRAME_NOTES)
	{
	  /* Describe the address computation to the unwinder in the
	     canonical base + constant form.  */
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_SET (tmpreg,
				     gen_rtx_PLUS (Pmode, basereg, delta)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      dest = gen_rtx_MEM (word_mode, tmpreg);
      insn = emit_move_insn (dest, src);
    }
  else
    {
      /* DISP > 8k: put the high part of the address in %r1 and store
	 via a LO_SUM address.  The high part is left in %r1 (see the
	 comment above this function).  */
      rtx delta = GEN_INT (disp);
      rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, high);
      dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
      insn = emit_move_insn (dest, src);
      if (DO_FRAME_NOTES)
	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
		      gen_rtx_SET (gen_rtx_MEM (word_mode,
						gen_rtx_PLUS (word_mode,
							      basereg,
							      delta)),
				   src));
    }

  if (DO_FRAME_NOTES)
    RTX_FRAME_RELATED_P (insn) = 1;
}
3833
3834 /* Emit RTL to store REG at the memory location specified by BASE and then
3835 add MOD to BASE. MOD must be <= 8k. */
3836
static void
store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
{
  rtx basereg, srcreg, delta;
  rtx_insn *insn;

  /* The post-modify constant must fit in a 14-bit displacement.  */
  gcc_assert (VAL_14_BITS_P (mod));

  basereg = gen_rtx_REG (Pmode, base);
  srcreg = gen_rtx_REG (word_mode, reg);
  delta = GEN_INT (mod);

  /* Emit a store-with-post-modify: *BASE = SRC; BASE += MOD.  */
  insn = emit_insn (gen_post_store (basereg, srcreg, delta));
  if (DO_FRAME_NOTES)
    {
      RTX_FRAME_RELATED_P (insn) = 1;

      /* RTX_FRAME_RELATED_P must be set on each frame related set
	 in a parallel with more than one element.  */
      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
    }
}
3860
3861 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3862 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3863 whether to add a frame note or not.
3864
3865 In the DISP > 8k case, we leave the high part of the address in %r1.
3866 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3867
static void
set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
{
  rtx_insn *insn;

  if (VAL_14_BITS_P (disp))
    {
      /* Short displacement: a single add suffices.  */
      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
			     plus_constant (Pmode,
					    gen_rtx_REG (Pmode, base), disp));
    }
  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
    {
      /* 64-bit displacement that doesn't fit in 32 bits: materialize
	 it in %r1 first, then add the base register.  */
      rtx basereg = gen_rtx_REG (Pmode, base);
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, delta);
      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
			     gen_rtx_PLUS (Pmode, tmpreg, basereg));
      if (DO_FRAME_NOTES)
	/* Describe the computation in canonical base + constant form
	   for the unwinder.  */
	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
		      gen_rtx_SET (tmpreg,
				   gen_rtx_PLUS (Pmode, basereg, delta)));
    }
  else
    {
      /* DISP > 8k: build the high part in %r1 and finish with a
	 LO_SUM, leaving the high part in %r1 (see the comment above
	 this function).  */
      rtx basereg = gen_rtx_REG (Pmode, base);
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg,
		      gen_rtx_PLUS (Pmode, basereg,
				    gen_rtx_HIGH (Pmode, delta)));
      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
			     gen_rtx_LO_SUM (Pmode, tmpreg, delta));
    }

  if (DO_FRAME_NOTES && note)
    RTX_FRAME_RELATED_P (insn) = 1;
}
3909
/* Compute the total frame size for the current function given SIZE,
   the size of the stack locals.  If FREGS_LIVE is nonnull, set
   *FREGS_LIVE to 1 when any callee-saved floating point register must
   be saved.  Returns the frame size rounded to the preferred stack
   boundary.  */
HOST_WIDE_INT
pa_compute_frame_size (poly_int64 size, int *fregs_live)
{
  int freg_saved = 0;
  int i, j;

  /* The code in pa_expand_prologue and pa_expand_epilogue must
     be consistent with the rounding and size calculation done here.
     Change them at the same time.  */

  /* We do our own stack alignment.  First, round the size of the
     stack locals up to a word boundary.  */
  size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);

  /* Space for previous frame pointer + filler.  If any frame is
     allocated, we need to add in the TARGET_STARTING_FRAME_OFFSET.  We
     waste some space here for the sake of HP compatibility.  The
     first slot is only used when the frame pointer is needed.  */
  if (size || frame_pointer_needed)
    size += pa_starting_frame_offset ();

  /* If the current function calls __builtin_eh_return, then we need
     to allocate stack space for registers that will hold data for
     the exception handler.  */
  if (DO_FRAME_NOTES && crtl->calls_eh_return)
    {
      unsigned int i;

      /* Count the EH data registers; the loop body is empty.  */
      for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
	continue;
      size += i * UNITS_PER_WORD;
    }

  /* Account for space used by the callee general register saves.  */
  for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
    if (df_regs_ever_live_p (i))
      size += UNITS_PER_WORD;

  /* Account for space used by the callee floating point register saves.  */
  for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
    if (df_regs_ever_live_p (i)
	|| (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
      {
	freg_saved = 1;

	/* We always save both halves of the FP register, so always
	   increment the frame size by 8 bytes.  */
	size += 8;
      }

  /* If any of the floating registers are saved, account for the
     alignment needed for the floating point register save block.  */
  if (freg_saved)
    {
      size = (size + 7) & ~7;
      if (fregs_live)
	*fregs_live = 1;
    }

  /* The various ABIs include space for the outgoing parameters in the
     size of the current function's stack frame.  We don't need to align
     for the outgoing arguments as their alignment is set by the final
     rounding for the frame as a whole.  */
  size += crtl->outgoing_args_size;

  /* Allocate space for the fixed frame marker.  This space must be
     allocated for any function that makes calls or allocates
     stack space.  */
  if (!crtl->is_leaf || size)
    size += TARGET_64BIT ? 48 : 32;

  /* Finally, round to the preferred stack boundary.  */
  return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
	  & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
}
3985
3986 /* Output function label, and associated .PROC and .CALLINFO statements. */
3987
3988 void
pa_output_function_label(FILE * file)3989 pa_output_function_label (FILE *file)
3990 {
3991 /* The function's label and associated .PROC must never be
3992 separated and must be output *after* any profiling declarations
3993 to avoid changing spaces/subspaces within a procedure. */
3994 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3995 fputs ("\t.PROC\n", file);
3996
3997 /* pa_expand_prologue does the dirty work now. We just need
3998 to output the assembler directives which denote the start
3999 of a function. */
4000 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
4001 if (crtl->is_leaf)
4002 fputs (",NO_CALLS", file);
4003 else
4004 fputs (",CALLS", file);
4005 if (rp_saved)
4006 fputs (",SAVE_RP", file);
4007
4008 /* The SAVE_SP flag is used to indicate that register %r3 is stored
4009 at the beginning of the frame and that it is used as the frame
4010 pointer for the frame. We do this because our current frame
4011 layout doesn't conform to that specified in the HP runtime
4012 documentation and we need a way to indicate to programs such as
4013 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
4014 isn't used by HP compilers but is supported by the assembler.
4015 However, SAVE_SP is supposed to indicate that the previous stack
4016 pointer has been saved in the frame marker. */
4017 if (frame_pointer_needed)
4018 fputs (",SAVE_SP", file);
4019
4020 /* Pass on information about the number of callee register saves
4021 performed in the prologue.
4022
4023 The compiler is supposed to pass the highest register number
4024 saved, the assembler then has to adjust that number before
4025 entering it into the unwind descriptor (to account for any
4026 caller saved registers with lower register numbers than the
4027 first callee saved register). */
4028 if (gr_saved)
4029 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
4030
4031 if (fr_saved)
4032 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
4033
4034 fputs ("\n\t.ENTRY\n", file);
4035 }
4036
/* Output function prologue.

   The actual prologue insns were generated by pa_expand_prologue; here
   we only output the function label with its .PROC/.CALLINFO directives
   and run a final cleanup pass over the insn stream.  */

static void
pa_output_function_prologue (FILE *file)
{
  pa_output_function_label (file);
  /* NOTE(review): presumably deferred to output time so that the insn
     stream is final -- confirm against remove_useless_addtr_insns.  */
  remove_useless_addtr_insns (0);
}
4045
/* The label is output by ASM_DECLARE_FUNCTION_NAME on linux.

   Unlike pa_output_function_prologue, no label or directives are
   emitted here; only the insn-stream cleanup pass is run.  */

static void
pa_linux_output_function_prologue (FILE *file ATTRIBUTE_UNUSED)
{
  remove_useless_addtr_insns (0);
}
4053
/* Expand the RTL prologue for the current function: save %r2 (RP),
   allocate the stack frame, set up the frame pointer when one is
   needed, and save the callee-saved general and floating point
   registers.  The layout produced here must stay in sync with
   pa_compute_frame_size and pa_expand_epilogue.  */

void
pa_expand_prologue (void)
{
  int merge_sp_adjust_with_store = 0;
  HOST_WIDE_INT size = get_frame_size ();
  HOST_WIDE_INT offset;
  int i;
  rtx tmpreg;
  rtx_insn *insn;

  /* These file-scope counters are read later by pa_output_function_label
     when it emits the .CALLINFO directive.  */
  gr_saved = 0;
  fr_saved = 0;
  save_fregs = 0;

  /* Compute total size for frame pointer, filler, locals and rounding to
     the next word boundary.  Similar code appears in pa_compute_frame_size
     and must be changed in tandem with this code.  */
  local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
  if (local_fsize || frame_pointer_needed)
    local_fsize += pa_starting_frame_offset ();

  actual_fsize = pa_compute_frame_size (size, &save_fregs);
  if (flag_stack_usage_info)
    current_function_static_stack_size = actual_fsize;

  /* Compute a few things we will use often.  */
  tmpreg = gen_rtx_REG (word_mode, 1);

  /* Save RP first.  The calling conventions manual states RP will
     always be stored into the caller's frame at sp - 20 or sp - 16
     depending on which ABI is in use.  */
  if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
    {
      store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
      rp_saved = true;
    }
  else
    rp_saved = false;

  /* Allocate the local frame and set up the frame pointer if needed.  */
  if (actual_fsize != 0)
    {
      if (frame_pointer_needed)
	{
	  /* Copy the old frame pointer temporarily into %r1.  Set up the
	     new stack pointer, then store away the saved old frame pointer
	     into the stack at sp and at the same time update the stack
	     pointer by actual_fsize bytes.  Two versions, first
	     handles small (<8k) frames.  The second handles large (>=8k)
	     frames.  */
	  insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
	  if (DO_FRAME_NOTES)
	    RTX_FRAME_RELATED_P (insn) = 1;

	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
	  if (DO_FRAME_NOTES)
	    RTX_FRAME_RELATED_P (insn) = 1;

	  if (VAL_14_BITS_P (actual_fsize))
	    store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
	  else
	    {
	      /* It is incorrect to store the saved frame pointer at *sp,
		 then increment sp (writes beyond the current stack boundary).

		 So instead use stwm to store at *sp and post-increment the
		 stack pointer as an atomic operation.  Then increment sp to
		 finish allocating the new frame.  */
	      HOST_WIDE_INT adjust1 = 8192 - 64;
	      HOST_WIDE_INT adjust2 = actual_fsize - adjust1;

	      store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
	      set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
			      adjust2, 1);
	    }

	  /* We set SAVE_SP in frames that need a frame pointer.  Thus,
	     we need to store the previous stack pointer (frame pointer)
	     into the frame marker on targets that use the HP unwind
	     library.  This allows the HP unwind library to be used to
	     unwind GCC frames.  However, we are not fully compatible
	     with the HP library because our frame layout differs from
	     that specified in the HP runtime specification.

	     We don't want a frame note on this instruction as the frame
	     marker moves during dynamic stack allocation.

	     This instruction also serves as a blockage to prevent
	     register spills from being scheduled before the stack
	     pointer is raised.  This is necessary as we store
	     registers using the frame pointer as a base register,
	     and the frame pointer is set before sp is raised.  */
	  if (TARGET_HPUX_UNWIND_LIBRARY)
	    {
	      rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
				       GEN_INT (TARGET_64BIT ? -8 : -4));

	      emit_move_insn (gen_rtx_MEM (word_mode, addr),
			      hard_frame_pointer_rtx);
	    }
	  else
	    emit_insn (gen_blockage ());
	}
      /* no frame pointer needed.  */
      else
	{
	  /* In some cases we can perform the first callee register save
	     and allocating the stack frame at the same time.   If so, just
	     make a note of it and defer allocating the frame until saving
	     the callee registers.  */
	  if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
	    merge_sp_adjust_with_store = 1;
	  /* Cannot optimize.  Adjust the stack frame by actual_fsize
	     bytes.  */
	  else
	    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
			    actual_fsize, 1);
	}
    }

  /* Normal register save.

     Do not save the frame pointer in the frame_pointer_needed case.  It
     was done earlier.  */
  if (frame_pointer_needed)
    {
      offset = local_fsize;

      /* Saving the EH return data registers in the frame is the simplest
	 way to get the frame unwind information emitted.  We put them
	 just before the general registers.  */
      if (DO_FRAME_NOTES && crtl->calls_eh_return)
	{
	  unsigned int i, regno;

	  for (i = 0; ; ++i)
	    {
	      regno = EH_RETURN_DATA_REGNO (i);
	      if (regno == INVALID_REGNUM)
		break;

	      store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}

      for (i = 18; i >= 4; i--)
	if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
	  {
	    store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
	    offset += UNITS_PER_WORD;
	    gr_saved++;
	  }
      /* Account for %r3 which is saved in a special place.  */
      gr_saved++;
    }
  /* No frame pointer needed.  */
  else
    {
      offset = local_fsize - actual_fsize;

      /* Saving the EH return data registers in the frame is the simplest
         way to get the frame unwind information emitted.  */
      if (DO_FRAME_NOTES && crtl->calls_eh_return)
	{
	  unsigned int i, regno;

	  for (i = 0; ; ++i)
	    {
	      regno = EH_RETURN_DATA_REGNO (i);
	      if (regno == INVALID_REGNUM)
		break;

	      /* If merge_sp_adjust_with_store is nonzero, then we can
		 optimize the first save.  */
	      if (merge_sp_adjust_with_store)
		{
		  store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
		  merge_sp_adjust_with_store = 0;
		}
	      else
		store_reg (regno, offset, STACK_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}

      for (i = 18; i >= 3; i--)
	if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
	  {
	    /* If merge_sp_adjust_with_store is nonzero, then we can
	       optimize the first GR save.  */
	    if (merge_sp_adjust_with_store)
	      {
		store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
		merge_sp_adjust_with_store = 0;
	      }
	    else
	      store_reg (i, offset, STACK_POINTER_REGNUM);
	    offset += UNITS_PER_WORD;
	    gr_saved++;
	  }

      /* If we wanted to merge the SP adjustment with a GR save, but we never
	 did any GR saves, then just emit the adjustment here.  */
      if (merge_sp_adjust_with_store)
	set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
			actual_fsize, 1);
    }

  /* The hppa calling conventions say that %r19, the pic offset
     register, is saved at sp - 32 (in this function's frame)
     when generating PIC code.  FIXME:  What is the correct thing
     to do for functions which make no calls and allocate no
     frame?  Do we need to allocate a frame, or can we just omit
     the save?   For now we'll just omit the save.

     We don't want a note on this insn as the frame marker can
     move if there is a dynamic stack allocation.  */
  if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
    {
      rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));

      emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);

    }

  /* Align pointer properly (doubleword boundary).  */
  offset = (offset + 7) & ~7;

  /* Floating point register store.  */
  if (save_fregs)
    {
      rtx base;

      /* First get the frame or stack pointer to the start of the FP register
	 save area.  */
      if (frame_pointer_needed)
	{
	  set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
	  base = hard_frame_pointer_rtx;
	}
      else
	{
	  set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
	  base = stack_pointer_rtx;
	}

      /* Now actually save the FP registers.  */
      for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
	{
	  if (df_regs_ever_live_p (i)
	      || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
	    {
	      rtx addr, reg;
	      rtx_insn *insn;
	      addr = gen_rtx_MEM (DFmode,
				  gen_rtx_POST_INC (word_mode, tmpreg));
	      reg = gen_rtx_REG (DFmode, i);
	      insn = emit_move_insn (addr, reg);
	      if (DO_FRAME_NOTES)
		{
		  RTX_FRAME_RELATED_P (insn) = 1;
		  /* In 32-bit code the store is described to the unwinder
		     as two SFmode stores, one per register half.  */
		  if (TARGET_64BIT)
		    {
		      rtx mem = gen_rtx_MEM (DFmode,
					     plus_constant (Pmode, base,
							    offset));
		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
				    gen_rtx_SET (mem, reg));
		    }
		  else
		    {
		      rtx meml = gen_rtx_MEM (SFmode,
					      plus_constant (Pmode, base,
							     offset));
		      rtx memr = gen_rtx_MEM (SFmode,
					      plus_constant (Pmode, base,
							     offset + 4));
		      rtx regl = gen_rtx_REG (SFmode, i);
		      rtx regr = gen_rtx_REG (SFmode, i + 1);
		      rtx setl = gen_rtx_SET (meml, regl);
		      rtx setr = gen_rtx_SET (memr, regr);
		      rtvec vec;

		      RTX_FRAME_RELATED_P (setl) = 1;
		      RTX_FRAME_RELATED_P (setr) = 1;
		      vec = gen_rtvec (2, setl, setr);
		      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
				    gen_rtx_SEQUENCE (VOIDmode, vec));
		    }
		}
	      offset += GET_MODE_SIZE (DFmode);
	      fr_saved++;
	    }
	}
    }
}
4351
4352 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4353 Handle case where DISP > 8k by using the add_high_const patterns. */
4354
4355 static void
load_reg(int reg,HOST_WIDE_INT disp,int base)4356 load_reg (int reg, HOST_WIDE_INT disp, int base)
4357 {
4358 rtx dest = gen_rtx_REG (word_mode, reg);
4359 rtx basereg = gen_rtx_REG (Pmode, base);
4360 rtx src;
4361
4362 if (VAL_14_BITS_P (disp))
4363 src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4364 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4365 {
4366 rtx delta = GEN_INT (disp);
4367 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4368
4369 emit_move_insn (tmpreg, delta);
4370 if (TARGET_DISABLE_INDEXING)
4371 {
4372 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4373 src = gen_rtx_MEM (word_mode, tmpreg);
4374 }
4375 else
4376 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4377 }
4378 else
4379 {
4380 rtx delta = GEN_INT (disp);
4381 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4382 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4383
4384 emit_move_insn (tmpreg, high);
4385 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4386 }
4387
4388 emit_move_insn (dest, src);
4389 }
4390
4391 /* Update the total code bytes output to the text section. */
4392
4393 static void
update_total_code_bytes(unsigned int nbytes)4394 update_total_code_bytes (unsigned int nbytes)
4395 {
4396 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4397 && !IN_NAMED_SECTION_P (cfun->decl))
4398 {
4399 unsigned int old_total = total_code_bytes;
4400
4401 total_code_bytes += nbytes;
4402
4403 /* Be prepared to handle overflows. */
4404 if (old_total > total_code_bytes)
4405 total_code_bytes = UINT_MAX;
4406 }
4407 }
4408
/* This function generates the assembly code for function exit.
   Args are as for output_function_prologue ().

   The function epilogue should not depend on the current stack
   pointer!  It should use the frame pointer only.  This is mandatory
   because of alloca; we also take advantage of it to omit stack
   adjustments before returning.  */

static void
pa_output_function_epilogue (FILE *file)
{
  rtx_insn *insn = get_last_insn ();
  bool extra_nop;

  /* pa_expand_epilogue does the dirty work now.  We just need
     to output the assembler directives which denote the end
     of a function.

     To make debuggers happy, emit a nop if the epilogue was completely
     eliminated due to a volatile call as the last insn in the
     current function.  That way the return address (in %r2) will
     always point to a valid instruction in the current function.  */

  /* Get the last real insn.  */
  if (NOTE_P (insn))
    insn = prev_real_insn (insn);

  /* If it is a sequence, then look inside.  */
  if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
    insn = as_a <rtx_sequence *> (PATTERN (insn))-> insn (0);

  /* If insn is a CALL_INSN, then it must be a call to a volatile
     function (otherwise there would be epilogue insns).  */
  if (insn && CALL_P (insn))
    {
      fputs ("\tnop\n", file);
      extra_nop = true;
    }
  else
    extra_nop = false;

  fputs ("\t.EXIT\n\t.PROCEND\n", file);

  if (TARGET_SOM && TARGET_GAS)
    {
      /* We are done with this subspace except possibly for some additional
	 debug information.  Forget that we are in this subspace to ensure
	 that the next function is output in its own subspace.  */
      in_section = NULL;
      cfun->machine->in_nsubspa = 2;
    }

  /* Thunks do their own insn accounting.  */
  if (cfun->is_thunk)
    return;

  if (INSN_ADDRESSES_SET_P ())
    {
      /* Compute the size of the function just output: the address of
	 the last insn plus its length (plus 4 bytes for the extra nop
	 above, if one was emitted), rounded up to FUNCTION_BOUNDARY.  */
      last_address = extra_nop ? 4 : 0;
      insn = get_last_nonnote_insn ();
      if (insn)
	{
	  last_address += INSN_ADDRESSES (INSN_UID (insn));
	  if (INSN_P (insn))
	    last_address += insn_default_length (insn);
	}
      last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
		      & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
    }
  else
    /* Insn addresses are unavailable, so the size cannot be measured.  */
    last_address = UINT_MAX;

  /* Finally, update the total number of code bytes output so far.  */
  update_total_code_bytes (last_address);
}
4484
/* Expand the RTL epilogue for the current function: restore %r2 (RP),
   the callee-saved general and floating point registers, and cut the
   stack back.  This must mirror the frame layout established by
   pa_expand_prologue.  */

void
pa_expand_epilogue (void)
{
  rtx tmpreg;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT ret_off = 0;
  int i;
  int merge_sp_adjust_with_load = 0;

  /* We will use this often.  */
  tmpreg = gen_rtx_REG (word_mode, 1);

  /* Try to restore RP early to avoid load/use interlocks when
     RP gets used in the return (bv) instruction.  This appears to still
     be necessary even when we schedule the prologue and epilogue.  */
  if (rp_saved)
    {
      ret_off = TARGET_64BIT ? -16 : -20;
      if (frame_pointer_needed)
	{
	  load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
	  /* Mark RP as already restored.  */
	  ret_off = 0;
	}
      else
	{
	  /* No frame pointer, and stack is smaller than 8k.  */
	  if (VAL_14_BITS_P (ret_off - actual_fsize))
	    {
	      load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
	      ret_off = 0;
	    }
	}
    }

  /* General register restores.  */
  if (frame_pointer_needed)
    {
      offset = local_fsize;

      /* If the current function calls __builtin_eh_return, then we need
	 to restore the saved EH data registers.  */
      if (DO_FRAME_NOTES && crtl->calls_eh_return)
	{
	  unsigned int i, regno;

	  for (i = 0; ; ++i)
	    {
	      regno = EH_RETURN_DATA_REGNO (i);
	      if (regno == INVALID_REGNUM)
		break;

	      load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}

      for (i = 18; i >= 4; i--)
	if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
	  {
	    load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
	    offset += UNITS_PER_WORD;
	  }
    }
  else
    {
      offset = local_fsize - actual_fsize;

      /* If the current function calls __builtin_eh_return, then we need
	 to restore the saved EH data registers.  */
      if (DO_FRAME_NOTES && crtl->calls_eh_return)
	{
	  unsigned int i, regno;

	  for (i = 0; ; ++i)
	    {
	      regno = EH_RETURN_DATA_REGNO (i);
	      if (regno == INVALID_REGNUM)
		break;

	      /* Only for the first load.
		 merge_sp_adjust_with_load holds the register load
		 with which we will merge the sp adjustment.  */
	      if (merge_sp_adjust_with_load == 0
		  && local_fsize == 0
		  && VAL_14_BITS_P (-actual_fsize))
		merge_sp_adjust_with_load = regno;
	      else
		load_reg (regno, offset, STACK_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}

      for (i = 18; i >= 3; i--)
	{
	  if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
	    {
	      /* Only for the first load.
		 merge_sp_adjust_with_load holds the register load
		 with which we will merge the sp adjustment.  */
	      if (merge_sp_adjust_with_load == 0
		  && local_fsize == 0
		  && VAL_14_BITS_P (-actual_fsize))
		merge_sp_adjust_with_load = i;
	      else
		load_reg (i, offset, STACK_POINTER_REGNUM);
	      offset += UNITS_PER_WORD;
	    }
	}
    }

  /* Align pointer properly (doubleword boundary).  */
  offset = (offset + 7) & ~7;

  /* FP register restores.  */
  if (save_fregs)
    {
      /* Adjust the register to index off of.  */
      if (frame_pointer_needed)
	set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
      else
	set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);

      /* Actually do the restores now.  */
      for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
	if (df_regs_ever_live_p (i)
	    || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
	  {
	    rtx src = gen_rtx_MEM (DFmode,
				   gen_rtx_POST_INC (word_mode, tmpreg));
	    rtx dest = gen_rtx_REG (DFmode, i);
	    emit_move_insn (dest, src);
	  }
    }

  /* Emit a blockage insn here to keep these insns from being moved to
     an earlier spot in the epilogue, or into the main instruction stream.

     This is necessary as we must not cut the stack back before all the
     restores are finished.  */
  emit_insn (gen_blockage ());

  /* Reset stack pointer (and possibly frame pointer).  The stack
     pointer is initially set to fp + 64 to avoid a race condition.  */
  if (frame_pointer_needed)
    {
      rtx delta = GEN_INT (-64);

      set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
      emit_insn (gen_pre_load (hard_frame_pointer_rtx,
			       stack_pointer_rtx, delta));
    }
  /* If we were deferring a callee register restore, do it now.  */
  else if (merge_sp_adjust_with_load)
    {
      rtx delta = GEN_INT (-actual_fsize);
      rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);

      emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
    }
  else if (actual_fsize != 0)
    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
		    - actual_fsize, 0);

  /* If we haven't restored %r2 yet (no frame pointer, and a stack
     frame greater than 8k), do so now.  */
  if (ret_off != 0)
    load_reg (2, ret_off, STACK_POINTER_REGNUM);

  if (DO_FRAME_NOTES && crtl->calls_eh_return)
    {
      rtx sa = EH_RETURN_STACKADJ_RTX;

      emit_insn (gen_blockage ());
      emit_insn (TARGET_64BIT
		 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
		 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
    }
}
4663
4664 bool
pa_can_use_return_insn(void)4665 pa_can_use_return_insn (void)
4666 {
4667 if (!reload_completed)
4668 return false;
4669
4670 if (frame_pointer_needed)
4671 return false;
4672
4673 if (df_regs_ever_live_p (2))
4674 return false;
4675
4676 if (crtl->profile)
4677 return false;
4678
4679 return pa_compute_frame_size (get_frame_size (), 0) == 0;
4680 }
4681
/* Return a pseudo holding the entry value of the PIC offset table
   register for the current function.  */

rtx
hppa_pic_save_rtx (void)
{
  return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
}
4687
4688 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4689 #define NO_DEFERRED_PROFILE_COUNTERS 0
4690 #endif
4691
4692
4693 /* Vector of funcdef numbers. */
4694 static vec<int> funcdef_nos;
4695
4696 /* Output deferred profile counters. */
4697 static void
output_deferred_profile_counters(void)4698 output_deferred_profile_counters (void)
4699 {
4700 unsigned int i;
4701 int align, n;
4702
4703 if (funcdef_nos.is_empty ())
4704 return;
4705
4706 switch_to_section (data_section);
4707 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4708 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4709
4710 for (i = 0; funcdef_nos.iterate (i, &n); i++)
4711 {
4712 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4713 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4714 }
4715
4716 funcdef_nos.release ();
4717 }
4718
/* Emit RTL at the start of the function numbered LABEL_NO to call the
   profiler entry point _mcount.  %r26 is loaded with the caller's
   return pointer (%r2); %r25 receives the address of the current
   function unless a pc-relative call to _mcount is possible; %r24
   receives the address of a profile counter when deferred counters
   are in use.  */

void
hppa_profile_hook (int label_no)
{
  rtx_code_label *label_rtx = gen_label_rtx ();
  int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE);
  rtx arg_bytes, begin_label_rtx, mcount, sym;
  rtx_insn *call_insn;
  char begin_label_name[16];
  bool use_mcount_pcrel_call;

  /* Set up call destination.  */
  sym = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
  pa_encode_label (sym);
  mcount = gen_rtx_MEM (Pmode, sym);

  /* If we can reach _mcount with a pc-relative call, we can optimize
     loading the address of the current function.  This requires linker
     long branch stub support.  */
  if (!TARGET_PORTABLE_RUNTIME
      && !TARGET_LONG_CALLS
      && (TARGET_SOM || flag_function_sections))
    use_mcount_pcrel_call = TRUE;
  else
    use_mcount_pcrel_call = FALSE;

  ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
			       label_no);
  begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));

  /* Pass the return pointer (%r2) to _mcount in %r26.  */
  emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));

  if (!use_mcount_pcrel_call)
    {
      /* The address of the function is loaded into %r25 with an instruction-
	 relative sequence that avoids the use of relocations.  We use SImode
	 for the address of the function in both 32 and 64-bit code to avoid
	 having to provide DImode versions of the lcla2 pattern.  */
      if (TARGET_PA_20)
	emit_insn (gen_lcla2 (gen_rtx_REG (SImode, 25), label_rtx));
      else
	emit_insn (gen_lcla1 (gen_rtx_REG (SImode, 25), label_rtx));
    }

  if (!NO_DEFERRED_PROFILE_COUNTERS)
    {
      rtx count_label_rtx, addr, r24;
      char count_label_name[16];

      /* Record the funcdef number so output_deferred_profile_counters
	 can emit the matching LPn counter word later.  */
      funcdef_nos.safe_push (label_no);
      ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
      count_label_rtx = gen_rtx_SYMBOL_REF (Pmode,
					    ggc_strdup (count_label_name));

      addr = force_reg (Pmode, count_label_rtx);
      r24 = gen_rtx_REG (Pmode, 24);
      emit_move_insn (r24, addr);

      arg_bytes = GEN_INT (TARGET_64BIT ? 24 : 12);
      if (use_mcount_pcrel_call)
	call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
						     begin_label_rtx));
      else
	call_insn = emit_call_insn (gen_call (mcount, arg_bytes));

      use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
    }
  else
    {
      arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8);
      if (use_mcount_pcrel_call)
	call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
						     begin_label_rtx));
      else
	call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
    }

  /* Mark the argument registers as used by the call.  */
  use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
  use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));

  /* Indicate the _mcount call cannot throw, nor will it execute a
     non-local goto.  */
  make_reg_eh_region_note_nothrow_nononlocal (call_insn);

  /* Allocate space for fixed arguments.  */
  if (reg_parm_stack_space > crtl->outgoing_args_size)
    crtl->outgoing_args_size = reg_parm_stack_space;
}
4806
4807 /* Fetch the return address for the frame COUNT steps up from
4808 the current frame, after the prologue. FRAMEADDR is the
4809 frame pointer of the COUNT frame.
4810
4811 We want to ignore any export stub remnants here. To handle this,
4812 we examine the code at the return address, and if it is an export
4813 stub, we return a memory rtx for the stub return address stored
4814 at frame-24.
4815
4816 The value returned is used in two different ways:
4817
4818 1. To find a function's caller.
4819
4820 2. To change the return address for a function.
4821
4822 This function handles most instances of case 1; however, it will
4823 fail if there are two levels of stubs to execute on the return
4824 path. The only way I believe that can happen is if the return value
4825 needs a parameter relocation, which never happens for C code.
4826
4827 This function handles most instances of case 2; however, it will
4828 fail if we did not originally have stub code on the return path
4829 but will need stub code on the new return path. This can happen if
4830 the caller & callee are both in the main program, but the new
4831 return location is in a shared library. */
4832
4833 rtx
pa_return_addr_rtx(int count,rtx frameaddr)4834 pa_return_addr_rtx (int count, rtx frameaddr)
4835 {
4836 rtx label;
4837 rtx rp;
4838 rtx saved_rp;
4839 rtx ins;
4840
4841 /* The instruction stream at the return address of a PA1.X export stub is:
4842
4843 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4844 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4845 0x00011820 | stub+16: mtsp r1,sr0
4846 0xe0400002 | stub+20: be,n 0(sr0,rp)
4847
4848 0xe0400002 must be specified as -532676606 so that it won't be
4849 rejected as an invalid immediate operand on 64-bit hosts.
4850
4851 The instruction stream at the return address of a PA2.0 export stub is:
4852
4853 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4854 0xe840d002 | stub+12: bve,n (rp)
4855 */
4856
4857 HOST_WIDE_INT insns[4];
4858 int i, len;
4859
4860 if (count != 0)
4861 return NULL_RTX;
4862
4863 rp = get_hard_reg_initial_val (Pmode, 2);
4864
4865 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4866 return rp;
4867
4868 /* If there is no export stub then just use the value saved from
4869 the return pointer register. */
4870
4871 saved_rp = gen_reg_rtx (Pmode);
4872 emit_move_insn (saved_rp, rp);
4873
4874 /* Get pointer to the instruction stream. We have to mask out the
4875 privilege level from the two low order bits of the return address
4876 pointer here so that ins will point to the start of the first
4877 instruction that would have been executed if we returned. */
4878 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4879 label = gen_label_rtx ();
4880
4881 if (TARGET_PA_20)
4882 {
4883 insns[0] = 0x4bc23fd1;
4884 insns[1] = -398405630;
4885 len = 2;
4886 }
4887 else
4888 {
4889 insns[0] = 0x4bc23fd1;
4890 insns[1] = 0x004010a1;
4891 insns[2] = 0x00011820;
4892 insns[3] = -532676606;
4893 len = 4;
4894 }
4895
4896 /* Check the instruction stream at the normal return address for the
4897 export stub. If it is an export stub, than our return address is
4898 really in -24[frameaddr]. */
4899
4900 for (i = 0; i < len; i++)
4901 {
4902 rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4903 rtx op1 = GEN_INT (insns[i]);
4904 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4905 }
4906
4907 /* Here we know that our return address points to an export
4908 stub. We don't want to return the address of the export stub,
4909 but rather the return address of the export stub. That return
4910 address is stored at -24[frameaddr]. */
4911
4912 emit_move_insn (saved_rp,
4913 gen_rtx_MEM (Pmode,
4914 memory_address (Pmode,
4915 plus_constant (Pmode, frameaddr,
4916 -24))));
4917
4918 emit_label (label);
4919
4920 return saved_rp;
4921 }
4922
4923 void
pa_emit_bcond_fp(rtx operands[])4924 pa_emit_bcond_fp (rtx operands[])
4925 {
4926 enum rtx_code code = GET_CODE (operands[0]);
4927 rtx operand0 = operands[1];
4928 rtx operand1 = operands[2];
4929 rtx label = operands[3];
4930
4931 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
4932 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4933
4934 emit_jump_insn (gen_rtx_SET (pc_rtx,
4935 gen_rtx_IF_THEN_ELSE (VOIDmode,
4936 gen_rtx_fmt_ee (NE,
4937 VOIDmode,
4938 gen_rtx_REG (CCFPmode, 0),
4939 const0_rtx),
4940 gen_rtx_LABEL_REF (VOIDmode, label),
4941 pc_rtx)));
4942
4943 }
4944
/* Adjust the cost of a scheduling dependency.  Return the new cost of
   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.
   Only anti and output dependencies are adjusted here; true
   dependencies are described with bypasses in pa.md.  */

static int
pa_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
		unsigned int)
{
  enum attr_type attr_type;

  /* Don't adjust costs for a pa8000 chip, also do not adjust any
     true dependencies as they are described with bypasses now.  */
  if (pa_cpu >= PROCESSOR_8000 || dep_type == 0)
    return cost;

  /* Unrecognizable insns get no adjustment.  */
  if (! recog_memoized (insn))
    return 0;

  attr_type = get_attr_type (insn);

  switch (dep_type)
    {
    case REG_DEP_ANTI:
      /* Anti dependency; DEP_INSN reads a register that INSN writes some
	 cycles later.  */

      if (attr_type == TYPE_FPLOAD)
	{
	  rtx pat = PATTERN (insn);
	  rtx dep_pat = PATTERN (dep_insn);
	  if (GET_CODE (pat) == PARALLEL)
	    {
	      /* This happens for the fldXs,mb patterns.  */
	      pat = XVECEXP (pat, 0, 0);
	    }
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    /* If this happens, we have to extend this to schedule
	       optimally.  Return 0 for now.  */
	    return 0;

	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
	    {
	      if (! recog_memoized (dep_insn))
		return 0;
	      switch (get_attr_type (dep_insn))
		{
		case TYPE_FPALU:
		case TYPE_FPMULSGL:
		case TYPE_FPMULDBL:
		case TYPE_FPDIVSGL:
		case TYPE_FPDIVDBL:
		case TYPE_FPSQRTSGL:
		case TYPE_FPSQRTDBL:
		  /* A fpload can't be issued until one cycle before a
		     preceding arithmetic operation has finished if
		     the target of the fpload is any of the sources
		     (or destination) of the arithmetic operation.  */
		  return insn_default_latency (dep_insn) - 1;

		default:
		  return 0;
		}
	    }
	}
      else if (attr_type == TYPE_FPALU)
	{
	  rtx pat = PATTERN (insn);
	  rtx dep_pat = PATTERN (dep_insn);
	  if (GET_CODE (pat) == PARALLEL)
	    {
	      /* This happens for the fldXs,mb patterns.  */
	      pat = XVECEXP (pat, 0, 0);
	    }
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    /* If this happens, we have to extend this to schedule
	       optimally.  Return 0 for now.  */
	    return 0;

	  if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
	    {
	      if (! recog_memoized (dep_insn))
		return 0;
	      switch (get_attr_type (dep_insn))
		{
		case TYPE_FPDIVSGL:
		case TYPE_FPDIVDBL:
		case TYPE_FPSQRTSGL:
		case TYPE_FPSQRTDBL:
		  /* An ALU flop can't be issued until two cycles before a
		     preceding divide or sqrt operation has finished if
		     the target of the ALU flop is any of the sources
		     (or destination) of the divide or sqrt operation.  */
		  return insn_default_latency (dep_insn) - 2;

		default:
		  return 0;
		}
	    }
	}

      /* For other anti dependencies, the cost is 0.  */
      return 0;

    case REG_DEP_OUTPUT:
      /* Output dependency; DEP_INSN writes a register that INSN writes some
	 cycles later.  */
      if (attr_type == TYPE_FPLOAD)
	{
	  rtx pat = PATTERN (insn);
	  rtx dep_pat = PATTERN (dep_insn);
	  if (GET_CODE (pat) == PARALLEL)
	    {
	      /* This happens for the fldXs,mb patterns.  */
	      pat = XVECEXP (pat, 0, 0);
	    }
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    /* If this happens, we have to extend this to schedule
	       optimally.  Return 0 for now.  */
	    return 0;

	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
	    {
	      if (! recog_memoized (dep_insn))
		return 0;
	      switch (get_attr_type (dep_insn))
		{
		case TYPE_FPALU:
		case TYPE_FPMULSGL:
		case TYPE_FPMULDBL:
		case TYPE_FPDIVSGL:
		case TYPE_FPDIVDBL:
		case TYPE_FPSQRTSGL:
		case TYPE_FPSQRTDBL:
		  /* A fpload can't be issued until one cycle before a
		     preceding arithmetic operation has finished if
		     the target of the fpload is the destination of the
		     arithmetic operation.

		     Exception: For PA7100LC, PA7200 and PA7300, the cost
		     is 3 cycles, unless they bundle together.   We also
		     pay the penalty if the second insn is a fpload.  */
		  return insn_default_latency (dep_insn) - 1;

		default:
		  return 0;
		}
	    }
	}
      else if (attr_type == TYPE_FPALU)
	{
	  rtx pat = PATTERN (insn);
	  rtx dep_pat = PATTERN (dep_insn);
	  if (GET_CODE (pat) == PARALLEL)
	    {
	      /* This happens for the fldXs,mb patterns.  */
	      pat = XVECEXP (pat, 0, 0);
	    }
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    /* If this happens, we have to extend this to schedule
	       optimally.  Return 0 for now.  */
	    return 0;

	  if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
	    {
	      if (! recog_memoized (dep_insn))
		return 0;
	      switch (get_attr_type (dep_insn))
		{
		case TYPE_FPDIVSGL:
		case TYPE_FPDIVDBL:
		case TYPE_FPSQRTSGL:
		case TYPE_FPSQRTDBL:
		  /* An ALU flop can't be issued until two cycles before a
		     preceding divide or sqrt operation has finished if
		     the target of the ALU flop is also the target of
		     the divide or sqrt operation.  */
		  return insn_default_latency (dep_insn) - 2;

		default:
		  return 0;
		}
	    }
	}

      /* For other output dependencies, the cost is 0.  */
      return 0;

    default:
      gcc_unreachable ();
    }
}
5135
5136 /* The 700 can only issue a single insn at a time.
5137 The 7XXX processors can issue two insns at a time.
5138 The 8000 can issue 4 insns at a time. */
5139 static int
pa_issue_rate(void)5140 pa_issue_rate (void)
5141 {
5142 switch (pa_cpu)
5143 {
5144 case PROCESSOR_700: return 1;
5145 case PROCESSOR_7100: return 2;
5146 case PROCESSOR_7100LC: return 2;
5147 case PROCESSOR_7200: return 2;
5148 case PROCESSOR_7300: return 2;
5149 case PROCESSOR_8000: return 4;
5150
5151 default:
5152 gcc_unreachable ();
5153 }
5154 }
5155
5156
5157
/* Return any length plus adjustment needed by INSN which already has
   its length computed as LENGTH.  Return LENGTH if no adjustment is
   necessary.

   Also compute the length of an inline block move here as it is too
   complicated to express as a length attribute in pa.md.  */
int
pa_adjust_insn_length (rtx_insn *insn, int length)
{
  rtx pat = PATTERN (insn);

  /* If length is negative or undefined, provide initial length.
     (The unsigned cast maps all negative values above INT_MAX.)  */
  if ((unsigned int) length >= INT_MAX)
    {
      /* For a delay-slot SEQUENCE, measure the branch insn itself.  */
      if (GET_CODE (pat) == SEQUENCE)
	insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));

      switch (get_attr_type (insn))
	{
	case TYPE_MILLI:
	  length = pa_attr_length_millicode_call (insn);
	  break;
	case TYPE_CALL:
	  length = pa_attr_length_call (insn, 0);
	  break;
	case TYPE_SIBCALL:
	  length = pa_attr_length_call (insn, 1);
	  break;
	case TYPE_DYNCALL:
	  length = pa_attr_length_indirect_call (insn);
	  break;
	case TYPE_SH_FUNC_ADRS:
	  length = pa_attr_length_millicode_call (insn) + 20;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Block move pattern.  */
  if (NONJUMP_INSN_P (insn)
      && GET_CODE (pat) == PARALLEL
      && GET_CODE (XVECEXP (pat, 0, 0)) == SET
      && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
      && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
      && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
      && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
    length += compute_cpymem_length (insn) - 4;
  /* Block clear pattern.  */
  else if (NONJUMP_INSN_P (insn)
	   && GET_CODE (pat) == PARALLEL
	   && GET_CODE (XVECEXP (pat, 0, 0)) == SET
	   && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
	   && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
	   && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
    length += compute_clrmem_length (insn) - 4;
  /* Conditional branch with an unfilled delay slot.  */
  else if (JUMP_P (insn) && ! simplejump_p (insn))
    {
      /* Adjust a short backwards conditional with an unfilled delay slot.  */
      if (GET_CODE (pat) == SET
	  && length == 4
	  && JUMP_LABEL (insn) != NULL_RTX
	  && ! forward_branch_p (insn))
	length += 4;
      else if (GET_CODE (pat) == PARALLEL
	       && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
	       && length == 4)
	length += 4;
      /* Adjust dbra insn with short backwards conditional branch with
	 unfilled delay slot -- only for case where counter is in a
	 general register.  */
      else if (GET_CODE (pat) == PARALLEL
	       && GET_CODE (XVECEXP (pat, 0, 1)) == SET
	       && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
	       && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
	       && length == 4
	       && ! forward_branch_p (insn))
	length += 4;
    }
  return length;
}
5240
/* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook.  The
   punctuation characters '@', '#', '*' and '^' are the ones handled
   by pa_print_operand.  */

static bool
pa_print_operand_punct_valid_p (unsigned char code)
{
  return code == '@' || code == '#' || code == '*' || code == '^';
}
5254
/* Print operand X (an rtx) in assembler syntax to file FILE.
   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
   For `%' followed by punctuation, CODE is the punctuation and X is null.
   Unhandled codes fall through to the generic REG/MEM/constant output
   at the bottom.  */

void
pa_print_operand (FILE *file, rtx x, int code)
{
  switch (code)
    {
    case '#':
      /* Output a 'nop' if there's nothing for the delay slot.  */
      if (dbr_sequence_length () == 0)
	fputs ("\n\tnop", file);
      return;
    case '*':
      /* Output a nullification completer if there's nothing for the */
      /* delay slot or nullification is requested.  */
      if (dbr_sequence_length () == 0 ||
	  (final_sequence &&
	   INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
	fputs (",n", file);
      return;
    case 'R':
      /* Print out the second register name of a register pair.
	 I.e., R (6) => 7.  */
      fputs (reg_names[REGNO (x) + 1], file);
      return;
    case 'r':
      /* A register or zero.  */
      if (x == const0_rtx
	  || (x == CONST0_RTX (DFmode))
	  || (x == CONST0_RTX (SFmode)))
	{
	  fputs ("%r0", file);
	  return;
	}
      else
	break;
    case 'f':
      /* A register or zero (floating point).  */
      if (x == const0_rtx
	  || (x == CONST0_RTX (DFmode))
	  || (x == CONST0_RTX (SFmode)))
	{
	  fputs ("%fr0", file);
	  return;
	}
      else
	break;
    case 'A':
      {
	rtx xoperands[2];

	/* X is a (mem (lo_sum reg (unspec [symbol]))); print the
	   global address of the symbol followed by "(reg)".  */
	xoperands[0] = XEXP (XEXP (x, 0), 0);
	xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
	pa_output_global_address (file, xoperands[1], 0);
	fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
	return;
      }

    case 'C':			/* Plain (C)ondition */
    case 'X':
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("=", file);  break;
	case NE:
	  fputs ("<>", file);  break;
	case GT:
	  fputs (">", file);  break;
	case GE:
	  fputs (">=", file);  break;
	case GEU:
	  fputs (">>=", file);  break;
	case GTU:
	  fputs (">>", file);  break;
	case LT:
	  fputs ("<", file);  break;
	case LE:
	  fputs ("<=", file);  break;
	case LEU:
	  fputs ("<<=", file);  break;
	case LTU:
	  fputs ("<<", file);  break;
	default:
	  gcc_unreachable ();
	}
      return;
    case 'N':			/* Condition, (N)egated */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("<>", file);  break;
	case NE:
	  fputs ("=", file);  break;
	case GT:
	  fputs ("<=", file);  break;
	case GE:
	  fputs ("<", file);  break;
	case GEU:
	  fputs ("<<", file);  break;
	case GTU:
	  fputs ("<<=", file);  break;
	case LT:
	  fputs (">=", file);  break;
	case LE:
	  fputs (">", file);  break;
	case LEU:
	  fputs (">>", file);  break;
	case LTU:
	  fputs (">>=", file);  break;
	default:
	  gcc_unreachable ();
	}
      return;
    /* For floating point comparisons.  Note that the output
       predicates are the complement of the desired mode.  The
       conditions for GT, GE, LT, LE and LTGT cause an invalid
       operation exception if the result is unordered and this
       exception is enabled in the floating-point status register.  */
    case 'Y':
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("!=", file);  break;
	case NE:
	  fputs ("=", file);  break;
	case GT:
	  fputs ("!>", file);  break;
	case GE:
	  fputs ("!>=", file);  break;
	case LT:
	  fputs ("!<", file);  break;
	case LE:
	  fputs ("!<=", file);  break;
	case LTGT:
	  fputs ("!<>", file);  break;
	case UNLE:
	  fputs ("!?<=", file);  break;
	case UNLT:
	  fputs ("!?<", file);  break;
	case UNGE:
	  fputs ("!?>=", file);  break;
	case UNGT:
	  fputs ("!?>", file);  break;
	case UNEQ:
	  fputs ("!?=", file);  break;
	case UNORDERED:
	  fputs ("!?", file);  break;
	case ORDERED:
	  fputs ("?", file);  break;
	default:
	  gcc_unreachable ();
	}
      return;
    case 'S':			/* Condition, operands are (S)wapped.  */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("=", file);  break;
	case NE:
	  fputs ("<>", file);  break;
	case GT:
	  fputs ("<", file);  break;
	case GE:
	  fputs ("<=", file);  break;
	case GEU:
	  fputs ("<<=", file);  break;
	case GTU:
	  fputs ("<<", file);  break;
	case LT:
	  fputs (">", file);  break;
	case LE:
	  fputs (">=", file);  break;
	case LEU:
	  fputs (">>=", file);  break;
	case LTU:
	  fputs (">>", file);  break;
	default:
	  gcc_unreachable ();
	}
      return;
    case 'B':			/* Condition, (B)oth swapped and negate.  */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("<>", file);  break;
	case NE:
	  fputs ("=", file);  break;
	case GT:
	  fputs (">=", file);  break;
	case GE:
	  fputs (">", file);  break;
	case GEU:
	  fputs (">>", file);  break;
	case GTU:
	  fputs (">>=", file);  break;
	case LT:
	  fputs ("<=", file);  break;
	case LE:
	  fputs ("<", file);  break;
	case LEU:
	  fputs ("<<", file);  break;
	case LTU:
	  fputs ("<<=", file);  break;
	default:
	  gcc_unreachable ();
	}
      return;
    case 'k':
      /* Bitwise complement of the constant.  */
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
      return;
    case 'Q':
      /* 64 minus the low 6 bits of the constant (64-bit shift counts).  */
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
      return;
    case 'L':
      /* 32 minus the low 5 bits of the constant (32-bit shift counts).  */
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
      return;
    case 'o':
      /* A shift amount of 1, 2 or 3 (shNadd-style scaling).  */
      gcc_assert (GET_CODE (x) == CONST_INT
		  && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      return;
    case 'O':
      /* Base-2 logarithm of an exact power of two.  */
      gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
      fprintf (file, "%d", exact_log2 (INTVAL (x)));
      return;
    case 'p':
      /* 63 minus the low 6 bits of the constant (64-bit bit positions).  */
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
      return;
    case 'P':
      /* 31 minus the low 5 bits of the constant (32-bit bit positions).  */
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
      return;
    case 'I':
      /* Emit an "i" immediate marker for constant operands.  */
      if (GET_CODE (x) == CONST_INT)
	fputs ("i", file);
      return;
    case 'M':
    case 'F':
      /* Memory-access completer for a load/store ('F' for FP accesses,
	 which also take an "s" suffix in the default dialect).  */
      switch (GET_CODE (XEXP (x, 0)))
	{
	case PRE_DEC:
	case PRE_INC:
	  if (ASSEMBLER_DIALECT == 0)
	    fputs ("s,mb", file);
	  else
	    fputs (",mb", file);
	  break;
	case POST_DEC:
	case POST_INC:
	  if (ASSEMBLER_DIALECT == 0)
	    fputs ("s,ma", file);
	  else
	    fputs (",ma", file);
	  break;
	case PLUS:
	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
	      && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
	    {
	      if (ASSEMBLER_DIALECT == 0)
		fputs ("x", file);
	    }
	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
		   || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
	    {
	      if (ASSEMBLER_DIALECT == 0)
		fputs ("x,s", file);
	      else
		fputs (",s", file);
	    }
	  else if (code == 'F' && ASSEMBLER_DIALECT == 0)
	    fputs ("s", file);
	  break;
	default:
	  if (code == 'F' && ASSEMBLER_DIALECT == 0)
	    fputs ("s", file);
	  break;
	}
      return;
    case 'G':
      /* Global address, unrounded.  */
      pa_output_global_address (file, x, 0);
      return;
    case 'H':
      /* Global address, rounded for an LR field selector.  */
      pa_output_global_address (file, x, 1);
      return;
    case 0:			/* Don't do anything special */
      break;
    case 'Z':
      {
	/* zdepwi operand triple (32-bit deposit).  */
	unsigned op[3];
	compute_zdepwi_operands (INTVAL (x), op);
	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
	return;
      }
    case 'z':
      {
	/* zdepdi operand triple (64-bit deposit).  */
	unsigned op[3];
	compute_zdepdi_operands (INTVAL (x), op);
	fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
	return;
      }
    case 'c':
      /* We can get here from a .vtable_inherit due to our
	 CONSTANT_ADDRESS_P rejecting perfectly good constant
	 addresses.  */
      break;
    default:
      gcc_unreachable ();
    }
  if (GET_CODE (x) == REG)
    {
      fputs (reg_names [REGNO (x)], file);
      /* Narrow FP values live in half of a 64-bit FP register; select
	 the right/left half with an "R"/"L" suffix.  */
      if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
	{
	  fputs ("R", file);
	  return;
	}
      if (FP_REG_P (x)
	  && GET_MODE_SIZE (GET_MODE (x)) <= 4
	  && (REGNO (x) & 1) == 0)
	fputs ("L", file);
    }
  else if (GET_CODE (x) == MEM)
    {
      int size = GET_MODE_SIZE (GET_MODE (x));
      rtx base = NULL_RTX;
      switch (GET_CODE (XEXP (x, 0)))
	{
	case PRE_DEC:
	case POST_DEC:
	  base = XEXP (XEXP (x, 0), 0);
	  fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
	  break;
	case PRE_INC:
	case POST_INC:
	  base = XEXP (XEXP (x, 0), 0);
	  fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
	  break;
	case PLUS:
	  if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
	    fprintf (file, "%s(%s)",
		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
		     reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
	  else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
	    fprintf (file, "%s(%s)",
		     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
		     reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
	  else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
	    {
	      /* Because the REG_POINTER flag can get lost during reload,
		 pa_legitimate_address_p canonicalizes the order of the
		 index and base registers in the combined move patterns.  */
	      rtx base = XEXP (XEXP (x, 0), 1);
	      rtx index = XEXP (XEXP (x, 0), 0);

	      fprintf (file, "%s(%s)",
		       reg_names [REGNO (index)], reg_names [REGNO (base)]);
	    }
	  else
	    output_address (GET_MODE (x), XEXP (x, 0));
	  break;
	default:
	  output_address (GET_MODE (x), XEXP (x, 0));
	  break;
	}
    }
  else
    output_addr_const (file, x);
}
5630
/* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.
   ROUND_CONSTANT nonzero means the constant part must be rounded as
   required by an LR field selector.  */

void
pa_output_global_address (FILE *file, rtx x, int round_constant)
{

  /* Imagine (high (const (plus ...))).  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
    output_addr_const (file, x);
  else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
    {
      output_addr_const (file, x);
      fputs ("-$global$", file);
    }
  else if (GET_CODE (x) == CONST)
    {
      const char *sep = "";
      int offset = 0;		/* assembler wants -$global$ at end */
      rtx base = NULL_RTX;

      /* The CONST wraps a PLUS or MINUS; each arm is either the
	 symbolic base or the integer offset.  */
      switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
	{
	case LABEL_REF:
	case SYMBOL_REF:
	  base = XEXP (XEXP (x, 0), 0);
	  output_addr_const (file, base);
	  break;
	case CONST_INT:
	  offset = INTVAL (XEXP (XEXP (x, 0), 0));
	  break;
	default:
	  gcc_unreachable ();
	}

      switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
	{
	case LABEL_REF:
	case SYMBOL_REF:
	  base = XEXP (XEXP (x, 0), 1);
	  output_addr_const (file, base);
	  break;
	case CONST_INT:
	  offset = INTVAL (XEXP (XEXP (x, 0), 1));
	  break;
	default:
	  gcc_unreachable ();
	}

      /* How bogus.  The compiler is apparently responsible for
	 rounding the constant if it uses an LR field selector.

	 The linker and/or assembler seem a better place since
	 they have to do this kind of thing already.

	 If we fail to do this, HP's optimizing linker may eliminate
	 an addil, but not update the ldw/stw/ldo instruction that
	 uses the result of the addil.  */
      if (round_constant)
	offset = ((offset + 0x1000) & ~0x1fff);

      switch (GET_CODE (XEXP (x, 0)))
	{
	case PLUS:
	  if (offset < 0)
	    {
	      offset = -offset;
	      sep = "-";
	    }
	  else
	    sep = "+";
	  break;

	case MINUS:
	  gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
	  sep = "-";
	  break;

	default:
	  gcc_unreachable ();
	}

      if (!read_only_operand (base, VOIDmode) && !flag_pic)
	fputs ("-$global$", file);
      if (offset)
	fprintf (file, "%s%d", sep, offset);
    }
  else
    output_addr_const (file, x);
}
5723
5724 /* Output boilerplate text to appear at the beginning of the file.
5725 There are several possible versions. */
5726 #define aputs(x) fputs(x, asm_out_file)
5727 static inline void
pa_file_start_level(void)5728 pa_file_start_level (void)
5729 {
5730 if (TARGET_64BIT)
5731 aputs ("\t.LEVEL 2.0w\n");
5732 else if (TARGET_PA_20)
5733 aputs ("\t.LEVEL 2.0\n");
5734 else if (TARGET_PA_11)
5735 aputs ("\t.LEVEL 1.1\n");
5736 else
5737 aputs ("\t.LEVEL 1.0\n");
5738 }
5739
5740 static inline void
pa_file_start_space(int sortspace)5741 pa_file_start_space (int sortspace)
5742 {
5743 aputs ("\t.SPACE $PRIVATE$");
5744 if (sortspace)
5745 aputs (",SORT=16");
5746 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5747 if (flag_tm)
5748 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5749 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5750 "\n\t.SPACE $TEXT$");
5751 if (sortspace)
5752 aputs (",SORT=8");
5753 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5754 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5755 }
5756
5757 static inline void
pa_file_start_file(int want_version)5758 pa_file_start_file (int want_version)
5759 {
5760 if (write_symbols != NO_DEBUG)
5761 {
5762 output_file_directive (asm_out_file, main_input_filename);
5763 if (want_version)
5764 aputs ("\t.version\t\"01.01\"\n");
5765 }
5766 }
5767
5768 static inline void
pa_file_start_mcount(const char * aswhat)5769 pa_file_start_mcount (const char *aswhat)
5770 {
5771 if (profile_flag)
5772 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5773 }
5774
/* TARGET_ASM_FILE_START for 32-bit ELF targets.  */
static void
pa_elf_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_mcount ("ENTRY");
  pa_file_start_file (0);
}
5782
/* TARGET_ASM_FILE_START for SOM targets: level, spaces, and the
   imports every SOM object needs ($global$ and $$dyncall).  */
static void
pa_som_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_space (0);
  aputs ("\t.IMPORT $global$,DATA\n"
	 "\t.IMPORT $$dyncall,MILLICODE\n");
  pa_file_start_mcount ("CODE");
  pa_file_start_file (0);
}
5793
/* TARGET_ASM_FILE_START for Linux targets.  */
static void
pa_linux_file_start (void)
{
  pa_file_start_file (1);
  pa_file_start_level ();
  pa_file_start_mcount ("CODE");
}
5801
/* TARGET_ASM_FILE_START for 64-bit HP-UX with GAS.  */
static void
pa_hpux64_gas_file_start (void)
{
  pa_file_start_level ();
#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
  /* GAS wants _mcount declared as a function before it is referenced.  */
  if (profile_flag)
    ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
#endif
  pa_file_start_file (1);
}
5812
/* TARGET_ASM_FILE_START for 64-bit HP-UX with the HP assembler.  */
static void
pa_hpux64_hpas_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_space (1);
  pa_file_start_mcount ("CODE");
  pa_file_start_file (0);
}
5821 #undef aputs
5822
/* Search the deferred plabel list for SYMBOL and return its internal
   label.  If an entry for SYMBOL is not found, a new entry is created.  */

rtx
pa_get_deferred_plabel (rtx symbol)
{
  const char *fname = XSTR (symbol, 0);
  size_t i;

  /* See if we have already put this function on the list of deferred
     plabels.  This list is generally small, so a linear search is not
     too ugly.  If it proves too slow replace it with something faster.  */
  for (i = 0; i < n_deferred_plabels; i++)
    if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
      break;

  /* If the deferred plabel list is empty, or this entry was not found
     on the list, create a new entry on the list.  */
  if (deferred_plabels == NULL || i == n_deferred_plabels)
    {
      tree id;

      /* Grow the GC-managed vector by one entry.  */
      if (deferred_plabels == 0)
	deferred_plabels = ggc_alloc<deferred_plabel> ();
      else
	deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
					  deferred_plabels,
					  n_deferred_plabels + 1);

      i = n_deferred_plabels++;
      deferred_plabels[i].internal_label = gen_label_rtx ();
      deferred_plabels[i].symbol = symbol;

      /* Gross.  We have just implicitly taken the address of this
	 function.  Mark it in the same manner as assemble_name.  */
      id = maybe_get_identifier (targetm.strip_name_encoding (fname));
      if (id)
	mark_referenced (id);
    }

  return deferred_plabels[i].internal_label;
}
5865
/* Emit the collected deferred plabels as pointer-sized data words,
   each preceded by its internal label.  Called at end of compilation
   (TARGET_ASM_FILE_END path).  */
static void
output_deferred_plabels (void)
{
  size_t i;

  /* If we have some deferred plabels, then we need to switch into the
     data or readonly data section, and align it to a 4 byte boundary
     before outputting the deferred plabels.  */
  if (n_deferred_plabels)
    {
      switch_to_section (flag_pic ? data_section : readonly_data_section);
      ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
    }

  /* Now output the deferred plabels.  */
  for (i = 0; i < n_deferred_plabels; i++)
    {
      targetm.asm_out.internal_label (asm_out_file, "L",
		 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
      assemble_integer (deferred_plabels[i].symbol,
			TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
    }
}
5889
/* Initialize optabs to point to emulation routines.  On HP-UX, long
   double (TFmode) arithmetic is provided by the _U_Qf* library.  */

static void
pa_init_libfuncs (void)
{
  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
      set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
      set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
      set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
      /* NOTE(review): "_U_Qmin" lacks the 'f' that "_U_Qfmax" has --
	 presumably the actual HP-UX routine names; confirm against the
	 HP-UX math library before changing.  */
      set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
      set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
      set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
      set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
      set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

      set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
      set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
      set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
      set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
      set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
      set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
      set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");

      set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
      set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
      set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
      set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");

      /* The 64-bit runtime uses a double-underscore prefix for this
	 one conversion.  */
      set_conv_libfunc (sfix_optab, SImode, TFmode,
			TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
				     : "_U_Qfcnvfxt_quad_to_sgl");
      set_conv_libfunc (sfix_optab, DImode, TFmode,
			"_U_Qfcnvfxt_quad_to_dbl");
      set_conv_libfunc (ufix_optab, SImode, TFmode,
			"_U_Qfcnvfxt_quad_to_usgl");
      set_conv_libfunc (ufix_optab, DImode, TFmode,
			"_U_Qfcnvfxt_quad_to_udbl");

      set_conv_libfunc (sfloat_optab, TFmode, SImode,
			"_U_Qfcnvxf_sgl_to_quad");
      set_conv_libfunc (sfloat_optab, TFmode, DImode,
			"_U_Qfcnvxf_dbl_to_quad");
      set_conv_libfunc (ufloat_optab, TFmode, SImode,
			"_U_Qfcnvxf_usgl_to_quad");
      set_conv_libfunc (ufloat_optab, TFmode, DImode,
			"_U_Qfcnvxf_udbl_to_quad");
    }

  if (TARGET_SYNC_LIBCALL)
    init_sync_libfuncs (8);
}
5943
/* HP's millicode routines mean something special to the assembler.
   Keep track of which ones we have used.  */

enum millicodes { remI, remU, divI, divU, mulI, end1000 };
static void import_milli (enum millicodes);
/* Nonzero once the .IMPORT for the corresponding routine has been
   emitted; indexed by enum millicodes.  */
static char imported[(int) end1000];
static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
/* Template directive; the routine name is spliced in over the dots at
   offset MILLI_START by import_milli.  */
static const char import_string[] = ".IMPORT $$....,MILLICODE";
#define MILLI_START 10
5953
5954 static void
import_milli(enum millicodes code)5955 import_milli (enum millicodes code)
5956 {
5957 char str[sizeof (import_string)];
5958
5959 if (!imported[(int) code])
5960 {
5961 imported[(int) code] = 1;
5962 strcpy (str, import_string);
5963 memcpy (str + MILLI_START, milli_names[(int) code], 4);
5964 output_asm_insn (str, 0);
5965 }
5966 }
5967
/* The register constraints have put the operands and return value in
   the proper registers.  Emit the $$mulI millicode call for INSN.  */

const char *
pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
{
  import_milli (mulI);
  return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
}
5977
/* Emit the rtl for doing a division by a constant.  */

/* Do magic division millicodes exist for this value?  Indexed by the
   divisor, for divisors 0..15.  */
const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};

/* We'll use an array to keep track of the magic millicodes and
   whether or not we've used them already. [n][0] is signed, [n][1] is
   unsigned.  */

static int div_milli[16][2];
5988
/* If OPERANDS[2] is a small constant with a magic-divide millicode,
   emit the millicode division sequence and return 1; otherwise emit
   nothing and return 0.  UNSIGNEDP selects UDIV vs DIV.  */
int
pa_emit_hpdiv_const (rtx *operands, int unsignedp)
{
  if (GET_CODE (operands[2]) == CONST_INT
      && INTVAL (operands[2]) > 0
      && INTVAL (operands[2]) < 16
      && pa_magic_milli[INTVAL (operands[2])])
    {
      /* The millicode return pointer: %r2 on the 64-bit runtime,
	 %r31 on the 32-bit runtime.  */
      rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);

      /* Millicode calling convention: dividend in %r26, result
	 in %r29.  */
      emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
      emit
	(gen_rtx_PARALLEL
	 (VOIDmode,
	  gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
				     gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
						     SImode,
						     gen_rtx_REG (SImode, 26),
						     operands[2])),
		     gen_rtx_CLOBBER (VOIDmode, operands[4]),
		     gen_rtx_CLOBBER (VOIDmode, operands[3]),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
		     gen_rtx_CLOBBER (VOIDmode, ret))));
      emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
      return 1;
    }
  return 0;
}
6018
/* Output the millicode call for a division insn.  OPERANDS[0] is the
   divisor when it is a constant; UNSIGNEDP selects $$divU vs $$divI.  */
const char *
pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
{
  int divisor;

  /* If the divisor is a constant, try to use one of the special
     opcodes.  */
  if (GET_CODE (operands[0]) == CONST_INT)
    {
      static char buf[100];
      divisor = INTVAL (operands[0]);
      /* NOTE(review): div_milli is indexed without a range check here;
	 presumably the md predicates restrict the constant to the
	 magic divisors 0..15 -- verify against pa.md.  */
      if (!div_milli[divisor][unsignedp])
	{
	  /* First use of this magic divisor: emit its .IMPORT.  */
	  div_milli[divisor][unsignedp] = 1;
	  if (unsignedp)
	    output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
	  else
	    output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
	}
      if (unsignedp)
	{
	  sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
		   INTVAL (operands[0]));
	  return pa_output_millicode_call (insn,
					   gen_rtx_SYMBOL_REF (SImode, buf));
	}
      else
	{
	  sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
		   INTVAL (operands[0]));
	  return pa_output_millicode_call (insn,
					   gen_rtx_SYMBOL_REF (SImode, buf));
	}
    }
  /* Divisor isn't a special constant.  */
  else
    {
      if (unsignedp)
	{
	  import_milli (divU);
	  return pa_output_millicode_call (insn,
					   gen_rtx_SYMBOL_REF (SImode, "$$divU"));
	}
      else
	{
	  import_milli (divI);
	  return pa_output_millicode_call (insn,
					   gen_rtx_SYMBOL_REF (SImode, "$$divI"));
	}
    }
}
6070
6071 /* Output a $$rem millicode to do mod. */
6072
6073 const char *
pa_output_mod_insn(int unsignedp,rtx_insn * insn)6074 pa_output_mod_insn (int unsignedp, rtx_insn *insn)
6075 {
6076 if (unsignedp)
6077 {
6078 import_milli (remU);
6079 return pa_output_millicode_call (insn,
6080 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
6081 }
6082 else
6083 {
6084 import_milli (remI);
6085 return pa_output_millicode_call (insn,
6086 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
6087 }
6088 }
6089
/* Output a .CALL directive describing which argument words of CALL_INSN
   are passed in general registers ("GR"), in floating-point registers
   ("FR"), or in the upper half of a floating register pair ("FU").
   The HP SOM linker uses these annotations to perform argument
   relocation when calling between code compiled with different
   calling conventions.  */
void
pa_output_arg_descriptor (rtx_insn *call_insn)
{
  /* One entry per argument word ARGW0..ARGW3; NULL means no
     relocation annotation for that slot.  */
  const char *arg_regs[4];
  machine_mode arg_mode;
  rtx link;
  int i, output_flag = 0;
  int regno;

  /* We neither need nor want argument location descriptors for the
     64bit runtime environment or the ELF32 environment.  */
  if (TARGET_64BIT || TARGET_ELF32)
    return;

  for (i = 0; i < 4; i++)
    arg_regs[i] = 0;

  /* Specify explicitly that no argument relocations should take place
     if using the portable runtime calling conventions.  */
  if (TARGET_PORTABLE_RUNTIME)
    {
      fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
	     asm_out_file);
      return;
    }

  gcc_assert (CALL_P (call_insn));
  /* Walk the USEs recorded for the call to find the argument
     registers actually live at the call.  */
  for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
       link; link = XEXP (link, 1))
    {
      rtx use = XEXP (link, 0);

      if (! (GET_CODE (use) == USE
	     && GET_CODE (XEXP (use, 0)) == REG
	     && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
	continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));
      if (regno >= 23 && regno <= 26)
	{
	  /* General argument registers: %r26 is ARGW0 down to %r23 as
	     ARGW3, hence the 26 - regno index.  A DImode argument also
	     occupies the following word.  */
	  arg_regs[26 - regno] = "GR";
	  if (arg_mode == DImode)
	    arg_regs[25 - regno] = "GR";
	}
      else if (regno >= 32 && regno <= 39)
	{
	  /* Floating-point argument registers %fr4..%fr7 (regnos
	     32..39 cover their left/right halves).  */
	  if (arg_mode == SFmode)
	    arg_regs[(regno - 32) / 2] = "FR";
	  else
	    {
	      /* A double occupies a register pair; which half is
		 annotated "FR" versus "FU" depends on whether the HP
		 assembler's reversed descriptor convention is in
		 effect.  */
#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
	      arg_regs[(regno - 34) / 2] = "FR";
	      arg_regs[(regno - 34) / 2 + 1] = "FU";
#else
	      arg_regs[(regno - 34) / 2] = "FU";
	      arg_regs[(regno - 34) / 2 + 1] = "FR";
#endif
	    }
	}
    }
  /* Emit the directive, separating populated slots with commas.  */
  fputs ("\t.CALL ", asm_out_file);
  for (i = 0; i < 4; i++)
    {
      if (arg_regs[i])
	{
	  if (output_flag++)
	    fputc (',', asm_out_file);
	  fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
	}
    }
  fputc ('\n', asm_out_file);
}
6163
/* Inform reload about cases where moving X with a mode MODE to or from
   a register in RCLASS requires an extra scratch or immediate register.
   Return the class needed for the immediate register.

   Implements TARGET_SECONDARY_RELOAD.  IN_P is true for loads into
   RCLASS, false for stores from it.  When a scratch register is
   needed, SRI->icode is set to the reload pattern that supplies it
   and NO_REGS is returned; a non-NO_REGS return requests an
   intermediate register of that class instead.  */

static reg_class_t
pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
		     machine_mode mode, secondary_reload_info *sri)
{
  int regno;
  enum reg_class rclass = (enum reg_class) rclass_i;

  /* Handle the easy stuff first.  */
  if (rclass == R1_REGS)
    return NO_REGS;

  if (REG_P (x))
    {
      regno = REGNO (x);
      /* Hard registers in the base register class can be moved
	 directly.  */
      if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
	return NO_REGS;
    }
  else
    regno = -1;

  /* If we have something like (mem (mem (...)), we can safely assume the
     inner MEM will end up in a general register after reloading, so there's
     no need for a secondary reload.  */
  if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
    return NO_REGS;

  /* Trying to load a constant into a FP register during PIC code
     generation requires %r1 as a scratch register.  For float modes,
     the only legitimate constant is CONST0_RTX.  However, there are
     a few patterns that accept constant double operands.  */
  if (flag_pic
      && FP_REG_CLASS_P (rclass)
      && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
    {
      switch (mode)
	{
	case E_SImode:
	  sri->icode = CODE_FOR_reload_insi_r1;
	  break;

	case E_DImode:
	  sri->icode = CODE_FOR_reload_indi_r1;
	  break;

	case E_SFmode:
	  sri->icode = CODE_FOR_reload_insf_r1;
	  break;

	case E_DFmode:
	  sri->icode = CODE_FOR_reload_indf_r1;
	  break;

	default:
	  gcc_unreachable ();
	}
      return NO_REGS;
    }

  /* Secondary reloads of symbolic expressions require %r1 as a scratch
     register when we're generating PIC code or when the operand isn't
     readonly.  */
  if (pa_symbolic_expression_p (x))
    {
      /* Look through a HIGH to the symbol it wraps.  */
      if (GET_CODE (x) == HIGH)
	x = XEXP (x, 0);

      if (flag_pic || !read_only_operand (x, VOIDmode))
	{
	  switch (mode)
	    {
	    case E_SImode:
	      sri->icode = CODE_FOR_reload_insi_r1;
	      break;

	    case E_DImode:
	      sri->icode = CODE_FOR_reload_indi_r1;
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  return NO_REGS;
	}
    }

  /* Profiling showed the PA port spends about 1.3% of its compilation
     time in true_regnum from calls inside pa_secondary_reload_class.  */
  if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
    regno = true_regnum (x);

  /* Handle reloads for floating point loads and stores.  */
  if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
      && FP_REG_CLASS_P (rclass))
    {
      if (MEM_P (x))
	{
	  x = XEXP (x, 0);

	  /* We don't need a secondary reload for indexed memory addresses.

	     When INT14_OK_STRICT is true, it might appear that we could
	     directly allow register indirect memory addresses.  However,
	     this doesn't work because we don't support SUBREGs in
	     floating-point register copies and reload doesn't tell us
	     when it's going to use a SUBREG.  */
	  if (IS_INDEX_ADDR_P (x))
	    return NO_REGS;
	}

      /* Request a secondary reload with a general scratch register
	 for everything else.  ??? Could symbolic operands be handled
	 directly when generating non-pic PA 2.0 code?  */
      sri->icode = (in_p
		    ? direct_optab_handler (reload_in_optab, mode)
		    : direct_optab_handler (reload_out_optab, mode));
      return NO_REGS;
    }

  /* A SAR<->FP register copy requires an intermediate general register
     and secondary memory.  We need a secondary reload with a general
     scratch register for spills.  */
  if (rclass == SHIFT_REGS)
    {
      /* Handle spill.  */
      if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
	{
	  sri->icode = (in_p
			? direct_optab_handler (reload_in_optab, mode)
			: direct_optab_handler (reload_out_optab, mode));
	  return NO_REGS;
	}

      /* Handle FP copy.  */
      if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
	return GENERAL_REGS;
    }

  /* The reverse direction: copying SAR into an FP register also needs
     a general-register intermediary.  */
  if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
      && REGNO_REG_CLASS (regno) == SHIFT_REGS
      && FP_REG_CLASS_P (rclass))
    return GENERAL_REGS;

  return NO_REGS;
}
6312
/* Implement TARGET_SECONDARY_MEMORY_NEEDED.  Return true if a stack
   slot is required to move a value of MODE between registers of
   CLASS1 and CLASS2.  The decision is delegated to the target macro
   PA_SECONDARY_MEMORY_NEEDED when a subtarget defines it; otherwise
   no secondary memory is ever required.  */

static bool
pa_secondary_memory_needed (machine_mode mode ATTRIBUTE_UNUSED,
			    reg_class_t class1 ATTRIBUTE_UNUSED,
			    reg_class_t class2 ATTRIBUTE_UNUSED)
{
#ifdef PA_SECONDARY_MEMORY_NEEDED
  return PA_SECONDARY_MEMORY_NEEDED (mode, class1, class2);
#else
  return false;
#endif
}
6326
6327 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6328 is only marked as live on entry by df-scan when it is a fixed
6329 register. It isn't a fixed register in the 64-bit runtime,
6330 so we need to mark it here. */
6331
6332 static void
pa_extra_live_on_entry(bitmap regs)6333 pa_extra_live_on_entry (bitmap regs)
6334 {
6335 if (TARGET_64BIT)
6336 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6337 }
6338
6339 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6340 to prevent it from being deleted. */
6341
6342 rtx
pa_eh_return_handler_rtx(void)6343 pa_eh_return_handler_rtx (void)
6344 {
6345 rtx tmp;
6346
6347 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6348 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6349 tmp = gen_rtx_MEM (word_mode, tmp);
6350 tmp->volatil = 1;
6351 return tmp;
6352 }
6353
6354 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6355 by invisible reference. As a GCC extension, we also pass anything
6356 with a zero or variable size by reference.
6357
6358 The 64-bit runtime does not describe passing any types by invisible
6359 reference. The internals of GCC can't currently handle passing
6360 empty structures, and zero or variable length arrays when they are
6361 not passed entirely on the stack or by reference. Thus, as a GCC
6362 extension, we pass these types by reference. The HP compiler doesn't
6363 support these types, so hopefully there shouldn't be any compatibility
6364 issues. This may have to be revisited when HP releases a C99 compiler
6365 or updates the ABI. */
6366
6367 static bool
pa_pass_by_reference(cumulative_args_t,const function_arg_info & arg)6368 pa_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6369 {
6370 HOST_WIDE_INT size = arg.type_size_in_bytes ();
6371 if (TARGET_64BIT)
6372 return size <= 0;
6373 else
6374 return size <= 0 || size > 8;
6375 }
6376
6377 /* Implement TARGET_FUNCTION_ARG_PADDING. */
6378
6379 static pad_direction
pa_function_arg_padding(machine_mode mode,const_tree type)6380 pa_function_arg_padding (machine_mode mode, const_tree type)
6381 {
6382 if (mode == BLKmode
6383 || (TARGET_64BIT
6384 && type
6385 && (AGGREGATE_TYPE_P (type)
6386 || TREE_CODE (type) == COMPLEX_TYPE
6387 || TREE_CODE (type) == VECTOR_TYPE)))
6388 {
6389 /* Return PAD_NONE if justification is not required. */
6390 if (type
6391 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6392 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6393 return PAD_NONE;
6394
6395 /* The directions set here are ignored when a BLKmode argument larger
6396 than a word is placed in a register. Different code is used for
6397 the stack and registers. This makes it difficult to have a
6398 consistent data representation for both the stack and registers.
6399 For both runtimes, the justification and padding for arguments on
6400 the stack and in registers should be identical. */
6401 if (TARGET_64BIT)
6402 /* The 64-bit runtime specifies left justification for aggregates. */
6403 return PAD_UPWARD;
6404 else
6405 /* The 32-bit runtime architecture specifies right justification.
6406 When the argument is passed on the stack, the argument is padded
6407 with garbage on the left. The HP compiler pads with zeros. */
6408 return PAD_DOWNWARD;
6409 }
6410
6411 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6412 return PAD_DOWNWARD;
6413 else
6414 return PAD_NONE;
6415 }
6416
6417
/* Do what is necessary for `va_start'.  We look at the current function
   to determine if stdargs or varargs is used and fill in an initial
   va_list.  A pointer to this constructor is returned.  */

static rtx
hppa_builtin_saveregs (void)
{
  rtx offset, dest;
  tree fntype = TREE_TYPE (current_function_decl);
  /* For old-style varargs (no prototype ellipsis), back up one word so
     the last named argument is treated as anonymous.  */
  int argadj = ((!stdarg_p (fntype))
		? UNITS_PER_WORD : 0);

  if (argadj)
    offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
  else
    offset = crtl->args.arg_offset_rtx;

  if (TARGET_64BIT)
    {
      int i, off;

      /* Adjust for varargs/stdarg differences.  Note the sign of the
	 adjustment is the opposite of the 32-bit case because 64-bit
	 arguments grow in the opposite direction.  */
      if (argadj)
	offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
      else
	offset = crtl->args.arg_offset_rtx;

      /* We need to save %r26 .. %r19 inclusive starting at offset -64
	 from the incoming arg pointer and growing to larger addresses.  */
      for (i = 26, off = -64; i >= 19; i--, off += 8)
	emit_move_insn (gen_rtx_MEM (word_mode,
				     plus_constant (Pmode,
						    arg_pointer_rtx, off)),
			gen_rtx_REG (word_mode, i));

      /* The incoming args pointer points just beyond the flushback area;
	 normally this is not a serious concern.  However, when we are doing
	 varargs/stdargs we want to make the arg pointer point to the start
	 of the incoming argument area.  */
      emit_move_insn (virtual_incoming_args_rtx,
		      plus_constant (Pmode, arg_pointer_rtx, -64));

      /* Now return a pointer to the first anonymous argument.  */
      return copy_to_reg (expand_binop (Pmode, add_optab,
					virtual_incoming_args_rtx,
					offset, 0, 0, OPTAB_LIB_WIDEN));
    }

  /* 32-bit runtime: store the four general argument registers
     (%r23..%r26) into their stack home at -16 from the internal
     argument pointer.  */
  dest = gen_rtx_MEM (BLKmode,
		      plus_constant (Pmode, crtl->args.internal_arg_pointer,
				     -16));
  set_mem_alias_set (dest, get_varargs_alias_set ());
  set_mem_align (dest, BITS_PER_WORD);
  move_block_from_reg (23, dest, 4);

  /* move_block_from_reg will emit code to store the argument registers
     individually as scalar stores.

     However, other insns may later load from the same addresses for
     a structure load (passing a struct to a varargs routine).

     The alias code assumes that such aliasing can never happen, so we
     have to keep memory referencing insns from moving up beyond the
     last argument register store.  So we emit a blockage insn here.  */
  emit_insn (gen_blockage ());

  return copy_to_reg (expand_binop (Pmode, add_optab,
				    crtl->args.internal_arg_pointer,
				    offset, 0, 0, OPTAB_LIB_WIDEN));
}
6489
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  The incoming NEXTARG is
   ignored; the register-save expansion computes the real pointer to
   the first anonymous argument.  */
static void
hppa_va_start (tree valist, rtx nextarg)
{
  nextarg = expand_builtin_saveregs ();
  std_expand_builtin_va_start (valist, nextarg);
}
6496
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  Build the GIMPLE expression
   that fetches the next argument of TYPE from the va_list VALIST,
   appending any needed statements to PRE_P/POST_P.  Returns the tree
   for the fetched value.  */
static tree
hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			   gimple_seq *post_p)
{
  if (TARGET_64BIT)
    {
      /* Args grow upward.  We can use the generic routines.  */
      return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
    }
  else /* !TARGET_64BIT */
    {
      tree ptr = build_pointer_type (type);
      tree valist_type;
      tree t, u;
      unsigned int size, ofs;
      bool indirect;

      /* Arguments passed by invisible reference: fetch a pointer
	 instead and dereference it below.  */
      indirect = pass_va_arg_by_reference (type);
      if (indirect)
	{
	  type = ptr;
	  ptr = build_pointer_type (type);
	}
      size = int_size_in_bytes (type);
      valist_type = TREE_TYPE (valist);

      /* Args grow down.  Not handled by generic routines.  */

      /* Decrement the va_list pointer by the argument size.  */
      u = fold_convert (sizetype, size_in_bytes (type));
      u = fold_build1 (NEGATE_EXPR, sizetype, u);
      t = fold_build_pointer_plus (valist, u);

      /* Align to 4 or 8 byte boundary depending on argument size.  */

      u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
      t = fold_convert (valist_type, t);

      /* Store the updated pointer back into the va_list.  */
      t = build2 (MODIFY_EXPR, valist_type, valist, t);

      /* Small arguments are right justified within their slot.  */
      ofs = (8 - size) % 4;
      if (ofs != 0)
	t = fold_build_pointer_plus_hwi (t, ofs);

      t = fold_convert (ptr, t);
      t = build_va_arg_indirect_ref (t);

      /* For by-reference arguments, dereference once more to reach
	 the actual object.  */
      if (indirect)
	t = build_va_arg_indirect_ref (t);

      return t;
    }
}
6550
6551 /* True if MODE is valid for the target. By "valid", we mean able to
6552 be manipulated in non-trivial ways. In particular, this means all
6553 the arithmetic is supported. */
6554
6555 static bool
pa_scalar_mode_supported_p(scalar_mode mode)6556 pa_scalar_mode_supported_p (scalar_mode mode)
6557 {
6558 int precision = GET_MODE_PRECISION (mode);
6559
6560 if (TARGET_64BIT && mode == TImode)
6561 return true;
6562
6563 switch (GET_MODE_CLASS (mode))
6564 {
6565 case MODE_PARTIAL_INT:
6566 case MODE_INT:
6567 if (precision == CHAR_TYPE_SIZE)
6568 return true;
6569 if (precision == SHORT_TYPE_SIZE)
6570 return true;
6571 if (precision == INT_TYPE_SIZE)
6572 return true;
6573 if (precision == LONG_TYPE_SIZE)
6574 return true;
6575 if (precision == LONG_LONG_TYPE_SIZE)
6576 return true;
6577 return false;
6578
6579 case MODE_FLOAT:
6580 if (precision == FLOAT_TYPE_SIZE)
6581 return true;
6582 if (precision == DOUBLE_TYPE_SIZE)
6583 return true;
6584 if (precision == LONG_DOUBLE_TYPE_SIZE)
6585 return true;
6586 return false;
6587
6588 case MODE_DECIMAL_FLOAT:
6589 return false;
6590
6591 default:
6592 gcc_unreachable ();
6593 }
6594 }
6595
/* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
   it branches into the delay slot.  Otherwise, return FALSE.  */

static bool
branch_to_delay_slot_p (rtx_insn *insn)
{
  rtx_insn *jump_insn;

  /* A filled delay slot can't be branched into.  */
  if (dbr_sequence_length ())
    return FALSE;

  /* Find the first active insn at the branch target, then scan forward
     from the branch itself to see whether it is reached before any
     insn that takes space.  */
  jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
  while (insn)
    {
      insn = next_active_insn (insn);
      if (jump_insn == insn)
	return TRUE;

      /* We can't rely on the length of asms.  So, we return FALSE when
	 the branch is followed by an asm.  */
      if (!insn
	  || GET_CODE (PATTERN (insn)) == ASM_INPUT
	  || asm_noperands (PATTERN (insn)) >= 0
	  || get_attr_length (insn) > 0)
	break;
    }

  return FALSE;
}
6625
/* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.

   This occurs when INSN has an unfilled delay slot and is followed
   by an asm.  Disaster can occur if the asm is empty and the jump
   branches into the delay slot.  So, we add a nop in the delay slot
   when this occurs.  */

static bool
branch_needs_nop_p (rtx_insn *insn)
{
  rtx_insn *jump_insn;

  /* A filled delay slot never needs an extra nop.  */
  if (dbr_sequence_length ())
    return FALSE;

  /* Scan forward from the branch; if the target (or the end of the
     insn stream) is reached before any space-taking insn, the slot
     must be filled with a nop.  */
  jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
  while (insn)
    {
      insn = next_active_insn (insn);
      if (!insn || jump_insn == insn)
	return TRUE;

      /* Asms have unreliable lengths, so they don't stop the scan;
	 any other insn with nonzero length does.  */
      if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
	    || asm_noperands (PATTERN (insn)) >= 0)
	  && get_attr_length (insn) > 0)
	break;
    }

  return FALSE;
}
6656
/* Return TRUE if INSN, a forward jump insn, can use nullification
   to skip the following instruction.  This avoids an extra cycle due
   to a mis-predicted branch when we fall through.  */

static bool
use_skip_p (rtx_insn *insn)
{
  rtx_insn *jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));

  while (insn)
    {
      insn = next_active_insn (insn);

      /* We can't rely on the length of asms, so we can't skip asms.  */
      if (!insn
	  || GET_CODE (PATTERN (insn)) == ASM_INPUT
	  || asm_noperands (PATTERN (insn)) >= 0)
	break;
      /* A single 4-byte insn immediately followed by the branch
	 target can be skipped by nullification.  */
      if (get_attr_length (insn) == 4
	  && jump_insn == next_active_insn (insn))
	return TRUE;
      if (get_attr_length (insn) > 0)
	break;
    }

  return FALSE;
}
6684
/* This routine handles all the normal conditional branch sequences we
   might need to generate.  It handles compare immediate vs compare
   register, nullification of delay slots, varying length branches,
   negated branches, and all combinations of the above.  It returns the
   output appropriate to emit the branch corresponding to all given
   parameters.

   OPERANDS[1] and OPERANDS[2] are the comparison operands, OPERANDS[3]
   the condition, OPERANDS[0] the branch target.  NEGATED is nonzero to
   emit the branch with the inverted condition.  The returned template
   uses {com…|cmp…} bracket syntax to select PA 1.x versus PA 2.0
   mnemonics at output time.  */

const char *
pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
{
  static char buf[100];
  bool useskip;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot)
     is asking for a disaster.  This can happen when not optimizing and
     when jump optimization fails.

     While it is usually safe to emit nothing, this can fail if the
     preceding instruction is a nullified branch with an empty delay
     slot and the same branch target as this branch.  We could check
     for this but jump optimization should eliminate nop jumps.  It
     is always safe to emit a nop.  */
  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* The doubleword form of the cmpib instruction doesn't have the LEU
     and GTU conditions while the cmpb instruction does.  Since we accept
     zero for cmpb, we must ensure that we use cmpb for the comparison.  */
  if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
    operands[2] = gen_rtx_REG (DImode, 0);
  if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
    operands[1] = gen_rtx_REG (DImode, 0);

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with a
     comclr instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;

  switch (length)
    {
      /* All short conditional branches except backwards with an unfilled
	 delay slot.  */
      case 4:
	if (useskip)
	  strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
	else
	  strcpy (buf, "{com%I2b,|cmp%I2b,}");
	/* '*' selects the doubleword form of the condition.  */
	if (GET_MODE (operands[1]) == DImode)
	  strcat (buf, "*");
	if (negated)
	  strcat (buf, "%B3");
	else
	  strcat (buf, "%S3");
	if (useskip)
	  strcat (buf, " %2,%r1,%%r0");
	else if (nullify)
	  {
	    if (branch_needs_nop_p (insn))
	      strcat (buf, ",n %2,%r1,%0%#");
	    else
	      strcat (buf, ",n %2,%r1,%0");
	  }
	else
	  strcat (buf, " %2,%r1,%0");
	break;

     /* All long conditionals.  Note a short backward branch with an
	unfilled delay slot is treated just like a long backward branch
	with an unfilled delay slot.  */
      case 8:
	/* Handle weird backwards branch with a filled delay slot
	   which is nullified.  */
	if (dbr_sequence_length () != 0
	    && ! forward_branch_p (insn)
	    && nullify)
	  {
	    /* The sense of the condition is inverted here because the
	       comb skips over the unconditional branch to the target.  */
	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
	    if (GET_MODE (operands[1]) == DImode)
	      strcat (buf, "*");
	    if (negated)
	      strcat (buf, "%S3");
	    else
	      strcat (buf, "%B3");
	    strcat (buf, ",n %2,%r1,.+12\n\tb %0");
	  }
	/* Handle short backwards branch with an unfilled delay slot.
	   Using a comb;nop rather than comiclr;bl saves 1 cycle for both
	   taken and untaken branches.  */
	else if (dbr_sequence_length () == 0
		 && ! forward_branch_p (insn)
		 && INSN_ADDRESSES_SET_P ()
		 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	  {
	    strcpy (buf, "{com%I2b,|cmp%I2b,}");
	    if (GET_MODE (operands[1]) == DImode)
	      strcat (buf, "*");
	    if (negated)
	      strcat (buf, "%B3 %2,%r1,%0%#");
	    else
	      strcat (buf, "%S3 %2,%r1,%0%#");
	  }
	else
	  {
	    /* General case: comclr over an unconditional branch.  The
	       condition is inverted since the comclr nullifies the
	       branch when the original condition fails.  */
	    strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
	    if (GET_MODE (operands[1]) == DImode)
	      strcat (buf, "*");
	    if (negated)
	      strcat (buf, "%S3");
	    else
	      strcat (buf, "%B3");
	    if (nullify)
	      strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
	    else
	      strcat (buf, " %2,%r1,%%r0\n\tb %0");
	  }
	break;

      default:
	/* The reversed conditional branch must branch over one additional
	   instruction if the delay slot is filled and needs to be extracted
	   by pa_output_lbranch.  If the delay slot is empty or this is a
	   nullified forward branch, the instruction after the reversed
	   condition branch must be nullified.  */
	if (dbr_sequence_length () == 0
	    || (nullify && forward_branch_p (insn)))
	  {
	    nullify = 1;
	    xdelay = 0;
	    operands[4] = GEN_INT (length);
	  }
	else
	  {
	    xdelay = 1;
	    operands[4] = GEN_INT (length + 4);
	  }

	/* Create a reversed conditional branch which branches around
	   the following insns.  */
	if (GET_MODE (operands[1]) != DImode)
	  {
	    if (nullify)
	      {
		if (negated)
		  strcpy (buf,
		    "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
		else
		  strcpy (buf,
		    "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
	      }
	    else
	      {
		if (negated)
		  strcpy (buf,
		    "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
		else
		  strcpy (buf,
		    "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
	      }
	  }
	else
	  {
	    if (nullify)
	      {
		if (negated)
		  strcpy (buf,
		    "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
		else
		  strcpy (buf,
		    "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
	      }
	    else
	      {
		if (negated)
		  strcpy (buf,
		    "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
		else
		  strcpy (buf,
		    "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
	      }
	  }

	/* Emit the reversed branch here, then let pa_output_lbranch
	   produce the long unconditional branch to the real target.  */
	output_asm_insn (buf, operands);
	return pa_output_lbranch (operands[0], insn, xdelay);
    }
  return buf;
}
6885
/* Output a PIC pc-relative instruction sequence to load the address of
   OPERANDS[0] to register OPERANDS[2].  OPERANDS[0] is a symbol ref
   or a code label.  OPERANDS[1] specifies the register to use to load
   the program counter.  OPERANDS[3] may be used for label generation
   The sequence is always three instructions in length.  The program
   counter recorded for PA 1.X is eight bytes more than that for PA 2.0.
   Register %r1 is clobbered.  */

static void
pa_output_pic_pcrel_sequence (rtx *operands)
{
  gcc_assert (SYMBOL_REF_P (operands[0]) || LABEL_P (operands[0]));
  if (TARGET_PA_20)
    {
      /* We can use mfia to determine the current program counter.  */
      if (TARGET_SOM || !TARGET_GAS)
	{
	  /* Without GAS's $PIC_pcrel$ support, emit a local label and
	     express the offset relative to it.  */
	  operands[3] = gen_label_rtx ();
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (operands[3]));
	  output_asm_insn ("mfia %1", operands);
	  output_asm_insn ("addil L'%0-%l3,%1", operands);
	  output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
	}
      else
	{
	  output_asm_insn ("mfia %1", operands);
	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands);
	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands);
	}
    }
  else
    {
      /* We need to use a branch to determine the current program counter.  */
      output_asm_insn ("{bl|b,l} .+8,%1", operands);
      if (TARGET_SOM || !TARGET_GAS)
	{
	  operands[3] = gen_label_rtx ();
	  output_asm_insn ("addil L'%0-%l3,%1", operands);
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (operands[3]));
	  output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
	}
      else
	{
	  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands);
	  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands);
	}
    }
}
6936
/* This routine handles output of long unconditional branches that
   exceed the maximum range of a simple branch instruction.  Since
   we don't have a register available for the branch, we save register
   %r1 in the frame marker, load the branch destination DEST into %r1,
   execute the branch, and restore %r1 in the delay slot of the branch.

   Since long branches may have an insn in the delay slot and the
   delay slot is used to restore %r1, we in general need to extract
   this insn and execute it before the branch.  However, to facilitate
   use of this function by conditional branches, we also provide an
   option to not extract the delay insn so that it will be emitted
   after the long branch.  So, if there is an insn in the delay slot,
   it is extracted if XDELAY is nonzero.

   The lengths of the various long-branch sequences are 20, 16 and 24
   bytes for the portable runtime, non-PIC and PIC cases, respectively.  */

const char *
pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
{
  rtx xoperands[4];

  xoperands[0] = dest;

  /* First, free up the delay slot.  */
  if (xdelay && dbr_sequence_length () != 0)
    {
      /* We can't handle a jump in the delay slot.  */
      gcc_assert (! JUMP_P (NEXT_INSN (insn)));

      /* Output the delay insn now, ahead of the branch.  */
      final_scan_insn (NEXT_INSN (insn), asm_out_file,
		       optimize, 0, NULL);

      /* Now delete the delay insn.  */
      SET_INSN_DELETED (NEXT_INSN (insn));
    }

  /* Output an insn to save %r1.  The runtime documentation doesn't
     specify whether the "Clean Up" slot in the callers frame can
     be clobbered by the callee.  It isn't copied by HP's builtin
     alloca, so this suggests that it can be clobbered if necessary.
     The "Static Link" location is copied by HP builtin alloca, so
     we avoid using it.  Using the cleanup slot might be a problem
     if we have to interoperate with languages that pass cleanup
     information.  However, it should be possible to handle these
     situations with GCC's asm feature.

     The "Current RP" slot is reserved for the called procedure, so
     we try to use it when we don't have a frame of our own.  It's
     rather unlikely that we won't have a frame when we need to emit
     a very long branch.

     Really the way to go long term is a register scavenger; goto
     the target of the jump and find a register which we can use
     as a scratch to hold the value in %r1.  Then, we wouldn't have
     to free up the delay slot or clobber a slot that may be needed
     for other purposes.  */
  if (TARGET_64BIT)
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
	/* Use the return pointer slot in the frame marker.  */
	output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
      else
	/* Use the slot at -40 in the frame marker since HP builtin
	   alloca doesn't copy it.  */
	output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
    }
  else
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
	/* Use the return pointer slot in the frame marker.  */
	output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
      else
	/* Use the "Clean Up" slot in the frame marker.  In GCC,
	   the only other use of this location is for copying a
	   floating point double argument from a floating-point
	   register to two general registers.  The copy is done
	   as an "atomic" operation when outputting a call, so it
	   won't interfere with our using the location here.  */
	output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
    }

  /* Load the branch target into %r1 and branch through it.  */
  if (TARGET_PORTABLE_RUNTIME)
    {
      output_asm_insn ("ldil L'%0,%%r1", xoperands);
      output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
      output_asm_insn ("bv %%r0(%%r1)", xoperands);
    }
  else if (flag_pic)
    {
      xoperands[1] = gen_rtx_REG (Pmode, 1);
      xoperands[2] = xoperands[1];
      pa_output_pic_pcrel_sequence (xoperands);
      output_asm_insn ("bv %%r0(%%r1)", xoperands);
    }
  else
    /* Now output a very long branch to the original target.  */
    output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);

  /* Now restore the value of %r1 in the delay slot.  The returned
     template goes in the branch's delay slot and must match the
     store emitted above.  */
  if (TARGET_64BIT)
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
	return "ldd -16(%%r30),%%r1";
      else
	return "ldd -40(%%r30),%%r1";
    }
  else
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
	return "ldw -20(%%r30),%%r1";
      else
	return "ldw -12(%%r30),%%r1";
    }
}
7052
/* This routine handles all the branch-on-bit conditional branch sequences we
   might need to generate.  It handles nullification of delay slots,
   varying length branches, negated branches and all combinations of the
   above.  It returns the appropriate output template to emit the branch.  */

const char *
pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
{
  /* Static so the returned template remains valid after we return.  */
  static char buf[100];
  bool useskip;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  I do not think this can happen as this pattern
     is only used when optimizing; jump optimization should eliminate the
     jump.  But be prepared just in case.  */

  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with a
     extrs instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;

  switch (length)
    {
      /* All short conditional branches except backwards with an unfilled
	 delay slot.  */
    case 4:
      if (useskip)
	strcpy (buf, "{extrs,|extrw,s,}");
      else
	strcpy (buf, "bb,");
      /* The DImode tests below intentionally overwrite the 32-bit
	 opcode chosen above with its 64-bit counterpart.  */
      if (useskip && GET_MODE (operands[0]) == DImode)
	strcpy (buf, "extrd,s,*");
      else if (GET_MODE (operands[0]) == DImode)
	strcpy (buf, "bb,*");
      /* NEGATED and WHICH jointly select the condition completer;
	 the same symmetric combination is used in every case below.  */
      if ((which == 0 && negated)
	  || (which == 1 && ! negated))
	strcat (buf, ">=");
      else
	strcat (buf, "<");
      if (useskip)
	strcat (buf, " %0,%1,1,%%r0");
      else if (nullify && negated)
	{
	  if (branch_needs_nop_p (insn))
	    strcat (buf, ",n %0,%1,%3%#");
	  else
	    strcat (buf, ",n %0,%1,%3");
	}
      else if (nullify && ! negated)
	{
	  if (branch_needs_nop_p (insn))
	    strcat (buf, ",n %0,%1,%2%#");
	  else
	    strcat (buf, ",n %0,%1,%2");
	}
      else if (! nullify && negated)
	strcat (buf, " %0,%1,%3");
      else if (! nullify && ! negated)
	strcat (buf, " %0,%1,%2");
      break;

      /* All long conditionals.  Note a short backward branch with an
	 unfilled delay slot is treated just like a long backward branch
	 with an unfilled delay slot.  */
    case 8:
      /* Handle weird backwards branch with a filled delay slot
	 which is nullified.  */
      if (dbr_sequence_length () != 0
	  && ! forward_branch_p (insn)
	  && nullify)
	{
	  strcpy (buf, "bb,");
	  if (GET_MODE (operands[0]) == DImode)
	    strcat (buf, "*");
	  if ((which == 0 && negated)
	      || (which == 1 && ! negated))
	    strcat (buf, "<");
	  else
	    strcat (buf, ">=");
	  if (negated)
	    strcat (buf, ",n %0,%1,.+12\n\tb %3");
	  else
	    strcat (buf, ",n %0,%1,.+12\n\tb %2");
	}
      /* Handle short backwards branch with an unfilled delay slot.
	 Using a bb;nop rather than extrs;bl saves 1 cycle for both
	 taken and untaken branches.  */
      else if (dbr_sequence_length () == 0
	       && ! forward_branch_p (insn)
	       && INSN_ADDRESSES_SET_P ()
	       && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	{
	  strcpy (buf, "bb,");
	  if (GET_MODE (operands[0]) == DImode)
	    strcat (buf, "*");
	  if ((which == 0 && negated)
	      || (which == 1 && ! negated))
	    strcat (buf, ">=");
	  else
	    strcat (buf, "<");
	  if (negated)
	    strcat (buf, " %0,%1,%3%#");
	  else
	    strcat (buf, " %0,%1,%2%#");
	}
      else
	{
	  /* General case: skip over the branch with an extract, using
	     the reversed condition.  */
	  if (GET_MODE (operands[0]) == DImode)
	    strcpy (buf, "extrd,s,*");
	  else
	    strcpy (buf, "{extrs,|extrw,s,}");
	  if ((which == 0 && negated)
	      || (which == 1 && ! negated))
	    strcat (buf, "<");
	  else
	    strcat (buf, ">=");
	  if (nullify && negated)
	    strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
	  else if (nullify && ! negated)
	    strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
	  else if (negated)
	    strcat (buf, " %0,%1,1,%%r0\n\tb %3");
	  else
	    strcat (buf, " %0,%1,1,%%r0\n\tb %2");
	}
      break;

    default:
      /* The reversed conditional branch must branch over one additional
	 instruction if the delay slot is filled and needs to be extracted
	 by pa_output_lbranch.  If the delay slot is empty or this is a
	 nullified forward branch, the instruction after the reversed
	 condition branch must be nullified.  */
      if (dbr_sequence_length () == 0
	  || (nullify && forward_branch_p (insn)))
	{
	  nullify = 1;
	  xdelay = 0;
	  operands[4] = GEN_INT (length);
	}
      else
	{
	  xdelay = 1;
	  operands[4] = GEN_INT (length + 4);
	}

      if (GET_MODE (operands[0]) == DImode)
	strcpy (buf, "bb,*");
      else
	strcpy (buf, "bb,");
      if ((which == 0 && negated)
	  || (which == 1 && !negated))
	strcat (buf, "<");
      else
	strcat (buf, ">=");
      if (nullify)
	strcat (buf, ",n %0,%1,.+%4");
      else
	strcat (buf, " %0,%1,.+%4");
      output_asm_insn (buf, operands);
      /* XDELAY tells pa_output_lbranch whether it must also output the
	 insn sitting in the delay slot.  */
      return pa_output_lbranch (negated ? operands[3] : operands[2],
				insn, xdelay);
    }
  return buf;
}
7236
/* This routine handles all the branch-on-variable-bit conditional branch
   sequences we might need to generate.  It handles nullification of delay
   slots, varying length branches, negated branches and all combinations
   of the above.  It returns the appropriate output template to emit the
   branch.  */

const char *
pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
	       int which)
{
  /* Static so the returned template remains valid after we return.  */
  static char buf[100];
  bool useskip;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  I do not think this can happen as this pattern
     is only used when optimizing; jump optimization should eliminate the
     jump.  But be prepared just in case.  */

  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with a
     extrs instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;

  switch (length)
    {
      /* All short conditional branches except backwards with an unfilled
	 delay slot.  */
    case 4:
      if (useskip)
	strcpy (buf, "{vextrs,|extrw,s,}");
      else
	strcpy (buf, "{bvb,|bb,}");
      /* The DImode tests below intentionally overwrite the 32-bit
	 opcode chosen above with its 64-bit counterpart.  */
      if (useskip && GET_MODE (operands[0]) == DImode)
	strcpy (buf, "extrd,s,*");
      else if (GET_MODE (operands[0]) == DImode)
	strcpy (buf, "bb,*");
      /* NEGATED and WHICH jointly select the condition completer;
	 the same symmetric combination is used in every case below.  */
      if ((which == 0 && negated)
	  || (which == 1 && ! negated))
	strcat (buf, ">=");
      else
	strcat (buf, "<");
      if (useskip)
	strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
      else if (nullify && negated)
	{
	  if (branch_needs_nop_p (insn))
	    strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
	  else
	    strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
	}
      else if (nullify && ! negated)
	{
	  if (branch_needs_nop_p (insn))
	    strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
	  else
	    strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
	}
      else if (! nullify && negated)
	strcat (buf, "{ %0,%3| %0,%%sar,%3}");
      else if (! nullify && ! negated)
	strcat (buf, "{ %0,%2| %0,%%sar,%2}");
      break;

      /* All long conditionals.  Note a short backward branch with an
	 unfilled delay slot is treated just like a long backward branch
	 with an unfilled delay slot.  */
    case 8:
      /* Handle weird backwards branch with a filled delay slot
	 which is nullified.  */
      if (dbr_sequence_length () != 0
	  && ! forward_branch_p (insn)
	  && nullify)
	{
	  strcpy (buf, "{bvb,|bb,}");
	  if (GET_MODE (operands[0]) == DImode)
	    strcat (buf, "*");
	  if ((which == 0 && negated)
	      || (which == 1 && ! negated))
	    strcat (buf, "<");
	  else
	    strcat (buf, ">=");
	  if (negated)
	    strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
	  else
	    strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
	}
      /* Handle short backwards branch with an unfilled delay slot.
	 Using a bb;nop rather than extrs;bl saves 1 cycle for both
	 taken and untaken branches.  */
      else if (dbr_sequence_length () == 0
	       && ! forward_branch_p (insn)
	       && INSN_ADDRESSES_SET_P ()
	       && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	{
	  strcpy (buf, "{bvb,|bb,}");
	  if (GET_MODE (operands[0]) == DImode)
	    strcat (buf, "*");
	  if ((which == 0 && negated)
	      || (which == 1 && ! negated))
	    strcat (buf, ">=");
	  else
	    strcat (buf, "<");
	  if (negated)
	    strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
	  else
	    strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
	}
      else
	{
	  /* General case: skip over the branch with an extract, using
	     the reversed condition.  The DImode strcpy intentionally
	     overwrites the 32-bit opcode.  */
	  strcpy (buf, "{vextrs,|extrw,s,}");
	  if (GET_MODE (operands[0]) == DImode)
	    strcpy (buf, "extrd,s,*");
	  if ((which == 0 && negated)
	      || (which == 1 && ! negated))
	    strcat (buf, "<");
	  else
	    strcat (buf, ">=");
	  if (nullify && negated)
	    strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
	  else if (nullify && ! negated)
	    strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
	  else if (negated)
	    strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
	  else
	    strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
	}
      break;

    default:
      /* The reversed conditional branch must branch over one additional
	 instruction if the delay slot is filled and needs to be extracted
	 by pa_output_lbranch.  If the delay slot is empty or this is a
	 nullified forward branch, the instruction after the reversed
	 condition branch must be nullified.  */
      if (dbr_sequence_length () == 0
	  || (nullify && forward_branch_p (insn)))
	{
	  nullify = 1;
	  xdelay = 0;
	  operands[4] = GEN_INT (length);
	}
      else
	{
	  xdelay = 1;
	  operands[4] = GEN_INT (length + 4);
	}

      if (GET_MODE (operands[0]) == DImode)
	strcpy (buf, "bb,*");
      else
	strcpy (buf, "{bvb,|bb,}");
      if ((which == 0 && negated)
	  || (which == 1 && !negated))
	strcat (buf, "<");
      else
	strcat (buf, ">=");
      if (nullify)
	strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
      else
	strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
      output_asm_insn (buf, operands);
      /* XDELAY tells pa_output_lbranch whether it must also output the
	 insn sitting in the delay slot.  */
      return pa_output_lbranch (negated ? operands[3] : operands[2],
				insn, xdelay);
    }
  return buf;
}
7421
/* Return the output template for emitting a dbra type insn.

   Note it may perform some output operations on its own before
   returning the final output string.  */
const char *
pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
{
  int length = get_attr_length (insn);

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (branch_to_delay_slot_p (insn))
    {
      /* The branch would fall through, so only perform the decrement.
	 Alternative 0 is the GR counter case, alternative 1 the FP
	 register case, and the final case reloads from memory.  */
      if (which_alternative == 0)
	return "ldo %1(%0),%0";
      else if (which_alternative == 1)
	{
	  output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
	  output_asm_insn ("ldw -16(%%r30),%4", operands);
	  output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
	  return "{fldws|fldw} -16(%%r30),%0";
	}
      else
	{
	  output_asm_insn ("ldw %0,%4", operands);
	  return "ldo %1(%4),%4\n\tstw %4,%0";
	}
    }

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int xdelay;

      /* If this is a long branch with its delay slot unfilled, set `nullify'
	 as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
	nullify = 1;

      /* If this is a short forward conditional branch which did not get
	 its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
	nullify = forward_branch_p (insn);

      switch (length)
	{
	case 4:
	  if (nullify)
	    {
	      if (branch_needs_nop_p (insn))
		return "addib,%C2,n %1,%0,%3%#";
	      else
		return "addib,%C2,n %1,%0,%3";
	    }
	  else
	    return "addib,%C2 %1,%0,%3";

	case 8:
	  /* Handle weird backwards branch with a filled delay slot
	     which is nullified.  */
	  if (dbr_sequence_length () != 0
	      && ! forward_branch_p (insn)
	      && nullify)
	    return "addib,%N2,n %1,%0,.+12\n\tb %3";
	  /* Handle short backwards branch with an unfilled delay slot.
	     Using a addb;nop rather than addi;bl saves 1 cycle for both
	     taken and untaken branches.  */
	  else if (dbr_sequence_length () == 0
		   && ! forward_branch_p (insn)
		   && INSN_ADDRESSES_SET_P ()
		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				     - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	    return "addib,%C2 %1,%0,%3%#";

	  /* Handle normal cases.  */
	  if (nullify)
	    return "addi,%N2 %1,%0,%0\n\tb,n %3";
	  else
	    return "addi,%N2 %1,%0,%0\n\tb %3";

	default:
	  /* The reversed conditional branch must branch over one additional
	     instruction if the delay slot is filled and needs to be extracted
	     by pa_output_lbranch.  If the delay slot is empty or this is a
	     nullified forward branch, the instruction after the reversed
	     condition branch must be nullified.  */
	  if (dbr_sequence_length () == 0
	      || (nullify && forward_branch_p (insn)))
	    {
	      nullify = 1;
	      xdelay = 0;
	      operands[4] = GEN_INT (length);
	    }
	  else
	    {
	      xdelay = 1;
	      operands[4] = GEN_INT (length + 4);
	    }

	  if (nullify)
	    output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
	  else
	    output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);

	  return pa_output_lbranch (operands[3], insn, xdelay);
	}

    }
  /* Deal with gross reload from FP register case.  */
  else if (which_alternative == 1)
    {
      /* Move loop counter from FP register to MEM then into a GR,
	 increment the GR, store the GR into MEM, and finally reload
	 the FP register from MEM from within the branch's delay slot.  */
      output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
		       operands);
      output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
      if (length == 24)
	return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
      else if (length == 28)
	return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
      else
	{
	  operands[5] = GEN_INT (length - 16);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
  /* Deal with gross reload from memory case.  */
  else
    {
      /* Reload loop counter from memory, the store back to memory
	 happens in the branch's delay slot.  */
      output_asm_insn ("ldw %0,%4", operands);
      if (length == 12)
	return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
      else if (length == 16)
	return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
      else
	{
	  operands[5] = GEN_INT (length - 4);
	  output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
}
7570
/* Return the output template for emitting a movb type insn.

   Note it may perform some output operations on its own before
   returning the final output string.  */
const char *
pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
		int reverse_comparison)
{
  int length = get_attr_length (insn);

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (branch_to_delay_slot_p (insn))
    {
      /* The branch would fall through, so only perform the move.
	 Alternative 0 is a GR destination, 1 an FP register, 2 memory
	 and 3 the shift amount register (SAR).  */
      if (which_alternative == 0)
	return "copy %1,%0";
      else if (which_alternative == 1)
	{
	  output_asm_insn ("stw %1,-16(%%r30)", operands);
	  return "{fldws|fldw} -16(%%r30),%0";
	}
      else if (which_alternative == 2)
	return "stw %1,%0";
      else
	return "mtsar %r1";
    }

  /* Support the second variant.  The condition code in operands[2] is
     reversed in place.  */
  if (reverse_comparison)
    PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int xdelay;

      /* If this is a long branch with its delay slot unfilled, set `nullify'
	 as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
	nullify = 1;

      /* If this is a short forward conditional branch which did not get
	 its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
	nullify = forward_branch_p (insn);

      switch (length)
	{
	case 4:
	  if (nullify)
	    {
	      if (branch_needs_nop_p (insn))
		return "movb,%C2,n %1,%0,%3%#";
	      else
		return "movb,%C2,n %1,%0,%3";
	    }
	  else
	    return "movb,%C2 %1,%0,%3";

	case 8:
	  /* Handle weird backwards branch with a filled delay slot
	     which is nullified.  */
	  if (dbr_sequence_length () != 0
	      && ! forward_branch_p (insn)
	      && nullify)
	    return "movb,%N2,n %1,%0,.+12\n\tb %3";

	  /* Handle short backwards branch with an unfilled delay slot.
	     Using a movb;nop rather than or;bl saves 1 cycle for both
	     taken and untaken branches.  */
	  else if (dbr_sequence_length () == 0
		   && ! forward_branch_p (insn)
		   && INSN_ADDRESSES_SET_P ()
		   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
				     - INSN_ADDRESSES (INSN_UID (insn)) - 8))
	    return "movb,%C2 %1,%0,%3%#";
	  /* Handle normal cases.  */
	  if (nullify)
	    return "or,%N2 %1,%%r0,%0\n\tb,n %3";
	  else
	    return "or,%N2 %1,%%r0,%0\n\tb %3";

	default:
	  /* The reversed conditional branch must branch over one additional
	     instruction if the delay slot is filled and needs to be extracted
	     by pa_output_lbranch.  If the delay slot is empty or this is a
	     nullified forward branch, the instruction after the reversed
	     condition branch must be nullified.  */
	  if (dbr_sequence_length () == 0
	      || (nullify && forward_branch_p (insn)))
	    {
	      nullify = 1;
	      xdelay = 0;
	      operands[4] = GEN_INT (length);
	    }
	  else
	    {
	      xdelay = 1;
	      operands[4] = GEN_INT (length + 4);
	    }

	  if (nullify)
	    output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
	  else
	    output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);

	  return pa_output_lbranch (operands[3], insn, xdelay);
	}
    }
  /* Deal with gross reload for FP destination register case.  */
  else if (which_alternative == 1)
    {
      /* Move source register to MEM, perform the branch test, then
	 finally load the FP register from MEM from within the branch's
	 delay slot.  */
      output_asm_insn ("stw %1,-16(%%r30)", operands);
      if (length == 12)
	return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
      else if (length == 16)
	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
      else
	{
	  operands[4] = GEN_INT (length - 4);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
	  output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
  /* Deal with gross reload from memory case.  */
  else if (which_alternative == 2)
    {
      /* Reload loop counter from memory, the store back to memory
	 happens in the branch's delay slot.  */
      if (length == 8)
	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
      else if (length == 12)
	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
      else
	{
	  operands[4] = GEN_INT (length);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
			   operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
  /* Handle SAR as a destination.  */
  else
    {
      if (length == 8)
	return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
      else if (length == 12)
	return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
      else
	{
	  operands[4] = GEN_INT (length);
	  output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
			   operands);
	  return pa_output_lbranch (operands[3], insn, 0);
	}
    }
}
7733
7734 /* Copy any FP arguments in INSN into integer registers. */
7735 static void
copy_fp_args(rtx_insn * insn)7736 copy_fp_args (rtx_insn *insn)
7737 {
7738 rtx link;
7739 rtx xoperands[2];
7740
7741 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7742 {
7743 int arg_mode, regno;
7744 rtx use = XEXP (link, 0);
7745
7746 if (! (GET_CODE (use) == USE
7747 && GET_CODE (XEXP (use, 0)) == REG
7748 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7749 continue;
7750
7751 arg_mode = GET_MODE (XEXP (use, 0));
7752 regno = REGNO (XEXP (use, 0));
7753
7754 /* Is it a floating point register? */
7755 if (regno >= 32 && regno <= 39)
7756 {
7757 /* Copy the FP register into an integer register via memory. */
7758 if (arg_mode == SFmode)
7759 {
7760 xoperands[0] = XEXP (use, 0);
7761 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7762 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7763 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7764 }
7765 else
7766 {
7767 xoperands[0] = XEXP (use, 0);
7768 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7769 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7770 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7771 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7772 }
7773 }
7774 }
7775 }
7776
7777 /* Compute length of the FP argument copy sequence for INSN. */
7778 static int
length_fp_args(rtx_insn * insn)7779 length_fp_args (rtx_insn *insn)
7780 {
7781 int length = 0;
7782 rtx link;
7783
7784 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7785 {
7786 int arg_mode, regno;
7787 rtx use = XEXP (link, 0);
7788
7789 if (! (GET_CODE (use) == USE
7790 && GET_CODE (XEXP (use, 0)) == REG
7791 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7792 continue;
7793
7794 arg_mode = GET_MODE (XEXP (use, 0));
7795 regno = REGNO (XEXP (use, 0));
7796
7797 /* Is it a floating point register? */
7798 if (regno >= 32 && regno <= 39)
7799 {
7800 if (arg_mode == SFmode)
7801 length += 8;
7802 else
7803 length += 12;
7804 }
7805 }
7806
7807 return length;
7808 }
7809
7810 /* Return the attribute length for the millicode call instruction INSN.
7811 The length must match the code generated by pa_output_millicode_call.
7812 We include the delay slot in the returned length as it is better to
7813 over estimate the length than to under estimate it. */
7814
7815 int
pa_attr_length_millicode_call(rtx_insn * insn)7816 pa_attr_length_millicode_call (rtx_insn *insn)
7817 {
7818 unsigned long distance = -1;
7819 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7820
7821 if (INSN_ADDRESSES_SET_P ())
7822 {
7823 distance = (total + insn_current_reference_address (insn));
7824 if (distance < total)
7825 distance = -1;
7826 }
7827
7828 if (TARGET_64BIT)
7829 {
7830 if (!TARGET_LONG_CALLS && distance < 7600000)
7831 return 8;
7832
7833 return 20;
7834 }
7835 else if (TARGET_PORTABLE_RUNTIME)
7836 return 24;
7837 else
7838 {
7839 if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
7840 return 8;
7841
7842 if (!flag_pic)
7843 return 12;
7844
7845 return 24;
7846 }
7847 }
7848
/* INSN is a function call.

   CALL_DEST is the routine we are calling.  */

const char *
pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
{
  int attr_length = get_attr_length (insn);
  int seq_length = dbr_sequence_length ();
  rtx xoperands[4];

  xoperands[0] = call_dest;

  /* Handle the common case where we are sure that the branch will
     reach the beginning of the $CODE$ subspace.  The within reach
     form of the $$sh_func_adrs call has a length of 28.  Because it
     has an attribute type of sh_func_adrs, it never has a nonzero
     sequence length (i.e., the delay slot is never filled).  */
  if (!TARGET_LONG_CALLS
      && (attr_length == 8
	  || (attr_length == 28
	      && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
    {
      /* Return pointer is %r2 on the 64-bit port, %r31 otherwise.  */
      xoperands[1] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
      output_asm_insn ("{bl|b,l} %0,%1", xoperands);
    }
  else
    {
      if (TARGET_64BIT)
	{
	  /* It might seem that one insn could be saved by accessing
	     the millicode function using the linkage table.  However,
	     this doesn't work in shared libraries and other dynamically
	     loaded objects.  Using a pc-relative sequence also avoids
	     problems related to the implicit use of the gp register.  */
	  xoperands[1] = gen_rtx_REG (Pmode, 1);
	  xoperands[2] = xoperands[1];
	  pa_output_pic_pcrel_sequence (xoperands);
	  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
	}
      else if (TARGET_PORTABLE_RUNTIME)
	{
	  /* Pure portable runtime doesn't allow be/ble; we also don't
	     have PIC support in the assembler/linker, so this sequence
	     is needed.  */

	  /* Get the address of our target into %r1.  */
	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
	  output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);

	  /* Get our return address into %r31.  */
	  output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
	  output_asm_insn ("addi 8,%%r31,%%r31", xoperands);

	  /* Jump to our target address in %r1.  */
	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
	}
      else if (!flag_pic)
	{
	  /* Non-pic long absolute call via ldil/be.  */
	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
	  if (TARGET_PA_20)
	    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
	  else
	    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
	}
      else
	{
	  /* Long pic pc-relative call.  */
	  xoperands[1] = gen_rtx_REG (Pmode, 31);
	  xoperands[2] = gen_rtx_REG (Pmode, 1);
	  pa_output_pic_pcrel_sequence (xoperands);

	  /* Adjust return address.  */
	  output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands);

	  /* Jump to our target address in %r1.  */
	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
	}
    }

  /* If the delay slot was not filled, output a nop ourselves.  */
  if (seq_length == 0)
    output_asm_insn ("nop", xoperands);

  return "";
}
7933
7934 /* Return the attribute length of the call instruction INSN. The SIBCALL
7935 flag indicates whether INSN is a regular call or a sibling call. The
7936 length returned must be longer than the code actually generated by
7937 pa_output_call. Since branch shortening is done before delay branch
7938 sequencing, there is no way to determine whether or not the delay
7939 slot will be filled during branch shortening. Even when the delay
7940 slot is filled, we may have to add a nop if the delay slot contains
7941 a branch that can't reach its target. Thus, we always have to include
7942 the delay slot in the length estimate. This used to be done in
7943 pa_adjust_insn_length but we do it here now as some sequences always
7944 fill the delay slot and we can save four bytes in the estimate for
7945 these sequences. */
7946
7947 int
pa_attr_length_call(rtx_insn * insn,int sibcall)7948 pa_attr_length_call (rtx_insn *insn, int sibcall)
7949 {
7950 int local_call;
7951 rtx call, call_dest;
7952 tree call_decl;
7953 int length = 0;
7954 rtx pat = PATTERN (insn);
7955 unsigned long distance = -1;
7956
7957 gcc_assert (CALL_P (insn));
7958
7959 if (INSN_ADDRESSES_SET_P ())
7960 {
7961 unsigned long total;
7962
7963 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7964 distance = (total + insn_current_reference_address (insn));
7965 if (distance < total)
7966 distance = -1;
7967 }
7968
7969 gcc_assert (GET_CODE (pat) == PARALLEL);
7970
7971 /* Get the call rtx. */
7972 call = XVECEXP (pat, 0, 0);
7973 if (GET_CODE (call) == SET)
7974 call = SET_SRC (call);
7975
7976 gcc_assert (GET_CODE (call) == CALL);
7977
7978 /* Determine if this is a local call. */
7979 call_dest = XEXP (XEXP (call, 0), 0);
7980 call_decl = SYMBOL_REF_DECL (call_dest);
7981 local_call = call_decl && targetm.binds_local_p (call_decl);
7982
7983 /* pc-relative branch. */
7984 if (!TARGET_LONG_CALLS
7985 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7986 || distance < MAX_PCREL17F_OFFSET))
7987 length += 8;
7988
7989 /* 64-bit plabel sequence. */
7990 else if (TARGET_64BIT && !local_call)
7991 length += 24;
7992
7993 /* non-pic long absolute branch sequence. */
7994 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7995 length += 12;
7996
7997 /* long pc-relative branch sequence. */
7998 else if (TARGET_LONG_PIC_SDIFF_CALL
7999 || (TARGET_GAS && !TARGET_SOM && local_call))
8000 {
8001 length += 20;
8002
8003 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8004 length += 8;
8005 }
8006
8007 /* 32-bit plabel sequence. */
8008 else
8009 {
8010 length += 32;
8011
8012 if (TARGET_SOM)
8013 length += length_fp_args (insn);
8014
8015 if (flag_pic)
8016 length += 4;
8017
8018 if (!TARGET_PA_20)
8019 {
8020 if (!sibcall)
8021 length += 8;
8022
8023 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8024 length += 8;
8025 }
8026 }
8027
8028 return length;
8029 }
8030
/* INSN is a function call; CALL_DEST is the routine we are calling.
   SIBCALL is nonzero for a sibling (tail) call, in which case no return
   pointer is established.  Output the assembly for the call.  The
   sequences emitted here must match the lengths computed by
   pa_attr_length_call.  */

const char *
pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
{
  int seq_length = dbr_sequence_length ();
  tree call_decl = SYMBOL_REF_DECL (call_dest);
  int local_call = call_decl && targetm.binds_local_p (call_decl);
  rtx xoperands[4];

  xoperands[0] = call_dest;

  /* Handle the common case where we're sure that the branch will reach
     the beginning of the "$CODE$" subspace.  This is the beginning of
     the current function if we are in a named section.  */
  if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
    {
      /* Link through %r2 for a normal call; %r0 (no link) for a sibcall.  */
      xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
      output_asm_insn ("{bl|b,l} %0,%1", xoperands);
    }
  else
    {
      if (TARGET_64BIT && !local_call)
	{
	  /* ??? As far as I can tell, the HP linker doesn't support the
	     long pc-relative sequence described in the 64-bit runtime
	     architecture.  So, we use a slightly longer indirect call.  */
	  xoperands[0] = pa_get_deferred_plabel (call_dest);
	  xoperands[1] = gen_label_rtx ();

	  /* Put the load of %r27 into the delay slot.  We don't need to
	     do anything when generating fast indirect calls.  */
	  if (seq_length != 0)
	    {
	      final_scan_insn (NEXT_INSN (insn), asm_out_file,
			       optimize, 0, NULL);

	      /* Now delete the delay insn.  */
	      SET_INSN_DELETED (NEXT_INSN (insn));
	    }

	  output_asm_insn ("addil LT'%0,%%r27", xoperands);
	  output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
	  output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
	  output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
	  output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
	  output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
	  /* The delay slot was consumed by the sequence itself.  */
	  seq_length = 1;
	}
      else
	{
	  int indirect_call = 0;

	  /* Emit a long call.  There are several different sequences
	     of increasing length and complexity.  In most cases,
             they don't allow an instruction in the delay slot.  */
	  if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
	      && !TARGET_LONG_PIC_SDIFF_CALL
	      && !(TARGET_GAS && !TARGET_SOM && local_call)
	      && !TARGET_64BIT)
	    indirect_call = 1;

	  if (seq_length != 0
	      && !sibcall
	      && (!TARGET_PA_20
		  || indirect_call
		  || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
	    {
	      /* A non-jump insn in the delay slot.  By definition we can
		 emit this insn before the call (and in fact before argument
		 relocating.  */
	      final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
			       NULL);

	      /* Now delete the delay insn.  */
	      SET_INSN_DELETED (NEXT_INSN (insn));
	      seq_length = 0;
	    }

	  if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
	    {
	      /* This is the best sequence for making long calls in
		 non-pic code.  Unfortunately, GNU ld doesn't provide
		 the stub needed for external calls, and GAS's support
		 for this with the SOM linker is buggy.  It is safe
		 to use this for local calls.  */
	      output_asm_insn ("ldil L'%0,%%r1", xoperands);
	      if (sibcall)
		output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
	      else
		{
		  if (TARGET_PA_20)
		    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
				     xoperands);
		  else
		    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);

		  /* Copy the link register set by ble/be,l into %r2.  */
		  output_asm_insn ("copy %%r31,%%r2", xoperands);
		  seq_length = 1;
		}
	    }
	  else
	    {
	      /* The HP assembler and linker can handle relocations for
		 the difference of two symbols.  The HP assembler
		 recognizes the sequence as a pc-relative call and
		 the linker provides stubs when needed.  */

	      /* GAS currently can't generate the relocations that
		 are needed for the SOM linker under HP-UX using this
		 sequence.  The GNU linker doesn't generate the stubs
		 that are needed for external calls on TARGET_ELF32
		 with this sequence.  For now, we have to use a longer
		 plabel sequence when using GAS for non local calls.  */
	      if (TARGET_LONG_PIC_SDIFF_CALL
		  || (TARGET_GAS && !TARGET_SOM && local_call))
		{
		  xoperands[1] = gen_rtx_REG (Pmode, 1);
		  xoperands[2] = xoperands[1];
		  pa_output_pic_pcrel_sequence (xoperands);
		}
	      else
		{
		  /* Emit a long plabel-based call sequence.  This is
		     essentially an inline implementation of $$dyncall.
		     We don't actually try to call $$dyncall as this is
		     as difficult as calling the function itself.  */
		  xoperands[0] = pa_get_deferred_plabel (call_dest);
		  xoperands[1] = gen_label_rtx ();

		  /* Since the call is indirect, FP arguments in registers
		     need to be copied to the general registers.  Then, the
		     argument relocation stub will copy them back.  */
		  if (TARGET_SOM)
		    copy_fp_args (insn);

		  if (flag_pic)
		    {
		      output_asm_insn ("addil LT'%0,%%r19", xoperands);
		      output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
		      output_asm_insn ("ldw 0(%%r1),%%r22", xoperands);
		    }
		  else
		    {
		      output_asm_insn ("addil LR'%0-$global$,%%r27",
				       xoperands);
		      output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r22",
				       xoperands);
		    }

		  /* If bit 30 of the plabel is clear, it points directly
		     at the function; otherwise, dereference it to find
		     the function address and new global pointer.  */
		  output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
		  output_asm_insn ("depi 0,31,2,%%r22", xoperands);
		  /* Should this be an ordered load to ensure the target
		     address is loaded before the global pointer?  */
		  output_asm_insn ("ldw 0(%%r22),%%r1", xoperands);
		  output_asm_insn ("ldw 4(%%r22),%%r19", xoperands);

		  if (!sibcall && !TARGET_PA_20)
		    {
		      output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
			output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
		      else
			output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
		    }
		}

	      if (TARGET_PA_20)
		{
		  if (sibcall)
		    output_asm_insn ("bve (%%r1)", xoperands);
		  else
		    {
		      if (indirect_call)
			{
			  output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
			  output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
			  seq_length = 1;
			}
		      else
			output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
		    }
		}
	      else
		{
		  /* A cross-space branch needs the target space register.  */
		  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
		    output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
				     xoperands);

		  if (sibcall)
		    {
		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
			output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
		      else
			output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
		    }
		  else
		    {
		      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
			output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
		      else
			output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);

		      if (indirect_call)
			output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
		      else
			output_asm_insn ("copy %%r31,%%r2", xoperands);
		      seq_length = 1;
		    }
		}
	    }
	}
    }

  /* If no insn filled the delay slot of the branch, emit a nop.  */
  if (seq_length == 0)
    output_asm_insn ("nop", xoperands);

  return "";
}
8252
8253 /* Return the attribute length of the indirect call instruction INSN.
8254 The length must match the code generated by output_indirect call.
8255 The returned length includes the delay slot. Currently, the delay
8256 slot of an indirect call sequence is not exposed and it is used by
8257 the sequence itself. */
8258
8259 int
pa_attr_length_indirect_call(rtx_insn * insn)8260 pa_attr_length_indirect_call (rtx_insn *insn)
8261 {
8262 unsigned long distance = -1;
8263 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8264
8265 if (INSN_ADDRESSES_SET_P ())
8266 {
8267 distance = (total + insn_current_reference_address (insn));
8268 if (distance < total)
8269 distance = -1;
8270 }
8271
8272 if (TARGET_64BIT)
8273 return 12;
8274
8275 if (TARGET_FAST_INDIRECT_CALLS)
8276 return 8;
8277
8278 if (TARGET_PORTABLE_RUNTIME)
8279 return 16;
8280
8281 if (!TARGET_LONG_CALLS
8282 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8283 || distance < MAX_PCREL17F_OFFSET))
8284 return 8;
8285
8286 /* Out of reach, can use ble. */
8287 if (!flag_pic)
8288 return 12;
8289
8290 /* Inline versions of $$dyncall. */
8291 if (!optimize_size)
8292 {
8293 if (TARGET_NO_SPACE_REGS)
8294 return 28;
8295
8296 if (TARGET_PA_20)
8297 return 32;
8298 }
8299
8300 /* Long PIC pc-relative call. */
8301 return 20;
8302 }
8303
8304 const char *
pa_output_indirect_call(rtx_insn * insn,rtx call_dest)8305 pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
8306 {
8307 rtx xoperands[4];
8308 int length;
8309
8310 if (TARGET_64BIT)
8311 {
8312 xoperands[0] = call_dest;
8313 output_asm_insn ("ldd 16(%0),%%r2\n\t"
8314 "bve,l (%%r2),%%r2\n\t"
8315 "ldd 24(%0),%%r27", xoperands);
8316 return "";
8317 }
8318
8319 /* First the special case for kernels, level 0 systems, etc. */
8320 if (TARGET_FAST_INDIRECT_CALLS)
8321 {
8322 pa_output_arg_descriptor (insn);
8323 if (TARGET_PA_20)
8324 return "bve,l,n (%%r22),%%r2\n\tnop";
8325 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8326 }
8327
8328 if (TARGET_PORTABLE_RUNTIME)
8329 {
8330 output_asm_insn ("ldil L'$$dyncall,%%r31\n\t"
8331 "ldo R'$$dyncall(%%r31),%%r31", xoperands);
8332 pa_output_arg_descriptor (insn);
8333 return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8334 }
8335
8336 /* Now the normal case -- we can reach $$dyncall directly or
8337 we're sure that we can get there via a long-branch stub.
8338
8339 No need to check target flags as the length uniquely identifies
8340 the remaining cases. */
8341 length = pa_attr_length_indirect_call (insn);
8342 if (length == 8)
8343 {
8344 pa_output_arg_descriptor (insn);
8345
8346 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8347 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8348 variant of the B,L instruction can't be used on the SOM target. */
8349 if (TARGET_PA_20 && !TARGET_SOM)
8350 return "b,l,n $$dyncall,%%r2\n\tnop";
8351 else
8352 return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8353 }
8354
8355 /* Long millicode call, but we are not generating PIC or portable runtime
8356 code. */
8357 if (length == 12)
8358 {
8359 output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands);
8360 pa_output_arg_descriptor (insn);
8361 return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8362 }
8363
8364 /* The long PIC pc-relative call sequence is five instructions. So,
8365 let's use an inline version of $$dyncall when the calling sequence
8366 has a roughly similar number of instructions and we are not optimizing
8367 for size. We need two instructions to load the return pointer plus
8368 the $$dyncall implementation. */
8369 if (!optimize_size)
8370 {
8371 if (TARGET_NO_SPACE_REGS)
8372 {
8373 pa_output_arg_descriptor (insn);
8374 output_asm_insn ("bl .+8,%%r2\n\t"
8375 "ldo 20(%%r2),%%r2\n\t"
8376 "extru,<> %%r22,30,1,%%r0\n\t"
8377 "bv,n %%r0(%%r22)\n\t"
8378 "ldw -2(%%r22),%%r21\n\t"
8379 "bv %%r0(%%r21)\n\t"
8380 "ldw 2(%%r22),%%r19", xoperands);
8381 return "";
8382 }
8383 if (TARGET_PA_20)
8384 {
8385 pa_output_arg_descriptor (insn);
8386 output_asm_insn ("bl .+8,%%r2\n\t"
8387 "ldo 24(%%r2),%%r2\n\t"
8388 "stw %%r2,-24(%%sp)\n\t"
8389 "extru,<> %r22,30,1,%%r0\n\t"
8390 "bve,n (%%r22)\n\t"
8391 "ldw -2(%%r22),%%r21\n\t"
8392 "bve (%%r21)\n\t"
8393 "ldw 2(%%r22),%%r19", xoperands);
8394 return "";
8395 }
8396 }
8397
8398 /* We need a long PIC call to $$dyncall. */
8399 xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall");
8400 xoperands[1] = gen_rtx_REG (Pmode, 2);
8401 xoperands[2] = gen_rtx_REG (Pmode, 1);
8402 pa_output_pic_pcrel_sequence (xoperands);
8403 pa_output_arg_descriptor (insn);
8404 return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2";
8405 }
8406
8407 /* In HPUX 8.0's shared library scheme, special relocations are needed
8408 for function labels if they might be passed to a function
8409 in a shared library (because shared libraries don't live in code
8410 space), and special magic is needed to construct their address. */
8411
8412 void
pa_encode_label(rtx sym)8413 pa_encode_label (rtx sym)
8414 {
8415 const char *str = XSTR (sym, 0);
8416 int len = strlen (str) + 1;
8417 char *newstr, *p;
8418
8419 p = newstr = XALLOCAVEC (char, len + 1);
8420 *p++ = '@';
8421 strcpy (p, str);
8422
8423 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8424 }
8425
8426 static void
pa_encode_section_info(tree decl,rtx rtl,int first)8427 pa_encode_section_info (tree decl, rtx rtl, int first)
8428 {
8429 int old_referenced = 0;
8430
8431 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8432 old_referenced
8433 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8434
8435 default_encode_section_info (decl, rtl, first);
8436
8437 if (first && TEXT_SPACE_P (decl))
8438 {
8439 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8440 if (TREE_CODE (decl) == FUNCTION_DECL)
8441 pa_encode_label (XEXP (rtl, 0));
8442 }
8443 else if (old_referenced)
8444 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8445 }
8446
/* This is sort of inverse to pa_encode_section_info: skip a leading
   '@' (function label marker) and then a leading '*' (user label
   prefix escape), returning a pointer into STR.  */

static const char *
pa_strip_name_encoding (const char *str)
{
  if (*str == '@')
    str++;
  if (*str == '*')
    str++;
  return str;
}
8456
8457 /* Returns 1 if OP is a function label involved in a simple addition
8458 with a constant. Used to keep certain patterns from matching
8459 during instruction combination. */
8460 int
pa_is_function_label_plus_const(rtx op)8461 pa_is_function_label_plus_const (rtx op)
8462 {
8463 /* Strip off any CONST. */
8464 if (GET_CODE (op) == CONST)
8465 op = XEXP (op, 0);
8466
8467 return (GET_CODE (op) == PLUS
8468 && function_label_operand (XEXP (op, 0), VOIDmode)
8469 && GET_CODE (XEXP (op, 1)) == CONST_INT);
8470 }
8471
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.

   NBYTES tracks the size of each emitted sequence so that last_address
   and total_code_bytes stay accurate; it must match the instructions
   actually output.  */

static void
pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
			HOST_WIDE_INT vcall_offset, tree function)
{
  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
  static unsigned int current_thunk_number;
  /* Nonzero if DELTA fits in a 14-bit immediate (single ldo).  */
  int val_14 = VAL_14_BITS_P (delta);
  unsigned int old_last_address = last_address, nbytes = 0;
  char label[17];
  rtx xoperands[4];

  xoperands[0] = XEXP (DECL_RTL (function), 0);
  xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
  xoperands[2] = GEN_INT (delta);

  assemble_start_function (thunk_fndecl, fnname);
  final_start_function (emit_barrier (), file, 1);

  if (!vcall_offset)
    {
      /* Output the thunk.  We know that the function is in the same
	 translation unit (i.e., the same space) as the thunk, and that
	 thunks are output after their method.  Thus, we don't need an
	 external branch to reach the function.  With SOM and GAS,
	 functions and thunks are effectively in different sections.
	 Thus, we can always use a IA-relative branch and the linker
	 will add a long branch stub if necessary.

	 However, we have to be careful when generating PIC code on the
	 SOM port to ensure that the sequence does not transfer to an
	 import stub for the target function as this could clobber the
	 return value saved at SP-24.  This would also apply to the
	 32-bit linux port if the multi-space model is implemented.  */
      if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
	   && !(flag_pic && TREE_PUBLIC (function))
	   && (TARGET_GAS || last_address < 262132))
	  || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
	      && ((targetm_common.have_named_sections
		   && DECL_SECTION_NAME (thunk_fndecl) != NULL
		   /* The GNU 64-bit linker has rather poor stub management.
		      So, we use a long branch from thunks that aren't in
		      the same section as the target function.  */
		   && ((!TARGET_64BIT
			&& (DECL_SECTION_NAME (thunk_fndecl)
			    != DECL_SECTION_NAME (function)))
		       || ((DECL_SECTION_NAME (thunk_fndecl)
			    == DECL_SECTION_NAME (function))
			   && last_address < 262132)))
		  /* In this case, we need to be able to reach the start of
		     the stub table even though the function is likely closer
		     and can be jumped to directly.  */
		  || (targetm_common.have_named_sections
		      && DECL_SECTION_NAME (thunk_fndecl) == NULL
		      && DECL_SECTION_NAME (function) == NULL
		      && total_code_bytes < MAX_PCREL17F_OFFSET)
		  /* Likewise.  */
		  || (!targetm_common.have_named_sections
		      && total_code_bytes < MAX_PCREL17F_OFFSET))))
	{
	  /* Direct branch; add DELTA to THIS (%r26) in the delay slot
	     when it fits in 14 bits.  */
	  if (!val_14)
	    output_asm_insn ("addil L'%2,%%r26", xoperands);

	  output_asm_insn ("b %0", xoperands);

	  if (val_14)
	    {
	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	      nbytes += 8;
	    }
	  else
	    {
	      output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
	      nbytes += 12;
	    }
	}
      else if (TARGET_64BIT)
	{
	  rtx xop[4];

	  /* We only have one call-clobbered scratch register, so we can't
	     make use of the delay slot if delta doesn't fit in 14 bits.  */
	  if (!val_14)
	    {
	      output_asm_insn ("addil L'%2,%%r26", xoperands);
	      output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
	    }

	  /* Load function address into %r1.  */
	  xop[0] = xoperands[0];
	  xop[1] = gen_rtx_REG (Pmode, 1);
	  xop[2] = xop[1];
	  pa_output_pic_pcrel_sequence (xop);

	  if (val_14)
	    {
	      output_asm_insn ("bv %%r0(%%r1)", xoperands);
	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	      nbytes += 20;
	    }
	  else
	    {
	      output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
	      nbytes += 24;
	    }
	}
      else if (TARGET_PORTABLE_RUNTIME)
	{
	  /* Absolute address of FUNCTION built in %r22.  */
	  output_asm_insn ("ldil L'%0,%%r1", xoperands);
	  output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);

	  if (!val_14)
	    output_asm_insn ("ldil L'%2,%%r26", xoperands);

	  output_asm_insn ("bv %%r0(%%r22)", xoperands);

	  if (val_14)
	    {
	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	      nbytes += 16;
	    }
	  else
	    {
	      output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands);
	      nbytes += 20;
	    }
	}
      else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
	{
	  /* The function is accessible from outside this module.  The only
	     way to avoid an import stub between the thunk and function is to
	     call the function directly with an indirect sequence similar to
	     that used by $$dyncall.  This is possible because $$dyncall acts
	     as the import stub in an indirect call.  */
	  ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
	  xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
	  output_asm_insn ("addil LT'%3,%%r19", xoperands);
	  output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
	  output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
	  output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
	  output_asm_insn ("depi 0,31,2,%%r22", xoperands);
	  output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
	  output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);

	  if (!val_14)
	    {
	      output_asm_insn ("addil L'%2,%%r26", xoperands);
	      nbytes += 4;
	    }

	  if (TARGET_PA_20)
	    {
	      output_asm_insn ("bve (%%r22)", xoperands);
	      nbytes += 36;
	    }
	  else if (TARGET_NO_SPACE_REGS)
	    {
	      output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
	      nbytes += 36;
	    }
	  else
	    {
	      output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
	      output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
	      output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
	      nbytes += 44;
	    }

	  if (val_14)
	    output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	  else
	    output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
	}
      else if (flag_pic)
	{
	  rtx xop[4];

	  /* Load function address into %r22.  */
	  xop[0] = xoperands[0];
	  xop[1] = gen_rtx_REG (Pmode, 1);
	  xop[2] = gen_rtx_REG (Pmode, 22);
	  pa_output_pic_pcrel_sequence (xop);

	  if (!val_14)
	    output_asm_insn ("addil L'%2,%%r26", xoperands);

	  output_asm_insn ("bv %%r0(%%r22)", xoperands);

	  if (val_14)
	    {
	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	      nbytes += 20;
	    }
	  else
	    {
	      output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
	      nbytes += 24;
	    }
	}
      else
	{
	  /* Non-PIC: branch to the absolute address of FUNCTION.  */
	  if (!val_14)
	    output_asm_insn ("addil L'%2,%%r26", xoperands);

	  output_asm_insn ("ldil L'%0,%%r22", xoperands);
	  output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);

	  if (val_14)
	    {
	      output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	      nbytes += 12;
	    }
	  else
	    {
	      output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
	      nbytes += 16;
	    }
	}
    }
  else
    {
      rtx xop[4];

      /* Add DELTA to THIS.  */
      if (val_14)
	{
	  output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
	  nbytes += 4;
	}
      else
	{
	  output_asm_insn ("addil L'%2,%%r26", xoperands);
	  output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
	  nbytes += 8;
	}

      if (TARGET_64BIT)
	{
	  /* Load *(THIS + DELTA) to %r1.  */
	  output_asm_insn ("ldd 0(%%r26),%%r1", xoperands);

	  /* VAL_14 and operand 2 now describe VCALL_OFFSET, not DELTA.  */
	  val_14 = VAL_14_BITS_P (vcall_offset);
	  xoperands[2] = GEN_INT (vcall_offset);

	  /* Load *(*(THIS + DELTA) + VCALL_OFFSET) to %r1.  */
	  if (val_14)
	    {
	      output_asm_insn ("ldd %2(%%r1),%%r1", xoperands);
	      nbytes += 8;
	    }
	  else
	    {
	      output_asm_insn ("addil L'%2,%%r1", xoperands);
	      output_asm_insn ("ldd R'%2(%%r1),%%r1", xoperands);
	      nbytes += 12;
	    }
	}
      else
	{
	  /* Load *(THIS + DELTA) to %r1.  */
	  output_asm_insn ("ldw 0(%%r26),%%r1", xoperands);

	  /* VAL_14 and operand 2 now describe VCALL_OFFSET, not DELTA.  */
	  val_14 = VAL_14_BITS_P (vcall_offset);
	  xoperands[2] = GEN_INT (vcall_offset);

	  /* Load *(*(THIS + DELTA) + VCALL_OFFSET) to %r1.  */
	  if (val_14)
	    {
	      output_asm_insn ("ldw %2(%%r1),%%r1", xoperands);
	      nbytes += 8;
	    }
	  else
	    {
	      output_asm_insn ("addil L'%2,%%r1", xoperands);
	      output_asm_insn ("ldw R'%2(%%r1),%%r1", xoperands);
	      nbytes += 12;
	    }
	}

      /* Branch to FUNCTION and add %r1 to THIS in delay slot if possible.
	 The branch-selection condition mirrors the !vcall_offset case
	 above.  */
      if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
	   && !(flag_pic && TREE_PUBLIC (function))
	   && (TARGET_GAS || last_address < 262132))
	  || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
	      && ((targetm_common.have_named_sections
		   && DECL_SECTION_NAME (thunk_fndecl) != NULL
		   /* The GNU 64-bit linker has rather poor stub management.
		      So, we use a long branch from thunks that aren't in
		      the same section as the target function.  */
		   && ((!TARGET_64BIT
			&& (DECL_SECTION_NAME (thunk_fndecl)
			    != DECL_SECTION_NAME (function)))
		       || ((DECL_SECTION_NAME (thunk_fndecl)
			    == DECL_SECTION_NAME (function))
			   && last_address < 262132)))
		  /* In this case, we need to be able to reach the start of
		     the stub table even though the function is likely closer
		     and can be jumped to directly.  */
		  || (targetm_common.have_named_sections
		      && DECL_SECTION_NAME (thunk_fndecl) == NULL
		      && DECL_SECTION_NAME (function) == NULL
		      && total_code_bytes < MAX_PCREL17F_OFFSET)
		  /* Likewise.  */
		  || (!targetm_common.have_named_sections
		      && total_code_bytes < MAX_PCREL17F_OFFSET))))
	{
	  nbytes += 4;
	  output_asm_insn ("b %0", xoperands);

	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
	}
      else if (TARGET_64BIT)
	{
	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);

	  /* Load function address into %r1.  */
	  nbytes += 16;
	  xop[0] = xoperands[0];
	  xop[1] = gen_rtx_REG (Pmode, 1);
	  xop[2] = xop[1];
	  pa_output_pic_pcrel_sequence (xop);

	  output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
	}
      else if (TARGET_PORTABLE_RUNTIME)
	{
	  /* Load function address into %r22.  */
	  nbytes += 12;
	  output_asm_insn ("ldil L'%0,%%r22", xoperands);
	  output_asm_insn ("ldo R'%0(%%r22),%%r22", xoperands);

	  output_asm_insn ("bv %%r0(%%r22)", xoperands);

	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
	}
      else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
	{
	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);

	  /* The function is accessible from outside this module.  The only
	     way to avoid an import stub between the thunk and function is to
	     call the function directly with an indirect sequence similar to
	     that used by $$dyncall.  This is possible because $$dyncall acts
	     as the import stub in an indirect call.  */
	  ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
	  xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
	  output_asm_insn ("addil LT'%3,%%r19", xoperands);
	  output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
	  output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
	  output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
	  output_asm_insn ("depi 0,31,2,%%r22", xoperands);
	  output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
	  output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);

	  if (TARGET_PA_20)
	    {
	      output_asm_insn ("bve,n (%%r22)", xoperands);
	      nbytes += 32;
	    }
	  else if (TARGET_NO_SPACE_REGS)
	    {
	      output_asm_insn ("be,n 0(%%sr4,%%r22)", xoperands);
	      nbytes += 32;
	    }
	  else
	    {
	      output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
	      output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
	      output_asm_insn ("be,n 0(%%sr0,%%r22)", xoperands);
	      nbytes += 40;
	    }
	}
      else if (flag_pic)
	{
	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);

	  /* Load function address into %r1.  */
	  nbytes += 16;
	  xop[0] = xoperands[0];
	  xop[1] = gen_rtx_REG (Pmode, 1);
	  xop[2] = xop[1];
	  pa_output_pic_pcrel_sequence (xop);

	  output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
	}
      else
	{
	  /* Load function address into %r22.  */
	  nbytes += 8;
	  output_asm_insn ("ldil L'%0,%%r22", xoperands);
	  output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);

	  /* Add *(*(THIS + DELTA) + VCALL_OFFSET) to THIS.  */
	  output_asm_insn ("addl %%r1,%%r26,%%r26", xoperands);
	}
    }

  final_end_function ();

  /* Emit the plabel referenced by the SOM PIC indirect sequences above.  */
  if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
    {
      switch_to_section (data_section);
      output_asm_insn (".align 4", xoperands);
      ASM_OUTPUT_LABEL (file, label);
      output_asm_insn (".word P'%0", xoperands);
    }

  current_thunk_number++;
  /* Round NBYTES up to the function alignment and update the running
     code-size accounting; saturate on overflow.  */
  nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
	    & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
  last_address += nbytes;
  if (old_last_address > last_address)
    last_address = UINT_MAX;
  update_total_code_bytes (nbytes);
  assemble_end_function (thunk_fndecl, fnname);
}
8898
8899 /* Only direct calls to static functions are allowed to be sibling (tail)
8900 call optimized.
8901
8902 This restriction is necessary because some linker generated stubs will
8903 store return pointers into rp' in some cases which might clobber a
8904 live value already in rp'.
8905
8906 In a sibcall the current function and the target function share stack
8907 space. Thus if the path to the current function and the path to the
8908 target function save a value in rp', they save the value into the
8909 same stack slot, which has undesirable consequences.
8910
8911 Because of the deferred binding nature of shared libraries any function
8912 with external scope could be in a different load module and thus require
8913 rp' to be saved when calling that function. So sibcall optimizations
8914 can only be safe for static function.
8915
8916 Note that GCC never needs return value relocations, so we don't have to
8917 worry about static calls with return value relocations (which require
8918 saving rp').
8919
8920 It is safe to perform a sibcall optimization when the target function
8921 will never return. */
8922 static bool
pa_function_ok_for_sibcall(tree decl,tree exp ATTRIBUTE_UNUSED)8923 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8924 {
8925 /* Sibcalls are not ok because the arg pointer register is not a fixed
8926 register. This prevents the sibcall optimization from occurring. In
8927 addition, there are problems with stub placement using GNU ld. This
8928 is because a normal sibcall branch uses a 17-bit relocation while
8929 a regular call branch uses a 22-bit relocation. As a result, more
8930 care needs to be taken in the placement of long-branch stubs. */
8931 if (TARGET_64BIT)
8932 return false;
8933
8934 if (TARGET_PORTABLE_RUNTIME)
8935 return false;
8936
8937 /* Sibcalls are only ok within a translation unit. */
8938 return decl && targetm.binds_local_p (decl);
8939 }
8940
8941 /* ??? Addition is not commutative on the PA due to the weird implicit
8942 space register selection rules for memory addresses. Therefore, we
8943 don't consider a + b == b + a, as this might be inside a MEM. */
8944 static bool
pa_commutative_p(const_rtx x,int outer_code)8945 pa_commutative_p (const_rtx x, int outer_code)
8946 {
8947 return (COMMUTATIVE_P (x)
8948 && (TARGET_NO_SPACE_REGS
8949 || (outer_code != UNKNOWN && outer_code != MEM)
8950 || GET_CODE (x) != PLUS));
8951 }
8952
8953 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8954 use in fmpyadd instructions. */
8955 int
pa_fmpyaddoperands(rtx * operands)8956 pa_fmpyaddoperands (rtx *operands)
8957 {
8958 machine_mode mode = GET_MODE (operands[0]);
8959
8960 /* Must be a floating point mode. */
8961 if (mode != SFmode && mode != DFmode)
8962 return 0;
8963
8964 /* All modes must be the same. */
8965 if (! (mode == GET_MODE (operands[1])
8966 && mode == GET_MODE (operands[2])
8967 && mode == GET_MODE (operands[3])
8968 && mode == GET_MODE (operands[4])
8969 && mode == GET_MODE (operands[5])))
8970 return 0;
8971
8972 /* All operands must be registers. */
8973 if (! (GET_CODE (operands[1]) == REG
8974 && GET_CODE (operands[2]) == REG
8975 && GET_CODE (operands[3]) == REG
8976 && GET_CODE (operands[4]) == REG
8977 && GET_CODE (operands[5]) == REG))
8978 return 0;
8979
8980 /* Only 2 real operands to the addition. One of the input operands must
8981 be the same as the output operand. */
8982 if (! rtx_equal_p (operands[3], operands[4])
8983 && ! rtx_equal_p (operands[3], operands[5]))
8984 return 0;
8985
8986 /* Inout operand of add cannot conflict with any operands from multiply. */
8987 if (rtx_equal_p (operands[3], operands[0])
8988 || rtx_equal_p (operands[3], operands[1])
8989 || rtx_equal_p (operands[3], operands[2]))
8990 return 0;
8991
8992 /* multiply cannot feed into addition operands. */
8993 if (rtx_equal_p (operands[4], operands[0])
8994 || rtx_equal_p (operands[5], operands[0]))
8995 return 0;
8996
8997 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8998 if (mode == SFmode
8999 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
9000 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
9001 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
9002 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
9003 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
9004 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
9005 return 0;
9006
9007 /* Passed. Operands are suitable for fmpyadd. */
9008 return 1;
9009 }
9010
9011 #if !defined(USE_COLLECT2)
/* Output SYMBOL as a static constructor with priority PRIORITY.
   Only used when collect2 is not gathering constructors.  Dispatches
   to whichever default mechanism the configuration supports: a
   dedicated .ctors section, a named section, or stabs records.  */
static void
pa_asm_out_constructor (rtx symbol, int priority)
{
  /* Apply the target's function-label encoding to SYMBOL first
     (pa_encode_label) unless it is already a function label.  */
  if (!function_label_operand (symbol, VOIDmode))
    pa_encode_label (symbol);

#ifdef CTORS_SECTION_ASM_OP
  default_ctor_section_asm_out_constructor (symbol, priority);
#else
# ifdef TARGET_ASM_NAMED_SECTION
  default_named_section_asm_out_constructor (symbol, priority);
# else
  default_stabs_asm_out_constructor (symbol, priority);
# endif
#endif
}
9028
/* Output SYMBOL as a static destructor with priority PRIORITY.
   Mirror image of pa_asm_out_constructor above; only used when
   collect2 is not gathering destructors.  */
static void
pa_asm_out_destructor (rtx symbol, int priority)
{
  /* Apply the target's function-label encoding to SYMBOL first
     (pa_encode_label) unless it is already a function label.  */
  if (!function_label_operand (symbol, VOIDmode))
    pa_encode_label (symbol);

#ifdef DTORS_SECTION_ASM_OP
  default_dtor_section_asm_out_destructor (symbol, priority);
#else
# ifdef TARGET_ASM_NAMED_SECTION
  default_named_section_asm_out_destructor (symbol, priority);
# else
  default_stabs_asm_out_destructor (symbol, priority);
# endif
#endif
}
9045 #endif
9046
9047 /* This function places uninitialized global data in the bss section.
9048 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
9049 function on the SOM port to prevent uninitialized global data from
9050 being placed in the data section. */
9051
void
pa_asm_output_aligned_bss (FILE *stream,
			   const char *name,
			   unsigned HOST_WIDE_INT size,
			   unsigned int align)
{
  /* Emit into bss rather than data; see the comment above.  */
  switch_to_section (bss_section);

#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
  ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
#endif

#ifdef ASM_OUTPUT_SIZE_DIRECTIVE
  ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
#endif

  /* ALIGN is in bits; the .align directive takes bytes.  */
  fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
  ASM_OUTPUT_LABEL (stream, name);
  /* Reserve SIZE bytes of uninitialized storage.  */
  fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
}
9072
9073 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
9074 that doesn't allow the alignment of global common storage to be directly
9075 specified. The SOM linker aligns common storage based on the rounded
9076 value of the NUM_BYTES parameter in the .comm directive. It's not
9077 possible to use the .align directive as it doesn't affect the alignment
9078 of the label associated with a .comm directive. */
9079
void
pa_asm_output_aligned_common (FILE *stream,
			      const char *name,
			      unsigned HOST_WIDE_INT size,
			      unsigned int align)
{
  unsigned int max_common_align;

  /* Cap the requested alignment at what the linker can actually
     deliver for global common data in this configuration.  */
  max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
  if (align > max_common_align)
    {
      /* Alignment exceeds maximum alignment for global common data.  */
      align = max_common_align;
    }

  switch_to_section (bss_section);

  assemble_name (stream, name);
  /* The SOM linker aligns common storage based on the rounded byte
     count, so request at least align / BITS_PER_UNIT bytes to obtain
     the desired alignment (see the comment above this function).  */
  fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
           MAX (size, align / BITS_PER_UNIT));
}
9101
9102 /* We can't use .comm for local common storage as the SOM linker effectively
9103 treats the symbol as universal and uses the same storage for local symbols
9104 with the same name in different object files. The .block directive
9105 reserves an uninitialized block of storage. However, it's not common
9106 storage. Fortunately, GCC never requests common storage with the same
9107 name in any given translation unit. */
9108
void
pa_asm_output_aligned_local (FILE *stream,
			     const char *name,
			     unsigned HOST_WIDE_INT size,
			     unsigned int align)
{
  switch_to_section (bss_section);
  /* ALIGN is in bits; the .align directive takes bytes.  */
  fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);

#ifdef LOCAL_ASM_OP
  /* Mark the symbol as local where the assembler supports it.  */
  fprintf (stream, "%s", LOCAL_ASM_OP);
  assemble_name (stream, name);
  fprintf (stream, "\n");
#endif

  ASM_OUTPUT_LABEL (stream, name);
  /* .block reserves uninitialized storage (not common storage; see
     the comment above this function for why .comm is unusable).  */
  fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
}
9127
9128 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
9129 use in fmpysub instructions. */
9130 int
pa_fmpysuboperands(rtx * operands)9131 pa_fmpysuboperands (rtx *operands)
9132 {
9133 machine_mode mode = GET_MODE (operands[0]);
9134
9135 /* Must be a floating point mode. */
9136 if (mode != SFmode && mode != DFmode)
9137 return 0;
9138
9139 /* All modes must be the same. */
9140 if (! (mode == GET_MODE (operands[1])
9141 && mode == GET_MODE (operands[2])
9142 && mode == GET_MODE (operands[3])
9143 && mode == GET_MODE (operands[4])
9144 && mode == GET_MODE (operands[5])))
9145 return 0;
9146
9147 /* All operands must be registers. */
9148 if (! (GET_CODE (operands[1]) == REG
9149 && GET_CODE (operands[2]) == REG
9150 && GET_CODE (operands[3]) == REG
9151 && GET_CODE (operands[4]) == REG
9152 && GET_CODE (operands[5]) == REG))
9153 return 0;
9154
9155 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
9156 operation, so operands[4] must be the same as operand[3]. */
9157 if (! rtx_equal_p (operands[3], operands[4]))
9158 return 0;
9159
9160 /* multiply cannot feed into subtraction. */
9161 if (rtx_equal_p (operands[5], operands[0]))
9162 return 0;
9163
9164 /* Inout operand of sub cannot conflict with any operands from multiply. */
9165 if (rtx_equal_p (operands[3], operands[0])
9166 || rtx_equal_p (operands[3], operands[1])
9167 || rtx_equal_p (operands[3], operands[2]))
9168 return 0;
9169
9170 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
9171 if (mode == SFmode
9172 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
9173 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
9174 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
9175 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
9176 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
9177 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
9178 return 0;
9179
9180 /* Passed. Operands are suitable for fmpysub. */
9181 return 1;
9182 }
9183
9184 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
9185 constants for a MULT embedded inside a memory address. */
int
pa_mem_shadd_constant_p (int val)
{
  /* The only valid scale factors inside a memory address are 2, 4
     and 8.  */
  return (val == 2 || val == 4 || val == 8) ? 1 : 0;
}
9194
9195 /* Return 1 if the given constant is 1, 2, or 3. These are the valid
9196 constants for shadd instructions. */
int
pa_shadd_constant_p (int val)
{
  /* shadd accepts shift counts of 1, 2 and 3 only.  */
  return (val >= 1 && val <= 3) ? 1 : 0;
}
9205
9206 /* Return TRUE if INSN branches forward. */
9207
9208 static bool
forward_branch_p(rtx_insn * insn)9209 forward_branch_p (rtx_insn *insn)
9210 {
9211 rtx lab = JUMP_LABEL (insn);
9212
9213 /* The INSN must have a jump label. */
9214 gcc_assert (lab != NULL_RTX);
9215
9216 if (INSN_ADDRESSES_SET_P ())
9217 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
9218
9219 while (insn)
9220 {
9221 if (insn == lab)
9222 return true;
9223 else
9224 insn = NEXT_INSN (insn);
9225 }
9226
9227 return false;
9228 }
9229
9230 /* Output an unconditional move and branch insn. */
9231
const char *
pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
{
  int length = get_attr_length (insn);

  /* These are the cases in which we win.  */
  if (length == 4)
    return "mov%I1b,tr %1,%0,%2";

  /* None of the following cases win, but they don't lose either.  */
  if (length == 8)
    {
      if (dbr_sequence_length () == 0)
	{
	  /* Nothing in the delay slot, fake it by putting the combined
	     insn (the copy or add) in the delay slot of a bl.  */
	  if (GET_CODE (operands[1]) == CONST_INT)
	    return "b %2\n\tldi %1,%0";
	  else
	    return "b %2\n\tcopy %1,%0";
	}
      else
	{
	  /* Something in the delay slot, but we've got a long branch.  */
	  if (GET_CODE (operands[1]) == CONST_INT)
	    return "ldi %1,%0\n\tb %2";
	  else
	    return "copy %1,%0\n\tb %2";
	}
    }

  /* Length > 8: output the move first, then a full long-branch
     sequence via pa_output_lbranch.  */
  if (GET_CODE (operands[1]) == CONST_INT)
    output_asm_insn ("ldi %1,%0", operands);
  else
    output_asm_insn ("copy %1,%0", operands);
  return pa_output_lbranch (operands[2], insn, 1);
}
9269
9270 /* Output an unconditional add and branch insn. */
9271
const char *
pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
{
  int length = get_attr_length (insn);

  /* To make life easy we want operand0 to be the shared input/output
     operand and operand1 to be the readonly operand.  */
  if (operands[0] == operands[1])
    operands[1] = operands[2];

  /* These are the cases in which we win.  */
  if (length == 4)
    return "add%I1b,tr %1,%0,%3";

  /* None of the following cases win, but they don't lose either.  */
  if (length == 8)
    {
      if (dbr_sequence_length () == 0)
	/* Nothing in the delay slot, fake it by putting the combined
	   insn (the copy or add) in the delay slot of a bl.  */
	return "b %3\n\tadd%I1 %1,%0,%0";
      else
	/* Something in the delay slot, but we've got a long branch.  */
	return "add%I1 %1,%0,%0\n\tb %3";
    }

  /* Length > 8: output the add first, then a full long-branch
     sequence via pa_output_lbranch.  */
  output_asm_insn ("add%I1 %1,%0,%0", operands);
  return pa_output_lbranch (operands[3], insn, 1);
}
9301
9302 /* We use this hook to perform a PA specific optimization which is difficult
9303 to do in earlier passes. */
9304
static void
pa_reorg (void)
{
  /* Clean up useless add,tr insns; see remove_useless_addtr_insns.  */
  remove_useless_addtr_insns (1);

  /* Two-output combination is only profitable on pre-PA8000
     processors; see the comment above pa_combine_instructions.  */
  if (pa_cpu < PROCESSOR_8000)
    pa_combine_instructions ();
}
9313
9314 /* The PA has a number of odd instructions which can perform multiple
9315 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
9316 it may be profitable to combine two instructions into one instruction
9317 with two outputs. It's not profitable PA2.0 machines because the
9318 two outputs would take two slots in the reorder buffers.
9319
9320 This routine finds instructions which can be combined and combines
9321 them. We only support some of the potential combinations, and we
9322 only try common ways to find suitable instructions.
9323
9324 * addb can add two registers or a register and a small integer
9325 and jump to a nearby (+-8k) location. Normally the jump to the
9326 nearby location is conditional on the result of the add, but by
9327 using the "true" condition we can make the jump unconditional.
9328 Thus addb can perform two independent operations in one insn.
9329
9330 * movb is similar to addb in that it can perform a reg->reg
9331 or small immediate->reg copy and jump to a nearby (+-8k location).
9332
9333 * fmpyadd and fmpysub can perform a FP multiply and either an
9334 FP add or FP sub if the operands of the multiply and add/sub are
9335 independent (there are other minor restrictions). Note both
9336 the fmpy and fadd/fsub can in theory move to better spots according
9337 to data dependencies, but for now we require the fmpy stay at a
9338 fixed location.
9339
9340 * Many of the memory operations can perform pre & post updates
9341 of index registers. GCC's pre/post increment/decrement addressing
9342 is far too simple to take advantage of all the possibilities. This
9343 pass may not be suitable since those insns may not be independent.
9344
9345 * comclr can compare two ints or an int and a register, nullify
9346 the following instruction and zero some other register. This
9347 is more difficult to use as it's harder to find an insn which
9348 will generate a comclr than finding something like an unconditional
9349 branch. (conditional moves & long branches create comclr insns).
9350
9351 * Most arithmetic operations can conditionally skip the next
9352 instruction. They can be viewed as "perform this operation
9353 and conditionally jump to this nearby location" (where nearby
9354 is an insns away). These are difficult to use due to the
9355 branch length restrictions. */
9356
static void
pa_combine_instructions (void)
{
  rtx_insn *anchor;

  /* This can get expensive since the basic algorithm is on the
     order of O(n^2) (or worse).  Only do it for -O2 or higher
     levels of optimization.  */
  if (optimize < 2)
    return;

  /* Walk down the list of insns looking for "anchor" insns which
     may be combined with "floating" insns.  As the name implies,
     "anchor" instructions don't move, while "floating" insns may
     move around.

     NEW_RTX is a single scratch PARALLEL insn; pa_can_combine_p
     overwrites its two pattern slots for each candidate pairing so we
     do not allocate a fresh insn per attempt.  */
  rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
  rtx_insn *new_rtx = make_insn_raw (par);

  for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
    {
      enum attr_pa_combine_type anchor_attr;
      enum attr_pa_combine_type floater_attr;

      /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
	 Also ignore any special USE insns.  */
      if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
	  || GET_CODE (PATTERN (anchor)) == USE
	  || GET_CODE (PATTERN (anchor)) == CLOBBER)
	continue;

      anchor_attr = get_attr_pa_combine_type (anchor);
      /* See if anchor is an insn suitable for combination.  */
      if (anchor_attr == PA_COMBINE_TYPE_FMPY
	  || anchor_attr == PA_COMBINE_TYPE_FADDSUB
	  || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
	      && ! forward_branch_p (anchor)))
	{
	  rtx_insn *floater;

	  /* First scan backwards from the anchor for a floater.  */
	  for (floater = PREV_INSN (anchor);
	       floater;
	       floater = PREV_INSN (floater))
	    {
	      /* Notes and USE/CLOBBER insns are skipped, not barriers.  */
	      if (NOTE_P (floater)
		  || (NONJUMP_INSN_P (floater)
		      && (GET_CODE (PATTERN (floater)) == USE
			  || GET_CODE (PATTERN (floater)) == CLOBBER)))
		continue;

	      /* Anything except a regular INSN will stop our search.  */
	      if (! NONJUMP_INSN_P (floater))
		{
		  floater = NULL;
		  break;
		}

	      /* See if FLOATER is suitable for combination with the
		 anchor.  */
	      floater_attr = get_attr_pa_combine_type (floater);
	      if ((anchor_attr == PA_COMBINE_TYPE_FMPY
		   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
		  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
		      && floater_attr == PA_COMBINE_TYPE_FMPY))
		{
		  /* If ANCHOR and FLOATER can be combined, then we're
		     done with this pass.  */
		  if (pa_can_combine_p (new_rtx, anchor, floater, 0,
					SET_DEST (PATTERN (floater)),
					XEXP (SET_SRC (PATTERN (floater)), 0),
					XEXP (SET_SRC (PATTERN (floater)), 1)))
		    break;
		}

	      else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
		       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
		{
		  /* An ADDMOVE floater is either an add (PLUS source)
		     or a plain move; pass the appropriate operands.  */
		  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
		    {
		      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
					    SET_DEST (PATTERN (floater)),
					    XEXP (SET_SRC (PATTERN (floater)), 0),
					    XEXP (SET_SRC (PATTERN (floater)), 1)))
			break;
		    }
		  else
		    {
		      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
					    SET_DEST (PATTERN (floater)),
					    SET_SRC (PATTERN (floater)),
					    SET_SRC (PATTERN (floater))))
			break;
		    }
		}
	    }

	  /* If we didn't find anything on the backwards scan try forwards.  */
	  if (!floater
	      && (anchor_attr == PA_COMBINE_TYPE_FMPY
		  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
	    {
	      for (floater = anchor; floater; floater = NEXT_INSN (floater))
		{
		  if (NOTE_P (floater)
		      || (NONJUMP_INSN_P (floater)
			  && (GET_CODE (PATTERN (floater)) == USE
			      || GET_CODE (PATTERN (floater)) == CLOBBER)))

		    continue;

		  /* Anything except a regular INSN will stop our search.  */
		  if (! NONJUMP_INSN_P (floater))
		    {
		      floater = NULL;
		      break;
		    }

		  /* See if FLOATER is suitable for combination with the
		     anchor.  */
		  floater_attr = get_attr_pa_combine_type (floater);
		  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
		       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
		      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
			  && floater_attr == PA_COMBINE_TYPE_FMPY))
		    {
		      /* If ANCHOR and FLOATER can be combined, then we're
			 done with this pass.  Note REVERSED is 1 here
			 since the floater follows the anchor.  */
		      if (pa_can_combine_p (new_rtx, anchor, floater, 1,
					    SET_DEST (PATTERN (floater)),
					    XEXP (SET_SRC (PATTERN (floater)),
						  0),
					    XEXP (SET_SRC (PATTERN (floater)),
						  1)))
			break;
		    }
		}
	    }

	  /* FLOATER will be nonzero if we found a suitable floating
	     insn for combination with ANCHOR.  */
	  if (floater
	      && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
		  || anchor_attr == PA_COMBINE_TYPE_FMPY))
	    {
	      /* Emit the new instruction and delete the old anchor.  */
	      rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
				       copy_rtx (PATTERN (floater)));
	      rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
	      emit_insn_before (temp, anchor);

	      SET_INSN_DELETED (anchor);

	      /* Emit a special USE insn for FLOATER, then delete
		 the floating insn.  */
	      temp = copy_rtx (PATTERN (floater));
	      emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
	      delete_insn (floater);

	      continue;
	    }
	  else if (floater
		   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
	    {
	      /* Emit the new_jump instruction and delete the old anchor.  */
	      rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
				       copy_rtx (PATTERN (floater)));
	      rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
	      temp = emit_jump_insn_before (temp, anchor);

	      /* Preserve the jump target of the original branch.  */
	      JUMP_LABEL (temp) = JUMP_LABEL (anchor);
	      SET_INSN_DELETED (anchor);

	      /* Emit a special USE insn for FLOATER, then delete
		 the floating insn.  */
	      temp = copy_rtx (PATTERN (floater));
	      emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
	      delete_insn (floater);
	      continue;
	    }
	}
    }
}
9538
/* Return 1 if ANCHOR and FLOATER can be combined into a single
   PARALLEL insn, 0 otherwise.  NEW_RTX is a scratch insn whose
   PARALLEL pattern slots are overwritten with the two candidate
   patterns before recognition.  REVERSED is nonzero when FLOATER
   follows ANCHOR in the insn stream.  DEST is FLOATER's output;
   SRC1 and SRC2 are its inputs.  */
static int
pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
		  int reversed, rtx dest,
		  rtx src1, rtx src2)
{
  int insn_code_number;
  rtx_insn *start, *end;

  /* Create a PARALLEL with the patterns of ANCHOR and
     FLOATER, try to recognize it, then test constraints
     for the resulting pattern.

     If the pattern doesn't match or the constraints
     aren't met keep searching for a suitable floater
     insn.  */
  XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
  XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
  INSN_CODE (new_rtx) = -1;
  insn_code_number = recog_memoized (new_rtx);
  basic_block bb = BLOCK_FOR_INSN (anchor);
  if (insn_code_number < 0
      || (extract_insn (new_rtx),
	  !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
    return 0;

  /* Order START/END so that START precedes END in the insn stream.  */
  if (reversed)
    {
      start = anchor;
      end = floater;
    }
  else
    {
      start = floater;
      end = anchor;
    }

  /* There's up to three operands to consider.  One
     output and two inputs.

     The output must not be used between FLOATER & ANCHOR
     exclusive.  The inputs must not be set between
     FLOATER and ANCHOR exclusive.  */

  if (reg_used_between_p (dest, start, end))
    return 0;

  if (reg_set_between_p (src1, start, end))
    return 0;

  if (reg_set_between_p (src2, start, end))
    return 0;

  /* If we get here, then everything is good.  */
  return 1;
}
9594
9595 /* Return nonzero if references for INSN are delayed.
9596
9597 Millicode insns are actually function calls with some special
9598 constraints on arguments and register usage.
9599
9600 Millicode calls always expect their arguments in the integer argument
9601 registers, and always return their result in %r29 (ret1). They
9602 are expected to clobber their arguments, %r1, %r29, and the return
9603 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9604
9605 This function tells reorg that the references to arguments and
9606 millicode calls do not appear to happen until after the millicode call.
9607 This allows reorg to put insns which set the argument registers into the
9608 delay slot of the millicode call -- thus they act more like traditional
9609 CALL_INSNs.
9610
9611 Note we cannot consider side effects of the insn to be delayed because
9612 the branch and link insn will clobber the return pointer. If we happened
9613 to use the return pointer in the delay slot of the call, then we lose.
9614
9615 get_attr_type will try to recognize the given insn, so make sure to
9616 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9617 in particular. */
9618 int
pa_insn_refs_are_delayed(rtx_insn * insn)9619 pa_insn_refs_are_delayed (rtx_insn *insn)
9620 {
9621 return ((NONJUMP_INSN_P (insn)
9622 && GET_CODE (PATTERN (insn)) != SEQUENCE
9623 && GET_CODE (PATTERN (insn)) != USE
9624 && GET_CODE (PATTERN (insn)) != CLOBBER
9625 && get_attr_type (insn) == TYPE_MILLI));
9626 }
9627
9628 /* Promote the return value, but not the arguments. */
9629
9630 static machine_mode
pa_promote_function_mode(const_tree type ATTRIBUTE_UNUSED,machine_mode mode,int * punsignedp ATTRIBUTE_UNUSED,const_tree fntype ATTRIBUTE_UNUSED,int for_return)9631 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9632 machine_mode mode,
9633 int *punsignedp ATTRIBUTE_UNUSED,
9634 const_tree fntype ATTRIBUTE_UNUSED,
9635 int for_return)
9636 {
9637 if (for_return == 0)
9638 return mode;
9639 return promote_mode (type, mode, punsignedp);
9640 }
9641
9642 /* On the HP-PA the value is found in register(s) 28(-29), unless
9643 the mode is SF or DF. Then the value is returned in fr4 (32).
9644
9645 This must perform the same promotions as PROMOTE_MODE, else promoting
9646 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9647
9648 Small structures must be returned in a PARALLEL on PA64 in order
9649 to match the HP Compiler ABI. */
9650
static rtx
pa_function_value (const_tree valtype,
		   const_tree func ATTRIBUTE_UNUSED,
		   bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode valmode;

  /* Aggregates, complex and vector values need special layout
     handling; scalars fall through to the code below.  */
  if (AGGREGATE_TYPE_P (valtype)
      || TREE_CODE (valtype) == COMPLEX_TYPE
      || TREE_CODE (valtype) == VECTOR_TYPE)
    {
      HOST_WIDE_INT valsize = int_size_in_bytes (valtype);

      /* Handle aggregates that fit exactly in a word or double word.  */
      if (valsize == UNITS_PER_WORD || valsize == 2 * UNITS_PER_WORD)
	return gen_rtx_REG (TYPE_MODE (valtype), 28);

      if (TARGET_64BIT)
	{
          /* Aggregates with a size less than or equal to 128 bits are
	     returned in GR 28(-29).  They are left justified.  The pad
	     bits are undefined.  Larger aggregates are returned in
	     memory.  */
	  rtx loc[2];
	  int i, offset = 0;
	  int ub = valsize <= UNITS_PER_WORD ? 1 : 2;

	  for (i = 0; i < ub; i++)
	    {
	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode, 28 + i),
					  GEN_INT (offset));
	      offset += 8;
	    }

	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
	}
      else if (valsize > UNITS_PER_WORD)
	{
	  /* Aggregates 5 to 8 bytes in size are returned in general
	     registers r28-r29 in the same manner as other non
	     floating-point objects.  The data is right-justified and
	     zero-extended to 64 bits.  This is opposite to the normal
	     justification used on big endian targets and requires
	     special treatment.  */
	  rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
				       gen_rtx_REG (DImode, 28), const0_rtx);
	  return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
	}
    }

  /* Promote sub-word integers and pointers to word mode; this must
     mirror PROMOTE_MODE (see the comment above this function).  */
  if ((INTEGRAL_TYPE_P (valtype)
       && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
      || POINTER_TYPE_P (valtype))
    valmode = word_mode;
  else
    valmode = TYPE_MODE (valtype);

  /* Scalar SF/DF values come back in fr4 (32) when hardware FP is
     in use; everything else in r28.  */
  if (TREE_CODE (valtype) == REAL_TYPE
      && !AGGREGATE_TYPE_P (valtype)
      && TYPE_MODE (valtype) != TFmode
      && !TARGET_SOFT_FLOAT)
    return gen_rtx_REG (valmode, 32);

  return gen_rtx_REG (valmode, 28);
}
9717
9718 /* Implement the TARGET_LIBCALL_VALUE hook. */
9719
9720 static rtx
pa_libcall_value(machine_mode mode,const_rtx fun ATTRIBUTE_UNUSED)9721 pa_libcall_value (machine_mode mode,
9722 const_rtx fun ATTRIBUTE_UNUSED)
9723 {
9724 if (! TARGET_SOFT_FLOAT
9725 && (mode == SFmode || mode == DFmode))
9726 return gen_rtx_REG (mode, 32);
9727 else
9728 return gen_rtx_REG (mode, 28);
9729 }
9730
9731 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9732
9733 static bool
pa_function_value_regno_p(const unsigned int regno)9734 pa_function_value_regno_p (const unsigned int regno)
9735 {
9736 if (regno == 28
9737 || (! TARGET_SOFT_FLOAT && regno == 32))
9738 return true;
9739
9740 return false;
9741 }
9742
9743 /* Update the data in CUM to advance over argument ARG. */
9744
static void
pa_function_arg_advance (cumulative_args_t cum_v,
			 const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int arg_size = pa_function_arg_size (arg.mode, arg.type);

  cum->nargs_prototype--;
  /* Typed arguments wider than one word are double-word aligned (see
     pa_function_arg_boundary): when the running word count is odd,
     the boolean subexpression evaluates to 1 and burns one extra
     slot as padding.  */
  cum->words += (arg_size
		 + ((cum->words & 01)
		    && arg.type != NULL_TREE
		    && arg_size > 1));
}
9758
9759 /* Return the location of a parameter that is passed in a register or NULL
9760 if the parameter has any component that is passed in memory.
9761
9762 This is new code and will be pushed to into the net sources after
9763 further testing.
9764
9765 ??? We might want to restructure this so that it looks more like other
9766 ports. */
static rtx
pa_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  tree type = arg.type;
  machine_mode mode = arg.mode;
  int max_arg_words = (TARGET_64BIT ? 8 : 4);
  int alignment = 0;
  int arg_size;
  int fpr_reg_base;
  int gpr_reg_base;
  rtx retval;

  /* The end-of-arguments marker carries no location.  */
  if (arg.end_marker_p ())
    return NULL_RTX;

  arg_size = pa_function_arg_size (mode, type);

  /* If this arg would be passed partially or totally on the stack, then
     this routine should return zero.  pa_arg_partial_bytes will
     handle arguments which are split between regs and stack slots if
     the ABI mandates split arguments.  */
  if (!TARGET_64BIT)
    {
      /* The 32-bit ABI does not split arguments.  */
      if (cum->words + arg_size > max_arg_words)
	return NULL_RTX;
    }
  else
    {
      if (arg_size > 1)
	alignment = cum->words & 1;
      if (cum->words + alignment >= max_arg_words)
	return NULL_RTX;
    }

  /* The 32bit ABIs and the 64bit ABIs are rather different,
     particularly in their handling of FP registers.  We might
     be able to cleverly share code between them, but I'm not
     going to bother in the hope that splitting them up results
     in code that is more easily understood.  */

  if (TARGET_64BIT)
    {
      /* Advance the base registers to their current locations.

         Remember, gprs grow towards smaller register numbers while
	 fprs grow to higher register numbers.  Also remember that
	 although FP regs are 32-bit addressable, we pretend that
	 the registers are 64-bits wide.  */
      gpr_reg_base = 26 - cum->words;
      fpr_reg_base = 32 + cum->words;

      /* Arguments wider than one word and small aggregates need special
	 treatment.  */
      if (arg_size > 1
	  || mode == BLKmode
	  || (type && (AGGREGATE_TYPE_P (type)
		       || TREE_CODE (type) == COMPLEX_TYPE
		       || TREE_CODE (type) == VECTOR_TYPE)))
	{
	  /* Double-extended precision (80-bit), quad-precision (128-bit)
	     and aggregates including complex numbers are aligned on
	     128-bit boundaries.  The first eight 64-bit argument slots
	     are associated one-to-one, with general registers r26
	     through r19, and also with floating-point registers fr4
	     through fr11.  Arguments larger than one word are always
	     passed in general registers.

	     Using a PARALLEL with a word mode register results in left
	     justified data on a big-endian target.  */

	  rtx loc[8];
	  int i, offset = 0, ub = arg_size;

	  /* Align the base register.  */
	  gpr_reg_base -= alignment;

	  ub = MIN (ub, max_arg_words - cum->words - alignment);
	  for (i = 0; i < ub; i++)
	    {
	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode, gpr_reg_base),
					  GEN_INT (offset));
	      gpr_reg_base -= 1;
	      offset += 8;
	    }

	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
	}
     }
  else
    {
      /* If the argument is larger than a word, then we know precisely
	 which registers we must use.  */
      if (arg_size > 1)
	{
	  if (cum->words)
	    {
	      gpr_reg_base = 23;
	      fpr_reg_base = 38;
	    }
	  else
	    {
	      gpr_reg_base = 25;
	      fpr_reg_base = 34;
	    }

	  /* Structures 5 to 8 bytes in size are passed in the general
	     registers in the same manner as other non floating-point
	     objects.  The data is right-justified and zero-extended
	     to 64 bits.  This is opposite to the normal justification
	     used on big endian targets and requires special treatment.
	     We now define BLOCK_REG_PADDING to pad these objects.
	     Aggregates, complex and vector types are passed in the same
	     manner as structures.  */
	  if (mode == BLKmode
	      || (type && (AGGREGATE_TYPE_P (type)
			   || TREE_CODE (type) == COMPLEX_TYPE
			   || TREE_CODE (type) == VECTOR_TYPE)))
	    {
	      rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (DImode, gpr_reg_base),
					   const0_rtx);
	      return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
	    }
	}
      else
	{
	   /* We have a single word (32 bits).  A simple computation
	      will get us the register #s we need.  */
	   gpr_reg_base = 26 - cum->words;
	   fpr_reg_base = 32 + 2 * cum->words;
	}
    }

  /* Determine if the argument needs to be passed in both general and
     floating point registers.  */
  if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
       /* If we are doing soft-float with portable runtime, then there
	  is no need to worry about FP regs.  */
       && !TARGET_SOFT_FLOAT
       /* The parameter must be some kind of scalar float, else we just
	  pass it in integer registers.  */
       && GET_MODE_CLASS (mode) == MODE_FLOAT
       /* The target function must not have a prototype.  */
       && cum->nargs_prototype <= 0
       /* libcalls do not need to pass items in both FP and general
	  registers.  */
       && type != NULL_TREE
       /* All this hair applies to "outgoing" args only.  This includes
	  sibcall arguments setup with FUNCTION_INCOMING_ARG.  */
       && !cum->incoming)
      /* Also pass outgoing floating arguments in both registers in indirect
	 calls with the 32 bit ABI and the HP assembler since there is no
	 way to the specify argument locations in static functions.  */
      || (!TARGET_64BIT
	  && !TARGET_GAS
	  && !cum->incoming
	  && cum->indirect
	  && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      retval
	= gen_rtx_PARALLEL
	    (mode,
	     gen_rtvec (2,
			gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (mode, fpr_reg_base),
					   const0_rtx),
			gen_rtx_EXPR_LIST (VOIDmode,
					   gen_rtx_REG (mode, gpr_reg_base),
					   const0_rtx)));
    }
  else
    {
      /* See if we should pass this parameter in a general register.  */
      if (TARGET_SOFT_FLOAT
	  /* Indirect calls in the normal 32bit ABI require all arguments
	     to be passed in general registers.  */
	  || (!TARGET_PORTABLE_RUNTIME
	      && !TARGET_64BIT
	      && !TARGET_ELF32
	      && cum->indirect)
	  /* If the parameter is not a scalar floating-point parameter,
	     then it belongs in GPRs.  */
	  || GET_MODE_CLASS (mode) != MODE_FLOAT
	  /* Structure with single SFmode field belongs in GPR.  */
	  || (type && AGGREGATE_TYPE_P (type)))
	retval = gen_rtx_REG (mode, gpr_reg_base);
      else
	retval = gen_rtx_REG (mode, fpr_reg_base);
    }
  return retval;
}
9961
9962 /* Arguments larger than one word are double word aligned. */
9963
9964 static unsigned int
pa_function_arg_boundary(machine_mode mode,const_tree type)9965 pa_function_arg_boundary (machine_mode mode, const_tree type)
9966 {
9967 bool singleword = (type
9968 ? (integer_zerop (TYPE_SIZE (type))
9969 || !TREE_CONSTANT (TYPE_SIZE (type))
9970 || int_size_in_bytes (type) <= UNITS_PER_WORD)
9971 : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9972
9973 return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
9974 }
9975
9976 /* If this arg would be passed totally in registers or totally on the stack,
9977 then this routine should return zero. */
9978
9979 static int
pa_arg_partial_bytes(cumulative_args_t cum_v,const function_arg_info & arg)9980 pa_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
9981 {
9982 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9983 unsigned int max_arg_words = 8;
9984 unsigned int offset = 0;
9985
9986 if (!TARGET_64BIT)
9987 return 0;
9988
9989 if (pa_function_arg_size (arg.mode, arg.type) > 1 && (cum->words & 1))
9990 offset = 1;
9991
9992 if (cum->words + offset + pa_function_arg_size (arg.mode, arg.type)
9993 <= max_arg_words)
9994 /* Arg fits fully into registers. */
9995 return 0;
9996 else if (cum->words + offset >= max_arg_words)
9997 /* Arg fully on the stack. */
9998 return 0;
9999 else
10000 /* Arg is split. */
10001 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
10002 }
10003
10004
10005 /* A get_unnamed_section callback for switching to the text section.
10006
10007 This function is only used with SOM. Because we don't support
10008 named subspaces, we can only create a new subspace or switch back
10009 to the default text subspace. */
10010
10011 static void
som_output_text_section_asm_op(const char * data ATTRIBUTE_UNUSED)10012 som_output_text_section_asm_op (const char *data ATTRIBUTE_UNUSED)
10013 {
10014 gcc_assert (TARGET_SOM);
10015 if (TARGET_GAS)
10016 {
10017 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
10018 {
10019 /* We only want to emit a .nsubspa directive once at the
10020 start of the function. */
10021 cfun->machine->in_nsubspa = 1;
10022
10023 /* Create a new subspace for the text. This provides
10024 better stub placement and one-only functions. */
10025 if (cfun->decl
10026 && DECL_ONE_ONLY (cfun->decl)
10027 && !DECL_WEAK (cfun->decl))
10028 {
10029 output_section_asm_op ("\t.SPACE $TEXT$\n"
10030 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
10031 "ACCESS=44,SORT=24,COMDAT");
10032 return;
10033 }
10034 }
10035 else
10036 {
10037 /* There isn't a current function or the body of the current
10038 function has been completed. So, we are changing to the
10039 text section to output debugging information. Thus, we
10040 need to forget that we are in the text section so that
10041 varasm.c will call us when text_section is selected again. */
10042 gcc_assert (!cfun || !cfun->machine
10043 || cfun->machine->in_nsubspa == 2);
10044 in_section = NULL;
10045 }
10046 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
10047 return;
10048 }
10049 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
10050 }
10051
/* A get_unnamed_section callback for switching to comdat data
   sections.  This function is only used with SOM.  DATA is the
   assembler directive string recorded when the section was created.  */

static void
som_output_comdat_data_section_asm_op (const char *data)
{
  /* Clear in_section so that varasm.c calls us again the next time
     this section is selected, emitting the directive (and thus a new
     comdat subspace) each time.  */
  in_section = NULL;
  output_section_asm_op (data);
}
10061
/* Implement TARGET_ASM_INIT_SECTIONS.  Create the SOM-specific
   sections used by this backend and override the generic defaults
   for the readonly data and exception sections.  */

static void
pa_som_asm_init_sections (void)
{
  text_section
    = get_unnamed_section (0, som_output_text_section_asm_op, NULL);

  /* SOM puts readonly data in the default $LIT$ subspace when PIC code
     is not being generated.  */
  som_readonly_data_section
    = get_unnamed_section (0, output_section_asm_op,
			   "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");

  /* When secondary definitions are not supported, SOM makes readonly
     data one-only by creating a new $LIT$ subspace in $TEXT$ with
     the comdat flag.  */
  som_one_only_readonly_data_section
    = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
			   "\t.SPACE $TEXT$\n"
			   "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
			   "ACCESS=0x2c,SORT=16,COMDAT");


  /* When secondary definitions are not supported, SOM makes data one-only
     by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
  som_one_only_data_section
    = get_unnamed_section (SECTION_WRITE,
			   som_output_comdat_data_section_asm_op,
			   "\t.SPACE $PRIVATE$\n"
			   "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
			   "ACCESS=31,SORT=24,COMDAT");

  /* The TM clone table section is only created when transactional
     memory support is enabled.  */
  if (flag_tm)
    som_tm_clone_table_section
      = get_unnamed_section (0, output_section_asm_op,
			     "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");

  /* HPUX ld generates incorrect GOT entries for "T" fixups which
     reference data within the $TEXT$ space (for example constant
     strings in the $LIT$ subspace).

     The assemblers (GAS and HP as) both have problems with handling
     the difference of two symbols.  This is the other correct way to
     reference constant data during PIC code generation.

     Thus, we can't put constant data needing relocation in the $TEXT$
     space during PIC generation.

     Previously, we placed all constant data into the $DATA$ subspace
     when generating PIC code.  This reduces sharing, but it works
     correctly.  Now we rely on pa_reloc_rw_mask() for section selection.
     This puts constant data not needing relocation into the $TEXT$ space.  */
  readonly_data_section = som_readonly_data_section;

  /* We must not have a reference to an external symbol defined in a
     shared library in a readonly section, else the SOM linker will
     complain.

     So, we force exception information into the data section.  */
  exception_section = data_section;
}
10124
/* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION.  Return the section
   created in pa_som_asm_init_sections when flag_tm is set.  */

static section *
pa_som_tm_clone_table_section (void)
{
  return som_tm_clone_table_section;
}
10132
10133 /* On hpux10, the linker will give an error if we have a reference
10134 in the read-only data section to a symbol defined in a shared
10135 library. Therefore, expressions that might require a reloc
10136 cannot be placed in the read-only data section. */
10137
10138 static section *
pa_select_section(tree exp,int reloc,unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)10139 pa_select_section (tree exp, int reloc,
10140 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
10141 {
10142 if (TREE_CODE (exp) == VAR_DECL
10143 && TREE_READONLY (exp)
10144 && !TREE_THIS_VOLATILE (exp)
10145 && DECL_INITIAL (exp)
10146 && (DECL_INITIAL (exp) == error_mark_node
10147 || TREE_CONSTANT (DECL_INITIAL (exp)))
10148 && !(reloc & pa_reloc_rw_mask ()))
10149 {
10150 if (TARGET_SOM
10151 && DECL_ONE_ONLY (exp)
10152 && !DECL_WEAK (exp))
10153 return som_one_only_readonly_data_section;
10154 else
10155 return readonly_data_section;
10156 }
10157 else if (CONSTANT_CLASS_P (exp)
10158 && !(reloc & pa_reloc_rw_mask ()))
10159 return readonly_data_section;
10160 else if (TARGET_SOM
10161 && TREE_CODE (exp) == VAR_DECL
10162 && DECL_ONE_ONLY (exp)
10163 && !DECL_WEAK (exp))
10164 return som_one_only_data_section;
10165 else
10166 return data_section;
10167 }
10168
10169 /* Implement pa_elf_select_rtx_section. If X is a function label operand
10170 and the function is in a COMDAT group, place the plabel reference in the
10171 .data.rel.ro.local section. The linker ignores references to symbols in
10172 discarded sections from this section. */
10173
10174 static section *
pa_elf_select_rtx_section(machine_mode mode,rtx x,unsigned HOST_WIDE_INT align)10175 pa_elf_select_rtx_section (machine_mode mode, rtx x,
10176 unsigned HOST_WIDE_INT align)
10177 {
10178 if (function_label_operand (x, VOIDmode))
10179 {
10180 tree decl = SYMBOL_REF_DECL (x);
10181
10182 if (!decl || (DECL_P (decl) && DECL_COMDAT_GROUP (decl)))
10183 return get_named_section (NULL, ".data.rel.ro.local", 1);
10184 }
10185
10186 return default_elf_select_rtx_section (mode, x, align);
10187 }
10188
10189 /* Implement pa_reloc_rw_mask. */
10190
10191 static int
pa_reloc_rw_mask(void)10192 pa_reloc_rw_mask (void)
10193 {
10194 if (flag_pic || (TARGET_SOM && !TARGET_HPUX_11))
10195 return 3;
10196
10197 /* HP linker does not support global relocs in readonly memory. */
10198 return TARGET_SOM ? 2 : 0;
10199 }
10200
10201 static void
pa_globalize_label(FILE * stream,const char * name)10202 pa_globalize_label (FILE *stream, const char *name)
10203 {
10204 /* We only handle DATA objects here, functions are globalized in
10205 ASM_DECLARE_FUNCTION_NAME. */
10206 if (! FUNCTION_NAME_P (name))
10207 {
10208 fputs ("\t.EXPORT ", stream);
10209 assemble_name (stream, name);
10210 fputs (",DATA\n", stream);
10211 }
10212 }
10213
/* Worker function for TARGET_STRUCT_VALUE_RTX.  Return the register
   used to pass the address of an aggregate return value; the same
   register is used for incoming and outgoing calls.  */

static rtx
pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
		     int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
}
10222
10223 /* Worker function for TARGET_RETURN_IN_MEMORY. */
10224
10225 bool
pa_return_in_memory(const_tree type,const_tree fntype ATTRIBUTE_UNUSED)10226 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
10227 {
10228 /* SOM ABI says that objects larger than 64 bits are returned in memory.
10229 PA64 ABI says that objects larger than 128 bits are returned in memory.
10230 Note, int_size_in_bytes can return -1 if the size of the object is
10231 variable or larger than the maximum value that can be expressed as
10232 a HOST_WIDE_INT. It can also return zero for an empty type. The
10233 simplest way to handle variable and empty types is to pass them in
10234 memory. This avoids problems in defining the boundaries of argument
10235 slots, allocating registers, etc. */
10236 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
10237 || int_size_in_bytes (type) <= 0);
10238 }
10239
/* Structure to hold declaration and name of external symbols that are
   emitted by GCC.  We generate a vector of these symbols and output them
   at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
   This avoids putting out names that are never really used.  */

typedef struct GTY(()) extern_symbol
{
  /* The declaration of the external symbol.  */
  tree decl;
  /* Its assembler name, as passed to pa_hpux_asm_output_external.  */
  const char *name;
} extern_symbol;

/* Define gc'd vector type for extern_symbol.  */

/* Vector of extern_symbol pointers, flushed in pa_file_end.  */
static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
10255
#ifdef ASM_OUTPUT_EXTERNAL_REAL
/* Mark DECL (name NAME) as an external reference (assembler output
   file FILE).  This saves the names to output at the end of the file
   if actually referenced.  The directives themselves are emitted from
   pa_file_end.  */

void
pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
{
  /* Nothing is written here; the deferred output goes to asm_out_file,
     so FILE must be the same stream.  */
  gcc_assert (file == asm_out_file);
  extern_symbol p = {decl, name};
  vec_safe_push (extern_symbols, p);
}
#endif
10269
/* Output text required at the end of an assembler file.
   This includes deferred plabels and .import directives for
   all external symbols that were actually referenced.  */

static void
pa_file_end (void)
{
#ifdef ASM_OUTPUT_EXTERNAL_REAL
  unsigned int i;
  extern_symbol *p;

  if (!NO_DEFERRED_PROFILE_COUNTERS)
    output_deferred_profile_counters ();
#endif

  output_deferred_plabels ();

#ifdef ASM_OUTPUT_EXTERNAL_REAL
  /* Emit .import directives only for symbols that were actually
     referenced and not defined in this translation unit.  */
  for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
    {
      tree decl = p->decl;

      if (!TREE_ASM_WRITTEN (decl)
	  && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
	ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
    }

  /* The vector is GC-allocated; free it explicitly now that the
     deferred names have been written.  */
  vec_free (extern_symbols);
#endif

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
}
10303
10304 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10305
10306 static bool
pa_can_change_mode_class(machine_mode from,machine_mode to,reg_class_t rclass)10307 pa_can_change_mode_class (machine_mode from, machine_mode to,
10308 reg_class_t rclass)
10309 {
10310 if (from == to)
10311 return true;
10312
10313 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
10314 return true;
10315
10316 /* Reject changes to/from modes with zero size. */
10317 if (!GET_MODE_SIZE (from) || !GET_MODE_SIZE (to))
10318 return false;
10319
10320 /* Reject changes to/from complex and vector modes. */
10321 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
10322 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
10323 return false;
10324
10325 /* There is no way to load QImode or HImode values directly from memory
10326 to a FP register. SImode loads to the FP registers are not zero
10327 extended. On the 64-bit target, this conflicts with the definition
10328 of LOAD_EXTEND_OP. Thus, we reject all mode changes in the FP registers
10329 except for DImode to SImode on the 64-bit target. It is handled by
10330 register renaming in pa_print_operand. */
10331 if (MAYBE_FP_REG_CLASS_P (rclass))
10332 return TARGET_64BIT && from == DImode && to == SImode;
10333
10334 /* TARGET_HARD_REGNO_MODE_OK places modes with sizes larger than a word
10335 in specific sets of registers. Thus, we cannot allow changing
10336 to a larger mode when it's larger than a word. */
10337 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
10338 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
10339 return false;
10340
10341 return true;
10342 }
10343
10344 /* Implement TARGET_MODES_TIEABLE_P.
10345
10346 We should return FALSE for QImode and HImode because these modes
10347 are not ok in the floating-point registers. However, this prevents
10348 tieing these modes to SImode and DImode in the general registers.
10349 So, this isn't a good idea. We rely on TARGET_HARD_REGNO_MODE_OK and
10350 TARGET_CAN_CHANGE_MODE_CLASS to prevent these modes from being used
10351 in the floating-point registers. */
10352
10353 static bool
pa_modes_tieable_p(machine_mode mode1,machine_mode mode2)10354 pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10355 {
10356 /* Don't tie modes in different classes. */
10357 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
10358 return false;
10359
10360 return true;
10361 }
10362
10363
10364 /* Length in units of the trampoline instruction code. */
10365
10366 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 36 : 48))
10367
10368
10369 /* Output assembler code for a block containing the constant parts
10370 of a trampoline, leaving space for the variable parts.\
10371
10372 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
10373 and then branches to the specified routine.
10374
10375 This code template is copied from text segment to stack location
10376 and then patched with pa_trampoline_init to contain valid values,
10377 and then entered as a subroutine.
10378
10379 It is best to keep this as small as possible to avoid having to
10380 flush multiple lines in the cache. */
10381
10382 static void
pa_asm_trampoline_template(FILE * f)10383 pa_asm_trampoline_template (FILE *f)
10384 {
10385 if (!TARGET_64BIT)
10386 {
10387 if (TARGET_PA_20)
10388 {
10389 fputs ("\tmfia %r20\n", f);
10390 fputs ("\tldw 48(%r20),%r22\n", f);
10391 fputs ("\tcopy %r22,%r21\n", f);
10392 fputs ("\tbb,>=,n %r22,30,.+16\n", f);
10393 fputs ("\tdepwi 0,31,2,%r22\n", f);
10394 fputs ("\tldw 0(%r22),%r21\n", f);
10395 fputs ("\tldw 4(%r22),%r19\n", f);
10396 fputs ("\tbve (%r21)\n", f);
10397 fputs ("\tldw 52(%r1),%r29\n", f);
10398 fputs ("\t.word 0\n", f);
10399 fputs ("\t.word 0\n", f);
10400 fputs ("\t.word 0\n", f);
10401 }
10402 else
10403 {
10404 if (ASSEMBLER_DIALECT == 0)
10405 {
10406 fputs ("\tbl .+8,%r20\n", f);
10407 fputs ("\tdepi 0,31,2,%r20\n", f);
10408 }
10409 else
10410 {
10411 fputs ("\tb,l .+8,%r20\n", f);
10412 fputs ("\tdepwi 0,31,2,%r20\n", f);
10413 }
10414 fputs ("\tldw 40(%r20),%r22\n", f);
10415 fputs ("\tcopy %r22,%r21\n", f);
10416 fputs ("\tbb,>=,n %r22,30,.+16\n", f);
10417 if (ASSEMBLER_DIALECT == 0)
10418 fputs ("\tdepi 0,31,2,%r22\n", f);
10419 else
10420 fputs ("\tdepwi 0,31,2,%r22\n", f);
10421 fputs ("\tldw 0(%r22),%r21\n", f);
10422 fputs ("\tldw 4(%r22),%r19\n", f);
10423 fputs ("\tldsid (%r21),%r1\n", f);
10424 fputs ("\tmtsp %r1,%sr0\n", f);
10425 fputs ("\tbe 0(%sr0,%r21)\n", f);
10426 fputs ("\tldw 44(%r20),%r29\n", f);
10427 }
10428 fputs ("\t.word 0\n", f);
10429 fputs ("\t.word 0\n", f);
10430 fputs ("\t.word 0\n", f);
10431 fputs ("\t.word 0\n", f);
10432 }
10433 else
10434 {
10435 fputs ("\t.dword 0\n", f);
10436 fputs ("\t.dword 0\n", f);
10437 fputs ("\t.dword 0\n", f);
10438 fputs ("\t.dword 0\n", f);
10439 fputs ("\tmfia %r31\n", f);
10440 fputs ("\tldd 24(%r31),%r27\n", f);
10441 fputs ("\tldd 32(%r31),%r31\n", f);
10442 fputs ("\tldd 16(%r27),%r1\n", f);
10443 fputs ("\tbve (%r1)\n", f);
10444 fputs ("\tldd 24(%r27),%r27\n", f);
10445 fputs ("\t.dword 0 ; fptr\n", f);
10446 fputs ("\t.dword 0 ; static link\n", f);
10447 }
10448 }
10449
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.

   Move the function address to the trampoline template at offset 48.
   Move the static chain value to trampoline template at offset 52.
   Move the trampoline address to trampoline template at offset 56.
   Move r19 to trampoline template at offset 60.  The latter two
   words create a plabel for the indirect call to the trampoline.

   A similar sequence is used for the 64-bit port but the plabel is
   at the beginning of the trampoline.

   Finally, the cache entries for the trampoline code are flushed.
   This is necessary to ensure that the trampoline instruction sequence
   is written to memory prior to any attempts at prefetching the code
   sequence.  */

static void
pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx end_addr = gen_reg_rtx (Pmode);
  rtx line_length = gen_reg_rtx (Pmode);
  rtx r_tramp, tmp;

  /* Copy the constant code template emitted by
     pa_asm_trampoline_template into the trampoline, then patch the
     variable words below.  */
  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
  r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));

  if (!TARGET_64BIT)
    {
      tmp = adjust_address (m_tramp, Pmode, 48);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 52);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  The second word is
	 %r19 — presumably the PIC register in the 32-bit runtime; see
	 PIC_OFFSET_TABLE_REGNUM.  */
      tmp = adjust_address (m_tramp, Pmode, 56);
      emit_move_insn (tmp, r_tramp);
      tmp = adjust_address (m_tramp, Pmode, 60);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));

      /* fdc and fic only use registers for the address to flush,
	 they do not accept integer displacements.  We align the
	 start and end addresses to the beginning of their respective
	 cache lines to minimize the number of lines flushed.  */
      emit_insn (gen_andsi3 (start_addr, r_tramp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
					     TRAMPOLINE_CODE_SIZE-1));
      emit_insn (gen_andsi3 (end_addr, tmp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
				    gen_reg_rtx (Pmode),
				    gen_reg_rtx (Pmode)));
    }
  else
    {
      tmp = adjust_address (m_tramp, Pmode, 56);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 64);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  The first plabel word
	 points at the code, which starts 32 bytes into the trampoline;
	 the second is %r27 (the 64-bit global pointer).  */
      tmp = adjust_address (m_tramp, Pmode, 16);
      emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
							    r_tramp, 32)));
      tmp = adjust_address (m_tramp, Pmode, 24);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));

      /* fdc and fic only use registers for the address to flush,
	 they do not accept integer displacements.  We align the
	 start and end addresses to the beginning of their respective
	 cache lines to minimize the number of lines flushed.  */
      tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
      emit_insn (gen_anddi3 (start_addr, tmp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
					     TRAMPOLINE_CODE_SIZE - 1));
      emit_insn (gen_anddi3 (end_addr, tmp,
			     GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
				    gen_reg_rtx (Pmode),
				    gen_reg_rtx (Pmode)));
    }

#ifdef HAVE_ENABLE_EXECUTE_STACK
  /* Let the runtime mark the stack executable where required.  */
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
#endif
}
10547
10548 /* Perform any machine-specific adjustment in the address of the trampoline.
10549 ADDR contains the address that was passed to pa_trampoline_init.
10550 Adjust the trampoline address to point to the plabel at offset 56. */
10551
10552 static rtx
pa_trampoline_adjust_address(rtx addr)10553 pa_trampoline_adjust_address (rtx addr)
10554 {
10555 if (!TARGET_64BIT)
10556 addr = memory_address (Pmode, plus_constant (Pmode, addr, 58));
10557 return addr;
10558 }
10559
10560 static rtx
pa_delegitimize_address(rtx orig_x)10561 pa_delegitimize_address (rtx orig_x)
10562 {
10563 rtx x = delegitimize_mem_from_attrs (orig_x);
10564
10565 if (GET_CODE (x) == LO_SUM
10566 && GET_CODE (XEXP (x, 1)) == UNSPEC
10567 && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10568 return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10569 return x;
10570 }
10571
10572 static rtx
pa_internal_arg_pointer(void)10573 pa_internal_arg_pointer (void)
10574 {
10575 /* The argument pointer and the hard frame pointer are the same in
10576 the 32-bit runtime, so we don't need a copy. */
10577 if (TARGET_64BIT)
10578 return copy_to_reg (virtual_incoming_args_rtx);
10579 else
10580 return virtual_incoming_args_rtx;
10581 }
10582
10583 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10584 Frame pointer elimination is automatically handled. */
10585
10586 static bool
pa_can_eliminate(const int from,const int to)10587 pa_can_eliminate (const int from, const int to)
10588 {
10589 /* The argument cannot be eliminated in the 64-bit runtime. */
10590 if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10591 return false;
10592
10593 return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10594 ? ! frame_pointer_needed
10595 : true);
10596 }
10597
10598 /* Define the offset between two registers, FROM to be eliminated and its
10599 replacement TO, at the start of a routine. */
10600 HOST_WIDE_INT
pa_initial_elimination_offset(int from,int to)10601 pa_initial_elimination_offset (int from, int to)
10602 {
10603 HOST_WIDE_INT offset;
10604
10605 if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10606 && to == STACK_POINTER_REGNUM)
10607 offset = -pa_compute_frame_size (get_frame_size (), 0);
10608 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10609 offset = 0;
10610 else
10611 gcc_unreachable ();
10612
10613 return offset;
10614 }
10615
/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  Adjust the fixed and
   call-used register sets based on the selected architecture and
   options.  */

static void
pa_conditional_register_usage (void)
{
  int i;

  if (!TARGET_64BIT && !TARGET_PA_11)
    {
      /* Registers 56..FP_REG_LAST and the odd registers in 33..55 are
	 made unavailable — presumably because PA 1.0 has a smaller FP
	 register file than PA 1.1; confirm against the register layout
	 in pa.h.  */
      for (i = 56; i <= FP_REG_LAST; i++)
	fixed_regs[i] = call_used_regs[i] = 1;
      for (i = 33; i < 56; i += 2)
	fixed_regs[i] = call_used_regs[i] = 1;
    }
  if (TARGET_SOFT_FLOAT)
    {
      /* No FP registers at all with soft float.  */
      for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
	fixed_regs[i] = call_used_regs[i] = 1;
    }
  if (flag_pic)
    fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
}
10636
10637 /* Target hook for c_mode_for_suffix. */
10638
10639 static machine_mode
pa_c_mode_for_suffix(char suffix)10640 pa_c_mode_for_suffix (char suffix)
10641 {
10642 if (HPUX_LONG_DOUBLE_LIBRARY)
10643 {
10644 if (suffix == 'q')
10645 return TFmode;
10646 }
10647
10648 return VOIDmode;
10649 }
10650
10651 /* Target hook for function_section. */
10652
10653 static section *
pa_function_section(tree decl,enum node_frequency freq,bool startup,bool exit)10654 pa_function_section (tree decl, enum node_frequency freq,
10655 bool startup, bool exit)
10656 {
10657 /* Put functions in text section if target doesn't have named sections. */
10658 if (!targetm_common.have_named_sections)
10659 return text_section;
10660
10661 /* Force nested functions into the same section as the containing
10662 function. */
10663 if (decl
10664 && DECL_SECTION_NAME (decl) == NULL
10665 && DECL_CONTEXT (decl) != NULL_TREE
10666 && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10667 && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
10668 return function_section (DECL_CONTEXT (decl));
10669
10670 /* Otherwise, use the default function section. */
10671 return default_function_section (decl, freq, startup, exit);
10672 }
10673
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   In 64-bit mode, we reject CONST_DOUBLES.  We also reject CONST_INTS
   that need more than three instructions to load prior to reload.  This
   limit is somewhat arbitrary.  It takes three instructions to load a
   CONST_INT from memory but two are memory accesses.  It may be better
   to increase the allowed range for CONST_INTS.  We may also be able
   to handle CONST_DOUBLES.  */

static bool
pa_legitimate_constant_p (machine_mode mode, rtx x)
{
  /* Non-zero FP constants must be loaded from memory.  */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
    return false;

  /* Old HP assemblers can't handle label references as immediates.  */
  if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
    return false;

  /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
     legitimate constants.  The other variants can't be handled by
     the move patterns after reload starts.  */
  if (tls_referenced_p (x))
    return false;

  if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
    return false;

  /* Before reload, reject 64-bit integer constants that would take
     more than a few instructions to synthesize.  */
  if (TARGET_64BIT
      && HOST_BITS_PER_WIDE_INT > 32
      && GET_CODE (x) == CONST_INT
      && !reload_in_progress
      && !reload_completed
      && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
      && !pa_cint_ok_for_move (UINTVAL (x)))
    return false;

  /* Function labels need to be materialized as plabels, not used as
     plain immediates.  */
  if (function_label_operand (x, mode))
    return false;

  return true;
}
10715
10716 /* Implement TARGET_SECTION_TYPE_FLAGS. */
10717
10718 static unsigned int
pa_section_type_flags(tree decl,const char * name,int reloc)10719 pa_section_type_flags (tree decl, const char *name, int reloc)
10720 {
10721 unsigned int flags;
10722
10723 flags = default_section_type_flags (decl, name, reloc);
10724
10725 /* Function labels are placed in the constant pool. This can
10726 cause a section conflict if decls are put in ".data.rel.ro"
10727 or ".data.rel.ro.local" using the __attribute__ construct. */
10728 if (strcmp (name, ".data.rel.ro") == 0
10729 || strcmp (name, ".data.rel.ro.local") == 0)
10730 flags |= SECTION_WRITE | SECTION_RELRO;
10731
10732 return flags;
10733 }
10734
10735 /* pa_legitimate_address_p recognizes an RTL expression that is a
10736 valid memory address for an instruction. The MODE argument is the
10737 machine mode for the MEM expression that wants to use this address.
10738
10739 On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10740 REG+REG, and REG+(REG*SCALE). The indexed address forms are only
10741 available with floating point loads and stores, and integer loads.
10742 We get better code by allowing indexed addresses in the initial
10743 RTL generation.
10744
10745 The acceptance of indexed addresses as legitimate implies that we
10746 must provide patterns for doing indexed integer stores, or the move
10747 expanders must force the address of an indexed store to a register.
10748 We have adopted the latter approach.
10749
10750 Another function of pa_legitimate_address_p is to ensure that
10751 the base register is a valid pointer for indexed instructions.
10752 On targets that have non-equivalent space registers, we have to
10753 know at the time of assembler output which register in a REG+REG
10754 pair is the base register. The REG_POINTER flag is sometimes lost
10755 in reload and the following passes, so it can't be relied on during
10756 code generation. Thus, we either have to canonicalize the order
10757 of the registers in REG+REG indexed addresses, or treat REG+REG
10758 addresses separately and provide patterns for both permutations.
10759
10760 The latter approach requires several hundred additional lines of
10761 code in pa.md. The downside to canonicalizing is that a PLUS
10762 in the wrong order can't combine to form to make a scaled indexed
10763 memory operand. As we won't need to canonicalize the operands if
10764 the REG_POINTER lossage can be fixed, it seems better canonicalize.
10765
10766 We initially break out scaled indexed addresses in canonical order
10767 in pa_emit_move_sequence. LEGITIMIZE_ADDRESS also canonicalizes
10768 scaled indexed addresses during RTL generation. However, fold_rtx
10769 has its own opinion on how the operands of a PLUS should be ordered.
10770 If one of the operands is equivalent to a constant, it will make
10771 that operand the second operand. As the base register is likely to
10772 be equivalent to a SYMBOL_REF, we have made it the second operand.
10773
10774 pa_legitimate_address_p accepts REG+REG as legitimate when the
10775 operands are in the order INDEX+BASE on targets with non-equivalent
10776 space registers, and in any order on targets with equivalent space
10777 registers. It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10778
10779 We treat a SYMBOL_REF as legitimate if it is part of the current
10780 function's constant-pool, because such addresses can actually be
10781 output as REG+SMALLINT. */
10782
static bool
pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
{
  /* A plain base register, or a base register inside one of the
     auto-increment/decrement addressing modes, is always legitimate.  */
  if ((REG_P (x)
       && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
		  : REG_OK_FOR_BASE_P (x)))
      || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
	   || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
	  && REG_P (XEXP (x, 0))
	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
		     : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
    return true;

  if (GET_CODE (x) == PLUS)
    {
      rtx base, index;

      /* For REG+REG, the base register should be in XEXP (x, 1),
	 so check it first.  */
      if (REG_P (XEXP (x, 1))
	  && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
		     : REG_OK_FOR_BASE_P (XEXP (x, 1))))
	base = XEXP (x, 1), index = XEXP (x, 0);
      else if (REG_P (XEXP (x, 0))
	       && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
			  : REG_OK_FOR_BASE_P (XEXP (x, 0))))
	base = XEXP (x, 0), index = XEXP (x, 1);
      else
	return false;

      /* REG+D: base register plus constant displacement.  */
      if (GET_CODE (index) == CONST_INT)
	{
	  /* Short 5-bit displacements are always OK.  */
	  if (INT_5_BITS (index))
	    return true;

	  /* When INT14_OK_STRICT is false, a secondary reload is needed
	     to adjust the displacement of SImode and DImode floating point
	     instructions but this may fail when the register also needs
	     reloading.  So, we return false when STRICT is true.  We
	     also reject long displacements for float mode addresses since
	     the majority of accesses will use floating point instructions
	     that don't support 14-bit offsets.  */
	  if (!INT14_OK_STRICT
	      && (strict || !(reload_in_progress || reload_completed))
	      && mode != QImode
	      && mode != HImode)
	    return false;

	  /* Otherwise, accept any 14-bit displacement the predicate
	     allows for this mode.  */
	  return base14_operand (index, mode);
	}

      /* REG+REG: unscaled indexed addressing.  */
      if (!TARGET_DISABLE_INDEXING
	  /* Only accept the "canonical" INDEX+BASE operand order
	     on targets with non-equivalent space registers.  */
	  && (TARGET_NO_SPACE_REGS
	      ? REG_P (index)
	      : (base == XEXP (x, 1) && REG_P (index)
		 && (reload_completed
		     || (reload_in_progress && HARD_REGISTER_P (base))
		     || REG_POINTER (base))
		 && (reload_completed
		     || (reload_in_progress && HARD_REGISTER_P (index))
		     || !REG_POINTER (index))))
	  && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
		     : REG_OK_FOR_INDEX_P (index))
	  && borx_reg_operand (base, Pmode)
	  && borx_reg_operand (index, Pmode))
	return true;

      /* (REG*SCALE)+REG: scaled indexed addressing.  The scale factor
	 must equal the size of the mode being accessed.  */
      if (!TARGET_DISABLE_INDEXING
	  && GET_CODE (index) == MULT
	  /* Only accept base operands with the REG_POINTER flag prior to
	     reload on targets with non-equivalent space registers.  */
	  && (TARGET_NO_SPACE_REGS
	      || (base == XEXP (x, 1)
		  && (reload_completed
		      || (reload_in_progress && HARD_REGISTER_P (base))
		      || REG_POINTER (base))))
	  && REG_P (XEXP (index, 0))
	  && GET_MODE (XEXP (index, 0)) == Pmode
	  && MODE_OK_FOR_SCALED_INDEXING_P (mode)
	  && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
		     : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
	  && GET_CODE (XEXP (index, 1)) == CONST_INT
	  && INTVAL (XEXP (index, 1))
	     == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
	  && borx_reg_operand (base, Pmode))
	return true;

      return false;
    }

  /* LO_SUM: a base register plus the low part of a constant address.  */
  if (GET_CODE (x) == LO_SUM)
    {
      rtx y = XEXP (x, 0);

      /* Look through a SUBREG to the underlying base register.  */
      if (GET_CODE (y) == SUBREG)
	y = SUBREG_REG (y);

      if (REG_P (y)
	  && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
		     : REG_OK_FOR_BASE_P (y)))
	{
	  /* Needed for -fPIC */
	  if (mode == Pmode
	      && GET_CODE (XEXP (x, 1)) == UNSPEC)
	    return true;

	  /* As for REG+D above, reject long displacements for float
	     modes when INT14_OK_STRICT is false.  */
	  if (!INT14_OK_STRICT
	      && (strict || !(reload_in_progress || reload_completed))
	      && mode != QImode
	      && mode != HImode)
	    return false;

	  if (CONSTANT_P (XEXP (x, 1)))
	    return true;
	}
      return false;
    }

  /* A small absolute address.  */
  if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
    return true;

  return false;
}
10909
10910 /* Look for machine dependent ways to make the invalid address AD a
10911 valid address.
10912
10913 For the PA, transform:
10914
10915 memory(X + <large int>)
10916
10917 into:
10918
10919 if (<large int> & mask) >= 16
10920 Y = (<large int> & ~mask) + mask + 1 Round up.
10921 else
10922 Y = (<large int> & ~mask) Round down.
10923 Z = X + Y
10924 memory (Z + (<large int> - Y));
10925
10926 This makes reload inheritance and reload_cse work better since Z
10927 can be reused.
10928
10929 There may be more opportunities to improve code with this hook. */
10930
rtx
pa_legitimize_reload_address (rtx ad, machine_mode mode,
			      int opnum, int type,
			      int ind_levels ATTRIBUTE_UNUSED)
{
  long offset, newoffset, mask;
  rtx new_rtx, temp = NULL_RTX;

  /* Floating-point accesses are limited to a 5-bit displacement unless
     INT14_OK_STRICT; other accesses may use a 14-bit displacement.  */
  mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
	  && !INT14_OK_STRICT ? 0x1f : 0x3fff);

  /* Try folding the PLUS first, in case the operands simplify to a
     simpler address (e.g., both are constant).  */
  if (optimize && GET_CODE (ad) == PLUS)
    temp = simplify_binary_operation (PLUS, Pmode,
				      XEXP (ad, 0), XEXP (ad, 1));

  new_rtx = temp ? temp : ad;

  if (optimize
      && GET_CODE (new_rtx) == PLUS
      && GET_CODE (XEXP (new_rtx, 0)) == REG
      && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
    {
      offset = INTVAL (XEXP ((new_rtx), 1));

      /* Choose rounding direction.  Round up if we are >= halfway.  */
      if ((offset & mask) >= ((mask + 1) / 2))
	newoffset = (offset & ~mask) + mask + 1;
      else
	newoffset = offset & ~mask;

      /* Ensure that long displacements are aligned.  */
      if (mask == 0x3fff
	  && (GET_MODE_CLASS (mode) == MODE_FLOAT
	      || (TARGET_64BIT && (mode) == DImode)))
	newoffset &= ~(GET_MODE_SIZE (mode) - 1);

      /* Split the address into (REG + Y) + (offset - Y) and reload the
	 inner sum into a base register; the residual displacement then
	 fits in 14 bits.  See the function comment for the rationale.  */
      if (newoffset != 0 && VAL_14_BITS_P (newoffset))
	{
	  temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
			       GEN_INT (newoffset));
	  ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
	  push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
		       opnum, (enum reload_type) type);
	  return ad;
	}
    }

  /* No transformation applied; let the generic reload code handle it.  */
  return NULL_RTX;
}
10981
10982 /* Output address vector. */
10983
10984 void
pa_output_addr_vec(rtx lab,rtx body)10985 pa_output_addr_vec (rtx lab, rtx body)
10986 {
10987 int idx, vlen = XVECLEN (body, 0);
10988
10989 if (!TARGET_SOM)
10990 fputs ("\t.align 4\n", asm_out_file);
10991 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10992 if (TARGET_GAS)
10993 fputs ("\t.begin_brtab\n", asm_out_file);
10994 for (idx = 0; idx < vlen; idx++)
10995 {
10996 ASM_OUTPUT_ADDR_VEC_ELT
10997 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10998 }
10999 if (TARGET_GAS)
11000 fputs ("\t.end_brtab\n", asm_out_file);
11001 }
11002
11003 /* Output address difference vector. */
11004
11005 void
pa_output_addr_diff_vec(rtx lab,rtx body)11006 pa_output_addr_diff_vec (rtx lab, rtx body)
11007 {
11008 rtx base = XEXP (XEXP (body, 0), 0);
11009 int idx, vlen = XVECLEN (body, 1);
11010
11011 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
11012 if (TARGET_GAS)
11013 fputs ("\t.begin_brtab\n", asm_out_file);
11014 for (idx = 0; idx < vlen; idx++)
11015 {
11016 ASM_OUTPUT_ADDR_DIFF_ELT
11017 (asm_out_file,
11018 body,
11019 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
11020 CODE_LABEL_NUMBER (base));
11021 }
11022 if (TARGET_GAS)
11023 fputs ("\t.end_brtab\n", asm_out_file);
11024 }
11025
11026 /* This is a helper function for the other atomic operations. This function
11027 emits a loop that contains SEQ that iterates until a compare-and-swap
11028 operation at the end succeeds. MEM is the memory to be modified. SEQ is
11029 a set of instructions that takes a value from OLD_REG as an input and
11030 produces a value in NEW_REG as an output. Before SEQ, OLD_REG will be
11031 set to the current contents of MEM. After SEQ, a compare-and-swap will
11032 attempt to update MEM with NEW_REG. The function returns true when the
11033 loop was generated successfully. */
11034
static bool
pa_expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
{
  machine_mode mode = GET_MODE (mem);
  rtx_code_label *label;
  rtx cmp_reg, success, oldval;

  /* The loop we want to generate looks like

	cmp_reg = mem;
      label:
	old_reg = cmp_reg;
	seq;
	(success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg)
	if (success)
	  goto label;

     Note that we only do the plain load from memory once.  Subsequent
     iterations use the value loaded by the compare-and-swap pattern.  */

  label = gen_label_rtx ();
  cmp_reg = gen_reg_rtx (mode);

  emit_move_insn (cmp_reg, mem);
  emit_label (label);
  emit_move_insn (old_reg, cmp_reg);
  if (seq)
    emit_insn (seq);

  /* SUCCESS and OLDVAL are output operands; the expander may place the
     results in registers other than the ones we pass in.  */
  success = NULL_RTX;
  oldval = cmp_reg;
  if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg,
				       new_reg, false, MEMMODEL_SYNC_SEQ_CST,
				       MEMMODEL_RELAXED))
    return false;

  /* If the expander put the old value elsewhere, copy it back so the
     next iteration reads it from CMP_REG.  */
  if (oldval != cmp_reg)
    emit_move_insn (cmp_reg, oldval);

  /* Mark this jump predicted not taken.  */
  emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
			   GET_MODE (success), 1, label,
			   profile_probability::guessed_never ());
  return true;
}
11080
11081 /* This function tries to implement an atomic exchange operation using a
11082 compare_and_swap loop. VAL is written to *MEM. The previous contents of
11083 *MEM are returned, using TARGET if possible. No memory model is required
11084 since a compare_and_swap loop is seq-cst. */
11085
11086 rtx
pa_maybe_emit_compare_and_swap_exchange_loop(rtx target,rtx mem,rtx val)11087 pa_maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val)
11088 {
11089 machine_mode mode = GET_MODE (mem);
11090
11091 if (can_compare_and_swap_p (mode, true))
11092 {
11093 if (!target || !register_operand (target, mode))
11094 target = gen_reg_rtx (mode);
11095 if (pa_expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
11096 return target;
11097 }
11098
11099 return NULL_RTX;
11100 }
11101
11102 /* Implement TARGET_CALLEE_COPIES. The callee is responsible for copying
11103 arguments passed by hidden reference in the 32-bit HP runtime. Users
11104 can override this behavior for better compatibility with openmp at the
11105 risk of library incompatibilities. Arguments are always passed by value
11106 in the 64-bit HP runtime. */
11107
11108 static bool
pa_callee_copies(cumulative_args_t,const function_arg_info &)11109 pa_callee_copies (cumulative_args_t, const function_arg_info &)
11110 {
11111 return !TARGET_CALLER_COPIES;
11112 }
11113
11114 /* Implement TARGET_HARD_REGNO_NREGS. */
11115
11116 static unsigned int
pa_hard_regno_nregs(unsigned int regno ATTRIBUTE_UNUSED,machine_mode mode)11117 pa_hard_regno_nregs (unsigned int regno ATTRIBUTE_UNUSED, machine_mode mode)
11118 {
11119 return PA_HARD_REGNO_NREGS (regno, mode);
11120 }
11121
11122 /* Implement TARGET_HARD_REGNO_MODE_OK. */
11123
11124 static bool
pa_hard_regno_mode_ok(unsigned int regno,machine_mode mode)11125 pa_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
11126 {
11127 return PA_HARD_REGNO_MODE_OK (regno, mode);
11128 }
11129
11130 /* Implement TARGET_STARTING_FRAME_OFFSET.
11131
11132 On the 32-bit ports, we reserve one slot for the previous frame
11133 pointer and one fill slot. The fill slot is for compatibility
11134 with HP compiled programs. On the 64-bit ports, we reserve one
11135 slot for the previous frame pointer. */
11136
11137 static HOST_WIDE_INT
pa_starting_frame_offset(void)11138 pa_starting_frame_offset (void)
11139 {
11140 return 8;
11141 }
11142
11143 /* Figure out the size in words of the function argument. The size
11144 returned by this function should always be greater than zero because
11145 we pass variable and zero sized objects by reference. */
11146
11147 HOST_WIDE_INT
pa_function_arg_size(machine_mode mode,const_tree type)11148 pa_function_arg_size (machine_mode mode, const_tree type)
11149 {
11150 HOST_WIDE_INT size;
11151
11152 size = mode != BLKmode ? GET_MODE_SIZE (mode) : int_size_in_bytes (type);
11153 return CEIL (size, UNITS_PER_WORD);
11154 }
11155
11156 #include "gt-pa.h"
11157