1 /* Subroutines used for code generation on the DEC Alpha.
2 Copyright (C) 1992-2013 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "regs.h"
29 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "recog.h"
36 #include "expr.h"
37 #include "optabs.h"
38 #include "reload.h"
39 #include "obstack.h"
40 #include "except.h"
41 #include "function.h"
42 #include "diagnostic-core.h"
43 #include "ggc.h"
44 #include "tm_p.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "common/common-target.h"
48 #include "debug.h"
49 #include "langhooks.h"
50 #include "splay-tree.h"
51 #include "gimple.h"
52 #include "tree-flow.h"
53 #include "tree-stdarg.h"
54 #include "tm-constrs.h"
55 #include "df.h"
56 #include "libfuncs.h"
57 #include "opts.h"
58 #include "params.h"
59
60 /* Specify which cpu to schedule for. */
61 enum processor_type alpha_tune;
62
63 /* Which cpu we're generating code for. */
64 enum processor_type alpha_cpu;
65
66 static const char * const alpha_cpu_name[] =
67 {
68 "ev4", "ev5", "ev6"
69 };
70
71 /* Specify how accurate floating-point traps need to be. */
72
73 enum alpha_trap_precision alpha_tp;
74
75 /* Specify the floating-point rounding mode. */
76
77 enum alpha_fp_rounding_mode alpha_fprm;
78
79 /* Specify which things cause traps. */
80
81 enum alpha_fp_trap_mode alpha_fptm;
82
83 /* Nonzero if inside of a function, because the Alpha asm can't
84 handle .files inside of functions. */
85
86 static int inside_function = FALSE;
87
88 /* The number of cycles of latency we should assume on memory reads. */
89
90 int alpha_memory_latency = 3;
91
92 /* Whether the function needs the GP. */
93
94 static int alpha_function_needs_gp;
95
96 /* The assembler name of the current function. */
97
98 static const char *alpha_fnname;
99
100 /* The next explicit relocation sequence number. */
101 extern GTY(()) int alpha_next_sequence_number;
102 int alpha_next_sequence_number = 1;
103
104 /* The literal and gpdisp sequence numbers for this insn, as printed
105 by %# and %* respectively. */
106 extern GTY(()) int alpha_this_literal_sequence_number;
107 extern GTY(()) int alpha_this_gpdisp_sequence_number;
108 int alpha_this_literal_sequence_number;
109 int alpha_this_gpdisp_sequence_number;
110
111 /* Costs of various operations on the different architectures. */
112
113 struct alpha_rtx_cost_data
114 {
115 unsigned char fp_add;
116 unsigned char fp_mult;
117 unsigned char fp_div_sf;
118 unsigned char fp_div_df;
119 unsigned char int_mult_si;
120 unsigned char int_mult_di;
121 unsigned char int_shift;
122 unsigned char int_cmov;
123 unsigned short int_div;
124 };
125
126 static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] =
127 {
128 { /* EV4 */
129 COSTS_N_INSNS (6), /* fp_add */
130 COSTS_N_INSNS (6), /* fp_mult */
131 COSTS_N_INSNS (34), /* fp_div_sf */
132 COSTS_N_INSNS (63), /* fp_div_df */
133 COSTS_N_INSNS (23), /* int_mult_si */
134 COSTS_N_INSNS (23), /* int_mult_di */
135 COSTS_N_INSNS (2), /* int_shift */
136 COSTS_N_INSNS (2), /* int_cmov */
137 COSTS_N_INSNS (97), /* int_div */
138 },
139 { /* EV5 */
140 COSTS_N_INSNS (4), /* fp_add */
141 COSTS_N_INSNS (4), /* fp_mult */
142 COSTS_N_INSNS (15), /* fp_div_sf */
143 COSTS_N_INSNS (22), /* fp_div_df */
144 COSTS_N_INSNS (8), /* int_mult_si */
145 COSTS_N_INSNS (12), /* int_mult_di */
146 COSTS_N_INSNS (1) + 1, /* int_shift */
147 COSTS_N_INSNS (1), /* int_cmov */
148 COSTS_N_INSNS (83), /* int_div */
149 },
150 { /* EV6 */
151 COSTS_N_INSNS (4), /* fp_add */
152 COSTS_N_INSNS (4), /* fp_mult */
153 COSTS_N_INSNS (12), /* fp_div_sf */
154 COSTS_N_INSNS (15), /* fp_div_df */
155 COSTS_N_INSNS (7), /* int_mult_si */
156 COSTS_N_INSNS (7), /* int_mult_di */
157 COSTS_N_INSNS (1), /* int_shift */
158 COSTS_N_INSNS (2), /* int_cmov */
159 COSTS_N_INSNS (86), /* int_div */
160 },
161 };
162
163 /* Similar but tuned for code size instead of execution latency. The
164 extra +N is fractional cost tuning based on latency. It's used to
165 encourage use of cheaper insns like shift, but only if there's just
166 one of them. */
167
168 static struct alpha_rtx_cost_data const alpha_rtx_cost_size =
169 {
170 COSTS_N_INSNS (1), /* fp_add */
171 COSTS_N_INSNS (1), /* fp_mult */
172 COSTS_N_INSNS (1), /* fp_div_sf */
173 COSTS_N_INSNS (1) + 1, /* fp_div_df */
174 COSTS_N_INSNS (1) + 1, /* int_mult_si */
175 COSTS_N_INSNS (1) + 2, /* int_mult_di */
176 COSTS_N_INSNS (1), /* int_shift */
177 COSTS_N_INSNS (1), /* int_cmov */
178 COSTS_N_INSNS (6), /* int_div */
179 };
180
181 /* Get the number of args of a function in one of two ways. */
182 #if TARGET_ABI_OPEN_VMS
183 #define NUM_ARGS crtl->args.info.num_args
184 #else
185 #define NUM_ARGS crtl->args.info
186 #endif
187
188 #define REG_PV 27
189 #define REG_RA 26
190
191 /* Declarations of static functions. */
192 static struct machine_function *alpha_init_machine_status (void);
193 static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx);
194
195 #if TARGET_ABI_OPEN_VMS
196 static void alpha_write_linkage (FILE *, const char *);
197 static bool vms_valid_pointer_mode (enum machine_mode);
198 #else
199 #define vms_patch_builtins() gcc_unreachable()
200 #endif
201
202 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
203 /* Implement TARGET_MANGLE_TYPE. */
204
205 static const char *
206 alpha_mangle_type (const_tree type)
207 {
208 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
209 && TARGET_LONG_DOUBLE_128)
210 return "g";
211
212 /* For all other types, use normal C++ mangling. */
213 return NULL;
214 }
215 #endif
216
217 /* Parse target option strings. */
218
219 static void
220 alpha_option_override (void)
221 {
222 static const struct cpu_table {
223 const char *const name;
224 const enum processor_type processor;
225 const int flags;
226 const unsigned short line_size; /* in bytes */
227 const unsigned short l1_size; /* in kb. */
228 const unsigned short l2_size; /* in kb. */
229 } cpu_table[] = {
230 /* EV4/LCA45 had 8k L1 caches; EV45 had 16k L1 caches.
231 EV4/EV45 had 128k to 16M 32-byte direct Bcache. LCA45
232 had 64k to 8M 8-byte direct Bcache. */
233 { "ev4", PROCESSOR_EV4, 0, 32, 8, 8*1024 },
234 { "21064", PROCESSOR_EV4, 0, 32, 8, 8*1024 },
235 { "ev45", PROCESSOR_EV4, 0, 32, 16, 16*1024 },
236
237 /* EV5 or EV56 had 8k 32 byte L1, 96k 32 or 64 byte L2,
238 and 1M to 16M 64 byte L3 (not modeled).
239 PCA56 had 16k 64-byte cache; PCA57 had 32k Icache.
240 PCA56 had 8k 64-byte cache; PCA57 had 16k Dcache. */
241 { "ev5", PROCESSOR_EV5, 0, 32, 8, 96 },
242 { "21164", PROCESSOR_EV5, 0, 32, 8, 96 },
243 { "ev56", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
244 { "21164a", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
245 { "pca56", PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
246 { "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
247 { "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
248
249 /* EV6 had 64k 64 byte L1, 1M to 16M Bcache. */
250 { "ev6", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
251 { "21264", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
252 { "ev67", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
253 64, 64, 16*1024 },
254 { "21264a", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
255 64, 64, 16*1024 }
256 };
257
258 int const ct_size = ARRAY_SIZE (cpu_table);
259 int line_size = 0, l1_size = 0, l2_size = 0;
260 int i;
261
262 #ifdef SUBTARGET_OVERRIDE_OPTIONS
263 SUBTARGET_OVERRIDE_OPTIONS;
264 #endif
265
266 /* Default to full IEEE compliance mode for Go language. */
267 if (strcmp (lang_hooks.name, "GNU Go") == 0
268 && !(target_flags_explicit & MASK_IEEE))
269 target_flags |= MASK_IEEE;
270
271 alpha_fprm = ALPHA_FPRM_NORM;
272 alpha_tp = ALPHA_TP_PROG;
273 alpha_fptm = ALPHA_FPTM_N;
274
275 if (TARGET_IEEE)
276 {
277 alpha_tp = ALPHA_TP_INSN;
278 alpha_fptm = ALPHA_FPTM_SU;
279 }
280 if (TARGET_IEEE_WITH_INEXACT)
281 {
282 alpha_tp = ALPHA_TP_INSN;
283 alpha_fptm = ALPHA_FPTM_SUI;
284 }
285
286 if (alpha_tp_string)
287 {
288 if (! strcmp (alpha_tp_string, "p"))
289 alpha_tp = ALPHA_TP_PROG;
290 else if (! strcmp (alpha_tp_string, "f"))
291 alpha_tp = ALPHA_TP_FUNC;
292 else if (! strcmp (alpha_tp_string, "i"))
293 alpha_tp = ALPHA_TP_INSN;
294 else
295 error ("bad value %qs for -mtrap-precision switch", alpha_tp_string);
296 }
297
298 if (alpha_fprm_string)
299 {
300 if (! strcmp (alpha_fprm_string, "n"))
301 alpha_fprm = ALPHA_FPRM_NORM;
302 else if (! strcmp (alpha_fprm_string, "m"))
303 alpha_fprm = ALPHA_FPRM_MINF;
304 else if (! strcmp (alpha_fprm_string, "c"))
305 alpha_fprm = ALPHA_FPRM_CHOP;
306 else if (! strcmp (alpha_fprm_string,"d"))
307 alpha_fprm = ALPHA_FPRM_DYN;
308 else
309 error ("bad value %qs for -mfp-rounding-mode switch",
310 alpha_fprm_string);
311 }
312
313 if (alpha_fptm_string)
314 {
315 if (strcmp (alpha_fptm_string, "n") == 0)
316 alpha_fptm = ALPHA_FPTM_N;
317 else if (strcmp (alpha_fptm_string, "u") == 0)
318 alpha_fptm = ALPHA_FPTM_U;
319 else if (strcmp (alpha_fptm_string, "su") == 0)
320 alpha_fptm = ALPHA_FPTM_SU;
321 else if (strcmp (alpha_fptm_string, "sui") == 0)
322 alpha_fptm = ALPHA_FPTM_SUI;
323 else
324 error ("bad value %qs for -mfp-trap-mode switch", alpha_fptm_string);
325 }
326
327 if (alpha_cpu_string)
328 {
329 for (i = 0; i < ct_size; i++)
330 if (! strcmp (alpha_cpu_string, cpu_table [i].name))
331 {
332 alpha_tune = alpha_cpu = cpu_table[i].processor;
333 line_size = cpu_table[i].line_size;
334 l1_size = cpu_table[i].l1_size;
335 l2_size = cpu_table[i].l2_size;
336 target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX);
337 target_flags |= cpu_table[i].flags;
338 break;
339 }
340 if (i == ct_size)
341 error ("bad value %qs for -mcpu switch", alpha_cpu_string);
342 }
343
344 if (alpha_tune_string)
345 {
346 for (i = 0; i < ct_size; i++)
347 if (! strcmp (alpha_tune_string, cpu_table [i].name))
348 {
349 alpha_tune = cpu_table[i].processor;
350 line_size = cpu_table[i].line_size;
351 l1_size = cpu_table[i].l1_size;
352 l2_size = cpu_table[i].l2_size;
353 break;
354 }
355 if (i == ct_size)
356 error ("bad value %qs for -mtune switch", alpha_tune_string);
357 }
358
359 if (line_size)
360 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, line_size,
361 global_options.x_param_values,
362 global_options_set.x_param_values);
363 if (l1_size)
364 maybe_set_param_value (PARAM_L1_CACHE_SIZE, l1_size,
365 global_options.x_param_values,
366 global_options_set.x_param_values);
367 if (l2_size)
368 maybe_set_param_value (PARAM_L2_CACHE_SIZE, l2_size,
369 global_options.x_param_values,
370 global_options_set.x_param_values);
371
372 /* Do some sanity checks on the above options. */
373
374 if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI)
375 && alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6)
376 {
377 warning (0, "fp software completion requires -mtrap-precision=i");
378 alpha_tp = ALPHA_TP_INSN;
379 }
380
381 if (alpha_cpu == PROCESSOR_EV6)
382 {
383 /* Except for EV6 pass 1 (not released), we always have precise
384 arithmetic traps. Which means we can do software completion
385 without minding trap shadows. */
386 alpha_tp = ALPHA_TP_PROG;
387 }
388
389 if (TARGET_FLOAT_VAX)
390 {
391 if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN)
392 {
393 warning (0, "rounding mode not supported for VAX floats");
394 alpha_fprm = ALPHA_FPRM_NORM;
395 }
396 if (alpha_fptm == ALPHA_FPTM_SUI)
397 {
398 warning (0, "trap mode not supported for VAX floats");
399 alpha_fptm = ALPHA_FPTM_SU;
400 }
401 if (target_flags_explicit & MASK_LONG_DOUBLE_128)
402 warning (0, "128-bit long double not supported for VAX floats");
403 target_flags &= ~MASK_LONG_DOUBLE_128;
404 }
405
406 {
407 char *end;
408 int lat;
409
410 if (!alpha_mlat_string)
411 alpha_mlat_string = "L1";
412
413 if (ISDIGIT ((unsigned char)alpha_mlat_string[0])
414 && (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
415 ;
416 else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
417 && ISDIGIT ((unsigned char)alpha_mlat_string[1])
418 && alpha_mlat_string[2] == '\0')
419 {
420 static int const cache_latency[][4] =
421 {
422 { 3, 30, -1 }, /* ev4 -- Bcache is a guess */
423 { 2, 12, 38 }, /* ev5 -- Bcache from PC164 LMbench numbers */
424 { 3, 12, 30 }, /* ev6 -- Bcache from DS20 LMbench. */
425 };
426
427 lat = alpha_mlat_string[1] - '0';
428 if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat-1] == -1)
429 {
430 warning (0, "L%d cache latency unknown for %s",
431 lat, alpha_cpu_name[alpha_tune]);
432 lat = 3;
433 }
434 else
435 lat = cache_latency[alpha_tune][lat-1];
436 }
437 else if (! strcmp (alpha_mlat_string, "main"))
438 {
439 /* Most current memories have about 370ns latency. This is
440 a reasonable guess for a fast cpu. */
441 lat = 150;
442 }
443 else
444 {
445 warning (0, "bad value %qs for -mmemory-latency", alpha_mlat_string);
446 lat = 3;
447 }
448
449 alpha_memory_latency = lat;
450 }
451
452 /* Default the definition of "small data" to 8 bytes. */
453 if (!global_options_set.x_g_switch_value)
454 g_switch_value = 8;
455
456 /* Infer TARGET_SMALL_DATA from -fpic/-fPIC. */
457 if (flag_pic == 1)
458 target_flags |= MASK_SMALL_DATA;
459 else if (flag_pic == 2)
460 target_flags &= ~MASK_SMALL_DATA;
461
462 /* Align labels and loops for optimal branching. */
463 /* ??? Kludge these by not doing anything if we don't optimize. */
464 if (optimize > 0)
465 {
466 if (align_loops <= 0)
467 align_loops = 16;
468 if (align_jumps <= 0)
469 align_jumps = 16;
470 }
471 if (align_functions <= 0)
472 align_functions = 16;
473
474 /* Register variables and functions with the garbage collector. */
475
476 /* Set up function hooks. */
477 init_machine_status = alpha_init_machine_status;
478
479 /* Tell the compiler when we're using VAX floating point. */
480 if (TARGET_FLOAT_VAX)
481 {
482 REAL_MODE_FORMAT (SFmode) = &vax_f_format;
483 REAL_MODE_FORMAT (DFmode) = &vax_g_format;
484 REAL_MODE_FORMAT (TFmode) = NULL;
485 }
486
487 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
488 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
489 target_flags |= MASK_LONG_DOUBLE_128;
490 #endif
491 }
492
493 /* Returns 1 if VALUE is a mask that contains full bytes of zero or ones. */
494
495 int
496 zap_mask (HOST_WIDE_INT value)
497 {
498 int i;
499
500 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
501 i++, value >>= 8)
502 if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
503 return 0;
504
505 return 1;
506 }
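
/* For example (illustrative values):
     zap_mask (0x00000000ffffff00) == 1   -- every byte is 0x00 or 0xff
     zap_mask (0x0000000000001234) == 0   -- 0x12 and 0x34 are partial bytes
   Such masks correspond directly to the byte masks accepted by the
   ZAP/ZAPNOT instructions.  */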
507
508 /* Return true if OP is valid for a particular TLS relocation.
509 We are already guaranteed that OP is a CONST. */
510
511 int
512 tls_symbolic_operand_1 (rtx op, int size, int unspec)
513 {
514 op = XEXP (op, 0);
515
516 if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec)
517 return 0;
518 op = XVECEXP (op, 0, 0);
519
520 if (GET_CODE (op) != SYMBOL_REF)
521 return 0;
522
523 switch (SYMBOL_REF_TLS_MODEL (op))
524 {
525 case TLS_MODEL_LOCAL_DYNAMIC:
526 return unspec == UNSPEC_DTPREL && size == alpha_tls_size;
527 case TLS_MODEL_INITIAL_EXEC:
528 return unspec == UNSPEC_TPREL && size == 64;
529 case TLS_MODEL_LOCAL_EXEC:
530 return unspec == UNSPEC_TPREL && size == alpha_tls_size;
531 default:
532 gcc_unreachable ();
533 }
534 }
535
536 /* Used by aligned_memory_operand and unaligned_memory_operand to
537 resolve what reload is going to do with OP if it's a register. */
538
539 rtx
540 resolve_reload_operand (rtx op)
541 {
542 if (reload_in_progress)
543 {
544 rtx tmp = op;
545 if (GET_CODE (tmp) == SUBREG)
546 tmp = SUBREG_REG (tmp);
547 if (REG_P (tmp)
548 && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
549 {
550 op = reg_equiv_memory_loc (REGNO (tmp));
551 if (op == 0)
552 return 0;
553 }
554 }
555 return op;
556 }
557
558 /* The set of scalar modes supported differs from the default check-what-c-supports
559 version in that sometimes TFmode is available even when long double
560 indicates only DFmode. */
561
562 static bool
563 alpha_scalar_mode_supported_p (enum machine_mode mode)
564 {
565 switch (mode)
566 {
567 case QImode:
568 case HImode:
569 case SImode:
570 case DImode:
571 case TImode: /* via optabs.c */
572 return true;
573
574 case SFmode:
575 case DFmode:
576 return true;
577
578 case TFmode:
579 return TARGET_HAS_XFLOATING_LIBS;
580
581 default:
582 return false;
583 }
584 }
585
586 /* Alpha implements a couple of integer vector mode operations when
587 TARGET_MAX is enabled. We do not check TARGET_MAX here, however,
588 which allows the vectorizer to operate on e.g. move instructions,
589 or when expand_vector_operations can do something useful. */
590
591 static bool
592 alpha_vector_mode_supported_p (enum machine_mode mode)
593 {
594 return mode == V8QImode || mode == V4HImode || mode == V2SImode;
595 }
596
597 /* Return 1 if this function can directly return via $26. */
598
599 int
600 direct_return (void)
601 {
602 return (TARGET_ABI_OSF
603 && reload_completed
604 && alpha_sa_size () == 0
605 && get_frame_size () == 0
606 && crtl->outgoing_args_size == 0
607 && crtl->args.pretend_args_size == 0);
608 }
609
610 /* Return the TLS model to use for SYMBOL. */
611
612 static enum tls_model
613 tls_symbolic_operand_type (rtx symbol)
614 {
615 enum tls_model model;
616
617 if (GET_CODE (symbol) != SYMBOL_REF)
618 return TLS_MODEL_NONE;
619 model = SYMBOL_REF_TLS_MODEL (symbol);
620
621 /* Local-exec with a 64-bit size is the same code as initial-exec. */
622 if (model == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64)
623 model = TLS_MODEL_INITIAL_EXEC;
624
625 return model;
626 }
627
628 /* Return true if the function DECL will share the same GP as any
629 function in the current unit of translation. */
630
631 static bool
632 decl_has_samegp (const_tree decl)
633 {
634 /* Functions that are not local can be overridden, and thus may
635 not share the same gp. */
636 if (!(*targetm.binds_local_p) (decl))
637 return false;
638
639 /* If -msmall-data is in effect, assume that there is only one GP
640 for the module, and so any local symbol has this property. We
641 need explicit relocations to be able to enforce this for symbols
642 not defined in this unit of translation, however. */
643 if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA)
644 return true;
645
646 /* Functions that are not external are defined in this UoT. */
647 /* ??? Irritatingly, static functions not yet emitted are still
648 marked "external". Apply this to non-static functions only. */
649 return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl);
650 }
651
652 /* Return true if EXP should be placed in the small data section. */
653
654 static bool
655 alpha_in_small_data_p (const_tree exp)
656 {
657 /* We want to merge strings, so we never consider them small data. */
658 if (TREE_CODE (exp) == STRING_CST)
659 return false;
660
661 /* Functions are never in the small data area. Duh. */
662 if (TREE_CODE (exp) == FUNCTION_DECL)
663 return false;
664
665 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
666 {
667 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
668 if (strcmp (section, ".sdata") == 0
669 || strcmp (section, ".sbss") == 0)
670 return true;
671 }
672 else
673 {
674 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
675
676 /* If this is an incomplete type with size 0, then we can't put it
677 in sdata because it might be too big when completed. */
678 if (size > 0 && size <= g_switch_value)
679 return true;
680 }
681
682 return false;
683 }
684
685 #if TARGET_ABI_OPEN_VMS
686 static bool
687 vms_valid_pointer_mode (enum machine_mode mode)
688 {
689 return (mode == SImode || mode == DImode);
690 }
691
692 static bool
693 alpha_linkage_symbol_p (const char *symname)
694 {
695 int symlen = strlen (symname);
696
697 if (symlen > 4)
698 return strcmp (&symname [symlen - 4], "..lk") == 0;
699
700 return false;
701 }
702
703 #define LINKAGE_SYMBOL_REF_P(X) \
704 ((GET_CODE (X) == SYMBOL_REF \
705 && alpha_linkage_symbol_p (XSTR (X, 0))) \
706 || (GET_CODE (X) == CONST \
707 && GET_CODE (XEXP (X, 0)) == PLUS \
708 && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \
709 && alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0))))
710 #endif
711
712 /* legitimate_address_p recognizes an RTL expression that is a valid
713 memory address for an instruction. The MODE argument is the
714 machine mode for the MEM expression that wants to use this address.
715
716 For Alpha, we have either a constant address or the sum of a
717 register and a constant address, or just a register. For DImode,
718 any of those forms can be surrounded with an AND that clears the
719 low-order three bits; this is an "unaligned" access. */
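
/* A minimal sketch of the accepted forms, in RTL (illustrative only):

     (reg $16)                                  base register
     (const_int 4096)                           constant address
     (plus (reg $16) (const_int 8))             base plus offset
     (and (plus (reg $16) (const_int 5))
          (const_int -8))                       DImode ldq_u/stq_u access
     (lo_sum (reg $29) (symbol_ref "local"))    explicit relocations  */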
720
721 static bool
722 alpha_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
723 {
724 /* If this is an ldq_u type address, discard the outer AND. */
725 if (mode == DImode
726 && GET_CODE (x) == AND
727 && CONST_INT_P (XEXP (x, 1))
728 && INTVAL (XEXP (x, 1)) == -8)
729 x = XEXP (x, 0);
730
731 /* Discard non-paradoxical subregs. */
732 if (GET_CODE (x) == SUBREG
733 && (GET_MODE_SIZE (GET_MODE (x))
734 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
735 x = SUBREG_REG (x);
736
737 /* Unadorned general registers are valid. */
738 if (REG_P (x)
739 && (strict
740 ? STRICT_REG_OK_FOR_BASE_P (x)
741 : NONSTRICT_REG_OK_FOR_BASE_P (x)))
742 return true;
743
744 /* Constant addresses (i.e. +/- 32k) are valid. */
745 if (CONSTANT_ADDRESS_P (x))
746 return true;
747
748 #if TARGET_ABI_OPEN_VMS
749 if (LINKAGE_SYMBOL_REF_P (x))
750 return true;
751 #endif
752
753 /* Register plus a small constant offset is valid. */
754 if (GET_CODE (x) == PLUS)
755 {
756 rtx ofs = XEXP (x, 1);
757 x = XEXP (x, 0);
758
759 /* Discard non-paradoxical subregs. */
760 if (GET_CODE (x) == SUBREG
761 && (GET_MODE_SIZE (GET_MODE (x))
762 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
763 x = SUBREG_REG (x);
764
765 if (REG_P (x))
766 {
767 if (! strict
768 && NONSTRICT_REG_OK_FP_BASE_P (x)
769 && CONST_INT_P (ofs))
770 return true;
771 if ((strict
772 ? STRICT_REG_OK_FOR_BASE_P (x)
773 : NONSTRICT_REG_OK_FOR_BASE_P (x))
774 && CONSTANT_ADDRESS_P (ofs))
775 return true;
776 }
777 }
778
779 /* If we're managing explicit relocations, LO_SUM is valid, as are small
780 data symbols. Avoid explicit relocations of modes larger than word
781 mode since e.g. $LC0+8($1) can fold around +/- 32k offset. */
782 else if (TARGET_EXPLICIT_RELOCS
783 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
784 {
785 if (small_symbolic_operand (x, Pmode))
786 return true;
787
788 if (GET_CODE (x) == LO_SUM)
789 {
790 rtx ofs = XEXP (x, 1);
791 x = XEXP (x, 0);
792
793 /* Discard non-paradoxical subregs. */
794 if (GET_CODE (x) == SUBREG
795 && (GET_MODE_SIZE (GET_MODE (x))
796 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
797 x = SUBREG_REG (x);
798
799 /* Must have a valid base register. */
800 if (! (REG_P (x)
801 && (strict
802 ? STRICT_REG_OK_FOR_BASE_P (x)
803 : NONSTRICT_REG_OK_FOR_BASE_P (x))))
804 return false;
805
806 /* The symbol must be local. */
807 if (local_symbolic_operand (ofs, Pmode)
808 || dtp32_symbolic_operand (ofs, Pmode)
809 || tp32_symbolic_operand (ofs, Pmode))
810 return true;
811 }
812 }
813
814 return false;
815 }
816
817 /* Build the SYMBOL_REF for __tls_get_addr. */
818
819 static GTY(()) rtx tls_get_addr_libfunc;
820
821 static rtx
822 get_tls_get_addr (void)
823 {
824 if (!tls_get_addr_libfunc)
825 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
826 return tls_get_addr_libfunc;
827 }
828
829 /* Try machine-dependent ways of modifying an illegitimate address
830 to be legitimate. If we find one, return the new, valid address. */
831
832 static rtx
833 alpha_legitimize_address_1 (rtx x, rtx scratch, enum machine_mode mode)
834 {
835 HOST_WIDE_INT addend;
836
837 /* If the address is (plus reg const_int) and the CONST_INT is not a
838 valid offset, compute the high part of the constant and add it to
839 the register. Then our address is (plus temp low-part-const). */
840 if (GET_CODE (x) == PLUS
841 && REG_P (XEXP (x, 0))
842 && CONST_INT_P (XEXP (x, 1))
843 && ! CONSTANT_ADDRESS_P (XEXP (x, 1)))
844 {
845 addend = INTVAL (XEXP (x, 1));
846 x = XEXP (x, 0);
847 goto split_addend;
848 }
849
850 /* If the address is (const (plus FOO const_int)), find the low-order
851 part of the CONST_INT. Then load FOO plus any high-order part of the
852 CONST_INT into a register. Our address is (plus reg low-part-const).
853 This is done to reduce the number of GOT entries. */
854 if (can_create_pseudo_p ()
855 && GET_CODE (x) == CONST
856 && GET_CODE (XEXP (x, 0)) == PLUS
857 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
858 {
859 addend = INTVAL (XEXP (XEXP (x, 0), 1));
860 x = force_reg (Pmode, XEXP (XEXP (x, 0), 0));
861 goto split_addend;
862 }
863
864 /* If we have a (plus reg const), emit the load as in (2), then add
865 the two registers, and finally generate (plus reg low-part-const) as
866 our address. */
867 if (can_create_pseudo_p ()
868 && GET_CODE (x) == PLUS
869 && REG_P (XEXP (x, 0))
870 && GET_CODE (XEXP (x, 1)) == CONST
871 && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS
872 && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1)))
873 {
874 addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1));
875 x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0),
876 XEXP (XEXP (XEXP (x, 1), 0), 0),
877 NULL_RTX, 1, OPTAB_LIB_WIDEN);
878 goto split_addend;
879 }
880
881 /* If this is a local symbol, split the address into HIGH/LO_SUM parts.
882 Avoid modes larger than word mode since e.g. $LC0+8($1) can fold
883 around +/- 32k offset. */
884 if (TARGET_EXPLICIT_RELOCS
885 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
886 && symbolic_operand (x, Pmode))
887 {
888 rtx r0, r16, eqv, tga, tp, insn, dest, seq;
889
890 switch (tls_symbolic_operand_type (x))
891 {
892 case TLS_MODEL_NONE:
893 break;
894
895 case TLS_MODEL_GLOBAL_DYNAMIC:
896 start_sequence ();
897
898 r0 = gen_rtx_REG (Pmode, 0);
899 r16 = gen_rtx_REG (Pmode, 16);
900 tga = get_tls_get_addr ();
901 dest = gen_reg_rtx (Pmode);
902 seq = GEN_INT (alpha_next_sequence_number++);
903
904 emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq));
905 insn = gen_call_value_osf_tlsgd (r0, tga, seq);
906 insn = emit_call_insn (insn);
907 RTL_CONST_CALL_P (insn) = 1;
908 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
909
910 insn = get_insns ();
911 end_sequence ();
912
913 emit_libcall_block (insn, dest, r0, x);
914 return dest;
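
	  /* The sequence constructed above corresponds roughly to this
	     assembly (a sketch only; sequence number N and register
	     choices are illustrative):
		lda	$16,x($29)		!tlsgd!N
		ldq	$27,__tls_get_addr($29)	!literal!N
		jsr	$26,($27),__tls_get_addr !lituse_tlsgd!N
	     with the result returned in $0.  */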
915
916 case TLS_MODEL_LOCAL_DYNAMIC:
917 start_sequence ();
918
919 r0 = gen_rtx_REG (Pmode, 0);
920 r16 = gen_rtx_REG (Pmode, 16);
921 tga = get_tls_get_addr ();
922 scratch = gen_reg_rtx (Pmode);
923 seq = GEN_INT (alpha_next_sequence_number++);
924
925 emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq));
926 insn = gen_call_value_osf_tlsldm (r0, tga, seq);
927 insn = emit_call_insn (insn);
928 RTL_CONST_CALL_P (insn) = 1;
929 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
930
931 insn = get_insns ();
932 end_sequence ();
933
934 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
935 UNSPEC_TLSLDM_CALL);
936 emit_libcall_block (insn, scratch, r0, eqv);
937
938 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL);
939 eqv = gen_rtx_CONST (Pmode, eqv);
940
941 if (alpha_tls_size == 64)
942 {
943 dest = gen_reg_rtx (Pmode);
944 emit_insn (gen_rtx_SET (VOIDmode, dest, eqv));
945 emit_insn (gen_adddi3 (dest, dest, scratch));
946 return dest;
947 }
948 if (alpha_tls_size == 32)
949 {
950 insn = gen_rtx_HIGH (Pmode, eqv);
951 insn = gen_rtx_PLUS (Pmode, scratch, insn);
952 scratch = gen_reg_rtx (Pmode);
953 emit_insn (gen_rtx_SET (VOIDmode, scratch, insn));
954 }
955 return gen_rtx_LO_SUM (Pmode, scratch, eqv);
956
957 case TLS_MODEL_INITIAL_EXEC:
958 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
959 eqv = gen_rtx_CONST (Pmode, eqv);
960 tp = gen_reg_rtx (Pmode);
961 scratch = gen_reg_rtx (Pmode);
962 dest = gen_reg_rtx (Pmode);
963
964 emit_insn (gen_get_thread_pointerdi (tp));
965 emit_insn (gen_rtx_SET (VOIDmode, scratch, eqv));
966 emit_insn (gen_adddi3 (dest, tp, scratch));
967 return dest;
968
969 case TLS_MODEL_LOCAL_EXEC:
970 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
971 eqv = gen_rtx_CONST (Pmode, eqv);
972 tp = gen_reg_rtx (Pmode);
973
974 emit_insn (gen_get_thread_pointerdi (tp));
975 if (alpha_tls_size == 32)
976 {
977 insn = gen_rtx_HIGH (Pmode, eqv);
978 insn = gen_rtx_PLUS (Pmode, tp, insn);
979 tp = gen_reg_rtx (Pmode);
980 emit_insn (gen_rtx_SET (VOIDmode, tp, insn));
981 }
982 return gen_rtx_LO_SUM (Pmode, tp, eqv);
983
984 default:
985 gcc_unreachable ();
986 }
987
988 if (local_symbolic_operand (x, Pmode))
989 {
990 if (small_symbolic_operand (x, Pmode))
991 return x;
992 else
993 {
994 if (can_create_pseudo_p ())
995 scratch = gen_reg_rtx (Pmode);
996 emit_insn (gen_rtx_SET (VOIDmode, scratch,
997 gen_rtx_HIGH (Pmode, x)));
998 return gen_rtx_LO_SUM (Pmode, scratch, x);
999 }
1000 }
1001 }
1002
1003 return NULL;
1004
1005 split_addend:
1006 {
1007 HOST_WIDE_INT low, high;
1008
1009 low = ((addend & 0xffff) ^ 0x8000) - 0x8000;
1010 addend -= low;
1011 high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000;
1012 addend -= high;
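
    /* Worked example (illustrative): for ADDEND == 0x18000,
       LOW == -0x8000 and HIGH == 0x20000, so the address becomes
       (x + 0x20000) - 0x8000; the HIGH part is added with a single
       ldah and LOW folds into the memory displacement.  */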
1013
1014 if (addend)
1015 x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend),
1016 (!can_create_pseudo_p () ? scratch : NULL_RTX),
1017 1, OPTAB_LIB_WIDEN);
1018 if (high)
1019 x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high),
1020 (!can_create_pseudo_p () ? scratch : NULL_RTX),
1021 1, OPTAB_LIB_WIDEN);
1022
1023 return plus_constant (Pmode, x, low);
1024 }
1025 }
1026
1027
1028 /* Try machine-dependent ways of modifying an illegitimate address
1029 to be legitimate. Return X or the new, valid address. */
1030
1031 static rtx
1032 alpha_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
1033 enum machine_mode mode)
1034 {
1035 rtx new_x = alpha_legitimize_address_1 (x, NULL_RTX, mode);
1036 return new_x ? new_x : x;
1037 }
1038
1039 /* Return true if ADDR has an effect that depends on the machine mode it
1040 is used for. On the Alpha this is true only for the unaligned modes.
1041 We can simplify the test since we know that the address must be valid. */
1042
1043 static bool
1044 alpha_mode_dependent_address_p (const_rtx addr,
1045 addr_space_t as ATTRIBUTE_UNUSED)
1046 {
1047 return GET_CODE (addr) == AND;
1048 }
1049
1050 /* Primarily this is required for TLS symbols, but given that our move
1051 patterns *ought* to be able to handle any symbol at any time, we
1052 should never be spilling symbolic operands to the constant pool, ever. */
1053
1054 static bool
1055 alpha_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1056 {
1057 enum rtx_code code = GET_CODE (x);
1058 return code == SYMBOL_REF || code == LABEL_REF || code == CONST;
1059 }
1060
1061 /* We do not allow indirect calls to be optimized into sibling calls, nor
1062 can we allow a call to a function with a different GP to be optimized
1063 into a sibcall. */
1064
1065 static bool
1066 alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1067 {
1068 /* Can't do indirect tail calls, since we don't know if the target
1069 uses the same GP. */
1070 if (!decl)
1071 return false;
1072
1073 /* Otherwise, we can make a tail call if the target function shares
1074 the same GP. */
1075 return decl_has_samegp (decl);
1076 }
1077
1078 int
1079 some_small_symbolic_operand_int (rtx *px, void *data ATTRIBUTE_UNUSED)
1080 {
1081 rtx x = *px;
1082
1083 /* Don't re-split. */
1084 if (GET_CODE (x) == LO_SUM)
1085 return -1;
1086
1087 return small_symbolic_operand (x, Pmode) != 0;
1088 }
1089
1090 static int
1091 split_small_symbolic_operand_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
1092 {
1093 rtx x = *px;
1094
1095 /* Don't re-split. */
1096 if (GET_CODE (x) == LO_SUM)
1097 return -1;
1098
1099 if (small_symbolic_operand (x, Pmode))
1100 {
1101 x = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x);
1102 *px = x;
1103 return -1;
1104 }
1105
1106 return 0;
1107 }
1108
1109 rtx
1110 split_small_symbolic_operand (rtx x)
1111 {
1112 x = copy_insn (x);
1113 for_each_rtx (&x, split_small_symbolic_operand_1, NULL);
1114 return x;
1115 }
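
/* For example (a sketch), a small-data reference such as
   (symbol_ref "x") is rewritten to (lo_sum (reg $29) (symbol_ref "x")),
   which the move patterns can emit as a single gp-relative lda:
	lda	$1,x($29)	!gprel  */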
1116
1117 /* Indicate that INSN cannot be duplicated. This is true for any insn
1118 that we've marked with gpdisp relocs, since those have to stay in
1119 1-1 correspondence with one another.
1120
1121 Technically we could copy them if we could set up a mapping from one
1122 sequence number to another, across the set of insns to be duplicated.
1123 This seems overly complicated and error-prone since interblock motion
1124 from sched-ebb could move one of the pair of insns to a different block.
1125
1126 Also cannot allow jsr insns to be duplicated. If they throw exceptions,
1127 then they'll be in a different block from their ldgp. Which could lead
1128 the bb reorder code to think that it would be ok to copy just the block
1129 containing the call and branch to the block containing the ldgp. */
1130
1131 static bool
1132 alpha_cannot_copy_insn_p (rtx insn)
1133 {
1134 if (!reload_completed || !TARGET_EXPLICIT_RELOCS)
1135 return false;
1136 if (recog_memoized (insn) >= 0)
1137 return get_attr_cannot_copy (insn);
1138 else
1139 return false;
1140 }
1141
1142
1143 /* Try a machine-dependent way of reloading an illegitimate address
1144 operand. If we find one, push the reload and return the new rtx. */
1145
1146 rtx
1147 alpha_legitimize_reload_address (rtx x,
1148 enum machine_mode mode ATTRIBUTE_UNUSED,
1149 int opnum, int type,
1150 int ind_levels ATTRIBUTE_UNUSED)
1151 {
1152 /* We must recognize output that we have already generated ourselves. */
1153 if (GET_CODE (x) == PLUS
1154 && GET_CODE (XEXP (x, 0)) == PLUS
1155 && REG_P (XEXP (XEXP (x, 0), 0))
1156 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
1157 && CONST_INT_P (XEXP (x, 1)))
1158 {
1159 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1160 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1161 opnum, (enum reload_type) type);
1162 return x;
1163 }
1164
1165 /* We wish to handle large displacements off a base register by
1166 splitting the addend across an ldah and the mem insn. This
1167 cuts number of extra insns needed from 3 to 1. */
1168 if (GET_CODE (x) == PLUS
1169 && REG_P (XEXP (x, 0))
1170 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
1171 && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0)))
1172 && GET_CODE (XEXP (x, 1)) == CONST_INT)
1173 {
1174 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
1175 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
1176 HOST_WIDE_INT high
1177 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
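
      /* E.g. (illustrative) VAL == 0x9000 splits into HIGH == 0x10000
	 and LOW == -0x7000: the ldah adds 1 << 16 to the base and the
	 mem displacement supplies -0x7000.  */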
1178
1179 /* Check for 32-bit overflow. */
1180 if (high + low != val)
1181 return NULL_RTX;
1182
1183 /* Reload the high part into a base reg; leave the low part
1184 in the mem directly. */
1185 x = gen_rtx_PLUS (GET_MODE (x),
1186 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
1187 GEN_INT (high)),
1188 GEN_INT (low));
1189
1190 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1191 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1192 opnum, (enum reload_type) type);
1193 return x;
1194 }
1195
1196 return NULL_RTX;
1197 }
1198
1199 /* Compute a (partial) cost for rtx X. Return true if the complete
1200 cost has been computed, and false if subexpressions should be
1201 scanned. In either case, *TOTAL contains the cost result. */
1202
1203 static bool
1204 alpha_rtx_costs (rtx x, int code, int outer_code, int opno, int *total,
1205 bool speed)
1206 {
1207 enum machine_mode mode = GET_MODE (x);
1208 bool float_mode_p = FLOAT_MODE_P (mode);
1209 const struct alpha_rtx_cost_data *cost_data;
1210
1211 if (!speed)
1212 cost_data = &alpha_rtx_cost_size;
1213 else
1214 cost_data = &alpha_rtx_cost_data[alpha_tune];
1215
1216 switch (code)
1217 {
1218 case CONST_INT:
1219 /* If this is an 8-bit constant, return zero since it can be used
1220 nearly anywhere with no cost. If it is a valid operand for an
1221 ADD or AND, likewise return 0 if we know it will be used in that
1222 context. Otherwise, return 2 since it might be used there later.
1223 All other constants take at least two insns. */
1224 if (INTVAL (x) >= 0 && INTVAL (x) < 256)
1225 {
1226 *total = 0;
1227 return true;
1228 }
1229 /* FALLTHRU */
1230
1231 case CONST_DOUBLE:
1232 if (x == CONST0_RTX (mode))
1233 *total = 0;
1234 else if ((outer_code == PLUS && add_operand (x, VOIDmode))
1235 || (outer_code == AND && and_operand (x, VOIDmode)))
1236 *total = 0;
1237 else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode))
1238 *total = 2;
1239 else
1240 *total = COSTS_N_INSNS (2);
1241 return true;
1242
1243 case CONST:
1244 case SYMBOL_REF:
1245 case LABEL_REF:
1246 if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode))
1247 *total = COSTS_N_INSNS (outer_code != MEM);
1248 else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode))
1249 *total = COSTS_N_INSNS (1 + (outer_code != MEM));
1250 else if (tls_symbolic_operand_type (x))
1251 /* Estimate of cost for call_pal rduniq. */
1252 /* ??? How many insns do we emit here? More than one... */
1253 *total = COSTS_N_INSNS (15);
1254 else
1255 /* Otherwise we do a load from the GOT. */
1256 *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1257 return true;
1258
1259 case HIGH:
1260 /* This is effectively an add_operand. */
1261 *total = 2;
1262 return true;
1263
1264 case PLUS:
1265 case MINUS:
1266 if (float_mode_p)
1267 *total = cost_data->fp_add;
1268 else if (GET_CODE (XEXP (x, 0)) == MULT
1269 && const48_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
1270 {
1271 *total = (rtx_cost (XEXP (XEXP (x, 0), 0),
1272 (enum rtx_code) outer_code, opno, speed)
1273 + rtx_cost (XEXP (x, 1),
1274 (enum rtx_code) outer_code, opno, speed)
1275 + COSTS_N_INSNS (1));
1276 return true;
1277 }
1278 return false;
1279
1280 case MULT:
1281 if (float_mode_p)
1282 *total = cost_data->fp_mult;
1283 else if (mode == DImode)
1284 *total = cost_data->int_mult_di;
1285 else
1286 *total = cost_data->int_mult_si;
1287 return false;
1288
1289 case ASHIFT:
1290 if (CONST_INT_P (XEXP (x, 1))
1291 && INTVAL (XEXP (x, 1)) <= 3)
1292 {
1293 *total = COSTS_N_INSNS (1);
1294 return false;
1295 }
1296 /* FALLTHRU */
1297
1298 case ASHIFTRT:
1299 case LSHIFTRT:
1300 *total = cost_data->int_shift;
1301 return false;
1302
1303 case IF_THEN_ELSE:
1304 if (float_mode_p)
1305 *total = cost_data->fp_add;
1306 else
1307 *total = cost_data->int_cmov;
1308 return false;
1309
1310 case DIV:
1311 case UDIV:
1312 case MOD:
1313 case UMOD:
1314 if (!float_mode_p)
1315 *total = cost_data->int_div;
1316 else if (mode == SFmode)
1317 *total = cost_data->fp_div_sf;
1318 else
1319 *total = cost_data->fp_div_df;
1320 return false;
1321
1322 case MEM:
1323 *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1324 return true;
1325
1326 case NEG:
1327 if (! float_mode_p)
1328 {
1329 *total = COSTS_N_INSNS (1);
1330 return false;
1331 }
1332 /* FALLTHRU */
1333
1334 case ABS:
1335 if (! float_mode_p)
1336 {
1337 *total = COSTS_N_INSNS (1) + cost_data->int_cmov;
1338 return false;
1339 }
1340 /* FALLTHRU */
1341
1342 case FLOAT:
1343 case UNSIGNED_FLOAT:
1344 case FIX:
1345 case UNSIGNED_FIX:
1346 case FLOAT_TRUNCATE:
1347 *total = cost_data->fp_add;
1348 return false;
1349
1350 case FLOAT_EXTEND:
1351 if (MEM_P (XEXP (x, 0)))
1352 *total = 0;
1353 else
1354 *total = cost_data->fp_add;
1355 return false;
1356
1357 default:
1358 return false;
1359 }
1360 }
1361
1362 /* REF is an alignable memory location. Place an aligned SImode
1363 reference into *PALIGNED_MEM and the number of bits to shift into
1364 *PBITNUM. SCRATCH is a free register for use in reloading out
1365 of range stack slots. */
1366
1367 void
1368 get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum)
1369 {
1370 rtx base;
1371 HOST_WIDE_INT disp, offset;
1372
1373 gcc_assert (MEM_P (ref));
1374
1375 if (reload_in_progress
1376 && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
1377 {
1378 base = find_replacement (&XEXP (ref, 0));
1379 gcc_assert (memory_address_p (GET_MODE (ref), base));
1380 }
1381 else
1382 base = XEXP (ref, 0);
1383
1384 if (GET_CODE (base) == PLUS)
1385 disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1386 else
1387 disp = 0;
1388
1389 /* Find the byte offset within an aligned word. If the memory itself is
1390 claimed to be aligned, believe it. Otherwise, aligned_memory_operand
1391 will have examined the base register and determined it is aligned, and
1392 thus displacements from it are naturally alignable. */
1393 if (MEM_ALIGN (ref) >= 32)
1394 offset = 0;
1395 else
1396 offset = disp & 3;
1397
1398 /* The location should not cross aligned word boundary. */
1399 gcc_assert (offset + GET_MODE_SIZE (GET_MODE (ref))
1400 <= GET_MODE_SIZE (SImode));
1401
1402 /* Access the entire aligned word. */
1403 *paligned_mem = widen_memory_access (ref, SImode, -offset);
1404
1405 /* Convert the byte offset within the word to a bit offset. */
1406 offset *= BITS_PER_UNIT;
1407 *pbitnum = GEN_INT (offset);
1408 }
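
/* For instance (illustrative), a HImode reference at byte offset 6
   from an aligned base yields *PALIGNED_MEM covering the SImode word
   at offset 4 and *PBITNUM == 16, i.e. the halfword sits 16 bits into
   the aligned word.  */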
1409
1410 /* Similar, but just get the address. Handle the two reload cases. */
1412
1413 rtx
1414 get_unaligned_address (rtx ref)
1415 {
1416 rtx base;
1417 HOST_WIDE_INT offset = 0;
1418
1419 gcc_assert (MEM_P (ref));
1420
1421 if (reload_in_progress
1422 && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
1423 {
1424 base = find_replacement (&XEXP (ref, 0));
1425
1426 gcc_assert (memory_address_p (GET_MODE (ref), base));
1427 }
1428 else
1429 base = XEXP (ref, 0);
1430
1431 if (GET_CODE (base) == PLUS)
1432 offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1433
1434 return plus_constant (Pmode, base, offset);
1435 }
1436
1437 /* Compute a value X, such that X & 7 == (ADDR + OFS) & 7.
1438 X is always returned in a register. */
1439
1440 rtx
1441 get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs)
1442 {
1443 if (GET_CODE (addr) == PLUS)
1444 {
1445 ofs += INTVAL (XEXP (addr, 1));
1446 addr = XEXP (addr, 0);
1447 }
1448
1449 return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7),
1450 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1451 }
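
/* A typical use (sketch): the unaligned load expanders need the low
   three address bits as a byte shift count, so callers can pass the
   result of get_unaligned_offset as the shift operand of an extract
   such as extql/extqh.  */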
1452
1453 /* On the Alpha, all (non-symbolic) constants except zero go into
1454 a floating-point register via memory. Note that we cannot
1455 return anything that is not a subset of RCLASS, and that some
1456 symbolic constants cannot be dropped to memory. */
1457
1458 enum reg_class
1459 alpha_preferred_reload_class (rtx x, enum reg_class rclass)
1460 {
1461 /* Zero is present in any register class. */
1462 if (x == CONST0_RTX (GET_MODE (x)))
1463 return rclass;
1464
1465 /* These sorts of constants we can easily drop to memory. */
1466 if (CONST_INT_P (x)
1467 || GET_CODE (x) == CONST_DOUBLE
1468 || GET_CODE (x) == CONST_VECTOR)
1469 {
1470 if (rclass == FLOAT_REGS)
1471 return NO_REGS;
1472 if (rclass == ALL_REGS)
1473 return GENERAL_REGS;
1474 return rclass;
1475 }
1476
1477 /* All other kinds of constants should not (and in the case of HIGH
1478 cannot) be dropped to memory -- instead we use a GENERAL_REGS
1479 secondary reload. */
1480 if (CONSTANT_P (x))
1481 return (rclass == ALL_REGS ? GENERAL_REGS : rclass);
1482
1483 return rclass;
1484 }
1485
1486 /* Inform reload about cases where moving X with a mode MODE to a register in
1487 RCLASS requires an extra scratch or immediate register. Return the class
1488 needed for the immediate register. */
1489
1490 static reg_class_t
1491 alpha_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
1492 enum machine_mode mode, secondary_reload_info *sri)
1493 {
1494 enum reg_class rclass = (enum reg_class) rclass_i;
1495
1496 /* Loading and storing HImode or QImode values to and from memory
1497 usually requires a scratch register. */
1498 if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode))
1499 {
1500 if (any_memory_operand (x, mode))
1501 {
1502 if (in_p)
1503 {
1504 if (!aligned_memory_operand (x, mode))
1505 sri->icode = direct_optab_handler (reload_in_optab, mode);
1506 }
1507 else
1508 sri->icode = direct_optab_handler (reload_out_optab, mode);
1509 return NO_REGS;
1510 }
1511 }
1512
1513 /* We also cannot do integral arithmetic into FP regs, as might result
1514 from register elimination into a DImode fp register. */
1515 if (rclass == FLOAT_REGS)
1516 {
1517 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
1518 return GENERAL_REGS;
1519 if (in_p && INTEGRAL_MODE_P (mode)
1520 && !MEM_P (x) && !REG_P (x) && !CONST_INT_P (x))
1521 return GENERAL_REGS;
1522 }
1523
1524 return NO_REGS;
1525 }
1526
1527 /* Subfunction of the following function. Update the flags of any MEM
1528 found in part of X. */
1529
1530 static int
1531 alpha_set_memflags_1 (rtx *xp, void *data)
1532 {
1533 rtx x = *xp, orig = (rtx) data;
1534
1535 if (!MEM_P (x))
1536 return 0;
1537
1538 MEM_VOLATILE_P (x) = MEM_VOLATILE_P (orig);
1539 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (orig);
1540 MEM_READONLY_P (x) = MEM_READONLY_P (orig);
1541
1542 /* Sadly, we cannot use alias sets because the extra aliasing
1543 produced by the AND interferes. Given that two-byte quantities
1544 are the only thing we would be able to differentiate anyway,
1545 there does not seem to be any point in convoluting the early
1546 out of the alias check. */
1547
1548 return -1;
1549 }
1550
1551 /* Given SEQ, which is an INSN list, look for any MEMs in either
1552 a SET_DEST or a SET_SRC and copy the volatile, notrap, and readonly
1553 flags from REF into each of the MEMs found. If REF is not
1554 a MEM, don't do anything. */
1555
1556 void
1557 alpha_set_memflags (rtx seq, rtx ref)
1558 {
1559 rtx insn;
1560
1561 if (!MEM_P (ref))
1562 return;
1563
1564 /* This is only called from alpha.md, after having had something
1565 generated from one of the insn patterns. So if everything is
1566 zero, the pattern is already up-to-date. */
1567 if (!MEM_VOLATILE_P (ref)
1568 && !MEM_NOTRAP_P (ref)
1569 && !MEM_READONLY_P (ref))
1570 return;
1571
1572 for (insn = seq; insn; insn = NEXT_INSN (insn))
1573 if (INSN_P (insn))
1574 for_each_rtx (&PATTERN (insn), alpha_set_memflags_1, (void *) ref);
1575 else
1576 gcc_unreachable ();
1577 }
1578
1579 static rtx alpha_emit_set_const (rtx, enum machine_mode, HOST_WIDE_INT,
1580 int, bool);
1581
1582 /* Internal routine for alpha_emit_set_const to check for N or below insns.
1583 If NO_OUTPUT is true, then we only check to see if N insns are possible,
1584 and return pc_rtx if successful. */
1585
1586 static rtx
1587 alpha_emit_set_const_1 (rtx target, enum machine_mode mode,
1588 HOST_WIDE_INT c, int n, bool no_output)
1589 {
1590 HOST_WIDE_INT new_const;
1591 int i, bits;
1592 /* Use a pseudo if highly optimizing and still generating RTL. */
1593 rtx subtarget
1594 = (flag_expensive_optimizations && can_create_pseudo_p () ? 0 : target);
1595 rtx temp, insn;
1596
1597 /* If this is a sign-extended 32-bit constant, we can do this in at most
1598 three insns, so do it if we have enough insns left. We always have
1599 a sign-extended 32-bit constant when compiling on a narrow machine. */
1600
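  /* E.g. (illustrative) C == 0x12345678 decomposes into LOW == 0x5678
     and HIGH == 0x1234, loadable in two insns:
	ldah	$r,0x1234($31)
	lda	$r,0x5678($r)  */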
1601 if (HOST_BITS_PER_WIDE_INT != 64
1602 || c >> 31 == -1 || c >> 31 == 0)
1603 {
1604 HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000;
1605 HOST_WIDE_INT tmp1 = c - low;
1606 HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
1607 HOST_WIDE_INT extra = 0;
1608
1609 /* If HIGH will be interpreted as negative but the constant is
1610 positive, we must adjust it to do two ldha insns. */
1611
1612 if ((high & 0x8000) != 0 && c >= 0)
1613 {
1614 extra = 0x4000;
1615 tmp1 -= 0x40000000;
1616 high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000);
1617 }
1618
1619 if (c == low || (low == 0 && extra == 0))
1620 {
1621 /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode)
1622 but that meant that we can't handle INT_MIN on 32-bit machines
1623 (like NT/Alpha), because we recurse indefinitely through
1624 emit_move_insn to gen_movdi. So instead, since we know exactly
1625 what we want, create it explicitly. */
1626
1627 if (no_output)
1628 return pc_rtx;
1629 if (target == NULL)
1630 target = gen_reg_rtx (mode);
1631 emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (c)));
1632 return target;
1633 }
1634 else if (n >= 2 + (extra != 0))
1635 {
1636 if (no_output)
1637 return pc_rtx;
1638 if (!can_create_pseudo_p ())
1639 {
1640 emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (high << 16)));
1641 temp = target;
1642 }
1643 else
1644 temp = copy_to_suggested_reg (GEN_INT (high << 16),
1645 subtarget, mode);
1646
1647 /* As of 2002-02-23, addsi3 is only available when not optimizing.
1648 This means that if we go through expand_binop, we'll try to
1649 generate extensions, etc, which will require new pseudos, which
1650 will fail during some split phases. The SImode add patterns
1651 still exist, but are not named. So build the insns by hand. */
1652
1653 if (extra != 0)
1654 {
1655 if (! subtarget)
1656 subtarget = gen_reg_rtx (mode);
1657 insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16));
1658 insn = gen_rtx_SET (VOIDmode, subtarget, insn);
1659 emit_insn (insn);
1660 temp = subtarget;
1661 }
1662
1663 if (target == NULL)
1664 target = gen_reg_rtx (mode);
1665 insn = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1666 insn = gen_rtx_SET (VOIDmode, target, insn);
1667 emit_insn (insn);
1668 return target;
1669 }
1670 }
1671
1672 /* If we couldn't do it that way, try some other methods. But if we have
1673 no instructions left, don't bother. Likewise, if this is SImode and
1674 we can't make pseudos, we can't do anything since the expand_binop
1675 and expand_unop calls will widen and try to make pseudos. */
1676
1677 if (n == 1 || (mode == SImode && !can_create_pseudo_p ()))
1678 return 0;
1679
1680 /* Next, see if we can load a related constant and then shift and possibly
1681 negate it to get the constant we want. Try this once each increasing
1682 numbers of insns. */
1683
1684 for (i = 1; i < n; i++)
1685 {
1686 /* First, see if minus some low bits, we've an easy load of
1687 high bits. */
1688
1689 new_const = ((c & 0xffff) ^ 0x8000) - 0x8000;
1690 if (new_const != 0)
1691 {
1692 temp = alpha_emit_set_const (subtarget, mode, c - new_const, i, no_output);
1693 if (temp)
1694 {
1695 if (no_output)
1696 return temp;
1697 return expand_binop (mode, add_optab, temp, GEN_INT (new_const),
1698 target, 0, OPTAB_WIDEN);
1699 }
1700 }
1701
1702 /* Next try complementing. */
1703 temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output);
1704 if (temp)
1705 {
1706 if (no_output)
1707 return temp;
1708 return expand_unop (mode, one_cmpl_optab, temp, target, 0);
1709 }
1710
1711 /* Next try to form a constant and do a left shift. We can do this
1712 if some low-order bits are zero; the exact_log2 call below tells
1713 us that information. The bits we are shifting out could be any
1714 value, but here we'll just try the 0- and sign-extended forms of
1715 the constant. To try to increase the chance of having the same
1716 constant in more than one insn, start at the highest number of
1717 bits to shift, but try all possibilities in case a ZAPNOT will
1718 be useful. */
1719
1720 bits = exact_log2 (c & -c);
1721 if (bits > 0)
1722 for (; bits > 0; bits--)
1723 {
1724 new_const = c >> bits;
1725 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1726 if (!temp && c < 0)
1727 {
1728 new_const = (unsigned HOST_WIDE_INT)c >> bits;
1729 temp = alpha_emit_set_const (subtarget, mode, new_const,
1730 i, no_output);
1731 }
1732 if (temp)
1733 {
1734 if (no_output)
1735 return temp;
1736 return expand_binop (mode, ashl_optab, temp, GEN_INT (bits),
1737 target, 0, OPTAB_WIDEN);
1738 }
1739 }
1740
1741 /* Now try high-order zero bits. Here we try the shifted-in bits as
1742 all zero and all ones. Be careful to avoid shifting outside the
1743 mode and to avoid shifting outside the host wide int size. */
1744 /* On narrow hosts, don't shift a 1 into the high bit, since we'll
1745 confuse the recursive call and set all of the high 32 bits. */
1746
1747 bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1748 - floor_log2 (c) - 1 - (HOST_BITS_PER_WIDE_INT < 64));
1749 if (bits > 0)
1750 for (; bits > 0; bits--)
1751 {
1752 new_const = c << bits;
1753 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1754 if (!temp)
1755 {
1756 new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1);
1757 temp = alpha_emit_set_const (subtarget, mode, new_const,
1758 i, no_output);
1759 }
1760 if (temp)
1761 {
1762 if (no_output)
1763 return temp;
1764 return expand_binop (mode, lshr_optab, temp, GEN_INT (bits),
1765 target, 1, OPTAB_WIDEN);
1766 }
1767 }
1768
1769 /* Now try high-order 1 bits. We get that with a sign-extension.
1770 But one bit isn't enough here. Be careful to avoid shifting outside
1771 the mode and to avoid shifting outside the host wide int size. */
1772
1773 bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1774 - floor_log2 (~ c) - 2);
1775 if (bits > 0)
1776 for (; bits > 0; bits--)
1777 {
1778 new_const = c << bits;
1779 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1780 if (!temp)
1781 {
1782 new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1);
1783 temp = alpha_emit_set_const (subtarget, mode, new_const,
1784 i, no_output);
1785 }
1786 if (temp)
1787 {
1788 if (no_output)
1789 return temp;
1790 return expand_binop (mode, ashr_optab, temp, GEN_INT (bits),
1791 target, 0, OPTAB_WIDEN);
1792 }
1793 }
1794 }
1795
1796 #if HOST_BITS_PER_WIDE_INT == 64
1797 /* Finally, see if can load a value into the target that is the same as the
1798 constant except that all bytes that are 0 are changed to be 0xff. If we
1799 can, then we can do a ZAPNOT to obtain the desired constant. */
1800
1801 new_const = c;
1802 for (i = 0; i < 64; i += 8)
1803 if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0)
1804 new_const |= (HOST_WIDE_INT) 0xff << i;
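
  /* E.g. (illustrative) C == 0x0000000012000034 becomes
     NEW_CONST == 0xffffffff12ffff34; the mask (c | ~new_const) then has
     only 0x00/0xff bytes, so the AND below is a single ZAPNOT.  */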
1805
1806 /* We are only called for SImode and DImode. If this is SImode, ensure that
1807 we are sign extended to a full word. */
1808
1809 if (mode == SImode)
1810 new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000;
1811
1812 if (new_const != c)
1813 {
1814 temp = alpha_emit_set_const (subtarget, mode, new_const, n - 1, no_output);
1815 if (temp)
1816 {
1817 if (no_output)
1818 return temp;
1819 return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new_const),
1820 target, 0, OPTAB_WIDEN);
1821 }
1822 }
1823 #endif
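  /* Worked example, for illustration only (not in the original source):
     for c = 0x00ff00ff00ff00ff, changing every zero byte to 0xff gives
     new_const = -1, which loads in one insn; the AND against
     (c | ~new_const) == c is then a single ZAPNOT, since that constant
     contains only 0x00 and 0xff bytes.  */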
1824
1825 return 0;
1826 }
1827
1828 /* Try to output insns to set TARGET equal to the constant C if it can be
1829 done in less than N insns. Do all computations in MODE. Returns the place
1830 where the output has been placed if it can be done and the insns have been
1831 emitted. If it would take more than N insns, zero is returned and no
1832 insns are emitted. */
1833
1834 static rtx
1835 alpha_emit_set_const (rtx target, enum machine_mode mode,
1836 HOST_WIDE_INT c, int n, bool no_output)
1837 {
1838 enum machine_mode orig_mode = mode;
1839 rtx orig_target = target;
1840 rtx result = 0;
1841 int i;
1842
1843 /* If we can't make any pseudos, TARGET is an SImode hard register, and we
1844 can't load this constant in one insn, do this in DImode. */
1845 if (!can_create_pseudo_p () && mode == SImode
1846 && REG_P (target) && REGNO (target) < FIRST_PSEUDO_REGISTER)
1847 {
1848 result = alpha_emit_set_const_1 (target, mode, c, 1, no_output);
1849 if (result)
1850 return result;
1851
1852 target = no_output ? NULL : gen_lowpart (DImode, target);
1853 mode = DImode;
1854 }
1855 else if (mode == V8QImode || mode == V4HImode || mode == V2SImode)
1856 {
1857 target = no_output ? NULL : gen_lowpart (DImode, target);
1858 mode = DImode;
1859 }
1860
1861 /* Try 1 insn, then 2, then up to N. */
1862 for (i = 1; i <= n; i++)
1863 {
1864 result = alpha_emit_set_const_1 (target, mode, c, i, no_output);
1865 if (result)
1866 {
1867 rtx insn, set;
1868
1869 if (no_output)
1870 return result;
1871
1872 insn = get_last_insn ();
1873 set = single_set (insn);
1874 if (! CONSTANT_P (SET_SRC (set)))
1875 set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c));
1876 break;
1877 }
1878 }
1879
1880 /* Allow for the case where we changed the mode of TARGET. */
1881 if (result)
1882 {
1883 if (result == target)
1884 result = orig_target;
1885 else if (mode != orig_mode)
1886 result = gen_lowpart (orig_mode, result);
1887 }
1888
1889 return result;
1890 }
1891
1892 /* Having failed to find a 3 insn sequence in alpha_emit_set_const,
1893 fall back to a straightforward decomposition. We do this to avoid
1894 exponential run times encountered when looking for longer sequences
1895 with alpha_emit_set_const. */
1896
1897 static rtx
1898 alpha_emit_set_long_const (rtx target, HOST_WIDE_INT c1, HOST_WIDE_INT c2)
1899 {
1900 HOST_WIDE_INT d1, d2, d3, d4;
1901
1902 /* Decompose the entire word */
1903 #if HOST_BITS_PER_WIDE_INT >= 64
1904 gcc_assert (c2 == -(c1 < 0));
1905 d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1906 c1 -= d1;
1907 d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1908 c1 = (c1 - d2) >> 32;
1909 d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1910 c1 -= d3;
1911 d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1912 gcc_assert (c1 == d4);
1913 #else
1914 d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
1915 c1 -= d1;
1916 d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1917 gcc_assert (c1 == d2);
1918 c2 += (d2 < 0);
1919 d3 = ((c2 & 0xffff) ^ 0x8000) - 0x8000;
1920 c2 -= d3;
1921 d4 = ((c2 & 0xffffffff) ^ 0x80000000) - 0x80000000;
1922 gcc_assert (c2 == d4);
1923 #endif
1924
1925 /* Construct the high word */
1926 if (d4)
1927 {
1928 emit_move_insn (target, GEN_INT (d4));
1929 if (d3)
1930 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3)));
1931 }
1932 else
1933 emit_move_insn (target, GEN_INT (d3));
1934
1935 /* Shift it into place */
1936 emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32)));
1937
1938 /* Add in the low bits. */
1939 if (d2)
1940 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2)));
1941 if (d1)
1942 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1)));
1943
1944 return target;
1945 }
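/* Worked example of the decomposition above, for illustration only (not
   in the original source): for c1 = 0x123456789abcdef0 on a 64-bit host,

     d1 = 0xffffffffffffdef0  (low 16 bits, sign-extended)
     d2 = 0xffffffff9abd0000  (next 16 bits, after subtracting d1)
     d3 = 0x0000000000005679
     d4 = 0x0000000012340000

   and the emitted sequence computes ((d4 + d3) << 32) + d2 + d1, which
   reassembles the constant in at most five insns.  */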
1946
1947 /* Given an integral CONST_INT, CONST_DOUBLE, or CONST_VECTOR, return
1948 the low 64 bits. */
1949
1950 static void
1951 alpha_extract_integer (rtx x, HOST_WIDE_INT *p0, HOST_WIDE_INT *p1)
1952 {
1953 HOST_WIDE_INT i0, i1;
1954
1955 if (GET_CODE (x) == CONST_VECTOR)
1956 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
1957
1958
1959 if (CONST_INT_P (x))
1960 {
1961 i0 = INTVAL (x);
1962 i1 = -(i0 < 0);
1963 }
1964 else if (HOST_BITS_PER_WIDE_INT >= 64)
1965 {
1966 i0 = CONST_DOUBLE_LOW (x);
1967 i1 = -(i0 < 0);
1968 }
1969 else
1970 {
1971 i0 = CONST_DOUBLE_LOW (x);
1972 i1 = CONST_DOUBLE_HIGH (x);
1973 }
1974
1975 *p0 = i0;
1976 *p1 = i1;
1977 }
1978
1979 /* Implement TARGET_LEGITIMATE_CONSTANT_P. This is all constants for which
1980 we are willing to load the value into a register via a move pattern.
1981 Normally this is all symbolic constants, integral constants that
1982 take three or fewer instructions, and floating-point zero. */
1983
1984 bool
1985 alpha_legitimate_constant_p (enum machine_mode mode, rtx x)
1986 {
1987 HOST_WIDE_INT i0, i1;
1988
1989 switch (GET_CODE (x))
1990 {
1991 case LABEL_REF:
1992 case HIGH:
1993 return true;
1994
1995 case CONST:
1996 if (GET_CODE (XEXP (x, 0)) == PLUS
1997 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
1998 x = XEXP (XEXP (x, 0), 0);
1999 else
2000 return true;
2001
2002 if (GET_CODE (x) != SYMBOL_REF)
2003 return true;
2004
2005 /* FALLTHRU */
2006
2007 case SYMBOL_REF:
2008 /* TLS symbols are never valid. */
2009 return SYMBOL_REF_TLS_MODEL (x) == 0;
2010
2011 case CONST_DOUBLE:
2012 if (x == CONST0_RTX (mode))
2013 return true;
2014 if (FLOAT_MODE_P (mode))
2015 return false;
2016 goto do_integer;
2017
2018 case CONST_VECTOR:
2019 if (x == CONST0_RTX (mode))
2020 return true;
2021 if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
2022 return false;
2023 if (GET_MODE_SIZE (mode) != 8)
2024 return false;
2025 goto do_integer;
2026
2027 case CONST_INT:
2028 do_integer:
2029 if (TARGET_BUILD_CONSTANTS)
2030 return true;
2031 alpha_extract_integer (x, &i0, &i1);
2032 if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == -(i0 < 0))
2033 return alpha_emit_set_const_1 (x, mode, i0, 3, true) != NULL;
2034 return false;
2035
2036 default:
2037 return false;
2038 }
2039 }
2040
2041 /* Operand 1 is known to be a constant, and should require more than one
2042 instruction to load. Emit that multi-part load. */
2043
2044 bool
2045 alpha_split_const_mov (enum machine_mode mode, rtx *operands)
2046 {
2047 HOST_WIDE_INT i0, i1;
2048 rtx temp = NULL_RTX;
2049
2050 alpha_extract_integer (operands[1], &i0, &i1);
2051
2052 if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == -(i0 < 0))
2053 temp = alpha_emit_set_const (operands[0], mode, i0, 3, false);
2054
2055 if (!temp && TARGET_BUILD_CONSTANTS)
2056 temp = alpha_emit_set_long_const (operands[0], i0, i1);
2057
2058 if (temp)
2059 {
2060 if (!rtx_equal_p (operands[0], temp))
2061 emit_move_insn (operands[0], temp);
2062 return true;
2063 }
2064
2065 return false;
2066 }
2067
2068 /* Expand a move instruction; return true if all work is done.
2069 We don't handle non-bwx subword loads here. */
2070
2071 bool
2072 alpha_expand_mov (enum machine_mode mode, rtx *operands)
2073 {
2074 rtx tmp;
2075
2076 /* If the output is not a register, the input must be. */
2077 if (MEM_P (operands[0])
2078 && ! reg_or_0_operand (operands[1], mode))
2079 operands[1] = force_reg (mode, operands[1]);
2080
2081 /* Allow legitimize_address to perform some simplifications. */
2082 if (mode == Pmode && symbolic_operand (operands[1], mode))
2083 {
2084 tmp = alpha_legitimize_address_1 (operands[1], operands[0], mode);
2085 if (tmp)
2086 {
2087 if (tmp == operands[0])
2088 return true;
2089 operands[1] = tmp;
2090 return false;
2091 }
2092 }
2093
2094 /* Early out for non-constants and valid constants. */
2095 if (! CONSTANT_P (operands[1]) || input_operand (operands[1], mode))
2096 return false;
2097
2098 /* Split large integers. */
2099 if (CONST_INT_P (operands[1])
2100 || GET_CODE (operands[1]) == CONST_DOUBLE
2101 || GET_CODE (operands[1]) == CONST_VECTOR)
2102 {
2103 if (alpha_split_const_mov (mode, operands))
2104 return true;
2105 }
2106
2107 /* Otherwise we've nothing left but to drop the thing to memory. */
2108 tmp = force_const_mem (mode, operands[1]);
2109
2110 if (tmp == NULL_RTX)
2111 return false;
2112
2113 if (reload_in_progress)
2114 {
2115 emit_move_insn (operands[0], XEXP (tmp, 0));
2116 operands[1] = replace_equiv_address (tmp, operands[0]);
2117 }
2118 else
2119 operands[1] = validize_mem (tmp);
2120 return false;
2121 }
2122
2123 /* Expand a non-bwx QImode or HImode move instruction;
2124 return true if all work is done. */
2125
2126 bool
2127 alpha_expand_mov_nobwx (enum machine_mode mode, rtx *operands)
2128 {
2129 rtx seq;
2130
2131 /* If the output is not a register, the input must be. */
2132 if (MEM_P (operands[0]))
2133 operands[1] = force_reg (mode, operands[1]);
2134
2135 /* Handle four memory cases, unaligned and aligned for either the input
2136 or the output. The only case where we can be called during reload is
2137 for aligned loads; all other cases require temporaries. */
2138
2139 if (any_memory_operand (operands[1], mode))
2140 {
2141 if (aligned_memory_operand (operands[1], mode))
2142 {
2143 if (reload_in_progress)
2144 {
2145 if (mode == QImode)
2146 seq = gen_reload_inqi_aligned (operands[0], operands[1]);
2147 else
2148 seq = gen_reload_inhi_aligned (operands[0], operands[1]);
2149 emit_insn (seq);
2150 }
2151 else
2152 {
2153 rtx aligned_mem, bitnum;
2154 rtx scratch = gen_reg_rtx (SImode);
2155 rtx subtarget;
2156 bool copyout;
2157
2158 get_aligned_mem (operands[1], &aligned_mem, &bitnum);
2159
2160 subtarget = operands[0];
2161 if (REG_P (subtarget))
2162 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2163 else
2164 subtarget = gen_reg_rtx (DImode), copyout = true;
2165
2166 if (mode == QImode)
2167 seq = gen_aligned_loadqi (subtarget, aligned_mem,
2168 bitnum, scratch);
2169 else
2170 seq = gen_aligned_loadhi (subtarget, aligned_mem,
2171 bitnum, scratch);
2172 emit_insn (seq);
2173
2174 if (copyout)
2175 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2176 }
2177 }
2178 else
2179 {
2180 /* Don't pass these as parameters since that makes the generated
2181 code depend on parameter evaluation order which will cause
2182 bootstrap failures. */
2183
2184 rtx temp1, temp2, subtarget, ua;
2185 bool copyout;
2186
2187 temp1 = gen_reg_rtx (DImode);
2188 temp2 = gen_reg_rtx (DImode);
2189
2190 subtarget = operands[0];
2191 if (REG_P (subtarget))
2192 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2193 else
2194 subtarget = gen_reg_rtx (DImode), copyout = true;
2195
2196 ua = get_unaligned_address (operands[1]);
2197 if (mode == QImode)
2198 seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2);
2199 else
2200 seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2);
2201
2202 alpha_set_memflags (seq, operands[1]);
2203 emit_insn (seq);
2204
2205 if (copyout)
2206 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2207 }
2208 return true;
2209 }
2210
2211 if (any_memory_operand (operands[0], mode))
2212 {
2213 if (aligned_memory_operand (operands[0], mode))
2214 {
2215 rtx aligned_mem, bitnum;
2216 rtx temp1 = gen_reg_rtx (SImode);
2217 rtx temp2 = gen_reg_rtx (SImode);
2218
2219 get_aligned_mem (operands[0], &aligned_mem, &bitnum);
2220
2221 emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum,
2222 temp1, temp2));
2223 }
2224 else
2225 {
2226 rtx temp1 = gen_reg_rtx (DImode);
2227 rtx temp2 = gen_reg_rtx (DImode);
2228 rtx temp3 = gen_reg_rtx (DImode);
2229 rtx ua = get_unaligned_address (operands[0]);
2230
2231 if (mode == QImode)
2232 seq = gen_unaligned_storeqi (ua, operands[1], temp1, temp2, temp3);
2233 else
2234 seq = gen_unaligned_storehi (ua, operands[1], temp1, temp2, temp3);
2235
2236 alpha_set_memflags (seq, operands[0]);
2237 emit_insn (seq);
2238 }
2239 return true;
2240 }
2241
2242 return false;
2243 }
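/* Rough C picture of the aligned non-bwx QImode load above, for
   illustration only (not in the original source):

     uint32_t word = *(uint32_t *) (addr & ~3);    (the aligned load)
     uint8_t  byte = word >> (8 * (addr & 3));     (extract at bitnum)

   The aligned store path is the dual: mask the old field out of the
   loaded word, OR the new value in, and store the whole word back.  */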
2244
2245 /* Implement the movmisalign patterns. One of the operands is a memory
2246 that is not naturally aligned. Emit instructions to load it. */
2247
2248 void
2249 alpha_expand_movmisalign (enum machine_mode mode, rtx *operands)
2250 {
2251 /* Honor misaligned loads, for those we promised to do so. */
2252 if (MEM_P (operands[1]))
2253 {
2254 rtx tmp;
2255
2256 if (register_operand (operands[0], mode))
2257 tmp = operands[0];
2258 else
2259 tmp = gen_reg_rtx (mode);
2260
2261 alpha_expand_unaligned_load (tmp, operands[1], 8, 0, 0);
2262 if (tmp != operands[0])
2263 emit_move_insn (operands[0], tmp);
2264 }
2265 else if (MEM_P (operands[0]))
2266 {
2267 if (!reg_or_0_operand (operands[1], mode))
2268 operands[1] = force_reg (mode, operands[1]);
2269 alpha_expand_unaligned_store (operands[0], operands[1], 8, 0);
2270 }
2271 else
2272 gcc_unreachable ();
2273 }
2274
2275 /* Generate an unsigned DImode to FP conversion. This is the same code
2276 optabs would emit if we didn't have TFmode patterns.
2277
2278 For SFmode, this is the only construction I've found that can pass
2279 gcc.c-torture/execute/ieee/rbug.c. No scenario that uses DFmode
2280 intermediates will work, because you'll get intermediate rounding
2281 that ruins the end result. Some of this could be fixed by turning
2282 on round-to-positive-infinity, but that requires diddling the fpsr,
2283 which kills performance. I tried turning this around and converting
2284 to a negative number, so that I could turn on /m, but either I did
2285 it wrong or there's something else, because I wound up with the exact
2286 same single-bit error. There is a branch-less form of this same code:
2287
2288 srl $16,1,$1
2289 and $16,1,$2
2290 cmplt $16,0,$3
2291 or $1,$2,$2
2292 cmovge $16,$16,$2
2293 itoft $3,$f10
2294 itoft $2,$f11
2295 cvtqs $f11,$f11
2296 adds $f11,$f11,$f0
2297 fcmoveq $f10,$f11,$f0
2298
2299 I'm not using it because it's the same number of instructions as
2300 this branch-full form, and it has more serialized long latency
2301 instructions on the critical path.
2302
2303 For DFmode, we can avoid rounding errors by breaking up the word
2304 into two pieces, converting them separately, and adding them back:
2305
2306 LC0: .long 0,0x5f800000
2307
2308 itoft $16,$f11
2309 lda $2,LC0
2310 cmplt $16,0,$1
2311 cpyse $f11,$f31,$f10
2312 cpyse $f31,$f11,$f11
2313 s4addq $1,$2,$1
2314 lds $f12,0($1)
2315 cvtqt $f10,$f10
2316 cvtqt $f11,$f11
2317 addt $f12,$f10,$f0
2318 addt $f0,$f11,$f0
2319
2320 This doesn't seem to be a clear-cut win over the optabs form.
2321 It probably all depends on the distribution of numbers being
2322 converted -- in the optabs form, all but high-bit-set has a
2323 much lower minimum execution time. */
2324
2325 void
2326 alpha_emit_floatuns (rtx operands[2])
2327 {
2328 rtx neglab, donelab, i0, i1, f0, in, out;
2329 enum machine_mode mode;
2330
2331 out = operands[0];
2332 in = force_reg (DImode, operands[1]);
2333 mode = GET_MODE (out);
2334 neglab = gen_label_rtx ();
2335 donelab = gen_label_rtx ();
2336 i0 = gen_reg_rtx (DImode);
2337 i1 = gen_reg_rtx (DImode);
2338 f0 = gen_reg_rtx (mode);
2339
2340 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
2341
2342 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
2343 emit_jump_insn (gen_jump (donelab));
2344 emit_barrier ();
2345
2346 emit_label (neglab);
2347
2348 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
2349 emit_insn (gen_anddi3 (i1, in, const1_rtx));
2350 emit_insn (gen_iordi3 (i0, i0, i1));
2351 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
2352 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
2353
2354 emit_label (donelab);
2355 }
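/* In C terms the negative path above computes, for illustration only
   (not in the original source):

     uint64_t half = (x >> 1) | (x & 1);      (halve, keep a sticky bit)
     result = (FLOAT) (int64_t) half * 2;     (signed convert, double)

   ORing the shifted-out bit back in preserves it as a sticky bit, so
   the rounding done by the signed conversion and final doubling matches
   what a direct unsigned conversion would have produced.  */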
2356
2357 /* Generate the comparison for a conditional branch. */
2358
2359 void
2360 alpha_emit_conditional_branch (rtx operands[], enum machine_mode cmp_mode)
2361 {
2362 enum rtx_code cmp_code, branch_code;
2363 enum machine_mode branch_mode = VOIDmode;
2364 enum rtx_code code = GET_CODE (operands[0]);
2365 rtx op0 = operands[1], op1 = operands[2];
2366 rtx tem;
2367
2368 if (cmp_mode == TFmode)
2369 {
2370 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2371 op1 = const0_rtx;
2372 cmp_mode = DImode;
2373 }
2374
2375 /* The general case: fold the comparison code to the types of compares
2376 that we have, choosing the branch as necessary. */
2377 switch (code)
2378 {
2379 case EQ: case LE: case LT: case LEU: case LTU:
2380 case UNORDERED:
2381 /* We have these compares. */
2382 cmp_code = code, branch_code = NE;
2383 break;
2384
2385 case NE:
2386 case ORDERED:
2387 /* These must be reversed. */
2388 cmp_code = reverse_condition (code), branch_code = EQ;
2389 break;
2390
2391 case GE: case GT: case GEU: case GTU:
2392 /* For FP, we swap them, for INT, we reverse them. */
2393 if (cmp_mode == DFmode)
2394 {
2395 cmp_code = swap_condition (code);
2396 branch_code = NE;
2397 tem = op0, op0 = op1, op1 = tem;
2398 }
2399 else
2400 {
2401 cmp_code = reverse_condition (code);
2402 branch_code = EQ;
2403 }
2404 break;
2405
2406 default:
2407 gcc_unreachable ();
2408 }
2409
2410 if (cmp_mode == DFmode)
2411 {
2412 if (flag_unsafe_math_optimizations && cmp_code != UNORDERED)
2413 {
2414 /* When we are not as concerned about non-finite values, and we
2415 are comparing against zero, we can branch directly. */
2416 if (op1 == CONST0_RTX (DFmode))
2417 cmp_code = UNKNOWN, branch_code = code;
2418 else if (op0 == CONST0_RTX (DFmode))
2419 {
2420 /* Undo the swap we probably did just above. */
2421 tem = op0, op0 = op1, op1 = tem;
2422 branch_code = swap_condition (cmp_code);
2423 cmp_code = UNKNOWN;
2424 }
2425 }
2426 else
2427 {
2428 /* ??? We mark the branch mode to be CCmode to prevent the
2429 compare and branch from being combined, since the compare
2430 insn follows IEEE rules that the branch does not. */
2431 branch_mode = CCmode;
2432 }
2433 }
2434 else
2435 {
2436 /* The following optimizations are only for signed compares. */
2437 if (code != LEU && code != LTU && code != GEU && code != GTU)
2438 {
2439 /* Whee. Compare and branch against 0 directly. */
2440 if (op1 == const0_rtx)
2441 cmp_code = UNKNOWN, branch_code = code;
2442
2443 /* If the constant doesn't fit into an immediate, but can
2444 be generated by lda/ldah, we adjust the argument and
2445 compare against zero, so we can use beq/bne directly. */
2446 /* ??? Don't do this when comparing against symbols, otherwise
2447 we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will
2448 be declared false out of hand (at least for non-weak). */
2449 else if (CONST_INT_P (op1)
2450 && (code == EQ || code == NE)
2451 && !(symbolic_operand (op0, VOIDmode)
2452 || (REG_P (op0) && REG_POINTER (op0))))
2453 {
2454 rtx n_op1 = GEN_INT (-INTVAL (op1));
2455
2456 if (! satisfies_constraint_I (op1)
2457 && (satisfies_constraint_K (n_op1)
2458 || satisfies_constraint_L (n_op1)))
2459 cmp_code = PLUS, branch_code = code, op1 = n_op1;
2460 }
2461 }
2462
2463 if (!reg_or_0_operand (op0, DImode))
2464 op0 = force_reg (DImode, op0);
2465 if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode))
2466 op1 = force_reg (DImode, op1);
2467 }
2468
2469 /* Emit an initial compare instruction, if necessary. */
2470 tem = op0;
2471 if (cmp_code != UNKNOWN)
2472 {
2473 tem = gen_reg_rtx (cmp_mode);
2474 emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1));
2475 }
2476
2477 /* Emit the branch instruction. */
2478 tem = gen_rtx_SET (VOIDmode, pc_rtx,
2479 gen_rtx_IF_THEN_ELSE (VOIDmode,
2480 gen_rtx_fmt_ee (branch_code,
2481 branch_mode, tem,
2482 CONST0_RTX (cmp_mode)),
2483 gen_rtx_LABEL_REF (VOIDmode,
2484 operands[3]),
2485 pc_rtx));
2486 emit_jump_insn (tem);
2487 }
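/* Example of the folding above, for illustration only (not in the
   original source): an integer "a > b" is reversed and becomes
   "cmple a,b,t ; beq t,L", while a DFmode "a > b" is swapped instead,
   becoming "cmptlt b,a,f ; fbne f,L".  */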
2488
2489 /* Certain simplifications can be done to make invalid setcc operations
2490 valid. Return true if we succeed, false if the comparison cannot be handled. */
2491
2492 bool
2493 alpha_emit_setcc (rtx operands[], enum machine_mode cmp_mode)
2494 {
2495 enum rtx_code cmp_code;
2496 enum rtx_code code = GET_CODE (operands[1]);
2497 rtx op0 = operands[2], op1 = operands[3];
2498 rtx tmp;
2499
2500 if (cmp_mode == TFmode)
2501 {
2502 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2503 op1 = const0_rtx;
2504 cmp_mode = DImode;
2505 }
2506
2507 if (cmp_mode == DFmode && !TARGET_FIX)
2508 return 0;
2509
2510 /* The general case: fold the comparison code to the types of compares
2511 that we have, choosing the branch as necessary. */
2512
2513 cmp_code = UNKNOWN;
2514 switch (code)
2515 {
2516 case EQ: case LE: case LT: case LEU: case LTU:
2517 case UNORDERED:
2518 /* We have these compares. */
2519 if (cmp_mode == DFmode)
2520 cmp_code = code, code = NE;
2521 break;
2522
2523 case NE:
2524 if (cmp_mode == DImode && op1 == const0_rtx)
2525 break;
2526 /* FALLTHRU */
2527
2528 case ORDERED:
2529 cmp_code = reverse_condition (code);
2530 code = EQ;
2531 break;
2532
2533 case GE: case GT: case GEU: case GTU:
2534 /* These normally need swapping, but for integer zero we have
2535 special patterns that recognize swapped operands. */
2536 if (cmp_mode == DImode && op1 == const0_rtx)
2537 break;
2538 code = swap_condition (code);
2539 if (cmp_mode == DFmode)
2540 cmp_code = code, code = NE;
2541 tmp = op0, op0 = op1, op1 = tmp;
2542 break;
2543
2544 default:
2545 gcc_unreachable ();
2546 }
2547
2548 if (cmp_mode == DImode)
2549 {
2550 if (!register_operand (op0, DImode))
2551 op0 = force_reg (DImode, op0);
2552 if (!reg_or_8bit_operand (op1, DImode))
2553 op1 = force_reg (DImode, op1);
2554 }
2555
2556 /* Emit an initial compare instruction, if necessary. */
2557 if (cmp_code != UNKNOWN)
2558 {
2559 tmp = gen_reg_rtx (cmp_mode);
2560 emit_insn (gen_rtx_SET (VOIDmode, tmp,
2561 gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1)));
2562
2563 op0 = cmp_mode != DImode ? gen_lowpart (DImode, tmp) : tmp;
2564 op1 = const0_rtx;
2565 }
2566
2567 /* Emit the setcc instruction. */
2568 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2569 gen_rtx_fmt_ee (code, DImode, op0, op1)));
2570 return true;
2571 }
2572
2573
2574 /* Rewrite a comparison against zero CMP of the form
2575 (CODE (cc0) (const_int 0)) so it can be written validly in
2576 a conditional move (if_then_else CMP ...).
2577 If both of the operands that set cc0 are nonzero we must emit
2578 an insn to perform the compare (it can't be done within
2579 the conditional move). */
2580
2581 rtx
2582 alpha_emit_conditional_move (rtx cmp, enum machine_mode mode)
2583 {
2584 enum rtx_code code = GET_CODE (cmp);
2585 enum rtx_code cmov_code = NE;
2586 rtx op0 = XEXP (cmp, 0);
2587 rtx op1 = XEXP (cmp, 1);
2588 enum machine_mode cmp_mode
2589 = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0));
2590 enum machine_mode cmov_mode = VOIDmode;
2591 int local_fast_math = flag_unsafe_math_optimizations;
2592 rtx tem;
2593
2594 if (cmp_mode == TFmode)
2595 {
2596 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2597 op1 = const0_rtx;
2598 cmp_mode = DImode;
2599 }
2600
2601 gcc_assert (cmp_mode == DFmode || cmp_mode == DImode);
2602
2603 if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode))
2604 {
2605 enum rtx_code cmp_code;
2606
2607 if (! TARGET_FIX)
2608 return 0;
2609
2610 /* If we have fp<->int register move instructions, do a cmov by
2611 performing the comparison in fp registers, and move the
2612 zero/nonzero value to integer registers, where we can then
2613 use a normal cmov, or vice-versa. */
2614
2615 switch (code)
2616 {
2617 case EQ: case LE: case LT: case LEU: case LTU:
2618 case UNORDERED:
2619 /* We have these compares. */
2620 cmp_code = code, code = NE;
2621 break;
2622
2623 case NE:
2624 case ORDERED:
2625 /* These must be reversed. */
2626 cmp_code = reverse_condition (code), code = EQ;
2627 break;
2628
2629 case GE: case GT: case GEU: case GTU:
2630 /* These normally need swapping, but for integer zero we have
2631 special patterns that recognize swapped operands. */
2632 if (cmp_mode == DImode && op1 == const0_rtx)
2633 cmp_code = code, code = NE;
2634 else
2635 {
2636 cmp_code = swap_condition (code);
2637 code = NE;
2638 tem = op0, op0 = op1, op1 = tem;
2639 }
2640 break;
2641
2642 default:
2643 gcc_unreachable ();
2644 }
2645
2646 if (cmp_mode == DImode)
2647 {
2648 if (!reg_or_0_operand (op0, DImode))
2649 op0 = force_reg (DImode, op0);
2650 if (!reg_or_8bit_operand (op1, DImode))
2651 op1 = force_reg (DImode, op1);
2652 }
2653
2654 tem = gen_reg_rtx (cmp_mode);
2655 emit_insn (gen_rtx_SET (VOIDmode, tem,
2656 gen_rtx_fmt_ee (cmp_code, cmp_mode,
2657 op0, op1)));
2658
2659 cmp_mode = cmp_mode == DImode ? DFmode : DImode;
2660 op0 = gen_lowpart (cmp_mode, tem);
2661 op1 = CONST0_RTX (cmp_mode);
2662 local_fast_math = 1;
2663 }
2664
2665 if (cmp_mode == DImode)
2666 {
2667 if (!reg_or_0_operand (op0, DImode))
2668 op0 = force_reg (DImode, op0);
2669 if (!reg_or_8bit_operand (op1, DImode))
2670 op1 = force_reg (DImode, op1);
2671 }
2672
2673 /* We may be able to use a conditional move directly.
2674 This avoids emitting spurious compares. */
2675 if (signed_comparison_operator (cmp, VOIDmode)
2676 && (cmp_mode == DImode || local_fast_math)
2677 && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode)))
2678 return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2679
2680 /* We can't put the comparison inside the conditional move;
2681 emit a compare instruction and put that inside the
2682 conditional move. Make sure we emit only comparisons we have;
2683 swap or reverse as necessary. */
2684
2685 if (!can_create_pseudo_p ())
2686 return NULL_RTX;
2687
2688 switch (code)
2689 {
2690 case EQ: case LE: case LT: case LEU: case LTU:
2691 case UNORDERED:
2692 /* We have these compares: */
2693 break;
2694
2695 case NE:
2696 case ORDERED:
2697 /* These must be reversed. */
2698 code = reverse_condition (code);
2699 cmov_code = EQ;
2700 break;
2701
2702 case GE: case GT: case GEU: case GTU:
2703 /* These must be swapped. */
2704 if (op1 != CONST0_RTX (cmp_mode))
2705 {
2706 code = swap_condition (code);
2707 tem = op0, op0 = op1, op1 = tem;
2708 }
2709 break;
2710
2711 default:
2712 gcc_unreachable ();
2713 }
2714
2715 if (cmp_mode == DImode)
2716 {
2717 if (!reg_or_0_operand (op0, DImode))
2718 op0 = force_reg (DImode, op0);
2719 if (!reg_or_8bit_operand (op1, DImode))
2720 op1 = force_reg (DImode, op1);
2721 }
2722
2723 /* ??? We mark the branch mode to be CCmode to prevent the compare
2724 and cmov from being combined, since the compare insn follows IEEE
2725 rules that the cmov does not. */
2726 if (cmp_mode == DFmode && !local_fast_math)
2727 cmov_mode = CCmode;
2728
2729 tem = gen_reg_rtx (cmp_mode);
2730 emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_mode, op0, op1));
2731 return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_mode));
2732 }
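/* Example of the mixed-mode case above, for illustration only (not in
   the original source): for "double r = (a < b) ? x : y" with A and B
   in integer registers on a TARGET_FIX machine, the compare
   "cmplt a,b,t" stays in the integer file, T is copied to an FP
   register (the gen_lowpart becomes an itoft move), and the selection
   is a plain fcmovne against 0.0; the IEEE compare insns are never
   involved.  */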
2733
2734 /* Simplify a conditional move of two constants into a setcc with
2735 arithmetic. This is done with a splitter since combine would
2736 just undo the work if done during code generation. It also catches
2737 cases we wouldn't have before cse. */
2738
2739 int
2740 alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond,
2741 rtx t_rtx, rtx f_rtx)
2742 {
2743 HOST_WIDE_INT t, f, diff;
2744 enum machine_mode mode;
2745 rtx target, subtarget, tmp;
2746
2747 mode = GET_MODE (dest);
2748 t = INTVAL (t_rtx);
2749 f = INTVAL (f_rtx);
2750 diff = t - f;
2751
2752 if (((code == NE || code == EQ) && diff < 0)
2753 || (code == GE || code == GT))
2754 {
2755 code = reverse_condition (code);
2756 diff = t, t = f, f = diff;
2757 diff = t - f;
2758 }
2759
2760 subtarget = target = dest;
2761 if (mode != DImode)
2762 {
2763 target = gen_lowpart (DImode, dest);
2764 if (can_create_pseudo_p ())
2765 subtarget = gen_reg_rtx (DImode);
2766 else
2767 subtarget = target;
2768 }
2769 /* Below, we must be careful to use copy_rtx on target and subtarget
2770 in intermediate insns, as they may be a subreg rtx, which may not
2771 be shared. */
2772
2773 if (f == 0 && exact_log2 (diff) > 0
2774 /* On EV6, we've got enough shifters to make non-arithmetic shifts
2775 viable over a longer latency cmove. On EV5, the E0 slot is a
2776 scarce resource, and on EV4 shift has the same latency as a cmove. */
2777 && (diff <= 8 || alpha_tune == PROCESSOR_EV6))
2778 {
2779 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2780 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2781
2782 tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget),
2783 GEN_INT (exact_log2 (t)));
2784 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
2785 }
2786 else if (f == 0 && t == -1)
2787 {
2788 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2789 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2790
2791 emit_insn (gen_negdi2 (target, copy_rtx (subtarget)));
2792 }
2793 else if (diff == 1 || diff == 4 || diff == 8)
2794 {
2795 rtx add_op;
2796
2797 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2798 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2799
2800 if (diff == 1)
2801 emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f)));
2802 else
2803 {
2804 add_op = GEN_INT (f);
2805 if (sext_add_operand (add_op, mode))
2806 {
2807 tmp = gen_rtx_MULT (DImode, copy_rtx (subtarget),
2808 GEN_INT (diff));
2809 tmp = gen_rtx_PLUS (DImode, tmp, add_op);
2810 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
2811 }
2812 else
2813 return 0;
2814 }
2815 }
2816 else
2817 return 0;
2818
2819 return 1;
2820 }
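/* Worked example, for illustration only (not in the original source):
   for "x = cond ? 5 : 1" we have t = 5, f = 1, diff = 4, so the
   diff == 4 path emits a compare-to-bool followed by "s4addq t,1,x",
   i.e. x = t*4 + 1, in place of the conditional move.  */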
2821
2822 /* Look up the function X_floating library function name for the
2823 given operation. */
2824
2825 struct GTY(()) xfloating_op
2826 {
2827 const enum rtx_code code;
2828 const char *const GTY((skip)) osf_func;
2829 const char *const GTY((skip)) vms_func;
2830 rtx libcall;
2831 };
2832
2833 static GTY(()) struct xfloating_op xfloating_ops[] =
2834 {
2835 { PLUS, "_OtsAddX", "OTS$ADD_X", 0 },
2836 { MINUS, "_OtsSubX", "OTS$SUB_X", 0 },
2837 { MULT, "_OtsMulX", "OTS$MUL_X", 0 },
2838 { DIV, "_OtsDivX", "OTS$DIV_X", 0 },
2839 { EQ, "_OtsEqlX", "OTS$EQL_X", 0 },
2840 { NE, "_OtsNeqX", "OTS$NEQ_X", 0 },
2841 { LT, "_OtsLssX", "OTS$LSS_X", 0 },
2842 { LE, "_OtsLeqX", "OTS$LEQ_X", 0 },
2843 { GT, "_OtsGtrX", "OTS$GTR_X", 0 },
2844 { GE, "_OtsGeqX", "OTS$GEQ_X", 0 },
2845 { FIX, "_OtsCvtXQ", "OTS$CVTXQ", 0 },
2846 { FLOAT, "_OtsCvtQX", "OTS$CVTQX", 0 },
2847 { UNSIGNED_FLOAT, "_OtsCvtQUX", "OTS$CVTQUX", 0 },
2848 { FLOAT_EXTEND, "_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0 },
2849 { FLOAT_TRUNCATE, "_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0 }
2850 };
2851
2852 static GTY(()) struct xfloating_op vax_cvt_ops[] =
2853 {
2854 { FLOAT_EXTEND, "_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0 },
2855 { FLOAT_TRUNCATE, "_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0 }
2856 };
2857
2858 static rtx
2859 alpha_lookup_xfloating_lib_func (enum rtx_code code)
2860 {
2861 struct xfloating_op *ops = xfloating_ops;
2862 long n = ARRAY_SIZE (xfloating_ops);
2863 long i;
2864
2865 gcc_assert (TARGET_HAS_XFLOATING_LIBS);
2866
2867 /* How irritating. Nothing to key off for the main table. */
2868 if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE))
2869 {
2870 ops = vax_cvt_ops;
2871 n = ARRAY_SIZE (vax_cvt_ops);
2872 }
2873
2874 for (i = 0; i < n; ++i, ++ops)
2875 if (ops->code == code)
2876 {
2877 rtx func = ops->libcall;
2878 if (!func)
2879 {
2880 func = init_one_libfunc (TARGET_ABI_OPEN_VMS
2881 ? ops->vms_func : ops->osf_func);
2882 ops->libcall = func;
2883 }
2884 return func;
2885 }
2886
2887 gcc_unreachable ();
2888 }
2889
2890 /* Most X_floating operations take the rounding mode as an argument.
2891 Compute that here. */
2892
2893 static int
2894 alpha_compute_xfloating_mode_arg (enum rtx_code code,
2895 enum alpha_fp_rounding_mode round)
2896 {
2897 int mode;
2898
2899 switch (round)
2900 {
2901 case ALPHA_FPRM_NORM:
2902 mode = 2;
2903 break;
2904 case ALPHA_FPRM_MINF:
2905 mode = 1;
2906 break;
2907 case ALPHA_FPRM_CHOP:
2908 mode = 0;
2909 break;
2910 case ALPHA_FPRM_DYN:
2911 mode = 4;
2912 break;
2913 default:
2914 gcc_unreachable ();
2915
2916 /* XXX For reference, round to +inf is mode = 3. */
2917 }
2918
2919 if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N)
2920 mode |= 0x10000;
2921
2922 return mode;
2923 }
2924
2925 /* Emit an X_floating library function call.
2926
2927 Note that these functions do not follow normal calling conventions:
2928 TFmode arguments are passed in two integer registers (as opposed to
2929 indirect); TFmode return values appear in R16+R17.
2930
2931 FUNC is the function to call.
2932 TARGET is where the output belongs.
2933 OPERANDS are the inputs.
2934 NOPERANDS is the count of inputs.
2935 EQUIV is the expression equivalent for the function.
2936 */
2937
2938 static void
2939 alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[],
2940 int noperands, rtx equiv)
2941 {
2942 rtx usage = NULL_RTX, tmp, reg;
2943 int regno = 16, i;
2944
2945 start_sequence ();
2946
2947 for (i = 0; i < noperands; ++i)
2948 {
2949 switch (GET_MODE (operands[i]))
2950 {
2951 case TFmode:
2952 reg = gen_rtx_REG (TFmode, regno);
2953 regno += 2;
2954 break;
2955
2956 case DFmode:
2957 reg = gen_rtx_REG (DFmode, regno + 32);
2958 regno += 1;
2959 break;
2960
2961 case VOIDmode:
2962 gcc_assert (CONST_INT_P (operands[i]));
2963 /* FALLTHRU */
2964 case DImode:
2965 reg = gen_rtx_REG (DImode, regno);
2966 regno += 1;
2967 break;
2968
2969 default:
2970 gcc_unreachable ();
2971 }
2972
2973 emit_move_insn (reg, operands[i]);
2974 use_reg (&usage, reg);
2975 }
2976
2977 switch (GET_MODE (target))
2978 {
2979 case TFmode:
2980 reg = gen_rtx_REG (TFmode, 16);
2981 break;
2982 case DFmode:
2983 reg = gen_rtx_REG (DFmode, 32);
2984 break;
2985 case DImode:
2986 reg = gen_rtx_REG (DImode, 0);
2987 break;
2988 default:
2989 gcc_unreachable ();
2990 }
2991
2992 tmp = gen_rtx_MEM (QImode, func);
2993 tmp = emit_call_insn (GEN_CALL_VALUE (reg, tmp, const0_rtx,
2994 const0_rtx, const0_rtx));
2995 CALL_INSN_FUNCTION_USAGE (tmp) = usage;
2996 RTL_CONST_CALL_P (tmp) = 1;
2997
2998 tmp = get_insns ();
2999 end_sequence ();
3000
3001 emit_libcall_block (tmp, target, reg, equiv);
3002 }
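/* Example of the convention above, for illustration only (not in the
   original source): a call to _OtsAddX (TFmode A, TFmode B, DImode
   rounding mode) places A in $16/$17, B in $18/$19 and the mode
   argument in $20, with the TFmode result returned in $16/$17 --
   unlike the standard ABI, where a TFmode argument would be passed
   indirectly.  */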
3003
3004 /* Emit an X_floating library function call for arithmetic (+,-,*,/). */
3005
3006 void
3007 alpha_emit_xfloating_arith (enum rtx_code code, rtx operands[])
3008 {
3009 rtx func;
3010 int mode;
3011 rtx out_operands[3];
3012
3013 func = alpha_lookup_xfloating_lib_func (code);
3014 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3015
3016 out_operands[0] = operands[1];
3017 out_operands[1] = operands[2];
3018 out_operands[2] = GEN_INT (mode);
3019 alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3,
3020 gen_rtx_fmt_ee (code, TFmode, operands[1],
3021 operands[2]));
3022 }
3023
3024 /* Emit an X_floating library function call for a comparison. */
3025
3026 static rtx
3027 alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1)
3028 {
3029 enum rtx_code cmp_code, res_code;
3030 rtx func, out, operands[2], note;
3031
3032 /* X_floating library comparison functions return
3033 -1 unordered
3034 0 false
3035 1 true
3036 Convert the compare against the raw return value. */
3037
3038 cmp_code = *pcode;
3039 switch (cmp_code)
3040 {
3041 case UNORDERED:
3042 cmp_code = EQ;
3043 res_code = LT;
3044 break;
3045 case ORDERED:
3046 cmp_code = EQ;
3047 res_code = GE;
3048 break;
3049 case NE:
3050 res_code = NE;
3051 break;
3052 case EQ:
3053 case LT:
3054 case GT:
3055 case LE:
3056 case GE:
3057 res_code = GT;
3058 break;
3059 default:
3060 gcc_unreachable ();
3061 }
3062 *pcode = res_code;
3063
3064 func = alpha_lookup_xfloating_lib_func (cmp_code);
3065
3066 operands[0] = op0;
3067 operands[1] = op1;
3068 out = gen_reg_rtx (DImode);
3069
3070 /* What's actually returned is -1,0,1, not a proper boolean value,
3071 so use an EXPR_LIST as with a generic libcall instead of a
3072 comparison type expression. */
3073 note = gen_rtx_EXPR_LIST (VOIDmode, op1, NULL_RTX);
3074 note = gen_rtx_EXPR_LIST (VOIDmode, op0, note);
3075 note = gen_rtx_EXPR_LIST (VOIDmode, func, note);
3076 alpha_emit_xfloating_libcall (func, out, operands, 2, note);
3077
3078 return out;
3079 }
3080
3081 /* Emit an X_floating library function call for a conversion. */
3082
3083 void
3084 alpha_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[])
3085 {
3086 int noperands = 1, mode;
3087 rtx out_operands[2];
3088 rtx func;
3089 enum rtx_code code = orig_code;
3090
3091 if (code == UNSIGNED_FIX)
3092 code = FIX;
3093
3094 func = alpha_lookup_xfloating_lib_func (code);
3095
3096 out_operands[0] = operands[1];
3097
3098 switch (code)
3099 {
3100 case FIX:
3101 mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP);
3102 out_operands[1] = GEN_INT (mode);
3103 noperands = 2;
3104 break;
3105 case FLOAT_TRUNCATE:
3106 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3107 out_operands[1] = GEN_INT (mode);
3108 noperands = 2;
3109 break;
3110 default:
3111 break;
3112 }
3113
3114 alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands,
3115 gen_rtx_fmt_e (orig_code,
3116 GET_MODE (operands[0]),
3117 operands[1]));
3118 }
3119
3120 /* Split a TImode or TFmode move from OP[1] to OP[0] into a pair of
3121 DImode moves from OP[2,3] to OP[0,1]. If FIXUP_OVERLAP is true,
3122 guarantee that the sequence
3123 set (OP[0] OP[2])
3124 set (OP[1] OP[3])
3125 is valid. Naturally, output operand ordering is little-endian.
3126 This is used by *movtf_internal and *movti_internal. */
3127
3128 void
3129 alpha_split_tmode_pair (rtx operands[4], enum machine_mode mode,
3130 bool fixup_overlap)
3131 {
3132 switch (GET_CODE (operands[1]))
3133 {
3134 case REG:
3135 operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
3136 operands[2] = gen_rtx_REG (DImode, REGNO (operands[1]));
3137 break;
3138
3139 case MEM:
3140 operands[3] = adjust_address (operands[1], DImode, 8);
3141 operands[2] = adjust_address (operands[1], DImode, 0);
3142 break;
3143
3144 case CONST_INT:
3145 case CONST_DOUBLE:
3146 gcc_assert (operands[1] == CONST0_RTX (mode));
3147 operands[2] = operands[3] = const0_rtx;
3148 break;
3149
3150 default:
3151 gcc_unreachable ();
3152 }
3153
3154 switch (GET_CODE (operands[0]))
3155 {
3156 case REG:
3157 operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
3158 operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
3159 break;
3160
3161 case MEM:
3162 operands[1] = adjust_address (operands[0], DImode, 8);
3163 operands[0] = adjust_address (operands[0], DImode, 0);
3164 break;
3165
3166 default:
3167 gcc_unreachable ();
3168 }
3169
3170 if (fixup_overlap && reg_overlap_mentioned_p (operands[0], operands[3]))
3171 {
3172 rtx tmp;
3173 tmp = operands[0], operands[0] = operands[1], operands[1] = tmp;
3174 tmp = operands[2], operands[2] = operands[3], operands[3] = tmp;
3175 }
3176 }
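/* Example of the overlap fixup above, for illustration only (not in
   the original source): splitting a TImode copy with the destination
   in $2/$3 and the source in $1/$2, emitting the low half first would
   clobber $2 (the source's high word) before it is read; the swap
   reorders the moves so "$3 <- $2" precedes "$2 <- $1".  */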
3177
3178 /* Implement negtf2 or abstf2. Op0 is destination, op1 is source,
3179 op2 is a register containing the sign bit, operation is the
3180 logical operation to be performed. */
3181
3182 void
3183 alpha_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx))
3184 {
3185 rtx high_bit = operands[2];
3186 rtx scratch;
3187 int move;
3188
3189 alpha_split_tmode_pair (operands, TFmode, false);
3190
3191 /* Detect three flavors of operand overlap. */
3192 move = 1;
3193 if (rtx_equal_p (operands[0], operands[2]))
3194 move = 0;
3195 else if (rtx_equal_p (operands[1], operands[2]))
3196 {
3197 if (rtx_equal_p (operands[0], high_bit))
3198 move = 2;
3199 else
3200 move = -1;
3201 }
3202
3203 if (move < 0)
3204 emit_move_insn (operands[0], operands[2]);
3205
3206 /* ??? If the destination overlaps both source tf and high_bit, then
3207 assume source tf is dead in its entirety and use the other half
3208 for a scratch register. Otherwise "scratch" is just the proper
3209 destination register. */
3210 scratch = operands[move < 2 ? 1 : 3];
3211
3212 emit_insn ((*operation) (scratch, high_bit, operands[3]));
3213
3214 if (move > 0)
3215 {
3216 emit_move_insn (operands[0], operands[2]);
3217 if (move > 1)
3218 emit_move_insn (operands[1], scratch);
3219 }
3220 }
3221
3222 /* Use ext[wlq][lh] as the Architecture Handbook describes for extracting
3223 unaligned data:
3224
3225 unsigned: signed:
3226 word: ldq_u r1,X(r11) ldq_u r1,X(r11)
3227 ldq_u r2,X+1(r11) ldq_u r2,X+1(r11)
3228 lda r3,X(r11) lda r3,X+2(r11)
3229 extwl r1,r3,r1 extql r1,r3,r1
3230 extwh r2,r3,r2 extqh r2,r3,r2
3231 or r1,r2,r1 or r1,r2,r1
3232 sra r1,48,r1
3233
3234 long: ldq_u r1,X(r11) ldq_u r1,X(r11)
3235 ldq_u r2,X+3(r11) ldq_u r2,X+3(r11)
3236 lda r3,X(r11) lda r3,X(r11)
3237 extll r1,r3,r1 extll r1,r3,r1
3238 extlh r2,r3,r2 extlh r2,r3,r2
3239 or r1,r2,r1 addl r1,r2,r1
3240
3241 quad: ldq_u r1,X(r11)
3242 ldq_u r2,X+7(r11)
3243 lda r3,X(r11)
3244 extql r1,r3,r1
3245 extqh r2,r3,r2
3246 or r1,r2,r1
3247 */
3248
3249 void
3250 alpha_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size,
3251 HOST_WIDE_INT ofs, int sign)
3252 {
3253 rtx meml, memh, addr, extl, exth, tmp, mema;
3254 enum machine_mode mode;
3255
3256 if (TARGET_BWX && size == 2)
3257 {
3258 meml = adjust_address (mem, QImode, ofs);
3259 memh = adjust_address (mem, QImode, ofs+1);
3260 extl = gen_reg_rtx (DImode);
3261 exth = gen_reg_rtx (DImode);
3262 emit_insn (gen_zero_extendqidi2 (extl, meml));
3263 emit_insn (gen_zero_extendqidi2 (exth, memh));
3264 exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8),
3265 NULL, 1, OPTAB_LIB_WIDEN);
3266 addr = expand_simple_binop (DImode, IOR, extl, exth,
3267 NULL, 1, OPTAB_LIB_WIDEN);
3268
3269 if (sign && GET_MODE (tgt) != HImode)
3270 {
3271 addr = gen_lowpart (HImode, addr);
3272 emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0));
3273 }
3274 else
3275 {
3276 if (GET_MODE (tgt) != DImode)
3277 addr = gen_lowpart (GET_MODE (tgt), addr);
3278 emit_move_insn (tgt, addr);
3279 }
3280 return;
3281 }
3282
3283 meml = gen_reg_rtx (DImode);
3284 memh = gen_reg_rtx (DImode);
3285 addr = gen_reg_rtx (DImode);
3286 extl = gen_reg_rtx (DImode);
3287 exth = gen_reg_rtx (DImode);
3288
3289 mema = XEXP (mem, 0);
3290 if (GET_CODE (mema) == LO_SUM)
3291 mema = force_reg (Pmode, mema);
3292
3293 /* AND addresses cannot be in any alias set, since they may implicitly
3294 alias surrounding code. Ideally we'd have some alias set that
3295 covered all types except those with alignment 8 or higher. */
3296
3297 tmp = change_address (mem, DImode,
3298 gen_rtx_AND (DImode,
3299 plus_constant (DImode, mema, ofs),
3300 GEN_INT (-8)));
3301 set_mem_alias_set (tmp, 0);
3302 emit_move_insn (meml, tmp);
3303
3304 tmp = change_address (mem, DImode,
3305 gen_rtx_AND (DImode,
3306 plus_constant (DImode, mema,
3307 ofs + size - 1),
3308 GEN_INT (-8)));
3309 set_mem_alias_set (tmp, 0);
3310 emit_move_insn (memh, tmp);
3311
3312 if (sign && size == 2)
3313 {
3314 emit_move_insn (addr, plus_constant (Pmode, mema, ofs+2));
3315
3316 emit_insn (gen_extql (extl, meml, addr));
3317 emit_insn (gen_extqh (exth, memh, addr));
3318
3319 /* We must use tgt here for the target. Alpha-vms port fails if we use
3320 addr for the target, because addr is marked as a pointer and combine
3321 knows that pointers are always sign-extended 32-bit values. */
3322 addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN);
3323 addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48),
3324 addr, 1, OPTAB_WIDEN);
3325 }
3326 else
3327 {
3328 emit_move_insn (addr, plus_constant (Pmode, mema, ofs));
3329 emit_insn (gen_extxl (extl, meml, GEN_INT (size*8), addr));
3330 switch ((int) size)
3331 {
3332 case 2:
3333 emit_insn (gen_extwh (exth, memh, addr));
3334 mode = HImode;
3335 break;
3336 case 4:
3337 emit_insn (gen_extlh (exth, memh, addr));
3338 mode = SImode;
3339 break;
3340 case 8:
3341 emit_insn (gen_extqh (exth, memh, addr));
3342 mode = DImode;
3343 break;
3344 default:
3345 gcc_unreachable ();
3346 }
3347
3348 addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl),
3349 gen_lowpart (mode, exth), gen_lowpart (mode, tgt),
3350 sign, OPTAB_WIDEN);
3351 }
3352
3353 if (addr != tgt)
3354 emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr));
3355 }
3356
3357 /* Similarly, use ins and msk instructions to perform unaligned stores. */
3358
3359 void
3360 alpha_expand_unaligned_store (rtx dst, rtx src,
3361 HOST_WIDE_INT size, HOST_WIDE_INT ofs)
3362 {
3363 rtx dstl, dsth, addr, insl, insh, meml, memh, dsta;
3364
3365 if (TARGET_BWX && size == 2)
3366 {
3367 if (src != const0_rtx)
3368 {
3369 dstl = gen_lowpart (QImode, src);
3370 dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8),
3371 NULL, 1, OPTAB_LIB_WIDEN);
3372 dsth = gen_lowpart (QImode, dsth);
3373 }
3374 else
3375 dstl = dsth = const0_rtx;
3376
3377 meml = adjust_address (dst, QImode, ofs);
3378 memh = adjust_address (dst, QImode, ofs+1);
3379
3380 emit_move_insn (meml, dstl);
3381 emit_move_insn (memh, dsth);
3382 return;
3383 }
3384
3385 dstl = gen_reg_rtx (DImode);
3386 dsth = gen_reg_rtx (DImode);
3387 insl = gen_reg_rtx (DImode);
3388 insh = gen_reg_rtx (DImode);
3389
3390 dsta = XEXP (dst, 0);
3391 if (GET_CODE (dsta) == LO_SUM)
3392 dsta = force_reg (Pmode, dsta);
3393
3394 /* AND addresses cannot be in any alias set, since they may implicitly
3395 alias surrounding code. Ideally we'd have some alias set that
3396 covered all types except those with alignment 8 or higher. */
3397
3398 meml = change_address (dst, DImode,
3399 gen_rtx_AND (DImode,
3400 plus_constant (DImode, dsta, ofs),
3401 GEN_INT (-8)));
3402 set_mem_alias_set (meml, 0);
3403
3404 memh = change_address (dst, DImode,
3405 gen_rtx_AND (DImode,
3406 plus_constant (DImode, dsta,
3407 ofs + size - 1),
3408 GEN_INT (-8)));
3409 set_mem_alias_set (memh, 0);
3410
3411 emit_move_insn (dsth, memh);
3412 emit_move_insn (dstl, meml);
3413
3414 addr = copy_addr_to_reg (plus_constant (Pmode, dsta, ofs));
3415
3416 if (src != CONST0_RTX (GET_MODE (src)))
3417 {
3418 emit_insn (gen_insxh (insh, gen_lowpart (DImode, src),
3419 GEN_INT (size*8), addr));
3420
3421 switch ((int) size)
3422 {
3423 case 2:
3424 emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr));
3425 break;
3426 case 4:
3427 emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr));
3428 break;
3429 case 8:
3430 emit_insn (gen_insql (insl, gen_lowpart (DImode, src), addr));
3431 break;
3432 default:
3433 gcc_unreachable ();
3434 }
3435 }
3436
3437 emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size*8), addr));
3438
3439 switch ((int) size)
3440 {
3441 case 2:
3442 emit_insn (gen_mskwl (dstl, dstl, addr));
3443 break;
3444 case 4:
3445 emit_insn (gen_mskll (dstl, dstl, addr));
3446 break;
3447 case 8:
3448 emit_insn (gen_mskql (dstl, dstl, addr));
3449 break;
3450 default:
3451 gcc_unreachable ();
3452 }
3453
3454 if (src != CONST0_RTX (GET_MODE (src)))
3455 {
3456 dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN);
3457 dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN);
3458 }
3459
3460 /* Must store high before low for the degenerate aligned case: when the field does not cross a quadword boundary, both halves address the same quadword, and the low half holds the fully merged data, so its store must land last. */
3461 emit_move_insn (memh, dsth);
3462 emit_move_insn (meml, dstl);
3463 }
3464
3465 /* The block move code tries to maximize speed by separating loads and
3466 stores at the expense of register pressure: we load all of the data
3467 before we store it back out. There are two secondary effects worth
3468 mentioning, that this speeds copying to/from aligned and unaligned
3469 buffers, and that it makes the code significantly easier to write. */
3470
3471 #define MAX_MOVE_WORDS 8
3472
3473 /* Load an integral number of consecutive unaligned quadwords. */
3474
3475 static void
3476 alpha_expand_unaligned_load_words (rtx *out_regs, rtx smem,
3477 HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3478 {
3479 rtx const im8 = GEN_INT (-8);
3480 rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS+1];
3481 rtx sreg, areg, tmp, smema;
3482 HOST_WIDE_INT i;
3483
3484 smema = XEXP (smem, 0);
3485 if (GET_CODE (smema) == LO_SUM)
3486 smema = force_reg (Pmode, smema);
3487
3488 /* Generate all the tmp registers we need. */
3489 for (i = 0; i < words; ++i)
3490 {
3491 data_regs[i] = out_regs[i];
3492 ext_tmps[i] = gen_reg_rtx (DImode);
3493 }
3494 data_regs[words] = gen_reg_rtx (DImode);
3495
3496 if (ofs != 0)
3497 smem = adjust_address (smem, GET_MODE (smem), ofs);
3498
3499 /* Load up all of the source data. */
3500 for (i = 0; i < words; ++i)
3501 {
3502 tmp = change_address (smem, DImode,
3503 gen_rtx_AND (DImode,
3504 plus_constant (DImode, smema, 8*i),
3505 im8));
3506 set_mem_alias_set (tmp, 0);
3507 emit_move_insn (data_regs[i], tmp);
3508 }
3509
3510 tmp = change_address (smem, DImode,
3511 gen_rtx_AND (DImode,
3512 plus_constant (DImode, smema,
3513 8*words - 1),
3514 im8));
3515 set_mem_alias_set (tmp, 0);
3516 emit_move_insn (data_regs[words], tmp);
3517
3518 /* Extract the half-word fragments. Unfortunately DEC decided to make
3519 extxh with offset zero a noop instead of zeroing the register, so
3520 we must take care of that edge condition ourselves with cmov. */
3521
3522 sreg = copy_addr_to_reg (smema);
3523 areg = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL,
3524 1, OPTAB_WIDEN);
3525 for (i = 0; i < words; ++i)
3526 {
3527 emit_insn (gen_extql (data_regs[i], data_regs[i], sreg));
3528 emit_insn (gen_extqh (ext_tmps[i], data_regs[i+1], sreg));
3529 emit_insn (gen_rtx_SET (VOIDmode, ext_tmps[i],
3530 gen_rtx_IF_THEN_ELSE (DImode,
3531 gen_rtx_EQ (DImode, areg,
3532 const0_rtx),
3533 const0_rtx, ext_tmps[i])));
3534 }
3535
3536 /* Merge the half-words into whole words. */
3537 for (i = 0; i < words; ++i)
3538 {
3539 out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i],
3540 ext_tmps[i], data_regs[i], 1, OPTAB_WIDEN);
3541 }
3542 }
3543
3544 /* Store an integral number of consecutive unaligned quadwords. DATA_REGS
3545 may be NULL to store zeros. */
3546
3547 static void
3548 alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem,
3549 HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3550 {
3551 rtx const im8 = GEN_INT (-8);
3552 rtx ins_tmps[MAX_MOVE_WORDS];
3553 rtx st_tmp_1, st_tmp_2, dreg;
3554 rtx st_addr_1, st_addr_2, dmema;
3555 HOST_WIDE_INT i;
3556
3557 dmema = XEXP (dmem, 0);
3558 if (GET_CODE (dmema) == LO_SUM)
3559 dmema = force_reg (Pmode, dmema);
3560
3561 /* Generate all the tmp registers we need. */
3562 if (data_regs != NULL)
3563 for (i = 0; i < words; ++i)
3564 ins_tmps[i] = gen_reg_rtx (DImode);
3565 st_tmp_1 = gen_reg_rtx (DImode);
3566 st_tmp_2 = gen_reg_rtx (DImode);
3567
3568 if (ofs != 0)
3569 dmem = adjust_address (dmem, GET_MODE (dmem), ofs);
3570
3571 st_addr_2 = change_address (dmem, DImode,
3572 gen_rtx_AND (DImode,
3573 plus_constant (DImode, dmema,
3574 words*8 - 1),
3575 im8));
3576 set_mem_alias_set (st_addr_2, 0);
3577
3578 st_addr_1 = change_address (dmem, DImode,
3579 gen_rtx_AND (DImode, dmema, im8));
3580 set_mem_alias_set (st_addr_1, 0);
3581
3582 /* Load up the destination end bits. */
3583 emit_move_insn (st_tmp_2, st_addr_2);
3584 emit_move_insn (st_tmp_1, st_addr_1);
3585
3586 /* Shift the input data into place. */
3587 dreg = copy_addr_to_reg (dmema);
3588 if (data_regs != NULL)
3589 {
3590 for (i = words-1; i >= 0; --i)
3591 {
3592 emit_insn (gen_insqh (ins_tmps[i], data_regs[i], dreg));
3593 emit_insn (gen_insql (data_regs[i], data_regs[i], dreg));
3594 }
3595 for (i = words-1; i > 0; --i)
3596 {
3597 ins_tmps[i-1] = expand_binop (DImode, ior_optab, data_regs[i],
3598 ins_tmps[i-1], ins_tmps[i-1], 1,
3599 OPTAB_WIDEN);
3600 }
3601 }
3602
3603 /* Split and merge the ends with the destination data. */
3604 emit_insn (gen_mskqh (st_tmp_2, st_tmp_2, dreg));
3605 emit_insn (gen_mskql (st_tmp_1, st_tmp_1, dreg));
3606
3607 if (data_regs != NULL)
3608 {
3609 st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words-1],
3610 st_tmp_2, 1, OPTAB_WIDEN);
3611 st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0],
3612 st_tmp_1, 1, OPTAB_WIDEN);
3613 }
3614
3615 /* Store it all. */
3616 emit_move_insn (st_addr_2, st_tmp_2);
3617 for (i = words-1; i > 0; --i)
3618 {
3619 rtx tmp = change_address (dmem, DImode,
3620 gen_rtx_AND (DImode,
3621 plus_constant (DImode,
3622 dmema, i*8),
3623 im8));
3624 set_mem_alias_set (tmp, 0);
3625 emit_move_insn (tmp, data_regs ? ins_tmps[i-1] : const0_rtx);
3626 }
3627 emit_move_insn (st_addr_1, st_tmp_1);
3628 }
3629
3630
3631 /* Expand string/block move operations.
3632
3633 operands[0] is the pointer to the destination.
3634 operands[1] is the pointer to the source.
3635 operands[2] is the number of bytes to move.
3636 operands[3] is the alignment. */
3637
3638 int
3639 alpha_expand_block_move (rtx operands[])
3640 {
3641 rtx bytes_rtx = operands[2];
3642 rtx align_rtx = operands[3];
3643 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
3644 HOST_WIDE_INT bytes = orig_bytes;
3645 HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT;
3646 HOST_WIDE_INT dst_align = src_align;
3647 rtx orig_src = operands[1];
3648 rtx orig_dst = operands[0];
3649 rtx data_regs[2 * MAX_MOVE_WORDS + 16];
3650 rtx tmp;
3651 unsigned int i, words, ofs, nregs = 0;
3652
3653 if (orig_bytes <= 0)
3654 return 1;
3655 else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
3656 return 0;
3657
3658 /* Look for additional alignment information from recorded register info. */
3659
3660 tmp = XEXP (orig_src, 0);
3661 if (REG_P (tmp))
3662 src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3663 else if (GET_CODE (tmp) == PLUS
3664 && REG_P (XEXP (tmp, 0))
3665 && CONST_INT_P (XEXP (tmp, 1)))
3666 {
3667 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3668 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3669
3670 if (a > src_align)
3671 {
3672 if (a >= 64 && c % 8 == 0)
3673 src_align = 64;
3674 else if (a >= 32 && c % 4 == 0)
3675 src_align = 32;
3676 else if (a >= 16 && c % 2 == 0)
3677 src_align = 16;
3678 }
3679 }
3680
3681 tmp = XEXP (orig_dst, 0);
3682 if (REG_P (tmp))
3683 dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3684 else if (GET_CODE (tmp) == PLUS
3685 && REG_P (XEXP (tmp, 0))
3686 && CONST_INT_P (XEXP (tmp, 1)))
3687 {
3688 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3689 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3690
3691 if (a > dst_align)
3692 {
3693 if (a >= 64 && c % 8 == 0)
3694 dst_align = 64;
3695 else if (a >= 32 && c % 4 == 0)
3696 dst_align = 32;
3697 else if (a >= 16 && c % 2 == 0)
3698 dst_align = 16;
3699 }
3700 }
3701
3702 ofs = 0;
3703 if (src_align >= 64 && bytes >= 8)
3704 {
3705 words = bytes / 8;
3706
3707 for (i = 0; i < words; ++i)
3708 data_regs[nregs + i] = gen_reg_rtx (DImode);
3709
3710 for (i = 0; i < words; ++i)
3711 emit_move_insn (data_regs[nregs + i],
3712 adjust_address (orig_src, DImode, ofs + i * 8));
3713
3714 nregs += words;
3715 bytes -= words * 8;
3716 ofs += words * 8;
3717 }
3718
3719 if (src_align >= 32 && bytes >= 4)
3720 {
3721 words = bytes / 4;
3722
3723 for (i = 0; i < words; ++i)
3724 data_regs[nregs + i] = gen_reg_rtx (SImode);
3725
3726 for (i = 0; i < words; ++i)
3727 emit_move_insn (data_regs[nregs + i],
3728 adjust_address (orig_src, SImode, ofs + i * 4));
3729
3730 nregs += words;
3731 bytes -= words * 4;
3732 ofs += words * 4;
3733 }
3734
3735 if (bytes >= 8)
3736 {
3737 words = bytes / 8;
3738
3739 for (i = 0; i < words+1; ++i)
3740 data_regs[nregs + i] = gen_reg_rtx (DImode);
3741
3742 alpha_expand_unaligned_load_words (data_regs + nregs, orig_src,
3743 words, ofs);
3744
3745 nregs += words;
3746 bytes -= words * 8;
3747 ofs += words * 8;
3748 }
3749
3750 if (! TARGET_BWX && bytes >= 4)
3751 {
3752 data_regs[nregs++] = tmp = gen_reg_rtx (SImode);
3753 alpha_expand_unaligned_load (tmp, orig_src, 4, ofs, 0);
3754 bytes -= 4;
3755 ofs += 4;
3756 }
3757
3758 if (bytes >= 2)
3759 {
3760 if (src_align >= 16)
3761 {
3762 do {
3763 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3764 emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs));
3765 bytes -= 2;
3766 ofs += 2;
3767 } while (bytes >= 2);
3768 }
3769 else if (! TARGET_BWX)
3770 {
3771 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3772 alpha_expand_unaligned_load (tmp, orig_src, 2, ofs, 0);
3773 bytes -= 2;
3774 ofs += 2;
3775 }
3776 }
3777
3778 while (bytes > 0)
3779 {
3780 data_regs[nregs++] = tmp = gen_reg_rtx (QImode);
3781 emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs));
3782 bytes -= 1;
3783 ofs += 1;
3784 }
3785
3786 gcc_assert (nregs <= ARRAY_SIZE (data_regs));
3787
3788 /* Now save it back out again. */
3789
3790 i = 0, ofs = 0;
3791
3792 /* Write out the data in whatever chunks reading the source allowed. */
3793 if (dst_align >= 64)
3794 {
3795 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3796 {
3797 emit_move_insn (adjust_address (orig_dst, DImode, ofs),
3798 data_regs[i]);
3799 ofs += 8;
3800 i++;
3801 }
3802 }
3803
3804 if (dst_align >= 32)
3805 {
3806 /* If the source has remaining DImode regs, write them out in
3807 two pieces. */
3808 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3809 {
3810 tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32),
3811 NULL_RTX, 1, OPTAB_WIDEN);
3812
3813 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3814 gen_lowpart (SImode, data_regs[i]));
3815 emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4),
3816 gen_lowpart (SImode, tmp));
3817 ofs += 8;
3818 i++;
3819 }
3820
3821 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3822 {
3823 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3824 data_regs[i]);
3825 ofs += 4;
3826 i++;
3827 }
3828 }
3829
3830 if (i < nregs && GET_MODE (data_regs[i]) == DImode)
3831 {
3832 /* Write out a remaining block of words using unaligned methods. */
3833
3834 for (words = 1; i + words < nregs; words++)
3835 if (GET_MODE (data_regs[i + words]) != DImode)
3836 break;
3837
3838 if (words == 1)
3839 alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs);
3840 else
3841 alpha_expand_unaligned_store_words (data_regs + i, orig_dst,
3842 words, ofs);
3843
3844 i += words;
3845 ofs += words * 8;
3846 }
3847
3848 /* Due to the above, this won't be aligned. */
3849 /* ??? If we have more than one of these, consider constructing full
3850 words in registers and using alpha_expand_unaligned_store_words. */
3851 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3852 {
3853 alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs);
3854 ofs += 4;
3855 i++;
3856 }
3857
3858 if (dst_align >= 16)
3859 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
3860 {
3861 emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]);
3862 i++;
3863 ofs += 2;
3864 }
3865 else
3866 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
3867 {
3868 alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs);
3869 i++;
3870 ofs += 2;
3871 }
3872
3873 /* The remainder must be byte copies. */
3874 while (i < nregs)
3875 {
3876 gcc_assert (GET_MODE (data_regs[i]) == QImode);
3877 emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]);
3878 i++;
3879 ofs += 1;
3880 }
3881
3882 return 1;
3883 }
3884
3885 int
3886 alpha_expand_block_clear (rtx operands[])
3887 {
3888 rtx bytes_rtx = operands[1];
3889 rtx align_rtx = operands[3];
3890 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
3891 HOST_WIDE_INT bytes = orig_bytes;
3892 HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT;
3893 HOST_WIDE_INT alignofs = 0;
3894 rtx orig_dst = operands[0];
3895 rtx tmp;
3896 int i, words, ofs = 0;
3897
3898 if (orig_bytes <= 0)
3899 return 1;
3900 if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
3901 return 0;
3902
3903 /* Look for stricter alignment. */
3904 tmp = XEXP (orig_dst, 0);
3905 if (REG_P (tmp))
3906 align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3907 else if (GET_CODE (tmp) == PLUS
3908 && REG_P (XEXP (tmp, 0))
3909 && CONST_INT_P (XEXP (tmp, 1)))
3910 {
3911 HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3912 int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3913
3914 if (a > align)
3915 {
3916 if (a >= 64)
3917 align = a, alignofs = 8 - c % 8;
3918 else if (a >= 32)
3919 align = a, alignofs = 4 - c % 4;
3920 else if (a >= 16)
3921 align = a, alignofs = 2 - c % 2;
3922 }
3923 }
3924
3925 /* Handle an unaligned prefix first. */
3926
3927 if (alignofs > 0)
3928 {
3929 #if HOST_BITS_PER_WIDE_INT >= 64
3930 /* Given that alignofs is bounded by align, the only time BWX could
3931 generate three stores is for a 7 byte fill. Prefer two individual
3932 stores over a load/mask/store sequence. */
3933 if ((!TARGET_BWX || alignofs == 7)
3934 && align >= 32
3935 && !(alignofs == 4 && bytes >= 4))
3936 {
3937 enum machine_mode mode = (align >= 64 ? DImode : SImode);
3938 int inv_alignofs = (align >= 64 ? 8 : 4) - alignofs;
3939 rtx mem, tmp;
3940 HOST_WIDE_INT mask;
3941
3942 mem = adjust_address (orig_dst, mode, ofs - inv_alignofs);
3943 set_mem_alias_set (mem, 0);
3944
3945 mask = ~(~(HOST_WIDE_INT)0 << (inv_alignofs * 8));
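	  /* E.g. (illustrative): with align >= 64 and alignofs == 3,
	     inv_alignofs == 5 and mask == 0x000000ffffffffff, so the AND
	     below preserves the low five bytes of the quadword and zeroes
	     the bytes being filled (fewer if BYTES runs out first, per
	     the adjustment just below).  */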
3946 if (bytes < alignofs)
3947 {
3948 mask |= ~(HOST_WIDE_INT)0 << ((inv_alignofs + bytes) * 8);
3949 ofs += bytes;
3950 bytes = 0;
3951 }
3952 else
3953 {
3954 bytes -= alignofs;
3955 ofs += alignofs;
3956 }
3957 alignofs = 0;
3958
3959 tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask),
3960 NULL_RTX, 1, OPTAB_WIDEN);
3961
3962 emit_move_insn (mem, tmp);
3963 }
3964 #endif
3965
3966 if (TARGET_BWX && (alignofs & 1) && bytes >= 1)
3967 {
3968 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
3969 bytes -= 1;
3970 ofs += 1;
3971 alignofs -= 1;
3972 }
3973 if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2)
3974 {
3975 emit_move_insn (adjust_address (orig_dst, HImode, ofs), const0_rtx);
3976 bytes -= 2;
3977 ofs += 2;
3978 alignofs -= 2;
3979 }
3980 if (alignofs == 4 && bytes >= 4)
3981 {
3982 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
3983 bytes -= 4;
3984 ofs += 4;
3985 alignofs = 0;
3986 }
3987
3988 /* If we've not used the extra lead alignment information by now,
3989 we won't be able to. Downgrade align to match what's left over. */
3990 if (alignofs > 0)
3991 {
3992 alignofs = alignofs & -alignofs;
3993 align = MIN (align, alignofs * BITS_PER_UNIT);
3994 }
3995 }
3996
3997 /* Handle a block of contiguous long-words. */
3998
3999 if (align >= 64 && bytes >= 8)
4000 {
4001 words = bytes / 8;
4002
4003 for (i = 0; i < words; ++i)
4004 emit_move_insn (adjust_address (orig_dst, DImode, ofs + i * 8),
4005 const0_rtx);
4006
4007 bytes -= words * 8;
4008 ofs += words * 8;
4009 }
4010
4011 /* If the block is large and appropriately aligned, emit a single
4012 store followed by a sequence of stq_u insns. */
4013
4014 if (align >= 32 && bytes > 16)
4015 {
4016 rtx orig_dsta;
4017
4018 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
4019 bytes -= 4;
4020 ofs += 4;
4021
4022 orig_dsta = XEXP (orig_dst, 0);
4023 if (GET_CODE (orig_dsta) == LO_SUM)
4024 orig_dsta = force_reg (Pmode, orig_dsta);
4025
4026 words = bytes / 8;
4027 for (i = 0; i < words; ++i)
4028 {
4029 rtx mem
4030 = change_address (orig_dst, DImode,
4031 gen_rtx_AND (DImode,
4032 plus_constant (DImode, orig_dsta,
4033 ofs + i*8),
4034 GEN_INT (-8)));
4035 set_mem_alias_set (mem, 0);
4036 emit_move_insn (mem, const0_rtx);
4037 }
4038
4039 /* Depending on the alignment, the first stq_u may have overlapped
4040 with the initial stl, which means that the last stq_u didn't
4041 write as much as it would appear. Leave those questionable bytes
4042 unaccounted for. */
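      /* E.g. (illustrative): for a destination at 0x1004, the stl wrote
	 0x1004-0x1007 while the first stq_u rewrote all of 0x1000-0x1007,
	 hence the words * 8 - 4 adjustment below.  */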
4043 bytes -= words * 8 - 4;
4044 ofs += words * 8 - 4;
4045 }
4046
4047 /* Handle a smaller block of aligned words. */
4048
4049 if ((align >= 64 && bytes == 4)
4050 || (align == 32 && bytes >= 4))
4051 {
4052 words = bytes / 4;
4053
4054 for (i = 0; i < words; ++i)
4055 emit_move_insn (adjust_address (orig_dst, SImode, ofs + i * 4),
4056 const0_rtx);
4057
4058 bytes -= words * 4;
4059 ofs += words * 4;
4060 }
4061
4062 /* An unaligned block uses stq_u stores for as many as possible. */
4063
4064 if (bytes >= 8)
4065 {
4066 words = bytes / 8;
4067
4068 alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs);
4069
4070 bytes -= words * 8;
4071 ofs += words * 8;
4072 }
4073
4074 /* Next clean up any trailing pieces. */
4075
4076 #if HOST_BITS_PER_WIDE_INT >= 64
4077 /* Count the number of bits in BYTES for which aligned stores could
4078 be emitted. */
4079 words = 0;
4080 for (i = (TARGET_BWX ? 1 : 4); i * BITS_PER_UNIT <= align ; i <<= 1)
4081 if (bytes & i)
4082 words += 1;
4083
4084 /* If we have appropriate alignment (and it wouldn't take too many
4085 instructions otherwise), mask out the bytes we need. */
4086 if (TARGET_BWX ? words > 2 : bytes > 0)
4087 {
4088 if (align >= 64)
4089 {
4090 rtx mem, tmp;
4091 HOST_WIDE_INT mask;
4092
4093 mem = adjust_address (orig_dst, DImode, ofs);
4094 set_mem_alias_set (mem, 0);
4095
4096 mask = ~(HOST_WIDE_INT)0 << (bytes * 8);
4097
4098 tmp = expand_binop (DImode, and_optab, mem, GEN_INT (mask),
4099 NULL_RTX, 1, OPTAB_WIDEN);
4100
4101 emit_move_insn (mem, tmp);
4102 return 1;
4103 }
4104 else if (align >= 32 && bytes < 4)
4105 {
4106 rtx mem, tmp;
4107 HOST_WIDE_INT mask;
4108
4109 mem = adjust_address (orig_dst, SImode, ofs);
4110 set_mem_alias_set (mem, 0);
4111
4112 mask = ~(HOST_WIDE_INT)0 << (bytes * 8);
4113
4114 tmp = expand_binop (SImode, and_optab, mem, GEN_INT (mask),
4115 NULL_RTX, 1, OPTAB_WIDEN);
4116
4117 emit_move_insn (mem, tmp);
4118 return 1;
4119 }
4120 }
4121 #endif
4122
4123 if (!TARGET_BWX && bytes >= 4)
4124 {
4125 alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs);
4126 bytes -= 4;
4127 ofs += 4;
4128 }
4129
4130 if (bytes >= 2)
4131 {
4132 if (align >= 16)
4133 {
4134 do {
4135 emit_move_insn (adjust_address (orig_dst, HImode, ofs),
4136 const0_rtx);
4137 bytes -= 2;
4138 ofs += 2;
4139 } while (bytes >= 2);
4140 }
4141 else if (! TARGET_BWX)
4142 {
4143 alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs);
4144 bytes -= 2;
4145 ofs += 2;
4146 }
4147 }
4148
4149 while (bytes > 0)
4150 {
4151 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4152 bytes -= 1;
4153 ofs += 1;
4154 }
4155
4156 return 1;
4157 }
4158
4159 /* Returns a mask so that zap(x, value) == x & mask. */
4160
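/* For illustration: ZAP clears byte i of x when bit i of VALUE is set,
   so for VALUE == 0x0f the low four bytes of x are cleared and the
   equivalent AND mask built below is 0xffffffff00000000.  */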
4161 rtx
4162 alpha_expand_zap_mask (HOST_WIDE_INT value)
4163 {
4164 rtx result;
4165 int i;
4166
4167 if (HOST_BITS_PER_WIDE_INT >= 64)
4168 {
4169 HOST_WIDE_INT mask = 0;
4170
4171 for (i = 7; i >= 0; --i)
4172 {
4173 mask <<= 8;
4174 if (!((value >> i) & 1))
4175 mask |= 0xff;
4176 }
4177
4178 result = gen_int_mode (mask, DImode);
4179 }
4180 else
4181 {
4182 HOST_WIDE_INT mask_lo = 0, mask_hi = 0;
4183
4184 gcc_assert (HOST_BITS_PER_WIDE_INT == 32);
4185
4186 for (i = 7; i >= 4; --i)
4187 {
4188 mask_hi <<= 8;
4189 if (!((value >> i) & 1))
4190 mask_hi |= 0xff;
4191 }
4192
4193 for (i = 3; i >= 0; --i)
4194 {
4195 mask_lo <<= 8;
4196 if (!((value >> i) & 1))
4197 mask_lo |= 0xff;
4198 }
4199
4200 result = immed_double_const (mask_lo, mask_hi, DImode);
4201 }
4202
4203 return result;
4204 }
4205
4206 void
4207 alpha_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx),
4208 enum machine_mode mode,
4209 rtx op0, rtx op1, rtx op2)
4210 {
4211 op0 = gen_lowpart (mode, op0);
4212
4213 if (op1 == const0_rtx)
4214 op1 = CONST0_RTX (mode);
4215 else
4216 op1 = gen_lowpart (mode, op1);
4217
4218 if (op2 == const0_rtx)
4219 op2 = CONST0_RTX (mode);
4220 else
4221 op2 = gen_lowpart (mode, op2);
4222
4223 emit_insn ((*gen) (op0, op1, op2));
4224 }
4225
4226 /* A subroutine of the atomic operation splitters. Jump to LABEL if
4227 COND is true. Mark the jump as unlikely to be taken. */
4228
4229 static void
4230 emit_unlikely_jump (rtx cond, rtx label)
4231 {
4232 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
4233 rtx x;
4234
4235 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
4236 x = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x));
4237 add_reg_note (x, REG_BR_PROB, very_unlikely);
4238 }
4239
4240 /* A subroutine of the atomic operation splitters. Emit a load-locked
4241 instruction in MODE. */
4242
4243 static void
4244 emit_load_locked (enum machine_mode mode, rtx reg, rtx mem)
4245 {
4246 rtx (*fn) (rtx, rtx) = NULL;
4247 if (mode == SImode)
4248 fn = gen_load_locked_si;
4249 else if (mode == DImode)
4250 fn = gen_load_locked_di;
4251 emit_insn (fn (reg, mem));
4252 }
4253
4254 /* A subroutine of the atomic operation splitters. Emit a store-conditional
4255 instruction in MODE. */
4256
4257 static void
4258 emit_store_conditional (enum machine_mode mode, rtx res, rtx mem, rtx val)
4259 {
4260 rtx (*fn) (rtx, rtx, rtx) = NULL;
4261 if (mode == SImode)
4262 fn = gen_store_conditional_si;
4263 else if (mode == DImode)
4264 fn = gen_store_conditional_di;
4265 emit_insn (fn (res, mem, val));
4266 }
4267
4268 /* Subroutines of the atomic operation splitters. Emit barriers
4269 as needed for the memory MODEL. */
4270
4271 static void
4272 alpha_pre_atomic_barrier (enum memmodel model)
4273 {
4274 if (need_atomic_barrier_p (model, true))
4275 emit_insn (gen_memory_barrier ());
4276 }
4277
4278 static void
4279 alpha_post_atomic_barrier (enum memmodel model)
4280 {
4281 if (need_atomic_barrier_p (model, false))
4282 emit_insn (gen_memory_barrier ());
4283 }
4284
4285 /* A subroutine of the atomic operation splitters. Emit an insxl
4286 instruction in MODE. */
4287
4288 static rtx
4289 emit_insxl (enum machine_mode mode, rtx op1, rtx op2)
4290 {
4291 rtx ret = gen_reg_rtx (DImode);
4292 rtx (*fn) (rtx, rtx, rtx);
4293
4294 switch (mode)
4295 {
4296 case QImode:
4297 fn = gen_insbl;
4298 break;
4299 case HImode:
4300 fn = gen_inswl;
4301 break;
4302 case SImode:
4303 fn = gen_insll;
4304 break;
4305 case DImode:
4306 fn = gen_insql;
4307 break;
4308 default:
4309 gcc_unreachable ();
4310 }
4311
4312 op1 = force_reg (mode, op1);
4313 emit_insn (fn (ret, op1, op2));
4314
4315 return ret;
4316 }
4317
4318 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
4319 to perform. MEM is the memory on which to operate. VAL is the second
4320 operand of the binary operator. BEFORE and AFTER are optional locations to
4321    return the value of MEM either before or after the operation. SCRATCH is
4322 a scratch register. */
4323
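/* Illustrative sketch of the emitted sequence for a SImode fetch-and-add
   (any barriers required by MODEL omitted):

	1:	ldl_l	scratch,0(mem)
		addl	scratch,val,scratch
		stl_c	scratch,0(mem)
		beq	scratch,1b

   where the beq retries on a failed store-conditional.  */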
4324 void
4325 alpha_split_atomic_op (enum rtx_code code, rtx mem, rtx val, rtx before,
4326 rtx after, rtx scratch, enum memmodel model)
4327 {
4328 enum machine_mode mode = GET_MODE (mem);
4329 rtx label, x, cond = gen_rtx_REG (DImode, REGNO (scratch));
4330
4331 alpha_pre_atomic_barrier (model);
4332
4333 label = gen_label_rtx ();
4334 emit_label (label);
4335 label = gen_rtx_LABEL_REF (DImode, label);
4336
4337 if (before == NULL)
4338 before = scratch;
4339 emit_load_locked (mode, before, mem);
4340
4341 if (code == NOT)
4342 {
4343 x = gen_rtx_AND (mode, before, val);
4344 emit_insn (gen_rtx_SET (VOIDmode, val, x));
4345
4346 x = gen_rtx_NOT (mode, val);
4347 }
4348 else
4349 x = gen_rtx_fmt_ee (code, mode, before, val);
4350 if (after)
4351 emit_insn (gen_rtx_SET (VOIDmode, after, copy_rtx (x)));
4352 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
4353
4354 emit_store_conditional (mode, cond, mem, scratch);
4355
4356 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4357 emit_unlikely_jump (x, label);
4358
4359 alpha_post_atomic_barrier (model);
4360 }
4361
4362 /* Expand a compare and swap operation. */
4363
4364 void
4365 alpha_split_compare_and_swap (rtx operands[])
4366 {
4367 rtx cond, retval, mem, oldval, newval;
4368 bool is_weak;
4369 enum memmodel mod_s, mod_f;
4370 enum machine_mode mode;
4371 rtx label1, label2, x;
4372
4373 cond = operands[0];
4374 retval = operands[1];
4375 mem = operands[2];
4376 oldval = operands[3];
4377 newval = operands[4];
4378 is_weak = (operands[5] != const0_rtx);
4379 mod_s = (enum memmodel) INTVAL (operands[6]);
4380 mod_f = (enum memmodel) INTVAL (operands[7]);
4381 mode = GET_MODE (mem);
4382
4383 alpha_pre_atomic_barrier (mod_s);
4384
4385 label1 = NULL_RTX;
4386 if (!is_weak)
4387 {
4388 label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4389 emit_label (XEXP (label1, 0));
4390 }
4391 label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4392
4393 emit_load_locked (mode, retval, mem);
4394
4395 x = gen_lowpart (DImode, retval);
4396 if (oldval == const0_rtx)
4397 {
4398 emit_move_insn (cond, const0_rtx);
4399 x = gen_rtx_NE (DImode, x, const0_rtx);
4400 }
4401 else
4402 {
4403 x = gen_rtx_EQ (DImode, x, oldval);
4404 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
4405 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4406 }
4407 emit_unlikely_jump (x, label2);
4408
4409 emit_move_insn (cond, newval);
4410 emit_store_conditional (mode, cond, mem, gen_lowpart (mode, cond));
4411
4412 if (!is_weak)
4413 {
4414 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4415 emit_unlikely_jump (x, label1);
4416 }
4417
4418 if (mod_f != MEMMODEL_RELAXED)
4419 emit_label (XEXP (label2, 0));
4420
4421 alpha_post_atomic_barrier (mod_s);
4422
4423 if (mod_f == MEMMODEL_RELAXED)
4424 emit_label (XEXP (label2, 0));
4425 }
4426
4427 void
4428 alpha_expand_compare_and_swap_12 (rtx operands[])
4429 {
4430 rtx cond, dst, mem, oldval, newval, is_weak, mod_s, mod_f;
4431 enum machine_mode mode;
4432 rtx addr, align, wdst;
4433 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
4434
4435 cond = operands[0];
4436 dst = operands[1];
4437 mem = operands[2];
4438 oldval = operands[3];
4439 newval = operands[4];
4440 is_weak = operands[5];
4441 mod_s = operands[6];
4442 mod_f = operands[7];
4443 mode = GET_MODE (mem);
4444
4445 /* We forced the address into a register via mem_noofs_operand. */
4446 addr = XEXP (mem, 0);
4447 gcc_assert (register_operand (addr, DImode));
4448
4449 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4450 NULL_RTX, 1, OPTAB_DIRECT);
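  /* E.g. (illustrative): for ADDR == 0x1003, ALIGN becomes 0x1000; the
     LL/SC loop in the split pattern below operates on that aligned
     quadword, with insxl/extxl isolating the byte at offset 3.  */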
4451
4452 oldval = convert_modes (DImode, mode, oldval, 1);
4453
4454 if (newval != const0_rtx)
4455 newval = emit_insxl (mode, newval, addr);
4456
4457 wdst = gen_reg_rtx (DImode);
4458 if (mode == QImode)
4459 gen = gen_atomic_compare_and_swapqi_1;
4460 else
4461 gen = gen_atomic_compare_and_swaphi_1;
4462 emit_insn (gen (cond, wdst, mem, oldval, newval, align,
4463 is_weak, mod_s, mod_f));
4464
4465 emit_move_insn (dst, gen_lowpart (mode, wdst));
4466 }
4467
4468 void
4469 alpha_split_compare_and_swap_12 (rtx operands[])
4470 {
4471 rtx cond, dest, orig_mem, oldval, newval, align, scratch;
4472 enum machine_mode mode;
4473 bool is_weak;
4474 enum memmodel mod_s, mod_f;
4475 rtx label1, label2, mem, addr, width, mask, x;
4476
4477 cond = operands[0];
4478 dest = operands[1];
4479 orig_mem = operands[2];
4480 oldval = operands[3];
4481 newval = operands[4];
4482 align = operands[5];
4483 is_weak = (operands[6] != const0_rtx);
4484 mod_s = (enum memmodel) INTVAL (operands[7]);
4485 mod_f = (enum memmodel) INTVAL (operands[8]);
4486 scratch = operands[9];
4487 mode = GET_MODE (orig_mem);
4488 addr = XEXP (orig_mem, 0);
4489
4490 mem = gen_rtx_MEM (DImode, align);
4491 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
4492 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
4493 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
4494
4495 alpha_pre_atomic_barrier (mod_s);
4496
4497 label1 = NULL_RTX;
4498 if (!is_weak)
4499 {
4500 label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4501 emit_label (XEXP (label1, 0));
4502 }
4503 label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4504
4505 emit_load_locked (DImode, scratch, mem);
4506
4507 width = GEN_INT (GET_MODE_BITSIZE (mode));
4508 mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4509 emit_insn (gen_extxl (dest, scratch, width, addr));
4510
4511 if (oldval == const0_rtx)
4512 {
4513 emit_move_insn (cond, const0_rtx);
4514 x = gen_rtx_NE (DImode, dest, const0_rtx);
4515 }
4516 else
4517 {
4518 x = gen_rtx_EQ (DImode, dest, oldval);
4519 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
4520 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4521 }
4522 emit_unlikely_jump (x, label2);
4523
4524 emit_insn (gen_mskxl (cond, scratch, mask, addr));
4525
4526 if (newval != const0_rtx)
4527 emit_insn (gen_iordi3 (cond, cond, newval));
4528
4529 emit_store_conditional (DImode, cond, mem, cond);
4530
4531 if (!is_weak)
4532 {
4533 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4534 emit_unlikely_jump (x, label1);
4535 }
4536
4537 if (mod_f != MEMMODEL_RELAXED)
4538 emit_label (XEXP (label2, 0));
4539
4540 alpha_post_atomic_barrier (mod_s);
4541
4542 if (mod_f == MEMMODEL_RELAXED)
4543 emit_label (XEXP (label2, 0));
4544 }
4545
4546 /* Expand an atomic exchange operation. */
4547
4548 void
4549 alpha_split_atomic_exchange (rtx operands[])
4550 {
4551 rtx retval, mem, val, scratch;
4552 enum memmodel model;
4553 enum machine_mode mode;
4554 rtx label, x, cond;
4555
4556 retval = operands[0];
4557 mem = operands[1];
4558 val = operands[2];
4559 model = (enum memmodel) INTVAL (operands[3]);
4560 scratch = operands[4];
4561 mode = GET_MODE (mem);
4562 cond = gen_lowpart (DImode, scratch);
4563
4564 alpha_pre_atomic_barrier (model);
4565
4566 label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4567 emit_label (XEXP (label, 0));
4568
4569 emit_load_locked (mode, retval, mem);
4570 emit_move_insn (scratch, val);
4571 emit_store_conditional (mode, cond, mem, scratch);
4572
4573 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4574 emit_unlikely_jump (x, label);
4575
4576 alpha_post_atomic_barrier (model);
4577 }
4578
4579 void
4580 alpha_expand_atomic_exchange_12 (rtx operands[])
4581 {
4582 rtx dst, mem, val, model;
4583 enum machine_mode mode;
4584 rtx addr, align, wdst;
4585 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
4586
4587 dst = operands[0];
4588 mem = operands[1];
4589 val = operands[2];
4590 model = operands[3];
4591 mode = GET_MODE (mem);
4592
4593 /* We forced the address into a register via mem_noofs_operand. */
4594 addr = XEXP (mem, 0);
4595 gcc_assert (register_operand (addr, DImode));
4596
4597 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4598 NULL_RTX, 1, OPTAB_DIRECT);
4599
4600 /* Insert val into the correct byte location within the word. */
4601 if (val != const0_rtx)
4602 val = emit_insxl (mode, val, addr);
4603
4604 wdst = gen_reg_rtx (DImode);
4605 if (mode == QImode)
4606 gen = gen_atomic_exchangeqi_1;
4607 else
4608 gen = gen_atomic_exchangehi_1;
4609 emit_insn (gen (wdst, mem, val, align, model));
4610
4611 emit_move_insn (dst, gen_lowpart (mode, wdst));
4612 }
4613
4614 void
4615 alpha_split_atomic_exchange_12 (rtx operands[])
4616 {
4617 rtx dest, orig_mem, addr, val, align, scratch;
4618 rtx label, mem, width, mask, x;
4619 enum machine_mode mode;
4620 enum memmodel model;
4621
4622 dest = operands[0];
4623 orig_mem = operands[1];
4624 val = operands[2];
4625 align = operands[3];
4626 model = (enum memmodel) INTVAL (operands[4]);
4627 scratch = operands[5];
4628 mode = GET_MODE (orig_mem);
4629 addr = XEXP (orig_mem, 0);
4630
4631 mem = gen_rtx_MEM (DImode, align);
4632 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
4633 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
4634 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
4635
4636 alpha_pre_atomic_barrier (model);
4637
4638 label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4639 emit_label (XEXP (label, 0));
4640
4641 emit_load_locked (DImode, scratch, mem);
4642
4643 width = GEN_INT (GET_MODE_BITSIZE (mode));
4644 mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4645 emit_insn (gen_extxl (dest, scratch, width, addr));
4646 emit_insn (gen_mskxl (scratch, scratch, mask, addr));
4647 if (val != const0_rtx)
4648 emit_insn (gen_iordi3 (scratch, scratch, val));
4649
4650 emit_store_conditional (DImode, scratch, mem, scratch);
4651
4652 x = gen_rtx_EQ (DImode, scratch, const0_rtx);
4653 emit_unlikely_jump (x, label);
4654
4655 alpha_post_atomic_barrier (model);
4656 }
4657
4658 /* Adjust the cost of a scheduling dependency. Return the new cost of
4659 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4660
4661 static int
4662 alpha_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4663 {
4664 enum attr_type dep_insn_type;
4665
4666 /* If the dependence is an anti-dependence, there is no cost. For an
4667 output dependence, there is sometimes a cost, but it doesn't seem
4668 worth handling those few cases. */
4669 if (REG_NOTE_KIND (link) != 0)
4670 return cost;
4671
4672 /* If we can't recognize the insns, we can't really do anything. */
4673 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
4674 return cost;
4675
4676 dep_insn_type = get_attr_type (dep_insn);
4677
4678 /* Bring in the user-defined memory latency. */
4679 if (dep_insn_type == TYPE_ILD
4680 || dep_insn_type == TYPE_FLD
4681 || dep_insn_type == TYPE_LDSYM)
4682 cost += alpha_memory_latency-1;
4683
4684 /* Everything else handled in DFA bypasses now. */
4685
4686 return cost;
4687 }
4688
4689 /* The number of instructions that can be issued per cycle. */
4690
4691 static int
4692 alpha_issue_rate (void)
4693 {
4694 return (alpha_tune == PROCESSOR_EV4 ? 2 : 4);
4695 }
4696
4697 /* How many alternative schedules to try. This should be as wide as the
4698 scheduling freedom in the DFA, but no wider. Making this value too
4699    large results in extra work for the scheduler.
4700
4701 For EV4, loads can be issued to either IB0 or IB1, thus we have 2
4702 alternative schedules. For EV5, we can choose between E0/E1 and
4703 FA/FM. For EV6, an arithmetic insn can be issued to U0/U1/L0/L1. */
4704
4705 static int
4706 alpha_multipass_dfa_lookahead (void)
4707 {
4708 return (alpha_tune == PROCESSOR_EV6 ? 4 : 2);
4709 }
4710
4711 /* Machine-specific function data. */
4712
4713 struct GTY(()) alpha_links;
4714
4715 struct GTY(()) machine_function
4716 {
4717 /* For OSF. */
4718 const char *some_ld_name;
4719
4720 /* For flag_reorder_blocks_and_partition. */
4721 rtx gp_save_rtx;
4722
4723 /* For VMS condition handlers. */
4724 bool uses_condition_handler;
4725
4726 /* Linkage entries. */
4727 splay_tree GTY ((param1_is (char *), param2_is (struct alpha_links *)))
4728 links;
4729 };
4730
4731 /* How to allocate a 'struct machine_function'. */
4732
4733 static struct machine_function *
4734 alpha_init_machine_status (void)
4735 {
4736 return ggc_alloc_cleared_machine_function ();
4737 }
4738
4739 /* Support for frame based VMS condition handlers. */
4740
4741 /* A VMS condition handler may be established for a function with a call to
4742 __builtin_establish_vms_condition_handler, and cancelled with a call to
4743 __builtin_revert_vms_condition_handler.
4744
4745 The VMS Condition Handling Facility knows about the existence of a handler
4746    from the procedure descriptor .handler field. As with the VMS native compilers,
4747 we store the user specified handler's address at a fixed location in the
4748 stack frame and point the procedure descriptor at a common wrapper which
4749 fetches the real handler's address and issues an indirect call.
4750
4751 The indirection wrapper is "__gcc_shell_handler", provided by libgcc.
4752
4753 We force the procedure kind to PT_STACK, and the fixed frame location is
4754 fp+8, just before the register save area. We use the handler_data field in
4755 the procedure descriptor to state the fp offset at which the installed
4756 handler address can be found. */
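/* Usage sketch (illustrative; my_handler is a hypothetical user routine
   with the VMS condition handler signature):

	prev = __builtin_establish_vms_condition_handler (my_handler);
	...
	prev = __builtin_revert_vms_condition_handler ();  */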
4757
4758 #define VMS_COND_HANDLER_FP_OFFSET 8
4759
4760 /* Expand code to store the currently installed user VMS condition handler
4761 into TARGET and install HANDLER as the new condition handler. */
4762
4763 void
4764 alpha_expand_builtin_establish_vms_condition_handler (rtx target, rtx handler)
4765 {
4766 rtx handler_slot_address = plus_constant (Pmode, hard_frame_pointer_rtx,
4767 VMS_COND_HANDLER_FP_OFFSET);
4768
4769 rtx handler_slot
4770 = gen_rtx_MEM (DImode, handler_slot_address);
4771
4772 emit_move_insn (target, handler_slot);
4773 emit_move_insn (handler_slot, handler);
4774
4775 /* Notify the start/prologue/epilogue emitters that the condition handler
4776 slot is needed. In addition to reserving the slot space, this will force
4777    the procedure kind to PT_STACK, ensuring that the hard_frame_pointer_rtx
4778 use above is correct. */
4779 cfun->machine->uses_condition_handler = true;
4780 }
4781
4782 /* Expand code to store the current VMS condition handler into TARGET and
4783 nullify it. */
4784
4785 void
4786 alpha_expand_builtin_revert_vms_condition_handler (rtx target)
4787 {
4788 /* We implement this by establishing a null condition handler, with the tiny
4789 side effect of setting uses_condition_handler. This is a little bit
4790    pessimistic if no actual builtin_establish call is ever issued, but that
4791    is harmless and not expected to happen anyway. */
4792
4793 alpha_expand_builtin_establish_vms_condition_handler (target, const0_rtx);
4794 }
4795
4796 /* Functions to save and restore alpha_return_addr_rtx. */
4797
4798 /* Start the ball rolling with RETURN_ADDR_RTX. */
4799
4800 rtx
4801 alpha_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4802 {
4803 if (count != 0)
4804 return const0_rtx;
4805
4806 return get_hard_reg_initial_val (Pmode, REG_RA);
4807 }
4808
4809 /* Return or create a memory slot containing the gp value for the current
4810 function. Needed only if TARGET_LD_BUGGY_LDGP. */
4811
4812 rtx
4813 alpha_gp_save_rtx (void)
4814 {
4815 rtx seq, m = cfun->machine->gp_save_rtx;
4816
4817 if (m == NULL)
4818 {
4819 start_sequence ();
4820
4821 m = assign_stack_local (DImode, UNITS_PER_WORD, BITS_PER_WORD);
4822 m = validize_mem (m);
4823 emit_move_insn (m, pic_offset_table_rtx);
4824
4825 seq = get_insns ();
4826 end_sequence ();
4827
4828 /* We used to simply emit the sequence after entry_of_function.
4829 However this breaks the CFG if the first instruction in the
4830 first block is not the NOTE_INSN_BASIC_BLOCK, for example a
4831 label. Emit the sequence properly on the edge. We are only
4832 invoked from dw2_build_landing_pads and finish_eh_generation
4833 will call commit_edge_insertions thanks to a kludge. */
4834 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
4835
4836 cfun->machine->gp_save_rtx = m;
4837 }
4838
4839 return m;
4840 }
4841
4842 static void
4843 alpha_instantiate_decls (void)
4844 {
4845 if (cfun->machine->gp_save_rtx != NULL_RTX)
4846 instantiate_decl_rtl (cfun->machine->gp_save_rtx);
4847 }
4848
4849 static int
4850 alpha_ra_ever_killed (void)
4851 {
4852 rtx top;
4853
4854 if (!has_hard_reg_initial_val (Pmode, REG_RA))
4855 return (int)df_regs_ever_live_p (REG_RA);
4856
4857 push_topmost_sequence ();
4858 top = get_insns ();
4859 pop_topmost_sequence ();
4860
4861 return reg_set_between_p (gen_rtx_REG (Pmode, REG_RA), top, NULL_RTX);
4862 }
4863
4864
4865 /* Return the trap mode suffix applicable to the current
4866 instruction, or NULL. */
4867
4868 static const char *
4869 get_trap_mode_suffix (void)
4870 {
4871 enum attr_trap_suffix s = get_attr_trap_suffix (current_output_insn);
4872
4873 switch (s)
4874 {
4875 case TRAP_SUFFIX_NONE:
4876 return NULL;
4877
4878 case TRAP_SUFFIX_SU:
4879 if (alpha_fptm >= ALPHA_FPTM_SU)
4880 return "su";
4881 return NULL;
4882
4883 case TRAP_SUFFIX_SUI:
4884 if (alpha_fptm >= ALPHA_FPTM_SUI)
4885 return "sui";
4886 return NULL;
4887
4888 case TRAP_SUFFIX_V_SV:
4889 switch (alpha_fptm)
4890 {
4891 case ALPHA_FPTM_N:
4892 return NULL;
4893 case ALPHA_FPTM_U:
4894 return "v";
4895 case ALPHA_FPTM_SU:
4896 case ALPHA_FPTM_SUI:
4897 return "sv";
4898 default:
4899 gcc_unreachable ();
4900 }
4901
4902 case TRAP_SUFFIX_V_SV_SVI:
4903 switch (alpha_fptm)
4904 {
4905 case ALPHA_FPTM_N:
4906 return NULL;
4907 case ALPHA_FPTM_U:
4908 return "v";
4909 case ALPHA_FPTM_SU:
4910 return "sv";
4911 case ALPHA_FPTM_SUI:
4912 return "svi";
4913 default:
4914 gcc_unreachable ();
4915 }
4916 break;
4917
4918 case TRAP_SUFFIX_U_SU_SUI:
4919 switch (alpha_fptm)
4920 {
4921 case ALPHA_FPTM_N:
4922 return NULL;
4923 case ALPHA_FPTM_U:
4924 return "u";
4925 case ALPHA_FPTM_SU:
4926 return "su";
4927 case ALPHA_FPTM_SUI:
4928 return "sui";
4929 default:
4930 gcc_unreachable ();
4931 }
4932 break;
4933
4934 default:
4935 gcc_unreachable ();
4936 }
4937 gcc_unreachable ();
4938 }
4939
4940 /* Return the rounding mode suffix applicable to the current
4941 instruction, or NULL. */
4942
4943 static const char *
4944 get_round_mode_suffix (void)
4945 {
4946 enum attr_round_suffix s = get_attr_round_suffix (current_output_insn);
4947
4948 switch (s)
4949 {
4950 case ROUND_SUFFIX_NONE:
4951 return NULL;
4952 case ROUND_SUFFIX_NORMAL:
4953 switch (alpha_fprm)
4954 {
4955 case ALPHA_FPRM_NORM:
4956 return NULL;
4957 case ALPHA_FPRM_MINF:
4958 return "m";
4959 case ALPHA_FPRM_CHOP:
4960 return "c";
4961 case ALPHA_FPRM_DYN:
4962 return "d";
4963 default:
4964 gcc_unreachable ();
4965 }
4966 break;
4967
4968 case ROUND_SUFFIX_C:
4969 return "c";
4970
4971 default:
4972 gcc_unreachable ();
4973 }
4974 gcc_unreachable ();
4975 }
4976
4977 /* Locate some local-dynamic symbol still in use by this function
4978 so that we can print its name in some movdi_er_tlsldm pattern. */
4979
4980 static int
4981 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
4982 {
4983 rtx x = *px;
4984
4985 if (GET_CODE (x) == SYMBOL_REF
4986 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
4987 {
4988 cfun->machine->some_ld_name = XSTR (x, 0);
4989 return 1;
4990 }
4991
4992 return 0;
4993 }
4994
4995 static const char *
4996 get_some_local_dynamic_name (void)
4997 {
4998 rtx insn;
4999
5000 if (cfun->machine->some_ld_name)
5001 return cfun->machine->some_ld_name;
5002
5003 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
5004 if (INSN_P (insn)
5005 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
5006 return cfun->machine->some_ld_name;
5007
5008 gcc_unreachable ();
5009 }
5010
5011 /* Print an operand. Recognize special options, documented below. */
5012
5013 void
5014 print_operand (FILE *file, rtx x, int code)
5015 {
5016 int i;
5017
5018 switch (code)
5019 {
5020 case '~':
5021 /* Print the assembler name of the current function. */
5022 assemble_name (file, alpha_fnname);
5023 break;
5024
5025 case '&':
5026 assemble_name (file, get_some_local_dynamic_name ());
5027 break;
5028
5029 case '/':
5030 {
5031 const char *trap = get_trap_mode_suffix ();
5032 const char *round = get_round_mode_suffix ();
5033
5034 if (trap || round)
5035 fprintf (file, "/%s%s", (trap ? trap : ""), (round ? round : ""));
5036 break;
5037 }
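      /* For instance (illustrative): with -mfp-trap-mode=su and
	 -mfp-rounding-mode=d, an addt is emitted as "addt/sud".  */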
5038
5039 case ',':
5040 /* Generates single precision instruction suffix. */
5041 fputc ((TARGET_FLOAT_VAX ? 'f' : 's'), file);
5042 break;
5043
5044 case '-':
5045 /* Generates double precision instruction suffix. */
5046 fputc ((TARGET_FLOAT_VAX ? 'g' : 't'), file);
5047 break;
5048
5049 case '#':
5050 if (alpha_this_literal_sequence_number == 0)
5051 alpha_this_literal_sequence_number = alpha_next_sequence_number++;
5052 fprintf (file, "%d", alpha_this_literal_sequence_number);
5053 break;
5054
5055 case '*':
5056 if (alpha_this_gpdisp_sequence_number == 0)
5057 alpha_this_gpdisp_sequence_number = alpha_next_sequence_number++;
5058 fprintf (file, "%d", alpha_this_gpdisp_sequence_number);
5059 break;
5060
5061 case 'H':
5062 if (GET_CODE (x) == HIGH)
5063 output_addr_const (file, XEXP (x, 0));
5064 else
5065 output_operand_lossage ("invalid %%H value");
5066 break;
5067
5068 case 'J':
5069 {
5070 const char *lituse;
5071
5072 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD_CALL)
5073 {
5074 x = XVECEXP (x, 0, 0);
5075 lituse = "lituse_tlsgd";
5076 }
5077 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM_CALL)
5078 {
5079 x = XVECEXP (x, 0, 0);
5080 lituse = "lituse_tlsldm";
5081 }
5082 else if (CONST_INT_P (x))
5083 lituse = "lituse_jsr";
5084 else
5085 {
5086 output_operand_lossage ("invalid %%J value");
5087 break;
5088 }
5089
5090 if (x != const0_rtx)
5091 fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5092 }
5093 break;
5094
5095 case 'j':
5096 {
5097 const char *lituse;
5098
5099 #ifdef HAVE_AS_JSRDIRECT_RELOCS
5100 lituse = "lituse_jsrdirect";
5101 #else
5102 lituse = "lituse_jsr";
5103 #endif
5104
5105 gcc_assert (INTVAL (x) != 0);
5106 fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5107 }
5108 break;
5109 case 'r':
5110 /* If this operand is the constant zero, write it as "$31". */
5111 if (REG_P (x))
5112 fprintf (file, "%s", reg_names[REGNO (x)]);
5113 else if (x == CONST0_RTX (GET_MODE (x)))
5114 fprintf (file, "$31");
5115 else
5116 output_operand_lossage ("invalid %%r value");
5117 break;
5118
5119 case 'R':
5120 /* Similar, but for floating-point. */
5121 if (REG_P (x))
5122 fprintf (file, "%s", reg_names[REGNO (x)]);
5123 else if (x == CONST0_RTX (GET_MODE (x)))
5124 fprintf (file, "$f31");
5125 else
5126 output_operand_lossage ("invalid %%R value");
5127 break;
5128
5129 case 'N':
5130 /* Write the 1's complement of a constant. */
5131 if (!CONST_INT_P (x))
5132 output_operand_lossage ("invalid %%N value");
5133
5134 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
5135 break;
5136
5137 case 'P':
5138 /* Write 1 << C, for a constant C. */
5139 if (!CONST_INT_P (x))
5140 output_operand_lossage ("invalid %%P value");
5141
5142 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) 1 << INTVAL (x));
5143 break;
5144
5145 case 'h':
5146 /* Write the high-order 16 bits of a constant, sign-extended. */
5147 if (!CONST_INT_P (x))
5148 output_operand_lossage ("invalid %%h value");
5149
5150 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 16);
5151 break;
5152
5153 case 'L':
5154 /* Write the low-order 16 bits of a constant, sign-extended. */
5155 if (!CONST_INT_P (x))
5156 output_operand_lossage ("invalid %%L value");
5157
5158 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
5159 (INTVAL (x) & 0xffff) - 2 * (INTVAL (x) & 0x8000));
5160 break;
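      /* For instance (illustrative): for the constant 0x12348765 the
	 low 16 bits are 0x8765; bit 15 is set, so the expression above
	 yields the sign-extended value -0x789b, matching what an
	 lda/ldah pair expects.  */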
5161
5162 case 'm':
5163 /* Write mask for ZAP insn. */
5164 if (GET_CODE (x) == CONST_DOUBLE)
5165 {
5166 HOST_WIDE_INT mask = 0;
5167 HOST_WIDE_INT value;
5168
5169 value = CONST_DOUBLE_LOW (x);
5170 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
5171 i++, value >>= 8)
5172 if (value & 0xff)
5173 mask |= (1 << i);
5174
5175 value = CONST_DOUBLE_HIGH (x);
5176 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
5177 i++, value >>= 8)
5178 if (value & 0xff)
5179 mask |= (1 << (i + sizeof (int)));
5180
5181 fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask & 0xff);
5182 }
5183
5184 else if (CONST_INT_P (x))
5185 {
5186 HOST_WIDE_INT mask = 0, value = INTVAL (x);
5187
5188 for (i = 0; i < 8; i++, value >>= 8)
5189 if (value & 0xff)
5190 mask |= (1 << i);
5191
5192 fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask);
5193 }
5194 else
5195 output_operand_lossage ("invalid %%m value");
5196 break;
5197
5198 case 'M':
5199 /* 'b', 'w', 'l', or 'q' as the value of the constant. */
5200 if (!CONST_INT_P (x)
5201 || (INTVAL (x) != 8 && INTVAL (x) != 16
5202 && INTVAL (x) != 32 && INTVAL (x) != 64))
5203 output_operand_lossage ("invalid %%M value");
5204
5205 fprintf (file, "%s",
5206 (INTVAL (x) == 8 ? "b"
5207 : INTVAL (x) == 16 ? "w"
5208 : INTVAL (x) == 32 ? "l"
5209 : "q"));
5210 break;
5211
5212 case 'U':
5213 /* Similar, except do it from the mask. */
5214 if (CONST_INT_P (x))
5215 {
5216 HOST_WIDE_INT value = INTVAL (x);
5217
5218 if (value == 0xff)
5219 {
5220 fputc ('b', file);
5221 break;
5222 }
5223 if (value == 0xffff)
5224 {
5225 fputc ('w', file);
5226 break;
5227 }
5228 if (value == 0xffffffff)
5229 {
5230 fputc ('l', file);
5231 break;
5232 }
5233 if (value == -1)
5234 {
5235 fputc ('q', file);
5236 break;
5237 }
5238 }
5239 else if (HOST_BITS_PER_WIDE_INT == 32
5240 && GET_CODE (x) == CONST_DOUBLE
5241 && CONST_DOUBLE_LOW (x) == 0xffffffff
5242 && CONST_DOUBLE_HIGH (x) == 0)
5243 {
5244 fputc ('l', file);
5245 break;
5246 }
5247 output_operand_lossage ("invalid %%U value");
5248 break;
5249
5250 case 's':
5251 /* Write the constant value divided by 8. */
5252 if (!CONST_INT_P (x)
5253 || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
5254 || (INTVAL (x) & 7) != 0)
5255 output_operand_lossage ("invalid %%s value");
5256
5257 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8);
5258 break;
5259
5260 case 'S':
5261 /* Same, except compute (64 - c) / 8 */
5262
5263       if (!CONST_INT_P (x)
5264 	  || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
5265 	  || (INTVAL (x) & 7) != 0)
5266 	output_operand_lossage ("invalid %%S value");
5267
5268 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (64 - INTVAL (x)) / 8);
5269 break;
5270
5271 case 'C': case 'D': case 'c': case 'd':
5272 /* Write out comparison name. */
5273 {
5274 enum rtx_code c = GET_CODE (x);
5275
5276 if (!COMPARISON_P (x))
5277 output_operand_lossage ("invalid %%C value");
5278
5279 else if (code == 'D')
5280 c = reverse_condition (c);
5281 else if (code == 'c')
5282 c = swap_condition (c);
5283 else if (code == 'd')
5284 c = swap_condition (reverse_condition (c));
5285
5286 if (c == LEU)
5287 fprintf (file, "ule");
5288 else if (c == LTU)
5289 fprintf (file, "ult");
5290 else if (c == UNORDERED)
5291 fprintf (file, "un");
5292 else
5293 fprintf (file, "%s", GET_RTX_NAME (c));
5294 }
5295 break;
5296
5297 case 'E':
5298 /* Write the divide or modulus operator. */
5299 switch (GET_CODE (x))
5300 {
5301 case DIV:
5302 fprintf (file, "div%s", GET_MODE (x) == SImode ? "l" : "q");
5303 break;
5304 case UDIV:
5305 fprintf (file, "div%su", GET_MODE (x) == SImode ? "l" : "q");
5306 break;
5307 case MOD:
5308 fprintf (file, "rem%s", GET_MODE (x) == SImode ? "l" : "q");
5309 break;
5310 case UMOD:
5311 fprintf (file, "rem%su", GET_MODE (x) == SImode ? "l" : "q");
5312 break;
5313 default:
5314 output_operand_lossage ("invalid %%E value");
5315 break;
5316 }
5317 break;
5318
5319 case 'A':
5320 /* Write "_u" for unaligned access. */
5321 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
5322 fprintf (file, "_u");
5323 break;
5324
5325 case 0:
5326 if (REG_P (x))
5327 fprintf (file, "%s", reg_names[REGNO (x)]);
5328 else if (MEM_P (x))
5329 output_address (XEXP (x, 0));
5330 else if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == UNSPEC)
5331 {
5332 switch (XINT (XEXP (x, 0), 1))
5333 {
5334 case UNSPEC_DTPREL:
5335 case UNSPEC_TPREL:
5336 output_addr_const (file, XVECEXP (XEXP (x, 0), 0, 0));
5337 break;
5338 default:
5339 output_operand_lossage ("unknown relocation unspec");
5340 break;
5341 }
5342 }
5343 else
5344 output_addr_const (file, x);
5345 break;
5346
5347 default:
5348 output_operand_lossage ("invalid %%xn code");
5349 }
5350 }
5351
5352 void
5353 print_operand_address (FILE *file, rtx addr)
5354 {
5355 int basereg = 31;
5356 HOST_WIDE_INT offset = 0;
5357
5358 if (GET_CODE (addr) == AND)
5359 addr = XEXP (addr, 0);
5360
5361 if (GET_CODE (addr) == PLUS
5362 && CONST_INT_P (XEXP (addr, 1)))
5363 {
5364 offset = INTVAL (XEXP (addr, 1));
5365 addr = XEXP (addr, 0);
5366 }
5367
5368 if (GET_CODE (addr) == LO_SUM)
5369 {
5370 const char *reloc16, *reloclo;
5371 rtx op1 = XEXP (addr, 1);
5372
5373 if (GET_CODE (op1) == CONST && GET_CODE (XEXP (op1, 0)) == UNSPEC)
5374 {
5375 op1 = XEXP (op1, 0);
5376 switch (XINT (op1, 1))
5377 {
5378 case UNSPEC_DTPREL:
5379 reloc16 = NULL;
5380 reloclo = (alpha_tls_size == 16 ? "dtprel" : "dtprello");
5381 break;
5382 case UNSPEC_TPREL:
5383 reloc16 = NULL;
5384 reloclo = (alpha_tls_size == 16 ? "tprel" : "tprello");
5385 break;
5386 default:
5387 output_operand_lossage ("unknown relocation unspec");
5388 return;
5389 }
5390
5391 output_addr_const (file, XVECEXP (op1, 0, 0));
5392 }
5393 else
5394 {
5395 reloc16 = "gprel";
5396 reloclo = "gprellow";
5397 output_addr_const (file, op1);
5398 }
5399
5400 if (offset)
5401 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
5402
5403 addr = XEXP (addr, 0);
5404 switch (GET_CODE (addr))
5405 {
5406 case REG:
5407 basereg = REGNO (addr);
5408 break;
5409
5410 case SUBREG:
5411 basereg = subreg_regno (addr);
5412 break;
5413
5414 default:
5415 gcc_unreachable ();
5416 }
5417
5418 fprintf (file, "($%d)\t\t!%s", basereg,
5419 (basereg == 29 ? reloc16 : reloclo));
5420 return;
5421 }
5422
5423 switch (GET_CODE (addr))
5424 {
5425 case REG:
5426 basereg = REGNO (addr);
5427 break;
5428
5429 case SUBREG:
5430 basereg = subreg_regno (addr);
5431 break;
5432
5433 case CONST_INT:
5434 offset = INTVAL (addr);
5435 break;
5436
5437 #if TARGET_ABI_OPEN_VMS
5438 case SYMBOL_REF:
5439 fprintf (file, "%s", XSTR (addr, 0));
5440 return;
5441
5442 case CONST:
5443 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS
5444 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF);
5445 fprintf (file, "%s+" HOST_WIDE_INT_PRINT_DEC,
5446 XSTR (XEXP (XEXP (addr, 0), 0), 0),
5447 INTVAL (XEXP (XEXP (addr, 0), 1)));
5448 return;
5449
5450 #endif
5451 default:
5452 gcc_unreachable ();
5453 }
5454
5455 fprintf (file, HOST_WIDE_INT_PRINT_DEC "($%d)", offset, basereg);
5456 }
5457
5458 /* Emit RTL insns to initialize the variable parts of a trampoline at
5459 M_TRAMP. FNDECL is target function's decl. CHAIN_VALUE is an rtx
5460 for the static chain value for the function. */
5461
5462 static void
5463 alpha_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
5464 {
5465 rtx fnaddr, mem, word1, word2;
5466
5467 fnaddr = XEXP (DECL_RTL (fndecl), 0);
5468
5469 #ifdef POINTERS_EXTEND_UNSIGNED
5470 fnaddr = convert_memory_address (Pmode, fnaddr);
5471 chain_value = convert_memory_address (Pmode, chain_value);
5472 #endif
5473
5474 if (TARGET_ABI_OPEN_VMS)
5475 {
5476 const char *fnname;
5477 char *trname;
5478
5479 /* Construct the name of the trampoline entry point. */
5480 fnname = XSTR (fnaddr, 0);
5481 trname = (char *) alloca (strlen (fnname) + 5);
5482 strcpy (trname, fnname);
5483 strcat (trname, "..tr");
5484 fnname = ggc_alloc_string (trname, strlen (trname) + 1);
5485 word2 = gen_rtx_SYMBOL_REF (Pmode, fnname);
5486
5487 /* Trampoline (or "bounded") procedure descriptor is constructed from
5488 the function's procedure descriptor with certain fields zeroed IAW
5489 the VMS calling standard. This is stored in the first quadword. */
5490 word1 = force_reg (DImode, gen_const_mem (DImode, fnaddr));
5491 word1 = expand_and (DImode, word1,
5492 GEN_INT (HOST_WIDE_INT_C (0xffff0fff0000fff0)),
5493 NULL);
5494 }
5495 else
5496 {
5497 /* These 4 instructions are:
5498 ldq $1,24($27)
5499 ldq $27,16($27)
5500 jmp $31,($27),0
5501 nop
5502 We don't bother setting the HINT field of the jump; the nop
5503 is merely there for padding. */
5504 word1 = GEN_INT (HOST_WIDE_INT_C (0xa77b0010a43b0018));
5505 word2 = GEN_INT (HOST_WIDE_INT_C (0x47ff041f6bfb0000));
5506 }
5507
5508 /* Store the first two words, as computed above. */
5509 mem = adjust_address (m_tramp, DImode, 0);
5510 emit_move_insn (mem, word1);
5511 mem = adjust_address (m_tramp, DImode, 8);
5512 emit_move_insn (mem, word2);
5513
5514 /* Store function address and static chain value. */
5515 mem = adjust_address (m_tramp, Pmode, 16);
5516 emit_move_insn (mem, fnaddr);
5517 mem = adjust_address (m_tramp, Pmode, 24);
5518 emit_move_insn (mem, chain_value);
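  /* The resulting 32-byte trampoline thus looks like (OSF sketch):
	  0: word1  (ldq $1,24($27); ldq $27,16($27))
	  8: word2  (jmp $31,($27),0; nop)
	 16: target function address
	 24: static chain value  */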
5519
5520 if (TARGET_ABI_OSF)
5521 {
5522 emit_insn (gen_imb ());
5523 #ifdef HAVE_ENABLE_EXECUTE_STACK
5524 emit_library_call (init_one_libfunc ("__enable_execute_stack"),
5525 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
5526 #endif
5527 }
5528 }
5529
5530 /* Determine where to put an argument to a function.
5531 Value is zero to push the argument on the stack,
5532 or a hard register in which to store the argument.
5533
5534 MODE is the argument's machine mode.
5535 TYPE is the data type of the argument (as a tree).
5536 This is null for libcalls where that information may
5537 not be available.
5538 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5539 the preceding args and about the function being called.
5540 NAMED is nonzero if this argument is a named parameter
5541 (otherwise it is an extra parameter matching an ellipsis).
5542
5543 On Alpha the first 6 words of args are normally in registers
5544 and the rest are pushed. */
5545
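/* For instance (illustrative): the third named integer argument has
   num_args == 2 and basereg == 16, so it is passed in $18; a third
   floating-point argument would use basereg == 48 and arrive in $f18.  */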
5546 static rtx
5547 alpha_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
5548 const_tree type, bool named ATTRIBUTE_UNUSED)
5549 {
5550 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5551 int basereg;
5552 int num_args;
5553
5554 /* Don't get confused and pass small structures in FP registers. */
5555 if (type && AGGREGATE_TYPE_P (type))
5556 basereg = 16;
5557 else
5558 {
5559 #ifdef ENABLE_CHECKING
5560 /* With alpha_split_complex_arg, we shouldn't see any raw complex
5561 values here. */
5562 gcc_assert (!COMPLEX_MODE_P (mode));
5563 #endif
5564
5565 /* Set up defaults for FP operands passed in FP registers, and
5566 integral operands passed in integer registers. */
5567 if (TARGET_FPREGS && GET_MODE_CLASS (mode) == MODE_FLOAT)
5568 basereg = 32 + 16;
5569 else
5570 basereg = 16;
5571 }
5572
5573 /* ??? Irritatingly, the definition of CUMULATIVE_ARGS is different for
5574 the two platforms, so we can't avoid conditional compilation. */
5575 #if TARGET_ABI_OPEN_VMS
5576 {
5577 if (mode == VOIDmode)
5578 return alpha_arg_info_reg_val (*cum);
5579
5580 num_args = cum->num_args;
5581 if (num_args >= 6
5582 || targetm.calls.must_pass_in_stack (mode, type))
5583 return NULL_RTX;
5584 }
5585 #elif TARGET_ABI_OSF
5586 {
5587 if (*cum >= 6)
5588 return NULL_RTX;
5589 num_args = *cum;
5590
5591 /* VOID is passed as a special flag for "last argument". */
5592 if (type == void_type_node)
5593 basereg = 16;
5594 else if (targetm.calls.must_pass_in_stack (mode, type))
5595 return NULL_RTX;
5596 }
5597 #else
5598 #error Unhandled ABI
5599 #endif
5600
5601 return gen_rtx_REG (mode, num_args + basereg);
5602 }
5603
5604 /* Update the data in CUM to advance over an argument
5605 of mode MODE and data type TYPE.
5606 (TYPE is null for libcalls where that information may not be available.) */
5607
5608 static void
5609 alpha_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
5610 const_tree type, bool named ATTRIBUTE_UNUSED)
5611 {
5612 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5613 bool onstack = targetm.calls.must_pass_in_stack (mode, type);
5614 int increment = onstack ? 6 : ALPHA_ARG_SIZE (mode, type, named);
5615
5616 #if TARGET_ABI_OSF
5617 *cum += increment;
5618 #else
5619 if (!onstack && cum->num_args < 6)
5620 cum->atypes[cum->num_args] = alpha_arg_type (mode);
5621 cum->num_args += increment;
5622 #endif
5623 }
5624
5625 static int
5626 alpha_arg_partial_bytes (cumulative_args_t cum_v,
5627 enum machine_mode mode ATTRIBUTE_UNUSED,
5628 tree type ATTRIBUTE_UNUSED,
5629 bool named ATTRIBUTE_UNUSED)
5630 {
5631 int words = 0;
5632 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED = get_cumulative_args (cum_v);
5633
5634 #if TARGET_ABI_OPEN_VMS
5635 if (cum->num_args < 6
5636 && 6 < cum->num_args + ALPHA_ARG_SIZE (mode, type, named))
5637 words = 6 - cum->num_args;
5638 #elif TARGET_ABI_OSF
5639 if (*cum < 6 && 6 < *cum + ALPHA_ARG_SIZE (mode, type, named))
5640 words = 6 - *cum;
5641 #else
5642 #error Unhandled ABI
5643 #endif
5644
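  /* Illustrative example: a two-word argument arriving when five slots
     are already in use straddles the boundary, so words == 1 and
     8 bytes travel in the last register while the remainder goes on
     the stack.  */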
5645 return words * UNITS_PER_WORD;
5646 }
5647
5648
5649 /* Return true if TYPE must be returned in memory, instead of in registers. */
5650
5651 static bool
5652 alpha_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
5653 {
5654 enum machine_mode mode = VOIDmode;
5655 int size;
5656
5657 if (type)
5658 {
5659 mode = TYPE_MODE (type);
5660
5661 /* All aggregates are returned in memory, except on OpenVMS where
5662 records that fit 64 bits should be returned by immediate value
5663 as required by section 3.8.7.1 of the OpenVMS Calling Standard. */
5664 if (TARGET_ABI_OPEN_VMS
5665 && TREE_CODE (type) != ARRAY_TYPE
5666 && (unsigned HOST_WIDE_INT) int_size_in_bytes(type) <= 8)
5667 return false;
5668
5669 if (AGGREGATE_TYPE_P (type))
5670 return true;
5671 }
5672
5673 size = GET_MODE_SIZE (mode);
5674 switch (GET_MODE_CLASS (mode))
5675 {
5676 case MODE_VECTOR_FLOAT:
5677 /* Pass all float vectors in memory, like an aggregate. */
5678 return true;
5679
5680 case MODE_COMPLEX_FLOAT:
5681 /* We judge complex floats on the size of their element,
5682 not the size of the whole type. */
5683 size = GET_MODE_UNIT_SIZE (mode);
5684 break;
5685
5686 case MODE_INT:
5687 case MODE_FLOAT:
5688 case MODE_COMPLEX_INT:
5689 case MODE_VECTOR_INT:
5690 break;
5691
5692 default:
5693 /* ??? We get called on all sorts of random stuff from
5694 aggregate_value_p. We must return something, but it's not
5695 clear what's safe to return. Pretend it's a struct I
5696 guess. */
5697 return true;
5698 }
5699
5700 /* Otherwise types must fit in one register. */
5701 return size > UNITS_PER_WORD;
5702 }
5703
5704 /* Return true if TYPE should be passed by invisible reference. */
5705
5706 static bool
5707 alpha_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
5708 enum machine_mode mode,
5709 const_tree type ATTRIBUTE_UNUSED,
5710 bool named ATTRIBUTE_UNUSED)
5711 {
5712 return mode == TFmode || mode == TCmode;
5713 }
5714
5715 /* Define how to find the value returned by a function. VALTYPE is the
5716 data type of the value (as a tree). If the precise function being
5717 called is known, FUNC is its FUNCTION_DECL; otherwise, FUNC is 0.
5718 MODE is set instead of VALTYPE for libcalls.
5719
5720 On Alpha the value is found in $0 for integer functions and
5721 $f0 for floating-point functions. */
5722
5723 rtx
5724 function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED,
5725 enum machine_mode mode)
5726 {
5727 unsigned int regnum, dummy ATTRIBUTE_UNUSED;
5728 enum mode_class mclass;
5729
5730 gcc_assert (!valtype || !alpha_return_in_memory (valtype, func));
5731
5732 if (valtype)
5733 mode = TYPE_MODE (valtype);
5734
5735 mclass = GET_MODE_CLASS (mode);
5736 switch (mclass)
5737 {
5738 case MODE_INT:
5739 /* Do the same thing as PROMOTE_MODE except for libcalls on VMS,
5740 where we have them returning both SImode and DImode. */
5741 if (!(TARGET_ABI_OPEN_VMS && valtype && AGGREGATE_TYPE_P (valtype)))
5742 PROMOTE_MODE (mode, dummy, valtype);
5743 /* FALLTHRU */
5744
5745 case MODE_COMPLEX_INT:
5746 case MODE_VECTOR_INT:
5747 regnum = 0;
5748 break;
5749
5750 case MODE_FLOAT:
5751 regnum = 32;
5752 break;
5753
5754 case MODE_COMPLEX_FLOAT:
5755 {
5756 enum machine_mode cmode = GET_MODE_INNER (mode);
5757
5758 return gen_rtx_PARALLEL
5759 (VOIDmode,
5760 gen_rtvec (2,
5761 gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 32),
5762 const0_rtx),
5763 gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 33),
5764 GEN_INT (GET_MODE_SIZE (cmode)))));
5765 }
5766
5767 case MODE_RANDOM:
5768 /* We should only reach here for BLKmode on VMS. */
5769 gcc_assert (TARGET_ABI_OPEN_VMS && mode == BLKmode);
5770 regnum = 0;
5771 break;
5772
5773 default:
5774 gcc_unreachable ();
5775 }
5776
5777 return gen_rtx_REG (mode, regnum);
5778 }
5779
5780 /* TCmode complex values are passed by invisible reference. We
5781 should not split these values. */
5782
5783 static bool
alpha_split_complex_arg(const_tree type)5784 alpha_split_complex_arg (const_tree type)
5785 {
5786 return TYPE_MODE (type) != TCmode;
5787 }
5788
5789 static tree
alpha_build_builtin_va_list(void)5790 alpha_build_builtin_va_list (void)
5791 {
5792 tree base, ofs, space, record, type_decl;
5793
5794 if (TARGET_ABI_OPEN_VMS)
5795 return ptr_type_node;
5796
5797 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
5798 type_decl = build_decl (BUILTINS_LOCATION,
5799 TYPE_DECL, get_identifier ("__va_list_tag"), record);
5800 TYPE_STUB_DECL (record) = type_decl;
5801 TYPE_NAME (record) = type_decl;
5802
5803 /* C++? SET_IS_AGGR_TYPE (record, 1); */
5804
5805 /* Dummy field to prevent alignment warnings. */
5806 space = build_decl (BUILTINS_LOCATION,
5807 FIELD_DECL, NULL_TREE, integer_type_node);
5808 DECL_FIELD_CONTEXT (space) = record;
5809 DECL_ARTIFICIAL (space) = 1;
5810 DECL_IGNORED_P (space) = 1;
5811
5812 ofs = build_decl (BUILTINS_LOCATION,
5813 FIELD_DECL, get_identifier ("__offset"),
5814 integer_type_node);
5815 DECL_FIELD_CONTEXT (ofs) = record;
5816 DECL_CHAIN (ofs) = space;
5817 /* ??? This is a hack, __offset is marked volatile to prevent
5818 DCE that confuses stdarg optimization and results in
5819 gcc.c-torture/execute/stdarg-1.c failure. See PR 41089. */
5820 TREE_THIS_VOLATILE (ofs) = 1;
5821
5822 base = build_decl (BUILTINS_LOCATION,
5823 FIELD_DECL, get_identifier ("__base"),
5824 ptr_type_node);
5825 DECL_FIELD_CONTEXT (base) = record;
5826 DECL_CHAIN (base) = ofs;
5827
5828 TYPE_FIELDS (record) = base;
5829 layout_type (record);
5830
5831 va_list_gpr_counter_field = ofs;
5832 return record;
5833 }
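
/* For reference (illustrative, not in the original source): the record
   built above is laid out like

     struct __va_list_tag {
       char *__base;    -- start of the register save area
       int __offset;    -- byte offset of the next argument
       int;             -- unnamed dummy field, avoids alignment warnings
     };

   while on VMS va_list is a plain pointer.  */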

#if TARGET_ABI_OSF
/* Helper function for alpha_stdarg_optimize_hook.  Skip over casts
   and constant additions.  */

static gimple
va_list_skip_additions (tree lhs)
{
  gimple stmt;

  for (;;)
    {
      enum tree_code code;

      stmt = SSA_NAME_DEF_STMT (lhs);

      if (gimple_code (stmt) == GIMPLE_PHI)
        return stmt;

      if (!is_gimple_assign (stmt)
          || gimple_assign_lhs (stmt) != lhs)
        return NULL;

      if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME)
        return stmt;
      code = gimple_assign_rhs_code (stmt);
      if (!CONVERT_EXPR_CODE_P (code)
          && ((code != PLUS_EXPR && code != POINTER_PLUS_EXPR)
              || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST
              || !host_integerp (gimple_assign_rhs2 (stmt), 1)))
        return stmt;

      lhs = gimple_assign_rhs1 (stmt);
    }
}

/* Check if LHS = RHS statement is
   LHS = *(ap.__base + ap.__offset + cst)
   or
   LHS = *(ap.__base
           + ((ap.__offset + cst <= 47)
              ? ap.__offset + cst - 48 : ap.__offset + cst) + cst2).
   If the former, indicate that GPR registers are needed,
   if the latter, indicate that FPR registers are needed.

   Also look for LHS = (*ptr).field, where ptr is one of the forms
   listed above.

   On alpha, cfun->va_list_gpr_size is used as size of the needed
   regs and cfun->va_list_fpr_size is a bitmask, bit 0 set if GPR
   registers are needed and bit 1 set if FPR registers are needed.
   Return true if va_list references should not be scanned for the
   current statement.  */

static bool
alpha_stdarg_optimize_hook (struct stdarg_info *si, const_gimple stmt)
{
  tree base, offset, rhs;
  int offset_arg = 1;
  gimple base_stmt;

  if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
      != GIMPLE_SINGLE_RHS)
    return false;

  rhs = gimple_assign_rhs1 (stmt);
  while (handled_component_p (rhs))
    rhs = TREE_OPERAND (rhs, 0);
  if (TREE_CODE (rhs) != MEM_REF
      || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME)
    return false;

  stmt = va_list_skip_additions (TREE_OPERAND (rhs, 0));
  if (stmt == NULL
      || !is_gimple_assign (stmt)
      || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR)
    return false;

  base = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (base) == SSA_NAME)
    {
      base_stmt = va_list_skip_additions (base);
      if (base_stmt
          && is_gimple_assign (base_stmt)
          && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
        base = gimple_assign_rhs1 (base_stmt);
    }

  if (TREE_CODE (base) != COMPONENT_REF
      || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
    {
      base = gimple_assign_rhs2 (stmt);
      if (TREE_CODE (base) == SSA_NAME)
        {
          base_stmt = va_list_skip_additions (base);
          if (base_stmt
              && is_gimple_assign (base_stmt)
              && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
            base = gimple_assign_rhs1 (base_stmt);
        }

      if (TREE_CODE (base) != COMPONENT_REF
          || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
        return false;

      offset_arg = 0;
    }

  base = get_base_address (base);
  if (TREE_CODE (base) != VAR_DECL
      || !bitmap_bit_p (si->va_list_vars, DECL_UID (base) + num_ssa_names))
    return false;

  offset = gimple_op (stmt, 1 + offset_arg);
  if (TREE_CODE (offset) == SSA_NAME)
    {
      gimple offset_stmt = va_list_skip_additions (offset);

      if (offset_stmt
          && gimple_code (offset_stmt) == GIMPLE_PHI)
        {
          HOST_WIDE_INT sub;
          gimple arg1_stmt, arg2_stmt;
          tree arg1, arg2;
          enum tree_code code1, code2;

          if (gimple_phi_num_args (offset_stmt) != 2)
            goto escapes;

          arg1_stmt
            = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 0));
          arg2_stmt
            = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 1));
          if (arg1_stmt == NULL
              || !is_gimple_assign (arg1_stmt)
              || arg2_stmt == NULL
              || !is_gimple_assign (arg2_stmt))
            goto escapes;

          code1 = gimple_assign_rhs_code (arg1_stmt);
          code2 = gimple_assign_rhs_code (arg2_stmt);
          if (code1 == COMPONENT_REF
              && (code2 == MINUS_EXPR || code2 == PLUS_EXPR))
            /* Do nothing.  */;
          else if (code2 == COMPONENT_REF
                   && (code1 == MINUS_EXPR || code1 == PLUS_EXPR))
            {
              gimple tem = arg1_stmt;
              code2 = code1;
              arg1_stmt = arg2_stmt;
              arg2_stmt = tem;
            }
          else
            goto escapes;

          if (!host_integerp (gimple_assign_rhs2 (arg2_stmt), 0))
            goto escapes;

          sub = tree_low_cst (gimple_assign_rhs2 (arg2_stmt), 0);
          if (code2 == MINUS_EXPR)
            sub = -sub;
          if (sub < -48 || sub > -32)
            goto escapes;

          arg1 = gimple_assign_rhs1 (arg1_stmt);
          arg2 = gimple_assign_rhs1 (arg2_stmt);
          if (TREE_CODE (arg2) == SSA_NAME)
            {
              arg2_stmt = va_list_skip_additions (arg2);
              if (arg2_stmt == NULL
                  || !is_gimple_assign (arg2_stmt)
                  || gimple_assign_rhs_code (arg2_stmt) != COMPONENT_REF)
                goto escapes;
              arg2 = gimple_assign_rhs1 (arg2_stmt);
            }
          if (arg1 != arg2)
            goto escapes;

          if (TREE_CODE (arg1) != COMPONENT_REF
              || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field
              || get_base_address (arg1) != base)
            goto escapes;

          /* Need floating point regs.  */
          cfun->va_list_fpr_size |= 2;
          return false;
        }
      if (offset_stmt
          && is_gimple_assign (offset_stmt)
          && gimple_assign_rhs_code (offset_stmt) == COMPONENT_REF)
        offset = gimple_assign_rhs1 (offset_stmt);
    }
  if (TREE_CODE (offset) != COMPONENT_REF
      || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field
      || get_base_address (offset) != base)
    goto escapes;
  else
    /* Need general regs.  */
    cfun->va_list_fpr_size |= 1;
  return false;

 escapes:
  si->va_list_escapes = true;
  return false;
}
#endif

/* Perform any actions needed for a function that is receiving a
   variable number of arguments.  */

static void
alpha_setup_incoming_varargs (cumulative_args_t pcum, enum machine_mode mode,
                              tree type, int *pretend_size, int no_rtl)
{
  CUMULATIVE_ARGS cum = *get_cumulative_args (pcum);

  /* Skip the current argument.  */
  targetm.calls.function_arg_advance (pack_cumulative_args (&cum), mode, type,
                                      true);

#if TARGET_ABI_OPEN_VMS
  /* For VMS, we allocate space for all 6 arg registers plus a count.

     However, if NO registers need to be saved, don't allocate any space.
     This is not only because we won't need the space, but because AP
     includes the current_pretend_args_size and we don't want to mess up
     any ap-relative addresses already made.  */
  if (cum.num_args < 6)
    {
      if (!no_rtl)
        {
          emit_move_insn (gen_rtx_REG (DImode, 1), virtual_incoming_args_rtx);
          emit_insn (gen_arg_home ());
        }
      *pretend_size = 7 * UNITS_PER_WORD;
    }
#else
  /* On OSF/1 and friends, we allocate space for all 12 arg registers, but
     only push those that are remaining.  However, if NO registers need to
     be saved, don't allocate any space.  This is not only because we won't
     need the space, but because AP includes the current_pretend_args_size
     and we don't want to mess up any ap-relative addresses already made.

     If we are not to use the floating-point registers, save the integer
     registers where we would put the floating-point registers.  This is
     not the most efficient way to implement varargs with just one register
     class, but it isn't worth doing anything more efficient in this rare
     case.  */
  if (cum >= 6)
    return;

  if (!no_rtl)
    {
      int count;
      alias_set_type set = get_varargs_alias_set ();
      rtx tmp;

      count = cfun->va_list_gpr_size / UNITS_PER_WORD;
      if (count > 6 - cum)
        count = 6 - cum;

      /* Detect whether integer registers or floating-point registers
         are needed by the detected va_arg statements.  See above for
         how these values are computed.  Note that the "escape" value
         is VA_LIST_MAX_FPR_SIZE, which is 255, which has both of
         these bits set.  */
      gcc_assert ((VA_LIST_MAX_FPR_SIZE & 3) == 3);

      if (cfun->va_list_fpr_size & 1)
        {
          tmp = gen_rtx_MEM (BLKmode,
                             plus_constant (Pmode, virtual_incoming_args_rtx,
                                            (cum + 6) * UNITS_PER_WORD));
          MEM_NOTRAP_P (tmp) = 1;
          set_mem_alias_set (tmp, set);
          move_block_from_reg (16 + cum, tmp, count);
        }

      if (cfun->va_list_fpr_size & 2)
        {
          tmp = gen_rtx_MEM (BLKmode,
                             plus_constant (Pmode, virtual_incoming_args_rtx,
                                            cum * UNITS_PER_WORD));
          MEM_NOTRAP_P (tmp) = 1;
          set_mem_alias_set (tmp, set);
          move_block_from_reg (16 + cum + TARGET_FPREGS * 32, tmp, count);
        }
    }
  *pretend_size = 12 * UNITS_PER_WORD;
#endif
}
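
/* Worked example (illustrative, not in the original source): for an OSF
   function with one named argument word (cum == 1), up to five FP arg
   registers starting at $f17 are dumped at virtual_incoming_args + 8
   onward and up to five integer arg registers starting at $17 at
   virtual_incoming_args + 56 onward; *pretend_size is always
   12 * 8 == 96 bytes.  */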

static void
alpha_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT offset;
  tree t, offset_field, base_field;

  if (TREE_CODE (TREE_TYPE (valist)) == ERROR_MARK)
    return;

  /* For Unix, TARGET_SETUP_INCOMING_VARARGS moves the starting address base
     up by 48, storing fp arg registers in the first 48 bytes, and the
     integer arg registers in the next 48 bytes.  This is only done,
     however, if any integer registers need to be stored.

     If no integer registers need be stored, then we must subtract 48
     in order to account for the integer arg registers which are counted
     in argsize above, but which are not actually stored on the stack.
     Must further be careful here about structures straddling the last
     integer argument register; that futzes with pretend_args_size,
     which changes the meaning of AP.  */

  if (NUM_ARGS < 6)
    offset = TARGET_ABI_OPEN_VMS ? UNITS_PER_WORD : 6 * UNITS_PER_WORD;
  else
    offset = -6 * UNITS_PER_WORD + crtl->args.pretend_args_size;

  if (TARGET_ABI_OPEN_VMS)
    {
      t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
      t = fold_build_pointer_plus_hwi (t, offset + NUM_ARGS * UNITS_PER_WORD);
      t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
  else
    {
      base_field = TYPE_FIELDS (TREE_TYPE (valist));
      offset_field = DECL_CHAIN (base_field);

      base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
                           valist, base_field, NULL_TREE);
      offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
                             valist, offset_field, NULL_TREE);

      t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
      t = fold_build_pointer_plus_hwi (t, offset);
      t = build2 (MODIFY_EXPR, TREE_TYPE (base_field), base_field, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

      t = build_int_cst (NULL_TREE, NUM_ARGS * UNITS_PER_WORD);
      t = build2 (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
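
/* Worked example (illustrative, not in the original source): for
   int f (const char *fmt, ...) on OSF, NUM_ARGS == 1, so __base is set
   to AP + 48 and __offset to 8.  The first variadic integer argument is
   then read from AP + 56 ($17's save slot) and, after the 48-byte bias
   applied by va_arg for floats, the first variadic double from AP + 8
   ($f17's slot).  */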

static tree
alpha_gimplify_va_arg_1 (tree type, tree base, tree offset,
                         gimple_seq *pre_p)
{
  tree type_size, ptr_type, addend, t, addr;
  gimple_seq internal_post;

  /* If the type could not be passed in registers, skip the block
     reserved for the registers.  */
  if (targetm.calls.must_pass_in_stack (TYPE_MODE (type), type))
    {
      t = build_int_cst (TREE_TYPE (offset), 6*8);
      gimplify_assign (offset,
                       build2 (MAX_EXPR, TREE_TYPE (offset), offset, t),
                       pre_p);
    }

  addend = offset;
  ptr_type = build_pointer_type_for_mode (type, ptr_mode, true);

  if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      tree real_part, imag_part, real_temp;

      real_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
                                           offset, pre_p);

      /* Copy the value into a new temporary, lest the formal temporary
         be reused out from under us.  */
      real_temp = get_initialized_tmp_var (real_part, pre_p, NULL);

      imag_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
                                           offset, pre_p);

      return build2 (COMPLEX_EXPR, type, real_temp, imag_part);
    }
  else if (TREE_CODE (type) == REAL_TYPE)
    {
      tree fpaddend, cond, fourtyeight;

      fourtyeight = build_int_cst (TREE_TYPE (addend), 6*8);
      fpaddend = fold_build2 (MINUS_EXPR, TREE_TYPE (addend),
                              addend, fourtyeight);
      cond = fold_build2 (LT_EXPR, boolean_type_node, addend, fourtyeight);
      addend = fold_build3 (COND_EXPR, TREE_TYPE (addend), cond,
                            fpaddend, addend);
    }

  /* Build the final address and force that value into a temporary.  */
  addr = fold_build_pointer_plus (fold_convert (ptr_type, base), addend);
  internal_post = NULL;
  gimplify_expr (&addr, pre_p, &internal_post, is_gimple_val, fb_rvalue);
  gimple_seq_add_seq (pre_p, internal_post);

  /* Update the offset field.  */
  type_size = TYPE_SIZE_UNIT (TYPE_MAIN_VARIANT (type));
  if (type_size == NULL || TREE_OVERFLOW (type_size))
    t = size_zero_node;
  else
    {
      t = size_binop (PLUS_EXPR, type_size, size_int (7));
      t = size_binop (TRUNC_DIV_EXPR, t, size_int (8));
      t = size_binop (MULT_EXPR, t, size_int (8));
    }
  t = fold_convert (TREE_TYPE (offset), t);
  gimplify_assign (offset, build2 (PLUS_EXPR, TREE_TYPE (offset), offset, t),
                   pre_p);

  return build_va_arg_indirect_ref (addr);
}
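
/* Worked example (illustrative, not in the original source): for a
   12-byte type the update above computes (12 + 7) / 8 * 8 = 16, so
   __offset always advances by a multiple of 8 bytes.  */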

static tree
alpha_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
                       gimple_seq *post_p)
{
  tree offset_field, base_field, offset, base, t, r;
  bool indirect;

  if (TARGET_ABI_OPEN_VMS)
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  base_field = TYPE_FIELDS (va_list_type_node);
  offset_field = DECL_CHAIN (base_field);
  base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
                       valist, base_field, NULL_TREE);
  offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
                         valist, offset_field, NULL_TREE);

  /* Pull the fields of the structure out into temporaries.  Since we never
     modify the base field, we can use a formal temporary.  Sign-extend the
     offset field so that it's the proper width for pointer arithmetic.  */
  base = get_formal_tmp_var (base_field, pre_p);

  t = fold_convert (build_nonstandard_integer_type (64, 0), offset_field);
  offset = get_initialized_tmp_var (t, pre_p, NULL);

  indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect)
    type = build_pointer_type_for_mode (type, ptr_mode, true);

  /* Find the value.  Note that this will be a stable indirection, or
     a composite of stable indirections in the case of complex.  */
  r = alpha_gimplify_va_arg_1 (type, base, offset, pre_p);

  /* Stuff the offset temporary back into its field.  */
  gimplify_assign (unshare_expr (offset_field),
                   fold_convert (TREE_TYPE (offset_field), offset), pre_p);

  if (indirect)
    r = build_va_arg_indirect_ref (r);

  return r;
}

/* Builtins.  */

enum alpha_builtin
{
  ALPHA_BUILTIN_CMPBGE,
  ALPHA_BUILTIN_EXTBL,
  ALPHA_BUILTIN_EXTWL,
  ALPHA_BUILTIN_EXTLL,
  ALPHA_BUILTIN_EXTQL,
  ALPHA_BUILTIN_EXTWH,
  ALPHA_BUILTIN_EXTLH,
  ALPHA_BUILTIN_EXTQH,
  ALPHA_BUILTIN_INSBL,
  ALPHA_BUILTIN_INSWL,
  ALPHA_BUILTIN_INSLL,
  ALPHA_BUILTIN_INSQL,
  ALPHA_BUILTIN_INSWH,
  ALPHA_BUILTIN_INSLH,
  ALPHA_BUILTIN_INSQH,
  ALPHA_BUILTIN_MSKBL,
  ALPHA_BUILTIN_MSKWL,
  ALPHA_BUILTIN_MSKLL,
  ALPHA_BUILTIN_MSKQL,
  ALPHA_BUILTIN_MSKWH,
  ALPHA_BUILTIN_MSKLH,
  ALPHA_BUILTIN_MSKQH,
  ALPHA_BUILTIN_UMULH,
  ALPHA_BUILTIN_ZAP,
  ALPHA_BUILTIN_ZAPNOT,
  ALPHA_BUILTIN_AMASK,
  ALPHA_BUILTIN_IMPLVER,
  ALPHA_BUILTIN_RPCC,
  ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
  ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER,

  /* TARGET_MAX */
  ALPHA_BUILTIN_MINUB8,
  ALPHA_BUILTIN_MINSB8,
  ALPHA_BUILTIN_MINUW4,
  ALPHA_BUILTIN_MINSW4,
  ALPHA_BUILTIN_MAXUB8,
  ALPHA_BUILTIN_MAXSB8,
  ALPHA_BUILTIN_MAXUW4,
  ALPHA_BUILTIN_MAXSW4,
  ALPHA_BUILTIN_PERR,
  ALPHA_BUILTIN_PKLB,
  ALPHA_BUILTIN_PKWB,
  ALPHA_BUILTIN_UNPKBL,
  ALPHA_BUILTIN_UNPKBW,

  /* TARGET_CIX */
  ALPHA_BUILTIN_CTTZ,
  ALPHA_BUILTIN_CTLZ,
  ALPHA_BUILTIN_CTPOP,

  ALPHA_BUILTIN_max
};

static enum insn_code const code_for_builtin[ALPHA_BUILTIN_max] = {
  CODE_FOR_builtin_cmpbge,
  CODE_FOR_extbl,
  CODE_FOR_extwl,
  CODE_FOR_extll,
  CODE_FOR_extql,
  CODE_FOR_extwh,
  CODE_FOR_extlh,
  CODE_FOR_extqh,
  CODE_FOR_builtin_insbl,
  CODE_FOR_builtin_inswl,
  CODE_FOR_builtin_insll,
  CODE_FOR_insql,
  CODE_FOR_inswh,
  CODE_FOR_inslh,
  CODE_FOR_insqh,
  CODE_FOR_mskbl,
  CODE_FOR_mskwl,
  CODE_FOR_mskll,
  CODE_FOR_mskql,
  CODE_FOR_mskwh,
  CODE_FOR_msklh,
  CODE_FOR_mskqh,
  CODE_FOR_umuldi3_highpart,
  CODE_FOR_builtin_zap,
  CODE_FOR_builtin_zapnot,
  CODE_FOR_builtin_amask,
  CODE_FOR_builtin_implver,
  CODE_FOR_builtin_rpcc,
  CODE_FOR_builtin_establish_vms_condition_handler,
  CODE_FOR_builtin_revert_vms_condition_handler,

  /* TARGET_MAX */
  CODE_FOR_builtin_minub8,
  CODE_FOR_builtin_minsb8,
  CODE_FOR_builtin_minuw4,
  CODE_FOR_builtin_minsw4,
  CODE_FOR_builtin_maxub8,
  CODE_FOR_builtin_maxsb8,
  CODE_FOR_builtin_maxuw4,
  CODE_FOR_builtin_maxsw4,
  CODE_FOR_builtin_perr,
  CODE_FOR_builtin_pklb,
  CODE_FOR_builtin_pkwb,
  CODE_FOR_builtin_unpkbl,
  CODE_FOR_builtin_unpkbw,

  /* TARGET_CIX */
  CODE_FOR_ctzdi2,
  CODE_FOR_clzdi2,
  CODE_FOR_popcountdi2
};

struct alpha_builtin_def
{
  const char *name;
  enum alpha_builtin code;
  unsigned int target_mask;
  bool is_const;
};

static struct alpha_builtin_def const zero_arg_builtins[] = {
  { "__builtin_alpha_implver",  ALPHA_BUILTIN_IMPLVER,  0, true },
  { "__builtin_alpha_rpcc",     ALPHA_BUILTIN_RPCC,     0, false }
};

static struct alpha_builtin_def const one_arg_builtins[] = {
  { "__builtin_alpha_amask",    ALPHA_BUILTIN_AMASK,    0, true },
  { "__builtin_alpha_pklb",     ALPHA_BUILTIN_PKLB,     MASK_MAX, true },
  { "__builtin_alpha_pkwb",     ALPHA_BUILTIN_PKWB,     MASK_MAX, true },
  { "__builtin_alpha_unpkbl",   ALPHA_BUILTIN_UNPKBL,   MASK_MAX, true },
  { "__builtin_alpha_unpkbw",   ALPHA_BUILTIN_UNPKBW,   MASK_MAX, true },
  { "__builtin_alpha_cttz",     ALPHA_BUILTIN_CTTZ,     MASK_CIX, true },
  { "__builtin_alpha_ctlz",     ALPHA_BUILTIN_CTLZ,     MASK_CIX, true },
  { "__builtin_alpha_ctpop",    ALPHA_BUILTIN_CTPOP,    MASK_CIX, true }
};

static struct alpha_builtin_def const two_arg_builtins[] = {
  { "__builtin_alpha_cmpbge",   ALPHA_BUILTIN_CMPBGE,   0, true },
  { "__builtin_alpha_extbl",    ALPHA_BUILTIN_EXTBL,    0, true },
  { "__builtin_alpha_extwl",    ALPHA_BUILTIN_EXTWL,    0, true },
  { "__builtin_alpha_extll",    ALPHA_BUILTIN_EXTLL,    0, true },
  { "__builtin_alpha_extql",    ALPHA_BUILTIN_EXTQL,    0, true },
  { "__builtin_alpha_extwh",    ALPHA_BUILTIN_EXTWH,    0, true },
  { "__builtin_alpha_extlh",    ALPHA_BUILTIN_EXTLH,    0, true },
  { "__builtin_alpha_extqh",    ALPHA_BUILTIN_EXTQH,    0, true },
  { "__builtin_alpha_insbl",    ALPHA_BUILTIN_INSBL,    0, true },
  { "__builtin_alpha_inswl",    ALPHA_BUILTIN_INSWL,    0, true },
  { "__builtin_alpha_insll",    ALPHA_BUILTIN_INSLL,    0, true },
  { "__builtin_alpha_insql",    ALPHA_BUILTIN_INSQL,    0, true },
  { "__builtin_alpha_inswh",    ALPHA_BUILTIN_INSWH,    0, true },
  { "__builtin_alpha_inslh",    ALPHA_BUILTIN_INSLH,    0, true },
  { "__builtin_alpha_insqh",    ALPHA_BUILTIN_INSQH,    0, true },
  { "__builtin_alpha_mskbl",    ALPHA_BUILTIN_MSKBL,    0, true },
  { "__builtin_alpha_mskwl",    ALPHA_BUILTIN_MSKWL,    0, true },
  { "__builtin_alpha_mskll",    ALPHA_BUILTIN_MSKLL,    0, true },
  { "__builtin_alpha_mskql",    ALPHA_BUILTIN_MSKQL,    0, true },
  { "__builtin_alpha_mskwh",    ALPHA_BUILTIN_MSKWH,    0, true },
  { "__builtin_alpha_msklh",    ALPHA_BUILTIN_MSKLH,    0, true },
  { "__builtin_alpha_mskqh",    ALPHA_BUILTIN_MSKQH,    0, true },
  { "__builtin_alpha_umulh",    ALPHA_BUILTIN_UMULH,    0, true },
  { "__builtin_alpha_zap",      ALPHA_BUILTIN_ZAP,      0, true },
  { "__builtin_alpha_zapnot",   ALPHA_BUILTIN_ZAPNOT,   0, true },
  { "__builtin_alpha_minub8",   ALPHA_BUILTIN_MINUB8,   MASK_MAX, true },
  { "__builtin_alpha_minsb8",   ALPHA_BUILTIN_MINSB8,   MASK_MAX, true },
  { "__builtin_alpha_minuw4",   ALPHA_BUILTIN_MINUW4,   MASK_MAX, true },
  { "__builtin_alpha_minsw4",   ALPHA_BUILTIN_MINSW4,   MASK_MAX, true },
  { "__builtin_alpha_maxub8",   ALPHA_BUILTIN_MAXUB8,   MASK_MAX, true },
  { "__builtin_alpha_maxsb8",   ALPHA_BUILTIN_MAXSB8,   MASK_MAX, true },
  { "__builtin_alpha_maxuw4",   ALPHA_BUILTIN_MAXUW4,   MASK_MAX, true },
  { "__builtin_alpha_maxsw4",   ALPHA_BUILTIN_MAXSW4,   MASK_MAX, true },
  { "__builtin_alpha_perr",     ALPHA_BUILTIN_PERR,     MASK_MAX, true }
};

static GTY(()) tree alpha_dimode_u;
static GTY(()) tree alpha_v8qi_u;
static GTY(()) tree alpha_v8qi_s;
static GTY(()) tree alpha_v4hi_u;
static GTY(()) tree alpha_v4hi_s;

static GTY(()) tree alpha_builtins[(int) ALPHA_BUILTIN_max];

/* Return the alpha builtin for CODE.  */

static tree
alpha_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= ALPHA_BUILTIN_max)
    return error_mark_node;
  return alpha_builtins[code];
}

/* Helper function of alpha_init_builtins.  Add the built-in specified
   by NAME, TYPE, CODE, and ECF.  */

static void
alpha_builtin_function (const char *name, tree ftype,
                        enum alpha_builtin code, unsigned ecf)
{
  tree decl = add_builtin_function (name, ftype, (int) code,
                                    BUILT_IN_MD, NULL, NULL_TREE);

  if (ecf & ECF_CONST)
    TREE_READONLY (decl) = 1;
  if (ecf & ECF_NOTHROW)
    TREE_NOTHROW (decl) = 1;

  alpha_builtins [(int) code] = decl;
}

/* Helper function of alpha_init_builtins.  Add the COUNT built-in
   functions pointed to by P, with function type FTYPE.  */

static void
alpha_add_builtins (const struct alpha_builtin_def *p, size_t count,
                    tree ftype)
{
  size_t i;

  for (i = 0; i < count; ++i, ++p)
    if ((target_flags & p->target_mask) == p->target_mask)
      alpha_builtin_function (p->name, ftype, p->code,
                              (p->is_const ? ECF_CONST : 0) | ECF_NOTHROW);
}

static void
alpha_init_builtins (void)
{
  tree ftype;

  alpha_dimode_u = lang_hooks.types.type_for_mode (DImode, 1);
  alpha_v8qi_u = build_vector_type (unsigned_intQI_type_node, 8);
  alpha_v8qi_s = build_vector_type (intQI_type_node, 8);
  alpha_v4hi_u = build_vector_type (unsigned_intHI_type_node, 4);
  alpha_v4hi_s = build_vector_type (intHI_type_node, 4);

  ftype = build_function_type_list (alpha_dimode_u, NULL_TREE);
  alpha_add_builtins (zero_arg_builtins, ARRAY_SIZE (zero_arg_builtins), ftype);

  ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u, NULL_TREE);
  alpha_add_builtins (one_arg_builtins, ARRAY_SIZE (one_arg_builtins), ftype);

  ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u,
                                    alpha_dimode_u, NULL_TREE);
  alpha_add_builtins (two_arg_builtins, ARRAY_SIZE (two_arg_builtins), ftype);

  if (TARGET_ABI_OPEN_VMS)
    {
      ftype = build_function_type_list (ptr_type_node, ptr_type_node,
                                        NULL_TREE);
      alpha_builtin_function ("__builtin_establish_vms_condition_handler",
                              ftype,
                              ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
                              0);

      ftype = build_function_type_list (ptr_type_node, void_type_node,
                                        NULL_TREE);
      alpha_builtin_function ("__builtin_revert_vms_condition_handler", ftype,
                              ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER, 0);

      vms_patch_builtins ();
    }
}

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
alpha_expand_builtin (tree exp, rtx target,
                      rtx subtarget ATTRIBUTE_UNUSED,
                      enum machine_mode mode ATTRIBUTE_UNUSED,
                      int ignore ATTRIBUTE_UNUSED)
{
#define MAX_ARGS 2

  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  tree arg;
  call_expr_arg_iterator iter;
  enum insn_code icode;
  rtx op[MAX_ARGS], pat;
  int arity;
  bool nonvoid;

  if (fcode >= ALPHA_BUILTIN_max)
    internal_error ("bad builtin fcode");
  icode = code_for_builtin[fcode];
  if (icode == 0)
    internal_error ("bad builtin fcode");

  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;

  arity = 0;
  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      const struct insn_operand_data *insn_op;

      if (arg == error_mark_node)
        return NULL_RTX;
      /* Bail out before writing past the end of OP.  */
      if (arity >= MAX_ARGS)
        return NULL_RTX;

      insn_op = &insn_data[icode].operand[arity + nonvoid];

      op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);

      if (!(*insn_op->predicate) (op[arity], insn_op->mode))
        op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
      arity++;
    }

  if (nonvoid)
    {
      enum machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
          || GET_MODE (target) != tmode
          || !(*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
    }

  switch (arity)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      if (nonvoid)
        pat = GEN_FCN (icode) (target, op[0]);
      else
        pat = GEN_FCN (icode) (op[0]);
      break;
    case 2:
      pat = GEN_FCN (icode) (target, op[0], op[1]);
      break;
    default:
      gcc_unreachable ();
    }
  if (!pat)
    return NULL_RTX;
  emit_insn (pat);

  if (nonvoid)
    return target;
  else
    return const0_rtx;
}


/* Several bits below assume HWI >= 64 bits.  This should be enforced
   by config.gcc.  */
#if HOST_BITS_PER_WIDE_INT < 64
# error "HOST_WIDE_INT too small"
#endif

/* Fold the builtin for the CMPBGE instruction.  This is a vector comparison
   with an 8-bit output vector.  OPINT contains the integer operands; bit N
   of OP_CONST is set if OPINT[N] is valid.  */

static tree
alpha_fold_builtin_cmpbge (unsigned HOST_WIDE_INT opint[], long op_const)
{
  if (op_const == 3)
    {
      int i, val;
      for (i = 0, val = 0; i < 8; ++i)
        {
          unsigned HOST_WIDE_INT c0 = (opint[0] >> (i * 8)) & 0xff;
          unsigned HOST_WIDE_INT c1 = (opint[1] >> (i * 8)) & 0xff;
          if (c0 >= c1)
            val |= 1 << i;
        }
      return build_int_cst (alpha_dimode_u, val);
    }
  else if (op_const == 2 && opint[1] == 0)
    return build_int_cst (alpha_dimode_u, 0xff);
  return NULL;
}
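
/* Examples (illustrative, not in the original source):
   __builtin_alpha_cmpbge (x, 0) folds to 0xff for any x, since every
   unsigned byte compares >= 0.  With both operands constant,
   cmpbge (0x1100, 0x0011) gives 0xfe: byte 0 fails (0x00 >= 0x11 is
   false) while byte 1 and the six zero bytes succeed.  */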

/* Fold the builtin for the ZAPNOT instruction.  This is essentially a
   specialized form of an AND operation.  Other byte manipulation instructions
   are defined in terms of this instruction, so this is also used as a
   subroutine for other builtins.

   OP contains the tree operands; OPINT contains the extracted integer values.
   Bit N of OP_CONST is set if OPINT[N] is valid.  OP may be null if only
   OPINT may be considered.  */

static tree
alpha_fold_builtin_zapnot (tree *op, unsigned HOST_WIDE_INT opint[],
                           long op_const)
{
  if (op_const & 2)
    {
      unsigned HOST_WIDE_INT mask = 0;
      int i;

      for (i = 0; i < 8; ++i)
        if ((opint[1] >> i) & 1)
          mask |= (unsigned HOST_WIDE_INT)0xff << (i * 8);

      if (op_const & 1)
        return build_int_cst (alpha_dimode_u, opint[0] & mask);

      if (op)
        return fold_build2 (BIT_AND_EXPR, alpha_dimode_u, op[0],
                            build_int_cst (alpha_dimode_u, mask));
    }
  else if ((op_const & 1) && opint[0] == 0)
    return build_int_cst (alpha_dimode_u, 0);
  return NULL;
}
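
/* Example (illustrative, not in the original source):
   __builtin_alpha_zapnot (x, 0x0f) keeps the low four bytes, folding to
   x & 0x00000000ffffffff; the BIT_AND_EXPR path handles the case where
   only the byte mask is constant.  */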

/* Fold the builtins for the EXT family of instructions.  */

static tree
alpha_fold_builtin_extxx (tree op[], unsigned HOST_WIDE_INT opint[],
                          long op_const, unsigned HOST_WIDE_INT bytemask,
                          bool is_high)
{
  long zap_const = 2;
  tree *zap_op = NULL;

  if (op_const & 2)
    {
      unsigned HOST_WIDE_INT loc;

      loc = opint[1] & 7;
      loc *= BITS_PER_UNIT;

      if (loc != 0)
        {
          if (op_const & 1)
            {
              unsigned HOST_WIDE_INT temp = opint[0];
              if (is_high)
                temp <<= loc;
              else
                temp >>= loc;
              opint[0] = temp;
              zap_const = 3;
            }
        }
      else
        zap_op = op;
    }

  opint[1] = bytemask;
  return alpha_fold_builtin_zapnot (zap_op, opint, zap_const);
}
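
/* Example (illustrative, not in the original source): with both operands
   constant, __builtin_alpha_extbl (x, 2) folds to (x >> 16) & 0xff,
   i.e. byte 2 of x shifted down into byte 0 of the result.  */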

/* Fold the builtins for the INS family of instructions.  */

static tree
alpha_fold_builtin_insxx (tree op[], unsigned HOST_WIDE_INT opint[],
                          long op_const, unsigned HOST_WIDE_INT bytemask,
                          bool is_high)
{
  if ((op_const & 1) && opint[0] == 0)
    return build_int_cst (alpha_dimode_u, 0);

  if (op_const & 2)
    {
      unsigned HOST_WIDE_INT temp, loc, byteloc;
      tree *zap_op = NULL;

      loc = opint[1] & 7;
      bytemask <<= loc;

      temp = opint[0];
      if (is_high)
        {
          byteloc = (64 - (loc * 8)) & 0x3f;
          if (byteloc == 0)
            zap_op = op;
          else
            temp >>= byteloc;
          bytemask >>= 8;
        }
      else
        {
          byteloc = loc * 8;
          if (byteloc == 0)
            zap_op = op;
          else
            temp <<= byteloc;
        }

      opint[0] = temp;
      opint[1] = bytemask;
      return alpha_fold_builtin_zapnot (zap_op, opint, op_const);
    }

  return NULL;
}
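
/* Example (illustrative, not in the original source): with both operands
   constant, __builtin_alpha_insbl (x, 1) folds to (x << 8) & 0xff00,
   i.e. the low byte of x placed into byte 1 of an otherwise zero
   result.  */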

/* Fold the builtins for the MSK family of instructions.  */

static tree
alpha_fold_builtin_mskxx (tree op[], unsigned HOST_WIDE_INT opint[],
                          long op_const, unsigned HOST_WIDE_INT bytemask,
                          bool is_high)
{
  if (op_const & 2)
    {
      unsigned HOST_WIDE_INT loc;

      loc = opint[1] & 7;
      bytemask <<= loc;

      if (is_high)
        bytemask >>= 8;

      opint[1] = bytemask ^ 0xff;
    }

  return alpha_fold_builtin_zapnot (op, opint, op_const);
}
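
/* Example (illustrative, not in the original source):
   __builtin_alpha_mskbl (x, 0) clears byte 0, folding to x & ~0xff; the
   value operand need not be constant, since the ZAPNOT subroutine above
   can emit a BIT_AND_EXPR on the tree operand.  */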

/* Fold a vector MIN or MAX builtin by applying CODE to the operands
   viewed as vectors of type VTYPE.  */

static tree
alpha_fold_vector_minmax (enum tree_code code, tree op[], tree vtype)
{
  tree op0 = fold_convert (vtype, op[0]);
  tree op1 = fold_convert (vtype, op[1]);
  tree val = fold_build2 (code, vtype, op0, op1);
  return fold_build1 (VIEW_CONVERT_EXPR, alpha_dimode_u, val);
}

/* Fold the builtin for the PERR instruction: the sum of the absolute
   differences of the eight unsigned bytes of the two operands.  */

static tree
alpha_fold_builtin_perr (unsigned HOST_WIDE_INT opint[], long op_const)
{
  unsigned HOST_WIDE_INT temp = 0;
  int i;

  if (op_const != 3)
    return NULL;

  for (i = 0; i < 8; ++i)
    {
      unsigned HOST_WIDE_INT a = (opint[0] >> (i * 8)) & 0xff;
      unsigned HOST_WIDE_INT b = (opint[1] >> (i * 8)) & 0xff;
      if (a >= b)
        temp += a - b;
      else
        temp += b - a;
    }

  return build_int_cst (alpha_dimode_u, temp);
}
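
/* Example (illustrative, not in the original source):
   __builtin_alpha_perr (0x0203, 0x0102) sums |0x03 - 0x02| in byte 0 and
   |0x02 - 0x01| in byte 1 for a result of 2; all other byte lanes
   contribute zero.  */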

/* Fold the builtin for the PKLB instruction: pack the low bytes of the
   two longwords of the operand into the two low bytes of the result.  */

static tree
alpha_fold_builtin_pklb (unsigned HOST_WIDE_INT opint[], long op_const)
{
  unsigned HOST_WIDE_INT temp;

  if (op_const == 0)
    return NULL;

  temp = opint[0] & 0xff;
  temp |= (opint[0] >> 24) & 0xff00;

  return build_int_cst (alpha_dimode_u, temp);
}

/* Fold the builtin for the PKWB instruction: pack the low bytes of the
   four words of the operand into the four low bytes of the result.  */

static tree
alpha_fold_builtin_pkwb (unsigned HOST_WIDE_INT opint[], long op_const)
{
  unsigned HOST_WIDE_INT temp;

  if (op_const == 0)
    return NULL;

  temp = opint[0] & 0xff;
  temp |= (opint[0] >> 8) & 0xff00;
  temp |= (opint[0] >> 16) & 0xff0000;
  temp |= (opint[0] >> 24) & 0xff000000;

  return build_int_cst (alpha_dimode_u, temp);
}

/* Fold the builtin for the UNPKBL instruction, the inverse of PKLB.  */

static tree
alpha_fold_builtin_unpkbl (unsigned HOST_WIDE_INT opint[], long op_const)
{
  unsigned HOST_WIDE_INT temp;

  if (op_const == 0)
    return NULL;

  temp = opint[0] & 0xff;
  temp |= (opint[0] & 0xff00) << 24;

  return build_int_cst (alpha_dimode_u, temp);
}

/* Fold the builtin for the UNPKBW instruction, the inverse of PKWB.  */

static tree
alpha_fold_builtin_unpkbw (unsigned HOST_WIDE_INT opint[], long op_const)
{
  unsigned HOST_WIDE_INT temp;

  if (op_const == 0)
    return NULL;

  temp = opint[0] & 0xff;
  temp |= (opint[0] & 0x0000ff00) << 8;
  temp |= (opint[0] & 0x00ff0000) << 16;
  temp |= (opint[0] & 0xff000000) << 24;

  return build_int_cst (alpha_dimode_u, temp);
}

/* Fold the builtin for the CTTZ instruction: count trailing zeros.  */

static tree
alpha_fold_builtin_cttz (unsigned HOST_WIDE_INT opint[], long op_const)
{
  unsigned HOST_WIDE_INT temp;

  if (op_const == 0)
    return NULL;

  if (opint[0] == 0)
    temp = 64;
  else
    temp = exact_log2 (opint[0] & -opint[0]);

  return build_int_cst (alpha_dimode_u, temp);
}
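
/* Examples (illustrative, not in the original source): cttz (0x8)
   isolates the lowest set bit (0x8 & -0x8 == 0x8) and returns its
   exact_log2, i.e. 3; cttz (0) returns 64 by convention.  */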

/* Fold the builtin for the CTLZ instruction: count leading zeros.  */

static tree
alpha_fold_builtin_ctlz (unsigned HOST_WIDE_INT opint[], long op_const)
{
  unsigned HOST_WIDE_INT temp;

  if (op_const == 0)
    return NULL;

  if (opint[0] == 0)
    temp = 64;
  else
    temp = 64 - floor_log2 (opint[0]) - 1;

  return build_int_cst (alpha_dimode_u, temp);
}

/* Fold the builtin for the CTPOP instruction: population count.  */

static tree
alpha_fold_builtin_ctpop (unsigned HOST_WIDE_INT opint[], long op_const)
{
  unsigned HOST_WIDE_INT temp, op;

  if (op_const == 0)
    return NULL;

  op = opint[0];
  temp = 0;
  /* Each iteration clears the lowest set bit of OP.  */
  while (op)
    temp++, op &= op - 1;

  return build_int_cst (alpha_dimode_u, temp);
}

/* Fold one of our builtin functions.  */

static tree
alpha_fold_builtin (tree fndecl, int n_args, tree *op,
                    bool ignore ATTRIBUTE_UNUSED)
{
  unsigned HOST_WIDE_INT opint[MAX_ARGS];
  long op_const = 0;
  int i;

  if (n_args > MAX_ARGS)
    return NULL;

  for (i = 0; i < n_args; i++)
    {
      tree arg = op[i];
      if (arg == error_mark_node)
        return NULL;

      opint[i] = 0;
      if (TREE_CODE (arg) == INTEGER_CST)
        {
          op_const |= 1L << i;
          opint[i] = int_cst_value (arg);
        }
    }

  switch (DECL_FUNCTION_CODE (fndecl))
    {
    case ALPHA_BUILTIN_CMPBGE:
      return alpha_fold_builtin_cmpbge (opint, op_const);

    case ALPHA_BUILTIN_EXTBL:
      return alpha_fold_builtin_extxx (op, opint, op_const, 0x01, false);
    case ALPHA_BUILTIN_EXTWL:
      return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, false);
    case ALPHA_BUILTIN_EXTLL:
      return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, false);
    case ALPHA_BUILTIN_EXTQL:
      return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, false);
    case ALPHA_BUILTIN_EXTWH:
      return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, true);
    case ALPHA_BUILTIN_EXTLH:
      return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, true);
    case ALPHA_BUILTIN_EXTQH:
      return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, true);

    case ALPHA_BUILTIN_INSBL:
      return alpha_fold_builtin_insxx (op, opint, op_const, 0x01, false);
    case ALPHA_BUILTIN_INSWL:
      return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, false);
    case ALPHA_BUILTIN_INSLL:
      return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, false);
    case ALPHA_BUILTIN_INSQL:
      return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, false);
    case ALPHA_BUILTIN_INSWH:
      return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, true);
    case ALPHA_BUILTIN_INSLH:
      return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, true);
    case ALPHA_BUILTIN_INSQH:
      return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, true);

    case ALPHA_BUILTIN_MSKBL:
      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x01, false);
    case ALPHA_BUILTIN_MSKWL:
      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, false);
    case ALPHA_BUILTIN_MSKLL:
      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, false);
    case ALPHA_BUILTIN_MSKQL:
      return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, false);
    case ALPHA_BUILTIN_MSKWH:
      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, true);
    case ALPHA_BUILTIN_MSKLH:
      return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, true);
    case ALPHA_BUILTIN_MSKQH:
      return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, true);

    case ALPHA_BUILTIN_UMULH:
      return fold_build2 (MULT_HIGHPART_EXPR, alpha_dimode_u, op[0], op[1]);

    case ALPHA_BUILTIN_ZAP:
      opint[1] ^= 0xff;
      /* FALLTHRU */
    case ALPHA_BUILTIN_ZAPNOT:
      return alpha_fold_builtin_zapnot (op, opint, op_const);

    case ALPHA_BUILTIN_MINUB8:
      return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_u);
    case ALPHA_BUILTIN_MINSB8:
      return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_s);
    case ALPHA_BUILTIN_MINUW4:
      return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_u);
    case ALPHA_BUILTIN_MINSW4:
      return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_s);
    case ALPHA_BUILTIN_MAXUB8:
      return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_u);
    case ALPHA_BUILTIN_MAXSB8:
      return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_s);
    case ALPHA_BUILTIN_MAXUW4:
      return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_u);
    case ALPHA_BUILTIN_MAXSW4:
      return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_s);

    case ALPHA_BUILTIN_PERR:
      return alpha_fold_builtin_perr (opint, op_const);
    case ALPHA_BUILTIN_PKLB:
      return alpha_fold_builtin_pklb (opint, op_const);
    case ALPHA_BUILTIN_PKWB:
      return alpha_fold_builtin_pkwb (opint, op_const);
    case ALPHA_BUILTIN_UNPKBL:
      return alpha_fold_builtin_unpkbl (opint, op_const);
    case ALPHA_BUILTIN_UNPKBW:
      return alpha_fold_builtin_unpkbw (opint, op_const);

    case ALPHA_BUILTIN_CTTZ:
      return alpha_fold_builtin_cttz (opint, op_const);
    case ALPHA_BUILTIN_CTLZ:
      return alpha_fold_builtin_ctlz (opint, op_const);
    case ALPHA_BUILTIN_CTPOP:
      return alpha_fold_builtin_ctpop (opint, op_const);

    case ALPHA_BUILTIN_AMASK:
    case ALPHA_BUILTIN_IMPLVER:
    case ALPHA_BUILTIN_RPCC:
      /* None of these are foldable at compile-time.  */
    default:
      return NULL;
    }
}

/* This page contains routines that are used to determine what the function
   prologue and epilogue code will do and write them out.  */

/* Compute the size of the save area in the stack.  */

/* These variables are used for communication between the following functions.
   They indicate various things about the current function being compiled
   that are used to tell what kind of prologue, epilogue and procedure
   descriptor to generate.  */

/* Nonzero if we need a stack procedure.  */
enum alpha_procedure_types {PT_NULL = 0, PT_REGISTER = 1, PT_STACK = 2};
static enum alpha_procedure_types alpha_procedure_type;

/* Register number (either FP or SP) that is used to unwind the frame.  */
static int vms_unwind_regno;

/* Register number used to save FP.  We need not have one for RA since
   we don't modify it for register procedures.  This is only defined
   for register frame procedures.  */
static int vms_save_fp_regno;

/* Register number used to reference objects off our PV.  */
static int vms_base_regno;

/* Compute register masks for saved registers.  */

static void
alpha_sa_mask (unsigned long *imaskP, unsigned long *fmaskP)
{
  unsigned long imask = 0;
  unsigned long fmask = 0;
  unsigned int i;

  /* When outputting a thunk, we don't have valid register life info,
     but assemble_start_function wants to output .frame and .mask
     directives.  */
  if (cfun->is_thunk)
    {
      *imaskP = 0;
      *fmaskP = 0;
      return;
    }

  if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
    imask |= (1UL << HARD_FRAME_POINTER_REGNUM);

  /* One for every register we have to save.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (! fixed_regs[i] && ! call_used_regs[i]
        && df_regs_ever_live_p (i) && i != REG_RA)
      {
        if (i < 32)
          imask |= (1UL << i);
        else
          fmask |= (1UL << (i - 32));
      }

  /* We need to restore these for the handler.  */
  if (crtl->calls_eh_return)
    {
      for (i = 0; ; ++i)
        {
          unsigned regno = EH_RETURN_DATA_REGNO (i);
          if (regno == INVALID_REGNUM)
            break;
          imask |= 1UL << regno;
        }
    }

  /* If any register spilled, then spill the return address also.  */
  /* ??? This is required by the Digital stack unwind specification
     and isn't needed if we're doing Dwarf2 unwinding.  */
  if (imask || fmask || alpha_ra_ever_killed ())
    imask |= (1UL << REG_RA);

  *imaskP = imask;
  *fmaskP = fmask;
}

int
alpha_sa_size (void)
{
  unsigned long mask[2];
  int sa_size = 0;
  int i, j;

  alpha_sa_mask (&mask[0], &mask[1]);

  for (j = 0; j < 2; ++j)
    for (i = 0; i < 32; ++i)
      if ((mask[j] >> i) & 1)
        sa_size++;

  if (TARGET_ABI_OPEN_VMS)
    {
      /* Start with a stack procedure if we make any calls (REG_RA used), or
         need a frame pointer, with a register procedure if we otherwise need
         at least a slot, and with a null procedure in other cases.  */
      if ((mask[0] >> REG_RA) & 1 || frame_pointer_needed)
        alpha_procedure_type = PT_STACK;
      else if (get_frame_size () != 0)
        alpha_procedure_type = PT_REGISTER;
      else
        alpha_procedure_type = PT_NULL;

      /* Don't reserve space for saving FP & RA yet.  Do that later after we've
         made the final decision on stack procedure vs register procedure.  */
      if (alpha_procedure_type == PT_STACK)
        sa_size -= 2;

      /* Decide whether to refer to objects off our PV via FP or PV.
         If we need FP for something else or if we receive a nonlocal
         goto (which expects PV to contain the value), we must use PV.
         Otherwise, start by assuming we can use FP.  */

      vms_base_regno
        = (frame_pointer_needed
           || cfun->has_nonlocal_label
           || alpha_procedure_type == PT_STACK
           || crtl->outgoing_args_size)
          ? REG_PV : HARD_FRAME_POINTER_REGNUM;

      /* If we want to copy PV into FP, we need to find some register
         in which to save FP.  */

      vms_save_fp_regno = -1;
      if (vms_base_regno == HARD_FRAME_POINTER_REGNUM)
        for (i = 0; i < 32; i++)
          if (! fixed_regs[i] && call_used_regs[i] && ! df_regs_ever_live_p (i))
            vms_save_fp_regno = i;

      /* A VMS condition handler requires a stack procedure in our
         implementation (it is not required by the calling standard).  */
      if ((vms_save_fp_regno == -1 && alpha_procedure_type == PT_REGISTER)
          || cfun->machine->uses_condition_handler)
        vms_base_regno = REG_PV, alpha_procedure_type = PT_STACK;
      else if (alpha_procedure_type == PT_NULL)
        vms_base_regno = REG_PV;

      /* Stack unwinding should be done via FP unless we use it for PV.  */
      vms_unwind_regno = (vms_base_regno == REG_PV
                          ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);

      /* If this is a stack procedure, allow space for saving FP, RA and
         a condition handler slot if needed.  */
      if (alpha_procedure_type == PT_STACK)
        sa_size += 2 + cfun->machine->uses_condition_handler;
    }
  else
    {
      /* Our size must be even (multiple of 16 bytes).  */
      if (sa_size & 1)
        sa_size++;
    }

  return sa_size * 8;
}
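
/* Worked example (illustrative, not in the original source): an OSF
   function that saves $9, $10 and the return address has three mask bits
   set; sa_size is then rounded up to the even value 4, so alpha_sa_size
   returns 32 bytes.  */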

/* Define the offset between two registers, one to be eliminated,
   and the other its replacement, at the start of a routine.  */

HOST_WIDE_INT
alpha_initial_elimination_offset (unsigned int from,
                                  unsigned int to ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT ret;

  ret = alpha_sa_size ();
  ret += ALPHA_ROUND (crtl->outgoing_args_size);

  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      break;

    case ARG_POINTER_REGNUM:
      ret += (ALPHA_ROUND (get_frame_size ()
                           + crtl->args.pretend_args_size)
              - crtl->args.pretend_args_size);
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}

#if TARGET_ABI_OPEN_VMS

/* Worker function for TARGET_CAN_ELIMINATE.  */

static bool
alpha_vms_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
{
  /* We need the alpha_procedure_type to decide.  Evaluate it now.  */
  alpha_sa_size ();

  switch (alpha_procedure_type)
    {
    case PT_NULL:
      /* NULL procedures have no frame of their own and we only
         know how to resolve from the current stack pointer.  */
      return to == STACK_POINTER_REGNUM;

    case PT_REGISTER:
    case PT_STACK:
      /* We always eliminate except to the stack pointer if there is no
         usable frame pointer at hand.  */
      return (to != STACK_POINTER_REGNUM
              || vms_unwind_regno != HARD_FRAME_POINTER_REGNUM);
    }

  gcc_unreachable ();
}

/* FROM is to be eliminated for TO.  Return the offset so that TO+offset
   designates the same location as FROM.  */

HOST_WIDE_INT
alpha_vms_initial_elimination_offset (unsigned int from, unsigned int to)
{
  /* The only possible attempts we ever expect are ARG or FRAME_PTR to
     HARD_FRAME or STACK_PTR.  We need the alpha_procedure_type to decide
     on the proper computations and will need the register save area size
     in most cases.  */

  HOST_WIDE_INT sa_size = alpha_sa_size ();

  /* PT_NULL procedures have no frame of their own and we only allow
     elimination to the stack pointer.  This is the argument pointer and we
     resolve the soft frame pointer to that as well.  */

  if (alpha_procedure_type == PT_NULL)
    return 0;

  /* For a PT_STACK procedure the frame layout looks as follows

                     -----> decreasing addresses

        <             size rounded up to 16        |   likewise   >
     --------------#------------------------------+++--------------+++-------#
     incoming args # pretended args | "frame" | regs sa | PV | outgoing args #
     --------------#---------------------------------------------------------#
                   ^                               ^              ^           ^
              ARG_PTR                          FRAME_PTR   HARD_FRAME_PTR  STACK_PTR

     PT_REGISTER procedures are similar in that they may have a frame of their
     own.  They have no regs-sa/pv/outgoing-args area.

     We first compute offset to HARD_FRAME_PTR, then add what we need to get
     to STACK_PTR if need be.  */

  {
    HOST_WIDE_INT offset;
    HOST_WIDE_INT pv_save_size = alpha_procedure_type == PT_STACK ? 8 : 0;

    switch (from)
      {
      case FRAME_POINTER_REGNUM:
        offset = ALPHA_ROUND (sa_size + pv_save_size);
        break;
      case ARG_POINTER_REGNUM:
        offset = (ALPHA_ROUND (sa_size + pv_save_size
                               + get_frame_size ()
                               + crtl->args.pretend_args_size)
                  - crtl->args.pretend_args_size);
        break;
      default:
        gcc_unreachable ();
      }

    if (to == STACK_POINTER_REGNUM)
      offset += ALPHA_ROUND (crtl->outgoing_args_size);

    return offset;
  }
}

#define COMMON_OBJECT "common_object"

static tree
common_object_handler (tree *node, tree name ATTRIBUTE_UNUSED,
                       tree args ATTRIBUTE_UNUSED, int flags ATTRIBUTE_UNUSED,
                       bool *no_add_attrs ATTRIBUTE_UNUSED)
{
  tree decl = *node;
  gcc_assert (DECL_P (decl));

  DECL_COMMON (decl) = 1;
  return NULL_TREE;
}

static const struct attribute_spec vms_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { COMMON_OBJECT, 0, 1, true, false, false, common_object_handler, false },
  { NULL, 0, 0, false, false, false, NULL, false }
};

void
vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
                                unsigned HOST_WIDE_INT size,
                                unsigned int align)
{
  tree attr = DECL_ATTRIBUTES (decl);
  fprintf (file, "%s", COMMON_ASM_OP);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED, size);
  /* ??? Unlike on OSF/1, the alignment factor is not in log units.  */
  fprintf (file, ",%u", align / BITS_PER_UNIT);
  if (attr)
    {
      attr = lookup_attribute (COMMON_OBJECT, attr);
      if (attr)
        fprintf (file, ",%s",
                 IDENTIFIER_POINTER (TREE_VALUE (TREE_VALUE (attr))));
    }
  fputc ('\n', file);
}

#undef COMMON_OBJECT

#endif
7410
7411 static int
find_lo_sum_using_gp(rtx * px,void * data ATTRIBUTE_UNUSED)7412 find_lo_sum_using_gp (rtx *px, void *data ATTRIBUTE_UNUSED)
7413 {
7414 return GET_CODE (*px) == LO_SUM && XEXP (*px, 0) == pic_offset_table_rtx;
7415 }
7416
7417 int
alpha_find_lo_sum_using_gp(rtx insn)7418 alpha_find_lo_sum_using_gp (rtx insn)
7419 {
7420 return for_each_rtx (&PATTERN (insn), find_lo_sum_using_gp, NULL) > 0;
7421 }
7422
7423 static int
alpha_does_function_need_gp(void)7424 alpha_does_function_need_gp (void)
7425 {
7426 rtx insn;
7427
7428 /* The GP being variable is an OSF abi thing. */
7429 if (! TARGET_ABI_OSF)
7430 return 0;
7431
7432 /* We need the gp to load the address of __mcount. */
7433 if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
7434 return 1;
7435
7436 /* The code emitted by alpha_output_mi_thunk_osf uses the gp. */
7437 if (cfun->is_thunk)
7438 return 1;
7439
7440 /* The nonlocal receiver pattern assumes that the gp is valid for
7441 the nested function. Reasonable because it's almost always set
7442 correctly already. For the cases where that's wrong, make sure
7443 the nested function loads its gp on entry. */
7444 if (crtl->has_nonlocal_goto)
7445 return 1;
7446
7447 /* If we need a GP (we have a LDSYM insn or a CALL_INSN), load it first.
7448 Even if we are a static function, we still need to do this in case
7449 our address is taken and passed to something like qsort. */
7450
7451 push_topmost_sequence ();
7452 insn = get_insns ();
7453 pop_topmost_sequence ();
7454
7455 for (; insn; insn = NEXT_INSN (insn))
7456 if (NONDEBUG_INSN_P (insn)
7457 && ! JUMP_TABLE_DATA_P (insn)
7458 && GET_CODE (PATTERN (insn)) != USE
7459 && GET_CODE (PATTERN (insn)) != CLOBBER
7460 && get_attr_usegp (insn))
7461 return 1;
7462
7463 return 0;
7464 }
7465
7466
7467 /* Helper function to set RTX_FRAME_RELATED_P on instructions, including
7468 sequences. */
7469
7470 static rtx
7471 set_frame_related_p (void)
7472 {
7473 rtx seq = get_insns ();
7474 rtx insn;
7475
7476 end_sequence ();
7477
7478 if (!seq)
7479 return NULL_RTX;
7480
7481 if (INSN_P (seq))
7482 {
7483 insn = seq;
7484 while (insn != NULL_RTX)
7485 {
7486 RTX_FRAME_RELATED_P (insn) = 1;
7487 insn = NEXT_INSN (insn);
7488 }
7489 seq = emit_insn (seq);
7490 }
7491 else
7492 {
7493 seq = emit_insn (seq);
7494 RTX_FRAME_RELATED_P (seq) = 1;
7495 }
7496 return seq;
7497 }
7498
7499 #define FRP(exp) (start_sequence (), exp, set_frame_related_p ())
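/* Illustrative use (as seen later in this file): wrapping an emit in FRP,
   e.g.

     FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-frame_size))));

   opens a sequence, emits the insn(s), and marks everything emitted as
   RTX_FRAME_RELATED_P so the unwind info generator sees the adjustment.  */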
7500
7501 /* Generates a store with the proper unwind info attached. VALUE is
7502 stored at BASE_REG+BASE_OFS. If FRAME_BIAS is nonzero, then BASE_REG
7503 contains SP+FRAME_BIAS, and that is the unwind info that should be
7504 generated. If FRAME_REG != VALUE, then VALUE is being stored on
7505 behalf of FRAME_REG, and FRAME_REG should be present in the unwind. */
7506
7507 static void
7508 emit_frame_store_1 (rtx value, rtx base_reg, HOST_WIDE_INT frame_bias,
7509 HOST_WIDE_INT base_ofs, rtx frame_reg)
7510 {
7511 rtx addr, mem, insn;
7512
7513 addr = plus_constant (Pmode, base_reg, base_ofs);
7514 mem = gen_frame_mem (DImode, addr);
7515
7516 insn = emit_move_insn (mem, value);
7517 RTX_FRAME_RELATED_P (insn) = 1;
7518
7519 if (frame_bias || value != frame_reg)
7520 {
7521 if (frame_bias)
7522 {
7523 addr = plus_constant (Pmode, stack_pointer_rtx,
7524 frame_bias + base_ofs);
7525 mem = gen_rtx_MEM (DImode, addr);
7526 }
7527
7528 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
7529 gen_rtx_SET (VOIDmode, mem, frame_reg));
7530 }
7531 }
7532
7533 static void
7534 emit_frame_store (unsigned int regno, rtx base_reg,
7535 HOST_WIDE_INT frame_bias, HOST_WIDE_INT base_ofs)
7536 {
7537 rtx reg = gen_rtx_REG (DImode, regno);
7538 emit_frame_store_1 (reg, base_reg, frame_bias, base_ofs, reg);
7539 }
7540
7541 /* Compute the frame size. SIZE is the size of the "naked" frame
7542 and SA_SIZE is the size of the register save area. */
7543
7544 static HOST_WIDE_INT
7545 compute_frame_size (HOST_WIDE_INT size, HOST_WIDE_INT sa_size)
7546 {
7547 if (TARGET_ABI_OPEN_VMS)
7548 return ALPHA_ROUND (sa_size
7549 + (alpha_procedure_type == PT_STACK ? 8 : 0)
7550 + size
7551 + crtl->args.pretend_args_size);
7552 else
7553 return ALPHA_ROUND (crtl->outgoing_args_size)
7554 + sa_size
7555 + ALPHA_ROUND (size
7556 + crtl->args.pretend_args_size);
7557 }
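/* A worked example (illustrative only, assuming ALPHA_ROUND rounds up to a
   16-byte boundary): on OSF with outgoing_args_size == 20, sa_size == 16,
   a 40-byte frame and no pretend args, this yields

     ALPHA_ROUND (20) + 16 + ALPHA_ROUND (40) == 32 + 16 + 48 == 96 bytes,

   keeping both the outgoing-args block and the total frame 16-byte aligned
   while the register save area sits between them.  */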
7558
7559 /* Write function prologue. */
7560
7561 /* On VMS we have two kinds of functions:
7562 
7563    - stack frame (PROC_STACK)
7564      these are 'normal' functions with local variables which
7565      call other functions
7566    - register frame (PROC_REGISTER)
7567      keeps all data in registers, needs no stack
7568 
7569    We must pass this to the assembler so it can generate the
7570    proper pdsc (procedure descriptor).
7571    This is done with the '.pdesc' directive.
7572 
7573    On non-VMS targets, we don't really differentiate between the two,
7574    as we can simply allocate stack without saving registers.  */
7575
7576 void
7577 alpha_expand_prologue (void)
7578 {
7579 /* Registers to save. */
7580 unsigned long imask = 0;
7581 unsigned long fmask = 0;
7582 /* Stack space needed for pushing registers clobbered by us. */
7583 HOST_WIDE_INT sa_size, sa_bias;
7584 /* Complete stack size needed. */
7585 HOST_WIDE_INT frame_size;
7586 /* Probed stack size; it additionally includes the size of
7587 the "reserve region" if any. */
7588 HOST_WIDE_INT probed_size;
7589 /* Offset from base reg to register save area. */
7590 HOST_WIDE_INT reg_offset;
7591 rtx sa_reg;
7592 int i;
7593
7594 sa_size = alpha_sa_size ();
7595 frame_size = compute_frame_size (get_frame_size (), sa_size);
7596
7597 if (flag_stack_usage_info)
7598 current_function_static_stack_size = frame_size;
7599
7600 if (TARGET_ABI_OPEN_VMS)
7601 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
7602 else
7603 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
7604
7605 alpha_sa_mask (&imask, &fmask);
7606
7607 /* Emit an insn to reload GP, if needed. */
7608 if (TARGET_ABI_OSF)
7609 {
7610 alpha_function_needs_gp = alpha_does_function_need_gp ();
7611 if (alpha_function_needs_gp)
7612 emit_insn (gen_prologue_ldgp ());
7613 }
7614
7615 /* TARGET_PROFILING_NEEDS_GP actually implies that we need to insert
7616 the call to mcount ourselves, rather than having the linker do it
7617 magically in response to -pg. Since _mcount has special linkage,
7618 don't represent the call as a call. */
7619 if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
7620 emit_insn (gen_prologue_mcount ());
7621
7622 /* Adjust the stack by the frame size. If the frame size is > 4096
7623 bytes, we need to be sure we probe somewhere in the first and last
7624 4096 bytes (we can probably get away without the latter test) and
7625 every 8192 bytes in between. If the frame size is > 32768, we
7626 do this in a loop. Otherwise, we generate the explicit probe
7627 instructions.
7628
7629 Note that we are only allowed to adjust sp once in the prologue. */
7630
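/* Worked example (illustrative): for probed_size == 20000 the loop below
   probes at sp-4096 and sp-12288, leaving probed == 20480 on exit.  If no
   registers are being saved, 20000 > 20480 - 4096, so the tail probe at
   sp-20000 is also emitted before the single sp adjustment.  */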
7631 probed_size = frame_size;
7632 if (flag_stack_check)
7633 probed_size += STACK_CHECK_PROTECT;
7634
7635 if (probed_size <= 32768)
7636 {
7637 if (probed_size > 4096)
7638 {
7639 int probed;
7640
7641 for (probed = 4096; probed < probed_size; probed += 8192)
7642 emit_insn (gen_probe_stack (GEN_INT (-probed)));
7643
7644 /* We only have to do this probe if we aren't saving registers or
7645 if we are probing beyond the frame because of -fstack-check. */
7646 if ((sa_size == 0 && probed_size > probed - 4096)
7647 || flag_stack_check)
7648 emit_insn (gen_probe_stack (GEN_INT (-probed_size)));
7649 }
7650
7651 if (frame_size != 0)
7652 FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
7653 GEN_INT (-frame_size))));
7654 }
7655 else
7656 {
7657 /* Here we generate code to set R22 to SP + 4096 and set R23 to the
7658 number of 8192 byte blocks to probe. We then probe each block
7659 in the loop and then set SP to the proper location. If the
7660 amount remaining is > 4096, we have to do one more probe if we
7661 are not saving any registers or if we are probing beyond the
7662 frame because of -fstack-check. */
7663
7664 HOST_WIDE_INT blocks = (probed_size + 4096) / 8192;
7665 HOST_WIDE_INT leftover = probed_size + 4096 - blocks * 8192;
7666 rtx ptr = gen_rtx_REG (DImode, 22);
7667 rtx count = gen_rtx_REG (DImode, 23);
7668 rtx seq;
7669
7670 emit_move_insn (count, GEN_INT (blocks));
7671 emit_insn (gen_adddi3 (ptr, stack_pointer_rtx, GEN_INT (4096)));
7672
7673 /* Because of the difficulty in emitting a new basic block this
7674 late in the compilation, generate the loop as a single insn. */
7675 emit_insn (gen_prologue_stack_probe_loop (count, ptr));
7676
7677 if ((leftover > 4096 && sa_size == 0) || flag_stack_check)
7678 {
7679 rtx last = gen_rtx_MEM (DImode,
7680 plus_constant (Pmode, ptr, -leftover));
7681 MEM_VOLATILE_P (last) = 1;
7682 emit_move_insn (last, const0_rtx);
7683 }
7684
7685 if (flag_stack_check)
7686 {
7687 /* If -fstack-check is specified we have to load the entire
7688 constant into a register and subtract from the sp in one go,
7689 because the probed stack size is not equal to the frame size. */
7690 HOST_WIDE_INT lo, hi;
7691 lo = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
7692 hi = frame_size - lo;
7693
7694 emit_move_insn (ptr, GEN_INT (hi));
7695 emit_insn (gen_adddi3 (ptr, ptr, GEN_INT (lo)));
7696 seq = emit_insn (gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx,
7697 ptr));
7698 }
7699 else
7700 {
7701 seq = emit_insn (gen_adddi3 (stack_pointer_rtx, ptr,
7702 GEN_INT (-leftover)));
7703 }
7704
7705 /* This alternative is special, because the DWARF code cannot
7706    possibly intuit through the loop above.  So we invent this
7707    note for it to look at instead.  */
7708 RTX_FRAME_RELATED_P (seq) = 1;
7709 add_reg_note (seq, REG_FRAME_RELATED_EXPR,
7710 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
7711 plus_constant (Pmode, stack_pointer_rtx,
7712 -frame_size)));
7713 }
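/* Worked example (illustrative): probed_size == 100000 gives
   blocks == (100000 + 4096) / 8192 == 12 and
   leftover == 104096 - 12 * 8192 == 5792.  R22 starts at sp+4096, the loop
   probes twelve 8192-byte blocks, and since leftover > 4096 the extra
   probe at ptr-5792 fires when no registers are saved.  The final sp is
   ptr - leftover, i.e. exactly frame_size below the original sp when
   -fstack-check is not in effect.  */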
7714
7715 /* Cope with very large offsets to the register save area. */
7716 sa_bias = 0;
7717 sa_reg = stack_pointer_rtx;
7718 if (reg_offset + sa_size > 0x8000)
7719 {
7720 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
7721 rtx sa_bias_rtx;
7722
7723 if (low + sa_size <= 0x8000)
7724 sa_bias = reg_offset - low, reg_offset = low;
7725 else
7726 sa_bias = reg_offset, reg_offset = 0;
7727
7728 sa_reg = gen_rtx_REG (DImode, 24);
7729 sa_bias_rtx = GEN_INT (sa_bias);
7730
7731 if (add_operand (sa_bias_rtx, DImode))
7732 emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_bias_rtx));
7733 else
7734 {
7735 emit_move_insn (sa_reg, sa_bias_rtx);
7736 emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_reg));
7737 }
7738 }
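/* Worked example (illustrative): reg_offset == 0x9000 with sa_size == 0x100
   gives low == ((0x9000 & 0xffff) ^ 0x8000) - 0x8000 == -0x7000.  Since
   -0x7000 + 0x100 fits a signed 16-bit displacement, sa_bias becomes
   0x10000 and reg_offset becomes -0x7000: each save then addresses
   (sp + 0x10000) - 0x7000 == sp + 0x9000, as required.  */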
7739
7740 /* Save regs in stack order. Beginning with VMS PV. */
7741 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7742 emit_frame_store (REG_PV, stack_pointer_rtx, 0, 0);
7743
7744 /* Save register RA next. */
7745 if (imask & (1UL << REG_RA))
7746 {
7747 emit_frame_store (REG_RA, sa_reg, sa_bias, reg_offset);
7748 imask &= ~(1UL << REG_RA);
7749 reg_offset += 8;
7750 }
7751
7752 /* Now save any other registers required to be saved. */
7753 for (i = 0; i < 31; i++)
7754 if (imask & (1UL << i))
7755 {
7756 emit_frame_store (i, sa_reg, sa_bias, reg_offset);
7757 reg_offset += 8;
7758 }
7759
7760 for (i = 0; i < 31; i++)
7761 if (fmask & (1UL << i))
7762 {
7763 emit_frame_store (i+32, sa_reg, sa_bias, reg_offset);
7764 reg_offset += 8;
7765 }
7766
7767 if (TARGET_ABI_OPEN_VMS)
7768 {
7769 /* Register frame procedures save the fp. */
7770 if (alpha_procedure_type == PT_REGISTER)
7771 {
7772 rtx insn = emit_move_insn (gen_rtx_REG (DImode, vms_save_fp_regno),
7773 hard_frame_pointer_rtx);
7774 add_reg_note (insn, REG_CFA_REGISTER, NULL);
7775 RTX_FRAME_RELATED_P (insn) = 1;
7776 }
7777
7778 if (alpha_procedure_type != PT_NULL && vms_base_regno != REG_PV)
7779 emit_insn (gen_force_movdi (gen_rtx_REG (DImode, vms_base_regno),
7780 gen_rtx_REG (DImode, REG_PV)));
7781
7782 if (alpha_procedure_type != PT_NULL
7783 && vms_unwind_regno == HARD_FRAME_POINTER_REGNUM)
7784 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
7785
7786 /* If we have to allocate space for outgoing args, do it now. */
7787 if (crtl->outgoing_args_size != 0)
7788 {
7789 rtx seq
7790 = emit_move_insn (stack_pointer_rtx,
7791 plus_constant
7792 (Pmode, hard_frame_pointer_rtx,
7793 - (ALPHA_ROUND
7794 (crtl->outgoing_args_size))));
7795
7796 /* Only set FRAME_RELATED_P on the stack adjustment we just emitted
7797 if ! frame_pointer_needed. Setting the bit will change the CFA
7798 computation rule to use sp again, which would be wrong if we had
7799 frame_pointer_needed, as this means sp might move unpredictably
7800 later on.
7801
7802 Also, note that
7803 frame_pointer_needed
7804 => vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
7805 and
7806 crtl->outgoing_args_size != 0
7807 => alpha_procedure_type != PT_NULL,
7808
7809 so when we are not setting the bit here, we are guaranteed to
7810 have emitted an FRP frame pointer update just before. */
7811 RTX_FRAME_RELATED_P (seq) = ! frame_pointer_needed;
7812 }
7813 }
7814 else
7815 {
7816 /* If we need a frame pointer, set it from the stack pointer. */
7817 if (frame_pointer_needed)
7818 {
7819 if (TARGET_CAN_FAULT_IN_PROLOGUE)
7820 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
7821 else
7822 /* This must always be the last instruction in the
7823 prologue, thus we emit a special move + clobber. */
7824 FRP (emit_insn (gen_init_fp (hard_frame_pointer_rtx,
7825 stack_pointer_rtx, sa_reg)));
7826 }
7827 }
7828
7829 /* The ABIs for VMS and OSF/1 say that while we can schedule insns into
7830 the prologue, for exception handling reasons, we cannot do this for
7831 any insn that might fault. We could prevent this for mems with a
7832 (clobber:BLK (scratch)), but this doesn't work for fp insns. So we
7833 have to prevent all such scheduling with a blockage.
7834
7835 Linux, on the other hand, never bothered to implement OSF/1's
7836 exception handling, and so doesn't care about such things. Anyone
7837 planning to use dwarf2 frame-unwind info can also omit the blockage. */
7838
7839 if (! TARGET_CAN_FAULT_IN_PROLOGUE)
7840 emit_insn (gen_blockage ());
7841 }
7842
7843 /* Count the number of .file directives, so that .loc is up to date. */
7844 int num_source_filenames = 0;
7845
7846 /* Output the textual info surrounding the prologue. */
7847
7848 void
7849 alpha_start_function (FILE *file, const char *fnname,
7850 tree decl ATTRIBUTE_UNUSED)
7851 {
7852 unsigned long imask = 0;
7853 unsigned long fmask = 0;
7854 /* Stack space needed for pushing registers clobbered by us. */
7855 HOST_WIDE_INT sa_size;
7856 /* Complete stack size needed. */
7857 unsigned HOST_WIDE_INT frame_size;
7858 /* The maximum debuggable frame size. */
7859 unsigned HOST_WIDE_INT max_frame_size = 1UL << 31;
7860 /* Offset from base reg to register save area. */
7861 HOST_WIDE_INT reg_offset;
7862 char *entry_label = (char *) alloca (strlen (fnname) + 6);
7863 char *tramp_label = (char *) alloca (strlen (fnname) + 6);
7864 int i;
7865
7866 #if TARGET_ABI_OPEN_VMS
7867 vms_start_function (fnname);
7868 #endif
7869
7870 alpha_fnname = fnname;
7871 sa_size = alpha_sa_size ();
7872 frame_size = compute_frame_size (get_frame_size (), sa_size);
7873
7874 if (TARGET_ABI_OPEN_VMS)
7875 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
7876 else
7877 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
7878
7879 alpha_sa_mask (&imask, &fmask);
7880
7881 /* Issue function start and label. */
7882 if (TARGET_ABI_OPEN_VMS || !flag_inhibit_size_directive)
7883 {
7884 fputs ("\t.ent ", file);
7885 assemble_name (file, fnname);
7886 putc ('\n', file);
7887
7888 /* If the function needs GP, we'll write the "..ng" label there.
7889 Otherwise, do it here. */
7890 if (TARGET_ABI_OSF
7891 && ! alpha_function_needs_gp
7892 && ! cfun->is_thunk)
7893 {
7894 putc ('$', file);
7895 assemble_name (file, fnname);
7896 fputs ("..ng:\n", file);
7897 }
7898 }
7899 /* Nested functions on VMS that are potentially called via trampoline
7900    get a special transfer entry point that loads the called function's
7901    procedure descriptor and static chain.  */
7902 if (TARGET_ABI_OPEN_VMS
7903 && !TREE_PUBLIC (decl)
7904 && DECL_CONTEXT (decl)
7905 && !TYPE_P (DECL_CONTEXT (decl))
7906 && TREE_CODE (DECL_CONTEXT (decl)) != TRANSLATION_UNIT_DECL)
7907 {
7908 strcpy (tramp_label, fnname);
7909 strcat (tramp_label, "..tr");
7910 ASM_OUTPUT_LABEL (file, tramp_label);
7911 fprintf (file, "\tldq $1,24($27)\n");
7912 fprintf (file, "\tldq $27,16($27)\n");
7913 }
7914
7915 strcpy (entry_label, fnname);
7916 if (TARGET_ABI_OPEN_VMS)
7917 strcat (entry_label, "..en");
7918
7919 ASM_OUTPUT_LABEL (file, entry_label);
7920 inside_function = TRUE;
7921
7922 if (TARGET_ABI_OPEN_VMS)
7923 fprintf (file, "\t.base $%d\n", vms_base_regno);
7924
7925 if (TARGET_ABI_OSF
7926 && TARGET_IEEE_CONFORMANT
7927 && !flag_inhibit_size_directive)
7928 {
7929 /* Set flags in procedure descriptor to request IEEE-conformant
7930 math-library routines. The value we set it to is PDSC_EXC_IEEE
7931 (/usr/include/pdsc.h). */
7932 fputs ("\t.eflag 48\n", file);
7933 }
7934
7935 /* Set up offsets to alpha virtual arg/local debugging pointer. */
7936 alpha_auto_offset = -frame_size + crtl->args.pretend_args_size;
7937 alpha_arg_offset = -frame_size + 48;
7938
7939 /* Describe our frame. If the frame size is larger than an integer,
7940 print it as zero to avoid an assembler error. We won't be
7941 properly describing such a frame, but that's the best we can do. */
7942 if (TARGET_ABI_OPEN_VMS)
7943 fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,"
7944 HOST_WIDE_INT_PRINT_DEC "\n",
7945 vms_unwind_regno,
7946 frame_size >= (1UL << 31) ? 0 : frame_size,
7947 reg_offset);
7948 else if (!flag_inhibit_size_directive)
7949 fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,%d\n",
7950 (frame_pointer_needed
7951 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM),
7952 frame_size >= max_frame_size ? 0 : frame_size,
7953 crtl->args.pretend_args_size);
7954
7955 /* Describe which registers were spilled. */
7956 if (TARGET_ABI_OPEN_VMS)
7957 {
7958 if (imask)
7959 /* ??? Does VMS care if mask contains ra? The old code didn't
7960 set it, so I don't here. */
7961 fprintf (file, "\t.mask 0x%lx,0\n", imask & ~(1UL << REG_RA));
7962 if (fmask)
7963 fprintf (file, "\t.fmask 0x%lx,0\n", fmask);
7964 if (alpha_procedure_type == PT_REGISTER)
7965 fprintf (file, "\t.fp_save $%d\n", vms_save_fp_regno);
7966 }
7967 else if (!flag_inhibit_size_directive)
7968 {
7969 if (imask)
7970 {
7971 fprintf (file, "\t.mask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", imask,
7972 frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
7973
7974 for (i = 0; i < 32; ++i)
7975 if (imask & (1UL << i))
7976 reg_offset += 8;
7977 }
7978
7979 if (fmask)
7980 fprintf (file, "\t.fmask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", fmask,
7981 frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
7982 }
7983
7984 #if TARGET_ABI_OPEN_VMS
7985 /* If a user condition handler has been installed at some point, emit
7986 the procedure descriptor bits to point the Condition Handling Facility
7987 at the indirection wrapper, and state the fp offset at which the user
7988 handler may be found. */
7989 if (cfun->machine->uses_condition_handler)
7990 {
7991 fprintf (file, "\t.handler __gcc_shell_handler\n");
7992 fprintf (file, "\t.handler_data %d\n", VMS_COND_HANDLER_FP_OFFSET);
7993 }
7994
7995 #ifdef TARGET_VMS_CRASH_DEBUG
7996 /* Support of minimal traceback info. */
7997 switch_to_section (readonly_data_section);
7998 fprintf (file, "\t.align 3\n");
7999 assemble_name (file, fnname); fputs ("..na:\n", file);
8000 fputs ("\t.ascii \"", file);
8001 assemble_name (file, fnname);
8002 fputs ("\\0\"\n", file);
8003 switch_to_section (text_section);
8004 #endif
8005 #endif /* TARGET_ABI_OPEN_VMS */
8006 }
8007
8008 /* Emit the .prologue note at the scheduled end of the prologue. */
8009
8010 static void
8011 alpha_output_function_end_prologue (FILE *file)
8012 {
8013 if (TARGET_ABI_OPEN_VMS)
8014 fputs ("\t.prologue\n", file);
8015 else if (!flag_inhibit_size_directive)
8016 fprintf (file, "\t.prologue %d\n",
8017 alpha_function_needs_gp || cfun->is_thunk);
8018 }
8019
8020 /* Write function epilogue. */
8021
8022 void
8023 alpha_expand_epilogue (void)
8024 {
8025 /* Registers to save. */
8026 unsigned long imask = 0;
8027 unsigned long fmask = 0;
8028 /* Stack space needed for pushing registers clobbered by us. */
8029 HOST_WIDE_INT sa_size;
8030 /* Complete stack size needed. */
8031 HOST_WIDE_INT frame_size;
8032 /* Offset from base reg to register save area. */
8033 HOST_WIDE_INT reg_offset;
8034 int fp_is_frame_pointer, fp_offset;
8035 rtx sa_reg, sa_reg_exp = NULL;
8036 rtx sp_adj1, sp_adj2, mem, reg, insn;
8037 rtx eh_ofs;
8038 rtx cfa_restores = NULL_RTX;
8039 int i;
8040
8041 sa_size = alpha_sa_size ();
8042 frame_size = compute_frame_size (get_frame_size (), sa_size);
8043
8044 if (TARGET_ABI_OPEN_VMS)
8045 {
8046 if (alpha_procedure_type == PT_STACK)
8047 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
8048 else
8049 reg_offset = 0;
8050 }
8051 else
8052 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
8053
8054 alpha_sa_mask (&imask, &fmask);
8055
8056 fp_is_frame_pointer
8057 = (TARGET_ABI_OPEN_VMS
8058 ? alpha_procedure_type == PT_STACK
8059 : frame_pointer_needed);
8060 fp_offset = 0;
8061 sa_reg = stack_pointer_rtx;
8062
8063 if (crtl->calls_eh_return)
8064 eh_ofs = EH_RETURN_STACKADJ_RTX;
8065 else
8066 eh_ofs = NULL_RTX;
8067
8068 if (sa_size)
8069 {
8070 /* If we have a frame pointer, restore SP from it. */
8071 if (TARGET_ABI_OPEN_VMS
8072 ? vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
8073 : frame_pointer_needed)
8074 emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
8075
8076 /* Cope with very large offsets to the register save area. */
8077 if (reg_offset + sa_size > 0x8000)
8078 {
8079 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
8080 HOST_WIDE_INT bias;
8081
8082 if (low + sa_size <= 0x8000)
8083 bias = reg_offset - low, reg_offset = low;
8084 else
8085 bias = reg_offset, reg_offset = 0;
8086
8087 sa_reg = gen_rtx_REG (DImode, 22);
8088 sa_reg_exp = plus_constant (Pmode, stack_pointer_rtx, bias);
8089
8090 emit_move_insn (sa_reg, sa_reg_exp);
8091 }
8092
8093 /* Restore registers in order, excepting a true frame pointer. */
8094
8095 mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg, reg_offset));
8096 reg = gen_rtx_REG (DImode, REG_RA);
8097 emit_move_insn (reg, mem);
8098 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
8099
8100 reg_offset += 8;
8101 imask &= ~(1UL << REG_RA);
8102
8103 for (i = 0; i < 31; ++i)
8104 if (imask & (1UL << i))
8105 {
8106 if (i == HARD_FRAME_POINTER_REGNUM && fp_is_frame_pointer)
8107 fp_offset = reg_offset;
8108 else
8109 {
8110 mem = gen_frame_mem (DImode,
8111 plus_constant (Pmode, sa_reg,
8112 reg_offset));
8113 reg = gen_rtx_REG (DImode, i);
8114 emit_move_insn (reg, mem);
8115 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
8116 cfa_restores);
8117 }
8118 reg_offset += 8;
8119 }
8120
8121 for (i = 0; i < 31; ++i)
8122 if (fmask & (1UL << i))
8123 {
8124 mem = gen_frame_mem (DFmode, plus_constant (Pmode, sa_reg,
8125 reg_offset));
8126 reg = gen_rtx_REG (DFmode, i+32);
8127 emit_move_insn (reg, mem);
8128 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
8129 reg_offset += 8;
8130 }
8131 }
8132
8133 if (frame_size || eh_ofs)
8134 {
8135 sp_adj1 = stack_pointer_rtx;
8136
8137 if (eh_ofs)
8138 {
8139 sp_adj1 = gen_rtx_REG (DImode, 23);
8140 emit_move_insn (sp_adj1,
8141 gen_rtx_PLUS (Pmode, stack_pointer_rtx, eh_ofs));
8142 }
8143
8144 /* If the stack size is large, begin computation into a temporary
8145 register so as not to interfere with a potential fp restore,
8146 which must be consecutive with an SP restore. */
8147 if (frame_size < 32768 && !cfun->calls_alloca)
8148 sp_adj2 = GEN_INT (frame_size);
8149 else if (frame_size < 0x40007fffL)
8150 {
8151 int low = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
8152
8153 sp_adj2 = plus_constant (Pmode, sp_adj1, frame_size - low);
8154 if (sa_reg_exp && rtx_equal_p (sa_reg_exp, sp_adj2))
8155 sp_adj1 = sa_reg;
8156 else
8157 {
8158 sp_adj1 = gen_rtx_REG (DImode, 23);
8159 emit_move_insn (sp_adj1, sp_adj2);
8160 }
8161 sp_adj2 = GEN_INT (low);
8162 }
8163 else
8164 {
8165 rtx tmp = gen_rtx_REG (DImode, 23);
8166 sp_adj2 = alpha_emit_set_const (tmp, DImode, frame_size, 3, false);
8167 if (!sp_adj2)
8168 {
8169 /* We can't drop new things to memory this late, afaik,
8170 so build it up by pieces. */
8171 sp_adj2 = alpha_emit_set_long_const (tmp, frame_size,
8172 -(frame_size < 0));
8173 gcc_assert (sp_adj2);
8174 }
8175 }
8176
8177 /* From now on, things must be in order. So emit blockages. */
8178
8179 /* Restore the frame pointer. */
8180 if (fp_is_frame_pointer)
8181 {
8182 emit_insn (gen_blockage ());
8183 mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg,
8184 fp_offset));
8185 emit_move_insn (hard_frame_pointer_rtx, mem);
8186 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
8187 hard_frame_pointer_rtx, cfa_restores);
8188 }
8189 else if (TARGET_ABI_OPEN_VMS)
8190 {
8191 emit_insn (gen_blockage ());
8192 emit_move_insn (hard_frame_pointer_rtx,
8193 gen_rtx_REG (DImode, vms_save_fp_regno));
8194 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
8195 hard_frame_pointer_rtx, cfa_restores);
8196 }
8197
8198 /* Restore the stack pointer. */
8199 emit_insn (gen_blockage ());
8200 if (sp_adj2 == const0_rtx)
8201 insn = emit_move_insn (stack_pointer_rtx, sp_adj1);
8202 else
8203 insn = emit_move_insn (stack_pointer_rtx,
8204 gen_rtx_PLUS (DImode, sp_adj1, sp_adj2));
8205 REG_NOTES (insn) = cfa_restores;
8206 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
8207 RTX_FRAME_RELATED_P (insn) = 1;
8208 }
8209 else
8210 {
8211 gcc_assert (cfa_restores == NULL);
8212
8213 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_REGISTER)
8214 {
8215 emit_insn (gen_blockage ());
8216 insn = emit_move_insn (hard_frame_pointer_rtx,
8217 gen_rtx_REG (DImode, vms_save_fp_regno));
8218 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
8219 RTX_FRAME_RELATED_P (insn) = 1;
8220 }
8221 }
8222 }
8223
8224 /* Output the rest of the textual info surrounding the epilogue. */
8225
8226 void
8227 alpha_end_function (FILE *file, const char *fnname, tree decl ATTRIBUTE_UNUSED)
8228 {
8229 rtx insn;
8230
8231 /* We output a nop after noreturn calls at the very end of the function to
8232 ensure that the return address always remains in the caller's code range,
8233 as not doing so might confuse unwinding engines. */
8234 insn = get_last_insn ();
8235 if (!INSN_P (insn))
8236 insn = prev_active_insn (insn);
8237 if (insn && CALL_P (insn))
8238 output_asm_insn (get_insn_template (CODE_FOR_nop, NULL), NULL);
8239
8240 #if TARGET_ABI_OPEN_VMS
8241 /* Write the linkage entries. */
8242 alpha_write_linkage (file, fnname);
8243 #endif
8244
8245 /* End the function. */
8246 if (TARGET_ABI_OPEN_VMS
8247 || !flag_inhibit_size_directive)
8248 {
8249 fputs ("\t.end ", file);
8250 assemble_name (file, fnname);
8251 putc ('\n', file);
8252 }
8253 inside_function = FALSE;
8254 }
8255
8256 #if TARGET_ABI_OSF
8257 /* Emit a tail call to FUNCTION after adjusting THIS by DELTA.
8258
8259 In order to avoid the hordes of differences between generated code
8260 with and without TARGET_EXPLICIT_RELOCS, and to avoid duplicating
8261 lots of code loading up large constants, generate rtl and emit it
8262 instead of going straight to text.
8263
8264 Not sure why this idea hasn't been explored before... */
8265
8266 static void
8267 alpha_output_mi_thunk_osf (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
8268 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8269 tree function)
8270 {
8271 HOST_WIDE_INT hi, lo;
8272 rtx this_rtx, insn, funexp;
8273
8274 /* We always require a valid GP. */
8275 emit_insn (gen_prologue_ldgp ());
8276 emit_note (NOTE_INSN_PROLOGUE_END);
8277
8278 /* Find the "this" pointer. If the function returns a structure,
8279 the structure return pointer is in $16. */
8280 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
8281 this_rtx = gen_rtx_REG (Pmode, 17);
8282 else
8283 this_rtx = gen_rtx_REG (Pmode, 16);
8284
8285 /* Add DELTA. When possible we use ldah+lda. Otherwise load the
8286 entire constant for the add. */
8287 lo = ((delta & 0xffff) ^ 0x8000) - 0x8000;
8288 hi = (((delta - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8289 if (hi + lo == delta)
8290 {
8291 if (hi)
8292 emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (hi)));
8293 if (lo)
8294 emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (lo)));
8295 }
8296 else
8297 {
8298 rtx tmp = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 0),
8299 delta, -(delta < 0));
8300 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
8301 }
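/* Worked example (illustrative): delta == 0x18000 splits as
   lo == ((0x8000 ^ 0x8000) - 0x8000) == -0x8000 and
   hi == ((0x18000 + 0x8000) ^ 0x80000000) - 0x80000000 == 0x20000,
   so hi + lo == delta and the addition becomes one ldah (adding 2 << 16)
   followed by one lda (adding -0x8000).  */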
8302
8303 /* Add a delta stored in the vtable at VCALL_OFFSET. */
8304 if (vcall_offset)
8305 {
8306 rtx tmp, tmp2;
8307
8308 tmp = gen_rtx_REG (Pmode, 0);
8309 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
8310
8311 lo = ((vcall_offset & 0xffff) ^ 0x8000) - 0x8000;
8312 hi = (((vcall_offset - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8313 if (hi + lo == vcall_offset)
8314 {
8315 if (hi)
8316 emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (hi)));
8317 }
8318 else
8319 {
8320 tmp2 = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 1),
8321 vcall_offset, -(vcall_offset < 0));
8322 emit_insn (gen_adddi3 (tmp, tmp, tmp2));
8323 lo = 0;
8324 }
8325 if (lo)
8326 tmp2 = gen_rtx_PLUS (Pmode, tmp, GEN_INT (lo));
8327 else
8328 tmp2 = tmp;
8329 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp2));
8330
8331 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
8332 }
8333
8334 /* Generate a tail call to the target function. */
8335 if (! TREE_USED (function))
8336 {
8337 assemble_external (function);
8338 TREE_USED (function) = 1;
8339 }
8340 funexp = XEXP (DECL_RTL (function), 0);
8341 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8342 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx));
8343 SIBLING_CALL_P (insn) = 1;
8344
8345 /* Run just enough of rest_of_compilation to get the insns emitted.
8346 There's not really enough bulk here to make other passes such as
8347 instruction scheduling worthwhile.  Note that use_thunk calls
8348 assemble_start_function and assemble_end_function. */
8349 insn = get_insns ();
8350 shorten_branches (insn);
8351 final_start_function (insn, file, 1);
8352 final (insn, file, 1);
8353 final_end_function ();
8354 }
8355 #endif /* TARGET_ABI_OSF */
8356
8357 /* Debugging support. */
8358
8359 #include "gstab.h"
8360
8361 /* Name of the file containing the current function. */
8362
8363 static const char *current_function_file = "";
8364
8365 /* Offsets to alpha virtual arg/local debugging pointers. */
8366
8367 long alpha_arg_offset;
8368 long alpha_auto_offset;
8369
8370 /* Emit a new filename to a stream. */
8371
8372 void
8373 alpha_output_filename (FILE *stream, const char *name)
8374 {
8375 static int first_time = TRUE;
8376
8377 if (first_time)
8378 {
8379 first_time = FALSE;
8380 ++num_source_filenames;
8381 current_function_file = name;
8382 fprintf (stream, "\t.file\t%d ", num_source_filenames);
8383 output_quoted_string (stream, name);
8384 fprintf (stream, "\n");
8385 }
8386
8387 else if (name != current_function_file
8388 && strcmp (name, current_function_file) != 0)
8389 {
8390 ++num_source_filenames;
8391 current_function_file = name;
8392 fprintf (stream, "\t.file\t%d ", num_source_filenames);
8393
8394 output_quoted_string (stream, name);
8395 fprintf (stream, "\n");
8396 }
8397 }
8398
8399 /* Structure to show the current status of registers and memory. */
8400
8401 struct shadow_summary
8402 {
8403 struct {
8404 unsigned int i : 31; /* Mask of int regs */
8405 unsigned int fp : 31; /* Mask of fp regs */
8406 unsigned int mem : 1; /* mem == imem | fpmem */
8407 } used, defd;
8408 };
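/* Note: the masks are 31 bits wide because $31 and $f31 are hardwired to
   zero on Alpha and are deliberately skipped by summarize_insn below, so
   each of USED and DEFD packs into a single 64-bit word.  */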
8409
8410 /* Summarize the effects of expression X on the machine.  Update SUM, a pointer
8411 to the summary structure. SET is nonzero if the insn is setting the
8412 object, otherwise zero. */
8413
8414 static void
8415 summarize_insn (rtx x, struct shadow_summary *sum, int set)
8416 {
8417 const char *format_ptr;
8418 int i, j;
8419
8420 if (x == 0)
8421 return;
8422
8423 switch (GET_CODE (x))
8424 {
8425 /* ??? Note that this case would be incorrect if the Alpha had a
8426 ZERO_EXTRACT in SET_DEST. */
8427 case SET:
8428 summarize_insn (SET_SRC (x), sum, 0);
8429 summarize_insn (SET_DEST (x), sum, 1);
8430 break;
8431
8432 case CLOBBER:
8433 summarize_insn (XEXP (x, 0), sum, 1);
8434 break;
8435
8436 case USE:
8437 summarize_insn (XEXP (x, 0), sum, 0);
8438 break;
8439
8440 case ASM_OPERANDS:
8441 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
8442 summarize_insn (ASM_OPERANDS_INPUT (x, i), sum, 0);
8443 break;
8444
8445 case PARALLEL:
8446 for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
8447 summarize_insn (XVECEXP (x, 0, i), sum, 0);
8448 break;
8449
8450 case SUBREG:
8451 summarize_insn (SUBREG_REG (x), sum, 0);
8452 break;
8453
8454 case REG:
8455 {
8456 int regno = REGNO (x);
8457 unsigned long mask = ((unsigned long) 1) << (regno % 32);
8458
8459 if (regno == 31 || regno == 63)
8460 break;
8461
8462 if (set)
8463 {
8464 if (regno < 32)
8465 sum->defd.i |= mask;
8466 else
8467 sum->defd.fp |= mask;
8468 }
8469 else
8470 {
8471 if (regno < 32)
8472 sum->used.i |= mask;
8473 else
8474 sum->used.fp |= mask;
8475 }
8476 }
8477 break;
8478
8479 case MEM:
8480 if (set)
8481 sum->defd.mem = 1;
8482 else
8483 sum->used.mem = 1;
8484
8485 /* Find the regs used in memory address computation: */
8486 summarize_insn (XEXP (x, 0), sum, 0);
8487 break;
8488
8489 case CONST_INT: case CONST_DOUBLE:
8490 case SYMBOL_REF: case LABEL_REF: case CONST:
8491 case SCRATCH: case ASM_INPUT:
8492 break;
8493
8494 /* Handle common unary and binary ops for efficiency. */
8495 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
8496 case MOD: case UDIV: case UMOD: case AND: case IOR:
8497 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
8498 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
8499 case NE: case EQ: case GE: case GT: case LE:
8500 case LT: case GEU: case GTU: case LEU: case LTU:
8501 summarize_insn (XEXP (x, 0), sum, 0);
8502 summarize_insn (XEXP (x, 1), sum, 0);
8503 break;
8504
8505 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
8506 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
8507 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
8508 case SQRT: case FFS:
8509 summarize_insn (XEXP (x, 0), sum, 0);
8510 break;
8511
8512 default:
8513 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
8514 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8515 switch (format_ptr[i])
8516 {
8517 case 'e':
8518 summarize_insn (XEXP (x, i), sum, 0);
8519 break;
8520
8521 case 'E':
8522 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8523 summarize_insn (XVECEXP (x, i, j), sum, 0);
8524 break;
8525
8526 case 'i':
8527 break;
8528
8529 default:
8530 gcc_unreachable ();
8531 }
8532 }
8533 }
8534
8535 /* Ensure a sufficient number of `trapb' insns are in the code when
8536 the user requests code with a trap precision of functions or
8537 instructions.
8538
8539 In naive mode, when the user requests a trap-precision of
8540 "instruction", a trapb is needed after every instruction that may
8541 generate a trap. This ensures that the code is resumption safe but
8542 it is also slow.
8543
8544 When optimizations are turned on, we delay issuing a trapb as long
8545 as possible. In this context, a trap shadow is the sequence of
8546 instructions that starts with a (potentially) trap generating
8547 instruction and extends to the next trapb or call_pal instruction
8548 (but GCC never generates call_pal by itself). We can delay (and
8549 therefore sometimes omit) a trapb subject to the following
8550 conditions:
8551
8552 (a) On entry to the trap shadow, if any Alpha register or memory
8553 location contains a value that is used as an operand value by some
8554 instruction in the trap shadow (live on entry), then no instruction
8555 in the trap shadow may modify the register or memory location.
8556
8557 (b) Within the trap shadow, the computation of the base register
8558 for a memory load or store instruction may not involve using the
8559 result of an instruction that might generate an UNPREDICTABLE
8560 result.
8561
8562 (c) Within the trap shadow, no register may be used more than once
8563 as a destination register. (This is to make life easier for the
8564 trap-handler.)
8565
8566 (d) The trap shadow may not include any branch instructions. */
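/* For instance (illustrative only), in a shadow such as

     addt $f1,$f2,$f3	# potentially trapping
     mult $f3,$f4,$f5	# ok: new destination, operands not clobbered
     addt $f6,$f7,$f3	# violates (c): $f3 reused as a destination

   the third instruction forces a trapb to be emitted first, closing the
   shadow so the trap handler can still identify each result.  */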
8567
8568 static void
8569 alpha_handle_trap_shadows (void)
8570 {
8571 struct shadow_summary shadow;
8572 int trap_pending, exception_nesting;
8573 rtx i, n;
8574
8575 trap_pending = 0;
8576 exception_nesting = 0;
8577 shadow.used.i = 0;
8578 shadow.used.fp = 0;
8579 shadow.used.mem = 0;
8580 shadow.defd = shadow.used;
8581
8582 for (i = get_insns (); i ; i = NEXT_INSN (i))
8583 {
8584 if (NOTE_P (i))
8585 {
8586 switch (NOTE_KIND (i))
8587 {
8588 case NOTE_INSN_EH_REGION_BEG:
8589 exception_nesting++;
8590 if (trap_pending)
8591 goto close_shadow;
8592 break;
8593
8594 case NOTE_INSN_EH_REGION_END:
8595 exception_nesting--;
8596 if (trap_pending)
8597 goto close_shadow;
8598 break;
8599
8600 case NOTE_INSN_EPILOGUE_BEG:
8601 if (trap_pending && alpha_tp >= ALPHA_TP_FUNC)
8602 goto close_shadow;
8603 break;
8604 }
8605 }
8606 else if (trap_pending)
8607 {
8608 if (alpha_tp == ALPHA_TP_FUNC)
8609 {
8610 if (JUMP_P (i)
8611 && GET_CODE (PATTERN (i)) == RETURN)
8612 goto close_shadow;
8613 }
8614 else if (alpha_tp == ALPHA_TP_INSN)
8615 {
8616 if (optimize > 0)
8617 {
8618 struct shadow_summary sum;
8619
8620 sum.used.i = 0;
8621 sum.used.fp = 0;
8622 sum.used.mem = 0;
8623 sum.defd = sum.used;
8624
8625 switch (GET_CODE (i))
8626 {
8627 case INSN:
8628 /* Annoyingly, get_attr_trap will die on these. */
8629 if (GET_CODE (PATTERN (i)) == USE
8630 || GET_CODE (PATTERN (i)) == CLOBBER)
8631 break;
8632
8633 summarize_insn (PATTERN (i), &sum, 0);
8634
8635 if ((sum.defd.i & shadow.defd.i)
8636 || (sum.defd.fp & shadow.defd.fp))
8637 {
8638 /* (c) would be violated */
8639 goto close_shadow;
8640 }
8641
8642 /* Combine shadow with summary of current insn: */
8643 shadow.used.i |= sum.used.i;
8644 shadow.used.fp |= sum.used.fp;
8645 shadow.used.mem |= sum.used.mem;
8646 shadow.defd.i |= sum.defd.i;
8647 shadow.defd.fp |= sum.defd.fp;
8648 shadow.defd.mem |= sum.defd.mem;
8649
8650 if ((sum.defd.i & shadow.used.i)
8651 || (sum.defd.fp & shadow.used.fp)
8652 || (sum.defd.mem & shadow.used.mem))
8653 {
8654 /* (a) would be violated (also takes care of (b)) */
8655 gcc_assert (get_attr_trap (i) != TRAP_YES
8656 || (!(sum.defd.i & sum.used.i)
8657 && !(sum.defd.fp & sum.used.fp)));
8658
8659 goto close_shadow;
8660 }
8661 break;
8662
8663 case JUMP_INSN:
8664 case CALL_INSN:
8665 case CODE_LABEL:
8666 goto close_shadow;
8667
8668 default:
8669 gcc_unreachable ();
8670 }
8671 }
8672 else
8673 {
8674 close_shadow:
8675 n = emit_insn_before (gen_trapb (), i);
8676 PUT_MODE (n, TImode);
8677 PUT_MODE (i, TImode);
8678 trap_pending = 0;
8679 shadow.used.i = 0;
8680 shadow.used.fp = 0;
8681 shadow.used.mem = 0;
8682 shadow.defd = shadow.used;
8683 }
8684 }
8685 }
8686
8687 if ((exception_nesting > 0 || alpha_tp >= ALPHA_TP_FUNC)
8688 && NONJUMP_INSN_P (i)
8689 && GET_CODE (PATTERN (i)) != USE
8690 && GET_CODE (PATTERN (i)) != CLOBBER
8691 && get_attr_trap (i) == TRAP_YES)
8692 {
8693 if (optimize && !trap_pending)
8694 summarize_insn (PATTERN (i), &shadow, 0);
8695 trap_pending = 1;
8696 }
8697 }
8698 }
8699
8700 /* Alpha can only issue instruction groups simultaneously if they are
8701 suitably aligned. This is very processor-specific. */
8702 /* There are a number of entries in alphaev4_insn_pipe and alphaev5_insn_pipe
8703 that are marked "fake". These instructions do not exist on that target,
8704 but it is possible to see these insns with deranged combinations of
8705 command-line options, such as "-mtune=ev4 -mmax". Instead of aborting,
8706 choose a result at random. */
8707
8708 enum alphaev4_pipe {
8709 EV4_STOP = 0,
8710 EV4_IB0 = 1,
8711 EV4_IB1 = 2,
8712 EV4_IBX = 4
8713 };
8714
8715 enum alphaev5_pipe {
8716 EV5_STOP = 0,
8717 EV5_NONE = 1,
8718 EV5_E01 = 2,
8719 EV5_E0 = 4,
8720 EV5_E1 = 8,
8721 EV5_FAM = 16,
8722 EV5_FA = 32,
8723 EV5_FM = 64
8724 };
8725
8726 static enum alphaev4_pipe
8727 alphaev4_insn_pipe (rtx insn)
8728 {
8729 if (recog_memoized (insn) < 0)
8730 return EV4_STOP;
8731 if (get_attr_length (insn) != 4)
8732 return EV4_STOP;
8733
8734 switch (get_attr_type (insn))
8735 {
8736 case TYPE_ILD:
8737 case TYPE_LDSYM:
8738 case TYPE_FLD:
8739 case TYPE_LD_L:
8740 return EV4_IBX;
8741
8742 case TYPE_IADD:
8743 case TYPE_ILOG:
8744 case TYPE_ICMOV:
8745 case TYPE_ICMP:
8746 case TYPE_FST:
8747 case TYPE_SHIFT:
8748 case TYPE_IMUL:
8749 case TYPE_FBR:
8750 case TYPE_MVI: /* fake */
8751 return EV4_IB0;
8752
8753 case TYPE_IST:
8754 case TYPE_MISC:
8755 case TYPE_IBR:
8756 case TYPE_JSR:
8757 case TYPE_CALLPAL:
8758 case TYPE_FCPYS:
8759 case TYPE_FCMOV:
8760 case TYPE_FADD:
8761 case TYPE_FDIV:
8762 case TYPE_FMUL:
8763 case TYPE_ST_C:
8764 case TYPE_MB:
8765 case TYPE_FSQRT: /* fake */
8766 case TYPE_FTOI: /* fake */
8767 case TYPE_ITOF: /* fake */
8768 return EV4_IB1;
8769
8770 default:
8771 gcc_unreachable ();
8772 }
8773 }
8774
8775 static enum alphaev5_pipe
8776 alphaev5_insn_pipe (rtx insn)
8777 {
8778 if (recog_memoized (insn) < 0)
8779 return EV5_STOP;
8780 if (get_attr_length (insn) != 4)
8781 return EV5_STOP;
8782
8783 switch (get_attr_type (insn))
8784 {
8785 case TYPE_ILD:
8786 case TYPE_FLD:
8787 case TYPE_LDSYM:
8788 case TYPE_IADD:
8789 case TYPE_ILOG:
8790 case TYPE_ICMOV:
8791 case TYPE_ICMP:
8792 return EV5_E01;
8793
8794 case TYPE_IST:
8795 case TYPE_FST:
8796 case TYPE_SHIFT:
8797 case TYPE_IMUL:
8798 case TYPE_MISC:
8799 case TYPE_MVI:
8800 case TYPE_LD_L:
8801 case TYPE_ST_C:
8802 case TYPE_MB:
8803 case TYPE_FTOI: /* fake */
8804 case TYPE_ITOF: /* fake */
8805 return EV5_E0;
8806
8807 case TYPE_IBR:
8808 case TYPE_JSR:
8809 case TYPE_CALLPAL:
8810 return EV5_E1;
8811
8812 case TYPE_FCPYS:
8813 return EV5_FAM;
8814
8815 case TYPE_FBR:
8816 case TYPE_FCMOV:
8817 case TYPE_FADD:
8818 case TYPE_FDIV:
8819 case TYPE_FSQRT: /* fake */
8820 return EV5_FA;
8821
8822 case TYPE_FMUL:
8823 return EV5_FM;
8824
8825 default:
8826 gcc_unreachable ();
8827 }
8828 }
8829
8830 /* IN_USE is a mask of the slots currently filled within the insn group.
8831 The mask bits come from alphaev4_pipe above. If EV4_IBX is set, then
8832 the insn in EV4_IB0 can be swapped by the hardware into EV4_IB1.
8833
8834 LEN is, of course, the length of the group in bytes. */
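/* Example (illustrative): a load classified EV4_IBX first sets IB0|IBX; if
   the next insn is EV4_IB0, the load is notionally swapped into IB1
   (in_use becomes IB0|IB1|IBX) and both insns share one dual-issue
   group.  */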
8835
8836 static rtx
8837 alphaev4_next_group (rtx insn, int *pin_use, int *plen)
8838 {
8839 int len, in_use;
8840
8841 len = in_use = 0;
8842
8843 if (! INSN_P (insn)
8844 || GET_CODE (PATTERN (insn)) == CLOBBER
8845 || GET_CODE (PATTERN (insn)) == USE)
8846 goto next_and_done;
8847
8848 while (1)
8849 {
8850 enum alphaev4_pipe pipe;
8851
8852 pipe = alphaev4_insn_pipe (insn);
8853 switch (pipe)
8854 {
8855 case EV4_STOP:
8856 /* Force complex instructions to start new groups. */
8857 if (in_use)
8858 goto done;
8859
8860 /* If this is a completely unrecognized insn, it's an asm.
8861 We don't know how long it is, so record length as -1 to
8862 signal a needed realignment. */
8863 if (recog_memoized (insn) < 0)
8864 len = -1;
8865 else
8866 len = get_attr_length (insn);
8867 goto next_and_done;
8868
8869 case EV4_IBX:
8870 if (in_use & EV4_IB0)
8871 {
8872 if (in_use & EV4_IB1)
8873 goto done;
8874 in_use |= EV4_IB1;
8875 }
8876 else
8877 in_use |= EV4_IB0 | EV4_IBX;
8878 break;
8879
8880 case EV4_IB0:
8881 if (in_use & EV4_IB0)
8882 {
8883 if (!(in_use & EV4_IBX) || (in_use & EV4_IB1))
8884 goto done;
8885 in_use |= EV4_IB1;
8886 }
8887 in_use |= EV4_IB0;
8888 break;
8889
8890 case EV4_IB1:
8891 if (in_use & EV4_IB1)
8892 goto done;
8893 in_use |= EV4_IB1;
8894 break;
8895
8896 default:
8897 gcc_unreachable ();
8898 }
8899 len += 4;
8900
8901 /* Haifa doesn't do well scheduling branches. */
8902 if (JUMP_P (insn))
8903 goto next_and_done;
8904
8905 next:
8906 insn = next_nonnote_insn (insn);
8907
8908 if (!insn || ! INSN_P (insn))
8909 goto done;
8910
8911 /* Let Haifa tell us where it thinks insn group boundaries are. */
8912 if (GET_MODE (insn) == TImode)
8913 goto done;
8914
8915 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
8916 goto next;
8917 }
8918
8919 next_and_done:
8920 insn = next_nonnote_insn (insn);
8921
8922 done:
8923 *plen = len;
8924 *pin_use = in_use;
8925 return insn;
8926 }
8927
8928 /* IN_USE is a mask of the slots currently filled within the insn group.
8929 The mask bits come from alphaev5_pipe above. If EV5_E01 is set, then
8930 the insn in EV5_E0 can be swapped by the hardware into EV5_E1.
8931
8932 LEN is, of course, the length of the group in bytes. */
8933
8934 static rtx
8935 alphaev5_next_group (rtx insn, int *pin_use, int *plen)
8936 {
8937 int len, in_use;
8938
8939 len = in_use = 0;
8940
8941 if (! INSN_P (insn)
8942 || GET_CODE (PATTERN (insn)) == CLOBBER
8943 || GET_CODE (PATTERN (insn)) == USE)
8944 goto next_and_done;
8945
8946 while (1)
8947 {
8948 enum alphaev5_pipe pipe;
8949
8950 pipe = alphaev5_insn_pipe (insn);
8951 switch (pipe)
8952 {
8953 case EV5_STOP:
8954 /* Force complex instructions to start new groups. */
8955 if (in_use)
8956 goto done;
8957
8958 /* If this is a completely unrecognized insn, it's an asm.
8959 We don't know how long it is, so record length as -1 to
8960 signal a needed realignment. */
8961 if (recog_memoized (insn) < 0)
8962 len = -1;
8963 else
8964 len = get_attr_length (insn);
8965 goto next_and_done;
8966
8967 /* ??? Most of the places below, we would like to assert never
8968 happen, as it would indicate an error either in Haifa, or
8969 in the scheduling description. Unfortunately, Haifa never
8970 schedules the last instruction of the BB, so we don't have
8971 an accurate TI bit to go off. */
8972 case EV5_E01:
8973 if (in_use & EV5_E0)
8974 {
8975 if (in_use & EV5_E1)
8976 goto done;
8977 in_use |= EV5_E1;
8978 }
8979 else
8980 in_use |= EV5_E0 | EV5_E01;
8981 break;
8982
8983 case EV5_E0:
8984 if (in_use & EV5_E0)
8985 {
8986 if (!(in_use & EV5_E01) || (in_use & EV5_E1))
8987 goto done;
8988 in_use |= EV5_E1;
8989 }
8990 in_use |= EV5_E0;
8991 break;
8992
8993 case EV5_E1:
8994 if (in_use & EV5_E1)
8995 goto done;
8996 in_use |= EV5_E1;
8997 break;
8998
8999 case EV5_FAM:
9000 if (in_use & EV5_FA)
9001 {
9002 if (in_use & EV5_FM)
9003 goto done;
9004 in_use |= EV5_FM;
9005 }
9006 else
9007 in_use |= EV5_FA | EV5_FAM;
9008 break;
9009
9010 case EV5_FA:
9011 if (in_use & EV5_FA)
9012 goto done;
9013 in_use |= EV5_FA;
9014 break;
9015
9016 case EV5_FM:
9017 if (in_use & EV5_FM)
9018 goto done;
9019 in_use |= EV5_FM;
9020 break;
9021
9022 case EV5_NONE:
9023 break;
9024
9025 default:
9026 gcc_unreachable ();
9027 }
9028 len += 4;
9029
9030 /* Haifa doesn't do well scheduling branches. */
9031 /* ??? If this is predicted not-taken, slotting continues, except
9032 that no more IBR, FBR, or JSR insns may be slotted. */
9033 if (JUMP_P (insn))
9034 goto next_and_done;
9035
9036 next:
9037 insn = next_nonnote_insn (insn);
9038
9039 if (!insn || ! INSN_P (insn))
9040 goto done;
9041
9042 /* Let Haifa tell us where it thinks insn group boundaries are. */
9043 if (GET_MODE (insn) == TImode)
9044 goto done;
9045
9046 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
9047 goto next;
9048 }
9049
9050 next_and_done:
9051 insn = next_nonnote_insn (insn);
9052
9053 done:
9054 *plen = len;
9055 *pin_use = in_use;
9056 return insn;
9057 }
9058
9059 static rtx
9060 alphaev4_next_nop (int *pin_use)
9061 {
9062 int in_use = *pin_use;
9063 rtx nop;
9064
9065 if (!(in_use & EV4_IB0))
9066 {
9067 in_use |= EV4_IB0;
9068 nop = gen_nop ();
9069 }
9070 else if ((in_use & (EV4_IBX|EV4_IB1)) == EV4_IBX)
9071 {
9072 in_use |= EV4_IB1;
9073 nop = gen_nop ();
9074 }
9075 else if (TARGET_FP && !(in_use & EV4_IB1))
9076 {
9077 in_use |= EV4_IB1;
9078 nop = gen_fnop ();
9079 }
9080 else
9081 nop = gen_unop ();
9082
9083 *pin_use = in_use;
9084 return nop;
9085 }
9086
9087 static rtx
9088 alphaev5_next_nop (int *pin_use)
9089 {
9090 int in_use = *pin_use;
9091 rtx nop;
9092
9093 if (!(in_use & EV5_E1))
9094 {
9095 in_use |= EV5_E1;
9096 nop = gen_nop ();
9097 }
9098 else if (TARGET_FP && !(in_use & EV5_FA))
9099 {
9100 in_use |= EV5_FA;
9101 nop = gen_fnop ();
9102 }
9103 else if (TARGET_FP && !(in_use & EV5_FM))
9104 {
9105 in_use |= EV5_FM;
9106 nop = gen_fnop ();
9107 }
9108 else
9109 nop = gen_unop ();
9110
9111 *pin_use = in_use;
9112 return nop;
9113 }
9114
9115 /* The instruction group alignment main loop. */
9116
9117 static void
9118 alpha_align_insns (unsigned int max_align,
9119 rtx (*next_group) (rtx, int *, int *),
9120 rtx (*next_nop) (int *))
9121 {
9122 /* ALIGN is the known alignment for the insn group. */
9123 unsigned int align;
9124 /* OFS is the offset of the current insn in the insn group. */
9125 int ofs;
9126 int prev_in_use, in_use, len, ldgp;
9127 rtx i, next;
9128
9129 /* Let shorten_branches take care of assigning alignments to code labels.  */
9130 shorten_branches (get_insns ());
9131
9132 if (align_functions < 4)
9133 align = 4;
9134 else if ((unsigned int) align_functions < max_align)
9135 align = align_functions;
9136 else
9137 align = max_align;
9138
9139 ofs = prev_in_use = 0;
9140 i = get_insns ();
9141 if (NOTE_P (i))
9142 i = next_nonnote_insn (i);
9143
9144 ldgp = alpha_function_needs_gp ? 8 : 0;
9145
9146 while (i)
9147 {
9148 next = (*next_group) (i, &in_use, &len);
9149
9150 /* When we see a label, resync alignment etc. */
9151 if (LABEL_P (i))
9152 {
9153 unsigned int new_align = 1 << label_to_alignment (i);
9154
9155 if (new_align >= align)
9156 {
9157 align = new_align < max_align ? new_align : max_align;
9158 ofs = 0;
9159 }
9160
9161 else if (ofs & (new_align-1))
9162 ofs = (ofs | (new_align-1)) + 1;
9163 gcc_assert (!len);
9164 }
9165
9166 /* Handle complex instructions specially.  */
9167 else if (in_use == 0)
9168 {
9169 /* Asms will have length < 0. This is a signal that we have
9170 lost alignment knowledge. Assume, however, that the asm
9171 will not mis-align instructions. */
9172 if (len < 0)
9173 {
9174 ofs = 0;
9175 align = 4;
9176 len = 0;
9177 }
9178 }
9179
9180 /* If the known alignment is smaller than the recognized insn group,
9181 realign the output. */
9182 else if ((int) align < len)
9183 {
9184 unsigned int new_log_align = len > 8 ? 4 : 3;
9185 rtx prev, where;
9186
9187 where = prev = prev_nonnote_insn (i);
9188 if (!where || !LABEL_P (where))
9189 where = i;
9190
9191 /* Can't realign between a call and its gp reload. */
9192 if (! (TARGET_EXPLICIT_RELOCS
9193 && prev && CALL_P (prev)))
9194 {
9195 emit_insn_before (gen_realign (GEN_INT (new_log_align)), where);
9196 align = 1 << new_log_align;
9197 ofs = 0;
9198 }
9199 }
9200
9201 /* We may not insert padding inside the initial ldgp sequence. */
9202 else if (ldgp > 0)
9203 ldgp -= len;
9204
9205 /* If the group won't fit in the same INT16 as the previous,
9206 we need to add padding to keep the group together. Rather
9207 than simply leaving the insn filling to the assembler, we
9208 can make use of the knowledge of what sorts of instructions
9209 were issued in the previous group to make sure that all of
9210 the added nops are really free. */
9211 else if (ofs + len > (int) align)
9212 {
9213 int nop_count = (align - ofs) / 4;
9214 rtx where;
9215
9216 /* Insert nops before labels, branches, and calls to truly merge
9217 the execution of the nops with the previous instruction group. */
9218 where = prev_nonnote_insn (i);
9219 if (where)
9220 {
9221 if (LABEL_P (where))
9222 {
9223 rtx where2 = prev_nonnote_insn (where);
9224 if (where2 && JUMP_P (where2))
9225 where = where2;
9226 }
9227 else if (NONJUMP_INSN_P (where))
9228 where = i;
9229 }
9230 else
9231 where = i;
9232
9233 do
9234 emit_insn_before ((*next_nop)(&prev_in_use), where);
9235 while (--nop_count);
9236 ofs = 0;
9237 }
9238
9239 ofs = (ofs + len) & (align - 1);
9240 prev_in_use = in_use;
9241 i = next;
9242 }
9243 }
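/* Worked example (illustrative): with align == 16, ofs == 12 and a group
   of len == 8, ofs + len > align, so (16 - 12) / 4 == 1 nop is emitted
   using the previous group's free slots and the new group starts at a
   fresh 16-byte boundary (ofs resets to 0).  */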
9244
9245 /* Insert an unop between sibcall or noreturn function call and GP load. */
9246
9247 static void
9248 alpha_pad_function_end (void)
9249 {
9250 rtx insn, next;
9251
9252 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9253 {
9254 if (!CALL_P (insn)
9255 || !(SIBLING_CALL_P (insn)
9256 || find_reg_note (insn, REG_NORETURN, NULL_RTX)))
9257 continue;
9258
9259 /* Make sure we do not split a call and its corresponding
9260 CALL_ARG_LOCATION note. */
9261 next = NEXT_INSN (insn);
9262 if (next == NULL)
9263 continue;
9264 if (BARRIER_P (next))
9265 {
9266 next = NEXT_INSN (next);
9267 if (next == NULL)
9268 continue;
9269 }
9270 if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
9271 insn = next;
9272
9273 next = next_active_insn (insn);
9274 if (next)
9275 {
9276 rtx pat = PATTERN (next);
9277
9278 if (GET_CODE (pat) == SET
9279 && GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
9280 && XINT (SET_SRC (pat), 1) == UNSPECV_LDGP1)
9281 emit_insn_after (gen_unop (), insn);
9282 }
9283 }
9284 }
9285
9286 /* Machine dependent reorg pass. */
9287
9288 static void
9289 alpha_reorg (void)
9290 {
9291 /* Workaround for a linker error that triggers when an exception
9292 handler immediately follows a sibcall or a noreturn function.
9293
9294 In the sibcall case:
9295
9296 The instruction stream from an object file:
9297
9298 1d8: 00 00 fb 6b jmp (t12)
9299 1dc: 00 00 ba 27 ldah gp,0(ra)
9300 1e0: 00 00 bd 23 lda gp,0(gp)
9301 1e4: 00 00 7d a7 ldq t12,0(gp)
9302 1e8: 00 40 5b 6b jsr ra,(t12),1ec <__funcZ+0x1ec>
9303
9304 was converted in the final link pass to:
9305
9306 12003aa88: 67 fa ff c3 br 120039428 <...>
9307 12003aa8c: 00 00 fe 2f unop
9308 12003aa90: 00 00 fe 2f unop
9309 12003aa94: 48 83 7d a7 ldq t12,-31928(gp)
9310 12003aa98: 00 40 5b 6b jsr ra,(t12),12003aa9c <__func+0x1ec>
9311
9312 And in the noreturn case:
9313
9314 The instruction stream from an object file:
9315
9316 54: 00 40 5b 6b jsr ra,(t12),58 <__func+0x58>
9317 58: 00 00 ba 27 ldah gp,0(ra)
9318 5c: 00 00 bd 23 lda gp,0(gp)
9319 60: 00 00 7d a7 ldq t12,0(gp)
9320 64: 00 40 5b 6b jsr ra,(t12),68 <__func+0x68>
9321
9322 was converted in the final link pass to:
9323
9324 fdb24: a0 03 40 d3 bsr ra,fe9a8 <_called_func+0x8>
9325 fdb28: 00 00 fe 2f unop
9326 fdb2c: 00 00 fe 2f unop
9327 fdb30: 30 82 7d a7 ldq t12,-32208(gp)
9328 fdb34: 00 40 5b 6b jsr ra,(t12),fdb38 <__func+0x68>
9329
9330 GP load instructions were wrongly cleared by the linker relaxation
9331 pass. This workaround prevents removal of GP loads by inserting
9332 an unop instruction between a sibcall or noreturn function call and
9333 exception handler prologue. */
9334
9335 if (current_function_has_exception_handlers ())
9336 alpha_pad_function_end ();
9337
9338 if (alpha_tp != ALPHA_TP_PROG || flag_exceptions)
9339 alpha_handle_trap_shadows ();
9340
9341 /* Due to the number of extra trapb insns, don't bother fixing up
9342 alignment when trap precision is instruction. Moreover, we can
9343 only do our job when sched2 is run. */
9344 if (optimize && !optimize_size
9345 && alpha_tp != ALPHA_TP_INSN
9346 && flag_schedule_insns_after_reload)
9347 {
9348 if (alpha_tune == PROCESSOR_EV4)
9349 alpha_align_insns (8, alphaev4_next_group, alphaev4_next_nop);
9350 else if (alpha_tune == PROCESSOR_EV5)
9351 alpha_align_insns (16, alphaev5_next_group, alphaev5_next_nop);
9352 }
9353 }
9354
9355 static void
9356 alpha_file_start (void)
9357 {
9358 default_file_start ();
9359
9360 fputs ("\t.set noreorder\n", asm_out_file);
9361 fputs ("\t.set volatile\n", asm_out_file);
9362 if (TARGET_ABI_OSF)
9363 fputs ("\t.set noat\n", asm_out_file);
9364 if (TARGET_EXPLICIT_RELOCS)
9365 fputs ("\t.set nomacro\n", asm_out_file);
9366 if (TARGET_SUPPORT_ARCH | TARGET_BWX | TARGET_MAX | TARGET_FIX | TARGET_CIX)
9367 {
9368 const char *arch;
9369
9370 if (alpha_cpu == PROCESSOR_EV6 || TARGET_FIX || TARGET_CIX)
9371 arch = "ev6";
9372 else if (TARGET_MAX)
9373 arch = "pca56";
9374 else if (TARGET_BWX)
9375 arch = "ev56";
9376 else if (alpha_cpu == PROCESSOR_EV5)
9377 arch = "ev5";
9378 else
9379 arch = "ev4";
9380
9381 fprintf (asm_out_file, "\t.arch %s\n", arch);
9382 }
9383 }
9384
9385 /* Since we don't have a .dynbss section, we should not allow global
9386 relocations in the .rodata section. */
9387
9388 static int
9389 alpha_elf_reloc_rw_mask (void)
9390 {
  return flag_pic ? 3 : 2;
}

/* Return a section for X.  The only special thing we do here is to
   honor small data.  */

static section *
alpha_elf_select_rtx_section (enum machine_mode mode, rtx x,
                              unsigned HOST_WIDE_INT align)
{
  if (TARGET_SMALL_DATA && GET_MODE_SIZE (mode) <= g_switch_value)
    /* ??? Consider using mergeable sdata sections.  */
    return sdata_section;
  else
    return default_elf_select_rtx_section (mode, x, align);
}

static unsigned int
alpha_elf_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = 0;

  if (strcmp (name, ".sdata") == 0
      || strncmp (name, ".sdata.", 7) == 0
      || strncmp (name, ".gnu.linkonce.s.", 16) == 0
      || strcmp (name, ".sbss") == 0
      || strncmp (name, ".sbss.", 6) == 0
      || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
    flags = SECTION_SMALL;

  flags |= default_section_type_flags (decl, name, reloc);
  return flags;
}

/* Structure to collect function names for final output in link section.  */
/* Note that items marked with GTY can't be ifdef'ed out.  */

enum reloc_kind
{
  KIND_LINKAGE,
  KIND_CODEADDR
};

struct GTY(()) alpha_links
{
  rtx func;
  rtx linkage;
  enum reloc_kind rkind;
};

#if TARGET_ABI_OPEN_VMS

/* Return the VMS argument type corresponding to MODE.  */

enum avms_arg_type
alpha_arg_type (enum machine_mode mode)
{
  switch (mode)
    {
    case SFmode:
      return TARGET_FLOAT_VAX ? FF : FS;
    case DFmode:
      return TARGET_FLOAT_VAX ? FD : FT;
    default:
      return I64;
    }
}

/* Return an rtx for an integer representing the VMS Argument Information
   register value.  */

rtx
alpha_arg_info_reg_val (CUMULATIVE_ARGS cum)
{
  unsigned HOST_WIDE_INT regval = cum.num_args;
  int i;

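  /* The Argument Information register packs the argument count into the
     low byte and, starting at bit 8, a 3-bit avms_arg_type code for each
     of the first six arguments.  */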
  for (i = 0; i < 6; i++)
    regval |= ((int) cum.atypes[i]) << (i * 3 + 8);

  return GEN_INT (regval);
}


/* Return a SYMBOL_REF representing the reference to the .linkage entry
   of function FUNC built for calls made from CFUNDECL.  LFLAG is 1 if
   this is the reference to the linkage pointer value, 0 if this is the
   reference to the function entry value.  RFLAG is 1 if this is a reduced
   reference (code address only), 0 if this is a full reference.  */

rtx
alpha_use_linkage (rtx func, bool lflag, bool rflag)
{
  struct alpha_links *al = NULL;
  const char *name = XSTR (func, 0);

  if (cfun->machine->links)
    {
      splay_tree_node lnode;

      /* Is this name already defined?  */
      lnode = splay_tree_lookup (cfun->machine->links, (splay_tree_key) name);
      if (lnode)
        al = (struct alpha_links *) lnode->value;
    }
  else
    cfun->machine->links = splay_tree_new_ggc
      ((splay_tree_compare_fn) strcmp,
       ggc_alloc_splay_tree_str_alpha_links_splay_tree_s,
       ggc_alloc_splay_tree_str_alpha_links_splay_tree_node_s);

  if (al == NULL)
    {
      size_t buf_len;
      char *linksym;
      tree id;

      if (name[0] == '*')
        name++;

      /* Follow transparent alias, as this is used for CRTL translations.  */
      id = maybe_get_identifier (name);
      if (id)
        {
          while (IDENTIFIER_TRANSPARENT_ALIAS (id))
            id = TREE_CHAIN (id);
          name = IDENTIFIER_POINTER (id);
        }

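      /* The fixed characters of "$%d..%s..lk" plus the trailing NUL
         account for the 8; the 9 leaves room for the decimal digits
         of funcdef_no.  */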
      buf_len = strlen (name) + 8 + 9;
      linksym = (char *) alloca (buf_len);
      snprintf (linksym, buf_len, "$%d..%s..lk", cfun->funcdef_no, name);

      al = ggc_alloc_alpha_links ();
      al->func = func;
      al->linkage = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (linksym));

      splay_tree_insert (cfun->machine->links,
                         (splay_tree_key) ggc_strdup (name),
                         (splay_tree_value) al);
    }

  al->rkind = rflag ? KIND_CODEADDR : KIND_LINKAGE;

  if (lflag)
    return gen_rtx_MEM (Pmode, plus_constant (Pmode, al->linkage, 8));
  else
    return al->linkage;
}

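/* Write one entry of the links splay tree to the assembler file.  For a
   locally defined function FOO this emits, for example:

	$1..FOO..lk:
		.quad FOO..en
		.quad FOO

   whereas an external function gets a ".linkage FOO" request and a
   reduced (code address only) entry gets ".code_address FOO".  */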
static int
alpha_write_one_linkage (splay_tree_node node, void *data)
{
  const char *const name = (const char *) node->key;
  struct alpha_links *link = (struct alpha_links *) node->value;
  FILE *stream = (FILE *) data;

  ASM_OUTPUT_INTERNAL_LABEL (stream, XSTR (link->linkage, 0));
  if (link->rkind == KIND_CODEADDR)
    {
      /* External and used, request code address.  */
      fprintf (stream, "\t.code_address ");
    }
  else
    {
      if (!SYMBOL_REF_EXTERNAL_P (link->func)
          && SYMBOL_REF_LOCAL_P (link->func))
        {
          /* Locally defined, build linkage pair.  */
          fprintf (stream, "\t.quad %s..en\n", name);
          fprintf (stream, "\t.quad ");
        }
      else
        {
          /* External, request linkage pair.  */
          fprintf (stream, "\t.linkage ");
        }
    }
  assemble_name (stream, name);
  fputs ("\n", stream);

  return 0;
}

static void
alpha_write_linkage (FILE *stream, const char *funname)
{
  fprintf (stream, "\t.link\n");
  fprintf (stream, "\t.align 3\n");
  in_section = NULL;

#ifdef TARGET_VMS_CRASH_DEBUG
  fputs ("\t.name ", stream);
  assemble_name (stream, funname);
  fputs ("..na\n", stream);
#endif

  ASM_OUTPUT_LABEL (stream, funname);
  fprintf (stream, "\t.pdesc ");
  assemble_name (stream, funname);
  fprintf (stream, "..en,%s\n",
           alpha_procedure_type == PT_STACK ? "stack"
           : alpha_procedure_type == PT_REGISTER ? "reg" : "null");

  if (cfun->machine->links)
    {
      splay_tree_foreach (cfun->machine->links, alpha_write_one_linkage, stream);
      /* splay_tree_delete (func->links); */
    }
}

/* Switch to an arbitrary section NAME with attributes as specified
   by FLAGS.  ALIGN specifies any known alignment requirements for
   the section; 0 if the default should be used.  */

static void
vms_asm_named_section (const char *name, unsigned int flags,
                       tree decl ATTRIBUTE_UNUSED)
{
  fputc ('\n', asm_out_file);
  fprintf (asm_out_file, ".section\t%s", name);

  if (flags & SECTION_DEBUG)
    fprintf (asm_out_file, ",NOWRT");

  fputc ('\n', asm_out_file);
}

/* Record an element in the table of global constructors.  SYMBOL is
   a SYMBOL_REF of the function to be called; PRIORITY is a number
   between 0 and MAX_INIT_PRIORITY.

   Differs from default_ctors_section_asm_out_constructor in that the
   width of the .ctors entry is always 64 bits, rather than the 32 bits
   used by a normal pointer.  */

static void
vms_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
{
  switch_to_section (ctors_section);
  assemble_align (BITS_PER_WORD);
  assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
}

static void
vms_asm_out_destructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
{
  switch_to_section (dtors_section);
  assemble_align (BITS_PER_WORD);
  assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
}
#else
rtx
alpha_use_linkage (rtx func ATTRIBUTE_UNUSED,
                   bool lflag ATTRIBUTE_UNUSED,
                   bool rflag ATTRIBUTE_UNUSED)
{
  return NULL_RTX;
}

#endif /* TARGET_ABI_OPEN_VMS */

static void
alpha_init_libfuncs (void)
{
  if (TARGET_ABI_OPEN_VMS)
    {
      /* Use the VMS runtime library functions for division and
         remainder.  */
      set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
      set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
      set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
      set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
      set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
      set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
      set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
      set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
      abort_libfunc = init_one_libfunc ("decc$abort");
      memcmp_libfunc = init_one_libfunc ("decc$memcmp");
#ifdef MEM_LIBFUNCS_INIT
      MEM_LIBFUNCS_INIT;
#endif
    }
}

/* On the Alpha, we use this to disable the floating-point registers
   when they don't exist.  */

static void
alpha_conditional_register_usage (void)
{
  int i;
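  /* Registers 32..62 are $f0..$f30; register 63 ($f31) reads as zero
     and is already fixed, so it is not touched here.  */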
  if (! TARGET_FPREGS)
    for (i = 32; i < 63; i++)
      fixed_regs[i] = call_used_regs[i] = 1;
}

/* Canonicalize a comparison from one we don't have to one we do have.  */

static void
alpha_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
                               bool op0_preserve_value)
{
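  /* The hardware has compare instructions only for "equal" and the
     "less" forms (CMPEQ, CMPLT, CMPLE and the unsigned variants), so
     GE/GT/GEU/GTU are handled by swapping the operands when the value
     of op0 need not be preserved.  */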
  if (!op0_preserve_value
      && (*code == GE || *code == GT || *code == GEU || *code == GTU)
      && (REG_P (*op1) || *op1 == const0_rtx))
    {
      rtx tem = *op0;
      *op0 = *op1;
      *op1 = tem;
      *code = (int) swap_condition ((enum rtx_code) *code);
    }

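  /* Rewrite x < 256 as x <= 255: 255 fits in the unsigned 8-bit literal
     field of the operate-format compare instructions, while 256 would
     need to be loaded into a register first.  */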
  if ((*code == LT || *code == LTU)
      && CONST_INT_P (*op1) && INTVAL (*op1) == 256)
    {
      *code = *code == LT ? LE : LEU;
      *op1 = GEN_INT (255);
    }
}

/* Initialize the GCC target structure.  */
#if TARGET_ABI_OPEN_VMS
# undef TARGET_ATTRIBUTE_TABLE
# define TARGET_ATTRIBUTE_TABLE vms_attribute_table
# undef TARGET_CAN_ELIMINATE
# define TARGET_CAN_ELIMINATE alpha_vms_can_eliminate
#endif

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P alpha_in_small_data_p

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

/* Default unaligned ops are provided for ELF systems.  To get unaligned
   data for non-ELF systems, we have to turn off auto alignment.  */
#if TARGET_ABI_OPEN_VMS
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.align 0\n\t.word\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.align 0\n\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.align 0\n\t.quad\t"
#endif

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK alpha_elf_reloc_rw_mask
#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION alpha_elf_select_rtx_section
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS alpha_elf_section_type_flags

#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE alpha_output_function_end_prologue

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS alpha_init_libfuncs

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS alpha_legitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P alpha_mode_dependent_address_p

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START alpha_file_start

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST alpha_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE alpha_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  alpha_multipass_dfa_lookahead

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL alpha_builtin_decl
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS alpha_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN alpha_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN alpha_fold_builtin

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL alpha_function_ok_for_sibcall
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P alpha_cannot_copy_insn_p
#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P alpha_legitimate_constant_p
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM alpha_cannot_force_const_mem

#if TARGET_ABI_OSF
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK alpha_output_mi_thunk_osf
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
#undef TARGET_STDARG_OPTIMIZE_HOOK
#define TARGET_STDARG_OPTIMIZE_HOOK alpha_stdarg_optimize_hook
#endif

/* Use 16-bit section anchors: the range matches the signed 16-bit
   displacement field of Alpha memory-format instructions.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -0x7fff - 1
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fff
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS alpha_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG alpha_reorg

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_false
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY alpha_return_in_memory
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE alpha_pass_by_reference
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS alpha_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG alpha_split_complex_arg
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR alpha_gimplify_va_arg
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES alpha_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG alpha_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE alpha_function_arg_advance
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT alpha_trampoline_init

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS alpha_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD alpha_secondary_reload

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P alpha_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P alpha_vector_mode_supported_p

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST alpha_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START alpha_va_start

/* The Alpha architecture does not require sequential consistency.  See
   http://www.cs.umd.edu/~pugh/java/memoryModel/AlphaReordering.html
   for an example of how it can be violated in practice.  */
#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING true

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE alpha_option_override

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE alpha_mangle_type
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P alpha_legitimate_address_p

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE alpha_conditional_register_usage

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON alpha_canonicalize_comparison

struct gcc_target targetm = TARGET_INITIALIZER;


#include "gt-alpha.h"