1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2013 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "tree.h"
28 #include "rtl.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "insn-codes.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "function.h"
38 #include "except.h"
39 #include "expr.h"
40 #include "optabs.h"
41 #include "recog.h"
42 #include "diagnostic-core.h"
43 #include "ggc.h"
44 #include "tm_p.h"
45 #include "debug.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "common/common-target.h"
49 #include "gimple.h"
50 #include "langhooks.h"
51 #include "reload.h"
52 #include "params.h"
53 #include "df.h"
54 #include "opts.h"
55 #include "tree-pass.h"
56
57 /* Processor costs */
58
/* Relative cost of various operations for one processor variant.  All
   entries are expressed in COSTS_N_INSNS units so the rtx cost hooks can
   compare them directly.  One constant instance of this struct is defined
   below for each supported -mcpu value.  */
struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply costs is fixed,
     and not variable.  */
  const int int_mul_bit_factor;

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;
};
132
/* Instruction costs for the Cypress processor (the default, see
   sparc_costs below).  */
static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
156
/* Instruction costs for the SuperSPARC processor.  */
static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};
180
/* Instruction costs for the HyperSPARC processor.  */
static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
204
/* Instruction costs for the LEON processor.  */
static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
228
/* Instruction costs for the LEON3 processor.  Note the much higher
   integer division costs compared to the original LEON table.  */
static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
252
/* Instruction costs for the SPARClet processor.  */
static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
276
/* Instruction costs for the UltraSPARC processor.  */
static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};
300
/* Instruction costs for the UltraSPARC III processor.  */
static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};
324
/* Instruction costs for the Niagara (UltraSPARC T1) processor.  */
static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
348
/* Instruction costs for the Niagara-2 processor.  */
static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
372
/* Instruction costs for the Niagara-3 processor.  */
static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
396
/* Instruction costs for the Niagara-4 processor.  */
static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
420
421 static const struct processor_costs *sparc_costs = &cypress_costs;
422
/* LEAF_SIBCALL_SLOT_RESERVED_P: nonzero when the delay slot of a leaf
   sibling call must be kept reserved, per the rationale below.  */
#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way how to find out if
   somebody does not branch between the sethi and jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif
433
/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.
   NOTE(review): the -1 sentinels are stored in plain `char', whose
   signedness is implementation-defined; consumers that test for a
   negative entry rely on `char' being signed on the host -- confirm.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};
452
/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.
   NOTE(review): the zero entries in rows 8-23 (except index 14,
   presumably the stack pointer) mirror the -1 entries of
   leaf_reg_remap above -- confirm against sparc.h's register
   numbering.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};
470
/* Per-function machine-dependent state.  Allocated in garbage-collected
   memory (note the GTY marker) and reached through the accessor macros
   defined right after this struct.  */
struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Some local-dynamic TLS symbol name.  */
  const char *some_ld_name;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};
504
/* Shorthand accessors for the current function's machine_function
   state (see struct machine_function above).  */
#define sparc_frame_size		cfun->machine->frame_size
#define sparc_apparent_frame_size	cfun->machine->apparent_frame_size
#define sparc_frame_base_reg		cfun->machine->frame_base_reg
#define sparc_frame_base_offset		cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs		cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p		cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p	cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p	cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;
516
517 static void sparc_option_override (void);
518 static void sparc_init_modes (void);
519 static void scan_record_type (const_tree, int *, int *, int *);
520 static int function_arg_slotno (const CUMULATIVE_ARGS *, enum machine_mode,
521 const_tree, bool, bool, int *, int *);
522
523 static int supersparc_adjust_cost (rtx, rtx, rtx, int);
524 static int hypersparc_adjust_cost (rtx, rtx, rtx, int);
525
526 static void sparc_emit_set_const32 (rtx, rtx);
527 static void sparc_emit_set_const64 (rtx, rtx);
528 static void sparc_output_addr_vec (rtx);
529 static void sparc_output_addr_diff_vec (rtx);
530 static void sparc_output_deferred_case_vectors (void);
531 static bool sparc_legitimate_address_p (enum machine_mode, rtx, bool);
532 static bool sparc_legitimate_constant_p (enum machine_mode, rtx);
533 static rtx sparc_builtin_saveregs (void);
534 static int epilogue_renumber (rtx *, int);
535 static bool sparc_assemble_integer (rtx, unsigned int, int);
536 static int set_extends (rtx);
537 static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
538 static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
539 #ifdef TARGET_SOLARIS
540 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
541 tree) ATTRIBUTE_UNUSED;
542 #endif
543 static int sparc_adjust_cost (rtx, rtx, rtx, int);
544 static int sparc_issue_rate (void);
545 static void sparc_sched_init (FILE *, int, int);
546 static int sparc_use_sched_lookahead (void);
547
548 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
549 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
550 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
551 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
552 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
553
554 static bool sparc_function_ok_for_sibcall (tree, tree);
555 static void sparc_init_libfuncs (void);
556 static void sparc_init_builtins (void);
557 static void sparc_vis_init_builtins (void);
558 static rtx sparc_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
559 static tree sparc_fold_builtin (tree, int, tree *, bool);
560 static int sparc_vis_mul8x16 (int, int);
561 static void sparc_handle_vis_mul8x16 (tree *, int, tree, tree, tree);
562 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
563 HOST_WIDE_INT, tree);
564 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
565 HOST_WIDE_INT, const_tree);
566 static struct machine_function * sparc_init_machine_status (void);
567 static bool sparc_cannot_force_const_mem (enum machine_mode, rtx);
568 static rtx sparc_tls_get_addr (void);
569 static rtx sparc_tls_got (void);
570 static const char *get_some_local_dynamic_name (void);
571 static int get_some_local_dynamic_name_1 (rtx *, void *);
572 static int sparc_register_move_cost (enum machine_mode,
573 reg_class_t, reg_class_t);
574 static bool sparc_rtx_costs (rtx, int, int, int, int *, bool);
575 static rtx sparc_function_value (const_tree, const_tree, bool);
576 static rtx sparc_libcall_value (enum machine_mode, const_rtx);
577 static bool sparc_function_value_regno_p (const unsigned int);
578 static rtx sparc_struct_value_rtx (tree, int);
579 static enum machine_mode sparc_promote_function_mode (const_tree, enum machine_mode,
580 int *, const_tree, int);
581 static bool sparc_return_in_memory (const_tree, const_tree);
582 static bool sparc_strict_argument_naming (cumulative_args_t);
583 static void sparc_va_start (tree, rtx);
584 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
585 static bool sparc_vector_mode_supported_p (enum machine_mode);
586 static bool sparc_tls_referenced_p (rtx);
587 static rtx sparc_legitimize_tls_address (rtx);
588 static rtx sparc_legitimize_pic_address (rtx, rtx);
589 static rtx sparc_legitimize_address (rtx, rtx, enum machine_mode);
590 static rtx sparc_delegitimize_address (rtx);
591 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
592 static bool sparc_pass_by_reference (cumulative_args_t,
593 enum machine_mode, const_tree, bool);
594 static void sparc_function_arg_advance (cumulative_args_t,
595 enum machine_mode, const_tree, bool);
596 static rtx sparc_function_arg_1 (cumulative_args_t,
597 enum machine_mode, const_tree, bool, bool);
598 static rtx sparc_function_arg (cumulative_args_t,
599 enum machine_mode, const_tree, bool);
600 static rtx sparc_function_incoming_arg (cumulative_args_t,
601 enum machine_mode, const_tree, bool);
602 static unsigned int sparc_function_arg_boundary (enum machine_mode,
603 const_tree);
604 static int sparc_arg_partial_bytes (cumulative_args_t,
605 enum machine_mode, tree, bool);
606 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
607 static void sparc_file_end (void);
608 static bool sparc_frame_pointer_required (void);
609 static bool sparc_can_eliminate (const int, const int);
610 static rtx sparc_builtin_setjmp_frame_value (void);
611 static void sparc_conditional_register_usage (void);
612 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
613 static const char *sparc_mangle_type (const_tree);
614 #endif
615 static void sparc_trampoline_init (rtx, tree, rtx);
616 static enum machine_mode sparc_preferred_simd_mode (enum machine_mode);
617 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
618 static bool sparc_print_operand_punct_valid_p (unsigned char);
619 static void sparc_print_operand (FILE *, rtx, int);
620 static void sparc_print_operand_address (FILE *, rtx);
621 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
622 enum machine_mode,
623 secondary_reload_info *);
624
#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       do_diagnostic } */
  SUBTARGET_ATTRIBUTE_TABLE,
  /* Sentinel marking the end of the table.  */
  { NULL, 0, 0, false, false, false, NULL, false }
};
#endif
635
/* Option handling.  */

/* Code model parsed from the command line (see enum cmodel).  */
enum cmodel sparc_cmodel;

/* NOTE(review): one flag per entry for 8 hard registers; going by the
   name, records which of them have already been printed in the asm
   output -- confirm against the uses later in this file.  */
char sparc_hard_reg_printed[8];
642
643 /* Initialize the GCC target structure. */
644
645 /* The default is to use .half rather than .short for aligned HI objects. */
646 #undef TARGET_ASM_ALIGNED_HI_OP
647 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
648
649 #undef TARGET_ASM_UNALIGNED_HI_OP
650 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
651 #undef TARGET_ASM_UNALIGNED_SI_OP
652 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
653 #undef TARGET_ASM_UNALIGNED_DI_OP
654 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
655
656 /* The target hook has to handle DI-mode values. */
657 #undef TARGET_ASM_INTEGER
658 #define TARGET_ASM_INTEGER sparc_assemble_integer
659
660 #undef TARGET_ASM_FUNCTION_PROLOGUE
661 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
662 #undef TARGET_ASM_FUNCTION_EPILOGUE
663 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
664
665 #undef TARGET_SCHED_ADJUST_COST
666 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
667 #undef TARGET_SCHED_ISSUE_RATE
668 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
669 #undef TARGET_SCHED_INIT
670 #define TARGET_SCHED_INIT sparc_sched_init
671 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
672 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
673
674 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
675 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
676
677 #undef TARGET_INIT_LIBFUNCS
678 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
679 #undef TARGET_INIT_BUILTINS
680 #define TARGET_INIT_BUILTINS sparc_init_builtins
681
682 #undef TARGET_LEGITIMIZE_ADDRESS
683 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
684 #undef TARGET_DELEGITIMIZE_ADDRESS
685 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
686 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
687 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
688
689 #undef TARGET_EXPAND_BUILTIN
690 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
691 #undef TARGET_FOLD_BUILTIN
692 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
693
694 #if TARGET_TLS
695 #undef TARGET_HAVE_TLS
696 #define TARGET_HAVE_TLS true
697 #endif
698
699 #undef TARGET_CANNOT_FORCE_CONST_MEM
700 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
701
702 #undef TARGET_ASM_OUTPUT_MI_THUNK
703 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
704 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
705 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
706
707 #undef TARGET_RTX_COSTS
708 #define TARGET_RTX_COSTS sparc_rtx_costs
709 #undef TARGET_ADDRESS_COST
710 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
711 #undef TARGET_REGISTER_MOVE_COST
712 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
713
714 #undef TARGET_PROMOTE_FUNCTION_MODE
715 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
716
717 #undef TARGET_FUNCTION_VALUE
718 #define TARGET_FUNCTION_VALUE sparc_function_value
719 #undef TARGET_LIBCALL_VALUE
720 #define TARGET_LIBCALL_VALUE sparc_libcall_value
721 #undef TARGET_FUNCTION_VALUE_REGNO_P
722 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
723
724 #undef TARGET_STRUCT_VALUE_RTX
725 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
726 #undef TARGET_RETURN_IN_MEMORY
727 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
728 #undef TARGET_MUST_PASS_IN_STACK
729 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
730 #undef TARGET_PASS_BY_REFERENCE
731 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
732 #undef TARGET_ARG_PARTIAL_BYTES
733 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
734 #undef TARGET_FUNCTION_ARG_ADVANCE
735 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
736 #undef TARGET_FUNCTION_ARG
737 #define TARGET_FUNCTION_ARG sparc_function_arg
738 #undef TARGET_FUNCTION_INCOMING_ARG
739 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
740 #undef TARGET_FUNCTION_ARG_BOUNDARY
741 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
742
743 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
744 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
745 #undef TARGET_STRICT_ARGUMENT_NAMING
746 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
747
748 #undef TARGET_EXPAND_BUILTIN_VA_START
749 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
750 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
751 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
752
753 #undef TARGET_VECTOR_MODE_SUPPORTED_P
754 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
755
756 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
757 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
758
759 #ifdef SUBTARGET_INSERT_ATTRIBUTES
760 #undef TARGET_INSERT_ATTRIBUTES
761 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
762 #endif
763
764 #ifdef SUBTARGET_ATTRIBUTE_TABLE
765 #undef TARGET_ATTRIBUTE_TABLE
766 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
767 #endif
768
769 #undef TARGET_RELAXED_ORDERING
770 #define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING
771
772 #undef TARGET_OPTION_OVERRIDE
773 #define TARGET_OPTION_OVERRIDE sparc_option_override
774
775 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
776 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
777 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
778 #endif
779
780 #undef TARGET_ASM_FILE_END
781 #define TARGET_ASM_FILE_END sparc_file_end
782
783 #undef TARGET_FRAME_POINTER_REQUIRED
784 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
785
786 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
787 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value
788
789 #undef TARGET_CAN_ELIMINATE
790 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
791
792 #undef TARGET_PREFERRED_RELOAD_CLASS
793 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
794
795 #undef TARGET_SECONDARY_RELOAD
796 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
797
798 #undef TARGET_CONDITIONAL_REGISTER_USAGE
799 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
800
801 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
802 #undef TARGET_MANGLE_TYPE
803 #define TARGET_MANGLE_TYPE sparc_mangle_type
804 #endif
805
806 #undef TARGET_LEGITIMATE_ADDRESS_P
807 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
808
809 #undef TARGET_LEGITIMATE_CONSTANT_P
810 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
811
812 #undef TARGET_TRAMPOLINE_INIT
813 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
814
815 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
816 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
817 #undef TARGET_PRINT_OPERAND
818 #define TARGET_PRINT_OPERAND sparc_print_operand
819 #undef TARGET_PRINT_OPERAND_ADDRESS
820 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
821
822 /* The value stored by LDSTUB. */
823 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
824 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
825
826 struct gcc_target targetm = TARGET_INITIALIZER;
827
828 /* Return the memory reference contained in X if any, zero otherwise. */
829
830 static rtx
mem_ref(rtx x)831 mem_ref (rtx x)
832 {
833 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
834 x = XEXP (x, 0);
835
836 if (MEM_P (x))
837 return x;
838
839 return NULL_RTX;
840 }
841
842 /* We use a machine specific pass to enable workarounds for errata.
843 We need to have the (essentially) final form of the insn stream in order
844 to properly detect the various hazards. Therefore, this machine specific
845 pass runs as late as possible. The pass is inserted in the pass pipeline
846 at the end of sparc_option_override. */
847
848 static bool
sparc_gate_work_around_errata(void)849 sparc_gate_work_around_errata (void)
850 {
851 /* The only errata we handle are those of the AT697F and UT699. */
852 return sparc_fix_at697f != 0 || sparc_fix_ut699 != 0;
853 }
854
/* Execute function of the errata workaround pass.  Scan the
   (essentially final) insn stream for the load hazards of the AT697F
   and UT699 errata and insert a NOP before each offending second insn.
   Always returns 0 (no extra TODO flags for the pass manager).  */

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx insn, next;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
	insn = XVECEXP (PATTERN (insn), 0, 1);

      /* Look for a single-word load into an odd-numbered FP register.  */
      if (sparc_fix_at697f
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	  && MEM_P (SET_SRC (set))
	  && REG_P (SET_DEST (set))
	  && REGNO (SET_DEST (set)) > 31
	  && REGNO (SET_DEST (set)) % 2 != 0)
	{
	  /* The wrong dependency is on the enclosing double register.  */
	  const unsigned int x = REGNO (SET_DEST (set)) - 1;
	  unsigned int src1, src2, dest;
	  int code;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  extract_insn (next);
	  code = INSN_CODE (next);

	  switch (code)
	    {
	    case CODE_FOR_adddf3:
	    case CODE_FOR_subdf3:
	    case CODE_FOR_muldf3:
	    case CODE_FOR_divdf3:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      src2 = REGNO (recog_data.operand[2]);
	      if (src1 != src2)
		{
		  /* Case [1-4]:
				 ld [address], %fx+1
				 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
		  if ((src1 == x || src2 == x)
		      && (dest == src1 || dest == src2))
		    insert_nop = true;
		}
	      else
		{
		  /* Case 5:
			     ld [address], %fx+1
			     FPOPd %fx, %fx, %fx */
		  if (src1 == x
		      && dest == src1
		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
		    insert_nop = true;
		}
	      break;

	    case CODE_FOR_sqrtdf2:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      /* Case 6:
			 ld [address], %fx+1
			 fsqrtd %fx, %fx */
	      if (src1 == x && dest == src1)
		insert_nop = true;
	      break;

	    default:
	      break;
	    }
	}

      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
	       && mem_ref (SET_SRC (set)) != NULL_RTX
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) < 32)
	{
	  /* There is no problem if the second memory access has a data
	     dependency on the first single-cycle load.  */
	  rtx x = SET_DEST (set);

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second memory access to/from an integer register.  */
	  if ((set = single_set (next)) != NULL_RTX)
	    {
	      rtx src = SET_SRC (set);
	      rtx dest = SET_DEST (set);
	      rtx mem;

	      /* LDD is affected.  */
	      if ((mem = mem_ref (src)) != NULL_RTX
		  && REG_P (dest)
		  && REGNO (dest) < 32
		  && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;

	      /* STD is *not* affected.  */
	      else if (MEM_P (dest)
		       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
		       && (src == CONST0_RTX (GET_MODE (dest))
			   || (REG_P (src)
			       && REGNO (src) < 32
			       && REGNO (src) != REGNO (x)))
		       && !reg_mentioned_p (x, XEXP (dest, 0)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load/operation into an FP register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31)
	{
	  /* Number of instructions in the problematic window.  */
	  const int n_insns = 4;
	  /* The problematic combination is with the sibling FP register.  */
	  const unsigned int x = REGNO (SET_DEST (set));
	  const unsigned int y = x ^ 1;
	  rtx after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second load/operation into the sibling FP register.  */
	  if (!((set = single_set (next)) != NULL_RTX
		&& GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
		&& REG_P (SET_DEST (set))
		&& REGNO (SET_DEST (set)) == y))
	    continue;

	  /* Look for a (possible) store from the FP register in the next N
	     instructions, but bail out if it is again modified or if there
	     is a store from the sibling FP register before this store.  */
	  for (after = next, i = 0; i < n_insns; i++)
	    {
	      bool branch_p;

	      after = next_active_insn (after);
	      if (!after)
		break;

	      /* This is a branch with an empty delay slot.  */
	      if (!NONJUMP_INSN_P (after))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = NULL_RTX;
		}
	      /* This is a branch with a filled delay slot.  */
	      else if (GET_CODE (PATTERN (after)) == SEQUENCE)
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = XVECEXP (PATTERN (after), 0, 1);
		}
	      /* This is a regular instruction.  */
	      else
		branch_p = false;

	      if (after && (set = single_set (after)) != NULL_RTX)
		{
		  const rtx src = SET_SRC (set);
		  const rtx dest = SET_DEST (set);
		  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));

		  /* If the FP register is again modified before the store,
		     then the store isn't affected.  */
		  if (REG_P (dest)
		      && (REGNO (dest) == x
			  || (REGNO (dest) == y && size == 8)))
		    break;

		  if (MEM_P (dest) && REG_P (src))
		    {
		      /* If there is a store from the sibling FP register
			 before the store, then the store is not affected.  */
		      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
			break;

		      /* Otherwise, the store is affected.  */
		      if (REGNO (src) == x && size == 4)
			{
			  insert_nop = true;
			  break;
			}
		    }
		}

	      /* If we have a branch in the first M instructions, then we
		 cannot see the (M+2)th instruction so we play safe.  */
	      if (branch_p && i <= (n_insns - 2))
		{
		  insert_nop = true;
		  break;
		}
	    }
	}

      else
	/* NOTE(review): if INSN was replaced by its delay-slot insn at the
	   top of the loop, NEXT_INSN continues from inside the SEQUENCE —
	   presumably intended, but worth confirming.  */
	next = NEXT_INSN (insn);

      if (insert_nop)
	emit_insn_before (gen_nop (), next);
    }

  return 0;
}
1098
/* Descriptor for the errata workaround pass: the gate enables it only
   when an AT697F/UT699 workaround is requested, and the execute
   function inserts the needed NOPs.  */
struct rtl_opt_pass pass_work_around_errata =
{
 {
  RTL_PASS,
  "errata", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  sparc_gate_work_around_errata, /* gate */
  sparc_do_work_around_errata, /* execute */
  NULL, /* sub */
  NULL, /* next */
  0, /* static_pass_number */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_verify_rtl_sharing, /* todo_flags_finish */
 }
};
1118
/* Registration record placing the errata pass after the first instance
   of the delayed-branch scheduling pass ("dbr"), i.e. as late as
   possible in the RTL pipeline.  */
struct register_pass_info insert_pass_work_around_errata =
{
  &pass_work_around_errata.pass, /* pass */
  "dbr", /* reference_pass_name */
  1, /* ref_pass_instance_number */
  PASS_POS_INSERT_AFTER /* pos_op */
};
1126
1127 /* Helpers for TARGET_DEBUG_OPTIONS. */
1128 static void
dump_target_flag_bits(const int flags)1129 dump_target_flag_bits (const int flags)
1130 {
1131 if (flags & MASK_64BIT)
1132 fprintf (stderr, "64BIT ");
1133 if (flags & MASK_APP_REGS)
1134 fprintf (stderr, "APP_REGS ");
1135 if (flags & MASK_FASTER_STRUCTS)
1136 fprintf (stderr, "FASTER_STRUCTS ");
1137 if (flags & MASK_FLAT)
1138 fprintf (stderr, "FLAT ");
1139 if (flags & MASK_FMAF)
1140 fprintf (stderr, "FMAF ");
1141 if (flags & MASK_FPU)
1142 fprintf (stderr, "FPU ");
1143 if (flags & MASK_HARD_QUAD)
1144 fprintf (stderr, "HARD_QUAD ");
1145 if (flags & MASK_POPC)
1146 fprintf (stderr, "POPC ");
1147 if (flags & MASK_PTR64)
1148 fprintf (stderr, "PTR64 ");
1149 if (flags & MASK_STACK_BIAS)
1150 fprintf (stderr, "STACK_BIAS ");
1151 if (flags & MASK_UNALIGNED_DOUBLES)
1152 fprintf (stderr, "UNALIGNED_DOUBLES ");
1153 if (flags & MASK_V8PLUS)
1154 fprintf (stderr, "V8PLUS ");
1155 if (flags & MASK_VIS)
1156 fprintf (stderr, "VIS ");
1157 if (flags & MASK_VIS2)
1158 fprintf (stderr, "VIS2 ");
1159 if (flags & MASK_VIS3)
1160 fprintf (stderr, "VIS3 ");
1161 if (flags & MASK_CBCOND)
1162 fprintf (stderr, "CBCOND ");
1163 if (flags & MASK_DEPRECATED_V8_INSNS)
1164 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1165 if (flags & MASK_SPARCLET)
1166 fprintf (stderr, "SPARCLET ");
1167 if (flags & MASK_SPARCLITE)
1168 fprintf (stderr, "SPARCLITE ");
1169 if (flags & MASK_V8)
1170 fprintf (stderr, "V8 ");
1171 if (flags & MASK_V9)
1172 fprintf (stderr, "V9 ");
1173 }
1174
/* Print PREFIX, the hexadecimal value of FLAGS and the symbolic names
   of the target mask bits set in FLAGS to stderr, on one line.  */

static void
dump_target_flags (const char *prefix, const int flags)
{
  fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
  dump_target_flag_bits (flags);
  /* GNU spacing before the parenthesis, consistent with the rest of
     the file (was "fprintf(stderr, ...)").  */
  fprintf (stderr, "]\n");
}
1182
1183 /* Validate and override various options, and do some machine dependent
1184 initialization. */
1185
static void
sparc_option_override (void)
{
  /* Mapping from the -mcmodel= strings to the code model enum.  */
  static struct code_model {
    const char *const name;
    const enum cmodel value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { NULL, (enum cmodel) 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const enum processor_type processor;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
    { TARGET_CPU_v8, PROCESSOR_V8 },
    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
    { TARGET_CPU_leon, PROCESSOR_LEON },
    { TARGET_CPU_leon3, PROCESSOR_LEON3 },
    { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
    { TARGET_CPU_sparclite, PROCESSOR_F930 },
    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
    { TARGET_CPU_v9, PROCESSOR_V9 },
    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
    { -1, PROCESSOR_V7 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  This must match the order of
     the enum processor_type in sparc-opts.h.  */
  static struct cpu_table {
    const char *const name;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7", MASK_ISA, 0 },
    { "cypress", MASK_ISA, 0 },
    { "v8", MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", MASK_ISA, MASK_V8 },
    { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
    { "leon", MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU },
    { "leon3", MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU },
    { "leon3v7", MASK_ISA, MASK_LEON3|MASK_FPU },
    { "sparclite", MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU.  */
    { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
    { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "sparclet", MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701", MASK_ISA, MASK_SPARCLET },
    { "v9", MASK_ISA, MASK_V9 },
    /* UltraSPARC I, II, IIi */
    { "ultrasparc", MASK_ISA,
    /* Although insns using %y are deprecated, it is a clear win.  */
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC III */
    /* ??? Check if %y issue still holds true.  */
    { "ultrasparc3", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
    /* UltraSPARC T1 */
    { "niagara", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC T2 */
    { "niagara2", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2 },
    /* UltraSPARC T3 */
    { "niagara3", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
    /* UltraSPARC T4 */
    { "niagara4", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
  };
  const struct cpu_table *cpu;
  unsigned int i;
  int fpu;

  /* Parse the comma-separated -mdebug= string, if any, into the
     sparc_debug bit mask; a leading '!' clears the corresponding bits.  */
  if (sparc_debug_string != NULL)
    {
      const char *q;
      char *p;

      p = ASTRDUP (sparc_debug_string);
      while ((q = strtok (p, ",")) != NULL)
	{
	  bool invert;
	  int mask;

	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (! strcmp (q, "all"))
	    mask = MASK_DEBUG_ALL;
	  else if (! strcmp (q, "options"))
	    mask = MASK_DEBUG_OPTIONS;
	  else
	    /* NOTE(review): error () returns, so MASK is used
	       uninitialized below on this path — confirm this is
	       acceptable for a diagnostics-only option.  */
	    error ("unknown -mdebug-%s switch", q);

	  if (invert)
	    sparc_debug &= ~mask;
	  else
	    sparc_debug |= mask;
	}
    }

  if (TARGET_DEBUG_OPTIONS)
    {
      dump_target_flags("Initial target_flags", target_flags);
      dump_target_flags("target_flags_explicit", target_flags_explicit);
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64bit archs to use 128 bit long double */
  if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      target_flags |= MASK_LONG_DOUBLE_128;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
	{
	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
	      break;
	  if (cmodel->name == NULL)
	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
	  else
	    sparc_cmodel = cmodel->value;
	}
      else
	error ("-mcmodel= is not supported on 32 bit systems");
    }

  /* Check that -fcall-saved-REG wasn't specified for out registers.  */
  for (i = 8; i < 16; i++)
    if (!call_used_regs [i])
      {
	error ("-fcall-saved-REG is not supported for out registers");
        call_used_regs [i] = 1;
      }

  fpu = target_flags & MASK_FPU; /* save current -mfpu status */

  /* Set the default CPU.  */
  if (!global_options_set.x_sparc_cpu_and_features)
    {
      for (def = &cpu_default[0]; def->cpu != -1; ++def)
	if (def->cpu == TARGET_CPU_DEFAULT)
	  break;
      gcc_assert (def->cpu != -1);
      sparc_cpu_and_features = def->processor;
    }

  if (!global_options_set.x_sparc_cpu)
    sparc_cpu = sparc_cpu_and_features;

  cpu = &cpu_table[(int) sparc_cpu_and_features];

  if (TARGET_DEBUG_OPTIONS)
    {
      fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
      fprintf (stderr, "sparc_cpu: %s\n",
	       cpu_table[(int) sparc_cpu].name);
      dump_target_flags ("cpu->disable", cpu->disable);
      dump_target_flags ("cpu->enable", cpu->enable);
    }

  /* Apply the CPU's mask, but drop features the assembler in use
     cannot encode.  */
  target_flags &= ~cpu->disable;
  target_flags |= (cpu->enable
#ifndef HAVE_AS_FMAF_HPC_VIS3
		   & ~(MASK_FMAF | MASK_VIS3)
#endif
#ifndef HAVE_AS_SPARC4
		   & ~MASK_CBCOND
#endif
#ifndef HAVE_AS_LEON
		   & ~(MASK_LEON | MASK_LEON3)
#endif
		   );

  /* If -mfpu or -mno-fpu was explicitly used, don't override with
     the processor default.  */
  if (target_flags_explicit & MASK_FPU)
    target_flags = (target_flags & ~MASK_FPU) | fpu;

  /* -mvis2 implies -mvis */
  if (TARGET_VIS2)
    target_flags |= MASK_VIS;

  /* -mvis3 implies -mvis2 and -mvis */
  if (TARGET_VIS3)
    target_flags |= MASK_VIS2 | MASK_VIS;

  /* Don't allow -mvis, -mvis2, -mvis3, or -mfmaf if FPU is
     disabled.  */
  if (! TARGET_FPU)
    target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_FMAF);

  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
     are available.
     -m64 also implies v9.  */
  if (TARGET_VIS || TARGET_ARCH64)
    {
      target_flags |= MASK_V9;
      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
    }

  /* -mvis also implies -mv8plus on 32-bit */
  if (TARGET_VIS && ! TARGET_ARCH64)
    target_flags |= MASK_V8PLUS;

  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
  if (! TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32 bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;

  /* Supply a default value for align_functions.  */
  if (align_functions == 0
      && (sparc_cpu == PROCESSOR_ULTRASPARC
	  || sparc_cpu == PROCESSOR_ULTRASPARC3
	  || sparc_cpu == PROCESSOR_NIAGARA
	  || sparc_cpu == PROCESSOR_NIAGARA2
	  || sparc_cpu == PROCESSOR_NIAGARA3
	  || sparc_cpu == PROCESSOR_NIAGARA4))
    align_functions = 32;

  /* Validate PCC_STRUCT_RETURN.  */
  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);

  /* Only use .uaxword when compiling for a 64-bit target.  */
  if (!TARGET_ARCH64)
    targetm.asm_out.unaligned_op.di = NULL;

  /* Do various machine dependent initializations.  */
  sparc_init_modes ();

  /* Set up function hooks.  */
  init_machine_status = sparc_init_machine_status;

  /* Select the cost model matching the scheduling target.  */
  switch (sparc_cpu)
    {
    case PROCESSOR_V7:
    case PROCESSOR_CYPRESS:
      sparc_costs = &cypress_costs;
      break;
    case PROCESSOR_V8:
    case PROCESSOR_SPARCLITE:
    case PROCESSOR_SUPERSPARC:
      sparc_costs = &supersparc_costs;
      break;
    case PROCESSOR_F930:
    case PROCESSOR_F934:
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      sparc_costs = &hypersparc_costs;
      break;
    case PROCESSOR_LEON:
      sparc_costs = &leon_costs;
      break;
    case PROCESSOR_LEON3:
    case PROCESSOR_LEON3V7:
      sparc_costs = &leon3_costs;
      break;
    case PROCESSOR_SPARCLET:
    case PROCESSOR_TSC701:
      sparc_costs = &sparclet_costs;
      break;
    case PROCESSOR_V9:
    case PROCESSOR_ULTRASPARC:
      sparc_costs = &ultrasparc_costs;
      break;
    case PROCESSOR_ULTRASPARC3:
      sparc_costs = &ultrasparc3_costs;
      break;
    case PROCESSOR_NIAGARA:
      sparc_costs = &niagara_costs;
      break;
    case PROCESSOR_NIAGARA2:
      sparc_costs = &niagara2_costs;
      break;
    case PROCESSOR_NIAGARA3:
      sparc_costs = &niagara3_costs;
      break;
    case PROCESSOR_NIAGARA4:
      sparc_costs = &niagara4_costs;
      break;
    case PROCESSOR_NATIVE:
      gcc_unreachable ();
    };

  if (sparc_memory_model == SMM_DEFAULT)
    {
      /* Choose the memory model for the operating system.  */
      enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
      if (os_default != SMM_DEFAULT)
	sparc_memory_model = os_default;
      /* Choose the most relaxed model for the processor.  */
      else if (TARGET_V9)
	sparc_memory_model = SMM_RMO;
      else if (TARGET_LEON3)
	sparc_memory_model = SMM_TSO;
      else if (TARGET_LEON)
	sparc_memory_model = SMM_SC;
      else if (TARGET_V8)
	sparc_memory_model = SMM_PSO;
      else
	sparc_memory_model = SMM_SC;
    }

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (TARGET_DEBUG_OPTIONS)
    dump_target_flags ("Final target_flags", target_flags);

  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 2
			  : (sparc_cpu == PROCESSOR_ULTRASPARC3
			     ? 8 : 3)),
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_ULTRASPARC3
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 64 : 32),
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* Disable save slot sharing for call-clobbered registers by default.
     The IRA sharing algorithm works on single registers only and this
     pessimizes for double floating-point registers.  */
  if (!global_options_set.x_flag_ira_share_save_slots)
    flag_ira_share_save_slots = 0;

  /* We register a machine specific pass to work around errata, if any.
     The pass mut be scheduled as late as possible so that we have the
     (essentially) final form of the insn stream to work on.
     Registering the pass must be done at start up.  It's convenient to
     do it here.  */
  register_pass (&insert_pass_work_around_errata);
}
1587
1588 /* Miscellaneous utilities. */
1589
1590 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
1591 or branch on register contents instructions. */
1592
1593 int
v9_regcmp_p(enum rtx_code code)1594 v9_regcmp_p (enum rtx_code code)
1595 {
1596 return (code == EQ || code == NE || code == GE || code == LT
1597 || code == LE || code == GT);
1598 }
1599
1600 /* Nonzero if OP is a floating point constant which can
1601 be loaded into an integer register using a single
1602 sethi instruction. */
1603
1604 int
fp_sethi_p(rtx op)1605 fp_sethi_p (rtx op)
1606 {
1607 if (GET_CODE (op) == CONST_DOUBLE)
1608 {
1609 REAL_VALUE_TYPE r;
1610 long i;
1611
1612 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
1613 REAL_VALUE_TO_TARGET_SINGLE (r, i);
1614 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
1615 }
1616
1617 return 0;
1618 }
1619
1620 /* Nonzero if OP is a floating point constant which can
1621 be loaded into an integer register using a single
1622 mov instruction. */
1623
1624 int
fp_mov_p(rtx op)1625 fp_mov_p (rtx op)
1626 {
1627 if (GET_CODE (op) == CONST_DOUBLE)
1628 {
1629 REAL_VALUE_TYPE r;
1630 long i;
1631
1632 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
1633 REAL_VALUE_TO_TARGET_SINGLE (r, i);
1634 return SPARC_SIMM13_P (i);
1635 }
1636
1637 return 0;
1638 }
1639
1640 /* Nonzero if OP is a floating point constant which can
1641 be loaded into an integer register using a high/losum
1642 instruction sequence. */
1643
1644 int
fp_high_losum_p(rtx op)1645 fp_high_losum_p (rtx op)
1646 {
1647 /* The constraints calling this should only be in
1648 SFmode move insns, so any constant which cannot
1649 be moved using a single insn will do. */
1650 if (GET_CODE (op) == CONST_DOUBLE)
1651 {
1652 REAL_VALUE_TYPE r;
1653 long i;
1654
1655 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
1656 REAL_VALUE_TO_TARGET_SINGLE (r, i);
1657 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
1658 }
1659
1660 return 0;
1661 }
1662
1663 /* Return true if the address of LABEL can be loaded by means of the
1664 mov{si,di}_pic_label_ref patterns in PIC mode. */
1665
1666 static bool
can_use_mov_pic_label_ref(rtx label)1667 can_use_mov_pic_label_ref (rtx label)
1668 {
1669 /* VxWorks does not impose a fixed gap between segments; the run-time
1670 gap can be different from the object-file gap. We therefore can't
1671 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
1672 are absolutely sure that X is in the same segment as the GOT.
1673 Unfortunately, the flexibility of linker scripts means that we
1674 can't be sure of that in general, so assume that GOT-relative
1675 accesses are never valid on VxWorks. */
1676 if (TARGET_VXWORKS_RTP)
1677 return false;
1678
1679 /* Similarly, if the label is non-local, it might end up being placed
1680 in a different section than the current one; now mov_pic_label_ref
1681 requires the label and the code to be in the same section. */
1682 if (LABEL_REF_NONLOCAL_P (label))
1683 return false;
1684
1685 /* Finally, if we are reordering basic blocks and partition into hot
1686 and cold sections, this might happen for any label. */
1687 if (flag_reorder_blocks_and_partition)
1688 return false;
1689
1690 return true;
1691 }
1692
1693 /* Expand a move instruction. Return true if all work is done. */
1694
/* Expand a move of MODE between OPERANDS[0] (destination) and
   OPERANDS[1] (source).  May rewrite OPERANDS in place (TLS/PIC
   legitimization, forcing constants to memory/registers).  Return true
   if all insns were emitted here, false if the caller must still emit
   the standard SET.  */

bool
sparc_expand_move (enum machine_mode mode, rtx *operands)
{
  /* Handle sets of MEM first.  */
  if (GET_CODE (operands[0]) == MEM)
    {
      /* 0 is a register (or a pair of registers) on SPARC.  */
      if (register_or_zero_operand (operands[1], mode))
	return false;

      if (!reload_in_progress)
	{
	  operands[0] = validize_mem (operands[0]);
	  operands[1] = force_reg (mode, operands[1]);
	}
    }

  /* Fixup TLS cases.  */
  if (TARGET_HAVE_TLS
      && CONSTANT_P (operands[1])
      && sparc_tls_referenced_p (operands [1]))
    {
      operands[1] = sparc_legitimize_tls_address (operands[1]);
      return false;
    }

  /* Fixup PIC cases.  */
  if (flag_pic && CONSTANT_P (operands[1]))
    {
      if (pic_address_needs_scratch (operands[1]))
	operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);

      /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases.  */
      if (GET_CODE (operands[1]) == LABEL_REF
	  && can_use_mov_pic_label_ref (operands[1]))
	{
	  if (mode == SImode)
	    {
	      emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }

	  if (mode == DImode)
	    {
	      gcc_assert (TARGET_ARCH64);
	      emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }
	}

      if (symbolic_operand (operands[1], mode))
	{
	  /* During reload no new pseudos may be created, so reuse the
	     destination as the scratch register.  */
	  operands[1]
	    = sparc_legitimize_pic_address (operands[1],
					    reload_in_progress
					    ? operands[0] : NULL_RTX);
	  return false;
	}
    }

  /* If we are trying to toss an integer constant into FP registers,
     or loading a FP or vector constant, force it into memory.  */
  if (CONSTANT_P (operands[1])
      && REG_P (operands[0])
      && (SPARC_FP_REG_P (REGNO (operands[0]))
	  || SCALAR_FLOAT_MODE_P (mode)
	  || VECTOR_MODE_P (mode)))
    {
      /* emit_group_store will send such bogosity to us when it is
         not storing directly into memory.  So fix this up to avoid
         crashes in output_constant_pool.  */
      if (operands [1] == const0_rtx)
	operands[1] = CONST0_RTX (mode);

      /* We can clear or set to all-ones FP registers if TARGET_VIS, and
	 always other regs.  */
      if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
	  && (const_zero_operand (operands[1], mode)
	      || const_all_ones_operand (operands[1], mode)))
	return false;

      if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
	  /* We are able to build any SF constant in integer registers
	     with at most 2 instructions.  */
	  && (mode == SFmode
	      /* And any DF constant in integer registers.  */
	      || (mode == DFmode
		  && ! can_create_pseudo_p ())))
	return false;

      operands[1] = force_const_mem (mode, operands[1]);
      if (!reload_in_progress)
	operands[1] = validize_mem (operands[1]);
      return false;
    }

  /* Accept non-constants and valid constants unmodified.  */
  if (!CONSTANT_P (operands[1])
      || GET_CODE (operands[1]) == HIGH
      || input_operand (operands[1], mode))
    return false;

  /* Only a difficult constant remains; synthesize it per mode.  */
  switch (mode)
    {
    case QImode:
      /* All QImode constants require only one insn, so proceed.  */
      break;

    case HImode:
    case SImode:
      sparc_emit_set_const32 (operands[0], operands[1]);
      return true;

    case DImode:
      /* input_operand should have filtered out 32-bit mode.  */
      sparc_emit_set_const64 (operands[0], operands[1]);
      return true;

    case TImode:
      {
	rtx high, low;
	/* TImode isn't available in 32-bit mode.  */
	split_double (operands[1], &high, &low);
	emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
			      high));
	emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
			      low));
      }
      return true;

    default:
      gcc_unreachable ();
    }

  return false;
}
1831
1832 /* Load OP1, a 32-bit constant, into OP0, a register.
1833 We know it can't be done in one insn when we get
1834 here, the move expander guarantees this. */
1835
static void
sparc_emit_set_const32 (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);
  /* When no new pseudos may be created (e.g. during reload), fall back
     to using the destination itself as the intermediate register.  */
  rtx temp = op0;

  if (can_create_pseudo_p ())
    temp = gen_reg_rtx (mode);

  if (GET_CODE (op1) == CONST_INT)
    {
      /* The one-insn cases (13-bit immediates and sethi-only values)
	 must have been handled by the move expander already.  */
      gcc_assert (!small_int_operand (op1, mode)
		  && !const_high_operand (op1, mode));

      /* Emit them as real moves instead of a HIGH/LO_SUM,
	 this way CSE can see everything and reuse intermediate
	 values if it wants.  */
      emit_insn (gen_rtx_SET (VOIDmode, temp,
			      GEN_INT (INTVAL (op1)
				       & ~(HOST_WIDE_INT)0x3ff)));

      /* OR in the low 10 bits to complete the constant.  */
      emit_insn (gen_rtx_SET (VOIDmode,
			      op0,
			      gen_rtx_IOR (mode, temp,
					   GEN_INT (INTVAL (op1) & 0x3ff))));
    }
  else
    {
      /* A symbol, emit in the traditional way.  */
      emit_insn (gen_rtx_SET (VOIDmode, temp,
			      gen_rtx_HIGH (mode, op1)));
      emit_insn (gen_rtx_SET (VOIDmode,
			      op0, gen_rtx_LO_SUM (mode, temp, op1)));
    }
}
1871
/* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
   If TEMP is nonzero, we are forbidden to use any other scratch
   registers.  Otherwise, we are allowed to generate them as needed.

   Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
   or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns).  */

void
sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
{
  rtx temp1, temp2, temp3, temp4, temp5;
  rtx ti_temp = 0;

  /* A TImode TEMP is really a pair of DImode scratch registers; keep the
     TImode rtx around so we can pick the second half if the first one
     turns out to overlap OP0 (see the MEDANY/EMBMEDANY cases below).  */
  if (temp && GET_MODE (temp) == TImode)
    {
      ti_temp = temp;
      temp = gen_rtx_REG (DImode, REGNO (temp));
    }

  /* SPARC-V9 code-model support.  */
  switch (sparc_cmodel)
    {
    case CM_MEDLOW:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 4TB of the virtual address
	 space.

	 sethi	%hi(symbol), %temp1
	 or	%temp1, %lo(symbol), %reg  */
      if (temp)
	temp1 = temp;  /* op0 is allowed.  */
      else
	temp1 = gen_reg_rtx (DImode);

      emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
      emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
      break;

    case CM_MEDMID:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 16TB of the virtual address
	 space.

	 sethi	%h44(symbol), %temp1
	 or	%temp1, %m44(symbol), %temp2
	 sllx	%temp2, 12, %temp3
	 or	%temp3, %l44(symbol), %reg  */
      if (temp)
	{
	  /* OP0 may serve as every intermediate but the shift source,
	     which must not be clobbered before the final or.  */
	  temp1 = op0;
	  temp2 = op0;
	  temp3 = temp;  /* op0 is allowed.  */
	}
      else
	{
	  temp1 = gen_reg_rtx (DImode);
	  temp2 = gen_reg_rtx (DImode);
	  temp3 = gen_reg_rtx (DImode);
	}

      emit_insn (gen_seth44 (temp1, op1));
      emit_insn (gen_setm44 (temp2, temp1, op1));
      emit_insn (gen_rtx_SET (VOIDmode, temp3,
			      gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
      emit_insn (gen_setl44 (op0, temp3, op1));
      break;

    case CM_MEDANY:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable can be placed anywhere in the virtual address
	 space.

	 sethi	%hh(symbol), %temp1
	 sethi	%lm(symbol), %temp2
	 or	%temp1, %hm(symbol), %temp3
	 sllx	%temp3, 32, %temp4
	 or	%temp4, %temp2, %temp5
	 or	%temp5, %lo(symbol), %reg  */
      if (temp)
	{
	  /* It is possible that one of the registers we got for operands[2]
	     might coincide with that of operands[0] (which is why we made
	     it TImode).  Pick the other one to use as our scratch.  */
	  if (rtx_equal_p (temp, op0))
	    {
	      gcc_assert (ti_temp);
	      temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
	    }
	  temp1 = op0;
	  temp2 = temp;  /* op0 is _not_ allowed, see above.  */
	  temp3 = op0;
	  temp4 = op0;
	  temp5 = op0;
	}
      else
	{
	  temp1 = gen_reg_rtx (DImode);
	  temp2 = gen_reg_rtx (DImode);
	  temp3 = gen_reg_rtx (DImode);
	  temp4 = gen_reg_rtx (DImode);
	  temp5 = gen_reg_rtx (DImode);
	}

      emit_insn (gen_sethh (temp1, op1));
      emit_insn (gen_setlm (temp2, op1));
      emit_insn (gen_sethm (temp3, temp1, op1));
      emit_insn (gen_rtx_SET (VOIDmode, temp4,
			      gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
      emit_insn (gen_rtx_SET (VOIDmode, temp5,
			      gen_rtx_PLUS (DImode, temp4, temp2)));
      emit_insn (gen_setlo (op0, temp5, op1));
      break;

    case CM_EMBMEDANY:
      /* Old old old backwards compatibility kruft here.
	 Essentially it is MEDLOW with a fixed 64-bit
	 virtual base added to all data segment addresses.
	 Text-segment stuff is computed like MEDANY, we can't
	 reuse the code above because the relocation knobs
	 look different.

	 Data segment:	sethi	%hi(symbol), %temp1
			add	%temp1, EMBMEDANY_BASE_REG, %temp2
			or	%temp2, %lo(symbol), %reg  */
      if (data_segment_operand (op1, GET_MODE (op1)))
	{
	  if (temp)
	    {
	      temp1 = temp;  /* op0 is allowed.  */
	      temp2 = op0;
	    }
	  else
	    {
	      temp1 = gen_reg_rtx (DImode);
	      temp2 = gen_reg_rtx (DImode);
	    }

	  emit_insn (gen_embmedany_sethi (temp1, op1));
	  emit_insn (gen_embmedany_brsum (temp2, temp1));
	  emit_insn (gen_embmedany_losum (op0, temp2, op1));
	}

      /* Text segment:	sethi	%uhi(symbol), %temp1
			sethi	%hi(symbol), %temp2
			or	%temp1, %ulo(symbol), %temp3
			sllx	%temp3, 32, %temp4
			or	%temp4, %temp2, %temp5
			or	%temp5, %lo(symbol), %reg  */
      else
	{
	  if (temp)
	    {
	      /* It is possible that one of the registers we got for operands[2]
		 might coincide with that of operands[0] (which is why we made
		 it TImode).  Pick the other one to use as our scratch.  */
	      if (rtx_equal_p (temp, op0))
		{
		  gcc_assert (ti_temp);
		  temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
		}
	      temp1 = op0;
	      temp2 = temp;  /* op0 is _not_ allowed, see above.  */
	      temp3 = op0;
	      temp4 = op0;
	      temp5 = op0;
	    }
	  else
	    {
	      temp1 = gen_reg_rtx (DImode);
	      temp2 = gen_reg_rtx (DImode);
	      temp3 = gen_reg_rtx (DImode);
	      temp4 = gen_reg_rtx (DImode);
	      temp5 = gen_reg_rtx (DImode);
	    }

	  emit_insn (gen_embmedany_textuhi (temp1, op1));
	  emit_insn (gen_embmedany_texthi  (temp2, op1));
	  emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
	  emit_insn (gen_rtx_SET (VOIDmode, temp4,
				  gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
	  emit_insn (gen_rtx_SET (VOIDmode, temp5,
				  gen_rtx_PLUS (DImode, temp4, temp2)));
	  emit_insn (gen_embmedany_textlo  (op0, temp5, op1));
	}
      break;

    default:
      gcc_unreachable ();
    }
}
2074
2075 #if HOST_BITS_PER_WIDE_INT == 32
/* Stub for 32-bit hosts: a HOST_WIDE_INT cannot hold a 64-bit constant
   there, and the TARGET_ARCH64 paths that would call this are never
   taken, so reaching it is a bug.  */
static void
sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
2081 #else
2082 /* These avoid problems when cross compiling. If we do not
2083 go through all this hair then the optimizer will see
2084 invalid REG_EQUAL notes or in some cases none at all. */
2085 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2086 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2087 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2088 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2089
2090 /* The optimizer is not to assume anything about exactly
2091 which bits are set for a HIGH, they are unspecified.
2092 Unfortunately this leads to many missed optimizations
2093 during CSE. We mask out the non-HIGH bits, and matches
2094 a plain movdi, to alleviate this problem. */
2095 static rtx
gen_safe_HIGH64(rtx dest,HOST_WIDE_INT val)2096 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2097 {
2098 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2099 }
2100
2101 static rtx
gen_safe_SET64(rtx dest,HOST_WIDE_INT val)2102 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2103 {
2104 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val));
2105 }
2106
2107 static rtx
gen_safe_OR64(rtx src,HOST_WIDE_INT val)2108 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2109 {
2110 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2111 }
2112
2113 static rtx
gen_safe_XOR64(rtx src,HOST_WIDE_INT val)2114 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2115 {
2116 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2117 }
2118
2119 /* Worker routines for 64-bit constant formation on arch64.
2120 One of the key things to be doing in these emissions is
2121 to create as many temp REGs as possible. This makes it
2122 possible for half-built constants to be used later when
2123 such values are similar to something required later on.
2124 Without doing this, the optimizer cannot see such
2125 opportunities. */
2126
2127 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2128 unsigned HOST_WIDE_INT, int);
2129
/* Two-insn load of a 32-bit value LOW_BITS into DImode register OP0,
   using TEMP as scratch.  With IS_NEG zero this is
	sethi	%hi(low_bits), %temp
	or	%temp, %lo(low_bits), %op0
   With IS_NEG nonzero the value is the sign-extension of ~low_bits,
   formed by sethi of the complement followed by an XOR that flips all
   upper bits back on.  */
static void
sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
			       unsigned HOST_WIDE_INT low_bits, int is_neg)
{
  unsigned HOST_WIDE_INT high_bits;

  /* For the negated case, sethi the complement so the final XOR with a
     negative immediate restores the intended bit pattern.  */
  if (is_neg)
    high_bits = (~low_bits) & 0xffffffff;
  else
    high_bits = low_bits;

  emit_insn (gen_safe_HIGH64 (temp, high_bits));
  if (!is_neg)
    {
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_safe_OR64 (temp, (high_bits & 0x3ff))));
    }
  else
    {
      /* If we are XOR'ing with -1, then we should emit a one's complement
	 instead.  This way the combiner will notice logical operations
	 such as ANDN later on and substitute.  */
      if ((low_bits & 0x3ff) == 0x3ff)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_NOT (DImode, temp)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_safe_XOR64 (temp,
						  (-(HOST_WIDE_INT)0x400
						   | (low_bits & 0x3ff)))));
	}
    }
}
2166
2167 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2168 unsigned HOST_WIDE_INT, int);
2169
/* Load HIGH_BITS shifted left by SHIFT_COUNT, optionally or'd with
   LOW_IMMEDIATE, into OP0 using TEMP as scratch:
	sethi/or (or a single mov)	-> temp
	sllx	%temp, shift_count, %op0
	[or	%op0, low_immediate, %op0]  */
static void
sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
			       unsigned HOST_WIDE_INT high_bits,
			       unsigned HOST_WIDE_INT low_immediate,
			       int shift_count)
{
  /* TEMP2 tracks which register actually holds the unshifted value.  */
  rtx temp2 = op0;

  if ((high_bits & 0xfffffc00) != 0)
    {
      emit_insn (gen_safe_HIGH64 (temp, high_bits));
      /* Only emit the or when the sethi did not already produce every
	 needed bit (i.e. some of the low 10 bits are set).  */
      if ((high_bits & ~0xfffffc00) != 0)
	emit_insn (gen_rtx_SET (VOIDmode, op0,
				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
	temp2 = temp;
    }
  else
    {
      /* Small enough for a single mov immediate.  */
      emit_insn (gen_safe_SET64 (temp, high_bits));
      temp2 = temp;
    }

  /* Now shift it up into place.  */
  emit_insn (gen_rtx_SET (VOIDmode, op0,
			  gen_rtx_ASHIFT (DImode, temp2,
					  GEN_INT (shift_count))));

  /* If there is a low immediate part piece, finish up by
     putting that in as well.  */
  if (low_immediate != 0)
    emit_insn (gen_rtx_SET (VOIDmode, op0,
			    gen_safe_OR64 (op0, low_immediate)));
}
2204
2205 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2206 unsigned HOST_WIDE_INT);
2207
2208 /* Full 64-bit constant decomposition. Even though this is the
2209 'worst' case, we still optimize a few things away. */
static void
sparc_emit_set_const64_longway (rtx op0, rtx temp,
				unsigned HOST_WIDE_INT high_bits,
				unsigned HOST_WIDE_INT low_bits)
{
  /* Build HIGH_BITS:LOW_BITS in OP0 piece by piece.  When pseudos are
     available we build each half independently and add them; during
     reload we must reuse OP0/TEMP and interleave shifts with ors.  */
  rtx sub_temp = op0;

  if (can_create_pseudo_p ())
    sub_temp = gen_reg_rtx (DImode);

  /* Materialize the high 32 bits in SUB_TEMP (or TEMP when the sethi
     or mov already holds the full value).  */
  if ((high_bits & 0xfffffc00) != 0)
    {
      emit_insn (gen_safe_HIGH64 (temp, high_bits));
      if ((high_bits & ~0xfffffc00) != 0)
	emit_insn (gen_rtx_SET (VOIDmode,
				sub_temp,
				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
	sub_temp = temp;
    }
  else
    {
      emit_insn (gen_safe_SET64 (temp, high_bits));
      sub_temp = temp;
    }

  if (can_create_pseudo_p ())
    {
      /* Full form: shift the high half into place, build the low half
	 with sethi/or into fresh pseudos, and add the two.  */
      rtx temp2 = gen_reg_rtx (DImode);
      rtx temp3 = gen_reg_rtx (DImode);
      rtx temp4 = gen_reg_rtx (DImode);

      emit_insn (gen_rtx_SET (VOIDmode, temp4,
			      gen_rtx_ASHIFT (DImode, sub_temp,
					      GEN_INT (32))));

      emit_insn (gen_safe_HIGH64 (temp2, low_bits));
      if ((low_bits & ~0xfffffc00) != 0)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, temp3,
				  gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_PLUS (DImode, temp4, temp3)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_PLUS (DImode, temp4, temp2)));
	}
    }
  else
    {
      /* No pseudos: feed the low 32 bits in as three chunks of
	 12, 12 and 8 bits, shifting OP0 left between chunks.  */
      rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
      rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
      rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
      int to_shift = 12;

      /* We are in the middle of reload, so this is really
	 painful.  However we do still make an attempt to
	 avoid emitting truly stupid code.  */
      if (low1 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_ASHIFT (DImode, sub_temp,
						  GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_IOR (DImode, op0, low1)));
	  sub_temp = op0;
	  to_shift = 12;
	}
      else
	{
	  /* Chunk is zero: just fold its width into the next shift.  */
	  to_shift += 12;
	}
      if (low2 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_ASHIFT (DImode, sub_temp,
						  GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_IOR (DImode, op0, low2)));
	  sub_temp = op0;
	  to_shift = 8;
	}
      else
	{
	  to_shift += 8;
	}
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_rtx_ASHIFT (DImode, sub_temp,
					      GEN_INT (to_shift))));
      if (low3 != const0_rtx)
	emit_insn (gen_rtx_SET (VOIDmode, op0,
				gen_rtx_IOR (DImode, op0, low3)));
      /* phew...  */
    }
}
2307
2308 /* Analyze a 64-bit constant for certain properties. */
2309 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2310 unsigned HOST_WIDE_INT,
2311 int *, int *, int *);
2312
2313 static void
analyze_64bit_constant(unsigned HOST_WIDE_INT high_bits,unsigned HOST_WIDE_INT low_bits,int * hbsp,int * lbsp,int * abbasp)2314 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2315 unsigned HOST_WIDE_INT low_bits,
2316 int *hbsp, int *lbsp, int *abbasp)
2317 {
2318 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2319 int i;
2320
2321 lowest_bit_set = highest_bit_set = -1;
2322 i = 0;
2323 do
2324 {
2325 if ((lowest_bit_set == -1)
2326 && ((low_bits >> i) & 1))
2327 lowest_bit_set = i;
2328 if ((highest_bit_set == -1)
2329 && ((high_bits >> (32 - i - 1)) & 1))
2330 highest_bit_set = (64 - i - 1);
2331 }
2332 while (++i < 32
2333 && ((highest_bit_set == -1)
2334 || (lowest_bit_set == -1)));
2335 if (i == 32)
2336 {
2337 i = 0;
2338 do
2339 {
2340 if ((lowest_bit_set == -1)
2341 && ((high_bits >> i) & 1))
2342 lowest_bit_set = i + 32;
2343 if ((highest_bit_set == -1)
2344 && ((low_bits >> (32 - i - 1)) & 1))
2345 highest_bit_set = 32 - i - 1;
2346 }
2347 while (++i < 32
2348 && ((highest_bit_set == -1)
2349 || (lowest_bit_set == -1)));
2350 }
2351 /* If there are no bits set this should have gone out
2352 as one instruction! */
2353 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2354 all_bits_between_are_set = 1;
2355 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2356 {
2357 if (i < 32)
2358 {
2359 if ((low_bits & (1 << i)) != 0)
2360 continue;
2361 }
2362 else
2363 {
2364 if ((high_bits & (1 << (i - 32))) != 0)
2365 continue;
2366 }
2367 all_bits_between_are_set = 0;
2368 break;
2369 }
2370 *hbsp = highest_bit_set;
2371 *lbsp = lowest_bit_set;
2372 *abbasp = all_bits_between_are_set;
2373 }
2374
2375 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2376
2377 static int
const64_is_2insns(unsigned HOST_WIDE_INT high_bits,unsigned HOST_WIDE_INT low_bits)2378 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2379 unsigned HOST_WIDE_INT low_bits)
2380 {
2381 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2382
2383 if (high_bits == 0
2384 || high_bits == 0xffffffff)
2385 return 1;
2386
2387 analyze_64bit_constant (high_bits, low_bits,
2388 &highest_bit_set, &lowest_bit_set,
2389 &all_bits_between_are_set);
2390
2391 if ((highest_bit_set == 63
2392 || lowest_bit_set == 0)
2393 && all_bits_between_are_set != 0)
2394 return 1;
2395
2396 if ((highest_bit_set - lowest_bit_set) < 21)
2397 return 1;
2398
2399 return 0;
2400 }
2401
2402 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2403 unsigned HOST_WIDE_INT,
2404 int, int);
2405
2406 static unsigned HOST_WIDE_INT
create_simple_focus_bits(unsigned HOST_WIDE_INT high_bits,unsigned HOST_WIDE_INT low_bits,int lowest_bit_set,int shift)2407 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2408 unsigned HOST_WIDE_INT low_bits,
2409 int lowest_bit_set, int shift)
2410 {
2411 HOST_WIDE_INT hi, lo;
2412
2413 if (lowest_bit_set < 32)
2414 {
2415 lo = (low_bits >> lowest_bit_set) << shift;
2416 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2417 }
2418 else
2419 {
2420 lo = 0;
2421 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2422 }
2423 gcc_assert (! (hi & lo));
2424 return (hi | lo);
2425 }
2426
/* Here we are sure to be arch64 and this is an integer constant
   being loaded into a register.  Emit the most efficient
   insn sequence possible.  Detection of all the 1-insn cases
   has been done already.

   Tries, in order: 2-insn mov+shift and sethi+shift forms, the
   sethi/or (or complemented) form, then 3-insn forms including the
   negated constant, and finally full decomposition.  */
static void
sparc_emit_set_const64 (rtx op0, rtx op1)
{
  unsigned HOST_WIDE_INT high_bits, low_bits;
  int lowest_bit_set, highest_bit_set;
  int all_bits_between_are_set;
  rtx temp = 0;

  /* Sanity check that we know what we are working with.  */
  gcc_assert (TARGET_ARCH64
	      && (GET_CODE (op0) == SUBREG
		  || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));

  /* During reload no new pseudos may be created; reuse OP0 as the
     scratch register in that case.  */
  if (! can_create_pseudo_p ())
    temp = op0;

  if (GET_CODE (op1) != CONST_INT)
    {
      sparc_emit_set_symbolic_const64 (op0, op1, temp);
      return;
    }

  if (! temp)
    temp = gen_reg_rtx (DImode);

  high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
  low_bits = (INTVAL (op1) & 0xffffffff);

  /* low_bits	bits 0  --> 31
     high_bits	bits 32 --> 63  */

  analyze_64bit_constant (high_bits, low_bits,
			  &highest_bit_set, &lowest_bit_set,
			  &all_bits_between_are_set);

  /* First try for a 2-insn sequence.  */

  /* These situations are preferred because the optimizer can
   * do more things with them:
   * 1) mov	-1, %reg
   *    sllx	%reg, shift, %reg
   * 2) mov	-1, %reg
   *    srlx	%reg, shift, %reg
   * 3) mov	some_small_const, %reg
   *    sllx	%reg, shift, %reg
   */
  if (((highest_bit_set == 63
	|| lowest_bit_set == 0)
       && all_bits_between_are_set != 0)
      || ((highest_bit_set - lowest_bit_set) < 12))
    {
      HOST_WIDE_INT the_const = -1;
      int shift = lowest_bit_set;

      if ((highest_bit_set != 63
	   && lowest_bit_set != 0)
	  || all_bits_between_are_set == 0)
	{
	  /* A narrow field: collapse it down to a SIMM13 constant.  */
	  the_const =
	    create_simple_focus_bits (high_bits, low_bits,
				      lowest_bit_set, 0);
	}
      else if (lowest_bit_set == 0)
	/* Ones reaching bit 0: use mov -1 and a right shift, encoded
	   as a negative shift count.  */
	shift = -(63 - highest_bit_set);

      gcc_assert (SPARC_SIMM13_P (the_const));
      gcc_assert (shift != 0);

      emit_insn (gen_safe_SET64 (temp, the_const));
      if (shift > 0)
	emit_insn (gen_rtx_SET (VOIDmode,
				op0,
				gen_rtx_ASHIFT (DImode,
						temp,
						GEN_INT (shift))));
      else if (shift < 0)
	emit_insn (gen_rtx_SET (VOIDmode,
				op0,
				gen_rtx_LSHIFTRT (DImode,
						  temp,
						  GEN_INT (-shift))));
      return;
    }

  /* Now a range of 22 or less bits set somewhere.
   * 1) sethi	%hi(focus_bits), %reg
   *    sllx	%reg, shift, %reg
   * 2) sethi	%hi(focus_bits), %reg
   *    srlx	%reg, shift, %reg
   */
  if ((highest_bit_set - lowest_bit_set) < 21)
    {
      /* Shift the field up to the sethi position (bit 10).  */
      unsigned HOST_WIDE_INT focus_bits =
	create_simple_focus_bits (high_bits, low_bits,
				  lowest_bit_set, 10);

      gcc_assert (SPARC_SETHI_P (focus_bits));
      gcc_assert (lowest_bit_set != 10);

      emit_insn (gen_safe_HIGH64 (temp, focus_bits));

      /* If lowest_bit_set == 10 then a sethi alone could have done it.  */
      if (lowest_bit_set < 10)
	emit_insn (gen_rtx_SET (VOIDmode,
				op0,
				gen_rtx_LSHIFTRT (DImode, temp,
						  GEN_INT (10 - lowest_bit_set))));
      else if (lowest_bit_set > 10)
	emit_insn (gen_rtx_SET (VOIDmode,
				op0,
				gen_rtx_ASHIFT (DImode, temp,
						GEN_INT (lowest_bit_set - 10))));
      return;
    }

  /* 1) sethi	%hi(low_bits), %reg
   *    or	%reg, %lo(low_bits), %reg
   * 2) sethi	%hi(~low_bits), %reg
   *	xor	%reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
   */
  if (high_bits == 0
      || high_bits == 0xffffffff)
    {
      sparc_emit_set_const64_quick1 (op0, temp, low_bits,
				     (high_bits == 0xffffffff));
      return;
    }

  /* Now, try 3-insn sequences.  */

  /* 1) sethi	%hi(high_bits), %reg
   *    or	%reg, %lo(high_bits), %reg
   *    sllx	%reg, 32, %reg
   */
  if (low_bits == 0)
    {
      sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
      return;
    }

  /* We may be able to do something quick
     when the constant is negated, so try that.  */
  if (const64_is_2insns ((~high_bits) & 0xffffffff,
			 (~low_bits) & 0xfffffc00))
    {
      /* NOTE: The trailing bits get XOR'd so we need the
	 non-negated bits, not the negated ones.  */
      unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;

      if ((((~high_bits) & 0xffffffff) == 0
	   && ((~low_bits) & 0x80000000) == 0)
	  || (((~high_bits) & 0xffffffff) == 0xffffffff
	      && ((~low_bits) & 0x80000000) != 0))
	{
	  /* The negated value sign-extends from 32 bits, so a plain
	     32-bit load of it may suffice.  */
	  unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);

	  if ((SPARC_SETHI_P (fast_int)
	       && (~high_bits & 0xffffffff) == 0)
	      || SPARC_SIMM13_P (fast_int))
	    emit_insn (gen_safe_SET64 (temp, fast_int));
	  else
	    sparc_emit_set_const64 (temp, GEN_INT (fast_int));
	}
      else
	{
	  rtx negated_const;
	  negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
				   (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
	  sparc_emit_set_const64 (temp, negated_const);
	}

      /* If we are XOR'ing with -1, then we should emit a one's complement
	 instead.  This way the combiner will notice logical operations
	 such as ANDN later on and substitute.  */
      if (trailing_bits == 0x3ff)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_NOT (DImode, temp)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (VOIDmode,
				  op0,
				  gen_safe_XOR64 (temp,
						  (-0x400 | trailing_bits))));
	}
      return;
    }

  /* 1) sethi	%hi(xxx), %reg
   *    or	%reg, %lo(xxx), %reg
   *	sllx	%reg, yyy, %reg
   *
   * ??? This is just a generalized version of the low_bits==0
   * thing above, FIXME...
   */
  if ((highest_bit_set - lowest_bit_set) < 32)
    {
      unsigned HOST_WIDE_INT focus_bits =
	create_simple_focus_bits (high_bits, low_bits,
				  lowest_bit_set, 0);

      /* We can't get here in this state.  */
      gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);

      /* So what we know is that the set bits straddle the
	 middle of the 64-bit word.  */
      sparc_emit_set_const64_quick2 (op0, temp,
				     focus_bits, 0,
				     lowest_bit_set);
      return;
    }

  /* 1) sethi	%hi(high_bits), %reg
   *    or	%reg, %lo(high_bits), %reg
   *    sllx	%reg, 32, %reg
   *	or	%reg, low_bits, %reg
   */
  if (SPARC_SIMM13_P(low_bits)
      && ((int)low_bits > 0))
    {
      sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
      return;
    }

  /* The easiest way when all else fails, is full decomposition.  */
  sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
}
2659 #endif /* HOST_BITS_PER_WIDE_INT == 32 */
2660
2661 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2662 return the mode to be used for the comparison. For floating-point,
2663 CCFP[E]mode is used. CC_NOOVmode should be used when the first operand
2664 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2665 processing is needed. */
2666
2667 enum machine_mode
select_cc_mode(enum rtx_code op,rtx x,rtx y ATTRIBUTE_UNUSED)2668 select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
2669 {
2670 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2671 {
2672 switch (op)
2673 {
2674 case EQ:
2675 case NE:
2676 case UNORDERED:
2677 case ORDERED:
2678 case UNLT:
2679 case UNLE:
2680 case UNGT:
2681 case UNGE:
2682 case UNEQ:
2683 case LTGT:
2684 return CCFPmode;
2685
2686 case LT:
2687 case LE:
2688 case GT:
2689 case GE:
2690 return CCFPEmode;
2691
2692 default:
2693 gcc_unreachable ();
2694 }
2695 }
2696 else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2697 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2698 {
2699 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2700 return CCX_NOOVmode;
2701 else
2702 return CC_NOOVmode;
2703 }
2704 else
2705 {
2706 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2707 return CCXmode;
2708 else
2709 return CCmode;
2710 }
2711 }
2712
2713 /* Emit the compare insn and return the CC reg for a CODE comparison
2714 with operands X and Y. */
2715
static rtx
gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
{
  enum machine_mode mode;
  rtx cc_reg;

  /* X may already be a CC register from an earlier comparison; reuse it.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return x;

  mode = SELECT_CC_MODE (code, x, y);

  /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
     fcc regs (cse can't tell they're really call clobbered regs and will
     remove a duplicate comparison even if there is an intervening function
     call - it will then try to reload the cc reg via an int reg which is why
     we need the movcc patterns).  It is possible to provide the movcc
     patterns by using the ldxfsr/stxfsr v9 insns.  I tried it: you need two
     registers (say %g1,%g5) and it takes about 6 insns.  A better fix would be
     to tell cse that CCFPE mode registers (even pseudos) are call
     clobbered.  */

  /* ??? This is an experiment.  Rather than making changes to cse which may
     or may not be easy/clean, we do our own cse.  This is possible because
     we will generate hard registers.  Cse knows they're call clobbered (it
     doesn't know the same thing about pseudos).  If we guess wrong, no big
     deal, but if we win, great!  */

  if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
#if 1 /* experiment */
    {
      int reg;
      /* We cycle through the registers to ensure they're all exercised.  */
      static int next_fcc_reg = 0;
      /* Previous x,y for each fcc reg.  */
      static rtx prev_args[4][2];

      /* Scan prev_args for x,y.  Reusing the same fcc reg for a repeated
	 comparison lets later passes delete the duplicate compare.  */
      for (reg = 0; reg < 4; reg++)
	if (prev_args[reg][0] == x && prev_args[reg][1] == y)
	  break;
      if (reg == 4)
	{
	  reg = next_fcc_reg;
	  prev_args[reg][0] = x;
	  prev_args[reg][1] = y;
	  next_fcc_reg = (next_fcc_reg + 1) & 3;
	}
      cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
    }
#else
    cc_reg = gen_reg_rtx (mode);
#endif /* ! experiment */
  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
  else
    cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);

  /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD.  If we do, this
     will only result in an unrecognizable insn so no point in asserting.  */
  emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)));

  return cc_reg;
}
2779
2780
2781 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
2782
2783 rtx
gen_compare_reg(rtx cmp)2784 gen_compare_reg (rtx cmp)
2785 {
2786 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2787 }
2788
2789 /* This function is used for v9 only.
2790 DEST is the target of the Scc insn.
2791 CODE is the code for an Scc's comparison.
2792 X and Y are the values we compare.
2793
2794 This function is needed to turn
2795
2796 (set (reg:SI 110)
2797 (gt (reg:CCX 100 %icc)
2798 (const_int 0)))
2799 into
2800 (set (reg:SI 110)
2801 (gt:DI (reg:CCX 100 %icc)
2802 (const_int 0)))
2803
2804 IE: The instruction recognizer needs to see the mode of the comparison to
2805 find the right instruction. We could use "gt:DI" right in the
2806 define_expand, but leaving it out allows us to handle DI, SI, etc. */
2807
/* Emit a v9 conditional-move sequence setting DEST to the result of the
   comparison COMPARE_CODE between X and Y.  Return 1 on success, 0 if
   the comparison cannot be handled here (DImode on a 32-bit target).  */
static int
gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
{
  if (! TARGET_ARCH64
      && (GET_MODE (x) == DImode
	  || GET_MODE (dest) == DImode))
    return 0;

  /* Try to use the movrCC insns.  */
  if (TARGET_ARCH64
      && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
      && y == const0_rtx
      && v9_regcmp_p (compare_code))
    {
      rtx op0 = x;
      rtx temp;

      /* Special case for op0 != 0.  This can be done with one instruction if
	 dest == x.  */

      if (compare_code == NE
	  && GET_MODE (dest) == DImode
	  && rtx_equal_p (op0, dest))
	{
	  emit_insn (gen_rtx_SET (VOIDmode, dest,
			      gen_rtx_IF_THEN_ELSE (DImode,
				       gen_rtx_fmt_ee (compare_code, DImode,
						       op0, const0_rtx),
				       const1_rtx,
				       dest)));
	  return 1;
	}

      if (reg_overlap_mentioned_p (dest, op0))
	{
	  /* Handle the case where dest == x.
	     We "early clobber" the result.  */
	  op0 = gen_reg_rtx (GET_MODE (x));
	  emit_move_insn (op0, x);
	}

      /* Zero DEST first, then conditionally move 1 into it based on
	 the (DImode-extended) register value.  */
      emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
      if (GET_MODE (op0) != DImode)
	{
	  temp = gen_reg_rtx (DImode);
	  convert_move (temp, op0, 0);
	}
      else
	temp = op0;
      emit_insn (gen_rtx_SET (VOIDmode, dest,
			  gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
				   gen_rtx_fmt_ee (compare_code, DImode,
						   temp, const0_rtx),
				   const1_rtx,
				   dest)));
      return 1;
    }
  else
    {
      /* General form: emit the compare, then a conditional move on the
	 resulting CC register.  */
      x = gen_compare_reg_1 (compare_code, x, y);
      y = const0_rtx;

      gcc_assert (GET_MODE (x) != CC_NOOVmode
		  && GET_MODE (x) != CCX_NOOVmode);

      emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
      emit_insn (gen_rtx_SET (VOIDmode, dest,
			  gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
				   gen_rtx_fmt_ee (compare_code,
						   GET_MODE (x), x, y),
				    const1_rtx, dest)));
      return 1;
    }
}
2882
2883
2884 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
2885 without jumps using the addx/subx instructions. */
2886
bool
emit_scc_insn (rtx operands[])
{
  rtx tem;
  rtx x;
  rtx y;
  enum rtx_code code;

  /* The quad-word fp compare library routines all return nonzero to indicate
     true, which is different from the equivalent libgcc routines, so we must
     handle them specially here.  */
  if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
    {
      operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
					      GET_CODE (operands[1]));
      operands[2] = XEXP (operands[1], 0);
      operands[3] = XEXP (operands[1], 1);
    }

  code = GET_CODE (operands[1]);
  x = operands[2];
  y = operands[3];

  /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
     more applications).  The exception to this is "reg != 0" which can
     be done in one instruction on v9 (so we do it).  */
  if (code == EQ)
    {
      if (GET_MODE (x) == SImode)
	{
	  rtx pat = gen_seqsi_special (operands[0], x, y);
	  emit_insn (pat);
	  return true;
	}
      else if (GET_MODE (x) == DImode)
	{
	  rtx pat = gen_seqdi_special (operands[0], x, y);
	  emit_insn (pat);
	  return true;
	}
    }

  if (code == NE)
    {
      if (GET_MODE (x) == SImode)
	{
	  rtx pat = gen_snesi_special (operands[0], x, y);
	  emit_insn (pat);
	  return true;
	}
      else if (GET_MODE (x) == DImode)
	{
	  rtx pat;
	  if (TARGET_VIS3)
	    pat = gen_snedi_special_vis3 (operands[0], x, y);
	  else
	    pat = gen_snedi_special (operands[0], x, y);
	  emit_insn (pat);
	  return true;
	}
    }

  /* On VIS3 targets, GTU/LTU get the addx/subx treatment below instead
     of a conditional move.  */
  if (TARGET_V9
      && TARGET_ARCH64
      && GET_MODE (x) == DImode
      && !(TARGET_VIS3
	   && (code == GTU || code == LTU))
      && gen_v9_scc (operands[0], code, x, y))
    return true;

  /* We can do LTU and GEU using the addx/subx instructions too.  And
     for GTU/LEU, if both operands are registers swap them and fall
     back to the easy case.  */
  if (code == GTU || code == LEU)
    {
      if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
          && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
	{
	  tem = x;
	  x = y;
	  y = tem;
	  code = swap_condition (code);
	}
    }

  if (code == LTU
      || (!TARGET_VIS3 && code == GEU))
    {
      emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			      gen_rtx_fmt_ee (code, SImode,
					      gen_compare_reg_1 (code, x, y),
					      const0_rtx)));
      return true;
    }

  /* All the possibilities to use addx/subx based sequences have been
     exhausted, try for a 3 instruction sequence using v9 conditional
     moves.  */
  if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
    return true;

  /* Nope, do branches.  */
  return false;
}
2991
2992 /* Emit a conditional jump insn for the v9 architecture using comparison code
2993 CODE and jump target LABEL.
2994 This function exists to take advantage of the v9 brxx insns. */
2995
2996 static void
emit_v9_brxx_insn(enum rtx_code code,rtx op0,rtx label)2997 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
2998 {
2999 emit_jump_insn (gen_rtx_SET (VOIDmode,
3000 pc_rtx,
3001 gen_rtx_IF_THEN_ELSE (VOIDmode,
3002 gen_rtx_fmt_ee (code, GET_MODE (op0),
3003 op0, const0_rtx),
3004 gen_rtx_LABEL_REF (VOIDmode, label),
3005 pc_rtx)));
3006 }
3007
3008 /* Emit a conditional jump insn for the UA2011 architecture using
3009 comparison code CODE and jump target LABEL. This function exists
3010 to take advantage of the UA2011 Compare and Branch insns. */
3011
3012 static void
emit_cbcond_insn(enum rtx_code code,rtx op0,rtx op1,rtx label)3013 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3014 {
3015 rtx if_then_else;
3016
3017 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3018 gen_rtx_fmt_ee(code, GET_MODE(op0),
3019 op0, op1),
3020 gen_rtx_LABEL_REF (VOIDmode, label),
3021 pc_rtx);
3022
3023 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, if_then_else));
3024 }
3025
void
emit_conditional_branch_insn (rtx operands[])
{
  /* Emit a conditional branch described by OPERANDS: operands[0] is the
     comparison code, operands[1] and operands[2] are the compared values,
     and operands[3] is the branch target label.  The operand slots are
     rewritten in place as the comparison is lowered.  */

  /* The quad-word fp compare library routines all return nonzero to indicate
     true, which is different from the equivalent libgcc routines, so we must
     handle them specially here.  */
  if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
    {
      /* The helper returns the new comparison as (code reg const0);
	 unpack it back into the operand slots.  */
      operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
					      GET_CODE (operands[0]));
      operands[1] = XEXP (operands[0], 0);
      operands[2] = XEXP (operands[0], 1);
    }

  /* If we can tell early on that the comparison is against a constant
     that won't fit in the 5-bit signed immediate field of a cbcond,
     use one of the other v9 conditional branch sequences.  */
  if (TARGET_CBCOND
      && GET_CODE (operands[1]) == REG
      && (GET_MODE (operands[1]) == SImode
	  || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
      && (GET_CODE (operands[2]) != CONST_INT
	  || SPARC_SIMM5_P (INTVAL (operands[2]))))
    {
      emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
      return;
    }

  /* A 64-bit register compared against zero can use the v9 brxx insns.  */
  if (TARGET_ARCH64 && operands[2] == const0_rtx
      && GET_CODE (operands[1]) == REG
      && GET_MODE (operands[1]) == DImode)
    {
      emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
      return;
    }

  /* Otherwise compare into the condition-code register and branch on it.  */
  operands[1] = gen_compare_reg (operands[0]);
  operands[2] = const0_rtx;
  operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
				operands[1], operands[2]);
  emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
				  operands[3]));
}
3069
3070
3071 /* Generate a DFmode part of a hard TFmode register.
3072 REG is the TFmode hard register, LOW is 1 for the
3073 low 64bit of the register and 0 otherwise.
3074 */
3075 rtx
gen_df_reg(rtx reg,int low)3076 gen_df_reg (rtx reg, int low)
3077 {
3078 int regno = REGNO (reg);
3079
3080 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3081 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3082 return gen_rtx_REG (DFmode, regno);
3083 }
3084
3085 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3086 Unlike normal calls, TFmode operands are passed by reference. It is
3087 assumed that no more than 3 operands are required. */
3088
static void
emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
{
  rtx ret_slot = NULL, arg[3], func_sym;
  int i;

  /* We only expect to be called for conversions, unary, and binary ops.  */
  gcc_assert (nargs == 2 || nargs == 3);

  /* Compute the address actually passed for each operand; TFmode values
     are passed by reference, everything else is passed as-is.  */
  for (i = 0; i < nargs; ++i)
    {
      rtx this_arg = operands[i];
      rtx this_slot;

      /* TFmode arguments and return values are passed by reference.  */
      if (GET_MODE (this_arg) == TFmode)
	{
	  int force_stack_temp;

	  /* With a buggy Qp library, force the result (operand 0) into a
	     fresh stack slot instead of pointing it at user memory.
	     NOTE(review): rationale inferred from the TARGET_BUGGY_QP_LIB
	     guard — confirm against the option's documentation.  */
	  force_stack_temp = 0;
	  if (TARGET_BUGGY_QP_LIB && i == 0)
	    force_stack_temp = 1;

	  if (GET_CODE (this_arg) == MEM
	      && ! force_stack_temp)
	    {
	      /* Already in memory: pass its address directly, after
		 making sure the underlying object is addressable.  */
	      tree expr = MEM_EXPR (this_arg);
	      if (expr)
		mark_addressable (expr);
	      this_arg = XEXP (this_arg, 0);
	    }
	  else if (CONSTANT_P (this_arg)
		   && ! force_stack_temp)
	    {
	      /* Constants go to the constant pool; pass that address.  */
	      this_slot = force_const_mem (TFmode, this_arg);
	      this_arg = XEXP (this_slot, 0);
	    }
	  else
	    {
	      /* Otherwise spill to a stack temporary.  */
	      this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));

	      /* Operand 0 is the return value.  We'll copy it out later.  */
	      if (i > 0)
		emit_move_insn (this_slot, this_arg);
	      else
		ret_slot = this_slot;

	      this_arg = XEXP (this_slot, 0);
	    }
	}

      arg[i] = this_arg;
    }

  func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);

  if (GET_MODE (operands[0]) == TFmode)
    {
      /* TFmode result: returned through the by-reference slot, so the
	 libcall itself has VOIDmode.  */
      if (nargs == 2)
	emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
			   arg[0], GET_MODE (arg[0]),
			   arg[1], GET_MODE (arg[1]));
      else
	emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
			   arg[0], GET_MODE (arg[0]),
			   arg[1], GET_MODE (arg[1]),
			   arg[2], GET_MODE (arg[2]));

      /* Copy the result out of a forced stack temporary, if we used one.  */
      if (ret_slot)
	emit_move_insn (operands[0], ret_slot);
    }
  else
    {
      rtx ret;

      /* Non-TFmode result (conversions to int/float): value-returning
	 libcall with the single TFmode input passed by reference.  */
      gcc_assert (nargs == 2);

      ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
				     GET_MODE (operands[0]), 1,
				     arg[1], GET_MODE (arg[1]));

      if (ret != operands[0])
	emit_move_insn (operands[0], ret);
    }
}
3174
3175 /* Expand soft-float TFmode calls to sparc abi routines. */
3176
3177 static void
emit_soft_tfmode_binop(enum rtx_code code,rtx * operands)3178 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3179 {
3180 const char *func;
3181
3182 switch (code)
3183 {
3184 case PLUS:
3185 func = "_Qp_add";
3186 break;
3187 case MINUS:
3188 func = "_Qp_sub";
3189 break;
3190 case MULT:
3191 func = "_Qp_mul";
3192 break;
3193 case DIV:
3194 func = "_Qp_div";
3195 break;
3196 default:
3197 gcc_unreachable ();
3198 }
3199
3200 emit_soft_tfmode_libcall (func, 3, operands);
3201 }
3202
3203 static void
emit_soft_tfmode_unop(enum rtx_code code,rtx * operands)3204 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3205 {
3206 const char *func;
3207
3208 gcc_assert (code == SQRT);
3209 func = "_Qp_sqrt";
3210
3211 emit_soft_tfmode_libcall (func, 2, operands);
3212 }
3213
static void
emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
{
  /* Pick the SPARC ABI quad-float conversion routine matching CODE and
     the non-TFmode operand's machine mode, then emit the libcall.  */
  const char *func;

  switch (code)
    {
    case FLOAT_EXTEND:
      /* Widen a narrower float (operand 1) to TFmode.  */
      switch (GET_MODE (operands[1]))
	{
	case SFmode:
	  func = "_Qp_stoq";
	  break;
	case DFmode:
	  func = "_Qp_dtoq";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case FLOAT_TRUNCATE:
      /* Narrow TFmode to the float mode of the result (operand 0).  */
      switch (GET_MODE (operands[0]))
	{
	case SFmode:
	  func = "_Qp_qtos";
	  break;
	case DFmode:
	  func = "_Qp_qtod";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case FLOAT:
      /* Signed integer to TFmode.  */
      switch (GET_MODE (operands[1]))
	{
	case SImode:
	  func = "_Qp_itoq";
	  /* On arch64 the 32-bit input travels in a 64-bit register, so
	     make the sign extension explicit.  */
	  if (TARGET_ARCH64)
	    operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
	  break;
	case DImode:
	  func = "_Qp_xtoq";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case UNSIGNED_FLOAT:
      /* Unsigned integer to TFmode.  */
      switch (GET_MODE (operands[1]))
	{
	case SImode:
	  func = "_Qp_uitoq";
	  /* Likewise, but zero-extend the unsigned 32-bit input.  */
	  if (TARGET_ARCH64)
	    operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
	  break;
	case DImode:
	  func = "_Qp_uxtoq";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case FIX:
      /* TFmode to signed integer.  */
      switch (GET_MODE (operands[0]))
	{
	case SImode:
	  func = "_Qp_qtoi";
	  break;
	case DImode:
	  func = "_Qp_qtox";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case UNSIGNED_FIX:
      /* TFmode to unsigned integer.  */
      switch (GET_MODE (operands[0]))
	{
	case SImode:
	  func = "_Qp_qtoui";
	  break;
	case DImode:
	  func = "_Qp_qtoux";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  emit_soft_tfmode_libcall (func, 2, operands);
}
3315
3316 /* Expand a hard-float tfmode operation. All arguments must be in
3317 registers. */
3318
3319 static void
emit_hard_tfmode_operation(enum rtx_code code,rtx * operands)3320 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3321 {
3322 rtx op, dest;
3323
3324 if (GET_RTX_CLASS (code) == RTX_UNARY)
3325 {
3326 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3327 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3328 }
3329 else
3330 {
3331 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3332 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3333 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3334 operands[1], operands[2]);
3335 }
3336
3337 if (register_operand (operands[0], VOIDmode))
3338 dest = operands[0];
3339 else
3340 dest = gen_reg_rtx (GET_MODE (operands[0]));
3341
3342 emit_insn (gen_rtx_SET (VOIDmode, dest, op));
3343
3344 if (dest != operands[0])
3345 emit_move_insn (operands[0], dest);
3346 }
3347
3348 void
emit_tfmode_binop(enum rtx_code code,rtx * operands)3349 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3350 {
3351 if (TARGET_HARD_QUAD)
3352 emit_hard_tfmode_operation (code, operands);
3353 else
3354 emit_soft_tfmode_binop (code, operands);
3355 }
3356
3357 void
emit_tfmode_unop(enum rtx_code code,rtx * operands)3358 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3359 {
3360 if (TARGET_HARD_QUAD)
3361 emit_hard_tfmode_operation (code, operands);
3362 else
3363 emit_soft_tfmode_unop (code, operands);
3364 }
3365
3366 void
emit_tfmode_cvt(enum rtx_code code,rtx * operands)3367 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3368 {
3369 if (TARGET_HARD_QUAD)
3370 emit_hard_tfmode_operation (code, operands);
3371 else
3372 emit_soft_tfmode_cvt (code, operands);
3373 }
3374
3375 /* Return nonzero if a branch/jump/call instruction will be emitting
3376 nop into its delay slot. */
3377
3378 int
empty_delay_slot(rtx insn)3379 empty_delay_slot (rtx insn)
3380 {
3381 rtx seq;
3382
3383 /* If no previous instruction (should not happen), return true. */
3384 if (PREV_INSN (insn) == NULL)
3385 return 1;
3386
3387 seq = NEXT_INSN (PREV_INSN (insn));
3388 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3389 return 0;
3390
3391 return 1;
3392 }
3393
3394 /* Return nonzero if we should emit a nop after a cbcond instruction.
3395 The cbcond instruction does not have a delay slot, however there is
3396 a severe performance penalty if a control transfer appears right
3397 after a cbcond. Therefore we emit a nop when we detect this
3398 situation. */
3399
int
emit_cbcond_nop (rtx insn)
{
  rtx next = next_active_insn (insn);

  /* Nothing follows: be conservative and emit the nop.  */
  if (!next)
    return 1;

  /* If the next insn is a filled delay-slot SEQUENCE, what actually
     executes first is the insn at the head of the SEQUENCE.  */
  if (GET_CODE (next) == INSN
      && GET_CODE (PATTERN (next)) == SEQUENCE)
    next = XVECEXP (PATTERN (next), 0, 0);
  else if (GET_CODE (next) == CALL_INSN
	   && GET_CODE (PATTERN (next)) == PARALLEL)
    {
      rtx delay = XVECEXP (PATTERN (next), 0, 1);

      if (GET_CODE (delay) == RETURN)
	{
	  /* It's a sibling call.  Do not emit the nop if we're going
	     to emit something other than the jump itself as the first
	     instruction of the sibcall sequence.  */
	  if (sparc_leaf_function_p || TARGET_FLAT)
	    return 0;
	}
    }

  /* A plain non-jump insn follows the cbcond: no penalty, no nop.  */
  if (NONJUMP_INSN_P (next))
    return 0;

  /* Otherwise the next insn is a control transfer: emit the nop.  */
  return 1;
}
3431
3432 /* Return nonzero if TRIAL can go into the call delay slot. */
3433
int
eligible_for_call_delay (rtx trial)
{
  rtx pat;

  /* Insns whose in_branch_delay attribute is false can never fill a
     delay slot.  */
  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
    return 0;

  /* Binutils allows
       call __tls_get_addr, %tgd_call (foo)
        add %l7, %o0, %o0, %tgd_add (foo)
     while Sun as/ld does not.  */
  if (TARGET_GNU_TLS || !TARGET_TLS)
    return 1;

  pat = PATTERN (trial);

  /* We must reject tgd_add{32|64}, i.e.
       (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
     and tldm_add{32|64}, i.e.
       (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
     for Sun as/ld.  */
  if (GET_CODE (pat) == SET
      && GET_CODE (SET_SRC (pat)) == PLUS)
    {
      rtx unspec = XEXP (SET_SRC (pat), 1);

      if (GET_CODE (unspec) == UNSPEC
	  && (XINT (unspec, 1) == UNSPEC_TLSGD
	      || XINT (unspec, 1) == UNSPEC_TLSLDM))
	return 0;
    }

  return 1;
}
3469
3470 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3471 instruction. RETURN_P is true if the v9 variant 'return' is to be
3472 considered in the test too.
3473
3474 TRIAL must be a SET whose destination is a REG appropriate for the
3475 'restore' instruction or, if RETURN_P is true, for the 'return'
3476 instruction. */
3477
static int
eligible_for_restore_insn (rtx trial, bool return_p)
{
  rtx pat = PATTERN (trial);
  rtx src = SET_SRC (pat);
  bool src_is_freg = false;
  rtx src_reg;

  /* Since we now can do moves between float and integer registers when
     VIS3 is enabled, we have to catch this case.  We can allow such
     moves when doing a 'return' however.  */
  src_reg = src;
  if (GET_CODE (src_reg) == SUBREG)
    src_reg = SUBREG_REG (src_reg);
  if (GET_CODE (src_reg) == REG
      && SPARC_FP_REG_P (REGNO (src_reg)))
    src_is_freg = true;

  /* The 'restore src,%g0,dest' pattern for word mode and below.  */
  if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
      && arith_operand (src, GET_MODE (src))
      && ! src_is_freg)
    {
      /* The maximum combinable operand width is a machine word.  */
      if (TARGET_ARCH64)
	return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
      else
	return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
    }

  /* The 'restore src,%g0,dest' pattern for double-word mode.  */
  else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
	   && arith_double_operand (src, GET_MODE (src))
	   && ! src_is_freg)
    return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);

  /* The 'restore src,%g0,dest' pattern for float if no FPU.  */
  else if (! TARGET_FPU && register_operand (src, SFmode))
    return 1;

  /* The 'restore src,%g0,dest' pattern for double if no FPU.  */
  else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
    return 1;

  /* If we have the 'return' instruction, anything that does not use
     local or output registers and can go into a delay slot wins.  */
  else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
    return 1;

  /* The 'restore src1,src2,dest' pattern for SImode.  */
  else if (GET_CODE (src) == PLUS
	   && register_operand (XEXP (src, 0), SImode)
	   && arith_operand (XEXP (src, 1), SImode))
    return 1;

  /* The 'restore src1,src2,dest' pattern for DImode.  */
  else if (GET_CODE (src) == PLUS
	   && register_operand (XEXP (src, 0), DImode)
	   && arith_double_operand (XEXP (src, 1), DImode))
    return 1;

  /* The 'restore src1,%lo(src2),dest' pattern.  */
  else if (GET_CODE (src) == LO_SUM
	   && ! TARGET_CM_MEDMID
	   && ((register_operand (XEXP (src, 0), SImode)
		&& immediate_operand (XEXP (src, 1), SImode))
	       || (TARGET_ARCH64
		   && register_operand (XEXP (src, 0), DImode)
		   && immediate_operand (XEXP (src, 1), DImode))))
    return 1;

  /* The 'restore src,src,dest' pattern: a doubling shift-by-one can be
     expressed as an addition of the source to itself.  */
  else if (GET_CODE (src) == ASHIFT
	   && (register_operand (XEXP (src, 0), SImode)
	       || register_operand (XEXP (src, 0), DImode))
	   && XEXP (src, 1) == const1_rtx)
    return 1;

  /* Nothing matched: TRIAL cannot be combined with restore/return.  */
  return 0;
}
3557
3558 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3559
int
eligible_for_return_delay (rtx trial)
{
  int regno;
  rtx pat;

  /* If the function uses __builtin_eh_return, the eh_return machinery
     occupies the delay slot.  */
  if (crtl->calls_eh_return)
    return 0;

  /* Insns whose in_branch_delay attribute is false can never fill a
     delay slot.  */
  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
    return 0;

  /* In the case of a leaf or flat function, anything can go into the slot.  */
  if (sparc_leaf_function_p || TARGET_FLAT)
    return 1;

  if (!NONJUMP_INSN_P (trial))
    return 0;

  pat = PATTERN (trial);
  if (GET_CODE (pat) == PARALLEL)
    {
      int i;

      if (! TARGET_V9)
	return 0;
      /* Every element of the PARALLEL must be a SET of a register that
	 is neither a local (%l) nor an output (%o) register, since
	 those are renumbered/restored by the register window.  */
      for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
	{
	  rtx expr = XVECEXP (pat, 0, i);
	  if (GET_CODE (expr) != SET)
	    return 0;
	  if (GET_CODE (SET_DEST (expr)) != REG)
	    return 0;
	  regno = REGNO (SET_DEST (expr));
	  if (regno >= 8 && regno < 24)
	    return 0;
	}
      return !epilogue_renumber (&pat, 1);
    }

  if (GET_CODE (pat) != SET)
    return 0;

  if (GET_CODE (SET_DEST (pat)) != REG)
    return 0;

  regno = REGNO (SET_DEST (pat));

  /* Otherwise, only operations which can be done in tandem with
     a `restore' or `return' insn can go into the delay slot.  */
  if (regno >= 8 && regno < 24)
    return 0;

  /* If this instruction sets up floating point register and we have a return
     instruction, it can probably go in.  But restore will not work
     with FP_REGS.  */
  if (! SPARC_INT_REG_P (regno))
    return TARGET_V9 && !epilogue_renumber (&pat, 1);

  return eligible_for_restore_insn (trial, true);
}
3623
3624 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3625
int
eligible_for_sibcall_delay (rtx trial)
{
  rtx pat;

  /* Insns whose in_branch_delay attribute is false can never fill a
     delay slot.  */
  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
    return 0;

  if (!NONJUMP_INSN_P (trial))
    return 0;

  pat = PATTERN (trial);

  if (sparc_leaf_function_p || TARGET_FLAT)
    {
      /* If the tail call is done using the call instruction,
	 we have to restore %o7 in the delay slot.  */
      if (LEAF_SIBCALL_SLOT_RESERVED_P)
	return 0;

      /* %g1 is used to build the function address */
      if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
	return 0;

      return 1;
    }

  if (GET_CODE (pat) != SET)
    return 0;

  /* Otherwise, only operations which can be done in tandem with
     a `restore' insn can go into the delay slot.  The destination must
     be an integer register outside the local/output (%l/%o) range.  */
  if (GET_CODE (SET_DEST (pat)) != REG
      || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
      || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
    return 0;

  /* If it mentions %o7, it can't go in, because sibcall will clobber it
     in most cases.  */
  if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
    return 0;

  return eligible_for_restore_insn (trial, false);
}
3670
3671 /* Determine if it's legal to put X into the constant pool. This
3672 is not possible if X contains the address of a symbol that is
3673 not constant (TLS) or not known at final link time (PIC). */
3674
3675 static bool
sparc_cannot_force_const_mem(enum machine_mode mode,rtx x)3676 sparc_cannot_force_const_mem (enum machine_mode mode, rtx x)
3677 {
3678 switch (GET_CODE (x))
3679 {
3680 case CONST_INT:
3681 case CONST_DOUBLE:
3682 case CONST_VECTOR:
3683 /* Accept all non-symbolic constants. */
3684 return false;
3685
3686 case LABEL_REF:
3687 /* Labels are OK iff we are non-PIC. */
3688 return flag_pic != 0;
3689
3690 case SYMBOL_REF:
3691 /* 'Naked' TLS symbol references are never OK,
3692 non-TLS symbols are OK iff we are non-PIC. */
3693 if (SYMBOL_REF_TLS_MODEL (x))
3694 return true;
3695 else
3696 return flag_pic != 0;
3697
3698 case CONST:
3699 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3700 case PLUS:
3701 case MINUS:
3702 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3703 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3704 case UNSPEC:
3705 return true;
3706 default:
3707 gcc_unreachable ();
3708 }
3709 }
3710
/* Global Offset Table support.  */

/* Cached rtx for the helper used to set up the GOT register; presumably
   created lazily elsewhere in this file — GC-rooted via GTY.  */
static GTY(()) rtx got_helper_rtx = NULL_RTX;
/* Cached rtx holding the GOT base; reloaded by load_got_register (see
   sparc_tls_got below) — GC-rooted via GTY.  */
static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3714
3715 /* Return the SYMBOL_REF for the Global Offset Table. */
3716
3717 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3718
3719 static rtx
sparc_got(void)3720 sparc_got (void)
3721 {
3722 if (!sparc_got_symbol)
3723 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3724
3725 return sparc_got_symbol;
3726 }
3727
3728 /* Ensure that we are not using patterns that are not OK with PIC. */
3729
int
check_pic (int i)
{
  rtx op;

  switch (flag_pic)
    {
    case 1:
      /* With -fpic, the only legitimate symbolic operand is a reference
	 relative to the GOT, i.e. (const (minus <GOT symbol> (const ...))).
	 Anything else should have been legitimized earlier.  */
      op = recog_data.operand[i];
      gcc_assert (GET_CODE (op) != SYMBOL_REF
		  && (GET_CODE (op) != CONST
		      || (GET_CODE (XEXP (op, 0)) == MINUS
			  && XEXP (XEXP (op, 0), 0) == sparc_got ()
			  && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
      /* FALLTHRU */
    case 2:
    default:
      return 1;
    }
}
3749
3750 /* Return true if X is an address which needs a temporary register when
3751 reloaded while generating PIC code. */
3752
3753 int
pic_address_needs_scratch(rtx x)3754 pic_address_needs_scratch (rtx x)
3755 {
3756 /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */
3757 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
3758 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3759 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3760 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
3761 return 1;
3762
3763 return 0;
3764 }
3765
3766 /* Determine if a given RTX is a valid constant. We already know this
3767 satisfies CONSTANT_P. */
3768
static bool
sparc_legitimate_constant_p (enum machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
    case SYMBOL_REF:
      /* TLS symbols must go through sparc_legitimize_tls_address; they
	 are never legitimate as plain constants.  */
      if (sparc_tls_referenced_p (x))
	return false;
      break;

    case CONST_DOUBLE:
      /* A VOIDmode CONST_DOUBLE is a wide integer constant, always OK.  */
      if (GET_MODE (x) == VOIDmode)
	return true;

      /* Floating point constants are generally not ok.
	 The only exception is 0.0 and all-ones in VIS.  */
      if (TARGET_VIS
	  && SCALAR_FLOAT_MODE_P (mode)
	  && (const_zero_operand (x, mode)
	      || const_all_ones_operand (x, mode)))
	return true;

      return false;

    case CONST_VECTOR:
      /* Vector constants are generally not ok.
	 The only exception is 0 or -1 in VIS.  */
      if (TARGET_VIS
	  && (const_zero_operand (x, mode)
	      || const_all_ones_operand (x, mode)))
	return true;

      return false;

    default:
      break;
    }

  /* Everything else that satisfies CONSTANT_P is legitimate.  */
  return true;
}
3810
3811 /* Determine if a given RTX is a valid constant address. */
3812
3813 bool
constant_address_p(rtx x)3814 constant_address_p (rtx x)
3815 {
3816 switch (GET_CODE (x))
3817 {
3818 case LABEL_REF:
3819 case CONST_INT:
3820 case HIGH:
3821 return true;
3822
3823 case CONST:
3824 if (flag_pic && pic_address_needs_scratch (x))
3825 return false;
3826 return sparc_legitimate_constant_p (Pmode, x);
3827
3828 case SYMBOL_REF:
3829 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3830
3831 default:
3832 return false;
3833 }
3834 }
3835
3836 /* Nonzero if the constant value X is a legitimate general operand
3837 when generating PIC code. It is given that flag_pic is on and
3838 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
3839
3840 bool
legitimate_pic_operand_p(rtx x)3841 legitimate_pic_operand_p (rtx x)
3842 {
3843 if (pic_address_needs_scratch (x))
3844 return false;
3845 if (sparc_tls_referenced_p (x))
3846 return false;
3847 return true;
3848 }
3849
/* True if X is a CONST_INT usable as a signed 13-bit memory offset for an
   access of mode MODE, leaving room for the access size at the top.  */
#define RTX_OK_FOR_OFFSET_P(X, MODE)			\
  (CONST_INT_P (X)					\
   && INTVAL (X) >= -0x1000				\
   && INTVAL (X) < (0x1000 - GET_MODE_SIZE (MODE)))

/* Likewise, but with a tighter upper bound (0xc00) for offsets combined
   with an offsettable %lo() in a LO_SUM address.  */
#define RTX_OK_FOR_OLO10_P(X, MODE)			\
  (CONST_INT_P (X)					\
   && INTVAL (X) >= -0x1000				\
   && INTVAL (X) < (0xc00 - GET_MODE_SIZE (MODE)))
3859
3860 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
3861
3862 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
3863 ordinarily. This changes a bit when generating PIC. */
3864
static bool
sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
{
  /* Decompose ADDR into a base register RS1, an optional index register
     RS2 and an optional immediate IMM1, then validate the registers.  */
  rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;

  if (REG_P (addr) || GET_CODE (addr) == SUBREG)
    rs1 = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rs1 = XEXP (addr, 0);
      rs2 = XEXP (addr, 1);

      /* Canonicalize.  REG comes first, if there are no regs,
	 LO_SUM comes first.  */
      if (!REG_P (rs1)
	  && GET_CODE (rs1) != SUBREG
	  && (REG_P (rs2)
	      || GET_CODE (rs2) == SUBREG
	      || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
	{
	  rs1 = XEXP (addr, 1);
	  rs2 = XEXP (addr, 0);
	}

      /* REG + constant: either a GOT-relative operand with -fpic (the
	 first arm) or a plain register plus 13-bit offset (the second).  */
      if ((flag_pic == 1
	   && rs1 == pic_offset_table_rtx
	   && !REG_P (rs2)
	   && GET_CODE (rs2) != SUBREG
	   && GET_CODE (rs2) != LO_SUM
	   && GET_CODE (rs2) != MEM
	   && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
	   && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
	   && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
	  || ((REG_P (rs1)
	       || GET_CODE (rs1) == SUBREG)
	      && RTX_OK_FOR_OFFSET_P (rs2, mode)))
	{
	  imm1 = rs2;
	  rs2 = NULL;
	}
      else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
	       && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
	{
	  /* We prohibit REG + REG for TFmode when there are no quad move insns
	     and we consequently need to split.  We do this because REG+REG
	     is not an offsettable address.  If we get the situation in reload
	     where source and destination of a movtf pattern are both MEMs with
	     REG+REG address, then only one of them gets converted to an
	     offsettable address.  */
	  if (mode == TFmode
	      && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
	    return 0;

	  /* Likewise for TImode, but in all cases.  */
	  if (mode == TImode)
	    return 0;

	  /* We prohibit REG + REG on ARCH32 if not optimizing for
	     DFmode/DImode because then mem_min_alignment is likely to be zero
	     after reload and the  forced split would lack a matching splitter
	     pattern.  */
	  if (TARGET_ARCH32 && !optimize
	      && (mode == DFmode || mode == DImode))
	    return 0;
	}
      else if (USE_AS_OFFSETABLE_LO10
	       && GET_CODE (rs1) == LO_SUM
	       && TARGET_ARCH64
	       && ! TARGET_CM_MEDMID
	       && RTX_OK_FOR_OLO10_P (rs2, mode))
	{
	  /* (LO_SUM reg sym) + small offset: fold the LO_SUM's parts into
	     base and immediate, rejecting TLS symbols.  */
	  rs2 = NULL;
	  imm1 = XEXP (rs1, 1);
	  rs1 = XEXP (rs1, 0);
	  if (!CONSTANT_P (imm1)
	      || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
	    return 0;
	}
    }
  else if (GET_CODE (addr) == LO_SUM)
    {
      rs1 = XEXP (addr, 0);
      imm1 = XEXP (addr, 1);

      if (!CONSTANT_P (imm1)
	  || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
	return 0;

      /* We can't allow TFmode in 32-bit mode, because an offset greater
	 than the alignment (8) may cause the LO_SUM to overflow.  */
      if (mode == TFmode && TARGET_ARCH32)
	return 0;
    }
  else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
    return 1;
  else
    return 0;

  /* At this point the base (and index, if any) must resolve to registers.  */
  if (GET_CODE (rs1) == SUBREG)
    rs1 = SUBREG_REG (rs1);
  if (!REG_P (rs1))
    return 0;

  if (rs2)
    {
      if (GET_CODE (rs2) == SUBREG)
	rs2 = SUBREG_REG (rs2);
      if (!REG_P (rs2))
	return 0;
    }

  if (strict)
    {
      /* Strict checking: hard registers must be valid base registers.  */
      if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
	  || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
	return 0;
    }
  else
    {
      /* Non-strict checking: accept pseudos and the frame pointer, but
	 reject hard registers that are not integer registers.  */
      if ((! SPARC_INT_REG_P (REGNO (rs1))
	   && REGNO (rs1) != FRAME_POINTER_REGNUM
	   && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
	  || (rs2
	      && (! SPARC_INT_REG_P (REGNO (rs2))
		  && REGNO (rs2) != FRAME_POINTER_REGNUM
		  && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
	return 0;
    }
  return 1;
}
3995
3996 /* Return the SYMBOL_REF for the tls_get_addr function. */
3997
3998 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
3999
4000 static rtx
sparc_tls_get_addr(void)4001 sparc_tls_get_addr (void)
4002 {
4003 if (!sparc_tls_symbol)
4004 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4005
4006 return sparc_tls_symbol;
4007 }
4008
4009 /* Return the Global Offset Table to be used in TLS mode. */
4010
static rtx
sparc_tls_got (void)
{
  /* In PIC mode, this is just the PIC offset table.  */
  if (flag_pic)
    {
      /* Record that the function needs the PIC register set up.  */
      crtl->uses_pic_offset_table = 1;
      return pic_offset_table_rtx;
    }

  /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
     the GOT symbol with the 32-bit ABI, so we reload the GOT register.  */
  if (TARGET_SUN_TLS && TARGET_ARCH32)
    {
      load_got_register ();
      return global_offset_table_rtx;
    }

  /* In all other cases, we load a new pseudo with the GOT symbol.  */
  return copy_to_reg (sparc_got ());
}
4032
4033 /* Return true if X contains a thread-local symbol. */
4034
4035 static bool
sparc_tls_referenced_p(rtx x)4036 sparc_tls_referenced_p (rtx x)
4037 {
4038 if (!TARGET_HAVE_TLS)
4039 return false;
4040
4041 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4042 x = XEXP (XEXP (x, 0), 0);
4043
4044 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4045 return true;
4046
4047 /* That's all we handle in sparc_legitimize_tls_address for now. */
4048 return false;
4049 }
4050
4051 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4052 this (thread-local) address. */
4053
4054 static rtx
sparc_legitimize_tls_address (rtx addr)
{
  rtx temp1, temp2, temp3, ret, o0, got, insn;

  /* New pseudos are created below, so this must run before register
     allocation has started.  */
  gcc_assert (can_create_pseudo_p ());

  if (GET_CODE (addr) == SYMBOL_REF)
    switch (SYMBOL_REF_TLS_MODEL (addr))
      {
      case TLS_MODEL_GLOBAL_DYNAMIC:
	/* Global dynamic: build the %hi22/%lo10 pair for ADDR, add the
	   GOT pointer to form the argument in %o0 (reg 8), and call the
	   TLS resolver; the result also comes back in %o0.  */
	start_sequence ();
	temp1 = gen_reg_rtx (SImode);
	temp2 = gen_reg_rtx (SImode);
	ret = gen_reg_rtx (Pmode);
	o0 = gen_rtx_REG (Pmode, 8);
	got = sparc_tls_got ();
	emit_insn (gen_tgd_hi22 (temp1, addr));
	emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
	if (TARGET_ARCH32)
	  {
	    emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
	    insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
						   addr, const1_rtx));
	  }
	else
	  {
	    emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
	    insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
						   addr, const1_rtx));
	  }
	use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
	insn = get_insns ();
	end_sequence ();
	/* Emit the whole sequence as a libcall block equivalent to ADDR,
	   copying the %o0 result into RET.  */
	emit_libcall_block (insn, ret, o0, addr);
	break;

      case TLS_MODEL_LOCAL_DYNAMIC:
	/* Local dynamic: one resolver call computes the module base
	   (TEMP3); the symbol's module-relative offset is then added
	   with the %hix/%lox pair below.  */
	start_sequence ();
	temp1 = gen_reg_rtx (SImode);
	temp2 = gen_reg_rtx (SImode);
	temp3 = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	o0 = gen_rtx_REG (Pmode, 8);
	got = sparc_tls_got ();
	emit_insn (gen_tldm_hi22 (temp1));
	emit_insn (gen_tldm_lo10 (temp2, temp1));
	if (TARGET_ARCH32)
	  {
	    emit_insn (gen_tldm_add32 (o0, got, temp2));
	    insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
						    const1_rtx));
	  }
	else
	  {
	    emit_insn (gen_tldm_add64 (o0, got, temp2));
	    insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
						    const1_rtx));
	  }
	use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
	insn = get_insns ();
	end_sequence ();
	/* The module base does not depend on ADDR, so the equivalent
	   value is a bare UNSPEC; this lets multiple LD accesses share
	   one resolver call.  */
	emit_libcall_block (insn, temp3, o0,
			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					    UNSPEC_TLSLD_BASE));
	temp1 = gen_reg_rtx (SImode);
	temp2 = gen_reg_rtx (SImode);
	emit_insn (gen_tldo_hix22 (temp1, addr));
	emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
	if (TARGET_ARCH32)
	  emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
	else
	  emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
	break;

      case TLS_MODEL_INITIAL_EXEC:
	/* Initial exec: load the thread-pointer-relative offset of ADDR
	   from the GOT, then add the TLS base register %g7 (reg 7).  */
	temp1 = gen_reg_rtx (SImode);
	temp2 = gen_reg_rtx (SImode);
	temp3 = gen_reg_rtx (Pmode);
	got = sparc_tls_got ();
	emit_insn (gen_tie_hi22 (temp1, addr));
	emit_insn (gen_tie_lo10 (temp2, temp1, addr));
	if (TARGET_ARCH32)
	  emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
	else
	  emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
	if (TARGET_SUN_TLS)
	  {
	    /* Sun TLS wants the addition done by an explicit annotated
	       add pattern rather than a bare PLUS.  */
	    ret = gen_reg_rtx (Pmode);
	    if (TARGET_ARCH32)
	      emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
					temp3, addr));
	    else
	      emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
					temp3, addr));
	  }
	else
	  ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
	break;

      case TLS_MODEL_LOCAL_EXEC:
	/* Local exec: the offset from %g7 is a link-time constant,
	   materialized with a %hix/%lox pair.  */
	temp1 = gen_reg_rtx (Pmode);
	temp2 = gen_reg_rtx (Pmode);
	if (TARGET_ARCH32)
	  {
	    emit_insn (gen_tle_hix22_sp32 (temp1, addr));
	    emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
	  }
	else
	  {
	    emit_insn (gen_tle_hix22_sp64 (temp1, addr));
	    emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
	  }
	ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
	break;

      default:
	gcc_unreachable ();
      }

  else if (GET_CODE (addr) == CONST)
    {
      rtx base, offset;

      /* (const (plus symbol offset)): legitimize the symbol part and
	 re-add the offset, forcing it into a register unless it fits
	 in a simm13 immediate.  */
      gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);

      base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
      offset = XEXP (XEXP (addr, 0), 1);

      base = force_operand (base, NULL_RTX);
      if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
	offset = force_reg (Pmode, offset);
      ret = gen_rtx_PLUS (Pmode, base, offset);
    }

  else
    gcc_unreachable (); /* for now ... */

  return ret;
}
4194
4195 /* Legitimize PIC addresses. If the address is already position-independent,
4196 we return ORIG. Newly generated position-independent addresses go into a
4197 reg. This is REG if nonzero, otherwise we allocate register(s) as
4198 necessary. */
4199
static rtx
sparc_legitimize_pic_address (rtx orig, rtx reg)
{
  /* Whether the address load will use the gotdata patterns (flag_pic == 2
     path below) instead of a plain GOT memory load.  */
  bool gotdata_op = false;

  if (GET_CODE (orig) == SYMBOL_REF
      /* See the comment in sparc_expand_move.  */
      || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
    {
      rtx pic_ref, address;
      rtx insn;

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      if (flag_pic == 2)
	{
	  /* If not during reload, allocate another temp reg here for loading
	     in the address, so that these instructions can be optimized
	     properly.  */
	  rtx temp_reg = (! can_create_pseudo_p ()
			  ? reg : gen_reg_rtx (Pmode));

	  /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
	     won't get confused into thinking that these two instructions
	     are loading in the true address of the symbol.  If in the
	     future a PIC rtx exists, that should be used instead.  */
	  if (TARGET_ARCH64)
	    {
	      emit_insn (gen_movdi_high_pic (temp_reg, orig));
	      emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
	    }
	  else
	    {
	      emit_insn (gen_movsi_high_pic (temp_reg, orig));
	      emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
	    }
	  address = temp_reg;
	  gotdata_op = true;
	}
      else
	address = orig;

      crtl->uses_pic_offset_table = 1;
      if (gotdata_op)
	{
	  if (TARGET_ARCH64)
	    insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
							pic_offset_table_rtx,
							address, orig));
	  else
	    insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
							pic_offset_table_rtx,
							address, orig));
	}
      else
	{
	  /* Plain GOT load: *(GOT pointer + offset).  */
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_PLUS (Pmode,
					   pic_offset_table_rtx, address));
	  insn = emit_move_insn (reg, pic_ref);
	}

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);
      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      /* Already of the form (GOT pointer + something): nothing to do.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
      /* Legitimize both operands of the PLUS recursively.  REG is only
	 offered for the second operand if the first one did not end up
	 occupying it.  */
      base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
      offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
					     base == reg ? NULL_RTX : reg);

      if (GET_CODE (offset) == CONST_INT)
	{
	  if (SMALL_INT (offset))
	    return plus_constant (Pmode, base, INTVAL (offset));
	  else if (can_create_pseudo_p ())
	    offset = force_reg (Pmode, offset);
	  else
	    /* If we reach here, then something is seriously wrong.  */
	    gcc_unreachable ();
	}
      return gen_rtx_PLUS (Pmode, base, offset);
    }
  else if (GET_CODE (orig) == LABEL_REF)
    /* ??? We ought to be checking that the register is live instead, in case
       it is eliminated.  */
    crtl->uses_pic_offset_table = 1;

  return orig;
}
4310
4311 /* Try machine-dependent ways of modifying an illegitimate address X
4312 to be legitimate. If we find one, return the new, valid address.
4313
4314 OLDX is the address as it was before break_out_memory_refs was called.
4315 In some cases it is useful to look at this to decide what needs to be done.
4316
4317 MODE is the mode of the operand pointed to by X.
4318
4319 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4320
static rtx
sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			  enum machine_mode mode)
{
  rtx orig_x = x;

  /* First flatten PLUS expressions whose operands are themselves MULT
     or PLUS, forcing the complex operand into a register so the address
     becomes REG+REG.  Each test is applied to the possibly already
     rewritten X.  */
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
    x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
		      force_operand (XEXP (x, 0), NULL_RTX));
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
		      force_operand (XEXP (x, 1), NULL_RTX));
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
    x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
		      XEXP (x, 1));
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
		      force_operand (XEXP (x, 1), NULL_RTX));

  /* If the rewrites above already produced a legitimate address,
     stop here.  */
  if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
    return x;

  /* Otherwise dispatch on the kind of address: TLS symbols, PIC
     references, REG+constant, and bare symbolic addresses each get
     their own treatment.  */
  if (sparc_tls_referenced_p (x))
    x = sparc_legitimize_tls_address (x);
  else if (flag_pic)
    x = sparc_legitimize_pic_address (x, NULL_RTX);
  else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
		      copy_to_mode_reg (Pmode, XEXP (x, 1)));
  else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
    x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
		      copy_to_mode_reg (Pmode, XEXP (x, 0)));
  else if (GET_CODE (x) == SYMBOL_REF
	   || GET_CODE (x) == CONST
	   || GET_CODE (x) == LABEL_REF)
    x = copy_to_suggested_reg (x, NULL_RTX, Pmode);

  return x;
}
4360
4361 /* Delegitimize an address that was legitimized by the above function. */
4362
static rtx
sparc_delegitimize_address (rtx x)
{
  x = delegitimize_mem_from_attrs (x);

  /* (lo_sum _ (unspec [sym] UNSPEC_MOVE_PIC/UNSPEC_TLSLE)): peel the
     UNSPEC wrapper to recover the original SYMBOL_REF.  */
  if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
    switch (XINT (XEXP (x, 1), 1))
      {
      case UNSPEC_MOVE_PIC:
      case UNSPEC_TLSLE:
	x = XVECEXP (XEXP (x, 1), 0, 0);
	gcc_assert (GET_CODE (x) == SYMBOL_REF);
	break;
      default:
	break;
      }

  /* This is generated by mov{si,di}_pic_label_ref in PIC mode.  */
  if (GET_CODE (x) == MINUS
      && REG_P (XEXP (x, 0))
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
      && GET_CODE (XEXP (x, 1)) == LO_SUM
      && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
      && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
    {
      /* Recover the LABEL_REF wrapped inside the UNSPEC.  */
      x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
      gcc_assert (GET_CODE (x) == LABEL_REF);
    }

  return x;
}
4394
4395 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4396 replace the input X, or the original X if no replacement is called for.
4397 The output parameter *WIN is 1 if the calling macro should goto WIN,
4398 0 if it should not.
4399
4400 For SPARC, we wish to handle addresses by splitting them into
4401 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4402 This cuts the number of extra insns by one.
4403
4404 Do nothing when generating PIC code and the address is a symbolic
4405 operand or requires a scratch register. */
4406
rtx
sparc_legitimize_reload_address (rtx x, enum machine_mode mode,
				 int opnum, int type,
				 int ind_levels ATTRIBUTE_UNUSED, int *win)
{
  /* Decompose SImode constants into HIGH+LO_SUM.  */
  if (CONSTANT_P (x)
      && (mode != TFmode || TARGET_ARCH64)
      && GET_MODE (x) == SImode
      && GET_CODE (x) != LO_SUM
      && GET_CODE (x) != HIGH
      && sparc_cmodel <= CM_MEDLOW
      && !(flag_pic
	   && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
    {
      /* Rewrite X as (lo_sum (high X) X) and ask reload to put the
	 HIGH part into a base register.  */
      x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type)type);
      *win = 1;
      return x;
    }

  /* We have to recognize what we have already generated above.  */
  if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
    {
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type)type);
      *win = 1;
      return x;
    }

  /* No transformation applies: tell the caller to fall through.  */
  *win = 0;
  return x;
}
4443
4444 /* Return true if ADDR (a legitimate address expression)
4445 has an effect that depends on the machine mode it is used for.
4446
4447 In PIC mode,
4448
4449 (mem:HI [%l7+a])
4450
4451 is not equivalent to
4452
4453 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4454
4455 because [%l7+a+1] is interpreted as the address of (a+1). */
4456
4457
4458 static bool
sparc_mode_dependent_address_p(const_rtx addr,addr_space_t as ATTRIBUTE_UNUSED)4459 sparc_mode_dependent_address_p (const_rtx addr,
4460 addr_space_t as ATTRIBUTE_UNUSED)
4461 {
4462 if (flag_pic && GET_CODE (addr) == PLUS)
4463 {
4464 rtx op0 = XEXP (addr, 0);
4465 rtx op1 = XEXP (addr, 1);
4466 if (op0 == pic_offset_table_rtx
4467 && symbolic_operand (op1, VOIDmode))
4468 return true;
4469 }
4470
4471 return false;
4472 }
4473
4474 #ifdef HAVE_GAS_HIDDEN
4475 # define USE_HIDDEN_LINKONCE 1
4476 #else
4477 # define USE_HIDDEN_LINKONCE 0
4478 #endif
4479
4480 static void
get_pc_thunk_name(char name[32],unsigned int regno)4481 get_pc_thunk_name (char name[32], unsigned int regno)
4482 {
4483 const char *reg_name = reg_names[regno];
4484
4485 /* Skip the leading '%' as that cannot be used in a
4486 symbol name. */
4487 reg_name += 1;
4488
4489 if (USE_HIDDEN_LINKONCE)
4490 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4491 else
4492 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4493 }
4494
4495 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4496
4497 static rtx
gen_load_pcrel_sym(rtx op0,rtx op1,rtx op2,rtx op3)4498 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4499 {
4500 int orig_flag_pic = flag_pic;
4501 rtx insn;
4502
4503 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4504 flag_pic = 0;
4505 if (TARGET_ARCH64)
4506 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4507 else
4508 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4509 flag_pic = orig_flag_pic;
4510
4511 return insn;
4512 }
4513
4514 /* Emit code to load the GOT register. */
4515
void
load_got_register (void)
{
  /* In PIC mode, this will retrieve pic_offset_table_rtx.  */
  if (!global_offset_table_rtx)
    global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);

  if (TARGET_VXWORKS_RTP)
    /* VxWorks RTP has a dedicated pattern for loading the GOT.  */
    emit_insn (gen_vxworks_load_got ());
  else
    {
      /* The GOT symbol is subject to a PC-relative relocation so we need a
	 helper function to add the PC value and thus get the final value.  */
      if (!got_helper_rtx)
	{
	  char name[32];
	  /* The thunk is named after the register it sets,
	     e.g. __sparc_get_pc_thunk.l7; cache the SYMBOL_REF so it is
	     built only once per compilation.  */
	  get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
	  got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
	}

      emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
				     got_helper_rtx,
				     GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.
     ??? In the case where we don't obey regdecls, this is not sufficient
     since we may not fall out the bottom.  */
  emit_use (global_offset_table_rtx);
}
4547
4548 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4549 address of the call target. */
4550
4551 void
sparc_emit_call_insn(rtx pat,rtx addr)4552 sparc_emit_call_insn (rtx pat, rtx addr)
4553 {
4554 rtx insn;
4555
4556 insn = emit_call_insn (pat);
4557
4558 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4559 if (TARGET_VXWORKS_RTP
4560 && flag_pic
4561 && GET_CODE (addr) == SYMBOL_REF
4562 && (SYMBOL_REF_DECL (addr)
4563 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4564 : !SYMBOL_REF_LOCAL_P (addr)))
4565 {
4566 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4567 crtl->uses_pic_offset_table = 1;
4568 }
4569 }
4570
4571 /* Return 1 if RTX is a MEM which is known to be aligned to at
4572 least a DESIRED byte boundary. */
4573
int
mem_min_alignment (rtx mem, int desired)
{
  rtx addr, base, offset;

  /* If it's not a MEM we can't accept it.  */
  if (GET_CODE (mem) != MEM)
    return 0;

  /* Obviously... */
  if (!TARGET_UNALIGNED_DOUBLES
      && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
    return 1;

  /* ??? The rest of the function predates MEM_ALIGN so
     there is probably a bit of redundancy.  */
  addr = XEXP (mem, 0);
  base = offset = NULL_RTX;
  /* Decompose the address into BASE (a REG) and OFFSET (a CONST_INT or
     const0_rtx).  Note that OFFSET is non-NULL whenever BASE is, which
     the INTVAL uses below rely on.  */
  if (GET_CODE (addr) == PLUS)
    {
      if (GET_CODE (XEXP (addr, 0)) == REG)
	{
	  base = XEXP (addr, 0);

	  /* What we are saying here is that if the base
	     REG is aligned properly, the compiler will make
	     sure any REG based index upon it will be so
	     as well.  */
	  if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
	    offset = XEXP (addr, 1);
	  else
	    offset = const0_rtx;
	}
    }
  else if (GET_CODE (addr) == REG)
    {
      base = addr;
      offset = const0_rtx;
    }

  if (base != NULL_RTX)
    {
      int regno = REGNO (base);

      if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
	{
	  /* Check if the compiler has recorded some information
	     about the alignment of the base REG.  If reload has
	     completed, we already matched with proper alignments.
	     If not running global_alloc, reload might give us
	     unaligned pointer to local stack though.  */
	  if (((cfun != 0
		&& REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
	       || (optimize && reload_completed))
	      && (INTVAL (offset) & (desired - 1)) == 0)
	    return 1;
	}
      else
	{
	  /* Frame/stack pointer accesses: account for the stack bias
	     (non-zero in 64-bit mode) before testing the offset.  */
	  if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
	    return 1;
	}
    }
  else if (! TARGET_UNALIGNED_DOUBLES
	   || CONSTANT_P (addr)
	   || GET_CODE (addr) == LO_SUM)
    {
      /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
	 is true, in which case we can only assume that an access is aligned if
	 it is to a constant address, or the address involves a LO_SUM.  */
      return 1;
    }

  /* An obviously unaligned address.  */
  return 0;
}
4650
4651
4652 /* Vectors to keep interesting information about registers where it can easily
4653 be got. We used to use the actual mode value as the bit number, but there
4654 are more than 32 modes now. Instead we use two tables: one indexed by
4655 hard register number, and one indexed by mode. */
4656
4657 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4658 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4659 mapped into one sparc_mode_class mode. */
4660
/* One bit per sparc_mode_class member; each mask below describes the set
   of mode classes a register (or register pair) can hold.  Fixed the two
   masks that shifted DF_MODE without the explicit (int) cast used by
   every other macro here (purely a consistency fix: the enum value
   promotes to int either way).  */

enum sparc_mode_class {
  H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
  SF_MODE, DF_MODE, TF_MODE, OF_MODE,
  CC_MODE, CCFP_MODE
};

/* Modes for single-word and smaller quantities.  */
#define S_MODES \
  ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))

/* Modes for double-word and smaller quantities.  */
#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))

/* Modes for quad-word and smaller quantities.  */
#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))

/* Modes for 8-word and smaller quantities.  */
#define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))

/* Modes for single-float quantities.  */
#define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))

/* Modes for double-float and smaller quantities.  */
#define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))

/* Modes for quad-float and smaller quantities.  */
#define TF_MODES (DF_MODES | (1 << (int) TF_MODE))

/* Modes for quad-float pairs and smaller quantities.  */
#define OF_MODES (TF_MODES | (1 << (int) OF_MODE))

/* Modes for double-float only quantities.  */
#define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))

/* Modes for quad-float and double-float only quantities.  */
#define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))

/* Modes for quad-float pairs and double-float only quantities.  */
#define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))

/* Modes for condition codes.  */
#define CC_MODES (1 << (int) CC_MODE)
#define CCFP_MODES (1 << (int) CCFP_MODE)
4704
4705 /* Value is 1 if register/mode pair is acceptable on sparc.
4706 The funny mixture of D and T modes is because integer operations
4707 do not specially operate on tetra quantities, so non-quad-aligned
4708 registers can hold quadword quantities (except %o4 and %i4 because
4709 they cross fixed registers). */
4710
/* This points to either the 32 bit or the 64 bit version.  */
const int *hard_regno_mode_classes;

/* Mode-class masks for each hard register under the 32-bit ABI,
   indexed by hard register number.  */
static const int hard_32bit_mode_classes[] = {
  /* Integer registers 0-31 (the %g, %o, %l and %i groups).  */
  S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,

  /* FP regs f0 to f31.  */
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,

  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,

  /* %icc, %sfp, %gsr */
  CC_MODES, 0, D_MODES
};
4738
/* Mode-class masks for each hard register under the 64-bit ABI,
   indexed by hard register number.  */
static const int hard_64bit_mode_classes[] = {
  /* Integer registers 0-31 (the %g, %o, %l and %i groups).  */
  D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,

  /* FP regs f0 to f31.  */
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,

  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,

  /* %icc, %sfp, %gsr */
  CC_MODES, 0, D_MODES
};
4763
/* Map from machine mode to its sparc_mode_class bit; filled in by
   sparc_init_modes.  */
int sparc_mode_class [NUM_MACHINE_MODES];

/* Map from hard register number to register class, used by
   REGNO_REG_CLASS; filled in by sparc_init_modes.  */
enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4767
static void
sparc_init_modes (void)
{
  int i;

  /* Classify every machine mode into a sparc_mode_class bit, keyed off
     its mode class and size.  Unsupported sizes map to 0.  */
  for (i = 0; i < NUM_MACHINE_MODES; i++)
    {
      switch (GET_MODE_CLASS (i))
	{
	case MODE_INT:
	case MODE_PARTIAL_INT:
	case MODE_COMPLEX_INT:
	  if (GET_MODE_SIZE (i) < 4)
	    sparc_mode_class[i] = 1 << (int) H_MODE;
	  else if (GET_MODE_SIZE (i) == 4)
	    sparc_mode_class[i] = 1 << (int) S_MODE;
	  else if (GET_MODE_SIZE (i) == 8)
	    sparc_mode_class[i] = 1 << (int) D_MODE;
	  else if (GET_MODE_SIZE (i) == 16)
	    sparc_mode_class[i] = 1 << (int) T_MODE;
	  else if (GET_MODE_SIZE (i) == 32)
	    sparc_mode_class[i] = 1 << (int) O_MODE;
	  else
	    sparc_mode_class[i] = 0;
	  break;
	case MODE_VECTOR_INT:
	  /* Integer vectors live in the FP registers, hence the
	     float classes.  */
	  if (GET_MODE_SIZE (i) == 4)
	    sparc_mode_class[i] = 1 << (int) SF_MODE;
	  else if (GET_MODE_SIZE (i) == 8)
	    sparc_mode_class[i] = 1 << (int) DF_MODE;
	  else
	    sparc_mode_class[i] = 0;
	  break;
	case MODE_FLOAT:
	case MODE_COMPLEX_FLOAT:
	  if (GET_MODE_SIZE (i) == 4)
	    sparc_mode_class[i] = 1 << (int) SF_MODE;
	  else if (GET_MODE_SIZE (i) == 8)
	    sparc_mode_class[i] = 1 << (int) DF_MODE;
	  else if (GET_MODE_SIZE (i) == 16)
	    sparc_mode_class[i] = 1 << (int) TF_MODE;
	  else if (GET_MODE_SIZE (i) == 32)
	    sparc_mode_class[i] = 1 << (int) OF_MODE;
	  else
	    sparc_mode_class[i] = 0;
	  break;
	case MODE_CC:
	  /* Only the FP compare modes go into the CCFP class.  */
	  if (i == (int) CCFPmode || i == (int) CCFPEmode)
	    sparc_mode_class[i] = 1 << (int) CCFP_MODE;
	  else
	    sparc_mode_class[i] = 1 << (int) CC_MODE;
	  break;
	default:
	  sparc_mode_class[i] = 0;
	  break;
	}
    }

  /* Select the per-register mode-class table for the current ABI.  */
  if (TARGET_ARCH64)
    hard_regno_mode_classes = hard_64bit_mode_classes;
  else
    hard_regno_mode_classes = hard_32bit_mode_classes;

  /* Initialize the array used by REGNO_REG_CLASS.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      if (i < 16 && TARGET_V8PLUS)
	sparc_regno_reg_class[i] = I64_REGS;
      else if (i < 32 || i == FRAME_POINTER_REGNUM)
	sparc_regno_reg_class[i] = GENERAL_REGS;
      else if (i < 64)
	sparc_regno_reg_class[i] = FP_REGS;
      else if (i < 96)
	sparc_regno_reg_class[i] = EXTRA_FP_REGS;
      else if (i < 100)
	sparc_regno_reg_class[i] = FPCC_REGS;
      else
	sparc_regno_reg_class[i] = NO_REGS;
    }
}
4848
4849 /* Return whether REGNO, a global or FP register, must be saved/restored. */
4850
4851 static inline bool
save_global_or_fp_reg_p(unsigned int regno,int leaf_function ATTRIBUTE_UNUSED)4852 save_global_or_fp_reg_p (unsigned int regno,
4853 int leaf_function ATTRIBUTE_UNUSED)
4854 {
4855 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4856 }
4857
4858 /* Return whether the return address register (%i7) is needed. */
4859
4860 static inline bool
return_addr_reg_needed_p(int leaf_function)4861 return_addr_reg_needed_p (int leaf_function)
4862 {
4863 /* If it is live, for example because of __builtin_return_address (0). */
4864 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4865 return true;
4866
4867 /* Otherwise, it is needed as save register if %o7 is clobbered. */
4868 if (!leaf_function
4869 /* Loading the GOT register clobbers %o7. */
4870 || crtl->uses_pic_offset_table
4871 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4872 return true;
4873
4874 return false;
4875 }
4876
4877 /* Return whether REGNO, a local or in register, must be saved/restored. */
4878
4879 static bool
save_local_or_in_reg_p(unsigned int regno,int leaf_function)4880 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
4881 {
4882 /* General case: call-saved registers live at some point. */
4883 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
4884 return true;
4885
4886 /* Frame pointer register (%fp) if needed. */
4887 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
4888 return true;
4889
4890 /* Return address register (%i7) if needed. */
4891 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
4892 return true;
4893
4894 /* GOT register (%l7) if needed. */
4895 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
4896 return true;
4897
4898 /* If the function accesses prior frames, the frame pointer and the return
4899 address of the previous frame must be saved on the stack. */
4900 if (crtl->accesses_prior_frames
4901 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
4902 return true;
4903
4904 return false;
4905 }
4906
4907 /* Compute the frame size required by the function. This function is called
4908 during the reload pass and also by sparc_expand_prologue. */
4909
4910 HOST_WIDE_INT
sparc_compute_frame_size(HOST_WIDE_INT size,int leaf_function)4911 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
4912 {
4913 HOST_WIDE_INT frame_size, apparent_frame_size;
4914 int args_size, n_global_fp_regs = 0;
4915 bool save_local_in_regs_p = false;
4916 unsigned int i;
4917
4918 /* If the function allocates dynamic stack space, the dynamic offset is
4919 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
4920 if (leaf_function && !cfun->calls_alloca)
4921 args_size = 0;
4922 else
4923 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
4924
4925 /* Calculate space needed for global registers. */
4926 if (TARGET_ARCH64)
4927 for (i = 0; i < 8; i++)
4928 if (save_global_or_fp_reg_p (i, 0))
4929 n_global_fp_regs += 2;
4930 else
4931 for (i = 0; i < 8; i += 2)
4932 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4933 n_global_fp_regs += 2;
4934
4935 /* In the flat window model, find out which local and in registers need to
4936 be saved. We don't reserve space in the current frame for them as they
4937 will be spilled into the register window save area of the caller's frame.
4938 However, as soon as we use this register window save area, we must create
4939 that of the current frame to make it the live one. */
4940 if (TARGET_FLAT)
4941 for (i = 16; i < 32; i++)
4942 if (save_local_or_in_reg_p (i, leaf_function))
4943 {
4944 save_local_in_regs_p = true;
4945 break;
4946 }
4947
4948 /* Calculate space needed for FP registers. */
4949 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
4950 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4951 n_global_fp_regs += 2;
4952
4953 if (size == 0
4954 && n_global_fp_regs == 0
4955 && args_size == 0
4956 && !save_local_in_regs_p)
4957 frame_size = apparent_frame_size = 0;
4958 else
4959 {
4960 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
4961 apparent_frame_size = (size - STARTING_FRAME_OFFSET + 7) & -8;
4962 apparent_frame_size += n_global_fp_regs * 4;
4963
4964 /* We need to add the size of the outgoing argument area. */
4965 frame_size = apparent_frame_size + ((args_size + 7) & -8);
4966
4967 /* And that of the register window save area. */
4968 frame_size += FIRST_PARM_OFFSET (cfun->decl);
4969
4970 /* Finally, bump to the appropriate alignment. */
4971 frame_size = SPARC_STACK_ALIGN (frame_size);
4972 }
4973
4974 /* Set up values for use in prologue and epilogue. */
4975 sparc_frame_size = frame_size;
4976 sparc_apparent_frame_size = apparent_frame_size;
4977 sparc_n_global_fp_regs = n_global_fp_regs;
4978 sparc_save_local_in_regs_p = save_local_in_regs_p;
4979
4980 return frame_size;
4981 }
4982
4983 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
4984
4985 int
sparc_initial_elimination_offset(int to)4986 sparc_initial_elimination_offset (int to)
4987 {
4988 int offset;
4989
4990 if (to == STACK_POINTER_REGNUM)
4991 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
4992 else
4993 offset = 0;
4994
4995 offset += SPARC_STACK_BIAS;
4996 return offset;
4997 }
4998
4999 /* Output any necessary .register pseudo-ops. */
5000
void
sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
{
#ifdef HAVE_AS_REGISTER_PSEUDO_OP
  int i;

  /* The .register directive only exists for the 64-bit ABI.  */
  if (TARGET_ARCH32)
    return;

  /* Check if %g[2367] were used without
     .register being printed for them already.  */
  for (i = 2; i < 8; i++)
    {
      if (df_regs_ever_live_p (i)
	  && ! sparc_hard_reg_printed [i])
	{
	  /* Remember it so the directive is emitted at most once per
	     register.  */
	  sparc_hard_reg_printed [i] = 1;
	  /* %g7 is used as TLS base register, use #ignore
	     for it instead of #scratch.  */
	  fprintf (file, "\t.register\t%%g%d, #%s\n", i,
		   i == 7 ? "ignore" : "scratch");
	}
      /* Skip %g4 and %g5: only %g[2367] are handled here.  */
      if (i == 3) i = 5;
    }
#endif
}
5027
5028 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5029
5030 #if PROBE_INTERVAL > 4096
5031 #error Cannot use indexed addressing mode for stack probing
5032 #endif
5033
5034 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5035 inclusive. These are offsets from the current stack pointer.
5036
5037 Note that we don't use the REG+REG addressing mode for the probes because
5038 of the stack bias in 64-bit mode. And it doesn't really buy us anything
5039 so the advantages of having a single code win here. */
5040
static void
sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
  /* %g1 is used as a scratch register throughout this function.  */
  rtx g1 = gen_rtx_REG (Pmode, 1);

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  if (size <= PROBE_INTERVAL)
    {
      /* TEST_ADDR = SP - FIRST (the stack grows downward).  */
      emit_move_insn (g1, GEN_INT (first));
      emit_insn (gen_rtx_SET (VOIDmode, g1,
			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
      /* Single probe at FIRST + SIZE.  */
      emit_stack_probe (plus_constant (Pmode, g1, -size));
    }

  /* The run-time loop is made up of 10 insns in the generic case while the
     compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
  else if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i;

      /* First probe at FIRST + PROBE_INTERVAL.  */
      emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
      emit_insn (gen_rtx_SET (VOIDmode, g1,
			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
      emit_stack_probe (g1);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
	 it exceeds SIZE.  If only two probes are needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, g1,
				  plus_constant (Pmode, g1, -PROBE_INTERVAL)));
	  emit_stack_probe (g1);
	}

      /* Final probe at FIRST + SIZE, reusing the last value left in %g1
	 by the loop above.  */
      emit_stack_probe (plus_constant (Pmode, g1,
				       (i - PROBE_INTERVAL) - size));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      /* %g4 holds the last (lowest) address to probe.  */
      rtx g4 = gen_rtx_REG (Pmode, 4);

      emit_move_insn (g1, GEN_INT (first));


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;
      emit_move_insn (g4, GEN_INT (rounded_size));


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_insn (gen_rtx_SET (VOIDmode, g1,
			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      emit_insn (gen_rtx_SET (VOIDmode, g4, gen_rtx_MINUS (Pmode, g1, g4)));


      /* Step 3: the loop

	 while (TEST_ADDR != LAST_ADDR)
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      if (TARGET_64BIT)
	emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
      else
	emit_insn (gen_probe_stack_rangesi (g1, g1, g4));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
5137
5138 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5139 absolute addresses. */
5140
const char *
output_probe_stack_range (rtx reg1, rtx reg2)
{
  /* Counter used to generate a fresh pair of labels per invocation.  */
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg1;
  xops[1] = reg2;
  output_asm_insn ("cmp\t%0, %1", xops);
  if (TARGET_ARCH64)
    fputs ("\tbe,pn\t%xcc,", asm_out_file);
  else
    fputs ("\tbe\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  /* NOTE(review): the leading space appears to mark the insn as sitting in
     the delay slot of the preceding branch — confirm against the file's
     other asm templates.  */
  xops[1] = GEN_INT (-PROBE_INTERVAL);
  output_asm_insn (" add\t%0, %1, %0", xops);

  /* Probe at TEST_ADDR and branch.  */
  if (TARGET_ARCH64)
    fputs ("\tba,pt\t%xcc,", asm_out_file);
  else
    fputs ("\tba\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);
  /* The store adds back the stack bias so the probe hits the real
     (unbiased) address in 64-bit mode.  */
  xops[1] = GEN_INT (SPARC_STACK_BIAS);
  output_asm_insn (" st\t%%g0, [%0+%1]", xops);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
5182
/* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
   needed.  LOW is supposed to be double-word aligned for 32-bit registers.
   SAVE_P decides whether a register must be saved/restored.  ACTION_TRUE
   is the action to be performed if SAVE_P returns true and ACTION_FALSE
   the action to be performed if it returns false.  Return the new offset.  */

/* Predicate deciding whether register REGNO must be saved/restored;
   the second argument is the leaf-function flag.  */
typedef bool (*sorr_pred_t) (unsigned int, int);
typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;

static int
emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
			   int offset, int leaf_function, sorr_pred_t save_p,
			   sorr_act_t action_true, sorr_act_t action_false)
{
  unsigned int i;
  rtx mem, insn;

  /* In 64-bit mode integer registers (regnos < 32) are handled one at a
     time as DImode quantities.  */
  if (TARGET_ARCH64 && high <= 32)
    {
      /* Slot offset of the frame pointer save, or -1 if not seen.  */
      int fp_offset = -1;

      for (i = low; i < high; i++)
	{
	  if (save_p (i, leaf_function))
	    {
	      mem = gen_frame_mem (DImode, plus_constant (Pmode,
							  base, offset));
	      if (action_true == SORR_SAVE)
		{
		  insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
		  RTX_FRAME_RELATED_P (insn) = 1;
		}
	      else  /* action_true == SORR_RESTORE */
		{
		  /* The frame pointer must be restored last since its old
		     value may be used as base address for the frame.  This
		     is problematic in 64-bit mode only because of the lack
		     of double-word load instruction.  */
		  if (i == HARD_FRAME_POINTER_REGNUM)
		    fp_offset = offset;
		  else
		    emit_move_insn (gen_rtx_REG (DImode, i), mem);
		}
	      offset += 8;
	    }
	  else if (action_false == SORR_ADVANCE)
	    offset += 8;
	}

      /* Perform the deferred frame pointer restore, if any.  */
      if (fp_offset >= 0)
	{
	  mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
	  emit_move_insn (hard_frame_pointer_rtx, mem);
	}
    }
  else
    {
      /* 32-bit mode and/or FP registers: walk the range in pairs so a
	 double-word access can be used when both members are live.  */
      for (i = low; i < high; i += 2)
	{
	  bool reg0 = save_p (i, leaf_function);
	  bool reg1 = save_p (i + 1, leaf_function);
	  enum machine_mode mode;
	  int regno;

	  if (reg0 && reg1)
	    {
	      /* Both registers live: one double-word access.  */
	      mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
	      regno = i;
	    }
	  else if (reg0)
	    {
	      /* Only the even register: single-word access at OFFSET.  */
	      mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
	      regno = i;
	    }
	  else if (reg1)
	    {
	      /* Only the odd register: single-word access at OFFSET + 4.  */
	      mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
	      regno = i + 1;
	      offset += 4;
	    }
	  else
	    {
	      if (action_false == SORR_ADVANCE)
		offset += 8;
	      continue;
	    }

	  mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
	  if (action_true == SORR_SAVE)
	    {
	      insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
	      RTX_FRAME_RELATED_P (insn) = 1;
	      if (mode == DImode)
		{
		  /* Attach a REG_FRAME_RELATED_EXPR note describing the
		     double-word store as a parallel of the two equivalent
		     single-word stores, so both register saves are
		     recorded individually.  */
		  rtx set1, set2;
		  mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
							      offset));
		  set1 = gen_rtx_SET (VOIDmode, mem,
				      gen_rtx_REG (SImode, regno));
		  RTX_FRAME_RELATED_P (set1) = 1;
		  mem
		    = gen_frame_mem (SImode, plus_constant (Pmode, base,
							    offset + 4));
		  set2 = gen_rtx_SET (VOIDmode, mem,
				      gen_rtx_REG (SImode, regno + 1));
		  RTX_FRAME_RELATED_P (set2) = 1;
		  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
				gen_rtx_PARALLEL (VOIDmode,
						  gen_rtvec (2, set1, set2)));
		}
	    }
	  else  /* action_true == SORR_RESTORE */
	    emit_move_insn (gen_rtx_REG (mode, regno), mem);

	  /* Always preserve double-word alignment.  */
	  offset = (offset + 8) & -8;
	}
    }

  return offset;
}
5304
5305 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5306
5307 static rtx
emit_adjust_base_to_offset(rtx base,int offset)5308 emit_adjust_base_to_offset (rtx base, int offset)
5309 {
5310 /* ??? This might be optimized a little as %g1 might already have a
5311 value close enough that a single add insn will do. */
5312 /* ??? Although, all of this is probably only a temporary fix because
5313 if %g1 can hold a function result, then sparc_expand_epilogue will
5314 lose (the result will be clobbered). */
5315 rtx new_base = gen_rtx_REG (Pmode, 1);
5316 emit_move_insn (new_base, GEN_INT (offset));
5317 emit_insn (gen_rtx_SET (VOIDmode,
5318 new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5319 return new_base;
5320 }
5321
5322 /* Emit code to save/restore call-saved global and FP registers. */
5323
5324 static void
emit_save_or_restore_global_fp_regs(rtx base,int offset,sorr_act_t action)5325 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5326 {
5327 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5328 {
5329 base = emit_adjust_base_to_offset (base, offset);
5330 offset = 0;
5331 }
5332
5333 offset
5334 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5335 save_global_or_fp_reg_p, action, SORR_NONE);
5336 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5337 save_global_or_fp_reg_p, action, SORR_NONE);
5338 }
5339
5340 /* Emit code to save/restore call-saved local and in registers. */
5341
5342 static void
emit_save_or_restore_local_in_regs(rtx base,int offset,sorr_act_t action)5343 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5344 {
5345 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5346 {
5347 base = emit_adjust_base_to_offset (base, offset);
5348 offset = 0;
5349 }
5350
5351 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5352 save_local_or_in_reg_p, action, SORR_ADVANCE);
5353 }
5354
5355 /* Emit a window_save insn. */
5356
5357 static rtx
emit_window_save(rtx increment)5358 emit_window_save (rtx increment)
5359 {
5360 rtx insn = emit_insn (gen_window_save (increment));
5361 RTX_FRAME_RELATED_P (insn) = 1;
5362
5363 /* The incoming return address (%o7) is saved in %i7. */
5364 add_reg_note (insn, REG_CFA_REGISTER,
5365 gen_rtx_SET (VOIDmode,
5366 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5367 gen_rtx_REG (Pmode,
5368 INCOMING_RETURN_ADDR_REGNUM)));
5369
5370 /* The window save event. */
5371 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5372
5373 /* The CFA is %fp, the hard frame pointer. */
5374 add_reg_note (insn, REG_CFA_DEF_CFA,
5375 plus_constant (Pmode, hard_frame_pointer_rtx,
5376 INCOMING_FRAME_SP_OFFSET));
5377
5378 return insn;
5379 }
5380
5381 /* Generate an increment for the stack pointer. */
5382
5383 static rtx
gen_stack_pointer_inc(rtx increment)5384 gen_stack_pointer_inc (rtx increment)
5385 {
5386 return gen_rtx_SET (VOIDmode,
5387 stack_pointer_rtx,
5388 gen_rtx_PLUS (Pmode,
5389 stack_pointer_rtx,
5390 increment));
5391 }
5392
5393 /* Expand the function prologue. The prologue is responsible for reserving
5394 storage for the frame, saving the call-saved registers and loading the
5395 GOT register if needed. */
5396
void
sparc_expand_prologue (void)
{
  HOST_WIDE_INT size;
  rtx insn;

  /* Compute a snapshot of crtl->uses_only_leaf_regs.  Relying
     on the final value of the flag means deferring the prologue/epilogue
     expansion until just before the second scheduling pass, which is too
     late to emit multiple epilogues or return insns.

     Of course we are making the assumption that the value of the flag
     will not change between now and its final value.  Of the three parts
     of the formula, only the last one can reasonably vary.  Let's take a
     closer look, after assuming that the first two ones are set to true
     (otherwise the last value is effectively silenced).

     If only_leaf_regs_used returns false, the global predicate will also
     be false so the actual frame size calculated below will be positive.
     As a consequence, the save_register_window insn will be emitted in
     the instruction stream; now this insn explicitly references %fp
     which is not a leaf register so only_leaf_regs_used will always
     return false subsequently.

     If only_leaf_regs_used returns true, we hope that the subsequent
     optimization passes won't cause non-leaf registers to pop up.  For
     example, the regrename pass has special provisions to not rename to
     non-leaf registers in a leaf function.  */
  sparc_leaf_function_p
    = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();

  size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);

  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
    sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);

  if (size == 0)
    ; /* do nothing.  */
  else if (sparc_leaf_function_p)
    {
      /* A leaf function only needs a stack pointer decrement, no register
	 window save.  */
      rtx size_int_rtx = GEN_INT (-size);

      if (size <= 4096)
	insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
      else if (size <= 8192)
	{
	  /* Split the decrement in two insns; the 4096/8192 thresholds
	     presumably match the signed 13-bit immediate range — see the
	     identical split in sparc_expand_epilogue.  */
	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* %sp is still the CFA register.  */
	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
	}
      else
	{
	  /* Large frame: load the decrement into scratch register %g1 and
	     attach a note giving the constant-form equivalent.  */
	  rtx size_rtx = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (size_rtx, size_int_rtx);
	  insn = emit_insn (gen_stack_pointer_inc (size_rtx));
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_stack_pointer_inc (size_int_rtx));
	}

      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* Non-leaf function: emit a register window save together with the
	 stack pointer decrement.  */
      rtx size_int_rtx = GEN_INT (-size);

      if (size <= 4096)
	emit_window_save (size_int_rtx);
      else if (size <= 8192)
	{
	  emit_window_save (GEN_INT (-4096));

	  /* %sp is not the CFA register anymore.  */
	  emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));

	  /* Make sure no %fp-based store is issued until after the frame is
	     established.  The offset between the frame pointer and the stack
	     pointer is calculated relative to the value of the stack pointer
	     at the end of the function prologue, and moving instructions that
	     access the stack via the frame pointer between the instructions
	     that decrement the stack pointer could result in accessing the
	     register window save area, which is volatile.  */
	  emit_insn (gen_frame_blockage ());
	}
      else
	{
	  rtx size_rtx = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (size_rtx, size_int_rtx);
	  emit_window_save (size_rtx);
	}
    }

  /* Record the register and offset from which the frame is addressed,
     for use by the epilogue and the register save/restore code.  */
  if (sparc_leaf_function_p)
    {
      sparc_frame_base_reg = stack_pointer_rtx;
      sparc_frame_base_offset = size + SPARC_STACK_BIAS;
    }
  else
    {
      sparc_frame_base_reg = hard_frame_pointer_rtx;
      sparc_frame_base_offset = SPARC_STACK_BIAS;
    }

  if (sparc_n_global_fp_regs > 0)
    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
					 sparc_frame_base_offset
					   - sparc_apparent_frame_size,
					 SORR_SAVE);

  /* Load the GOT register if needed.  */
  if (crtl->uses_pic_offset_table)
    load_got_register ();

  /* Advertise that the data calculated just above are now valid.  */
  sparc_prologue_data_valid_p = true;
}
5517
5518 /* Expand the function prologue. The prologue is responsible for reserving
5519 storage for the frame, saving the call-saved registers and loading the
5520 GOT register if needed. */
5521
void
sparc_flat_expand_prologue (void)
{
  HOST_WIDE_INT size;
  rtx insn;

  /* In the flat model there is no register window save, so the leafness
     snapshot does not need the only_leaf_regs_used component.  */
  sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;

  size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);

  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
    sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);

  /* Save the local and in registers to the stack, if required.  */
  if (sparc_save_local_in_regs_p)
    emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
					SORR_SAVE);

  if (size == 0)
    ; /* do nothing.  */
  else
    {
      rtx size_int_rtx, size_rtx;

      size_rtx = size_int_rtx = GEN_INT (-size);

      /* We establish the frame (i.e. decrement the stack pointer) first, even
	 if we use a frame pointer, because we cannot clobber any call-saved
	 registers, including the frame pointer, if we haven't created a new
	 register save area, for the sake of compatibility with the ABI.  */
      if (size <= 4096)
	insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
      else if (size <= 8192 && !frame_pointer_needed)
	{
	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
	}
      else
	{
	  /* Large frame (or frame pointer needed): decrement via %g1 and
	     record the CFA adjustment in constant form.  */
	  size_rtx = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (size_rtx, size_int_rtx);
	  insn = emit_insn (gen_stack_pointer_inc (size_rtx));
	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
			gen_stack_pointer_inc (size_int_rtx));
	}
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Ensure nothing is scheduled until after the frame is established.  */
      emit_insn (gen_blockage ());

      if (frame_pointer_needed)
	{
	  /* %fp = %sp - (-SIZE), i.e. the value of %sp on entry.  */
	  insn = emit_insn (gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
					 gen_rtx_MINUS (Pmode,
							stack_pointer_rtx,
							size_rtx)));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
			gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    size)));
	}

      if (return_addr_reg_needed_p (sparc_leaf_function_p))
	{
	  /* Save the return address %o7 into %i7 manually, since there is
	     no register window to do it for us.  */
	  rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
	  rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);

	  insn = emit_move_insn (i7, o7);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  add_reg_note (insn, REG_CFA_REGISTER,
			gen_rtx_SET (VOIDmode, i7, o7));

	  /* Prevent this instruction from ever being considered dead,
	     even if this function has no epilogue.  */
	  emit_use (i7);
	}
    }

  /* Record the register and offset from which the frame is addressed.  */
  if (frame_pointer_needed)
    {
      sparc_frame_base_reg = hard_frame_pointer_rtx;
      sparc_frame_base_offset = SPARC_STACK_BIAS;
    }
  else
    {
      sparc_frame_base_reg = stack_pointer_rtx;
      sparc_frame_base_offset = size + SPARC_STACK_BIAS;
    }

  if (sparc_n_global_fp_regs > 0)
    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
					 sparc_frame_base_offset
					   - sparc_apparent_frame_size,
					 SORR_SAVE);

  /* Load the GOT register if needed.  */
  if (crtl->uses_pic_offset_table)
    load_got_register ();

  /* Advertise that the data calculated just above are now valid.  */
  sparc_prologue_data_valid_p = true;
}
5630
5631 /* This function generates the assembly code for function entry, which boils
5632 down to emitting the necessary .register directives. */
5633
5634 static void
sparc_asm_function_prologue(FILE * file,HOST_WIDE_INT size ATTRIBUTE_UNUSED)5635 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5636 {
5637 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5638 if (!TARGET_FLAT)
5639 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5640
5641 sparc_output_scratch_registers (file);
5642 }
5643
5644 /* Expand the function epilogue, either normal or part of a sibcall.
5645 We emit all the instructions except the return or the call. */
5646
5647 void
sparc_expand_epilogue(bool for_eh)5648 sparc_expand_epilogue (bool for_eh)
5649 {
5650 HOST_WIDE_INT size = sparc_frame_size;
5651
5652 if (sparc_n_global_fp_regs > 0)
5653 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5654 sparc_frame_base_offset
5655 - sparc_apparent_frame_size,
5656 SORR_RESTORE);
5657
5658 if (size == 0 || for_eh)
5659 ; /* do nothing. */
5660 else if (sparc_leaf_function_p)
5661 {
5662 if (size <= 4096)
5663 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5664 else if (size <= 8192)
5665 {
5666 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5667 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5668 }
5669 else
5670 {
5671 rtx reg = gen_rtx_REG (Pmode, 1);
5672 emit_move_insn (reg, GEN_INT (size));
5673 emit_insn (gen_stack_pointer_inc (reg));
5674 }
5675 }
5676 }
5677
5678 /* Expand the function epilogue, either normal or part of a sibcall.
5679 We emit all the instructions except the return or the call. */
5680
void
sparc_flat_expand_epilogue (bool for_eh)
{
  HOST_WIDE_INT size = sparc_frame_size;

  /* Restore the call-saved global and FP registers, if any were saved.  */
  if (sparc_n_global_fp_regs > 0)
    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
					 sparc_frame_base_offset
					 - sparc_apparent_frame_size,
					 SORR_RESTORE);

  /* If we have a frame pointer, we'll need both to restore it before the
     frame is destroyed and use its current value in destroying the frame.
     Since we don't have an atomic way to do that in the flat window model,
     we save the current value into a temporary register (%g1).  */
  if (frame_pointer_needed && !for_eh)
    emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);

  /* Restore the return address into %o7 from %i7, mirroring the save
     done in sparc_flat_expand_prologue.  */
  if (return_addr_reg_needed_p (sparc_leaf_function_p))
    emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
		    gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));

  if (sparc_save_local_in_regs_p)
    emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
					sparc_frame_base_offset,
					SORR_RESTORE);

  if (size == 0 || for_eh)
    ; /* do nothing.  */
  else if (frame_pointer_needed)
    {
      /* Make sure the frame is destroyed after everything else is done.  */
      emit_insn (gen_blockage ());

      /* %g1 holds the entry value of %fp saved above.  */
      emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
    }
  else
    {
      /* Likewise.  */
      emit_insn (gen_blockage ());

      if (size <= 4096)
	emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
      else if (size <= 8192)
	{
	  emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
	  emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
	}
      else
	{
	  /* Large frame: go through scratch register %g1.  */
	  rtx reg = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (reg, GEN_INT (size));
	  emit_insn (gen_stack_pointer_inc (reg));
	}
    }
}
5737
5738 /* Return true if it is appropriate to emit `return' instructions in the
5739 body of a function. */
5740
5741 bool
sparc_can_use_return_insn_p(void)5742 sparc_can_use_return_insn_p (void)
5743 {
5744 return sparc_prologue_data_valid_p
5745 && sparc_n_global_fp_regs == 0
5746 && TARGET_FLAT
5747 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
5748 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
5749 }
5750
5751 /* This function generates the assembly code for function exit. */
5752
5753 static void
sparc_asm_function_epilogue(FILE * file,HOST_WIDE_INT size ATTRIBUTE_UNUSED)5754 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5755 {
5756 /* If the last two instructions of a function are "call foo; dslot;"
5757 the return address might point to the first instruction in the next
5758 function and we have to output a dummy nop for the sake of sane
5759 backtraces in such cases. This is pointless for sibling calls since
5760 the return address is explicitly adjusted. */
5761
5762 rtx insn, last_real_insn;
5763
5764 insn = get_last_insn ();
5765
5766 last_real_insn = prev_real_insn (insn);
5767 if (last_real_insn
5768 && GET_CODE (last_real_insn) == INSN
5769 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
5770 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
5771
5772 if (last_real_insn
5773 && CALL_P (last_real_insn)
5774 && !SIBLING_CALL_P (last_real_insn))
5775 fputs("\tnop\n", file);
5776
5777 sparc_output_deferred_case_vectors ();
5778 }
5779
5780 /* Output a 'restore' instruction. */
5781
5782 static void
output_restore(rtx pat)5783 output_restore (rtx pat)
5784 {
5785 rtx operands[3];
5786
5787 if (! pat)
5788 {
5789 fputs ("\t restore\n", asm_out_file);
5790 return;
5791 }
5792
5793 gcc_assert (GET_CODE (pat) == SET);
5794
5795 operands[0] = SET_DEST (pat);
5796 pat = SET_SRC (pat);
5797
5798 switch (GET_CODE (pat))
5799 {
5800 case PLUS:
5801 operands[1] = XEXP (pat, 0);
5802 operands[2] = XEXP (pat, 1);
5803 output_asm_insn (" restore %r1, %2, %Y0", operands);
5804 break;
5805 case LO_SUM:
5806 operands[1] = XEXP (pat, 0);
5807 operands[2] = XEXP (pat, 1);
5808 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
5809 break;
5810 case ASHIFT:
5811 operands[1] = XEXP (pat, 0);
5812 gcc_assert (XEXP (pat, 1) == const1_rtx);
5813 output_asm_insn (" restore %r1, %r1, %Y0", operands);
5814 break;
5815 default:
5816 operands[1] = pat;
5817 output_asm_insn (" restore %%g0, %1, %Y0", operands);
5818 break;
5819 }
5820 }
5821
5822 /* Output a return. */
5823
const char *
output_return (rtx insn)
{
  if (crtl->calls_eh_return)
    {
      /* If the function uses __builtin_eh_return, the eh_return
	 machinery occupies the delay slot.  */
      gcc_assert (!final_sequence);

      if (flag_delayed_branch)
	{
	  if (!TARGET_FLAT && TARGET_V9)
	    fputs ("\treturn\t%i7+8\n", asm_out_file);
	  else
	    {
	      if (!TARGET_FLAT)
		fputs ("\trestore\n", asm_out_file);

	      fputs ("\tjmp\t%o7+8\n", asm_out_file);
	    }

	  /* The stack adjustment by %g1 sits in the delay slot.  */
	  fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
	}
      else
	{
	  /* No delay slots: adjust the stack before the jump and pad the
	     slot with a nop.  */
	  if (!TARGET_FLAT)
	    fputs ("\trestore\n", asm_out_file);

	  fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
	  fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
	}
    }
  else if (sparc_leaf_function_p || TARGET_FLAT)
    {
      /* This is a leaf or flat function so we don't have to bother restoring
	 the register window, which frees us from dealing with the convoluted
	 semantics of restore/return.  We simply output the jump to the
	 return address and the insn in the delay slot (if any).  */

      return "jmp\t%%o7+%)%#";
    }
  else
    {
      /* This is a regular function so we have to restore the register window.
	 We may have a pending insn for the delay slot, which will be either
	 combined with the 'restore' instruction or put in the delay slot of
	 the 'return' instruction.  */

      if (final_sequence)
	{
	  rtx delay, pat;

	  delay = NEXT_INSN (insn);
	  gcc_assert (delay);

	  pat = PATTERN (delay);

	  /* If the delay insn can be renumbered into the output register
	     window, put it in the delay slot of a 'return'.  */
	  if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
	    {
	      epilogue_renumber (&pat, 0);
	      return "return\t%%i7+%)%#";
	    }
	  else
	    {
	      /* Otherwise combine the delay insn with the 'restore' and
		 neutralize it so it is not output a second time.  */
	      output_asm_insn ("jmp\t%%i7+%)", NULL);
	      output_restore (pat);
	      PATTERN (delay) = gen_blockage ();
	      INSN_CODE (delay) = -1;
	    }
	}
      else
	{
	  /* The delay slot is empty.  */
	  if (TARGET_V9)
	    return "return\t%%i7+%)\n\t nop";
	  else if (flag_delayed_branch)
	    return "jmp\t%%i7+%)\n\t restore";
	  else
	    return "restore\n\tjmp\t%%o7+%)\n\t nop";
	}
    }

  return "";
}
5908
5909 /* Output a sibling call. */
5910
const char *
output_sibcall (rtx insn, rtx call_operand)
{
  rtx operands[1];

  /* The sequences below rely on delay slots being available.  */
  gcc_assert (flag_delayed_branch);

  operands[0] = call_operand;

  if (sparc_leaf_function_p || TARGET_FLAT)
    {
      /* This is a leaf or flat function so we don't have to bother restoring
	 the register window.  We simply output the jump to the function and
	 the insn in the delay slot (if any).  */

      gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));

      if (final_sequence)
	output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
			 operands);
      else
	/* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
	   it into branch if possible.  */
	output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
			 operands);
    }
  else
    {
      /* This is a regular function so we have to restore the register window.
	 We may have a pending insn for the delay slot, which will be combined
	 with the 'restore' instruction.  */

      output_asm_insn ("call\t%a0, 0", operands);

      if (final_sequence)
	{
	  /* The delay insn is folded into the 'restore'; neutralize it so
	     it is not output a second time.  */
	  rtx delay = NEXT_INSN (insn);
	  gcc_assert (delay);

	  output_restore (PATTERN (delay));

	  PATTERN (delay) = gen_blockage ();
	  INSN_CODE (delay) = -1;
	}
      else
	output_restore (NULL_RTX);
    }

  return "";
}
5961
5962 /* Functions for handling argument passing.
5963
5964 For 32-bit, the first 6 args are normally in registers and the rest are
5965 pushed. Any arg that starts within the first 6 words is at least
5966 partially passed in a register unless its data type forbids.
5967
5968 For 64-bit, the argument registers are laid out as an array of 16 elements
5969 and arguments are added sequentially. The first 6 int args and up to the
5970 first 16 fp args (depending on size) are passed in regs.
5971
5972 Slot Stack Integral Float Float in structure Double Long Double
5973 ---- ----- -------- ----- ------------------ ------ -----------
5974 15 [SP+248] %f31 %f30,%f31 %d30
5975 14 [SP+240] %f29 %f28,%f29 %d28 %q28
5976 13 [SP+232] %f27 %f26,%f27 %d26
5977 12 [SP+224] %f25 %f24,%f25 %d24 %q24
5978 11 [SP+216] %f23 %f22,%f23 %d22
5979 10 [SP+208] %f21 %f20,%f21 %d20 %q20
5980 9 [SP+200] %f19 %f18,%f19 %d18
5981 8 [SP+192] %f17 %f16,%f17 %d16 %q16
5982 7 [SP+184] %f15 %f14,%f15 %d14
5983 6 [SP+176] %f13 %f12,%f13 %d12 %q12
5984 5 [SP+168] %o5 %f11 %f10,%f11 %d10
5985 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
5986 3 [SP+152] %o3 %f7 %f6,%f7 %d6
5987 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
5988 1 [SP+136] %o1 %f3 %f2,%f3 %d2
5989 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
5990
5991 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
5992
5993 Integral arguments are always passed as 64-bit quantities appropriately
5994 extended.
5995
5996 Passing of floating point values is handled as follows.
5997 If a prototype is in scope:
5998 If the value is in a named argument (i.e. not a stdarg function or a
5999 value not part of the `...') then the value is passed in the appropriate
6000 fp reg.
6001 If the value is part of the `...' and is passed in one of the first 6
6002 slots then the value is passed in the appropriate int reg.
6003 If the value is part of the `...' and is not passed in one of the first 6
6004 slots then the value is passed in memory.
6005 If a prototype is not in scope:
6006 If the value is one of the first 6 arguments the value is passed in the
6007 appropriate integer reg and the appropriate fp reg.
6008 If the value is not one of the first 6 arguments the value is passed in
6009 the appropriate fp reg and in memory.
6010
6011
6012 Summary of the calling conventions implemented by GCC on the SPARC:
6013
6014 32-bit ABI:
6015 size argument return value
6016
6017 small integer <4 int. reg. int. reg.
6018 word 4 int. reg. int. reg.
6019 double word 8 int. reg. int. reg.
6020
6021 _Complex small integer <8 int. reg. int. reg.
6022 _Complex word 8 int. reg. int. reg.
6023 _Complex double word 16 memory int. reg.
6024
6025 vector integer <=8 int. reg. FP reg.
6026 vector integer >8 memory memory
6027
6028 float 4 int. reg. FP reg.
6029 double 8 int. reg. FP reg.
6030 long double 16 memory memory
6031
6032 _Complex float 8 memory FP reg.
6033 _Complex double 16 memory FP reg.
6034 _Complex long double 32 memory FP reg.
6035
6036 vector float any memory memory
6037
6038 aggregate any memory memory
6039
6040
6041
6042 64-bit ABI:
6043 size argument return value
6044
6045 small integer <8 int. reg. int. reg.
6046 word 8 int. reg. int. reg.
6047 double word 16 int. reg. int. reg.
6048
6049 _Complex small integer <16 int. reg. int. reg.
6050 _Complex word 16 int. reg. int. reg.
6051 _Complex double word 32 memory int. reg.
6052
6053 vector integer <=16 FP reg. FP reg.
6054 vector integer 16<s<=32 memory FP reg.
6055 vector integer >32 memory memory
6056
6057 float 4 FP reg. FP reg.
6058 double 8 FP reg. FP reg.
6059 long double 16 FP reg. FP reg.
6060
6061 _Complex float 8 FP reg. FP reg.
6062 _Complex double 16 FP reg. FP reg.
6063 _Complex long double 32 memory FP reg.
6064
6065 vector float <=16 FP reg. FP reg.
6066 vector float 16<s<=32 memory FP reg.
6067 vector float >32 memory memory
6068
6069 aggregate <=16 reg. reg.
6070 aggregate 16<s<=32 memory reg.
6071 aggregate >32 memory memory
6072
6073
6074
6075 Note #1: complex floating-point types follow the extended SPARC ABIs as
6076 implemented by the Sun compiler.
6077
6078 Note #2: integral vector types follow the scalar floating-point types
6079 conventions to match what is implemented by the Sun VIS SDK.
6080
6081 Note #3: floating-point vector types follow the aggregate types
6082 conventions. */
6083
6084
6085 /* Maximum number of int regs for args. */
6086 #define SPARC_INT_ARG_MAX 6
6087 /* Maximum number of fp regs for args. */
6088 #define SPARC_FP_ARG_MAX 16
6089
6090 #define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
6091
6092 /* Handle the INIT_CUMULATIVE_ARGS macro.
6093 Initialize a variable CUM of type CUMULATIVE_ARGS
6094 for a call to a function whose data type is FNTYPE.
6095 For a library call, FNTYPE is 0. */
6096
6097 void
init_cumulative_args(struct sparc_args * cum,tree fntype,rtx libname ATTRIBUTE_UNUSED,tree fndecl ATTRIBUTE_UNUSED)6098 init_cumulative_args (struct sparc_args *cum, tree fntype,
6099 rtx libname ATTRIBUTE_UNUSED,
6100 tree fndecl ATTRIBUTE_UNUSED)
6101 {
6102 cum->words = 0;
6103 cum->prototype_p = fntype && prototype_p (fntype);
6104 cum->libcall_p = fntype == 0;
6105 }
6106
6107 /* Handle promotion of pointer and integer arguments. */
6108
6109 static enum machine_mode
sparc_promote_function_mode(const_tree type,enum machine_mode mode,int * punsignedp,const_tree fntype ATTRIBUTE_UNUSED,int for_return ATTRIBUTE_UNUSED)6110 sparc_promote_function_mode (const_tree type,
6111 enum machine_mode mode,
6112 int *punsignedp,
6113 const_tree fntype ATTRIBUTE_UNUSED,
6114 int for_return ATTRIBUTE_UNUSED)
6115 {
6116 if (type != NULL_TREE && POINTER_TYPE_P (type))
6117 {
6118 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6119 return Pmode;
6120 }
6121
6122 /* Integral arguments are passed as full words, as per the ABI. */
6123 if (GET_MODE_CLASS (mode) == MODE_INT
6124 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6125 return word_mode;
6126
6127 return mode;
6128 }
6129
6130 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6131
6132 static bool
sparc_strict_argument_naming(cumulative_args_t ca ATTRIBUTE_UNUSED)6133 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6134 {
6135 return TARGET_ARCH64 ? true : false;
6136 }
6137
6138 /* Scan the record type TYPE and return the following predicates:
6139 - INTREGS_P: the record contains at least one field or sub-field
6140 that is eligible for promotion in integer registers.
6141 - FP_REGS_P: the record contains at least one field or sub-field
6142 that is eligible for promotion in floating-point registers.
6143 - PACKED_P: the record contains at least one field that is packed.
6144
6145 Sub-fields are not taken into account for the PACKED_P predicate. */
6146
6147 static void
scan_record_type(const_tree type,int * intregs_p,int * fpregs_p,int * packed_p)6148 scan_record_type (const_tree type, int *intregs_p, int *fpregs_p,
6149 int *packed_p)
6150 {
6151 tree field;
6152
6153 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6154 {
6155 if (TREE_CODE (field) == FIELD_DECL)
6156 {
6157 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6158 scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0);
6159 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6160 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6161 && TARGET_FPU)
6162 *fpregs_p = 1;
6163 else
6164 *intregs_p = 1;
6165
6166 if (packed_p && DECL_PACKED (field))
6167 *packed_p = 1;
6168 }
6169 }
6170 }
6171
6172 /* Compute the slot number to pass an argument in.
6173 Return the slot number or -1 if passing on the stack.
6174
6175 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6176 the preceding args and about the function being called.
6177 MODE is the argument's machine mode.
6178 TYPE is the data type of the argument (as a tree).
6179 This is null for libcalls where that information may
6180 not be available.
6181 NAMED is nonzero if this argument is a named parameter
6182 (otherwise it is an extra parameter matching an ellipsis).
6183 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6184 *PREGNO records the register number to use if scalar type.
6185 *PPADDING records the amount of padding needed in words. */
6186
6187 static int
function_arg_slotno(const struct sparc_args * cum,enum machine_mode mode,const_tree type,bool named,bool incoming_p,int * pregno,int * ppadding)6188 function_arg_slotno (const struct sparc_args *cum, enum machine_mode mode,
6189 const_tree type, bool named, bool incoming_p,
6190 int *pregno, int *ppadding)
6191 {
6192 int regbase = (incoming_p
6193 ? SPARC_INCOMING_INT_ARG_FIRST
6194 : SPARC_OUTGOING_INT_ARG_FIRST);
6195 int slotno = cum->words;
6196 enum mode_class mclass;
6197 int regno;
6198
6199 *ppadding = 0;
6200
6201 if (type && TREE_ADDRESSABLE (type))
6202 return -1;
6203
6204 if (TARGET_ARCH32
6205 && mode == BLKmode
6206 && type
6207 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6208 return -1;
6209
6210 /* For SPARC64, objects requiring 16-byte alignment get it. */
6211 if (TARGET_ARCH64
6212 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6213 && (slotno & 1) != 0)
6214 slotno++, *ppadding = 1;
6215
6216 mclass = GET_MODE_CLASS (mode);
6217 if (type && TREE_CODE (type) == VECTOR_TYPE)
6218 {
6219 /* Vector types deserve special treatment because they are
6220 polymorphic wrt their mode, depending upon whether VIS
6221 instructions are enabled. */
6222 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6223 {
6224 /* The SPARC port defines no floating-point vector modes. */
6225 gcc_assert (mode == BLKmode);
6226 }
6227 else
6228 {
6229 /* Integral vector types should either have a vector
6230 mode or an integral mode, because we are guaranteed
6231 by pass_by_reference that their size is not greater
6232 than 16 bytes and TImode is 16-byte wide. */
6233 gcc_assert (mode != BLKmode);
6234
6235 /* Vector integers are handled like floats according to
6236 the Sun VIS SDK. */
6237 mclass = MODE_FLOAT;
6238 }
6239 }
6240
6241 switch (mclass)
6242 {
6243 case MODE_FLOAT:
6244 case MODE_COMPLEX_FLOAT:
6245 case MODE_VECTOR_INT:
6246 if (TARGET_ARCH64 && TARGET_FPU && named)
6247 {
6248 if (slotno >= SPARC_FP_ARG_MAX)
6249 return -1;
6250 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6251 /* Arguments filling only one single FP register are
6252 right-justified in the outer double FP register. */
6253 if (GET_MODE_SIZE (mode) <= 4)
6254 regno++;
6255 break;
6256 }
6257 /* fallthrough */
6258
6259 case MODE_INT:
6260 case MODE_COMPLEX_INT:
6261 if (slotno >= SPARC_INT_ARG_MAX)
6262 return -1;
6263 regno = regbase + slotno;
6264 break;
6265
6266 case MODE_RANDOM:
6267 if (mode == VOIDmode)
6268 /* MODE is VOIDmode when generating the actual call. */
6269 return -1;
6270
6271 gcc_assert (mode == BLKmode);
6272
6273 if (TARGET_ARCH32
6274 || !type
6275 || (TREE_CODE (type) != VECTOR_TYPE
6276 && TREE_CODE (type) != RECORD_TYPE))
6277 {
6278 if (slotno >= SPARC_INT_ARG_MAX)
6279 return -1;
6280 regno = regbase + slotno;
6281 }
6282 else /* TARGET_ARCH64 && type */
6283 {
6284 int intregs_p = 0, fpregs_p = 0, packed_p = 0;
6285
6286 /* First see what kinds of registers we would need. */
6287 if (TREE_CODE (type) == VECTOR_TYPE)
6288 fpregs_p = 1;
6289 else
6290 scan_record_type (type, &intregs_p, &fpregs_p, &packed_p);
6291
6292 /* The ABI obviously doesn't specify how packed structures
6293 are passed. These are defined to be passed in int regs
6294 if possible, otherwise memory. */
6295 if (packed_p || !named)
6296 fpregs_p = 0, intregs_p = 1;
6297
6298 /* If all arg slots are filled, then must pass on stack. */
6299 if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
6300 return -1;
6301
6302 /* If there are only int args and all int arg slots are filled,
6303 then must pass on stack. */
6304 if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
6305 return -1;
6306
6307 /* Note that even if all int arg slots are filled, fp members may
6308 still be passed in regs if such regs are available.
6309 *PREGNO isn't set because there may be more than one, it's up
6310 to the caller to compute them. */
6311 return slotno;
6312 }
6313 break;
6314
6315 default :
6316 gcc_unreachable ();
6317 }
6318
6319 *pregno = regno;
6320 return slotno;
6321 }
6322
6323 /* Handle recursive register counting for structure field layout. */
6324
6325 struct function_arg_record_value_parms
6326 {
6327 rtx ret; /* return expression being built. */
6328 int slotno; /* slot number of the argument. */
6329 int named; /* whether the argument is named. */
6330 int regbase; /* regno of the base register. */
6331 int stack; /* 1 if part of the argument is on the stack. */
6332 int intoffset; /* offset of the first pending integer field. */
6333 unsigned int nregs; /* number of words passed in registers. */
6334 };
6335
6336 static void function_arg_record_value_3
6337 (HOST_WIDE_INT, struct function_arg_record_value_parms *);
6338 static void function_arg_record_value_2
6339 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6340 static void function_arg_record_value_1
6341 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
6342 static rtx function_arg_record_value (const_tree, enum machine_mode, int, int, int);
6343 static rtx function_arg_union_value (int, enum machine_mode, int, int);
6344
6345 /* A subroutine of function_arg_record_value. Traverse the structure
6346 recursively and determine how many registers will be required. */
6347
6348 static void
function_arg_record_value_1(const_tree type,HOST_WIDE_INT startbitpos,struct function_arg_record_value_parms * parms,bool packed_p)6349 function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos,
6350 struct function_arg_record_value_parms *parms,
6351 bool packed_p)
6352 {
6353 tree field;
6354
6355 /* We need to compute how many registers are needed so we can
6356 allocate the PARALLEL but before we can do that we need to know
6357 whether there are any packed fields. The ABI obviously doesn't
6358 specify how structures are passed in this case, so they are
6359 defined to be passed in int regs if possible, otherwise memory,
6360 regardless of whether there are fp values present. */
6361
6362 if (! packed_p)
6363 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6364 {
6365 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6366 {
6367 packed_p = true;
6368 break;
6369 }
6370 }
6371
6372 /* Compute how many registers we need. */
6373 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6374 {
6375 if (TREE_CODE (field) == FIELD_DECL)
6376 {
6377 HOST_WIDE_INT bitpos = startbitpos;
6378
6379 if (DECL_SIZE (field) != 0)
6380 {
6381 if (integer_zerop (DECL_SIZE (field)))
6382 continue;
6383
6384 if (host_integerp (bit_position (field), 1))
6385 bitpos += int_bit_position (field);
6386 }
6387
6388 /* ??? FIXME: else assume zero offset. */
6389
6390 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6391 function_arg_record_value_1 (TREE_TYPE (field),
6392 bitpos,
6393 parms,
6394 packed_p);
6395 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6396 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6397 && TARGET_FPU
6398 && parms->named
6399 && ! packed_p)
6400 {
6401 if (parms->intoffset != -1)
6402 {
6403 unsigned int startbit, endbit;
6404 int intslots, this_slotno;
6405
6406 startbit = parms->intoffset & -BITS_PER_WORD;
6407 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6408
6409 intslots = (endbit - startbit) / BITS_PER_WORD;
6410 this_slotno = parms->slotno + parms->intoffset
6411 / BITS_PER_WORD;
6412
6413 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6414 {
6415 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6416 /* We need to pass this field on the stack. */
6417 parms->stack = 1;
6418 }
6419
6420 parms->nregs += intslots;
6421 parms->intoffset = -1;
6422 }
6423
6424 /* There's no need to check this_slotno < SPARC_FP_ARG MAX.
6425 If it wasn't true we wouldn't be here. */
6426 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6427 && DECL_MODE (field) == BLKmode)
6428 parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6429 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6430 parms->nregs += 2;
6431 else
6432 parms->nregs += 1;
6433 }
6434 else
6435 {
6436 if (parms->intoffset == -1)
6437 parms->intoffset = bitpos;
6438 }
6439 }
6440 }
6441 }
6442
6443 /* A subroutine of function_arg_record_value. Assign the bits of the
6444 structure between parms->intoffset and bitpos to integer registers. */
6445
6446 static void
function_arg_record_value_3(HOST_WIDE_INT bitpos,struct function_arg_record_value_parms * parms)6447 function_arg_record_value_3 (HOST_WIDE_INT bitpos,
6448 struct function_arg_record_value_parms *parms)
6449 {
6450 enum machine_mode mode;
6451 unsigned int regno;
6452 unsigned int startbit, endbit;
6453 int this_slotno, intslots, intoffset;
6454 rtx reg;
6455
6456 if (parms->intoffset == -1)
6457 return;
6458
6459 intoffset = parms->intoffset;
6460 parms->intoffset = -1;
6461
6462 startbit = intoffset & -BITS_PER_WORD;
6463 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6464 intslots = (endbit - startbit) / BITS_PER_WORD;
6465 this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
6466
6467 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
6468 if (intslots <= 0)
6469 return;
6470
6471 /* If this is the trailing part of a word, only load that much into
6472 the register. Otherwise load the whole register. Note that in
6473 the latter case we may pick up unwanted bits. It's not a problem
6474 at the moment but may wish to revisit. */
6475
6476 if (intoffset % BITS_PER_WORD != 0)
6477 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
6478 MODE_INT);
6479 else
6480 mode = word_mode;
6481
6482 intoffset /= BITS_PER_UNIT;
6483 do
6484 {
6485 regno = parms->regbase + this_slotno;
6486 reg = gen_rtx_REG (mode, regno);
6487 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6488 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6489
6490 this_slotno += 1;
6491 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
6492 mode = word_mode;
6493 parms->nregs += 1;
6494 intslots -= 1;
6495 }
6496 while (intslots > 0);
6497 }
6498
6499 /* A subroutine of function_arg_record_value. Traverse the structure
6500 recursively and assign bits to floating point registers. Track which
6501 bits in between need integer registers; invoke function_arg_record_value_3
6502 to make that happen. */
6503
6504 static void
function_arg_record_value_2(const_tree type,HOST_WIDE_INT startbitpos,struct function_arg_record_value_parms * parms,bool packed_p)6505 function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos,
6506 struct function_arg_record_value_parms *parms,
6507 bool packed_p)
6508 {
6509 tree field;
6510
6511 if (! packed_p)
6512 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6513 {
6514 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6515 {
6516 packed_p = true;
6517 break;
6518 }
6519 }
6520
6521 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6522 {
6523 if (TREE_CODE (field) == FIELD_DECL)
6524 {
6525 HOST_WIDE_INT bitpos = startbitpos;
6526
6527 if (DECL_SIZE (field) != 0)
6528 {
6529 if (integer_zerop (DECL_SIZE (field)))
6530 continue;
6531
6532 if (host_integerp (bit_position (field), 1))
6533 bitpos += int_bit_position (field);
6534 }
6535
6536 /* ??? FIXME: else assume zero offset. */
6537
6538 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6539 function_arg_record_value_2 (TREE_TYPE (field),
6540 bitpos,
6541 parms,
6542 packed_p);
6543 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6544 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6545 && TARGET_FPU
6546 && parms->named
6547 && ! packed_p)
6548 {
6549 int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
6550 int regno, nregs, pos;
6551 enum machine_mode mode = DECL_MODE (field);
6552 rtx reg;
6553
6554 function_arg_record_value_3 (bitpos, parms);
6555
6556 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6557 && mode == BLKmode)
6558 {
6559 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6560 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6561 }
6562 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6563 {
6564 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6565 nregs = 2;
6566 }
6567 else
6568 nregs = 1;
6569
6570 regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6571 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6572 regno++;
6573 reg = gen_rtx_REG (mode, regno);
6574 pos = bitpos / BITS_PER_UNIT;
6575 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6576 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6577 parms->nregs += 1;
6578 while (--nregs > 0)
6579 {
6580 regno += GET_MODE_SIZE (mode) / 4;
6581 reg = gen_rtx_REG (mode, regno);
6582 pos += GET_MODE_SIZE (mode);
6583 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6584 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6585 parms->nregs += 1;
6586 }
6587 }
6588 else
6589 {
6590 if (parms->intoffset == -1)
6591 parms->intoffset = bitpos;
6592 }
6593 }
6594 }
6595 }
6596
6597 /* Used by function_arg and sparc_function_value_1 to implement the complex
6598 conventions of the 64-bit ABI for passing and returning structures.
6599 Return an expression valid as a return value for the FUNCTION_ARG
6600 and TARGET_FUNCTION_VALUE.
6601
6602 TYPE is the data type of the argument (as a tree).
6603 This is null for libcalls where that information may
6604 not be available.
6605 MODE is the argument's machine mode.
6606 SLOTNO is the index number of the argument's slot in the parameter array.
6607 NAMED is nonzero if this argument is a named parameter
6608 (otherwise it is an extra parameter matching an ellipsis).
6609 REGBASE is the regno of the base register for the parameter array. */
6610
6611 static rtx
function_arg_record_value(const_tree type,enum machine_mode mode,int slotno,int named,int regbase)6612 function_arg_record_value (const_tree type, enum machine_mode mode,
6613 int slotno, int named, int regbase)
6614 {
6615 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6616 struct function_arg_record_value_parms parms;
6617 unsigned int nregs;
6618
6619 parms.ret = NULL_RTX;
6620 parms.slotno = slotno;
6621 parms.named = named;
6622 parms.regbase = regbase;
6623 parms.stack = 0;
6624
6625 /* Compute how many registers we need. */
6626 parms.nregs = 0;
6627 parms.intoffset = 0;
6628 function_arg_record_value_1 (type, 0, &parms, false);
6629
6630 /* Take into account pending integer fields. */
6631 if (parms.intoffset != -1)
6632 {
6633 unsigned int startbit, endbit;
6634 int intslots, this_slotno;
6635
6636 startbit = parms.intoffset & -BITS_PER_WORD;
6637 endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6638 intslots = (endbit - startbit) / BITS_PER_WORD;
6639 this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
6640
6641 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6642 {
6643 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6644 /* We need to pass this field on the stack. */
6645 parms.stack = 1;
6646 }
6647
6648 parms.nregs += intslots;
6649 }
6650 nregs = parms.nregs;
6651
6652 /* Allocate the vector and handle some annoying special cases. */
6653 if (nregs == 0)
6654 {
6655 /* ??? Empty structure has no value? Duh? */
6656 if (typesize <= 0)
6657 {
6658 /* Though there's nothing really to store, return a word register
6659 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6660 leads to breakage due to the fact that there are zero bytes to
6661 load. */
6662 return gen_rtx_REG (mode, regbase);
6663 }
6664 else
6665 {
6666 /* ??? C++ has structures with no fields, and yet a size. Give up
6667 for now and pass everything back in integer registers. */
6668 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6669 }
6670 if (nregs + slotno > SPARC_INT_ARG_MAX)
6671 nregs = SPARC_INT_ARG_MAX - slotno;
6672 }
6673 gcc_assert (nregs != 0);
6674
6675 parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));
6676
6677 /* If at least one field must be passed on the stack, generate
6678 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6679 also be passed on the stack. We can't do much better because the
6680 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6681 of structures for which the fields passed exclusively in registers
6682 are not at the beginning of the structure. */
6683 if (parms.stack)
6684 XVECEXP (parms.ret, 0, 0)
6685 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6686
6687 /* Fill in the entries. */
6688 parms.nregs = 0;
6689 parms.intoffset = 0;
6690 function_arg_record_value_2 (type, 0, &parms, false);
6691 function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
6692
6693 gcc_assert (parms.nregs == nregs);
6694
6695 return parms.ret;
6696 }
6697
6698 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6699 of the 64-bit ABI for passing and returning unions.
6700 Return an expression valid as a return value for the FUNCTION_ARG
6701 and TARGET_FUNCTION_VALUE.
6702
6703 SIZE is the size in bytes of the union.
6704 MODE is the argument's machine mode.
6705 REGNO is the hard register the union will be passed in. */
6706
6707 static rtx
function_arg_union_value(int size,enum machine_mode mode,int slotno,int regno)6708 function_arg_union_value (int size, enum machine_mode mode, int slotno,
6709 int regno)
6710 {
6711 int nwords = ROUND_ADVANCE (size), i;
6712 rtx regs;
6713
6714 /* See comment in previous function for empty structures. */
6715 if (nwords == 0)
6716 return gen_rtx_REG (mode, regno);
6717
6718 if (slotno == SPARC_INT_ARG_MAX - 1)
6719 nwords = 1;
6720
6721 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6722
6723 for (i = 0; i < nwords; i++)
6724 {
6725 /* Unions are passed left-justified. */
6726 XVECEXP (regs, 0, i)
6727 = gen_rtx_EXPR_LIST (VOIDmode,
6728 gen_rtx_REG (word_mode, regno),
6729 GEN_INT (UNITS_PER_WORD * i));
6730 regno++;
6731 }
6732
6733 return regs;
6734 }
6735
6736 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6737 for passing and returning large (BLKmode) vectors.
6738 Return an expression valid as a return value for the FUNCTION_ARG
6739 and TARGET_FUNCTION_VALUE.
6740
6741 SIZE is the size in bytes of the vector (at least 8 bytes).
6742 REGNO is the FP hard register the vector will be passed in. */
6743
6744 static rtx
function_arg_vector_value(int size,int regno)6745 function_arg_vector_value (int size, int regno)
6746 {
6747 int i, nregs = size / 8;
6748 rtx regs;
6749
6750 regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6751
6752 for (i = 0; i < nregs; i++)
6753 {
6754 XVECEXP (regs, 0, i)
6755 = gen_rtx_EXPR_LIST (VOIDmode,
6756 gen_rtx_REG (DImode, regno + 2*i),
6757 GEN_INT (i*8));
6758 }
6759
6760 return regs;
6761 }
6762
6763 /* Determine where to put an argument to a function.
6764 Value is zero to push the argument on the stack,
6765 or a hard register in which to store the argument.
6766
6767 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6768 the preceding args and about the function being called.
6769 MODE is the argument's machine mode.
6770 TYPE is the data type of the argument (as a tree).
6771 This is null for libcalls where that information may
6772 not be available.
6773 NAMED is true if this argument is a named parameter
6774 (otherwise it is an extra parameter matching an ellipsis).
6775 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6776 TARGET_FUNCTION_INCOMING_ARG. */
6777
6778 static rtx
sparc_function_arg_1(cumulative_args_t cum_v,enum machine_mode mode,const_tree type,bool named,bool incoming_p)6779 sparc_function_arg_1 (cumulative_args_t cum_v, enum machine_mode mode,
6780 const_tree type, bool named, bool incoming_p)
6781 {
6782 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6783
6784 int regbase = (incoming_p
6785 ? SPARC_INCOMING_INT_ARG_FIRST
6786 : SPARC_OUTGOING_INT_ARG_FIRST);
6787 int slotno, regno, padding;
6788 enum mode_class mclass = GET_MODE_CLASS (mode);
6789
6790 slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
6791 ®no, &padding);
6792 if (slotno == -1)
6793 return 0;
6794
6795 /* Vector types deserve special treatment because they are polymorphic wrt
6796 their mode, depending upon whether VIS instructions are enabled. */
6797 if (type && TREE_CODE (type) == VECTOR_TYPE)
6798 {
6799 HOST_WIDE_INT size = int_size_in_bytes (type);
6800 gcc_assert ((TARGET_ARCH32 && size <= 8)
6801 || (TARGET_ARCH64 && size <= 16));
6802
6803 if (mode == BLKmode)
6804 return function_arg_vector_value (size,
6805 SPARC_FP_ARG_FIRST + 2*slotno);
6806 else
6807 mclass = MODE_FLOAT;
6808 }
6809
6810 if (TARGET_ARCH32)
6811 return gen_rtx_REG (mode, regno);
6812
6813 /* Structures up to 16 bytes in size are passed in arg slots on the stack
6814 and are promoted to registers if possible. */
6815 if (type && TREE_CODE (type) == RECORD_TYPE)
6816 {
6817 HOST_WIDE_INT size = int_size_in_bytes (type);
6818 gcc_assert (size <= 16);
6819
6820 return function_arg_record_value (type, mode, slotno, named, regbase);
6821 }
6822
6823 /* Unions up to 16 bytes in size are passed in integer registers. */
6824 else if (type && TREE_CODE (type) == UNION_TYPE)
6825 {
6826 HOST_WIDE_INT size = int_size_in_bytes (type);
6827 gcc_assert (size <= 16);
6828
6829 return function_arg_union_value (size, mode, slotno, regno);
6830 }
6831
6832 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
6833 but also have the slot allocated for them.
6834 If no prototype is in scope fp values in register slots get passed
6835 in two places, either fp regs and int regs or fp regs and memory. */
6836 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6837 && SPARC_FP_REG_P (regno))
6838 {
6839 rtx reg = gen_rtx_REG (mode, regno);
6840 if (cum->prototype_p || cum->libcall_p)
6841 {
6842 /* "* 2" because fp reg numbers are recorded in 4 byte
6843 quantities. */
6844 #if 0
6845 /* ??? This will cause the value to be passed in the fp reg and
6846 in the stack. When a prototype exists we want to pass the
6847 value in the reg but reserve space on the stack. That's an
6848 optimization, and is deferred [for a bit]. */
6849 if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
6850 return gen_rtx_PARALLEL (mode,
6851 gen_rtvec (2,
6852 gen_rtx_EXPR_LIST (VOIDmode,
6853 NULL_RTX, const0_rtx),
6854 gen_rtx_EXPR_LIST (VOIDmode,
6855 reg, const0_rtx)));
6856 else
6857 #else
6858 /* ??? It seems that passing back a register even when past
6859 the area declared by REG_PARM_STACK_SPACE will allocate
6860 space appropriately, and will not copy the data onto the
6861 stack, exactly as we desire.
6862
6863 This is due to locate_and_pad_parm being called in
6864 expand_call whenever reg_parm_stack_space > 0, which
6865 while beneficial to our example here, would seem to be
6866 in error from what had been intended. Ho hum... -- r~ */
6867 #endif
6868 return reg;
6869 }
6870 else
6871 {
6872 rtx v0, v1;
6873
6874 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
6875 {
6876 int intreg;
6877
6878 /* On incoming, we don't need to know that the value
6879 is passed in %f0 and %i0, and it confuses other parts
6880 causing needless spillage even on the simplest cases. */
6881 if (incoming_p)
6882 return reg;
6883
6884 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
6885 + (regno - SPARC_FP_ARG_FIRST) / 2);
6886
6887 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6888 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
6889 const0_rtx);
6890 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6891 }
6892 else
6893 {
6894 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6895 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6896 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6897 }
6898 }
6899 }
6900
6901 /* All other aggregate types are passed in an integer register in a mode
6902 corresponding to the size of the type. */
6903 else if (type && AGGREGATE_TYPE_P (type))
6904 {
6905 HOST_WIDE_INT size = int_size_in_bytes (type);
6906 gcc_assert (size <= 16);
6907
6908 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6909 }
6910
6911 return gen_rtx_REG (mode, regno);
6912 }
6913
6914 /* Handle the TARGET_FUNCTION_ARG target hook. */
6915
6916 static rtx
sparc_function_arg(cumulative_args_t cum,enum machine_mode mode,const_tree type,bool named)6917 sparc_function_arg (cumulative_args_t cum, enum machine_mode mode,
6918 const_tree type, bool named)
6919 {
6920 return sparc_function_arg_1 (cum, mode, type, named, false);
6921 }
6922
6923 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
6924
6925 static rtx
sparc_function_incoming_arg(cumulative_args_t cum,enum machine_mode mode,const_tree type,bool named)6926 sparc_function_incoming_arg (cumulative_args_t cum, enum machine_mode mode,
6927 const_tree type, bool named)
6928 {
6929 return sparc_function_arg_1 (cum, mode, type, named, true);
6930 }
6931
6932 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
6933
6934 static unsigned int
sparc_function_arg_boundary(enum machine_mode mode,const_tree type)6935 sparc_function_arg_boundary (enum machine_mode mode, const_tree type)
6936 {
6937 return ((TARGET_ARCH64
6938 && (GET_MODE_ALIGNMENT (mode) == 128
6939 || (type && TYPE_ALIGN (type) == 128)))
6940 ? 128
6941 : PARM_BOUNDARY);
6942 }
6943
6944 /* For an arg passed partly in registers and partly in memory,
6945 this is the number of bytes of registers used.
6946 For args passed entirely in registers or entirely in memory, zero.
6947
6948 Any arg that starts in the first 6 regs but won't entirely fit in them
6949 needs partial registers on v8. On v9, structures with integer
6950 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
6951 values that begin in the last fp reg [where "last fp reg" varies with the
6952 mode] will be split between that reg and memory. */
6953
static int
sparc_arg_partial_bytes (cumulative_args_t cum, enum machine_mode mode,
			 tree type, bool named)
{
  int slotno, regno, padding;

  /* We pass false for incoming_p here, it doesn't matter.  */
  slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
				false, &regno, &padding);

  /* -1 means the argument is passed entirely in memory: no partial regs.  */
  if (slotno == -1)
    return 0;

  if (TARGET_ARCH32)
    {
      /* If the argument starts in a register slot but its rounded size
	 extends beyond the last integer argument register, the overflow
	 goes to memory; return the bytes that did fit in registers.  */
      if ((slotno + (mode == BLKmode
		     ? ROUND_ADVANCE (int_size_in_bytes (type))
		     : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
	  > SPARC_INT_ARG_MAX)
	return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
    }
  else
    {
      /* We are guaranteed by pass_by_reference that the size of the
	 argument is not greater than 16 bytes, so we only need to return
	 one word if the argument is partially passed in registers.  */

      if (type && AGGREGATE_TYPE_P (type))
	{
	  int size = int_size_in_bytes (type);

	  /* A two-word aggregate starting in the very last slot splits:
	     first word in a register, second word in memory.  */
	  if (size > UNITS_PER_WORD
	      && slotno == SPARC_INT_ARG_MAX - 1)
	    return UNITS_PER_WORD;
	}
      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
	       || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
		   && ! (TARGET_FPU && named)))
	{
	  /* The complex types are passed as packed types.  */
	  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
	      && slotno == SPARC_INT_ARG_MAX - 1)
	    return UNITS_PER_WORD;
	}
      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
	{
	  /* Complex FP values beginning in the last FP register split
	     between that register and memory.  */
	  if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
	      > SPARC_FP_ARG_MAX)
	    return UNITS_PER_WORD;
	}
    }

  return 0;
}
7008
7009 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7010 Specify whether to pass the argument by reference. */
7011
static bool
sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
			 enum machine_mode mode, const_tree type,
			 bool named ATTRIBUTE_UNUSED)
{
  if (TARGET_ARCH32)
    /* Original SPARC 32-bit ABI says that structures and unions,
       and quad-precision floats are passed by reference.  For Pascal,
       also pass arrays by reference.  All other base types are passed
       in registers.

       Extended ABI (as implemented by the Sun compiler) says that all
       complex floats are passed by reference.  Pass complex integers
       in registers up to 8 bytes.  More generally, enforce the 2-word
       cap for passing arguments in registers.

       Vector ABI (as implemented by the Sun VIS SDK) says that vector
       integers are passed like floats of the same size, that is in
       registers up to 8 bytes.  Pass all vector floats by reference
       like structure and unions.  */
    return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
	    || mode == SCmode
	    /* Catch CDImode, TFmode, DCmode and TCmode.  */
	    || GET_MODE_SIZE (mode) > 8
	    || (type
		&& TREE_CODE (type) == VECTOR_TYPE
		&& (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
  else
    /* Original SPARC 64-bit ABI says that structures and unions
       smaller than 16 bytes are passed in registers, as well as
       all other base types.

       Extended ABI (as implemented by the Sun compiler) says that
       complex floats are passed in registers up to 16 bytes.  Pass
       all complex integers in registers up to 16 bytes.  More generally,
       enforce the 2-word cap for passing arguments in registers.

       Vector ABI (as implemented by the Sun VIS SDK) says that vector
       integers are passed like floats of the same size, that is in
       registers (up to 16 bytes).  Pass all vector floats like structure
       and unions.  */
    return ((type
	     && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
	     && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
	    /* Catch CTImode and TCmode.  */
	    || GET_MODE_SIZE (mode) > 16);
}
7059
7060 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7061 Update the data in CUM to advance over an argument
7062 of mode MODE and data type TYPE.
7063 TYPE is null for libcalls where that information may not be available. */
7064
7065 static void
sparc_function_arg_advance(cumulative_args_t cum_v,enum machine_mode mode,const_tree type,bool named)7066 sparc_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7067 const_tree type, bool named)
7068 {
7069 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7070 int regno, padding;
7071
7072 /* We pass false for incoming_p here, it doesn't matter. */
7073 function_arg_slotno (cum, mode, type, named, false, ®no, &padding);
7074
7075 /* If argument requires leading padding, add it. */
7076 cum->words += padding;
7077
7078 if (TARGET_ARCH32)
7079 {
7080 cum->words += (mode != BLKmode
7081 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7082 : ROUND_ADVANCE (int_size_in_bytes (type)));
7083 }
7084 else
7085 {
7086 if (type && AGGREGATE_TYPE_P (type))
7087 {
7088 int size = int_size_in_bytes (type);
7089
7090 if (size <= 8)
7091 ++cum->words;
7092 else if (size <= 16)
7093 cum->words += 2;
7094 else /* passed by reference */
7095 ++cum->words;
7096 }
7097 else
7098 {
7099 cum->words += (mode != BLKmode
7100 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7101 : ROUND_ADVANCE (int_size_in_bytes (type)));
7102 }
7103 }
7104 }
7105
7106 /* Handle the FUNCTION_ARG_PADDING macro.
7107 For the 64 bit ABI structs are always stored left shifted in their
7108 argument slot. */
7109
7110 enum direction
function_arg_padding(enum machine_mode mode,const_tree type)7111 function_arg_padding (enum machine_mode mode, const_tree type)
7112 {
7113 if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
7114 return upward;
7115
7116 /* Fall back to the default. */
7117 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
7118 }
7119
7120 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7121 Specify whether to return the return value in memory. */
7122
static bool
sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  if (TARGET_ARCH32)
    /* Original SPARC 32-bit ABI says that structures and unions,
       and quad-precision floats are returned in memory.  All other
       base types are returned in registers.

       Extended ABI (as implemented by the Sun compiler) says that
       all complex floats are returned in registers (8 FP registers
       at most for '_Complex long double').  Return all complex integers
       in registers (4 at most for '_Complex long long').

       Vector ABI (as implemented by the Sun VIS SDK) says that vector
       integers are returned like floats of the same size, that is in
       registers up to 8 bytes and in memory otherwise.  Return all
       vector floats in memory like structure and unions; note that
       they always have BLKmode like the latter.  */
    return (TYPE_MODE (type) == BLKmode
	    || TYPE_MODE (type) == TFmode
	    || (TREE_CODE (type) == VECTOR_TYPE
		&& (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
  else
    /* Original SPARC 64-bit ABI says that structures and unions
       smaller than 32 bytes are returned in registers, as well as
       all other base types.

       Extended ABI (as implemented by the Sun compiler) says that all
       complex floats are returned in registers (8 FP registers at most
       for '_Complex long double').  Return all complex integers in
       registers (4 at most for '_Complex TItype').

       Vector ABI (as implemented by the Sun VIS SDK) says that vector
       integers are returned like floats of the same size, that is in
       registers.  Return all vector floats like structure and unions;
       note that they always have BLKmode like the latter.  */
    return (TYPE_MODE (type) == BLKmode
	    && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
}
7162
7163 /* Handle the TARGET_STRUCT_VALUE target hook.
7164 Return where to find the structure return value address. */
7165
static rtx
sparc_struct_value_rtx (tree fndecl, int incoming)
{
  /* The 64-bit ABI passes the struct-return address like a normal
     (hidden) argument, so no special location is needed.  */
  if (TARGET_ARCH64)
    return 0;
  else
    {
      rtx mem;

      /* The 32-bit ABI places the struct-return address in a dedicated
	 stack slot: relative to %fp for the callee, %sp for the caller.  */
      if (incoming)
	mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
						   STRUCT_VALUE_OFFSET));
      else
	mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
						   STRUCT_VALUE_OFFSET));

      /* Only follow the SPARC ABI for fixed-size structure returns.
         Variable size structure returns are handled per the normal
         procedures in GCC. This is enabled by -mstd-struct-return */
      if (incoming == 2
	  && sparc_std_struct_return
	  && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
	  && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
	{
	  /* We must check and adjust the return address, as it is
	     optional as to whether the return object is really
	     provided.  */
	  rtx ret_reg = gen_rtx_REG (Pmode, 31);
	  rtx scratch = gen_reg_rtx (SImode);
	  rtx endlab = gen_label_rtx ();

	  /* Calculate the return object size */
	  tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
	  rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
	  /* Construct a temporary return value */
	  rtx temp_val
	    = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);

	  /* Implement SPARC 32-bit psABI callee return struct checking:

	     Fetch the instruction where we will return to and see if
	     it's an unimp instruction (the most significant 10 bits
	     will be zero).  */
	  emit_move_insn (scratch, gen_rtx_MEM (SImode,
						plus_constant (Pmode,
							       ret_reg, 8)));
	  /* Assume the size is valid and pre-adjust */
	  emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
	  /* If the low 12 bits of the fetched word match the expected
	     struct size, the caller did supply a return object: keep the
	     adjusted return address and use the caller's buffer.  */
	  emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
				   0, endlab);
	  /* Otherwise undo the adjustment and fall back to a local
	     temporary so stores through the pointer stay harmless.  */
	  emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
	  /* Write the address of the memory pointed to by temp_val into
	     the memory pointed to by mem */
	  emit_move_insn (mem, XEXP (temp_val, 0));
	  emit_label (endlab);
	}

      return mem;
    }
}
7226
7227 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7228 For v9, function return values are subject to the same rules as arguments,
7229 except that up to 32 bytes may be returned in registers. */
7230
static rtx
sparc_function_value_1 (const_tree type, enum machine_mode mode,
			bool outgoing)
{
  /* Beware that the two values are swapped here wrt function_arg.  */
  int regbase = (outgoing
		 ? SPARC_INCOMING_INT_ARG_FIRST
		 : SPARC_OUTGOING_INT_ARG_FIRST);
  enum mode_class mclass = GET_MODE_CLASS (mode);
  int regno;

  /* Vector types deserve special treatment because they are polymorphic wrt
     their mode, depending upon whether VIS instructions are enabled.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      gcc_assert ((TARGET_ARCH32 && size <= 8)
		  || (TARGET_ARCH64 && size <= 32));

      if (mode == BLKmode)
	return function_arg_vector_value (size,
					  SPARC_FP_ARG_FIRST);
      else
	mclass = MODE_FLOAT;
    }

  if (TARGET_ARCH64 && type)
    {
      /* Structures up to 32 bytes in size are returned in registers.  */
      if (TREE_CODE (type) == RECORD_TYPE)
	{
	  HOST_WIDE_INT size = int_size_in_bytes (type);
	  gcc_assert (size <= 32);

	  return function_arg_record_value (type, mode, 0, 1, regbase);
	}

      /* Unions up to 32 bytes in size are returned in integer registers.  */
      else if (TREE_CODE (type) == UNION_TYPE)
	{
	  HOST_WIDE_INT size = int_size_in_bytes (type);
	  gcc_assert (size <= 32);

	  return function_arg_union_value (size, mode, 0, regbase);
	}

      /* Objects that require it are returned in FP registers.  */
      else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
	;

      /* All other aggregate types are returned in an integer register in a
	 mode corresponding to the size of the type.  */
      else if (AGGREGATE_TYPE_P (type))
	{
	  /* All other aggregate types are passed in an integer register
	     in a mode corresponding to the size of the type.  */
	  HOST_WIDE_INT size = int_size_in_bytes (type);
	  gcc_assert (size <= 32);

	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);

	  /* ??? We probably should have made the same ABI change in
	     3.4.0 as the one we made for unions.   The latter was
	     required by the SCD though, while the former is not
	     specified, so we favored compatibility and efficiency.

	     Now we're stuck for aggregates larger than 16 bytes,
	     because OImode vanished in the meantime.  Let's not
	     try to be unduly clever, and simply follow the ABI
	     for unions in that case.  */
	  if (mode == BLKmode)
	    return function_arg_union_value (size, mode, 0, regbase);
	  else
	    mclass = MODE_INT;
	}

      /* We should only have pointer and integer types at this point.  This
	 must match sparc_promote_function_mode.  */
      else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
	mode = word_mode;
    }

  /* We should only have pointer and integer types at this point.  This must
     match sparc_promote_function_mode.  */
  else if (TARGET_ARCH32
	   && mclass == MODE_INT
	   && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
    mode = word_mode;

  /* Floating-point results live in %f0 when an FPU is available;
     everything else goes in the first int result register.  */
  if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
    regno = SPARC_FP_ARG_FIRST;
  else
    regno = regbase;

  return gen_rtx_REG (mode, regno);
}
7327
7328 /* Handle TARGET_FUNCTION_VALUE.
7329 On the SPARC, the value is found in the first "output" register, but the
7330 called function leaves it in the first "input" register. */
7331
7332 static rtx
sparc_function_value(const_tree valtype,const_tree fn_decl_or_type ATTRIBUTE_UNUSED,bool outgoing)7333 sparc_function_value (const_tree valtype,
7334 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7335 bool outgoing)
7336 {
7337 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7338 }
7339
7340 /* Handle TARGET_LIBCALL_VALUE. */
7341
7342 static rtx
sparc_libcall_value(enum machine_mode mode,const_rtx fun ATTRIBUTE_UNUSED)7343 sparc_libcall_value (enum machine_mode mode,
7344 const_rtx fun ATTRIBUTE_UNUSED)
7345 {
7346 return sparc_function_value_1 (NULL_TREE, mode, false);
7347 }
7348
7349 /* Handle FUNCTION_VALUE_REGNO_P.
7350 On the SPARC, the first "output" reg is used for integer values, and the
7351 first floating point register is used for floating point values. */
7352
7353 static bool
sparc_function_value_regno_p(const unsigned int regno)7354 sparc_function_value_regno_p (const unsigned int regno)
7355 {
7356 return (regno == 8 || regno == 32);
7357 }
7358
7359 /* Do what is necessary for `va_start'. We look at the current function
7360 to determine if stdarg or varargs is used and return the address of
7361 the first unnamed parameter. */
7362
7363 static rtx
sparc_builtin_saveregs(void)7364 sparc_builtin_saveregs (void)
7365 {
7366 int first_reg = crtl->args.info.words;
7367 rtx address;
7368 int regno;
7369
7370 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7371 emit_move_insn (gen_rtx_MEM (word_mode,
7372 gen_rtx_PLUS (Pmode,
7373 frame_pointer_rtx,
7374 GEN_INT (FIRST_PARM_OFFSET (0)
7375 + (UNITS_PER_WORD
7376 * regno)))),
7377 gen_rtx_REG (word_mode,
7378 SPARC_INCOMING_INT_ARG_FIRST + regno));
7379
7380 address = gen_rtx_PLUS (Pmode,
7381 frame_pointer_rtx,
7382 GEN_INT (FIRST_PARM_OFFSET (0)
7383 + UNITS_PER_WORD * first_reg));
7384
7385 return address;
7386 }
7387
7388 /* Implement `va_start' for stdarg. */
7389
7390 static void
sparc_va_start(tree valist,rtx nextarg)7391 sparc_va_start (tree valist, rtx nextarg)
7392 {
7393 nextarg = expand_builtin_saveregs ();
7394 std_expand_builtin_va_start (valist, nextarg);
7395 }
7396
7397 /* Implement `va_arg' for stdarg. */
7398
static tree
sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		       gimple_seq *post_p)
{
  HOST_WIDE_INT size, rsize, align;
  tree addr, incr;
  bool indirect;
  tree ptrtype = build_pointer_type (type);

  /* Arguments passed by reference occupy one pointer-sized slot.  */
  if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
    {
      indirect = true;
      size = rsize = UNITS_PER_WORD;
      align = 0;
    }
  else
    {
      indirect = false;
      size = int_size_in_bytes (type);
      /* Round the slot size up to a whole number of words.  */
      rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
      align = 0;

      if (TARGET_ARCH64)
	{
	  /* For SPARC64, objects requiring 16-byte alignment get it.  */
	  if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
	    align = 2 * UNITS_PER_WORD;

	  /* SPARC-V9 ABI states that structures up to 16 bytes in size
	     are left-justified in their slots.  */
	  if (AGGREGATE_TYPE_P (type))
	    {
	      if (size == 0)
		size = rsize = UNITS_PER_WORD;
	      else
		size = rsize;
	    }
	}
    }

  /* Round the current va_list pointer up to the required alignment.  */
  incr = valist;
  if (align)
    {
      incr = fold_build_pointer_plus_hwi (incr, align - 1);
      incr = fold_convert (sizetype, incr);
      incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
			  size_int (-align));
      incr = fold_convert (ptr_type_node, incr);
    }

  gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
  addr = incr;

  /* Big-endian: a value smaller than its slot sits at the slot's end.  */
  if (BYTES_BIG_ENDIAN && size < rsize)
    addr = fold_build_pointer_plus_hwi (incr, rsize - size);

  if (indirect)
    {
      /* The slot holds a pointer to the actual object.  */
      addr = fold_convert (build_pointer_type (ptrtype), addr);
      addr = build_va_arg_indirect_ref (addr);
    }

  /* If the address isn't aligned properly for the type, we need a temporary.
     FIXME: This is inefficient, usually we can do this in registers.  */
  else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
    {
      tree tmp = create_tmp_var (type, "va_arg_tmp");
      tree dest_addr = build_fold_addr_expr (tmp);
      tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
				   3, dest_addr, addr, size_int (rsize));
      TREE_ADDRESSABLE (tmp) = 1;
      gimplify_and_add (copy, pre_p);
      addr = dest_addr;
    }

  else
    addr = fold_convert (ptrtype, addr);

  /* Advance the va_list pointer past the consumed slot.  */
  incr = fold_build_pointer_plus_hwi (incr, rsize);
  gimplify_assign (valist, incr, post_p);

  return build_va_arg_indirect_ref (addr);
}
7482
7483 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7484 Specify whether the vector mode is supported by the hardware. */
7485
7486 static bool
sparc_vector_mode_supported_p(enum machine_mode mode)7487 sparc_vector_mode_supported_p (enum machine_mode mode)
7488 {
7489 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
7490 }
7491
7492 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7493
7494 static enum machine_mode
sparc_preferred_simd_mode(enum machine_mode mode)7495 sparc_preferred_simd_mode (enum machine_mode mode)
7496 {
7497 if (TARGET_VIS)
7498 switch (mode)
7499 {
7500 case SImode:
7501 return V2SImode;
7502 case HImode:
7503 return V4HImode;
7504 case QImode:
7505 return V8QImode;
7506
7507 default:;
7508 }
7509
7510 return word_mode;
7511 }
7512
7513 /* Return the string to output an unconditional branch to LABEL, which is
7514 the operand number of the label.
7515
7516 DEST is the destination insn (i.e. the label), INSN is the source. */
7517
const char *
output_ubranch (rtx dest, rtx insn)
{
  static char string[64];
  bool v9_form = false;
  int delta;
  char *p;

  /* Even if we are trying to use cbcond for this, evaluate
     whether we can use V9 branches as our backup plan.  */

  /* Default to "far" when instruction addresses are not yet known.  */
  delta = 5000000;
  if (INSN_ADDRESSES_SET_P ())
    delta = (INSN_ADDRESSES (INSN_UID (dest))
	     - INSN_ADDRESSES (INSN_UID (insn)));

  /* Leave some instructions for "slop".  */
  if (TARGET_V9 && delta >= -260000 && delta < 260000)
    v9_form = true;

  if (TARGET_CBCOND)
    {
      bool emit_nop = emit_cbcond_nop (insn);
      bool far = false;
      const char *rval;

      /* cbcond displacements are limited to roughly +-2KB.  */
      if (delta < -500 || delta > 500)
	far = true;

      if (far)
	{
	  if (v9_form)
	    rval = "ba,a,pt\t%%xcc, %l0";
	  else
	    rval = "b,a\t%l0";
	}
      else
	{
	  /* "cwbe %g0, %g0, L" is an always-taken compare-and-branch.  */
	  if (emit_nop)
	    rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
	  else
	    rval = "cwbe\t%%g0, %%g0, %l0";
	}
      return rval;
    }

  if (v9_form)
    strcpy (string, "ba%*,pt\t%%xcc, ");
  else
    strcpy (string, "b%*\t");

  /* Append "%l0%(" — label operand plus the delay-slot annotation.  */
  p = strchr (string, '\0');
  *p++ = '%';
  *p++ = 'l';
  *p++ = '0';
  *p++ = '%';
  *p++ = '(';
  *p = '\0';

  return string;
}
7579
7580 /* Return the string to output a conditional branch to LABEL, which is
7581 the operand number of the label. OP is the conditional expression.
7582 XEXP (OP, 0) is assumed to be a condition code register (integer or
7583 floating point) and its mode specifies what kind of comparison we made.
7584
7585 DEST is the destination insn (i.e. the label), INSN is the source.
7586
7587 REVERSED is nonzero if we should reverse the sense of the comparison.
7588
7589 ANNUL is nonzero if we should generate an annulling branch. */
7590
const char *
output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
		rtx insn)
{
  static char string[64];
  enum rtx_code code = GET_CODE (op);
  rtx cc_reg = XEXP (op, 0);
  enum machine_mode mode = GET_MODE (cc_reg);
  const char *labelno, *branch;
  int spaces = 8, far;
  char *p;

  /* v9 branches are limited to +-1MB.  If it is too far away,
     change

     bne,pt %xcc, .LC30

     to

     be,pn %xcc, .+12
      nop
     ba .LC30

     and

     fbne,a,pn %fcc2, .LC29

     to

     fbe,pt %fcc2, .+16
      nop
     ba .LC29  */

  far = TARGET_V9 && (get_attr_length (insn) >= 3);
  if (reversed ^ far)
    {
      /* Reversal of FP compares takes care -- an ordered compare
	 becomes an unordered compare and vice versa.  */
      if (mode == CCFPmode || mode == CCFPEmode)
	code = reverse_condition_maybe_unordered (code);
      else
	code = reverse_condition (code);
    }

  /* Start by writing the branch condition.  */
  if (mode == CCFPmode || mode == CCFPEmode)
    {
      switch (code)
	{
	case NE:
	  branch = "fbne";
	  break;
	case EQ:
	  branch = "fbe";
	  break;
	case GE:
	  branch = "fbge";
	  break;
	case GT:
	  branch = "fbg";
	  break;
	case LE:
	  branch = "fble";
	  break;
	case LT:
	  branch = "fbl";
	  break;
	case UNORDERED:
	  branch = "fbu";
	  break;
	case ORDERED:
	  branch = "fbo";
	  break;
	case UNGT:
	  branch = "fbug";
	  break;
	case UNLT:
	  branch = "fbul";
	  break;
	case UNEQ:
	  branch = "fbue";
	  break;
	case UNGE:
	  branch = "fbuge";
	  break;
	case UNLE:
	  branch = "fbule";
	  break;
	case LTGT:
	  branch = "fblg";
	  break;

	default:
	  gcc_unreachable ();
	}

      /* ??? !v9: FP branches cannot be preceded by another floating point
	 insn.  Because there is currently no concept of pre-delay slots,
	 we can fix this only by always emitting a nop before a floating
	 point branch.  */

      string[0] = '\0';
      if (! TARGET_V9)
	strcpy (string, "nop\n\t");
      strcat (string, branch);
    }
  else
    {
      switch (code)
	{
	case NE:
	  branch = "bne";
	  break;
	case EQ:
	  branch = "be";
	  break;
	case GE:
	  /* With no-overflow CC modes, sign testing replaces signed
	     comparison against zero.  */
	  if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
	    branch = "bpos";
	  else
	    branch = "bge";
	  break;
	case GT:
	  branch = "bg";
	  break;
	case LE:
	  branch = "ble";
	  break;
	case LT:
	  if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
	    branch = "bneg";
	  else
	    branch = "bl";
	  break;
	case GEU:
	  branch = "bgeu";
	  break;
	case GTU:
	  branch = "bgu";
	  break;
	case LEU:
	  branch = "bleu";
	  break;
	case LTU:
	  branch = "blu";
	  break;

	default:
	  gcc_unreachable ();
	}
      strcpy (string, branch);
    }
  spaces -= strlen (branch);
  p = strchr (string, '\0');

  /* Now add the annulling, the label, and a possible noop.  */
  if (annul && ! far)
    {
      strcpy (p, ",a");
      p += 2;
      spaces -= 2;
    }

  if (TARGET_V9)
    {
      rtx note;
      int v8 = 0;

      /* Fall back to the v8 (no cc-register operand) form when the
	 target is out of the v9 +-1MB displacement range.  */
      if (! far && insn && INSN_ADDRESSES_SET_P ())
	{
	  int delta = (INSN_ADDRESSES (INSN_UID (dest))
		       - INSN_ADDRESSES (INSN_UID (insn)));
	  /* Leave some instructions for "slop".  */
	  if (delta < -260000 || delta >= 260000)
	    v8 = 1;
	}

      if (mode == CCFPmode || mode == CCFPEmode)
	{
	  static char v9_fcc_labelno[] = "%%fccX, ";
	  /* Set the char indicating the number of the fcc reg to use.  */
	  v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
	  labelno = v9_fcc_labelno;
	  if (v8)
	    {
	      gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
	      labelno = "";
	    }
	}
      else if (mode == CCXmode || mode == CCX_NOOVmode)
	{
	  labelno = "%%xcc, ";
	  gcc_assert (! v8);
	}
      else
	{
	  labelno = "%%icc, ";
	  if (v8)
	    labelno = "";
	}

      /* Translate the branch-probability note into a ",pt"/",pn" hint,
	 inverted when the branch sense was reversed for a far target.  */
      if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
	{
	  strcpy (p,
		  ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
		  ? ",pt" : ",pn");
	  p += 3;
	  spaces -= 3;
	}
    }
  else
    labelno = "";

  if (spaces > 0)
    *p++ = '\t';
  else
    *p++ = ' ';
  strcpy (p, labelno);
  p = strchr (p, '\0');
  if (far)
    {
      /* Emit the inverted short branch over an unconditional "ba".  */
      strcpy (p, ".+12\n\t nop\n\tb\t");
      /* Skip the next insn if requested or
	 if we know that it will be a nop.  */
      if (annul || ! final_sequence)
	p[3] = '6';
      p += 14;
    }
  /* Finish with "%lN%#" — label operand and delay-slot annotation.  */
  *p++ = '%';
  *p++ = 'l';
  *p++ = label + '0';
  *p++ = '%';
  *p++ = '#';
  *p = '\0';

  return string;
}
7828
7829 /* Emit a library call comparison between floating point X and Y.
7830 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7831 Return the new operator to be used in the comparison sequence.
7832
7833 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7834 values as arguments instead of the TFmode registers themselves,
7835 that's why we cannot call emit_float_lib_cmp. */
7836
rtx
sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
{
  const char *qpfunc;
  rtx slot0, slot1, result, tem, tem2, libfunc;
  enum machine_mode mode;
  enum rtx_code new_comparison;

  /* Pick the libcall: direct predicate functions for the ordered
     comparisons, the generic _Q[p]_cmp for everything unordered.  */
  switch (comparison)
    {
    case EQ:
      qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
      break;

    case NE:
      qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
      break;

    case GT:
      qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
      break;

    case GE:
      qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
      break;

    case LT:
      qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
      break;

    case LE:
      qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
      break;

    case ORDERED:
    case UNORDERED:
    case UNGT:
    case UNLT:
    case UNEQ:
    case UNGE:
    case UNLE:
    case LTGT:
      qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
      break;

    default:
      gcc_unreachable ();
    }

  if (TARGET_ARCH64)
    {
      /* The _Qp_* functions take pointers to TFmode values; spill
	 non-memory operands to stack temporaries.  */
      if (MEM_P (x))
	{
	  tree expr = MEM_EXPR (x);
	  if (expr)
	    mark_addressable (expr);
	  slot0 = x;
	}
      else
	{
	  slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
	  emit_move_insn (slot0, x);
	}

      if (MEM_P (y))
	{
	  tree expr = MEM_EXPR (y);
	  if (expr)
	    mark_addressable (expr);
	  slot1 = y;
	}
      else
	{
	  slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
	  emit_move_insn (slot1, y);
	}

      libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
      emit_library_call (libfunc, LCT_NORMAL,
			 DImode, 2,
			 XEXP (slot0, 0), Pmode,
			 XEXP (slot1, 0), Pmode);
      mode = DImode;
    }
  else
    {
      /* The 32-bit _Q_* functions take the TFmode values directly.  */
      libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
      emit_library_call (libfunc, LCT_NORMAL,
			 SImode, 2,
			 x, TFmode, y, TFmode);
      mode = SImode;
    }


  /* Immediately move the result of the libcall into a pseudo
     register so reload doesn't clobber the value if it needs
     the return register for a spill reg.  */
  result = gen_reg_rtx (mode);
  emit_move_insn (result, hard_libcall_value (mode, libfunc));

  /* Decode the result.  The predicate functions return nonzero on
     success; _Q[p]_cmp returns 0 (eq), 1 (lt), 2 (gt) or 3 (unordered).  */
  switch (comparison)
    {
    default:
      return gen_rtx_NE (VOIDmode, result, const0_rtx);
    case ORDERED:
    case UNORDERED:
      new_comparison = (comparison == UNORDERED ? EQ : NE);
      return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
    case UNGT:
    case UNGE:
      new_comparison = (comparison == UNGT ? GT : NE);
      return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
    case UNLE:
      return gen_rtx_NE (VOIDmode, result, const2_rtx);
    case UNLT:
      /* UNLT holds for results 1 (lt) and 3 (unordered): test bit 0.  */
      tem = gen_reg_rtx (mode);
      if (TARGET_ARCH32)
	emit_insn (gen_andsi3 (tem, result, const1_rtx));
      else
	emit_insn (gen_anddi3 (tem, result, const1_rtx));
      return gen_rtx_NE (VOIDmode, tem, const0_rtx);
    case UNEQ:
    case LTGT:
      /* (result + 1) & 2 maps {0,3} -> 0 and {1,2} -> 2, separating
	 eq/unordered from lt/gt.  */
      tem = gen_reg_rtx (mode);
      if (TARGET_ARCH32)
	emit_insn (gen_addsi3 (tem, result, const1_rtx));
      else
	emit_insn (gen_adddi3 (tem, result, const1_rtx));
      tem2 = gen_reg_rtx (mode);
      if (TARGET_ARCH32)
	emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
      else
	emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
      new_comparison = (comparison == UNEQ ? EQ : NE);
      return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
    }

  gcc_unreachable ();
}
7976
7977 /* Generate an unsigned DImode to FP conversion. This is the same code
7978 optabs would emit if we didn't have TFmode patterns. */
7979
void
sparc_emit_floatunsdi (rtx *operands, enum machine_mode mode)
{
  rtx neglab, donelab, i0, i1, f0, in, out;

  out = operands[0];
  in = force_reg (DImode, operands[1]);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i0 = gen_reg_rtx (DImode);
  i1 = gen_reg_rtx (DImode);
  f0 = gen_reg_rtx (mode);

  /* If the value is non-negative (MSB clear), a plain signed
     conversion is already correct.  */
  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  /* MSB set: halve the value (or-ing back the low bit so rounding is
     preserved), convert, then double the result.  */
  emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
  emit_insn (gen_anddi3 (i1, in, const1_rtx));
  emit_insn (gen_iordi3 (i0, i0, i1));
  emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
8009
8010 /* Generate an FP to unsigned DImode conversion. This is the same code
8011 optabs would emit if we didn't have TFmode patterns. */
8012
void
sparc_emit_fixunsdi (rtx *operands, enum machine_mode mode)
{
  rtx neglab, donelab, i0, i1, f0, in, out, limit;

  out = operands[0];
  in = force_reg (mode, operands[1]);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i0 = gen_reg_rtx (DImode);
  i1 = gen_reg_rtx (DImode);
  limit = gen_reg_rtx (mode);
  f0 = gen_reg_rtx (mode);

  /* LIMIT = 2^63, the first value not representable as a signed DImode.  */
  emit_move_insn (limit,
		  CONST_DOUBLE_FROM_REAL_VALUE (
		    REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
  emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);

  /* Below 2^63: an ordinary signed conversion suffices.  */
  emit_insn (gen_rtx_SET (VOIDmode,
			  out,
			  gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  /* At or above 2^63: convert (IN - 2^63) and flip the sign bit back in
     with an xor of 1 << 63.  */
  emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit)));
  emit_insn (gen_rtx_SET (VOIDmode,
			  i0,
			  gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
  emit_insn (gen_movdi (i1, const1_rtx));
  emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
  emit_insn (gen_xordi3 (out, i0, i1));

  emit_label (donelab);
}
8050
8051 /* Return the string to output a compare and branch instruction to DEST.
8052 DEST is the destination insn (i.e. the label), INSN is the source,
8053 and OP is the conditional expression. */
8054
8055 const char *
output_cbcond(rtx op,rtx dest,rtx insn)8056 output_cbcond (rtx op, rtx dest, rtx insn)
8057 {
8058 enum machine_mode mode = GET_MODE (XEXP (op, 0));
8059 enum rtx_code code = GET_CODE (op);
8060 const char *cond_str, *tmpl;
8061 int far, emit_nop, len;
8062 static char string[64];
8063 char size_char;
8064
8065 /* Compare and Branch is limited to +-2KB. If it is too far away,
8066 change
8067
8068 cxbne X, Y, .LC30
8069
8070 to
8071
8072 cxbe X, Y, .+16
8073 nop
8074 ba,pt xcc, .LC30
8075 nop */
8076
8077 len = get_attr_length (insn);
8078
8079 far = len == 4;
8080 emit_nop = len == 2;
8081
8082 if (far)
8083 code = reverse_condition (code);
8084
8085 size_char = ((mode == SImode) ? 'w' : 'x');
8086
8087 switch (code)
8088 {
8089 case NE:
8090 cond_str = "ne";
8091 break;
8092
8093 case EQ:
8094 cond_str = "e";
8095 break;
8096
8097 case GE:
8098 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8099 cond_str = "pos";
8100 else
8101 cond_str = "ge";
8102 break;
8103
8104 case GT:
8105 cond_str = "g";
8106 break;
8107
8108 case LE:
8109 cond_str = "le";
8110 break;
8111
8112 case LT:
8113 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8114 cond_str = "neg";
8115 else
8116 cond_str = "l";
8117 break;
8118
8119 case GEU:
8120 cond_str = "cc";
8121 break;
8122
8123 case GTU:
8124 cond_str = "gu";
8125 break;
8126
8127 case LEU:
8128 cond_str = "leu";
8129 break;
8130
8131 case LTU:
8132 cond_str = "cs";
8133 break;
8134
8135 default:
8136 gcc_unreachable ();
8137 }
8138
8139 if (far)
8140 {
8141 int veryfar = 1, delta;
8142
8143 if (INSN_ADDRESSES_SET_P ())
8144 {
8145 delta = (INSN_ADDRESSES (INSN_UID (dest))
8146 - INSN_ADDRESSES (INSN_UID (insn)));
8147 /* Leave some instructions for "slop". */
8148 if (delta >= -260000 && delta < 260000)
8149 veryfar = 0;
8150 }
8151
8152 if (veryfar)
8153 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8154 else
8155 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8156 }
8157 else
8158 {
8159 if (emit_nop)
8160 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8161 else
8162 tmpl = "c%cb%s\t%%1, %%2, %%3";
8163 }
8164
8165 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8166
8167 return string;
8168 }
8169
/* Return the string to output a conditional branch to LABEL, testing
   register REG.  LABEL is the operand number of the label; REG is the
   operand number of the reg.  OP is the conditional expression.  The mode
   of REG says what kind of comparison we made.

   DEST is the destination insn (i.e. the label), INSN is the source.

   REVERSED is nonzero if we should reverse the sense of the comparison.

   ANNUL is nonzero if we should generate an annulling branch.

   NOTE: the returned buffer is static, so this function is not
   reentrant.  */

const char *
output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
		 int annul, rtx insn)
{
  static char string[64];
  enum rtx_code code = GET_CODE (op);
  enum machine_mode mode = GET_MODE (XEXP (op, 0));
  rtx note;
  int far;
  char *p;

  /* branch on register are limited to +-128KB.  If it is too far away,
     change

     brnz,pt %g1, .LC30

     to

     brz,pn %g1, .+12
      nop
     ba,pt %xcc, .LC30

     and

     brgez,a,pn %o1, .LC29

     to

     brlz,pt %o1, .+16
      nop
     ba,pt %xcc, .LC29  */

  far = get_attr_length (insn) >= 3;

  /* If not floating-point or if EQ or NE, we can just reverse the code.
     In the far case the test is inverted so that the short branch skips
     over the unconditional jump; REVERSED and FAR therefore cancel.  */
  if (reversed ^ far)
    code = reverse_condition (code);

  /* Only 64 bit versions of these instructions exist.  */
  gcc_assert (mode == DImode);

  /* Start by writing the branch condition.  */

  switch (code)
    {
    case NE:
      strcpy (string, "brnz");
      break;

    case EQ:
      strcpy (string, "brz");
      break;

    case GE:
      strcpy (string, "brgez");
      break;

    case LT:
      strcpy (string, "brlz");
      break;

    case LE:
      strcpy (string, "brlez");
      break;

    case GT:
      strcpy (string, "brgz");
      break;

    default:
      gcc_unreachable ();
    }

  /* P walks the end of the string as suffixes and operands are
     appended.  */
  p = strchr (string, '\0');

  /* Now add the annulling, reg, label, and nop.  */
  if (annul && ! far)
    {
      strcpy (p, ",a");
      p += 2;
    }

  /* Branch-prediction hint: ,pt (taken) or ,pn (not taken), inverted
     again when the condition itself was inverted for the far case.  */
  if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
    {
      strcpy (p,
	      ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
	      ? ",pt" : ",pn");
      p += 3;
    }

  /* Separate mnemonic and operands with a tab for short mnemonics,
     a space once the opcode field is already 8 columns wide.  */
  *p = p < string + 8 ? '\t' : ' ';
  p++;
  *p++ = '%';
  *p++ = '0' + reg;
  *p++ = ',';
  *p++ = ' ';
  if (far)
    {
      int veryfar = 1, delta;

      if (INSN_ADDRESSES_SET_P ())
	{
	  delta = (INSN_ADDRESSES (INSN_UID (dest))
		   - INSN_ADDRESSES (INSN_UID (insn)));
	  /* Leave some instructions for "slop".  */
	  if (delta >= -260000 && delta < 260000)
	    veryfar = 0;
	}

      strcpy (p, ".+12\n\t nop\n\t");
      /* Skip the next insn if requested or
	 if we know that it will be a nop.  */
      if (annul || ! final_sequence)
	p[3] = '6';	/* Patch ".+12" into ".+16" in place.  */
      p += 12;
      if (veryfar)
	{
	  strcpy (p, "b\t");
	  p += 2;
	}
      else
	{
	  strcpy (p, "ba,pt\t%%xcc, ");
	  p += 13;
	}
    }
  /* Emit the label operand and the '%#' escape (delay-slot handling).  */
  *p++ = '%';
  *p++ = 'l';
  *p++ = '0' + label;
  *p++ = '%';
  *p++ = '#';
  *p = '\0';

  return string;
}
8316
/* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7].
   Such instructions cannot be used in the delay slot of return insn on v9.
   If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
   */

static int
epilogue_renumber (register rtx *where, int test)
{
  register const char *fmt;
  register int i;
  register enum rtx_code code;

  if (*where == 0)
    return 0;

  code = GET_CODE (*where);

  switch (code)
    {
    case REG:
      if (REGNO (*where) >= 8 && REGNO (*where) < 24)      /* oX or lX */
	return 1;
      if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
	*where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
      /* FALLTHRU - a (possibly renamed) REG is a leaf rtx: nothing
	 further to scan, so share the "return 0" below.  */
    case SCRATCH:
    case CC0:
    case PC:
    case CONST_INT:
    case CONST_DOUBLE:
      return 0;

      /* Do not replace the frame pointer with the stack pointer because
	 it can cause the delayed instruction to load below the stack.
	 This occurs when instructions like:

	 (set (reg/i:SI 24 %i0)
	     (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
		       (const_int -20 [0xffffffec])) 0))

	 are in the return delayed slot.  */
    case PLUS:
      if (GET_CODE (XEXP (*where, 0)) == REG
	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
	  && (GET_CODE (XEXP (*where, 1)) != CONST_INT
	      || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
	return 1;
      break;

    case MEM:
      if (SPARC_STACK_BIAS
	  && GET_CODE (XEXP (*where, 0)) == REG
	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
	return 1;
      break;

    default:
      break;
    }

  /* Recurse over the sub-expressions of every other rtx, following the
     per-code operand format string.  */
  fmt = GET_RTX_FORMAT (code);

  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;
	  for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
	    if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
	      return 1;
	}
      else if (fmt[i] == 'e'
	       && epilogue_renumber (&(XEXP (*where, i)), test))
	return 1;
    }
  return 0;
}
8393
/* Leaf functions and non-leaf functions have different needs.  */

/* Register allocation order to use for a leaf function.  */
static const int
reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;

/* Register allocation order to use for a non-leaf function.  */
static const int
reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;

/* Indexed by the non-leaf flag used in order_regs_for_local_alloc:
   [0] = leaf order, [1] = non-leaf order.  */
static const int *const reg_alloc_orders[] = {
  reg_leaf_alloc_order,
  reg_nonleaf_alloc_order};
8405
8406 void
order_regs_for_local_alloc(void)8407 order_regs_for_local_alloc (void)
8408 {
8409 static int last_order_nonleaf = 1;
8410
8411 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8412 {
8413 last_order_nonleaf = !last_order_nonleaf;
8414 memcpy ((char *) reg_alloc_order,
8415 (const char *) reg_alloc_orders[last_order_nonleaf],
8416 FIRST_PSEUDO_REGISTER * sizeof (int));
8417 }
8418 }
8419
8420 /* Return 1 if REG and MEM are legitimate enough to allow the various
8421 mem<-->reg splits to be run. */
8422
8423 int
sparc_splitdi_legitimate(rtx reg,rtx mem)8424 sparc_splitdi_legitimate (rtx reg, rtx mem)
8425 {
8426 /* Punt if we are here by mistake. */
8427 gcc_assert (reload_completed);
8428
8429 /* We must have an offsettable memory reference. */
8430 if (! offsettable_memref_p (mem))
8431 return 0;
8432
8433 /* If we have legitimate args for ldd/std, we do not want
8434 the split to happen. */
8435 if ((REGNO (reg) % 2) == 0
8436 && mem_min_alignment (mem, 8))
8437 return 0;
8438
8439 /* Success. */
8440 return 1;
8441 }
8442
8443 /* Like sparc_splitdi_legitimate but for REG <--> REG moves. */
8444
8445 int
sparc_split_regreg_legitimate(rtx reg1,rtx reg2)8446 sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
8447 {
8448 int regno1, regno2;
8449
8450 if (GET_CODE (reg1) == SUBREG)
8451 reg1 = SUBREG_REG (reg1);
8452 if (GET_CODE (reg1) != REG)
8453 return 0;
8454 regno1 = REGNO (reg1);
8455
8456 if (GET_CODE (reg2) == SUBREG)
8457 reg2 = SUBREG_REG (reg2);
8458 if (GET_CODE (reg2) != REG)
8459 return 0;
8460 regno2 = REGNO (reg2);
8461
8462 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8463 return 1;
8464
8465 if (TARGET_VIS3)
8466 {
8467 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8468 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8469 return 1;
8470 }
8471
8472 return 0;
8473 }
8474
8475 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8476 This makes them candidates for using ldd and std insns.
8477
8478 Note reg1 and reg2 *must* be hard registers. */
8479
8480 int
registers_ok_for_ldd_peep(rtx reg1,rtx reg2)8481 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8482 {
8483 /* We might have been passed a SUBREG. */
8484 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8485 return 0;
8486
8487 if (REGNO (reg1) % 2 != 0)
8488 return 0;
8489
8490 /* Integer ldd is deprecated in SPARC V9 */
8491 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8492 return 0;
8493
8494 return (REGNO (reg1) == REGNO (reg2) - 1);
8495 }
8496
8497 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8498 an ldd or std insn.
8499
8500 This can only happen when addr1 and addr2, the addresses in mem1
8501 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8502 addr1 must also be aligned on a 64-bit boundary.
8503
8504 Also iff dependent_reg_rtx is not null it should not be used to
8505 compute the address for mem1, i.e. we cannot optimize a sequence
8506 like:
8507 ld [%o0], %o0
8508 ld [%o0 + 4], %o1
8509 to
8510 ldd [%o0], %o0
8511 nor:
8512 ld [%g3 + 4], %g3
8513 ld [%g3], %g2
8514 to
8515 ldd [%g3], %g2
8516
8517 But, note that the transformation from:
8518 ld [%g2 + 4], %g3
8519 ld [%g2], %g2
8520 to
8521 ldd [%g2], %g2
8522 is perfectly fine. Thus, the peephole2 patterns always pass us
8523 the destination register of the first load, never the second one.
8524
8525 For stores we don't have a similar problem, so dependent_reg_rtx is
8526 NULL_RTX. */
8527
8528 int
mems_ok_for_ldd_peep(rtx mem1,rtx mem2,rtx dependent_reg_rtx)8529 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8530 {
8531 rtx addr1, addr2;
8532 unsigned int reg1;
8533 HOST_WIDE_INT offset1;
8534
8535 /* The mems cannot be volatile. */
8536 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8537 return 0;
8538
8539 /* MEM1 should be aligned on a 64-bit boundary. */
8540 if (MEM_ALIGN (mem1) < 64)
8541 return 0;
8542
8543 addr1 = XEXP (mem1, 0);
8544 addr2 = XEXP (mem2, 0);
8545
8546 /* Extract a register number and offset (if used) from the first addr. */
8547 if (GET_CODE (addr1) == PLUS)
8548 {
8549 /* If not a REG, return zero. */
8550 if (GET_CODE (XEXP (addr1, 0)) != REG)
8551 return 0;
8552 else
8553 {
8554 reg1 = REGNO (XEXP (addr1, 0));
8555 /* The offset must be constant! */
8556 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8557 return 0;
8558 offset1 = INTVAL (XEXP (addr1, 1));
8559 }
8560 }
8561 else if (GET_CODE (addr1) != REG)
8562 return 0;
8563 else
8564 {
8565 reg1 = REGNO (addr1);
8566 /* This was a simple (mem (reg)) expression. Offset is 0. */
8567 offset1 = 0;
8568 }
8569
8570 /* Make sure the second address is a (mem (plus (reg) (const_int). */
8571 if (GET_CODE (addr2) != PLUS)
8572 return 0;
8573
8574 if (GET_CODE (XEXP (addr2, 0)) != REG
8575 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8576 return 0;
8577
8578 if (reg1 != REGNO (XEXP (addr2, 0)))
8579 return 0;
8580
8581 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8582 return 0;
8583
8584 /* The first offset must be evenly divisible by 8 to ensure the
8585 address is 64 bit aligned. */
8586 if (offset1 % 8 != 0)
8587 return 0;
8588
8589 /* The offset for the second addr must be 4 more than the first addr. */
8590 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8591 return 0;
8592
8593 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8594 instructions. */
8595 return 1;
8596 }
8597
8598 /* Return 1 if reg is a pseudo, or is the first register in
8599 a hard register pair. This makes it suitable for use in
8600 ldd and std insns. */
8601
8602 int
register_ok_for_ldd(rtx reg)8603 register_ok_for_ldd (rtx reg)
8604 {
8605 /* We might have been passed a SUBREG. */
8606 if (!REG_P (reg))
8607 return 0;
8608
8609 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8610 return (REGNO (reg) % 2 == 0);
8611
8612 return 1;
8613 }
8614
8615 /* Return 1 if OP, a MEM, has an address which is known to be
8616 aligned to an 8-byte boundary. */
8617
8618 int
memory_ok_for_ldd(rtx op)8619 memory_ok_for_ldd (rtx op)
8620 {
8621 /* In 64-bit mode, we assume that the address is word-aligned. */
8622 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8623 return 0;
8624
8625 if (! can_create_pseudo_p ()
8626 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8627 return 0;
8628
8629 return 1;
8630 }
8631
/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P.
   Accept exactly the punctuation characters handled by
   sparc_print_operand.  */

static bool
sparc_print_operand_punct_valid_p (unsigned char code)
{
  switch (code)
    {
    case '#':
    case '*':
    case '(':
    case ')':
    case '_':
    case '&':
      return true;
    default:
      return false;
    }
}
8647
/* Implement TARGET_PRINT_OPERAND.
   Print operand X (an rtx) in assembler syntax to file FILE.
   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
   For `%' followed by punctuation, CODE is the punctuation and X is null.  */

static void
sparc_print_operand (FILE *file, rtx x, int code)
{
  switch (code)
    {
    case '#':
      /* Output an insn in a delay slot.  */
      if (final_sequence)
	sparc_indent_opcode = 1;
      else
	fputs ("\n\t nop", file);
      return;
    case '*':
      /* Output an annul flag if there's nothing for the delay slot and we
	 are optimizing.  This is always used with '(' below.
	 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
	 this is a dbx bug.  So, we only do this when optimizing.
	 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
	 Always emit a nop in case the next instruction is a branch.  */
      if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
	fputs (",a", file);
      return;
    case '(':
      /* Output a 'nop' if there's nothing for the delay slot and we are
	 not optimizing.  This is always used with '*' above.  */
      if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
	fputs ("\n\t nop", file);
      else if (final_sequence)
	sparc_indent_opcode = 1;
      return;
    case ')':
      /* Output the right displacement from the saved PC on function return.
	 The caller may have placed an "unimp" insn immediately after the call
	 so we have to account for it.  This insn is used in the 32-bit ABI
	 when calling a function that returns a non zero-sized structure.  The
	 64-bit ABI doesn't have it.  Be careful to have this test be the same
	 as that for the call.  The exception is when sparc_std_struct_return
	 is enabled, the psABI is followed exactly and the adjustment is made
	 by the code in sparc_struct_value_rtx.  The call emitted is the same
	 when sparc_std_struct_return is enabled.  */
      if (!TARGET_ARCH64
	  && cfun->returns_struct
	  && !sparc_std_struct_return
	  && DECL_SIZE (DECL_RESULT (current_function_decl))
	  && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
	      == INTEGER_CST
	  && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
	fputs ("12", file);
      else
	fputc ('8', file);
      return;
    case '_':
      /* Output the Embedded Medium/Anywhere code model base register.  */
      fputs (EMBMEDANY_BASE_REG, file);
      return;
    case '&':
      /* Print some local dynamic TLS name.  */
      assemble_name (file, get_some_local_dynamic_name ());
      return;

    case 'Y':
      /* Adjust the operand to take into account a RESTORE operation.  */
      if (GET_CODE (x) == CONST_INT)
	break;
      else if (GET_CODE (x) != REG)
	output_operand_lossage ("invalid %%Y operand");
      else if (REGNO (x) < 8)
	fputs (reg_names[REGNO (x)], file);
      else if (REGNO (x) >= 24 && REGNO (x) < 32)
	/* Map an input register (%i0-%i7, regno 24-31) to the
	   corresponding output register (%o0-%o7, regno 8-15).  */
	fputs (reg_names[REGNO (x)-16], file);
      else
	output_operand_lossage ("invalid %%Y operand");
      return;
    case 'L':
      /* Print out the low order register name of a register pair.  */
      if (WORDS_BIG_ENDIAN)
	fputs (reg_names[REGNO (x)+1], file);
      else
	fputs (reg_names[REGNO (x)], file);
      return;
    case 'H':
      /* Print out the high order register name of a register pair.  */
      if (WORDS_BIG_ENDIAN)
	fputs (reg_names[REGNO (x)], file);
      else
	fputs (reg_names[REGNO (x)+1], file);
      return;
    case 'R':
      /* Print out the second register name of a register pair or quad.
	 I.e., R (%o0) => %o1.  */
      fputs (reg_names[REGNO (x)+1], file);
      return;
    case 'S':
      /* Print out the third register name of a register quad.
	 I.e., S (%o0) => %o2.  */
      fputs (reg_names[REGNO (x)+2], file);
      return;
    case 'T':
      /* Print out the fourth register name of a register quad.
	 I.e., T (%o0) => %o3.  */
      fputs (reg_names[REGNO (x)+3], file);
      return;
    case 'x':
      /* Print a condition code register.  */
      if (REGNO (x) == SPARC_ICC_REG)
	{
	  /* We don't handle CC[X]_NOOVmode because they're not supposed
	     to occur here.  */
	  if (GET_MODE (x) == CCmode)
	    fputs ("%icc", file);
	  else if (GET_MODE (x) == CCXmode)
	    fputs ("%xcc", file);
	  else
	    gcc_unreachable ();
	}
      else
	/* %fccN register */
	fputs (reg_names[REGNO (x)], file);
      return;
    case 'm':
      /* Print the operand's address only.  */
      output_address (XEXP (x, 0));
      return;
    case 'r':
      /* In this case we need a register.  Use %g0 if the
	 operand is const0_rtx.  */
      if (x == const0_rtx
	  || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
	{
	  fputs ("%g0", file);
	  return;
	}
      else
	break;

    case 'A':
      /* Print the mnemonic for a two-operand logic rtx.  */
      switch (GET_CODE (x))
	{
	case IOR: fputs ("or", file); break;
	case AND: fputs ("and", file); break;
	case XOR: fputs ("xor", file); break;
	default: output_operand_lossage ("invalid %%A operand");
	}
      return;

    case 'B':
      /* Like %A but the negated ("n") forms of the mnemonics.  */
      switch (GET_CODE (x))
	{
	case IOR: fputs ("orn", file); break;
	case AND: fputs ("andn", file); break;
	case XOR: fputs ("xnor", file); break;
	default: output_operand_lossage ("invalid %%B operand");
	}
      return;

      /* This is used by the conditional move instructions.  */
    case 'C':
      {
	enum rtx_code rc = GET_CODE (x);
	
	switch (rc)
	  {
	  case NE: fputs ("ne", file); break;
	  case EQ: fputs ("e", file); break;
	  case GE: fputs ("ge", file); break;
	  case GT: fputs ("g", file); break;
	  case LE: fputs ("le", file); break;
	  case LT: fputs ("l", file); break;
	  case GEU: fputs ("geu", file); break;
	  case GTU: fputs ("gu", file); break;
	  case LEU: fputs ("leu", file); break;
	  case LTU: fputs ("lu", file); break;
	  case LTGT: fputs ("lg", file); break;
	  case UNORDERED: fputs ("u", file); break;
	  case ORDERED: fputs ("o", file); break;
	  case UNLT: fputs ("ul", file); break;
	  case UNLE: fputs ("ule", file); break;
	  case UNGT: fputs ("ug", file); break;
	  case UNGE: fputs ("uge", file); break;
	  case UNEQ: fputs ("ue", file); break;
	  default: output_operand_lossage ("invalid %%C operand");
	  }
	return;
      }

      /* This are used by the movr instruction pattern.  */
    case 'D':
      {
	enum rtx_code rc = GET_CODE (x);
	switch (rc)
	  {
	  case NE: fputs ("ne", file); break;
	  case EQ: fputs ("e", file); break;
	  case GE: fputs ("gez", file); break;
	  case LT: fputs ("lz", file); break;
	  case LE: fputs ("lez", file); break;
	  case GT: fputs ("gz", file); break;
	  default: output_operand_lossage ("invalid %%D operand");
	  }
	return;
      }

    case 'b':
      {
	/* Print a sign-extended character.  */
	int i = trunc_int_for_mode (INTVAL (x), QImode);
	fprintf (file, "%d", i);
	return;
      }

    case 'f':
      /* Operand must be a MEM; write its address.  */
      if (GET_CODE (x) != MEM)
	output_operand_lossage ("invalid %%f operand");
      output_address (XEXP (x, 0));
      return;

    case 's':
      {
	/* Print a sign-extended 32-bit value.  */
	HOST_WIDE_INT i;
	if (GET_CODE(x) == CONST_INT)
	  i = INTVAL (x);
	else if (GET_CODE(x) == CONST_DOUBLE)
	  i = CONST_DOUBLE_LOW (x);
	else
	  {
	    output_operand_lossage ("invalid %%s operand");
	    return;
	  }
	i = trunc_int_for_mode (i, SImode);
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
	return;
      }

    case 0:
      /* Do nothing special.  */
      break;

    default:
      /* Undocumented flag.  */
      output_operand_lossage ("invalid operand output code");
    }

  /* No (or generic) operand code: print X according to its form.  */
  if (GET_CODE (x) == REG)
    fputs (reg_names[REGNO (x)], file);
  else if (GET_CODE (x) == MEM)
    {
      fputc ('[', file);
	/* Poor Sun assembler doesn't understand absolute addressing.  */
      if (CONSTANT_P (XEXP (x, 0)))
	fputs ("%g0+", file);
      output_address (XEXP (x, 0));
      fputc (']', file);
    }
  else if (GET_CODE (x) == HIGH)
    {
      fputs ("%hi(", file);
      output_addr_const (file, XEXP (x, 0));
      fputc (')', file);
    }
  else if (GET_CODE (x) == LO_SUM)
    {
      sparc_print_operand (file, XEXP (x, 0), 0);
      if (TARGET_CM_MEDMID)
	fputs ("+%l44(", file);
      else
	fputs ("+%lo(", file);
      output_addr_const (file, XEXP (x, 1));
      fputc (')', file);
    }
  else if (GET_CODE (x) == CONST_DOUBLE
	   && (GET_MODE (x) == VOIDmode
	       || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
    {
      /* A 64-bit integer constant is printable only when it fits in
	 32 bits (zero- or sign-extended).  */
      if (CONST_DOUBLE_HIGH (x) == 0)
	fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
      else if (CONST_DOUBLE_HIGH (x) == -1
	       && CONST_DOUBLE_LOW (x) < 0)
	fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
      else
	output_operand_lossage ("long long constant not a valid immediate operand");
    }
  else if (GET_CODE (x) == CONST_DOUBLE)
    output_operand_lossage ("floating point constant not a valid immediate operand");
  else { output_addr_const (file, x); }
}
8940
/* Implement TARGET_PRINT_OPERAND_ADDRESS.
   Print a memory address X in assembler syntax to FILE.  */

static void
sparc_print_operand_address (FILE *file, rtx x)
{
  register rtx base, index = 0;
  int offset = 0;
  register rtx addr = x;

  if (REG_P (addr))
    fputs (reg_names[REGNO (addr)], file);
  else if (GET_CODE (addr) == PLUS)
    {
      /* Split the PLUS into base + constant offset, or base + index.  */
      if (CONST_INT_P (XEXP (addr, 0)))
	offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
      else if (CONST_INT_P (XEXP (addr, 1)))
	offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
      else
	base = XEXP (addr, 0), index = XEXP (addr, 1);
      if (GET_CODE (base) == LO_SUM)
	{
	  gcc_assert (USE_AS_OFFSETABLE_LO10
		      && TARGET_ARCH64
		      && ! TARGET_CM_MEDMID);
	  output_operand (XEXP (base, 0), 0);
	  fputs ("+%lo(", file);
	  output_address (XEXP (base, 1));
	  fprintf (file, ")+%d", offset);
	}
      else
	{
	  fputs (reg_names[REGNO (base)], file);
	  if (index == 0)
	    /* %+d prints an explicit sign, so "reg+off" or "reg-off".  */
	    fprintf (file, "%+d", offset);
	  else if (REG_P (index))
	    fprintf (file, "+%s", reg_names[REGNO (index)]);
	  else if (GET_CODE (index) == SYMBOL_REF
		   || GET_CODE (index) == LABEL_REF
		   || GET_CODE (index) == CONST)
	    fputc ('+', file), output_addr_const (file, index);
	  else gcc_unreachable ();
	}
    }
  else if (GET_CODE (addr) == MINUS
	   && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
    {
      /* A difference against a label: emit "expr-(label-.)".  */
      output_addr_const (file, XEXP (addr, 0));
      fputs ("-(", file);
      output_addr_const (file, XEXP (addr, 1));
      fputs ("-.)", file);
    }
  else if (GET_CODE (addr) == LO_SUM)
    {
      output_operand (XEXP (addr, 0), 0);
      if (TARGET_CM_MEDMID)
	fputs ("+%l44(", file);
      else
	fputs ("+%lo(", file);
      output_address (XEXP (addr, 1));
      fputc (')', file);
    }
  else if (flag_pic
	   && GET_CODE (addr) == CONST
	   && GET_CODE (XEXP (addr, 0)) == MINUS
	   && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
	   && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
	   && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
    {
      /* PIC form (const (minus sym (const (minus lab pc)))).  */
      addr = XEXP (addr, 0);
      output_addr_const (file, XEXP (addr, 0));
      /* Group the args of the second CONST in parenthesis.  */
      fputs ("-(", file);
      /* Skip past the second CONST--it does nothing for us.  */
      output_addr_const (file, XEXP (XEXP (addr, 1), 0));
      /* Close the parenthesis.  */
      fputc (')', file);
    }
  else
    {
      output_addr_const (file, addr);
    }
}
9023
9024 /* Target hook for assembling integer objects. The sparc version has
9025 special handling for aligned DI-mode objects. */
9026
9027 static bool
sparc_assemble_integer(rtx x,unsigned int size,int aligned_p)9028 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9029 {
9030 /* ??? We only output .xword's for symbols and only then in environments
9031 where the assembler can handle them. */
9032 if (aligned_p && size == 8
9033 && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
9034 {
9035 if (TARGET_V9)
9036 {
9037 assemble_integer_with_op ("\t.xword\t", x);
9038 return true;
9039 }
9040 else
9041 {
9042 assemble_aligned_integer (4, const0_rtx);
9043 assemble_aligned_integer (4, x);
9044 return true;
9045 }
9046 }
9047 return default_assemble_integer (x, size, aligned_p);
9048 }
9049
/* Return the value of a code used in the .proc pseudo-op that says
   what kind of result this function returns.  For non-C types, we pick
   the closest C type.  */

/* Fallback definitions for the C type sizes used by sparc_type_code
   below; a target configuration normally provides its own values.  */

#ifndef SHORT_TYPE_SIZE
#define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
#endif

#ifndef INT_TYPE_SIZE
#define INT_TYPE_SIZE BITS_PER_WORD
#endif

#ifndef LONG_TYPE_SIZE
#define LONG_TYPE_SIZE BITS_PER_WORD
#endif

#ifndef LONG_LONG_TYPE_SIZE
#define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
#endif

#ifndef FLOAT_TYPE_SIZE
#define FLOAT_TYPE_SIZE BITS_PER_WORD
#endif

#ifndef DOUBLE_TYPE_SIZE
#define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
#endif

#ifndef LONG_DOUBLE_TYPE_SIZE
#define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
#endif
9081
unsigned long
sparc_type_code (register tree type)
{
  register unsigned long qualifiers = 0;
  register unsigned shift;

  /* Only the first 30 bits of the qualifier are valid.  We must refrain from
     setting more, since some assemblers will give an error for this.  Also,
     we must be careful to avoid shifts of 32 bits or more to avoid getting
     unpredictable results.  */

  /* Each level of derivation (array-of, function-returning, pointer-to)
     contributes a 2-bit code starting at bit 6; TYPE is peeled one level
     per iteration until a basic type supplies the low-order code.  */
  for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
    {
      switch (TREE_CODE (type))
	{
	case ERROR_MARK:
	  return qualifiers;

	case ARRAY_TYPE:
	  qualifiers |= (3 << shift);
	  break;

	case FUNCTION_TYPE:
	case METHOD_TYPE:
	  qualifiers |= (2 << shift);
	  break;

	case POINTER_TYPE:
	case REFERENCE_TYPE:
	case OFFSET_TYPE:
	  qualifiers |= (1 << shift);
	  break;

	case RECORD_TYPE:
	  return (qualifiers | 8);

	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  return (qualifiers | 9);

	case ENUMERAL_TYPE:
	  return (qualifiers | 10);

	case VOID_TYPE:
	  return (qualifiers | 16);

	case INTEGER_TYPE:
	  /* If this is a range type, consider it to be the underlying
	     type.  */
	  if (TREE_TYPE (type) != 0)
	    break;

	  /* Carefully distinguish all the standard types of C,
	     without messing up if the language is not C.  We do this by
	     testing TYPE_PRECISION and TYPE_UNSIGNED.  The old code used to
	     look at both the names and the above fields, but that's redundant.
	     Any type whose size is between two C types will be considered
	     to be the wider of the two types.  Also, we do not have a
	     special code to use for "long long", so anything wider than
	     long is treated the same.  Note that we can't distinguish
	     between "int" and "long" in this code if they are the same
	     size, but that's fine, since neither can the assembler.  */

	  if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
	    return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));

	  else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
	    return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));

	  else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
	    return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));

	  else
	    return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));

	case REAL_TYPE:
	  /* If this is a range type, consider it to be the underlying
	     type.  */
	  if (TREE_TYPE (type) != 0)
	    break;

	  /* Carefully distinguish all the standard types of C,
	     without messing up if the language is not C.  */

	  if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
	    return (qualifiers | 6);

	  else
	    return (qualifiers | 7);

	case COMPLEX_TYPE:	/* GNU Fortran COMPLEX type.  */
	  /* ??? We need to distinguish between double and float complex types,
	     but I don't know how yet because I can't reach this code from
	     existing front-ends.  */
	  return (qualifiers | 7);	/* Who knows? */

	case VECTOR_TYPE:
	case BOOLEAN_TYPE:	/* Boolean truth value type.  */
	case LANG_TYPE:
	case NULLPTR_TYPE:
	  return qualifiers;

	default:
	  gcc_unreachable ();		/* Not a type! */
        }
    }

  return qualifiers;
}
9191
9192 /* Nested function support. */
9193
9194 /* Emit RTL insns to initialize the variable parts of a trampoline.
9195 FNADDR is an RTX for the address of the function's pure code.
9196 CXT is an RTX for the static chain value for the function.
9197
9198 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9199 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9200 (to store insns). This is a bit excessive. Perhaps a different
9201 mechanism would be better here.
9202
9203 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9204
static void
sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
{
  /* SPARC 32-bit trampoline:

	sethi	%hi(fn), %g1
	sethi	%hi(static), %g2
	jmp	%g1+%lo(fn)
	or	%g2, %lo(static), %g2

    SETHI i,r  = 00rr rrr1 00ii iiii iiii iiii iiii iiii
    JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
   */

  /* Word 0: "sethi %hi(fn), %g1".  0x03000000 is the SETHI-to-%g1 opcode;
     the high 22 bits of FNADDR (FNADDR >> 10) form the immediate.  */
  emit_move_insn
    (adjust_address (m_tramp, SImode, 0),
     expand_binop (SImode, ior_optab,
		   expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
		   GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
		   NULL_RTX, 1, OPTAB_DIRECT));

  /* Word 1: "sethi %hi(static), %g2" (opcode 0x05000000), same scheme
     applied to the static chain value CXT.  */
  emit_move_insn
    (adjust_address (m_tramp, SImode, 4),
     expand_binop (SImode, ior_optab,
		   expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
		   GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
		   NULL_RTX, 1, OPTAB_DIRECT));

  /* Word 2: "jmp %g1+%lo(fn)" (opcode 0x81c06000); the low 10 bits of
     FNADDR (FNADDR & 0x3ff) form the immediate.  */
  emit_move_insn
    (adjust_address (m_tramp, SImode, 8),
     expand_binop (SImode, ior_optab,
		   expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
		   GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
		   NULL_RTX, 1, OPTAB_DIRECT));

  /* Word 3: "or %g2, %lo(static), %g2" (opcode 0x8410a000), executed in
     the delay slot of the jmp above.  */
  emit_move_insn
    (adjust_address (m_tramp, SImode, 12),
     expand_binop (SImode, ior_optab,
		   expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
		   GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
		   NULL_RTX, 1, OPTAB_DIRECT));

  /* On UltraSPARC a flush flushes an entire cache line.  The trampoline is
     aligned on a 16 byte boundary so one flush clears it all.  */
  emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 0))));
  /* On the other CPUs one flush may not cover all 16 bytes, so also flush
     the second half of the trampoline.  */
  if (sparc_cpu != PROCESSOR_ULTRASPARC
      && sparc_cpu != PROCESSOR_ULTRASPARC3
      && sparc_cpu != PROCESSOR_NIAGARA
      && sparc_cpu != PROCESSOR_NIAGARA2
      && sparc_cpu != PROCESSOR_NIAGARA3
      && sparc_cpu != PROCESSOR_NIAGARA4)
    emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 8))));

  /* Call __enable_execute_stack after writing onto the stack to make sure
     the stack address is accessible.  */
#ifdef HAVE_ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
                     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif

}
9266
9267 /* The 64-bit version is simpler because it makes more sense to load the
9268 values as "immediate" data out of the trampoline. It's also easier since
9269 we can read the PC without clobbering a register. */
9270
static void
sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
{
  /* SPARC 64-bit trampoline:

	rd	%pc, %g1
	ldx	[%g1+24], %g5
	jmp	%g5
	ldx	[%g1+16], %g5
	+16 bytes data
   */

  /* The four fixed instruction words, in order: rd %pc, %g1 then
     ldx [%g1+24], %g5 then jmp %g5 then (delay slot) ldx [%g1+16], %g5.
     The two ldx offsets pick up FNADDR and CXT from the data area
     stored below.  */
  emit_move_insn (adjust_address (m_tramp, SImode, 0),
		  GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
  emit_move_insn (adjust_address (m_tramp, SImode, 4),
		  GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
  emit_move_insn (adjust_address (m_tramp, SImode, 8),
		  GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
  emit_move_insn (adjust_address (m_tramp, SImode, 12),
		  GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
  /* Data area: static chain at +16, function address at +24.  */
  emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
  emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
  emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));

  /* On CPUs where one flush may not cover the whole 32-byte trampoline,
     flush the second cache line as well.  */
  if (sparc_cpu != PROCESSOR_ULTRASPARC
      && sparc_cpu != PROCESSOR_ULTRASPARC3
      && sparc_cpu != PROCESSOR_NIAGARA
      && sparc_cpu != PROCESSOR_NIAGARA2
      && sparc_cpu != PROCESSOR_NIAGARA3
      && sparc_cpu != PROCESSOR_NIAGARA4)
    emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));

  /* Call __enable_execute_stack after writing onto the stack to make sure
     the stack address is accessible.  */
#ifdef HAVE_ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
                     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif
}
9310
9311 /* Worker for TARGET_TRAMPOLINE_INIT. */
9312
9313 static void
sparc_trampoline_init(rtx m_tramp,tree fndecl,rtx cxt)9314 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9315 {
9316 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9317 cxt = force_reg (Pmode, cxt);
9318 if (TARGET_ARCH64)
9319 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9320 else
9321 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9322 }
9323
9324 /* Adjust the cost of a scheduling dependency. Return the new cost of
9325 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9326
/* SuperSPARC-specific cost adjustment for the scheduling dependency LINK
   between DEP_INSN and INSN; COST is the current cost.  */

static int
supersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type;

  /* Unrecognizable insns have no type attribute; treat them as free.  */
  if (! recog_memoized (insn))
    return 0;

  insn_type = get_attr_type (insn);

  if (REG_NOTE_KIND (link) == 0)
    {
      /* Data dependency; DEP_INSN writes a register that INSN reads some
	 cycles later.  */

      /* if a load, then the dependence must be on the memory address;
	 add an extra "cycle".  Note that the cost could be two cycles
	 if the reg was written late in an instruction group; we cannot tell
	 here.  */
      if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
	return cost + 3;

      /* Get the delay only if the address of the store is the dependence.  */
      if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
	{
	  rtx pat = PATTERN(insn);
	  rtx dep_pat = PATTERN (dep_insn);

	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    return cost;  /* This should not happen!  */

	  /* The dependency between the two instructions was on the data that
	     is being stored.  Assume that this implies that the address of the
	     store is not dependent.  */
	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
	    return cost;

	  return cost + 3;  /* An approximation.  */
	}

      /* A shift instruction cannot receive its data from an instruction
	 in the same cycle; add a one cycle penalty.  */
      if (insn_type == TYPE_SHIFT)
	return cost + 3;   /* Split before cascade into shift.  */
    }
  else
    {
      /* Anti- or output- dependency; DEP_INSN reads/writes a register that
	 INSN writes some cycles later.  */

      /* These are only significant for the fpu unit; writing a fp reg before
	 the fpu has finished with it stalls the processor.  */

      /* Reusing an integer register causes no problems.  */
      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
	return 0;
    }

  return cost;
}
9387
/* HyperSPARC/SPARClite86x-specific cost adjustment for the scheduling
   dependency LINK between DEP_INSN and INSN; COST is the current cost.  */

static int
hypersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type, dep_type;
  rtx pat = PATTERN(insn);
  rtx dep_pat = PATTERN (dep_insn);

  /* Both insns must be recognizable to have type attributes.  */
  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_type = get_attr_type (dep_insn);

  switch (REG_NOTE_KIND (link))
    {
    case 0:
      /* Data dependency; DEP_INSN writes a register that INSN reads some
	 cycles later.  */

      switch (insn_type)
	{
	case TYPE_STORE:
	case TYPE_FPSTORE:
	  /* Get the delay iff the address of the store is the dependence.  */
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    return cost;

	  /* Dependency on the stored data, not the address: no penalty.  */
	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
	    return cost;
	  return cost + 3;

	case TYPE_LOAD:
	case TYPE_SLOAD:
	case TYPE_FPLOAD:
	  /* If a load, then the dependence must be on the memory address.  If
	     the addresses aren't equal, then it might be a false dependency */
	  if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
	    {
	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
		  || GET_CODE (SET_DEST (dep_pat)) != MEM
		  || GET_CODE (SET_SRC (pat)) != MEM
		  || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
				    XEXP (SET_SRC (pat), 0)))
		return cost + 2;

	      /* Load from the same address as the preceding store: costly.  */
	      return cost + 8;
	    }
	  break;

	case TYPE_BRANCH:
	  /* Compare to branch latency is 0.  There is no benefit from
	     separating compare and branch.  */
	  if (dep_type == TYPE_COMPARE)
	    return 0;
	  /* Floating point compare to branch latency is less than
	     compare to conditional move.  */
	  if (dep_type == TYPE_FPCMP)
	    return cost - 1;
	  break;
	default:
	  break;
	}
	break;

    case REG_DEP_ANTI:
      /* Anti-dependencies only penalize the fpu unit.  */
      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
	return 0;
      break;

    default:
      break;
    }

  return cost;
}
9464
9465 static int
sparc_adjust_cost(rtx insn,rtx link,rtx dep,int cost)9466 sparc_adjust_cost(rtx insn, rtx link, rtx dep, int cost)
9467 {
9468 switch (sparc_cpu)
9469 {
9470 case PROCESSOR_SUPERSPARC:
9471 cost = supersparc_adjust_cost (insn, link, dep, cost);
9472 break;
9473 case PROCESSOR_HYPERSPARC:
9474 case PROCESSOR_SPARCLITE86X:
9475 cost = hypersparc_adjust_cost (insn, link, dep, cost);
9476 break;
9477 default:
9478 break;
9479 }
9480 return cost;
9481 }
9482
/* Hook for TARGET_SCHED_INIT.  SPARC keeps no per-block scheduling state,
   so there is nothing to set up.  */

static void
sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
		  int sched_verbose ATTRIBUTE_UNUSED,
		  int max_ready ATTRIBUTE_UNUSED)
{}
9488
9489 static int
sparc_use_sched_lookahead(void)9490 sparc_use_sched_lookahead (void)
9491 {
9492 if (sparc_cpu == PROCESSOR_NIAGARA
9493 || sparc_cpu == PROCESSOR_NIAGARA2
9494 || sparc_cpu == PROCESSOR_NIAGARA3)
9495 return 0;
9496 if (sparc_cpu == PROCESSOR_NIAGARA4)
9497 return 2;
9498 if (sparc_cpu == PROCESSOR_ULTRASPARC
9499 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9500 return 4;
9501 if ((1 << sparc_cpu) &
9502 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9503 (1 << PROCESSOR_SPARCLITE86X)))
9504 return 3;
9505 return 0;
9506 }
9507
9508 static int
sparc_issue_rate(void)9509 sparc_issue_rate (void)
9510 {
9511 switch (sparc_cpu)
9512 {
9513 case PROCESSOR_NIAGARA:
9514 case PROCESSOR_NIAGARA2:
9515 case PROCESSOR_NIAGARA3:
9516 default:
9517 return 1;
9518 case PROCESSOR_NIAGARA4:
9519 case PROCESSOR_V9:
9520 /* Assume V9 processors are capable of at least dual-issue. */
9521 return 2;
9522 case PROCESSOR_SUPERSPARC:
9523 return 3;
9524 case PROCESSOR_HYPERSPARC:
9525 case PROCESSOR_SPARCLITE86X:
9526 return 2;
9527 case PROCESSOR_ULTRASPARC:
9528 case PROCESSOR_ULTRASPARC3:
9529 return 4;
9530 }
9531 }
9532
/* INSN is a single SET.  Return 1 if its SET_SRC is known to leave the
   high 32 bits of the destination zero, -1 if it sign-extends from SImode,
   and 0 if nothing is known (mirrors the sparc_check_64 convention).  */

static int
set_extends (rtx insn)
{
  register rtx pat = PATTERN (insn);

  switch (GET_CODE (SET_SRC (pat)))
    {
      /* Load and some shift instructions zero extend.  */
    case MEM:
    case ZERO_EXTEND:
      /* sethi clears the high bits */
    case HIGH:
      /* LO_SUM is used with sethi.  sethi cleared the high
	 bits and the values used with lo_sum are positive */
    case LO_SUM:
      /* Store flag stores 0 or 1 */
    case LT: case LTU:
    case GT: case GTU:
    case LE: case LEU:
    case GE: case GEU:
    case EQ:
    case NE:
      return 1;
    case AND:
      {
	rtx op0 = XEXP (SET_SRC (pat), 0);
	rtx op1 = XEXP (SET_SRC (pat), 1);
	/* AND with a non-negative constant masks the high bits to zero.  */
	if (GET_CODE (op1) == CONST_INT)
	  return INTVAL (op1) >= 0;
	if (GET_CODE (op0) != REG)
	  return 0;
	/* Otherwise the result is zero-extended if either operand is.  */
	if (sparc_check_64 (op0, insn) == 1)
	  return 1;
	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
      }
    case IOR:
    case XOR:
      {
	rtx op0 = XEXP (SET_SRC (pat), 0);
	rtx op1 = XEXP (SET_SRC (pat), 1);
	/* For IOR/XOR both operands must have zero high bits.  */
	if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
	  return 0;
	if (GET_CODE (op1) == CONST_INT)
	  return INTVAL (op1) >= 0;
	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
      }
    case LSHIFTRT:
      /* A 32-bit logical right shift zero-extends.  */
      return GET_MODE (SET_SRC (pat)) == SImode;
      /* Positive integers leave the high bits zero.  */
    case CONST_DOUBLE:
      return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
    case CONST_INT:
      return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
    case ASHIFTRT:
    case SIGN_EXTEND:
      /* -1 when the 32-bit result is sign-extended, else 0.  */
      return - (GET_MODE (SET_SRC (pat)) == SImode);
    case REG:
      /* A plain register copy propagates whatever we know about it.  */
      return sparc_check_64 (SET_SRC (pat), insn);
    default:
      return 0;
    }
}
9595
9596 /* We _ought_ to have only one kind per function, but... */
9597 static GTY(()) rtx sparc_addr_diff_list;
9598 static GTY(()) rtx sparc_addr_list;
9599
9600 void
sparc_defer_case_vector(rtx lab,rtx vec,int diff)9601 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9602 {
9603 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9604 if (diff)
9605 sparc_addr_diff_list
9606 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9607 else
9608 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9609 }
9610
/* Output an absolute jump-table: VEC is an EXPR_LIST pairing the table's
   CODE_LABEL with its ADDR_VEC body.  */

static void
sparc_output_addr_vec (rtx vec)
{
  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
  int idx, vlen = XVECLEN (body, 0);

#ifdef ASM_OUTPUT_ADDR_VEC_START
  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
#endif

  /* Emit the table's label, using the target's case-label hook when
     available.  */
#ifdef ASM_OUTPUT_CASE_LABEL
  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
			 NEXT_INSN (lab));
#else
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
#endif

  /* One absolute-address entry per case label.  */
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_VEC_ELT
	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
    }

#ifdef ASM_OUTPUT_ADDR_VEC_END
  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
#endif
}
9638
/* Output a relative (label-difference) jump-table: VEC is an EXPR_LIST
   pairing the table's CODE_LABEL with its ADDR_DIFF_VEC body.  */

static void
sparc_output_addr_diff_vec (rtx vec)
{
  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
  /* The label all entries are relative to.  */
  rtx base = XEXP (XEXP (body, 0), 0);
  int idx, vlen = XVECLEN (body, 1);

#ifdef ASM_OUTPUT_ADDR_VEC_START
  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
#endif

  /* Emit the table's label, using the target's case-label hook when
     available.  */
#ifdef ASM_OUTPUT_CASE_LABEL
  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
			 NEXT_INSN (lab));
#else
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
#endif

  /* One base-relative entry per case label.  */
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_DIFF_ELT
	(asm_out_file,
	 body,
	 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
	 CODE_LABEL_NUMBER (base));
    }

#ifdef ASM_OUTPUT_ADDR_VEC_END
  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
#endif
}
9670
9671 static void
sparc_output_deferred_case_vectors(void)9672 sparc_output_deferred_case_vectors (void)
9673 {
9674 rtx t;
9675 int align;
9676
9677 if (sparc_addr_list == NULL_RTX
9678 && sparc_addr_diff_list == NULL_RTX)
9679 return;
9680
9681 /* Align to cache line in the function's code section. */
9682 switch_to_section (current_function_section ());
9683
9684 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9685 if (align > 0)
9686 ASM_OUTPUT_ALIGN (asm_out_file, align);
9687
9688 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9689 sparc_output_addr_vec (XEXP (t, 0));
9690 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9691 sparc_output_addr_diff_vec (XEXP (t, 0));
9692
9693 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9694 }
9695
9696 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
9697 unknown. Return 1 if the high bits are zero, -1 if the register is
9698 sign extended. */
int
sparc_check_64 (rtx x, rtx insn)
{
  /* If a register is set only once it is safe to ignore insns this
     code does not know how to handle.  The loop will either recognize
     the single set and return the correct value or fail to recognize
     it and return 0.  */
  int set_once = 0;
  rtx y = x;

  gcc_assert (GET_CODE (x) == REG);

  /* For a DImode register, track the SImode subregister that holds the
     low word (word 1 on big-endian hosts).  */
  if (GET_MODE (x) == DImode)
    y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);

  /* Use dataflow info, when available, to detect single-definition
     registers, which lets the scan below skip insns it cannot parse.  */
  if (flag_expensive_optimizations
      && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
    set_once = 1;

  if (insn == 0)
    {
      if (set_once)
	insn = get_last_insn_anywhere ();
      else
	return 0;
    }

  /* Scan backwards for the (last) insn that sets X or its low word.  */
  while ((insn = PREV_INSN (insn)))
    {
      switch (GET_CODE (insn))
	{
	case JUMP_INSN:
	case NOTE:
	  break;
	case CODE_LABEL:
	case CALL_INSN:
	default:
	  /* Control flow or an unknown insn invalidates the scan unless
	     the register is known to be set exactly once.  */
	  if (! set_once)
	    return 0;
	  break;
	case INSN:
	  {
	    rtx pat = PATTERN (insn);
	    if (GET_CODE (pat) != SET)
	      return 0;
	    if (rtx_equal_p (x, SET_DEST (pat)))
	      return set_extends (insn);
	    if (y && rtx_equal_p (y, SET_DEST (pat)))
	      return set_extends (insn);
	    /* A partial overlap clobbers what we know.  */
	    if (reg_overlap_mentioned_p (SET_DEST (pat), y))
	      return 0;
	  }
	}
    }
  return 0;
}
9755
9756 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
9757 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
9758
const char *
output_v8plus_shift (rtx insn, rtx *operands, const char *opcode)
{
  /* Static buffer for the assembled template; output_asm_insn consumes
     it before the next call.  */
  static char asm_code[60];

  /* The scratch register is only required when the destination
     register is not a 64-bit global or out register.  */
  if (which_alternative != 2)
    operands[3] = operands[0];

  /* We can only shift by constants <= 63.  */
  if (GET_CODE (operands[2]) == CONST_INT)
    operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);

  if (GET_CODE (operands[1]) == CONST_INT)
    {
      output_asm_insn ("mov\t%1, %3", operands);
    }
  else
    {
      /* Assemble the 64-bit source value in %3 from its two 32-bit
	 halves: shift the high word up, clear the low word's high bits
	 if they are not already known zero, and OR the halves.  */
      output_asm_insn ("sllx\t%H1, 32, %3", operands);
      if (sparc_check_64 (operands[1], insn) <= 0)
	output_asm_insn ("srl\t%L1, 0, %L1", operands);
      output_asm_insn ("or\t%L1, %3, %3", operands);
    }

  strcpy (asm_code, opcode);

  /* Perform the 64-bit shift, then split the result back into the
     32-bit high/low destination registers.  */
  if (which_alternative != 2)
    return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
  else
    return
      strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
}
9793
9794 /* Output rtl to increment the profiler label LABELNO
9795 for profiling a function entry. */
9796
9797 void
sparc_profile_hook(int labelno)9798 sparc_profile_hook (int labelno)
9799 {
9800 char buf[32];
9801 rtx lab, fun;
9802
9803 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
9804 if (NO_PROFILE_COUNTERS)
9805 {
9806 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
9807 }
9808 else
9809 {
9810 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9811 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
9812 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
9813 }
9814 }
9815
9816 #ifdef TARGET_SOLARIS
9817 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
9818
static void
sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
				     tree decl ATTRIBUTE_UNUSED)
{
  /* COMDAT sections need the linkonce machinery of the Solaris
     assembler; delegate to the common Solaris implementation.  */
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }

  fprintf (asm_out_file, "\t.section\t\"%s\"", name);

  /* Translate the generic SECTION_* flags into Sun as #attributes.  */
  if (!(flags & SECTION_DEBUG))
    fputs (",#alloc", asm_out_file);
  if (flags & SECTION_WRITE)
    fputs (",#write", asm_out_file);
  if (flags & SECTION_TLS)
    fputs (",#tls", asm_out_file);
  if (flags & SECTION_CODE)
    fputs (",#execinstr", asm_out_file);

  /* Sun as only supports #nobits/#progbits since Solaris 10.  */
  if (HAVE_AS_SPARC_NOBITS)
    {
      if (flags & SECTION_BSS)
	fputs (",#nobits", asm_out_file);
      else
	fputs (",#progbits", asm_out_file);
    }

  fputc ('\n', asm_out_file);
}
9851 #endif /* TARGET_SOLARIS */
9852
9853 /* We do not allow indirect calls to be optimized into sibling calls.
9854
9855 We cannot use sibling calls when delayed branches are disabled
9856 because they will likely require the call delay slot to be filled.
9857
9858 Also, on SPARC 32-bit we cannot emit a sibling call when the
9859 current function returns a structure. This is because the "unimp
9860 after call" convention would cause the callee to return to the
9861 wrong place. The generic code already disallows cases where the
9862 function being called returns a structure.
9863
9864 It may seem strange how this last case could occur. Usually there
9865 is code after the call which jumps to epilogue code which dumps the
9866 return value into the struct return area. That ought to invalidate
9867 the sibling call right? Well, in the C++ case we can end up passing
9868 the pointer to the struct return area to a constructor (which returns
9869 void) and then nothing else happens. Such a sibling call would look
9870 valid without the added check here.
9871
9872 VxWorks PIC PLT entries require the global pointer to be initialized
9873 on entry. We therefore can't emit sibling calls to them. */
9874 static bool
sparc_function_ok_for_sibcall(tree decl,tree exp ATTRIBUTE_UNUSED)9875 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9876 {
9877 return (decl
9878 && flag_delayed_branch
9879 && (TARGET_ARCH64 || ! cfun->returns_struct)
9880 && !(TARGET_VXWORKS_RTP
9881 && flag_pic
9882 && !targetm.binds_local_p (decl)));
9883 }
9884
9885 /* libfunc renaming. */
9886
/* Hook for TARGET_INIT_LIBFUNCS: register the SPARC-specific library
   routine names for arithmetic and conversion operations.  */

static void
sparc_init_libfuncs (void)
{
  if (TARGET_ARCH32)
    {
      /* Use the subroutines that Sun's library provides for integer
	 multiply and divide.  The `*' prevents an underscore from
	 being prepended by the compiler.  .umul is a little faster
	 than .mul.  */
      set_optab_libfunc (smul_optab, SImode, "*.umul");
      set_optab_libfunc (sdiv_optab, SImode, "*.div");
      set_optab_libfunc (udiv_optab, SImode, "*.udiv");
      set_optab_libfunc (smod_optab, SImode, "*.rem");
      set_optab_libfunc (umod_optab, SImode, "*.urem");

      /* TFmode arithmetic.  These names are part of the SPARC 32bit ABI.  */
      set_optab_libfunc (add_optab, TFmode, "_Q_add");
      set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
      set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
      set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
      set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");

      /* We can define the TFmode sqrt optab only if TARGET_FPU.  This
	 is because with soft-float, the SFmode and DFmode sqrt
	 instructions will be absent, and the compiler will notice and
	 try to use the TFmode sqrt instruction for calls to the
	 builtin function sqrt, but this fails.  */
      if (TARGET_FPU)
	set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");

      /* TFmode comparisons, also part of the 32-bit ABI.  */
      set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
      set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
      set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
      set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
      set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
      set_optab_libfunc (le_optab, TFmode, "_Q_fle");

      /* Conversions between TFmode and the narrower float modes.  */
      set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
      set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
      set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
      set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");

      /* Conversions between TFmode and 32-bit integers.  */
      set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
      set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
      set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
      set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");

      /* Conversions between TFmode and 64-bit integers, where the OS
	 library provides them.  */
      if (DITF_CONVERSION_LIBFUNCS)
	{
	  set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
	  set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
	  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
	  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
	}

      /* Sun-provided float-to-64-bit-integer conversions.  */
      if (SUN_CONVERSION_LIBFUNCS)
	{
	  set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
	  set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
	  set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
	  set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
	}
    }
  if (TARGET_ARCH64)
    {
      /* In the SPARC 64bit ABI, SImode multiply and divide functions
	 do not exist in the library.  Make sure the compiler does not
	 emit calls to them by accident.  (It should always use the
         hardware instructions.)  */
      set_optab_libfunc (smul_optab, SImode, 0);
      set_optab_libfunc (sdiv_optab, SImode, 0);
      set_optab_libfunc (udiv_optab, SImode, 0);
      set_optab_libfunc (smod_optab, SImode, 0);
      set_optab_libfunc (umod_optab, SImode, 0);

      if (SUN_INTEGER_MULTIPLY_64)
	{
	  set_optab_libfunc (smul_optab, DImode, "__mul64");
	  set_optab_libfunc (sdiv_optab, DImode, "__div64");
	  set_optab_libfunc (udiv_optab, DImode, "__udiv64");
	  set_optab_libfunc (smod_optab, DImode, "__rem64");
	  set_optab_libfunc (umod_optab, DImode, "__urem64");
	}

      if (SUN_CONVERSION_LIBFUNCS)
	{
	  set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
	  set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
	  set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
	  set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
	}
    }
}
9980
/* Register a SPARC machine-dependent builtin named NAME, implemented by
   insn code CODE, with function type TYPE.  Returns the new decl.  */

static tree
def_builtin (const char *name, int code, tree type)
{
  return add_builtin_function (name, type, code, BUILT_IN_MD,
			       NULL, NULL_TREE);
}
9986
/* Like def_builtin, but additionally mark the builtin as having no side
   effects (TREE_READONLY).  */

static tree
def_builtin_const (const char *name, int code, tree type)
{
  tree decl = def_builtin (name, code, type);

  if (decl != NULL_TREE)
    TREE_READONLY (decl) = 1;

  return decl;
}
9996
9997 /* Implement the TARGET_INIT_BUILTINS target hook.
9998 Create builtin functions for special SPARC instructions. */
9999
10000 static void
sparc_init_builtins(void)10001 sparc_init_builtins (void)
10002 {
10003 if (TARGET_VIS)
10004 sparc_vis_init_builtins ();
10005 }
10006
10007 /* Create builtin functions for VIS 1.0 instructions. */
10008
10009 static void
sparc_vis_init_builtins(void)10010 sparc_vis_init_builtins (void)
10011 {
10012 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10013 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10014 tree v4hi = build_vector_type (intHI_type_node, 4);
10015 tree v2hi = build_vector_type (intHI_type_node, 2);
10016 tree v2si = build_vector_type (intSI_type_node, 2);
10017 tree v1si = build_vector_type (intSI_type_node, 1);
10018
10019 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10020 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10021 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10022 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10023 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10024 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10025 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10026 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10027 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10028 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10029 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10030 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10031 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10032 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10033 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10034 v8qi, v8qi,
10035 intDI_type_node, 0);
10036 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10037 v8qi, v8qi, 0);
10038 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10039 v8qi, v8qi, 0);
10040 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10041 intDI_type_node,
10042 intDI_type_node, 0);
10043 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10044 intSI_type_node,
10045 intSI_type_node, 0);
10046 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10047 ptr_type_node,
10048 intSI_type_node, 0);
10049 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10050 ptr_type_node,
10051 intDI_type_node, 0);
10052 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10053 ptr_type_node,
10054 ptr_type_node, 0);
10055 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10056 ptr_type_node,
10057 ptr_type_node, 0);
10058 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10059 v4hi, v4hi, 0);
10060 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10061 v2si, v2si, 0);
10062 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10063 v4hi, v4hi, 0);
10064 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10065 v2si, v2si, 0);
10066 tree void_ftype_di = build_function_type_list (void_type_node,
10067 intDI_type_node, 0);
10068 tree di_ftype_void = build_function_type_list (intDI_type_node,
10069 void_type_node, 0);
10070 tree void_ftype_si = build_function_type_list (void_type_node,
10071 intSI_type_node, 0);
10072 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10073 float_type_node,
10074 float_type_node, 0);
10075 tree df_ftype_df_df = build_function_type_list (double_type_node,
10076 double_type_node,
10077 double_type_node, 0);
10078
10079 /* Packing and expanding vectors. */
10080 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10081 v4qi_ftype_v4hi);
10082 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10083 v8qi_ftype_v2si_v8qi);
10084 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10085 v2hi_ftype_v2si);
10086 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10087 v4hi_ftype_v4qi);
10088 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10089 v8qi_ftype_v4qi_v4qi);
10090
10091 /* Multiplications. */
10092 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10093 v4hi_ftype_v4qi_v4hi);
10094 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10095 v4hi_ftype_v4qi_v2hi);
10096 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10097 v4hi_ftype_v4qi_v2hi);
10098 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10099 v4hi_ftype_v8qi_v4hi);
10100 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10101 v4hi_ftype_v8qi_v4hi);
10102 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10103 v2si_ftype_v4qi_v2hi);
10104 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10105 v2si_ftype_v4qi_v2hi);
10106
10107 /* Data aligning. */
10108 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10109 v4hi_ftype_v4hi_v4hi);
10110 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10111 v8qi_ftype_v8qi_v8qi);
10112 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10113 v2si_ftype_v2si_v2si);
10114 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10115 di_ftype_di_di);
10116
10117 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10118 void_ftype_di);
10119 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10120 di_ftype_void);
10121
10122 if (TARGET_ARCH64)
10123 {
10124 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10125 ptr_ftype_ptr_di);
10126 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10127 ptr_ftype_ptr_di);
10128 }
10129 else
10130 {
10131 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10132 ptr_ftype_ptr_si);
10133 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10134 ptr_ftype_ptr_si);
10135 }
10136
10137 /* Pixel distance. */
10138 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10139 di_ftype_v8qi_v8qi_di);
10140
10141 /* Edge handling. */
10142 if (TARGET_ARCH64)
10143 {
10144 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10145 di_ftype_ptr_ptr);
10146 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10147 di_ftype_ptr_ptr);
10148 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10149 di_ftype_ptr_ptr);
10150 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10151 di_ftype_ptr_ptr);
10152 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10153 di_ftype_ptr_ptr);
10154 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10155 di_ftype_ptr_ptr);
10156 if (TARGET_VIS2)
10157 {
10158 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10159 di_ftype_ptr_ptr);
10160 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10161 di_ftype_ptr_ptr);
10162 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10163 di_ftype_ptr_ptr);
10164 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10165 di_ftype_ptr_ptr);
10166 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10167 di_ftype_ptr_ptr);
10168 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10169 di_ftype_ptr_ptr);
10170 }
10171 }
10172 else
10173 {
10174 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10175 si_ftype_ptr_ptr);
10176 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10177 si_ftype_ptr_ptr);
10178 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10179 si_ftype_ptr_ptr);
10180 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10181 si_ftype_ptr_ptr);
10182 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10183 si_ftype_ptr_ptr);
10184 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10185 si_ftype_ptr_ptr);
10186 if (TARGET_VIS2)
10187 {
10188 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10189 si_ftype_ptr_ptr);
10190 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10191 si_ftype_ptr_ptr);
10192 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10193 si_ftype_ptr_ptr);
10194 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10195 si_ftype_ptr_ptr);
10196 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10197 si_ftype_ptr_ptr);
10198 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10199 si_ftype_ptr_ptr);
10200 }
10201 }
10202
10203 /* Pixel compare. */
10204 if (TARGET_ARCH64)
10205 {
10206 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10207 di_ftype_v4hi_v4hi);
10208 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10209 di_ftype_v2si_v2si);
10210 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10211 di_ftype_v4hi_v4hi);
10212 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10213 di_ftype_v2si_v2si);
10214 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10215 di_ftype_v4hi_v4hi);
10216 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10217 di_ftype_v2si_v2si);
10218 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10219 di_ftype_v4hi_v4hi);
10220 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10221 di_ftype_v2si_v2si);
10222 }
10223 else
10224 {
10225 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10226 si_ftype_v4hi_v4hi);
10227 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10228 si_ftype_v2si_v2si);
10229 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10230 si_ftype_v4hi_v4hi);
10231 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10232 si_ftype_v2si_v2si);
10233 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10234 si_ftype_v4hi_v4hi);
10235 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10236 si_ftype_v2si_v2si);
10237 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10238 si_ftype_v4hi_v4hi);
10239 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10240 si_ftype_v2si_v2si);
10241 }
10242
10243 /* Addition and subtraction. */
10244 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10245 v4hi_ftype_v4hi_v4hi);
10246 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10247 v2hi_ftype_v2hi_v2hi);
10248 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10249 v2si_ftype_v2si_v2si);
10250 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10251 v1si_ftype_v1si_v1si);
10252 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10253 v4hi_ftype_v4hi_v4hi);
10254 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10255 v2hi_ftype_v2hi_v2hi);
10256 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10257 v2si_ftype_v2si_v2si);
10258 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10259 v1si_ftype_v1si_v1si);
10260
10261 /* Three-dimensional array addressing. */
10262 if (TARGET_ARCH64)
10263 {
10264 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10265 di_ftype_di_di);
10266 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10267 di_ftype_di_di);
10268 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10269 di_ftype_di_di);
10270 }
10271 else
10272 {
10273 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10274 si_ftype_si_si);
10275 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10276 si_ftype_si_si);
10277 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10278 si_ftype_si_si);
10279 }
10280
10281 if (TARGET_VIS2)
10282 {
10283 /* Byte mask and shuffle */
10284 if (TARGET_ARCH64)
10285 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10286 di_ftype_di_di);
10287 else
10288 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10289 si_ftype_si_si);
10290 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10291 v4hi_ftype_v4hi_v4hi);
10292 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10293 v8qi_ftype_v8qi_v8qi);
10294 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10295 v2si_ftype_v2si_v2si);
10296 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10297 di_ftype_di_di);
10298 }
10299
10300 if (TARGET_VIS3)
10301 {
10302 if (TARGET_ARCH64)
10303 {
10304 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10305 void_ftype_di);
10306 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10307 void_ftype_di);
10308 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10309 void_ftype_di);
10310 }
10311 else
10312 {
10313 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10314 void_ftype_si);
10315 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10316 void_ftype_si);
10317 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10318 void_ftype_si);
10319 }
10320
10321 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
10322 v4hi_ftype_v4hi_v4hi);
10323
10324 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
10325 v4hi_ftype_v4hi_v4hi);
10326 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
10327 v4hi_ftype_v4hi_v4hi);
10328 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10329 v4hi_ftype_v4hi_v4hi);
10330 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10331 v4hi_ftype_v4hi_v4hi);
10332 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10333 v2si_ftype_v2si_v2si);
10334 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10335 v2si_ftype_v2si_v2si);
10336 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10337 v2si_ftype_v2si_v2si);
10338 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10339 v2si_ftype_v2si_v2si);
10340
10341 if (TARGET_ARCH64)
10342 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10343 di_ftype_v8qi_v8qi);
10344 else
10345 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10346 si_ftype_v8qi_v8qi);
10347
10348 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
10349 v4hi_ftype_v4hi_v4hi);
10350 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
10351 di_ftype_di_di);
10352 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
10353 di_ftype_di_di);
10354
10355 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
10356 v4hi_ftype_v4hi_v4hi);
10357 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
10358 v2hi_ftype_v2hi_v2hi);
10359 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
10360 v4hi_ftype_v4hi_v4hi);
10361 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
10362 v2hi_ftype_v2hi_v2hi);
10363 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
10364 v2si_ftype_v2si_v2si);
10365 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
10366 v1si_ftype_v1si_v1si);
10367 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
10368 v2si_ftype_v2si_v2si);
10369 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
10370 v1si_ftype_v1si_v1si);
10371
10372 if (TARGET_ARCH64)
10373 {
10374 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
10375 di_ftype_v8qi_v8qi);
10376 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
10377 di_ftype_v8qi_v8qi);
10378 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
10379 di_ftype_v8qi_v8qi);
10380 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
10381 di_ftype_v8qi_v8qi);
10382 }
10383 else
10384 {
10385 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
10386 si_ftype_v8qi_v8qi);
10387 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
10388 si_ftype_v8qi_v8qi);
10389 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
10390 si_ftype_v8qi_v8qi);
10391 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
10392 si_ftype_v8qi_v8qi);
10393 }
10394
10395 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
10396 sf_ftype_sf_sf);
10397 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
10398 df_ftype_df_df);
10399 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
10400 sf_ftype_sf_sf);
10401 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
10402 df_ftype_df_df);
10403 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
10404 sf_ftype_sf_sf);
10405 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
10406 df_ftype_df_df);
10407
10408 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
10409 di_ftype_di_di);
10410 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
10411 di_ftype_di_di);
10412 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
10413 di_ftype_di_di);
10414 }
10415 }
10416
/* Handle TARGET_EXPAND_BUILTIN target hook.
   Expand builtin functions for sparc intrinsics.  */

static rtx
sparc_expand_builtin (tree exp, rtx target,
		      rtx subtarget ATTRIBUTE_UNUSED,
		      enum machine_mode tmode ATTRIBUTE_UNUSED,
		      int ignore ATTRIBUTE_UNUSED)
{
  tree arg;
  call_expr_arg_iterator iter;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  /* The SPARC builtins are registered with the insn code as their
     function code, so DECL_FUNCTION_CODE directly yields the icode.  */
  unsigned int icode = DECL_FUNCTION_CODE (fndecl);
  rtx pat, op[4];
  int arg_count = 0;
  bool nonvoid;

  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;

  if (nonvoid)
    {
      /* op[0] receives the result.  Reuse TARGET when it has the right
	 mode and satisfies the output operand's predicate; otherwise
	 expand into a fresh pseudo.  */
      enum machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	op[0] = gen_reg_rtx (tmode);
      else
	op[0] = target;
    }
  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      const struct insn_operand_data *insn_op;
      int idx;

      if (arg == error_mark_node)
	return NULL_RTX;

      arg_count++;
      /* For a void builtin the insn has no output operand, so call
	 argument I maps to insn operand I-1; otherwise to operand I.  */
      idx = arg_count - !nonvoid;
      insn_op = &insn_data[icode].operand[idx];
      op[arg_count] = expand_normal (arg);

      /* V1DI/V1SI insn operands arrive from the tree level as plain
	 DImode/SImode values; bridge the gap with a lowpart subreg.  */
      if (insn_op->mode == V1DImode
	  && GET_MODE (op[arg_count]) == DImode)
	op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
      else if (insn_op->mode == V1SImode
	       && GET_MODE (op[arg_count]) == SImode)
	op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);

      /* Force the operand into a register if it does not already
	 satisfy the insn's predicate.  */
      if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
							insn_op->mode))
	op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
    }

  /* Generate the insn with the matching number of operands.  Note that
     the 0-argument case assumes a non-void builtin (op[0] is the result);
     all currently registered 0-argument builtins (e.g. read_gsr) are
     non-void.  */
  switch (arg_count)
    {
    case 0:
      pat = GEN_FCN (icode) (op[0]);
      break;
    case 1:
      if (nonvoid)
	pat = GEN_FCN (icode) (op[0], op[1]);
      else
	pat = GEN_FCN (icode) (op[1]);
      break;
    case 2:
      pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;
    case 3:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;
    default:
      gcc_unreachable ();
    }

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);

  if (nonvoid)
    return op[0];
  else
    return const0_rtx;
}
10502
/* Scale the 8-bit value E8 by the 16-bit fixed-point factor E16,
   rounding to nearest, as the VIS fmul8x16 instruction family does.  */

static int
sparc_vis_mul8x16 (int e8, int e16)
{
  int product = e8 * e16;

  return (product + 128) / 256;
}
10508
10509 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
10510 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
10511
10512 static void
sparc_handle_vis_mul8x16(tree * n_elts,int fncode,tree inner_type,tree cst0,tree cst1)10513 sparc_handle_vis_mul8x16 (tree *n_elts, int fncode, tree inner_type,
10514 tree cst0, tree cst1)
10515 {
10516 unsigned i, num = VECTOR_CST_NELTS (cst0);
10517 int scale;
10518
10519 switch (fncode)
10520 {
10521 case CODE_FOR_fmul8x16_vis:
10522 for (i = 0; i < num; ++i)
10523 {
10524 int val
10525 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10526 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
10527 n_elts[i] = build_int_cst (inner_type, val);
10528 }
10529 break;
10530
10531 case CODE_FOR_fmul8x16au_vis:
10532 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
10533
10534 for (i = 0; i < num; ++i)
10535 {
10536 int val
10537 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10538 scale);
10539 n_elts[i] = build_int_cst (inner_type, val);
10540 }
10541 break;
10542
10543 case CODE_FOR_fmul8x16al_vis:
10544 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
10545
10546 for (i = 0; i < num; ++i)
10547 {
10548 int val
10549 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10550 scale);
10551 n_elts[i] = build_int_cst (inner_type, val);
10552 }
10553 break;
10554
10555 default:
10556 gcc_unreachable ();
10557 }
10558 }
10559
10560 /* Handle TARGET_FOLD_BUILTIN target hook.
10561 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
10562 result of the function call is ignored. NULL_TREE is returned if the
10563 function could not be folded. */
10564
10565 static tree
sparc_fold_builtin(tree fndecl,int n_args ATTRIBUTE_UNUSED,tree * args,bool ignore)10566 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10567 tree *args, bool ignore)
10568 {
10569 tree arg0, arg1, arg2;
10570 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
10571 enum insn_code icode = (enum insn_code) DECL_FUNCTION_CODE (fndecl);
10572
10573 if (ignore)
10574 {
10575 /* Note that a switch statement instead of the sequence of tests would
10576 be incorrect as many of the CODE_FOR values could be CODE_FOR_nothing
10577 and that would yield multiple alternatives with identical values. */
10578 if (icode == CODE_FOR_alignaddrsi_vis
10579 || icode == CODE_FOR_alignaddrdi_vis
10580 || icode == CODE_FOR_wrgsr_vis
10581 || icode == CODE_FOR_bmasksi_vis
10582 || icode == CODE_FOR_bmaskdi_vis
10583 || icode == CODE_FOR_cmask8si_vis
10584 || icode == CODE_FOR_cmask8di_vis
10585 || icode == CODE_FOR_cmask16si_vis
10586 || icode == CODE_FOR_cmask16di_vis
10587 || icode == CODE_FOR_cmask32si_vis
10588 || icode == CODE_FOR_cmask32di_vis)
10589 ;
10590 else
10591 return build_zero_cst (rtype);
10592 }
10593
10594 switch (icode)
10595 {
10596 case CODE_FOR_fexpand_vis:
10597 arg0 = args[0];
10598 STRIP_NOPS (arg0);
10599
10600 if (TREE_CODE (arg0) == VECTOR_CST)
10601 {
10602 tree inner_type = TREE_TYPE (rtype);
10603 tree *n_elts;
10604 unsigned i;
10605
10606 n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10607 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10608 n_elts[i] = build_int_cst (inner_type,
10609 TREE_INT_CST_LOW
10610 (VECTOR_CST_ELT (arg0, i)) << 4);
10611 return build_vector (rtype, n_elts);
10612 }
10613 break;
10614
10615 case CODE_FOR_fmul8x16_vis:
10616 case CODE_FOR_fmul8x16au_vis:
10617 case CODE_FOR_fmul8x16al_vis:
10618 arg0 = args[0];
10619 arg1 = args[1];
10620 STRIP_NOPS (arg0);
10621 STRIP_NOPS (arg1);
10622
10623 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10624 {
10625 tree inner_type = TREE_TYPE (rtype);
10626 tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10627 sparc_handle_vis_mul8x16 (n_elts, icode, inner_type, arg0, arg1);
10628 return build_vector (rtype, n_elts);
10629 }
10630 break;
10631
10632 case CODE_FOR_fpmerge_vis:
10633 arg0 = args[0];
10634 arg1 = args[1];
10635 STRIP_NOPS (arg0);
10636 STRIP_NOPS (arg1);
10637
10638 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10639 {
10640 tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
10641 unsigned i;
10642 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10643 {
10644 n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
10645 n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
10646 }
10647
10648 return build_vector (rtype, n_elts);
10649 }
10650 break;
10651
10652 case CODE_FOR_pdist_vis:
10653 arg0 = args[0];
10654 arg1 = args[1];
10655 arg2 = args[2];
10656 STRIP_NOPS (arg0);
10657 STRIP_NOPS (arg1);
10658 STRIP_NOPS (arg2);
10659
10660 if (TREE_CODE (arg0) == VECTOR_CST
10661 && TREE_CODE (arg1) == VECTOR_CST
10662 && TREE_CODE (arg2) == INTEGER_CST)
10663 {
10664 bool overflow = false;
10665 double_int result = TREE_INT_CST (arg2);
10666 double_int tmp;
10667 unsigned i;
10668
10669 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10670 {
10671 double_int e0 = TREE_INT_CST (VECTOR_CST_ELT (arg0, i));
10672 double_int e1 = TREE_INT_CST (VECTOR_CST_ELT (arg1, i));
10673
10674 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
10675
10676 tmp = e1.neg_with_overflow (&neg1_ovf);
10677 tmp = e0.add_with_sign (tmp, false, &add1_ovf);
10678 if (tmp.is_negative ())
10679 tmp = tmp.neg_with_overflow (&neg2_ovf);
10680 else
10681 neg2_ovf = false;
10682 result = result.add_with_sign (tmp, false, &add2_ovf);
10683 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
10684 }
10685
10686 gcc_assert (!overflow);
10687
10688 return build_int_cst_wide (rtype, result.low, result.high);
10689 }
10690
10691 default:
10692 break;
10693 }
10694
10695 return NULL_TREE;
10696 }
10697
/* Handle TARGET_RTX_COSTS target hook: return in *TOTAL the cost of
   rtx X, whose code is CODE and which appears inside OUTER_CODE.

   ??? This duplicates information provided to the compiler by the
   ??? scheduler description.  Some day, teach genautomata to output
   ??? the latencies and then CSE will just use that.  */

static bool
sparc_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
		 int *total, bool speed ATTRIBUTE_UNUSED)
{
  enum machine_mode mode = GET_MODE (x);
  bool float_mode_p = FLOAT_MODE_P (mode);

  switch (code)
    {
    case CONST_INT:
      /* Constants in the signed 13-bit immediate range are free.  */
      if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
	{
	  *total = 0;
	  return true;
	}
      /* FALLTHRU */

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      /* A VOIDmode CONST_DOUBLE is a wide integer constant; it is also
	 free when its value fits in a signed 13-bit immediate.  */
      if (GET_MODE (x) == VOIDmode
	  && ((CONST_DOUBLE_HIGH (x) == 0
	       && CONST_DOUBLE_LOW (x) < 0x1000)
	      || (CONST_DOUBLE_HIGH (x) == -1
		  && CONST_DOUBLE_LOW (x) < 0
		  && CONST_DOUBLE_LOW (x) >= -0x1000)))
	*total = 0;
      else
	*total = 8;
      return true;

    case MEM:
      /* If outer-code was a sign or zero extension, a cost
	 of COSTS_N_INSNS (1) was already added in.  This is
	 why we are subtracting it back out.  */
      if (outer_code == ZERO_EXTEND)
	{
	  *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
	}
      else if (outer_code == SIGN_EXTEND)
	{
	  *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
	}
      else if (float_mode_p)
	{
	  *total = sparc_costs->float_load;
	}
      else
	{
	  *total = sparc_costs->int_load;
	}

      return true;

    case PLUS:
    case MINUS:
      if (float_mode_p)
	*total = sparc_costs->float_plusminus;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case FMA:
      {
	rtx sub;

	gcc_assert (float_mode_p);
	*total = sparc_costs->float_mul;

	/* Cost operands 0 and 2 with any NEG stripped: the negated
	   forms are not charged extra here.  */
	sub = XEXP (x, 0);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, FMA, 0, speed);

	sub = XEXP (x, 2);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, FMA, 2, speed);
	return true;
      }

    case MULT:
      if (float_mode_p)
	*total = sparc_costs->float_mul;
      else if (! TARGET_HARD_MUL)
	/* Software multiply.  */
	*total = COSTS_N_INSNS (25);
      else
	{
	  int bit_cost;

	  bit_cost = 0;
	  if (sparc_costs->int_mul_bit_factor)
	    {
	      /* Charge extra per population-count bit of a constant
		 multiplier; a non-constant multiplier is assumed to
		 have 7 significant bits.  */
	      int nbits;

	      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
		{
		  unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
		  /* Kernighan's trick: clearing the lowest set bit per
		     iteration counts the set bits.  */
		  for (nbits = 0; value != 0; value &= value - 1)
		    nbits++;
		}
	      else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
		       && GET_MODE (XEXP (x, 1)) == VOIDmode)
		{
		  rtx x1 = XEXP (x, 1);
		  unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1);
		  unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1);

		  for (nbits = 0; value1 != 0; value1 &= value1 - 1)
		    nbits++;
		  for (; value2 != 0; value2 &= value2 - 1)
		    nbits++;
		}
	      else
		nbits = 7;

	      if (nbits < 3)
		nbits = 3;
	      bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
	      bit_cost = COSTS_N_INSNS (bit_cost);
	    }

	  if (mode == DImode)
	    *total = sparc_costs->int_mulX + bit_cost;
	  else
	    *total = sparc_costs->int_mul + bit_cost;
	}
      return false;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
      return false;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (float_mode_p)
	{
	  if (mode == DFmode)
	    *total = sparc_costs->float_div_df;
	  else
	    *total = sparc_costs->float_div_sf;
	}
      else
	{
	  if (mode == DImode)
	    *total = sparc_costs->int_divX;
	  else
	    *total = sparc_costs->int_div;
	}
      return false;

    case NEG:
      /* Integer negation is a single insn; FP negation falls through
	 to the float-move cost below.  */
      if (! float_mode_p)
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}
      /* FALLTHRU */

    case ABS:
    case FLOAT:
    case UNSIGNED_FLOAT:
    case FIX:
    case UNSIGNED_FIX:
    case FLOAT_EXTEND:
    case FLOAT_TRUNCATE:
      *total = sparc_costs->float_move;
      return false;

    case SQRT:
      if (mode == DFmode)
	*total = sparc_costs->float_sqrt_df;
      else
	*total = sparc_costs->float_sqrt_sf;
      return false;

    case COMPARE:
      if (float_mode_p)
	*total = sparc_costs->float_cmp;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case IF_THEN_ELSE:
      if (float_mode_p)
	*total = sparc_costs->float_cmove;
      else
	*total = sparc_costs->int_cmove;
      return false;

    case IOR:
      /* Handle the NAND vector patterns.  */
      if (sparc_vector_mode_supported_p (GET_MODE (x))
	  && GET_CODE (XEXP (x, 0)) == NOT
	  && GET_CODE (XEXP (x, 1)) == NOT)
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      else
	return false;

    default:
      return false;
    }
}
10920
10921 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
10922
10923 static inline bool
general_or_i64_p(reg_class_t rclass)10924 general_or_i64_p (reg_class_t rclass)
10925 {
10926 return (rclass == GENERAL_REGS || rclass == I64_REGS);
10927 }
10928
10929 /* Implement TARGET_REGISTER_MOVE_COST. */
10930
10931 static int
sparc_register_move_cost(enum machine_mode mode ATTRIBUTE_UNUSED,reg_class_t from,reg_class_t to)10932 sparc_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
10933 reg_class_t from, reg_class_t to)
10934 {
10935 bool need_memory = false;
10936
10937 if (from == FPCC_REGS || to == FPCC_REGS)
10938 need_memory = true;
10939 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
10940 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
10941 {
10942 if (TARGET_VIS3)
10943 {
10944 int size = GET_MODE_SIZE (mode);
10945 if (size == 8 || size == 4)
10946 {
10947 if (! TARGET_ARCH32 || size == 4)
10948 return 4;
10949 else
10950 return 6;
10951 }
10952 }
10953 need_memory = true;
10954 }
10955
10956 if (need_memory)
10957 {
10958 if (sparc_cpu == PROCESSOR_ULTRASPARC
10959 || sparc_cpu == PROCESSOR_ULTRASPARC3
10960 || sparc_cpu == PROCESSOR_NIAGARA
10961 || sparc_cpu == PROCESSOR_NIAGARA2
10962 || sparc_cpu == PROCESSOR_NIAGARA3
10963 || sparc_cpu == PROCESSOR_NIAGARA4)
10964 return 12;
10965
10966 return 6;
10967 }
10968
10969 return 2;
10970 }
10971
/* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
   This is achieved by means of a manual dynamic stack space allocation in
   the current frame.  We make the assumption that SEQ doesn't contain any
   function calls, with the possible exception of calls to the GOT helper.  */

static void
emit_and_preserve (rtx seq, rtx reg, rtx reg2)
{
  /* We must preserve the lowest 16 words for the register save area.  */
  HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
  /* We really need only 2 words of fresh stack space.  */
  HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);

  /* Save slot just above the register save area, addressed relative to
     the (biased) stack pointer.  */
  rtx slot
    = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
					     SPARC_STACK_BIAS + offset));

  /* Allocate the space, spill REG into SLOT (and REG2, if given, into
     the following word), run SEQ, then restore both registers and
     release the space again.  REG2 may be NULL.  */
  emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
  emit_insn (gen_rtx_SET (VOIDmode, slot, reg));
  if (reg2)
    emit_insn (gen_rtx_SET (VOIDmode,
			    adjust_address (slot, word_mode, UNITS_PER_WORD),
			    reg2));
  emit_insn (seq);
  if (reg2)
    emit_insn (gen_rtx_SET (VOIDmode,
			    reg2,
			    adjust_address (slot, word_mode, UNITS_PER_WORD)));
  emit_insn (gen_rtx_SET (VOIDmode, reg, slot));
  emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
}
11003
11004 /* Output the assembler code for a thunk function. THUNK_DECL is the
11005 declaration for the thunk function itself, FUNCTION is the decl for
11006 the target function. DELTA is an immediate constant offset to be
11007 added to THIS. If VCALL_OFFSET is nonzero, the word at address
11008 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
11009
11010 static void
sparc_output_mi_thunk(FILE * file,tree thunk_fndecl ATTRIBUTE_UNUSED,HOST_WIDE_INT delta,HOST_WIDE_INT vcall_offset,tree function)11011 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11012 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11013 tree function)
11014 {
11015 rtx this_rtx, insn, funexp;
11016 unsigned int int_arg_first;
11017
11018 reload_completed = 1;
11019 epilogue_completed = 1;
11020
11021 emit_note (NOTE_INSN_PROLOGUE_END);
11022
11023 if (TARGET_FLAT)
11024 {
11025 sparc_leaf_function_p = 1;
11026
11027 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11028 }
11029 else if (flag_delayed_branch)
11030 {
11031 /* We will emit a regular sibcall below, so we need to instruct
11032 output_sibcall that we are in a leaf function. */
11033 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
11034
11035 /* This will cause final.c to invoke leaf_renumber_regs so we
11036 must behave as if we were in a not-yet-leafified function. */
11037 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
11038 }
11039 else
11040 {
11041 /* We will emit the sibcall manually below, so we will need to
11042 manually spill non-leaf registers. */
11043 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
11044
11045 /* We really are in a leaf function. */
11046 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11047 }
11048
11049 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
11050 returns a structure, the structure return pointer is there instead. */
11051 if (TARGET_ARCH64
11052 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11053 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
11054 else
11055 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
11056
11057 /* Add DELTA. When possible use a plain add, otherwise load it into
11058 a register first. */
11059 if (delta)
11060 {
11061 rtx delta_rtx = GEN_INT (delta);
11062
11063 if (! SPARC_SIMM13_P (delta))
11064 {
11065 rtx scratch = gen_rtx_REG (Pmode, 1);
11066 emit_move_insn (scratch, delta_rtx);
11067 delta_rtx = scratch;
11068 }
11069
11070 /* THIS_RTX += DELTA. */
11071 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
11072 }
11073
11074 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
11075 if (vcall_offset)
11076 {
11077 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
11078 rtx scratch = gen_rtx_REG (Pmode, 1);
11079
11080 gcc_assert (vcall_offset < 0);
11081
11082 /* SCRATCH = *THIS_RTX. */
11083 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
11084
11085 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
11086 may not have any available scratch register at this point. */
11087 if (SPARC_SIMM13_P (vcall_offset))
11088 ;
11089 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
11090 else if (! fixed_regs[5]
11091 /* The below sequence is made up of at least 2 insns,
11092 while the default method may need only one. */
11093 && vcall_offset < -8192)
11094 {
11095 rtx scratch2 = gen_rtx_REG (Pmode, 5);
11096 emit_move_insn (scratch2, vcall_offset_rtx);
11097 vcall_offset_rtx = scratch2;
11098 }
11099 else
11100 {
11101 rtx increment = GEN_INT (-4096);
11102
11103 /* VCALL_OFFSET is a negative number whose typical range can be
11104 estimated as -32768..0 in 32-bit mode. In almost all cases
11105 it is therefore cheaper to emit multiple add insns than
11106 spilling and loading the constant into a register (at least
11107 6 insns). */
11108 while (! SPARC_SIMM13_P (vcall_offset))
11109 {
11110 emit_insn (gen_add2_insn (scratch, increment));
11111 vcall_offset += 4096;
11112 }
11113 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
11114 }
11115
11116 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
11117 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
11118 gen_rtx_PLUS (Pmode,
11119 scratch,
11120 vcall_offset_rtx)));
11121
11122 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
11123 emit_insn (gen_add2_insn (this_rtx, scratch));
11124 }
11125
11126 /* Generate a tail call to the target function. */
11127 if (! TREE_USED (function))
11128 {
11129 assemble_external (function);
11130 TREE_USED (function) = 1;
11131 }
11132 funexp = XEXP (DECL_RTL (function), 0);
11133
11134 if (flag_delayed_branch)
11135 {
11136 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11137 insn = emit_call_insn (gen_sibcall (funexp));
11138 SIBLING_CALL_P (insn) = 1;
11139 }
11140 else
11141 {
11142 /* The hoops we have to jump through in order to generate a sibcall
11143 without using delay slots... */
11144 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
11145
11146 if (flag_pic)
11147 {
11148 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
11149 start_sequence ();
11150 load_got_register (); /* clobbers %o7 */
11151 scratch = sparc_legitimize_pic_address (funexp, scratch);
11152 seq = get_insns ();
11153 end_sequence ();
11154 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
11155 }
11156 else if (TARGET_ARCH32)
11157 {
11158 emit_insn (gen_rtx_SET (VOIDmode,
11159 scratch,
11160 gen_rtx_HIGH (SImode, funexp)));
11161 emit_insn (gen_rtx_SET (VOIDmode,
11162 scratch,
11163 gen_rtx_LO_SUM (SImode, scratch, funexp)));
11164 }
11165 else /* TARGET_ARCH64 */
11166 {
11167 switch (sparc_cmodel)
11168 {
11169 case CM_MEDLOW:
11170 case CM_MEDMID:
11171 /* The destination can serve as a temporary. */
11172 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
11173 break;
11174
11175 case CM_MEDANY:
11176 case CM_EMBMEDANY:
11177 /* The destination cannot serve as a temporary. */
11178 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
11179 start_sequence ();
11180 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
11181 seq = get_insns ();
11182 end_sequence ();
11183 emit_and_preserve (seq, spill_reg, 0);
11184 break;
11185
11186 default:
11187 gcc_unreachable ();
11188 }
11189 }
11190
11191 emit_jump_insn (gen_indirect_jump (scratch));
11192 }
11193
11194 emit_barrier ();
11195
11196 /* Run just enough of rest_of_compilation to get the insns emitted.
11197 There's not really enough bulk here to make other passes such as
11198 instruction scheduling worth while. Note that use_thunk calls
11199 assemble_start_function and assemble_end_function. */
11200 insn = get_insns ();
11201 shorten_branches (insn);
11202 final_start_function (insn, file, 1);
11203 final (insn, file, 1);
11204 final_end_function ();
11205
11206 reload_completed = 0;
11207 epilogue_completed = 0;
11208 }
11209
11210 /* Return true if sparc_output_mi_thunk would be able to output the
11211 assembler code for the thunk function specified by the arguments
11212 it is passed, and false otherwise. */
11213 static bool
sparc_can_output_mi_thunk(const_tree thunk_fndecl ATTRIBUTE_UNUSED,HOST_WIDE_INT delta ATTRIBUTE_UNUSED,HOST_WIDE_INT vcall_offset,const_tree function ATTRIBUTE_UNUSED)11214 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
11215 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
11216 HOST_WIDE_INT vcall_offset,
11217 const_tree function ATTRIBUTE_UNUSED)
11218 {
11219 /* Bound the loop used in the default method above. */
11220 return (vcall_offset >= -32768 || ! fixed_regs[5]);
11221 }
11222
11223 /* How to allocate a 'struct machine_function'. */
11224
static struct machine_function *
sparc_init_machine_status (void)
{
  /* Allocate a zero-initialized, garbage-collected machine_function for
     the current function; every field starts out as 0/NULL.  */
  return ggc_alloc_cleared_machine_function ();
}
11230
11231 /* Locate some local-dynamic symbol still in use by this function
11232 so that we can print its name in local-dynamic base patterns. */
11233
11234 static const char *
get_some_local_dynamic_name(void)11235 get_some_local_dynamic_name (void)
11236 {
11237 rtx insn;
11238
11239 if (cfun->machine->some_ld_name)
11240 return cfun->machine->some_ld_name;
11241
11242 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
11243 if (INSN_P (insn)
11244 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
11245 return cfun->machine->some_ld_name;
11246
11247 gcc_unreachable ();
11248 }
11249
11250 static int
get_some_local_dynamic_name_1(rtx * px,void * data ATTRIBUTE_UNUSED)11251 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
11252 {
11253 rtx x = *px;
11254
11255 if (x
11256 && GET_CODE (x) == SYMBOL_REF
11257 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
11258 {
11259 cfun->machine->some_ld_name = XSTR (x, 0);
11260 return 1;
11261 }
11262
11263 return 0;
11264 }
11265
11266 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11267 We need to emit DTP-relative relocations. */
11268
11269 static void
sparc_output_dwarf_dtprel(FILE * file,int size,rtx x)11270 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
11271 {
11272 switch (size)
11273 {
11274 case 4:
11275 fputs ("\t.word\t%r_tls_dtpoff32(", file);
11276 break;
11277 case 8:
11278 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
11279 break;
11280 default:
11281 gcc_unreachable ();
11282 }
11283 output_addr_const (file, x);
11284 fputs (")", file);
11285 }
11286
11287 /* Do whatever processing is required at the end of a file. */
11288
static void
sparc_file_end (void)
{
  /* If we need to emit the special GOT helper function, do so now.  */
  if (got_helper_rtx)
    {
      const char *name = XSTR (got_helper_rtx, 0);
      const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
#ifdef DWARF2_UNWIND_INFO
      bool do_cfi;
#endif

      if (USE_HIDDEN_LINKONCE)
	{
	  /* Emit the helper as a hidden, one-only (comdat) function so
	     that multiple objects can each carry a copy without link
	     conflicts.  */
	  tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
				  get_identifier (name),
				  build_function_type_list (void_type_node,
                                                            NULL_TREE));
	  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
					   NULL_TREE, void_type_node);
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
	  DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
	  DECL_VISIBILITY_SPECIFIED (decl) = 1;
	  resolve_unique_section (decl, 0, flag_function_sections);
	  allocate_struct_function (decl, true);
	  cfun->is_thunk = 1;
	  current_function_decl = decl;
	  init_varasm_status ();
	  assemble_start_function (decl, name);
	}
      else
	{
	  /* Otherwise just emit a plain label in the text section,
	     aligned as a function entry point.  */
	  const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
          switch_to_section (text_section);
	  if (align > 0)
	    ASM_OUTPUT_ALIGN (asm_out_file, align);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

#ifdef DWARF2_UNWIND_INFO
      do_cfi = dwarf2out_do_cfi_asm ();
      if (do_cfi)
	fprintf (asm_out_file, "\t.cfi_startproc\n");
#endif
      /* The helper body adds %o7 into the GOT register and returns.
	 With delayed branches, the add sits in the jmp's delay slot;
	 otherwise the add must precede the jmp and a nop fills the
	 slot.  */
      if (flag_delayed_branch)
	fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
		 reg_name, reg_name);
      else
	fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
		 reg_name, reg_name);
#ifdef DWARF2_UNWIND_INFO
      if (do_cfi)
	fprintf (asm_out_file, "\t.cfi_endproc\n");
#endif
    }

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();

#ifdef TARGET_SOLARIS
  solaris_file_end ();
#endif
}
11354
11355 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11356 /* Implement TARGET_MANGLE_TYPE. */
11357
11358 static const char *
sparc_mangle_type(const_tree type)11359 sparc_mangle_type (const_tree type)
11360 {
11361 if (!TARGET_64BIT
11362 && TYPE_MAIN_VARIANT (type) == long_double_type_node
11363 && TARGET_LONG_DOUBLE_128)
11364 return "g";
11365
11366 /* For all other types, use normal C++ mangling. */
11367 return NULL;
11368 }
11369 #endif
11370
/* Expand a membar instruction for various use cases.  Both the LOAD_STORE
   and BEFORE_AFTER arguments are of the form X_Y.  They are two-bit masks
   where bit 0 indicates that X is true, and bit 1 indicates Y is true.  */
11374
void
sparc_emit_membar_for_model (enum memmodel model,
			     int load_store, int before_after)
{
  /* Bits for the MEMBAR mmask field.  */
  const int LoadLoad = 1;
  const int StoreLoad = 2;
  const int LoadStore = 4;
  const int StoreStore = 8;

  /* MM accumulates the barrier bits MODEL requires; IMPLIED collects the
     bits the processor's memory model already guarantees, which therefore
     need not be emitted.  */
  int mm = 0, implied = 0;

  switch (sparc_memory_model)
    {
    case SMM_SC:
      /* Sequential Consistency.  All memory transactions are immediately
	 visible in sequential execution order.  No barriers needed.  */
      implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
      break;

    case SMM_TSO:
      /* Total Store Ordering: all memory transactions with store semantics
	 are followed by an implied StoreStore.  */
      implied |= StoreStore;

      /* If we're not looking for a raw barrier (before+after), then atomic
	 operations get the benefit of being both load and store.  */
      if (load_store == 3 && before_after == 1)
	implied |= StoreLoad;
      /* FALLTHRU */

    case SMM_PSO:
      /* Partial Store Ordering: all memory transactions with load semantics
	 are followed by an implied LoadLoad | LoadStore.  */
      implied |= LoadLoad | LoadStore;

      /* If we're not looking for a raw barrier (before+after), then atomic
	 operations get the benefit of being both load and store.  */
      if (load_store == 3 && before_after == 2)
	implied |= StoreLoad | StoreStore;
      /* FALLTHRU */

    case SMM_RMO:
      /* Relaxed Memory Ordering: no implicit bits.  */
      break;

    default:
      gcc_unreachable ();
    }

  if (before_after & 1)
    {
      /* A barrier before the operation is needed for release-type
	 models.  */
      if (model == MEMMODEL_RELEASE
	  || model == MEMMODEL_ACQ_REL
	  || model == MEMMODEL_SEQ_CST)
	{
	  if (load_store & 1)
	    mm |= LoadLoad | StoreLoad;
	  if (load_store & 2)
	    mm |= LoadStore | StoreStore;
	}
    }
  if (before_after & 2)
    {
      /* A barrier after the operation is needed for acquire-type
	 models.  */
      if (model == MEMMODEL_ACQUIRE
	  || model == MEMMODEL_ACQ_REL
	  || model == MEMMODEL_SEQ_CST)
	{
	  if (load_store & 1)
	    mm |= LoadLoad | LoadStore;
	  if (load_store & 2)
	    mm |= StoreLoad | StoreStore;
	}
    }

  /* Remove the bits implied by the system memory model.  */
  mm &= ~implied;

  /* For raw barriers (before+after), always emit a barrier.
     This will become a compile-time barrier if needed.  */
  if (mm || before_after == 3)
    emit_insn (gen_membar (GEN_INT (mm)));
}
11458
/* Expand code to perform an 8 or 16-bit compare and swap by doing 32-bit
   compare and swap on the word containing the byte or half-word.  */
11461
static void
sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
				  rtx oldval, rtx newval)
{
  rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
  rtx addr = gen_reg_rtx (Pmode);
  rtx off = gen_reg_rtx (SImode);
  rtx oldv = gen_reg_rtx (SImode);
  rtx newv = gen_reg_rtx (SImode);
  rtx oldvalue = gen_reg_rtx (SImode);
  rtx newvalue = gen_reg_rtx (SImode);
  rtx res = gen_reg_rtx (SImode);
  rtx resv = gen_reg_rtx (SImode);
  rtx memsi, val, mask, end_label, loop_label, cc;

  /* ADDR = start of the aligned SImode word containing the datum.  */
  emit_insn (gen_rtx_SET (VOIDmode, addr,
			  gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));

  /* OFF = byte offset of the datum within that word.  */
  if (Pmode != SImode)
    addr1 = gen_lowpart (SImode, addr1);
  emit_insn (gen_rtx_SET (VOIDmode, off,
			  gen_rtx_AND (SImode, addr1, GEN_INT (3))));

  memsi = gen_rtx_MEM (SImode, addr);
  set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
  MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);

  /* VAL = current contents of the containing word.  */
  val = copy_to_reg (memsi);

  /* Convert the byte offset into the bit position of the datum within
     the (big-endian) word: XOR with 3 (bytes) or 2 (half-words), then
     multiply by 8.  */
  emit_insn (gen_rtx_SET (VOIDmode, off,
			  gen_rtx_XOR (SImode, off,
				       GEN_INT (GET_MODE (mem) == QImode
						? 3 : 2))));

  emit_insn (gen_rtx_SET (VOIDmode, off,
			  gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));

  /* MASK selects the datum's bits within the word.  */
  if (GET_MODE (mem) == QImode)
    mask = force_reg (SImode, GEN_INT (0xff));
  else
    mask = force_reg (SImode, GEN_INT (0xffff));

  emit_insn (gen_rtx_SET (VOIDmode, mask,
			  gen_rtx_ASHIFT (SImode, mask, off)));

  /* VAL = the containing word with the datum's bits cleared.  */
  emit_insn (gen_rtx_SET (VOIDmode, val,
			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
				       val)));

  /* OLDV/NEWV = OLDVAL/NEWVAL shifted into position and masked.  */
  oldval = gen_lowpart (SImode, oldval);
  emit_insn (gen_rtx_SET (VOIDmode, oldv,
			  gen_rtx_ASHIFT (SImode, oldval, off)));

  newval = gen_lowpart_common (SImode, newval);
  emit_insn (gen_rtx_SET (VOIDmode, newv,
			  gen_rtx_ASHIFT (SImode, newval, off)));

  emit_insn (gen_rtx_SET (VOIDmode, oldv,
			  gen_rtx_AND (SImode, oldv, mask)));

  emit_insn (gen_rtx_SET (VOIDmode, newv,
			  gen_rtx_AND (SImode, newv, mask)));

  end_label = gen_label_rtx ();
  loop_label = gen_label_rtx ();
  emit_label (loop_label);

  /* Build the full-word expected and replacement values by merging in
     the unchanged surrounding bytes held in VAL.  */
  emit_insn (gen_rtx_SET (VOIDmode, oldvalue,
			  gen_rtx_IOR (SImode, oldv, val)));

  emit_insn (gen_rtx_SET (VOIDmode, newvalue,
			  gen_rtx_IOR (SImode, newv, val)));

  /* Assume success; cleared below if the CAS fails.  */
  emit_move_insn (bool_result, const1_rtx);

  emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));

  /* The CAS succeeded iff it returned exactly the expected word.  */
  emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);

  /* RESV = the bytes outside the datum, as the CAS found them.  */
  emit_insn (gen_rtx_SET (VOIDmode, resv,
			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
				       res)));

  emit_move_insn (bool_result, const0_rtx);

  /* If only the surrounding bytes changed (RESV != VAL), refresh VAL and
     retry; if they match, the datum itself differed, a genuine failure.  */
  cc = gen_compare_reg_1 (NE, resv, val);
  emit_insn (gen_rtx_SET (VOIDmode, val, resv));

  /* Use cbranchcc4 to separate the compare and branch!  */
  emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
				  cc, const0_rtx, loop_label));

  emit_label (end_label);

  /* Extract the datum from the word the CAS returned.  */
  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_AND (SImode, res, mask)));

  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_LSHIFTRT (SImode, res, off)));

  emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
}
11564
11565 /* Expand code to perform a compare-and-swap. */
11566
11567 void
sparc_expand_compare_and_swap(rtx operands[])11568 sparc_expand_compare_and_swap (rtx operands[])
11569 {
11570 rtx bval, retval, mem, oldval, newval;
11571 enum machine_mode mode;
11572 enum memmodel model;
11573
11574 bval = operands[0];
11575 retval = operands[1];
11576 mem = operands[2];
11577 oldval = operands[3];
11578 newval = operands[4];
11579 model = (enum memmodel) INTVAL (operands[6]);
11580 mode = GET_MODE (mem);
11581
11582 sparc_emit_membar_for_model (model, 3, 1);
11583
11584 if (reg_overlap_mentioned_p (retval, oldval))
11585 oldval = copy_to_reg (oldval);
11586
11587 if (mode == QImode || mode == HImode)
11588 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
11589 else
11590 {
11591 rtx (*gen) (rtx, rtx, rtx, rtx);
11592 rtx x;
11593
11594 if (mode == SImode)
11595 gen = gen_atomic_compare_and_swapsi_1;
11596 else
11597 gen = gen_atomic_compare_and_swapdi_1;
11598 emit_insn (gen (retval, mem, oldval, newval));
11599
11600 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
11601 if (x != bval)
11602 convert_move (bval, x, 1);
11603 }
11604
11605 sparc_emit_membar_for_model (model, 3, 2);
11606 }
11607
/* Convert the element-index selector SEL for vector mode VMODE into the
   byte-index form used by the VIS BMASK instruction and emit the bmask
   insn.  The insn's SImode result is discarded; it is emitted for its
   machine side effect, consumed by a subsequent BSHUFFLE (cf.
   vector_init_bshuffle).  */

void
sparc_expand_vec_perm_bmask (enum machine_mode vmode, rtx sel)
{
  rtx t_1, t_2, t_3;

  sel = gen_lowpart (DImode, sel);
  switch (vmode)
    {
    case V2SImode:
      /* inp = xxxxxxxAxxxxxxxB */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ....xxxxxxxAxxx. */
      sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
				 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
      t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
				 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .......B */
      /* t_1 = ...A.... */
      sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
      /* sel = ...A...B */
      sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
      /* sel = AAAABBBB * 4 */
      t_1 = force_reg (SImode, GEN_INT (0x01230123));
      /* sel = { A*4, A*4+1, A*4+2, ... } */
      break;

    case V4HImode:
      /* inp = xxxAxxxBxxxCxxxD */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ..xxxAxxxBxxxCxx */
      /* t_2 = ....xxxAxxxBxxxC */
      /* t_3 = ......xxxAxxxBxx */
      sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
				 GEN_INT (0x07),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
				 GEN_INT (0x0700),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
				 GEN_INT (0x070000),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
				 GEN_INT (0x07000000),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .......D */
      /* t_1 = .....C.. */
      /* t_2 = ...B.... */
      /* t_3 = .A...... */
      sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
      sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
      /* sel = .A.B.C.D */
      sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
      /* sel = AABBCCDD * 2 */
      t_1 = force_reg (SImode, GEN_INT (0x01010101));
      /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
      break;

    case V8QImode:
      /* input = xAxBxCxDxExFxGxH */
      sel = expand_simple_binop (DImode, AND, sel,
				 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
					  | 0x0f0f0f0f),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .A.B.C.D.E.F.G.H */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ..A.B.C.D.E.F.G. */
      sel = expand_simple_binop (DImode, IOR, sel, t_1,
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .AABBCCDDEEFFGGH */
      sel = expand_simple_binop (DImode, AND, sel,
				 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
					  | 0xff00ff),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ..AB..CD..EF..GH */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ....AB..CD..EF.. */
      sel = expand_simple_binop (DImode, IOR, sel, t_1,
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ..ABABCDCDEFEFGH */
      sel = expand_simple_binop (DImode, AND, sel,
				 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ....ABCD....EFGH */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ........ABCD.... */
      sel = gen_lowpart (SImode, sel);
      t_1 = gen_lowpart (SImode, t_1);
      break;

    default:
      gcc_unreachable ();
    }

  /* Always perform the final addition/merge within the bmask insn.  */
  emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
}
11714
11715 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
11716
11717 static bool
sparc_frame_pointer_required(void)11718 sparc_frame_pointer_required (void)
11719 {
11720 /* If the stack pointer is dynamically modified in the function, it cannot
11721 serve as the frame pointer. */
11722 if (cfun->calls_alloca)
11723 return true;
11724
11725 /* If the function receives nonlocal gotos, it needs to save the frame
11726 pointer in the nonlocal_goto_save_area object. */
11727 if (cfun->has_nonlocal_label)
11728 return true;
11729
11730 /* In flat mode, that's it. */
11731 if (TARGET_FLAT)
11732 return false;
11733
11734 /* Otherwise, the frame pointer is required if the function isn't leaf. */
11735 return !(crtl->is_leaf && only_leaf_regs_used ());
11736 }
11737
11738 /* The way this is structured, we can't eliminate SFP in favor of SP
11739 if the frame pointer is required: we want to use the SFP->HFP elimination
11740 in that case. But the test in update_eliminables doesn't know we are
11741 assuming below that we only do the former elimination. */
11742
11743 static bool
sparc_can_eliminate(const int from ATTRIBUTE_UNUSED,const int to)11744 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
11745 {
11746 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
11747 }
11748
11749 /* Return the hard frame pointer directly to bypass the stack bias. */
11750
static rtx
sparc_builtin_setjmp_frame_value (void)
{
  /* Use %fp itself rather than the (biased) virtual frame pointer, so
     the saved value does not include the stack bias adjustment.  */
  return hard_frame_pointer_rtx;
}
11756
11757 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
11758 they won't be allocated. */
11759
static void
sparc_conditional_register_usage (void)
{
  /* Reserve the register holding the PIC offset table, if any.  */
  if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  /* If the user has passed -f{fixed,call-{used,saved}}-g5
     then honor it.  */
  if (TARGET_ARCH32 && fixed_regs[5])
    fixed_regs[5] = 1;
  else if (TARGET_ARCH64 && fixed_regs[5] == 2)
    fixed_regs[5] = 0;
  if (! TARGET_V9)
    {
      int regno;
      /* The upper FP registers only exist on V9 processors.  */
      for (regno = SPARC_FIRST_V9_FP_REG;
	   regno <= SPARC_LAST_V9_FP_REG;
	   regno++)
	fixed_regs[regno] = 1;
      /* %fcc0 is used by v8 and v9.  */
      for (regno = SPARC_FIRST_V9_FCC_REG + 1;
	   regno <= SPARC_LAST_V9_FCC_REG;
	   regno++)
	fixed_regs[regno] = 1;
    }
  if (! TARGET_FPU)
    {
      /* Without an FPU, the FP registers and FP condition codes are
	 unusable.  */
      int regno;
      for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
	fixed_regs[regno] = 1;
    }
  /* If the user has passed -f{fixed,call-{used,saved}}-g2
     then honor it.  Likewise with g3 and g4.  */
  if (fixed_regs[2] == 2)
    fixed_regs[2] = ! TARGET_APP_REGS;
  if (fixed_regs[3] == 2)
    fixed_regs[3] = ! TARGET_APP_REGS;
  /* %g4 is always reserved in the embedded medium/anywhere code model.  */
  if (TARGET_ARCH32 && fixed_regs[4] == 2)
    fixed_regs[4] = ! TARGET_APP_REGS;
  else if (TARGET_CM_EMBMEDANY)
    fixed_regs[4] = 1;
  else if (fixed_regs[4] == 2)
    fixed_regs[4] = 0;
  if (TARGET_FLAT)
    {
      int regno;
      /* Disable leaf functions.  */
      memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	leaf_reg_remap [regno] = regno;
    }
  /* Treat the VIS Graphics Status Register as globally live.  */
  if (TARGET_VIS)
    global_regs[SPARC_GSR_REG] = 1;
}
11816
11817 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
11818
11819 - We can't load constants into FP registers.
11820 - We can't load FP constants into integer registers when soft-float,
11821 because there is no soft-float pattern with a r/F constraint.
11822 - We can't load FP constants into integer registers for TFmode unless
11823 it is 0.0L, because there is no movtf pattern with a r/F constraint.
11824 - Try and reload integer constants (symbolic or otherwise) back into
11825 registers directly, rather than having them dumped to memory. */
11826
static reg_class_t
sparc_preferred_reload_class (rtx x, reg_class_t rclass)
{
  enum machine_mode mode = GET_MODE (x);
  if (CONSTANT_P (x))
    {
      /* Reject classes that cannot take this constant directly: any
	 class containing FP regs, FP constants under soft-float, and
	 nonzero TFmode constants (see the comment above).  */
      if (FP_REG_CLASS_P (rclass)
	  || rclass == GENERAL_OR_FP_REGS
	  || rclass == GENERAL_OR_EXTRA_FP_REGS
	  || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
	  || (mode == TFmode && ! const_zero_operand (x, mode)))
	return NO_REGS;

      /* Integer constants reload best into integer registers.  */
      if (GET_MODE_CLASS (mode) == MODE_INT)
	return GENERAL_REGS;

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  /* Only the all-zeros and all-ones vector constants can be
	     materialized in FP registers directly.  */
	  if (! FP_REG_CLASS_P (rclass)
	      || !(const_zero_operand (x, mode)
		   || const_all_ones_operand (x, mode)))
	    return NO_REGS;
	}
    }

  /* NOTE(review): with VIS3 in 32-bit mode, values currently in integer
     registers are steered away from the EXTRA_FP_REGS classes —
     presumably because int <-> upper-FP moves are unavailable there;
     confirm against the VIS3 move patterns.  */
  if (TARGET_VIS3
      && ! TARGET_ARCH64
      && (rclass == EXTRA_FP_REGS
	  || rclass == GENERAL_OR_EXTRA_FP_REGS))
    {
      int regno = true_regnum (x);

      if (SPARC_INT_REG_P (regno))
	return (rclass == EXTRA_FP_REGS
		? FP_REGS : GENERAL_OR_FP_REGS);
    }

  return rclass;
}
11866
11867 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
11868 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
11869
const char *
output_v8plus_mult (rtx insn, rtx *operands, const char *opcode)
{
  char mulstr[32];

  /* V8+ wide multiplies are only emitted in 32-bit mode.  */
  gcc_assert (! TARGET_ARCH64);

  /* Zero-extend the low word of operand 1 if its upper 32 bits are not
     known to be zero already.  */
  if (sparc_check_64 (operands[1], insn) <= 0)
    output_asm_insn ("srl\t%L1, 0, %L1", operands);
  /* NOTE(review): which_alternative == 1 selects the variant that can
     combine high/low words in place without the %3/%4 scratch registers
     — confirm against the constraints of the corresponding insn
     pattern.  */
  if (which_alternative == 1)
    output_asm_insn ("sllx\t%H1, 32, %H1", operands);
  if (GET_CODE (operands[2]) == CONST_INT)
    {
      /* Case 1: multiplication by an immediate operand.  */
      if (which_alternative == 1)
	{
	  output_asm_insn ("or\t%L1, %H1, %H1", operands);
	  sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
	  output_asm_insn (mulstr, operands);
	  return "srlx\t%L0, 32, %H0";
	}
      else
	{
	  output_asm_insn ("sllx\t%H1, 32, %3", operands);
	  output_asm_insn ("or\t%L1, %3, %3", operands);
	  sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
	  output_asm_insn (mulstr, operands);
	  output_asm_insn ("srlx\t%3, 32, %H0", operands);
	  return "mov\t%3, %L0";
	}
    }
  else if (rtx_equal_p (operands[1], operands[2]))
    {
      /* Case 2: squaring — only one 64-bit value needs assembling.  */
      if (which_alternative == 1)
	{
	  output_asm_insn ("or\t%L1, %H1, %H1", operands);
	  sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
	  output_asm_insn (mulstr, operands);
	  return "srlx\t%L0, 32, %H0";
	}
      else
	{
	  output_asm_insn ("sllx\t%H1, 32, %3", operands);
	  output_asm_insn ("or\t%L1, %3, %3", operands);
	  sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
	  output_asm_insn (mulstr, operands);
	  output_asm_insn ("srlx\t%3, 32, %H0", operands);
	  return "mov\t%3, %L0";
	}
    }
  /* Case 3: general register x register multiply; assemble both 64-bit
     operands first.  */
  if (sparc_check_64 (operands[2], insn) <= 0)
    output_asm_insn ("srl\t%L2, 0, %L2", operands);
  if (which_alternative == 1)
    {
      output_asm_insn ("or\t%L1, %H1, %H1", operands);
      output_asm_insn ("sllx\t%H2, 32, %L1", operands);
      output_asm_insn ("or\t%L2, %L1, %L1", operands);
      sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
      output_asm_insn (mulstr, operands);
      return "srlx\t%L0, 32, %H0";
    }
  else
    {
      output_asm_insn ("sllx\t%H1, 32, %3", operands);
      output_asm_insn ("sllx\t%H2, 32, %4", operands);
      output_asm_insn ("or\t%L1, %3, %3", operands);
      output_asm_insn ("or\t%L2, %4, %4", operands);
      sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
      output_asm_insn (mulstr, operands);
      output_asm_insn ("srlx\t%3, 32, %H0", operands);
      return "mov\t%3, %L0";
    }
}
11942
11943 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
11944 all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE
11945 and INNER_MODE are the modes describing TARGET. */
11946
11947 static void
vector_init_bshuffle(rtx target,rtx elt,enum machine_mode mode,enum machine_mode inner_mode)11948 vector_init_bshuffle (rtx target, rtx elt, enum machine_mode mode,
11949 enum machine_mode inner_mode)
11950 {
11951 rtx t1, final_insn;
11952 int bmask;
11953
11954 t1 = gen_reg_rtx (mode);
11955
11956 elt = convert_modes (SImode, inner_mode, elt, true);
11957 emit_move_insn (gen_lowpart(SImode, t1), elt);
11958
11959 switch (mode)
11960 {
11961 case V2SImode:
11962 final_insn = gen_bshufflev2si_vis (target, t1, t1);
11963 bmask = 0x45674567;
11964 break;
11965 case V4HImode:
11966 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
11967 bmask = 0x67676767;
11968 break;
11969 case V8QImode:
11970 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
11971 bmask = 0x77777777;
11972 break;
11973 default:
11974 gcc_unreachable ();
11975 }
11976
11977 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), CONST0_RTX (SImode),
11978 force_reg (SImode, GEN_INT (bmask))));
11979 emit_insn (final_insn);
11980 }
11981
11982 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
11983 all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn. */
11984
11985 static void
vector_init_fpmerge(rtx target,rtx elt)11986 vector_init_fpmerge (rtx target, rtx elt)
11987 {
11988 rtx t1, t2, t2_low, t3, t3_low;
11989
11990 t1 = gen_reg_rtx (V4QImode);
11991 elt = convert_modes (SImode, QImode, elt, true);
11992 emit_move_insn (gen_lowpart (SImode, t1), elt);
11993
11994 t2 = gen_reg_rtx (V8QImode);
11995 t2_low = gen_lowpart (V4QImode, t2);
11996 emit_insn (gen_fpmerge_vis (t2, t1, t1));
11997
11998 t3 = gen_reg_rtx (V8QImode);
11999 t3_low = gen_lowpart (V4QImode, t3);
12000 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
12001
12002 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
12003 }
12004
12005 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12006 all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn. */
12007
12008 static void
vector_init_faligndata(rtx target,rtx elt)12009 vector_init_faligndata (rtx target, rtx elt)
12010 {
12011 rtx t1 = gen_reg_rtx (V4HImode);
12012 int i;
12013
12014 elt = convert_modes (SImode, HImode, elt, true);
12015 emit_move_insn (gen_lowpart (SImode, t1), elt);
12016
12017 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
12018 force_reg (SImode, GEN_INT (6)),
12019 const0_rtx));
12020
12021 for (i = 0; i < 4; i++)
12022 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
12023 }
12024
12025 /* Emit code to initialize TARGET to values for individual fields VALS. */
12026
12027 void
sparc_expand_vector_init(rtx target,rtx vals)12028 sparc_expand_vector_init (rtx target, rtx vals)
12029 {
12030 const enum machine_mode mode = GET_MODE (target);
12031 const enum machine_mode inner_mode = GET_MODE_INNER (mode);
12032 const int n_elts = GET_MODE_NUNITS (mode);
12033 int i, n_var = 0;
12034 bool all_same;
12035 rtx mem;
12036
12037 all_same = true;
12038 for (i = 0; i < n_elts; i++)
12039 {
12040 rtx x = XVECEXP (vals, 0, i);
12041 if (!CONSTANT_P (x))
12042 n_var++;
12043
12044 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12045 all_same = false;
12046 }
12047
12048 if (n_var == 0)
12049 {
12050 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
12051 return;
12052 }
12053
12054 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
12055 {
12056 if (GET_MODE_SIZE (inner_mode) == 4)
12057 {
12058 emit_move_insn (gen_lowpart (SImode, target),
12059 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
12060 return;
12061 }
12062 else if (GET_MODE_SIZE (inner_mode) == 8)
12063 {
12064 emit_move_insn (gen_lowpart (DImode, target),
12065 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
12066 return;
12067 }
12068 }
12069 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
12070 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
12071 {
12072 emit_move_insn (gen_highpart (word_mode, target),
12073 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
12074 emit_move_insn (gen_lowpart (word_mode, target),
12075 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
12076 return;
12077 }
12078
12079 if (all_same && GET_MODE_SIZE (mode) == 8)
12080 {
12081 if (TARGET_VIS2)
12082 {
12083 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
12084 return;
12085 }
12086 if (mode == V8QImode)
12087 {
12088 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
12089 return;
12090 }
12091 if (mode == V4HImode)
12092 {
12093 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
12094 return;
12095 }
12096 }
12097
12098 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12099 for (i = 0; i < n_elts; i++)
12100 emit_move_insn (adjust_address_nv (mem, inner_mode,
12101 i * GET_MODE_SIZE (inner_mode)),
12102 XVECEXP (vals, 0, i));
12103 emit_move_insn (target, mem);
12104 }
12105
/* Implement TARGET_SECONDARY_RELOAD.

   Return the register class needed as an intermediate when moving X, of
   mode MODE, into (IN_P true) or out of (IN_P false) a register of class
   RCLASS_I, or NO_REGS when no intermediate class is required.  SRI may
   instead be filled in with a special reload icode and/or an extra cost.
   The tests below are checked in order and each returns directly, so
   their sequence is significant.  */

static reg_class_t
sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
			enum machine_mode mode, secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;

  /* Default: no special reload pattern, no added cost.  */
  sri->icode = CODE_FOR_nothing;
  sri->extra_cost = 0;

  /* We need a temporary when loading/storing a HImode/QImode value
     between memory and the FPU registers.  This can happen when combine puts
     a paradoxical subreg in a float/fix conversion insn.  */
  if (FP_REG_CLASS_P (rclass)
      && (mode == HImode || mode == QImode)
      && (GET_CODE (x) == MEM
	  || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
	      && true_regnum (x) == -1)))
    return GENERAL_REGS;

  /* On 32-bit we need a temporary when loading/storing a DFmode value
     between unaligned memory and the upper FPU registers.  */
  if (TARGET_ARCH32
      && rclass == EXTRA_FP_REGS
      && mode == DFmode
      && GET_CODE (x) == MEM
      && ! mem_min_alignment (x, 8))
    return FP_REGS;

  /* Non-PIC medium-anywhere code models handle symbolic (resp. text
     segment) addresses via a special reload pattern rather than an
     intermediate register class.  */
  if (((TARGET_CM_MEDANY
	&& symbolic_operand (x, mode))
       || (TARGET_CM_EMBMEDANY
	   && text_segment_operand (x, mode)))
      && ! flag_pic)
    {
      if (in_p)
	sri->icode = direct_optab_handler (reload_in_optab, mode);
      else
	sri->icode = direct_optab_handler (reload_out_optab, mode);
      return NO_REGS;
    }

  if (TARGET_VIS3 && TARGET_ARCH32)
    {
      int regno = true_regnum (x);

      /* When using VIS3 fp<-->int register moves, on 32-bit we have
	 to move 8-byte values in 4-byte pieces.  This only works via
	 FP_REGS, and not via EXTRA_FP_REGS.  Therefore if we try to
	 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
	 an FP_REGS intermediate move.  */
      if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
	  || ((general_or_i64_p (rclass)
	       || rclass == GENERAL_OR_FP_REGS)
	      && SPARC_FP_REG_P (regno)))
	{
	  /* Charge for the extra intermediate move.  */
	  sri->extra_cost = 2;
	  return FP_REGS;
	}
    }

  return NO_REGS;
}
12170
/* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
   OPERANDS[0] in MODE.  OPERANDS[1] is the operator of the condition.
   Return true on success, false when the move cannot be expanded
   (a DImode comparison on a 32-bit target).  */

bool
sparc_expand_conditional_move (enum machine_mode mode, rtx *operands)
{
  enum rtx_code rc = GET_CODE (operands[1]);
  enum machine_mode cmp_mode;
  rtx cc_reg, dst, cmp;

  cmp = operands[1];
  /* 64-bit comparisons need 64-bit registers.  */
  if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
    return false;

  /* Without hardware quad support, TFmode comparisons go through a
     library call which hands back a replacement comparison.  */
  if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
    cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);

  cmp_mode = GET_MODE (XEXP (cmp, 0));
  rc = GET_CODE (cmp);

  dst = operands[0];
  if (! rtx_equal_p (operands[2], dst)
      && ! rtx_equal_p (operands[3], dst))
    {
      /* Neither source is the destination: move OPERANDS[3] in first,
	 then conditionally overwrite with OPERANDS[2] below.  Use a
	 fresh pseudo when DST overlaps the comparison, since the move
	 would otherwise clobber a compare input.  */
      if (reg_overlap_mentioned_p (dst, cmp))
	dst = gen_reg_rtx (mode);

      emit_move_insn (dst, operands[3]);
    }
  else if (operands[2] == dst)
    {
      /* OPERANDS[2] is already in DST, so instead conditionally move
	 OPERANDS[3] into it under the reversed condition.
	 NOTE(review): this test uses pointer equality while the branch
	 above used rtx_equal_p -- presumably the two cannot disagree
	 for the operands reaching here; confirm against callers.  */
      operands[2] = operands[3];

      /* FP condition reversal must preserve NaN (unordered) behavior.  */
      if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
	rc = reverse_condition_maybe_unordered (rc);
      else
	rc = reverse_condition (rc);
    }

  /* Use the register being compared directly when the V9
     register-contents conditional moves can handle the condition;
     otherwise materialize a condition-code register.  */
  if (XEXP (cmp, 1) == const0_rtx
      && GET_CODE (XEXP (cmp, 0)) == REG
      && cmp_mode == DImode
      && v9_regcmp_p (rc))
    cc_reg = XEXP (cmp, 0);
  else
    cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));

  cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);

  emit_insn (gen_rtx_SET (VOIDmode, dst,
			  gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));

  /* Copy the result back if a temporary was substituted above.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  return true;
}
12228
12229 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
12230 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
12231 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
12232 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
12233 code to be used for the condition mask. */
12234
12235 void
sparc_expand_vcond(enum machine_mode mode,rtx * operands,int ccode,int fcode)12236 sparc_expand_vcond (enum machine_mode mode, rtx *operands, int ccode, int fcode)
12237 {
12238 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
12239 enum rtx_code code = GET_CODE (operands[3]);
12240
12241 mask = gen_reg_rtx (Pmode);
12242 cop0 = operands[4];
12243 cop1 = operands[5];
12244 if (code == LT || code == GE)
12245 {
12246 rtx t;
12247
12248 code = swap_condition (code);
12249 t = cop0; cop0 = cop1; cop1 = t;
12250 }
12251
12252 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
12253
12254 fcmp = gen_rtx_UNSPEC (Pmode,
12255 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
12256 fcode);
12257
12258 cmask = gen_rtx_UNSPEC (DImode,
12259 gen_rtvec (2, mask, gsr),
12260 ccode);
12261
12262 bshuf = gen_rtx_UNSPEC (mode,
12263 gen_rtvec (3, operands[1], operands[2], gsr),
12264 UNSPEC_BSHUFFLE);
12265
12266 emit_insn (gen_rtx_SET (VOIDmode, mask, fcmp));
12267 emit_insn (gen_rtx_SET (VOIDmode, gsr, cmask));
12268
12269 emit_insn (gen_rtx_SET (VOIDmode, operands[0], bshuf));
12270 }
12271
12272 /* On sparc, any mode which naturally allocates into the float
12273 registers should return 4 here. */
12274
12275 unsigned int
sparc_regmode_natural_size(enum machine_mode mode)12276 sparc_regmode_natural_size (enum machine_mode mode)
12277 {
12278 int size = UNITS_PER_WORD;
12279
12280 if (TARGET_ARCH64)
12281 {
12282 enum mode_class mclass = GET_MODE_CLASS (mode);
12283
12284 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
12285 size = 4;
12286 }
12287
12288 return size;
12289 }
12290
12291 /* Return TRUE if it is a good idea to tie two pseudo registers
12292 when one has mode MODE1 and one has mode MODE2.
12293 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
12294 for any hard reg, then this must be FALSE for correct output.
12295
12296 For V9 we have to deal with the fact that only the lower 32 floating
12297 point registers are 32-bit addressable. */
12298
12299 bool
sparc_modes_tieable_p(enum machine_mode mode1,enum machine_mode mode2)12300 sparc_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
12301 {
12302 enum mode_class mclass1, mclass2;
12303 unsigned short size1, size2;
12304
12305 if (mode1 == mode2)
12306 return true;
12307
12308 mclass1 = GET_MODE_CLASS (mode1);
12309 mclass2 = GET_MODE_CLASS (mode2);
12310 if (mclass1 != mclass2)
12311 return false;
12312
12313 if (! TARGET_V9)
12314 return true;
12315
12316 /* Classes are the same and we are V9 so we have to deal with upper
12317 vs. lower floating point registers. If one of the modes is a
12318 4-byte mode, and the other is not, we have to mark them as not
12319 tieable because only the lower 32 floating point register are
12320 addressable 32-bits at a time.
12321
12322 We can't just test explicitly for SFmode, otherwise we won't
12323 cover the vector mode cases properly. */
12324
12325 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
12326 return true;
12327
12328 size1 = GET_MODE_SIZE (mode1);
12329 size2 = GET_MODE_SIZE (mode2);
12330 if ((size1 > 4 && size2 == 4)
12331 || (size2 > 4 && size1 == 4))
12332 return false;
12333
12334 return true;
12335 }
12336
12337 #include "gt-sparc.h"
12338