1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2014 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "tree.h"
28 #include "stringpool.h"
29 #include "stor-layout.h"
30 #include "calls.h"
31 #include "varasm.h"
32 #include "rtl.h"
33 #include "regs.h"
34 #include "hard-reg-set.h"
35 #include "insn-config.h"
36 #include "insn-codes.h"
37 #include "conditions.h"
38 #include "output.h"
39 #include "insn-attr.h"
40 #include "flags.h"
41 #include "function.h"
42 #include "except.h"
43 #include "expr.h"
44 #include "optabs.h"
45 #include "recog.h"
46 #include "diagnostic-core.h"
47 #include "ggc.h"
48 #include "tm_p.h"
49 #include "debug.h"
50 #include "target.h"
51 #include "target-def.h"
52 #include "common/common-target.h"
53 #include "pointer-set.h"
54 #include "hash-table.h"
55 #include "vec.h"
56 #include "basic-block.h"
57 #include "tree-ssa-alias.h"
58 #include "internal-fn.h"
59 #include "gimple-fold.h"
60 #include "tree-eh.h"
61 #include "gimple-expr.h"
62 #include "is-a.h"
63 #include "gimple.h"
64 #include "gimplify.h"
65 #include "langhooks.h"
66 #include "reload.h"
67 #include "params.h"
68 #include "df.h"
69 #include "opts.h"
70 #include "tree-pass.h"
71 #include "context.h"
72
/* Processor costs.

   Per-operation costs, expressed in COSTS_N_INSNS units, for each
   supported tuning target.  One instance per processor is defined
   below with positional initializers, so the field order here is
   load-bearing: do not reorder members without updating every table.  */

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply costs is fixed,
     and not variable.  */
  const int int_mul_bit_factor;

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;
};
148
/* Operation costs when tuning for the Cypress processor (the default,
   see sparc_costs below).  */
static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
172
/* Operation costs when tuning for the SuperSPARC processor.  */
static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};
196
/* Operation costs when tuning for the HyperSPARC processor.  */
static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
220
/* Operation costs when tuning for the LEON processor.  */
static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
244
/* Operation costs when tuning for the LEON3 processor.  */
static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
268
/* Operation costs when tuning for the SPARClet processor.  */
static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
292
/* Operation costs when tuning for the UltraSPARC processor.  */
static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};
316
/* Operation costs when tuning for the UltraSPARC III processor.  */
static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};
340
/* Operation costs when tuning for the Niagara processor.  */
static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
364
/* Operation costs when tuning for the Niagara-2 processor.  */
static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
388
/* Operation costs when tuning for the Niagara-3 processor.  */
static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
412
/* Operation costs when tuning for the Niagara-4 processor.  */
static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
436
/* Cost table currently in effect; starts out as the Cypress costs and is
   presumably repointed per -mtune during option handling — see
   sparc_option_override (defined later in this file).  */
static const struct processor_costs *sparc_costs = &cypress_costs;
438
#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way how to find out if
   somebody does not branch between the sethi and jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
/* NOTE(review): without assembler relaxation, reserve the slot only for
   64-bit non-MEDLOW code models or PIC — presumably the cases that need
   the longer call sequence; confirm against the sibcall patterns.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif
449
/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.
   Entries of -1 mark registers with no leaf-function replacement
   (they may not appear in a leaf function candidate — see
   sparc_leaf_regs below).  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};
468
/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  The zero entries mirror the -1 entries
   of leaf_reg_remap above.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};
486
/* Per-function machine-dependent data, garbage-collected (GTY).
   Accessed throughout this file via the sparc_* shorthand macros
   defined just below.  */
struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Some local-dynamic TLS symbol name.  */
  const char *some_ld_name;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};
520
/* Shorthand accessors for the current function's machine_function
   fields (see struct machine_function above).  */
#define sparc_frame_size		cfun->machine->frame_size
#define sparc_apparent_frame_size	cfun->machine->apparent_frame_size
#define sparc_frame_base_reg		cfun->machine->frame_base_reg
#define sparc_frame_base_offset		cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs		cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p		cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p	cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p	cfun->machine->prologue_data_valid_p
529
/* 1 if the next opcode is to be specially indented (presumably for an
   annulled delay slot — confirm against the asm output routines).  */
int sparc_indent_opcode = 0;
532
/* Forward declarations of the static functions defined later in this
   file; these must stay token-identical to the definitions.  */
static void sparc_option_override (void);
static void sparc_init_modes (void);
static void scan_record_type (const_tree, int *, int *, int *);
static int function_arg_slotno (const CUMULATIVE_ARGS *, enum machine_mode,
				const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx, rtx, rtx, int);
static int hypersparc_adjust_cost (rtx, rtx, rtx, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (enum machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (enum machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx);
static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx, rtx, rtx, int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_fpu_init_builtins (void);
static void sparc_vis_init_builtins (void);
static tree sparc_builtin_decl (unsigned, bool);
static rtx sparc_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (enum machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static int sparc_register_move_cost (enum machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, int, int, int, int *, bool);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (enum machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static rtx sparc_struct_value_rtx (tree, int);
static enum machine_mode sparc_promote_function_mode (const_tree, enum machine_mode,
						      int *, const_tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (enum machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, enum machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     enum machine_mode, const_tree, bool);
static void sparc_function_arg_advance (cumulative_args_t,
					enum machine_mode, const_tree, bool);
static rtx sparc_function_arg_1 (cumulative_args_t,
				 enum machine_mode, const_tree, bool, bool);
static rtx sparc_function_arg (cumulative_args_t,
			       enum machine_mode, const_tree, bool);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					enum machine_mode, const_tree, bool);
static unsigned int sparc_function_arg_boundary (enum machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    enum machine_mode, tree, bool);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static rtx sparc_builtin_setjmp_frame_value (void);
static void sparc_conditional_register_usage (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static enum machine_mode sparc_preferred_simd_mode (enum machine_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   enum machine_mode,
					   secondary_reload_info *);
static enum machine_mode sparc_cstore_mode (enum insn_code icode);
static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
642
#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes, supplied entirely by the subtarget;
   the all-NULL entry terminates the table.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       do_diagnostic } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL, 0, 0, false, false, false, NULL, false }
};
#endif
653
/* Option handling.  */

/* Parsed value of the code-model option.  */
enum cmodel sparc_cmodel;

/* Per-register flags for 8 hard registers — presumably whether an asm
   directive has already been emitted for each; confirm against the uses
   later in this file.  */
char sparc_hard_reg_printed[8];
660
/* Initialize the GCC target structure.  Each TARGET_* macro below
   overrides the corresponding default hook; TARGET_INITIALIZER at the
   bottom collects them all into the targetm vector.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

/* Scheduler hooks.  */
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs

/* Address legitimization hooks.  */
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

/* Builtin hooks.  */
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sparc_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

/* Cost hooks.  */
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

/* Argument-passing and return-value hooks.  */
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

/* The value stored by LDSTUB.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff

#undef TARGET_CSTORE_MODE
#define TARGET_CSTORE_MODE sparc_cstore_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv

/* The target hook vector, built from the TARGET_* overrides above.  */
struct gcc_target targetm = TARGET_INITIALIZER;
853
854 /* Return the memory reference contained in X if any, zero otherwise. */
855
856 static rtx
mem_ref(rtx x)857 mem_ref (rtx x)
858 {
859 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
860 x = XEXP (x, 0);
861
862 if (MEM_P (x))
863 return x;
864
865 return NULL_RTX;
866 }
867
868 /* We use a machine specific pass to enable workarounds for errata.
869 We need to have the (essentially) final form of the insn stream in order
870 to properly detect the various hazards. Therefore, this machine specific
871 pass runs as late as possible. The pass is inserted in the pass pipeline
872 at the end of sparc_option_override. */
873
874 static bool
sparc_gate_work_around_errata(void)875 sparc_gate_work_around_errata (void)
876 {
877 /* The only errata we handle are those of the AT697F and UT699. */
878 return sparc_fix_at697f != 0 || sparc_fix_ut699 != 0;
879 }
880
/* Scan the (essentially) final insn stream and insert NOPs where needed
   to work around the load errata of the AT697F (single-word FP load into
   an odd-numbered FP register) and the UT699 (integer and FP load hazards).
   Always returns 0 (no TODO flags).  */

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx insn, next;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
	insn = XVECEXP (PATTERN (insn), 0, 1);

      /* Look for a single-word load into an odd-numbered FP register.  */
      if (sparc_fix_at697f
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	  && MEM_P (SET_SRC (set))
	  && REG_P (SET_DEST (set))
	  && REGNO (SET_DEST (set)) > 31
	  && REGNO (SET_DEST (set)) % 2 != 0)
	{
	  /* The wrong dependency is on the enclosing double register.  */
	  const unsigned int x = REGNO (SET_DEST (set)) - 1;
	  unsigned int src1, src2, dest;
	  int code;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  extract_insn (next);
	  code = INSN_CODE (next);

	  switch (code)
	    {
	    case CODE_FOR_adddf3:
	    case CODE_FOR_subdf3:
	    case CODE_FOR_muldf3:
	    case CODE_FOR_divdf3:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      src2 = REGNO (recog_data.operand[2]);
	      if (src1 != src2)
		{
		  /* Case [1-4]:
				 ld [address], %fx+1
				 FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
		  if ((src1 == x || src2 == x)
		      && (dest == src1 || dest == src2))
		    insert_nop = true;
		}
	      else
		{
		  /* Case 5:
			     ld [address], %fx+1
			     FPOPd %fx, %fx, %fx  */
		  if (src1 == x
		      && dest == src1
		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
		    insert_nop = true;
		}
	      break;

	    case CODE_FOR_sqrtdf2:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      /* Case 6:
			 ld [address], %fx+1
			 fsqrtd %fx, %fx  */
	      if (src1 == x && dest == src1)
		insert_nop = true;
	      break;

	    default:
	      break;
	    }
	}

      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
	       && mem_ref (SET_SRC (set)) != NULL_RTX
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) < 32)
	{
	  /* There is no problem if the second memory access has a data
	     dependency on the first single-cycle load.  */
	  rtx x = SET_DEST (set);

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second memory access to/from an integer register.  */
	  if ((set = single_set (next)) != NULL_RTX)
	    {
	      rtx src = SET_SRC (set);
	      rtx dest = SET_DEST (set);
	      rtx mem;

	      /* LDD is affected.  */
	      if ((mem = mem_ref (src)) != NULL_RTX
		  && REG_P (dest)
		  && REGNO (dest) < 32
		  && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;

	      /* STD is *not* affected.  */
	      else if (MEM_P (dest)
		       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
		       && (src == CONST0_RTX (GET_MODE (dest))
			   || (REG_P (src)
			       && REGNO (src) < 32
			       && REGNO (src) != REGNO (x)))
		       && !reg_mentioned_p (x, XEXP (dest, 0)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load/operation into an FP register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31)
	{
	  /* Number of instructions in the problematic window.  */
	  const int n_insns = 4;
	  /* The problematic combination is with the sibling FP register.  */
	  const unsigned int x = REGNO (SET_DEST (set));
	  const unsigned int y = x ^ 1;
	  rtx after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second load/operation into the sibling FP register.  */
	  if (!((set = single_set (next)) != NULL_RTX
		&& GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
		&& REG_P (SET_DEST (set))
		&& REGNO (SET_DEST (set)) == y))
	    continue;

	  /* Look for a (possible) store from the FP register in the next N
	     instructions, but bail out if it is again modified or if there
	     is a store from the sibling FP register before this store.  */
	  for (after = next, i = 0; i < n_insns; i++)
	    {
	      bool branch_p;

	      after = next_active_insn (after);
	      if (!after)
		break;

	      /* This is a branch with an empty delay slot.  */
	      if (!NONJUMP_INSN_P (after))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = NULL_RTX;
		}
	      /* This is a branch with a filled delay slot.  */
	      else if (GET_CODE (PATTERN (after)) == SEQUENCE)
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = XVECEXP (PATTERN (after), 0, 1);
		}
	      /* This is a regular instruction.  */
	      else
		branch_p = false;

	      if (after && (set = single_set (after)) != NULL_RTX)
		{
		  const rtx src = SET_SRC (set);
		  const rtx dest = SET_DEST (set);
		  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));

		  /* If the FP register is again modified before the store,
		     then the store isn't affected.  */
		  if (REG_P (dest)
		      && (REGNO (dest) == x
			  || (REGNO (dest) == y && size == 8)))
		    break;

		  if (MEM_P (dest) && REG_P (src))
		    {
		      /* If there is a store from the sibling FP register
			 before the store, then the store is not affected.  */
		      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
			break;

		      /* Otherwise, the store is affected.  */
		      if (REGNO (src) == x && size == 4)
			{
			  insert_nop = true;
			  break;
			}
		    }
		}

	      /* If we have a branch in the first M instructions, then we
		 cannot see the (M+2)th instruction so we play safe.  */
	      if (branch_p && i <= (n_insns - 2))
		{
		  insert_nop = true;
		  break;
		}
	    }
	}

      else
	next = NEXT_INSN (insn);

      if (insert_nop)
	emit_insn_before (gen_nop (), next);
    }

  return 0;
}
1124
namespace {

/* Pass descriptor for the errata work-around pass; registered after "dbr"
   in sparc_option_override so it runs on the final insn stream.  */

const pass_data pass_data_work_around_errata =
{
  RTL_PASS, /* type */
  "errata", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_verify_rtl_sharing, /* todo_flags_finish */
};

class pass_work_around_errata : public rtl_opt_pass
{
public:
  pass_work_around_errata(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_work_around_errata, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when an affected CPU (-mfix-at697f/-mfix-ut699) is selected.  */
  bool gate () { return sparc_gate_work_around_errata (); }
  /* Insert the NOPs required to avoid the hazards.  */
  unsigned int execute () { return sparc_do_work_around_errata (); }

}; // class pass_work_around_errata

} // anon namespace
1156
1157 rtl_opt_pass *
make_pass_work_around_errata(gcc::context * ctxt)1158 make_pass_work_around_errata (gcc::context *ctxt)
1159 {
1160 return new pass_work_around_errata (ctxt);
1161 }
1162
1163 /* Helpers for TARGET_DEBUG_OPTIONS. */
1164 static void
dump_target_flag_bits(const int flags)1165 dump_target_flag_bits (const int flags)
1166 {
1167 if (flags & MASK_64BIT)
1168 fprintf (stderr, "64BIT ");
1169 if (flags & MASK_APP_REGS)
1170 fprintf (stderr, "APP_REGS ");
1171 if (flags & MASK_FASTER_STRUCTS)
1172 fprintf (stderr, "FASTER_STRUCTS ");
1173 if (flags & MASK_FLAT)
1174 fprintf (stderr, "FLAT ");
1175 if (flags & MASK_FMAF)
1176 fprintf (stderr, "FMAF ");
1177 if (flags & MASK_FPU)
1178 fprintf (stderr, "FPU ");
1179 if (flags & MASK_HARD_QUAD)
1180 fprintf (stderr, "HARD_QUAD ");
1181 if (flags & MASK_POPC)
1182 fprintf (stderr, "POPC ");
1183 if (flags & MASK_PTR64)
1184 fprintf (stderr, "PTR64 ");
1185 if (flags & MASK_STACK_BIAS)
1186 fprintf (stderr, "STACK_BIAS ");
1187 if (flags & MASK_UNALIGNED_DOUBLES)
1188 fprintf (stderr, "UNALIGNED_DOUBLES ");
1189 if (flags & MASK_V8PLUS)
1190 fprintf (stderr, "V8PLUS ");
1191 if (flags & MASK_VIS)
1192 fprintf (stderr, "VIS ");
1193 if (flags & MASK_VIS2)
1194 fprintf (stderr, "VIS2 ");
1195 if (flags & MASK_VIS3)
1196 fprintf (stderr, "VIS3 ");
1197 if (flags & MASK_CBCOND)
1198 fprintf (stderr, "CBCOND ");
1199 if (flags & MASK_DEPRECATED_V8_INSNS)
1200 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1201 if (flags & MASK_SPARCLET)
1202 fprintf (stderr, "SPARCLET ");
1203 if (flags & MASK_SPARCLITE)
1204 fprintf (stderr, "SPARCLITE ");
1205 if (flags & MASK_V8)
1206 fprintf (stderr, "V8 ");
1207 if (flags & MASK_V9)
1208 fprintf (stderr, "V9 ");
1209 }
1210
1211 static void
dump_target_flags(const char * prefix,const int flags)1212 dump_target_flags (const char *prefix, const int flags)
1213 {
1214 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1215 dump_target_flag_bits (flags);
1216 fprintf(stderr, "]\n");
1217 }
1218
/* Validate and override various options, and do some machine dependent
   initialization.  Implements TARGET_OPTION_OVERRIDE: runs once at start-up
   after command-line option processing, before any function is compiled.  */

static void
sparc_option_override (void)
{
  /* Mapping from -mcmodel= strings to code model enumerators,
     NULL-terminated.  */
  static struct code_model {
    const char *const name;
    const enum cmodel value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { NULL, (enum cmodel) 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const enum processor_type processor;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
    { TARGET_CPU_v8, PROCESSOR_V8 },
    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
    { TARGET_CPU_leon, PROCESSOR_LEON },
    { TARGET_CPU_leon3, PROCESSOR_LEON3 },
    { TARGET_CPU_sparclite, PROCESSOR_F930 },
    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
    { TARGET_CPU_v9, PROCESSOR_V9 },
    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
    { -1, PROCESSOR_V7 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  This must match the order of
     the enum processor_type in sparc-opts.h.  */
  static struct cpu_table {
    const char *const name;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7", MASK_ISA, 0 },
    { "cypress", MASK_ISA, 0 },
    { "v8", MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", MASK_ISA, MASK_V8 },
    { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
    { "leon", MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU },
    { "leon3", MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU },
    { "sparclite", MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU.  */
    { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
    { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "sparclet", MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701", MASK_ISA, MASK_SPARCLET },
    { "v9", MASK_ISA, MASK_V9 },
    /* UltraSPARC I, II, IIi */
    { "ultrasparc", MASK_ISA,
    /* Although insns using %y are deprecated, it is a clear win.  */
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC III */
    /* ??? Check if %y issue still holds true.  */
    { "ultrasparc3", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
    /* UltraSPARC T1 */
    { "niagara", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC T2 */
    { "niagara2", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2 },
    /* UltraSPARC T3 */
    { "niagara3", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
    /* UltraSPARC T4 */
    { "niagara4", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
  };
  const struct cpu_table *cpu;
  unsigned int i;
  int fpu;

  /* Parse -mdebug=<comma-separated list>, each item optionally prefixed
     with '!' to clear the corresponding debug mask bit.  */
  if (sparc_debug_string != NULL)
    {
      const char *q;
      char *p;

      p = ASTRDUP (sparc_debug_string);
      while ((q = strtok (p, ",")) != NULL)
	{
	  bool invert;
	  int mask;

	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (! strcmp (q, "all"))
	    mask = MASK_DEBUG_ALL;
	  else if (! strcmp (q, "options"))
	    mask = MASK_DEBUG_OPTIONS;
	  else
	    error ("unknown -mdebug-%s switch", q);

	  if (invert)
	    sparc_debug &= ~mask;
	  else
	    sparc_debug |= mask;
	}
    }

  if (TARGET_DEBUG_OPTIONS)
    {
      dump_target_flags("Initial target_flags", target_flags);
      dump_target_flags("target_flags_explicit", target_flags_explicit);
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64bit archs to use 128 bit long double */
  if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      target_flags |= MASK_LONG_DOUBLE_128;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
	{
	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
	      break;
	  if (cmodel->name == NULL)
	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
	  else
	    sparc_cmodel = cmodel->value;
	}
      else
	error ("-mcmodel= is not supported on 32 bit systems");
    }

  /* Check that -fcall-saved-REG wasn't specified for out registers.  */
  for (i = 8; i < 16; i++)
    if (!call_used_regs [i])
      {
	error ("-fcall-saved-REG is not supported for out registers");
        call_used_regs [i] = 1;
      }

  fpu = target_flags & MASK_FPU; /* save current -mfpu status */

  /* Set the default CPU.  */
  if (!global_options_set.x_sparc_cpu_and_features)
    {
      for (def = &cpu_default[0]; def->cpu != -1; ++def)
	if (def->cpu == TARGET_CPU_DEFAULT)
	  break;
      gcc_assert (def->cpu != -1);
      sparc_cpu_and_features = def->processor;
    }

  if (!global_options_set.x_sparc_cpu)
    sparc_cpu = sparc_cpu_and_features;

  cpu = &cpu_table[(int) sparc_cpu_and_features];

  if (TARGET_DEBUG_OPTIONS)
    {
      fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
      fprintf (stderr, "sparc_cpu: %s\n",
	       cpu_table[(int) sparc_cpu].name);
      dump_target_flags ("cpu->disable", cpu->disable);
      dump_target_flags ("cpu->enable", cpu->enable);
    }

  /* Apply the CPU's ISA bits, masking out features the assembler in use
     cannot emit.  */
  target_flags &= ~cpu->disable;
  target_flags |= (cpu->enable
#ifndef HAVE_AS_FMAF_HPC_VIS3
		   & ~(MASK_FMAF | MASK_VIS3)
#endif
#ifndef HAVE_AS_SPARC4
		   & ~MASK_CBCOND
#endif
#ifndef HAVE_AS_LEON
		   & ~(MASK_LEON | MASK_LEON3)
#endif
		   );

  /* If -mfpu or -mno-fpu was explicitly used, don't override with
     the processor default.  */
  if (target_flags_explicit & MASK_FPU)
    target_flags = (target_flags & ~MASK_FPU) | fpu;

  /* -mvis2 implies -mvis */
  if (TARGET_VIS2)
    target_flags |= MASK_VIS;

  /* -mvis3 implies -mvis2 and -mvis */
  if (TARGET_VIS3)
    target_flags |= MASK_VIS2 | MASK_VIS;

  /* Don't allow -mvis, -mvis2, -mvis3, or -mfmaf if FPU is
     disabled.  */
  if (! TARGET_FPU)
    target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_FMAF);

  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
     are available.
     -m64 also implies v9.  */
  if (TARGET_VIS || TARGET_ARCH64)
    {
      target_flags |= MASK_V9;
      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
    }

  /* -mvis also implies -mv8plus on 32-bit */
  if (TARGET_VIS && ! TARGET_ARCH64)
    target_flags |= MASK_V8PLUS;

  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
  if (! TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32 bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;

  /* Supply a default value for align_functions.  */
  if (align_functions == 0
      && (sparc_cpu == PROCESSOR_ULTRASPARC
	  || sparc_cpu == PROCESSOR_ULTRASPARC3
	  || sparc_cpu == PROCESSOR_NIAGARA
	  || sparc_cpu == PROCESSOR_NIAGARA2
	  || sparc_cpu == PROCESSOR_NIAGARA3
	  || sparc_cpu == PROCESSOR_NIAGARA4))
    align_functions = 32;

  /* Validate PCC_STRUCT_RETURN.  */
  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);

  /* Only use .uaxword when compiling for a 64-bit target.  */
  if (!TARGET_ARCH64)
    targetm.asm_out.unaligned_op.di = NULL;

  /* Do various machine dependent initializations.  */
  sparc_init_modes ();

  /* Set up function hooks.  */
  init_machine_status = sparc_init_machine_status;

  /* Select the cost model matching the CPU we schedule for.  */
  switch (sparc_cpu)
    {
    case PROCESSOR_V7:
    case PROCESSOR_CYPRESS:
      sparc_costs = &cypress_costs;
      break;
    case PROCESSOR_V8:
    case PROCESSOR_SPARCLITE:
    case PROCESSOR_SUPERSPARC:
      sparc_costs = &supersparc_costs;
      break;
    case PROCESSOR_F930:
    case PROCESSOR_F934:
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      sparc_costs = &hypersparc_costs;
      break;
    case PROCESSOR_LEON:
      sparc_costs = &leon_costs;
      break;
    case PROCESSOR_LEON3:
      sparc_costs = &leon3_costs;
      break;
    case PROCESSOR_SPARCLET:
    case PROCESSOR_TSC701:
      sparc_costs = &sparclet_costs;
      break;
    case PROCESSOR_V9:
    case PROCESSOR_ULTRASPARC:
      sparc_costs = &ultrasparc_costs;
      break;
    case PROCESSOR_ULTRASPARC3:
      sparc_costs = &ultrasparc3_costs;
      break;
    case PROCESSOR_NIAGARA:
      sparc_costs = &niagara_costs;
      break;
    case PROCESSOR_NIAGARA2:
      sparc_costs = &niagara2_costs;
      break;
    case PROCESSOR_NIAGARA3:
      sparc_costs = &niagara3_costs;
      break;
    case PROCESSOR_NIAGARA4:
      sparc_costs = &niagara4_costs;
      break;
    case PROCESSOR_NATIVE:
      gcc_unreachable ();
    };

  if (sparc_memory_model == SMM_DEFAULT)
    {
      /* Choose the memory model for the operating system.  */
      enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
      if (os_default != SMM_DEFAULT)
	sparc_memory_model = os_default;
      /* Choose the most relaxed model for the processor.  */
      else if (TARGET_V9)
	sparc_memory_model = SMM_RMO;
      else if (TARGET_LEON3)
	sparc_memory_model = SMM_TSO;
      else if (TARGET_LEON)
	sparc_memory_model = SMM_SC;
      else if (TARGET_V8)
	sparc_memory_model = SMM_PSO;
      else
	sparc_memory_model = SMM_SC;
    }

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (TARGET_DEBUG_OPTIONS)
    dump_target_flags ("Final target_flags", target_flags);

  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 2
			  : (sparc_cpu == PROCESSOR_ULTRASPARC3
			     ? 8 : 3)),
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_ULTRASPARC3
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 64 : 32),
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* Disable save slot sharing for call-clobbered registers by default.
     The IRA sharing algorithm works on single registers only and this
     pessimizes for double floating-point registers.  */
  if (!global_options_set.x_flag_ira_share_save_slots)
    flag_ira_share_save_slots = 0;

  /* We register a machine specific pass to work around errata, if any.
     The pass must be scheduled as late as possible so that we have the
     (essentially) final form of the insn stream to work on.
     Registering the pass must be done at start up.  It's convenient to
     do it here.  */
  opt_pass *errata_pass = make_pass_work_around_errata (g);
  struct register_pass_info insert_pass_work_around_errata =
    {
      errata_pass,		/* pass */
      "dbr",			/* reference_pass_name */
      1,			/* ref_pass_instance_number */
      PASS_POS_INSERT_AFTER	/* po_op */
    };
  register_pass (&insert_pass_work_around_errata);
}
1628
1629 /* Miscellaneous utilities. */
1630
1631 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
1632 or branch on register contents instructions. */
1633
1634 int
v9_regcmp_p(enum rtx_code code)1635 v9_regcmp_p (enum rtx_code code)
1636 {
1637 return (code == EQ || code == NE || code == GE || code == LT
1638 || code == LE || code == GT);
1639 }
1640
1641 /* Nonzero if OP is a floating point constant which can
1642 be loaded into an integer register using a single
1643 sethi instruction. */
1644
1645 int
fp_sethi_p(rtx op)1646 fp_sethi_p (rtx op)
1647 {
1648 if (GET_CODE (op) == CONST_DOUBLE)
1649 {
1650 REAL_VALUE_TYPE r;
1651 long i;
1652
1653 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
1654 REAL_VALUE_TO_TARGET_SINGLE (r, i);
1655 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
1656 }
1657
1658 return 0;
1659 }
1660
1661 /* Nonzero if OP is a floating point constant which can
1662 be loaded into an integer register using a single
1663 mov instruction. */
1664
1665 int
fp_mov_p(rtx op)1666 fp_mov_p (rtx op)
1667 {
1668 if (GET_CODE (op) == CONST_DOUBLE)
1669 {
1670 REAL_VALUE_TYPE r;
1671 long i;
1672
1673 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
1674 REAL_VALUE_TO_TARGET_SINGLE (r, i);
1675 return SPARC_SIMM13_P (i);
1676 }
1677
1678 return 0;
1679 }
1680
1681 /* Nonzero if OP is a floating point constant which can
1682 be loaded into an integer register using a high/losum
1683 instruction sequence. */
1684
1685 int
fp_high_losum_p(rtx op)1686 fp_high_losum_p (rtx op)
1687 {
1688 /* The constraints calling this should only be in
1689 SFmode move insns, so any constant which cannot
1690 be moved using a single insn will do. */
1691 if (GET_CODE (op) == CONST_DOUBLE)
1692 {
1693 REAL_VALUE_TYPE r;
1694 long i;
1695
1696 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
1697 REAL_VALUE_TO_TARGET_SINGLE (r, i);
1698 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
1699 }
1700
1701 return 0;
1702 }
1703
1704 /* Return true if the address of LABEL can be loaded by means of the
1705 mov{si,di}_pic_label_ref patterns in PIC mode. */
1706
1707 static bool
can_use_mov_pic_label_ref(rtx label)1708 can_use_mov_pic_label_ref (rtx label)
1709 {
1710 /* VxWorks does not impose a fixed gap between segments; the run-time
1711 gap can be different from the object-file gap. We therefore can't
1712 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
1713 are absolutely sure that X is in the same segment as the GOT.
1714 Unfortunately, the flexibility of linker scripts means that we
1715 can't be sure of that in general, so assume that GOT-relative
1716 accesses are never valid on VxWorks. */
1717 if (TARGET_VXWORKS_RTP)
1718 return false;
1719
1720 /* Similarly, if the label is non-local, it might end up being placed
1721 in a different section than the current one; now mov_pic_label_ref
1722 requires the label and the code to be in the same section. */
1723 if (LABEL_REF_NONLOCAL_P (label))
1724 return false;
1725
1726 /* Finally, if we are reordering basic blocks and partition into hot
1727 and cold sections, this might happen for any label. */
1728 if (flag_reorder_blocks_and_partition)
1729 return false;
1730
1731 return true;
1732 }
1733
/* Expand a move instruction.  Return true if all work is done (no further
   insn should be emitted by the caller), false if the possibly-rewritten
   OPERANDS should still go through the normal move pattern.  */

bool
sparc_expand_move (enum machine_mode mode, rtx *operands)
{
  /* Handle sets of MEM first.  */
  if (GET_CODE (operands[0]) == MEM)
    {
      /* 0 is a register (or a pair of registers) on SPARC.  */
      if (register_or_zero_operand (operands[1], mode))
	return false;

      if (!reload_in_progress)
	{
	  operands[0] = validize_mem (operands[0]);
	  operands[1] = force_reg (mode, operands[1]);
	}
    }

  /* Fixup TLS cases.  */
  if (TARGET_HAVE_TLS
      && CONSTANT_P (operands[1])
      && sparc_tls_referenced_p (operands [1]))
    {
      operands[1] = sparc_legitimize_tls_address (operands[1]);
      return false;
    }

  /* Fixup PIC cases.  */
  if (flag_pic && CONSTANT_P (operands[1]))
    {
      if (pic_address_needs_scratch (operands[1]))
	operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);

      /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases.  */
      if (GET_CODE (operands[1]) == LABEL_REF
	  && can_use_mov_pic_label_ref (operands[1]))
	{
	  if (mode == SImode)
	    {
	      emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }

	  if (mode == DImode)
	    {
	      gcc_assert (TARGET_ARCH64);
	      emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }
	}

      if (symbolic_operand (operands[1], mode))
	{
	  operands[1]
	    = sparc_legitimize_pic_address (operands[1],
					    reload_in_progress
					    ? operands[0] : NULL_RTX);
	  return false;
	}
    }

  /* If we are trying to toss an integer constant into FP registers,
     or loading a FP or vector constant, force it into memory.  */
  if (CONSTANT_P (operands[1])
      && REG_P (operands[0])
      && (SPARC_FP_REG_P (REGNO (operands[0]))
	  || SCALAR_FLOAT_MODE_P (mode)
	  || VECTOR_MODE_P (mode)))
    {
      /* emit_group_store will send such bogosity to us when it is
         not storing directly into memory.  So fix this up to avoid
         crashes in output_constant_pool.  */
      if (operands [1] == const0_rtx)
	operands[1] = CONST0_RTX (mode);

      /* We can clear or set to all-ones FP registers if TARGET_VIS, and
	 always other regs.  */
      if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
	  && (const_zero_operand (operands[1], mode)
	      || const_all_ones_operand (operands[1], mode)))
	return false;

      if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
	  /* We are able to build any SF constant in integer registers
	     with at most 2 instructions.  */
	  && (mode == SFmode
	      /* And any DF constant in integer registers.  */
	      || (mode == DFmode
		  && ! can_create_pseudo_p ())))
	return false;

      operands[1] = force_const_mem (mode, operands[1]);
      if (!reload_in_progress)
	operands[1] = validize_mem (operands[1]);
      return false;
    }

  /* Accept non-constants and valid constants unmodified.  */
  if (!CONSTANT_P (operands[1])
      || GET_CODE (operands[1]) == HIGH
      || input_operand (operands[1], mode))
    return false;

  /* At this point OPERANDS[1] is a constant that needs a multi-insn
     synthesis sequence; dispatch on the mode.  */
  switch (mode)
    {
    case QImode:
      /* All QImode constants require only one insn, so proceed.  */
      break;

    case HImode:
    case SImode:
      sparc_emit_set_const32 (operands[0], operands[1]);
      return true;

    case DImode:
      /* input_operand should have filtered out 32-bit mode.  */
      sparc_emit_set_const64 (operands[0], operands[1]);
      return true;

    case TImode:
      {
	rtx high, low;
	/* TImode isn't available in 32-bit mode.  */
	split_double (operands[1], &high, &low);
	emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
			      high));
	emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
			      low));
      }
      return true;

    default:
      gcc_unreachable ();
    }

  return false;
}
1872
/* Load OP1, a 32-bit constant, into OP0, a register.
   We know it can't be done in one insn when we get
   here, the move expander guarantees this.
   Emits a two-insn sethi/or (for CONST_INT) or HIGH/LO_SUM (for symbols)
   sequence, going through a fresh pseudo when one can be created so that
   CSE can reuse the intermediate value.  */

static void
sparc_emit_set_const32 (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx temp = op0;

  if (can_create_pseudo_p ())
    temp = gen_reg_rtx (mode);

  if (GET_CODE (op1) == CONST_INT)
    {
      /* The expander only sends us constants that actually need two insns;
	 anything a single sethi or mov could load would not reach here.  */
      gcc_assert (!small_int_operand (op1, mode)
		  && !const_high_operand (op1, mode));

      /* Emit them as real moves instead of a HIGH/LO_SUM,
	 this way CSE can see everything and reuse intermediate
	 values if it wants.  */
      emit_insn (gen_rtx_SET (VOIDmode, temp,
			      GEN_INT (INTVAL (op1)
				       & ~(HOST_WIDE_INT)0x3ff)));

      emit_insn (gen_rtx_SET (VOIDmode,
			      op0,
			      gen_rtx_IOR (mode, temp,
					   GEN_INT (INTVAL (op1) & 0x3ff))));
    }
  else
    {
      /* A symbol, emit in the traditional way.  */
      emit_insn (gen_rtx_SET (VOIDmode, temp,
			      gen_rtx_HIGH (mode, op1)));
      emit_insn (gen_rtx_SET (VOIDmode,
			      op0, gen_rtx_LO_SUM (mode, temp, op1)));
    }
}
1912
/* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
   If TEMP is nonzero, we are forbidden to use any other scratch
   registers.  Otherwise, we are allowed to generate them as needed.

   Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
   or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns).  */

void
sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
{
  rtx temp1, temp2, temp3, temp4, temp5;
  rtx ti_temp = 0;

  /* A TImode TEMP supplies two adjacent DImode hard registers; keep a
     handle on the pair so we can pick the half not aliasing OP0.  */
  if (temp && GET_MODE (temp) == TImode)
    {
      ti_temp = temp;
      temp = gen_rtx_REG (DImode, REGNO (temp));
    }

  /* SPARC-V9 code-model support.  */
  switch (sparc_cmodel)
    {
    case CM_MEDLOW:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 4TB of the virtual address
	 space.

	 sethi	%hi(symbol), %temp1
	 or	%temp1, %lo(symbol), %reg  */
      if (temp)
	temp1 = temp;  /* op0 is allowed.  */
      else
	temp1 = gen_reg_rtx (DImode);

      emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
      emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
      break;

    case CM_MEDMID:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 16TB of the virtual address
	 space.

	 sethi	%h44(symbol), %temp1
	 or	%temp1, %m44(symbol), %temp2
	 sllx	%temp2, 12, %temp3
	 or	%temp3, %l44(symbol), %reg  */
      if (temp)
	{
	  temp1 = op0;
	  temp2 = op0;
	  temp3 = temp;  /* op0 is allowed.  */
	}
      else
	{
	  temp1 = gen_reg_rtx (DImode);
	  temp2 = gen_reg_rtx (DImode);
	  temp3 = gen_reg_rtx (DImode);
	}

      emit_insn (gen_seth44 (temp1, op1));
      emit_insn (gen_setm44 (temp2, temp1, op1));
      emit_insn (gen_rtx_SET (VOIDmode, temp3,
			      gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
      emit_insn (gen_setl44 (op0, temp3, op1));
      break;

    case CM_MEDANY:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable can be placed anywhere in the virtual address
	 space.

	 sethi	%hh(symbol), %temp1
	 sethi	%lm(symbol), %temp2
	 or	%temp1, %hm(symbol), %temp3
	 sllx	%temp3, 32, %temp4
	 or	%temp4, %temp2, %temp5
	 or	%temp5, %lo(symbol), %reg  */
      if (temp)
	{
	  /* It is possible that one of the registers we got for operands[2]
	     might coincide with that of operands[0] (which is why we made
	     it TImode).  Pick the other one to use as our scratch.  */
	  if (rtx_equal_p (temp, op0))
	    {
	      gcc_assert (ti_temp);
	      temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
	    }
	  temp1 = op0;
	  temp2 = temp;  /* op0 is _not_ allowed, see above.  */
	  temp3 = op0;
	  temp4 = op0;
	  temp5 = op0;
	}
      else
	{
	  temp1 = gen_reg_rtx (DImode);
	  temp2 = gen_reg_rtx (DImode);
	  temp3 = gen_reg_rtx (DImode);
	  temp4 = gen_reg_rtx (DImode);
	  temp5 = gen_reg_rtx (DImode);
	}

      emit_insn (gen_sethh (temp1, op1));
      emit_insn (gen_setlm (temp2, op1));
      emit_insn (gen_sethm (temp3, temp1, op1));
      emit_insn (gen_rtx_SET (VOIDmode, temp4,
			      gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
      emit_insn (gen_rtx_SET (VOIDmode, temp5,
			      gen_rtx_PLUS (DImode, temp4, temp2)));
      emit_insn (gen_setlo (op0, temp5, op1));
      break;

    case CM_EMBMEDANY:
      /* Old old old backwards compatibility kruft here.
	 Essentially it is MEDLOW with a fixed 64-bit
	 virtual base added to all data segment addresses.
	 Text-segment stuff is computed like MEDANY, we can't
	 reuse the code above because the relocation knobs
	 look different.

	 Data segment:	sethi	%hi(symbol), %temp1
			add	%temp1, EMBMEDANY_BASE_REG, %temp2
			or	%temp2, %lo(symbol), %reg  */
      if (data_segment_operand (op1, GET_MODE (op1)))
	{
	  if (temp)
	    {
	      temp1 = temp;  /* op0 is allowed.  */
	      temp2 = op0;
	    }
	  else
	    {
	      temp1 = gen_reg_rtx (DImode);
	      temp2 = gen_reg_rtx (DImode);
	    }

	  emit_insn (gen_embmedany_sethi (temp1, op1));
	  emit_insn (gen_embmedany_brsum (temp2, temp1));
	  emit_insn (gen_embmedany_losum (op0, temp2, op1));
	}

      /* Text segment:	sethi	%uhi(symbol), %temp1
			sethi	%hi(symbol), %temp2
			or	%temp1, %ulo(symbol), %temp3
			sllx	%temp3, 32, %temp4
			or	%temp4, %temp2, %temp5
			or	%temp5, %lo(symbol), %reg  */
      else
	{
	  if (temp)
	    {
	      /* It is possible that one of the registers we got for operands[2]
		 might coincide with that of operands[0] (which is why we made
		 it TImode).  Pick the other one to use as our scratch.  */
	      if (rtx_equal_p (temp, op0))
		{
		  gcc_assert (ti_temp);
		  temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
		}
	      temp1 = op0;
	      temp2 = temp;  /* op0 is _not_ allowed, see above.  */
	      temp3 = op0;
	      temp4 = op0;
	      temp5 = op0;
	    }
	  else
	    {
	      temp1 = gen_reg_rtx (DImode);
	      temp2 = gen_reg_rtx (DImode);
	      temp3 = gen_reg_rtx (DImode);
	      temp4 = gen_reg_rtx (DImode);
	      temp5 = gen_reg_rtx (DImode);
	    }

	  emit_insn (gen_embmedany_textuhi (temp1, op1));
	  emit_insn (gen_embmedany_texthi  (temp2, op1));
	  emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
	  emit_insn (gen_rtx_SET (VOIDmode, temp4,
				  gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
	  emit_insn (gen_rtx_SET (VOIDmode, temp5,
				  gen_rtx_PLUS (DImode, temp4, temp2)));
	  emit_insn (gen_embmedany_textlo  (op0, temp5, op1));
	}
      break;

    default:
      gcc_unreachable ();
    }
}
2115
#if HOST_BITS_PER_WIDE_INT == 32
/* On a 32-bit host we cannot manipulate 64-bit constants in a single
   HOST_WIDE_INT, so the arch64 constant-formation code below is compiled
   out and this stub must never be reached.  */
static void
sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
2122 #else
2123 /* These avoid problems when cross compiling. If we do not
2124 go through all this hair then the optimizer will see
2125 invalid REG_EQUAL notes or in some cases none at all. */
2126 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2127 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2128 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2129 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2130
/* The optimizer is not to assume anything about exactly
   which bits are set for a HIGH, they are unspecified.
   Unfortunately this leads to many missed optimizations
   during CSE.  We mask out the non-HIGH bits, and match
   a plain movdi, to alleviate this problem.  */
static rtx
gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
{
  /* Equivalent of sethi: only bits 10-31 of VAL survive.  */
  return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
}
2141
/* Return a SET of DEST to the 64-bit constant VAL (a plain movdi).  */
static rtx
gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
{
  return gen_rtx_SET (VOIDmode, dest, GEN_INT (val));
}
2147
/* Return the rtx (SRC | VAL) in DImode; VAL is emitted as a CONST_INT
   so the optimizer sees the exact bits, unlike a LO_SUM.  */
static rtx
gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
{
  return gen_rtx_IOR (DImode, src, GEN_INT (val));
}
2153
/* Return the rtx (SRC ^ VAL) in DImode, with VAL as an explicit
   CONST_INT for the benefit of CSE.  */
static rtx
gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
{
  return gen_rtx_XOR (DImode, src, GEN_INT (val));
}
2159
2160 /* Worker routines for 64-bit constant formation on arch64.
2161 One of the key things to be doing in these emissions is
2162 to create as many temp REGs as possible. This makes it
2163 possible for half-built constants to be used later when
2164 such values are similar to something required later on.
2165 Without doing this, the optimizer cannot see such
2166 opportunities. */
2167
static void sparc_emit_set_const64_quick1 (rtx, rtx,
					   unsigned HOST_WIDE_INT, int);

/* Load into OP0 (using TEMP as scratch) a 64-bit constant whose upper
   32 bits are all zero (IS_NEG == 0) or all one (IS_NEG != 0) and whose
   low 32 bits are LOW_BITS, using a two-insn sethi/or or sethi/xor
   sequence.  */

static void
sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
			       unsigned HOST_WIDE_INT low_bits, int is_neg)
{
  unsigned HOST_WIDE_INT high_bits;

  /* In the negated case, sethi builds ~LOW_BITS and the xor below flips
     it back while simultaneously setting the upper 32 bits.  */
  if (is_neg)
    high_bits = (~low_bits) & 0xffffffff;
  else
    high_bits = low_bits;

  emit_insn (gen_safe_HIGH64 (temp, high_bits));
  if (!is_neg)
    {
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_safe_OR64 (temp, (high_bits & 0x3ff))));
    }
  else
    {
      /* If we are XOR'ing with -1, then we should emit a one's complement
	 instead.  This way the combiner will notice logical operations
	 such as ANDN later on and substitute.  */
      if ((low_bits & 0x3ff) == 0x3ff)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_NOT (DImode, temp)));
	}
      else
	{
	  /* The xor constant sign-extends, setting bits 32-63.  */
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_safe_XOR64 (temp,
						  (-(HOST_WIDE_INT)0x400
						   | (low_bits & 0x3ff)))));
	}
    }
}
2207
static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
					   unsigned HOST_WIDE_INT, int);

/* Load the 32-bit value HIGH_BITS into OP0 (using TEMP as scratch),
   shift it left by SHIFT_COUNT, then OR in LOW_IMMEDIATE if nonzero.
   Emits at most three (usually two) instructions.  */

static void
sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
			       unsigned HOST_WIDE_INT high_bits,
			       unsigned HOST_WIDE_INT low_immediate,
			       int shift_count)
{
  rtx temp2 = op0;

  if ((high_bits & 0xfffffc00) != 0)
    {
      /* sethi covers bits 10-31; OR in the low 10 bits only if set.  */
      emit_insn (gen_safe_HIGH64 (temp, high_bits));
      if ((high_bits & ~0xfffffc00) != 0)
	emit_insn (gen_rtx_SET (VOIDmode, op0,
				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
	temp2 = temp;
    }
  else
    {
      /* HIGH_BITS fits in a simm13 move.  */
      emit_insn (gen_safe_SET64 (temp, high_bits));
      temp2 = temp;
    }

  /* Now shift it up into place.  */
  emit_insn (gen_rtx_SET (VOIDmode, op0,
			  gen_rtx_ASHIFT (DImode, temp2,
					  GEN_INT (shift_count))));

  /* If there is a low immediate part piece, finish up by
     putting that in as well.  */
  if (low_immediate != 0)
    emit_insn (gen_rtx_SET (VOIDmode, op0,
			    gen_safe_OR64 (op0, low_immediate)));
}
2245
static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
					    unsigned HOST_WIDE_INT);

/* Full 64-bit constant decomposition.  Even though this is the
   'worst' case, we still optimize a few things away.  */
static void
sparc_emit_set_const64_longway (rtx op0, rtx temp,
				unsigned HOST_WIDE_INT high_bits,
				unsigned HOST_WIDE_INT low_bits)
{
  rtx sub_temp = op0;

  if (can_create_pseudo_p ())
    sub_temp = gen_reg_rtx (DImode);

  /* First materialize HIGH_BITS (the upper 32 bits) in SUB_TEMP.  */
  if ((high_bits & 0xfffffc00) != 0)
    {
      emit_insn (gen_safe_HIGH64 (temp, high_bits));
      if ((high_bits & ~0xfffffc00) != 0)
	emit_insn (gen_rtx_SET (VOIDmode,
				sub_temp,
				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
	sub_temp = temp;
    }
  else
    {
      emit_insn (gen_safe_SET64 (temp, high_bits));
      sub_temp = temp;
    }

  if (can_create_pseudo_p ())
    {
      /* With fresh pseudos available, build LOW_BITS separately with
	 sethi/or and add it to the shifted upper half; the distinct
	 intermediates give CSE the most to work with.  */
      rtx temp2 = gen_reg_rtx (DImode);
      rtx temp3 = gen_reg_rtx (DImode);
      rtx temp4 = gen_reg_rtx (DImode);

      emit_insn (gen_rtx_SET (VOIDmode, temp4,
			      gen_rtx_ASHIFT (DImode, sub_temp,
					      GEN_INT (32))));

      emit_insn (gen_safe_HIGH64 (temp2, low_bits));
      if ((low_bits & ~0xfffffc00) != 0)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, temp3,
				  gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_PLUS (DImode, temp4, temp3)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_PLUS (DImode, temp4, temp2)));
	}
    }
  else
    {
      /* No scratch registers: dribble LOW_BITS into OP0 in 12+12+8 bit
	 chunks, shifting between each OR.  */
      rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
      rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
      rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
      int to_shift = 12;

      /* We are in the middle of reload, so this is really
	 painful.  However we do still make an attempt to
	 avoid emitting truly stupid code.  */
      if (low1 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_ASHIFT (DImode, sub_temp,
						  GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_IOR (DImode, op0, low1)));
	  sub_temp = op0;
	  to_shift = 12;
	}
      else
	{
	  /* Chunk is zero: fold its width into the next shift.  */
	  to_shift += 12;
	}
      if (low2 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_ASHIFT (DImode, sub_temp,
						  GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_IOR (DImode, op0, low2)));
	  sub_temp = op0;
	  to_shift = 8;
	}
      else
	{
	  to_shift += 8;
	}
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_rtx_ASHIFT (DImode, sub_temp,
					      GEN_INT (to_shift))));
      if (low3 != const0_rtx)
	emit_insn (gen_rtx_SET (VOIDmode, op0,
				gen_rtx_IOR (DImode, op0, low3)));
      /* phew...  */
    }
}
2348
2349 /* Analyze a 64-bit constant for certain properties. */
2350 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2351 unsigned HOST_WIDE_INT,
2352 int *, int *, int *);
2353
2354 static void
analyze_64bit_constant(unsigned HOST_WIDE_INT high_bits,unsigned HOST_WIDE_INT low_bits,int * hbsp,int * lbsp,int * abbasp)2355 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2356 unsigned HOST_WIDE_INT low_bits,
2357 int *hbsp, int *lbsp, int *abbasp)
2358 {
2359 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2360 int i;
2361
2362 lowest_bit_set = highest_bit_set = -1;
2363 i = 0;
2364 do
2365 {
2366 if ((lowest_bit_set == -1)
2367 && ((low_bits >> i) & 1))
2368 lowest_bit_set = i;
2369 if ((highest_bit_set == -1)
2370 && ((high_bits >> (32 - i - 1)) & 1))
2371 highest_bit_set = (64 - i - 1);
2372 }
2373 while (++i < 32
2374 && ((highest_bit_set == -1)
2375 || (lowest_bit_set == -1)));
2376 if (i == 32)
2377 {
2378 i = 0;
2379 do
2380 {
2381 if ((lowest_bit_set == -1)
2382 && ((high_bits >> i) & 1))
2383 lowest_bit_set = i + 32;
2384 if ((highest_bit_set == -1)
2385 && ((low_bits >> (32 - i - 1)) & 1))
2386 highest_bit_set = 32 - i - 1;
2387 }
2388 while (++i < 32
2389 && ((highest_bit_set == -1)
2390 || (lowest_bit_set == -1)));
2391 }
2392 /* If there are no bits set this should have gone out
2393 as one instruction! */
2394 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2395 all_bits_between_are_set = 1;
2396 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2397 {
2398 if (i < 32)
2399 {
2400 if ((low_bits & (1 << i)) != 0)
2401 continue;
2402 }
2403 else
2404 {
2405 if ((high_bits & (1 << (i - 32))) != 0)
2406 continue;
2407 }
2408 all_bits_between_are_set = 0;
2409 break;
2410 }
2411 *hbsp = highest_bit_set;
2412 *lbsp = lowest_bit_set;
2413 *abbasp = all_bits_between_are_set;
2414 }
2415
2416 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2417
2418 static int
const64_is_2insns(unsigned HOST_WIDE_INT high_bits,unsigned HOST_WIDE_INT low_bits)2419 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2420 unsigned HOST_WIDE_INT low_bits)
2421 {
2422 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2423
2424 if (high_bits == 0
2425 || high_bits == 0xffffffff)
2426 return 1;
2427
2428 analyze_64bit_constant (high_bits, low_bits,
2429 &highest_bit_set, &lowest_bit_set,
2430 &all_bits_between_are_set);
2431
2432 if ((highest_bit_set == 63
2433 || lowest_bit_set == 0)
2434 && all_bits_between_are_set != 0)
2435 return 1;
2436
2437 if ((highest_bit_set - lowest_bit_set) < 21)
2438 return 1;
2439
2440 return 0;
2441 }
2442
2443 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2444 unsigned HOST_WIDE_INT,
2445 int, int);
2446
2447 static unsigned HOST_WIDE_INT
create_simple_focus_bits(unsigned HOST_WIDE_INT high_bits,unsigned HOST_WIDE_INT low_bits,int lowest_bit_set,int shift)2448 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2449 unsigned HOST_WIDE_INT low_bits,
2450 int lowest_bit_set, int shift)
2451 {
2452 HOST_WIDE_INT hi, lo;
2453
2454 if (lowest_bit_set < 32)
2455 {
2456 lo = (low_bits >> lowest_bit_set) << shift;
2457 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2458 }
2459 else
2460 {
2461 lo = 0;
2462 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2463 }
2464 gcc_assert (! (hi & lo));
2465 return (hi | lo);
2466 }
2467
/* Here we are sure to be arch64 and this is an integer constant
   being loaded into a register.  Emit the most efficient
   insn sequence possible.  Detection of all the 1-insn cases
   has been done already.  */
static void
sparc_emit_set_const64 (rtx op0, rtx op1)
{
  unsigned HOST_WIDE_INT high_bits, low_bits;
  int lowest_bit_set, highest_bit_set;
  int all_bits_between_are_set;
  rtx temp = 0;

  /* Sanity check that we know what we are working with.  */
  gcc_assert (TARGET_ARCH64
	      && (GET_CODE (op0) == SUBREG
		  || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));

  /* During reload we may not create pseudos; reuse OP0 as scratch.  */
  if (! can_create_pseudo_p ())
    temp = op0;

  if (GET_CODE (op1) != CONST_INT)
    {
      sparc_emit_set_symbolic_const64 (op0, op1, temp);
      return;
    }

  if (! temp)
    temp = gen_reg_rtx (DImode);

  high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
  low_bits = (INTVAL (op1) & 0xffffffff);

  /* low_bits	bits 0  --> 31
     high_bits	bits 32 --> 63  */

  analyze_64bit_constant (high_bits, low_bits,
			  &highest_bit_set, &lowest_bit_set,
			  &all_bits_between_are_set);

  /* First try for a 2-insn sequence.  */

  /* These situations are preferred because the optimizer can
   * do more things with them:
   * 1) mov	-1, %reg
   *    sllx	%reg, shift, %reg
   * 2) mov	-1, %reg
   *    srlx	%reg, shift, %reg
   * 3) mov	some_small_const, %reg
   *    sllx	%reg, shift, %reg
   */
  if (((highest_bit_set == 63
	|| lowest_bit_set == 0)
       && all_bits_between_are_set != 0)
      || ((highest_bit_set - lowest_bit_set) < 12))
    {
      HOST_WIDE_INT the_const = -1;
      int shift = lowest_bit_set;

      if ((highest_bit_set != 63
	   && lowest_bit_set != 0)
	  || all_bits_between_are_set == 0)
	{
	  /* Case 3: a small field of bits, right-justified.  */
	  the_const =
	    create_simple_focus_bits (high_bits, low_bits,
				      lowest_bit_set, 0);
	}
      else if (lowest_bit_set == 0)
	/* Negative shift encodes a right shift (case 2).  */
	shift = -(63 - highest_bit_set);

      gcc_assert (SPARC_SIMM13_P (the_const));
      gcc_assert (shift != 0);

      emit_insn (gen_safe_SET64 (temp, the_const));
      if (shift > 0)
	emit_insn (gen_rtx_SET (VOIDmode,
				op0,
				gen_rtx_ASHIFT (DImode,
						temp,
						GEN_INT (shift))));
      else if (shift < 0)
	emit_insn (gen_rtx_SET (VOIDmode,
				op0,
				gen_rtx_LSHIFTRT (DImode,
						  temp,
						  GEN_INT (-shift))));
      return;
    }

  /* Now a range of 22 or less bits set somewhere.
   * 1) sethi	%hi(focus_bits), %reg
   *    sllx	%reg, shift, %reg
   * 2) sethi	%hi(focus_bits), %reg
   *    srlx	%reg, shift, %reg
   */
  if ((highest_bit_set - lowest_bit_set) < 21)
    {
      /* Place the field at bit 10 so sethi can materialize it.  */
      unsigned HOST_WIDE_INT focus_bits =
	create_simple_focus_bits (high_bits, low_bits,
				  lowest_bit_set, 10);

      gcc_assert (SPARC_SETHI_P (focus_bits));
      gcc_assert (lowest_bit_set != 10);

      emit_insn (gen_safe_HIGH64 (temp, focus_bits));

      /* If lowest_bit_set == 10 then a sethi alone could have done it.  */
      if (lowest_bit_set < 10)
	emit_insn (gen_rtx_SET (VOIDmode,
				op0,
				gen_rtx_LSHIFTRT (DImode, temp,
						  GEN_INT (10 - lowest_bit_set))));
      else if (lowest_bit_set > 10)
	emit_insn (gen_rtx_SET (VOIDmode,
				op0,
				gen_rtx_ASHIFT (DImode, temp,
						GEN_INT (lowest_bit_set - 10))));
      return;
    }

  /* 1) sethi	%hi(low_bits), %reg
   *    or	%reg, %lo(low_bits), %reg
   * 2) sethi	%hi(~low_bits), %reg
   *	xor	%reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
   */
  if (high_bits == 0
      || high_bits == 0xffffffff)
    {
      sparc_emit_set_const64_quick1 (op0, temp, low_bits,
				     (high_bits == 0xffffffff));
      return;
    }

  /* Now, try 3-insn sequences.  */

  /* 1) sethi	%hi(high_bits), %reg
   *    or	%reg, %lo(high_bits), %reg
   *    sllx	%reg, 32, %reg
   */
  if (low_bits == 0)
    {
      sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
      return;
    }

  /* We may be able to do something quick
     when the constant is negated, so try that.  */
  if (const64_is_2insns ((~high_bits) & 0xffffffff,
			 (~low_bits) & 0xfffffc00))
    {
      /* NOTE: The trailing bits get XOR'd so we need the
	 non-negated bits, not the negated ones.  */
      unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;

      if ((((~high_bits) & 0xffffffff) == 0
	   && ((~low_bits) & 0x80000000) == 0)
	  || (((~high_bits) & 0xffffffff) == 0xffffffff
	      && ((~low_bits) & 0x80000000) != 0))
	{
	  /* The negation sign-extends correctly from 32 bits, so a
	     32-bit load of ~LOW_BITS suffices.  */
	  unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);

	  if ((SPARC_SETHI_P (fast_int)
	       && (~high_bits & 0xffffffff) == 0)
	      || SPARC_SIMM13_P (fast_int))
	    emit_insn (gen_safe_SET64 (temp, fast_int));
	  else
	    sparc_emit_set_const64 (temp, GEN_INT (fast_int));
	}
      else
	{
	  /* Recurse on the full 64-bit negated constant.  */
	  rtx negated_const;
	  negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
				   (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
	  sparc_emit_set_const64 (temp, negated_const);
	}

      /* If we are XOR'ing with -1, then we should emit a one's complement
	 instead.  This way the combiner will notice logical operations
	 such as ANDN later on and substitute.  */
      if (trailing_bits == 0x3ff)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_NOT (DImode, temp)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (VOIDmode,
				  op0,
				  gen_safe_XOR64 (temp,
						  (-0x400 | trailing_bits))));
	}
      return;
    }

  /* 1) sethi	%hi(xxx), %reg
   *    or	%reg, %lo(xxx), %reg
   *	sllx	%reg, yyy, %reg
   *
   * ??? This is just a generalized version of the low_bits==0
   * thing above, FIXME...
   */
  if ((highest_bit_set - lowest_bit_set) < 32)
    {
      unsigned HOST_WIDE_INT focus_bits =
	create_simple_focus_bits (high_bits, low_bits,
				  lowest_bit_set, 0);

      /* We can't get here in this state.  */
      gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);

      /* So what we know is that the set bits straddle the
	 middle of the 64-bit word.  */
      sparc_emit_set_const64_quick2 (op0, temp,
				     focus_bits, 0,
				     lowest_bit_set);
      return;
    }

  /* 1) sethi	%hi(high_bits), %reg
   *    or	%reg, %lo(high_bits), %reg
   *    sllx	%reg, 32, %reg
   *	or	%reg, low_bits, %reg
   */
  if (SPARC_SIMM13_P(low_bits)
      && ((int)low_bits > 0))
    {
      sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
      return;
    }

  /* The easiest way when all else fails, is full decomposition.  */
  sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
}
2700 #endif /* HOST_BITS_PER_WIDE_INT == 32 */
2701
2702 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2703 return the mode to be used for the comparison. For floating-point,
2704 CCFP[E]mode is used. CC_NOOVmode should be used when the first operand
2705 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2706 processing is needed. */
2707
2708 enum machine_mode
select_cc_mode(enum rtx_code op,rtx x,rtx y ATTRIBUTE_UNUSED)2709 select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
2710 {
2711 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2712 {
2713 switch (op)
2714 {
2715 case EQ:
2716 case NE:
2717 case UNORDERED:
2718 case ORDERED:
2719 case UNLT:
2720 case UNLE:
2721 case UNGT:
2722 case UNGE:
2723 case UNEQ:
2724 case LTGT:
2725 return CCFPmode;
2726
2727 case LT:
2728 case LE:
2729 case GT:
2730 case GE:
2731 return CCFPEmode;
2732
2733 default:
2734 gcc_unreachable ();
2735 }
2736 }
2737 else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2738 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2739 {
2740 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2741 return CCX_NOOVmode;
2742 else
2743 return CC_NOOVmode;
2744 }
2745 else
2746 {
2747 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2748 return CCXmode;
2749 else
2750 return CCmode;
2751 }
2752 }
2753
/* Emit the compare insn and return the CC reg for a CODE comparison
   with operands X and Y.  */

static rtx
gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
{
  enum machine_mode mode;
  rtx cc_reg;

  /* If X is already a CC-mode value, the comparison was emitted earlier.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return x;

  mode = SELECT_CC_MODE (code, x, y);

  /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
     fcc regs (cse can't tell they're really call clobbered regs and will
     remove a duplicate comparison even if there is an intervening function
     call - it will then try to reload the cc reg via an int reg which is why
     we need the movcc patterns).  It is possible to provide the movcc
     patterns by using the ldxfsr/stxfsr v9 insns.  I tried it: you need two
     registers (say %g1,%g5) and it takes about 6 insns.  A better fix would be
     to tell cse that CCFPE mode registers (even pseudos) are call
     clobbered.  */

  /* ??? This is an experiment.  Rather than making changes to cse which may
     or may not be easy/clean, we do our own cse.  This is possible because
     we will generate hard registers.  Cse knows they're call clobbered (it
     doesn't know the same thing about pseudos). If we guess wrong, no big
     deal, but if we win, great!  */

  if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
#if 1 /* experiment */
    {
      int reg;
      /* We cycle through the registers to ensure they're all exercised.  */
      static int next_fcc_reg = 0;
      /* Previous x,y for each fcc reg.  */
      static rtx prev_args[4][2];

      /* Scan prev_args for x,y.  */
      for (reg = 0; reg < 4; reg++)
	if (prev_args[reg][0] == x && prev_args[reg][1] == y)
	  break;
      if (reg == 4)
	{
	  /* No previous match: allocate the next fcc reg round-robin.  */
	  reg = next_fcc_reg;
	  prev_args[reg][0] = x;
	  prev_args[reg][1] = y;
	  next_fcc_reg = (next_fcc_reg + 1) & 3;
	}
      cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
    }
#else
  cc_reg = gen_reg_rtx (mode);
#endif /* ! experiment */
  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
  else
    cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);

  /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD.  If we do, this
     will only result in an unrecognizable insn so no point in asserting.  */
  emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)));

  return cc_reg;
}
2820
2821
/* Emit the compare insn and return the CC reg for the comparison in CMP.  */

rtx
gen_compare_reg (rtx cmp)
{
  /* Convenience wrapper: unpack the comparison rtx and delegate.  */
  return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
}
2829
/* This function is used for v9 only.
   DEST is the target of the Scc insn.
   CODE is the code for an Scc's comparison.
   X and Y are the values we compare.

   This function is needed to turn

	   (set (reg:SI 110)
	       (gt (reg:CCX 100 %icc)
	           (const_int 0)))
   into
	   (set (reg:SI 110)
	       (gt:DI (reg:CCX 100 %icc)
	           (const_int 0)))

   IE: The instruction recognizer needs to see the mode of the comparison to
   find the right instruction.  We could use "gt:DI" right in the
   define_expand, but leaving it out allows us to handle DI, SI, etc.

   Returns 1 when a conditional-move-based sequence was emitted, 0 when
   the caller must fall back to another strategy.  */

static int
gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
{
  if (! TARGET_ARCH64
      && (GET_MODE (x) == DImode
	  || GET_MODE (dest) == DImode))
    return 0;

  /* Try to use the movrCC insns.  */
  if (TARGET_ARCH64
      && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
      && y == const0_rtx
      && v9_regcmp_p (compare_code))
    {
      rtx op0 = x;
      rtx temp;

      /* Special case for op0 != 0.  This can be done with one instruction if
	 dest == x.  */

      if (compare_code == NE
	  && GET_MODE (dest) == DImode
	  && rtx_equal_p (op0, dest))
	{
	  emit_insn (gen_rtx_SET (VOIDmode, dest,
			      gen_rtx_IF_THEN_ELSE (DImode,
				       gen_rtx_fmt_ee (compare_code, DImode,
						       op0, const0_rtx),
				       const1_rtx,
				       dest)));
	  return 1;
	}

      if (reg_overlap_mentioned_p (dest, op0))
	{
	  /* Handle the case where dest == x.
	     We "early clobber" the result.  */
	  op0 = gen_reg_rtx (GET_MODE (x));
	  emit_move_insn (op0, x);
	}

      /* Pre-clear DEST, then conditionally set it to 1 on OP0's value.  */
      emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
      if (GET_MODE (op0) != DImode)
	{
	  /* movrCC compares a DImode register; widen first.  */
	  temp = gen_reg_rtx (DImode);
	  convert_move (temp, op0, 0);
	}
      else
	temp = op0;
      emit_insn (gen_rtx_SET (VOIDmode, dest,
			  gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
				   gen_rtx_fmt_ee (compare_code, DImode,
						   temp, const0_rtx),
				   const1_rtx,
				   dest)));
      return 1;
    }
  else
    {
      /* General case: compare into a CC reg, then movCC on the result.  */
      x = gen_compare_reg_1 (compare_code, x, y);
      y = const0_rtx;

      gcc_assert (GET_MODE (x) != CC_NOOVmode
		  && GET_MODE (x) != CCX_NOOVmode);

      emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
      emit_insn (gen_rtx_SET (VOIDmode, dest,
			  gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
				   gen_rtx_fmt_ee (compare_code,
						   GET_MODE (x), x, y),
				    const1_rtx, dest)));
      return 1;
    }
}
2923
2924
/* Emit an scc insn.  For seq, sne, sgeu, and sltu, we can do this
   without jumps using the addx/subx instructions.  Returns true when a
   sequence was emitted, false when the caller must fall back to
   branch-based code.  */

bool
emit_scc_insn (rtx operands[])
{
  rtx tem;
  rtx x;
  rtx y;
  enum rtx_code code;

  /* The quad-word fp compare library routines all return nonzero to indicate
     true, which is different from the equivalent libgcc routines, so we must
     handle them specially here.  */
  if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
    {
      operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
					      GET_CODE (operands[1]));
      operands[2] = XEXP (operands[1], 0);
      operands[3] = XEXP (operands[1], 1);
    }

  code = GET_CODE (operands[1]);
  x = operands[2];
  y = operands[3];

  /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
     more applications).  The exception to this is "reg != 0" which can
     be done in one instruction on v9 (so we do it).  */
  if (code == EQ)
    {
      if (GET_MODE (x) == SImode)
        {
	  rtx pat;
	  if (TARGET_ARCH64)
	    pat = gen_seqsidi_special (operands[0], x, y);
	  else
	    pat = gen_seqsisi_special (operands[0], x, y);
          emit_insn (pat);
          return true;
        }
      else if (GET_MODE (x) == DImode)
        {
	  rtx pat = gen_seqdi_special (operands[0], x, y);
          emit_insn (pat);
          return true;
        }
    }

  if (code == NE)
    {
      if (GET_MODE (x) == SImode)
        {
          rtx pat;
	  if (TARGET_ARCH64)
	    pat = gen_snesidi_special (operands[0], x, y);
	  else
	    pat = gen_snesisi_special (operands[0], x, y);
          emit_insn (pat);
          return true;
        }
      else if (GET_MODE (x) == DImode)
        {
	  rtx pat;
	  if (TARGET_VIS3)
	    pat = gen_snedi_special_vis3 (operands[0], x, y);
	  else
	    pat = gen_snedi_special (operands[0], x, y);
          emit_insn (pat);
          return true;
        }
    }

  /* Prefer v9 conditional moves for DImode, except where VIS3 addx/subx
     handles GTU/LTU more cheaply.  */
  if (TARGET_V9
      && TARGET_ARCH64
      && GET_MODE (x) == DImode
      && !(TARGET_VIS3
	   && (code == GTU || code == LTU))
      && gen_v9_scc (operands[0], code, x, y))
    return true;

  /* We can do LTU and GEU using the addx/subx instructions too.  And
     for GTU/LEU, if both operands are registers swap them and fall
     back to the easy case.  */
  if (code == GTU || code == LEU)
    {
      if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
          && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
        {
          tem = x;
          x = y;
          y = tem;
          code = swap_condition (code);
        }
    }

  /* Emit the comparison into the carry flag and materialize it with
     addx/subx (the scc pattern matches the CC-reg comparison).  */
  if (code == LTU
      || (!TARGET_VIS3 && code == GEU))
    {
      emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			      gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
					      gen_compare_reg_1 (code, x, y),
					      const0_rtx)));
      return true;
    }

  /* All the possibilities to use addx/subx based sequences have been
     exhausted, try for a 3 instruction sequence using v9 conditional
     moves.  */
  if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
    return true;

  /* Nope, do branches.  */
  return false;
}
3040
3041 /* Emit a conditional jump insn for the v9 architecture using comparison code
3042 CODE and jump target LABEL.
3043 This function exists to take advantage of the v9 brxx insns. */
3044
3045 static void
emit_v9_brxx_insn(enum rtx_code code,rtx op0,rtx label)3046 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3047 {
3048 emit_jump_insn (gen_rtx_SET (VOIDmode,
3049 pc_rtx,
3050 gen_rtx_IF_THEN_ELSE (VOIDmode,
3051 gen_rtx_fmt_ee (code, GET_MODE (op0),
3052 op0, const0_rtx),
3053 gen_rtx_LABEL_REF (VOIDmode, label),
3054 pc_rtx)));
3055 }
3056
3057 /* Emit a conditional jump insn for the UA2011 architecture using
3058 comparison code CODE and jump target LABEL. This function exists
3059 to take advantage of the UA2011 Compare and Branch insns. */
3060
3061 static void
emit_cbcond_insn(enum rtx_code code,rtx op0,rtx op1,rtx label)3062 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3063 {
3064 rtx if_then_else;
3065
3066 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3067 gen_rtx_fmt_ee(code, GET_MODE(op0),
3068 op0, op1),
3069 gen_rtx_LABEL_REF (VOIDmode, label),
3070 pc_rtx);
3071
3072 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, if_then_else));
3073 }
3074
/* Emit a conditional branch from cbranch expander operands: operands[0]
   is the comparison code, operands[1] and operands[2] the values to
   compare, and operands[3] the target label.  */
void
emit_conditional_branch_insn (rtx operands[])
{
  /* The quad-word fp compare library routines all return nonzero to indicate
     true, which is different from the equivalent libgcc routines, so we must
     handle them specially here.  */
  if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
    {
      operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
					      GET_CODE (operands[0]));
      operands[1] = XEXP (operands[0], 0);
      operands[2] = XEXP (operands[0], 1);
    }

  /* If we can tell early on that the comparison is against a constant
     that won't fit in the 5-bit signed immediate field of a cbcond,
     use one of the other v9 conditional branch sequences.  */
  if (TARGET_CBCOND
      && GET_CODE (operands[1]) == REG
      && (GET_MODE (operands[1]) == SImode
	  || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
      && (GET_CODE (operands[2]) != CONST_INT
	  || SPARC_SIMM5_P (INTVAL (operands[2]))))
    {
      emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
      return;
    }

  /* 64-bit register compared against zero: use the v9 brxx instructions,
     which test the register directly without a separate compare.  */
  if (TARGET_ARCH64 && operands[2] == const0_rtx
      && GET_CODE (operands[1]) == REG
      && GET_MODE (operands[1]) == DImode)
    {
      emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
      return;
    }

  /* Fall back: emit an explicit compare into the CC register, then
     branch on the condition codes.  */
  operands[1] = gen_compare_reg (operands[0]);
  operands[2] = const0_rtx;
  operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
				operands[1], operands[2]);
  emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
				  operands[3]));
}
3118
3119
3120 /* Generate a DFmode part of a hard TFmode register.
3121 REG is the TFmode hard register, LOW is 1 for the
3122 low 64bit of the register and 0 otherwise.
3123 */
3124 rtx
gen_df_reg(rtx reg,int low)3125 gen_df_reg (rtx reg, int low)
3126 {
3127 int regno = REGNO (reg);
3128
3129 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3130 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3131 return gen_rtx_REG (DFmode, regno);
3132 }
3133
3134 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3135 Unlike normal calls, TFmode operands are passed by reference. It is
3136 assumed that no more than 3 operands are required. */
3137
static void
emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
{
  rtx ret_slot = NULL, arg[3], func_sym;
  int i;

  /* We only expect to be called for conversions, unary, and binary ops.  */
  gcc_assert (nargs == 2 || nargs == 3);

  /* Convert each TFmode operand into an address to pass by reference.  */
  for (i = 0; i < nargs; ++i)
    {
      rtx this_arg = operands[i];
      rtx this_slot;

      /* TFmode arguments and return values are passed by reference.  */
      if (GET_MODE (this_arg) == TFmode)
	{
	  int force_stack_temp;

	  /* With a buggy Qp library, always route the result (operand 0)
	     through a fresh stack slot rather than passing the final
	     destination's address.  */
	  force_stack_temp = 0;
	  if (TARGET_BUGGY_QP_LIB && i == 0)
	    force_stack_temp = 1;

	  if (GET_CODE (this_arg) == MEM
	      && ! force_stack_temp)
	    {
	      /* The operand already lives in memory: pass its address.  */
	      tree expr = MEM_EXPR (this_arg);
	      if (expr)
		mark_addressable (expr);
	      this_arg = XEXP (this_arg, 0);
	    }
	  else if (CONSTANT_P (this_arg)
		   && ! force_stack_temp)
	    {
	      /* Constants go to the constant pool; pass that address.  */
	      this_slot = force_const_mem (TFmode, this_arg);
	      this_arg = XEXP (this_slot, 0);
	    }
	  else
	    {
	      /* Otherwise spill to a stack temporary and pass its address.  */
	      this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));

	      /* Operand 0 is the return value.  We'll copy it out later.  */
	      if (i > 0)
		emit_move_insn (this_slot, this_arg);
	      else
		ret_slot = this_slot;

	      this_arg = XEXP (this_slot, 0);
	    }
	}

      arg[i] = this_arg;
    }

  func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);

  if (GET_MODE (operands[0]) == TFmode)
    {
      /* TFmode result: returned through the hidden reference arg[0],
	 so the libcall itself returns void.  */
      if (nargs == 2)
	emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
			   arg[0], GET_MODE (arg[0]),
			   arg[1], GET_MODE (arg[1]));
      else
	emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
			   arg[0], GET_MODE (arg[0]),
			   arg[1], GET_MODE (arg[1]),
			   arg[2], GET_MODE (arg[2]));

      /* If the result went through a stack temporary, copy it out now.  */
      if (ret_slot)
	emit_move_insn (operands[0], ret_slot);
    }
  else
    {
      /* Non-TFmode result: an ordinary value-returning libcall.  */
      rtx ret;

      gcc_assert (nargs == 2);

      ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
				     GET_MODE (operands[0]), 1,
				     arg[1], GET_MODE (arg[1]));

      if (ret != operands[0])
	emit_move_insn (operands[0], ret);
    }
}
3223
3224 /* Expand soft-float TFmode calls to sparc abi routines. */
3225
3226 static void
emit_soft_tfmode_binop(enum rtx_code code,rtx * operands)3227 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3228 {
3229 const char *func;
3230
3231 switch (code)
3232 {
3233 case PLUS:
3234 func = "_Qp_add";
3235 break;
3236 case MINUS:
3237 func = "_Qp_sub";
3238 break;
3239 case MULT:
3240 func = "_Qp_mul";
3241 break;
3242 case DIV:
3243 func = "_Qp_div";
3244 break;
3245 default:
3246 gcc_unreachable ();
3247 }
3248
3249 emit_soft_tfmode_libcall (func, 3, operands);
3250 }
3251
3252 static void
emit_soft_tfmode_unop(enum rtx_code code,rtx * operands)3253 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3254 {
3255 const char *func;
3256
3257 gcc_assert (code == SQRT);
3258 func = "_Qp_sqrt";
3259
3260 emit_soft_tfmode_libcall (func, 2, operands);
3261 }
3262
/* Expand a soft-float TFmode conversion CODE on OPERANDS using the sparc
   ABI _Qp_* routines.  */

static void
emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
{
  const char *func;

  /* Pick the routine from the conversion code and the mode of whichever
     operand is not TFmode (the source for to-TF conversions, the
     destination for from-TF conversions).  */
  switch (code)
    {
    case FLOAT_EXTEND:
      switch (GET_MODE (operands[1]))
	{
	case SFmode:
	  func = "_Qp_stoq";
	  break;
	case DFmode:
	  func = "_Qp_dtoq";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case FLOAT_TRUNCATE:
      switch (GET_MODE (operands[0]))
	{
	case SFmode:
	  func = "_Qp_qtos";
	  break;
	case DFmode:
	  func = "_Qp_qtod";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case FLOAT:
      switch (GET_MODE (operands[1]))
	{
	case SImode:
	  func = "_Qp_itoq";
	  /* On 64-bit, pass the SImode source widened to DImode.  */
	  if (TARGET_ARCH64)
	    operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
	  break;
	case DImode:
	  func = "_Qp_xtoq";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case UNSIGNED_FLOAT:
      switch (GET_MODE (operands[1]))
	{
	case SImode:
	  func = "_Qp_uitoq";
	  /* Likewise, but zero-extended for the unsigned conversion.  */
	  if (TARGET_ARCH64)
	    operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
	  break;
	case DImode:
	  func = "_Qp_uxtoq";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case FIX:
      switch (GET_MODE (operands[0]))
	{
	case SImode:
	  func = "_Qp_qtoi";
	  break;
	case DImode:
	  func = "_Qp_qtox";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case UNSIGNED_FIX:
      switch (GET_MODE (operands[0]))
	{
	case SImode:
	  func = "_Qp_qtoui";
	  break;
	case DImode:
	  func = "_Qp_qtoux";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  emit_soft_tfmode_libcall (func, 2, operands);
}
3364
3365 /* Expand a hard-float tfmode operation. All arguments must be in
3366 registers. */
3367
3368 static void
emit_hard_tfmode_operation(enum rtx_code code,rtx * operands)3369 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3370 {
3371 rtx op, dest;
3372
3373 if (GET_RTX_CLASS (code) == RTX_UNARY)
3374 {
3375 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3376 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3377 }
3378 else
3379 {
3380 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3381 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3382 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3383 operands[1], operands[2]);
3384 }
3385
3386 if (register_operand (operands[0], VOIDmode))
3387 dest = operands[0];
3388 else
3389 dest = gen_reg_rtx (GET_MODE (operands[0]));
3390
3391 emit_insn (gen_rtx_SET (VOIDmode, dest, op));
3392
3393 if (dest != operands[0])
3394 emit_move_insn (operands[0], dest);
3395 }
3396
3397 void
emit_tfmode_binop(enum rtx_code code,rtx * operands)3398 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3399 {
3400 if (TARGET_HARD_QUAD)
3401 emit_hard_tfmode_operation (code, operands);
3402 else
3403 emit_soft_tfmode_binop (code, operands);
3404 }
3405
3406 void
emit_tfmode_unop(enum rtx_code code,rtx * operands)3407 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3408 {
3409 if (TARGET_HARD_QUAD)
3410 emit_hard_tfmode_operation (code, operands);
3411 else
3412 emit_soft_tfmode_unop (code, operands);
3413 }
3414
3415 void
emit_tfmode_cvt(enum rtx_code code,rtx * operands)3416 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3417 {
3418 if (TARGET_HARD_QUAD)
3419 emit_hard_tfmode_operation (code, operands);
3420 else
3421 emit_soft_tfmode_cvt (code, operands);
3422 }
3423
3424 /* Return nonzero if a branch/jump/call instruction will be emitting
3425 nop into its delay slot. */
3426
3427 int
empty_delay_slot(rtx insn)3428 empty_delay_slot (rtx insn)
3429 {
3430 rtx seq;
3431
3432 /* If no previous instruction (should not happen), return true. */
3433 if (PREV_INSN (insn) == NULL)
3434 return 1;
3435
3436 seq = NEXT_INSN (PREV_INSN (insn));
3437 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3438 return 0;
3439
3440 return 1;
3441 }
3442
3443 /* Return nonzero if we should emit a nop after a cbcond instruction.
3444 The cbcond instruction does not have a delay slot, however there is
3445 a severe performance penalty if a control transfer appears right
3446 after a cbcond. Therefore we emit a nop when we detect this
3447 situation. */
3448
int
emit_cbcond_nop (rtx insn)
{
  rtx next = next_active_insn (insn);

  /* Nothing follows: be conservative and emit the nop.  */
  if (!next)
    return 1;

  if (NONJUMP_INSN_P (next)
      && GET_CODE (PATTERN (next)) == SEQUENCE)
    /* A filled delay-slot group: look at its first instruction.  */
    next = XVECEXP (PATTERN (next), 0, 0);
  else if (CALL_P (next)
	   && GET_CODE (PATTERN (next)) == PARALLEL)
    {
      rtx delay = XVECEXP (PATTERN (next), 0, 1);

      if (GET_CODE (delay) == RETURN)
	{
	  /* It's a sibling call.  Do not emit the nop if we're going
	     to emit something other than the jump itself as the first
	     instruction of the sibcall sequence.  */
	  if (sparc_leaf_function_p || TARGET_FLAT)
	    return 0;
	}
    }

  /* An ordinary (non-control-transfer) instruction after the cbcond
     needs no nop.  */
  if (NONJUMP_INSN_P (next))
    return 0;

  return 1;
}
3480
3481 /* Return nonzero if TRIAL can go into the call delay slot. */
3482
int
eligible_for_call_delay (rtx trial)
{
  rtx pat;

  /* The insn must be allowed in a delay slot at all.  */
  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
    return 0;

  /* Binutils allows
       call __tls_get_addr, %tgd_call (foo)
        add %l7, %o0, %o0, %tgd_add (foo)
     while Sun as/ld does not.  */
  if (TARGET_GNU_TLS || !TARGET_TLS)
    return 1;

  pat = PATTERN (trial);

  /* We must reject tgd_add{32|64}, i.e.
       (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
     and tldm_add{32|64}, i.e.
       (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
     for Sun as/ld.  */
  if (GET_CODE (pat) == SET
      && GET_CODE (SET_SRC (pat)) == PLUS)
    {
      rtx unspec = XEXP (SET_SRC (pat), 1);

      if (GET_CODE (unspec) == UNSPEC
	  && (XINT (unspec, 1) == UNSPEC_TLSGD
	      || XINT (unspec, 1) == UNSPEC_TLSLDM))
	return 0;
    }

  return 1;
}
3518
3519 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3520 instruction. RETURN_P is true if the v9 variant 'return' is to be
3521 considered in the test too.
3522
3523 TRIAL must be a SET whose destination is a REG appropriate for the
3524 'restore' instruction or, if RETURN_P is true, for the 'return'
3525 instruction. */
3526
static int
eligible_for_restore_insn (rtx trial, bool return_p)
{
  rtx pat = PATTERN (trial);
  rtx src = SET_SRC (pat);
  bool src_is_freg = false;
  rtx src_reg;

  /* Since we now can do moves between float and integer registers when
     VIS3 is enabled, we have to catch this case.  We can allow such
     moves when doing a 'return' however.  */
  src_reg = src;
  if (GET_CODE (src_reg) == SUBREG)
    src_reg = SUBREG_REG (src_reg);
  if (GET_CODE (src_reg) == REG
      && SPARC_FP_REG_P (REGNO (src_reg)))
    src_is_freg = true;

  /* The 'restore src,%g0,dest' pattern for word mode and below.  */
  if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
      && arith_operand (src, GET_MODE (src))
      && ! src_is_freg)
    {
      if (TARGET_ARCH64)
	return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
      else
	return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
    }

  /* The 'restore src,%g0,dest' pattern for double-word mode.  */
  else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
	   && arith_double_operand (src, GET_MODE (src))
	   && ! src_is_freg)
    return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);

  /* The 'restore src,%g0,dest' pattern for float if no FPU.  */
  else if (! TARGET_FPU && register_operand (src, SFmode))
    return 1;

  /* The 'restore src,%g0,dest' pattern for double if no FPU.  */
  else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
    return 1;

  /* If we have the 'return' instruction, anything that does not use
     local or output registers and can go into a delay slot wins.  */
  else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
    return 1;

  /* The 'restore src1,src2,dest' pattern for SImode.  */
  else if (GET_CODE (src) == PLUS
	   && register_operand (XEXP (src, 0), SImode)
	   && arith_operand (XEXP (src, 1), SImode))
    return 1;

  /* The 'restore src1,src2,dest' pattern for DImode.  */
  else if (GET_CODE (src) == PLUS
	   && register_operand (XEXP (src, 0), DImode)
	   && arith_double_operand (XEXP (src, 1), DImode))
    return 1;

  /* The 'restore src1,%lo(src2),dest' pattern.  */
  else if (GET_CODE (src) == LO_SUM
	   && ! TARGET_CM_MEDMID
	   && ((register_operand (XEXP (src, 0), SImode)
		&& immediate_operand (XEXP (src, 1), SImode))
	       || (TARGET_ARCH64
		   && register_operand (XEXP (src, 0), DImode)
		   && immediate_operand (XEXP (src, 1), DImode))))
    return 1;

  /* The 'restore src,src,dest' pattern: src + src, written in the RTL
     as a left shift by 1.  */
  else if (GET_CODE (src) == ASHIFT
	   && (register_operand (XEXP (src, 0), SImode)
	       || register_operand (XEXP (src, 0), DImode))
	   && XEXP (src, 1) == const1_rtx)
    return 1;

  return 0;
}
3606
3607 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3608
int
eligible_for_return_delay (rtx trial)
{
  int regno;
  rtx pat;

  /* If the function uses __builtin_eh_return, the eh_return machinery
     occupies the delay slot.  */
  if (crtl->calls_eh_return)
    return 0;

  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
    return 0;

  /* In the case of a leaf or flat function, anything can go into the slot.  */
  if (sparc_leaf_function_p || TARGET_FLAT)
    return 1;

  if (!NONJUMP_INSN_P (trial))
    return 0;

  pat = PATTERN (trial);
  if (GET_CODE (pat) == PARALLEL)
    {
      int i;

      if (! TARGET_V9)
	return 0;
      /* Every element must be a SET of a register outside the output
	 and local registers (regnos 8-23).  */
      for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
	{
	  rtx expr = XVECEXP (pat, 0, i);
	  if (GET_CODE (expr) != SET)
	    return 0;
	  if (GET_CODE (SET_DEST (expr)) != REG)
	    return 0;
	  regno = REGNO (SET_DEST (expr));
	  if (regno >= 8 && regno < 24)
	    return 0;
	}
      return !epilogue_renumber (&pat, 1);
    }

  if (GET_CODE (pat) != SET)
    return 0;

  if (GET_CODE (SET_DEST (pat)) != REG)
    return 0;

  regno = REGNO (SET_DEST (pat));

  /* Otherwise, only operations which can be done in tandem with
     a `restore' or `return' insn can go into the delay slot.  */
  if (regno >= 8 && regno < 24)
    return 0;

  /* If this instruction sets up floating point register and we have a return
     instruction, it can probably go in.  But restore will not work
     with FP_REGS.  */
  if (! SPARC_INT_REG_P (regno))
    return TARGET_V9 && !epilogue_renumber (&pat, 1);

  return eligible_for_restore_insn (trial, true);
}
3672
3673 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3674
int
eligible_for_sibcall_delay (rtx trial)
{
  rtx pat;

  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
    return 0;

  if (!NONJUMP_INSN_P (trial))
    return 0;

  pat = PATTERN (trial);

  if (sparc_leaf_function_p || TARGET_FLAT)
    {
      /* If the tail call is done using the call instruction,
	 we have to restore %o7 in the delay slot.  */
      if (LEAF_SIBCALL_SLOT_RESERVED_P)
	return 0;

      /* %g1 is used to build the function address */
      if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
	return 0;

      return 1;
    }

  if (GET_CODE (pat) != SET)
    return 0;

  /* Otherwise, only operations which can be done in tandem with
     a `restore' insn can go into the delay slot.  The destination must
     be an integer register outside the output/local window (regnos
     8-23).  */
  if (GET_CODE (SET_DEST (pat)) != REG
      || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
      || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
    return 0;

  /* If it mentions %o7, it can't go in, because sibcall will clobber it
     in most cases.  */
  if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
    return 0;

  return eligible_for_restore_insn (trial, false);
}
3719
3720 /* Determine if it's legal to put X into the constant pool. This
3721 is not possible if X contains the address of a symbol that is
3722 not constant (TLS) or not known at final link time (PIC). */
3723
3724 static bool
sparc_cannot_force_const_mem(enum machine_mode mode,rtx x)3725 sparc_cannot_force_const_mem (enum machine_mode mode, rtx x)
3726 {
3727 switch (GET_CODE (x))
3728 {
3729 case CONST_INT:
3730 case CONST_DOUBLE:
3731 case CONST_VECTOR:
3732 /* Accept all non-symbolic constants. */
3733 return false;
3734
3735 case LABEL_REF:
3736 /* Labels are OK iff we are non-PIC. */
3737 return flag_pic != 0;
3738
3739 case SYMBOL_REF:
3740 /* 'Naked' TLS symbol references are never OK,
3741 non-TLS symbols are OK iff we are non-PIC. */
3742 if (SYMBOL_REF_TLS_MODEL (x))
3743 return true;
3744 else
3745 return flag_pic != 0;
3746
3747 case CONST:
3748 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3749 case PLUS:
3750 case MINUS:
3751 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3752 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3753 case UNSPEC:
3754 return true;
3755 default:
3756 gcc_unreachable ();
3757 }
3758 }
3759
3760 /* Global Offset Table support. */
3761 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3762 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3763
3764 /* Return the SYMBOL_REF for the Global Offset Table. */
3765
3766 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3767
3768 static rtx
sparc_got(void)3769 sparc_got (void)
3770 {
3771 if (!sparc_got_symbol)
3772 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3773
3774 return sparc_got_symbol;
3775 }
3776
3777 /* Ensure that we are not using patterns that are not OK with PIC. */
3778
int
check_pic (int i)
{
  rtx op;

  switch (flag_pic)
    {
    case 1:
      /* With -fpic, operand I must not be a bare symbolic reference;
	 only the GOT-relative form (GOT - constant) is acceptable.  */
      op = recog_data.operand[i];
      gcc_assert (GET_CODE (op) != SYMBOL_REF
		  && (GET_CODE (op) != CONST
		      || (GET_CODE (XEXP (op, 0)) == MINUS
			  && XEXP (XEXP (op, 0), 0) == sparc_got ()
			  && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
      /* Fall through.  */
    case 2:
    default:
      return 1;
    }
}
3798
3799 /* Return true if X is an address which needs a temporary register when
3800 reloaded while generating PIC code. */
3801
3802 int
pic_address_needs_scratch(rtx x)3803 pic_address_needs_scratch (rtx x)
3804 {
3805 /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */
3806 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
3807 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3808 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3809 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
3810 return 1;
3811
3812 return 0;
3813 }
3814
3815 /* Determine if a given RTX is a valid constant. We already know this
3816 satisfies CONSTANT_P. */
3817
3818 static bool
sparc_legitimate_constant_p(enum machine_mode mode,rtx x)3819 sparc_legitimate_constant_p (enum machine_mode mode, rtx x)
3820 {
3821 switch (GET_CODE (x))
3822 {
3823 case CONST:
3824 case SYMBOL_REF:
3825 if (sparc_tls_referenced_p (x))
3826 return false;
3827 break;
3828
3829 case CONST_DOUBLE:
3830 if (GET_MODE (x) == VOIDmode)
3831 return true;
3832
3833 /* Floating point constants are generally not ok.
3834 The only exception is 0.0 and all-ones in VIS. */
3835 if (TARGET_VIS
3836 && SCALAR_FLOAT_MODE_P (mode)
3837 && (const_zero_operand (x, mode)
3838 || const_all_ones_operand (x, mode)))
3839 return true;
3840
3841 return false;
3842
3843 case CONST_VECTOR:
3844 /* Vector constants are generally not ok.
3845 The only exception is 0 or -1 in VIS. */
3846 if (TARGET_VIS
3847 && (const_zero_operand (x, mode)
3848 || const_all_ones_operand (x, mode)))
3849 return true;
3850
3851 return false;
3852
3853 default:
3854 break;
3855 }
3856
3857 return true;
3858 }
3859
3860 /* Determine if a given RTX is a valid constant address. */
3861
3862 bool
constant_address_p(rtx x)3863 constant_address_p (rtx x)
3864 {
3865 switch (GET_CODE (x))
3866 {
3867 case LABEL_REF:
3868 case CONST_INT:
3869 case HIGH:
3870 return true;
3871
3872 case CONST:
3873 if (flag_pic && pic_address_needs_scratch (x))
3874 return false;
3875 return sparc_legitimate_constant_p (Pmode, x);
3876
3877 case SYMBOL_REF:
3878 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3879
3880 default:
3881 return false;
3882 }
3883 }
3884
3885 /* Nonzero if the constant value X is a legitimate general operand
3886 when generating PIC code. It is given that flag_pic is on and
3887 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
3888
3889 bool
legitimate_pic_operand_p(rtx x)3890 legitimate_pic_operand_p (rtx x)
3891 {
3892 if (pic_address_needs_scratch (x))
3893 return false;
3894 if (sparc_tls_referenced_p (x))
3895 return false;
3896 return true;
3897 }
3898
3899 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
3900 (CONST_INT_P (X) \
3901 && INTVAL (X) >= -0x1000 \
3902 && INTVAL (X) < (0x1000 - GET_MODE_SIZE (MODE)))
3903
3904 #define RTX_OK_FOR_OLO10_P(X, MODE) \
3905 (CONST_INT_P (X) \
3906 && INTVAL (X) >= -0x1000 \
3907 && INTVAL (X) < (0xc00 - GET_MODE_SIZE (MODE)))
3908
3909 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
3910
3911 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
3912 ordinarily. This changes a bit when generating PIC. */
3913
static bool
sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
{
  /* RS1/RS2 hold the candidate base/index registers, IMM1 a candidate
     immediate part; register checks happen at the bottom.  */
  rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;

  if (REG_P (addr) || GET_CODE (addr) == SUBREG)
    rs1 = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rs1 = XEXP (addr, 0);
      rs2 = XEXP (addr, 1);

      /* Canonicalize.  REG comes first, if there are no regs,
	 LO_SUM comes first.  */
      if (!REG_P (rs1)
	  && GET_CODE (rs1) != SUBREG
	  && (REG_P (rs2)
	      || GET_CODE (rs2) == SUBREG
	      || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
	{
	  rs1 = XEXP (addr, 1);
	  rs2 = XEXP (addr, 0);
	}

      /* REG + immediate: either a GOT-relative operand with -fpic, or a
	 register plus a 13-bit signed offset.  */
      if ((flag_pic == 1
	   && rs1 == pic_offset_table_rtx
	   && !REG_P (rs2)
	   && GET_CODE (rs2) != SUBREG
	   && GET_CODE (rs2) != LO_SUM
	   && GET_CODE (rs2) != MEM
	   && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
	   && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
	   && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
	  || ((REG_P (rs1)
	       || GET_CODE (rs1) == SUBREG)
	      && RTX_OK_FOR_OFFSET_P (rs2, mode)))
	{
	  imm1 = rs2;
	  rs2 = NULL;
	}
      else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
	       && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
	{
	  /* We prohibit REG + REG for TFmode when there are no quad move insns
	     and we consequently need to split.  We do this because REG+REG
	     is not an offsettable address.  If we get the situation in reload
	     where source and destination of a movtf pattern are both MEMs with
	     REG+REG address, then only one of them gets converted to an
	     offsettable address.  */
	  if (mode == TFmode
	      && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
	    return 0;

	  /* Likewise for TImode, but in all cases.  */
	  if (mode == TImode)
	    return 0;

	  /* We prohibit REG + REG on ARCH32 if not optimizing for
	     DFmode/DImode because then mem_min_alignment is likely to be zero
	     after reload and the forced split would lack a matching splitter
	     pattern.  */
	  if (TARGET_ARCH32 && !optimize
	      && (mode == DFmode || mode == DImode))
	    return 0;
	}
      else if (USE_AS_OFFSETABLE_LO10
	       && GET_CODE (rs1) == LO_SUM
	       && TARGET_ARCH64
	       && ! TARGET_CM_MEDMID
	       && RTX_OK_FOR_OLO10_P (rs2, mode))
	{
	  /* (lo_sum reg imm) + small offset: fold into reg + imm form.  */
	  rs2 = NULL;
	  imm1 = XEXP (rs1, 1);
	  rs1 = XEXP (rs1, 0);
	  if (!CONSTANT_P (imm1)
	      || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
	    return 0;
	}
    }
  else if (GET_CODE (addr) == LO_SUM)
    {
      rs1 = XEXP (addr, 0);
      imm1 = XEXP (addr, 1);

      if (!CONSTANT_P (imm1)
	  || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
	return 0;

      /* We can't allow TFmode in 32-bit mode, because an offset greater
	 than the alignment (8) may cause the LO_SUM to overflow.  */
      if (mode == TFmode && TARGET_ARCH32)
	return 0;
    }
  else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
    return 1;
  else
    return 0;

  /* Whatever survives above must have register RS1 (and RS2, if still
     set) — look through SUBREGs first.  */
  if (GET_CODE (rs1) == SUBREG)
    rs1 = SUBREG_REG (rs1);
  if (!REG_P (rs1))
    return 0;

  if (rs2)
    {
      if (GET_CODE (rs2) == SUBREG)
	rs2 = SUBREG_REG (rs2);
      if (!REG_P (rs2))
	return 0;
    }

  if (strict)
    {
      /* Strict checking: hard registers must be valid base registers.  */
      if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
	  || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
	return 0;
    }
  else
    {
      /* Non-strict: allow pseudos and the frame pointer, but reject hard
	 registers that are not integer registers.  */
      if ((! SPARC_INT_REG_P (REGNO (rs1))
	   && REGNO (rs1) != FRAME_POINTER_REGNUM
	   && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
	  || (rs2
	      && (! SPARC_INT_REG_P (REGNO (rs2))
		  && REGNO (rs2) != FRAME_POINTER_REGNUM
		  && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
	return 0;
    }
  return 1;
}
4044
4045 /* Return the SYMBOL_REF for the tls_get_addr function. */
4046
4047 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4048
4049 static rtx
sparc_tls_get_addr(void)4050 sparc_tls_get_addr (void)
4051 {
4052 if (!sparc_tls_symbol)
4053 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4054
4055 return sparc_tls_symbol;
4056 }
4057
4058 /* Return the Global Offset Table to be used in TLS mode. */
4059
static rtx
sparc_tls_got (void)
{
  /* In PIC mode, this is just the PIC offset table.  */
  if (flag_pic)
    {
      crtl->uses_pic_offset_table = 1;
      return pic_offset_table_rtx;
    }

  /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
     the GOT symbol with the 32-bit ABI, so we reload the GOT register.  */
  if (TARGET_SUN_TLS && TARGET_ARCH32)
    {
      load_got_register ();
      return global_offset_table_rtx;
    }

  /* In all other cases, we load a new pseudo with the GOT symbol.  */
  return copy_to_reg (sparc_got ());
}
4081
4082 /* Return true if X contains a thread-local symbol. */
4083
4084 static bool
sparc_tls_referenced_p(rtx x)4085 sparc_tls_referenced_p (rtx x)
4086 {
4087 if (!TARGET_HAVE_TLS)
4088 return false;
4089
4090 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4091 x = XEXP (XEXP (x, 0), 0);
4092
4093 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4094 return true;
4095
4096 /* That's all we handle in sparc_legitimize_tls_address for now. */
4097 return false;
4098 }
4099
/* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
   this (thread-local) address.  ADDR is either such a SYMBOL_REF or a
   CONST wrapping a PLUS of one and a constant offset.  Return a new
   expression (pseudo or PLUS) holding the final address.  The code
   emitted depends on the TLS access model recorded on the SYMBOL_REF.  */

static rtx
sparc_legitimize_tls_address (rtx addr)
{
  rtx temp1, temp2, temp3, ret, o0, got, insn;

  /* We allocate new pseudos below, so this must not run during reload.  */
  gcc_assert (can_create_pseudo_p ());

  if (GET_CODE (addr) == SYMBOL_REF)
    switch (SYMBOL_REF_TLS_MODEL (addr))
      {
      case TLS_MODEL_GLOBAL_DYNAMIC:
	/* Global dynamic: compute the GOT-relative offset of the symbol
	   into %o0 (reg 8) and call __tls_get_addr.  The whole sequence
	   is wrapped in a libcall block equivalent to ADDR so that it
	   can be CSEd by the optimizers.  */
	start_sequence ();
	temp1 = gen_reg_rtx (SImode);
	temp2 = gen_reg_rtx (SImode);
	ret = gen_reg_rtx (Pmode);
	o0 = gen_rtx_REG (Pmode, 8);
	got = sparc_tls_got ();
	emit_insn (gen_tgd_hi22 (temp1, addr));
	emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
	if (TARGET_ARCH32)
	  {
	    emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
	    insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
						   addr, const1_rtx));
	  }
	else
	  {
	    emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
	    insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
						   addr, const1_rtx));
	  }
	/* The call both passes and returns its value in %o0.  */
	use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
	insn = get_insns ();
	end_sequence ();
	emit_libcall_block (insn, ret, o0, addr);
	break;

      case TLS_MODEL_LOCAL_DYNAMIC:
	/* Local dynamic: first call __tls_get_addr once to obtain the
	   base of the module's TLS block (the libcall block is keyed on
	   UNSPEC_TLSLD_BASE so it is shared between symbols), then add
	   the symbol's tldo offset to that base.  */
	start_sequence ();
	temp1 = gen_reg_rtx (SImode);
	temp2 = gen_reg_rtx (SImode);
	temp3 = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	o0 = gen_rtx_REG (Pmode, 8);
	got = sparc_tls_got ();
	emit_insn (gen_tldm_hi22 (temp1));
	emit_insn (gen_tldm_lo10 (temp2, temp1));
	if (TARGET_ARCH32)
	  {
	    emit_insn (gen_tldm_add32 (o0, got, temp2));
	    insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
						    const1_rtx));
	  }
	else
	  {
	    emit_insn (gen_tldm_add64 (o0, got, temp2));
	    insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
						    const1_rtx));
	  }
	use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
	insn = get_insns ();
	end_sequence ();
	emit_libcall_block (insn, temp3, o0,
			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					    UNSPEC_TLSLD_BASE));
	/* Now add the per-symbol offset to the module base in TEMP3.  */
	temp1 = gen_reg_rtx (SImode);
	temp2 = gen_reg_rtx (SImode);
	emit_insn (gen_tldo_hix22 (temp1, addr));
	emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
	if (TARGET_ARCH32)
	  emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
	else
	  emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
	break;

      case TLS_MODEL_INITIAL_EXEC:
	/* Initial exec: load the symbol's TP offset from the GOT, then
	   add it to the thread pointer (%g7, reg 7).  */
	temp1 = gen_reg_rtx (SImode);
	temp2 = gen_reg_rtx (SImode);
	temp3 = gen_reg_rtx (Pmode);
	got = sparc_tls_got ();
	emit_insn (gen_tie_hi22 (temp1, addr));
	emit_insn (gen_tie_lo10 (temp2, temp1, addr));
	if (TARGET_ARCH32)
	  emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
	else
	  emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
	if (TARGET_SUN_TLS)
	  {
	    /* Sun TLS requires an explicit add insn carrying the symbol
	       for relocation purposes.  */
	    ret = gen_reg_rtx (Pmode);
	    if (TARGET_ARCH32)
	      emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
					temp3, addr));
	    else
	      emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
					temp3, addr));
	  }
	else
	  ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
	break;

      case TLS_MODEL_LOCAL_EXEC:
	/* Local exec: the TP offset is a link-time constant; materialize
	   it and add it to the thread pointer (%g7).  */
	temp1 = gen_reg_rtx (Pmode);
	temp2 = gen_reg_rtx (Pmode);
	if (TARGET_ARCH32)
	  {
	    emit_insn (gen_tle_hix22_sp32 (temp1, addr));
	    emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
	  }
	else
	  {
	    emit_insn (gen_tle_hix22_sp64 (temp1, addr));
	    emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
	  }
	ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
	break;

      default:
	gcc_unreachable ();
      }

  else if (GET_CODE (addr) == CONST)
    {
      rtx base, offset;

      /* CONST (PLUS tls_sym offset): legitimize the symbol part and
	 re-attach the constant offset.  */
      gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);

      base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
      offset = XEXP (XEXP (addr, 0), 1);

      base = force_operand (base, NULL_RTX);
      /* Offsets too large for an immediate field must go in a register.  */
      if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
	offset = force_reg (Pmode, offset);
      ret = gen_rtx_PLUS (Pmode, base, offset);
    }

  else
    gcc_unreachable ();  /* for now ... */

  return ret;
}
4243
/* Legitimize PIC addresses.  If the address is already position-independent,
   we return ORIG.  Newly generated position-independent addresses go into a
   reg.  This is REG if nonzero, otherwise we allocate register(s) as
   necessary.  */

static rtx
sparc_legitimize_pic_address (rtx orig, rtx reg)
{
  bool gotdata_op = false;

  if (GET_CODE (orig) == SYMBOL_REF
      /* See the comment in sparc_expand_move.  */
      || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
    {
      rtx pic_ref, address;
      rtx insn;

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      if (flag_pic == 2)
	{
	  /* -fPIC: the GOT may be larger than 13 bits, so build the GOT
	     offset with a hi/lo_sum pair first.  If not during reload,
	     allocate another temp reg here for loading in the address,
	     so that these instructions can be optimized properly.  */
	  rtx temp_reg = (! can_create_pseudo_p ()
			  ? reg : gen_reg_rtx (Pmode));

	  /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
	     won't get confused into thinking that these two instructions
	     are loading in the true address of the symbol.  If in the
	     future a PIC rtx exists, that should be used instead.  */
	  if (TARGET_ARCH64)
	    {
	      emit_insn (gen_movdi_high_pic (temp_reg, orig));
	      emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
	    }
	  else
	    {
	      emit_insn (gen_movsi_high_pic (temp_reg, orig));
	      emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
	    }
	  address = temp_reg;
	  gotdata_op = true;
	}
      else
	address = orig;

      /* Either path below reads through the PIC register.  */
      crtl->uses_pic_offset_table = 1;
      if (gotdata_op)
	{
	  /* Combined GOT load carrying ORIG so the linker can relax it.  */
	  if (TARGET_ARCH64)
	    insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
							pic_offset_table_rtx,
							address, orig));
	  else
	    insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
							pic_offset_table_rtx,
							address, orig));
	}
      else
	{
	  /* -fpic: a plain load from [%l7 + address].  The GOT is
	     read-only, hence gen_const_mem.  */
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_PLUS (Pmode,
					   pic_offset_table_rtx, address));
	  insn = emit_move_insn (reg, pic_ref);
	}

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);
      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      /* Already legitimized: PIC register plus something.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      /* Legitimize both operands of the PLUS separately, reusing REG
	 where possible.  */
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
      base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
      offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
					     base == reg ? NULL_RTX : reg);

      if (GET_CODE (offset) == CONST_INT)
	{
	  if (SMALL_INT (offset))
	    return plus_constant (Pmode, base, INTVAL (offset));
	  else if (can_create_pseudo_p ())
	    offset = force_reg (Pmode, offset);
	  else
	    /* If we reach here, then something is seriously wrong.  */
	    gcc_unreachable ();
	}
      return gen_rtx_PLUS (Pmode, base, offset);
    }
  else if (GET_CODE (orig) == LABEL_REF)
    /* ??? We ought to be checking that the register is live instead, in case
       it is eliminated.  */
    crtl->uses_pic_offset_table = 1;

  return orig;
}
4359
/* Try machine-dependent ways of modifying an illegitimate address X
   to be legitimate.  If we find one, return the new, valid address.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE is the mode of the operand pointed to by X.

   On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG.  */

static rtx
sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			  enum machine_mode mode)
{
  rtx orig_x = x;

  /* The four rewrites below are cumulative: each one inspects the X
     produced by the previous one, so their order matters.  First force
     a MULT operand of a PLUS into a register (either side).  */
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
    x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
		      force_operand (XEXP (x, 0), NULL_RTX));
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
		      force_operand (XEXP (x, 1), NULL_RTX));
  /* Then force a nested PLUS into a register (either side).  */
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
    x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
		      XEXP (x, 1));
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
		      force_operand (XEXP (x, 1), NULL_RTX));

  /* If the rewrites produced a legitimate address, we are done.  */
  if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
    return x;

  /* TLS symbols need their dedicated access sequences.  */
  if (sparc_tls_referenced_p (x))
    x = sparc_legitimize_tls_address (x);
  else if (flag_pic)
    x = sparc_legitimize_pic_address (x, NULL_RTX);
  /* REG+constant: move the constant part into a register.  */
  else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
		      copy_to_mode_reg (Pmode, XEXP (x, 1)));
  else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
    x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
		      copy_to_mode_reg (Pmode, XEXP (x, 0)));
  /* Bare symbolic addresses go into a register as a whole.  */
  else if (GET_CODE (x) == SYMBOL_REF
	   || GET_CODE (x) == CONST
	   || GET_CODE (x) == LABEL_REF)
    x = copy_to_suggested_reg (x, NULL_RTX, Pmode);

  return x;
}
4409
4410 /* Delegitimize an address that was legitimized by the above function. */
4411
4412 static rtx
sparc_delegitimize_address(rtx x)4413 sparc_delegitimize_address (rtx x)
4414 {
4415 x = delegitimize_mem_from_attrs (x);
4416
4417 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4418 switch (XINT (XEXP (x, 1), 1))
4419 {
4420 case UNSPEC_MOVE_PIC:
4421 case UNSPEC_TLSLE:
4422 x = XVECEXP (XEXP (x, 1), 0, 0);
4423 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4424 break;
4425 default:
4426 break;
4427 }
4428
4429 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4430 if (GET_CODE (x) == MINUS
4431 && REG_P (XEXP (x, 0))
4432 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4433 && GET_CODE (XEXP (x, 1)) == LO_SUM
4434 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4435 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4436 {
4437 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4438 gcc_assert (GET_CODE (x) == LABEL_REF);
4439 }
4440
4441 return x;
4442 }
4443
4444 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4445 replace the input X, or the original X if no replacement is called for.
4446 The output parameter *WIN is 1 if the calling macro should goto WIN,
4447 0 if it should not.
4448
4449 For SPARC, we wish to handle addresses by splitting them into
4450 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4451 This cuts the number of extra insns by one.
4452
4453 Do nothing when generating PIC code and the address is a symbolic
4454 operand or requires a scratch register. */
4455
4456 rtx
sparc_legitimize_reload_address(rtx x,enum machine_mode mode,int opnum,int type,int ind_levels ATTRIBUTE_UNUSED,int * win)4457 sparc_legitimize_reload_address (rtx x, enum machine_mode mode,
4458 int opnum, int type,
4459 int ind_levels ATTRIBUTE_UNUSED, int *win)
4460 {
4461 /* Decompose SImode constants into HIGH+LO_SUM. */
4462 if (CONSTANT_P (x)
4463 && (mode != TFmode || TARGET_ARCH64)
4464 && GET_MODE (x) == SImode
4465 && GET_CODE (x) != LO_SUM
4466 && GET_CODE (x) != HIGH
4467 && sparc_cmodel <= CM_MEDLOW
4468 && !(flag_pic
4469 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4470 {
4471 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4472 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4473 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4474 opnum, (enum reload_type)type);
4475 *win = 1;
4476 return x;
4477 }
4478
4479 /* We have to recognize what we have already generated above. */
4480 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4481 {
4482 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4483 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4484 opnum, (enum reload_type)type);
4485 *win = 1;
4486 return x;
4487 }
4488
4489 *win = 0;
4490 return x;
4491 }
4492
4493 /* Return true if ADDR (a legitimate address expression)
4494 has an effect that depends on the machine mode it is used for.
4495
4496 In PIC mode,
4497
4498 (mem:HI [%l7+a])
4499
4500 is not equivalent to
4501
4502 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4503
4504 because [%l7+a+1] is interpreted as the address of (a+1). */
4505
4506
4507 static bool
sparc_mode_dependent_address_p(const_rtx addr,addr_space_t as ATTRIBUTE_UNUSED)4508 sparc_mode_dependent_address_p (const_rtx addr,
4509 addr_space_t as ATTRIBUTE_UNUSED)
4510 {
4511 if (flag_pic && GET_CODE (addr) == PLUS)
4512 {
4513 rtx op0 = XEXP (addr, 0);
4514 rtx op1 = XEXP (addr, 1);
4515 if (op0 == pic_offset_table_rtx
4516 && symbolic_operand (op1, VOIDmode))
4517 return true;
4518 }
4519
4520 return false;
4521 }
4522
4523 #ifdef HAVE_GAS_HIDDEN
4524 # define USE_HIDDEN_LINKONCE 1
4525 #else
4526 # define USE_HIDDEN_LINKONCE 0
4527 #endif
4528
4529 static void
get_pc_thunk_name(char name[32],unsigned int regno)4530 get_pc_thunk_name (char name[32], unsigned int regno)
4531 {
4532 const char *reg_name = reg_names[regno];
4533
4534 /* Skip the leading '%' as that cannot be used in a
4535 symbol name. */
4536 reg_name += 1;
4537
4538 if (USE_HIDDEN_LINKONCE)
4539 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4540 else
4541 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4542 }
4543
4544 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4545
4546 static rtx
gen_load_pcrel_sym(rtx op0,rtx op1,rtx op2,rtx op3)4547 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4548 {
4549 int orig_flag_pic = flag_pic;
4550 rtx insn;
4551
4552 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4553 flag_pic = 0;
4554 if (TARGET_ARCH64)
4555 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4556 else
4557 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4558 flag_pic = orig_flag_pic;
4559
4560 return insn;
4561 }
4562
4563 /* Emit code to load the GOT register. */
4564
4565 void
load_got_register(void)4566 load_got_register (void)
4567 {
4568 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4569 if (!global_offset_table_rtx)
4570 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4571
4572 if (TARGET_VXWORKS_RTP)
4573 emit_insn (gen_vxworks_load_got ());
4574 else
4575 {
4576 /* The GOT symbol is subject to a PC-relative relocation so we need a
4577 helper function to add the PC value and thus get the final value. */
4578 if (!got_helper_rtx)
4579 {
4580 char name[32];
4581 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4582 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4583 }
4584
4585 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4586 got_helper_rtx,
4587 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4588 }
4589
4590 /* Need to emit this whether or not we obey regdecls,
4591 since setjmp/longjmp can cause life info to screw up.
4592 ??? In the case where we don't obey regdecls, this is not sufficient
4593 since we may not fall out the bottom. */
4594 emit_use (global_offset_table_rtx);
4595 }
4596
4597 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4598 address of the call target. */
4599
4600 void
sparc_emit_call_insn(rtx pat,rtx addr)4601 sparc_emit_call_insn (rtx pat, rtx addr)
4602 {
4603 rtx insn;
4604
4605 insn = emit_call_insn (pat);
4606
4607 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4608 if (TARGET_VXWORKS_RTP
4609 && flag_pic
4610 && GET_CODE (addr) == SYMBOL_REF
4611 && (SYMBOL_REF_DECL (addr)
4612 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4613 : !SYMBOL_REF_LOCAL_P (addr)))
4614 {
4615 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4616 crtl->uses_pic_offset_table = 1;
4617 }
4618 }
4619
/* Return 1 if RTX is a MEM which is known to be aligned to at
   least a DESIRED byte boundary.  DESIRED must be a power of two.  */

int
mem_min_alignment (rtx mem, int desired)
{
  rtx addr, base, offset;

  /* If it's not a MEM we can't accept it.  */
  if (GET_CODE (mem) != MEM)
    return 0;

  /* Obviously...  */
  if (!TARGET_UNALIGNED_DOUBLES
      && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
    return 1;

  /* ??? The rest of the function predates MEM_ALIGN so
     there is probably a bit of redundancy.  */
  addr = XEXP (mem, 0);
  base = offset = NULL_RTX;
  if (GET_CODE (addr) == PLUS)
    {
      if (GET_CODE (XEXP (addr, 0)) == REG)
	{
	  base = XEXP (addr, 0);

	  /* What we are saying here is that if the base
	     REG is aligned properly, the compiler will make
	     sure any REG based index upon it will be so
	     as well.  */
	  if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
	    offset = XEXP (addr, 1);
	  else
	    offset = const0_rtx;
	}
    }
  else if (GET_CODE (addr) == REG)
    {
      base = addr;
      offset = const0_rtx;
    }

  /* Invariant: whenever BASE is set above, OFFSET is a CONST_INT too,
     so the INTVAL calls below are safe.  */
  if (base != NULL_RTX)
    {
      int regno = REGNO (base);

      if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
	{
	  /* Check if the compiler has recorded some information
	     about the alignment of the base REG.  If reload has
	     completed, we already matched with proper alignments.
	     If not running global_alloc, reload might give us
	     unaligned pointer to local stack though.  */
	  if (((cfun != 0
		&& REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
	       || (optimize && reload_completed))
	      && (INTVAL (offset) & (desired - 1)) == 0)
	    return 1;
	}
      else
	{
	  /* %fp and %sp are always kept aligned; only the offset matters,
	     after removing the stack bias of 64-bit mode.  */
	  if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
	    return 1;
	}
    }
  else if (! TARGET_UNALIGNED_DOUBLES
	   || CONSTANT_P (addr)
	   || GET_CODE (addr) == LO_SUM)
    {
      /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
	 is true, in which case we can only assume that an access is aligned if
	 it is to a constant address, or the address involves a LO_SUM.  */
      return 1;
    }

  /* An obviously unaligned address.  */
  return 0;
}
4699
4700
4701 /* Vectors to keep interesting information about registers where it can easily
4702 be got. We used to use the actual mode value as the bit number, but there
4703 are more than 32 modes now. Instead we use two tables: one indexed by
4704 hard register number, and one indexed by mode. */
4705
4706 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4707 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4708 mapped into one sparc_mode_class mode. */
4709
/* H = sub-word, S = single word, D = double word, T = tetra (quad) word,
   O = octa (8) word; the *F_MODE values are the FP counterparts; CC and
   CCFP are the integer and FP condition-code classes.  */
enum sparc_mode_class {
  H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
  SF_MODE, DF_MODE, TF_MODE, OF_MODE,
  CC_MODE, CCFP_MODE
};

/* Modes for single-word and smaller quantities.  */
#define S_MODES \
  ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))

/* Modes for double-word and smaller quantities.  */
#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))

/* Modes for quad-word and smaller quantities.  */
#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))

/* Modes for 8-word and smaller quantities.  */
#define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))

/* Modes for single-float quantities.  */
#define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))

/* Modes for double-float and smaller quantities.  */
#define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))

/* Modes for quad-float and smaller quantities.  */
#define TF_MODES (DF_MODES | (1 << (int) TF_MODE))

/* Modes for quad-float pairs and smaller quantities.  */
#define OF_MODES (TF_MODES | (1 << (int) OF_MODE))

/* Modes for double-float only quantities.  */
#define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))

/* Modes for quad-float and double-float only quantities.  */
#define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))

/* Modes for quad-float pairs and double-float only quantities.  */
#define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))

/* Modes for condition codes.  */
#define CC_MODES (1 << (int) CC_MODE)
#define CCFP_MODES (1 << (int) CCFP_MODE)

/* Value is 1 if register/mode pair is acceptable on sparc.
   The funny mixture of D and T modes is because integer operations
   do not specially operate on tetra quantities, so non-quad-aligned
   registers can hold quadword quantities (except %o4 and %i4 because
   they cross fixed registers).  */

/* This points to either the 32 bit or the 64 bit version.  */
const int *hard_regno_mode_classes;
4762
/* Allowed mode-class sets for each hard register in 32-bit mode.  */
static const int hard_32bit_mode_classes[] = {
  /* The 32 integer registers.  */
  S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,

  /* FP regs f0 to f31.  */
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,

  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,

  /* %icc, %sfp, %gsr */
  CC_MODES, 0, D_MODES
};
4787
/* Allowed mode-class sets for each hard register in 64-bit mode.  */
static const int hard_64bit_mode_classes[] = {
  /* The 32 integer registers, all 64-bit wide here.  */
  D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,

  /* FP regs f0 to f31.  */
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,

  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,

  /* %icc, %sfp, %gsr */
  CC_MODES, 0, D_MODES
};
4812
/* Map from machine mode to sparc_mode_class bit; set up by sparc_init_modes.  */
int sparc_mode_class [NUM_MACHINE_MODES];

/* Map from hard register number to register class; set up by sparc_init_modes
   and used by REGNO_REG_CLASS.  */
enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4816
4817 static void
sparc_init_modes(void)4818 sparc_init_modes (void)
4819 {
4820 int i;
4821
4822 for (i = 0; i < NUM_MACHINE_MODES; i++)
4823 {
4824 switch (GET_MODE_CLASS (i))
4825 {
4826 case MODE_INT:
4827 case MODE_PARTIAL_INT:
4828 case MODE_COMPLEX_INT:
4829 if (GET_MODE_SIZE (i) < 4)
4830 sparc_mode_class[i] = 1 << (int) H_MODE;
4831 else if (GET_MODE_SIZE (i) == 4)
4832 sparc_mode_class[i] = 1 << (int) S_MODE;
4833 else if (GET_MODE_SIZE (i) == 8)
4834 sparc_mode_class[i] = 1 << (int) D_MODE;
4835 else if (GET_MODE_SIZE (i) == 16)
4836 sparc_mode_class[i] = 1 << (int) T_MODE;
4837 else if (GET_MODE_SIZE (i) == 32)
4838 sparc_mode_class[i] = 1 << (int) O_MODE;
4839 else
4840 sparc_mode_class[i] = 0;
4841 break;
4842 case MODE_VECTOR_INT:
4843 if (GET_MODE_SIZE (i) == 4)
4844 sparc_mode_class[i] = 1 << (int) SF_MODE;
4845 else if (GET_MODE_SIZE (i) == 8)
4846 sparc_mode_class[i] = 1 << (int) DF_MODE;
4847 else
4848 sparc_mode_class[i] = 0;
4849 break;
4850 case MODE_FLOAT:
4851 case MODE_COMPLEX_FLOAT:
4852 if (GET_MODE_SIZE (i) == 4)
4853 sparc_mode_class[i] = 1 << (int) SF_MODE;
4854 else if (GET_MODE_SIZE (i) == 8)
4855 sparc_mode_class[i] = 1 << (int) DF_MODE;
4856 else if (GET_MODE_SIZE (i) == 16)
4857 sparc_mode_class[i] = 1 << (int) TF_MODE;
4858 else if (GET_MODE_SIZE (i) == 32)
4859 sparc_mode_class[i] = 1 << (int) OF_MODE;
4860 else
4861 sparc_mode_class[i] = 0;
4862 break;
4863 case MODE_CC:
4864 if (i == (int) CCFPmode || i == (int) CCFPEmode)
4865 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
4866 else
4867 sparc_mode_class[i] = 1 << (int) CC_MODE;
4868 break;
4869 default:
4870 sparc_mode_class[i] = 0;
4871 break;
4872 }
4873 }
4874
4875 if (TARGET_ARCH64)
4876 hard_regno_mode_classes = hard_64bit_mode_classes;
4877 else
4878 hard_regno_mode_classes = hard_32bit_mode_classes;
4879
4880 /* Initialize the array used by REGNO_REG_CLASS. */
4881 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4882 {
4883 if (i < 16 && TARGET_V8PLUS)
4884 sparc_regno_reg_class[i] = I64_REGS;
4885 else if (i < 32 || i == FRAME_POINTER_REGNUM)
4886 sparc_regno_reg_class[i] = GENERAL_REGS;
4887 else if (i < 64)
4888 sparc_regno_reg_class[i] = FP_REGS;
4889 else if (i < 96)
4890 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
4891 else if (i < 100)
4892 sparc_regno_reg_class[i] = FPCC_REGS;
4893 else
4894 sparc_regno_reg_class[i] = NO_REGS;
4895 }
4896 }
4897
4898 /* Return whether REGNO, a global or FP register, must be saved/restored. */
4899
4900 static inline bool
save_global_or_fp_reg_p(unsigned int regno,int leaf_function ATTRIBUTE_UNUSED)4901 save_global_or_fp_reg_p (unsigned int regno,
4902 int leaf_function ATTRIBUTE_UNUSED)
4903 {
4904 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4905 }
4906
4907 /* Return whether the return address register (%i7) is needed. */
4908
4909 static inline bool
return_addr_reg_needed_p(int leaf_function)4910 return_addr_reg_needed_p (int leaf_function)
4911 {
4912 /* If it is live, for example because of __builtin_return_address (0). */
4913 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4914 return true;
4915
4916 /* Otherwise, it is needed as save register if %o7 is clobbered. */
4917 if (!leaf_function
4918 /* Loading the GOT register clobbers %o7. */
4919 || crtl->uses_pic_offset_table
4920 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4921 return true;
4922
4923 return false;
4924 }
4925
4926 /* Return whether REGNO, a local or in register, must be saved/restored. */
4927
4928 static bool
save_local_or_in_reg_p(unsigned int regno,int leaf_function)4929 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
4930 {
4931 /* General case: call-saved registers live at some point. */
4932 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
4933 return true;
4934
4935 /* Frame pointer register (%fp) if needed. */
4936 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
4937 return true;
4938
4939 /* Return address register (%i7) if needed. */
4940 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
4941 return true;
4942
4943 /* GOT register (%l7) if needed. */
4944 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
4945 return true;
4946
4947 /* If the function accesses prior frames, the frame pointer and the return
4948 address of the previous frame must be saved on the stack. */
4949 if (crtl->accesses_prior_frames
4950 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
4951 return true;
4952
4953 return false;
4954 }
4955
4956 /* Compute the frame size required by the function. This function is called
4957 during the reload pass and also by sparc_expand_prologue. */
4958
4959 HOST_WIDE_INT
sparc_compute_frame_size(HOST_WIDE_INT size,int leaf_function)4960 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
4961 {
4962 HOST_WIDE_INT frame_size, apparent_frame_size;
4963 int args_size, n_global_fp_regs = 0;
4964 bool save_local_in_regs_p = false;
4965 unsigned int i;
4966
4967 /* If the function allocates dynamic stack space, the dynamic offset is
4968 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
4969 if (leaf_function && !cfun->calls_alloca)
4970 args_size = 0;
4971 else
4972 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
4973
4974 /* Calculate space needed for global registers. */
4975 if (TARGET_ARCH64)
4976 for (i = 0; i < 8; i++)
4977 if (save_global_or_fp_reg_p (i, 0))
4978 n_global_fp_regs += 2;
4979 else
4980 for (i = 0; i < 8; i += 2)
4981 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
4982 n_global_fp_regs += 2;
4983
4984 /* In the flat window model, find out which local and in registers need to
4985 be saved. We don't reserve space in the current frame for them as they
4986 will be spilled into the register window save area of the caller's frame.
4987 However, as soon as we use this register window save area, we must create
4988 that of the current frame to make it the live one. */
4989 if (TARGET_FLAT)
4990 for (i = 16; i < 32; i++)
4991 if (save_local_or_in_reg_p (i, leaf_function))
4992 {
4993 save_local_in_regs_p = true;
4994 break;
4995 }
4996
4997 /* Calculate space needed for FP registers. */
4998 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
4999 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5000 n_global_fp_regs += 2;
5001
5002 if (size == 0
5003 && n_global_fp_regs == 0
5004 && args_size == 0
5005 && !save_local_in_regs_p)
5006 frame_size = apparent_frame_size = 0;
5007 else
5008 {
5009 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
5010 apparent_frame_size = (size - STARTING_FRAME_OFFSET + 7) & -8;
5011 apparent_frame_size += n_global_fp_regs * 4;
5012
5013 /* We need to add the size of the outgoing argument area. */
5014 frame_size = apparent_frame_size + ((args_size + 7) & -8);
5015
5016 /* And that of the register window save area. */
5017 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5018
5019 /* Finally, bump to the appropriate alignment. */
5020 frame_size = SPARC_STACK_ALIGN (frame_size);
5021 }
5022
5023 /* Set up values for use in prologue and epilogue. */
5024 sparc_frame_size = frame_size;
5025 sparc_apparent_frame_size = apparent_frame_size;
5026 sparc_n_global_fp_regs = n_global_fp_regs;
5027 sparc_save_local_in_regs_p = save_local_in_regs_p;
5028
5029 return frame_size;
5030 }
5031
5032 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5033
5034 int
sparc_initial_elimination_offset(int to)5035 sparc_initial_elimination_offset (int to)
5036 {
5037 int offset;
5038
5039 if (to == STACK_POINTER_REGNUM)
5040 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5041 else
5042 offset = 0;
5043
5044 offset += SPARC_STACK_BIAS;
5045 return offset;
5046 }
5047
5048 /* Output any necessary .register pseudo-ops. */
5049
5050 void
sparc_output_scratch_registers(FILE * file ATTRIBUTE_UNUSED)5051 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5052 {
5053 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5054 int i;
5055
5056 if (TARGET_ARCH32)
5057 return;
5058
5059 /* Check if %g[2367] were used without
5060 .register being printed for them already. */
5061 for (i = 2; i < 8; i++)
5062 {
5063 if (df_regs_ever_live_p (i)
5064 && ! sparc_hard_reg_printed [i])
5065 {
5066 sparc_hard_reg_printed [i] = 1;
5067 /* %g7 is used as TLS base register, use #ignore
5068 for it instead of #scratch. */
5069 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5070 i == 7 ? "ignore" : "scratch");
5071 }
5072 if (i == 3) i = 5;
5073 }
5074 #endif
5075 }
5076
5077 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5078
5079 #if PROBE_INTERVAL > 4096
5080 #error Cannot use indexed addressing mode for stack probing
5081 #endif
5082
5083 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5084 inclusive. These are offsets from the current stack pointer.
5085
5086 Note that we don't use the REG+REG addressing mode for the probes because
5087 of the stack bias in 64-bit mode. And it doesn't really buy us anything
5088 so the advantages of having a single code win here. */
5089
static void
sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
  /* %g1 serves as the probing address throughout; the stack grows
     downward so addresses below %sp are formed with MINUS.  */
  rtx g1 = gen_rtx_REG (Pmode, 1);

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  if (size <= PROBE_INTERVAL)
    {
      emit_move_insn (g1, GEN_INT (first));
      emit_insn (gen_rtx_SET (VOIDmode, g1,
			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
      emit_stack_probe (plus_constant (Pmode, g1, -size));
    }

  /* The run-time loop is made up of 10 insns in the generic case while the
     compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
  else if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i;

      emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
      emit_insn (gen_rtx_SET (VOIDmode, g1,
			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
      emit_stack_probe (g1);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
	 it exceeds SIZE.  If only two probes are needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, g1,
				  plus_constant (Pmode, g1, -PROBE_INTERVAL)));
	  emit_stack_probe (g1);
	}

      /* Final probe at FIRST + SIZE; the loop above left g1 at
	 FIRST + (i - PROBE_INTERVAL).  */
      emit_stack_probe (plus_constant (Pmode, g1,
				       (i - PROBE_INTERVAL) - size));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      /* %g4 holds the last address to probe (LAST_ADDR below).  */
      rtx g4 = gen_rtx_REG (Pmode, 4);

      emit_move_insn (g1, GEN_INT (first));


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;
      emit_move_insn (g4, GEN_INT (rounded_size));


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_insn (gen_rtx_SET (VOIDmode, g1,
			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      emit_insn (gen_rtx_SET (VOIDmode, g4, gen_rtx_MINUS (Pmode, g1, g4)));


      /* Step 3: the loop

	 while (TEST_ADDR != LAST_ADDR)
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      if (TARGET_ARCH64)
	emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
      else
	emit_insn (gen_probe_stack_rangesi (g1, g1, g4));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
5186
5187 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5188 absolute addresses. */
5189
const char *
output_probe_stack_range (rtx reg1, rtx reg2)
{
  /* Counter used to generate unique internal labels across calls.  */
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg1;
  xops[1] = reg2;
  output_asm_insn ("cmp\t%0, %1", xops);
  if (TARGET_ARCH64)
    fputs ("\tbe,pn\t%xcc,", asm_out_file);
  else
    fputs ("\tbe\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  The leading space in the
     template appears to mark a delay-slot instruction here; so the add
     executes in the conditional branch's delay slot.  */
  xops[1] = GEN_INT (-PROBE_INTERVAL);
  output_asm_insn (" add\t%0, %1, %0", xops);

  /* Probe at TEST_ADDR and branch.  */
  if (TARGET_ARCH64)
    fputs ("\tba,pt\t%xcc,", asm_out_file);
  else
    fputs ("\tba\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);
  /* The probing store (in the ba's delay slot) accounts for the stack
     bias in 64-bit mode.  */
  xops[1] = GEN_INT (SPARC_STACK_BIAS);
  output_asm_insn (" st\t%%g0, [%0+%1]", xops);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
5231
5232 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5233 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5234 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5235 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5236 the action to be performed if it returns false. Return the new offset. */
5237
5238 typedef bool (*sorr_pred_t) (unsigned int, int);
5239 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5240
static int
emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
			   int offset, int leaf_function, sorr_pred_t save_p,
			   sorr_act_t action_true, sorr_act_t action_false)
{
  unsigned int i;
  rtx mem, insn;

  /* In 64-bit mode, integer registers are handled individually as
     64-bit (DImode) quantities; this arm only applies to the integer
     registers (HIGH <= 32).  */
  if (TARGET_ARCH64 && high <= 32)
    {
      /* Offset of the frame pointer's save slot, or -1 if the frame
	 pointer is not among the saved registers.  */
      int fp_offset = -1;

      for (i = low; i < high; i++)
	{
	  if (save_p (i, leaf_function))
	    {
	      mem = gen_frame_mem (DImode, plus_constant (Pmode,
							  base, offset));
	      if (action_true == SORR_SAVE)
		{
		  insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
		  RTX_FRAME_RELATED_P (insn) = 1;
		}
	      else  /* action_true == SORR_RESTORE */
		{
		  /* The frame pointer must be restored last since its old
		     value may be used as base address for the frame.  This
		     is problematic in 64-bit mode only because of the lack
		     of double-word load instruction.  */
		  if (i == HARD_FRAME_POINTER_REGNUM)
		    fp_offset = offset;
		  else
		    emit_move_insn (gen_rtx_REG (DImode, i), mem);
		}
	      offset += 8;
	    }
	  else if (action_false == SORR_ADVANCE)
	    offset += 8;
	}

      /* Restore the frame pointer, deferred from the loop above.  */
      if (fp_offset >= 0)
	{
	  mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
	  emit_move_insn (hard_frame_pointer_rtx, mem);
	}
    }
  else
    {
      /* Registers are processed in double-word aligned pairs so that a
	 single double-word move can be used when both members of a pair
	 need saving/restoring.  */
      for (i = low; i < high; i += 2)
	{
	  bool reg0 = save_p (i, leaf_function);
	  bool reg1 = save_p (i + 1, leaf_function);
	  enum machine_mode mode;
	  int regno;

	  if (reg0 && reg1)
	    {
	      /* Both members: one double-word move starting at REGNO.  */
	      mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
	      regno = i;
	    }
	  else if (reg0)
	    {
	      /* Only the even member: single-word move at OFFSET.  */
	      mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
	      regno = i;
	    }
	  else if (reg1)
	    {
	      /* Only the odd member: single-word move at OFFSET + 4.  */
	      mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
	      regno = i + 1;
	      offset += 4;
	    }
	  else
	    {
	      if (action_false == SORR_ADVANCE)
		offset += 8;
	      continue;
	    }

	  mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
	  if (action_true == SORR_SAVE)
	    {
	      insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
	      RTX_FRAME_RELATED_P (insn) = 1;
	      if (mode == DImode)
		{
		  /* Describe the double-word save as two word-sized
		     stores in a REG_FRAME_RELATED_EXPR note, so the
		     unwind info records both register slots.  */
		  rtx set1, set2;
		  mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
							      offset));
		  set1 = gen_rtx_SET (VOIDmode, mem,
				      gen_rtx_REG (SImode, regno));
		  RTX_FRAME_RELATED_P (set1) = 1;
		  mem
		    = gen_frame_mem (SImode, plus_constant (Pmode, base,
							    offset + 4));
		  set2 = gen_rtx_SET (VOIDmode, mem,
				      gen_rtx_REG (SImode, regno + 1));
		  RTX_FRAME_RELATED_P (set2) = 1;
		  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
				gen_rtx_PARALLEL (VOIDmode,
						  gen_rtvec (2, set1, set2)));
		}
	    }
	  else  /* action_true == SORR_RESTORE */
	    emit_move_insn (gen_rtx_REG (mode, regno), mem);

	  /* Always preserve double-word alignment.  */
	  offset = (offset + 8) & -8;
	}
    }

  return offset;
}
5353
5354 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5355
5356 static rtx
emit_adjust_base_to_offset(rtx base,int offset)5357 emit_adjust_base_to_offset (rtx base, int offset)
5358 {
5359 /* ??? This might be optimized a little as %g1 might already have a
5360 value close enough that a single add insn will do. */
5361 /* ??? Although, all of this is probably only a temporary fix because
5362 if %g1 can hold a function result, then sparc_expand_epilogue will
5363 lose (the result will be clobbered). */
5364 rtx new_base = gen_rtx_REG (Pmode, 1);
5365 emit_move_insn (new_base, GEN_INT (offset));
5366 emit_insn (gen_rtx_SET (VOIDmode,
5367 new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5368 return new_base;
5369 }
5370
5371 /* Emit code to save/restore call-saved global and FP registers. */
5372
5373 static void
emit_save_or_restore_global_fp_regs(rtx base,int offset,sorr_act_t action)5374 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5375 {
5376 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5377 {
5378 base = emit_adjust_base_to_offset (base, offset);
5379 offset = 0;
5380 }
5381
5382 offset
5383 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5384 save_global_or_fp_reg_p, action, SORR_NONE);
5385 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5386 save_global_or_fp_reg_p, action, SORR_NONE);
5387 }
5388
5389 /* Emit code to save/restore call-saved local and in registers. */
5390
5391 static void
emit_save_or_restore_local_in_regs(rtx base,int offset,sorr_act_t action)5392 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5393 {
5394 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5395 {
5396 base = emit_adjust_base_to_offset (base, offset);
5397 offset = 0;
5398 }
5399
5400 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5401 save_local_or_in_reg_p, action, SORR_ADVANCE);
5402 }
5403
5404 /* Emit a window_save insn. */
5405
static rtx
emit_window_save (rtx increment)
{
  /* INCREMENT is the stack adjustment performed by the window_save
     pattern.  Attach the CFI notes the unwinder needs.  */
  rtx insn = emit_insn (gen_window_save (increment));
  RTX_FRAME_RELATED_P (insn) = 1;

  /* The incoming return address (%o7) is saved in %i7.  */
  add_reg_note (insn, REG_CFA_REGISTER,
		gen_rtx_SET (VOIDmode,
			     gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
			     gen_rtx_REG (Pmode,
					  INCOMING_RETURN_ADDR_REGNUM)));

  /* The window save event.  */
  add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);

  /* The CFA is %fp, the hard frame pointer.  */
  add_reg_note (insn, REG_CFA_DEF_CFA,
		plus_constant (Pmode, hard_frame_pointer_rtx,
			       INCOMING_FRAME_SP_OFFSET));

  return insn;
}
5429
5430 /* Generate an increment for the stack pointer. */
5431
5432 static rtx
gen_stack_pointer_inc(rtx increment)5433 gen_stack_pointer_inc (rtx increment)
5434 {
5435 return gen_rtx_SET (VOIDmode,
5436 stack_pointer_rtx,
5437 gen_rtx_PLUS (Pmode,
5438 stack_pointer_rtx,
5439 increment));
5440 }
5441
5442 /* Expand the function prologue. The prologue is responsible for reserving
5443 storage for the frame, saving the call-saved registers and loading the
5444 GOT register if needed. */
5445
void
sparc_expand_prologue (void)
{
  HOST_WIDE_INT size;
  rtx insn;

  /* Compute a snapshot of crtl->uses_only_leaf_regs.  Relying
     on the final value of the flag means deferring the prologue/epilogue
     expansion until just before the second scheduling pass, which is too
     late to emit multiple epilogues or return insns.

     Of course we are making the assumption that the value of the flag
     will not change between now and its final value.  Of the three parts
     of the formula, only the last one can reasonably vary.  Let's take a
     closer look, after assuming that the first two ones are set to true
     (otherwise the last value is effectively silenced).

     If only_leaf_regs_used returns false, the global predicate will also
     be false so the actual frame size calculated below will be positive.
     As a consequence, the save_register_window insn will be emitted in
     the instruction stream; now this insn explicitly references %fp
     which is not a leaf register so only_leaf_regs_used will always
     return false subsequently.

     If only_leaf_regs_used returns true, we hope that the subsequent
     optimization passes won't cause non-leaf registers to pop up.  For
     example, the regrename pass has special provisions to not rename to
     non-leaf registers in a leaf function.  */
  sparc_leaf_function_p
    = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();

  size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);

  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* Emit stack probes when static builtin stack checking is enabled.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
	    sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
					  size - STACK_CHECK_PROTECT);
	}
      else if (size > 0)
	sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
    }

  if (size == 0)
    ; /* do nothing.  */
  else if (sparc_leaf_function_p)
    {
      /* A leaf function allocates its frame with a plain %sp decrement,
	 without saving a register window.  */
      rtx size_int_rtx = GEN_INT (-size);

      if (size <= 4096)
	insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
      else if (size <= 8192)
	{
	  /* The decrement does not fit in a single 13-bit signed
	     immediate, so it is split in two.  */
	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* %sp is still the CFA register.  */
	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
	}
      else
	{
	  /* Load the decrement into %g1; the attached note describes the
	     net %sp adjustment for the unwind info.  */
	  rtx size_rtx = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (size_rtx, size_int_rtx);
	  insn = emit_insn (gen_stack_pointer_inc (size_rtx));
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_stack_pointer_inc (size_int_rtx));
	}

      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* A regular function allocates its frame as part of the register
	 window save instruction.  */
      rtx size_int_rtx = GEN_INT (-size);

      if (size <= 4096)
	emit_window_save (size_int_rtx);
      else if (size <= 8192)
	{
	  emit_window_save (GEN_INT (-4096));

	  /* %sp is not the CFA register anymore.  */
	  emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));

	  /* Make sure no %fp-based store is issued until after the frame is
	     established.  The offset between the frame pointer and the stack
	     pointer is calculated relative to the value of the stack pointer
	     at the end of the function prologue, and moving instructions that
	     access the stack via the frame pointer between the instructions
	     that decrement the stack pointer could result in accessing the
	     register window save area, which is volatile.  */
	  emit_insn (gen_frame_blockage ());
	}
      else
	{
	  rtx size_rtx = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (size_rtx, size_int_rtx);
	  emit_window_save (size_rtx);
	}
    }

  /* Record where the frame is accessed from for the rest of this
     function: off %sp for a leaf function, off %fp otherwise.  */
  if (sparc_leaf_function_p)
    {
      sparc_frame_base_reg = stack_pointer_rtx;
      sparc_frame_base_offset = size + SPARC_STACK_BIAS;
    }
  else
    {
      sparc_frame_base_reg = hard_frame_pointer_rtx;
      sparc_frame_base_offset = SPARC_STACK_BIAS;
    }

  if (sparc_n_global_fp_regs > 0)
    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
				         sparc_frame_base_offset
					   - sparc_apparent_frame_size,
					 SORR_SAVE);

  /* Load the GOT register if needed.  */
  if (crtl->uses_pic_offset_table)
    load_got_register ();

  /* Advertise that the data calculated just above are now valid.  */
  sparc_prologue_data_valid_p = true;
}
5575
/* Expand the function prologue in the -mflat model, i.e. without register
   windows.  The prologue is responsible for reserving storage for the
   frame, saving the call-saved registers and loading the GOT register if
   needed.  */
5579
void
sparc_flat_expand_prologue (void)
{
  HOST_WIDE_INT size;
  rtx insn;

  /* In the flat model there is no register window to rely on, so the
     leafness snapshot only needs the optimization and leaf flags.  */
  sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;

  size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);

  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* Emit stack probes when static builtin stack checking is enabled.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
	    sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
					  size - STACK_CHECK_PROTECT);
	}
      else if (size > 0)
	sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
    }

  /* Save the %l and %i registers into the caller's frame if needed,
     before the stack pointer moves.  */
  if (sparc_save_local_in_regs_p)
    emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
					SORR_SAVE);

  if (size == 0)
    ; /* do nothing.  */
  else
    {
      rtx size_int_rtx, size_rtx;

      size_rtx = size_int_rtx = GEN_INT (-size);

      /* We establish the frame (i.e. decrement the stack pointer) first, even
	 if we use a frame pointer, because we cannot clobber any call-saved
	 registers, including the frame pointer, if we haven't created a new
	 register save area, for the sake of compatibility with the ABI.  */
      if (size <= 4096)
	insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
      else if (size <= 8192 && !frame_pointer_needed)
	{
	  /* Split the decrement in two 13-bit immediates.  This path is
	     not taken when a frame pointer is needed, presumably because
	     the %fp computation below requires SIZE_RTX to be usable as
	     the second operand of the subtraction -- TODO confirm.  */
	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
	}
      else
	{
	  /* Load the decrement into %g1 and describe the CFA adjustment
	     explicitly for the unwind info.  */
	  size_rtx = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (size_rtx, size_int_rtx);
	  insn = emit_insn (gen_stack_pointer_inc (size_rtx));
	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
			gen_stack_pointer_inc (size_int_rtx));
	}
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Ensure nothing is scheduled until after the frame is established.  */
      emit_insn (gen_blockage ());

      if (frame_pointer_needed)
	{
	  /* %fp = %sp - (-size), i.e. the value of %sp on entry.  */
	  insn = emit_insn (gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
					 gen_rtx_MINUS (Pmode,
							stack_pointer_rtx,
							size_rtx)));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
			gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    size)));
	}

      if (return_addr_reg_needed_p (sparc_leaf_function_p))
	{
	  /* Preserve the incoming return address in %i7, since there is
	     no register window to do it for us.  */
	  rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
	  rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);

	  insn = emit_move_insn (i7, o7);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  add_reg_note (insn, REG_CFA_REGISTER,
			gen_rtx_SET (VOIDmode, i7, o7));

	  /* Prevent this instruction from ever being considered dead,
	     even if this function has no epilogue.  */
	  emit_use (i7);
	}
    }

  /* Record where the frame is accessed from for the rest of this
     function.  */
  if (frame_pointer_needed)
    {
      sparc_frame_base_reg = hard_frame_pointer_rtx;
      sparc_frame_base_offset = SPARC_STACK_BIAS;
    }
  else
    {
      sparc_frame_base_reg = stack_pointer_rtx;
      sparc_frame_base_offset = size + SPARC_STACK_BIAS;
    }

  if (sparc_n_global_fp_regs > 0)
    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
				         sparc_frame_base_offset
					   - sparc_apparent_frame_size,
					 SORR_SAVE);

  /* Load the GOT register if needed.  */
  if (crtl->uses_pic_offset_table)
    load_got_register ();

  /* Advertise that the data calculated just above are now valid.  */
  sparc_prologue_data_valid_p = true;
}
5697
5698 /* This function generates the assembly code for function entry, which boils
5699 down to emitting the necessary .register directives. */
5700
static void
sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  /* Check that the assumption we made in sparc_expand_prologue is valid.
     The flat model makes no such assumption, so it is exempt.  */
  if (!TARGET_FLAT)
    gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);

  /* Emit .register directives for the application registers used.  */
  sparc_output_scratch_registers (file);
}
5710
5711 /* Expand the function epilogue, either normal or part of a sibcall.
5712 We emit all the instructions except the return or the call. */
5713
5714 void
sparc_expand_epilogue(bool for_eh)5715 sparc_expand_epilogue (bool for_eh)
5716 {
5717 HOST_WIDE_INT size = sparc_frame_size;
5718
5719 if (sparc_n_global_fp_regs > 0)
5720 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5721 sparc_frame_base_offset
5722 - sparc_apparent_frame_size,
5723 SORR_RESTORE);
5724
5725 if (size == 0 || for_eh)
5726 ; /* do nothing. */
5727 else if (sparc_leaf_function_p)
5728 {
5729 if (size <= 4096)
5730 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5731 else if (size <= 8192)
5732 {
5733 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5734 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5735 }
5736 else
5737 {
5738 rtx reg = gen_rtx_REG (Pmode, 1);
5739 emit_move_insn (reg, GEN_INT (size));
5740 emit_insn (gen_stack_pointer_inc (reg));
5741 }
5742 }
5743 }
5744
/* Expand the function epilogue in the -mflat model, either normal or part
   of a sibcall.  We emit all the instructions except the return or the
   call.  */
5747
void
sparc_flat_expand_epilogue (bool for_eh)
{
  HOST_WIDE_INT size = sparc_frame_size;

  /* Restore the call-saved global and FP registers first; they are
     addressed relative to the still-established frame.  */
  if (sparc_n_global_fp_regs > 0)
    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
				         sparc_frame_base_offset
					   - sparc_apparent_frame_size,
					 SORR_RESTORE);

  /* If we have a frame pointer, we'll need both to restore it before the
     frame is destroyed and use its current value in destroying the frame.
     Since we don't have an atomic way to do that in the flat window model,
     we save the current value into a temporary register (%g1).  */
  if (frame_pointer_needed && !for_eh)
    emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);

  /* Move the return address saved in %i7 back into %o7.  */
  if (return_addr_reg_needed_p (sparc_leaf_function_p))
    emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
		    gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));

  /* Restore the %l and %i registers saved by the prologue.  */
  if (sparc_save_local_in_regs_p)
    emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
					sparc_frame_base_offset,
					SORR_RESTORE);

  if (size == 0 || for_eh)
    ; /* do nothing.  */
  else if (frame_pointer_needed)
    {
      /* Make sure the frame is destroyed after everything else is done.  */
      emit_insn (gen_blockage ());

      /* %g1 holds the entry value of %fp, saved above.  */
      emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
    }
  else
    {
      /* Likewise.  */
      emit_insn (gen_blockage ());

      if (size <= 4096)
	emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
      else if (size <= 8192)
	{
	  /* Split the increment that doesn't fit in 13 bits.  */
	  emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
	  emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
	}
      else
	{
	  rtx reg = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (reg, GEN_INT (size));
	  emit_insn (gen_stack_pointer_inc (reg));
	}
    }
}
5804
5805 /* Return true if it is appropriate to emit `return' instructions in the
5806 body of a function. */
5807
5808 bool
sparc_can_use_return_insn_p(void)5809 sparc_can_use_return_insn_p (void)
5810 {
5811 return sparc_prologue_data_valid_p
5812 && sparc_n_global_fp_regs == 0
5813 && TARGET_FLAT
5814 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
5815 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
5816 }
5817
5818 /* This function generates the assembly code for function exit. */
5819
5820 static void
sparc_asm_function_epilogue(FILE * file,HOST_WIDE_INT size ATTRIBUTE_UNUSED)5821 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5822 {
5823 /* If the last two instructions of a function are "call foo; dslot;"
5824 the return address might point to the first instruction in the next
5825 function and we have to output a dummy nop for the sake of sane
5826 backtraces in such cases. This is pointless for sibling calls since
5827 the return address is explicitly adjusted. */
5828
5829 rtx insn, last_real_insn;
5830
5831 insn = get_last_insn ();
5832
5833 last_real_insn = prev_real_insn (insn);
5834 if (last_real_insn
5835 && NONJUMP_INSN_P (last_real_insn)
5836 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
5837 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
5838
5839 if (last_real_insn
5840 && CALL_P (last_real_insn)
5841 && !SIBLING_CALL_P (last_real_insn))
5842 fputs("\tnop\n", file);
5843
5844 sparc_output_deferred_case_vectors ();
5845 }
5846
5847 /* Output a 'restore' instruction. */
5848
5849 static void
output_restore(rtx pat)5850 output_restore (rtx pat)
5851 {
5852 rtx operands[3];
5853
5854 if (! pat)
5855 {
5856 fputs ("\t restore\n", asm_out_file);
5857 return;
5858 }
5859
5860 gcc_assert (GET_CODE (pat) == SET);
5861
5862 operands[0] = SET_DEST (pat);
5863 pat = SET_SRC (pat);
5864
5865 switch (GET_CODE (pat))
5866 {
5867 case PLUS:
5868 operands[1] = XEXP (pat, 0);
5869 operands[2] = XEXP (pat, 1);
5870 output_asm_insn (" restore %r1, %2, %Y0", operands);
5871 break;
5872 case LO_SUM:
5873 operands[1] = XEXP (pat, 0);
5874 operands[2] = XEXP (pat, 1);
5875 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
5876 break;
5877 case ASHIFT:
5878 operands[1] = XEXP (pat, 0);
5879 gcc_assert (XEXP (pat, 1) == const1_rtx);
5880 output_asm_insn (" restore %r1, %r1, %Y0", operands);
5881 break;
5882 default:
5883 operands[1] = pat;
5884 output_asm_insn (" restore %%g0, %1, %Y0", operands);
5885 break;
5886 }
5887 }
5888
5889 /* Output a return. */
5890
const char *
output_return (rtx insn)
{
  if (crtl->calls_eh_return)
    {
      /* If the function uses __builtin_eh_return, the eh_return
	 machinery occupies the delay slot.  */
      gcc_assert (!final_sequence);

      if (flag_delayed_branch)
	{
	  if (!TARGET_FLAT && TARGET_V9)
	    fputs ("\treturn\t%i7+8\n", asm_out_file);
	  else
	    {
	      if (!TARGET_FLAT)
		fputs ("\trestore\n", asm_out_file);

	      fputs ("\tjmp\t%o7+8\n", asm_out_file);
	    }

	  /* The stack adjustment by the EH stack adjust amount in %g1
	     goes in the delay slot (leading space).  */
	  fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
	}
      else
	{
	  if (!TARGET_FLAT)
	    fputs ("\trestore\n", asm_out_file);

	  /* No delay slots: adjust the stack before the jump and pad
	     the jump's delay slot with a nop.  */
	  fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
	  fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
	}
    }
  else if (sparc_leaf_function_p || TARGET_FLAT)
    {
      /* This is a leaf or flat function so we don't have to bother restoring
	 the register window, which frees us from dealing with the convoluted
	 semantics of restore/return.  We simply output the jump to the
	 return address and the insn in the delay slot (if any).  */

      return "jmp\t%%o7+%)%#";
    }
  else
    {
      /* This is a regular function so we have to restore the register window.
	 We may have a pending insn for the delay slot, which will be either
	 combined with the 'restore' instruction or put in the delay slot of
	 the 'return' instruction.  */

      if (final_sequence)
	{
	  rtx delay, pat;

	  delay = NEXT_INSN (insn);
	  gcc_assert (delay);

	  pat = PATTERN (delay);

	  /* If the delay-slot insn only uses registers that survive a
	     V9 'return' (checked by epilogue_renumber in test mode),
	     renumber it into the new window and put it in the slot.  */
	  if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
	    {
	      epilogue_renumber (&pat, 0);
	      return "return\t%%i7+%)%#";
	    }
	  else
	    {
	      /* Otherwise fold the delay-slot insn into the 'restore'
	         and neutralize the original insn so it is not emitted
	         a second time.  */
	      output_asm_insn ("jmp\t%%i7+%)", NULL);
	      output_restore (pat);
	      PATTERN (delay) = gen_blockage ();
	      INSN_CODE (delay) = -1;
	    }
	}
      else
	{
	  /* The delay slot is empty.  */
	  if (TARGET_V9)
	    return "return\t%%i7+%)\n\t nop";
	  else if (flag_delayed_branch)
	    return "jmp\t%%i7+%)\n\t restore";
	  else
	    return "restore\n\tjmp\t%%o7+%)\n\t nop";
	}
    }

  return "";
}
5975
5976 /* Output a sibling call. */
5977
const char *
output_sibcall (rtx insn, rtx call_operand)
{
  rtx operands[1];

  /* Sibcalls are only emitted when delay slots are in use.  */
  gcc_assert (flag_delayed_branch);

  operands[0] = call_operand;

  if (sparc_leaf_function_p || TARGET_FLAT)
    {
      /* This is a leaf or flat function so we don't have to bother restoring
	 the register window.  We simply output the jump to the function and
	 the insn in the delay slot (if any).  */

      gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));

      if (final_sequence)
	output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
			 operands);
      else
	/* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
	   it into branch if possible.  */
	output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
			 operands);
    }
  else
    {
      /* This is a regular function so we have to restore the register window.
	 We may have a pending insn for the delay slot, which will be combined
	 with the 'restore' instruction.  */

      output_asm_insn ("call\t%a0, 0", operands);

      if (final_sequence)
	{
	  /* Fold the delay-slot insn into the 'restore' and neutralize
	     the original insn so it is not emitted a second time.  */
	  rtx delay = NEXT_INSN (insn);
	  gcc_assert (delay);

	  output_restore (PATTERN (delay));

	  PATTERN (delay) = gen_blockage ();
	  INSN_CODE (delay) = -1;
	}
      else
	output_restore (NULL_RTX);
    }

  return "";
}
6028
6029 /* Functions for handling argument passing.
6030
6031 For 32-bit, the first 6 args are normally in registers and the rest are
6032 pushed. Any arg that starts within the first 6 words is at least
6033 partially passed in a register unless its data type forbids.
6034
6035 For 64-bit, the argument registers are laid out as an array of 16 elements
6036 and arguments are added sequentially. The first 6 int args and up to the
6037 first 16 fp args (depending on size) are passed in regs.
6038
6039 Slot Stack Integral Float Float in structure Double Long Double
6040 ---- ----- -------- ----- ------------------ ------ -----------
6041 15 [SP+248] %f31 %f30,%f31 %d30
6042 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6043 13 [SP+232] %f27 %f26,%f27 %d26
6044 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6045 11 [SP+216] %f23 %f22,%f23 %d22
6046 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6047 9 [SP+200] %f19 %f18,%f19 %d18
6048 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6049 7 [SP+184] %f15 %f14,%f15 %d14
6050 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6051 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6052 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6053 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6054 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6055 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6056 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6057
6058 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6059
6060 Integral arguments are always passed as 64-bit quantities appropriately
6061 extended.
6062
6063 Passing of floating point values is handled as follows.
6064 If a prototype is in scope:
6065 If the value is in a named argument (i.e. not a stdarg function or a
6066 value not part of the `...') then the value is passed in the appropriate
6067 fp reg.
6068 If the value is part of the `...' and is passed in one of the first 6
6069 slots then the value is passed in the appropriate int reg.
6070 If the value is part of the `...' and is not passed in one of the first 6
6071 slots then the value is passed in memory.
6072 If a prototype is not in scope:
6073 If the value is one of the first 6 arguments the value is passed in the
6074 appropriate integer reg and the appropriate fp reg.
6075 If the value is not one of the first 6 arguments the value is passed in
6076 the appropriate fp reg and in memory.
6077
6078
6079 Summary of the calling conventions implemented by GCC on the SPARC:
6080
6081 32-bit ABI:
6082 size argument return value
6083
6084 small integer <4 int. reg. int. reg.
6085 word 4 int. reg. int. reg.
6086 double word 8 int. reg. int. reg.
6087
6088 _Complex small integer <8 int. reg. int. reg.
6089 _Complex word 8 int. reg. int. reg.
6090 _Complex double word 16 memory int. reg.
6091
6092 vector integer <=8 int. reg. FP reg.
6093 vector integer >8 memory memory
6094
6095 float 4 int. reg. FP reg.
6096 double 8 int. reg. FP reg.
6097 long double 16 memory memory
6098
6099 _Complex float 8 memory FP reg.
6100 _Complex double 16 memory FP reg.
6101 _Complex long double 32 memory FP reg.
6102
6103 vector float any memory memory
6104
6105 aggregate any memory memory
6106
6107
6108
6109 64-bit ABI:
6110 size argument return value
6111
6112 small integer <8 int. reg. int. reg.
6113 word 8 int. reg. int. reg.
6114 double word 16 int. reg. int. reg.
6115
6116 _Complex small integer <16 int. reg. int. reg.
6117 _Complex word 16 int. reg. int. reg.
6118 _Complex double word 32 memory int. reg.
6119
6120 vector integer <=16 FP reg. FP reg.
6121 vector integer 16<s<=32 memory FP reg.
6122 vector integer >32 memory memory
6123
6124 float 4 FP reg. FP reg.
6125 double 8 FP reg. FP reg.
6126 long double 16 FP reg. FP reg.
6127
6128 _Complex float 8 FP reg. FP reg.
6129 _Complex double 16 FP reg. FP reg.
6130 _Complex long double 32 memory FP reg.
6131
6132 vector float <=16 FP reg. FP reg.
6133 vector float 16<s<=32 memory FP reg.
6134 vector float >32 memory memory
6135
6136 aggregate <=16 reg. reg.
6137 aggregate 16<s<=32 memory reg.
6138 aggregate >32 memory memory
6139
6140
6141
6142 Note #1: complex floating-point types follow the extended SPARC ABIs as
6143 implemented by the Sun compiler.
6144
6145 Note #2: integral vector types follow the scalar floating-point types
6146 conventions to match what is implemented by the Sun VIS SDK.
6147
6148 Note #3: floating-point vector types follow the aggregate types
6149 conventions. */
6150
6151
/* Maximum number of int regs for args.  */
#define SPARC_INT_ARG_MAX 6
/* Maximum number of fp regs for args.  */
#define SPARC_FP_ARG_MAX 16

/* Round SIZE (in bytes) up to a whole number of machine words.  */
#define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
6158
6159 /* Handle the INIT_CUMULATIVE_ARGS macro.
6160 Initialize a variable CUM of type CUMULATIVE_ARGS
6161 for a call to a function whose data type is FNTYPE.
6162 For a library call, FNTYPE is 0. */
6163
6164 void
init_cumulative_args(struct sparc_args * cum,tree fntype,rtx libname ATTRIBUTE_UNUSED,tree fndecl ATTRIBUTE_UNUSED)6165 init_cumulative_args (struct sparc_args *cum, tree fntype,
6166 rtx libname ATTRIBUTE_UNUSED,
6167 tree fndecl ATTRIBUTE_UNUSED)
6168 {
6169 cum->words = 0;
6170 cum->prototype_p = fntype && prototype_p (fntype);
6171 cum->libcall_p = fntype == 0;
6172 }
6173
6174 /* Handle promotion of pointer and integer arguments. */
6175
6176 static enum machine_mode
sparc_promote_function_mode(const_tree type,enum machine_mode mode,int * punsignedp,const_tree fntype ATTRIBUTE_UNUSED,int for_return ATTRIBUTE_UNUSED)6177 sparc_promote_function_mode (const_tree type,
6178 enum machine_mode mode,
6179 int *punsignedp,
6180 const_tree fntype ATTRIBUTE_UNUSED,
6181 int for_return ATTRIBUTE_UNUSED)
6182 {
6183 if (type != NULL_TREE && POINTER_TYPE_P (type))
6184 {
6185 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6186 return Pmode;
6187 }
6188
6189 /* Integral arguments are passed as full words, as per the ABI. */
6190 if (GET_MODE_CLASS (mode) == MODE_INT
6191 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6192 return word_mode;
6193
6194 return mode;
6195 }
6196
6197 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6198
6199 static bool
sparc_strict_argument_naming(cumulative_args_t ca ATTRIBUTE_UNUSED)6200 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6201 {
6202 return TARGET_ARCH64 ? true : false;
6203 }
6204
6205 /* Scan the record type TYPE and return the following predicates:
6206 - INTREGS_P: the record contains at least one field or sub-field
6207 that is eligible for promotion in integer registers.
6208 - FP_REGS_P: the record contains at least one field or sub-field
6209 that is eligible for promotion in floating-point registers.
6210 - PACKED_P: the record contains at least one field that is packed.
6211
6212 Sub-fields are not taken into account for the PACKED_P predicate. */
6213
6214 static void
scan_record_type(const_tree type,int * intregs_p,int * fpregs_p,int * packed_p)6215 scan_record_type (const_tree type, int *intregs_p, int *fpregs_p,
6216 int *packed_p)
6217 {
6218 tree field;
6219
6220 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6221 {
6222 if (TREE_CODE (field) == FIELD_DECL)
6223 {
6224 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6225 scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0);
6226 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6227 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6228 && TARGET_FPU)
6229 *fpregs_p = 1;
6230 else
6231 *intregs_p = 1;
6232
6233 if (packed_p && DECL_PACKED (field))
6234 *packed_p = 1;
6235 }
6236 }
6237 }
6238
6239 /* Compute the slot number to pass an argument in.
6240 Return the slot number or -1 if passing on the stack.
6241
6242 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6243 the preceding args and about the function being called.
6244 MODE is the argument's machine mode.
6245 TYPE is the data type of the argument (as a tree).
6246 This is null for libcalls where that information may
6247 not be available.
6248 NAMED is nonzero if this argument is a named parameter
6249 (otherwise it is an extra parameter matching an ellipsis).
6250 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6251 *PREGNO records the register number to use if scalar type.
6252 *PPADDING records the amount of padding needed in words. */
6253
static int
function_arg_slotno (const struct sparc_args *cum, enum machine_mode mode,
		     const_tree type, bool named, bool incoming_p,
		     int *pregno, int *ppadding)
{
  /* The int register base differs between the caller's outgoing view
     and the callee's incoming view of the same slots.  */
  int regbase = (incoming_p
		 ? SPARC_INCOMING_INT_ARG_FIRST
		 : SPARC_OUTGOING_INT_ARG_FIRST);
  int slotno = cum->words;
  enum mode_class mclass;
  int regno;

  *ppadding = 0;

  /* An addressable argument must live in memory.  */
  if (type && TREE_ADDRESSABLE (type))
    return -1;

  /* 32-bit: under-aligned BLKmode aggregates go on the stack.  */
  if (TARGET_ARCH32
      && mode == BLKmode
      && type
      && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
    return -1;

  /* For SPARC64, objects requiring 16-byte alignment get it: skip to
     an even slot, recording the padding word for the caller.  */
  if (TARGET_ARCH64
      && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
      && (slotno & 1) != 0)
    slotno++, *ppadding = 1;

  mclass = GET_MODE_CLASS (mode);
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    {
      /* Vector types deserve special treatment because they are
	 polymorphic wrt their mode, depending upon whether VIS
	 instructions are enabled.  */
      if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
	{
	  /* The SPARC port defines no floating-point vector modes.  */
	  gcc_assert (mode == BLKmode);
	}
      else
	{
	  /* Integral vector types should either have a vector
	     mode or an integral mode, because we are guaranteed
	     by pass_by_reference that their size is not greater
	     than 16 bytes and TImode is 16-byte wide.  */
	  gcc_assert (mode != BLKmode);

	  /* Vector integers are handled like floats according to
	     the Sun VIS SDK.  */
	  mclass = MODE_FLOAT;
	}
    }

  switch (mclass)
    {
    case MODE_FLOAT:
    case MODE_COMPLEX_FLOAT:
    case MODE_VECTOR_INT:
      /* 64-bit named FP args go in FP regs; each slot covers one
	 double FP register pair.  Otherwise fall through and treat
	 the value like an integer argument.  */
      if (TARGET_ARCH64 && TARGET_FPU && named)
	{
	  if (slotno >= SPARC_FP_ARG_MAX)
	    return -1;
	  regno = SPARC_FP_ARG_FIRST + slotno * 2;
	  /* Arguments filling only one single FP register are
	     right-justified in the outer double FP register.  */
	  if (GET_MODE_SIZE (mode) <= 4)
	    regno++;
	  break;
	}
      /* fallthrough */

    case MODE_INT:
    case MODE_COMPLEX_INT:
      if (slotno >= SPARC_INT_ARG_MAX)
	return -1;
      regno = regbase + slotno;
      break;

    case MODE_RANDOM:
      if (mode == VOIDmode)
	/* MODE is VOIDmode when generating the actual call.  */
	return -1;

      gcc_assert (mode == BLKmode);

      if (TARGET_ARCH32
	  || !type
	  || (TREE_CODE (type) != VECTOR_TYPE
	      && TREE_CODE (type) != RECORD_TYPE))
	{
	  /* Simple BLKmode cases take the next int reg slot.  */
	  if (slotno >= SPARC_INT_ARG_MAX)
	    return -1;
	  regno = regbase + slotno;
	}
      else /* TARGET_ARCH64 && type */
	{
	  int intregs_p = 0, fpregs_p = 0, packed_p = 0;

	  /* First see what kinds of registers we would need.  */
	  if (TREE_CODE (type) == VECTOR_TYPE)
	    fpregs_p = 1;
	  else
	    scan_record_type (type, &intregs_p, &fpregs_p, &packed_p);

	  /* The ABI obviously doesn't specify how packed structures
	     are passed.  These are defined to be passed in int regs
	     if possible, otherwise memory.  */
	  if (packed_p || !named)
	    fpregs_p = 0, intregs_p = 1;

	  /* If all arg slots are filled, then must pass on stack.  */
	  if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
	    return -1;

	  /* If there are only int args and all int arg slots are filled,
	     then must pass on stack.  */
	  if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
	    return -1;

	  /* Note that even if all int arg slots are filled, fp members may
	     still be passed in regs if such regs are available.
	     *PREGNO isn't set because there may be more than one, it's up
	     to the caller to compute them.  */
	  return slotno;
	}
      break;

    default :
      gcc_unreachable ();
    }

  *pregno = regno;
  return slotno;
}
6389
6390 /* Handle recursive register counting for structure field layout. */
6391
/* State shared by the function_arg_record_value* helpers while walking
   a record type to be passed in registers.  */
struct function_arg_record_value_parms
{
  rtx ret;		/* return expression being built.  */
  int slotno;		/* slot number of the argument.  */
  int named;		/* whether the argument is named.  */
  int regbase;		/* regno of the base register.  */
  int stack;		/* 1 if part of the argument is on the stack.  */
  int intoffset;	/* offset of the first pending integer field.  */
  unsigned int nregs;	/* number of words passed in registers.  */
};

static void function_arg_record_value_3
 (HOST_WIDE_INT, struct function_arg_record_value_parms *);
static void function_arg_record_value_2
 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
static void function_arg_record_value_1
 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
static rtx function_arg_record_value (const_tree, enum machine_mode, int, int, int);
static rtx function_arg_union_value (int, enum machine_mode, int, int);
6411
6412 /* A subroutine of function_arg_record_value. Traverse the structure
6413 recursively and determine how many registers will be required. */
6414
6415 static void
function_arg_record_value_1(const_tree type,HOST_WIDE_INT startbitpos,struct function_arg_record_value_parms * parms,bool packed_p)6416 function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos,
6417 struct function_arg_record_value_parms *parms,
6418 bool packed_p)
6419 {
6420 tree field;
6421
6422 /* We need to compute how many registers are needed so we can
6423 allocate the PARALLEL but before we can do that we need to know
6424 whether there are any packed fields. The ABI obviously doesn't
6425 specify how structures are passed in this case, so they are
6426 defined to be passed in int regs if possible, otherwise memory,
6427 regardless of whether there are fp values present. */
6428
6429 if (! packed_p)
6430 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6431 {
6432 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6433 {
6434 packed_p = true;
6435 break;
6436 }
6437 }
6438
6439 /* Compute how many registers we need. */
6440 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6441 {
6442 if (TREE_CODE (field) == FIELD_DECL)
6443 {
6444 HOST_WIDE_INT bitpos = startbitpos;
6445
6446 if (DECL_SIZE (field) != 0)
6447 {
6448 if (integer_zerop (DECL_SIZE (field)))
6449 continue;
6450
6451 if (tree_fits_uhwi_p (bit_position (field)))
6452 bitpos += int_bit_position (field);
6453 }
6454
6455 /* ??? FIXME: else assume zero offset. */
6456
6457 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
6458 function_arg_record_value_1 (TREE_TYPE (field),
6459 bitpos,
6460 parms,
6461 packed_p);
6462 else if ((FLOAT_TYPE_P (TREE_TYPE (field))
6463 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6464 && TARGET_FPU
6465 && parms->named
6466 && ! packed_p)
6467 {
6468 if (parms->intoffset != -1)
6469 {
6470 unsigned int startbit, endbit;
6471 int intslots, this_slotno;
6472
6473 startbit = parms->intoffset & -BITS_PER_WORD;
6474 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6475
6476 intslots = (endbit - startbit) / BITS_PER_WORD;
6477 this_slotno = parms->slotno + parms->intoffset
6478 / BITS_PER_WORD;
6479
6480 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
6481 {
6482 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
6483 /* We need to pass this field on the stack. */
6484 parms->stack = 1;
6485 }
6486
6487 parms->nregs += intslots;
6488 parms->intoffset = -1;
6489 }
6490
6491 /* There's no need to check this_slotno < SPARC_FP_ARG MAX.
6492 If it wasn't true we wouldn't be here. */
6493 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
6494 && DECL_MODE (field) == BLKmode)
6495 parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6496 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6497 parms->nregs += 2;
6498 else
6499 parms->nregs += 1;
6500 }
6501 else
6502 {
6503 if (parms->intoffset == -1)
6504 parms->intoffset = bitpos;
6505 }
6506 }
6507 }
6508 }
6509
6510 /* A subroutine of function_arg_record_value. Assign the bits of the
6511 structure between parms->intoffset and bitpos to integer registers. */
6512
6513 static void
function_arg_record_value_3(HOST_WIDE_INT bitpos,struct function_arg_record_value_parms * parms)6514 function_arg_record_value_3 (HOST_WIDE_INT bitpos,
6515 struct function_arg_record_value_parms *parms)
6516 {
6517 enum machine_mode mode;
6518 unsigned int regno;
6519 unsigned int startbit, endbit;
6520 int this_slotno, intslots, intoffset;
6521 rtx reg;
6522
6523 if (parms->intoffset == -1)
6524 return;
6525
6526 intoffset = parms->intoffset;
6527 parms->intoffset = -1;
6528
6529 startbit = intoffset & -BITS_PER_WORD;
6530 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
6531 intslots = (endbit - startbit) / BITS_PER_WORD;
6532 this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
6533
6534 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
6535 if (intslots <= 0)
6536 return;
6537
6538 /* If this is the trailing part of a word, only load that much into
6539 the register. Otherwise load the whole register. Note that in
6540 the latter case we may pick up unwanted bits. It's not a problem
6541 at the moment but may wish to revisit. */
6542
6543 if (intoffset % BITS_PER_WORD != 0)
6544 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
6545 MODE_INT);
6546 else
6547 mode = word_mode;
6548
6549 intoffset /= BITS_PER_UNIT;
6550 do
6551 {
6552 regno = parms->regbase + this_slotno;
6553 reg = gen_rtx_REG (mode, regno);
6554 XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
6555 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6556
6557 this_slotno += 1;
6558 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
6559 mode = word_mode;
6560 parms->nregs += 1;
6561 intslots -= 1;
6562 }
6563 while (intslots > 0);
6564 }
6565
6566 /* A subroutine of function_arg_record_value. Traverse the structure
6567 recursively and assign bits to floating point registers. Track which
6568 bits in between need integer registers; invoke function_arg_record_value_3
6569 to make that happen. */
6570
static void
function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos,
			     struct function_arg_record_value_parms *parms,
			     bool packed_p)
{
  tree field;

  /* A single packed field forces the whole record into int regs,
     mirroring the check in function_arg_record_value_1.  */
  if (! packed_p)
    for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
      {
	if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
	  {
	    packed_p = true;
	    break;
	  }
      }

  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    {
      if (TREE_CODE (field) == FIELD_DECL)
	{
	  HOST_WIDE_INT bitpos = startbitpos;

	  if (DECL_SIZE (field) != 0)
	    {
	      /* Zero-sized fields occupy no registers.  */
	      if (integer_zerop (DECL_SIZE (field)))
		continue;

	      if (tree_fits_uhwi_p (bit_position (field)))
		bitpos += int_bit_position (field);
	    }

	  /* ??? FIXME: else assume zero offset.  */

	  if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
	    function_arg_record_value_2 (TREE_TYPE (field),
					 bitpos,
					 parms,
					 packed_p);
	  else if ((FLOAT_TYPE_P (TREE_TYPE (field))
		    || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
		   && TARGET_FPU
		   && parms->named
		   && ! packed_p)
	    {
	      int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
	      int regno, nregs, pos;
	      enum machine_mode mode = DECL_MODE (field);
	      rtx reg;

	      /* Flush any pending run of integer fields into int regs
		 before assigning this field to FP regs.  */
	      function_arg_record_value_3 (bitpos, parms);

	      if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
		  && mode == BLKmode)
		{
		  /* BLKmode vector: one register per element, in the
		     element's mode.  */
		  mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
		  nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
		}
	      else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
		{
		  /* Complex value: one register per part.  */
		  mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
		  nregs = 2;
		}
	      else
		nregs = 1;

	      regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
	      /* A single-register value in the second half of a slot is
		 right-justified in the outer double FP register.  */
	      if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
		regno++;
	      reg = gen_rtx_REG (mode, regno);
	      pos = bitpos / BITS_PER_UNIT;
	      XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
		= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
	      parms->nregs += 1;
	      /* Emit the remaining registers of a multi-register value,
		 advancing regno by the mode's size in 4-byte units.  */
	      while (--nregs > 0)
		{
		  regno += GET_MODE_SIZE (mode) / 4;
		  reg = gen_rtx_REG (mode, regno);
		  pos += GET_MODE_SIZE (mode);
		  XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
		    = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
		  parms->nregs += 1;
		}
	    }
	  else
	    {
	      /* Start a run of integer-register fields if one is not
		 already pending.  */
	      if (parms->intoffset == -1)
		parms->intoffset = bitpos;
	    }
	}
    }
}
6663
6664 /* Used by function_arg and sparc_function_value_1 to implement the complex
6665 conventions of the 64-bit ABI for passing and returning structures.
6666 Return an expression valid as a return value for the FUNCTION_ARG
6667 and TARGET_FUNCTION_VALUE.
6668
6669 TYPE is the data type of the argument (as a tree).
6670 This is null for libcalls where that information may
6671 not be available.
6672 MODE is the argument's machine mode.
6673 SLOTNO is the index number of the argument's slot in the parameter array.
6674 NAMED is nonzero if this argument is a named parameter
6675 (otherwise it is an extra parameter matching an ellipsis).
6676 REGBASE is the regno of the base register for the parameter array. */
6677
static rtx
function_arg_record_value (const_tree type, enum machine_mode mode,
			   int slotno, int named, int regbase)
{
  HOST_WIDE_INT typesize = int_size_in_bytes (type);
  struct function_arg_record_value_parms parms;
  unsigned int nregs;

  parms.ret = NULL_RTX;
  parms.slotno = slotno;
  parms.named = named;
  parms.regbase = regbase;
  parms.stack = 0;

  /* First pass: compute how many registers we need.  */
  parms.nregs = 0;
  parms.intoffset = 0;
  function_arg_record_value_1 (type, 0, &parms, false);

  /* Take into account pending integer fields left over at the end of
     the record (mirrors the flush inside function_arg_record_value_1).  */
  if (parms.intoffset != -1)
    {
      unsigned int startbit, endbit;
      int intslots, this_slotno;

      startbit = parms.intoffset & -BITS_PER_WORD;
      endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD;
      intslots = (endbit - startbit) / BITS_PER_WORD;
      this_slotno = slotno + parms.intoffset / BITS_PER_WORD;

      if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
	{
	  intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
	  /* We need to pass this field on the stack.  */
	  parms.stack = 1;
	}

      parms.nregs += intslots;
    }
  nregs = parms.nregs;

  /* Allocate the vector and handle some annoying special cases.  */
  if (nregs == 0)
    {
      /* ??? Empty structure has no value?  Duh?  */
      if (typesize <= 0)
	{
	  /* Though there's nothing really to store, return a word register
	     anyway so the rest of gcc doesn't go nuts.  Returning a PARALLEL
	     leads to breakage due to the fact that there are zero bytes to
	     load.  */
	  return gen_rtx_REG (mode, regbase);
	}
      else
	{
	  /* ??? C++ has structures with no fields, and yet a size.  Give up
	     for now and pass everything back in integer registers.  */
	  nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
	}
      if (nregs + slotno > SPARC_INT_ARG_MAX)
	nregs = SPARC_INT_ARG_MAX - slotno;
    }
  gcc_assert (nregs != 0);

  parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));

  /* If at least one field must be passed on the stack, generate
     (parallel [(expr_list (nil) ...) ...]) so that all fields will
     also be passed on the stack.  We can't do much better because the
     semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
     of structures for which the fields passed exclusively in registers
     are not at the beginning of the structure.  */
  if (parms.stack)
    XVECEXP (parms.ret, 0, 0)
      = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);

  /* Second pass: fill in the entries; must produce exactly the count
     computed by the first pass (asserted below).  */
  parms.nregs = 0;
  parms.intoffset = 0;
  function_arg_record_value_2 (type, 0, &parms, false);
  function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);

  gcc_assert (parms.nregs == nregs);

  return parms.ret;
}
6764
6765 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6766 of the 64-bit ABI for passing and returning unions.
6767 Return an expression valid as a return value for the FUNCTION_ARG
6768 and TARGET_FUNCTION_VALUE.
6769
   SIZE is the size in bytes of the union.
   MODE is the argument's machine mode.
   SLOTNO is the index number of the argument's slot in the parameter array.
   REGNO is the hard register the union will be passed in.  */
6773
6774 static rtx
function_arg_union_value(int size,enum machine_mode mode,int slotno,int regno)6775 function_arg_union_value (int size, enum machine_mode mode, int slotno,
6776 int regno)
6777 {
6778 int nwords = ROUND_ADVANCE (size), i;
6779 rtx regs;
6780
6781 /* See comment in previous function for empty structures. */
6782 if (nwords == 0)
6783 return gen_rtx_REG (mode, regno);
6784
6785 if (slotno == SPARC_INT_ARG_MAX - 1)
6786 nwords = 1;
6787
6788 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6789
6790 for (i = 0; i < nwords; i++)
6791 {
6792 /* Unions are passed left-justified. */
6793 XVECEXP (regs, 0, i)
6794 = gen_rtx_EXPR_LIST (VOIDmode,
6795 gen_rtx_REG (word_mode, regno),
6796 GEN_INT (UNITS_PER_WORD * i));
6797 regno++;
6798 }
6799
6800 return regs;
6801 }
6802
6803 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6804 for passing and returning large (BLKmode) vectors.
6805 Return an expression valid as a return value for the FUNCTION_ARG
6806 and TARGET_FUNCTION_VALUE.
6807
6808 SIZE is the size in bytes of the vector (at least 8 bytes).
6809 REGNO is the FP hard register the vector will be passed in. */
6810
6811 static rtx
function_arg_vector_value(int size,int regno)6812 function_arg_vector_value (int size, int regno)
6813 {
6814 int i, nregs = size / 8;
6815 rtx regs;
6816
6817 regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6818
6819 for (i = 0; i < nregs; i++)
6820 {
6821 XVECEXP (regs, 0, i)
6822 = gen_rtx_EXPR_LIST (VOIDmode,
6823 gen_rtx_REG (DImode, regno + 2*i),
6824 GEN_INT (i*8));
6825 }
6826
6827 return regs;
6828 }
6829
6830 /* Determine where to put an argument to a function.
6831 Value is zero to push the argument on the stack,
6832 or a hard register in which to store the argument.
6833
6834 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6835 the preceding args and about the function being called.
6836 MODE is the argument's machine mode.
6837 TYPE is the data type of the argument (as a tree).
6838 This is null for libcalls where that information may
6839 not be available.
6840 NAMED is true if this argument is a named parameter
6841 (otherwise it is an extra parameter matching an ellipsis).
6842 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6843 TARGET_FUNCTION_INCOMING_ARG. */
6844
static rtx
sparc_function_arg_1 (cumulative_args_t cum_v, enum machine_mode mode,
		      const_tree type, bool named, bool incoming_p)
{
  const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  /* Callee sees incoming args in %i regs, caller outgoing in %o regs.  */
  int regbase = (incoming_p
		 ? SPARC_INCOMING_INT_ARG_FIRST
		 : SPARC_OUTGOING_INT_ARG_FIRST);
  int slotno, regno, padding;
  enum mode_class mclass = GET_MODE_CLASS (mode);

  slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
				&regno, &padding);
  /* Slot -1 means the argument goes on the stack.  */
  if (slotno == -1)
    return 0;

  /* Vector types deserve special treatment because they are polymorphic wrt
     their mode, depending upon whether VIS instructions are enabled.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      gcc_assert ((TARGET_ARCH32 && size <= 8)
		  || (TARGET_ARCH64 && size <= 16));

      if (mode == BLKmode)
	return function_arg_vector_value (size,
					  SPARC_FP_ARG_FIRST + 2*slotno);
      else
	mclass = MODE_FLOAT;
    }

  /* 32-bit: everything that reached here goes in a single int reg.  */
  if (TARGET_ARCH32)
    return gen_rtx_REG (mode, regno);

  /* Structures up to 16 bytes in size are passed in arg slots on the stack
     and are promoted to registers if possible.  */
  if (type && TREE_CODE (type) == RECORD_TYPE)
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      gcc_assert (size <= 16);

      return function_arg_record_value (type, mode, slotno, named, regbase);
    }

  /* Unions up to 16 bytes in size are passed in integer registers.  */
  else if (type && TREE_CODE (type) == UNION_TYPE)
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      gcc_assert (size <= 16);

      return function_arg_union_value (size, mode, slotno, regno);
    }

  /* v9 fp args in reg slots beyond the int reg slots get passed in regs
     but also have the slot allocated for them.
     If no prototype is in scope fp values in register slots get passed
     in two places, either fp regs and int regs or fp regs and memory.  */
  else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
	   && SPARC_FP_REG_P (regno))
    {
      rtx reg = gen_rtx_REG (mode, regno);
      if (cum->prototype_p || cum->libcall_p)
	{
	  /* "* 2" because fp reg numbers are recorded in 4 byte
	     quantities.  */
#if 0
	  /* ??? This will cause the value to be passed in the fp reg and
	     in the stack.  When a prototype exists we want to pass the
	     value in the reg but reserve space on the stack.  That's an
	     optimization, and is deferred [for a bit].  */
	  if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
	    return gen_rtx_PARALLEL (mode,
			    gen_rtvec (2,
				       gen_rtx_EXPR_LIST (VOIDmode,
						NULL_RTX, const0_rtx),
				       gen_rtx_EXPR_LIST (VOIDmode,
						reg, const0_rtx)));
	  else
#else
	  /* ??? It seems that passing back a register even when past
	     the area declared by REG_PARM_STACK_SPACE will allocate
	     space appropriately, and will not copy the data onto the
	     stack, exactly as we desire.

	     This is due to locate_and_pad_parm being called in
	     expand_call whenever reg_parm_stack_space > 0, which
	     while beneficial to our example here, would seem to be
	     in error from what had been intended.  Ho hum...  -- r~ */
#endif
	  return reg;
	}
      else
	{
	  rtx v0, v1;

	  if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
	    {
	      int intreg;

	      /* On incoming, we don't need to know that the value
		 is passed in %f0 and %i0, and it confuses other parts
		 causing needless spillage even on the simplest cases.  */
	      if (incoming_p)
		return reg;

	      /* Map the FP register back to the int register covering
		 the same slot.  */
	      intreg = (SPARC_OUTGOING_INT_ARG_FIRST
			+ (regno - SPARC_FP_ARG_FIRST) / 2);

	      v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
	      v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
				      const0_rtx);
	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
	    }
	  else
	    {
	      /* Past the int reg slots: FP reg plus memory.  */
	      v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
	      v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
	    }
	}
    }

  /* All other aggregate types are passed in an integer register in a mode
     corresponding to the size of the type.  */
  else if (type && AGGREGATE_TYPE_P (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      gcc_assert (size <= 16);

      mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
    }

  return gen_rtx_REG (mode, regno);
}
6980
6981 /* Handle the TARGET_FUNCTION_ARG target hook. */
6982
static rtx
sparc_function_arg (cumulative_args_t cum, enum machine_mode mode,
		    const_tree type, bool named)
{
  /* Outgoing argument from the caller's side: incoming_p is false.  */
  return sparc_function_arg_1 (cum, mode, type, named, false);
}
6989
6990 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
6991
static rtx
sparc_function_incoming_arg (cumulative_args_t cum, enum machine_mode mode,
			     const_tree type, bool named)
{
  /* Incoming argument as seen by the callee: incoming_p is true.  */
  return sparc_function_arg_1 (cum, mode, type, named, true);
}
6998
6999 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
7000
7001 static unsigned int
sparc_function_arg_boundary(enum machine_mode mode,const_tree type)7002 sparc_function_arg_boundary (enum machine_mode mode, const_tree type)
7003 {
7004 return ((TARGET_ARCH64
7005 && (GET_MODE_ALIGNMENT (mode) == 128
7006 || (type && TYPE_ALIGN (type) == 128)))
7007 ? 128
7008 : PARM_BOUNDARY);
7009 }
7010
7011 /* For an arg passed partly in registers and partly in memory,
7012 this is the number of bytes of registers used.
7013 For args passed entirely in registers or entirely in memory, zero.
7014
7015 Any arg that starts in the first 6 regs but won't entirely fit in them
7016 needs partial registers on v8. On v9, structures with integer
7017 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7018 values that begin in the last fp reg [where "last fp reg" varies with the
7019 mode] will be split between that reg and memory. */
7020
static int
sparc_arg_partial_bytes (cumulative_args_t cum, enum machine_mode mode,
			 tree type, bool named)
{
  int slotno, regno, padding;

  /* We pass false for incoming_p here, it doesn't matter.  */
  slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
				false, &regno, &padding);

  /* SLOTNO == -1 means the argument is passed entirely in memory,
     so no part of it occupies registers.  */
  if (slotno == -1)
    return 0;

  if (TARGET_ARCH32)
    {
      /* 32-bit ABI: an argument that starts in the register slots but
	 would run past the last one is split; the register part is
	 whatever slots remain from SLOTNO to SPARC_INT_ARG_MAX.  */
      if ((slotno + (mode == BLKmode
		     ? ROUND_ADVANCE (int_size_in_bytes (type))
		     : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
	  > SPARC_INT_ARG_MAX)
	return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
    }
  else
    {
      /* We are guaranteed by pass_by_reference that the size of the
	 argument is not greater than 16 bytes, so we only need to return
	 one word if the argument is partially passed in registers.  */

      if (type && AGGREGATE_TYPE_P (type))
	{
	  int size = int_size_in_bytes (type);

	  /* A two-word aggregate starting in the very last integer slot
	     has its second word in memory.  */
	  if (size > UNITS_PER_WORD
	      && slotno == SPARC_INT_ARG_MAX - 1)
	    return UNITS_PER_WORD;
	}
      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
	       || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
		   && ! (TARGET_FPU && named)))
	{
	  /* The complex types are passed as packed types.  */
	  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
	      && slotno == SPARC_INT_ARG_MAX - 1)
	    return UNITS_PER_WORD;
	}
      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
	{
	  /* Complex FP values that begin in the last FP register slot
	     are split between that register and memory.  */
	  if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
	      > SPARC_FP_ARG_MAX)
	    return UNITS_PER_WORD;
	}
    }

  /* Entirely in registers.  */
  return 0;
}
7075
7076 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7077 Specify whether to pass the argument by reference. */
7078
static bool
sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
			 enum machine_mode mode, const_tree type,
			 bool named ATTRIBUTE_UNUSED)
{
  /* Return true if an argument of MODE/TYPE must be passed by invisible
     reference.  NOTE: TYPE may be null (libcalls); the MODE-only tests
     below are what cover that case.  */
  if (TARGET_ARCH32)
    /* Original SPARC 32-bit ABI says that structures and unions,
       and quad-precision floats are passed by reference.  For Pascal,
       also pass arrays by reference.  All other base types are passed
       in registers.

       Extended ABI (as implemented by the Sun compiler) says that all
       complex floats are passed by reference.  Pass complex integers
       in registers up to 8 bytes.  More generally, enforce the 2-word
       cap for passing arguments in registers.

       Vector ABI (as implemented by the Sun VIS SDK) says that vector
       integers are passed like floats of the same size, that is in
       registers up to 8 bytes.  Pass all vector floats by reference
       like structure and unions.  */
    return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
	    || mode == SCmode
	    /* Catch CDImode, TFmode, DCmode and TCmode.  */
	    || GET_MODE_SIZE (mode) > 8
	    || (type
		&& TREE_CODE (type) == VECTOR_TYPE
		&& (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
  else
    /* Original SPARC 64-bit ABI says that structures and unions
       smaller than 16 bytes are passed in registers, as well as
       all other base types.

       Extended ABI (as implemented by the Sun compiler) says that
       complex floats are passed in registers up to 16 bytes.  Pass
       all complex integers in registers up to 16 bytes.  More generally,
       enforce the 2-word cap for passing arguments in registers.

       Vector ABI (as implemented by the Sun VIS SDK) says that vector
       integers are passed like floats of the same size, that is in
       registers (up to 16 bytes).  Pass all vector floats like structure
       and unions.  */
    return ((type
	     && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
	     && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
	    /* Catch CTImode and TCmode.  */
	    || GET_MODE_SIZE (mode) > 16);
}
7126
7127 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7128 Update the data in CUM to advance over an argument
7129 of mode MODE and data type TYPE.
7130 TYPE is null for libcalls where that information may not be available. */
7131
static void
sparc_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
			    const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int regno, padding;

  /* We pass false for incoming_p here, it doesn't matter.  */
  function_arg_slotno (cum, mode, type, named, false, &regno, &padding);

  /* If argument requires leading padding, add it.  */
  cum->words += padding;

  if (TARGET_ARCH32)
    {
      /* 32-bit ABI: advance by the argument's rounded size in words.  */
      cum->words += (mode != BLKmode
		     ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
		     : ROUND_ADVANCE (int_size_in_bytes (type)));
    }
  else
    {
      if (type && AGGREGATE_TYPE_P (type))
	{
	  /* 64-bit ABI: aggregates consume one slot up to 8 bytes, two
	     slots up to 16 bytes; anything larger is passed by reference
	     and therefore consumes a single pointer slot.  */
	  int size = int_size_in_bytes (type);

	  if (size <= 8)
	    ++cum->words;
	  else if (size <= 16)
	    cum->words += 2;
	  else /* passed by reference */
	    ++cum->words;
	}
      else
	{
	  /* Scalars advance by their rounded size in words.  */
	  cum->words += (mode != BLKmode
			 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
			 : ROUND_ADVANCE (int_size_in_bytes (type)));
	}
    }
}
7172
7173 /* Handle the FUNCTION_ARG_PADDING macro.
7174 For the 64 bit ABI structs are always stored left shifted in their
7175 argument slot. */
7176
7177 enum direction
function_arg_padding(enum machine_mode mode,const_tree type)7178 function_arg_padding (enum machine_mode mode, const_tree type)
7179 {
7180 if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
7181 return upward;
7182
7183 /* Fall back to the default. */
7184 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
7185 }
7186
7187 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7188 Specify whether to return the return value in memory. */
7189
static bool
sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  /* Return true if a value of TYPE must be returned in memory rather
     than in registers.  */
  if (TARGET_ARCH32)
    /* Original SPARC 32-bit ABI says that structures and unions,
       and quad-precision floats are returned in memory.  All other
       base types are returned in registers.

       Extended ABI (as implemented by the Sun compiler) says that
       all complex floats are returned in registers (8 FP registers
       at most for '_Complex long double').  Return all complex integers
       in registers (4 at most for '_Complex long long').

       Vector ABI (as implemented by the Sun VIS SDK) says that vector
       integers are returned like floats of the same size, that is in
       registers up to 8 bytes and in memory otherwise.  Return all
       vector floats in memory like structure and unions; note that
       they always have BLKmode like the latter.  */
    return (TYPE_MODE (type) == BLKmode
	    || TYPE_MODE (type) == TFmode
	    || (TREE_CODE (type) == VECTOR_TYPE
		&& (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
  else
    /* Original SPARC 64-bit ABI says that structures and unions
       smaller than 32 bytes are returned in registers, as well as
       all other base types.

       Extended ABI (as implemented by the Sun compiler) says that all
       complex floats are returned in registers (8 FP registers at most
       for '_Complex long double').  Return all complex integers in
       registers (4 at most for '_Complex TItype').

       Vector ABI (as implemented by the Sun VIS SDK) says that vector
       integers are returned like floats of the same size, that is in
       registers.  Return all vector floats like structure and unions;
       note that they always have BLKmode like the latter.  */
    return (TYPE_MODE (type) == BLKmode
	    && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
}
7229
7230 /* Handle the TARGET_STRUCT_VALUE target hook.
7231 Return where to find the structure return value address. */
7232
static rtx
sparc_struct_value_rtx (tree fndecl, int incoming)
{
  if (TARGET_ARCH64)
    /* Returning 0 tells the middle-end to pass the structure return
       address as a hidden first argument (per the hook's contract).  */
    return 0;
  else
    {
      rtx mem;

      /* 32-bit ABI: the structure return address lives in a fixed stack
	 slot at STRUCT_VALUE_OFFSET, addressed off %fp in the callee and
	 off %sp in the caller.  */
      if (incoming)
	mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
						   STRUCT_VALUE_OFFSET));
      else
	mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
						   STRUCT_VALUE_OFFSET));

      /* Only follow the SPARC ABI for fixed-size structure returns.
	 Variable size structure returns are handled per the normal
	 procedures in GCC.  This is enabled by -mstd-struct-return */
      if (incoming == 2
	  && sparc_std_struct_return
	  && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
	  && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
	{
	  /* We must check and adjust the return address, as it is
	     optional as to whether the return object is really
	     provided.  */
	  rtx ret_reg = gen_rtx_REG (Pmode, 31);  /* return address register */
	  rtx scratch = gen_reg_rtx (SImode);
	  rtx endlab = gen_label_rtx ();

	  /* Calculate the return object size */
	  tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
	  rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
	  /* Construct a temporary return value */
	  rtx temp_val
	    = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);

	  /* Implement SPARC 32-bit psABI callee return struct checking:

	     Fetch the instruction where we will return to and see if
	     it's an unimp instruction (the most significant 10 bits
	     will be zero).  */
	  emit_move_insn (scratch, gen_rtx_MEM (SImode,
						plus_constant (Pmode,
							       ret_reg, 8)));
	  /* Assume the size is valid and pre-adjust */
	  emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
	  /* If the unimp's size field matches, keep the adjusted return
	     address and use the caller-provided return object.  */
	  emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
				   0, endlab);
	  /* Mismatch: undo the adjustment and substitute a local temporary
	     so the store through MEM still has somewhere valid to go.  */
	  emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
	  /* Write the address of the memory pointed to by temp_val into
	     the memory pointed to by mem */
	  emit_move_insn (mem, XEXP (temp_val, 0));
	  emit_label (endlab);
	}

      return mem;
    }
}
7293
7294 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7295 For v9, function return values are subject to the same rules as arguments,
7296 except that up to 32 bytes may be returned in registers. */
7297
static rtx
sparc_function_value_1 (const_tree type, enum machine_mode mode,
			bool outgoing)
{
  /* Beware that the two values are swapped here wrt function_arg.  */
  int regbase = (outgoing
		 ? SPARC_INCOMING_INT_ARG_FIRST
		 : SPARC_OUTGOING_INT_ARG_FIRST);
  enum mode_class mclass = GET_MODE_CLASS (mode);
  int regno;

  /* Vector types deserve special treatment because they are polymorphic wrt
     their mode, depending upon whether VIS instructions are enabled.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      gcc_assert ((TARGET_ARCH32 && size <= 8)
		  || (TARGET_ARCH64 && size <= 32));

      if (mode == BLKmode)
	return function_arg_vector_value (size,
					  SPARC_FP_ARG_FIRST);
      else
	mclass = MODE_FLOAT;
    }

  if (TARGET_ARCH64 && type)
    {
      /* Structures up to 32 bytes in size are returned in registers.  */
      if (TREE_CODE (type) == RECORD_TYPE)
	{
	  HOST_WIDE_INT size = int_size_in_bytes (type);
	  gcc_assert (size <= 32);

	  return function_arg_record_value (type, mode, 0, 1, regbase);
	}

      /* Unions up to 32 bytes in size are returned in integer registers.  */
      else if (TREE_CODE (type) == UNION_TYPE)
	{
	  HOST_WIDE_INT size = int_size_in_bytes (type);
	  gcc_assert (size <= 32);

	  return function_arg_union_value (size, mode, 0, regbase);
	}

      /* Objects that require it are returned in FP registers.  */
      else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
	;

      /* All other aggregate types are returned in an integer register in a
	 mode corresponding to the size of the type.  */
      else if (AGGREGATE_TYPE_P (type))
	{
	  /* All other aggregate types are passed in an integer register
	     in a mode corresponding to the size of the type.  */
	  HOST_WIDE_INT size = int_size_in_bytes (type);
	  gcc_assert (size <= 32);

	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);

	  /* ??? We probably should have made the same ABI change in
	     3.4.0 as the one we made for unions.   The latter was
	     required by the SCD though, while the former is not
	     specified, so we favored compatibility and efficiency.

	     Now we're stuck for aggregates larger than 16 bytes,
	     because OImode vanished in the meantime.  Let's not
	     try to be unduly clever, and simply follow the ABI
	     for unions in that case.  */
	  if (mode == BLKmode)
	    return function_arg_union_value (size, mode, 0, regbase);
	  else
	    mclass = MODE_INT;
	}

      /* We should only have pointer and integer types at this point.  This
	 must match sparc_promote_function_mode.  */
      else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
	mode = word_mode;
    }

  /* We should only have pointer and integer types at this point.  This must
     match sparc_promote_function_mode.  */
  else if (TARGET_ARCH32
	   && mclass == MODE_INT
	   && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
    mode = word_mode;

  /* FP and complex FP values go in the first FP register when the FPU is
     in use; everything else goes in the first integer register.  */
  if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
    regno = SPARC_FP_ARG_FIRST;
  else
    regno = regbase;

  return gen_rtx_REG (mode, regno);
}
7394
7395 /* Handle TARGET_FUNCTION_VALUE.
7396 On the SPARC, the value is found in the first "output" register, but the
7397 called function leaves it in the first "input" register. */
7398
7399 static rtx
sparc_function_value(const_tree valtype,const_tree fn_decl_or_type ATTRIBUTE_UNUSED,bool outgoing)7400 sparc_function_value (const_tree valtype,
7401 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7402 bool outgoing)
7403 {
7404 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7405 }
7406
7407 /* Handle TARGET_LIBCALL_VALUE. */
7408
7409 static rtx
sparc_libcall_value(enum machine_mode mode,const_rtx fun ATTRIBUTE_UNUSED)7410 sparc_libcall_value (enum machine_mode mode,
7411 const_rtx fun ATTRIBUTE_UNUSED)
7412 {
7413 return sparc_function_value_1 (NULL_TREE, mode, false);
7414 }
7415
7416 /* Handle FUNCTION_VALUE_REGNO_P.
7417 On the SPARC, the first "output" reg is used for integer values, and the
7418 first floating point register is used for floating point values. */
7419
/* Handle FUNCTION_VALUE_REGNO_P.
   On the SPARC, the first "output" reg is used for integer values, and the
   first floating point register is used for floating point values.  */

static bool
sparc_function_value_regno_p (const unsigned int regno)
{
  switch (regno)
    {
    case 8:	/* first "output" register (integer values) */
    case 32:	/* first floating-point register (FP values) */
      return true;
    default:
      return false;
    }
}
7425
7426 /* Do what is necessary for `va_start'. We look at the current function
7427 to determine if stdarg or varargs is used and return the address of
7428 the first unnamed parameter. */
7429
static rtx
sparc_builtin_saveregs (void)
{
  /* Number of argument slots already consumed by named parameters.  */
  int first_reg = crtl->args.info.words;
  rtx address;
  int regno;

  /* Spill every remaining incoming integer argument register into its
     reserved stack slot so va_arg can walk them in memory.  */
  for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
    emit_move_insn (gen_rtx_MEM (word_mode,
				 gen_rtx_PLUS (Pmode,
					       frame_pointer_rtx,
					       GEN_INT (FIRST_PARM_OFFSET (0)
							+ (UNITS_PER_WORD
							   * regno)))),
		    gen_rtx_REG (word_mode,
				 SPARC_INCOMING_INT_ARG_FIRST + regno));

  /* Address of the slot holding the first unnamed argument.  */
  address = gen_rtx_PLUS (Pmode,
			  frame_pointer_rtx,
			  GEN_INT (FIRST_PARM_OFFSET (0)
				   + UNITS_PER_WORD * first_reg));

  return address;
}
7454
7455 /* Implement `va_start' for stdarg. */
7456
7457 static void
sparc_va_start(tree valist,rtx nextarg)7458 sparc_va_start (tree valist, rtx nextarg)
7459 {
7460 nextarg = expand_builtin_saveregs ();
7461 std_expand_builtin_va_start (valist, nextarg);
7462 }
7463
7464 /* Implement `va_arg' for stdarg. */
7465
static tree
sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		       gimple_seq *post_p)
{
  HOST_WIDE_INT size, rsize, align;
  tree addr, incr;
  bool indirect;
  tree ptrtype = build_pointer_type (type);

  if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
    {
      /* Pass-by-reference: the va slot holds a pointer to the object.  */
      indirect = true;
      size = rsize = UNITS_PER_WORD;
      align = 0;
    }
  else
    {
      indirect = false;
      size = int_size_in_bytes (type);
      /* RSIZE is SIZE rounded up to a whole number of words.  */
      rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
      align = 0;

      if (TARGET_ARCH64)
	{
	  /* For SPARC64, objects requiring 16-byte alignment get it.  */
	  if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
	    align = 2 * UNITS_PER_WORD;

	  /* SPARC-V9 ABI states that structures up to 16 bytes in size
	     are left-justified in their slots.  */
	  if (AGGREGATE_TYPE_P (type))
	    {
	      if (size == 0)
		size = rsize = UNITS_PER_WORD;
	      else
		size = rsize;
	    }
	}
    }

  incr = valist;
  if (align)
    {
      /* Round the current va pointer up to ALIGN.  */
      incr = fold_build_pointer_plus_hwi (incr, align - 1);
      incr = fold_convert (sizetype, incr);
      incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
			  size_int (-align));
      incr = fold_convert (ptr_type_node, incr);
    }

  gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
  addr = incr;

  /* On big-endian targets, a value smaller than its slot is
     right-justified, so skip the leading padding.  */
  if (BYTES_BIG_ENDIAN && size < rsize)
    addr = fold_build_pointer_plus_hwi (incr, rsize - size);

  if (indirect)
    {
      /* Load the pointer stored in the slot, then dereference it.  */
      addr = fold_convert (build_pointer_type (ptrtype), addr);
      addr = build_va_arg_indirect_ref (addr);
    }

  /* If the address isn't aligned properly for the type, we need a temporary.
     FIXME: This is inefficient, usually we can do this in registers.  */
  else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
    {
      tree tmp = create_tmp_var (type, "va_arg_tmp");
      tree dest_addr = build_fold_addr_expr (tmp);
      tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
				   3, dest_addr, addr, size_int (rsize));
      TREE_ADDRESSABLE (tmp) = 1;
      gimplify_and_add (copy, pre_p);
      addr = dest_addr;
    }

  else
    addr = fold_convert (ptrtype, addr);

  /* Advance VALIST past the consumed slot(s); the assignment is queued
     after the access (post_p).  */
  incr = fold_build_pointer_plus_hwi (incr, rsize);
  gimplify_assign (valist, incr, post_p);

  return build_va_arg_indirect_ref (addr);
}
7549
7550 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7551 Specify whether the vector mode is supported by the hardware. */
7552
7553 static bool
sparc_vector_mode_supported_p(enum machine_mode mode)7554 sparc_vector_mode_supported_p (enum machine_mode mode)
7555 {
7556 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
7557 }
7558
7559 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7560
7561 static enum machine_mode
sparc_preferred_simd_mode(enum machine_mode mode)7562 sparc_preferred_simd_mode (enum machine_mode mode)
7563 {
7564 if (TARGET_VIS)
7565 switch (mode)
7566 {
7567 case SImode:
7568 return V2SImode;
7569 case HImode:
7570 return V4HImode;
7571 case QImode:
7572 return V8QImode;
7573
7574 default:;
7575 }
7576
7577 return word_mode;
7578 }
7579
7580 /* Return the string to output an unconditional branch to LABEL, which is
7581 the operand number of the label.
7582
7583 DEST is the destination insn (i.e. the label), INSN is the source. */
7584
const char *
output_ubranch (rtx dest, rtx insn)
{
  static char string[64];
  bool v9_form = false;
  int delta;
  char *p;

  /* Even if we are trying to use cbcond for this, evaluate
     whether we can use V9 branches as our backup plan.  */

  /* Assume the target is far away when insn addresses are not known.  */
  delta = 5000000;
  if (INSN_ADDRESSES_SET_P ())
    delta = (INSN_ADDRESSES (INSN_UID (dest))
	     - INSN_ADDRESSES (INSN_UID (insn)));

  /* Leave some instructions for "slop".  */
  if (TARGET_V9 && delta >= -260000 && delta < 260000)
    v9_form = true;

  if (TARGET_CBCOND)
    {
      bool emit_nop = emit_cbcond_nop (insn);
      bool far = false;
      const char *rval;

      /* cbcond has a much shorter displacement range than regular
	 branches.  */
      if (delta < -500 || delta > 500)
	far = true;

      if (far)
	{
	  if (v9_form)
	    rval = "ba,a,pt\t%%xcc, %l0";
	  else
	    rval = "b,a\t%l0";
	}
      else
	{
	  /* "compare %g0 with %g0 and branch if equal" == always taken.  */
	  if (emit_nop)
	    rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
	  else
	    rval = "cwbe\t%%g0, %%g0, %l0";
	}
      return rval;
    }

  if (v9_form)
    strcpy (string, "ba%*,pt\t%%xcc, ");
  else
    strcpy (string, "b%*\t");

  /* Append the label operand "%l0" and the "%(" punctuation, which is
     expanded by print_operand.  */
  p = strchr (string, '\0');
  *p++ = '%';
  *p++ = 'l';
  *p++ = '0';
  *p++ = '%';
  *p++ = '(';
  *p = '\0';

  return string;
}
7646
7647 /* Return the string to output a conditional branch to LABEL, which is
7648 the operand number of the label. OP is the conditional expression.
7649 XEXP (OP, 0) is assumed to be a condition code register (integer or
7650 floating point) and its mode specifies what kind of comparison we made.
7651
7652 DEST is the destination insn (i.e. the label), INSN is the source.
7653
7654 REVERSED is nonzero if we should reverse the sense of the comparison.
7655
7656 ANNUL is nonzero if we should generate an annulling branch. */
7657
const char *
output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
		rtx insn)
{
  static char string[64];
  enum rtx_code code = GET_CODE (op);
  rtx cc_reg = XEXP (op, 0);
  enum machine_mode mode = GET_MODE (cc_reg);
  const char *labelno, *branch;
  int spaces = 8, far;
  char *p;

  /* v9 branches are limited to +-1MB.  If it is too far away,
     change

     bne,pt %xcc, .LC30

     to

     be,pn %xcc, .+12
     nop
     ba .LC30

     and

     fbne,a,pn %fcc2, .LC29

     to

     fbe,pt %fcc2, .+16
     nop
     ba .LC29  */

  far = TARGET_V9 && (get_attr_length (insn) >= 3);
  if (reversed ^ far)
    {
      /* Reversal of FP compares takes care -- an ordered compare
	 becomes an unordered compare and vice versa.  */
      if (mode == CCFPmode || mode == CCFPEmode)
	code = reverse_condition_maybe_unordered (code);
      else
	code = reverse_condition (code);
    }

  /* Start by writing the branch condition.  */
  if (mode == CCFPmode || mode == CCFPEmode)
    {
      switch (code)
	{
	case NE:
	  branch = "fbne";
	  break;
	case EQ:
	  branch = "fbe";
	  break;
	case GE:
	  branch = "fbge";
	  break;
	case GT:
	  branch = "fbg";
	  break;
	case LE:
	  branch = "fble";
	  break;
	case LT:
	  branch = "fbl";
	  break;
	case UNORDERED:
	  branch = "fbu";
	  break;
	case ORDERED:
	  branch = "fbo";
	  break;
	case UNGT:
	  branch = "fbug";
	  break;
	case UNLT:
	  branch = "fbul";
	  break;
	case UNEQ:
	  branch = "fbue";
	  break;
	case UNGE:
	  branch = "fbuge";
	  break;
	case UNLE:
	  branch = "fbule";
	  break;
	case LTGT:
	  branch = "fblg";
	  break;

	default:
	  gcc_unreachable ();
	}

      /* ??? !v9: FP branches cannot be preceded by another floating point
	 insn.  Because there is currently no concept of pre-delay slots,
	 we can fix this only by always emitting a nop before a floating
	 point branch.  */

      string[0] = '\0';
      if (! TARGET_V9)
	strcpy (string, "nop\n\t");
      strcat (string, branch);
    }
  else
    {
      switch (code)
	{
	case NE:
	  branch = "bne";
	  break;
	case EQ:
	  branch = "be";
	  break;
	case GE:
	  /* For CC_NOOV modes the overflow bit is not valid, so test the
	     sign bit instead.  */
	  if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
	    branch = "bpos";
	  else
	    branch = "bge";
	  break;
	case GT:
	  branch = "bg";
	  break;
	case LE:
	  branch = "ble";
	  break;
	case LT:
	  if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
	    branch = "bneg";
	  else
	    branch = "bl";
	  break;
	case GEU:
	  branch = "bgeu";
	  break;
	case GTU:
	  branch = "bgu";
	  break;
	case LEU:
	  branch = "bleu";
	  break;
	case LTU:
	  branch = "blu";
	  break;

	default:
	  gcc_unreachable ();
	}
      strcpy (string, branch);
    }
  spaces -= strlen (branch);
  p = strchr (string, '\0');

  /* Now add the annulling, the label, and a possible noop.  */
  if (annul && ! far)
    {
      strcpy (p, ",a");
      p += 2;
      spaces -= 2;
    }

  if (TARGET_V9)
    {
      rtx note;
      int v8 = 0;

      /* If the displacement exceeds the V9 conditional-branch range,
	 fall back to the V8 form (no %icc/%xcc specifier).  */
      if (! far && insn && INSN_ADDRESSES_SET_P ())
	{
	  int delta = (INSN_ADDRESSES (INSN_UID (dest))
		       - INSN_ADDRESSES (INSN_UID (insn)));
	  /* Leave some instructions for "slop".  */
	  if (delta < -260000 || delta >= 260000)
	    v8 = 1;
	}

      if (mode == CCFPmode || mode == CCFPEmode)
	{
	  static char v9_fcc_labelno[] = "%%fccX, ";
	  /* Set the char indicating the number of the fcc reg to use.  */
	  v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
	  labelno = v9_fcc_labelno;
	  if (v8)
	    {
	      gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
	      labelno = "";
	    }
	}
      else if (mode == CCXmode || mode == CCX_NOOVmode)
	{
	  labelno = "%%xcc, ";
	  gcc_assert (! v8);
	}
      else
	{
	  labelno = "%%icc, ";
	  if (v8)
	    labelno = "";
	}

      /* Add the branch-prediction hint (,pt/,pn) from the probability
	 note, inverted when the branch sense was reversed for FAR.  */
      if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
	{
	  strcpy (p,
		  ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
		  ? ",pt" : ",pn");
	  p += 3;
	  spaces -= 3;
	}
    }
  else
    labelno = "";

  if (spaces > 0)
    *p++ = '\t';
  else
    *p++ = ' ';
  strcpy (p, labelno);
  p = strchr (p, '\0');
  if (far)
    {
      /* Emit the short reversed branch over a "nop; ba" to the real
	 target; ".+12" skips both, ".+16" (patched below) also skips
	 the delay-slot insn.  */
      strcpy (p, ".+12\n\t nop\n\tb\t");
      /* Skip the next insn if requested or
	 if we know that it will be a nop.  */
      if (annul || ! final_sequence)
	p[3] = '6';
      p += 14;
    }
  /* Finally the label operand %lN and the "%#" punctuation.  */
  *p++ = '%';
  *p++ = 'l';
  *p++ = label + '0';
  *p++ = '%';
  *p++ = '#';
  *p = '\0';

  return string;
}
7895
7896 /* Emit a library call comparison between floating point X and Y.
7897 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7898 Return the new operator to be used in the comparison sequence.
7899
7900 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7901 values as arguments instead of the TFmode registers themselves,
7902 that's why we cannot call emit_float_lib_cmp. */
7903
rtx
sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
{
  const char *qpfunc;
  rtx slot0, slot1, result, tem, tem2, libfunc;
  enum machine_mode mode;
  enum rtx_code new_comparison;

  /* Pick the library routine: direct predicates for the ordered
     comparisons, the generic _Q(p)_cmp for everything involving
     unordered results.  */
  switch (comparison)
    {
    case EQ:
      qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
      break;

    case NE:
      qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
      break;

    case GT:
      qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
      break;

    case GE:
      qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
      break;

    case LT:
      qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
      break;

    case LE:
      qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
      break;

    case ORDERED:
    case UNORDERED:
    case UNGT:
    case UNLT:
    case UNEQ:
    case UNGE:
    case UNLE:
    case LTGT:
      qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
      break;

    default:
      gcc_unreachable ();
    }

  if (TARGET_ARCH64)
    {
      /* The _Qp_* routines take pointers to TFmode values, so force the
	 operands into addressable stack slots (unless already in memory).  */
      if (MEM_P (x))
	{
	  tree expr = MEM_EXPR (x);
	  if (expr)
	    mark_addressable (expr);
	  slot0 = x;
	}
      else
	{
	  slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
	  emit_move_insn (slot0, x);
	}

      if (MEM_P (y))
	{
	  tree expr = MEM_EXPR (y);
	  if (expr)
	    mark_addressable (expr);
	  slot1 = y;
	}
      else
	{
	  slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
	  emit_move_insn (slot1, y);
	}

      libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
      emit_library_call (libfunc, LCT_NORMAL,
			 DImode, 2,
			 XEXP (slot0, 0), Pmode,
			 XEXP (slot1, 0), Pmode);
      mode = DImode;
    }
  else
    {
      /* The 32-bit _Q_* routines take the TFmode values directly.  */
      libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
      emit_library_call (libfunc, LCT_NORMAL,
			 SImode, 2,
			 x, TFmode, y, TFmode);
      mode = SImode;
    }


  /* Immediately move the result of the libcall into a pseudo
     register so reload doesn't clobber the value if it needs
     the return register for a spill reg.  */
  result = gen_reg_rtx (mode);
  emit_move_insn (result, hard_libcall_value (mode, libfunc));

  /* Decode the result.  The direct predicates return nonzero for true.
     _Q(p)_cmp encodes the relation as: 0 = equal, 1 = less, 2 = greater,
     3 = unordered (per the SPARC ABI soft-float interface) -- the
     arithmetic below relies on that encoding.  */
  switch (comparison)
    {
    default:
      return gen_rtx_NE (VOIDmode, result, const0_rtx);
    case ORDERED:
    case UNORDERED:
      new_comparison = (comparison == UNORDERED ? EQ : NE);
      return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
    case UNGT:
    case UNGE:
      new_comparison = (comparison == UNGT ? GT : NE);
      return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
    case UNLE:
      return gen_rtx_NE (VOIDmode, result, const2_rtx);
    case UNLT:
      /* less(1) or unordered(3): test the low bit.  */
      tem = gen_reg_rtx (mode);
      if (TARGET_ARCH32)
	emit_insn (gen_andsi3 (tem, result, const1_rtx));
      else
	emit_insn (gen_anddi3 (tem, result, const1_rtx));
      return gen_rtx_NE (VOIDmode, tem, const0_rtx);
    case UNEQ:
    case LTGT:
      /* (result + 1) & 2 is zero for equal(0)/unordered(3) and nonzero
	 for less(1)/greater(2).  */
      tem = gen_reg_rtx (mode);
      if (TARGET_ARCH32)
	emit_insn (gen_addsi3 (tem, result, const1_rtx));
      else
	emit_insn (gen_adddi3 (tem, result, const1_rtx));
      tem2 = gen_reg_rtx (mode);
      if (TARGET_ARCH32)
	emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
      else
	emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
      new_comparison = (comparison == UNEQ ? EQ : NE);
      return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
    }

  gcc_unreachable ();
}
8043
8044 /* Generate an unsigned DImode to FP conversion. This is the same code
8045 optabs would emit if we didn't have TFmode patterns. */
8046
void
sparc_emit_floatunsdi (rtx *operands, enum machine_mode mode)
{
  rtx neglab, donelab, i0, i1, f0, in, out;

  out = operands[0];
  in = force_reg (DImode, operands[1]);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i0 = gen_reg_rtx (DImode);
  i1 = gen_reg_rtx (DImode);
  f0 = gen_reg_rtx (mode);

  /* If the value is non-negative (high bit clear), a plain signed
     conversion is exact.  */
  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  /* Otherwise, halve the value -- or-ing in the shifted-out low bit so
     rounding still sees it -- convert, and double the result.  */
  emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
  emit_insn (gen_anddi3 (i1, in, const1_rtx));
  emit_insn (gen_iordi3 (i0, i0, i1));
  emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
8076
/* Generate an FP to unsigned DImode conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
sparc_emit_fixunsdi (rtx *operands, enum machine_mode mode)
{
  rtx neglab, donelab, i0, i1, f0, in, out, limit;

  out = operands[0];
  in = force_reg (mode, operands[1]);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i0 = gen_reg_rtx (DImode);
  i1 = gen_reg_rtx (DImode);
  limit = gen_reg_rtx (mode);
  f0 = gen_reg_rtx (mode);

  /* LIMIT = 2^63 as a floating-point constant; values strictly below it
     fit in a signed DImode.  */
  emit_move_insn (limit,
		  CONST_DOUBLE_FROM_REAL_VALUE (
		    REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
  emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);

  /* Small enough: a plain signed fix conversion is correct.  */
  emit_insn (gen_rtx_SET (VOIDmode,
			  out,
			  gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  /* Otherwise subtract 2^63 before converting, then add 2^63 back to
     the integer result by XORing in the sign bit.  */
  emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit)));
  emit_insn (gen_rtx_SET (VOIDmode,
			  i0,
			  gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
  emit_insn (gen_movdi (i1, const1_rtx));
  emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
  emit_insn (gen_xordi3 (out, i0, i1));

  emit_label (donelab);
}
8117
/* Return the string to output a compare and branch instruction to DEST.
   DEST is the destination insn (i.e. the label), INSN is the source,
   and OP is the conditional expression.

   NOTE: the returned buffer is static, so the result is only valid
   until the next call.  */

const char *
output_cbcond (rtx op, rtx dest, rtx insn)
{
  enum machine_mode mode = GET_MODE (XEXP (op, 0));
  enum rtx_code code = GET_CODE (op);
  const char *cond_str, *tmpl;
  int far, emit_nop, len;
  static char string[64];
  char size_char;

  /* Compare and Branch is limited to +-2KB.  If it is too far away,
     change

     cxbne X, Y, .LC30

     to

     cxbe X, Y, .+16
     nop
     ba,pt xcc, .LC30
     nop  */

  len = get_attr_length (insn);

  /* NOTE(review): the 4/2 thresholds below presumably count the length
     attribute in instruction words rather than bytes -- confirm against
     the cbcond patterns in sparc.md.  */
  far = len == 4;
  emit_nop = len == 2;

  if (far)
    /* The cbcond is emitted with the inverted condition and the real
       target is reached through an unconditional branch (see below).  */
    code = reverse_condition (code);

  /* 'w' selects the 32-bit form, 'x' the 64-bit form.  */
  size_char = ((mode == SImode) ? 'w' : 'x');

  switch (code)
    {
    case NE:
      cond_str = "ne";
      break;

    case EQ:
      cond_str = "e";
      break;

    case GE:
      if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
	cond_str = "pos";
      else
	cond_str = "ge";
      break;

    case GT:
      cond_str = "g";
      break;

    case LE:
      cond_str = "le";
      break;

    case LT:
      if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
	cond_str = "neg";
      else
	cond_str = "l";
      break;

    case GEU:
      cond_str = "cc";
      break;

    case GTU:
      cond_str = "gu";
      break;

    case LEU:
      cond_str = "leu";
      break;

    case LTU:
      cond_str = "cs";
      break;

    default:
      gcc_unreachable ();
    }

  if (far)
    {
      int veryfar = 1, delta;

      if (INSN_ADDRESSES_SET_P ())
	{
	  delta = (INSN_ADDRESSES (INSN_UID (dest))
		   - INSN_ADDRESSES (INSN_UID (insn)));
	  /* Leave some instructions for "slop".  */
	  if (delta >= -260000 && delta < 260000)
	    veryfar = 0;
	}

      /* A plain "b" reaches farther than "ba,pt %xcc"; use it only
	 when the target is out of range of the latter.  */
      if (veryfar)
	tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
      else
	tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
    }
  else
    {
      if (emit_nop)
	tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
      else
	tmpl = "c%cb%s\t%%1, %%2, %%3";
    }

  /* Fill in the size letter and the condition string; the remaining
     %1/%2/%3 operand references are expanded later by the assembler
     output machinery.  */
  snprintf (string, sizeof(string), tmpl, size_char, cond_str);

  return string;
}
8236
/* Return the string to output a conditional branch to LABEL, testing
   register REG.  LABEL is the operand number of the label; REG is the
   operand number of the reg.  OP is the conditional expression.  The mode
   of REG says what kind of comparison we made.

   DEST is the destination insn (i.e. the label), INSN is the source.

   REVERSED is nonzero if we should reverse the sense of the comparison.

   ANNUL is nonzero if we should generate an annulling branch.

   NOTE: the returned buffer is static, so the result is only valid
   until the next call.  */

const char *
output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
		 int annul, rtx insn)
{
  static char string[64];
  enum rtx_code code = GET_CODE (op);
  enum machine_mode mode = GET_MODE (XEXP (op, 0));
  rtx note;
  int far;
  char *p;

  /* branch on register are limited to +-128KB.  If it is too far away,
     change

     brnz,pt %g1, .LC30

     to

     brz,pn %g1, .+12
      nop
     ba,pt %xcc, .LC30

     and

     brgez,a,pn %o1, .LC29

     to

     brlz,pt %o1, .+16
      nop
     ba,pt %xcc, .LC29  */

  /* NOTE(review): the length attribute here presumably counts
     instruction words -- confirm against the branch patterns in
     sparc.md.  */
  far = get_attr_length (insn) >= 3;

  /* If not floating-point or if EQ or NE, we can just reverse the code.  */
  if (reversed ^ far)
    code = reverse_condition (code);

  /* Only 64 bit versions of these instructions exist.  */
  gcc_assert (mode == DImode);

  /* Start by writing the branch condition.  */

  switch (code)
    {
    case NE:
      strcpy (string, "brnz");
      break;

    case EQ:
      strcpy (string, "brz");
      break;

    case GE:
      strcpy (string, "brgez");
      break;

    case LT:
      strcpy (string, "brlz");
      break;

    case LE:
      strcpy (string, "brlez");
      break;

    case GT:
      strcpy (string, "brgz");
      break;

    default:
      gcc_unreachable ();
    }

  p = strchr (string, '\0');

  /* Now add the annulling, reg, label, and nop.  */
  if (annul && ! far)
    {
      strcpy (p, ",a");
      p += 2;
    }

  if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
    {
      /* Add a branch-prediction hint (,pt / ,pn), inverted when the
	 branch condition itself was inverted for the far form.  */
      strcpy (p,
	      ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
	      ? ",pt" : ",pn");
      p += 3;
    }

  /* Separate mnemonic and operands with a tab while the mnemonic is
     short, otherwise with a single space.  */
  *p = p < string + 8 ? '\t' : ' ';
  p++;
  *p++ = '%';
  *p++ = '0' + reg;
  *p++ = ',';
  *p++ = ' ';
  if (far)
    {
      int veryfar = 1, delta;

      if (INSN_ADDRESSES_SET_P ())
	{
	  delta = (INSN_ADDRESSES (INSN_UID (dest))
		   - INSN_ADDRESSES (INSN_UID (insn)));
	  /* Leave some instructions for "slop".  */
	  if (delta >= -260000 && delta < 260000)
	    veryfar = 0;
	}

      strcpy (p, ".+12\n\t nop\n\t");
      /* Skip the next insn if requested or
	 if we know that it will be a nop.  */
      if (annul || ! final_sequence)
	p[3] = '6';	/* Turns ".+12" into ".+16".  */
      p += 12;
      if (veryfar)
	{
	  strcpy (p, "b\t");
	  p += 2;
	}
      else
	{
	  strcpy (p, "ba,pt\t%%xcc, ");
	  p += 13;
	}
    }
  *p++ = '%';
  *p++ = 'l';
  *p++ = '0' + label;
  *p++ = '%';
  *p++ = '#';
  *p = '\0';

  return string;
}
8383
/* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7].
   Such instructions cannot be used in the delay slot of return insn on v9.
   If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
 */

static int
epilogue_renumber (register rtx *where, int test)
{
  register const char *fmt;
  register int i;
  register enum rtx_code code;

  if (*where == 0)
    return 0;

  code = GET_CODE (*where);

  switch (code)
    {
    case REG:
      if (REGNO (*where) >= 8 && REGNO (*where) < 24)	/* oX or lX */
	return 1;
      if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
	*where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
      /* Fall through: other registers (including a just-renamed input
	 register) are acceptable, like the constant cases below.  */
    case SCRATCH:
    case CC0:
    case PC:
    case CONST_INT:
    case CONST_DOUBLE:
      return 0;

      /* Do not replace the frame pointer with the stack pointer because
	 it can cause the delayed instruction to load below the stack.
	 This occurs when instructions like:

	 (set (reg/i:SI 24 %i0)
	     (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
			       (const_int -20 [0xffffffec])) 0))

	 are in the return delayed slot.  */
    case PLUS:
      if (GET_CODE (XEXP (*where, 0)) == REG
	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
	  && (GET_CODE (XEXP (*where, 1)) != CONST_INT
	      || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
	return 1;
      break;

    case MEM:
      if (SPARC_STACK_BIAS
	  && GET_CODE (XEXP (*where, 0)) == REG
	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
	return 1;
      break;

    default:
      break;
    }

  /* Recurse over all sub-expressions of *WHERE.  */
  fmt = GET_RTX_FORMAT (code);

  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;
	  for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
	    if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
	      return 1;
	}
      else if (fmt[i] == 'e'
	       && epilogue_renumber (&(XEXP (*where, i)), test))
	return 1;
    }
  return 0;
}
8460
/* Leaf functions and non-leaf functions have different needs.  */

/* Register allocation order preferred for leaf functions.  */
static const int
reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;

/* Register allocation order for ordinary (non-leaf) functions.  */
static const int
reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;

/* Indexed by the last_order_nonleaf flag in order_regs_for_local_alloc:
   [0] = leaf order, [1] = non-leaf order.  */
static const int *const reg_alloc_orders[] = {
  reg_leaf_alloc_order,
  reg_nonleaf_alloc_order};
8472
8473 void
order_regs_for_local_alloc(void)8474 order_regs_for_local_alloc (void)
8475 {
8476 static int last_order_nonleaf = 1;
8477
8478 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8479 {
8480 last_order_nonleaf = !last_order_nonleaf;
8481 memcpy ((char *) reg_alloc_order,
8482 (const char *) reg_alloc_orders[last_order_nonleaf],
8483 FIRST_PSEUDO_REGISTER * sizeof (int));
8484 }
8485 }
8486
8487 /* Return 1 if REG and MEM are legitimate enough to allow the various
8488 mem<-->reg splits to be run. */
8489
8490 int
sparc_splitdi_legitimate(rtx reg,rtx mem)8491 sparc_splitdi_legitimate (rtx reg, rtx mem)
8492 {
8493 /* Punt if we are here by mistake. */
8494 gcc_assert (reload_completed);
8495
8496 /* We must have an offsettable memory reference. */
8497 if (! offsettable_memref_p (mem))
8498 return 0;
8499
8500 /* If we have legitimate args for ldd/std, we do not want
8501 the split to happen. */
8502 if ((REGNO (reg) % 2) == 0
8503 && mem_min_alignment (mem, 8))
8504 return 0;
8505
8506 /* Success. */
8507 return 1;
8508 }
8509
8510 /* Like sparc_splitdi_legitimate but for REG <--> REG moves. */
8511
8512 int
sparc_split_regreg_legitimate(rtx reg1,rtx reg2)8513 sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
8514 {
8515 int regno1, regno2;
8516
8517 if (GET_CODE (reg1) == SUBREG)
8518 reg1 = SUBREG_REG (reg1);
8519 if (GET_CODE (reg1) != REG)
8520 return 0;
8521 regno1 = REGNO (reg1);
8522
8523 if (GET_CODE (reg2) == SUBREG)
8524 reg2 = SUBREG_REG (reg2);
8525 if (GET_CODE (reg2) != REG)
8526 return 0;
8527 regno2 = REGNO (reg2);
8528
8529 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8530 return 1;
8531
8532 if (TARGET_VIS3)
8533 {
8534 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8535 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8536 return 1;
8537 }
8538
8539 return 0;
8540 }
8541
8542 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8543 This makes them candidates for using ldd and std insns.
8544
8545 Note reg1 and reg2 *must* be hard registers. */
8546
8547 int
registers_ok_for_ldd_peep(rtx reg1,rtx reg2)8548 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8549 {
8550 /* We might have been passed a SUBREG. */
8551 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8552 return 0;
8553
8554 if (REGNO (reg1) % 2 != 0)
8555 return 0;
8556
8557 /* Integer ldd is deprecated in SPARC V9 */
8558 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8559 return 0;
8560
8561 return (REGNO (reg1) == REGNO (reg2) - 1);
8562 }
8563
8564 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8565 an ldd or std insn.
8566
8567 This can only happen when addr1 and addr2, the addresses in mem1
8568 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8569 addr1 must also be aligned on a 64-bit boundary.
8570
8571 Also iff dependent_reg_rtx is not null it should not be used to
8572 compute the address for mem1, i.e. we cannot optimize a sequence
8573 like:
8574 ld [%o0], %o0
8575 ld [%o0 + 4], %o1
8576 to
8577 ldd [%o0], %o0
8578 nor:
8579 ld [%g3 + 4], %g3
8580 ld [%g3], %g2
8581 to
8582 ldd [%g3], %g2
8583
8584 But, note that the transformation from:
8585 ld [%g2 + 4], %g3
8586 ld [%g2], %g2
8587 to
8588 ldd [%g2], %g2
8589 is perfectly fine. Thus, the peephole2 patterns always pass us
8590 the destination register of the first load, never the second one.
8591
8592 For stores we don't have a similar problem, so dependent_reg_rtx is
8593 NULL_RTX. */
8594
8595 int
mems_ok_for_ldd_peep(rtx mem1,rtx mem2,rtx dependent_reg_rtx)8596 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8597 {
8598 rtx addr1, addr2;
8599 unsigned int reg1;
8600 HOST_WIDE_INT offset1;
8601
8602 /* The mems cannot be volatile. */
8603 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8604 return 0;
8605
8606 /* MEM1 should be aligned on a 64-bit boundary. */
8607 if (MEM_ALIGN (mem1) < 64)
8608 return 0;
8609
8610 addr1 = XEXP (mem1, 0);
8611 addr2 = XEXP (mem2, 0);
8612
8613 /* Extract a register number and offset (if used) from the first addr. */
8614 if (GET_CODE (addr1) == PLUS)
8615 {
8616 /* If not a REG, return zero. */
8617 if (GET_CODE (XEXP (addr1, 0)) != REG)
8618 return 0;
8619 else
8620 {
8621 reg1 = REGNO (XEXP (addr1, 0));
8622 /* The offset must be constant! */
8623 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8624 return 0;
8625 offset1 = INTVAL (XEXP (addr1, 1));
8626 }
8627 }
8628 else if (GET_CODE (addr1) != REG)
8629 return 0;
8630 else
8631 {
8632 reg1 = REGNO (addr1);
8633 /* This was a simple (mem (reg)) expression. Offset is 0. */
8634 offset1 = 0;
8635 }
8636
8637 /* Make sure the second address is a (mem (plus (reg) (const_int). */
8638 if (GET_CODE (addr2) != PLUS)
8639 return 0;
8640
8641 if (GET_CODE (XEXP (addr2, 0)) != REG
8642 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8643 return 0;
8644
8645 if (reg1 != REGNO (XEXP (addr2, 0)))
8646 return 0;
8647
8648 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8649 return 0;
8650
8651 /* The first offset must be evenly divisible by 8 to ensure the
8652 address is 64 bit aligned. */
8653 if (offset1 % 8 != 0)
8654 return 0;
8655
8656 /* The offset for the second addr must be 4 more than the first addr. */
8657 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8658 return 0;
8659
8660 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8661 instructions. */
8662 return 1;
8663 }
8664
8665 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
8666
8667 rtx
widen_mem_for_ldd_peep(rtx mem1,rtx mem2,enum machine_mode mode)8668 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, enum machine_mode mode)
8669 {
8670 rtx x = widen_memory_access (mem1, mode, 0);
8671 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8672 return x;
8673 }
8674
8675 /* Return 1 if reg is a pseudo, or is the first register in
8676 a hard register pair. This makes it suitable for use in
8677 ldd and std insns. */
8678
8679 int
register_ok_for_ldd(rtx reg)8680 register_ok_for_ldd (rtx reg)
8681 {
8682 /* We might have been passed a SUBREG. */
8683 if (!REG_P (reg))
8684 return 0;
8685
8686 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8687 return (REGNO (reg) % 2 == 0);
8688
8689 return 1;
8690 }
8691
8692 /* Return 1 if OP, a MEM, has an address which is known to be
8693 aligned to an 8-byte boundary. */
8694
8695 int
memory_ok_for_ldd(rtx op)8696 memory_ok_for_ldd (rtx op)
8697 {
8698 /* In 64-bit mode, we assume that the address is word-aligned. */
8699 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8700 return 0;
8701
8702 if (! can_create_pseudo_p ()
8703 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8704 return 0;
8705
8706 return 1;
8707 }
8708
/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P.

   Accept exactly the punctuation characters that sparc_print_operand
   handles without an operand: '#', '*', '(', ')', '_' and '&'.  */

static bool
sparc_print_operand_punct_valid_p (unsigned char code)
{
  switch (code)
    {
    case '#':
    case '*':
    case '(':
    case ')':
    case '_':
    case '&':
      return true;

    default:
      return false;
    }
}
8724
/* Implement TARGET_PRINT_OPERAND.
   Print operand X (an rtx) in assembler syntax to file FILE.
   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
   For `%' followed by punctuation, CODE is the punctuation and X is null.  */

static void
sparc_print_operand (FILE *file, rtx x, int code)
{
  switch (code)
    {
    case '#':
      /* Output an insn in a delay slot.  */
      if (final_sequence)
	sparc_indent_opcode = 1;
      else
	fputs ("\n\t nop", file);
      return;
    case '*':
      /* Output an annul flag if there's nothing for the delay slot and we
	 are optimizing.  This is always used with '(' below.
	 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
	 this is a dbx bug.  So, we only do this when optimizing.
	 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
	 Always emit a nop in case the next instruction is a branch.  */
      if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
	fputs (",a", file);
      return;
    case '(':
      /* Output a 'nop' if there's nothing for the delay slot and we are
	 not optimizing.  This is always used with '*' above.  */
      if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
	fputs ("\n\t nop", file);
      else if (final_sequence)
	sparc_indent_opcode = 1;
      return;
    case ')':
      /* Output the right displacement from the saved PC on function return.
	 The caller may have placed an "unimp" insn immediately after the call
	 so we have to account for it.  This insn is used in the 32-bit ABI
	 when calling a function that returns a non zero-sized structure.  The
	 64-bit ABI doesn't have it.  Be careful to have this test be the same
	 as that for the call.  The exception is when sparc_std_struct_return
	 is enabled, the psABI is followed exactly and the adjustment is made
	 by the code in sparc_struct_value_rtx.  The call emitted is the same
	 when sparc_std_struct_return is enabled.  */
      if (!TARGET_ARCH64
	  && cfun->returns_struct
	  && !sparc_std_struct_return
	  && DECL_SIZE (DECL_RESULT (current_function_decl))
	  && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
	     == INTEGER_CST
	  && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
	fputs ("12", file);
      else
	fputc ('8', file);
      return;
    case '_':
      /* Output the Embedded Medium/Anywhere code model base register.  */
      fputs (EMBMEDANY_BASE_REG, file);
      return;
    case '&':
      /* Print some local dynamic TLS name.  */
      assemble_name (file, get_some_local_dynamic_name ());
      return;

    case 'Y':
      /* Adjust the operand to take into account a RESTORE operation.  */
      if (GET_CODE (x) == CONST_INT)
	break;
      else if (GET_CODE (x) != REG)
	output_operand_lossage ("invalid %%Y operand");
      else if (REGNO (x) < 8)
	fputs (reg_names[REGNO (x)], file);
      else if (REGNO (x) >= 24 && REGNO (x) < 32)
	/* Map input registers 24-31 to the corresponding output
	   registers (REGNO - 16).  */
	fputs (reg_names[REGNO (x)-16], file);
      else
	output_operand_lossage ("invalid %%Y operand");
      return;
    case 'L':
      /* Print out the low order register name of a register pair.  */
      if (WORDS_BIG_ENDIAN)
	fputs (reg_names[REGNO (x)+1], file);
      else
	fputs (reg_names[REGNO (x)], file);
      return;
    case 'H':
      /* Print out the high order register name of a register pair.  */
      if (WORDS_BIG_ENDIAN)
	fputs (reg_names[REGNO (x)], file);
      else
	fputs (reg_names[REGNO (x)+1], file);
      return;
    case 'R':
      /* Print out the second register name of a register pair or quad.
	 I.e., R (%o0) => %o1.  */
      fputs (reg_names[REGNO (x)+1], file);
      return;
    case 'S':
      /* Print out the third register name of a register quad.
	 I.e., S (%o0) => %o2.  */
      fputs (reg_names[REGNO (x)+2], file);
      return;
    case 'T':
      /* Print out the fourth register name of a register quad.
	 I.e., T (%o0) => %o3.  */
      fputs (reg_names[REGNO (x)+3], file);
      return;
    case 'x':
      /* Print a condition code register.  */
      if (REGNO (x) == SPARC_ICC_REG)
	{
	  /* We don't handle CC[X]_NOOVmode because they're not supposed
	     to occur here.  */
	  if (GET_MODE (x) == CCmode)
	    fputs ("%icc", file);
	  else if (GET_MODE (x) == CCXmode)
	    fputs ("%xcc", file);
	  else
	    gcc_unreachable ();
	}
      else
	/* %fccN register */
	fputs (reg_names[REGNO (x)], file);
      return;
    case 'm':
      /* Print the operand's address only.  */
      output_address (XEXP (x, 0));
      return;
    case 'r':
      /* In this case we need a register.  Use %g0 if the
	 operand is const0_rtx.  */
      if (x == const0_rtx
	  || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
	{
	  fputs ("%g0", file);
	  return;
	}
      else
	break;

    case 'A':
      /* Print the mnemonic of a two-operand logic operation.  */
      switch (GET_CODE (x))
	{
	case IOR: fputs ("or", file); break;
	case AND: fputs ("and", file); break;
	case XOR: fputs ("xor", file); break;
	default: output_operand_lossage ("invalid %%A operand");
	}
      return;

    case 'B':
      /* Like 'A' but print the negated-operand form of the operation.  */
      switch (GET_CODE (x))
	{
	case IOR: fputs ("orn", file); break;
	case AND: fputs ("andn", file); break;
	case XOR: fputs ("xnor", file); break;
	default: output_operand_lossage ("invalid %%B operand");
	}
      return;

      /* This is used by the conditional move instructions.  */
    case 'C':
      {
	enum rtx_code rc = GET_CODE (x);

	switch (rc)
	  {
	  case NE: fputs ("ne", file); break;
	  case EQ: fputs ("e", file); break;
	  case GE: fputs ("ge", file); break;
	  case GT: fputs ("g", file); break;
	  case LE: fputs ("le", file); break;
	  case LT: fputs ("l", file); break;
	  case GEU: fputs ("geu", file); break;
	  case GTU: fputs ("gu", file); break;
	  case LEU: fputs ("leu", file); break;
	  case LTU: fputs ("lu", file); break;
	  case LTGT: fputs ("lg", file); break;
	  case UNORDERED: fputs ("u", file); break;
	  case ORDERED: fputs ("o", file); break;
	  case UNLT: fputs ("ul", file); break;
	  case UNLE: fputs ("ule", file); break;
	  case UNGT: fputs ("ug", file); break;
	  case UNGE: fputs ("uge", file); break;
	  case UNEQ: fputs ("ue", file); break;
	  default: output_operand_lossage ("invalid %%C operand");
	  }
	return;
      }

      /* These are used by the movr instruction pattern.  */
    case 'D':
      {
	enum rtx_code rc = GET_CODE (x);
	switch (rc)
	  {
	  case NE: fputs ("ne", file); break;
	  case EQ: fputs ("e", file); break;
	  case GE: fputs ("gez", file); break;
	  case LT: fputs ("lz", file); break;
	  case LE: fputs ("lez", file); break;
	  case GT: fputs ("gz", file); break;
	  default: output_operand_lossage ("invalid %%D operand");
	  }
	return;
      }

    case 'b':
      {
	/* Print a sign-extended character.  */
	int i = trunc_int_for_mode (INTVAL (x), QImode);
	fprintf (file, "%d", i);
	return;
      }

    case 'f':
      /* Operand must be a MEM; write its address.  */
      if (GET_CODE (x) != MEM)
	output_operand_lossage ("invalid %%f operand");
      output_address (XEXP (x, 0));
      return;

    case 's':
      {
	/* Print a sign-extended 32-bit value.  */
	HOST_WIDE_INT i;
	if (GET_CODE(x) == CONST_INT)
	  i = INTVAL (x);
	else if (GET_CODE(x) == CONST_DOUBLE)
	  i = CONST_DOUBLE_LOW (x);
	else
	  {
	    output_operand_lossage ("invalid %%s operand");
	    return;
	  }
	i = trunc_int_for_mode (i, SImode);
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
	return;
      }

    case 0:
      /* Do nothing special.  */
      break;

    default:
      /* Undocumented flag.  */
      output_operand_lossage ("invalid operand output code");
    }

  /* No code (or a code that fell through above): print the operand
     itself according to its rtx class.  */
  if (GET_CODE (x) == REG)
    fputs (reg_names[REGNO (x)], file);
  else if (GET_CODE (x) == MEM)
    {
      fputc ('[', file);
	/* Poor Sun assembler doesn't understand absolute addressing.  */
      if (CONSTANT_P (XEXP (x, 0)))
	fputs ("%g0+", file);
      output_address (XEXP (x, 0));
      fputc (']', file);
    }
  else if (GET_CODE (x) == HIGH)
    {
      fputs ("%hi(", file);
      output_addr_const (file, XEXP (x, 0));
      fputc (')', file);
    }
  else if (GET_CODE (x) == LO_SUM)
    {
      sparc_print_operand (file, XEXP (x, 0), 0);
      if (TARGET_CM_MEDMID)
	fputs ("+%l44(", file);
      else
	fputs ("+%lo(", file);
      output_addr_const (file, XEXP (x, 1));
      fputc (')', file);
    }
  else if (GET_CODE (x) == CONST_DOUBLE
	   && (GET_MODE (x) == VOIDmode
	       || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
    {
      /* Only CONST_DOUBLEs whose value fits in 32 bits (zero- or
	 sign-extended) are valid immediates.  */
      if (CONST_DOUBLE_HIGH (x) == 0)
	fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
      else if (CONST_DOUBLE_HIGH (x) == -1
	       && CONST_DOUBLE_LOW (x) < 0)
	fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
      else
	output_operand_lossage ("long long constant not a valid immediate operand");
    }
  else if (GET_CODE (x) == CONST_DOUBLE)
    output_operand_lossage ("floating point constant not a valid immediate operand");
  else { output_addr_const (file, x); }
}
9017
/* Implement TARGET_PRINT_OPERAND_ADDRESS.
   Print a memory address X in assembler syntax to FILE.  */

static void
sparc_print_operand_address (FILE *file, rtx x)
{
  register rtx base, index = 0;
  int offset = 0;
  register rtx addr = x;

  if (REG_P (addr))
    fputs (reg_names[REGNO (addr)], file);
  else if (GET_CODE (addr) == PLUS)
    {
      /* Split the PLUS into a base plus either a constant offset or an
	 index expression.  */
      if (CONST_INT_P (XEXP (addr, 0)))
	offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
      else if (CONST_INT_P (XEXP (addr, 1)))
	offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
      else
	base = XEXP (addr, 0), index = XEXP (addr, 1);
      if (GET_CODE (base) == LO_SUM)
	{
	  gcc_assert (USE_AS_OFFSETABLE_LO10
		      && TARGET_ARCH64
		      && ! TARGET_CM_MEDMID);
	  output_operand (XEXP (base, 0), 0);
	  fputs ("+%lo(", file);
	  output_address (XEXP (base, 1));
	  fprintf (file, ")+%d", offset);
	}
      else
	{
	  fputs (reg_names[REGNO (base)], file);
	  if (index == 0)
	    fprintf (file, "%+d", offset);
	  else if (REG_P (index))
	    fprintf (file, "+%s", reg_names[REGNO (index)]);
	  else if (GET_CODE (index) == SYMBOL_REF
		   || GET_CODE (index) == LABEL_REF
		   || GET_CODE (index) == CONST)
	    fputc ('+', file), output_addr_const (file, index);
	  else gcc_unreachable ();
	}
    }
  else if (GET_CODE (addr) == MINUS
	   && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
    {
      /* Print a label-relative address as "expr-(label-.)".  */
      output_addr_const (file, XEXP (addr, 0));
      fputs ("-(", file);
      output_addr_const (file, XEXP (addr, 1));
      fputs ("-.)", file);
    }
  else if (GET_CODE (addr) == LO_SUM)
    {
      output_operand (XEXP (addr, 0), 0);
      if (TARGET_CM_MEDMID)
	fputs ("+%l44(", file);
      else
	fputs ("+%lo(", file);
      output_address (XEXP (addr, 1));
      fputc (')', file);
    }
  else if (flag_pic
	   && GET_CODE (addr) == CONST
	   && GET_CODE (XEXP (addr, 0)) == MINUS
	   && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
	   && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
	   && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
    {
      /* PIC form: (const (minus SYM (const (minus LABEL pc)))).  */
      addr = XEXP (addr, 0);
      output_addr_const (file, XEXP (addr, 0));
      /* Group the args of the second CONST in parenthesis.  */
      fputs ("-(", file);
      /* Skip past the second CONST--it does nothing for us.  */
      output_addr_const (file, XEXP (XEXP (addr, 1), 0));
      /* Close the parenthesis.  */
      fputc (')', file);
    }
  else
    {
      output_addr_const (file, addr);
    }
}
9100
9101 /* Target hook for assembling integer objects. The sparc version has
9102 special handling for aligned DI-mode objects. */
9103
9104 static bool
sparc_assemble_integer(rtx x,unsigned int size,int aligned_p)9105 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9106 {
9107 /* ??? We only output .xword's for symbols and only then in environments
9108 where the assembler can handle them. */
9109 if (aligned_p && size == 8
9110 && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
9111 {
9112 if (TARGET_V9)
9113 {
9114 assemble_integer_with_op ("\t.xword\t", x);
9115 return true;
9116 }
9117 else
9118 {
9119 assemble_aligned_integer (4, const0_rtx);
9120 assemble_aligned_integer (4, x);
9121 return true;
9122 }
9123 }
9124 return default_assemble_integer (x, size, aligned_p);
9125 }
9126
/* Return the value of a code used in the .proc pseudo-op that says
   what kind of result this function returns.  For non-C types, we pick
   the closest C type.  */

/* Fallback sizes for the standard C types, used by the type
   classification code below when the target configuration does not
   define them itself.  */

#ifndef SHORT_TYPE_SIZE
#define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
#endif

#ifndef INT_TYPE_SIZE
#define INT_TYPE_SIZE BITS_PER_WORD
#endif

#ifndef LONG_TYPE_SIZE
#define LONG_TYPE_SIZE BITS_PER_WORD
#endif

#ifndef LONG_LONG_TYPE_SIZE
#define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
#endif

#ifndef FLOAT_TYPE_SIZE
#define FLOAT_TYPE_SIZE BITS_PER_WORD
#endif

#ifndef DOUBLE_TYPE_SIZE
#define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
#endif

#ifndef LONG_DOUBLE_TYPE_SIZE
#define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
#endif
9158
unsigned long
sparc_type_code (register tree type)
{
  register unsigned long qualifiers = 0;
  register unsigned shift;

  /* Only the first 30 bits of the qualifier are valid.  We must refrain from
     setting more, since some assemblers will give an error for this.  Also,
     we must be careful to avoid shifts of 32 bits or more to avoid getting
     unpredictable results.  */

  /* Walk down the chain of component types (array element type, pointed-to
     type, function return type, ...), encoding one 2-bit derivation code
     per level starting at bit 6, until a leaf type supplies the low bits
     and terminates the walk.  */
  for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
    {
      switch (TREE_CODE (type))
	{
	case ERROR_MARK:
	  return qualifiers;

	case ARRAY_TYPE:
	  qualifiers |= (3 << shift);
	  break;

	case FUNCTION_TYPE:
	case METHOD_TYPE:
	  qualifiers |= (2 << shift);
	  break;

	case POINTER_TYPE:
	case REFERENCE_TYPE:
	case OFFSET_TYPE:
	  qualifiers |= (1 << shift);
	  break;

	case RECORD_TYPE:
	  return (qualifiers | 8);

	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  return (qualifiers | 9);

	case ENUMERAL_TYPE:
	  return (qualifiers | 10);

	case VOID_TYPE:
	  return (qualifiers | 16);

	case INTEGER_TYPE:
	  /* If this is a range type, consider it to be the underlying
	     type.  */
	  if (TREE_TYPE (type) != 0)
	    break;

	  /* Carefully distinguish all the standard types of C,
	     without messing up if the language is not C.  We do this by
	     testing TYPE_PRECISION and TYPE_UNSIGNED.  The old code used to
	     look at both the names and the above fields, but that's redundant.
	     Any type whose size is between two C types will be considered
	     to be the wider of the two types.  Also, we do not have a
	     special code to use for "long long", so anything wider than
	     long is treated the same.  Note that we can't distinguish
	     between "int" and "long" in this code if they are the same
	     size, but that's fine, since neither can the assembler.  */

	  if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
	    return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));

	  else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
	    return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));

	  else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
	    return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));

	  else
	    return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));

	case REAL_TYPE:
	  /* If this is a range type, consider it to be the underlying
	     type.  */
	  if (TREE_TYPE (type) != 0)
	    break;

	  /* Carefully distinguish all the standard types of C,
	     without messing up if the language is not C.  */

	  if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
	    return (qualifiers | 6);

	  else
	    return (qualifiers | 7);

	case COMPLEX_TYPE:	/* GNU Fortran COMPLEX type.  */
	  /* ??? We need to distinguish between double and float complex types,
	     but I don't know how yet because I can't reach this code from
	     existing front-ends.  */
	  return (qualifiers | 7);	/* Who knows? */

	case VECTOR_TYPE:
	case BOOLEAN_TYPE:	/* Boolean truth value type.  */
	case LANG_TYPE:
	case NULLPTR_TYPE:
	  return qualifiers;

	default:
	  gcc_unreachable ();		/* Not a type! */
        }
    }

  return qualifiers;
}
9268
9269 /* Nested function support. */
9270
9271 /* Emit RTL insns to initialize the variable parts of a trampoline.
9272 FNADDR is an RTX for the address of the function's pure code.
9273 CXT is an RTX for the static chain value for the function.
9274
9275 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9276 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9277 (to store insns). This is a bit excessive. Perhaps a different
9278 mechanism would be better here.
9279
9280 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9281
static void
sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
{
  /* SPARC 32-bit trampoline:

 	sethi	%hi(fn), %g1
 	sethi	%hi(static), %g2
 	jmp	%g1+%lo(fn)
 	or	%g2, %lo(static), %g2

    SETHI i,r  = 00rr rrr1 00ii iiii iiii iiii iiii iiii
    JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
   */

  /* Word 0: "sethi %hi(fn), %g1" -- opcode 0x03000000 OR'ed with the
     high 22 bits of FNADDR.  */
  emit_move_insn
    (adjust_address (m_tramp, SImode, 0),
     expand_binop (SImode, ior_optab,
		   expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
		   GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
		   NULL_RTX, 1, OPTAB_DIRECT));

  /* Word 1: "sethi %hi(static), %g2" -- opcode 0x05000000 OR'ed with the
     high 22 bits of CXT.  */
  emit_move_insn
    (adjust_address (m_tramp, SImode, 4),
     expand_binop (SImode, ior_optab,
		   expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
		   GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
		   NULL_RTX, 1, OPTAB_DIRECT));

  /* Word 2: "jmp %g1+%lo(fn)" -- opcode 0x81c06000 OR'ed with the
     low 10 bits of FNADDR as the immediate.  */
  emit_move_insn
    (adjust_address (m_tramp, SImode, 8),
     expand_binop (SImode, ior_optab,
		   expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
		   GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
		   NULL_RTX, 1, OPTAB_DIRECT));

  /* Word 3: "or %g2, %lo(static), %g2" -- opcode 0x8410a000 OR'ed with
     the low 10 bits of CXT.  */
  emit_move_insn
    (adjust_address (m_tramp, SImode, 12),
     expand_binop (SImode, ior_optab,
		   expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
		   GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
		   NULL_RTX, 1, OPTAB_DIRECT));

  /* On UltraSPARC a flush flushes an entire cache line.  The trampoline is
     aligned on a 16 byte boundary so one flush clears it all.  */
  emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
  if (sparc_cpu != PROCESSOR_ULTRASPARC
      && sparc_cpu != PROCESSOR_ULTRASPARC3
      && sparc_cpu != PROCESSOR_NIAGARA
      && sparc_cpu != PROCESSOR_NIAGARA2
      && sparc_cpu != PROCESSOR_NIAGARA3
      && sparc_cpu != PROCESSOR_NIAGARA4)
    emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));

  /* Call __enable_execute_stack after writing onto the stack to make sure
     the stack address is accessible.  */
#ifdef HAVE_ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
                     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif

}
9343
9344 /* The 64-bit version is simpler because it makes more sense to load the
9345 values as "immediate" data out of the trampoline. It's also easier since
9346 we can read the PC without clobbering a register. */
9347
static void
sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
{
  /* SPARC 64-bit trampoline:

	rd	%pc, %g1
	ldx	[%g1+24], %g5
	jmp	%g5
	ldx	[%g1+16], %g5
	+16 bytes data
   */

  /* The four fixed instruction words from the listing above, stored as
     immediate SImode constants.  */
  emit_move_insn (adjust_address (m_tramp, SImode, 0),
		  GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
  emit_move_insn (adjust_address (m_tramp, SImode, 4),
		  GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
  emit_move_insn (adjust_address (m_tramp, SImode, 8),
		  GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
  emit_move_insn (adjust_address (m_tramp, SImode, 12),
		  GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
  /* The data words: static chain at offset 16, function address at 24,
     loaded PC-relative by the ldx instructions.  */
  emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
  emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
  emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));

  /* A second flush is only needed on processors whose flush does not
     cover the whole (16-byte-aligned) trampoline in one go.  */
  if (sparc_cpu != PROCESSOR_ULTRASPARC
      && sparc_cpu != PROCESSOR_ULTRASPARC3
      && sparc_cpu != PROCESSOR_NIAGARA
      && sparc_cpu != PROCESSOR_NIAGARA2
      && sparc_cpu != PROCESSOR_NIAGARA3
      && sparc_cpu != PROCESSOR_NIAGARA4)
    emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));

  /* Call __enable_execute_stack after writing onto the stack to make sure
     the stack address is accessible.  */
#ifdef HAVE_ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
                     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif
}
9387
9388 /* Worker for TARGET_TRAMPOLINE_INIT. */
9389
9390 static void
sparc_trampoline_init(rtx m_tramp,tree fndecl,rtx cxt)9391 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9392 {
9393 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9394 cxt = force_reg (Pmode, cxt);
9395 if (TARGET_ARCH64)
9396 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9397 else
9398 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9399 }
9400
9401 /* Adjust the cost of a scheduling dependency. Return the new cost of
9402 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9403
static int
supersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type;

  /* No latency information is available for unrecognizable insns.  */
  if (! recog_memoized (insn))
    return 0;

  insn_type = get_attr_type (insn);

  if (REG_NOTE_KIND (link) == 0)
    {
      /* Data dependency; DEP_INSN writes a register that INSN reads some
	 cycles later.  */

      /* if a load, then the dependence must be on the memory address;
	 add an extra "cycle".  Note that the cost could be two cycles
	 if the reg was written late in an instruction group; we cannot tell
	 here.  */
      if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
	return cost + 3;

      /* Get the delay only if the address of the store is the dependence.  */
      if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
	{
	  rtx pat = PATTERN(insn);
	  rtx dep_pat = PATTERN (dep_insn);

	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    return cost;  /* This should not happen!  */

	  /* The dependency between the two instructions was on the data that
	     is being stored.  Assume that this implies that the address of the
	     store is not dependent.  */
	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
	    return cost;

	  return cost + 3;  /* An approximation.  */
	}

      /* A shift instruction cannot receive its data from an instruction
	 in the same cycle; add a one cycle penalty.  */
      if (insn_type == TYPE_SHIFT)
	return cost + 3;   /* Split before cascade into shift.  */
    }
  else
    {
      /* Anti- or output- dependency; DEP_INSN reads/writes a register that
	 INSN writes some cycles later.  */

      /* These are only significant for the fpu unit; writing a fp reg before
	 the fpu has finished with it stalls the processor.  */

      /* Reusing an integer register causes no problems.  */
      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
	return 0;
    }

  return cost;
}
9464
/* Adjust the cost of a scheduling dependency for HyperSPARC (and
   SPARClite86x, which shares this model).  INSN depends on DEP_INSN
   through LINK; COST is the current estimate, and the possibly
   adjusted cost is returned.  */

static int
hypersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type, dep_type;
  rtx pat = PATTERN(insn);
  rtx dep_pat = PATTERN (dep_insn);

  /* Leave the cost alone unless latency data exists for both insns.  */
  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_type = get_attr_type (dep_insn);

  switch (REG_NOTE_KIND (link))
    {
    case 0:
      /* Data dependency; DEP_INSN writes a register that INSN reads some
	 cycles later.  */

      switch (insn_type)
	{
	case TYPE_STORE:
	case TYPE_FPSTORE:
	  /* Get the delay iff the address of the store is the dependence.  */
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    return cost;

	  /* A dependence on the stored data (not the address) incurs no
	     extra delay.  */
	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
	    return cost;
	  return cost + 3;

	case TYPE_LOAD:
	case TYPE_SLOAD:
	case TYPE_FPLOAD:
	  /* If a load, then the dependence must be on the memory address.  If
	     the addresses aren't equal, then it might be a false dependency */
	  if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
	    {
	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
		  || GET_CODE (SET_DEST (dep_pat)) != MEM
		  || GET_CODE (SET_SRC (pat)) != MEM
		  || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
				    XEXP (SET_SRC (pat), 0)))
		return cost + 2;

	      return cost + 8;
	    }
	  break;

	case TYPE_BRANCH:
	  /* Compare to branch latency is 0.  There is no benefit from
	     separating compare and branch.  */
	  if (dep_type == TYPE_COMPARE)
	    return 0;
	  /* Floating point compare to branch latency is less than
	     compare to conditional move.  */
	  if (dep_type == TYPE_FPCMP)
	    return cost - 1;
	  break;
	default:
	  break;
	}
	break;

    case REG_DEP_ANTI:
      /* Anti-dependencies only penalize the fpu unit.  */
      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
	return 0;
      break;

    default:
      break;
    }

  return cost;
}
9541
9542 static int
sparc_adjust_cost(rtx insn,rtx link,rtx dep,int cost)9543 sparc_adjust_cost(rtx insn, rtx link, rtx dep, int cost)
9544 {
9545 switch (sparc_cpu)
9546 {
9547 case PROCESSOR_SUPERSPARC:
9548 cost = supersparc_adjust_cost (insn, link, dep, cost);
9549 break;
9550 case PROCESSOR_HYPERSPARC:
9551 case PROCESSOR_SPARCLITE86X:
9552 cost = hypersparc_adjust_cost (insn, link, dep, cost);
9553 break;
9554 default:
9555 break;
9556 }
9557 return cost;
9558 }
9559
/* Scheduler initialization hook: SPARC needs no per-region setup, so
   this is deliberately empty.  */

static void
sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
		  int sched_verbose ATTRIBUTE_UNUSED,
		  int max_ready ATTRIBUTE_UNUSED)
{}
9565
9566 static int
sparc_use_sched_lookahead(void)9567 sparc_use_sched_lookahead (void)
9568 {
9569 if (sparc_cpu == PROCESSOR_NIAGARA
9570 || sparc_cpu == PROCESSOR_NIAGARA2
9571 || sparc_cpu == PROCESSOR_NIAGARA3)
9572 return 0;
9573 if (sparc_cpu == PROCESSOR_NIAGARA4)
9574 return 2;
9575 if (sparc_cpu == PROCESSOR_ULTRASPARC
9576 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9577 return 4;
9578 if ((1 << sparc_cpu) &
9579 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9580 (1 << PROCESSOR_SPARCLITE86X)))
9581 return 3;
9582 return 0;
9583 }
9584
9585 static int
sparc_issue_rate(void)9586 sparc_issue_rate (void)
9587 {
9588 switch (sparc_cpu)
9589 {
9590 case PROCESSOR_NIAGARA:
9591 case PROCESSOR_NIAGARA2:
9592 case PROCESSOR_NIAGARA3:
9593 default:
9594 return 1;
9595 case PROCESSOR_NIAGARA4:
9596 case PROCESSOR_V9:
9597 /* Assume V9 processors are capable of at least dual-issue. */
9598 return 2;
9599 case PROCESSOR_SUPERSPARC:
9600 return 3;
9601 case PROCESSOR_HYPERSPARC:
9602 case PROCESSOR_SPARCLITE86X:
9603 return 2;
9604 case PROCESSOR_ULTRASPARC:
9605 case PROCESSOR_ULTRASPARC3:
9606 return 4;
9607 }
9608 }
9609
/* Classify the SET_SRC of INSN's single-SET pattern: return 1 if it is
   known to leave the high 32 bits of the destination zero, -1 if it
   sign-extends from SImode, and 0 if nothing is known.  Helper for
   sparc_check_64.  */

static int
set_extends (rtx insn)
{
  register rtx pat = PATTERN (insn);

  switch (GET_CODE (SET_SRC (pat)))
    {
      /* Load and some shift instructions zero extend.  */
    case MEM:
    case ZERO_EXTEND:
      /* sethi clears the high bits */
    case HIGH:
      /* LO_SUM is used with sethi.  sethi cleared the high
	 bits and the values used with lo_sum are positive */
    case LO_SUM:
      /* Store flag stores 0 or 1 */
    case LT: case LTU:
    case GT: case GTU:
    case LE: case LEU:
    case GE: case GEU:
    case EQ:
    case NE:
      return 1;
    case AND:
      {
	rtx op0 = XEXP (SET_SRC (pat), 0);
	rtx op1 = XEXP (SET_SRC (pat), 1);
	/* ANDing with a nonnegative constant clears the high bits.  */
	if (GET_CODE (op1) == CONST_INT)
	  return INTVAL (op1) >= 0;
	if (GET_CODE (op0) != REG)
	  return 0;
	/* Otherwise it suffices for either register operand to be known
	   zero-extended.  */
	if (sparc_check_64 (op0, insn) == 1)
	  return 1;
	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
      }
    case IOR:
    case XOR:
      {
	rtx op0 = XEXP (SET_SRC (pat), 0);
	rtx op1 = XEXP (SET_SRC (pat), 1);
	/* For IOR/XOR both operands must have zero high bits.  */
	if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
	  return 0;
	if (GET_CODE (op1) == CONST_INT)
	  return INTVAL (op1) >= 0;
	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
      }
    case LSHIFTRT:
      return GET_MODE (SET_SRC (pat)) == SImode;
      /* Positive integers leave the high bits zero.  */
    case CONST_DOUBLE:
      return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
    case CONST_INT:
      return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
    case ASHIFTRT:
    case SIGN_EXTEND:
      /* Sign-extending in SImode yields -1; otherwise unknown (0).  */
      return - (GET_MODE (SET_SRC (pat)) == SImode);
    case REG:
      /* A plain register copy inherits the source's classification.  */
      return sparc_check_64 (SET_SRC (pat), insn);
    default:
      return 0;
    }
}
9672
/* Deferred case-vector jump tables, chained as EXPR_LISTs and emitted
   later by sparc_output_deferred_case_vectors.  We _ought_ to have only
   one kind per function, but...  */
static GTY(()) rtx sparc_addr_diff_list;
static GTY(()) rtx sparc_addr_list;
9676
9677 void
sparc_defer_case_vector(rtx lab,rtx vec,int diff)9678 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9679 {
9680 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9681 if (diff)
9682 sparc_addr_diff_list
9683 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9684 else
9685 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9686 }
9687
/* Emit the assembly for one deferred absolute-address jump table.  VEC
   is an EXPR_LIST pairing the table's label with its ADDR_VEC body, as
   recorded by sparc_defer_case_vector.  */

static void
sparc_output_addr_vec (rtx vec)
{
  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
  int idx, vlen = XVECLEN (body, 0);

#ifdef ASM_OUTPUT_ADDR_VEC_START
  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
#endif

  /* Emit the table's label, preferring the case-label hook when the
     target defines one.  */
#ifdef ASM_OUTPUT_CASE_LABEL
  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
			 NEXT_INSN (lab));
#else
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
#endif

  /* One absolute-address entry per case label.  */
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_VEC_ELT
	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
    }

#ifdef ASM_OUTPUT_ADDR_VEC_END
  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
#endif
}
9715
/* Emit the assembly for one deferred relative-address (difference) jump
   table.  VEC is an EXPR_LIST pairing the table's label with its
   ADDR_DIFF_VEC body; each entry is the offset of a case label from the
   table's base label.  */

static void
sparc_output_addr_diff_vec (rtx vec)
{
  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
  /* The base label the differences are taken against.  */
  rtx base = XEXP (XEXP (body, 0), 0);
  int idx, vlen = XVECLEN (body, 1);

#ifdef ASM_OUTPUT_ADDR_VEC_START
  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
#endif

  /* Emit the table's label, preferring the case-label hook when the
     target defines one.  */
#ifdef ASM_OUTPUT_CASE_LABEL
  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
			 NEXT_INSN (lab));
#else
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
#endif

  /* One label-difference entry per case label.  */
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_DIFF_ELT
        (asm_out_file,
         body,
         CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
         CODE_LABEL_NUMBER (base));
    }

#ifdef ASM_OUTPUT_ADDR_VEC_END
  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
#endif
}
9747
9748 static void
sparc_output_deferred_case_vectors(void)9749 sparc_output_deferred_case_vectors (void)
9750 {
9751 rtx t;
9752 int align;
9753
9754 if (sparc_addr_list == NULL_RTX
9755 && sparc_addr_diff_list == NULL_RTX)
9756 return;
9757
9758 /* Align to cache line in the function's code section. */
9759 switch_to_section (current_function_section ());
9760
9761 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9762 if (align > 0)
9763 ASM_OUTPUT_ALIGN (asm_out_file, align);
9764
9765 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9766 sparc_output_addr_vec (XEXP (t, 0));
9767 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9768 sparc_output_addr_diff_vec (XEXP (t, 0));
9769
9770 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9771 }
9772
9773 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
9774 unknown. Return 1 if the high bits are zero, -1 if the register is
9775 sign extended. */
int
sparc_check_64 (rtx x, rtx insn)
{
  /* If a register is set only once it is safe to ignore insns this
     code does not know how to handle.  The loop will either recognize
     the single set and return the correct value or fail to recognize
     it and return 0.  */
  int set_once = 0;
  rtx y = x;

  gcc_assert (GET_CODE (x) == REG);

  /* For a DImode register also track its SImode low word, so that a
     32-bit set of that word is noticed too.  */
  if (GET_MODE (x) == DImode)
    y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);

  if (flag_expensive_optimizations
      && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
    set_once = 1;

  /* With no starting insn the backward scan is only meaningful for a
     single-set register; start from the very last insn in that case.  */
  if (insn == 0)
    {
      if (set_once)
	insn = get_last_insn_anywhere ();
      else
	return 0;
    }

  /* Scan backwards for the insn that sets X (or its low word Y).  */
  while ((insn = PREV_INSN (insn)))
    {
      switch (GET_CODE (insn))
	{
	case JUMP_INSN:
	case NOTE:
	  break;
	case CODE_LABEL:
	case CALL_INSN:
	default:
	  /* Control-flow boundaries and unknown insns invalidate the
	     scan unless the register is known to be set exactly once.  */
	  if (! set_once)
	    return 0;
	  break;
	case INSN:
	  {
	    rtx pat = PATTERN (insn);
	    if (GET_CODE (pat) != SET)
	      return 0;
	    if (rtx_equal_p (x, SET_DEST (pat)))
	      return set_extends (insn);
	    if (y && rtx_equal_p (y, SET_DEST (pat)))
	      return set_extends (insn);
	    /* A partial overlap clobbers our knowledge.  */
	    if (reg_overlap_mentioned_p (SET_DEST (pat), y))
	      return 0;
	  }
	}
    }
  return 0;
}
9832
9833 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
9834 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
9835
const char *
output_v8plus_shift (rtx insn, rtx *operands, const char *opcode)
{
  /* Static buffer holding the composed multi-insn template returned to
     the caller; overwritten on every call.  */
  static char asm_code[60];

  /* The scratch register is only required when the destination
     register is not a 64-bit global or out register.  */
  if (which_alternative != 2)
    operands[3] = operands[0];

  /* We can only shift by constants <= 63.  */
  if (GET_CODE (operands[2]) == CONST_INT)
    operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);

  /* Assemble the full 64-bit source value in %3.  */
  if (GET_CODE (operands[1]) == CONST_INT)
    {
      output_asm_insn ("mov\t%1, %3", operands);
    }
  else
    {
      /* Merge the two 32-bit halves, first clearing the upper bits of
	 the low word unless they are already known to be zero.  */
      output_asm_insn ("sllx\t%H1, 32, %3", operands);
      if (sparc_check_64 (operands[1], insn) <= 0)
	output_asm_insn ("srl\t%L1, 0, %L1", operands);
      output_asm_insn ("or\t%L1, %3, %3", operands);
    }

  strcpy (asm_code, opcode);

  /* Append the shift itself plus the code that splits the 64-bit
     result back into the destination's two 32-bit halves.  */
  if (which_alternative != 2)
    return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
  else
    return
      strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
}
9870
9871 /* Output rtl to increment the profiler label LABELNO
9872 for profiling a function entry. */
9873
9874 void
sparc_profile_hook(int labelno)9875 sparc_profile_hook (int labelno)
9876 {
9877 char buf[32];
9878 rtx lab, fun;
9879
9880 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
9881 if (NO_PROFILE_COUNTERS)
9882 {
9883 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
9884 }
9885 else
9886 {
9887 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9888 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
9889 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
9890 }
9891 }
9892
9893 #ifdef TARGET_SOLARIS
9894 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
9895
static void
sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
				     tree decl ATTRIBUTE_UNUSED)
{
  /* Delegate link-once sections to the COMDAT-group implementation
     when the assembler supports it.  */
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }

  fprintf (asm_out_file, "\t.section\t\"%s\"", name);

  /* Translate the SECTION_* flags into Sun as "#attribute" syntax.  */
  if (!(flags & SECTION_DEBUG))
    fputs (",#alloc", asm_out_file);
  if (flags & SECTION_WRITE)
    fputs (",#write", asm_out_file);
  if (flags & SECTION_TLS)
    fputs (",#tls", asm_out_file);
  if (flags & SECTION_CODE)
    fputs (",#execinstr", asm_out_file);

  /* Sun as only supports #nobits/#progbits since Solaris 10.  */
  if (HAVE_AS_SPARC_NOBITS)
    {
      if (flags & SECTION_BSS)
	fputs (",#nobits", asm_out_file);
      else
	fputs (",#progbits", asm_out_file);
    }

  fputc ('\n', asm_out_file);
}
9928 #endif /* TARGET_SOLARIS */
9929
9930 /* We do not allow indirect calls to be optimized into sibling calls.
9931
9932 We cannot use sibling calls when delayed branches are disabled
9933 because they will likely require the call delay slot to be filled.
9934
9935 Also, on SPARC 32-bit we cannot emit a sibling call when the
9936 current function returns a structure. This is because the "unimp
9937 after call" convention would cause the callee to return to the
9938 wrong place. The generic code already disallows cases where the
9939 function being called returns a structure.
9940
9941 It may seem strange how this last case could occur. Usually there
9942 is code after the call which jumps to epilogue code which dumps the
9943 return value into the struct return area. That ought to invalidate
9944 the sibling call right? Well, in the C++ case we can end up passing
9945 the pointer to the struct return area to a constructor (which returns
9946 void) and then nothing else happens. Such a sibling call would look
9947 valid without the added check here.
9948
9949 VxWorks PIC PLT entries require the global pointer to be initialized
9950 on entry. We therefore can't emit sibling calls to them. */
9951 static bool
sparc_function_ok_for_sibcall(tree decl,tree exp ATTRIBUTE_UNUSED)9952 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9953 {
9954 return (decl
9955 && flag_delayed_branch
9956 && (TARGET_ARCH64 || ! cfun->returns_struct)
9957 && !(TARGET_VXWORKS_RTP
9958 && flag_pic
9959 && !targetm.binds_local_p (decl)));
9960 }
9961
9962 /* libfunc renaming. */
9963
/* Register SPARC-specific names for library support routines, replacing
   the compiler's generic defaults.  */

static void
sparc_init_libfuncs (void)
{
  if (TARGET_ARCH32)
    {
      /* Use the subroutines that Sun's library provides for integer
	 multiply and divide.  The `*' prevents an underscore from
	 being prepended by the compiler.  .umul is a little faster
	 than .mul.  */
      set_optab_libfunc (smul_optab, SImode, "*.umul");
      set_optab_libfunc (sdiv_optab, SImode, "*.div");
      set_optab_libfunc (udiv_optab, SImode, "*.udiv");
      set_optab_libfunc (smod_optab, SImode, "*.rem");
      set_optab_libfunc (umod_optab, SImode, "*.urem");

      /* TFmode arithmetic.  These names are part of the SPARC 32bit ABI.  */
      set_optab_libfunc (add_optab, TFmode, "_Q_add");
      set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
      set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
      set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
      set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");

      /* We can define the TFmode sqrt optab only if TARGET_FPU.  This
	 is because with soft-float, the SFmode and DFmode sqrt
	 instructions will be absent, and the compiler will notice and
	 try to use the TFmode sqrt instruction for calls to the
	 builtin function sqrt, but this fails.  */
      if (TARGET_FPU)
	set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");

      /* TFmode comparisons, also part of the 32-bit ABI.  */
      set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
      set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
      set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
      set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
      set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
      set_optab_libfunc (le_optab, TFmode, "_Q_fle");

      /* TFmode <-> SFmode/DFmode conversions.  */
      set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
      set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
      set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
      set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");

      /* TFmode <-> SImode conversions.  */
      set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
      set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
      set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
      set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");

      if (DITF_CONVERSION_LIBFUNCS)
	{
	  set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
	  set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
	  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
	  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
	}

      if (SUN_CONVERSION_LIBFUNCS)
	{
	  set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
	  set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
	  set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
	  set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
	}
    }
  if (TARGET_ARCH64)
    {
      /* In the SPARC 64bit ABI, SImode multiply and divide functions
	 do not exist in the library.  Make sure the compiler does not
	 emit calls to them by accident.  (It should always use the
         hardware instructions.)  */
      set_optab_libfunc (smul_optab, SImode, 0);
      set_optab_libfunc (sdiv_optab, SImode, 0);
      set_optab_libfunc (udiv_optab, SImode, 0);
      set_optab_libfunc (smod_optab, SImode, 0);
      set_optab_libfunc (umod_optab, SImode, 0);

      if (SUN_INTEGER_MULTIPLY_64)
	{
	  set_optab_libfunc (smul_optab, DImode, "__mul64");
	  set_optab_libfunc (sdiv_optab, DImode, "__div64");
	  set_optab_libfunc (udiv_optab, DImode, "__udiv64");
	  set_optab_libfunc (smod_optab, DImode, "__rem64");
	  set_optab_libfunc (umod_optab, DImode, "__urem64");
	}

      if (SUN_CONVERSION_LIBFUNCS)
	{
	  set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
	  set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
	  set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
	  set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
	}
    }
}
10057
10058 /* SPARC builtins. */
10059 enum sparc_builtins
10060 {
10061 /* FPU builtins. */
10062 SPARC_BUILTIN_LDFSR,
10063 SPARC_BUILTIN_STFSR,
10064
10065 /* VIS 1.0 builtins. */
10066 SPARC_BUILTIN_FPACK16,
10067 SPARC_BUILTIN_FPACK32,
10068 SPARC_BUILTIN_FPACKFIX,
10069 SPARC_BUILTIN_FEXPAND,
10070 SPARC_BUILTIN_FPMERGE,
10071 SPARC_BUILTIN_FMUL8X16,
10072 SPARC_BUILTIN_FMUL8X16AU,
10073 SPARC_BUILTIN_FMUL8X16AL,
10074 SPARC_BUILTIN_FMUL8SUX16,
10075 SPARC_BUILTIN_FMUL8ULX16,
10076 SPARC_BUILTIN_FMULD8SUX16,
10077 SPARC_BUILTIN_FMULD8ULX16,
10078 SPARC_BUILTIN_FALIGNDATAV4HI,
10079 SPARC_BUILTIN_FALIGNDATAV8QI,
10080 SPARC_BUILTIN_FALIGNDATAV2SI,
10081 SPARC_BUILTIN_FALIGNDATADI,
10082 SPARC_BUILTIN_WRGSR,
10083 SPARC_BUILTIN_RDGSR,
10084 SPARC_BUILTIN_ALIGNADDR,
10085 SPARC_BUILTIN_ALIGNADDRL,
10086 SPARC_BUILTIN_PDIST,
10087 SPARC_BUILTIN_EDGE8,
10088 SPARC_BUILTIN_EDGE8L,
10089 SPARC_BUILTIN_EDGE16,
10090 SPARC_BUILTIN_EDGE16L,
10091 SPARC_BUILTIN_EDGE32,
10092 SPARC_BUILTIN_EDGE32L,
10093 SPARC_BUILTIN_FCMPLE16,
10094 SPARC_BUILTIN_FCMPLE32,
10095 SPARC_BUILTIN_FCMPNE16,
10096 SPARC_BUILTIN_FCMPNE32,
10097 SPARC_BUILTIN_FCMPGT16,
10098 SPARC_BUILTIN_FCMPGT32,
10099 SPARC_BUILTIN_FCMPEQ16,
10100 SPARC_BUILTIN_FCMPEQ32,
10101 SPARC_BUILTIN_FPADD16,
10102 SPARC_BUILTIN_FPADD16S,
10103 SPARC_BUILTIN_FPADD32,
10104 SPARC_BUILTIN_FPADD32S,
10105 SPARC_BUILTIN_FPSUB16,
10106 SPARC_BUILTIN_FPSUB16S,
10107 SPARC_BUILTIN_FPSUB32,
10108 SPARC_BUILTIN_FPSUB32S,
10109 SPARC_BUILTIN_ARRAY8,
10110 SPARC_BUILTIN_ARRAY16,
10111 SPARC_BUILTIN_ARRAY32,
10112
10113 /* VIS 2.0 builtins. */
10114 SPARC_BUILTIN_EDGE8N,
10115 SPARC_BUILTIN_EDGE8LN,
10116 SPARC_BUILTIN_EDGE16N,
10117 SPARC_BUILTIN_EDGE16LN,
10118 SPARC_BUILTIN_EDGE32N,
10119 SPARC_BUILTIN_EDGE32LN,
10120 SPARC_BUILTIN_BMASK,
10121 SPARC_BUILTIN_BSHUFFLEV4HI,
10122 SPARC_BUILTIN_BSHUFFLEV8QI,
10123 SPARC_BUILTIN_BSHUFFLEV2SI,
10124 SPARC_BUILTIN_BSHUFFLEDI,
10125
10126 /* VIS 3.0 builtins. */
10127 SPARC_BUILTIN_CMASK8,
10128 SPARC_BUILTIN_CMASK16,
10129 SPARC_BUILTIN_CMASK32,
10130 SPARC_BUILTIN_FCHKSM16,
10131 SPARC_BUILTIN_FSLL16,
10132 SPARC_BUILTIN_FSLAS16,
10133 SPARC_BUILTIN_FSRL16,
10134 SPARC_BUILTIN_FSRA16,
10135 SPARC_BUILTIN_FSLL32,
10136 SPARC_BUILTIN_FSLAS32,
10137 SPARC_BUILTIN_FSRL32,
10138 SPARC_BUILTIN_FSRA32,
10139 SPARC_BUILTIN_PDISTN,
10140 SPARC_BUILTIN_FMEAN16,
10141 SPARC_BUILTIN_FPADD64,
10142 SPARC_BUILTIN_FPSUB64,
10143 SPARC_BUILTIN_FPADDS16,
10144 SPARC_BUILTIN_FPADDS16S,
10145 SPARC_BUILTIN_FPSUBS16,
10146 SPARC_BUILTIN_FPSUBS16S,
10147 SPARC_BUILTIN_FPADDS32,
10148 SPARC_BUILTIN_FPADDS32S,
10149 SPARC_BUILTIN_FPSUBS32,
10150 SPARC_BUILTIN_FPSUBS32S,
10151 SPARC_BUILTIN_FUCMPLE8,
10152 SPARC_BUILTIN_FUCMPNE8,
10153 SPARC_BUILTIN_FUCMPGT8,
10154 SPARC_BUILTIN_FUCMPEQ8,
10155 SPARC_BUILTIN_FHADDS,
10156 SPARC_BUILTIN_FHADDD,
10157 SPARC_BUILTIN_FHSUBS,
10158 SPARC_BUILTIN_FHSUBD,
10159 SPARC_BUILTIN_FNHADDS,
10160 SPARC_BUILTIN_FNHADDD,
10161 SPARC_BUILTIN_UMULXHI,
10162 SPARC_BUILTIN_XMULX,
10163 SPARC_BUILTIN_XMULXHI,
10164
10165 SPARC_BUILTIN_MAX
10166 };
10167
/* Decl of each registered SPARC builtin, indexed by builtin code.  */
static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
/* Insn code used to expand each builtin, indexed by builtin code.  */
static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10170
10171 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10172 function decl or NULL_TREE if the builtin was not added. */
10173
10174 static tree
def_builtin(const char * name,enum insn_code icode,enum sparc_builtins code,tree type)10175 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10176 tree type)
10177 {
10178 tree t
10179 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10180
10181 if (t)
10182 {
10183 sparc_builtins[code] = t;
10184 sparc_builtins_icode[code] = icode;
10185 }
10186
10187 return t;
10188 }
10189
10190 /* Likewise, but also marks the function as "const". */
10191
10192 static tree
def_builtin_const(const char * name,enum insn_code icode,enum sparc_builtins code,tree type)10193 def_builtin_const (const char *name, enum insn_code icode,
10194 enum sparc_builtins code, tree type)
10195 {
10196 tree t = def_builtin (name, icode, code, type);
10197
10198 if (t)
10199 TREE_READONLY (t) = 1;
10200
10201 return t;
10202 }
10203
/* Implement the TARGET_INIT_BUILTINS target hook.
   Create builtin functions for special SPARC instructions.  */

static void
sparc_init_builtins (void)
{
  /* The FSR load/store builtins require an FPU.  */
  if (TARGET_FPU)
    sparc_fpu_init_builtins ();

  /* The VIS builtins require the VIS extension.  */
  if (TARGET_VIS)
    sparc_vis_init_builtins ();
}
10216
10217 /* Create builtin functions for FPU instructions. */
10218
10219 static void
sparc_fpu_init_builtins(void)10220 sparc_fpu_init_builtins (void)
10221 {
10222 tree ftype
10223 = build_function_type_list (void_type_node,
10224 build_pointer_type (unsigned_type_node), 0);
10225 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10226 SPARC_BUILTIN_LDFSR, ftype);
10227 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10228 SPARC_BUILTIN_STFSR, ftype);
10229 }
10230
/* Create builtin functions for VIS instructions.

   Registers the VIS 1.0 builtins unconditionally, then the VIS 2.0 and
   VIS 3.0 sets when the corresponding extension is enabled.  Builtins
   whose result width depends on the ABI (edge, pixel-compare, array,
   bmask, cmask, pdistn, fucmp*) are registered with DImode types under
   TARGET_ARCH64 and SImode types otherwise.  */

static void
sparc_vis_init_builtins (void)
{
  /* Vector types used by the VIS builtins.  */
  tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
  tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
  tree v4hi = build_vector_type (intHI_type_node, 4);
  tree v2hi = build_vector_type (intHI_type_node, 2);
  tree v2si = build_vector_type (intSI_type_node, 2);
  tree v1si = build_vector_type (intSI_type_node, 1);

  /* Function types, named <return>_ftype_<arguments>.  */
  tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
  tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
  tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
  tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
  tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
  tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
  tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
  tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
  tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
  tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
  tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
  tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
  tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
  tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
  tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
							 v8qi, v8qi,
							 intDI_type_node, 0);
  tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
						      v8qi, v8qi, 0);
  tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
						      v8qi, v8qi, 0);
  tree di_ftype_di_di = build_function_type_list (intDI_type_node,
						  intDI_type_node,
						  intDI_type_node, 0);
  tree si_ftype_si_si = build_function_type_list (intSI_type_node,
						  intSI_type_node,
						  intSI_type_node, 0);
  tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
						    ptr_type_node,
						    intSI_type_node, 0);
  tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
						    ptr_type_node,
						    intDI_type_node, 0);
  tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
						    ptr_type_node,
						    ptr_type_node, 0);
  tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
						    ptr_type_node,
						    ptr_type_node, 0);
  tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
						      v4hi, v4hi, 0);
  tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
						      v2si, v2si, 0);
  tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
						      v4hi, v4hi, 0);
  tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
						      v2si, v2si, 0);
  tree void_ftype_di = build_function_type_list (void_type_node,
						 intDI_type_node, 0);
  tree di_ftype_void = build_function_type_list (intDI_type_node,
						 void_type_node, 0);
  tree void_ftype_si = build_function_type_list (void_type_node,
						 intSI_type_node, 0);
  tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
						  float_type_node,
						  float_type_node, 0);
  tree df_ftype_df_df = build_function_type_list (double_type_node,
						  double_type_node,
						  double_type_node, 0);

  /* Packing and expanding vectors.  */
  def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
	       SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
  def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
	       SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
  def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
	       SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
  def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
		     SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
  def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
		     SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);

  /* Multiplications.  */
  def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
		     SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
  def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
		     SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
  def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
		     SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
  def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
		     SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
  def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
		     SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
  def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
		     SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
  def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
		     SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);

  /* Data aligning.  Not "const" because they depend on the GSR.  */
  def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
	       SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
  def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
	       SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
  def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
	       SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
  def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
	       SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);

  def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
	       SPARC_BUILTIN_WRGSR, void_ftype_di);
  def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
	       SPARC_BUILTIN_RDGSR, di_ftype_void);

  if (TARGET_ARCH64)
    {
      def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
		   SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
      def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
		   SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
    }
  else
    {
      def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
		   SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
      def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
		   SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
    }

  /* Pixel distance.  */
  def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
		     SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);

  /* Edge handling.  */
  if (TARGET_ARCH64)
    {
      def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
			 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
      def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
			 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
      def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
			 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
      def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
			 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
      def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
			 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
      def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
			 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
    }
  else
    {
      def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
			 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
      def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
			 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
      def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
			 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
      def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
			 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
      def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
			 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
      def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
			 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
    }

  /* Pixel compare.  */
  if (TARGET_ARCH64)
    {
      def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
			 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
			 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
      def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
			 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
			 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
      def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
			 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
			 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
      def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
			 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
			 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
    }
  else
    {
      def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
			 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
			 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
      def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
			 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
			 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
      def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
			 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
			 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
      def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
			 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
			 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
    }

  /* Addition and subtraction.  */
  def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
		     SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
  def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
		     SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
  def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
		     SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
  def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
		     SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
  def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
		     SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
  def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
		     SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
  def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
		     SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
  def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
		     SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);

  /* Three-dimensional array addressing.  */
  if (TARGET_ARCH64)
    {
      def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
			 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
      def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
			 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
      def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
			 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
    }
  else
    {
      def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
			 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
      def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
			 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
      def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
			 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
    }

  if (TARGET_VIS2)
    {
      /* Edge handling.  */
      if (TARGET_ARCH64)
	{
	  def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
			     SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
	  def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
			     SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
	  def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
			     SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
	  def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
			     SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
	  def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
			     SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
	  def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
			     SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
	}
      else
	{
	  def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
			     SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
	  def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
			     SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
	  def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
			     SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
	  def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
			     SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
	  def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
			     SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
	  def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
			     SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
	}

      /* Byte mask and shuffle.  */
      if (TARGET_ARCH64)
	def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
		     SPARC_BUILTIN_BMASK, di_ftype_di_di);
      else
	def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
		     SPARC_BUILTIN_BMASK, si_ftype_si_si);
      def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
		   SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
      def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
		   SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
      def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
		   SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
      def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
		   SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
    }

  if (TARGET_VIS3)
    {
      if (TARGET_ARCH64)
	{
	  def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
		       SPARC_BUILTIN_CMASK8, void_ftype_di);
	  def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
		       SPARC_BUILTIN_CMASK16, void_ftype_di);
	  def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
		       SPARC_BUILTIN_CMASK32, void_ftype_di);
	}
      else
	{
	  def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
		       SPARC_BUILTIN_CMASK8, void_ftype_si);
	  def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
		       SPARC_BUILTIN_CMASK16, void_ftype_si);
	  def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
		       SPARC_BUILTIN_CMASK32, void_ftype_si);
	}

      def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
			 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);

      /* Vector shifts.  */
      def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
			 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
			 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
			 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
			 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
			 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
      def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
			 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
      def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
			 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
      def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
			 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);

      if (TARGET_ARCH64)
	def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
			   SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
      else
	def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
			   SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);

      def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
			 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
			 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
      def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
			 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);

      /* Saturating addition and subtraction.  */
      def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
			 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
			 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
      def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
			 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
			 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
      def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
			 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
      def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
			 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
      def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
			 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
      def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
			 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);

      /* Unsigned pixel compare.  */
      if (TARGET_ARCH64)
	{
	  def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
			     SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
	  def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
			     SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
	  def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
			     SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
	  def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
			     SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
	}
      else
	{
	  def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
			     SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
	  def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
			     SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
	  def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
			     SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
	  def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
			     SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
	}

      /* Floating-point halved arithmetic.  */
      def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
			 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
      def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
			 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
      def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
			 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
      def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
			 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
      def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
			 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
      def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
			 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);

      /* 64-bit multiplies.  */
      def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
			 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
      def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
			 SPARC_BUILTIN_XMULX, di_ftype_di_di);
      def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
			 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
    }
}
10642
10643 /* Implement TARGET_BUILTIN_DECL hook. */
10644
10645 static tree
sparc_builtin_decl(unsigned code,bool initialize_p ATTRIBUTE_UNUSED)10646 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10647 {
10648 if (code >= SPARC_BUILTIN_MAX)
10649 return error_mark_node;
10650
10651 return sparc_builtins[code];
10652 }
10653
/* Implement the TARGET_EXPAND_BUILTIN target hook.

   Expand builtin call EXP into RTL, emitting the generated insns and
   returning the result rtx (TARGET if usable), or const0_rtx for a
   void builtin.  Return NULL_RTX on failure.  */

static rtx
sparc_expand_builtin (tree exp, rtx target,
		      rtx subtarget ATTRIBUTE_UNUSED,
		      enum machine_mode tmode ATTRIBUTE_UNUSED,
		      int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
  enum insn_code icode = sparc_builtins_icode[code];
  bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
  call_expr_arg_iterator iter;
  int arg_count = 0;
  rtx pat, op[4];
  tree arg;

  if (nonvoid)
    {
      /* op[0] receives the result; reuse TARGET only if it matches the
	 insn's output mode and predicate.  */
      enum machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	op[0] = gen_reg_rtx (tmode);
      else
	op[0] = target;
    }

  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      const struct insn_operand_data *insn_op;
      int idx;

      if (arg == error_mark_node)
	return NULL_RTX;

      arg_count++;
      /* For a void builtin the first argument is insn operand 0,
	 otherwise operand 0 is the result and arguments start at 1.  */
      idx = arg_count - !nonvoid;
      insn_op = &insn_data[icode].operand[idx];
      op[arg_count] = expand_normal (arg);

      if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
	{
	  /* The FSR builtins take a pointer; turn it into the SImode
	     memory reference the ldfsr/stfsr patterns expect.  */
	  if (!address_operand (op[arg_count], SImode))
	    {
	      op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
	      op[arg_count] = copy_addr_to_reg (op[arg_count]);
	    }
	  op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
	}

      /* Single-element vector operands may arrive in scalar mode;
	 view them in the vector mode the insn wants.  */
      else if (insn_op->mode == V1DImode
	       && GET_MODE (op[arg_count]) == DImode)
	op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);

      else if (insn_op->mode == V1SImode
	       && GET_MODE (op[arg_count]) == SImode)
	op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);

      if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
							insn_op->mode))
	op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
    }

  /* Generate the insn with the right number of operands.  */
  switch (arg_count)
    {
    case 0:
      pat = GEN_FCN (icode) (op[0]);
      break;
    case 1:
      if (nonvoid)
	pat = GEN_FCN (icode) (op[0], op[1]);
      else
	pat = GEN_FCN (icode) (op[1]);
      break;
    case 2:
      pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;
    case 3:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;
    default:
      gcc_unreachable ();
    }

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);

  return (nonvoid ? op[0] : const0_rtx);
}
10746
/* Return the upper 16 bits of the 8x16 multiplication: the product of
   the 8-bit value E8 and the 16-bit value E16, rounded by adding 128
   and then scaled down by 256 (with C truncating division, matching
   the original implementation).  */

static int
sparc_vis_mul8x16 (int e8, int e16)
{
  const int rounded = e8 * e16 + 128;
  return rounded / 256;
}
10754
/* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
   the result into the array N_ELTS, whose elements are of INNER_TYPE.
   N_ELTS must have room for VECTOR_CST_NELTS (CST0) elements.  */

static void
sparc_handle_vis_mul8x16 (tree *n_elts, enum sparc_builtins fncode,
			  tree inner_type, tree cst0, tree cst1)
{
  unsigned i, num = VECTOR_CST_NELTS (cst0);
  int scale;

  switch (fncode)
    {
    case SPARC_BUILTIN_FMUL8X16:
      /* Element-wise: multiply matching elements of CST0 and CST1.  */
      for (i = 0; i < num; ++i)
	{
	  int val
	    = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
				 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
	  n_elts[i] = build_int_cst (inner_type, val);
	}
      break;

    case SPARC_BUILTIN_FMUL8X16AU:
      /* Scalar: multiply every element of CST0 by the first (upper)
	 element of CST1.  */
      scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));

      for (i = 0; i < num; ++i)
	{
	  int val
	    = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
				 scale);
	  n_elts[i] = build_int_cst (inner_type, val);
	}
      break;

    case SPARC_BUILTIN_FMUL8X16AL:
      /* Scalar: multiply every element of CST0 by the second (lower)
	 element of CST1.  */
      scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));

      for (i = 0; i < num; ++i)
	{
	  int val
	    = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
				 scale);
	  n_elts[i] = build_int_cst (inner_type, val);
	}
      break;

    default:
      gcc_unreachable ();
    }
}
10805
/* Implement TARGET_FOLD_BUILTIN hook.

   Fold builtin functions for SPARC intrinsics.  If IGNORE is true the
   result of the function call is ignored.  NULL_TREE is returned if the
   function could not be folded.  */

static tree
sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
		    tree *args, bool ignore)
{
  enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
  tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
  tree arg0, arg1, arg2;

  /* If the result is ignored, a "const" builtin call can be folded to
     zero outright; builtins with side effects must be preserved.  */
  if (ignore)
    switch (code)
      {
      case SPARC_BUILTIN_LDFSR:
      case SPARC_BUILTIN_STFSR:
      case SPARC_BUILTIN_ALIGNADDR:
      case SPARC_BUILTIN_WRGSR:
      case SPARC_BUILTIN_BMASK:
      case SPARC_BUILTIN_CMASK8:
      case SPARC_BUILTIN_CMASK16:
      case SPARC_BUILTIN_CMASK32:
	break;

      default:
	return build_zero_cst (rtype);
      }

  switch (code)
    {
    case SPARC_BUILTIN_FEXPAND:
      arg0 = args[0];
      STRIP_NOPS (arg0);

      /* fexpand of a constant vector: each 8-bit element becomes the
	 16-bit element shifted left by 4.  */
      if (TREE_CODE (arg0) == VECTOR_CST)
	{
	  tree inner_type = TREE_TYPE (rtype);
	  tree *n_elts;
	  unsigned i;

	  n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
	  for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
	    n_elts[i] = build_int_cst (inner_type,
				       TREE_INT_CST_LOW
					 (VECTOR_CST_ELT (arg0, i)) << 4);
	  return build_vector (rtype, n_elts);
	}
      break;

    case SPARC_BUILTIN_FMUL8X16:
    case SPARC_BUILTIN_FMUL8X16AU:
    case SPARC_BUILTIN_FMUL8X16AL:
      arg0 = args[0];
      arg1 = args[1];
      STRIP_NOPS (arg0);
      STRIP_NOPS (arg1);

      /* Constant-fold the 8x16 multiplies via the helper above.  */
      if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
	{
	  tree inner_type = TREE_TYPE (rtype);
	  tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
	  sparc_handle_vis_mul8x16 (n_elts, code, inner_type, arg0, arg1);
	  return build_vector (rtype, n_elts);
	}
      break;

    case SPARC_BUILTIN_FPMERGE:
      arg0 = args[0];
      arg1 = args[1];
      STRIP_NOPS (arg0);
      STRIP_NOPS (arg1);

      /* fpmerge of constant vectors: interleave the elements.  */
      if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
	{
	  tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
	  unsigned i;
	  for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
	    {
	      n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
	      n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
	    }

	  return build_vector (rtype, n_elts);
	}
      break;

    case SPARC_BUILTIN_PDIST:
    case SPARC_BUILTIN_PDISTN:
      arg0 = args[0];
      arg1 = args[1];
      STRIP_NOPS (arg0);
      STRIP_NOPS (arg1);
      /* PDIST accumulates into a third argument; PDISTN starts at 0.  */
      if (code == SPARC_BUILTIN_PDIST)
	{
	  arg2 = args[2];
	  STRIP_NOPS (arg2);
	}
      else
	arg2 = integer_zero_node;

      if (TREE_CODE (arg0) == VECTOR_CST
	  && TREE_CODE (arg1) == VECTOR_CST
	  && TREE_CODE (arg2) == INTEGER_CST)
	{
	  bool overflow = false;
	  double_int result = TREE_INT_CST (arg2);
	  double_int tmp;
	  unsigned i;

	  /* Sum of absolute differences of the byte elements, added to
	     the accumulator.  */
	  for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
	    {
	      double_int e0 = TREE_INT_CST (VECTOR_CST_ELT (arg0, i));
	      double_int e1 = TREE_INT_CST (VECTOR_CST_ELT (arg1, i));

	      bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;

	      /* tmp = |e0 - e1|, tracking overflow at each step.  */
	      tmp = e1.neg_with_overflow (&neg1_ovf);
	      tmp = e0.add_with_sign (tmp, false, &add1_ovf);
	      if (tmp.is_negative ())
		tmp = tmp.neg_with_overflow (&neg2_ovf);
	      else
		neg2_ovf = false;
	      result = result.add_with_sign (tmp, false, &add2_ovf);
	      overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
	    }

	  gcc_assert (!overflow);

	  return build_int_cst_wide (rtype, result.low, result.high);
	}
      /* FALLTHRU */

    default:
      break;
    }

  return NULL_TREE;
}
10946
10947 /* ??? This duplicates information provided to the compiler by the
10948 ??? scheduler description. Some day, teach genautomata to output
10949 ??? the latencies and then CSE will just use that. */
10950
10951 static bool
sparc_rtx_costs(rtx x,int code,int outer_code,int opno ATTRIBUTE_UNUSED,int * total,bool speed ATTRIBUTE_UNUSED)10952 sparc_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10953 int *total, bool speed ATTRIBUTE_UNUSED)
10954 {
10955 enum machine_mode mode = GET_MODE (x);
10956 bool float_mode_p = FLOAT_MODE_P (mode);
10957
10958 switch (code)
10959 {
10960 case CONST_INT:
10961 if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
10962 {
10963 *total = 0;
10964 return true;
10965 }
10966 /* FALLTHRU */
10967
10968 case HIGH:
10969 *total = 2;
10970 return true;
10971
10972 case CONST:
10973 case LABEL_REF:
10974 case SYMBOL_REF:
10975 *total = 4;
10976 return true;
10977
10978 case CONST_DOUBLE:
10979 if (GET_MODE (x) == VOIDmode
10980 && ((CONST_DOUBLE_HIGH (x) == 0
10981 && CONST_DOUBLE_LOW (x) < 0x1000)
10982 || (CONST_DOUBLE_HIGH (x) == -1
10983 && CONST_DOUBLE_LOW (x) < 0
10984 && CONST_DOUBLE_LOW (x) >= -0x1000)))
10985 *total = 0;
10986 else
10987 *total = 8;
10988 return true;
10989
10990 case MEM:
10991 /* If outer-code was a sign or zero extension, a cost
10992 of COSTS_N_INSNS (1) was already added in. This is
10993 why we are subtracting it back out. */
10994 if (outer_code == ZERO_EXTEND)
10995 {
10996 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
10997 }
10998 else if (outer_code == SIGN_EXTEND)
10999 {
11000 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11001 }
11002 else if (float_mode_p)
11003 {
11004 *total = sparc_costs->float_load;
11005 }
11006 else
11007 {
11008 *total = sparc_costs->int_load;
11009 }
11010
11011 return true;
11012
11013 case PLUS:
11014 case MINUS:
11015 if (float_mode_p)
11016 *total = sparc_costs->float_plusminus;
11017 else
11018 *total = COSTS_N_INSNS (1);
11019 return false;
11020
11021 case FMA:
11022 {
11023 rtx sub;
11024
11025 gcc_assert (float_mode_p);
11026 *total = sparc_costs->float_mul;
11027
11028 sub = XEXP (x, 0);
11029 if (GET_CODE (sub) == NEG)
11030 sub = XEXP (sub, 0);
11031 *total += rtx_cost (sub, FMA, 0, speed);
11032
11033 sub = XEXP (x, 2);
11034 if (GET_CODE (sub) == NEG)
11035 sub = XEXP (sub, 0);
11036 *total += rtx_cost (sub, FMA, 2, speed);
11037 return true;
11038 }
11039
11040 case MULT:
11041 if (float_mode_p)
11042 *total = sparc_costs->float_mul;
11043 else if (! TARGET_HARD_MUL)
11044 *total = COSTS_N_INSNS (25);
11045 else
11046 {
11047 int bit_cost;
11048
11049 bit_cost = 0;
11050 if (sparc_costs->int_mul_bit_factor)
11051 {
11052 int nbits;
11053
11054 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11055 {
11056 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
11057 for (nbits = 0; value != 0; value &= value - 1)
11058 nbits++;
11059 }
11060 else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
11061 && GET_MODE (XEXP (x, 1)) == VOIDmode)
11062 {
11063 rtx x1 = XEXP (x, 1);
11064 unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1);
11065 unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1);
11066
11067 for (nbits = 0; value1 != 0; value1 &= value1 - 1)
11068 nbits++;
11069 for (; value2 != 0; value2 &= value2 - 1)
11070 nbits++;
11071 }
11072 else
11073 nbits = 7;
11074
11075 if (nbits < 3)
11076 nbits = 3;
11077 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11078 bit_cost = COSTS_N_INSNS (bit_cost);
11079 }
11080
11081 if (mode == DImode)
11082 *total = sparc_costs->int_mulX + bit_cost;
11083 else
11084 *total = sparc_costs->int_mul + bit_cost;
11085 }
11086 return false;
11087
11088 case ASHIFT:
11089 case ASHIFTRT:
11090 case LSHIFTRT:
11091 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11092 return false;
11093
11094 case DIV:
11095 case UDIV:
11096 case MOD:
11097 case UMOD:
11098 if (float_mode_p)
11099 {
11100 if (mode == DFmode)
11101 *total = sparc_costs->float_div_df;
11102 else
11103 *total = sparc_costs->float_div_sf;
11104 }
11105 else
11106 {
11107 if (mode == DImode)
11108 *total = sparc_costs->int_divX;
11109 else
11110 *total = sparc_costs->int_div;
11111 }
11112 return false;
11113
11114 case NEG:
11115 if (! float_mode_p)
11116 {
11117 *total = COSTS_N_INSNS (1);
11118 return false;
11119 }
11120 /* FALLTHRU */
11121
11122 case ABS:
11123 case FLOAT:
11124 case UNSIGNED_FLOAT:
11125 case FIX:
11126 case UNSIGNED_FIX:
11127 case FLOAT_EXTEND:
11128 case FLOAT_TRUNCATE:
11129 *total = sparc_costs->float_move;
11130 return false;
11131
11132 case SQRT:
11133 if (mode == DFmode)
11134 *total = sparc_costs->float_sqrt_df;
11135 else
11136 *total = sparc_costs->float_sqrt_sf;
11137 return false;
11138
11139 case COMPARE:
11140 if (float_mode_p)
11141 *total = sparc_costs->float_cmp;
11142 else
11143 *total = COSTS_N_INSNS (1);
11144 return false;
11145
11146 case IF_THEN_ELSE:
11147 if (float_mode_p)
11148 *total = sparc_costs->float_cmove;
11149 else
11150 *total = sparc_costs->int_cmove;
11151 return false;
11152
11153 case IOR:
11154 /* Handle the NAND vector patterns. */
11155 if (sparc_vector_mode_supported_p (GET_MODE (x))
11156 && GET_CODE (XEXP (x, 0)) == NOT
11157 && GET_CODE (XEXP (x, 1)) == NOT)
11158 {
11159 *total = COSTS_N_INSNS (1);
11160 return true;
11161 }
11162 else
11163 return false;
11164
11165 default:
11166 return false;
11167 }
11168 }
11169
11170 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
11171
11172 static inline bool
general_or_i64_p(reg_class_t rclass)11173 general_or_i64_p (reg_class_t rclass)
11174 {
11175 return (rclass == GENERAL_REGS || rclass == I64_REGS);
11176 }
11177
11178 /* Implement TARGET_REGISTER_MOVE_COST. */
11179
11180 static int
sparc_register_move_cost(enum machine_mode mode ATTRIBUTE_UNUSED,reg_class_t from,reg_class_t to)11181 sparc_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
11182 reg_class_t from, reg_class_t to)
11183 {
11184 bool need_memory = false;
11185
11186 if (from == FPCC_REGS || to == FPCC_REGS)
11187 need_memory = true;
11188 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
11189 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
11190 {
11191 if (TARGET_VIS3)
11192 {
11193 int size = GET_MODE_SIZE (mode);
11194 if (size == 8 || size == 4)
11195 {
11196 if (! TARGET_ARCH32 || size == 4)
11197 return 4;
11198 else
11199 return 6;
11200 }
11201 }
11202 need_memory = true;
11203 }
11204
11205 if (need_memory)
11206 {
11207 if (sparc_cpu == PROCESSOR_ULTRASPARC
11208 || sparc_cpu == PROCESSOR_ULTRASPARC3
11209 || sparc_cpu == PROCESSOR_NIAGARA
11210 || sparc_cpu == PROCESSOR_NIAGARA2
11211 || sparc_cpu == PROCESSOR_NIAGARA3
11212 || sparc_cpu == PROCESSOR_NIAGARA4)
11213 return 12;
11214
11215 return 6;
11216 }
11217
11218 return 2;
11219 }
11220
11221 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
11222 This is achieved by means of a manual dynamic stack space allocation in
11223 the current frame. We make the assumption that SEQ doesn't contain any
11224 function calls, with the possible exception of calls to the GOT helper. */
11225
11226 static void
emit_and_preserve(rtx seq,rtx reg,rtx reg2)11227 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
11228 {
11229 /* We must preserve the lowest 16 words for the register save area. */
11230 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
11231 /* We really need only 2 words of fresh stack space. */
11232 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
11233
11234 rtx slot
11235 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
11236 SPARC_STACK_BIAS + offset));
11237
11238 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
11239 emit_insn (gen_rtx_SET (VOIDmode, slot, reg));
11240 if (reg2)
11241 emit_insn (gen_rtx_SET (VOIDmode,
11242 adjust_address (slot, word_mode, UNITS_PER_WORD),
11243 reg2));
11244 emit_insn (seq);
11245 if (reg2)
11246 emit_insn (gen_rtx_SET (VOIDmode,
11247 reg2,
11248 adjust_address (slot, word_mode, UNITS_PER_WORD)));
11249 emit_insn (gen_rtx_SET (VOIDmode, reg, slot));
11250 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
11251 }
11252
11253 /* Output the assembler code for a thunk function. THUNK_DECL is the
11254 declaration for the thunk function itself, FUNCTION is the decl for
11255 the target function. DELTA is an immediate constant offset to be
11256 added to THIS. If VCALL_OFFSET is nonzero, the word at address
11257 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
11258
11259 static void
sparc_output_mi_thunk(FILE * file,tree thunk_fndecl ATTRIBUTE_UNUSED,HOST_WIDE_INT delta,HOST_WIDE_INT vcall_offset,tree function)11260 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11261 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11262 tree function)
11263 {
11264 rtx this_rtx, insn, funexp;
11265 unsigned int int_arg_first;
11266
11267 reload_completed = 1;
11268 epilogue_completed = 1;
11269
11270 emit_note (NOTE_INSN_PROLOGUE_END);
11271
11272 if (TARGET_FLAT)
11273 {
11274 sparc_leaf_function_p = 1;
11275
11276 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11277 }
11278 else if (flag_delayed_branch)
11279 {
11280 /* We will emit a regular sibcall below, so we need to instruct
11281 output_sibcall that we are in a leaf function. */
11282 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
11283
11284 /* This will cause final.c to invoke leaf_renumber_regs so we
11285 must behave as if we were in a not-yet-leafified function. */
11286 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
11287 }
11288 else
11289 {
11290 /* We will emit the sibcall manually below, so we will need to
11291 manually spill non-leaf registers. */
11292 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
11293
11294 /* We really are in a leaf function. */
11295 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11296 }
11297
11298 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
11299 returns a structure, the structure return pointer is there instead. */
11300 if (TARGET_ARCH64
11301 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11302 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
11303 else
11304 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
11305
11306 /* Add DELTA. When possible use a plain add, otherwise load it into
11307 a register first. */
11308 if (delta)
11309 {
11310 rtx delta_rtx = GEN_INT (delta);
11311
11312 if (! SPARC_SIMM13_P (delta))
11313 {
11314 rtx scratch = gen_rtx_REG (Pmode, 1);
11315 emit_move_insn (scratch, delta_rtx);
11316 delta_rtx = scratch;
11317 }
11318
11319 /* THIS_RTX += DELTA. */
11320 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
11321 }
11322
11323 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
11324 if (vcall_offset)
11325 {
11326 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
11327 rtx scratch = gen_rtx_REG (Pmode, 1);
11328
11329 gcc_assert (vcall_offset < 0);
11330
11331 /* SCRATCH = *THIS_RTX. */
11332 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
11333
11334 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
11335 may not have any available scratch register at this point. */
11336 if (SPARC_SIMM13_P (vcall_offset))
11337 ;
11338 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
11339 else if (! fixed_regs[5]
11340 /* The below sequence is made up of at least 2 insns,
11341 while the default method may need only one. */
11342 && vcall_offset < -8192)
11343 {
11344 rtx scratch2 = gen_rtx_REG (Pmode, 5);
11345 emit_move_insn (scratch2, vcall_offset_rtx);
11346 vcall_offset_rtx = scratch2;
11347 }
11348 else
11349 {
11350 rtx increment = GEN_INT (-4096);
11351
11352 /* VCALL_OFFSET is a negative number whose typical range can be
11353 estimated as -32768..0 in 32-bit mode. In almost all cases
11354 it is therefore cheaper to emit multiple add insns than
11355 spilling and loading the constant into a register (at least
11356 6 insns). */
11357 while (! SPARC_SIMM13_P (vcall_offset))
11358 {
11359 emit_insn (gen_add2_insn (scratch, increment));
11360 vcall_offset += 4096;
11361 }
11362 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
11363 }
11364
11365 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
11366 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
11367 gen_rtx_PLUS (Pmode,
11368 scratch,
11369 vcall_offset_rtx)));
11370
11371 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
11372 emit_insn (gen_add2_insn (this_rtx, scratch));
11373 }
11374
11375 /* Generate a tail call to the target function. */
11376 if (! TREE_USED (function))
11377 {
11378 assemble_external (function);
11379 TREE_USED (function) = 1;
11380 }
11381 funexp = XEXP (DECL_RTL (function), 0);
11382
11383 if (flag_delayed_branch)
11384 {
11385 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11386 insn = emit_call_insn (gen_sibcall (funexp));
11387 SIBLING_CALL_P (insn) = 1;
11388 }
11389 else
11390 {
11391 /* The hoops we have to jump through in order to generate a sibcall
11392 without using delay slots... */
11393 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
11394
11395 if (flag_pic)
11396 {
11397 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
11398 start_sequence ();
11399 load_got_register (); /* clobbers %o7 */
11400 scratch = sparc_legitimize_pic_address (funexp, scratch);
11401 seq = get_insns ();
11402 end_sequence ();
11403 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
11404 }
11405 else if (TARGET_ARCH32)
11406 {
11407 emit_insn (gen_rtx_SET (VOIDmode,
11408 scratch,
11409 gen_rtx_HIGH (SImode, funexp)));
11410 emit_insn (gen_rtx_SET (VOIDmode,
11411 scratch,
11412 gen_rtx_LO_SUM (SImode, scratch, funexp)));
11413 }
11414 else /* TARGET_ARCH64 */
11415 {
11416 switch (sparc_cmodel)
11417 {
11418 case CM_MEDLOW:
11419 case CM_MEDMID:
11420 /* The destination can serve as a temporary. */
11421 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
11422 break;
11423
11424 case CM_MEDANY:
11425 case CM_EMBMEDANY:
11426 /* The destination cannot serve as a temporary. */
11427 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
11428 start_sequence ();
11429 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
11430 seq = get_insns ();
11431 end_sequence ();
11432 emit_and_preserve (seq, spill_reg, 0);
11433 break;
11434
11435 default:
11436 gcc_unreachable ();
11437 }
11438 }
11439
11440 emit_jump_insn (gen_indirect_jump (scratch));
11441 }
11442
11443 emit_barrier ();
11444
11445 /* Run just enough of rest_of_compilation to get the insns emitted.
11446 There's not really enough bulk here to make other passes such as
11447 instruction scheduling worth while. Note that use_thunk calls
11448 assemble_start_function and assemble_end_function. */
11449 insn = get_insns ();
11450 shorten_branches (insn);
11451 final_start_function (insn, file, 1);
11452 final (insn, file, 1);
11453 final_end_function ();
11454
11455 reload_completed = 0;
11456 epilogue_completed = 0;
11457 }
11458
11459 /* Return true if sparc_output_mi_thunk would be able to output the
11460 assembler code for the thunk function specified by the arguments
11461 it is passed, and false otherwise. */
11462 static bool
sparc_can_output_mi_thunk(const_tree thunk_fndecl ATTRIBUTE_UNUSED,HOST_WIDE_INT delta ATTRIBUTE_UNUSED,HOST_WIDE_INT vcall_offset,const_tree function ATTRIBUTE_UNUSED)11463 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
11464 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
11465 HOST_WIDE_INT vcall_offset,
11466 const_tree function ATTRIBUTE_UNUSED)
11467 {
11468 /* Bound the loop used in the default method above. */
11469 return (vcall_offset >= -32768 || ! fixed_regs[5]);
11470 }
11471
/* How to allocate a 'struct machine_function'.  Implements
   init_machine_status: return a zeroed, GC-allocated instance.  */

static struct machine_function *
sparc_init_machine_status (void)
{
  return ggc_alloc_cleared_machine_function ();
}
11479
11480 /* Locate some local-dynamic symbol still in use by this function
11481 so that we can print its name in local-dynamic base patterns. */
11482
11483 static const char *
get_some_local_dynamic_name(void)11484 get_some_local_dynamic_name (void)
11485 {
11486 rtx insn;
11487
11488 if (cfun->machine->some_ld_name)
11489 return cfun->machine->some_ld_name;
11490
11491 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
11492 if (INSN_P (insn)
11493 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
11494 return cfun->machine->some_ld_name;
11495
11496 gcc_unreachable ();
11497 }
11498
11499 static int
get_some_local_dynamic_name_1(rtx * px,void * data ATTRIBUTE_UNUSED)11500 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
11501 {
11502 rtx x = *px;
11503
11504 if (x
11505 && GET_CODE (x) == SYMBOL_REF
11506 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
11507 {
11508 cfun->machine->some_ld_name = XSTR (x, 0);
11509 return 1;
11510 }
11511
11512 return 0;
11513 }
11514
11515 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11516 We need to emit DTP-relative relocations. */
11517
11518 static void
sparc_output_dwarf_dtprel(FILE * file,int size,rtx x)11519 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
11520 {
11521 switch (size)
11522 {
11523 case 4:
11524 fputs ("\t.word\t%r_tls_dtpoff32(", file);
11525 break;
11526 case 8:
11527 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
11528 break;
11529 default:
11530 gcc_unreachable ();
11531 }
11532 output_addr_const (file, x);
11533 fputs (")", file);
11534 }
11535
11536 /* Do whatever processing is required at the end of a file. */
11537
11538 static void
sparc_file_end(void)11539 sparc_file_end (void)
11540 {
11541 /* If we need to emit the special GOT helper function, do so now. */
11542 if (got_helper_rtx)
11543 {
11544 const char *name = XSTR (got_helper_rtx, 0);
11545 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
11546 #ifdef DWARF2_UNWIND_INFO
11547 bool do_cfi;
11548 #endif
11549
11550 if (USE_HIDDEN_LINKONCE)
11551 {
11552 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11553 get_identifier (name),
11554 build_function_type_list (void_type_node,
11555 NULL_TREE));
11556 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11557 NULL_TREE, void_type_node);
11558 TREE_PUBLIC (decl) = 1;
11559 TREE_STATIC (decl) = 1;
11560 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
11561 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
11562 DECL_VISIBILITY_SPECIFIED (decl) = 1;
11563 resolve_unique_section (decl, 0, flag_function_sections);
11564 allocate_struct_function (decl, true);
11565 cfun->is_thunk = 1;
11566 current_function_decl = decl;
11567 init_varasm_status ();
11568 assemble_start_function (decl, name);
11569 }
11570 else
11571 {
11572 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
11573 switch_to_section (text_section);
11574 if (align > 0)
11575 ASM_OUTPUT_ALIGN (asm_out_file, align);
11576 ASM_OUTPUT_LABEL (asm_out_file, name);
11577 }
11578
11579 #ifdef DWARF2_UNWIND_INFO
11580 do_cfi = dwarf2out_do_cfi_asm ();
11581 if (do_cfi)
11582 fprintf (asm_out_file, "\t.cfi_startproc\n");
11583 #endif
11584 if (flag_delayed_branch)
11585 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
11586 reg_name, reg_name);
11587 else
11588 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
11589 reg_name, reg_name);
11590 #ifdef DWARF2_UNWIND_INFO
11591 if (do_cfi)
11592 fprintf (asm_out_file, "\t.cfi_endproc\n");
11593 #endif
11594 }
11595
11596 if (NEED_INDICATE_EXEC_STACK)
11597 file_end_indicate_exec_stack ();
11598
11599 #ifdef TARGET_SOLARIS
11600 solaris_file_end ();
11601 #endif
11602 }
11603
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
/* Implement TARGET_MANGLE_TYPE.  Mangle the 32-bit 128-bit long double
   as "g" (__float128 style); defer everything else to the default.  */

static const char *
sparc_mangle_type (const_tree type)
{
  if (!TARGET_64BIT
      && TYPE_MAIN_VARIANT (type) == long_double_type_node
      && TARGET_LONG_DOUBLE_128)
    return "g";

  /* For all other types, use normal C++ mangling.  */
  return NULL;
}
#endif
11619
11620 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
11621 and BEFORE_AFTER arguments of the form X_Y. They are two-bit masks where
11622 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
11623
11624 void
sparc_emit_membar_for_model(enum memmodel model,int load_store,int before_after)11625 sparc_emit_membar_for_model (enum memmodel model,
11626 int load_store, int before_after)
11627 {
11628 /* Bits for the MEMBAR mmask field. */
11629 const int LoadLoad = 1;
11630 const int StoreLoad = 2;
11631 const int LoadStore = 4;
11632 const int StoreStore = 8;
11633
11634 int mm = 0, implied = 0;
11635
11636 switch (sparc_memory_model)
11637 {
11638 case SMM_SC:
11639 /* Sequential Consistency. All memory transactions are immediately
11640 visible in sequential execution order. No barriers needed. */
11641 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
11642 break;
11643
11644 case SMM_TSO:
11645 /* Total Store Ordering: all memory transactions with store semantics
11646 are followed by an implied StoreStore. */
11647 implied |= StoreStore;
11648
11649 /* If we're not looking for a raw barrer (before+after), then atomic
11650 operations get the benefit of being both load and store. */
11651 if (load_store == 3 && before_after == 1)
11652 implied |= StoreLoad;
11653 /* FALLTHRU */
11654
11655 case SMM_PSO:
11656 /* Partial Store Ordering: all memory transactions with load semantics
11657 are followed by an implied LoadLoad | LoadStore. */
11658 implied |= LoadLoad | LoadStore;
11659
11660 /* If we're not looking for a raw barrer (before+after), then atomic
11661 operations get the benefit of being both load and store. */
11662 if (load_store == 3 && before_after == 2)
11663 implied |= StoreLoad | StoreStore;
11664 /* FALLTHRU */
11665
11666 case SMM_RMO:
11667 /* Relaxed Memory Ordering: no implicit bits. */
11668 break;
11669
11670 default:
11671 gcc_unreachable ();
11672 }
11673
11674 if (before_after & 1)
11675 {
11676 if (model == MEMMODEL_RELEASE
11677 || model == MEMMODEL_ACQ_REL
11678 || model == MEMMODEL_SEQ_CST)
11679 {
11680 if (load_store & 1)
11681 mm |= LoadLoad | StoreLoad;
11682 if (load_store & 2)
11683 mm |= LoadStore | StoreStore;
11684 }
11685 }
11686 if (before_after & 2)
11687 {
11688 if (model == MEMMODEL_ACQUIRE
11689 || model == MEMMODEL_ACQ_REL
11690 || model == MEMMODEL_SEQ_CST)
11691 {
11692 if (load_store & 1)
11693 mm |= LoadLoad | LoadStore;
11694 if (load_store & 2)
11695 mm |= StoreLoad | StoreStore;
11696 }
11697 }
11698
11699 /* Remove the bits implied by the system memory model. */
11700 mm &= ~implied;
11701
11702 /* For raw barriers (before+after), always emit a barrier.
11703 This will become a compile-time barrier if needed. */
11704 if (mm || before_after == 3)
11705 emit_insn (gen_membar (GEN_INT (mm)));
11706 }
11707
11708 /* Expand code to perform a 8 or 16-bit compare and swap by doing 32-bit
11709 compare and swap on the word containing the byte or half-word. */
11710
11711 static void
sparc_expand_compare_and_swap_12(rtx bool_result,rtx result,rtx mem,rtx oldval,rtx newval)11712 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
11713 rtx oldval, rtx newval)
11714 {
11715 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
11716 rtx addr = gen_reg_rtx (Pmode);
11717 rtx off = gen_reg_rtx (SImode);
11718 rtx oldv = gen_reg_rtx (SImode);
11719 rtx newv = gen_reg_rtx (SImode);
11720 rtx oldvalue = gen_reg_rtx (SImode);
11721 rtx newvalue = gen_reg_rtx (SImode);
11722 rtx res = gen_reg_rtx (SImode);
11723 rtx resv = gen_reg_rtx (SImode);
11724 rtx memsi, val, mask, end_label, loop_label, cc;
11725
11726 emit_insn (gen_rtx_SET (VOIDmode, addr,
11727 gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
11728
11729 if (Pmode != SImode)
11730 addr1 = gen_lowpart (SImode, addr1);
11731 emit_insn (gen_rtx_SET (VOIDmode, off,
11732 gen_rtx_AND (SImode, addr1, GEN_INT (3))));
11733
11734 memsi = gen_rtx_MEM (SImode, addr);
11735 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
11736 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
11737
11738 val = copy_to_reg (memsi);
11739
11740 emit_insn (gen_rtx_SET (VOIDmode, off,
11741 gen_rtx_XOR (SImode, off,
11742 GEN_INT (GET_MODE (mem) == QImode
11743 ? 3 : 2))));
11744
11745 emit_insn (gen_rtx_SET (VOIDmode, off,
11746 gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
11747
11748 if (GET_MODE (mem) == QImode)
11749 mask = force_reg (SImode, GEN_INT (0xff));
11750 else
11751 mask = force_reg (SImode, GEN_INT (0xffff));
11752
11753 emit_insn (gen_rtx_SET (VOIDmode, mask,
11754 gen_rtx_ASHIFT (SImode, mask, off)));
11755
11756 emit_insn (gen_rtx_SET (VOIDmode, val,
11757 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11758 val)));
11759
11760 oldval = gen_lowpart (SImode, oldval);
11761 emit_insn (gen_rtx_SET (VOIDmode, oldv,
11762 gen_rtx_ASHIFT (SImode, oldval, off)));
11763
11764 newval = gen_lowpart_common (SImode, newval);
11765 emit_insn (gen_rtx_SET (VOIDmode, newv,
11766 gen_rtx_ASHIFT (SImode, newval, off)));
11767
11768 emit_insn (gen_rtx_SET (VOIDmode, oldv,
11769 gen_rtx_AND (SImode, oldv, mask)));
11770
11771 emit_insn (gen_rtx_SET (VOIDmode, newv,
11772 gen_rtx_AND (SImode, newv, mask)));
11773
11774 end_label = gen_label_rtx ();
11775 loop_label = gen_label_rtx ();
11776 emit_label (loop_label);
11777
11778 emit_insn (gen_rtx_SET (VOIDmode, oldvalue,
11779 gen_rtx_IOR (SImode, oldv, val)));
11780
11781 emit_insn (gen_rtx_SET (VOIDmode, newvalue,
11782 gen_rtx_IOR (SImode, newv, val)));
11783
11784 emit_move_insn (bool_result, const1_rtx);
11785
11786 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
11787
11788 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
11789
11790 emit_insn (gen_rtx_SET (VOIDmode, resv,
11791 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
11792 res)));
11793
11794 emit_move_insn (bool_result, const0_rtx);
11795
11796 cc = gen_compare_reg_1 (NE, resv, val);
11797 emit_insn (gen_rtx_SET (VOIDmode, val, resv));
11798
11799 /* Use cbranchcc4 to separate the compare and branch! */
11800 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
11801 cc, const0_rtx, loop_label));
11802
11803 emit_label (end_label);
11804
11805 emit_insn (gen_rtx_SET (VOIDmode, res,
11806 gen_rtx_AND (SImode, res, mask)));
11807
11808 emit_insn (gen_rtx_SET (VOIDmode, res,
11809 gen_rtx_LSHIFTRT (SImode, res, off)));
11810
11811 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
11812 }
11813
11814 /* Expand code to perform a compare-and-swap. */
11815
11816 void
sparc_expand_compare_and_swap(rtx operands[])11817 sparc_expand_compare_and_swap (rtx operands[])
11818 {
11819 rtx bval, retval, mem, oldval, newval;
11820 enum machine_mode mode;
11821 enum memmodel model;
11822
11823 bval = operands[0];
11824 retval = operands[1];
11825 mem = operands[2];
11826 oldval = operands[3];
11827 newval = operands[4];
11828 model = (enum memmodel) INTVAL (operands[6]);
11829 mode = GET_MODE (mem);
11830
11831 sparc_emit_membar_for_model (model, 3, 1);
11832
11833 if (reg_overlap_mentioned_p (retval, oldval))
11834 oldval = copy_to_reg (oldval);
11835
11836 if (mode == QImode || mode == HImode)
11837 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
11838 else
11839 {
11840 rtx (*gen) (rtx, rtx, rtx, rtx);
11841 rtx x;
11842
11843 if (mode == SImode)
11844 gen = gen_atomic_compare_and_swapsi_1;
11845 else
11846 gen = gen_atomic_compare_and_swapdi_1;
11847 emit_insn (gen (retval, mem, oldval, newval));
11848
11849 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
11850 if (x != bval)
11851 convert_move (bval, x, 1);
11852 }
11853
11854 sparc_emit_membar_for_model (model, 3, 2);
11855 }
11856
11857 void
sparc_expand_vec_perm_bmask(enum machine_mode vmode,rtx sel)11858 sparc_expand_vec_perm_bmask (enum machine_mode vmode, rtx sel)
11859 {
11860 rtx t_1, t_2, t_3;
11861
11862 sel = gen_lowpart (DImode, sel);
11863 switch (vmode)
11864 {
11865 case V2SImode:
11866 /* inp = xxxxxxxAxxxxxxxB */
11867 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11868 NULL_RTX, 1, OPTAB_DIRECT);
11869 /* t_1 = ....xxxxxxxAxxx. */
11870 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11871 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
11872 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11873 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
11874 /* sel = .......B */
11875 /* t_1 = ...A.... */
11876 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11877 /* sel = ...A...B */
11878 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
11879 /* sel = AAAABBBB * 4 */
11880 t_1 = force_reg (SImode, GEN_INT (0x01230123));
11881 /* sel = { A*4, A*4+1, A*4+2, ... } */
11882 break;
11883
11884 case V4HImode:
11885 /* inp = xxxAxxxBxxxCxxxD */
11886 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11887 NULL_RTX, 1, OPTAB_DIRECT);
11888 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11889 NULL_RTX, 1, OPTAB_DIRECT);
11890 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
11891 NULL_RTX, 1, OPTAB_DIRECT);
11892 /* t_1 = ..xxxAxxxBxxxCxx */
11893 /* t_2 = ....xxxAxxxBxxxC */
11894 /* t_3 = ......xxxAxxxBxx */
11895 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
11896 GEN_INT (0x07),
11897 NULL_RTX, 1, OPTAB_DIRECT);
11898 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
11899 GEN_INT (0x0700),
11900 NULL_RTX, 1, OPTAB_DIRECT);
11901 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
11902 GEN_INT (0x070000),
11903 NULL_RTX, 1, OPTAB_DIRECT);
11904 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
11905 GEN_INT (0x07000000),
11906 NULL_RTX, 1, OPTAB_DIRECT);
11907 /* sel = .......D */
11908 /* t_1 = .....C.. */
11909 /* t_2 = ...B.... */
11910 /* t_3 = .A...... */
11911 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
11912 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
11913 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
11914 /* sel = .A.B.C.D */
11915 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
11916 /* sel = AABBCCDD * 2 */
11917 t_1 = force_reg (SImode, GEN_INT (0x01010101));
11918 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
11919 break;
11920
11921 case V8QImode:
11922 /* input = xAxBxCxDxExFxGxH */
11923 sel = expand_simple_binop (DImode, AND, sel,
11924 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
11925 | 0x0f0f0f0f),
11926 NULL_RTX, 1, OPTAB_DIRECT);
11927 /* sel = .A.B.C.D.E.F.G.H */
11928 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
11929 NULL_RTX, 1, OPTAB_DIRECT);
11930 /* t_1 = ..A.B.C.D.E.F.G. */
11931 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11932 NULL_RTX, 1, OPTAB_DIRECT);
11933 /* sel = .AABBCCDDEEFFGGH */
11934 sel = expand_simple_binop (DImode, AND, sel,
11935 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
11936 | 0xff00ff),
11937 NULL_RTX, 1, OPTAB_DIRECT);
11938 /* sel = ..AB..CD..EF..GH */
11939 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
11940 NULL_RTX, 1, OPTAB_DIRECT);
11941 /* t_1 = ....AB..CD..EF.. */
11942 sel = expand_simple_binop (DImode, IOR, sel, t_1,
11943 NULL_RTX, 1, OPTAB_DIRECT);
11944 /* sel = ..ABABCDCDEFEFGH */
11945 sel = expand_simple_binop (DImode, AND, sel,
11946 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
11947 NULL_RTX, 1, OPTAB_DIRECT);
11948 /* sel = ....ABCD....EFGH */
11949 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
11950 NULL_RTX, 1, OPTAB_DIRECT);
11951 /* t_1 = ........ABCD.... */
11952 sel = gen_lowpart (SImode, sel);
11953 t_1 = gen_lowpart (SImode, t_1);
11954 break;
11955
11956 default:
11957 gcc_unreachable ();
11958 }
11959
11960 /* Always perform the final addition/merge within the bmask insn. */
11961 emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, t_1));
11962 }
11963
11964 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
11965
11966 static bool
sparc_frame_pointer_required(void)11967 sparc_frame_pointer_required (void)
11968 {
11969 /* If the stack pointer is dynamically modified in the function, it cannot
11970 serve as the frame pointer. */
11971 if (cfun->calls_alloca)
11972 return true;
11973
11974 /* If the function receives nonlocal gotos, it needs to save the frame
11975 pointer in the nonlocal_goto_save_area object. */
11976 if (cfun->has_nonlocal_label)
11977 return true;
11978
11979 /* In flat mode, that's it. */
11980 if (TARGET_FLAT)
11981 return false;
11982
11983 /* Otherwise, the frame pointer is required if the function isn't leaf. */
11984 return !(crtl->is_leaf && only_leaf_regs_used ());
11985 }
11986
11987 /* The way this is structured, we can't eliminate SFP in favor of SP
11988 if the frame pointer is required: we want to use the SFP->HFP elimination
11989 in that case. But the test in update_eliminables doesn't know we are
11990 assuming below that we only do the former elimination. */
11991
11992 static bool
sparc_can_eliminate(const int from ATTRIBUTE_UNUSED,const int to)11993 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
11994 {
11995 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
11996 }
11997
11998 /* Return the hard frame pointer directly to bypass the stack bias. */
11999
static rtx
sparc_builtin_setjmp_frame_value (void)
{
  /* Hand back the hard frame pointer itself rather than the (virtual)
     frame pointer, so the saved value is not subject to the stack bias
     applied on 64-bit SPARC.  */
  return hard_frame_pointer_rtx;
}
12005
12006 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
12007 they won't be allocated. */
12008
static void
sparc_conditional_register_usage (void)
{
  /* Reserve the PIC register, if one is in use.  */
  if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  /* If the user has passed -f{fixed,call-{used,saved}}-g5 */
  /* then honor it.  */
  if (TARGET_ARCH32 && fixed_regs[5])
    fixed_regs[5] = 1;
  else if (TARGET_ARCH64 && fixed_regs[5] == 2)
    fixed_regs[5] = 0;
  if (! TARGET_V9)
    {
      int regno;
      /* The upper floating-point registers only exist on V9.  */
      for (regno = SPARC_FIRST_V9_FP_REG;
           regno <= SPARC_LAST_V9_FP_REG;
           regno++)
        fixed_regs[regno] = 1;
      /* %fcc0 is used by v8 and v9.  */
      for (regno = SPARC_FIRST_V9_FCC_REG + 1;
           regno <= SPARC_LAST_V9_FCC_REG;
           regno++)
        fixed_regs[regno] = 1;
    }
  if (! TARGET_FPU)
    {
      int regno;
      /* Without an FPU, neither the fp registers nor the fp condition
         code registers can be allocated.  */
      for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
        fixed_regs[regno] = 1;
    }
  /* If the user has passed -f{fixed,call-{used,saved}}-g2 */
  /* then honor it.  Likewise with g3 and g4.  */
  if (fixed_regs[2] == 2)
    fixed_regs[2] = ! TARGET_APP_REGS;
  if (fixed_regs[3] == 2)
    fixed_regs[3] = ! TARGET_APP_REGS;
  if (TARGET_ARCH32 && fixed_regs[4] == 2)
    fixed_regs[4] = ! TARGET_APP_REGS;
  else if (TARGET_CM_EMBMEDANY)
    /* %g4 is reserved in the embedded medium/anywhere code model.  */
    fixed_regs[4] = 1;
  else if (fixed_regs[4] == 2)
    fixed_regs[4] = 0;
  if (TARGET_FLAT)
    {
      int regno;
      /* Disable leaf functions.  */
      memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
        leaf_reg_remap [regno] = regno;
    }
  if (TARGET_VIS)
    /* VIS instructions implicitly read and write the GSR.  */
    global_regs[SPARC_GSR_REG] = 1;
}
12065
12066 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
12067
12068 - We can't load constants into FP registers.
12069 - We can't load FP constants into integer registers when soft-float,
12070 because there is no soft-float pattern with a r/F constraint.
12071 - We can't load FP constants into integer registers for TFmode unless
12072 it is 0.0L, because there is no movtf pattern with a r/F constraint.
12073 - Try and reload integer constants (symbolic or otherwise) back into
12074 registers directly, rather than having them dumped to memory. */
12075
static reg_class_t
sparc_preferred_reload_class (rtx x, reg_class_t rclass)
{
  enum machine_mode mode = GET_MODE (x);
  if (CONSTANT_P (x))
    {
      /* Constants can never be loaded directly into FP registers;
         FP constants need the FPU; TFmode constants other than 0.0L
         have no movtf alternative with a r/F constraint.  */
      if (FP_REG_CLASS_P (rclass)
          || rclass == GENERAL_OR_FP_REGS
          || rclass == GENERAL_OR_EXTRA_FP_REGS
          || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
          || (mode == TFmode && ! const_zero_operand (x, mode)))
        return NO_REGS;

      /* Reload integer constants back into integer registers rather
         than spilling them to memory.  */
      if (GET_MODE_CLASS (mode) == MODE_INT)
        return GENERAL_REGS;

      /* Integer vector constants are only usable in FP registers, and
         only for the all-zeros / all-ones bit patterns.  */
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        {
          if (! FP_REG_CLASS_P (rclass)
              || !(const_zero_operand (x, mode)
                   || const_all_ones_operand (x, mode)))
            return NO_REGS;
        }
    }

  /* With VIS3 in 32-bit mode, int<->fp moves of 8-byte values go in
     4-byte pieces, which only works via FP_REGS: steer a value that
     currently lives in an integer register away from EXTRA_FP_REGS.  */
  if (TARGET_VIS3
      && ! TARGET_ARCH64
      && (rclass == EXTRA_FP_REGS
          || rclass == GENERAL_OR_EXTRA_FP_REGS))
    {
      int regno = true_regnum (x);

      if (SPARC_INT_REG_P (regno))
        return (rclass == EXTRA_FP_REGS
                ? FP_REGS : GENERAL_OR_FP_REGS);
    }

  return rclass;
}
12115
12116 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
12117 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
12118
const char *
output_v8plus_mult (rtx insn, rtx *operands, const char *opcode)
{
  char mulstr[32];

  /* V8+ only: on real 64-bit, a plain 64-bit multiply is used instead.  */
  gcc_assert (! TARGET_ARCH64);

  /* Make sure the low word of operand 1 is zero-extended in its
     64-bit register.  */
  if (sparc_check_64 (operands[1], insn) <= 0)
    output_asm_insn ("srl\t%L1, 0, %L1", operands);
  /* In alternative 1 the two halves can be combined in place: shift the
     high word into the upper 32 bits of its own register first.  */
  if (which_alternative == 1)
    output_asm_insn ("sllx\t%H1, 32, %H1", operands);
  if (GET_CODE (operands[2]) == CONST_INT)
    {
      /* Multiplication by an immediate: no need to assemble operand 2.  */
      if (which_alternative == 1)
        {
          output_asm_insn ("or\t%L1, %H1, %H1", operands);
          sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
          output_asm_insn (mulstr, operands);
          return "srlx\t%L0, 32, %H0";
        }
      else
        {
          /* Assemble operand 1 into scratch register %3 instead.  */
          output_asm_insn ("sllx\t%H1, 32, %3", operands);
          output_asm_insn ("or\t%L1, %3, %3", operands);
          sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
          output_asm_insn (mulstr, operands);
          output_asm_insn ("srlx\t%3, 32, %H0", operands);
          return "mov\t%3, %L0";
        }
    }
  else if (rtx_equal_p (operands[1], operands[2]))
    {
      /* Squaring: assemble operand 1 only once and multiply it by
         itself.  */
      if (which_alternative == 1)
        {
          output_asm_insn ("or\t%L1, %H1, %H1", operands);
          sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
          output_asm_insn (mulstr, operands);
          return "srlx\t%L0, 32, %H0";
        }
      else
        {
          output_asm_insn ("sllx\t%H1, 32, %3", operands);
          output_asm_insn ("or\t%L1, %3, %3", operands);
          sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
          output_asm_insn (mulstr, operands);
          output_asm_insn ("srlx\t%3, 32, %H0", operands);
          return "mov\t%3, %L0";
        }
    }
  /* General case: zero-extend the low word of operand 2 as well,
     then assemble both 64-bit values and multiply.  */
  if (sparc_check_64 (operands[2], insn) <= 0)
    output_asm_insn ("srl\t%L2, 0, %L2", operands);
  if (which_alternative == 1)
    {
      /* Operand 1's low-word register doubles as scratch for
         assembling operand 2.  */
      output_asm_insn ("or\t%L1, %H1, %H1", operands);
      output_asm_insn ("sllx\t%H2, 32, %L1", operands);
      output_asm_insn ("or\t%L2, %L1, %L1", operands);
      sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
      output_asm_insn (mulstr, operands);
      return "srlx\t%L0, 32, %H0";
    }
  else
    {
      /* Use scratch registers %3 and %4 to assemble the operands.  */
      output_asm_insn ("sllx\t%H1, 32, %3", operands);
      output_asm_insn ("sllx\t%H2, 32, %4", operands);
      output_asm_insn ("or\t%L1, %3, %3", operands);
      output_asm_insn ("or\t%L2, %4, %4", operands);
      sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
      output_asm_insn (mulstr, operands);
      output_asm_insn ("srlx\t%3, 32, %H0", operands);
      return "mov\t%3, %L0";
    }
}
12191
12192 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12193 all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE
12194 and INNER_MODE are the modes describing TARGET. */
12195
12196 static void
vector_init_bshuffle(rtx target,rtx elt,enum machine_mode mode,enum machine_mode inner_mode)12197 vector_init_bshuffle (rtx target, rtx elt, enum machine_mode mode,
12198 enum machine_mode inner_mode)
12199 {
12200 rtx t1, final_insn, sel;
12201 int bmask;
12202
12203 t1 = gen_reg_rtx (mode);
12204
12205 elt = convert_modes (SImode, inner_mode, elt, true);
12206 emit_move_insn (gen_lowpart(SImode, t1), elt);
12207
12208 switch (mode)
12209 {
12210 case V2SImode:
12211 final_insn = gen_bshufflev2si_vis (target, t1, t1);
12212 bmask = 0x45674567;
12213 break;
12214 case V4HImode:
12215 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
12216 bmask = 0x67676767;
12217 break;
12218 case V8QImode:
12219 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
12220 bmask = 0x77777777;
12221 break;
12222 default:
12223 gcc_unreachable ();
12224 }
12225
12226 sel = force_reg (SImode, GEN_INT (bmask));
12227 emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, const0_rtx));
12228 emit_insn (final_insn);
12229 }
12230
12231 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12232 all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn. */
12233
12234 static void
vector_init_fpmerge(rtx target,rtx elt)12235 vector_init_fpmerge (rtx target, rtx elt)
12236 {
12237 rtx t1, t2, t2_low, t3, t3_low;
12238
12239 t1 = gen_reg_rtx (V4QImode);
12240 elt = convert_modes (SImode, QImode, elt, true);
12241 emit_move_insn (gen_lowpart (SImode, t1), elt);
12242
12243 t2 = gen_reg_rtx (V8QImode);
12244 t2_low = gen_lowpart (V4QImode, t2);
12245 emit_insn (gen_fpmerge_vis (t2, t1, t1));
12246
12247 t3 = gen_reg_rtx (V8QImode);
12248 t3_low = gen_lowpart (V4QImode, t3);
12249 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
12250
12251 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
12252 }
12253
12254 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12255 all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn. */
12256
12257 static void
vector_init_faligndata(rtx target,rtx elt)12258 vector_init_faligndata (rtx target, rtx elt)
12259 {
12260 rtx t1 = gen_reg_rtx (V4HImode);
12261 int i;
12262
12263 elt = convert_modes (SImode, HImode, elt, true);
12264 emit_move_insn (gen_lowpart (SImode, t1), elt);
12265
12266 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
12267 force_reg (SImode, GEN_INT (6)),
12268 const0_rtx));
12269
12270 for (i = 0; i < 4; i++)
12271 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
12272 }
12273
12274 /* Emit code to initialize TARGET to values for individual fields VALS. */
12275
void
sparc_expand_vector_init (rtx target, rtx vals)
{
  const enum machine_mode mode = GET_MODE (target);
  const enum machine_mode inner_mode = GET_MODE_INNER (mode);
  const int n_elts = GET_MODE_NUNITS (mode);
  int i, n_var = 0;
  bool all_same;
  rtx mem;

  /* Count the non-constant elements and check whether all elements
     are identical.  */
  all_same = true;
  for (i = 0; i < n_elts; i++)
    {
      rtx x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
        n_var++;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  /* Fully constant vectors can be loaded as a CONST_VECTOR.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* Single-element vectors are just a move in the equivalently-sized
     integer mode.  */
  if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
    {
      if (GET_MODE_SIZE (inner_mode) == 4)
        {
          emit_move_insn (gen_lowpart (SImode, target),
                          gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
          return;
        }
      else if (GET_MODE_SIZE (inner_mode) == 8)
        {
          emit_move_insn (gen_lowpart (DImode, target),
                          gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
          return;
        }
    }
  /* Two word-sized elements: move them into the two word halves.  */
  else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
           && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
    {
      emit_move_insn (gen_highpart (word_mode, target),
                      gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
      emit_move_insn (gen_lowpart (word_mode, target),
                      gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
      return;
    }

  /* Splat of one variable element into an 8-byte vector: use the best
     VIS idiom available.  */
  if (all_same && GET_MODE_SIZE (mode) == 8)
    {
      if (TARGET_VIS2)
        {
          vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
          return;
        }
      if (mode == V8QImode)
        {
          vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
          return;
        }
      if (mode == V4HImode)
        {
          vector_init_faligndata (target, XVECEXP (vals, 0, 0));
          return;
        }
    }

  /* Fallback: build the vector in a stack temporary element by element
     and load the result.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
                                       i * GET_MODE_SIZE (inner_mode)),
                    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
12354
12355 /* Implement TARGET_SECONDARY_RELOAD. */
12356
static reg_class_t
sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
                        enum machine_mode mode, secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;

  sri->icode = CODE_FOR_nothing;
  sri->extra_cost = 0;

  /* We need a temporary when loading/storing a HImode/QImode value
     between memory and the FPU registers.  This can happen when combine puts
     a paradoxical subreg in a float/fix conversion insn.  */
  if (FP_REG_CLASS_P (rclass)
      && (mode == HImode || mode == QImode)
      && (GET_CODE (x) == MEM
          || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
              && true_regnum (x) == -1)))
    return GENERAL_REGS;

  /* On 32-bit we need a temporary when loading/storing a DFmode value
     between unaligned memory and the upper FPU registers.  */
  if (TARGET_ARCH32
      && rclass == EXTRA_FP_REGS
      && mode == DFmode
      && GET_CODE (x) == MEM
      && ! mem_min_alignment (x, 8))
    return FP_REGS;

  /* Symbolic operands in the medium/anywhere code models need the
     special reload_{in,out} patterns when not generating PIC.  */
  if (((TARGET_CM_MEDANY
        && symbolic_operand (x, mode))
       || (TARGET_CM_EMBMEDANY
           && text_segment_operand (x, mode)))
      && ! flag_pic)
    {
      if (in_p)
        sri->icode = direct_optab_handler (reload_in_optab, mode);
      else
        sri->icode = direct_optab_handler (reload_out_optab, mode);
      return NO_REGS;
    }

  if (TARGET_VIS3 && TARGET_ARCH32)
    {
      int regno = true_regnum (x);

      /* When using VIS3 fp<-->int register moves, on 32-bit we have
         to move 8-byte values in 4-byte pieces.  This only works via
         FP_REGS, and not via EXTRA_FP_REGS.  Therefore if we try to
         move between EXTRA_FP_REGS and GENERAL_REGS, we will need
         an FP_REGS intermediate move.  */
      if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
          || ((general_or_i64_p (rclass)
               || rclass == GENERAL_OR_FP_REGS)
              && SPARC_FP_REG_P (regno)))
        {
          sri->extra_cost = 2;
          return FP_REGS;
        }
    }

  return NO_REGS;
}
12419
12420 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
12421 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
12422
bool
sparc_expand_conditional_move (enum machine_mode mode, rtx *operands)
{
  enum rtx_code rc = GET_CODE (operands[1]);
  enum machine_mode cmp_mode;
  rtx cc_reg, dst, cmp;

  cmp = operands[1];
  /* DImode comparisons need 64-bit registers; punt on 32-bit.  */
  if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
    return false;

  /* Quad-float comparisons go through the soft-float library when
     there is no hardware quad support.  */
  if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
    cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);

  cmp_mode = GET_MODE (XEXP (cmp, 0));
  rc = GET_CODE (cmp);

  dst = operands[0];
  if (! rtx_equal_p (operands[2], dst)
      && ! rtx_equal_p (operands[3], dst))
    {
      /* Neither source equals the destination: put the "else" value in
         DST (or a fresh pseudo if DST overlaps the comparison) and
         conditionally overwrite it with operands[2].  */
      if (reg_overlap_mentioned_p (dst, cmp))
        dst = gen_reg_rtx (mode);

      emit_move_insn (dst, operands[3]);
    }
  else if (operands[2] == dst)
    {
      /* The "then" value already is the destination: conditionally move
         the "else" value instead, under the reversed condition.  */
      operands[2] = operands[3];

      if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
        rc = reverse_condition_maybe_unordered (rc);
      else
        rc = reverse_condition (rc);
    }

  /* Use a V9 register-contents compare when possible, otherwise
     materialize a condition-code register.  */
  if (XEXP (cmp, 1) == const0_rtx
      && GET_CODE (XEXP (cmp, 0)) == REG
      && cmp_mode == DImode
      && v9_regcmp_p (rc))
    cc_reg = XEXP (cmp, 0);
  else
    cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));

  cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);

  emit_insn (gen_rtx_SET (VOIDmode, dst,
                          gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));

  /* Copy back from the temporary if one was used.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  return true;
}
12477
12478 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
12479 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
12480 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
12481 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
12482 code to be used for the condition mask. */
12483
void
sparc_expand_vcond (enum machine_mode mode, rtx *operands, int ccode, int fcode)
{
  rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
  enum rtx_code code = GET_CODE (operands[3]);

  mask = gen_reg_rtx (Pmode);
  cop0 = operands[4];
  cop1 = operands[5];
  /* Canonicalize LT/GE into GT/LE by swapping the comparison
     operands.  */
  if (code == LT || code == GE)
    {
      rtx t;

      code = swap_condition (code);
      t = cop0; cop0 = cop1; cop1 = t;
    }

  gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);

  /* The VIS compare (FCODE) produces the element-wise condition mask.  */
  fcmp = gen_rtx_UNSPEC (Pmode,
                         gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
                         fcode);

  /* CCODE expands the condition mask into a BSHUFFLE selector held in
     the GSR.  */
  cmask = gen_rtx_UNSPEC (DImode,
                          gen_rtvec (2, mask, gsr),
                          ccode);

  /* BSHUFFLE then picks each element from operands[1] or operands[2]
     according to the selector.  */
  bshuf = gen_rtx_UNSPEC (mode,
                          gen_rtvec (3, operands[1], operands[2], gsr),
                          UNSPEC_BSHUFFLE);

  emit_insn (gen_rtx_SET (VOIDmode, mask, fcmp));
  emit_insn (gen_rtx_SET (VOIDmode, gsr, cmask));

  emit_insn (gen_rtx_SET (VOIDmode, operands[0], bshuf));
}
12520
12521 /* On sparc, any mode which naturally allocates into the float
12522 registers should return 4 here. */
12523
12524 unsigned int
sparc_regmode_natural_size(enum machine_mode mode)12525 sparc_regmode_natural_size (enum machine_mode mode)
12526 {
12527 int size = UNITS_PER_WORD;
12528
12529 if (TARGET_ARCH64)
12530 {
12531 enum mode_class mclass = GET_MODE_CLASS (mode);
12532
12533 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
12534 size = 4;
12535 }
12536
12537 return size;
12538 }
12539
12540 /* Return TRUE if it is a good idea to tie two pseudo registers
12541 when one has mode MODE1 and one has mode MODE2.
12542 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
12543 for any hard reg, then this must be FALSE for correct output.
12544
12545 For V9 we have to deal with the fact that only the lower 32 floating
12546 point registers are 32-bit addressable. */
12547
12548 bool
sparc_modes_tieable_p(enum machine_mode mode1,enum machine_mode mode2)12549 sparc_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
12550 {
12551 enum mode_class mclass1, mclass2;
12552 unsigned short size1, size2;
12553
12554 if (mode1 == mode2)
12555 return true;
12556
12557 mclass1 = GET_MODE_CLASS (mode1);
12558 mclass2 = GET_MODE_CLASS (mode2);
12559 if (mclass1 != mclass2)
12560 return false;
12561
12562 if (! TARGET_V9)
12563 return true;
12564
12565 /* Classes are the same and we are V9 so we have to deal with upper
12566 vs. lower floating point registers. If one of the modes is a
12567 4-byte mode, and the other is not, we have to mark them as not
12568 tieable because only the lower 32 floating point register are
12569 addressable 32-bits at a time.
12570
12571 We can't just test explicitly for SFmode, otherwise we won't
12572 cover the vector mode cases properly. */
12573
12574 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
12575 return true;
12576
12577 size1 = GET_MODE_SIZE (mode1);
12578 size2 = GET_MODE_SIZE (mode2);
12579 if ((size1 > 4 && size2 == 4)
12580 || (size2 > 4 && size1 == 4))
12581 return false;
12582
12583 return true;
12584 }
12585
12586 /* Implement TARGET_CSTORE_MODE. */
12587
12588 static enum machine_mode
sparc_cstore_mode(enum insn_code icode ATTRIBUTE_UNUSED)12589 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
12590 {
12591 return (TARGET_ARCH64 ? DImode : SImode);
12592 }
12593
12594 /* Return the compound expression made of T1 and T2. */
12595
/* Build a void COMPOUND_EXPR evaluating T1 then T2, for its side
   effects only.  */
static inline tree
compound_expr (tree t1, tree t2)
{
  return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
}
12601
12602 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
12603
static void
sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  /* Without an FPU there is no FSR to manipulate.  */
  if (!TARGET_FPU)
    return;

  /* FSR bits: accrued exception flags (aexc) and trap enable mask (TEM).  */
  const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
  const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;

  /* We generate the equivalent of feholdexcept (&fenv_var):

       unsigned int fenv_var;
       __builtin_store_fsr (&fenv_var);

       unsigned int tmp1_var;
       tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);

       __builtin_load_fsr (&tmp1_var);  */

  tree fenv_var = create_tmp_var (unsigned_type_node, NULL);
  mark_addressable (fenv_var);
  tree fenv_addr = build_fold_addr_expr (fenv_var);
  tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
  tree hold_stfsr = build_call_expr (stfsr, 1, fenv_addr);

  tree tmp1_var = create_tmp_var (unsigned_type_node, NULL);
  mark_addressable (tmp1_var);
  tree masked_fenv_var
    = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
              build_int_cst (unsigned_type_node,
                             ~(accrued_exception_mask | trap_enable_mask)));
  tree hold_mask
    = build2 (MODIFY_EXPR, void_type_node, tmp1_var, masked_fenv_var);

  tree tmp1_addr = build_fold_addr_expr (tmp1_var);
  tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
  tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);

  *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);

  /* We reload the value of tmp1_var to clear the exceptions:

       __builtin_load_fsr (&tmp1_var);  */

  *clear = build_call_expr (ldfsr, 1, tmp1_addr);

  /* We generate the equivalent of feupdateenv (&fenv_var):

       unsigned int tmp2_var;
       __builtin_store_fsr (&tmp2_var);

       __builtin_load_fsr (&fenv_var);

       if (SPARC_LOW_FE_EXCEPT_VALUES)
         tmp2_var >>= 5;
       __atomic_feraiseexcept ((int) tmp2_var);  */

  tree tmp2_var = create_tmp_var (unsigned_type_node, NULL);
  mark_addressable (tmp2_var);
  /* NOTE(review): tmp3_addr is the address of tmp2_var despite the
     name; there is no tmp3 variable.  */
  tree tmp3_addr = build_fold_addr_expr (tmp2_var);
  tree update_stfsr = build_call_expr (stfsr, 1, tmp3_addr);

  tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);

  tree atomic_feraiseexcept
    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  tree update_call
    = build_call_expr (atomic_feraiseexcept, 1,
                       fold_convert (integer_type_node, tmp2_var));

  /* Some configurations keep the FE_* exception values in the low bits;
     shift the captured FSR flags down before raising them.  */
  if (SPARC_LOW_FE_EXCEPT_VALUES)
    {
      tree shifted_tmp2_var
        = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
                  build_int_cst (unsigned_type_node, 5));
      tree update_shift
        = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
      update_call = compound_expr (update_shift, update_call);
    }

  *update
    = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
}
12687
12688 #include "gt-sparc.h"
12689