1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2016 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "gimple.h"
31 #include "df.h"
32 #include "tm_p.h"
33 #include "stringpool.h"
34 #include "expmed.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "diagnostic-core.h"
40 #include "alias.h"
41 #include "fold-const.h"
42 #include "stor-layout.h"
43 #include "calls.h"
44 #include "varasm.h"
45 #include "output.h"
46 #include "insn-attr.h"
47 #include "explow.h"
48 #include "expr.h"
49 #include "debug.h"
50 #include "common/common-target.h"
51 #include "gimplify.h"
52 #include "langhooks.h"
53 #include "reload.h"
54 #include "params.h"
55 #include "tree-pass.h"
56 #include "context.h"
57 #include "builtins.h"
58
59 /* This file should be included last. */
60 #include "target-def.h"
61
62 /* Processor costs */
63
/* Per-processor instruction cost table, filled in COSTS_N_INSNS units.
   One static instance exists below for each supported -mtune target;
   sparc_costs points at the selected one.  Field order matters: the
   initializers below are positional.  */
struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply costs is fixed,
     and not variable.  */
  const int int_mul_bit_factor;

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;
};
137
/* Instruction costs for the Cypress CY7C601/611 (SPARC-V7 era).  */
static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
161
/* Instruction costs for the SuperSPARC (SPARC-V8).  */
static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};
185
/* Instruction costs for the HyperSPARC (SPARC-V8).  */
static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
209
/* Instruction costs for the LEON (SPARC-V8).  */
static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
233
/* Instruction costs for the LEON3 (SPARC-V8).  */
static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
257
/* Instruction costs for the SPARClet (embedded SPARC-V8).  */
static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
281
/* Instruction costs for the UltraSPARC-I/II (SPARC-V9).  */
static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};
305
/* Instruction costs for the UltraSPARC-III (SPARC-V9).  */
static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};
329
/* Instruction costs for the UltraSPARC-T1 (Niagara).  */
static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
353
/* Instruction costs for the UltraSPARC-T2 (Niagara-2).  */
static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
377
/* Instruction costs for the SPARC-T3 (Niagara-3).  */
static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
401
/* Instruction costs for the SPARC-T4 (Niagara-4).  */
static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
425
/* Instruction costs for the SPARC-M7 (Niagara-7).  */
static const
struct processor_costs niagara7_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
449
/* Cost table in effect for the current compilation; defaults to Cypress
   until option handling selects the table matching -mtune/-mcpu.  */
static const struct processor_costs *sparc_costs = &cypress_costs;
451
452 #ifdef HAVE_AS_RELAX_OPTION
453 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
454 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
455 With sethi/jmp, neither 'as' nor 'ld' has an easy way how to find out if
456 somebody does not branch between the sethi and jmp. */
457 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
458 #else
459 #define LEAF_SIBCALL_SLOT_RESERVED_P \
460 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
461 #endif
462
/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.
   Entries of -1 mark registers that are not remapped.
   NOTE(review): storing -1 in plain 'char' assumes 'char' is signed on
   the host compiler -- TODO confirm, or consider 'signed char' (the
   type is part of the extern interface, so it is not changed here).  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};
481
/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  A 0 entry disqualifies the function from
   the leaf optimization if that register is used.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};
499
/* Per-function machine-dependent state, garbage-collected (GTY) and
   hung off cfun->machine; see the sparc_* accessor macros below.  */
struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};
530
531 #define sparc_frame_size cfun->machine->frame_size
532 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size
533 #define sparc_frame_base_reg cfun->machine->frame_base_reg
534 #define sparc_frame_base_offset cfun->machine->frame_base_offset
535 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
536 #define sparc_leaf_function_p cfun->machine->leaf_function_p
537 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
538 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
539
540 /* 1 if the next opcode is to be specially indented. */
541 int sparc_indent_opcode = 0;
542
543 static void sparc_option_override (void);
544 static void sparc_init_modes (void);
545 static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
546 const_tree, bool, bool, int *, int *);
547
548 static int supersparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
549 static int hypersparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
550
551 static void sparc_emit_set_const32 (rtx, rtx);
552 static void sparc_emit_set_const64 (rtx, rtx);
553 static void sparc_output_addr_vec (rtx);
554 static void sparc_output_addr_diff_vec (rtx);
555 static void sparc_output_deferred_case_vectors (void);
556 static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
557 static bool sparc_legitimate_constant_p (machine_mode, rtx);
558 static rtx sparc_builtin_saveregs (void);
559 static int epilogue_renumber (rtx *, int);
560 static bool sparc_assemble_integer (rtx, unsigned int, int);
561 static int set_extends (rtx_insn *);
562 static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
563 static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
564 #ifdef TARGET_SOLARIS
565 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
566 tree) ATTRIBUTE_UNUSED;
567 #endif
568 static int sparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
569 static int sparc_issue_rate (void);
570 static void sparc_sched_init (FILE *, int, int);
571 static int sparc_use_sched_lookahead (void);
572
573 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
574 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
575 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
576 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
577 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
578
579 static bool sparc_function_ok_for_sibcall (tree, tree);
580 static void sparc_init_libfuncs (void);
581 static void sparc_init_builtins (void);
582 static void sparc_fpu_init_builtins (void);
583 static void sparc_vis_init_builtins (void);
584 static tree sparc_builtin_decl (unsigned, bool);
585 static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
586 static tree sparc_fold_builtin (tree, int, tree *, bool);
587 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
588 HOST_WIDE_INT, tree);
589 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
590 HOST_WIDE_INT, const_tree);
591 static struct machine_function * sparc_init_machine_status (void);
592 static bool sparc_cannot_force_const_mem (machine_mode, rtx);
593 static rtx sparc_tls_get_addr (void);
594 static rtx sparc_tls_got (void);
595 static int sparc_register_move_cost (machine_mode,
596 reg_class_t, reg_class_t);
597 static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
598 static rtx sparc_function_value (const_tree, const_tree, bool);
599 static rtx sparc_libcall_value (machine_mode, const_rtx);
600 static bool sparc_function_value_regno_p (const unsigned int);
601 static rtx sparc_struct_value_rtx (tree, int);
602 static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
603 int *, const_tree, int);
604 static bool sparc_return_in_memory (const_tree, const_tree);
605 static bool sparc_strict_argument_naming (cumulative_args_t);
606 static void sparc_va_start (tree, rtx);
607 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
608 static bool sparc_vector_mode_supported_p (machine_mode);
609 static bool sparc_tls_referenced_p (rtx);
610 static rtx sparc_legitimize_tls_address (rtx);
611 static rtx sparc_legitimize_pic_address (rtx, rtx);
612 static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
613 static rtx sparc_delegitimize_address (rtx);
614 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
615 static bool sparc_pass_by_reference (cumulative_args_t,
616 machine_mode, const_tree, bool);
617 static void sparc_function_arg_advance (cumulative_args_t,
618 machine_mode, const_tree, bool);
619 static rtx sparc_function_arg_1 (cumulative_args_t,
620 machine_mode, const_tree, bool, bool);
621 static rtx sparc_function_arg (cumulative_args_t,
622 machine_mode, const_tree, bool);
623 static rtx sparc_function_incoming_arg (cumulative_args_t,
624 machine_mode, const_tree, bool);
625 static unsigned int sparc_function_arg_boundary (machine_mode,
626 const_tree);
627 static int sparc_arg_partial_bytes (cumulative_args_t,
628 machine_mode, tree, bool);
629 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
630 static void sparc_file_end (void);
631 static bool sparc_frame_pointer_required (void);
632 static bool sparc_can_eliminate (const int, const int);
633 static rtx sparc_builtin_setjmp_frame_value (void);
634 static void sparc_conditional_register_usage (void);
635 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
636 static const char *sparc_mangle_type (const_tree);
637 #endif
638 static void sparc_trampoline_init (rtx, tree, rtx);
639 static machine_mode sparc_preferred_simd_mode (machine_mode);
640 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
641 static bool sparc_print_operand_punct_valid_p (unsigned char);
642 static void sparc_print_operand (FILE *, rtx, int);
643 static void sparc_print_operand_address (FILE *, machine_mode, rtx);
644 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
645 machine_mode,
646 secondary_reload_info *);
647 static machine_mode sparc_cstore_mode (enum insn_code icode);
648 static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
649
#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes, terminated by an all-NULL sentinel
   entry; the subtarget supplies the actual rows.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       do_diagnostic } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL, 0, 0, false, false, false, NULL, false }
};
#endif
660
661 /* Option handling. */
662
663 /* Parsed value. */
664 enum cmodel sparc_cmodel;
665
666 char sparc_hard_reg_printed[8];
667
668 /* Initialize the GCC target structure. */
669
670 /* The default is to use .half rather than .short for aligned HI objects. */
671 #undef TARGET_ASM_ALIGNED_HI_OP
672 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
673
674 #undef TARGET_ASM_UNALIGNED_HI_OP
675 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
676 #undef TARGET_ASM_UNALIGNED_SI_OP
677 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
678 #undef TARGET_ASM_UNALIGNED_DI_OP
679 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
680
681 /* The target hook has to handle DI-mode values. */
682 #undef TARGET_ASM_INTEGER
683 #define TARGET_ASM_INTEGER sparc_assemble_integer
684
685 #undef TARGET_ASM_FUNCTION_PROLOGUE
686 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
687 #undef TARGET_ASM_FUNCTION_EPILOGUE
688 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
689
690 #undef TARGET_SCHED_ADJUST_COST
691 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
692 #undef TARGET_SCHED_ISSUE_RATE
693 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
694 #undef TARGET_SCHED_INIT
695 #define TARGET_SCHED_INIT sparc_sched_init
696 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
697 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
698
699 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
700 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
701
702 #undef TARGET_INIT_LIBFUNCS
703 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
704
705 #undef TARGET_LEGITIMIZE_ADDRESS
706 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
707 #undef TARGET_DELEGITIMIZE_ADDRESS
708 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
709 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
710 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
711
712 #undef TARGET_INIT_BUILTINS
713 #define TARGET_INIT_BUILTINS sparc_init_builtins
714 #undef TARGET_BUILTIN_DECL
715 #define TARGET_BUILTIN_DECL sparc_builtin_decl
716 #undef TARGET_EXPAND_BUILTIN
717 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
718 #undef TARGET_FOLD_BUILTIN
719 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
720
721 #if TARGET_TLS
722 #undef TARGET_HAVE_TLS
723 #define TARGET_HAVE_TLS true
724 #endif
725
726 #undef TARGET_CANNOT_FORCE_CONST_MEM
727 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
728
729 #undef TARGET_ASM_OUTPUT_MI_THUNK
730 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
731 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
732 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
733
734 #undef TARGET_RTX_COSTS
735 #define TARGET_RTX_COSTS sparc_rtx_costs
736 #undef TARGET_ADDRESS_COST
737 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
738 #undef TARGET_REGISTER_MOVE_COST
739 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
740
741 #undef TARGET_PROMOTE_FUNCTION_MODE
742 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
743
744 #undef TARGET_FUNCTION_VALUE
745 #define TARGET_FUNCTION_VALUE sparc_function_value
746 #undef TARGET_LIBCALL_VALUE
747 #define TARGET_LIBCALL_VALUE sparc_libcall_value
748 #undef TARGET_FUNCTION_VALUE_REGNO_P
749 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
750
751 #undef TARGET_STRUCT_VALUE_RTX
752 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
753 #undef TARGET_RETURN_IN_MEMORY
754 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
755 #undef TARGET_MUST_PASS_IN_STACK
756 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
757 #undef TARGET_PASS_BY_REFERENCE
758 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
759 #undef TARGET_ARG_PARTIAL_BYTES
760 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
761 #undef TARGET_FUNCTION_ARG_ADVANCE
762 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
763 #undef TARGET_FUNCTION_ARG
764 #define TARGET_FUNCTION_ARG sparc_function_arg
765 #undef TARGET_FUNCTION_INCOMING_ARG
766 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
767 #undef TARGET_FUNCTION_ARG_BOUNDARY
768 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
769
770 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
771 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
772 #undef TARGET_STRICT_ARGUMENT_NAMING
773 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
774
775 #undef TARGET_EXPAND_BUILTIN_VA_START
776 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
777 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
778 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
779
780 #undef TARGET_VECTOR_MODE_SUPPORTED_P
781 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
782
783 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
784 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
785
786 #ifdef SUBTARGET_INSERT_ATTRIBUTES
787 #undef TARGET_INSERT_ATTRIBUTES
788 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
789 #endif
790
791 #ifdef SUBTARGET_ATTRIBUTE_TABLE
792 #undef TARGET_ATTRIBUTE_TABLE
793 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
794 #endif
795
796 #undef TARGET_OPTION_OVERRIDE
797 #define TARGET_OPTION_OVERRIDE sparc_option_override
798
799 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
800 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
801 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
802 #endif
803
804 #undef TARGET_ASM_FILE_END
805 #define TARGET_ASM_FILE_END sparc_file_end
806
807 #undef TARGET_FRAME_POINTER_REQUIRED
808 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
809
810 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
811 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value
812
813 #undef TARGET_CAN_ELIMINATE
814 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
815
816 #undef TARGET_PREFERRED_RELOAD_CLASS
817 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
818
819 #undef TARGET_SECONDARY_RELOAD
820 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
821
822 #undef TARGET_CONDITIONAL_REGISTER_USAGE
823 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
824
825 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
826 #undef TARGET_MANGLE_TYPE
827 #define TARGET_MANGLE_TYPE sparc_mangle_type
828 #endif
829
830 #undef TARGET_LEGITIMATE_ADDRESS_P
831 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
832
833 #undef TARGET_LEGITIMATE_CONSTANT_P
834 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
835
836 #undef TARGET_TRAMPOLINE_INIT
837 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
838
839 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
840 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
841 #undef TARGET_PRINT_OPERAND
842 #define TARGET_PRINT_OPERAND sparc_print_operand
843 #undef TARGET_PRINT_OPERAND_ADDRESS
844 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
845
846 /* The value stored by LDSTUB. */
847 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
848 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
849
850 #undef TARGET_CSTORE_MODE
851 #define TARGET_CSTORE_MODE sparc_cstore_mode
852
853 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
854 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv
855
856 struct gcc_target targetm = TARGET_INITIALIZER;
857
858 /* Return the memory reference contained in X if any, zero otherwise. */
859
860 static rtx
mem_ref(rtx x)861 mem_ref (rtx x)
862 {
863 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
864 x = XEXP (x, 0);
865
866 if (MEM_P (x))
867 return x;
868
869 return NULL_RTX;
870 }
871
/* We use a machine specific pass to enable workarounds for errata.

   We need to have the (essentially) final form of the insn stream in order
   to properly detect the various hazards.  Therefore, this machine specific
   pass runs as late as possible.  The pass is inserted in the pass pipeline
   at the end of sparc_option_override.

   Scans the whole insn stream and inserts a NOP before each insn pair that
   matches a known AT697F or UT699 hazard pattern.  Always returns 0 (no
   TODO flags for the pass manager).  */

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx_insn *insn, *next;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn))
	if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
	  insn = seq->insn (1);

      /* AT697F erratum: look for a single-word load into an odd-numbered
	 FP register.  */
      if (sparc_fix_at697f
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	  && MEM_P (SET_SRC (set))
	  && REG_P (SET_DEST (set))
	  && REGNO (SET_DEST (set)) > 31
	  && REGNO (SET_DEST (set)) % 2 != 0)
	{
	  /* The wrong dependency is on the enclosing double register,
	     i.e. the even-numbered sibling %fx of the loaded %fx+1.  */
	  const unsigned int x = REGNO (SET_DEST (set)) - 1;
	  unsigned int src1, src2, dest;
	  int code;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  extract_insn (next);
	  code = INSN_CODE (next);

	  switch (code)
	    {
	    case CODE_FOR_adddf3:
	    case CODE_FOR_subdf3:
	    case CODE_FOR_muldf3:
	    case CODE_FOR_divdf3:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      src2 = REGNO (recog_data.operand[2]);
	      if (src1 != src2)
		{
		  /* Case [1-4]:
		       ld [address], %fx+1
		       FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
		  if ((src1 == x || src2 == x)
		      && (dest == src1 || dest == src2))
		    insert_nop = true;
		}
	      else
		{
		  /* Case 5:
		       ld [address], %fx+1
		       FPOPd %fx, %fx, %fx  */
		  if (src1 == x
		      && dest == src1
		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
		    insert_nop = true;
		}
	      break;

	    case CODE_FOR_sqrtdf2:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      /* Case 6:
		   ld [address], %fx+1
		   fsqrtd %fx, %fx  */
	      if (src1 == x && dest == src1)
		insert_nop = true;
	      break;

	    default:
	      break;
	    }
	}

      /* UT699 erratum: look for a single-word load into an integer
	 register, followed by a second memory access.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
	       && (mem_ref (SET_SRC (set)) != NULL_RTX
		   || INSN_CODE (insn) == CODE_FOR_movsi_pic_gotdata_op)
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) < 32)
	{
	  /* There is no problem if the second memory access has a data
	     dependency on the first single-cycle load.  */
	  rtx x = SET_DEST (set);

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second memory access to/from an integer register.  */
	  if ((set = single_set (next)) != NULL_RTX)
	    {
	      rtx src = SET_SRC (set);
	      rtx dest = SET_DEST (set);
	      rtx mem;

	      /* LDD is affected.  A NOP is needed unless the load address
		 depends on X (true data dependency breaks the hazard).  */
	      if ((mem = mem_ref (src)) != NULL_RTX
		  && REG_P (dest)
		  && REGNO (dest) < 32
		  && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;

	      /* STD is *not* affected.  */
	      else if (MEM_P (dest)
		       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
		       && (src == CONST0_RTX (GET_MODE (dest))
			   || (REG_P (src)
			       && REGNO (src) < 32
			       && REGNO (src) != REGNO (x)))
		       && !reg_mentioned_p (x, XEXP (dest, 0)))
		insert_nop = true;

	      /* GOT accesses uses LD.  */
	      else if (INSN_CODE (next) == CODE_FOR_movsi_pic_gotdata_op
		       && !reg_mentioned_p (x, XEXP (XEXP (src, 0), 1)))
		insert_nop = true;
	    }
	}

      /* UT699 erratum: look for a single-word load/operation into an FP
	 register, followed by one into the sibling FP register and then a
	 store from one of them within the next few instructions.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31)
	{
	  /* Number of instructions in the problematic window.  */
	  const int n_insns = 4;
	  /* The problematic combination is with the sibling FP register,
	     i.e. the other half of the same double register (x XOR 1).  */
	  const unsigned int x = REGNO (SET_DEST (set));
	  const unsigned int y = x ^ 1;
	  rtx_insn *after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second load/operation into the sibling FP register.  */
	  if (!((set = single_set (next)) != NULL_RTX
		&& GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
		&& REG_P (SET_DEST (set))
		&& REGNO (SET_DEST (set)) == y))
	    continue;

	  /* Look for a (possible) store from the FP register in the next N
	     instructions, but bail out if it is again modified or if there
	     is a store from the sibling FP register before this store.  */
	  for (after = next, i = 0; i < n_insns; i++)
	    {
	      bool branch_p;

	      after = next_active_insn (after);
	      if (!after)
		break;

	      /* This is a branch with an empty delay slot.  */
	      if (!NONJUMP_INSN_P (after))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = NULL;
		}
	      /* This is a branch with a filled delay slot.  */
	      else if (rtx_sequence *seq =
		         dyn_cast <rtx_sequence *> (PATTERN (after)))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = seq->insn (1);
		}
	      /* This is a regular instruction.  */
	      else
		branch_p = false;

	      if (after && (set = single_set (after)) != NULL_RTX)
		{
		  const rtx src = SET_SRC (set);
		  const rtx dest = SET_DEST (set);
		  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));

		  /* If the FP register is again modified before the store,
		     then the store isn't affected.  */
		  if (REG_P (dest)
		      && (REGNO (dest) == x
			  || (REGNO (dest) == y && size == 8)))
		    break;

		  if (MEM_P (dest) && REG_P (src))
		    {
		      /* If there is a store from the sibling FP register
			 before the store, then the store is not affected.  */
		      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
			break;

		      /* Otherwise, the store is affected.  */
		      if (REGNO (src) == x && size == 4)
			{
			  insert_nop = true;
			  break;
			}
		    }
		}

	      /* If we have a branch in the first M instructions, then we
		 cannot see the (M+2)th instruction so we play safe.  */
	      if (branch_p && i <= (n_insns - 2))
		{
		  insert_nop = true;
		  break;
		}
	    }
	}

      else
	next = NEXT_INSN (insn);

      if (insert_nop)
	emit_insn_before (gen_nop (), next);
    }

  return 0;
}
1129
namespace {

/* Descriptor for the late machine-specific RTL errata pass; see
   sparc_do_work_around_errata above for what it does.  */
const pass_data pass_data_work_around_errata =
{
  RTL_PASS, /* type */
  "errata", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* RTL pass wrapper that runs the errata workarounds when one of the
   -mfix-at697f/-mfix-ut699 options is in effect.  */
class pass_work_around_errata : public rtl_opt_pass
{
public:
  pass_work_around_errata(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_work_around_errata, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      /* The only errata we handle are those of the AT697F and UT699.  */
      return sparc_fix_at697f != 0 || sparc_fix_ut699 != 0;
    }

  virtual unsigned int execute (function *)
    {
      return sparc_do_work_around_errata ();
    }

}; // class pass_work_around_errata

} // anon namespace
1167
1168 rtl_opt_pass *
make_pass_work_around_errata(gcc::context * ctxt)1169 make_pass_work_around_errata (gcc::context *ctxt)
1170 {
1171 return new pass_work_around_errata (ctxt);
1172 }
1173
1174 /* Helpers for TARGET_DEBUG_OPTIONS. */
1175 static void
dump_target_flag_bits(const int flags)1176 dump_target_flag_bits (const int flags)
1177 {
1178 if (flags & MASK_64BIT)
1179 fprintf (stderr, "64BIT ");
1180 if (flags & MASK_APP_REGS)
1181 fprintf (stderr, "APP_REGS ");
1182 if (flags & MASK_FASTER_STRUCTS)
1183 fprintf (stderr, "FASTER_STRUCTS ");
1184 if (flags & MASK_FLAT)
1185 fprintf (stderr, "FLAT ");
1186 if (flags & MASK_FMAF)
1187 fprintf (stderr, "FMAF ");
1188 if (flags & MASK_FPU)
1189 fprintf (stderr, "FPU ");
1190 if (flags & MASK_HARD_QUAD)
1191 fprintf (stderr, "HARD_QUAD ");
1192 if (flags & MASK_POPC)
1193 fprintf (stderr, "POPC ");
1194 if (flags & MASK_PTR64)
1195 fprintf (stderr, "PTR64 ");
1196 if (flags & MASK_STACK_BIAS)
1197 fprintf (stderr, "STACK_BIAS ");
1198 if (flags & MASK_UNALIGNED_DOUBLES)
1199 fprintf (stderr, "UNALIGNED_DOUBLES ");
1200 if (flags & MASK_V8PLUS)
1201 fprintf (stderr, "V8PLUS ");
1202 if (flags & MASK_VIS)
1203 fprintf (stderr, "VIS ");
1204 if (flags & MASK_VIS2)
1205 fprintf (stderr, "VIS2 ");
1206 if (flags & MASK_VIS3)
1207 fprintf (stderr, "VIS3 ");
1208 if (flags & MASK_VIS4)
1209 fprintf (stderr, "VIS4 ");
1210 if (flags & MASK_CBCOND)
1211 fprintf (stderr, "CBCOND ");
1212 if (flags & MASK_DEPRECATED_V8_INSNS)
1213 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1214 if (flags & MASK_SPARCLET)
1215 fprintf (stderr, "SPARCLET ");
1216 if (flags & MASK_SPARCLITE)
1217 fprintf (stderr, "SPARCLITE ");
1218 if (flags & MASK_V8)
1219 fprintf (stderr, "V8 ");
1220 if (flags & MASK_V9)
1221 fprintf (stderr, "V9 ");
1222 }
1223
1224 static void
dump_target_flags(const char * prefix,const int flags)1225 dump_target_flags (const char *prefix, const int flags)
1226 {
1227 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1228 dump_target_flag_bits (flags);
1229 fprintf(stderr, "]\n");
1230 }
1231
/* Validate and override various options, and do some machine dependent
   initialization.  Implements TARGET_OPTION_OVERRIDE: runs once after
   option parsing, reconciles -mcpu/-mtune/-mfpu/-mvis*/-mcmodel and the
   architecture size, sets cost tables and tuning parameters, and
   registers the errata workaround pass.  */

static void
sparc_option_override (void)
{
  /* Mapping from -mcmodel= string to code model enum, NULL-terminated.  */
  static struct code_model {
    const char *const name;
    const enum cmodel value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { NULL, (enum cmodel) 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const enum processor_type processor;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
    { TARGET_CPU_v8, PROCESSOR_V8 },
    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
    { TARGET_CPU_leon, PROCESSOR_LEON },
    { TARGET_CPU_leon3, PROCESSOR_LEON3 },
    { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
    { TARGET_CPU_sparclite, PROCESSOR_F930 },
    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
    { TARGET_CPU_v9, PROCESSOR_V9 },
    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
    { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
    { -1, PROCESSOR_V7 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  This must match the order of
     the enum processor_type in sparc-opts.h.  */
  static struct cpu_table {
    const char *const name;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7", MASK_ISA, 0 },
    { "cypress", MASK_ISA, 0 },
    { "v8", MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", MASK_ISA, MASK_V8 },
    { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
    { "leon", MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU },
    { "leon3", MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU },
    { "leon3v7", MASK_ISA, MASK_LEON3|MASK_FPU },
    { "sparclite", MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU.  */
    { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
    { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "sparclet", MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701", MASK_ISA, MASK_SPARCLET },
    { "v9", MASK_ISA, MASK_V9 },
    /* UltraSPARC I, II, IIi */
    { "ultrasparc", MASK_ISA,
    /* Although insns using %y are deprecated, it is a clear win.  */
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC III */
    /* ??? Check if %y issue still holds true.  */
    { "ultrasparc3", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
    /* UltraSPARC T1 */
    { "niagara", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC T2 */
    { "niagara2", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2 },
    /* UltraSPARC T3 */
    { "niagara3", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
    /* UltraSPARC T4 */
    { "niagara4", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
    /* UltraSPARC M7 */
    { "niagara7", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_VIS4|MASK_FMAF|MASK_CBCOND },
  };
  const struct cpu_table *cpu;
  unsigned int i;
  int fpu;

  /* Parse -mdebug=<comma-separated list>, e.g. "options" or "!all".  */
  if (sparc_debug_string != NULL)
    {
      const char *q;
      char *p;

      p = ASTRDUP (sparc_debug_string);
      while ((q = strtok (p, ",")) != NULL)
	{
	  bool invert;
	  int mask;

	  p = NULL;
	  /* A leading '!' inverts the sense of the switch.  */
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (! strcmp (q, "all"))
	    mask = MASK_DEBUG_ALL;
	  else if (! strcmp (q, "options"))
	    mask = MASK_DEBUG_OPTIONS;
	  else
	    /* NOTE(review): error () returns here, leaving MASK
	       uninitialized for the update below — presumably harmless
	       because compilation is already doomed, but worth
	       confirming/initializing.  */
	    error ("unknown -mdebug-%s switch", q);

	  if (invert)
	    sparc_debug &= ~mask;
	  else
	    sparc_debug |= mask;
	}
    }

  if (TARGET_DEBUG_OPTIONS)
    {
      dump_target_flags("Initial target_flags", target_flags);
      dump_target_flags("target_flags_explicit", target_flags_explicit);
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64bit archs to use 128 bit long double */
  if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      /* Recover by forcing the only supported setting.  */
      target_flags |= MASK_LONG_DOUBLE_128;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
	{
	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
	      break;
	  if (cmodel->name == NULL)
	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
	  else
	    sparc_cmodel = cmodel->value;
	}
      else
	error ("-mcmodel= is not supported on 32 bit systems");
    }

  /* Check that -fcall-saved-REG wasn't specified for out registers
     (%o0-%o7, hard regs 8-15): they cannot be call-saved on SPARC.  */
  for (i = 8; i < 16; i++)
    if (!call_used_regs [i])
      {
	error ("-fcall-saved-REG is not supported for out registers");
	call_used_regs [i] = 1;
      }

  fpu = target_flags & MASK_FPU; /* save current -mfpu status */

  /* Set the default CPU.  */
  if (!global_options_set.x_sparc_cpu_and_features)
    {
      for (def = &cpu_default[0]; def->cpu != -1; ++def)
	if (def->cpu == TARGET_CPU_DEFAULT)
	  break;
      gcc_assert (def->cpu != -1);
      sparc_cpu_and_features = def->processor;
    }

  /* -mtune defaults to the -mcpu value.  */
  if (!global_options_set.x_sparc_cpu)
    sparc_cpu = sparc_cpu_and_features;

  cpu = &cpu_table[(int) sparc_cpu_and_features];

  if (TARGET_DEBUG_OPTIONS)
    {
      fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
      fprintf (stderr, "sparc_cpu: %s\n",
	       cpu_table[(int) sparc_cpu].name);
      dump_target_flags ("cpu->disable", cpu->disable);
      dump_target_flags ("cpu->enable", cpu->enable);
    }

  /* Apply the selected CPU's flags, masking out features the configured
     assembler cannot emit (each HAVE_AS_* is an autoconf probe).  */
  target_flags &= ~cpu->disable;
  target_flags |= (cpu->enable
#ifndef HAVE_AS_FMAF_HPC_VIS3
		   & ~(MASK_FMAF | MASK_VIS3)
#endif
#ifndef HAVE_AS_SPARC4
		   & ~MASK_CBCOND
#endif
#ifndef HAVE_AS_SPARC5_VIS4
		   & ~MASK_VIS4
#endif
#ifndef HAVE_AS_LEON
		   & ~(MASK_LEON | MASK_LEON3)
#endif
		   );

  /* If -mfpu or -mno-fpu was explicitly used, don't override with
     the processor default.  */
  if (target_flags_explicit & MASK_FPU)
    target_flags = (target_flags & ~MASK_FPU) | fpu;

  /* -mvis2 implies -mvis */
  if (TARGET_VIS2)
    target_flags |= MASK_VIS;

  /* -mvis3 implies -mvis2 and -mvis */
  if (TARGET_VIS3)
    target_flags |= MASK_VIS2 | MASK_VIS;

  /* -mvis4 implies -mvis3, -mvis2 and -mvis */
  if (TARGET_VIS4)
    target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;

  /* Don't allow -mvis, -mvis2, -mvis3, -mvis4 or -mfmaf if FPU is
     disabled.  */
  if (! TARGET_FPU)
    target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
		      | MASK_FMAF);

  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
     are available.
     -m64 also implies v9.  */
  if (TARGET_VIS || TARGET_ARCH64)
    {
      target_flags |= MASK_V9;
      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
    }

  /* -mvis also implies -mv8plus on 32-bit */
  if (TARGET_VIS && ! TARGET_ARCH64)
    target_flags |= MASK_V8PLUS;

  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
  if (! TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32 bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;

  /* Supply a default value for align_functions.  */
  if (align_functions == 0)
    {
      if (sparc_cpu == PROCESSOR_ULTRASPARC
	  || sparc_cpu == PROCESSOR_ULTRASPARC3
	  || sparc_cpu == PROCESSOR_NIAGARA
	  || sparc_cpu == PROCESSOR_NIAGARA2
	  || sparc_cpu == PROCESSOR_NIAGARA3
	  || sparc_cpu == PROCESSOR_NIAGARA4)
	align_functions = 32;
      else if (sparc_cpu == PROCESSOR_NIAGARA7)
	align_functions = 64;
    }

  /* Validate PCC_STRUCT_RETURN.  */
  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);

  /* Only use .uaxword when compiling for a 64-bit target.  */
  if (!TARGET_ARCH64)
    targetm.asm_out.unaligned_op.di = NULL;

  /* Do various machine dependent initializations.  */
  sparc_init_modes ();

  /* Set up function hooks.  */
  init_machine_status = sparc_init_machine_status;

  /* Select the cost tables used by the RTX cost hooks, based on the
     tuning CPU (-mtune).  */
  switch (sparc_cpu)
    {
    case PROCESSOR_V7:
    case PROCESSOR_CYPRESS:
      sparc_costs = &cypress_costs;
      break;
    case PROCESSOR_V8:
    case PROCESSOR_SPARCLITE:
    case PROCESSOR_SUPERSPARC:
      sparc_costs = &supersparc_costs;
      break;
    case PROCESSOR_F930:
    case PROCESSOR_F934:
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      sparc_costs = &hypersparc_costs;
      break;
    case PROCESSOR_LEON:
      sparc_costs = &leon_costs;
      break;
    case PROCESSOR_LEON3:
    case PROCESSOR_LEON3V7:
      sparc_costs = &leon3_costs;
      break;
    case PROCESSOR_SPARCLET:
    case PROCESSOR_TSC701:
      sparc_costs = &sparclet_costs;
      break;
    case PROCESSOR_V9:
    case PROCESSOR_ULTRASPARC:
      sparc_costs = &ultrasparc_costs;
      break;
    case PROCESSOR_ULTRASPARC3:
      sparc_costs = &ultrasparc3_costs;
      break;
    case PROCESSOR_NIAGARA:
      sparc_costs = &niagara_costs;
      break;
    case PROCESSOR_NIAGARA2:
      sparc_costs = &niagara2_costs;
      break;
    case PROCESSOR_NIAGARA3:
      sparc_costs = &niagara3_costs;
      break;
    case PROCESSOR_NIAGARA4:
      sparc_costs = &niagara4_costs;
      break;
    case PROCESSOR_NIAGARA7:
      sparc_costs = &niagara7_costs;
      break;
    case PROCESSOR_NATIVE:
      /* PROCESSOR_NATIVE is resolved to a concrete CPU by the driver.  */
      gcc_unreachable ();
    };

  if (sparc_memory_model == SMM_DEFAULT)
    {
      /* Choose the memory model for the operating system.  */
      enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
      if (os_default != SMM_DEFAULT)
	sparc_memory_model = os_default;
      /* Choose the most relaxed model for the processor.  */
      else if (TARGET_V9)
	sparc_memory_model = SMM_RMO;
      else if (TARGET_LEON3)
	sparc_memory_model = SMM_TSO;
      else if (TARGET_LEON)
	sparc_memory_model = SMM_SC;
      else if (TARGET_V8)
	sparc_memory_model = SMM_PSO;
      else
	sparc_memory_model = SMM_SC;
    }

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (TARGET_DEBUG_OPTIONS)
    dump_target_flags ("Final target_flags", target_flags);

  /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that
     can run at the same time.  More important, it is the threshold
     defining when additional prefetches will be dropped by the
     hardware.

     The UltraSPARC-III features a documented prefetch queue with a
     size of 8.  Additional prefetches issued in the cpu are
     dropped.

     Niagara processors are different.  In these processors prefetches
     are handled much like regular loads.  The L1 miss buffer is 32
     entries, but prefetches start getting affected when 30 entries
     become occupied.  That occupation could be a mix of regular loads
     and prefetches though.  And that buffer is shared by all threads.
     Once the threshold is reached, if the core is running a single
     thread the prefetch will retry.  If more than one thread is
     running, the prefetch will be dropped.

     All this makes it very difficult to determine how many
     simultaneous prefetches can be issued simultaneously, even in a
     single-threaded program.  Experimental results show that setting
     this parameter to 32 works well when the number of threads is not
     high.  */
  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 2
			  : (sparc_cpu == PROCESSOR_ULTRASPARC3
			     ? 8 : (sparc_cpu == PROCESSOR_NIAGARA7
				    ? 32 : 3))),
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* For PARAM_L1_CACHE_LINE_SIZE we use the default 32 bytes (see
     params.def), so no maybe_set_param_value is needed.

     The Oracle SPARC Architecture (previously the UltraSPARC
     Architecture) specification states that when a PREFETCH[A]
     instruction is executed an implementation-specific amount of data
     is prefetched, and that it is at least 64 bytes long (aligned to
     at least 64 bytes).

     However, this is not correct.  The M7 (and implementations prior
     to that) does not guarantee a 64B prefetch into a cache if the
     line size is smaller.  A single cache line is all that is ever
     prefetched.  So for the M7, where the L1D$ has 32B lines and the
     L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
     L2 and L3, but only 32B are brought into the L1D$.  (Assuming it
     is a read_n prefetch, which is the only type which allocates to
     the L1.)  */

  /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
     Hardvard level-1 caches) in kilobytes.  Both UltraSPARC and
     Niagara processors feature a L1D$ of 16KB.  */
  maybe_set_param_value (PARAM_L1_CACHE_SIZE,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_ULTRASPARC3
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4
			   || sparc_cpu == PROCESSOR_NIAGARA7)
			  ? 16 : 64),
			 global_options.x_param_values,
			 global_options_set.x_param_values);


  /* PARAM_L2_CACHE_SIZE is the size fo the L2 in kilobytes.  Note
     that 512 is the default in params.def.  */
  maybe_set_param_value (PARAM_L2_CACHE_SIZE,
			 (sparc_cpu == PROCESSOR_NIAGARA4
			  ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
				   ? 256 : 512)),
			 global_options.x_param_values,
			 global_options_set.x_param_values);


  /* Disable save slot sharing for call-clobbered registers by default.
     The IRA sharing algorithm works on single registers only and this
     pessimizes for double floating-point registers.  */
  if (!global_options_set.x_flag_ira_share_save_slots)
    flag_ira_share_save_slots = 0;

  /* We register a machine specific pass to work around errata, if any.
     The pass mut be scheduled as late as possible so that we have the
     (essentially) final form of the insn stream to work on.
     Registering the pass must be done at start up.  It's convenient to
     do it here.  */
  opt_pass *errata_pass = make_pass_work_around_errata (g);
  struct register_pass_info insert_pass_work_around_errata =
    {
      errata_pass,		/* pass */
      "dbr",			/* reference_pass_name */
      1,			/* ref_pass_instance_number */
      PASS_POS_INSERT_AFTER	/* po_op */
    };
  register_pass (&insert_pass_work_around_errata);
}
1721
1722 /* Miscellaneous utilities. */
1723
1724 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
1725 or branch on register contents instructions. */
1726
1727 int
v9_regcmp_p(enum rtx_code code)1728 v9_regcmp_p (enum rtx_code code)
1729 {
1730 return (code == EQ || code == NE || code == GE || code == LT
1731 || code == LE || code == GT);
1732 }
1733
1734 /* Nonzero if OP is a floating point constant which can
1735 be loaded into an integer register using a single
1736 sethi instruction. */
1737
1738 int
fp_sethi_p(rtx op)1739 fp_sethi_p (rtx op)
1740 {
1741 if (GET_CODE (op) == CONST_DOUBLE)
1742 {
1743 long i;
1744
1745 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1746 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
1747 }
1748
1749 return 0;
1750 }
1751
1752 /* Nonzero if OP is a floating point constant which can
1753 be loaded into an integer register using a single
1754 mov instruction. */
1755
1756 int
fp_mov_p(rtx op)1757 fp_mov_p (rtx op)
1758 {
1759 if (GET_CODE (op) == CONST_DOUBLE)
1760 {
1761 long i;
1762
1763 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1764 return SPARC_SIMM13_P (i);
1765 }
1766
1767 return 0;
1768 }
1769
1770 /* Nonzero if OP is a floating point constant which can
1771 be loaded into an integer register using a high/losum
1772 instruction sequence. */
1773
1774 int
fp_high_losum_p(rtx op)1775 fp_high_losum_p (rtx op)
1776 {
1777 /* The constraints calling this should only be in
1778 SFmode move insns, so any constant which cannot
1779 be moved using a single insn will do. */
1780 if (GET_CODE (op) == CONST_DOUBLE)
1781 {
1782 long i;
1783
1784 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1785 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
1786 }
1787
1788 return 0;
1789 }
1790
1791 /* Return true if the address of LABEL can be loaded by means of the
1792 mov{si,di}_pic_label_ref patterns in PIC mode. */
1793
1794 static bool
can_use_mov_pic_label_ref(rtx label)1795 can_use_mov_pic_label_ref (rtx label)
1796 {
1797 /* VxWorks does not impose a fixed gap between segments; the run-time
1798 gap can be different from the object-file gap. We therefore can't
1799 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
1800 are absolutely sure that X is in the same segment as the GOT.
1801 Unfortunately, the flexibility of linker scripts means that we
1802 can't be sure of that in general, so assume that GOT-relative
1803 accesses are never valid on VxWorks. */
1804 if (TARGET_VXWORKS_RTP)
1805 return false;
1806
1807 /* Similarly, if the label is non-local, it might end up being placed
1808 in a different section than the current one; now mov_pic_label_ref
1809 requires the label and the code to be in the same section. */
1810 if (LABEL_REF_NONLOCAL_P (label))
1811 return false;
1812
1813 /* Finally, if we are reordering basic blocks and partition into hot
1814 and cold sections, this might happen for any label. */
1815 if (flag_reorder_blocks_and_partition)
1816 return false;
1817
1818 return true;
1819 }
1820
1821 /* Expand a move instruction. Return true if all work is done. */
1822
1823 bool
sparc_expand_move(machine_mode mode,rtx * operands)1824 sparc_expand_move (machine_mode mode, rtx *operands)
1825 {
1826 /* Handle sets of MEM first. */
1827 if (GET_CODE (operands[0]) == MEM)
1828 {
1829 /* 0 is a register (or a pair of registers) on SPARC. */
1830 if (register_or_zero_operand (operands[1], mode))
1831 return false;
1832
1833 if (!reload_in_progress)
1834 {
1835 operands[0] = validize_mem (operands[0]);
1836 operands[1] = force_reg (mode, operands[1]);
1837 }
1838 }
1839
1840 /* Fix up TLS cases. */
1841 if (TARGET_HAVE_TLS
1842 && CONSTANT_P (operands[1])
1843 && sparc_tls_referenced_p (operands [1]))
1844 {
1845 operands[1] = sparc_legitimize_tls_address (operands[1]);
1846 return false;
1847 }
1848
1849 /* Fix up PIC cases. */
1850 if (flag_pic && CONSTANT_P (operands[1]))
1851 {
1852 if (pic_address_needs_scratch (operands[1]))
1853 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
1854
1855 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
1856 if ((GET_CODE (operands[1]) == LABEL_REF
1857 && can_use_mov_pic_label_ref (operands[1]))
1858 || (GET_CODE (operands[1]) == CONST
1859 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1860 && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
1861 && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT
1862 && can_use_mov_pic_label_ref (XEXP (XEXP (operands[1], 0), 0))))
1863 {
1864 if (mode == SImode)
1865 {
1866 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
1867 return true;
1868 }
1869
1870 if (mode == DImode)
1871 {
1872 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
1873 return true;
1874 }
1875 }
1876
1877 if (symbolic_operand (operands[1], mode))
1878 {
1879 operands[1]
1880 = sparc_legitimize_pic_address (operands[1],
1881 reload_in_progress
1882 ? operands[0] : NULL_RTX);
1883 return false;
1884 }
1885 }
1886
1887 /* If we are trying to toss an integer constant into FP registers,
1888 or loading a FP or vector constant, force it into memory. */
1889 if (CONSTANT_P (operands[1])
1890 && REG_P (operands[0])
1891 && (SPARC_FP_REG_P (REGNO (operands[0]))
1892 || SCALAR_FLOAT_MODE_P (mode)
1893 || VECTOR_MODE_P (mode)))
1894 {
1895 /* emit_group_store will send such bogosity to us when it is
1896 not storing directly into memory. So fix this up to avoid
1897 crashes in output_constant_pool. */
1898 if (operands [1] == const0_rtx)
1899 operands[1] = CONST0_RTX (mode);
1900
1901 /* We can clear or set to all-ones FP registers if TARGET_VIS, and
1902 always other regs. */
1903 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
1904 && (const_zero_operand (operands[1], mode)
1905 || const_all_ones_operand (operands[1], mode)))
1906 return false;
1907
1908 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
1909 /* We are able to build any SF constant in integer registers
1910 with at most 2 instructions. */
1911 && (mode == SFmode
1912 /* And any DF constant in integer registers. */
1913 || (mode == DFmode
1914 && ! can_create_pseudo_p ())))
1915 return false;
1916
1917 operands[1] = force_const_mem (mode, operands[1]);
1918 if (!reload_in_progress)
1919 operands[1] = validize_mem (operands[1]);
1920 return false;
1921 }
1922
1923 /* Accept non-constants and valid constants unmodified. */
1924 if (!CONSTANT_P (operands[1])
1925 || GET_CODE (operands[1]) == HIGH
1926 || input_operand (operands[1], mode))
1927 return false;
1928
1929 switch (mode)
1930 {
1931 case QImode:
1932 /* All QImode constants require only one insn, so proceed. */
1933 break;
1934
1935 case HImode:
1936 case SImode:
1937 sparc_emit_set_const32 (operands[0], operands[1]);
1938 return true;
1939
1940 case DImode:
1941 /* input_operand should have filtered out 32-bit mode. */
1942 sparc_emit_set_const64 (operands[0], operands[1]);
1943 return true;
1944
1945 case TImode:
1946 {
1947 rtx high, low;
1948 /* TImode isn't available in 32-bit mode. */
1949 split_double (operands[1], &high, &low);
1950 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
1951 high));
1952 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
1953 low));
1954 }
1955 return true;
1956
1957 default:
1958 gcc_unreachable ();
1959 }
1960
1961 return false;
1962 }
1963
1964 /* Load OP1, a 32-bit constant, into OP0, a register.
1965 We know it can't be done in one insn when we get
1966 here, the move expander guarantees this. */
1967
1968 static void
sparc_emit_set_const32(rtx op0,rtx op1)1969 sparc_emit_set_const32 (rtx op0, rtx op1)
1970 {
1971 machine_mode mode = GET_MODE (op0);
1972 rtx temp = op0;
1973
1974 if (can_create_pseudo_p ())
1975 temp = gen_reg_rtx (mode);
1976
1977 if (GET_CODE (op1) == CONST_INT)
1978 {
1979 gcc_assert (!small_int_operand (op1, mode)
1980 && !const_high_operand (op1, mode));
1981
1982 /* Emit them as real moves instead of a HIGH/LO_SUM,
1983 this way CSE can see everything and reuse intermediate
1984 values if it wants. */
1985 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
1986 & ~(HOST_WIDE_INT) 0x3ff)));
1987
1988 emit_insn (gen_rtx_SET (op0,
1989 gen_rtx_IOR (mode, temp,
1990 GEN_INT (INTVAL (op1) & 0x3ff))));
1991 }
1992 else
1993 {
1994 /* A symbol, emit in the traditional way. */
1995 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
1996 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
1997 }
1998 }
1999
2000 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
2001 If TEMP is nonzero, we are forbidden to use any other scratch
2002 registers. Otherwise, we are allowed to generate them as needed.
2003
2004 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2005 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2006
void
sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
{
  rtx temp1, temp2, temp3, temp4, temp5;
  rtx ti_temp = 0;

  /* A TImode TEMP names a pair of adjacent DImode hard registers
     (see the reload_indi/reload_outdi patterns).  Remember the pair
     and work with its even half by default.  */
  if (temp && GET_MODE (temp) == TImode)
    {
      ti_temp = temp;
      temp = gen_rtx_REG (DImode, REGNO (temp));
    }

  /* SPARC-V9 code-model support.  */
  switch (sparc_cmodel)
    {
    case CM_MEDLOW:
      /* The range spanned by all instructions in the object is less
         than 2^31 bytes (2GB) and the distance from any instruction
         to the location of the label _GLOBAL_OFFSET_TABLE_ is less
         than 2^31 bytes (2GB).

         The executable must be in the low 4TB of the virtual address
         space.

         sethi  %hi(symbol), %temp1
         or     %temp1, %lo(symbol), %reg  */
      if (temp)
        temp1 = temp;  /* op0 is allowed.  */
      else
        temp1 = gen_reg_rtx (DImode);

      emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
      emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
      break;

    case CM_MEDMID:
      /* The range spanned by all instructions in the object is less
         than 2^31 bytes (2GB) and the distance from any instruction
         to the location of the label _GLOBAL_OFFSET_TABLE_ is less
         than 2^31 bytes (2GB).

         The executable must be in the low 16TB of the virtual address
         space.

         sethi  %h44(symbol), %temp1
         or     %temp1, %m44(symbol), %temp2
         sllx   %temp2, 12, %temp3
         or     %temp3, %l44(symbol), %reg  */
      if (temp)
        {
          /* With a single scratch, reuse OP0 for the intermediates
             since only the last step may clobber it.  */
          temp1 = op0;
          temp2 = op0;
          temp3 = temp;  /* op0 is allowed.  */
        }
      else
        {
          temp1 = gen_reg_rtx (DImode);
          temp2 = gen_reg_rtx (DImode);
          temp3 = gen_reg_rtx (DImode);
        }

      emit_insn (gen_seth44 (temp1, op1));
      emit_insn (gen_setm44 (temp2, temp1, op1));
      emit_insn (gen_rtx_SET (temp3,
                              gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
      emit_insn (gen_setl44 (op0, temp3, op1));
      break;

    case CM_MEDANY:
      /* The range spanned by all instructions in the object is less
         than 2^31 bytes (2GB) and the distance from any instruction
         to the location of the label _GLOBAL_OFFSET_TABLE_ is less
         than 2^31 bytes (2GB).

         The executable can be placed anywhere in the virtual address
         space.

         sethi  %hh(symbol), %temp1
         sethi  %lm(symbol), %temp2
         or     %temp1, %hm(symbol), %temp3
         sllx   %temp3, 32, %temp4
         or     %temp4, %temp2, %temp5
         or     %temp5, %lo(symbol), %reg  */
      if (temp)
        {
          /* It is possible that one of the registers we got for operands[2]
             might coincide with that of operands[0] (which is why we made
             it TImode).  Pick the other one to use as our scratch.  */
          if (rtx_equal_p (temp, op0))
            {
              gcc_assert (ti_temp);
              temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
            }
          temp1 = op0;
          temp2 = temp;  /* op0 is _not_ allowed, see above.  */
          temp3 = op0;
          temp4 = op0;
          temp5 = op0;
        }
      else
        {
          temp1 = gen_reg_rtx (DImode);
          temp2 = gen_reg_rtx (DImode);
          temp3 = gen_reg_rtx (DImode);
          temp4 = gen_reg_rtx (DImode);
          temp5 = gen_reg_rtx (DImode);
        }

      emit_insn (gen_sethh (temp1, op1));
      emit_insn (gen_setlm (temp2, op1));
      emit_insn (gen_sethm (temp3, temp1, op1));
      emit_insn (gen_rtx_SET (temp4,
                              gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
      emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
      emit_insn (gen_setlo (op0, temp5, op1));
      break;

    case CM_EMBMEDANY:
      /* Old old old backwards compatibility kruft here.
         Essentially it is MEDLOW with a fixed 64-bit
         virtual base added to all data segment addresses.
         Text-segment stuff is computed like MEDANY, we can't
         reuse the code above because the relocation knobs
         look different.

         Data segment:  sethi   %hi(symbol), %temp1
                        add     %temp1, EMBMEDANY_BASE_REG, %temp2
                        or      %temp2, %lo(symbol), %reg  */
      if (data_segment_operand (op1, GET_MODE (op1)))
        {
          if (temp)
            {
              temp1 = temp;  /* op0 is allowed.  */
              temp2 = op0;
            }
          else
            {
              temp1 = gen_reg_rtx (DImode);
              temp2 = gen_reg_rtx (DImode);
            }

          emit_insn (gen_embmedany_sethi (temp1, op1));
          emit_insn (gen_embmedany_brsum (temp2, temp1));
          emit_insn (gen_embmedany_losum (op0, temp2, op1));
        }

      /* Text segment:  sethi   %uhi(symbol), %temp1
                        sethi   %hi(symbol), %temp2
                        or      %temp1, %ulo(symbol), %temp3
                        sllx    %temp3, 32, %temp4
                        or      %temp4, %temp2, %temp5
                        or      %temp5, %lo(symbol), %reg  */
      else
        {
          if (temp)
            {
              /* It is possible that one of the registers we got for operands[2]
                 might coincide with that of operands[0] (which is why we made
                 it TImode).  Pick the other one to use as our scratch.  */
              if (rtx_equal_p (temp, op0))
                {
                  gcc_assert (ti_temp);
                  temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
                }
              temp1 = op0;
              temp2 = temp;  /* op0 is _not_ allowed, see above.  */
              temp3 = op0;
              temp4 = op0;
              temp5 = op0;
            }
          else
            {
              temp1 = gen_reg_rtx (DImode);
              temp2 = gen_reg_rtx (DImode);
              temp3 = gen_reg_rtx (DImode);
              temp4 = gen_reg_rtx (DImode);
              temp5 = gen_reg_rtx (DImode);
            }

          emit_insn (gen_embmedany_textuhi (temp1, op1));
          emit_insn (gen_embmedany_texthi  (temp2, op1));
          emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
          emit_insn (gen_rtx_SET (temp4,
                                  gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
          emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
          emit_insn (gen_embmedany_textlo  (op0, temp5, op1));
        }
      break;

    default:
      gcc_unreachable ();
    }
}
2200
2201 /* These avoid problems when cross compiling. If we do not
2202 go through all this hair then the optimizer will see
2203 invalid REG_EQUAL notes or in some cases none at all. */
2204 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2205 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2206 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2207 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2208
2209 /* The optimizer is not to assume anything about exactly
2210 which bits are set for a HIGH, they are unspecified.
2211 Unfortunately this leads to many missed optimizations
2212 during CSE. We mask out the non-HIGH bits, and matches
2213 a plain movdi, to alleviate this problem. */
2214 static rtx
gen_safe_HIGH64(rtx dest,HOST_WIDE_INT val)2215 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2216 {
2217 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2218 }
2219
2220 static rtx
gen_safe_SET64(rtx dest,HOST_WIDE_INT val)2221 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2222 {
2223 return gen_rtx_SET (dest, GEN_INT (val));
2224 }
2225
2226 static rtx
gen_safe_OR64(rtx src,HOST_WIDE_INT val)2227 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2228 {
2229 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2230 }
2231
2232 static rtx
gen_safe_XOR64(rtx src,HOST_WIDE_INT val)2233 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2234 {
2235 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2236 }
2237
2238 /* Worker routines for 64-bit constant formation on arch64.
2239 One of the key things to be doing in these emissions is
2240 to create as many temp REGs as possible. This makes it
2241 possible for half-built constants to be used later when
2242 such values are similar to something required later on.
2243 Without doing this, the optimizer cannot see such
2244 opportunities. */
2245
2246 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2247 unsigned HOST_WIDE_INT, int);
2248
/* Load into OP0 a 64-bit constant whose value is LOW_BITS zero-extended
   (IS_NEG == 0) or sign-extended to all-ones in the high word
   (IS_NEG != 0), using TEMP as scratch.  Two insns: sethi + or, or for
   the negative case sethi of the complement + xor/not to flip the high
   word back to all-ones.  */

static void
sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
                               unsigned HOST_WIDE_INT low_bits, int is_neg)
{
  unsigned HOST_WIDE_INT high_bits;

  /* For the negated form, sethi builds the complement; the XOR below
     restores the requested value while setting bits 32-63.  */
  if (is_neg)
    high_bits = (~low_bits) & 0xffffffff;
  else
    high_bits = low_bits;

  emit_insn (gen_safe_HIGH64 (temp, high_bits));
  if (!is_neg)
    {
      emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
    }
  else
    {
      /* If we are XOR'ing with -1, then we should emit a one's complement
         instead.  This way the combiner will notice logical operations
         such as ANDN later on and substitute.  */
      if ((low_bits & 0x3ff) == 0x3ff)
        {
          emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
        }
      else
        {
          emit_insn (gen_rtx_SET (op0,
                                  gen_safe_XOR64 (temp,
                                                  (-(HOST_WIDE_INT)0x400
                                                   | (low_bits & 0x3ff)))));
        }
    }
}
2283
2284 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2285 unsigned HOST_WIDE_INT, int);
2286
/* Load into OP0 the constant (HIGH_BITS << SHIFT_COUNT) | LOW_IMMEDIATE,
   using TEMP as scratch.  HIGH_BITS is materialized with sethi[+or] or a
   single mov, shifted into place, then LOW_IMMEDIATE (a simm13, possibly
   zero) is OR'd in.  */

static void
sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
                               unsigned HOST_WIDE_INT high_bits,
                               unsigned HOST_WIDE_INT low_immediate,
                               int shift_count)
{
  rtx temp2 = op0;

  if ((high_bits & 0xfffffc00) != 0)
    {
      emit_insn (gen_safe_HIGH64 (temp, high_bits));
      /* Only emit the OR when HIGH_BITS has bits below the sethi field.  */
      if ((high_bits & ~0xfffffc00) != 0)
        emit_insn (gen_rtx_SET (op0,
                                gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
        temp2 = temp;
    }
  else
    {
      /* Small enough for a single mov.  */
      emit_insn (gen_safe_SET64 (temp, high_bits));
      temp2 = temp;
    }

  /* Now shift it up into place.  */
  emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
                                               GEN_INT (shift_count))));

  /* If there is a low immediate part piece, finish up by
     putting that in as well.  */
  if (low_immediate != 0)
    emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
}
2319
2320 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2321 unsigned HOST_WIDE_INT);
2322
/* Full 64-bit constant decomposition: load <HIGH_BITS, LOW_BITS> into
   OP0 using TEMP as scratch.  Even though this is the 'worst' case, we
   still optimize a few things away.  */
static void
sparc_emit_set_const64_longway (rtx op0, rtx temp,
                                unsigned HOST_WIDE_INT high_bits,
                                unsigned HOST_WIDE_INT low_bits)
{
  rtx sub_temp = op0;

  /* Outside of reload we may take an extra pseudo for the partial
     high-word result.  */
  if (can_create_pseudo_p ())
    sub_temp = gen_reg_rtx (DImode);

  /* Materialize the high 32 bits into SUB_TEMP.  */
  if ((high_bits & 0xfffffc00) != 0)
    {
      emit_insn (gen_safe_HIGH64 (temp, high_bits));
      if ((high_bits & ~0xfffffc00) != 0)
        emit_insn (gen_rtx_SET (sub_temp,
                                gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
        sub_temp = temp;
    }
  else
    {
      emit_insn (gen_safe_SET64 (temp, high_bits));
      sub_temp = temp;
    }

  if (can_create_pseudo_p ())
    {
      /* Build the low word independently (sethi[+or]) and add it to the
         shifted high word; fresh pseudos let CSE reuse the pieces.  */
      rtx temp2 = gen_reg_rtx (DImode);
      rtx temp3 = gen_reg_rtx (DImode);
      rtx temp4 = gen_reg_rtx (DImode);

      emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
                                                     GEN_INT (32))));

      emit_insn (gen_safe_HIGH64 (temp2, low_bits));
      if ((low_bits & ~0xfffffc00) != 0)
        {
          emit_insn (gen_rtx_SET (temp3,
                                  gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
          emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
        }
      else
        {
          emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
        }
    }
  else
    {
      /* No spare registers: shift the low word in piecewise, 12+12+8
         bits at a time, skipping all-zero chunks.  */
      rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
      rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
      rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
      int to_shift = 12;

      /* We are in the middle of reload, so this is really
         painful.  However we do still make an attempt to
         avoid emitting truly stupid code.  */
      if (low1 != const0_rtx)
        {
          emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
                                                       GEN_INT (to_shift))));
          emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
          sub_temp = op0;
          to_shift = 12;
        }
      else
        {
          to_shift += 12;
        }
      if (low2 != const0_rtx)
        {
          emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
                                                       GEN_INT (to_shift))));
          emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
          sub_temp = op0;
          to_shift = 8;
        }
      else
        {
          to_shift += 8;
        }
      emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
                                                   GEN_INT (to_shift))));
      if (low3 != const0_rtx)
        emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
      /* phew...  */
    }
}
2412
2413 /* Analyze a 64-bit constant for certain properties. */
2414 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2415 unsigned HOST_WIDE_INT,
2416 int *, int *, int *);
2417
2418 static void
analyze_64bit_constant(unsigned HOST_WIDE_INT high_bits,unsigned HOST_WIDE_INT low_bits,int * hbsp,int * lbsp,int * abbasp)2419 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2420 unsigned HOST_WIDE_INT low_bits,
2421 int *hbsp, int *lbsp, int *abbasp)
2422 {
2423 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2424 int i;
2425
2426 lowest_bit_set = highest_bit_set = -1;
2427 i = 0;
2428 do
2429 {
2430 if ((lowest_bit_set == -1)
2431 && ((low_bits >> i) & 1))
2432 lowest_bit_set = i;
2433 if ((highest_bit_set == -1)
2434 && ((high_bits >> (32 - i - 1)) & 1))
2435 highest_bit_set = (64 - i - 1);
2436 }
2437 while (++i < 32
2438 && ((highest_bit_set == -1)
2439 || (lowest_bit_set == -1)));
2440 if (i == 32)
2441 {
2442 i = 0;
2443 do
2444 {
2445 if ((lowest_bit_set == -1)
2446 && ((high_bits >> i) & 1))
2447 lowest_bit_set = i + 32;
2448 if ((highest_bit_set == -1)
2449 && ((low_bits >> (32 - i - 1)) & 1))
2450 highest_bit_set = 32 - i - 1;
2451 }
2452 while (++i < 32
2453 && ((highest_bit_set == -1)
2454 || (lowest_bit_set == -1)));
2455 }
2456 /* If there are no bits set this should have gone out
2457 as one instruction! */
2458 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2459 all_bits_between_are_set = 1;
2460 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2461 {
2462 if (i < 32)
2463 {
2464 if ((low_bits & (1 << i)) != 0)
2465 continue;
2466 }
2467 else
2468 {
2469 if ((high_bits & (1 << (i - 32))) != 0)
2470 continue;
2471 }
2472 all_bits_between_are_set = 0;
2473 break;
2474 }
2475 *hbsp = highest_bit_set;
2476 *lbsp = lowest_bit_set;
2477 *abbasp = all_bits_between_are_set;
2478 }
2479
2480 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2481
2482 static int
const64_is_2insns(unsigned HOST_WIDE_INT high_bits,unsigned HOST_WIDE_INT low_bits)2483 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2484 unsigned HOST_WIDE_INT low_bits)
2485 {
2486 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2487
2488 if (high_bits == 0
2489 || high_bits == 0xffffffff)
2490 return 1;
2491
2492 analyze_64bit_constant (high_bits, low_bits,
2493 &highest_bit_set, &lowest_bit_set,
2494 &all_bits_between_are_set);
2495
2496 if ((highest_bit_set == 63
2497 || lowest_bit_set == 0)
2498 && all_bits_between_are_set != 0)
2499 return 1;
2500
2501 if ((highest_bit_set - lowest_bit_set) < 21)
2502 return 1;
2503
2504 return 0;
2505 }
2506
2507 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2508 unsigned HOST_WIDE_INT,
2509 int, int);
2510
2511 static unsigned HOST_WIDE_INT
create_simple_focus_bits(unsigned HOST_WIDE_INT high_bits,unsigned HOST_WIDE_INT low_bits,int lowest_bit_set,int shift)2512 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2513 unsigned HOST_WIDE_INT low_bits,
2514 int lowest_bit_set, int shift)
2515 {
2516 HOST_WIDE_INT hi, lo;
2517
2518 if (lowest_bit_set < 32)
2519 {
2520 lo = (low_bits >> lowest_bit_set) << shift;
2521 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2522 }
2523 else
2524 {
2525 lo = 0;
2526 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2527 }
2528 gcc_assert (! (hi & lo));
2529 return (hi | lo);
2530 }
2531
2532 /* Here we are sure to be arch64 and this is an integer constant
2533 being loaded into a register. Emit the most efficient
2534 insn sequence possible. Detection of all the 1-insn cases
2535 has been done already. */
static void
sparc_emit_set_const64 (rtx op0, rtx op1)
{
  unsigned HOST_WIDE_INT high_bits, low_bits;
  int lowest_bit_set, highest_bit_set;
  int all_bits_between_are_set;
  rtx temp = 0;

  /* Sanity check that we know what we are working with.  */
  gcc_assert (TARGET_ARCH64
	      && (GET_CODE (op0) == SUBREG
		  || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));

  /* During reload we must reuse the destination as scratch.  */
  if (! can_create_pseudo_p ())
    temp = op0;

  if (GET_CODE (op1) != CONST_INT)
    {
      sparc_emit_set_symbolic_const64 (op0, op1, temp);
      return;
    }

  if (! temp)
    temp = gen_reg_rtx (DImode);

  high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
  low_bits = (INTVAL (op1) & 0xffffffff);

  /* low_bits	bits 0  --> 31
     high_bits	bits 32 --> 63  */

  analyze_64bit_constant (high_bits, low_bits,
			  &highest_bit_set, &lowest_bit_set,
			  &all_bits_between_are_set);

  /* First try for a 2-insn sequence.  */

  /* These situations are preferred because the optimizer can
   * do more things with them:
   * 1) mov	-1, %reg
   *    sllx	%reg, shift, %reg
   * 2) mov	-1, %reg
   *    srlx	%reg, shift, %reg
   * 3) mov	some_small_const, %reg
   *    sllx	%reg, shift, %reg
   */
  if (((highest_bit_set == 63
	|| lowest_bit_set == 0)
       && all_bits_between_are_set != 0)
      || ((highest_bit_set - lowest_bit_set) < 12))
    {
      HOST_WIDE_INT the_const = -1;
      int shift = lowest_bit_set;

      if ((highest_bit_set != 63
	   && lowest_bit_set != 0)
	  || all_bits_between_are_set == 0)
	{
	  /* Case 3: a small field of bits, moved down to bit 0.  */
	  the_const =
	    create_simple_focus_bits (high_bits, low_bits,
				      lowest_bit_set, 0);
	}
      else if (lowest_bit_set == 0)
	/* Case 2: run of ones starting at bit 0; a negative shift
	   encodes "shift right" below.  */
	shift = -(63 - highest_bit_set);

      gcc_assert (SPARC_SIMM13_P (the_const));
      gcc_assert (shift != 0);

      emit_insn (gen_safe_SET64 (temp, the_const));
      if (shift > 0)
	emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
						     GEN_INT (shift))));
      else if (shift < 0)
	emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
						       GEN_INT (-shift))));
      return;
    }

  /* Now a range of 22 or less bits set somewhere.
   * 1) sethi	%hi(focus_bits), %reg
   *    sllx	%reg, shift, %reg
   * 2) sethi	%hi(focus_bits), %reg
   *    srlx	%reg, shift, %reg
   */
  if ((highest_bit_set - lowest_bit_set) < 21)
    {
      unsigned HOST_WIDE_INT focus_bits =
	create_simple_focus_bits (high_bits, low_bits,
				  lowest_bit_set, 10);

      gcc_assert (SPARC_SETHI_P (focus_bits));
      gcc_assert (lowest_bit_set != 10);

      emit_insn (gen_safe_HIGH64 (temp, focus_bits));

      /* If lowest_bit_set == 10 then a sethi alone could have done it.  */
      if (lowest_bit_set < 10)
	emit_insn (gen_rtx_SET (op0,
				gen_rtx_LSHIFTRT (DImode, temp,
						  GEN_INT (10 - lowest_bit_set))));
      else if (lowest_bit_set > 10)
	emit_insn (gen_rtx_SET (op0,
				gen_rtx_ASHIFT (DImode, temp,
						GEN_INT (lowest_bit_set - 10))));
      return;
    }

  /* 1) sethi	%hi(low_bits), %reg
   *    or	%reg, %lo(low_bits), %reg
   * 2) sethi	%hi(~low_bits), %reg
   *	xor	%reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
   */
  if (high_bits == 0
      || high_bits == 0xffffffff)
    {
      sparc_emit_set_const64_quick1 (op0, temp, low_bits,
				     (high_bits == 0xffffffff));
      return;
    }

  /* Now, try 3-insn sequences.  */

  /* 1) sethi	%hi(high_bits), %reg
   *    or	%reg, %lo(high_bits), %reg
   *    sllx	%reg, 32, %reg
   */
  if (low_bits == 0)
    {
      sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
      return;
    }

  /* We may be able to do something quick
     when the constant is negated, so try that.  */
  if (const64_is_2insns ((~high_bits) & 0xffffffff,
			 (~low_bits) & 0xfffffc00))
    {
      /* NOTE: The trailing bits get XOR'd so we need the
	 non-negated bits, not the negated ones.  */
      unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;

      if ((((~high_bits) & 0xffffffff) == 0
	   && ((~low_bits) & 0x80000000) == 0)
	  || (((~high_bits) & 0xffffffff) == 0xffffffff
	      && ((~low_bits) & 0x80000000) != 0))
	{
	  /* The complement sign-extends from 32 bits, so it can be
	     built with a mov or sethi alone when it fits.  */
	  unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);

	  if ((SPARC_SETHI_P (fast_int)
	       && (~high_bits & 0xffffffff) == 0)
	      || SPARC_SIMM13_P (fast_int))
	    emit_insn (gen_safe_SET64 (temp, fast_int));
	  else
	    sparc_emit_set_const64 (temp, GEN_INT (fast_int));
	}
      else
	{
	  /* Recurse on the full 64-bit complement (without its low
	     10 bits, which the XOR below supplies).  */
	  rtx negated_const;
	  negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
				   (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
	  sparc_emit_set_const64 (temp, negated_const);
	}

      /* If we are XOR'ing with -1, then we should emit a one's complement
	 instead.  This way the combiner will notice logical operations
	 such as ANDN later on and substitute.  */
      if (trailing_bits == 0x3ff)
	{
	  emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (op0,
				  gen_safe_XOR64 (temp,
						  (-0x400 | trailing_bits))));
	}
      return;
    }

  /* 1) sethi	%hi(xxx), %reg
   *    or	%reg, %lo(xxx), %reg
   *	sllx	%reg, yyy, %reg
   *
   * ??? This is just a generalized version of the low_bits==0
   * thing above, FIXME...
   */
  if ((highest_bit_set - lowest_bit_set) < 32)
    {
      unsigned HOST_WIDE_INT focus_bits =
	create_simple_focus_bits (high_bits, low_bits,
				  lowest_bit_set, 0);

      /* We can't get here in this state.  */
      gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);

      /* So what we know is that the set bits straddle the
	 middle of the 64-bit word.  */
      sparc_emit_set_const64_quick2 (op0, temp,
				     focus_bits, 0,
				     lowest_bit_set);
      return;
    }

  /* 1) sethi	%hi(high_bits), %reg
   *    or	%reg, %lo(high_bits), %reg
   *    sllx	%reg, 32, %reg
   *	or	%reg, low_bits, %reg
   */
  if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
    {
      sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
      return;
    }

  /* The easiest way when all else fails, is full decomposition.  */
  sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
}
2753
2754 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2755 return the mode to be used for the comparison. For floating-point,
2756 CCFP[E]mode is used. CC_NOOVmode should be used when the first operand
2757 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2758 processing is needed. */
2759
2760 machine_mode
select_cc_mode(enum rtx_code op,rtx x,rtx y ATTRIBUTE_UNUSED)2761 select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
2762 {
2763 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2764 {
2765 switch (op)
2766 {
2767 case EQ:
2768 case NE:
2769 case UNORDERED:
2770 case ORDERED:
2771 case UNLT:
2772 case UNLE:
2773 case UNGT:
2774 case UNGE:
2775 case UNEQ:
2776 case LTGT:
2777 return CCFPmode;
2778
2779 case LT:
2780 case LE:
2781 case GT:
2782 case GE:
2783 return CCFPEmode;
2784
2785 default:
2786 gcc_unreachable ();
2787 }
2788 }
2789 else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2790 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2791 {
2792 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2793 return CCX_NOOVmode;
2794 else
2795 return CC_NOOVmode;
2796 }
2797 else
2798 {
2799 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2800 return CCXmode;
2801 else
2802 return CCmode;
2803 }
2804 }
2805
2806 /* Emit the compare insn and return the CC reg for a CODE comparison
2807 with operands X and Y. */
2808
static rtx
gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
{
  machine_mode mode;
  rtx cc_reg;

  /* If X is already a CC value, the comparison was emitted earlier;
     just hand it back.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return x;

  mode = SELECT_CC_MODE (code, x, y);

  /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
     fcc regs (cse can't tell they're really call clobbered regs and will
     remove a duplicate comparison even if there is an intervening function
     call - it will then try to reload the cc reg via an int reg which is why
     we need the movcc patterns).  It is possible to provide the movcc
     patterns by using the ldxfsr/stxfsr v9 insns.  I tried it: you need two
     registers (say %g1,%g5) and it takes about 6 insns.  A better fix would be
     to tell cse that CCFPE mode registers (even pseudos) are call
     clobbered.  */

  /* ??? This is an experiment.  Rather than making changes to cse which may
     or may not be easy/clean, we do our own cse.  This is possible because
     we will generate hard registers.  Cse knows they're call clobbered (it
     doesn't know the same thing about pseudos).  If we guess wrong, no big
     deal, but if we win, great!  */

  if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
#if 1 /* experiment */
    {
      int reg;
      /* We cycle through the registers to ensure they're all exercised.  */
      static int next_fcc_reg = 0;
      /* Previous x,y for each fcc reg.  */
      static rtx prev_args[4][2];

      /* Scan prev_args for x,y: a hit reuses the %fccN that already
	 holds this comparison, implementing the hand-rolled CSE.  */
      for (reg = 0; reg < 4; reg++)
	if (prev_args[reg][0] == x && prev_args[reg][1] == y)
	  break;
      if (reg == 4)
	{
	  /* Miss: claim the next reg round-robin and record X,Y.  */
	  reg = next_fcc_reg;
	  prev_args[reg][0] = x;
	  prev_args[reg][1] = y;
	  next_fcc_reg = (next_fcc_reg + 1) & 3;
	}
      cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
    }
#else
  cc_reg = gen_reg_rtx (mode);
#endif /* ! experiment */
  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
  else
    cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);

  /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD.  If we do, this
     will only result in an unrecognizable insn so no point in asserting.  */
  emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));

  return cc_reg;
}
2872
2873
2874 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
2875
2876 rtx
gen_compare_reg(rtx cmp)2877 gen_compare_reg (rtx cmp)
2878 {
2879 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
2880 }
2881
2882 /* This function is used for v9 only.
2883 DEST is the target of the Scc insn.
2884 CODE is the code for an Scc's comparison.
2885 X and Y are the values we compare.
2886
2887 This function is needed to turn
2888
2889 (set (reg:SI 110)
2890 (gt (reg:CCX 100 %icc)
2891 (const_int 0)))
2892 into
2893 (set (reg:SI 110)
2894 (gt:DI (reg:CCX 100 %icc)
2895 (const_int 0)))
2896
2897 IE: The instruction recognizer needs to see the mode of the comparison to
2898 find the right instruction. We could use "gt:DI" right in the
2899 define_expand, but leaving it out allows us to handle DI, SI, etc. */
2900
static int
gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
{
  /* DImode requires 64-bit registers; punt on 32-bit targets.  */
  if (! TARGET_ARCH64
      && (GET_MODE (x) == DImode
	  || GET_MODE (dest) == DImode))
    return 0;

  /* Try to use the movrCC insns.  */
  if (TARGET_ARCH64
      && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
      && y == const0_rtx
      && v9_regcmp_p (compare_code))
    {
      rtx op0 = x;
      rtx temp;

      /* Special case for op0 != 0.  This can be done with one instruction if
	 dest == x.  */

      if (compare_code == NE
	  && GET_MODE (dest) == DImode
	  && rtx_equal_p (op0, dest))
	{
	  /* movrnz op0, 1, dest: dest keeps its old value (here, op0
	     itself) when op0 == 0, becomes 1 otherwise.  */
	  emit_insn (gen_rtx_SET (dest,
			      gen_rtx_IF_THEN_ELSE (DImode,
				       gen_rtx_fmt_ee (compare_code, DImode,
						       op0, const0_rtx),
				       const1_rtx,
				       dest)));
	  return 1;
	}

      if (reg_overlap_mentioned_p (dest, op0))
	{
	  /* Handle the case where dest == x.
	     We "early clobber" the result.  */
	  op0 = gen_reg_rtx (GET_MODE (x));
	  emit_move_insn (op0, x);
	}

      /* Clear DEST, then conditionally set it to 1 on the register
	 compare; movrCC needs a DImode comparison operand.  */
      emit_insn (gen_rtx_SET (dest, const0_rtx));
      if (GET_MODE (op0) != DImode)
	{
	  temp = gen_reg_rtx (DImode);
	  convert_move (temp, op0, 0);
	}
      else
	temp = op0;
      emit_insn (gen_rtx_SET (dest,
			  gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
				   gen_rtx_fmt_ee (compare_code, DImode,
						   temp, const0_rtx),
				   const1_rtx,
				   dest)));
      return 1;
    }
  else
    {
      /* General case: materialize the CC value, then clear DEST and
	 conditionally set it to 1 with a movCC.  */
      x = gen_compare_reg_1 (compare_code, x, y);
      y = const0_rtx;

      gcc_assert (GET_MODE (x) != CC_NOOVmode
		  && GET_MODE (x) != CCX_NOOVmode);

      emit_insn (gen_rtx_SET (dest, const0_rtx));
      emit_insn (gen_rtx_SET (dest,
			  gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
				   gen_rtx_fmt_ee (compare_code,
						   GET_MODE (x), x, y),
				   const1_rtx, dest)));
      return 1;
    }
}
2975
2976
/* Emit an scc insn.  For seq, sne, sgeu, and sltu, we can do this
   without jumps using the addx/subx instructions.

   operands[0] is the destination, operands[1] the comparison rtx whose
   code is used, operands[2] and operands[3] the values compared.

   Return true if a sequence was emitted, false if the caller should
   emit branches instead.  */

bool
emit_scc_insn (rtx operands[])
{
  rtx tem;
  rtx x;
  rtx y;
  enum rtx_code code;

  /* The quad-word fp compare library routines all return nonzero to indicate
     true, which is different from the equivalent libgcc routines, so we must
     handle them specially here.  */
  if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
    {
      operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
					      GET_CODE (operands[1]));
      operands[2] = XEXP (operands[1], 0);
      operands[3] = XEXP (operands[1], 1);
    }

  code = GET_CODE (operands[1]);
  x = operands[2];
  y = operands[3];

  /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
     more applications).  The exception to this is "reg != 0" which can
     be done in one instruction on v9 (so we do it).  */
  if (code == EQ)
    {
      if (GET_MODE (x) == SImode)
	{
	  rtx pat;
	  if (TARGET_ARCH64)
	    pat = gen_seqsidi_special (operands[0], x, y);
	  else
	    pat = gen_seqsisi_special (operands[0], x, y);
	  emit_insn (pat);
	  return true;
	}
      else if (GET_MODE (x) == DImode)
	{
	  rtx pat = gen_seqdi_special (operands[0], x, y);
	  emit_insn (pat);
	  return true;
	}
    }

  if (code == NE)
    {
      if (GET_MODE (x) == SImode)
	{
	  rtx pat;
	  if (TARGET_ARCH64)
	    pat = gen_snesidi_special (operands[0], x, y);
	  else
	    pat = gen_snesisi_special (operands[0], x, y);
	  emit_insn (pat);
	  return true;
	}
      else if (GET_MODE (x) == DImode)
	{
	  rtx pat;
	  if (TARGET_VIS3)
	    pat = gen_snedi_special_vis3 (operands[0], x, y);
	  else
	    pat = gen_snedi_special (operands[0], x, y);
	  emit_insn (pat);
	  return true;
	}
    }

  /* On 64-bit, prefer the v9 conditional-move sequence for DImode,
     except for GTU/LTU with VIS3 which are handled by addxc below.  */
  if (TARGET_V9
      && TARGET_ARCH64
      && GET_MODE (x) == DImode
      && !(TARGET_VIS3
	   && (code == GTU || code == LTU))
      && gen_v9_scc (operands[0], code, x, y))
    return true;

  /* We can do LTU and GEU using the addx/subx instructions too.  And
     for GTU/LEU, if both operands are registers swap them and fall
     back to the easy case.  */
  if (code == GTU || code == LEU)
    {
      if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
	  && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
	{
	  tem = x;
	  x = y;
	  y = tem;
	  code = swap_condition (code);
	}
    }

  if (code == LTU
      || (!TARGET_VIS3 && code == GEU))
    {
      /* Emit the comparison and let the ltu/geu define_insn pick it up
	 from the carry flag via addx/subx.  */
      emit_insn (gen_rtx_SET (operands[0],
			      gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
					      gen_compare_reg_1 (code, x, y),
					      const0_rtx)));
      return true;
    }

  /* All the possibilities to use addx/subx based sequences have been
     exhausted, try for a 3 instruction sequence using v9 conditional
     moves.  */
  if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
    return true;

  /* Nope, do branches.  */
  return false;
}
3092
3093 /* Emit a conditional jump insn for the v9 architecture using comparison code
3094 CODE and jump target LABEL.
3095 This function exists to take advantage of the v9 brxx insns. */
3096
3097 static void
emit_v9_brxx_insn(enum rtx_code code,rtx op0,rtx label)3098 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3099 {
3100 emit_jump_insn (gen_rtx_SET (pc_rtx,
3101 gen_rtx_IF_THEN_ELSE (VOIDmode,
3102 gen_rtx_fmt_ee (code, GET_MODE (op0),
3103 op0, const0_rtx),
3104 gen_rtx_LABEL_REF (VOIDmode, label),
3105 pc_rtx)));
3106 }
3107
3108 /* Emit a conditional jump insn for the UA2011 architecture using
3109 comparison code CODE and jump target LABEL. This function exists
3110 to take advantage of the UA2011 Compare and Branch insns. */
3111
3112 static void
emit_cbcond_insn(enum rtx_code code,rtx op0,rtx op1,rtx label)3113 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3114 {
3115 rtx if_then_else;
3116
3117 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3118 gen_rtx_fmt_ee(code, GET_MODE(op0),
3119 op0, op1),
3120 gen_rtx_LABEL_REF (VOIDmode, label),
3121 pc_rtx);
3122
3123 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3124 }
3125
/* Emit a conditional branch.  operands[0] is the comparison rtx whose
   code selects the branch condition, operands[1] and operands[2] are
   the compared values, and operands[3] is the target label.  */

void
emit_conditional_branch_insn (rtx operands[])
{
  /* The quad-word fp compare library routines all return nonzero to indicate
     true, which is different from the equivalent libgcc routines, so we must
     handle them specially here.  */
  if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
    {
      operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
					      GET_CODE (operands[0]));
      operands[1] = XEXP (operands[0], 0);
      operands[2] = XEXP (operands[0], 1);
    }

  /* If we can tell early on that the comparison is against a constant
     that won't fit in the 5-bit signed immediate field of a cbcond,
     use one of the other v9 conditional branch sequences.  */
  if (TARGET_CBCOND
      && GET_CODE (operands[1]) == REG
      && (GET_MODE (operands[1]) == SImode
	  || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
      && (GET_CODE (operands[2]) != CONST_INT
	  || SPARC_SIMM5_P (INTVAL (operands[2]))))
    {
      emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
      return;
    }

  /* A 64-bit register compared against zero can use the v9 brxx
     (branch on register contents) instructions.  */
  if (TARGET_ARCH64 && operands[2] == const0_rtx
      && GET_CODE (operands[1]) == REG
      && GET_MODE (operands[1]) == DImode)
    {
      emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
      return;
    }

  /* Fall back: materialize the condition codes and branch on them.  */
  operands[1] = gen_compare_reg (operands[0]);
  operands[2] = const0_rtx;
  operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
				operands[1], operands[2]);
  emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
				  operands[3]));
}
3169
3170
3171 /* Generate a DFmode part of a hard TFmode register.
3172 REG is the TFmode hard register, LOW is 1 for the
3173 low 64bit of the register and 0 otherwise.
3174 */
3175 rtx
gen_df_reg(rtx reg,int low)3176 gen_df_reg (rtx reg, int low)
3177 {
3178 int regno = REGNO (reg);
3179
3180 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3181 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3182 return gen_rtx_REG (DFmode, regno);
3183 }
3184
/* Generate a call to FUNC with OPERANDS.  Operand 0 is the return value.
   Unlike normal calls, TFmode operands are passed by reference.  It is
   assumed that no more than 3 operands are required.  */

static void
emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
{
  rtx ret_slot = NULL, arg[3], func_sym;
  int i;

  /* We only expect to be called for conversions, unary, and binary ops.  */
  gcc_assert (nargs == 2 || nargs == 3);

  for (i = 0; i < nargs; ++i)
    {
      rtx this_arg = operands[i];
      rtx this_slot;

      /* TFmode arguments and return values are passed by reference.  */
      if (GET_MODE (this_arg) == TFmode)
	{
	  int force_stack_temp;

	  /* With a buggy quad-float library, the result (operand 0) must
	     go through a fresh stack slot rather than its final home.  */
	  force_stack_temp = 0;
	  if (TARGET_BUGGY_QP_LIB && i == 0)
	    force_stack_temp = 1;

	  if (GET_CODE (this_arg) == MEM
	      && ! force_stack_temp)
	    {
	      /* The operand already lives in memory; pass its address.
		 Mark the underlying object addressable since its address
		 now escapes into the libcall.  */
	      tree expr = MEM_EXPR (this_arg);
	      if (expr)
		mark_addressable (expr);
	      this_arg = XEXP (this_arg, 0);
	    }
	  else if (CONSTANT_P (this_arg)
		   && ! force_stack_temp)
	    {
	      /* Constants go to the constant pool; pass that address.  */
	      this_slot = force_const_mem (TFmode, this_arg);
	      this_arg = XEXP (this_slot, 0);
	    }
	  else
	    {
	      /* Otherwise spill to a stack temporary and pass its address.  */
	      this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));

	      /* Operand 0 is the return value.  We'll copy it out later.  */
	      if (i > 0)
		emit_move_insn (this_slot, this_arg);
	      else
		ret_slot = this_slot;

	      this_arg = XEXP (this_slot, 0);
	    }
	}

      arg[i] = this_arg;
    }

  func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);

  if (GET_MODE (operands[0]) == TFmode)
    {
      /* TFmode result: returned through the by-reference slot (arg[0]),
	 so the libcall itself is emitted as returning void.  */
      if (nargs == 2)
	emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
			   arg[0], GET_MODE (arg[0]),
			   arg[1], GET_MODE (arg[1]));
      else
	emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
			   arg[0], GET_MODE (arg[0]),
			   arg[1], GET_MODE (arg[1]),
			   arg[2], GET_MODE (arg[2]));

      /* If the result went through a stack temporary, copy it out.  */
      if (ret_slot)
	emit_move_insn (operands[0], ret_slot);
    }
  else
    {
      rtx ret;

      /* Non-TFmode result implies a conversion: one input, one output.  */
      gcc_assert (nargs == 2);

      ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
				     GET_MODE (operands[0]), 1,
				     arg[1], GET_MODE (arg[1]));

      if (ret != operands[0])
	emit_move_insn (operands[0], ret);
    }
}
3274
3275 /* Expand soft-float TFmode calls to sparc abi routines. */
3276
3277 static void
emit_soft_tfmode_binop(enum rtx_code code,rtx * operands)3278 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3279 {
3280 const char *func;
3281
3282 switch (code)
3283 {
3284 case PLUS:
3285 func = "_Qp_add";
3286 break;
3287 case MINUS:
3288 func = "_Qp_sub";
3289 break;
3290 case MULT:
3291 func = "_Qp_mul";
3292 break;
3293 case DIV:
3294 func = "_Qp_div";
3295 break;
3296 default:
3297 gcc_unreachable ();
3298 }
3299
3300 emit_soft_tfmode_libcall (func, 3, operands);
3301 }
3302
3303 static void
emit_soft_tfmode_unop(enum rtx_code code,rtx * operands)3304 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3305 {
3306 const char *func;
3307
3308 gcc_assert (code == SQRT);
3309 func = "_Qp_sqrt";
3310
3311 emit_soft_tfmode_libcall (func, 2, operands);
3312 }
3313
/* Expand a soft-float conversion to or from TFmode.  CODE is the
   conversion rtx code and OPERANDS holds the destination and source.
   The library routine is selected from the conversion kind and the
   non-TFmode operand's mode.  */

static void
emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
{
  const char *func;

  switch (code)
    {
    case FLOAT_EXTEND:
      /* Widening float -> TFmode; routine keyed on the source mode.  */
      switch (GET_MODE (operands[1]))
	{
	case SFmode:
	  func = "_Qp_stoq";
	  break;
	case DFmode:
	  func = "_Qp_dtoq";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case FLOAT_TRUNCATE:
      /* Narrowing TFmode -> float; routine keyed on the result mode.  */
      switch (GET_MODE (operands[0]))
	{
	case SFmode:
	  func = "_Qp_qtos";
	  break;
	case DFmode:
	  func = "_Qp_qtod";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case FLOAT:
      /* Signed integer -> TFmode.  */
      switch (GET_MODE (operands[1]))
	{
	case SImode:
	  func = "_Qp_itoq";
	  /* On 64-bit the SImode argument is widened before the call.  */
	  if (TARGET_ARCH64)
	    operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
	  break;
	case DImode:
	  func = "_Qp_xtoq";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case UNSIGNED_FLOAT:
      /* Unsigned integer -> TFmode.  */
      switch (GET_MODE (operands[1]))
	{
	case SImode:
	  func = "_Qp_uitoq";
	  /* On 64-bit the SImode argument is widened before the call.  */
	  if (TARGET_ARCH64)
	    operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
	  break;
	case DImode:
	  func = "_Qp_uxtoq";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case FIX:
      /* TFmode -> signed integer; routine keyed on the result mode.  */
      switch (GET_MODE (operands[0]))
	{
	case SImode:
	  func = "_Qp_qtoi";
	  break;
	case DImode:
	  func = "_Qp_qtox";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case UNSIGNED_FIX:
      /* TFmode -> unsigned integer; routine keyed on the result mode.  */
      switch (GET_MODE (operands[0]))
	{
	case SImode:
	  func = "_Qp_qtoui";
	  break;
	case DImode:
	  func = "_Qp_qtoux";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  emit_soft_tfmode_libcall (func, 2, operands);
}
3415
/* Expand a hard-float tfmode operation.  All arguments must be in
   registers.  CODE is the operation's rtx code; OPERANDS holds the
   destination followed by one (unary) or two (binary) sources.  */

static void
emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
{
  rtx op, dest;

  if (GET_RTX_CLASS (code) == RTX_UNARY)
    {
      /* Unary: one source operand.  */
      operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
      op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
    }
  else
    {
      /* Binary: two source operands.  */
      operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
      operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
      op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
			   operands[1], operands[2]);
    }

  /* The insn needs a register destination; compute into a temporary
     and copy out afterwards if operand 0 is not a register.  */
  if (register_operand (operands[0], VOIDmode))
    dest = operands[0];
  else
    dest = gen_reg_rtx (GET_MODE (operands[0]));

  emit_insn (gen_rtx_SET (dest, op));

  if (dest != operands[0])
    emit_move_insn (operands[0], dest);
}
3447
3448 void
emit_tfmode_binop(enum rtx_code code,rtx * operands)3449 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3450 {
3451 if (TARGET_HARD_QUAD)
3452 emit_hard_tfmode_operation (code, operands);
3453 else
3454 emit_soft_tfmode_binop (code, operands);
3455 }
3456
3457 void
emit_tfmode_unop(enum rtx_code code,rtx * operands)3458 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3459 {
3460 if (TARGET_HARD_QUAD)
3461 emit_hard_tfmode_operation (code, operands);
3462 else
3463 emit_soft_tfmode_unop (code, operands);
3464 }
3465
3466 void
emit_tfmode_cvt(enum rtx_code code,rtx * operands)3467 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3468 {
3469 if (TARGET_HARD_QUAD)
3470 emit_hard_tfmode_operation (code, operands);
3471 else
3472 emit_soft_tfmode_cvt (code, operands);
3473 }
3474
3475 /* Return nonzero if a branch/jump/call instruction will be emitting
3476 nop into its delay slot. */
3477
3478 int
empty_delay_slot(rtx_insn * insn)3479 empty_delay_slot (rtx_insn *insn)
3480 {
3481 rtx seq;
3482
3483 /* If no previous instruction (should not happen), return true. */
3484 if (PREV_INSN (insn) == NULL)
3485 return 1;
3486
3487 seq = NEXT_INSN (PREV_INSN (insn));
3488 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3489 return 0;
3490
3491 return 1;
3492 }
3493
/* Return nonzero if we should emit a nop after a cbcond instruction.
   The cbcond instruction does not have a delay slot, however there is
   a severe performance penalty if a control transfer appears right
   after a cbcond.  Therefore we emit a nop when we detect this
   situation.  */

int
emit_cbcond_nop (rtx insn)
{
  rtx next = next_active_insn (insn);

  /* Nothing follows, e.g. end of function: play it safe and pad.  */
  if (!next)
    return 1;

  if (NONJUMP_INSN_P (next)
      && GET_CODE (PATTERN (next)) == SEQUENCE)
    /* A filled delay-slot group: look at the insn in the branch slot
       position, i.e. the control-transfer insn itself.  */
    next = XVECEXP (PATTERN (next), 0, 0);
  else if (CALL_P (next)
	   && GET_CODE (PATTERN (next)) == PARALLEL)
    {
      rtx delay = XVECEXP (PATTERN (next), 0, 1);

      if (GET_CODE (delay) == RETURN)
	{
	  /* It's a sibling call.  Do not emit the nop if we're going
	     to emit something other than the jump itself as the first
	     instruction of the sibcall sequence.  */
	  if (sparc_leaf_function_p || TARGET_FLAT)
	    return 0;
	}
    }

  /* A plain (non-control-transfer) insn follows: no penalty, no nop.  */
  if (NONJUMP_INSN_P (next))
    return 0;

  return 1;
}
3531
/* Return nonzero if TRIAL can go into the call delay slot.  */

int
eligible_for_call_delay (rtx_insn *trial)
{
  rtx pat;

  /* The insn must be marked as allowed in branch delay slots at all.  */
  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
    return 0;

  /* Binutils allows
       call __tls_get_addr, %tgd_call (foo)
        add %l7, %o0, %o0, %tgd_add (foo)
     while Sun as/ld does not.  */
  if (TARGET_GNU_TLS || !TARGET_TLS)
    return 1;

  pat = PATTERN (trial);

  /* We must reject tgd_add{32|64}, i.e.
       (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
     and tldm_add{32|64}, i.e.
       (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
     for Sun as/ld.  */
  if (GET_CODE (pat) == SET
      && GET_CODE (SET_SRC (pat)) == PLUS)
    {
      rtx unspec = XEXP (SET_SRC (pat), 1);

      if (GET_CODE (unspec) == UNSPEC
	  && (XINT (unspec, 1) == UNSPEC_TLSGD
	      || XINT (unspec, 1) == UNSPEC_TLSLDM))
	return 0;
    }

  return 1;
}
3569
/* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
   instruction.  RETURN_P is true if the v9 variant 'return' is to be
   considered in the test too.

   TRIAL must be a SET whose destination is a REG appropriate for the
   'restore' instruction or, if RETURN_P is true, for the 'return'
   instruction.  */

static int
eligible_for_restore_insn (rtx trial, bool return_p)
{
  rtx pat = PATTERN (trial);
  rtx src = SET_SRC (pat);
  bool src_is_freg = false;
  rtx src_reg;

  /* Since we now can do moves between float and integer registers when
     VIS3 is enabled, we have to catch this case.  We can allow such
     moves when doing a 'return' however.  */
  src_reg = src;
  if (GET_CODE (src_reg) == SUBREG)
    src_reg = SUBREG_REG (src_reg);
  if (GET_CODE (src_reg) == REG
      && SPARC_FP_REG_P (REGNO (src_reg)))
    src_is_freg = true;

  /* The 'restore src,%g0,dest' pattern for word mode and below.  */
  if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
      && arith_operand (src, GET_MODE (src))
      && ! src_is_freg)
    {
      if (TARGET_ARCH64)
	return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
      else
	return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
    }

  /* The 'restore src,%g0,dest' pattern for double-word mode.  */
  else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
	   && arith_double_operand (src, GET_MODE (src))
	   && ! src_is_freg)
    return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);

  /* The 'restore src,%g0,dest' pattern for float if no FPU.  */
  else if (! TARGET_FPU && register_operand (src, SFmode))
    return 1;

  /* The 'restore src,%g0,dest' pattern for double if no FPU.  */
  else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
    return 1;

  /* If we have the 'return' instruction, anything that does not use
     local or output registers and can go into a delay slot wins.  */
  else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
    return 1;

  /* The 'restore src1,src2,dest' pattern for SImode.  */
  else if (GET_CODE (src) == PLUS
	   && register_operand (XEXP (src, 0), SImode)
	   && arith_operand (XEXP (src, 1), SImode))
    return 1;

  /* The 'restore src1,src2,dest' pattern for DImode.  */
  else if (GET_CODE (src) == PLUS
	   && register_operand (XEXP (src, 0), DImode)
	   && arith_double_operand (XEXP (src, 1), DImode))
    return 1;

  /* The 'restore src1,%lo(src2),dest' pattern.  */
  else if (GET_CODE (src) == LO_SUM
	   && ! TARGET_CM_MEDMID
	   && ((register_operand (XEXP (src, 0), SImode)
		&& immediate_operand (XEXP (src, 1), SImode))
	       || (TARGET_ARCH64
		   && register_operand (XEXP (src, 0), DImode)
		   && immediate_operand (XEXP (src, 1), DImode))))
    return 1;

  /* The 'restore src,src,dest' pattern; a shift left by one is the
     same as adding the register to itself.  */
  else if (GET_CODE (src) == ASHIFT
	   && (register_operand (XEXP (src, 0), SImode)
	       || register_operand (XEXP (src, 0), DImode))
	   && XEXP (src, 1) == const1_rtx)
    return 1;

  return 0;
}
3657
/* Return nonzero if TRIAL can go into the function return's delay slot.  */

int
eligible_for_return_delay (rtx_insn *trial)
{
  int regno;
  rtx pat;

  /* If the function uses __builtin_eh_return, the eh_return machinery
     occupies the delay slot.  */
  if (crtl->calls_eh_return)
    return 0;

  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
    return 0;

  /* In the case of a leaf or flat function, anything can go into the slot.  */
  if (sparc_leaf_function_p || TARGET_FLAT)
    return 1;

  if (!NONJUMP_INSN_P (trial))
    return 0;

  pat = PATTERN (trial);
  if (GET_CODE (pat) == PARALLEL)
    {
      int i;

      if (! TARGET_V9)
	return 0;
      /* Every member of the PARALLEL must be a SET of a register, and
	 none of them may target a local (%l0-%l7, regs 16-23) or input
	 (%i0-%i7) / output (%o0-%o7, regs 8-15) register, which the
	 'restore' would clobber or remap.  */
      for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
	{
	  rtx expr = XVECEXP (pat, 0, i);
	  if (GET_CODE (expr) != SET)
	    return 0;
	  if (GET_CODE (SET_DEST (expr)) != REG)
	    return 0;
	  regno = REGNO (SET_DEST (expr));
	  if (regno >= 8 && regno < 24)
	    return 0;
	}
      return !epilogue_renumber (&pat, 1);
    }

  if (GET_CODE (pat) != SET)
    return 0;

  if (GET_CODE (SET_DEST (pat)) != REG)
    return 0;

  regno = REGNO (SET_DEST (pat));

  /* Otherwise, only operations which can be done in tandem with
     a `restore' or `return' insn can go into the delay slot.  */
  if (regno >= 8 && regno < 24)
    return 0;

  /* If this instruction sets up floating point register and we have a return
     instruction, it can probably go in.  But restore will not work
     with FP_REGS.  */
  if (! SPARC_INT_REG_P (regno))
    return TARGET_V9 && !epilogue_renumber (&pat, 1);

  return eligible_for_restore_insn (trial, true);
}
3723
/* Return nonzero if TRIAL can go into the sibling call's delay slot.  */

int
eligible_for_sibcall_delay (rtx_insn *trial)
{
  rtx pat;

  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
    return 0;

  if (!NONJUMP_INSN_P (trial))
    return 0;

  pat = PATTERN (trial);

  if (sparc_leaf_function_p || TARGET_FLAT)
    {
      /* If the tail call is done using the call instruction,
	 we have to restore %o7 in the delay slot.  */
      if (LEAF_SIBCALL_SLOT_RESERVED_P)
	return 0;

      /* %g1 is used to build the function address */
      if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
	return 0;

      return 1;
    }

  if (GET_CODE (pat) != SET)
    return 0;

  /* Otherwise, only operations which can be done in tandem with
     a `restore' insn can go into the delay slot.  The destination must
     be an integer register outside %o0-%o7/%l0-%l7 (regs 8-23).  */
  if (GET_CODE (SET_DEST (pat)) != REG
      || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
      || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
    return 0;

  /* If it mentions %o7, it can't go in, because sibcall will clobber it
     in most cases.  */
  if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
    return 0;

  return eligible_for_restore_insn (trial, false);
}
3770
/* Determine if it's legal to put X into the constant pool.  This
   is not possible if X contains the address of a symbol that is
   not constant (TLS) or not known at final link time (PIC).
   Implements TARGET_CANNOT_FORCE_CONST_MEM; returns true if X must
   NOT be placed in the pool.  */

static bool
sparc_cannot_force_const_mem (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_WIDE_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
      /* Accept all non-symbolic constants.  */
      return false;

    case LABEL_REF:
      /* Labels are OK iff we are non-PIC.  */
      return flag_pic != 0;

    case SYMBOL_REF:
      /* 'Naked' TLS symbol references are never OK,
	 non-TLS symbols are OK iff we are non-PIC.  */
      if (SYMBOL_REF_TLS_MODEL (x))
	return true;
      else
	return flag_pic != 0;

    case CONST:
      /* Recurse into the wrapped expression.  */
      return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
    case PLUS:
    case MINUS:
      /* A sum is rejected if either side is.  */
      return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
	 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
    case UNSPEC:
      return true;
    default:
      gcc_unreachable ();
    }
}
3811
3812 /* Global Offset Table support. */
3813 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3814 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3815
3816 /* Return the SYMBOL_REF for the Global Offset Table. */
3817
3818 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3819
3820 static rtx
sparc_got(void)3821 sparc_got (void)
3822 {
3823 if (!sparc_got_symbol)
3824 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3825
3826 return sparc_got_symbol;
3827 }
3828
/* Ensure that we are not using patterns that are not OK with PIC.
   I is the index of the operand of the current insn to check.
   Always returns 1; with flag_pic == 1 it additionally asserts that
   the operand is not an absolute symbolic reference.  */

int
check_pic (int i)
{
  rtx op;

  switch (flag_pic)
    {
    case 1:
      /* -fpic: the operand must not be a bare SYMBOL_REF, and a CONST
	 is only acceptable in the GOT-relative (sym - _GLOBAL_OFFSET_TABLE_)
	 form.  */
      op = recog_data.operand[i];
      gcc_assert (GET_CODE (op) != SYMBOL_REF
		  && (GET_CODE (op) != CONST
		      || (GET_CODE (XEXP (op, 0)) == MINUS
			  && XEXP (XEXP (op, 0), 0) == sparc_got ()
			  && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
      /* FALLTHRU */
    case 2:
    default:
      return 1;
    }
}
3850
3851 /* Return true if X is an address which needs a temporary register when
3852 reloaded while generating PIC code. */
3853
3854 int
pic_address_needs_scratch(rtx x)3855 pic_address_needs_scratch (rtx x)
3856 {
3857 /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */
3858 if (GET_CODE (x) == CONST
3859 && GET_CODE (XEXP (x, 0)) == PLUS
3860 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3861 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3862 && !SMALL_INT (XEXP (XEXP (x, 0), 1)))
3863 return 1;
3864
3865 return 0;
3866 }
3867
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  Implements TARGET_LEGITIMATE_CONSTANT_P.  */

static bool
sparc_legitimate_constant_p (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
    case SYMBOL_REF:
      /* TLS symbols need a sequence of insns, not a simple constant.  */
      if (sparc_tls_referenced_p (x))
	return false;
      break;

    case CONST_DOUBLE:
      /* Floating point constants are generally not ok.
	 The only exception is 0.0 and all-ones in VIS.  */
      if (TARGET_VIS
	  && SCALAR_FLOAT_MODE_P (mode)
	  && (const_zero_operand (x, mode)
	      || const_all_ones_operand (x, mode)))
	return true;

      return false;

    case CONST_VECTOR:
      /* Vector constants are generally not ok.
	 The only exception is 0 or -1 in VIS.  */
      if (TARGET_VIS
	  && (const_zero_operand (x, mode)
	      || const_all_ones_operand (x, mode)))
	return true;

      return false;

    default:
      break;
    }

  /* Everything else (including non-TLS CONST/SYMBOL_REF) is valid.  */
  return true;
}
3909
3910 /* Determine if a given RTX is a valid constant address. */
3911
3912 bool
constant_address_p(rtx x)3913 constant_address_p (rtx x)
3914 {
3915 switch (GET_CODE (x))
3916 {
3917 case LABEL_REF:
3918 case CONST_INT:
3919 case HIGH:
3920 return true;
3921
3922 case CONST:
3923 if (flag_pic && pic_address_needs_scratch (x))
3924 return false;
3925 return sparc_legitimate_constant_p (Pmode, x);
3926
3927 case SYMBOL_REF:
3928 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3929
3930 default:
3931 return false;
3932 }
3933 }
3934
3935 /* Nonzero if the constant value X is a legitimate general operand
3936 when generating PIC code. It is given that flag_pic is on and
3937 that X satisfies CONSTANT_P. */
3938
3939 bool
legitimate_pic_operand_p(rtx x)3940 legitimate_pic_operand_p (rtx x)
3941 {
3942 if (pic_address_needs_scratch (x))
3943 return false;
3944 if (sparc_tls_referenced_p (x))
3945 return false;
3946 return true;
3947 }
3948
/* Nonzero if X is a CONST_INT usable as the signed 13-bit immediate
   offset of a MODE-sized memory access; the upper bound leaves room
   for the access size so the last addressed byte is still in range.  */
#define RTX_OK_FOR_OFFSET_P(X, MODE)			\
  (CONST_INT_P (X)					\
   && INTVAL (X) >= -0x1000				\
   && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))

/* Likewise but with a tighter upper bound (0xc00), used for the
   LO_SUM + offset (olo10) form — presumably so the combined %lo()
   value plus offset cannot overflow the simm13 field.  */
#define RTX_OK_FOR_OLO10_P(X, MODE)			\
  (CONST_INT_P (X)					\
   && INTVAL (X) >= -0x1000				\
   && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
3958
/* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.

   On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
   ordinarily.  This changes a bit when generating PIC.  The address is
   decomposed into base register RS1, optional index register RS2 and
   optional immediate IMM1, which are then validated.  */

static bool
sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
{
  rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;

  if (REG_P (addr) || GET_CODE (addr) == SUBREG)
    rs1 = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rs1 = XEXP (addr, 0);
      rs2 = XEXP (addr, 1);

      /* Canonicalize.  REG comes first, if there are no regs,
	 LO_SUM comes first.  */
      if (!REG_P (rs1)
	  && GET_CODE (rs1) != SUBREG
	  && (REG_P (rs2)
	      || GET_CODE (rs2) == SUBREG
	      || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
	{
	  rs1 = XEXP (addr, 1);
	  rs2 = XEXP (addr, 0);
	}

      /* Either a pic-register-relative symbolic operand (flag_pic == 1)
	 or a plain register plus a 13-bit offset.  */
      if ((flag_pic == 1
	   && rs1 == pic_offset_table_rtx
	   && !REG_P (rs2)
	   && GET_CODE (rs2) != SUBREG
	   && GET_CODE (rs2) != LO_SUM
	   && GET_CODE (rs2) != MEM
	   && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
	   && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
	   && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
	  || ((REG_P (rs1)
	       || GET_CODE (rs1) == SUBREG)
	      && RTX_OK_FOR_OFFSET_P (rs2, mode)))
	{
	  imm1 = rs2;
	  rs2 = NULL;
	}
      else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
	       && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
	{
	  /* We prohibit REG + REG for TFmode when there are no quad move insns
	     and we consequently need to split.  We do this because REG+REG
	     is not an offsettable address.  If we get the situation in reload
	     where source and destination of a movtf pattern are both MEMs with
	     REG+REG address, then only one of them gets converted to an
	     offsettable address.  */
	  if (mode == TFmode
	      && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
	    return 0;

	  /* Likewise for TImode, but in all cases.  */
	  if (mode == TImode)
	    return 0;

	  /* We prohibit REG + REG on ARCH32 if not optimizing for
	     DFmode/DImode because then mem_min_alignment is likely to be zero
	     after reload and the forced split would lack a matching splitter
	     pattern.  */
	  if (TARGET_ARCH32 && !optimize
	      && (mode == DFmode || mode == DImode))
	    return 0;
	}
      else if (USE_AS_OFFSETABLE_LO10
	       && GET_CODE (rs1) == LO_SUM
	       && TARGET_ARCH64
	       && ! TARGET_CM_MEDMID
	       && RTX_OK_FOR_OLO10_P (rs2, mode))
	{
	  /* (LO_SUM reg imm) + small offset: fold into reg + imm form.  */
	  rs2 = NULL;
	  imm1 = XEXP (rs1, 1);
	  rs1 = XEXP (rs1, 0);
	  if (!CONSTANT_P (imm1)
	      || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
	    return 0;
	}
    }
  else if (GET_CODE (addr) == LO_SUM)
    {
      rs1 = XEXP (addr, 0);
      imm1 = XEXP (addr, 1);

      if (!CONSTANT_P (imm1)
	  || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
	return 0;

      /* We can't allow TFmode in 32-bit mode, because an offset greater
	 than the alignment (8) may cause the LO_SUM to overflow.  */
      if (mode == TFmode && TARGET_ARCH32)
	return 0;
    }
  else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
    return 1;
  else
    return 0;

  /* The base (and index, if any) must ultimately be registers.  */
  if (GET_CODE (rs1) == SUBREG)
    rs1 = SUBREG_REG (rs1);
  if (!REG_P (rs1))
    return 0;

  if (rs2)
    {
      if (GET_CODE (rs2) == SUBREG)
	rs2 = SUBREG_REG (rs2);
      if (!REG_P (rs2))
	return 0;
    }

  if (strict)
    {
      /* Strict checking: hard registers must be valid base registers.  */
      if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
	  || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
	return 0;
    }
  else
    {
      /* Non-strict: accept pseudos and the frame pointer; reject hard
	 registers that are not integer registers.  */
      if ((! SPARC_INT_REG_P (REGNO (rs1))
	   && REGNO (rs1) != FRAME_POINTER_REGNUM
	   && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
	  || (rs2
	      && (! SPARC_INT_REG_P (REGNO (rs2))
		  && REGNO (rs2) != FRAME_POINTER_REGNUM
		  && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
	return 0;
    }
  return 1;
}
4094
4095 /* Return the SYMBOL_REF for the tls_get_addr function. */
4096
4097 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4098
4099 static rtx
sparc_tls_get_addr(void)4100 sparc_tls_get_addr (void)
4101 {
4102 if (!sparc_tls_symbol)
4103 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4104
4105 return sparc_tls_symbol;
4106 }
4107
4108 /* Return the Global Offset Table to be used in TLS mode. */
4109
static rtx
sparc_tls_got (void)
{
  /* In PIC mode, this is just the PIC offset table.  Record the use so
     that the prologue knows the register must be set up.  */
  if (flag_pic)
    {
      crtl->uses_pic_offset_table = 1;
      return pic_offset_table_rtx;
    }

  /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
     the GOT symbol with the 32-bit ABI, so we reload the GOT register.  */
  if (TARGET_SUN_TLS && TARGET_ARCH32)
    {
      load_got_register ();
      return global_offset_table_rtx;
    }

  /* In all other cases, we load a new pseudo with the GOT symbol.  */
  return copy_to_reg (sparc_got ());
}
4131
4132 /* Return true if X contains a thread-local symbol. */
4133
4134 static bool
sparc_tls_referenced_p(rtx x)4135 sparc_tls_referenced_p (rtx x)
4136 {
4137 if (!TARGET_HAVE_TLS)
4138 return false;
4139
4140 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4141 x = XEXP (XEXP (x, 0), 0);
4142
4143 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4144 return true;
4145
4146 /* That's all we handle in sparc_legitimize_tls_address for now. */
4147 return false;
4148 }
4149
4150 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4151 this (thread-local) address. */
4152
static rtx
sparc_legitimize_tls_address (rtx addr)
{
  rtx temp1, temp2, temp3, ret, o0, got;
  rtx_insn *insn;

  /* This routine creates pseudo-registers, so it must not be used once
     reload has started.  */
  gcc_assert (can_create_pseudo_p ());

  if (GET_CODE (addr) == SYMBOL_REF)
    switch (SYMBOL_REF_TLS_MODEL (addr))
      {
      case TLS_MODEL_GLOBAL_DYNAMIC:
	/* Build the %tgd_hi22/%tgd_lo10 GOT offset and call
	   __tls_get_addr; the runtime returns the address in %o0
	   (hard register 8).  The whole sequence is wrapped with
	   emit_libcall_block so the optimizers can treat it as a single
	   computation equivalent to ADDR.  */
	start_sequence ();
	temp1 = gen_reg_rtx (SImode);
	temp2 = gen_reg_rtx (SImode);
	ret = gen_reg_rtx (Pmode);
	o0 = gen_rtx_REG (Pmode, 8);
	got = sparc_tls_got ();
	emit_insn (gen_tgd_hi22 (temp1, addr));
	emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
	if (TARGET_ARCH32)
	  {
	    emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
	    insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
						   addr, const1_rtx));
	  }
	else
	  {
	    emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
	    insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
						   addr, const1_rtx));
	  }
	/* The argument register must be seen as used by the call.  */
	use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
	insn = get_insns ();
	end_sequence ();
	emit_libcall_block (insn, ret, o0, addr);
	break;

      case TLS_MODEL_LOCAL_DYNAMIC:
	/* First call __tls_get_addr once to obtain the module base
	   (UNSPEC_TLSLD_BASE), then add the symbol's %tldo_* offset.
	   The base computation can thus be shared between symbols.  */
	start_sequence ();
	temp1 = gen_reg_rtx (SImode);
	temp2 = gen_reg_rtx (SImode);
	temp3 = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	o0 = gen_rtx_REG (Pmode, 8);
	got = sparc_tls_got ();
	emit_insn (gen_tldm_hi22 (temp1));
	emit_insn (gen_tldm_lo10 (temp2, temp1));
	if (TARGET_ARCH32)
	  {
	    emit_insn (gen_tldm_add32 (o0, got, temp2));
	    insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
						    const1_rtx));
	  }
	else
	  {
	    emit_insn (gen_tldm_add64 (o0, got, temp2));
	    insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
						    const1_rtx));
	  }
	use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
	insn = get_insns ();
	end_sequence ();
	/* Equate the call result with the module base so it can be
	   CSEd across multiple local-dynamic accesses.  */
	emit_libcall_block (insn, temp3, o0,
			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					    UNSPEC_TLSLD_BASE));
	temp1 = gen_reg_rtx (SImode);
	temp2 = gen_reg_rtx (SImode);
	emit_insn (gen_tldo_hix22 (temp1, addr));
	emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
	if (TARGET_ARCH32)
	  emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
	else
	  emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
	break;

      case TLS_MODEL_INITIAL_EXEC:
	/* Load the TP offset of the symbol from the GOT (%tie_* relocs)
	   and add the thread pointer %g7 (hard register 7).  */
	temp1 = gen_reg_rtx (SImode);
	temp2 = gen_reg_rtx (SImode);
	temp3 = gen_reg_rtx (Pmode);
	got = sparc_tls_got ();
	emit_insn (gen_tie_hi22 (temp1, addr));
	emit_insn (gen_tie_lo10 (temp2, temp1, addr));
	if (TARGET_ARCH32)
	  emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
	else
	  emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
	if (TARGET_SUN_TLS)
	  {
	    /* Sun as needs an explicit %tie_add relocation on the add.  */
	    ret = gen_reg_rtx (Pmode);
	    if (TARGET_ARCH32)
	      emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
					temp3, addr));
	    else
	      emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
					temp3, addr));
	  }
	else
	  ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
	break;

      case TLS_MODEL_LOCAL_EXEC:
	/* The TP offset is known at link time: materialize it with the
	   %tle_hix22/%tle_lox10 pair and add the thread pointer %g7.  */
	temp1 = gen_reg_rtx (Pmode);
	temp2 = gen_reg_rtx (Pmode);
	if (TARGET_ARCH32)
	  {
	    emit_insn (gen_tle_hix22_sp32 (temp1, addr));
	    emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
	  }
	else
	  {
	    emit_insn (gen_tle_hix22_sp64 (temp1, addr));
	    emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
	  }
	ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
	break;

      default:
	gcc_unreachable ();
      }

  else if (GET_CODE (addr) == CONST)
    {
      rtx base, offset;

      /* Handle (const (plus tls_sym off)): legitimize the symbol and
	 re-apply the offset.  */
      gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);

      base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
      offset = XEXP (XEXP (addr, 0), 1);

      base = force_operand (base, NULL_RTX);
      /* Only a simm13 offset can be folded into the address.  */
      if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
	offset = force_reg (Pmode, offset);
      ret = gen_rtx_PLUS (Pmode, base, offset);
    }

  else
    gcc_unreachable (); /* for now ... */

  return ret;
}
4294
4295 /* Legitimize PIC addresses. If the address is already position-independent,
4296 we return ORIG. Newly generated position-independent addresses go into a
4297 reg. This is REG if nonzero, otherwise we allocate register(s) as
4298 necessary. */
4299
static rtx
sparc_legitimize_pic_address (rtx orig, rtx reg)
{
  if (GET_CODE (orig) == SYMBOL_REF
      /* See the comment in sparc_expand_move.  */
      || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
    {
      bool gotdata_op = false;
      rtx pic_ref, address;
      rtx_insn *insn;

      if (!reg)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      if (flag_pic == 2)
	{
	  /* -fPIC: the GOT slot offset does not fit in 13 bits, so build
	     it with a HIGH/LO_SUM pair first.  If not during reload,
	     allocate another temp reg here for loading in the address,
	     so that these instructions can be optimized properly.  */
	  rtx temp_reg = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : reg;

	  /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
	     won't get confused into thinking that these two instructions
	     are loading in the true address of the symbol.  If in the
	     future a PIC rtx exists, that should be used instead.  */
	  if (TARGET_ARCH64)
	    {
	      emit_insn (gen_movdi_high_pic (temp_reg, orig));
	      emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
	    }
	  else
	    {
	      emit_insn (gen_movsi_high_pic (temp_reg, orig));
	      emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
	    }

	  address = temp_reg;
	  gotdata_op = true;
	}
      else
	/* -fpic: a 13-bit GOT offset, usable directly in the load.  */
	address = orig;

      crtl->uses_pic_offset_table = 1;
      if (gotdata_op)
	{
	  /* Use the gotdata patterns, which allow the linker to optimize
	     the GOT load sequence.  */
	  if (TARGET_ARCH64)
	    insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
							pic_offset_table_rtx,
							address, orig));
	  else
	    insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
							pic_offset_table_rtx,
							address, orig));
	}
      else
	{
	  /* Plain load of the GOT slot at %l7 + offset.  */
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_PLUS (Pmode,
					   pic_offset_table_rtx, address));
	  insn = emit_move_insn (reg, pic_ref);
	}

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);
      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      /* Already legitimized: %l7 + something.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      if (!reg)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      /* Legitimize both operands of the PLUS recursively; reuse REG for
	 the base and keep the offset out of it if the base took it.  */
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
      base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
      offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
					     base == reg ? NULL_RTX : reg);

      if (GET_CODE (offset) == CONST_INT)
	{
	  /* A simm13 offset can stay in the address; otherwise it must
	     be forced into a register, which requires a pseudo.  */
	  if (SMALL_INT (offset))
	    return plus_constant (Pmode, base, INTVAL (offset));
	  else if (can_create_pseudo_p ())
	    offset = force_reg (Pmode, offset);
	  else
	    /* If we reach here, then something is seriously wrong.  */
	    gcc_unreachable ();
	}
      return gen_rtx_PLUS (Pmode, base, offset);
    }
  else if (GET_CODE (orig) == LABEL_REF)
    /* ??? We ought to be checking that the register is live instead, in case
       it is eliminated.  */
    crtl->uses_pic_offset_table = 1;

  return orig;
}
4409
4410 /* Try machine-dependent ways of modifying an illegitimate address X
4411 to be legitimate. If we find one, return the new, valid address.
4412
4413 OLDX is the address as it was before break_out_memory_refs was called.
4414 In some cases it is useful to look at this to decide what needs to be done.
4415
4416 MODE is the mode of the operand pointed to by X.
4417
4418 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4419
static rtx
sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			  machine_mode mode)
{
  rtx orig_x = x;

  /* Turn REG+(X*Y) into REG+REG by computing the product into a register,
     and flatten nested PLUS expressions likewise.  Each rewrite keeps the
     other operand in place.  */
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
    x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
		      force_operand (XEXP (x, 0), NULL_RTX));
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
		      force_operand (XEXP (x, 1), NULL_RTX));
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
    x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
		      XEXP (x, 1));
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
		      force_operand (XEXP (x, 1), NULL_RTX));

  /* If the rewrites above produced a valid address, we are done.  */
  if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
    return x;

  /* Otherwise dispatch on the kind of address: TLS and PIC references
     need their dedicated legitimizers, constant terms are moved into
     registers, and bare symbolic addresses are loaded into a register.  */
  if (sparc_tls_referenced_p (x))
    x = sparc_legitimize_tls_address (x);
  else if (flag_pic)
    x = sparc_legitimize_pic_address (x, NULL_RTX);
  else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
		      copy_to_mode_reg (Pmode, XEXP (x, 1)));
  else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
    x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
		      copy_to_mode_reg (Pmode, XEXP (x, 0)));
  else if (GET_CODE (x) == SYMBOL_REF
	   || GET_CODE (x) == CONST
	   || GET_CODE (x) == LABEL_REF)
    x = copy_to_suggested_reg (x, NULL_RTX, Pmode);

  return x;
}
4459
4460 /* Delegitimize an address that was legitimized by the above function. */
4461
static rtx
sparc_delegitimize_address (rtx x)
{
  x = delegitimize_mem_from_attrs (x);

  /* Strip the UNSPEC wrapper added by PIC/TLS legitimization to recover
     the bare SYMBOL_REF.  */
  if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
    switch (XINT (XEXP (x, 1), 1))
      {
      case UNSPEC_MOVE_PIC:
      case UNSPEC_TLSLE:
	x = XVECEXP (XEXP (x, 1), 0, 0);
	gcc_assert (GET_CODE (x) == SYMBOL_REF);
	break;
      default:
	break;
      }

  /* This is generated by mov{si,di}_pic_label_ref in PIC mode.  */
  if (GET_CODE (x) == MINUS
      && REG_P (XEXP (x, 0))
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
      && GET_CODE (XEXP (x, 1)) == LO_SUM
      && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
      && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
    {
      x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
      /* The label reference may carry a folded-in constant offset.  */
      gcc_assert (GET_CODE (x) == LABEL_REF
		  || (GET_CODE (x) == CONST
		      && GET_CODE (XEXP (x, 0)) == PLUS
		      && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT));
    }

  return x;
}
4497
4498 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4499 replace the input X, or the original X if no replacement is called for.
4500 The output parameter *WIN is 1 if the calling macro should goto WIN,
4501 0 if it should not.
4502
4503 For SPARC, we wish to handle addresses by splitting them into
4504 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4505 This cuts the number of extra insns by one.
4506
4507 Do nothing when generating PIC code and the address is a symbolic
4508 operand or requires a scratch register. */
4509
rtx
sparc_legitimize_reload_address (rtx x, machine_mode mode,
				 int opnum, int type,
				 int ind_levels ATTRIBUTE_UNUSED, int *win)
{
  /* Decompose SImode constants into HIGH+LO_SUM.  Excluded: TFmode in
     32-bit mode (the LO_SUM offset could overflow the alignment, see
     sparc_legitimate_address_p), code models above medlow (the constant
     does not fit the HIGH/LO_SUM pair), and PIC symbolic or
     scratch-needing addresses (handled elsewhere).  */
  if (CONSTANT_P (x)
      && (mode != TFmode || TARGET_ARCH64)
      && GET_MODE (x) == SImode
      && GET_CODE (x) != LO_SUM
      && GET_CODE (x) != HIGH
      && sparc_cmodel <= CM_MEDLOW
      && !(flag_pic
	   && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
    {
      /* Keep the LO_SUM in the memory reference and reload the HIGH
	 part into a base register.  */
      x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type)type);
      *win = 1;
      return x;
    }

  /* We have to recognize what we have already generated above.  */
  if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
    {
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type)type);
      *win = 1;
      return x;
    }

  /* No replacement: tell the caller not to goto WIN.  */
  *win = 0;
  return x;
}
4546
4547 /* Return true if ADDR (a legitimate address expression)
4548 has an effect that depends on the machine mode it is used for.
4549
4550 In PIC mode,
4551
4552 (mem:HI [%l7+a])
4553
4554 is not equivalent to
4555
4556 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4557
4558 because [%l7+a+1] is interpreted as the address of (a+1). */
4559
4560
4561 static bool
sparc_mode_dependent_address_p(const_rtx addr,addr_space_t as ATTRIBUTE_UNUSED)4562 sparc_mode_dependent_address_p (const_rtx addr,
4563 addr_space_t as ATTRIBUTE_UNUSED)
4564 {
4565 if (flag_pic && GET_CODE (addr) == PLUS)
4566 {
4567 rtx op0 = XEXP (addr, 0);
4568 rtx op1 = XEXP (addr, 1);
4569 if (op0 == pic_offset_table_rtx
4570 && symbolic_operand (op1, VOIDmode))
4571 return true;
4572 }
4573
4574 return false;
4575 }
4576
4577 #ifdef HAVE_GAS_HIDDEN
4578 # define USE_HIDDEN_LINKONCE 1
4579 #else
4580 # define USE_HIDDEN_LINKONCE 0
4581 #endif
4582
4583 static void
get_pc_thunk_name(char name[32],unsigned int regno)4584 get_pc_thunk_name (char name[32], unsigned int regno)
4585 {
4586 const char *reg_name = reg_names[regno];
4587
4588 /* Skip the leading '%' as that cannot be used in a
4589 symbol name. */
4590 reg_name += 1;
4591
4592 if (USE_HIDDEN_LINKONCE)
4593 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4594 else
4595 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4596 }
4597
4598 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4599
4600 static rtx
gen_load_pcrel_sym(rtx op0,rtx op1,rtx op2,rtx op3)4601 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4602 {
4603 int orig_flag_pic = flag_pic;
4604 rtx insn;
4605
4606 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4607 flag_pic = 0;
4608 if (TARGET_ARCH64)
4609 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4610 else
4611 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4612 flag_pic = orig_flag_pic;
4613
4614 return insn;
4615 }
4616
4617 /* Emit code to load the GOT register. */
4618
void
load_got_register (void)
{
  /* In PIC mode, this will retrieve pic_offset_table_rtx.  */
  if (!global_offset_table_rtx)
    global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);

  if (TARGET_VXWORKS_RTP)
    /* VxWorks RTP has a dedicated pattern to fetch the GOT pointer.  */
    emit_insn (gen_vxworks_load_got ());
  else
    {
      /* The GOT symbol is subject to a PC-relative relocation so we need a
	 helper function to add the PC value and thus get the final value.
	 The thunk SYMBOL_REF is created lazily and cached.  */
      if (!got_helper_rtx)
	{
	  char name[32];
	  get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
	  got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
	}

      emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
				     got_helper_rtx,
				     GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.
     ??? In the case where we don't obey regdecls, this is not sufficient
     since we may not fall out the bottom.  */
  emit_use (global_offset_table_rtx);
}
4650
4651 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4652 address of the call target. */
4653
void
sparc_emit_call_insn (rtx pat, rtx addr)
{
  rtx_insn *insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries, so we
     must keep it live across calls that may resolve to a PLT entry,
     i.e. calls to symbols that do not bind locally.  When a decl is
     available, ask the target hook; otherwise fall back on the
     SYMBOL_REF flag.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && GET_CODE (addr) == SYMBOL_REF
      && (SYMBOL_REF_DECL (addr)
	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
	  : !SYMBOL_REF_LOCAL_P (addr)))
    {
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
      crtl->uses_pic_offset_table = 1;
    }
}
4673
4674 /* Return 1 if RTX is a MEM which is known to be aligned to at
4675 least a DESIRED byte boundary. */
4676
int
mem_min_alignment (rtx mem, int desired)
{
  rtx addr, base, offset;

  /* If it's not a MEM we can't accept it.  */
  if (GET_CODE (mem) != MEM)
    return 0;

  /* Obviously...  The recorded MEM alignment settles it, unless
     -munaligned-doubles says the front-end guarantees are unreliable.  */
  if (!TARGET_UNALIGNED_DOUBLES
      && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
    return 1;

  /* ??? The rest of the function predates MEM_ALIGN so
     there is probably a bit of redundancy.  */
  addr = XEXP (mem, 0);
  base = offset = NULL_RTX;
  if (GET_CODE (addr) == PLUS)
    {
      if (GET_CODE (XEXP (addr, 0)) == REG)
	{
	  base = XEXP (addr, 0);

	  /* What we are saying here is that if the base
	     REG is aligned properly, the compiler will make
	     sure any REG based index upon it will be so
	     as well.  */
	  if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
	    offset = XEXP (addr, 1);
	  else
	    offset = const0_rtx;
	}
    }
  else if (GET_CODE (addr) == REG)
    {
      base = addr;
      offset = const0_rtx;
    }

  if (base != NULL_RTX)
    {
      int regno = REGNO (base);

      if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
	{
	  /* Check if the compiler has recorded some information
	     about the alignment of the base REG.  If reload has
	     completed, we already matched with proper alignments.
	     If not running global_alloc, reload might give us
	     unaligned pointer to local stack though.  */
	  if (((cfun != 0
		&& REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
	       || (optimize && reload_completed))
	      && (INTVAL (offset) & (desired - 1)) == 0)
	    return 1;
	}
      else
	{
	  /* %fp/%sp based: compensate for the stack bias before testing
	     the low bits of the offset.  */
	  if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
	    return 1;
	}
    }
  else if (! TARGET_UNALIGNED_DOUBLES
	   || CONSTANT_P (addr)
	   || GET_CODE (addr) == LO_SUM)
    {
      /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
	 is true, in which case we can only assume that an access is aligned if
	 it is to a constant address, or the address involves a LO_SUM.  */
      return 1;
    }

  /* An obviously unaligned address.  */
  return 0;
}
4753
4754
4755 /* Vectors to keep interesting information about registers where it can easily
4756 be got. We used to use the actual mode value as the bit number, but there
4757 are more than 32 modes now. Instead we use two tables: one indexed by
4758 hard register number, and one indexed by mode. */
4759
4760 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4761 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4762 mapped into one sparc_mode_class mode. */
4763
/* The letters stand for the size the class covers (see sparc_init_modes):
   H = sub-word, S = 4 bytes, D = 8 bytes, T = 16 bytes, O = 32 bytes,
   with the *F_MODE variants for the float/vector counterparts, plus the
   two condition-code classes.  */
enum sparc_mode_class {
  H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
  SF_MODE, DF_MODE, TF_MODE, OF_MODE,
  CC_MODE, CCFP_MODE
};

/* Modes for single-word and smaller quantities.  */
#define S_MODES \
  ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))

/* Modes for double-word and smaller quantities.  */
#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))

/* Modes for quad-word and smaller quantities.  */
#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))

/* Modes for 8-word and smaller quantities.  */
#define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))

/* Modes for single-float quantities.  */
#define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))

/* Modes for double-float and smaller quantities.  */
#define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))

/* Modes for quad-float and smaller quantities.  */
#define TF_MODES (DF_MODES | (1 << (int) TF_MODE))

/* Modes for quad-float pairs and smaller quantities.  */
#define OF_MODES (TF_MODES | (1 << (int) OF_MODE))

/* Modes for double-float only quantities.  */
#define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))

/* Modes for quad-float and double-float only quantities.  */
#define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))

/* Modes for quad-float pairs and double-float only quantities.  */
#define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))

/* Modes for condition codes.  */
#define CC_MODES (1 << (int) CC_MODE)
#define CCFP_MODES (1 << (int) CCFP_MODE)
4807
4808 /* Value is 1 if register/mode pair is acceptable on sparc.
4809
4810 The funny mixture of D and T modes is because integer operations
4811 do not specially operate on tetra quantities, so non-quad-aligned
4812 registers can hold quadword quantities (except %o4 and %i4 because
4813 they cross fixed registers).
4814
4815 ??? Note that, despite the settings, non-double-aligned parameter
4816 registers can hold double-word quantities in 32-bit mode. */
4817
/* This points to either the 32 bit or the 64 bit version.  */
const int *hard_regno_mode_classes;

static const int hard_32bit_mode_classes[] = {
  /* %g0-%g7, %o0-%o7, %l0-%l7, %i0-%i7 (integer registers 0-31).  */
  S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,

  /* FP regs f0 to f31.  */
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,

  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,

  /* %icc, %sfp, %gsr */
  CC_MODES, 0, D_MODES
};

static const int hard_64bit_mode_classes[] = {
  /* %g0-%g7, %o0-%o7, %l0-%l7, %i0-%i7 (integer registers 0-31).  */
  D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,

  /* FP regs f0 to f31.  */
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,

  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,

  /* %icc, %sfp, %gsr */
  CC_MODES, 0, D_MODES
};

/* Bitmask of sparc_mode_class bits for each machine mode, filled in by
   sparc_init_modes.  */
int sparc_mode_class [NUM_MACHINE_MODES];

/* Register class for each hard register, filled in by sparc_init_modes
   and used by REGNO_REG_CLASS.  */
enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4874
/* Initialize sparc_mode_class, hard_regno_mode_classes and
   sparc_regno_reg_class from the current target flags.  */

static void
sparc_init_modes (void)
{
  int i;

  /* Map each machine mode to a sparc_mode_class bit by mode class and
     byte size.  */
  for (i = 0; i < NUM_MACHINE_MODES; i++)
    {
      machine_mode m = (machine_mode) i;
      unsigned int size = GET_MODE_SIZE (m);

      switch (GET_MODE_CLASS (m))
	{
	case MODE_INT:
	case MODE_PARTIAL_INT:
	case MODE_COMPLEX_INT:
	  if (size < 4)
	    sparc_mode_class[i] = 1 << (int) H_MODE;
	  else if (size == 4)
	    sparc_mode_class[i] = 1 << (int) S_MODE;
	  else if (size == 8)
	    sparc_mode_class[i] = 1 << (int) D_MODE;
	  else if (size == 16)
	    sparc_mode_class[i] = 1 << (int) T_MODE;
	  else if (size == 32)
	    sparc_mode_class[i] = 1 << (int) O_MODE;
	  else
	    sparc_mode_class[i] = 0;
	  break;
	case MODE_VECTOR_INT:
	  /* Vector modes live in the FP registers, hence the *F classes.  */
	  if (size == 4)
	    sparc_mode_class[i] = 1 << (int) SF_MODE;
	  else if (size == 8)
	    sparc_mode_class[i] = 1 << (int) DF_MODE;
	  else
	    sparc_mode_class[i] = 0;
	  break;
	case MODE_FLOAT:
	case MODE_COMPLEX_FLOAT:
	  if (size == 4)
	    sparc_mode_class[i] = 1 << (int) SF_MODE;
	  else if (size == 8)
	    sparc_mode_class[i] = 1 << (int) DF_MODE;
	  else if (size == 16)
	    sparc_mode_class[i] = 1 << (int) TF_MODE;
	  else if (size == 32)
	    sparc_mode_class[i] = 1 << (int) OF_MODE;
	  else
	    sparc_mode_class[i] = 0;
	  break;
	case MODE_CC:
	  /* FP compares use the %fcc registers, integer compares %icc.  */
	  if (m == CCFPmode || m == CCFPEmode)
	    sparc_mode_class[i] = 1 << (int) CCFP_MODE;
	  else
	    sparc_mode_class[i] = 1 << (int) CC_MODE;
	  break;
	default:
	  sparc_mode_class[i] = 0;
	  break;
	}
    }

  if (TARGET_ARCH64)
    hard_regno_mode_classes = hard_64bit_mode_classes;
  else
    hard_regno_mode_classes = hard_32bit_mode_classes;

  /* Initialize the array used by REGNO_REG_CLASS.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      if (i < 16 && TARGET_V8PLUS)
	sparc_regno_reg_class[i] = I64_REGS;
      else if (i < 32 || i == FRAME_POINTER_REGNUM)
	sparc_regno_reg_class[i] = GENERAL_REGS;
      else if (i < 64)
	sparc_regno_reg_class[i] = FP_REGS;
      else if (i < 96)
	sparc_regno_reg_class[i] = EXTRA_FP_REGS;
      else if (i < 100)
	sparc_regno_reg_class[i] = FPCC_REGS;
      else
	sparc_regno_reg_class[i] = NO_REGS;
    }
}
4958
4959 /* Return whether REGNO, a global or FP register, must be saved/restored. */
4960
4961 static inline bool
save_global_or_fp_reg_p(unsigned int regno,int leaf_function ATTRIBUTE_UNUSED)4962 save_global_or_fp_reg_p (unsigned int regno,
4963 int leaf_function ATTRIBUTE_UNUSED)
4964 {
4965 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4966 }
4967
4968 /* Return whether the return address register (%i7) is needed. */
4969
4970 static inline bool
return_addr_reg_needed_p(int leaf_function)4971 return_addr_reg_needed_p (int leaf_function)
4972 {
4973 /* If it is live, for example because of __builtin_return_address (0). */
4974 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4975 return true;
4976
4977 /* Otherwise, it is needed as save register if %o7 is clobbered. */
4978 if (!leaf_function
4979 /* Loading the GOT register clobbers %o7. */
4980 || crtl->uses_pic_offset_table
4981 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4982 return true;
4983
4984 return false;
4985 }
4986
4987 /* Return whether REGNO, a local or in register, must be saved/restored. */
4988
4989 static bool
save_local_or_in_reg_p(unsigned int regno,int leaf_function)4990 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
4991 {
4992 /* General case: call-saved registers live at some point. */
4993 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
4994 return true;
4995
4996 /* Frame pointer register (%fp) if needed. */
4997 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
4998 return true;
4999
5000 /* Return address register (%i7) if needed. */
5001 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5002 return true;
5003
5004 /* GOT register (%l7) if needed. */
5005 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
5006 return true;
5007
5008 /* If the function accesses prior frames, the frame pointer and the return
5009 address of the previous frame must be saved on the stack. */
5010 if (crtl->accesses_prior_frames
5011 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5012 return true;
5013
5014 return false;
5015 }
5016
5017 /* Compute the frame size required by the function. This function is called
5018 during the reload pass and also by sparc_expand_prologue. */
5019
HOST_WIDE_INT
sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
{
  HOST_WIDE_INT frame_size, apparent_frame_size;
  /* n_global_fp_regs counts 4-byte save slots for global and FP regs.  */
  int args_size, n_global_fp_regs = 0;
  bool save_local_in_regs_p = false;
  unsigned int i;

  /* If the function allocates dynamic stack space, the dynamic offset is
     computed early and contains REG_PARM_STACK_SPACE, so we need to cope.  */
  if (leaf_function && !cfun->calls_alloca)
    args_size = 0;
  else
    args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);

  /* Calculate space needed for global registers.  */
  if (TARGET_ARCH64)
    {
      /* 64-bit: each register to be saved gets a full 8-byte slot.  */
      for (i = 0; i < 8; i++)
	if (save_global_or_fp_reg_p (i, 0))
	  n_global_fp_regs += 2;
    }
  else
    {
      /* 32-bit: registers are saved in aligned pairs, so a doubleword slot
	 is reserved if either member of a pair must be saved.  */
      for (i = 0; i < 8; i += 2)
	if (save_global_or_fp_reg_p (i, 0)
	    || save_global_or_fp_reg_p (i + 1, 0))
	  n_global_fp_regs += 2;
    }

  /* In the flat window model, find out which local and in registers need to
     be saved.  We don't reserve space in the current frame for them as they
     will be spilled into the register window save area of the caller's frame.
     However, as soon as we use this register window save area, we must create
     that of the current frame to make it the live one.  */
  if (TARGET_FLAT)
    for (i = 16; i < 32; i++)
      if (save_local_or_in_reg_p (i, leaf_function))
	{
	  save_local_in_regs_p = true;
	  break;
	}

  /* Calculate space needed for FP registers.  */
  for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
    if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
      n_global_fp_regs += 2;

  if (size == 0
      && n_global_fp_regs == 0
      && args_size == 0
      && !save_local_in_regs_p)
    frame_size = apparent_frame_size = 0;
  else
    {
      /* We subtract STARTING_FRAME_OFFSET, remember it's negative.  */
      apparent_frame_size = ROUND_UP (size - STARTING_FRAME_OFFSET, 8);
      apparent_frame_size += n_global_fp_regs * 4;

      /* We need to add the size of the outgoing argument area.  */
      frame_size = apparent_frame_size + ROUND_UP (args_size, 8);

      /* And that of the register window save area.  */
      frame_size += FIRST_PARM_OFFSET (cfun->decl);

      /* Finally, bump to the appropriate alignment.  */
      frame_size = SPARC_STACK_ALIGN (frame_size);
    }

  /* Set up values for use in prologue and epilogue.  */
  sparc_frame_size = frame_size;
  sparc_apparent_frame_size = apparent_frame_size;
  sparc_n_global_fp_regs = n_global_fp_regs;
  sparc_save_local_in_regs_p = save_local_in_regs_p;

  return frame_size;
}
5097
5098 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5099
5100 int
sparc_initial_elimination_offset(int to)5101 sparc_initial_elimination_offset (int to)
5102 {
5103 int offset;
5104
5105 if (to == STACK_POINTER_REGNUM)
5106 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5107 else
5108 offset = 0;
5109
5110 offset += SPARC_STACK_BIAS;
5111 return offset;
5112 }
5113
5114 /* Output any necessary .register pseudo-ops. */
5115
5116 void
sparc_output_scratch_registers(FILE * file ATTRIBUTE_UNUSED)5117 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5118 {
5119 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5120 int i;
5121
5122 if (TARGET_ARCH32)
5123 return;
5124
5125 /* Check if %g[2367] were used without
5126 .register being printed for them already. */
5127 for (i = 2; i < 8; i++)
5128 {
5129 if (df_regs_ever_live_p (i)
5130 && ! sparc_hard_reg_printed [i])
5131 {
5132 sparc_hard_reg_printed [i] = 1;
5133 /* %g7 is used as TLS base register, use #ignore
5134 for it instead of #scratch. */
5135 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5136 i == 7 ? "ignore" : "scratch");
5137 }
5138 if (i == 3) i = 5;
5139 }
5140 #endif
5141 }
5142
5143 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5144
5145 #if PROBE_INTERVAL > 4096
5146 #error Cannot use indexed addressing mode for stack probing
5147 #endif
5148
5149 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5150 inclusive. These are offsets from the current stack pointer.
5151
5152 Note that we don't use the REG+REG addressing mode for the probes because
5153 of the stack bias in 64-bit mode. And it doesn't really buy us anything
   so the advantages of having a single code path win here.  */
5155
static void
sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
  /* %g1 is the scratch register used to form the probe addresses.  */
  rtx g1 = gen_rtx_REG (Pmode, 1);

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  if (size <= PROBE_INTERVAL)
    {
      /* Probe addresses are below %sp, hence the subtraction.  */
      emit_move_insn (g1, GEN_INT (first));
      emit_insn (gen_rtx_SET (g1,
			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
      emit_stack_probe (plus_constant (Pmode, g1, -size));
    }

  /* The run-time loop is made up of 9 insns in the generic case while the
     compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
  else if (size <= 4 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i;

      emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
      emit_insn (gen_rtx_SET (g1,
			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
      emit_stack_probe (g1);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
	 it exceeds SIZE.  If only two probes are needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	{
	  emit_insn (gen_rtx_SET (g1,
				  plus_constant (Pmode, g1, -PROBE_INTERVAL)));
	  emit_stack_probe (g1);
	}

      emit_stack_probe (plus_constant (Pmode, g1,
				       (i - PROBE_INTERVAL) - size));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      /* %g4 holds the address one past the last probed interval.  */
      rtx g4 = gen_rtx_REG (Pmode, 4);

      emit_move_insn (g1, GEN_INT (first));


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
      emit_move_insn (g4, GEN_INT (rounded_size));


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_insn (gen_rtx_SET (g1,
			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));


      /* Step 3: the loop

	 while (TEST_ADDR != LAST_ADDR)
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      if (TARGET_ARCH64)
	emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
      else
	emit_insn (gen_probe_stack_rangesi (g1, g1, g4));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
5252
5253 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5254 absolute addresses. */
5255
const char *
output_probe_stack_range (rtx reg1, rtx reg2)
{
  /* Monotonically increasing counter so every expansion gets a fresh
     internal label.  */
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg1;
  xops[1] = GEN_INT (-PROBE_INTERVAL);
  output_asm_insn ("add\t%0, %1, %0", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[1] = reg2;
  output_asm_insn ("cmp\t%0, %1", xops);

  /* Probe at TEST_ADDR and branch.  The branch is annulled/predicted
     taken on V9 (%xcc) and is a plain bne on V8.  */
  if (TARGET_ARCH64)
    fputs ("\tbne,pt\t%xcc,", asm_out_file);
  else
    fputs ("\tbne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);
  /* The probe itself: store %g0 at TEST_ADDR (+ stack bias), placed in
     the branch delay slot (note the leading space after the tab).  */
  xops[1] = GEN_INT (SPARC_STACK_BIAS);
  output_asm_insn (" st\t%%g0, [%0+%1]", xops);

  return "";
}
5289
5290 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5291 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5292 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5293 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5294 the action to be performed if it returns false. Return the new offset. */
5295
5296 typedef bool (*sorr_pred_t) (unsigned int, int);
5297 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5298
static int
emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
			   int offset, int leaf_function, sorr_pred_t save_p,
			   sorr_act_t action_true, sorr_act_t action_false)
{
  unsigned int i;
  rtx mem;
  rtx_insn *insn;

  /* In 64-bit mode, integer registers (regnos below 32) are moved one at
     a time as full 8-byte quantities.  All other cases process registers
     in aligned pairs so a doubleword move can cover both when possible.  */
  if (TARGET_ARCH64 && high <= 32)
    {
      /* Deferred offset of the frame pointer slot, or -1 if %fp is not
	 among the registers to restore.  */
      int fp_offset = -1;

      for (i = low; i < high; i++)
	{
	  if (save_p (i, leaf_function))
	    {
	      mem = gen_frame_mem (DImode, plus_constant (Pmode,
							  base, offset));
	      if (action_true == SORR_SAVE)
		{
		  insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
		  RTX_FRAME_RELATED_P (insn) = 1;
		}
	      else  /* action_true == SORR_RESTORE */
		{
		  /* The frame pointer must be restored last since its old
		     value may be used as base address for the frame.  This
		     is problematic in 64-bit mode only because of the lack
		     of double-word load instruction.  */
		  if (i == HARD_FRAME_POINTER_REGNUM)
		    fp_offset = offset;
		  else
		    emit_move_insn (gen_rtx_REG (DImode, i), mem);
		}
	      offset += 8;
	    }
	  else if (action_false == SORR_ADVANCE)
	    offset += 8;
	}

      if (fp_offset >= 0)
	{
	  mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
	  emit_move_insn (hard_frame_pointer_rtx, mem);
	}
    }
  else
    {
      for (i = low; i < high; i += 2)
	{
	  bool reg0 = save_p (i, leaf_function);
	  bool reg1 = save_p (i + 1, leaf_function);
	  machine_mode mode;
	  int regno;

	  if (reg0 && reg1)
	    {
	      /* Both members of the pair: one doubleword move.  */
	      mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
	      regno = i;
	    }
	  else if (reg0)
	    {
	      /* Only the even register: single word in the low half.  */
	      mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
	      regno = i;
	    }
	  else if (reg1)
	    {
	      /* Only the odd register: single word in the high half.  */
	      mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
	      regno = i + 1;
	      offset += 4;
	    }
	  else
	    {
	      if (action_false == SORR_ADVANCE)
		offset += 8;
	      continue;
	    }

	  mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
	  if (action_true == SORR_SAVE)
	    {
	      insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
	      RTX_FRAME_RELATED_P (insn) = 1;
	      if (mode == DImode)
		{
		  /* Describe the doubleword store as a pair of word stores.
		     NOTE(review): presumably so the unwinder records both
		     registers of the pair -- confirm against dwarf2cfi.  */
		  rtx set1, set2;
		  mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
							      offset));
		  set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
		  RTX_FRAME_RELATED_P (set1) = 1;
		  mem
		    = gen_frame_mem (SImode, plus_constant (Pmode, base,
							    offset + 4));
		  set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
		  RTX_FRAME_RELATED_P (set2) = 1;
		  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
				gen_rtx_PARALLEL (VOIDmode,
						  gen_rtvec (2, set1, set2)));
		}
	    }
	  else  /* action_true == SORR_RESTORE */
	    emit_move_insn (gen_rtx_REG (mode, regno), mem);

	  /* Bump and round down to double word
	     in case we already bumped by 4.  */
	  offset = ROUND_DOWN (offset + 8, 8);
	}
    }

  return offset;
}
5411
5412 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5413
5414 static rtx
emit_adjust_base_to_offset(rtx base,int offset)5415 emit_adjust_base_to_offset (rtx base, int offset)
5416 {
5417 /* ??? This might be optimized a little as %g1 might already have a
5418 value close enough that a single add insn will do. */
5419 /* ??? Although, all of this is probably only a temporary fix because
5420 if %g1 can hold a function result, then sparc_expand_epilogue will
5421 lose (the result will be clobbered). */
5422 rtx new_base = gen_rtx_REG (Pmode, 1);
5423 emit_move_insn (new_base, GEN_INT (offset));
5424 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5425 return new_base;
5426 }
5427
5428 /* Emit code to save/restore call-saved global and FP registers. */
5429
5430 static void
emit_save_or_restore_global_fp_regs(rtx base,int offset,sorr_act_t action)5431 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5432 {
5433 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5434 {
5435 base = emit_adjust_base_to_offset (base, offset);
5436 offset = 0;
5437 }
5438
5439 offset
5440 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5441 save_global_or_fp_reg_p, action, SORR_NONE);
5442 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5443 save_global_or_fp_reg_p, action, SORR_NONE);
5444 }
5445
5446 /* Emit code to save/restore call-saved local and in registers. */
5447
5448 static void
emit_save_or_restore_local_in_regs(rtx base,int offset,sorr_act_t action)5449 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5450 {
5451 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5452 {
5453 base = emit_adjust_base_to_offset (base, offset);
5454 offset = 0;
5455 }
5456
5457 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5458 save_local_or_in_reg_p, action, SORR_ADVANCE);
5459 }
5460
5461 /* Emit a window_save insn. */
5462
static rtx_insn *
emit_window_save (rtx increment)
{
  /* INCREMENT is the (negative) stack adjustment performed together with
     the window save.  NOTE(review): presumed from the callers in
     sparc_expand_prologue -- confirm against the window_save pattern.  */
  rtx_insn *insn = emit_insn (gen_window_save (increment));
  RTX_FRAME_RELATED_P (insn) = 1;

  /* The incoming return address (%o7) is saved in %i7.  */
  add_reg_note (insn, REG_CFA_REGISTER,
		gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
			     gen_rtx_REG (Pmode,
					  INCOMING_RETURN_ADDR_REGNUM)));

  /* The window save event.  */
  add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);

  /* The CFA is %fp, the hard frame pointer.  */
  add_reg_note (insn, REG_CFA_DEF_CFA,
		plus_constant (Pmode, hard_frame_pointer_rtx,
			       INCOMING_FRAME_SP_OFFSET));

  return insn;
}
5485
5486 /* Generate an increment for the stack pointer. */
5487
5488 static rtx
gen_stack_pointer_inc(rtx increment)5489 gen_stack_pointer_inc (rtx increment)
5490 {
5491 return gen_rtx_SET (stack_pointer_rtx,
5492 gen_rtx_PLUS (Pmode,
5493 stack_pointer_rtx,
5494 increment));
5495 }
5496
5497 /* Expand the function prologue. The prologue is responsible for reserving
5498 storage for the frame, saving the call-saved registers and loading the
5499 GOT register if needed. */
5500
void
sparc_expand_prologue (void)
{
  HOST_WIDE_INT size;
  rtx_insn *insn;

  /* Compute a snapshot of crtl->uses_only_leaf_regs.  Relying
     on the final value of the flag means deferring the prologue/epilogue
     expansion until just before the second scheduling pass, which is too
     late to emit multiple epilogues or return insns.

     Of course we are making the assumption that the value of the flag
     will not change between now and its final value.  Of the three parts
     of the formula, only the last one can reasonably vary.  Let's take a
     closer look, after assuming that the first two ones are set to true
     (otherwise the last value is effectively silenced).

     If only_leaf_regs_used returns false, the global predicate will also
     be false so the actual frame size calculated below will be positive.
     As a consequence, the save_register_window insn will be emitted in
     the instruction stream; now this insn explicitly references %fp
     which is not a leaf register so only_leaf_regs_used will always
     return false subsequently.

     If only_leaf_regs_used returns true, we hope that the subsequent
     optimization passes won't cause non-leaf registers to pop up.  For
     example, the regrename pass has special provisions to not rename to
     non-leaf registers in a leaf function.  */
  sparc_leaf_function_p
    = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();

  size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);

  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
	    sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
					  size - STACK_CHECK_PROTECT);
	}
      else if (size > 0)
	sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
    }

  /* The 4096/8192 thresholds below split the adjustment so each step
     fits in a 13-bit signed immediate.  NOTE(review): presumed from the
     SPARC immediate range -- confirm against the add patterns.  */
  if (size == 0)
    ; /* do nothing.  */
  else if (sparc_leaf_function_p)
    {
      rtx size_int_rtx = GEN_INT (-size);

      if (size <= 4096)
	insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
      else if (size <= 8192)
	{
	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* %sp is still the CFA register.  */
	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
	}
      else
	{
	  rtx size_rtx = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (size_rtx, size_int_rtx);
	  insn = emit_insn (gen_stack_pointer_inc (size_rtx));
	  /* Tell the unwinder the net effect, since the register-based
	     adjustment itself is not directly interpretable.  */
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_stack_pointer_inc (size_int_rtx));
	}

      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      rtx size_int_rtx = GEN_INT (-size);

      if (size <= 4096)
	emit_window_save (size_int_rtx);
      else if (size <= 8192)
	{
	  emit_window_save (GEN_INT (-4096));

	  /* %sp is not the CFA register anymore.  */
	  emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));

	  /* Make sure no %fp-based store is issued until after the frame is
	     established.  The offset between the frame pointer and the stack
	     pointer is calculated relative to the value of the stack pointer
	     at the end of the function prologue, and moving instructions that
	     access the stack via the frame pointer between the instructions
	     that decrement the stack pointer could result in accessing the
	     register window save area, which is volatile.  */
	  emit_insn (gen_frame_blockage ());
	}
      else
	{
	  rtx size_rtx = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (size_rtx, size_int_rtx);
	  emit_window_save (size_rtx);
	}
    }

  /* Record where saved registers live for the epilogue: off %sp in a leaf
     function (no register window), off %fp otherwise.  */
  if (sparc_leaf_function_p)
    {
      sparc_frame_base_reg = stack_pointer_rtx;
      sparc_frame_base_offset = size + SPARC_STACK_BIAS;
    }
  else
    {
      sparc_frame_base_reg = hard_frame_pointer_rtx;
      sparc_frame_base_offset = SPARC_STACK_BIAS;
    }

  if (sparc_n_global_fp_regs > 0)
    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
				         sparc_frame_base_offset
					   - sparc_apparent_frame_size,
					 SORR_SAVE);

  /* Load the GOT register if needed.  */
  if (crtl->uses_pic_offset_table)
    load_got_register ();

  /* Advertise that the data calculated just above are now valid.  */
  sparc_prologue_data_valid_p = true;
}
5630
5631 /* Expand the function prologue. The prologue is responsible for reserving
5632 storage for the frame, saving the call-saved registers and loading the
5633 GOT register if needed. */
5634
void
sparc_flat_expand_prologue (void)
{
  HOST_WIDE_INT size;
  rtx_insn *insn;

  /* In the flat model there is no register window, so leafness only
     depends on the register allocator's view of the function.  */
  sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;

  size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);

  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
	    sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
					  size - STACK_CHECK_PROTECT);
	}
      else if (size > 0)
	sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
    }

  /* Spill the live local/in registers into the caller's register window
     save area before touching %sp.  */
  if (sparc_save_local_in_regs_p)
    emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
					SORR_SAVE);

  if (size == 0)
    ; /* do nothing.  */
  else
    {
      rtx size_int_rtx, size_rtx;

      size_rtx = size_int_rtx = GEN_INT (-size);

      /* We establish the frame (i.e. decrement the stack pointer) first, even
	 if we use a frame pointer, because we cannot clobber any call-saved
	 registers, including the frame pointer, if we haven't created a new
	 register save area, for the sake of compatibility with the ABI.  */
      if (size <= 4096)
	insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
      else if (size <= 8192 && !frame_pointer_needed)
	{
	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
	}
      else
	{
	  size_rtx = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (size_rtx, size_int_rtx);
	  insn = emit_insn (gen_stack_pointer_inc (size_rtx));
	  /* Tell the unwinder the net CFA adjustment, since the
	     register-based decrement is not directly interpretable.  */
	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
			gen_stack_pointer_inc (size_int_rtx));
	}
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Ensure nothing is scheduled until after the frame is established.  */
      emit_insn (gen_blockage ());

      if (frame_pointer_needed)
	{
	  /* %fp = %sp - (-size), i.e. the value %sp had on entry.  */
	  insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
					 gen_rtx_MINUS (Pmode,
							stack_pointer_rtx,
							size_rtx)));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
			gen_rtx_SET (hard_frame_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    size)));
	}

      if (return_addr_reg_needed_p (sparc_leaf_function_p))
	{
	  /* Copy the return address from %o7 into %i7 by hand, since there
	     is no register window to do it for us.  */
	  rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
	  rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);

	  insn = emit_move_insn (i7, o7);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));

	  /* Prevent this instruction from ever being considered dead,
	     even if this function has no epilogue.  */
	  emit_use (i7);
	}
    }

  if (frame_pointer_needed)
    {
      sparc_frame_base_reg = hard_frame_pointer_rtx;
      sparc_frame_base_offset = SPARC_STACK_BIAS;
    }
  else
    {
      sparc_frame_base_reg = stack_pointer_rtx;
      sparc_frame_base_offset = size + SPARC_STACK_BIAS;
    }

  if (sparc_n_global_fp_regs > 0)
    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
				         sparc_frame_base_offset
					   - sparc_apparent_frame_size,
					 SORR_SAVE);

  /* Load the GOT register if needed.  */
  if (crtl->uses_pic_offset_table)
    load_got_register ();

  /* Advertise that the data calculated just above are now valid.  */
  sparc_prologue_data_valid_p = true;
}
5751
5752 /* This function generates the assembly code for function entry, which boils
5753 down to emitting the necessary .register directives. */
5754
5755 static void
sparc_asm_function_prologue(FILE * file,HOST_WIDE_INT size ATTRIBUTE_UNUSED)5756 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5757 {
5758 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5759 if (!TARGET_FLAT)
5760 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5761
5762 sparc_output_scratch_registers (file);
5763 }
5764
5765 /* Expand the function epilogue, either normal or part of a sibcall.
5766 We emit all the instructions except the return or the call. */
5767
void
sparc_expand_epilogue (bool for_eh)
{
  HOST_WIDE_INT size = sparc_frame_size;

  /* NOTE(review): presumably keeps frame accesses from being moved past
     the stack deallocation when alloca was used -- confirm against the
     frame_blockage pattern.  */
  if (cfun->calls_alloca)
    emit_insn (gen_frame_blockage ());

  if (sparc_n_global_fp_regs > 0)
    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
				         sparc_frame_base_offset
					   - sparc_apparent_frame_size,
					 SORR_RESTORE);

  /* Only a leaf function deallocates the frame explicitly here; otherwise
     the 'restore' emitted with the return insn pops the register window
     (and nothing is done for the EH path).  */
  if (size == 0 || for_eh)
    ; /* do nothing.  */
  else if (sparc_leaf_function_p)
    {
      /* Split the increment so each step fits in a 13-bit immediate.  */
      if (size <= 4096)
	emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
      else if (size <= 8192)
	{
	  emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
	  emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
	}
      else
	{
	  rtx reg = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (reg, GEN_INT (size));
	  emit_insn (gen_stack_pointer_inc (reg));
	}
    }
}
5801
5802 /* Expand the function epilogue, either normal or part of a sibcall.
5803 We emit all the instructions except the return or the call. */
5804
void
sparc_flat_expand_epilogue (bool for_eh)
{
  HOST_WIDE_INT size = sparc_frame_size;

  if (sparc_n_global_fp_regs > 0)
    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
				         sparc_frame_base_offset
					   - sparc_apparent_frame_size,
					 SORR_RESTORE);

  /* If we have a frame pointer, we'll need both to restore it before the
     frame is destroyed and use its current value in destroying the frame.
     Since we don't have an atomic way to do that in the flat window model,
     we save the current value into a temporary register (%g1).  */
  if (frame_pointer_needed && !for_eh)
    emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);

  /* Move the return address back from %i7 to %o7, undoing the copy made
     in sparc_flat_expand_prologue.  */
  if (return_addr_reg_needed_p (sparc_leaf_function_p))
    emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
		    gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));

  if (sparc_save_local_in_regs_p)
    emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
					sparc_frame_base_offset,
					SORR_RESTORE);

  if (size == 0 || for_eh)
    ; /* do nothing.  */
  else if (frame_pointer_needed)
    {
      /* Make sure the frame is destroyed after everything else is done.  */
      emit_insn (gen_blockage ());

      /* Restore %sp from the copy of %fp stashed in %g1 above.  */
      emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
    }
  else
    {
      /* Likewise.  */
      emit_insn (gen_blockage ());

      /* Split the increment so each step fits in a 13-bit immediate.  */
      if (size <= 4096)
	emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
      else if (size <= 8192)
	{
	  emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
	  emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
	}
      else
	{
	  rtx reg = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (reg, GEN_INT (size));
	  emit_insn (gen_stack_pointer_inc (reg));
	}
    }
}
5861
5862 /* Return true if it is appropriate to emit `return' instructions in the
5863 body of a function. */
5864
5865 bool
sparc_can_use_return_insn_p(void)5866 sparc_can_use_return_insn_p (void)
5867 {
5868 return sparc_prologue_data_valid_p
5869 && sparc_n_global_fp_regs == 0
5870 && TARGET_FLAT
5871 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
5872 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
5873 }
5874
5875 /* This function generates the assembly code for function exit. */
5876
static void
sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  /* If the last two instructions of a function are "call foo; dslot;"
     the return address might point to the first instruction in the next
     function and we have to output a dummy nop for the sake of sane
     backtraces in such cases.  This is pointless for sibling calls since
     the return address is explicitly adjusted.  */

  rtx insn, last_real_insn;

  insn = get_last_insn ();

  last_real_insn = prev_real_insn (insn);
  /* A SEQUENCE wraps an insn together with its filled delay slot; the
     actual call, if any, is its first element.  */
  if (last_real_insn
      && NONJUMP_INSN_P (last_real_insn)
      && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
    last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);

  if (last_real_insn
      && CALL_P (last_real_insn)
      && !SIBLING_CALL_P (last_real_insn))
    fputs("\tnop\n", file);

  /* Flush any case vectors deferred until the end of the function.  */
  sparc_output_deferred_case_vectors ();
}
5903
5904 /* Output a 'restore' instruction. */
5905
static void
output_restore (rtx pat)
{
  rtx operands[3];

  /* No pending delay-slot insn: emit a plain 'restore'.  The leading
     space after the tab marks it as a delay-slot instruction.  */
  if (! pat)
    {
      fputs ("\t restore\n", asm_out_file);
      return;
    }

  gcc_assert (GET_CODE (pat) == SET);

  /* Fold the SET into the three-operand form of 'restore', which can
     perform an add as part of popping the register window.  */
  operands[0] = SET_DEST (pat);
  pat = SET_SRC (pat);

  switch (GET_CODE (pat))
    {
    case PLUS:
      operands[1] = XEXP (pat, 0);
      operands[2] = XEXP (pat, 1);
      output_asm_insn (" restore %r1, %2, %Y0", operands);
      break;
    case LO_SUM:
      operands[1] = XEXP (pat, 0);
      operands[2] = XEXP (pat, 1);
      output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
      break;
    case ASHIFT:
      /* Only a shift by 1 can be expressed, as 'restore rs, rs, rd'
	 (i.e. rs + rs).  */
      operands[1] = XEXP (pat, 0);
      gcc_assert (XEXP (pat, 1) == const1_rtx);
      output_asm_insn (" restore %r1, %r1, %Y0", operands);
      break;
    default:
      /* A plain move: restore %g0 + src into dest.  */
      operands[1] = pat;
      output_asm_insn (" restore %%g0, %1, %Y0", operands);
      break;
    }
}
5945
5946 /* Output a return. */
5947
/* Output a return.  (The %) and %# sequences are sparc print_operand
   punctuation -- NOTE(review): presumably the return-address offset and
   the delay-slot annotation; confirm against print_operand.)  */

const char *
output_return (rtx_insn *insn)
{
  if (crtl->calls_eh_return)
    {
      /* If the function uses __builtin_eh_return, the eh_return
	 machinery occupies the delay slot.  */
      gcc_assert (!final_sequence);

      if (flag_delayed_branch)
	{
	  if (!TARGET_FLAT && TARGET_V9)
	    fputs ("\treturn\t%i7+8\n", asm_out_file);
	  else
	    {
	      if (!TARGET_FLAT)
		fputs ("\trestore\n", asm_out_file);

	      fputs ("\tjmp\t%o7+8\n", asm_out_file);
	    }

	  /* The EH stack adjustment (%g1) goes in the delay slot.  */
	  fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
	}
      else
	{
	  if (!TARGET_FLAT)
	    fputs ("\trestore\n", asm_out_file);

	  fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
	  fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
	}
    }
  else if (sparc_leaf_function_p || TARGET_FLAT)
    {
      /* This is a leaf or flat function so we don't have to bother restoring
	 the register window, which frees us from dealing with the convoluted
	 semantics of restore/return.  We simply output the jump to the
	 return address and the insn in the delay slot (if any).  */

      return "jmp\t%%o7+%)%#";
    }
  else
    {
      /* This is a regular function so we have to restore the register window.
	 We may have a pending insn for the delay slot, which will be either
	 combined with the 'restore' instruction or put in the delay slot of
	 the 'return' instruction.  */

      if (final_sequence)
	{
	  rtx delay, pat;

	  delay = NEXT_INSN (insn);
	  gcc_assert (delay);

	  pat = PATTERN (delay);

	  if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
	    {
	      /* The delay-slot insn only uses registers that survive the
		 window switch, so 'return' can carry it as-is (renumbered
		 to the caller's window).  */
	      epilogue_renumber (&pat, 0);
	      return "return\t%%i7+%)%#";
	    }
	  else
	    {
	      /* Fold the delay-slot insn into the 'restore' and neutralize
		 the original insn so it is not emitted again.  */
	      output_asm_insn ("jmp\t%%i7+%)", NULL);
	      output_restore (pat);
	      PATTERN (delay) = gen_blockage ();
	      INSN_CODE (delay) = -1;
	    }
	}
      else
	{
	  /* The delay slot is empty.  */
	  if (TARGET_V9)
	    return "return\t%%i7+%)\n\t nop";
	  else if (flag_delayed_branch)
	    return "jmp\t%%i7+%)\n\t restore";
	  else
	    return "restore\n\tjmp\t%%o7+%)\n\t nop";
	}
    }

  return "";
}
6032
6033 /* Output a sibling call. */
6034
6035 const char *
output_sibcall(rtx_insn * insn,rtx call_operand)6036 output_sibcall (rtx_insn *insn, rtx call_operand)
6037 {
6038 rtx operands[1];
6039
6040 gcc_assert (flag_delayed_branch);
6041
6042 operands[0] = call_operand;
6043
6044 if (sparc_leaf_function_p || TARGET_FLAT)
6045 {
6046 /* This is a leaf or flat function so we don't have to bother restoring
6047 the register window. We simply output the jump to the function and
6048 the insn in the delay slot (if any). */
6049
6050 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6051
6052 if (final_sequence)
6053 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6054 operands);
6055 else
6056 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6057 it into branch if possible. */
6058 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6059 operands);
6060 }
6061 else
6062 {
6063 /* This is a regular function so we have to restore the register window.
6064 We may have a pending insn for the delay slot, which will be combined
6065 with the 'restore' instruction. */
6066
6067 output_asm_insn ("call\t%a0, 0", operands);
6068
6069 if (final_sequence)
6070 {
6071 rtx_insn *delay = NEXT_INSN (insn);
6072 gcc_assert (delay);
6073
6074 output_restore (PATTERN (delay));
6075
6076 PATTERN (delay) = gen_blockage ();
6077 INSN_CODE (delay) = -1;
6078 }
6079 else
6080 output_restore (NULL_RTX);
6081 }
6082
6083 return "";
6084 }
6085
6086 /* Functions for handling argument passing.
6087
6088 For 32-bit, the first 6 args are normally in registers and the rest are
6089 pushed. Any arg that starts within the first 6 words is at least
6090 partially passed in a register unless its data type forbids.
6091
6092 For 64-bit, the argument registers are laid out as an array of 16 elements
6093 and arguments are added sequentially. The first 6 int args and up to the
6094 first 16 fp args (depending on size) are passed in regs.
6095
6096 Slot Stack Integral Float Float in structure Double Long Double
6097 ---- ----- -------- ----- ------------------ ------ -----------
6098 15 [SP+248] %f31 %f30,%f31 %d30
6099 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6100 13 [SP+232] %f27 %f26,%f27 %d26
6101 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6102 11 [SP+216] %f23 %f22,%f23 %d22
6103 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6104 9 [SP+200] %f19 %f18,%f19 %d18
6105 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6106 7 [SP+184] %f15 %f14,%f15 %d14
6107 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6108 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6109 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6110 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6111 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6112 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6113 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6114
6115 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6116
6117 Integral arguments are always passed as 64-bit quantities appropriately
6118 extended.
6119
6120 Passing of floating point values is handled as follows.
6121 If a prototype is in scope:
6122 If the value is in a named argument (i.e. not a stdarg function or a
6123 value not part of the `...') then the value is passed in the appropriate
6124 fp reg.
6125 If the value is part of the `...' and is passed in one of the first 6
6126 slots then the value is passed in the appropriate int reg.
6127 If the value is part of the `...' and is not passed in one of the first 6
6128 slots then the value is passed in memory.
6129 If a prototype is not in scope:
6130 If the value is one of the first 6 arguments the value is passed in the
6131 appropriate integer reg and the appropriate fp reg.
6132 If the value is not one of the first 6 arguments the value is passed in
6133 the appropriate fp reg and in memory.
6134
6135
6136 Summary of the calling conventions implemented by GCC on the SPARC:
6137
6138 32-bit ABI:
6139 size argument return value
6140
6141 small integer <4 int. reg. int. reg.
6142 word 4 int. reg. int. reg.
6143 double word 8 int. reg. int. reg.
6144
6145 _Complex small integer <8 int. reg. int. reg.
6146 _Complex word 8 int. reg. int. reg.
6147 _Complex double word 16 memory int. reg.
6148
6149 vector integer <=8 int. reg. FP reg.
6150 vector integer >8 memory memory
6151
6152 float 4 int. reg. FP reg.
6153 double 8 int. reg. FP reg.
6154 long double 16 memory memory
6155
6156 _Complex float 8 memory FP reg.
6157 _Complex double 16 memory FP reg.
6158 _Complex long double 32 memory FP reg.
6159
6160 vector float any memory memory
6161
6162 aggregate any memory memory
6163
6164
6165
6166 64-bit ABI:
6167 size argument return value
6168
6169 small integer <8 int. reg. int. reg.
6170 word 8 int. reg. int. reg.
6171 double word 16 int. reg. int. reg.
6172
6173 _Complex small integer <16 int. reg. int. reg.
6174 _Complex word 16 int. reg. int. reg.
6175 _Complex double word 32 memory int. reg.
6176
6177 vector integer <=16 FP reg. FP reg.
6178 vector integer 16<s<=32 memory FP reg.
6179 vector integer >32 memory memory
6180
6181 float 4 FP reg. FP reg.
6182 double 8 FP reg. FP reg.
6183 long double 16 FP reg. FP reg.
6184
6185 _Complex float 8 FP reg. FP reg.
6186 _Complex double 16 FP reg. FP reg.
6187 _Complex long double 32 memory FP reg.
6188
6189 vector float <=16 FP reg. FP reg.
6190 vector float 16<s<=32 memory FP reg.
6191 vector float >32 memory memory
6192
6193 aggregate <=16 reg. reg.
6194 aggregate 16<s<=32 memory reg.
6195 aggregate >32 memory memory
6196
6197
6198
6199 Note #1: complex floating-point types follow the extended SPARC ABIs as
6200 implemented by the Sun compiler.
6201
6202 Note #2: integral vector types follow the scalar floating-point types
6203 conventions to match what is implemented by the Sun VIS SDK.
6204
6205 Note #3: floating-point vector types follow the aggregate types
6206 conventions. */
6207
6208
/* Maximum number of int regs for args (%o0-%o5 resp. %i0-%i5).  */
#define SPARC_INT_ARG_MAX 6
/* Maximum number of fp regs for args (64-bit ABI slots).  */
#define SPARC_FP_ARG_MAX 16
/* Number of words (partially) occupied for a given size in units.  */
#define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
6215
6216 /* Handle the INIT_CUMULATIVE_ARGS macro.
6217 Initialize a variable CUM of type CUMULATIVE_ARGS
6218 for a call to a function whose data type is FNTYPE.
6219 For a library call, FNTYPE is 0. */
6220
6221 void
init_cumulative_args(struct sparc_args * cum,tree fntype,rtx,tree)6222 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6223 {
6224 cum->words = 0;
6225 cum->prototype_p = fntype && prototype_p (fntype);
6226 cum->libcall_p = !fntype;
6227 }
6228
6229 /* Handle promotion of pointer and integer arguments. */
6230
6231 static machine_mode
sparc_promote_function_mode(const_tree type,machine_mode mode,int * punsignedp,const_tree,int)6232 sparc_promote_function_mode (const_tree type, machine_mode mode,
6233 int *punsignedp, const_tree, int)
6234 {
6235 if (type && POINTER_TYPE_P (type))
6236 {
6237 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6238 return Pmode;
6239 }
6240
6241 /* Integral arguments are passed as full words, as per the ABI. */
6242 if (GET_MODE_CLASS (mode) == MODE_INT
6243 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6244 return word_mode;
6245
6246 return mode;
6247 }
6248
6249 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6250
6251 static bool
sparc_strict_argument_naming(cumulative_args_t ca ATTRIBUTE_UNUSED)6252 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6253 {
6254 return TARGET_ARCH64 ? true : false;
6255 }
6256
6257 /* Traverse the record TYPE recursively and call FUNC on its fields.
6258 NAMED is true if this is for a named parameter. DATA is passed
6259 to FUNC for each field. OFFSET is the starting position and
6260 PACKED is true if we are inside a packed record. */
6261
template <typename T, void Func (const_tree, HOST_WIDE_INT, bool, T*)>
static void
traverse_record_type (const_tree type, bool named, T *data,
		      HOST_WIDE_INT offset = 0, bool packed = false)
{
  /* The ABI obviously doesn't specify how packed structures are passed.
     These are passed in integer regs if possible, otherwise memory.
     One packed field taints the whole record.  */
  if (!packed)
    for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
      if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
	{
	  packed = true;
	  break;
	}

  /* Walk the real fields, but skip those with no size or a zero size.
     ??? Fields with variable offset are handled as having zero offset.  */
  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    if (TREE_CODE (field) == FIELD_DECL)
      {
	if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
	  continue;

	/* Bit position of the field relative to the outermost record.  */
	HOST_WIDE_INT bitpos = offset;
	if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
	  bitpos += int_bit_position (field);

	tree field_type = TREE_TYPE (field);
	if (TREE_CODE (field_type) == RECORD_TYPE)
	  /* Recurse into nested records, propagating the packed taint.  */
	  traverse_record_type<T, Func> (field_type, named, data, bitpos,
					 packed);
	else
	  {
	    /* Leaf field: FP-eligible only if it is a float or vector,
	       the parameter is named, the record is not packed and the
	       FPU is enabled.  */
	    const bool fp_type
	      = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
	    Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
		  data);
	  }
      }
}
6302
6303 /* Handle recursive register classifying for structure layout. */
6304
typedef struct
{
  bool fp_regs;			/* true if at least one field is eligible
				   to FP registers.  */
  bool fp_regs_in_first_word;	/* true if such a field lies in the first
				   word of the record.  */
} classify_data_t;
6310
6311 /* A subroutine of function_arg_slotno. Classify the field. */
6312
6313 inline void
classify_registers(const_tree,HOST_WIDE_INT bitpos,bool fp,classify_data_t * data)6314 classify_registers (const_tree, HOST_WIDE_INT bitpos, bool fp,
6315 classify_data_t *data)
6316 {
6317 if (fp)
6318 {
6319 data->fp_regs = true;
6320 if (bitpos < BITS_PER_WORD)
6321 data->fp_regs_in_first_word = true;
6322 }
6323 }
6324
6325 /* Compute the slot number to pass an argument in.
6326 Return the slot number or -1 if passing on the stack.
6327
6328 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6329 the preceding args and about the function being called.
6330 MODE is the argument's machine mode.
6331 TYPE is the data type of the argument (as a tree).
6332 This is null for libcalls where that information may
6333 not be available.
6334 NAMED is nonzero if this argument is a named parameter
6335 (otherwise it is an extra parameter matching an ellipsis).
6336 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6337 *PREGNO records the register number to use if scalar type.
6338 *PPADDING records the amount of padding needed in words. */
6339
static int
function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
		     const_tree type, bool named, bool incoming,
		     int *pregno, int *ppadding)
{
  int regbase = (incoming
		 ? SPARC_INCOMING_INT_ARG_FIRST
		 : SPARC_OUTGOING_INT_ARG_FIRST);
  int slotno = cum->words;
  enum mode_class mclass;
  int regno;

  *ppadding = 0;

  /* Addressable types must live in memory.  */
  if (type && TREE_ADDRESSABLE (type))
    return -1;

  /* In 32-bit mode, under-aligned BLKmode objects go on the stack.  */
  if (TARGET_ARCH32
      && mode == BLKmode
      && type
      && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
    return -1;

  /* For SPARC64, objects requiring 16-byte alignment get it.  */
  if (TARGET_ARCH64
      && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
      && (slotno & 1) != 0)
    slotno++, *ppadding = 1;

  mclass = GET_MODE_CLASS (mode);
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    {
      /* Vector types deserve special treatment because they are
	 polymorphic wrt their mode, depending upon whether VIS
	 instructions are enabled.  */
      if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
	{
	  /* The SPARC port defines no floating-point vector modes.  */
	  gcc_assert (mode == BLKmode);
	}
      else
	{
	  /* Integral vector types should either have a vector
	     mode or an integral mode, because we are guaranteed
	     by pass_by_reference that their size is not greater
	     than 16 bytes and TImode is 16-byte wide.  */
	  gcc_assert (mode != BLKmode);

	  /* Vector integers are handled like floats according to
	     the Sun VIS SDK.  */
	  mclass = MODE_FLOAT;
	}
    }

  switch (mclass)
    {
    case MODE_FLOAT:
    case MODE_COMPLEX_FLOAT:
    case MODE_VECTOR_INT:
      if (TARGET_ARCH64 && TARGET_FPU && named)
	{
	  /* If all arg slots are filled, then must pass on stack.  */
	  if (slotno >= SPARC_FP_ARG_MAX)
	    return -1;

	  regno = SPARC_FP_ARG_FIRST + slotno * 2;
	  /* Arguments filling only one single FP register are
	     right-justified in the outer double FP register.  */
	  if (GET_MODE_SIZE (mode) <= 4)
	    regno++;
	  break;
	}
      /* fallthrough */

    case MODE_INT:
    case MODE_COMPLEX_INT:
      /* If all arg slots are filled, then must pass on stack.  */
      if (slotno >= SPARC_INT_ARG_MAX)
	return -1;

      regno = regbase + slotno;
      break;

    case MODE_RANDOM:
      if (mode == VOIDmode)
	/* MODE is VOIDmode when generating the actual call.  */
	return -1;

      gcc_assert (mode == BLKmode);

      if (TARGET_ARCH32
	  || !type
	  || (TREE_CODE (type) != RECORD_TYPE
	      && TREE_CODE (type) != VECTOR_TYPE))
	{
	  /* If all arg slots are filled, then must pass on stack.  */
	  if (slotno >= SPARC_INT_ARG_MAX)
	    return -1;

	  regno = regbase + slotno;
	}
      else /* TARGET_ARCH64 && type */
	{
	  /* If all arg slots are filled, then must pass on stack.  */
	  if (slotno >= SPARC_FP_ARG_MAX)
	    return -1;

	  if (TREE_CODE (type) == RECORD_TYPE)
	    {
	      /* Classify the fields to decide between FP and int regs.  */
	      classify_data_t data = { false, false };
	      traverse_record_type<classify_data_t, classify_registers>
		(type, named, &data);

	      if (data.fp_regs)
		{
		  /* If all FP slots are filled except for the last one and
		     there is no FP field in the first word, then must pass
		     on stack.  */
		  if (slotno >= SPARC_FP_ARG_MAX - 1
		      && !data.fp_regs_in_first_word)
		    return -1;
		}
	      else
		{
		  /* If all int slots are filled, then must pass on stack.  */
		  if (slotno >= SPARC_INT_ARG_MAX)
		    return -1;
		}
	    }

	  /* PREGNO isn't set since both int and FP regs can be used.  */
	  return slotno;
	}
      break;

    default :
      gcc_unreachable ();
    }

  *pregno = regno;
  return slotno;
}
6482
6483 /* Handle recursive register counting/assigning for structure layout. */
6484
typedef struct
{
  int slotno;		/* slot number of the argument.  */
  int regbase;		/* regno of the base register.  */
  int intoffset;	/* offset of the first pending integer field,
			   or -1 if there is none pending.  */
  int nregs;		/* number of words passed in registers.  */
  bool stack;		/* true if part of the argument is on the stack.  */
  rtx ret;		/* return expression (PARALLEL) being built.  */
} assign_data_t;
6494
6495 /* A subroutine of function_arg_record_value. Compute the number of integer
6496 registers to be assigned between PARMS->intoffset and BITPOS. Return
6497 true if at least one integer register is assigned or false otherwise. */
6498
6499 static bool
compute_int_layout(HOST_WIDE_INT bitpos,assign_data_t * data,int * pnregs)6500 compute_int_layout (HOST_WIDE_INT bitpos, assign_data_t *data, int *pnregs)
6501 {
6502 if (data->intoffset < 0)
6503 return false;
6504
6505 const int intoffset = data->intoffset;
6506 data->intoffset = -1;
6507
6508 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6509 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
6510 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
6511 int nregs = (endbit - startbit) / BITS_PER_WORD;
6512
6513 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
6514 {
6515 nregs = SPARC_INT_ARG_MAX - this_slotno;
6516
6517 /* We need to pass this field (partly) on the stack. */
6518 data->stack = 1;
6519 }
6520
6521 if (nregs <= 0)
6522 return false;
6523
6524 *pnregs = nregs;
6525 return true;
6526 }
6527
6528 /* A subroutine of function_arg_record_value. Compute the number and the mode
6529 of the FP registers to be assigned for FIELD. Return true if at least one
6530 FP register is assigned or false otherwise. */
6531
6532 static bool
compute_fp_layout(const_tree field,HOST_WIDE_INT bitpos,assign_data_t * data,int * pnregs,machine_mode * pmode)6533 compute_fp_layout (const_tree field, HOST_WIDE_INT bitpos,
6534 assign_data_t *data,
6535 int *pnregs, machine_mode *pmode)
6536 {
6537 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6538 machine_mode mode = DECL_MODE (field);
6539 int nregs, nslots;
6540
6541 /* Slots are counted as words while regs are counted as having the size of
6542 the (inner) mode. */
6543 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE && mode == BLKmode)
6544 {
6545 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6546 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6547 }
6548 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6549 {
6550 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6551 nregs = 2;
6552 }
6553 else
6554 nregs = 1;
6555
6556 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
6557
6558 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
6559 {
6560 nslots = SPARC_FP_ARG_MAX - this_slotno;
6561 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
6562
6563 /* We need to pass this field (partly) on the stack. */
6564 data->stack = 1;
6565
6566 if (nregs <= 0)
6567 return false;
6568 }
6569
6570 *pnregs = nregs;
6571 *pmode = mode;
6572 return true;
6573 }
6574
6575 /* A subroutine of function_arg_record_value. Count the number of registers
6576 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
6577
6578 inline void
count_registers(const_tree field,HOST_WIDE_INT bitpos,bool fp,assign_data_t * data)6579 count_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6580 assign_data_t *data)
6581 {
6582 if (fp)
6583 {
6584 int nregs;
6585 machine_mode mode;
6586
6587 if (compute_int_layout (bitpos, data, &nregs))
6588 data->nregs += nregs;
6589
6590 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
6591 data->nregs += nregs;
6592 }
6593 else
6594 {
6595 if (data->intoffset < 0)
6596 data->intoffset = bitpos;
6597 }
6598 }
6599
6600 /* A subroutine of function_arg_record_value. Assign the bits of the
6601 structure between PARMS->intoffset and BITPOS to integer registers. */
6602
static void
assign_int_registers (HOST_WIDE_INT bitpos, assign_data_t *data)
{
  int intoffset = data->intoffset;
  machine_mode mode;
  int nregs;

  /* Nothing to do if no integer fields are pending.  */
  if (!compute_int_layout (bitpos, data, &nregs))
    return;

  /* If this is the trailing part of a word, only load that much into
     the register.  Otherwise load the whole register.  Note that in
     the latter case we may pick up unwanted bits.  It's not a problem
     at the moment but may wish to revisit.  */
  if (intoffset % BITS_PER_WORD != 0)
    mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
				   MODE_INT);
  else
    mode = word_mode;

  const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
  unsigned int regno = data->regbase + this_slotno;
  intoffset /= BITS_PER_UNIT;

  /* Emit one (reg, byte-offset) pair per word.  After the first
     (possibly partial) word, every word uses word_mode and starts on
     a word boundary.  */
  do
    {
      rtx reg = gen_rtx_REG (mode, regno);
      XVECEXP (data->ret, 0, data->stack + data->nregs)
	= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
      data->nregs += 1;
      mode = word_mode;
      regno += 1;
      /* Round the byte offset up to the next word boundary.  */
      intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
    }
  while (--nregs > 0);
}
6639
6640 /* A subroutine of function_arg_record_value. Assign FIELD at position
6641 BITPOS to FP registers. */
6642
static void
assign_fp_registers (const_tree field, HOST_WIDE_INT bitpos,
		     assign_data_t *data)
{
  int nregs;
  machine_mode mode;

  /* Nothing to do if the field gets no FP registers.  */
  if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
    return;

  const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
  int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
  /* A single-register value in the second half of a word is
     right-justified, i.e. placed in the odd-numbered register.  */
  if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
    regno++;
  int pos = bitpos / BITS_PER_UNIT;

  /* Emit one (reg, byte-offset) pair per register, stepping the regno
     by the number of 4-byte FP regs the mode occupies.  */
  do
    {
      rtx reg = gen_rtx_REG (mode, regno);
      XVECEXP (data->ret, 0, data->stack + data->nregs)
	= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
      data->nregs += 1;
      regno += GET_MODE_SIZE (mode) / 4;
      pos += GET_MODE_SIZE (mode);
    }
  while (--nregs > 0);
}
6670
6671 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
6672 the structure between PARMS->intoffset and BITPOS to registers. */
6673
6674 inline void
assign_registers(const_tree field,HOST_WIDE_INT bitpos,bool fp,assign_data_t * data)6675 assign_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6676 assign_data_t *data)
6677 {
6678 if (fp)
6679 {
6680 assign_int_registers (bitpos, data);
6681
6682 assign_fp_registers (field, bitpos, data);
6683 }
6684 else
6685 {
6686 if (data->intoffset < 0)
6687 data->intoffset = bitpos;
6688 }
6689 }
6690
6691 /* Used by function_arg and sparc_function_value_1 to implement the complex
6692 conventions of the 64-bit ABI for passing and returning structures.
6693 Return an expression valid as a return value for the FUNCTION_ARG
6694 and TARGET_FUNCTION_VALUE.
6695
6696 TYPE is the data type of the argument (as a tree).
6697 This is null for libcalls where that information may
6698 not be available.
6699 MODE is the argument's machine mode.
6700 SLOTNO is the index number of the argument's slot in the parameter array.
6701 NAMED is true if this argument is a named parameter
6702 (otherwise it is an extra parameter matching an ellipsis).
6703 REGBASE is the regno of the base register for the parameter array. */
6704
static rtx
function_arg_record_value (const_tree type, machine_mode mode,
			   int slotno, bool named, int regbase)
{
  HOST_WIDE_INT typesize = int_size_in_bytes (type);
  assign_data_t data;
  int nregs;

  data.slotno = slotno;
  data.regbase = regbase;

  /* First pass: count how many registers we need.  */
  data.nregs = 0;
  data.intoffset = 0;
  data.stack = false;
  traverse_record_type<assign_data_t, count_registers> (type, named, &data);

  /* Take into account pending integer fields.  */
  if (compute_int_layout (typesize * BITS_PER_UNIT, &data, &nregs))
    data.nregs += nregs;

  /* Allocate the vector and handle some annoying special cases.  */
  nregs = data.nregs;

  if (nregs == 0)
    {
      /* ??? Empty structure has no value?  Duh?  */
      if (typesize <= 0)
	{
	  /* Though there's nothing really to store, return a word register
	     anyway so the rest of gcc doesn't go nuts.  Returning a PARALLEL
	     leads to breakage due to the fact that there are zero bytes to
	     load.  */
	  return gen_rtx_REG (mode, regbase);
	}

      /* ??? C++ has structures with no fields, and yet a size.  Give up
	 for now and pass everything back in integer registers.  */
      nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
      if (nregs + slotno > SPARC_INT_ARG_MAX)
	nregs = SPARC_INT_ARG_MAX - slotno;
    }

  gcc_assert (nregs > 0);

  data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));

  /* If at least one field must be passed on the stack, generate
     (parallel [(expr_list (nil) ...) ...]) so that all fields will
     also be passed on the stack.  We can't do much better because the
     semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
     of structures for which the fields passed exclusively in registers
     are not at the beginning of the structure.  */
  if (data.stack)
    XVECEXP (data.ret, 0, 0)
      = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);

  /* Second pass: actually assign the registers.  */
  data.nregs = 0;
  data.intoffset = 0;
  traverse_record_type<assign_data_t, assign_registers> (type, named, &data);

  /* Assign pending integer fields.  */
  assign_int_registers (typesize * BITS_PER_UNIT, &data);

  /* Both passes must agree on the register count.  */
  gcc_assert (data.nregs == nregs);

  return data.ret;
}
6774
6775 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6776 of the 64-bit ABI for passing and returning unions.
6777 Return an expression valid as a return value for the FUNCTION_ARG
6778 and TARGET_FUNCTION_VALUE.
6779
6780 SIZE is the size in bytes of the union.
6781 MODE is the argument's machine mode.
6782 REGNO is the hard register the union will be passed in. */
6783
6784 static rtx
function_arg_union_value(int size,machine_mode mode,int slotno,int regno)6785 function_arg_union_value (int size, machine_mode mode, int slotno,
6786 int regno)
6787 {
6788 int nwords = CEIL_NWORDS (size), i;
6789 rtx regs;
6790
6791 /* See comment in previous function for empty structures. */
6792 if (nwords == 0)
6793 return gen_rtx_REG (mode, regno);
6794
6795 if (slotno == SPARC_INT_ARG_MAX - 1)
6796 nwords = 1;
6797
6798 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6799
6800 for (i = 0; i < nwords; i++)
6801 {
6802 /* Unions are passed left-justified. */
6803 XVECEXP (regs, 0, i)
6804 = gen_rtx_EXPR_LIST (VOIDmode,
6805 gen_rtx_REG (word_mode, regno),
6806 GEN_INT (UNITS_PER_WORD * i));
6807 regno++;
6808 }
6809
6810 return regs;
6811 }
6812
6813 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6814 for passing and returning BLKmode vectors.
6815 Return an expression valid as a return value for the FUNCTION_ARG
6816 and TARGET_FUNCTION_VALUE.
6817
6818 SIZE is the size in bytes of the vector.
6819 REGNO is the FP hard register the vector will be passed in. */
6820
6821 static rtx
function_arg_vector_value(int size,int regno)6822 function_arg_vector_value (int size, int regno)
6823 {
6824 const int nregs = MAX (1, size / 8);
6825 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6826
6827 if (size < 8)
6828 XVECEXP (regs, 0, 0)
6829 = gen_rtx_EXPR_LIST (VOIDmode,
6830 gen_rtx_REG (SImode, regno),
6831 const0_rtx);
6832 else
6833 for (int i = 0; i < nregs; i++)
6834 XVECEXP (regs, 0, i)
6835 = gen_rtx_EXPR_LIST (VOIDmode,
6836 gen_rtx_REG (DImode, regno + 2*i),
6837 GEN_INT (i*8));
6838
6839 return regs;
6840 }
6841
6842 /* Determine where to put an argument to a function.
6843 Value is zero to push the argument on the stack,
6844 or a hard register in which to store the argument.
6845
6846 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6847 the preceding args and about the function being called.
6848 MODE is the argument's machine mode.
6849 TYPE is the data type of the argument (as a tree).
6850 This is null for libcalls where that information may
6851 not be available.
6852 NAMED is true if this argument is a named parameter
6853 (otherwise it is an extra parameter matching an ellipsis).
6854 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6855 TARGET_FUNCTION_INCOMING_ARG. */
6856
static rtx
sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
		      const_tree type, bool named, bool incoming)
{
  const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  int regbase = (incoming
		 ? SPARC_INCOMING_INT_ARG_FIRST
		 : SPARC_OUTGOING_INT_ARG_FIRST);
  int slotno, regno, padding;
  enum mode_class mclass = GET_MODE_CLASS (mode);

  /* NOTE(review): the extracted text read `®no' here; the true
     source is `&regno'.  */
  slotno = function_arg_slotno (cum, mode, type, named, incoming,
				&regno, &padding);
  /* A slot number of -1 means the argument is passed on the stack.  */
  if (slotno == -1)
    return 0;

  /* Vector types deserve special treatment because they are polymorphic wrt
     their mode, depending upon whether VIS instructions are enabled.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      gcc_assert ((TARGET_ARCH32 && size <= 8)
		  || (TARGET_ARCH64 && size <= 16));

      if (mode == BLKmode)
	return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);

      mclass = MODE_FLOAT;
    }

  /* The 32-bit ABI passes everything in integer registers.  */
  if (TARGET_ARCH32)
    return gen_rtx_REG (mode, regno);

  /* Structures up to 16 bytes in size are passed in arg slots on the stack
     and are promoted to registers if possible.  */
  if (type && TREE_CODE (type) == RECORD_TYPE)
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      gcc_assert (size <= 16);

      return function_arg_record_value (type, mode, slotno, named, regbase);
    }

  /* Unions up to 16 bytes in size are passed in integer registers.  */
  else if (type && TREE_CODE (type) == UNION_TYPE)
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      gcc_assert (size <= 16);

      return function_arg_union_value (size, mode, slotno, regno);
    }

  /* v9 fp args in reg slots beyond the int reg slots get passed in regs
     but also have the slot allocated for them.
     If no prototype is in scope fp values in register slots get passed
     in two places, either fp regs and int regs or fp regs and memory.  */
  else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
	   && SPARC_FP_REG_P (regno))
    {
      rtx reg = gen_rtx_REG (mode, regno);
      if (cum->prototype_p || cum->libcall_p)
	return reg;
      else
	{
	  rtx v0, v1;

	  if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
	    {
	      int intreg;

	      /* On incoming, we don't need to know that the value
		 is passed in %f0 and %i0, and it confuses other parts
		 causing needless spillage even on the simplest cases.  */
	      if (incoming)
		return reg;

	      intreg = (SPARC_OUTGOING_INT_ARG_FIRST
			+ (regno - SPARC_FP_ARG_FIRST) / 2);

	      /* Unprototyped: pass in both the FP reg and the int reg.  */
	      v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
	      v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
				      const0_rtx);
	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
	    }
	  else
	    {
	      /* Beyond the int reg slots: pass in the FP reg and memory
		 (the (nil) expr_list marks the memory copy).  */
	      v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
	      v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
	    }
	}
    }

  /* All other aggregate types are passed in an integer register in a mode
     corresponding to the size of the type.  */
  else if (type && AGGREGATE_TYPE_P (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      gcc_assert (size <= 16);

      mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
    }

  return gen_rtx_REG (mode, regno);
}
6963
6964 /* Handle the TARGET_FUNCTION_ARG target hook. */
6965
6966 static rtx
sparc_function_arg(cumulative_args_t cum,machine_mode mode,const_tree type,bool named)6967 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
6968 const_tree type, bool named)
6969 {
6970 return sparc_function_arg_1 (cum, mode, type, named, false);
6971 }
6972
6973 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
6974
6975 static rtx
sparc_function_incoming_arg(cumulative_args_t cum,machine_mode mode,const_tree type,bool named)6976 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
6977 const_tree type, bool named)
6978 {
6979 return sparc_function_arg_1 (cum, mode, type, named, true);
6980 }
6981
6982 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
6983
6984 static unsigned int
sparc_function_arg_boundary(machine_mode mode,const_tree type)6985 sparc_function_arg_boundary (machine_mode mode, const_tree type)
6986 {
6987 return ((TARGET_ARCH64
6988 && (GET_MODE_ALIGNMENT (mode) == 128
6989 || (type && TYPE_ALIGN (type) == 128)))
6990 ? 128
6991 : PARM_BOUNDARY);
6992 }
6993
6994 /* For an arg passed partly in registers and partly in memory,
6995 this is the number of bytes of registers used.
6996 For args passed entirely in registers or entirely in memory, zero.
6997
6998 Any arg that starts in the first 6 regs but won't entirely fit in them
6999 needs partial registers on v8. On v9, structures with integer
7000 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7001 values that begin in the last fp reg [where "last fp reg" varies with the
7002 mode] will be split between that reg and memory. */
7003
static int
sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
			 tree type, bool named)
{
  int slotno, regno, padding;

  /* We pass false for incoming here, it doesn't matter.  */
  slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
				false, &regno, &padding);

  /* Slot -1 means the argument is passed entirely in memory.  */
  if (slotno == -1)
    return 0;

  if (TARGET_ARCH32)
    {
      /* If the argument starts in a register slot but its tail spills
	 past the last integer argument register, only the head up to
	 SPARC_INT_ARG_MAX is in registers.  */
      if ((slotno + (mode == BLKmode
		     ? CEIL_NWORDS (int_size_in_bytes (type))
		     : CEIL_NWORDS (GET_MODE_SIZE (mode))))
	  > SPARC_INT_ARG_MAX)
	return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
    }
  else
    {
      /* We are guaranteed by pass_by_reference that the size of the
	 argument is not greater than 16 bytes, so we only need to return
	 one word if the argument is partially passed in registers.  */

      if (type && AGGREGATE_TYPE_P (type))
	{
	  int size = int_size_in_bytes (type);

	  /* A two-word aggregate starting in the very last integer or
	     FP slot has its second word passed in memory.  */
	  if (size > UNITS_PER_WORD
	      && (slotno == SPARC_INT_ARG_MAX - 1
		  || slotno == SPARC_FP_ARG_MAX - 1))
	    return UNITS_PER_WORD;
	}
      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
	       || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
		   && ! (TARGET_FPU && named)))
	{
	  /* The complex types are passed as packed types.  */
	  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
	      && slotno == SPARC_INT_ARG_MAX - 1)
	    return UNITS_PER_WORD;
	}
      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
	{
	  /* A complex FP value that begins in the last FP argument
	     register is split between that register and memory.  */
	  if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
	      > SPARC_FP_ARG_MAX)
	    return UNITS_PER_WORD;
	}
    }

  return 0;
}
7059
7060 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7061 Specify whether to pass the argument by reference. */
7062
7063 static bool
sparc_pass_by_reference(cumulative_args_t cum ATTRIBUTE_UNUSED,machine_mode mode,const_tree type,bool named ATTRIBUTE_UNUSED)7064 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7065 machine_mode mode, const_tree type,
7066 bool named ATTRIBUTE_UNUSED)
7067 {
7068 if (TARGET_ARCH32)
7069 /* Original SPARC 32-bit ABI says that structures and unions,
7070 and quad-precision floats are passed by reference. For Pascal,
7071 also pass arrays by reference. All other base types are passed
7072 in registers.
7073
7074 Extended ABI (as implemented by the Sun compiler) says that all
7075 complex floats are passed by reference. Pass complex integers
7076 in registers up to 8 bytes. More generally, enforce the 2-word
7077 cap for passing arguments in registers.
7078
7079 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7080 integers are passed like floats of the same size, that is in
7081 registers up to 8 bytes. Pass all vector floats by reference
7082 like structure and unions. */
7083 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7084 || mode == SCmode
7085 /* Catch CDImode, TFmode, DCmode and TCmode. */
7086 || GET_MODE_SIZE (mode) > 8
7087 || (type
7088 && TREE_CODE (type) == VECTOR_TYPE
7089 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7090 else
7091 /* Original SPARC 64-bit ABI says that structures and unions
7092 smaller than 16 bytes are passed in registers, as well as
7093 all other base types.
7094
7095 Extended ABI (as implemented by the Sun compiler) says that
7096 complex floats are passed in registers up to 16 bytes. Pass
7097 all complex integers in registers up to 16 bytes. More generally,
7098 enforce the 2-word cap for passing arguments in registers.
7099
7100 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7101 integers are passed like floats of the same size, that is in
7102 registers (up to 16 bytes). Pass all vector floats like structure
7103 and unions. */
7104 return ((type
7105 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7106 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7107 /* Catch CTImode and TCmode. */
7108 || GET_MODE_SIZE (mode) > 16);
7109 }
7110
7111 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7112 Update the data in CUM to advance over an argument
7113 of mode MODE and data type TYPE.
7114 TYPE is null for libcalls where that information may not be available. */
7115
7116 static void
sparc_function_arg_advance(cumulative_args_t cum_v,machine_mode mode,const_tree type,bool named)7117 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7118 const_tree type, bool named)
7119 {
7120 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7121 int regno, padding;
7122
7123 /* We pass false for incoming here, it doesn't matter. */
7124 function_arg_slotno (cum, mode, type, named, false, ®no, &padding);
7125
7126 /* If argument requires leading padding, add it. */
7127 cum->words += padding;
7128
7129 if (TARGET_ARCH32)
7130 cum->words += (mode == BLKmode
7131 ? CEIL_NWORDS (int_size_in_bytes (type))
7132 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7133 else
7134 {
7135 if (type && AGGREGATE_TYPE_P (type))
7136 {
7137 int size = int_size_in_bytes (type);
7138
7139 if (size <= 8)
7140 ++cum->words;
7141 else if (size <= 16)
7142 cum->words += 2;
7143 else /* passed by reference */
7144 ++cum->words;
7145 }
7146 else
7147 cum->words += (mode == BLKmode
7148 ? CEIL_NWORDS (int_size_in_bytes (type))
7149 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7150 }
7151 }
7152
7153 /* Handle the FUNCTION_ARG_PADDING macro.
7154 For the 64 bit ABI structs are always stored left shifted in their
7155 argument slot. */
7156
7157 enum direction
function_arg_padding(machine_mode mode,const_tree type)7158 function_arg_padding (machine_mode mode, const_tree type)
7159 {
7160 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7161 return upward;
7162
7163 /* Fall back to the default. */
7164 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
7165 }
7166
7167 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7168 Specify whether to return the return value in memory. */
7169
7170 static bool
sparc_return_in_memory(const_tree type,const_tree fntype ATTRIBUTE_UNUSED)7171 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7172 {
7173 if (TARGET_ARCH32)
7174 /* Original SPARC 32-bit ABI says that structures and unions,
7175 and quad-precision floats are returned in memory. All other
7176 base types are returned in registers.
7177
7178 Extended ABI (as implemented by the Sun compiler) says that
7179 all complex floats are returned in registers (8 FP registers
7180 at most for '_Complex long double'). Return all complex integers
7181 in registers (4 at most for '_Complex long long').
7182
7183 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7184 integers are returned like floats of the same size, that is in
7185 registers up to 8 bytes and in memory otherwise. Return all
7186 vector floats in memory like structure and unions; note that
7187 they always have BLKmode like the latter. */
7188 return (TYPE_MODE (type) == BLKmode
7189 || TYPE_MODE (type) == TFmode
7190 || (TREE_CODE (type) == VECTOR_TYPE
7191 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7192 else
7193 /* Original SPARC 64-bit ABI says that structures and unions
7194 smaller than 32 bytes are returned in registers, as well as
7195 all other base types.
7196
7197 Extended ABI (as implemented by the Sun compiler) says that all
7198 complex floats are returned in registers (8 FP registers at most
7199 for '_Complex long double'). Return all complex integers in
7200 registers (4 at most for '_Complex TItype').
7201
7202 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7203 integers are returned like floats of the same size, that is in
7204 registers. Return all vector floats like structure and unions;
7205 note that they always have BLKmode like the latter. */
7206 return (TYPE_MODE (type) == BLKmode
7207 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7208 }
7209
7210 /* Handle the TARGET_STRUCT_VALUE target hook.
7211 Return where to find the structure return value address. */
7212
static rtx
sparc_struct_value_rtx (tree fndecl, int incoming)
{
  /* The 64-bit ABI passes the struct-return address like an ordinary
     argument, so no special location is needed.  */
  if (TARGET_ARCH64)
    return 0;
  else
    {
      rtx mem;

      /* The 32-bit ABI stores the address at a fixed stack offset:
	 relative to the frame pointer on the callee side, to the stack
	 pointer on the caller side.  */
      if (incoming)
	mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
						   STRUCT_VALUE_OFFSET));
      else
	mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
						   STRUCT_VALUE_OFFSET));

      /* Only follow the SPARC ABI for fixed-size structure returns.
	 Variable size structure returns are handled per the normal
	 procedures in GCC.  This is enabled by -mstd-struct-return */
      if (incoming == 2
	  && sparc_std_struct_return
	  && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
	  && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
	{
	  /* We must check and adjust the return address, as it is optional
	     as to whether the return object is really provided.  */
	  rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
	  rtx scratch = gen_reg_rtx (SImode);
	  rtx_code_label *endlab = gen_label_rtx ();

	  /* Calculate the return object size.  The low 12 bits match the
	     immediate field of the unimp instruction checked below.  */
	  tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
	  rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
	  /* Construct a temporary return value.  */
	  rtx temp_val
	    = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);

	  /* Implement SPARC 32-bit psABI callee return struct checking:

	     Fetch the instruction where we will return to and see if
	     it's an unimp instruction (the most significant 10 bits
	     will be zero).  */
	  emit_move_insn (scratch, gen_rtx_MEM (SImode,
						plus_constant (Pmode,
							       ret_reg, 8)));
	  /* Assume the size is valid and pre-adjust.  */
	  emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
	  emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
				   0, endlab);
	  /* The unimp check failed: undo the pre-adjustment and redirect
	     the store into a local temporary instead of the (absent)
	     caller-provided object.  */
	  emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
	  /* Write the address of the memory pointed to by temp_val into
	     the memory pointed to by mem.  */
	  emit_move_insn (mem, XEXP (temp_val, 0));
	  emit_label (endlab);
	}

      return mem;
    }
}
7272
7273 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7274 For v9, function return values are subject to the same rules as arguments,
7275 except that up to 32 bytes may be returned in registers. */
7276
static rtx
sparc_function_value_1 (const_tree type, machine_mode mode,
			bool outgoing)
{
  /* Beware that the two values are swapped here wrt function_arg.  */
  int regbase = (outgoing
		 ? SPARC_INCOMING_INT_ARG_FIRST
		 : SPARC_OUTGOING_INT_ARG_FIRST);
  enum mode_class mclass = GET_MODE_CLASS (mode);
  int regno;

  /* Vector types deserve special treatment because they are polymorphic wrt
     their mode, depending upon whether VIS instructions are enabled.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      gcc_assert ((TARGET_ARCH32 && size <= 8)
		  || (TARGET_ARCH64 && size <= 32));

      if (mode == BLKmode)
	return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);

      /* Non-BLKmode vectors are returned like FP values.  */
      mclass = MODE_FLOAT;
    }

  if (TARGET_ARCH64 && type)
    {
      /* Structures up to 32 bytes in size are returned in registers.  */
      if (TREE_CODE (type) == RECORD_TYPE)
	{
	  HOST_WIDE_INT size = int_size_in_bytes (type);
	  gcc_assert (size <= 32);

	  return function_arg_record_value (type, mode, 0, 1, regbase);
	}

      /* Unions up to 32 bytes in size are returned in integer registers.  */
      else if (TREE_CODE (type) == UNION_TYPE)
	{
	  HOST_WIDE_INT size = int_size_in_bytes (type);
	  gcc_assert (size <= 32);

	  return function_arg_union_value (size, mode, 0, regbase);
	}

      /* Objects that require it are returned in FP registers.  */
      else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
	;

      /* All other aggregate types are returned in an integer register in a
	 mode corresponding to the size of the type.  */
      else if (AGGREGATE_TYPE_P (type))
	{
	  /* All other aggregate types are passed in an integer register
	     in a mode corresponding to the size of the type.  */
	  HOST_WIDE_INT size = int_size_in_bytes (type);
	  gcc_assert (size <= 32);

	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);

	  /* ??? We probably should have made the same ABI change in
	     3.4.0 as the one we made for unions.   The latter was
	     required by the SCD though, while the former is not
	     specified, so we favored compatibility and efficiency.

	     Now we're stuck for aggregates larger than 16 bytes,
	     because OImode vanished in the meantime.  Let's not
	     try to be unduly clever, and simply follow the ABI
	     for unions in that case.  */
	  if (mode == BLKmode)
	    return function_arg_union_value (size, mode, 0, regbase);
	  else
	    mclass = MODE_INT;
	}

      /* We should only have pointer and integer types at this point.  This
	 must match sparc_promote_function_mode.  */
      else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
	mode = word_mode;
    }

  /* We should only have pointer and integer types at this point, except with
     -freg-struct-return.  This must match sparc_promote_function_mode.  */
  else if (TARGET_ARCH32
	   && !(type && AGGREGATE_TYPE_P (type))
	   && mclass == MODE_INT
	   && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
    mode = word_mode;

  /* FP values go in the first FP register when the FPU is enabled,
     everything else in the first integer register of the base.  */
  if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
    regno = SPARC_FP_ARG_FIRST;
  else
    regno = regbase;

  return gen_rtx_REG (mode, regno);
}
7373
7374 /* Handle TARGET_FUNCTION_VALUE.
7375 On the SPARC, the value is found in the first "output" register, but the
7376 called function leaves it in the first "input" register. */
7377
7378 static rtx
sparc_function_value(const_tree valtype,const_tree fn_decl_or_type ATTRIBUTE_UNUSED,bool outgoing)7379 sparc_function_value (const_tree valtype,
7380 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7381 bool outgoing)
7382 {
7383 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7384 }
7385
7386 /* Handle TARGET_LIBCALL_VALUE. */
7387
7388 static rtx
sparc_libcall_value(machine_mode mode,const_rtx fun ATTRIBUTE_UNUSED)7389 sparc_libcall_value (machine_mode mode,
7390 const_rtx fun ATTRIBUTE_UNUSED)
7391 {
7392 return sparc_function_value_1 (NULL_TREE, mode, false);
7393 }
7394
7395 /* Handle FUNCTION_VALUE_REGNO_P.
7396 On the SPARC, the first "output" reg is used for integer values, and the
7397 first floating point register is used for floating point values. */
7398
static bool
sparc_function_value_regno_p (const unsigned int regno)
{
  /* Register 8 is %o0, the first integer output register; register 32
     is the first FP register (%f0), which only holds return values
     when the FPU is enabled.  */
  return (regno == 8 || (TARGET_FPU && regno == 32));
}
7404
7405 /* Do what is necessary for `va_start'. We look at the current function
7406 to determine if stdarg or varargs is used and return the address of
7407 the first unnamed parameter. */
7408
static rtx
sparc_builtin_saveregs (void)
{
  /* Number of argument words consumed by named parameters, i.e. the
     index of the first register slot holding an unnamed argument.  */
  int first_reg = crtl->args.info.words;
  rtx address;
  int regno;

  /* Dump each remaining incoming integer argument register into its
     reserved stack slot so va_arg can walk the arguments in memory.  */
  for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
    emit_move_insn (gen_rtx_MEM (word_mode,
				 gen_rtx_PLUS (Pmode,
					       frame_pointer_rtx,
					       GEN_INT (FIRST_PARM_OFFSET (0)
							+ (UNITS_PER_WORD
							   * regno)))),
		    gen_rtx_REG (word_mode,
				 SPARC_INCOMING_INT_ARG_FIRST + regno));

  /* The address of the stack slot of the first unnamed parameter.  */
  address = gen_rtx_PLUS (Pmode,
			  frame_pointer_rtx,
			  GEN_INT (FIRST_PARM_OFFSET (0)
				   + UNITS_PER_WORD * first_reg));

  return address;
}
7433
7434 /* Implement `va_start' for stdarg. */
7435
static void
sparc_va_start (tree valist, rtx nextarg)
{
  /* Deliberately ignore the incoming NEXTARG: expanding the saveregs
     builtin both spills the remaining named-argument registers to the
     stack and returns the address of the first unnamed parameter.  */
  nextarg = expand_builtin_saveregs ();
  std_expand_builtin_va_start (valist, nextarg);
}
7442
7443 /* Implement `va_arg' for stdarg. */
7444
static tree
sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		       gimple_seq *post_p)
{
  HOST_WIDE_INT size, rsize, align;
  tree addr, incr;
  bool indirect;
  tree ptrtype = build_pointer_type (type);

  if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
    {
      /* Passed by reference: the slot holds a pointer to the object.  */
      indirect = true;
      size = rsize = UNITS_PER_WORD;
      align = 0;
    }
  else
    {
      indirect = false;
      size = int_size_in_bytes (type);
      /* RSIZE is the size rounded up to a whole number of words.  */
      rsize = ROUND_UP (size, UNITS_PER_WORD);
      align = 0;

      if (TARGET_ARCH64)
	{
	  /* For SPARC64, objects requiring 16-byte alignment get it.  */
	  if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
	    align = 2 * UNITS_PER_WORD;

	  /* SPARC-V9 ABI states that structures up to 16 bytes in size
	     are left-justified in their slots.  */
	  if (AGGREGATE_TYPE_P (type))
	    {
	      if (size == 0)
		size = rsize = UNITS_PER_WORD;
	      else
		size = rsize;
	    }
	}
    }

  /* Round the va_list pointer up to ALIGN if needed.  */
  incr = valist;
  if (align)
    {
      incr = fold_build_pointer_plus_hwi (incr, align - 1);
      incr = fold_convert (sizetype, incr);
      incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
			  size_int (-align));
      incr = fold_convert (ptr_type_node, incr);
    }

  gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
  addr = incr;

  /* Big-endian: a value smaller than its slot sits at the slot's
     high-address end.  */
  if (BYTES_BIG_ENDIAN && size < rsize)
    addr = fold_build_pointer_plus_hwi (incr, rsize - size);

  if (indirect)
    {
      /* Load the pointer stored in the slot and dereference it.  */
      addr = fold_convert (build_pointer_type (ptrtype), addr);
      addr = build_va_arg_indirect_ref (addr);
    }

  /* If the address isn't aligned properly for the type, we need a temporary.
     FIXME: This is inefficient, usually we can do this in registers.  */
  else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
    {
      /* Copy the slot bytes into an aligned temporary via memcpy.  */
      tree tmp = create_tmp_var (type, "va_arg_tmp");
      tree dest_addr = build_fold_addr_expr (tmp);
      tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
				   3, dest_addr, addr, size_int (rsize));
      TREE_ADDRESSABLE (tmp) = 1;
      gimplify_and_add (copy, pre_p);
      addr = dest_addr;
    }

  else
    addr = fold_convert (ptrtype, addr);

  /* Advance the va_list pointer past the consumed slot(s).  */
  incr = fold_build_pointer_plus_hwi (incr, rsize);
  gimplify_assign (valist, incr, post_p);

  return build_va_arg_indirect_ref (addr);
}
7528
7529 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7530 Specify whether the vector mode is supported by the hardware. */
7531
7532 static bool
sparc_vector_mode_supported_p(machine_mode mode)7533 sparc_vector_mode_supported_p (machine_mode mode)
7534 {
7535 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
7536 }
7537
7538 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7539
7540 static machine_mode
sparc_preferred_simd_mode(machine_mode mode)7541 sparc_preferred_simd_mode (machine_mode mode)
7542 {
7543 if (TARGET_VIS)
7544 switch (mode)
7545 {
7546 case SImode:
7547 return V2SImode;
7548 case HImode:
7549 return V4HImode;
7550 case QImode:
7551 return V8QImode;
7552
7553 default:;
7554 }
7555
7556 return word_mode;
7557 }
7558
7559 /* Return the string to output an unconditional branch to LABEL, which is
7560 the operand number of the label.
7561
7562 DEST is the destination insn (i.e. the label), INSN is the source. */
7563
const char *
output_ubranch (rtx dest, rtx_insn *insn)
{
  /* NOTE: returns a pointer to a static buffer, so the result must be
     consumed before the next call.  */
  static char string[64];
  bool v9_form = false;
  int delta;
  char *p;

  /* Even if we are trying to use cbcond for this, evaluate
     whether we can use V9 branches as our backup plan.  */

  /* Default to an out-of-range delta when addresses are unknown.  */
  delta = 5000000;
  if (INSN_ADDRESSES_SET_P ())
    delta = (INSN_ADDRESSES (INSN_UID (dest))
	     - INSN_ADDRESSES (INSN_UID (insn)));

  /* Leave some instructions for "slop".  */
  if (TARGET_V9 && delta >= -260000 && delta < 260000)
    v9_form = true;

  if (TARGET_CBCOND)
    {
      bool emit_nop = emit_cbcond_nop (insn);
      bool far = false;
      const char *rval;

      /* cbcond has a much shorter reach than ordinary branches.  */
      if (delta < -500 || delta > 500)
	far = true;

      if (far)
	{
	  if (v9_form)
	    rval = "ba,a,pt\t%%xcc, %l0";
	  else
	    rval = "b,a\t%l0";
	}
      else
	{
	  /* "cwbe %g0, %g0" is an always-taken compare-and-branch.  */
	  if (emit_nop)
	    rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
	  else
	    rval = "cwbe\t%%g0, %%g0, %l0";
	}
      return rval;
    }

  if (v9_form)
    strcpy (string, "ba%*,pt\t%%xcc, ");
  else
    strcpy (string, "b%*\t");

  /* Append "%l0%(": the label operand plus punctuation presumably
     expanded by this file's print_operand (annul/delay-slot handling)
     — confirm against print_operand.  */
  p = strchr (string, '\0');
  *p++ = '%';
  *p++ = 'l';
  *p++ = '0';
  *p++ = '%';
  *p++ = '(';
  *p = '\0';

  return string;
}
7625
7626 /* Return the string to output a conditional branch to LABEL, which is
7627 the operand number of the label. OP is the conditional expression.
7628 XEXP (OP, 0) is assumed to be a condition code register (integer or
7629 floating point) and its mode specifies what kind of comparison we made.
7630
7631 DEST is the destination insn (i.e. the label), INSN is the source.
7632
7633 REVERSED is nonzero if we should reverse the sense of the comparison.
7634
7635 ANNUL is nonzero if we should generate an annulling branch. */
7636
const char *
output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
		rtx_insn *insn)
{
  /* NOTE: returns a pointer to a static buffer, so the result must be
     consumed before the next call.  */
  static char string[64];
  enum rtx_code code = GET_CODE (op);
  rtx cc_reg = XEXP (op, 0);
  machine_mode mode = GET_MODE (cc_reg);
  const char *labelno, *branch;
  int spaces = 8, far;
  char *p;

  /* v9 branches are limited to +-1MB.  If it is too far away,
     change

     bne,pt %xcc, .LC30

     to

     be,pn %xcc, .+12
      nop
     ba .LC30

     and

     fbne,a,pn %fcc2, .LC29

     to

     fbe,pt %fcc2, .+16
      nop
     ba .LC29  */

  far = TARGET_V9 && (get_attr_length (insn) >= 3);
  /* A far branch inverts the condition and jumps around an
     unconditional branch, so REVERSED and FAR cancel out.  */
  if (reversed ^ far)
    {
      /* Reversal of FP compares takes care -- an ordered compare
	 becomes an unordered compare and vice versa.  */
      if (mode == CCFPmode || mode == CCFPEmode)
	code = reverse_condition_maybe_unordered (code);
      else
	code = reverse_condition (code);
    }

  /* Start by writing the branch condition.  */
  if (mode == CCFPmode || mode == CCFPEmode)
    {
      switch (code)
	{
	case NE:
	  branch = "fbne";
	  break;
	case EQ:
	  branch = "fbe";
	  break;
	case GE:
	  branch = "fbge";
	  break;
	case GT:
	  branch = "fbg";
	  break;
	case LE:
	  branch = "fble";
	  break;
	case LT:
	  branch = "fbl";
	  break;
	case UNORDERED:
	  branch = "fbu";
	  break;
	case ORDERED:
	  branch = "fbo";
	  break;
	case UNGT:
	  branch = "fbug";
	  break;
	case UNLT:
	  branch = "fbul";
	  break;
	case UNEQ:
	  branch = "fbue";
	  break;
	case UNGE:
	  branch = "fbuge";
	  break;
	case UNLE:
	  branch = "fbule";
	  break;
	case LTGT:
	  branch = "fblg";
	  break;

	default:
	  gcc_unreachable ();
	}

      /* ??? !v9: FP branches cannot be preceded by another floating point
	 insn.  Because there is currently no concept of pre-delay slots,
	 we can fix this only by always emitting a nop before a floating
	 point branch.  */

      string[0] = '\0';
      if (! TARGET_V9)
	strcpy (string, "nop\n\t");
      strcat (string, branch);
    }
  else
    {
      switch (code)
	{
	case NE:
	  branch = "bne";
	  break;
	case EQ:
	  branch = "be";
	  break;
	case GE:
	  /* _NOOV modes: overflow is undefined, so test the sign bit
	     directly instead of the full signed comparison.  */
	  if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
	    branch = "bpos";
	  else
	    branch = "bge";
	  break;
	case GT:
	  branch = "bg";
	  break;
	case LE:
	  branch = "ble";
	  break;
	case LT:
	  if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
	    branch = "bneg";
	  else
	    branch = "bl";
	  break;
	case GEU:
	  branch = "bgeu";
	  break;
	case GTU:
	  branch = "bgu";
	  break;
	case LEU:
	  branch = "bleu";
	  break;
	case LTU:
	  branch = "blu";
	  break;

	default:
	  gcc_unreachable ();
	}
      strcpy (string, branch);
    }
  /* SPACES tracks remaining alignment padding before the operands.  */
  spaces -= strlen (branch);
  p = strchr (string, '\0');

  /* Now add the annulling, the label, and a possible noop.  */
  if (annul && ! far)
    {
      strcpy (p, ",a");
      p += 2;
      spaces -= 2;
    }

  if (TARGET_V9)
    {
      rtx note;
      /* V8 means we must fall back to the V8 form without a CC-register
	 operand (out of range for the V9 form).  */
      int v8 = 0;

      if (! far && insn && INSN_ADDRESSES_SET_P ())
	{
	  int delta = (INSN_ADDRESSES (INSN_UID (dest))
		       - INSN_ADDRESSES (INSN_UID (insn)));
	  /* Leave some instructions for "slop".  */
	  if (delta < -260000 || delta >= 260000)
	    v8 = 1;
	}

      if (mode == CCFPmode || mode == CCFPEmode)
	{
	  static char v9_fcc_labelno[] = "%%fccX, ";
	  /* Set the char indicating the number of the fcc reg to use.  */
	  v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
	  labelno = v9_fcc_labelno;
	  if (v8)
	    {
	      gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
	      labelno = "";
	    }
	}
      else if (mode == CCXmode || mode == CCX_NOOVmode)
	{
	  labelno = "%%xcc, ";
	  gcc_assert (! v8);
	}
      else
	{
	  labelno = "%%icc, ";
	  if (v8)
	    labelno = "";
	}

      /* Append a ,pt/,pn prediction hint from the branch-probability
	 note, inverted for a far branch (whose condition is inverted).  */
      if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
	{
	  strcpy (p,
		  ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
		  ? ",pt" : ",pn");
	  p += 3;
	  spaces -= 3;
	}
    }
  else
    labelno = "";

  if (spaces > 0)
    *p++ = '\t';
  else
    *p++ = ' ';
  strcpy (p, labelno);
  p = strchr (p, '\0');
  if (far)
    {
      /* Emit the skip-around sequence: branch over a nop plus an
	 unconditional branch to the real target.  */
      strcpy (p, ".+12\n\t nop\n\tb\t");
      /* Skip the next insn if requested or
	 if we know that it will be a nop.  */
      if (annul || ! final_sequence)
	p[3] = '6';
      p += 14;
    }
  /* Append "%lN%#": the label operand plus punctuation presumably
     expanded by this file's print_operand — confirm there.  */
  *p++ = '%';
  *p++ = 'l';
  *p++ = label + '0';
  *p++ = '%';
  *p++ = '#';
  *p = '\0';

  return string;
}
7874
7875 /* Emit a library call comparison between floating point X and Y.
7876 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7877 Return the new operator to be used in the comparison sequence.
7878
7879 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7880 values as arguments instead of the TFmode registers themselves,
7881 that's why we cannot call emit_float_lib_cmp. */
7882
rtx
sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
{
  const char *qpfunc;
  rtx slot0, slot1, result, tem, tem2, libfunc;
  machine_mode mode;
  enum rtx_code new_comparison;

  /* Pick the library routine: direct predicates for the ordered
     comparisons, the generic _Q[p]_cmp for everything involving
     unordered results.  */
  switch (comparison)
    {
    case EQ:
      qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
      break;

    case NE:
      qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
      break;

    case GT:
      qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
      break;

    case GE:
      qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
      break;

    case LT:
      qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
      break;

    case LE:
      qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
      break;

    case ORDERED:
    case UNORDERED:
    case UNGT:
    case UNLT:
    case UNEQ:
    case UNGE:
    case UNLE:
    case LTGT:
      qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
      break;

    default:
      gcc_unreachable ();
    }

  if (TARGET_ARCH64)
    {
      /* The _Qp_* routines take pointers to TFmode values, so force
	 both operands into addressable memory slots.  */
      if (MEM_P (x))
	{
	  tree expr = MEM_EXPR (x);
	  if (expr)
	    mark_addressable (expr);
	  slot0 = x;
	}
      else
	{
	  slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
	  emit_move_insn (slot0, x);
	}

      if (MEM_P (y))
	{
	  tree expr = MEM_EXPR (y);
	  if (expr)
	    mark_addressable (expr);
	  slot1 = y;
	}
      else
	{
	  slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
	  emit_move_insn (slot1, y);
	}

      libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
      emit_library_call (libfunc, LCT_NORMAL,
			 DImode, 2,
			 XEXP (slot0, 0), Pmode,
			 XEXP (slot1, 0), Pmode);
      mode = DImode;
    }
  else
    {
      /* The 32-bit _Q_* routines take the TFmode values directly.  */
      libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
      emit_library_call (libfunc, LCT_NORMAL,
			 SImode, 2,
			 x, TFmode, y, TFmode);
      mode = SImode;
    }


  /* Immediately move the result of the libcall into a pseudo
     register so reload doesn't clobber the value if it needs
     the return register for a spill reg.  */
  result = gen_reg_rtx (mode);
  emit_move_insn (result, hard_libcall_value (mode, libfunc));

  /* Translate the libcall result into a comparison usable by the
     caller.  The checks below are consistent with a _Q_cmp encoding of
     0: equal, 1: less, 2: greater, 3: unordered — confirm against the
     SPARC ABI soft-float documentation.  */
  switch (comparison)
    {
    default:
      /* Direct predicates return nonzero when the relation holds.  */
      return gen_rtx_NE (VOIDmode, result, const0_rtx);
    case ORDERED:
    case UNORDERED:
      new_comparison = (comparison == UNORDERED ? EQ : NE);
      return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
    case UNGT:
    case UNGE:
      new_comparison = (comparison == UNGT ? GT : NE);
      return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
    case UNLE:
      return gen_rtx_NE (VOIDmode, result, const2_rtx);
    case UNLT:
      /* UNLT holds when bit 0 of the result is set (less or unordered).  */
      tem = gen_reg_rtx (mode);
      if (TARGET_ARCH32)
	emit_insn (gen_andsi3 (tem, result, const1_rtx));
      else
	emit_insn (gen_anddi3 (tem, result, const1_rtx));
      return gen_rtx_NE (VOIDmode, tem, const0_rtx);
    case UNEQ:
    case LTGT:
      /* Adding 1 maps {0,3} (equal/unordered) to values with bit 1
	 clear/set; masking with 2 then separates UNEQ from LTGT.  */
      tem = gen_reg_rtx (mode);
      if (TARGET_ARCH32)
	emit_insn (gen_addsi3 (tem, result, const1_rtx));
      else
	emit_insn (gen_adddi3 (tem, result, const1_rtx));
      tem2 = gen_reg_rtx (mode);
      if (TARGET_ARCH32)
	emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
      else
	emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
      new_comparison = (comparison == UNEQ ? EQ : NE);
      return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
    }

  gcc_unreachable ();
}
8022
8023 /* Generate an unsigned DImode to FP conversion. This is the same code
8024 optabs would emit if we didn't have TFmode patterns. */
8025
8026 void
sparc_emit_floatunsdi(rtx * operands,machine_mode mode)8027 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8028 {
8029 rtx i0, i1, f0, in, out;
8030
8031 out = operands[0];
8032 in = force_reg (DImode, operands[1]);
8033 rtx_code_label *neglab = gen_label_rtx ();
8034 rtx_code_label *donelab = gen_label_rtx ();
8035 i0 = gen_reg_rtx (DImode);
8036 i1 = gen_reg_rtx (DImode);
8037 f0 = gen_reg_rtx (mode);
8038
8039 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8040
8041 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8042 emit_jump_insn (gen_jump (donelab));
8043 emit_barrier ();
8044
8045 emit_label (neglab);
8046
8047 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8048 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8049 emit_insn (gen_iordi3 (i0, i0, i1));
8050 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8051 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8052
8053 emit_label (donelab);
8054 }
8055
8056 /* Generate an FP to unsigned DImode conversion. This is the same code
8057 optabs would emit if we didn't have TFmode patterns. */
8058
8059 void
sparc_emit_fixunsdi(rtx * operands,machine_mode mode)8060 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8061 {
8062 rtx i0, i1, f0, in, out, limit;
8063
8064 out = operands[0];
8065 in = force_reg (mode, operands[1]);
8066 rtx_code_label *neglab = gen_label_rtx ();
8067 rtx_code_label *donelab = gen_label_rtx ();
8068 i0 = gen_reg_rtx (DImode);
8069 i1 = gen_reg_rtx (DImode);
8070 limit = gen_reg_rtx (mode);
8071 f0 = gen_reg_rtx (mode);
8072
8073 emit_move_insn (limit,
8074 const_double_from_real_value (
8075 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8076 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8077
8078 emit_insn (gen_rtx_SET (out,
8079 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8080 emit_jump_insn (gen_jump (donelab));
8081 emit_barrier ();
8082
8083 emit_label (neglab);
8084
8085 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8086 emit_insn (gen_rtx_SET (i0,
8087 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8088 emit_insn (gen_movdi (i1, const1_rtx));
8089 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8090 emit_insn (gen_xordi3 (out, i0, i1));
8091
8092 emit_label (donelab);
8093 }
8094
8095 /* Return the string to output a compare and branch instruction to DEST.
8096 DEST is the destination insn (i.e. the label), INSN is the source,
8097 and OP is the conditional expression. */
8098
8099 const char *
output_cbcond(rtx op,rtx dest,rtx_insn * insn)8100 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8101 {
8102 machine_mode mode = GET_MODE (XEXP (op, 0));
8103 enum rtx_code code = GET_CODE (op);
8104 const char *cond_str, *tmpl;
8105 int far, emit_nop, len;
8106 static char string[64];
8107 char size_char;
8108
8109 /* Compare and Branch is limited to +-2KB. If it is too far away,
8110 change
8111
8112 cxbne X, Y, .LC30
8113
8114 to
8115
8116 cxbe X, Y, .+16
8117 nop
8118 ba,pt xcc, .LC30
8119 nop */
8120
8121 len = get_attr_length (insn);
8122
8123 far = len == 4;
8124 emit_nop = len == 2;
8125
8126 if (far)
8127 code = reverse_condition (code);
8128
8129 size_char = ((mode == SImode) ? 'w' : 'x');
8130
8131 switch (code)
8132 {
8133 case NE:
8134 cond_str = "ne";
8135 break;
8136
8137 case EQ:
8138 cond_str = "e";
8139 break;
8140
8141 case GE:
8142 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8143 cond_str = "pos";
8144 else
8145 cond_str = "ge";
8146 break;
8147
8148 case GT:
8149 cond_str = "g";
8150 break;
8151
8152 case LE:
8153 cond_str = "le";
8154 break;
8155
8156 case LT:
8157 if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8158 cond_str = "neg";
8159 else
8160 cond_str = "l";
8161 break;
8162
8163 case GEU:
8164 cond_str = "cc";
8165 break;
8166
8167 case GTU:
8168 cond_str = "gu";
8169 break;
8170
8171 case LEU:
8172 cond_str = "leu";
8173 break;
8174
8175 case LTU:
8176 cond_str = "cs";
8177 break;
8178
8179 default:
8180 gcc_unreachable ();
8181 }
8182
8183 if (far)
8184 {
8185 int veryfar = 1, delta;
8186
8187 if (INSN_ADDRESSES_SET_P ())
8188 {
8189 delta = (INSN_ADDRESSES (INSN_UID (dest))
8190 - INSN_ADDRESSES (INSN_UID (insn)));
8191 /* Leave some instructions for "slop". */
8192 if (delta >= -260000 && delta < 260000)
8193 veryfar = 0;
8194 }
8195
8196 if (veryfar)
8197 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8198 else
8199 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8200 }
8201 else
8202 {
8203 if (emit_nop)
8204 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8205 else
8206 tmpl = "c%cb%s\t%%1, %%2, %%3";
8207 }
8208
8209 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8210
8211 return string;
8212 }
8213
8214 /* Return the string to output a conditional branch to LABEL, testing
8215 register REG. LABEL is the operand number of the label; REG is the
8216 operand number of the reg. OP is the conditional expression. The mode
8217 of REG says what kind of comparison we made.
8218
8219 DEST is the destination insn (i.e. the label), INSN is the source.
8220
8221 REVERSED is nonzero if we should reverse the sense of the comparison.
8222
8223 ANNUL is nonzero if we should generate an annulling branch. */
8224
const char *
output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
		 int annul, rtx_insn *insn)
{
  static char string[64];
  enum rtx_code code = GET_CODE (op);
  machine_mode mode = GET_MODE (XEXP (op, 0));
  rtx note;
  int far;
  char *p;

  /* Branches on a register are limited to +-128KB.  If the target is
     too far away, change

     brnz,pt %g1, .LC30

     to

     brz,pn %g1, .+12
      nop
     ba,pt %xcc, .LC30

     and

     brgez,a,pn %o1, .LC29

     to

     brlz,pt %o1, .+16
      nop
     ba,pt %xcc, .LC29  */

  far = get_attr_length (insn) >= 3;

  /* If not floating-point or if EQ or NE, we can just reverse the code.  */
  if (reversed ^ far)
    code = reverse_condition (code);

  /* Only 64 bit versions of these instructions exist.  */
  gcc_assert (mode == DImode);

  /* Start by writing the branch condition.  */

  switch (code)
    {
    case NE:
      strcpy (string, "brnz");
      break;

    case EQ:
      strcpy (string, "brz");
      break;

    case GE:
      strcpy (string, "brgez");
      break;

    case LT:
      strcpy (string, "brlz");
      break;

    case LE:
      strcpy (string, "brlez");
      break;

    case GT:
      strcpy (string, "brgz");
      break;

    default:
      gcc_unreachable ();
    }

  /* P tracks the end of the string as we append to it.  */
  p = strchr (string, '\0');

  /* Now add the annulling, reg, label, and nop.  */
  if (annul && ! far)
    {
      strcpy (p, ",a");
      p += 2;
    }

  /* Branch-prediction hint from the REG_BR_PROB note: ",pt" (taken) or
     ",pn" (not taken), flipped when the condition was reversed above
     for the far form.  */
  if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
    {
      strcpy (p,
	      ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
	      ? ",pt" : ",pn");
      p += 3;
    }

  /* Separate mnemonic from operands: a tab if the mnemonic is short
     enough to keep operands aligned, otherwise a single space.  */
  *p = p < string + 8 ? '\t' : ' ';
  p++;
  *p++ = '%';
  *p++ = '0' + reg;
  *p++ = ',';
  *p++ = ' ';
  if (far)
    {
      int veryfar = 1, delta;

      if (INSN_ADDRESSES_SET_P ())
	{
	  delta = (INSN_ADDRESSES (INSN_UID (dest))
		   - INSN_ADDRESSES (INSN_UID (insn)));
	  /* Leave some instructions for "slop".  */
	  if (delta >= -260000 && delta < 260000)
	    veryfar = 0;
	}

      strcpy (p, ".+12\n\t nop\n\t");
      /* Skip the next insn if requested or
	 if we know that it will be a nop.  */
      if (annul || ! final_sequence)
	p[3] = '6';		/* Patch ".+12" into ".+16".  */
      p += 12;			/* Length of ".+12\n\t nop\n\t".  */
      if (veryfar)
	{
	  strcpy (p, "b\t");
	  p += 2;
	}
      else
	{
	  /* "%%" is one literal '%' after the output template pass.  */
	  strcpy (p, "ba,pt\t%%xcc, ");
	  p += 13;
	}
    }
  *p++ = '%';
  *p++ = 'l';
  *p++ = '0' + label;
  /* "%#" lets final emit a nop in the delay slot if it is unfilled
     (see the '#' case in sparc_print_operand).  */
  *p++ = '%';
  *p++ = '#';
  *p = '\0';

  return string;
}
8360
8361 /* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7].
8362 Such instructions cannot be used in the delay slot of return insn on v9.
8363 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8364 */
8365
static int
epilogue_renumber (register rtx *where, int test)
{
  register const char *fmt;
  register int i;
  register enum rtx_code code;

  if (*where == 0)
    return 0;

  code = GET_CODE (*where);

  switch (code)
    {
    case REG:
      if (REGNO (*where) >= 8 && REGNO (*where) < 24)      /* oX or lX */
	return 1;
      if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
	*where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
      /* Fall through: any remaining register (global, float, or the
	 %iX just renamed to %oX) needs no recursion, like the leaf
	 codes below.  */
    case SCRATCH:
    case CC0:
    case PC:
    case CONST_INT:
    case CONST_WIDE_INT:
    case CONST_DOUBLE:
      return 0;

      /* Do not replace the frame pointer with the stack pointer because
	 it can cause the delayed instruction to load below the stack.
	 This occurs when instructions like:

	 (set (reg/i:SI 24 %i0)
	     (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
                       (const_int -20 [0xffffffec])) 0))

	 are in the return delayed slot.  */
    case PLUS:
      if (GET_CODE (XEXP (*where, 0)) == REG
	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
	  && (GET_CODE (XEXP (*where, 1)) != CONST_INT
	      || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
	return 1;
      break;

    case MEM:
      if (SPARC_STACK_BIAS
	  && GET_CODE (XEXP (*where, 0)) == REG
	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
	return 1;
      break;

    default:
      break;
    }

  /* Recurse over every sub-expression of this rtx; give up as soon as
     any of them is disqualified.  */
  fmt = GET_RTX_FORMAT (code);

  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;
	  for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
	    if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
	      return 1;
	}
      else if (fmt[i] == 'e'
	       && epilogue_renumber (&(XEXP (*where, i)), test))
	return 1;
    }
  return 0;
}
8438
8439 /* Leaf functions and non-leaf functions have different needs. */
8440
/* Preferred allocation order for leaf functions.  */
static const int
reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;

/* Default allocation order, used for non-leaf functions.  */
static const int
reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;

/* Indexed by a 0/1 "is non-leaf" flag (see order_regs_for_local_alloc).  */
static const int *const reg_alloc_orders[] = {
  reg_leaf_alloc_order,
  reg_nonleaf_alloc_order};
8450
8451 void
order_regs_for_local_alloc(void)8452 order_regs_for_local_alloc (void)
8453 {
8454 static int last_order_nonleaf = 1;
8455
8456 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8457 {
8458 last_order_nonleaf = !last_order_nonleaf;
8459 memcpy ((char *) reg_alloc_order,
8460 (const char *) reg_alloc_orders[last_order_nonleaf],
8461 FIRST_PSEUDO_REGISTER * sizeof (int));
8462 }
8463 }
8464
8465 /* Return 1 if REG and MEM are legitimate enough to allow the various
8466 mem<-->reg splits to be run. */
8467
8468 int
sparc_splitdi_legitimate(rtx reg,rtx mem)8469 sparc_splitdi_legitimate (rtx reg, rtx mem)
8470 {
8471 /* Punt if we are here by mistake. */
8472 gcc_assert (reload_completed);
8473
8474 /* We must have an offsettable memory reference. */
8475 if (! offsettable_memref_p (mem))
8476 return 0;
8477
8478 /* If we have legitimate args for ldd/std, we do not want
8479 the split to happen. */
8480 if ((REGNO (reg) % 2) == 0
8481 && mem_min_alignment (mem, 8))
8482 return 0;
8483
8484 /* Success. */
8485 return 1;
8486 }
8487
8488 /* Like sparc_splitdi_legitimate but for REG <--> REG moves. */
8489
8490 int
sparc_split_regreg_legitimate(rtx reg1,rtx reg2)8491 sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
8492 {
8493 int regno1, regno2;
8494
8495 if (GET_CODE (reg1) == SUBREG)
8496 reg1 = SUBREG_REG (reg1);
8497 if (GET_CODE (reg1) != REG)
8498 return 0;
8499 regno1 = REGNO (reg1);
8500
8501 if (GET_CODE (reg2) == SUBREG)
8502 reg2 = SUBREG_REG (reg2);
8503 if (GET_CODE (reg2) != REG)
8504 return 0;
8505 regno2 = REGNO (reg2);
8506
8507 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8508 return 1;
8509
8510 if (TARGET_VIS3)
8511 {
8512 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8513 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8514 return 1;
8515 }
8516
8517 return 0;
8518 }
8519
8520 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8521 This makes them candidates for using ldd and std insns.
8522
8523 Note reg1 and reg2 *must* be hard registers. */
8524
8525 int
registers_ok_for_ldd_peep(rtx reg1,rtx reg2)8526 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8527 {
8528 /* We might have been passed a SUBREG. */
8529 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8530 return 0;
8531
8532 if (REGNO (reg1) % 2 != 0)
8533 return 0;
8534
8535 /* Integer ldd is deprecated in SPARC V9 */
8536 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8537 return 0;
8538
8539 return (REGNO (reg1) == REGNO (reg2) - 1);
8540 }
8541
8542 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8543 an ldd or std insn.
8544
8545 This can only happen when addr1 and addr2, the addresses in mem1
8546 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8547 addr1 must also be aligned on a 64-bit boundary.
8548
8549 Also iff dependent_reg_rtx is not null it should not be used to
8550 compute the address for mem1, i.e. we cannot optimize a sequence
8551 like:
8552 ld [%o0], %o0
8553 ld [%o0 + 4], %o1
8554 to
8555 ldd [%o0], %o0
8556 nor:
8557 ld [%g3 + 4], %g3
8558 ld [%g3], %g2
8559 to
8560 ldd [%g3], %g2
8561
8562 But, note that the transformation from:
8563 ld [%g2 + 4], %g3
8564 ld [%g2], %g2
8565 to
8566 ldd [%g2], %g2
8567 is perfectly fine. Thus, the peephole2 patterns always pass us
8568 the destination register of the first load, never the second one.
8569
8570 For stores we don't have a similar problem, so dependent_reg_rtx is
8571 NULL_RTX. */
8572
8573 int
mems_ok_for_ldd_peep(rtx mem1,rtx mem2,rtx dependent_reg_rtx)8574 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8575 {
8576 rtx addr1, addr2;
8577 unsigned int reg1;
8578 HOST_WIDE_INT offset1;
8579
8580 /* The mems cannot be volatile. */
8581 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8582 return 0;
8583
8584 /* MEM1 should be aligned on a 64-bit boundary. */
8585 if (MEM_ALIGN (mem1) < 64)
8586 return 0;
8587
8588 addr1 = XEXP (mem1, 0);
8589 addr2 = XEXP (mem2, 0);
8590
8591 /* Extract a register number and offset (if used) from the first addr. */
8592 if (GET_CODE (addr1) == PLUS)
8593 {
8594 /* If not a REG, return zero. */
8595 if (GET_CODE (XEXP (addr1, 0)) != REG)
8596 return 0;
8597 else
8598 {
8599 reg1 = REGNO (XEXP (addr1, 0));
8600 /* The offset must be constant! */
8601 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8602 return 0;
8603 offset1 = INTVAL (XEXP (addr1, 1));
8604 }
8605 }
8606 else if (GET_CODE (addr1) != REG)
8607 return 0;
8608 else
8609 {
8610 reg1 = REGNO (addr1);
8611 /* This was a simple (mem (reg)) expression. Offset is 0. */
8612 offset1 = 0;
8613 }
8614
8615 /* Make sure the second address is a (mem (plus (reg) (const_int). */
8616 if (GET_CODE (addr2) != PLUS)
8617 return 0;
8618
8619 if (GET_CODE (XEXP (addr2, 0)) != REG
8620 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8621 return 0;
8622
8623 if (reg1 != REGNO (XEXP (addr2, 0)))
8624 return 0;
8625
8626 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8627 return 0;
8628
8629 /* The first offset must be evenly divisible by 8 to ensure the
8630 address is 64 bit aligned. */
8631 if (offset1 % 8 != 0)
8632 return 0;
8633
8634 /* The offset for the second addr must be 4 more than the first addr. */
8635 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8636 return 0;
8637
8638 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8639 instructions. */
8640 return 1;
8641 }
8642
8643 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
8644
8645 rtx
widen_mem_for_ldd_peep(rtx mem1,rtx mem2,machine_mode mode)8646 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
8647 {
8648 rtx x = widen_memory_access (mem1, mode, 0);
8649 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8650 return x;
8651 }
8652
8653 /* Return 1 if reg is a pseudo, or is the first register in
8654 a hard register pair. This makes it suitable for use in
8655 ldd and std insns. */
8656
8657 int
register_ok_for_ldd(rtx reg)8658 register_ok_for_ldd (rtx reg)
8659 {
8660 /* We might have been passed a SUBREG. */
8661 if (!REG_P (reg))
8662 return 0;
8663
8664 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8665 return (REGNO (reg) % 2 == 0);
8666
8667 return 1;
8668 }
8669
8670 /* Return 1 if OP, a MEM, has an address which is known to be
8671 aligned to an 8-byte boundary. */
8672
8673 int
memory_ok_for_ldd(rtx op)8674 memory_ok_for_ldd (rtx op)
8675 {
8676 /* In 64-bit mode, we assume that the address is word-aligned. */
8677 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8678 return 0;
8679
8680 if (! can_create_pseudo_p ()
8681 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8682 return 0;
8683
8684 return 1;
8685 }
8686
8687 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8688
static bool
sparc_print_operand_punct_valid_p (unsigned char code)
{
  /* The punctuation codes handled by sparc_print_operand.  */
  switch (code)
    {
    case '#':
    case '*':
    case '(':
    case ')':
    case '_':
    case '&':
      return true;
    default:
      return false;
    }
}
8702
8703 /* Implement TARGET_PRINT_OPERAND.
8704 Print operand X (an rtx) in assembler syntax to file FILE.
8705 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8706 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8707
static void
sparc_print_operand (FILE *file, rtx x, int code)
{
  switch (code)
    {
    case '#':
      /* Output an insn in a delay slot.  */
      if (final_sequence)
	sparc_indent_opcode = 1;
      else
	fputs ("\n\t nop", file);
      return;
    case '*':
      /* Output an annul flag if there's nothing for the delay slot and we
	 are optimizing.  This is always used with '(' below.
	 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
	 this is a dbx bug.  So, we only do this when optimizing.
	 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
	 Always emit a nop in case the next instruction is a branch.  */
      if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
	fputs (",a", file);
      return;
    case '(':
      /* Output a 'nop' if there's nothing for the delay slot and we are
	 not optimizing.  This is always used with '*' above.  */
      if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
	fputs ("\n\t nop", file);
      else if (final_sequence)
	sparc_indent_opcode = 1;
      return;
    case ')':
      /* Output the right displacement from the saved PC on function return.
	 The caller may have placed an "unimp" insn immediately after the call
	 so we have to account for it.  This insn is used in the 32-bit ABI
	 when calling a function that returns a non zero-sized structure.  The
	 64-bit ABI doesn't have it.  Be careful to have this test be the same
	 as that for the call.  The exception is when sparc_std_struct_return
	 is enabled, the psABI is followed exactly and the adjustment is made
	 by the code in sparc_struct_value_rtx.  The call emitted is the same
	 when sparc_std_struct_return is enabled.  */
      if (!TARGET_ARCH64
	  && cfun->returns_struct
	  && !sparc_std_struct_return
	  && DECL_SIZE (DECL_RESULT (current_function_decl))
	  && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
	      == INTEGER_CST
	  && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
	fputs ("12", file);
      else
	fputc ('8', file);
      return;
    case '_':
      /* Output the Embedded Medium/Anywhere code model base register.  */
      fputs (EMBMEDANY_BASE_REG, file);
      return;
    case '&':
      /* Print some local dynamic TLS name.  */
      if (const char *name = get_some_local_dynamic_name ())
	assemble_name (file, name);
      else
	output_operand_lossage ("'%%&' used without any "
				"local dynamic TLS references");
      return;

    case 'Y':
      /* Adjust the operand to take into account a RESTORE operation.  */
      if (GET_CODE (x) == CONST_INT)
	break;		/* Constants fall through to the default output.  */
      else if (GET_CODE (x) != REG)
	output_operand_lossage ("invalid %%Y operand");
      else if (REGNO (x) < 8)
	fputs (reg_names[REGNO (x)], file);
      else if (REGNO (x) >= 24 && REGNO (x) < 32)
	/* Map %i0-%i7 to %o0-%o7, i.e. the registers they become after
	   the register window rotates back.  */
	fputs (reg_names[REGNO (x)-16], file);
      else
	output_operand_lossage ("invalid %%Y operand");
      return;
    case 'L':
      /* Print out the low order register name of a register pair.  */
      if (WORDS_BIG_ENDIAN)
	fputs (reg_names[REGNO (x)+1], file);
      else
	fputs (reg_names[REGNO (x)], file);
      return;
    case 'H':
      /* Print out the high order register name of a register pair.  */
      if (WORDS_BIG_ENDIAN)
	fputs (reg_names[REGNO (x)], file);
      else
	fputs (reg_names[REGNO (x)+1], file);
      return;
    case 'R':
      /* Print out the second register name of a register pair or quad.
	 I.e., R (%o0) => %o1.  */
      fputs (reg_names[REGNO (x)+1], file);
      return;
    case 'S':
      /* Print out the third register name of a register quad.
	 I.e., S (%o0) => %o2.  */
      fputs (reg_names[REGNO (x)+2], file);
      return;
    case 'T':
      /* Print out the fourth register name of a register quad.
	 I.e., T (%o0) => %o3.  */
      fputs (reg_names[REGNO (x)+3], file);
      return;
    case 'x':
      /* Print a condition code register.  */
      if (REGNO (x) == SPARC_ICC_REG)
	{
	  /* We don't handle CC[X]_NOOVmode because they're not supposed
	     to occur here.  */
	  if (GET_MODE (x) == CCmode)
	    fputs ("%icc", file);
	  else if (GET_MODE (x) == CCXmode)
	    fputs ("%xcc", file);
	  else
	    gcc_unreachable ();
	}
      else
	/* %fccN register */
	fputs (reg_names[REGNO (x)], file);
      return;
    case 'm':
      /* Print the operand's address only.  */
      output_address (GET_MODE (x), XEXP (x, 0));
      return;
    case 'r':
      /* In this case we need a register.  Use %g0 if the
	 operand is const0_rtx.  */
      if (x == const0_rtx
	  || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
	{
	  fputs ("%g0", file);
	  return;
	}
      else
	break;		/* Non-zero operands use the default output.  */

    case 'A':
      /* Print the mnemonic for a three-operand logical insn.  */
      switch (GET_CODE (x))
	{
	case IOR: fputs ("or", file); break;
	case AND: fputs ("and", file); break;
	case XOR: fputs ("xor", file); break;
	default: output_operand_lossage ("invalid %%A operand");
	}
      return;

    case 'B':
      /* Print the mnemonic for the inverted-second-operand form.  */
      switch (GET_CODE (x))
	{
	case IOR: fputs ("orn", file); break;
	case AND: fputs ("andn", file); break;
	case XOR: fputs ("xnor", file); break;
	default: output_operand_lossage ("invalid %%B operand");
	}
      return;

      /* This is used by the conditional move instructions.  */
    case 'C':
      {
	enum rtx_code rc = GET_CODE (x);
	
	switch (rc)
	  {
	  case NE: fputs ("ne", file); break;
	  case EQ: fputs ("e", file); break;
	  case GE: fputs ("ge", file); break;
	  case GT: fputs ("g", file); break;
	  case LE: fputs ("le", file); break;
	  case LT: fputs ("l", file); break;
	  case GEU: fputs ("geu", file); break;
	  case GTU: fputs ("gu", file); break;
	  case LEU: fputs ("leu", file); break;
	  case LTU: fputs ("lu", file); break;
	  case LTGT: fputs ("lg", file); break;
	  case UNORDERED: fputs ("u", file); break;
	  case ORDERED: fputs ("o", file); break;
	  case UNLT: fputs ("ul", file); break;
	  case UNLE: fputs ("ule", file); break;
	  case UNGT: fputs ("ug", file); break;
	  case UNGE: fputs ("uge", file); break;
	  case UNEQ: fputs ("ue", file); break;
	  default: output_operand_lossage ("invalid %%C operand");
	  }
	return;
      }

      /* This are used by the movr instruction pattern.  */
    case 'D':
      {
	enum rtx_code rc = GET_CODE (x);
	switch (rc)
	  {
	  case NE: fputs ("ne", file); break;
	  case EQ: fputs ("e", file); break;
	  case GE: fputs ("gez", file); break;
	  case LT: fputs ("lz", file); break;
	  case LE: fputs ("lez", file); break;
	  case GT: fputs ("gz", file); break;
	  default: output_operand_lossage ("invalid %%D operand");
	  }
	return;
      }

    case 'b':
      {
	/* Print a sign-extended character.  */
	int i = trunc_int_for_mode (INTVAL (x), QImode);
	fprintf (file, "%d", i);
	return;
      }

    case 'f':
      /* Operand must be a MEM; write its address.  */
      if (GET_CODE (x) != MEM)
	output_operand_lossage ("invalid %%f operand");
      output_address (GET_MODE (x), XEXP (x, 0));
      return;

    case 's':
      {
	/* Print a sign-extended 32-bit value.  */
	HOST_WIDE_INT i;
	if (GET_CODE(x) == CONST_INT)
	  i = INTVAL (x);
	else
	  {
	    output_operand_lossage ("invalid %%s operand");
	    return;
	  }
	i = trunc_int_for_mode (i, SImode);
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
	return;
      }

    case 0:
      /* Do nothing special.  */
      break;

    default:
      /* Undocumented flag.  */
      output_operand_lossage ("invalid operand output code");
    }

  /* Default output: a register name, a bracketed memory reference, a
     %hi/%lo pair, or a plain constant.  */
  if (GET_CODE (x) == REG)
    fputs (reg_names[REGNO (x)], file);
  else if (GET_CODE (x) == MEM)
    {
      fputc ('[', file);
	/* Poor Sun assembler doesn't understand absolute addressing.  */
      if (CONSTANT_P (XEXP (x, 0)))
	fputs ("%g0+", file);
      output_address (GET_MODE (x), XEXP (x, 0));
      fputc (']', file);
    }
  else if (GET_CODE (x) == HIGH)
    {
      fputs ("%hi(", file);
      output_addr_const (file, XEXP (x, 0));
      fputc (')', file);
    }
  else if (GET_CODE (x) == LO_SUM)
    {
      sparc_print_operand (file, XEXP (x, 0), 0);
      if (TARGET_CM_MEDMID)
	fputs ("+%l44(", file);
      else
	fputs ("+%lo(", file);
      output_addr_const (file, XEXP (x, 1));
      fputc (')', file);
    }
  else if (GET_CODE (x) == CONST_DOUBLE)
    output_operand_lossage ("floating-point constant not a valid immediate operand");
  else
    output_addr_const (file, x);
}
8986
8987 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
8988
static void
sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
{
  register rtx base, index = 0;
  int offset = 0;
  register rtx addr = x;

  if (REG_P (addr))
    fputs (reg_names[REGNO (addr)], file);
  else if (GET_CODE (addr) == PLUS)
    {
      /* Split the PLUS into base + (constant offset | register index).  */
      if (CONST_INT_P (XEXP (addr, 0)))
	offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
      else if (CONST_INT_P (XEXP (addr, 1)))
	offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
      else
	base = XEXP (addr, 0), index = XEXP (addr, 1);
      if (GET_CODE (base) == LO_SUM)
	{
	  /* (plus (lo_sum ...) (const_int)) is only legitimate with
	     offsetable %lo() in 64-bit non-MEDMID code models.  */
	  gcc_assert (USE_AS_OFFSETABLE_LO10
		      && TARGET_ARCH64
		      && ! TARGET_CM_MEDMID);
	  output_operand (XEXP (base, 0), 0);
	  fputs ("+%lo(", file);
	  output_address (VOIDmode, XEXP (base, 1));
	  fprintf (file, ")+%d", offset);
	}
      else
	{
	  fputs (reg_names[REGNO (base)], file);
	  if (index == 0)
	    fprintf (file, "%+d", offset);
	  else if (REG_P (index))
	    fprintf (file, "+%s", reg_names[REGNO (index)]);
	  else if (GET_CODE (index) == SYMBOL_REF
		   || GET_CODE (index) == LABEL_REF
		   || GET_CODE (index) == CONST)
	    fputc ('+', file), output_addr_const (file, index);
	  else gcc_unreachable ();
	}
    }
  else if (GET_CODE (addr) == MINUS
	   && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
    {
      /* PC-relative difference: "sym-(label-.)".  */
      output_addr_const (file, XEXP (addr, 0));
      fputs ("-(", file);
      output_addr_const (file, XEXP (addr, 1));
      fputs ("-.)", file);
    }
  else if (GET_CODE (addr) == LO_SUM)
    {
      output_operand (XEXP (addr, 0), 0);
      if (TARGET_CM_MEDMID)
	fputs ("+%l44(", file);
      else
	fputs ("+%lo(", file);
      output_address (VOIDmode, XEXP (addr, 1));
      fputc (')', file);
    }
  else if (flag_pic
	   && GET_CODE (addr) == CONST
	   && GET_CODE (XEXP (addr, 0)) == MINUS
	   && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
	   && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
	   && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
    {
      addr = XEXP (addr, 0);
      output_addr_const (file, XEXP (addr, 0));
      /* Group the args of the second CONST in parenthesis.  */
      fputs ("-(", file);
      /* Skip past the second CONST--it does nothing for us.  */
      output_addr_const (file, XEXP (XEXP (addr, 1), 0));
      /* Close the parenthesis.  */
      fputc (')', file);
    }
  else
    {
      output_addr_const (file, addr);
    }
}
9069
9070 /* Target hook for assembling integer objects. The sparc version has
9071 special handling for aligned DI-mode objects. */
9072
9073 static bool
sparc_assemble_integer(rtx x,unsigned int size,int aligned_p)9074 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9075 {
9076 /* ??? We only output .xword's for symbols and only then in environments
9077 where the assembler can handle them. */
9078 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9079 {
9080 if (TARGET_V9)
9081 {
9082 assemble_integer_with_op ("\t.xword\t", x);
9083 return true;
9084 }
9085 else
9086 {
9087 assemble_aligned_integer (4, const0_rtx);
9088 assemble_aligned_integer (4, x);
9089 return true;
9090 }
9091 }
9092 return default_assemble_integer (x, size, aligned_p);
9093 }
9094
9095 /* Return the value of a code used in the .proc pseudo-op that says
9096 what kind of result this function returns. For non-C types, we pick
9097 the closest C type. */
9098
/* Fallback definitions of the C type sizes, for targets that do not
   define them; used by sparc_type_code below.  */

#ifndef SHORT_TYPE_SIZE
#define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
#endif

#ifndef INT_TYPE_SIZE
#define INT_TYPE_SIZE BITS_PER_WORD
#endif

#ifndef LONG_TYPE_SIZE
#define LONG_TYPE_SIZE BITS_PER_WORD
#endif

#ifndef LONG_LONG_TYPE_SIZE
#define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
#endif

#ifndef FLOAT_TYPE_SIZE
#define FLOAT_TYPE_SIZE BITS_PER_WORD
#endif

#ifndef DOUBLE_TYPE_SIZE
#define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
#endif

#ifndef LONG_DOUBLE_TYPE_SIZE
#define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
#endif
9126
/* Return the value of a code used in the .proc pseudo-op that says what
   kind of result this function returns.  Each level of type derivation
   (array-of, function-returning, pointer-to) consumes two bits of the
   result starting at bit 6; the innermost base type supplies the final
   low-order code.  */

unsigned long
sparc_type_code (register tree type)
{
  register unsigned long qualifiers = 0;
  register unsigned shift;

  /* Only the first 30 bits of the qualifier are valid.  We must refrain from
     setting more, since some assemblers will give an error for this.  Also,
     we must be careful to avoid shifts of 32 bits or more to avoid getting
     unpredictable results.  */

  for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
    {
      switch (TREE_CODE (type))
	{
	case ERROR_MARK:
	  return qualifiers;

	case ARRAY_TYPE:
	  /* Derivation code 3: array of ...  */
	  qualifiers |= (3 << shift);
	  break;

	case FUNCTION_TYPE:
	case METHOD_TYPE:
	  /* Derivation code 2: function returning ...  */
	  qualifiers |= (2 << shift);
	  break;

	case POINTER_TYPE:
	case REFERENCE_TYPE:
	case OFFSET_TYPE:
	  /* Derivation code 1: pointer to ...  */
	  qualifiers |= (1 << shift);
	  break;

	case RECORD_TYPE:
	  return (qualifiers | 8);

	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  return (qualifiers | 9);

	case ENUMERAL_TYPE:
	  return (qualifiers | 10);

	case VOID_TYPE:
	  return (qualifiers | 16);

	case INTEGER_TYPE:
	  /* If this is a range type, consider it to be the underlying
	     type.  */
	  if (TREE_TYPE (type) != 0)
	    break;

	  /* Carefully distinguish all the standard types of C,
	     without messing up if the language is not C.  We do this by
	     testing TYPE_PRECISION and TYPE_UNSIGNED.  The old code used to
	     look at both the names and the above fields, but that's redundant.
	     Any type whose size is between two C types will be considered
	     to be the wider of the two types.  Also, we do not have a
	     special code to use for "long long", so anything wider than
	     long is treated the same.  Note that we can't distinguish
	     between "int" and "long" in this code if they are the same
	     size, but that's fine, since neither can the assembler.  */

	  if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
	    return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));

	  else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
	    return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));

	  else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
	    return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));

	  else
	    return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));

	case REAL_TYPE:
	  /* If this is a range type, consider it to be the underlying
	     type.  */
	  if (TREE_TYPE (type) != 0)
	    break;

	  /* Carefully distinguish all the standard types of C,
	     without messing up if the language is not C.  */

	  if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
	    return (qualifiers | 6);

	  else
	    return (qualifiers | 7);

	case COMPLEX_TYPE:	/* GNU Fortran COMPLEX type.  */
	  /* ??? We need to distinguish between double and float complex types,
	     but I don't know how yet because I can't reach this code from
	     existing front-ends.  */
	  return (qualifiers | 7);	/* Who knows? */

	case VECTOR_TYPE:
	case BOOLEAN_TYPE:	/* Boolean truth value type.  */
	case LANG_TYPE:
	case NULLPTR_TYPE:
	  return qualifiers;

	default:
	  gcc_unreachable ();		/* Not a type! */
	}
    }

  /* Ran out of derivation bits; return what has been accumulated.  */
  return qualifiers;
}
9236
9237 /* Nested function support. */
9238
9239 /* Emit RTL insns to initialize the variable parts of a trampoline.
9240 FNADDR is an RTX for the address of the function's pure code.
9241 CXT is an RTX for the static chain value for the function.
9242
9243 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9244 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9245 (to store insns). This is a bit excessive. Perhaps a different
9246 mechanism would be better here.
9247
9248 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9249
/* Write the four instructions of the 32-bit trampoline into M_TRAMP,
   merging the split halves of FNADDR and CXT into the opcodes, then
   flush the data cache.  */

static void
sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
{
  /* SPARC 32-bit trampoline:

	sethi	%hi(fn), %g1
	sethi	%hi(static), %g2
	jmp	%g1+%lo(fn)
	or	%g2, %lo(static), %g2

    SETHI i,r  = 00rr rrr1 00ii iiii iiii iiii iiii iiii
    JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
   */

  /* Word 0: 0x03000000 is "sethi 0, %g1" (see the SETHI encoding above);
     OR in the high 22 bits of FNADDR.  */
  emit_move_insn
    (adjust_address (m_tramp, SImode, 0),
     expand_binop (SImode, ior_optab,
		   expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
		   GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
		   NULL_RTX, 1, OPTAB_DIRECT));

  /* Word 1: 0x05000000 is "sethi 0, %g2"; OR in the high 22 bits of
     CXT.  */
  emit_move_insn
    (adjust_address (m_tramp, SImode, 4),
     expand_binop (SImode, ior_optab,
		   expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
		   GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
		   NULL_RTX, 1, OPTAB_DIRECT));

  /* Word 2: 0x81c06000 is the JMPL opcode; OR in the low 10 bits of
     FNADDR as the immediate.  */
  emit_move_insn
    (adjust_address (m_tramp, SImode, 8),
     expand_binop (SImode, ior_optab,
		   expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
		   GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
		   NULL_RTX, 1, OPTAB_DIRECT));

  /* Word 3: 0x8410a000 is the OR opcode for %g2; OR in the low 10 bits
     of CXT as the immediate.  */
  emit_move_insn
    (adjust_address (m_tramp, SImode, 12),
     expand_binop (SImode, ior_optab,
		   expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
		   GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
		   NULL_RTX, 1, OPTAB_DIRECT));

  /* On UltraSPARC a flush flushes an entire cache line.  The trampoline is
     aligned on a 16 byte boundary so one flush clears it all.  */
  emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
  if (sparc_cpu != PROCESSOR_ULTRASPARC
      && sparc_cpu != PROCESSOR_ULTRASPARC3
      && sparc_cpu != PROCESSOR_NIAGARA
      && sparc_cpu != PROCESSOR_NIAGARA2
      && sparc_cpu != PROCESSOR_NIAGARA3
      && sparc_cpu != PROCESSOR_NIAGARA4
      && sparc_cpu != PROCESSOR_NIAGARA7)
    emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));

  /* Call __enable_execute_stack after writing onto the stack to make sure
     the stack address is accessible.  */
#ifdef HAVE_ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif

}
9312
9313 /* The 64-bit version is simpler because it makes more sense to load the
9314 values as "immediate" data out of the trampoline. It's also easier since
9315 we can read the PC without clobbering a register. */
9316
/* Write the 64-bit trampoline: four fixed instructions followed by the
   static chain and function address stored as data, then flush the
   data cache.  */

static void
sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
{
  /* SPARC 64-bit trampoline:

	rd	%pc, %g1
	ldx	[%g1+24], %g5
	jmp	%g5
	ldx	[%g1+16], %g5
	+16 bytes data
   */

  /* rd %pc, %g1  */
  emit_move_insn (adjust_address (m_tramp, SImode, 0),
		  GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
  /* ldx [%g1+24], %g5  */
  emit_move_insn (adjust_address (m_tramp, SImode, 4),
		  GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
  /* jmp %g5  */
  emit_move_insn (adjust_address (m_tramp, SImode, 8),
		  GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
  /* ldx [%g1+16], %g5 (in the delay slot)  */
  emit_move_insn (adjust_address (m_tramp, SImode, 12),
		  GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
  /* The data words loaded above: static chain at +16, function address
     at +24.  */
  emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
  emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
  emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));

  /* Flush the second half as well, except on CPUs where one flush is
     enough (cf. the cache-line note in sparc32_initialize_trampoline).  */
  if (sparc_cpu != PROCESSOR_ULTRASPARC
      && sparc_cpu != PROCESSOR_ULTRASPARC3
      && sparc_cpu != PROCESSOR_NIAGARA
      && sparc_cpu != PROCESSOR_NIAGARA2
      && sparc_cpu != PROCESSOR_NIAGARA3
      && sparc_cpu != PROCESSOR_NIAGARA4
      && sparc_cpu != PROCESSOR_NIAGARA7)
    emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));

  /* Call __enable_execute_stack after writing onto the stack to make sure
     the stack address is accessible.  */
#ifdef HAVE_ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif
}
9357
9358 /* Worker for TARGET_TRAMPOLINE_INIT. */
9359
9360 static void
sparc_trampoline_init(rtx m_tramp,tree fndecl,rtx cxt)9361 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9362 {
9363 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9364 cxt = force_reg (Pmode, cxt);
9365 if (TARGET_ARCH64)
9366 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9367 else
9368 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9369 }
9370
9371 /* Adjust the cost of a scheduling dependency. Return the new cost of
9372 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9373
/* Adjust the cost COST of the dependency between INSN and DEP_INSN
   (linked by LINK) for the SuperSPARC pipeline.  */

static int
supersparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
{
  enum attr_type insn_type;

  /* Leave unrecognizable insns alone.  */
  if (recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);

  if (REG_NOTE_KIND (link) == 0)
    {
      /* Data dependency; DEP_INSN writes a register that INSN reads some
	 cycles later.  */

      /* if a load, then the dependence must be on the memory address;
	 add an extra "cycle".  Note that the cost could be two cycles
	 if the reg was written late in an instruction group; we cannot tell
	 here.  */
      if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
	return cost + 3;

      /* Get the delay only if the address of the store is the dependence.  */
      if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
	{
	  rtx pat = PATTERN (insn);
	  rtx dep_pat = PATTERN (dep_insn);

	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    return cost;  /* This should not happen!  */

	  /* The dependency between the two instructions was on the data that
	     is being stored.  Assume that this implies that the address of the
	     store is not dependent.  */
	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
	    return cost;

	  return cost + 3;  /* An approximation.  */
	}

      /* A shift instruction cannot receive its data from an instruction
	 in the same cycle; add a one cycle penalty.  */
      if (insn_type == TYPE_SHIFT)
	return cost + 3;   /* Split before cascade into shift.  */
    }
  else
    {
      /* Anti- or output- dependency; DEP_INSN reads/writes a register that
	 INSN writes some cycles later.  */

      /* These are only significant for the fpu unit; writing a fp reg before
	 the fpu has finished with it stalls the processor.  */

      /* Reusing an integer register causes no problems.  */
      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
	return 0;
    }

  return cost;
}
9434
/* Likewise, adjust dependency cost COST for the HyperSPARC and
   SPARClite86x pipelines.  */

static int
hypersparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
{
  enum attr_type insn_type, dep_type;
  rtx pat = PATTERN (insn);
  rtx dep_pat = PATTERN (dep_insn);

  /* Leave unrecognizable insns alone.  */
  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_type = get_attr_type (dep_insn);

  switch (REG_NOTE_KIND (link))
    {
    case 0:
      /* Data dependency; DEP_INSN writes a register that INSN reads some
	 cycles later.  */

      switch (insn_type)
	{
	case TYPE_STORE:
	case TYPE_FPSTORE:
	  /* Get the delay iff the address of the store is the dependence.  */
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    return cost;

	  /* Dependence on the stored data, not the address: no penalty.  */
	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
	    return cost;
	  return cost + 3;

	case TYPE_LOAD:
	case TYPE_SLOAD:
	case TYPE_FPLOAD:
	  /* If a load, then the dependence must be on the memory address.  If
	     the addresses aren't equal, then it might be a false dependency */
	  if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
	    {
	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
		  || GET_CODE (SET_DEST (dep_pat)) != MEM
		  || GET_CODE (SET_SRC (pat)) != MEM
		  || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
				    XEXP (SET_SRC (pat), 0)))
		return cost + 2;

	      /* Load reads exactly what the store wrote: full penalty.  */
	      return cost + 8;
	    }
	  break;

	case TYPE_BRANCH:
	  /* Compare to branch latency is 0.  There is no benefit from
	     separating compare and branch.  */
	  if (dep_type == TYPE_COMPARE)
	    return 0;
	  /* Floating point compare to branch latency is less than
	     compare to conditional move.  */
	  if (dep_type == TYPE_FPCMP)
	    return cost - 1;
	  break;
	default:
	  break;
	}
      break;

    case REG_DEP_ANTI:
      /* Anti-dependencies only penalize the fpu unit.  */
      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
	return 0;
      break;

    default:
      break;
    }

  return cost;
}
9511
9512 static int
sparc_adjust_cost(rtx_insn * insn,rtx link,rtx_insn * dep,int cost)9513 sparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
9514 {
9515 switch (sparc_cpu)
9516 {
9517 case PROCESSOR_SUPERSPARC:
9518 cost = supersparc_adjust_cost (insn, link, dep, cost);
9519 break;
9520 case PROCESSOR_HYPERSPARC:
9521 case PROCESSOR_SPARCLITE86X:
9522 cost = hypersparc_adjust_cost (insn, link, dep, cost);
9523 break;
9524 default:
9525 break;
9526 }
9527 return cost;
9528 }
9529
/* Scheduler initialization hook — deliberately an empty stub; SPARC has
   no per-region scheduling state to set up.  (NOTE(review): presumably
   registered as TARGET_SCHED_INIT elsewhere in this file — confirm.)  */
static void
sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
		  int sched_verbose ATTRIBUTE_UNUSED,
		  int max_ready ATTRIBUTE_UNUSED)
{}
9535
9536 static int
sparc_use_sched_lookahead(void)9537 sparc_use_sched_lookahead (void)
9538 {
9539 if (sparc_cpu == PROCESSOR_NIAGARA
9540 || sparc_cpu == PROCESSOR_NIAGARA2
9541 || sparc_cpu == PROCESSOR_NIAGARA3)
9542 return 0;
9543 if (sparc_cpu == PROCESSOR_NIAGARA4
9544 || sparc_cpu == PROCESSOR_NIAGARA7)
9545 return 2;
9546 if (sparc_cpu == PROCESSOR_ULTRASPARC
9547 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9548 return 4;
9549 if ((1 << sparc_cpu) &
9550 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9551 (1 << PROCESSOR_SPARCLITE86X)))
9552 return 3;
9553 return 0;
9554 }
9555
9556 static int
sparc_issue_rate(void)9557 sparc_issue_rate (void)
9558 {
9559 switch (sparc_cpu)
9560 {
9561 case PROCESSOR_NIAGARA:
9562 case PROCESSOR_NIAGARA2:
9563 case PROCESSOR_NIAGARA3:
9564 default:
9565 return 1;
9566 case PROCESSOR_NIAGARA4:
9567 case PROCESSOR_NIAGARA7:
9568 case PROCESSOR_V9:
9569 /* Assume V9 processors are capable of at least dual-issue. */
9570 return 2;
9571 case PROCESSOR_SUPERSPARC:
9572 return 3;
9573 case PROCESSOR_HYPERSPARC:
9574 case PROCESSOR_SPARCLITE86X:
9575 return 2;
9576 case PROCESSOR_ULTRASPARC:
9577 case PROCESSOR_ULTRASPARC3:
9578 return 4;
9579 }
9580 }
9581
/* Return 1 if the single SET in INSN is known to leave the high 32 bits
   of its destination zero, -1 if it sign-extends from SImode, and 0 if
   nothing is known.  Helper for sparc_check_64.  */

static int
set_extends (rtx_insn *insn)
{
  register rtx pat = PATTERN (insn);

  switch (GET_CODE (SET_SRC (pat)))
    {
      /* Load and some shift instructions zero extend.  */
    case MEM:
    case ZERO_EXTEND:
      /* sethi clears the high bits */
    case HIGH:
      /* LO_SUM is used with sethi.  sethi cleared the high
	 bits and the values used with lo_sum are positive */
    case LO_SUM:
      /* Store flag stores 0 or 1 */
    case LT: case LTU:
    case GT: case GTU:
    case LE: case LEU:
    case GE: case GEU:
    case EQ:
    case NE:
      return 1;
    case AND:
      {
	rtx op0 = XEXP (SET_SRC (pat), 0);
	rtx op1 = XEXP (SET_SRC (pat), 1);
	/* ANDing with a non-negative constant clears the high bits.  */
	if (GET_CODE (op1) == CONST_INT)
	  return INTVAL (op1) >= 0;
	if (GET_CODE (op0) != REG)
	  return 0;
	/* Otherwise the result is clean if either register operand is.  */
	if (sparc_check_64 (op0, insn) == 1)
	  return 1;
	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
      }
    case IOR:
    case XOR:
      {
	rtx op0 = XEXP (SET_SRC (pat), 0);
	rtx op1 = XEXP (SET_SRC (pat), 1);
	/* For IOR/XOR both operands must have clean high bits.  */
	if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
	  return 0;
	if (GET_CODE (op1) == CONST_INT)
	  return INTVAL (op1) >= 0;
	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
      }
    case LSHIFTRT:
      return GET_MODE (SET_SRC (pat)) == SImode;
      /* Positive integers leave the high bits zero.  */
    case CONST_INT:
      /* Test bit 31 of the (host-wide, sign-extended) constant.  */
      return !(INTVAL (SET_SRC (pat)) & 0x80000000);
    case ASHIFTRT:
    case SIGN_EXTEND:
      /* -1 (sign-extended) when the operation is in SImode, else 0.  */
      return - (GET_MODE (SET_SRC (pat)) == SImode);
    case REG:
      /* Propagate whatever is known about the source register.  */
      return sparc_check_64 (SET_SRC (pat), insn);
    default:
      return 0;
    }
}
9642
/* Case vectors deferred for output in the function's code section: one
   GC-rooted list for absolute tables and one for address-difference
   tables (see sparc_output_deferred_case_vectors below).
   We _ought_ to have only one kind per function, but...  */
static GTY(()) rtx sparc_addr_diff_list;
static GTY(()) rtx sparc_addr_list;
9646
9647 void
sparc_defer_case_vector(rtx lab,rtx vec,int diff)9648 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9649 {
9650 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9651 if (diff)
9652 sparc_addr_diff_list
9653 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9654 else
9655 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9656 }
9657
/* Output one deferred absolute case vector.  VEC is the (label . body)
   pair queued by sparc_defer_case_vector.  */

static void
sparc_output_addr_vec (rtx vec)
{
  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
  int idx, vlen = XVECLEN (body, 0);

#ifdef ASM_OUTPUT_ADDR_VEC_START
  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
#endif

  /* Emit the label that heads the table.  */
#ifdef ASM_OUTPUT_CASE_LABEL
  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
			 NEXT_INSN (lab));
#else
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
#endif

  /* One table entry per case label.  */
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_VEC_ELT
	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
    }

#ifdef ASM_OUTPUT_ADDR_VEC_END
  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
#endif
}
9685
/* Output one deferred address-difference case vector.  VEC is the
   (label . body) pair queued by sparc_defer_case_vector; each entry is
   emitted relative to the table's base label.  */

static void
sparc_output_addr_diff_vec (rtx vec)
{
  rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
  rtx base = XEXP (XEXP (body, 0), 0);
  int idx, vlen = XVECLEN (body, 1);

#ifdef ASM_OUTPUT_ADDR_VEC_START
  ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
#endif

  /* Emit the label that heads the table.  */
#ifdef ASM_OUTPUT_CASE_LABEL
  ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
			 NEXT_INSN (lab));
#else
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
#endif

  /* One entry per case label, expressed as a difference from BASE.  */
  for (idx = 0; idx < vlen; idx++)
    {
      ASM_OUTPUT_ADDR_DIFF_ELT
	(asm_out_file,
	 body,
	 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
	 CODE_LABEL_NUMBER (base));
    }

#ifdef ASM_OUTPUT_ADDR_VEC_END
  ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
#endif
}
9717
9718 static void
sparc_output_deferred_case_vectors(void)9719 sparc_output_deferred_case_vectors (void)
9720 {
9721 rtx t;
9722 int align;
9723
9724 if (sparc_addr_list == NULL_RTX
9725 && sparc_addr_diff_list == NULL_RTX)
9726 return;
9727
9728 /* Align to cache line in the function's code section. */
9729 switch_to_section (current_function_section ());
9730
9731 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9732 if (align > 0)
9733 ASM_OUTPUT_ALIGN (asm_out_file, align);
9734
9735 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9736 sparc_output_addr_vec (XEXP (t, 0));
9737 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9738 sparc_output_addr_diff_vec (XEXP (t, 0));
9739
9740 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9741 }
9742
9743 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
9744 unknown. Return 1 if the high bits are zero, -1 if the register is
9745 sign extended. */
int
sparc_check_64 (rtx x, rtx_insn *insn)
{
  /* If a register is set only once it is safe to ignore insns this
     code does not know how to handle.  The loop will either recognize
     the single set and return the correct value or fail to recognize
     it and return 0.  */
  int set_once = 0;
  rtx y = x;

  gcc_assert (GET_CODE (x) == REG);

  /* For a DImode register, also track the SImode hard register holding
     one word of it (REGNO + WORDS_BIG_ENDIAN — assumes this selects the
     word referred to by the head comment; TODO confirm).  */
  if (GET_MODE (x) == DImode)
    y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);

  /* With dataflow info, a register defined exactly once lets us scan
     past insns we do not understand.  */
  if (flag_expensive_optimizations
      && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
    set_once = 1;

  /* Without a starting insn, a backward scan only makes sense for a
     single-definition register.  */
  if (insn == 0)
    {
      if (set_once)
	insn = get_last_insn_anywhere ();
      else
	return 0;
    }

  /* Walk backwards looking for the SET of X (or of its word Y).  */
  while ((insn = PREV_INSN (insn)))
    {
      switch (GET_CODE (insn))
	{
	case JUMP_INSN:
	case NOTE:
	  break;
	case CODE_LABEL:
	case CALL_INSN:
	default:
	  /* Control flow or an unknown insn: give up unless the register
	     has exactly one definition.  */
	  if (! set_once)
	    return 0;
	  break;
	case INSN:
	  {
	    rtx pat = PATTERN (insn);
	    if (GET_CODE (pat) != SET)
	      return 0;
	    if (rtx_equal_p (x, SET_DEST (pat)))
	      return set_extends (insn);
	    if (y && rtx_equal_p (y, SET_DEST (pat)))
	      return set_extends (insn);
	    /* A partial overlap invalidates anything we could deduce.  */
	    if (reg_overlap_mentioned_p (SET_DEST (pat), y))
	      return 0;
	  }
	}
    }
  return 0;
}
9802
9803 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
9804 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
9805
const char *
output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
{
  /* Static buffer handed back to the caller; valid until the next call.  */
  static char asm_code[60];

  /* The scratch register is only required when the destination
     register is not a 64-bit global or out register.  */
  if (which_alternative != 2)
    operands[3] = operands[0];

  /* We can only shift by constants <= 63.  */
  if (GET_CODE (operands[2]) == CONST_INT)
    operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);

  /* Materialize the full 64-bit source value in the scratch %3.  */
  if (GET_CODE (operands[1]) == CONST_INT)
    {
      output_asm_insn ("mov\t%1, %3", operands);
    }
  else
    {
      /* Shift the high word into place, clear any sign-extension of the
	 low word if its high bits are not known to be zero, and OR the
	 two halves together.  */
      output_asm_insn ("sllx\t%H1, 32, %3", operands);
      if (sparc_check_64 (operands[1], insn) <= 0)
	output_asm_insn ("srl\t%L1, 0, %L1", operands);
      output_asm_insn ("or\t%L1, %3, %3", operands);
    }

  strcpy (asm_code, opcode);

  /* Append the shift itself plus the code that splits the 64-bit result
     back into the two halves of the destination.  */
  if (which_alternative != 2)
    return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
  else
    return
      strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
}
9840
9841 /* Output rtl to increment the profiler label LABELNO
9842 for profiling a function entry. */
9843
9844 void
sparc_profile_hook(int labelno)9845 sparc_profile_hook (int labelno)
9846 {
9847 char buf[32];
9848 rtx lab, fun;
9849
9850 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
9851 if (NO_PROFILE_COUNTERS)
9852 {
9853 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
9854 }
9855 else
9856 {
9857 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9858 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
9859 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
9860 }
9861 }
9862
9863 #ifdef TARGET_SOLARIS
9864 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
9865
9866 static void
sparc_solaris_elf_asm_named_section(const char * name,unsigned int flags,tree decl ATTRIBUTE_UNUSED)9867 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
9868 tree decl ATTRIBUTE_UNUSED)
9869 {
9870 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
9871 {
9872 solaris_elf_asm_comdat_section (name, flags, decl);
9873 return;
9874 }
9875
9876 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
9877
9878 if (!(flags & SECTION_DEBUG))
9879 fputs (",#alloc", asm_out_file);
9880 if (flags & SECTION_WRITE)
9881 fputs (",#write", asm_out_file);
9882 if (flags & SECTION_TLS)
9883 fputs (",#tls", asm_out_file);
9884 if (flags & SECTION_CODE)
9885 fputs (",#execinstr", asm_out_file);
9886
9887 if (flags & SECTION_NOTYPE)
9888 ;
9889 else if (flags & SECTION_BSS)
9890 fputs (",#nobits", asm_out_file);
9891 else
9892 fputs (",#progbits", asm_out_file);
9893
9894 fputc ('\n', asm_out_file);
9895 }
9896 #endif /* TARGET_SOLARIS */
9897
9898 /* We do not allow indirect calls to be optimized into sibling calls.
9899
9900 We cannot use sibling calls when delayed branches are disabled
9901 because they will likely require the call delay slot to be filled.
9902
9903 Also, on SPARC 32-bit we cannot emit a sibling call when the
9904 current function returns a structure. This is because the "unimp
9905 after call" convention would cause the callee to return to the
9906 wrong place. The generic code already disallows cases where the
9907 function being called returns a structure.
9908
9909 It may seem strange how this last case could occur. Usually there
9910 is code after the call which jumps to epilogue code which dumps the
9911 return value into the struct return area. That ought to invalidate
9912 the sibling call right? Well, in the C++ case we can end up passing
9913 the pointer to the struct return area to a constructor (which returns
9914 void) and then nothing else happens. Such a sibling call would look
9915 valid without the added check here.
9916
9917 VxWorks PIC PLT entries require the global pointer to be initialized
9918 on entry. We therefore can't emit sibling calls to them. */
9919 static bool
sparc_function_ok_for_sibcall(tree decl,tree exp ATTRIBUTE_UNUSED)9920 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9921 {
9922 return (decl
9923 && flag_delayed_branch
9924 && (TARGET_ARCH64 || ! cfun->returns_struct)
9925 && !(TARGET_VXWORKS_RTP
9926 && flag_pic
9927 && !targetm.binds_local_p (decl)));
9928 }
9929
9930 /* libfunc renaming. */
9931
/* Register the SPARC-specific library routine names (see the "libfunc
   renaming" section comment above).  */

static void
sparc_init_libfuncs (void)
{
  if (TARGET_ARCH32)
    {
      /* Use the subroutines that Sun's library provides for integer
	 multiply and divide.  The `*' prevents an underscore from
	 being prepended by the compiler.  .umul is a little faster
	 than .mul.  */
      set_optab_libfunc (smul_optab, SImode, "*.umul");
      set_optab_libfunc (sdiv_optab, SImode, "*.div");
      set_optab_libfunc (udiv_optab, SImode, "*.udiv");
      set_optab_libfunc (smod_optab, SImode, "*.rem");
      set_optab_libfunc (umod_optab, SImode, "*.urem");

      /* TFmode arithmetic.  These names are part of the SPARC 32bit ABI.  */
      set_optab_libfunc (add_optab, TFmode, "_Q_add");
      set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
      set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
      set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
      set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");

      /* We can define the TFmode sqrt optab only if TARGET_FPU.  This
	 is because with soft-float, the SFmode and DFmode sqrt
	 instructions will be absent, and the compiler will notice and
	 try to use the TFmode sqrt instruction for calls to the
	 builtin function sqrt, but this fails.  */
      if (TARGET_FPU)
	set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");

      /* TFmode comparisons.  */
      set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
      set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
      set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
      set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
      set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
      set_optab_libfunc (le_optab, TFmode, "_Q_fle");

      /* Conversions between TFmode and the narrower float modes.  */
      set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
      set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
      set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
      set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");

      /* Conversions between TFmode and SImode.  */
      set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
      set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
      set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
      set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");

      if (DITF_CONVERSION_LIBFUNCS)
	{
	  set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
	  set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
	  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
	  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
	}

      if (SUN_CONVERSION_LIBFUNCS)
	{
	  set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
	  set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
	  set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
	  set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
	}
    }
  if (TARGET_ARCH64)
    {
      /* In the SPARC 64bit ABI, SImode multiply and divide functions
	 do not exist in the library.  Make sure the compiler does not
	 emit calls to them by accident.  (It should always use the
         hardware instructions.)  */
      set_optab_libfunc (smul_optab, SImode, 0);
      set_optab_libfunc (sdiv_optab, SImode, 0);
      set_optab_libfunc (udiv_optab, SImode, 0);
      set_optab_libfunc (smod_optab, SImode, 0);
      set_optab_libfunc (umod_optab, SImode, 0);

      if (SUN_INTEGER_MULTIPLY_64)
	{
	  set_optab_libfunc (smul_optab, DImode, "__mul64");
	  set_optab_libfunc (sdiv_optab, DImode, "__div64");
	  set_optab_libfunc (udiv_optab, DImode, "__udiv64");
	  set_optab_libfunc (smod_optab, DImode, "__rem64");
	  set_optab_libfunc (umod_optab, DImode, "__urem64");
	}

      if (SUN_CONVERSION_LIBFUNCS)
	{
	  set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
	  set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
	  set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
	  set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
	}
    }
}
10025
/* SPARC builtins.  Codes for every target-specific builtin function,
   grouped by the instruction-set extension that provides them.  */
enum sparc_builtins
{
  /* FPU builtins.  */
  SPARC_BUILTIN_LDFSR,
  SPARC_BUILTIN_STFSR,

  /* VIS 1.0 builtins.  */
  SPARC_BUILTIN_FPACK16,
  SPARC_BUILTIN_FPACK32,
  SPARC_BUILTIN_FPACKFIX,
  SPARC_BUILTIN_FEXPAND,
  SPARC_BUILTIN_FPMERGE,
  SPARC_BUILTIN_FMUL8X16,
  SPARC_BUILTIN_FMUL8X16AU,
  SPARC_BUILTIN_FMUL8X16AL,
  SPARC_BUILTIN_FMUL8SUX16,
  SPARC_BUILTIN_FMUL8ULX16,
  SPARC_BUILTIN_FMULD8SUX16,
  SPARC_BUILTIN_FMULD8ULX16,
  SPARC_BUILTIN_FALIGNDATAV4HI,
  SPARC_BUILTIN_FALIGNDATAV8QI,
  SPARC_BUILTIN_FALIGNDATAV2SI,
  SPARC_BUILTIN_FALIGNDATADI,
  SPARC_BUILTIN_WRGSR,
  SPARC_BUILTIN_RDGSR,
  SPARC_BUILTIN_ALIGNADDR,
  SPARC_BUILTIN_ALIGNADDRL,
  SPARC_BUILTIN_PDIST,
  SPARC_BUILTIN_EDGE8,
  SPARC_BUILTIN_EDGE8L,
  SPARC_BUILTIN_EDGE16,
  SPARC_BUILTIN_EDGE16L,
  SPARC_BUILTIN_EDGE32,
  SPARC_BUILTIN_EDGE32L,
  SPARC_BUILTIN_FCMPLE16,
  SPARC_BUILTIN_FCMPLE32,
  SPARC_BUILTIN_FCMPNE16,
  SPARC_BUILTIN_FCMPNE32,
  SPARC_BUILTIN_FCMPGT16,
  SPARC_BUILTIN_FCMPGT32,
  SPARC_BUILTIN_FCMPEQ16,
  SPARC_BUILTIN_FCMPEQ32,
  SPARC_BUILTIN_FPADD16,
  SPARC_BUILTIN_FPADD16S,
  SPARC_BUILTIN_FPADD32,
  SPARC_BUILTIN_FPADD32S,
  SPARC_BUILTIN_FPSUB16,
  SPARC_BUILTIN_FPSUB16S,
  SPARC_BUILTIN_FPSUB32,
  SPARC_BUILTIN_FPSUB32S,
  SPARC_BUILTIN_ARRAY8,
  SPARC_BUILTIN_ARRAY16,
  SPARC_BUILTIN_ARRAY32,

  /* VIS 2.0 builtins.  */
  SPARC_BUILTIN_EDGE8N,
  SPARC_BUILTIN_EDGE8LN,
  SPARC_BUILTIN_EDGE16N,
  SPARC_BUILTIN_EDGE16LN,
  SPARC_BUILTIN_EDGE32N,
  SPARC_BUILTIN_EDGE32LN,
  SPARC_BUILTIN_BMASK,
  SPARC_BUILTIN_BSHUFFLEV4HI,
  SPARC_BUILTIN_BSHUFFLEV8QI,
  SPARC_BUILTIN_BSHUFFLEV2SI,
  SPARC_BUILTIN_BSHUFFLEDI,

  /* VIS 3.0 builtins.  */
  SPARC_BUILTIN_CMASK8,
  SPARC_BUILTIN_CMASK16,
  SPARC_BUILTIN_CMASK32,
  SPARC_BUILTIN_FCHKSM16,
  SPARC_BUILTIN_FSLL16,
  SPARC_BUILTIN_FSLAS16,
  SPARC_BUILTIN_FSRL16,
  SPARC_BUILTIN_FSRA16,
  SPARC_BUILTIN_FSLL32,
  SPARC_BUILTIN_FSLAS32,
  SPARC_BUILTIN_FSRL32,
  SPARC_BUILTIN_FSRA32,
  SPARC_BUILTIN_PDISTN,
  SPARC_BUILTIN_FMEAN16,
  SPARC_BUILTIN_FPADD64,
  SPARC_BUILTIN_FPSUB64,
  SPARC_BUILTIN_FPADDS16,
  SPARC_BUILTIN_FPADDS16S,
  SPARC_BUILTIN_FPSUBS16,
  SPARC_BUILTIN_FPSUBS16S,
  SPARC_BUILTIN_FPADDS32,
  SPARC_BUILTIN_FPADDS32S,
  SPARC_BUILTIN_FPSUBS32,
  SPARC_BUILTIN_FPSUBS32S,
  SPARC_BUILTIN_FUCMPLE8,
  SPARC_BUILTIN_FUCMPNE8,
  SPARC_BUILTIN_FUCMPGT8,
  SPARC_BUILTIN_FUCMPEQ8,
  SPARC_BUILTIN_FHADDS,
  SPARC_BUILTIN_FHADDD,
  SPARC_BUILTIN_FHSUBS,
  SPARC_BUILTIN_FHSUBD,
  SPARC_BUILTIN_FNHADDS,
  SPARC_BUILTIN_FNHADDD,
  SPARC_BUILTIN_UMULXHI,
  SPARC_BUILTIN_XMULX,
  SPARC_BUILTIN_XMULXHI,

  /* VIS 4.0 builtins.  */
  SPARC_BUILTIN_FPADD8,
  SPARC_BUILTIN_FPADDS8,
  SPARC_BUILTIN_FPADDUS8,
  SPARC_BUILTIN_FPADDUS16,
  SPARC_BUILTIN_FPCMPLE8,
  SPARC_BUILTIN_FPCMPGT8,
  SPARC_BUILTIN_FPCMPULE16,
  SPARC_BUILTIN_FPCMPUGT16,
  SPARC_BUILTIN_FPCMPULE32,
  SPARC_BUILTIN_FPCMPUGT32,
  SPARC_BUILTIN_FPMAX8,
  SPARC_BUILTIN_FPMAX16,
  SPARC_BUILTIN_FPMAX32,
  SPARC_BUILTIN_FPMAXU8,
  SPARC_BUILTIN_FPMAXU16,
  SPARC_BUILTIN_FPMAXU32,
  SPARC_BUILTIN_FPMIN8,
  SPARC_BUILTIN_FPMIN16,
  SPARC_BUILTIN_FPMIN32,
  SPARC_BUILTIN_FPMINU8,
  SPARC_BUILTIN_FPMINU16,
  SPARC_BUILTIN_FPMINU32,
  SPARC_BUILTIN_FPSUB8,
  SPARC_BUILTIN_FPSUBS8,
  SPARC_BUILTIN_FPSUBUS8,
  SPARC_BUILTIN_FPSUBUS16,

  /* Sentinel: number of builtin codes.  */
  SPARC_BUILTIN_MAX
};
10163
/* Declarations of the SPARC builtin functions, indexed by builtin code;
   entries left NULL_TREE correspond to builtins not registered on the
   current target.  */
static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];

/* Insn code used to expand each builtin, parallel to sparc_builtins.  */
static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10166
10167 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10168 function decl or NULL_TREE if the builtin was not added. */
10169
10170 static tree
def_builtin(const char * name,enum insn_code icode,enum sparc_builtins code,tree type)10171 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10172 tree type)
10173 {
10174 tree t
10175 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10176
10177 if (t)
10178 {
10179 sparc_builtins[code] = t;
10180 sparc_builtins_icode[code] = icode;
10181 }
10182
10183 return t;
10184 }
10185
10186 /* Likewise, but also marks the function as "const". */
10187
10188 static tree
def_builtin_const(const char * name,enum insn_code icode,enum sparc_builtins code,tree type)10189 def_builtin_const (const char *name, enum insn_code icode,
10190 enum sparc_builtins code, tree type)
10191 {
10192 tree t = def_builtin (name, icode, code, type);
10193
10194 if (t)
10195 TREE_READONLY (t) = 1;
10196
10197 return t;
10198 }
10199
10200 /* Implement the TARGET_INIT_BUILTINS target hook.
10201 Create builtin functions for special SPARC instructions. */
10202
10203 static void
sparc_init_builtins(void)10204 sparc_init_builtins (void)
10205 {
10206 if (TARGET_FPU)
10207 sparc_fpu_init_builtins ();
10208
10209 if (TARGET_VIS)
10210 sparc_vis_init_builtins ();
10211 }
10212
10213 /* Create builtin functions for FPU instructions. */
10214
10215 static void
sparc_fpu_init_builtins(void)10216 sparc_fpu_init_builtins (void)
10217 {
10218 tree ftype
10219 = build_function_type_list (void_type_node,
10220 build_pointer_type (unsigned_type_node), 0);
10221 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10222 SPARC_BUILTIN_LDFSR, ftype);
10223 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10224 SPARC_BUILTIN_STFSR, ftype);
10225 }
10226
10227 /* Create builtin functions for VIS instructions. */
10228
10229 static void
sparc_vis_init_builtins(void)10230 sparc_vis_init_builtins (void)
10231 {
10232 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10233 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10234 tree v4hi = build_vector_type (intHI_type_node, 4);
10235 tree v2hi = build_vector_type (intHI_type_node, 2);
10236 tree v2si = build_vector_type (intSI_type_node, 2);
10237 tree v1si = build_vector_type (intSI_type_node, 1);
10238
10239 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10240 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10241 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10242 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10243 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10244 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10245 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10246 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10247 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10248 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10249 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10250 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10251 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10252 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10253 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10254 v8qi, v8qi,
10255 intDI_type_node, 0);
10256 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10257 v8qi, v8qi, 0);
10258 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10259 v8qi, v8qi, 0);
10260 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10261 intDI_type_node,
10262 intDI_type_node, 0);
10263 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10264 intSI_type_node,
10265 intSI_type_node, 0);
10266 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10267 ptr_type_node,
10268 intSI_type_node, 0);
10269 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10270 ptr_type_node,
10271 intDI_type_node, 0);
10272 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10273 ptr_type_node,
10274 ptr_type_node, 0);
10275 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10276 ptr_type_node,
10277 ptr_type_node, 0);
10278 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10279 v4hi, v4hi, 0);
10280 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10281 v2si, v2si, 0);
10282 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10283 v4hi, v4hi, 0);
10284 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10285 v2si, v2si, 0);
10286 tree void_ftype_di = build_function_type_list (void_type_node,
10287 intDI_type_node, 0);
10288 tree di_ftype_void = build_function_type_list (intDI_type_node,
10289 void_type_node, 0);
10290 tree void_ftype_si = build_function_type_list (void_type_node,
10291 intSI_type_node, 0);
10292 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10293 float_type_node,
10294 float_type_node, 0);
10295 tree df_ftype_df_df = build_function_type_list (double_type_node,
10296 double_type_node,
10297 double_type_node, 0);
10298
10299 /* Packing and expanding vectors. */
10300 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10301 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10302 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10303 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10304 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10305 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10306 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10307 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10308 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10309 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10310
10311 /* Multiplications. */
10312 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10313 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10314 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10315 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10316 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10317 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10318 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10319 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10320 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10321 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10322 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10323 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10324 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10325 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10326
10327 /* Data aligning. */
10328 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10329 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
10330 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10331 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
10332 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10333 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
10334 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10335 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
10336
10337 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10338 SPARC_BUILTIN_WRGSR, void_ftype_di);
10339 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10340 SPARC_BUILTIN_RDGSR, di_ftype_void);
10341
10342 if (TARGET_ARCH64)
10343 {
10344 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10345 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
10346 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10347 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
10348 }
10349 else
10350 {
10351 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10352 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
10353 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10354 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
10355 }
10356
10357 /* Pixel distance. */
10358 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10359 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
10360
10361 /* Edge handling. */
10362 if (TARGET_ARCH64)
10363 {
10364 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10365 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
10366 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10367 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
10368 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10369 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
10370 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10371 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
10372 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10373 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
10374 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10375 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
10376 }
10377 else
10378 {
10379 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10380 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
10381 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10382 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
10383 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10384 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
10385 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10386 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
10387 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10388 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
10389 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10390 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
10391 }
10392
10393 /* Pixel compare. */
10394 if (TARGET_ARCH64)
10395 {
10396 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10397 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
10398 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10399 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
10400 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10401 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
10402 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10403 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
10404 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10405 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
10406 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10407 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
10408 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10409 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
10410 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10411 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
10412 }
10413 else
10414 {
10415 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10416 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
10417 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10418 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
10419 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10420 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
10421 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10422 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
10423 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10424 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
10425 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10426 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
10427 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10428 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
10429 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10430 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
10431 }
10432
10433 /* Addition and subtraction. */
10434 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10435 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
10436 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10437 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
10438 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10439 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
10440 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10441 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
10442 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10443 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
10444 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10445 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
10446 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10447 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
10448 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10449 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
10450
10451 /* Three-dimensional array addressing. */
10452 if (TARGET_ARCH64)
10453 {
10454 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10455 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
10456 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10457 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
10458 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10459 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
10460 }
10461 else
10462 {
10463 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10464 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
10465 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10466 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
10467 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10468 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
10469 }
10470
10471 if (TARGET_VIS2)
10472 {
10473 /* Edge handling. */
10474 if (TARGET_ARCH64)
10475 {
10476 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10477 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
10478 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10479 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
10480 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10481 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
10482 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10483 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
10484 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10485 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
10486 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10487 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
10488 }
10489 else
10490 {
10491 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10492 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
10493 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10494 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
10495 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10496 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
10497 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10498 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
10499 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10500 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
10501 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10502 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
10503 }
10504
10505 /* Byte mask and shuffle. */
10506 if (TARGET_ARCH64)
10507 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10508 SPARC_BUILTIN_BMASK, di_ftype_di_di);
10509 else
10510 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10511 SPARC_BUILTIN_BMASK, si_ftype_si_si);
10512 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10513 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
10514 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10515 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
10516 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10517 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
10518 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10519 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
10520 }
10521
10522 if (TARGET_VIS3)
10523 {
10524 if (TARGET_ARCH64)
10525 {
10526 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10527 SPARC_BUILTIN_CMASK8, void_ftype_di);
10528 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10529 SPARC_BUILTIN_CMASK16, void_ftype_di);
10530 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10531 SPARC_BUILTIN_CMASK32, void_ftype_di);
10532 }
10533 else
10534 {
10535 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10536 SPARC_BUILTIN_CMASK8, void_ftype_si);
10537 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10538 SPARC_BUILTIN_CMASK16, void_ftype_si);
10539 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10540 SPARC_BUILTIN_CMASK32, void_ftype_si);
10541 }
10542
10543 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
10544 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
10545
10546 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
10547 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
10548 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
10549 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
10550 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10551 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
10552 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10553 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
10554 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10555 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
10556 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10557 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
10558 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10559 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
10560 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10561 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
10562
10563 if (TARGET_ARCH64)
10564 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10565 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
10566 else
10567 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10568 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
10569
10570 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
10571 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
10572 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
10573 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
10574 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
10575 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
10576
10577 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
10578 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
10579 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
10580 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
10581 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
10582 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
10583 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
10584 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
10585 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
10586 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
10587 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
10588 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
10589 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
10590 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
10591 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
10592 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
10593
10594 if (TARGET_ARCH64)
10595 {
10596 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
10597 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
10598 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
10599 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
10600 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
10601 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
10602 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
10603 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
10604 }
10605 else
10606 {
10607 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
10608 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
10609 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
10610 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
10611 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
10612 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
10613 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
10614 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
10615 }
10616
10617 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
10618 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
10619 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
10620 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
10621 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
10622 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
10623 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
10624 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
10625 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
10626 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
10627 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
10628 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
10629
10630 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
10631 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
10632 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
10633 SPARC_BUILTIN_XMULX, di_ftype_di_di);
10634 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
10635 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
10636 }
10637
10638 if (TARGET_VIS4)
10639 {
10640 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
10641 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
10642 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
10643 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
10644 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
10645 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
10646 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
10647 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
10648
10649
10650 if (TARGET_ARCH64)
10651 {
10652 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
10653 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
10654 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
10655 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
10656 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
10657 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
10658 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
10659 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
10660 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
10661 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
10662 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
10663 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
10664 }
10665 else
10666 {
10667 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
10668 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
10669 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
10670 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
10671 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
10672 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
10673 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
10674 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
10675 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
10676 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
10677 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
10678 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
10679 }
10680
10681 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
10682 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
10683 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
10684 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
10685 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
10686 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
10687 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
10688 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
10689 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
10690 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
10691 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
10692 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
10693 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
10694 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
10695 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
10696 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
10697 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
10698 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
10699 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
10700 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
10701 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
10702 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
10703 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
10704 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
10705 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
10706 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
10707 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
10708 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
10709 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
10710 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
10711 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
10712 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
10713 }
10714 }
10715
10716 /* Implement TARGET_BUILTIN_DECL hook. */
10717
10718 static tree
sparc_builtin_decl(unsigned code,bool initialize_p ATTRIBUTE_UNUSED)10719 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10720 {
10721 if (code >= SPARC_BUILTIN_MAX)
10722 return error_mark_node;
10723
10724 return sparc_builtins[code];
10725 }
10726
/* Implement TARGET_EXPAND_BUILTIN hook.  */
10728
/* Expand a call EXP to a SPARC builtin into RTL and emit it.  TARGET is a
   suggested rtx for the result; SUBTARGET, TMODE and IGNORE are unused.
   Returns the result rtx (const0_rtx for a void builtin), or NULL_RTX if
   an argument was erroneous or the insn could not be generated.  */
static rtx
sparc_expand_builtin (tree exp, rtx target,
		      rtx subtarget ATTRIBUTE_UNUSED,
		      machine_mode tmode ATTRIBUTE_UNUSED,
		      int ignore ATTRIBUTE_UNUSED)
{
  /* Recover the builtin code from the called decl, then look up the insn
     recorded for it by def_builtin.  */
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
  enum insn_code icode = sparc_builtins_icode[code];
  bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
  call_expr_arg_iterator iter;
  int arg_count = 0;
  rtx pat, op[4];
  tree arg;

  if (nonvoid)
    {
      /* op[0] holds the result.  Reuse TARGET when its mode and predicate
	 match the insn's output operand; otherwise use a fresh pseudo.  */
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	op[0] = gen_reg_rtx (tmode);
      else
	op[0] = target;
    }

  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      const struct insn_operand_data *insn_op;
      int idx;

      if (arg == error_mark_node)
	return NULL_RTX;

      arg_count++;
      /* For a void builtin there is no output operand, so the first
	 argument maps to insn operand 0 rather than 1.  */
      idx = arg_count - !nonvoid;
      insn_op = &insn_data[icode].operand[idx];
      op[arg_count] = expand_normal (arg);

      if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
	{
	  /* The FSR builtins take a pointer argument; force it into a
	     valid address and wrap it in an SImode MEM.  */
	  if (!address_operand (op[arg_count], SImode))
	    {
	      op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
	      op[arg_count] = copy_addr_to_reg (op[arg_count]);
	    }
	  op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
	}

      /* Single-element vector arguments arrive in their scalar mode;
	 view them in the vector mode the insn pattern expects.  */
      else if (insn_op->mode == V1DImode
	       && GET_MODE (op[arg_count]) == DImode)
	op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);

      else if (insn_op->mode == V1SImode
	       && GET_MODE (op[arg_count]) == SImode)
	op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);

      /* Last resort: copy the operand into a register of the right mode
	 so the insn's predicate accepts it.  */
      if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
							insn_op->mode))
	op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
    }

  /* Generate the insn with the right number of operands.  */
  switch (arg_count)
    {
    case 0:
      pat = GEN_FCN (icode) (op[0]);
      break;
    case 1:
      if (nonvoid)
	pat = GEN_FCN (icode) (op[0], op[1]);
      else
	pat = GEN_FCN (icode) (op[1]);
      break;
    case 2:
      pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;
    case 3:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;
    default:
      gcc_unreachable ();
    }

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);

  return (nonvoid ? op[0] : const0_rtx);
}
10819
10820 /* Return the upper 16 bits of the 8x16 multiplication. */
10821
/* Return the rounded high part of the 8x16 product E8 * E16, i.e. the
   16-bit result of the VIS fmul8x16 per-element operation.  */

static int
sparc_vis_mul8x16 (int e8, int e16)
{
  int product = e8 * e16;
  return (product + 128) / 256;
}
10827
10828 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
10829 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
10830
10831 static void
sparc_handle_vis_mul8x16(tree * n_elts,enum sparc_builtins fncode,tree inner_type,tree cst0,tree cst1)10832 sparc_handle_vis_mul8x16 (tree *n_elts, enum sparc_builtins fncode,
10833 tree inner_type, tree cst0, tree cst1)
10834 {
10835 unsigned i, num = VECTOR_CST_NELTS (cst0);
10836 int scale;
10837
10838 switch (fncode)
10839 {
10840 case SPARC_BUILTIN_FMUL8X16:
10841 for (i = 0; i < num; ++i)
10842 {
10843 int val
10844 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10845 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
10846 n_elts[i] = build_int_cst (inner_type, val);
10847 }
10848 break;
10849
10850 case SPARC_BUILTIN_FMUL8X16AU:
10851 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
10852
10853 for (i = 0; i < num; ++i)
10854 {
10855 int val
10856 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10857 scale);
10858 n_elts[i] = build_int_cst (inner_type, val);
10859 }
10860 break;
10861
10862 case SPARC_BUILTIN_FMUL8X16AL:
10863 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
10864
10865 for (i = 0; i < num; ++i)
10866 {
10867 int val
10868 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10869 scale);
10870 n_elts[i] = build_int_cst (inner_type, val);
10871 }
10872 break;
10873
10874 default:
10875 gcc_unreachable ();
10876 }
10877 }
10878
10879 /* Implement TARGET_FOLD_BUILTIN hook.
10880
10881 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
10882 result of the function call is ignored. NULL_TREE is returned if the
10883 function could not be folded. */
10884
10885 static tree
sparc_fold_builtin(tree fndecl,int n_args ATTRIBUTE_UNUSED,tree * args,bool ignore)10886 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10887 tree *args, bool ignore)
10888 {
10889 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10890 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
10891 tree arg0, arg1, arg2;
10892
10893 if (ignore)
10894 switch (code)
10895 {
10896 case SPARC_BUILTIN_LDFSR:
10897 case SPARC_BUILTIN_STFSR:
10898 case SPARC_BUILTIN_ALIGNADDR:
10899 case SPARC_BUILTIN_WRGSR:
10900 case SPARC_BUILTIN_BMASK:
10901 case SPARC_BUILTIN_CMASK8:
10902 case SPARC_BUILTIN_CMASK16:
10903 case SPARC_BUILTIN_CMASK32:
10904 break;
10905
10906 default:
10907 return build_zero_cst (rtype);
10908 }
10909
10910 switch (code)
10911 {
10912 case SPARC_BUILTIN_FEXPAND:
10913 arg0 = args[0];
10914 STRIP_NOPS (arg0);
10915
10916 if (TREE_CODE (arg0) == VECTOR_CST)
10917 {
10918 tree inner_type = TREE_TYPE (rtype);
10919 tree *n_elts;
10920 unsigned i;
10921
10922 n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10923 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10924 n_elts[i] = build_int_cst (inner_type,
10925 TREE_INT_CST_LOW
10926 (VECTOR_CST_ELT (arg0, i)) << 4);
10927 return build_vector (rtype, n_elts);
10928 }
10929 break;
10930
10931 case SPARC_BUILTIN_FMUL8X16:
10932 case SPARC_BUILTIN_FMUL8X16AU:
10933 case SPARC_BUILTIN_FMUL8X16AL:
10934 arg0 = args[0];
10935 arg1 = args[1];
10936 STRIP_NOPS (arg0);
10937 STRIP_NOPS (arg1);
10938
10939 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10940 {
10941 tree inner_type = TREE_TYPE (rtype);
10942 tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10943 sparc_handle_vis_mul8x16 (n_elts, code, inner_type, arg0, arg1);
10944 return build_vector (rtype, n_elts);
10945 }
10946 break;
10947
10948 case SPARC_BUILTIN_FPMERGE:
10949 arg0 = args[0];
10950 arg1 = args[1];
10951 STRIP_NOPS (arg0);
10952 STRIP_NOPS (arg1);
10953
10954 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10955 {
10956 tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
10957 unsigned i;
10958 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10959 {
10960 n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
10961 n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
10962 }
10963
10964 return build_vector (rtype, n_elts);
10965 }
10966 break;
10967
10968 case SPARC_BUILTIN_PDIST:
10969 case SPARC_BUILTIN_PDISTN:
10970 arg0 = args[0];
10971 arg1 = args[1];
10972 STRIP_NOPS (arg0);
10973 STRIP_NOPS (arg1);
10974 if (code == SPARC_BUILTIN_PDIST)
10975 {
10976 arg2 = args[2];
10977 STRIP_NOPS (arg2);
10978 }
10979 else
10980 arg2 = integer_zero_node;
10981
10982 if (TREE_CODE (arg0) == VECTOR_CST
10983 && TREE_CODE (arg1) == VECTOR_CST
10984 && TREE_CODE (arg2) == INTEGER_CST)
10985 {
10986 bool overflow = false;
10987 widest_int result = wi::to_widest (arg2);
10988 widest_int tmp;
10989 unsigned i;
10990
10991 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10992 {
10993 tree e0 = VECTOR_CST_ELT (arg0, i);
10994 tree e1 = VECTOR_CST_ELT (arg1, i);
10995
10996 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
10997
10998 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
10999 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11000 if (wi::neg_p (tmp))
11001 tmp = wi::neg (tmp, &neg2_ovf);
11002 else
11003 neg2_ovf = false;
11004 result = wi::add (result, tmp, SIGNED, &add2_ovf);
11005 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
11006 }
11007
11008 gcc_assert (!overflow);
11009
11010 return wide_int_to_tree (rtype, result);
11011 }
11012
11013 default:
11014 break;
11015 }
11016
11017 return NULL_TREE;
11018 }
11019
11020 /* ??? This duplicates information provided to the compiler by the
11021 ??? scheduler description. Some day, teach genautomata to output
11022 ??? the latencies and then CSE will just use that. */
11023
/* Implement TARGET_RTX_COSTS.  Store in *TOTAL the cost of rtx X whose
   enclosing expression has code OUTER_CODE.  Return true when *TOTAL is
   final, false when the subexpressions of X should still be costed.  */

static bool
sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
		 int opno ATTRIBUTE_UNUSED,
		 int *total, bool speed ATTRIBUTE_UNUSED)
{
  int code = GET_CODE (x);
  bool float_mode_p = FLOAT_MODE_P (mode);

  switch (code)
    {
    case CONST_INT:
      /* Constants fitting in a simm13 are free (usable as immediates).  */
      if (SMALL_INT (x))
	*total = 0;
      else
	*total = 2;
      return true;

    case CONST_WIDE_INT:
      /* Charge each element that does not fit in a simm13.  */
      *total = 0;
      if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
	*total += 2;
      if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
	*total += 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      *total = 8;
      return true;

    case MEM:
      /* If outer-code was a sign or zero extension, a cost
	 of COSTS_N_INSNS (1) was already added in.  This is
	 why we are subtracting it back out.  */
      if (outer_code == ZERO_EXTEND)
	{
	  *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
	}
      else if (outer_code == SIGN_EXTEND)
	{
	  *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
	}
      else if (float_mode_p)
	{
	  *total = sparc_costs->float_load;
	}
      else
	{
	  *total = sparc_costs->int_load;
	}

      return true;

    case PLUS:
    case MINUS:
      if (float_mode_p)
	*total = sparc_costs->float_plusminus;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case FMA:
      {
	rtx sub;

	gcc_assert (float_mode_p);
	*total = sparc_costs->float_mul;

	/* Cost the multiplicand and addend; a NEG wrapper is free since
	   the negating FMA forms exist as single instructions.  */
	sub = XEXP (x, 0);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, mode, FMA, 0, speed);

	sub = XEXP (x, 2);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, mode, FMA, 2, speed);
	return true;
      }

    case MULT:
      if (float_mode_p)
	*total = sparc_costs->float_mul;
      else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
	*total = COSTS_N_INSNS (25);
      else
	{
	  int bit_cost;

	  bit_cost = 0;
	  if (sparc_costs->int_mul_bit_factor)
	    {
	      int nbits;

	      /* nbits = number of set bits in a constant multiplier
		 (counted by clearing the lowest set bit each pass);
		 assume 7 for a non-constant one.  */
	      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
		{
		  unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
		  for (nbits = 0; value != 0; value &= value - 1)
		    nbits++;
		}
	      else
		nbits = 7;

	      if (nbits < 3)
		nbits = 3;
	      bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
	      bit_cost = COSTS_N_INSNS (bit_cost);
	    }

	  if (mode == DImode || !TARGET_HARD_MUL)
	    *total = sparc_costs->int_mulX + bit_cost;
	  else
	    *total = sparc_costs->int_mul + bit_cost;
	}
      return false;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
      return false;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (float_mode_p)
	{
	  if (mode == DFmode)
	    *total = sparc_costs->float_div_df;
	  else
	    *total = sparc_costs->float_div_sf;
	}
      else
	{
	  if (mode == DImode)
	    *total = sparc_costs->int_divX;
	  else
	    *total = sparc_costs->int_div;
	}
      return false;

    case NEG:
      /* Integer negation is a single instruction; float negation is
	 costed as a float move below.  */
      if (! float_mode_p)
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}
      /* FALLTHRU */

    case ABS:
    case FLOAT:
    case UNSIGNED_FLOAT:
    case FIX:
    case UNSIGNED_FIX:
    case FLOAT_EXTEND:
    case FLOAT_TRUNCATE:
      *total = sparc_costs->float_move;
      return false;

    case SQRT:
      if (mode == DFmode)
	*total = sparc_costs->float_sqrt_df;
      else
	*total = sparc_costs->float_sqrt_sf;
      return false;

    case COMPARE:
      if (float_mode_p)
	*total = sparc_costs->float_cmp;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case IF_THEN_ELSE:
      if (float_mode_p)
	*total = sparc_costs->float_cmove;
      else
	*total = sparc_costs->int_cmove;
      return false;

    case IOR:
      /* Handle the NAND vector patterns.  */
      if (sparc_vector_mode_supported_p (mode)
	  && GET_CODE (XEXP (x, 0)) == NOT
	  && GET_CODE (XEXP (x, 1)) == NOT)
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      else
	return false;

    default:
      return false;
    }
}
11230
11231 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
11232
11233 static inline bool
general_or_i64_p(reg_class_t rclass)11234 general_or_i64_p (reg_class_t rclass)
11235 {
11236 return (rclass == GENERAL_REGS || rclass == I64_REGS);
11237 }
11238
11239 /* Implement TARGET_REGISTER_MOVE_COST. */
11240
11241 static int
sparc_register_move_cost(machine_mode mode ATTRIBUTE_UNUSED,reg_class_t from,reg_class_t to)11242 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11243 reg_class_t from, reg_class_t to)
11244 {
11245 bool need_memory = false;
11246
11247 if (from == FPCC_REGS || to == FPCC_REGS)
11248 need_memory = true;
11249 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
11250 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
11251 {
11252 if (TARGET_VIS3)
11253 {
11254 int size = GET_MODE_SIZE (mode);
11255 if (size == 8 || size == 4)
11256 {
11257 if (! TARGET_ARCH32 || size == 4)
11258 return 4;
11259 else
11260 return 6;
11261 }
11262 }
11263 need_memory = true;
11264 }
11265
11266 if (need_memory)
11267 {
11268 if (sparc_cpu == PROCESSOR_ULTRASPARC
11269 || sparc_cpu == PROCESSOR_ULTRASPARC3
11270 || sparc_cpu == PROCESSOR_NIAGARA
11271 || sparc_cpu == PROCESSOR_NIAGARA2
11272 || sparc_cpu == PROCESSOR_NIAGARA3
11273 || sparc_cpu == PROCESSOR_NIAGARA4
11274 || sparc_cpu == PROCESSOR_NIAGARA7)
11275 return 12;
11276
11277 return 6;
11278 }
11279
11280 return 2;
11281 }
11282
11283 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
11284 This is achieved by means of a manual dynamic stack space allocation in
11285 the current frame. We make the assumption that SEQ doesn't contain any
11286 function calls, with the possible exception of calls to the GOT helper. */
11287
static void
emit_and_preserve (rtx seq, rtx reg, rtx reg2)
{
  /* We must preserve the lowest 16 words for the register save area.  */
  HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
  /* We really need only 2 words of fresh stack space.  */
  HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);

  /* The first scratch slot, just above the register save area
     (accounting for the stack bias in 64-bit mode).  */
  rtx slot
    = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
					     SPARC_STACK_BIAS + offset));

  /* Allocate the space, spill REG (and REG2 in the following word),
     run SEQ, then reload the registers and release the space.  */
  emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
  emit_insn (gen_rtx_SET (slot, reg));
  if (reg2)
    emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
			    reg2));
  emit_insn (seq);
  if (reg2)
    emit_insn (gen_rtx_SET (reg2,
			    adjust_address (slot, word_mode, UNITS_PER_WORD)));
  emit_insn (gen_rtx_SET (reg, slot));
  emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
}
11312
11313 /* Output the assembler code for a thunk function. THUNK_DECL is the
11314 declaration for the thunk function itself, FUNCTION is the decl for
11315 the target function. DELTA is an immediate constant offset to be
11316 added to THIS. If VCALL_OFFSET is nonzero, the word at address
11317 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
11318
static void
sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
		       HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		       tree function)
{
  rtx this_rtx, funexp;
  rtx_insn *insn;
  unsigned int int_arg_first;

  /* Pretend reload and the epilogue have run: the thunk body is
     emitted directly as final, hard-register RTL.  */
  reload_completed = 1;
  epilogue_completed = 1;

  emit_note (NOTE_INSN_PROLOGUE_END);

  if (TARGET_FLAT)
    {
      sparc_leaf_function_p = 1;

      int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
    }
  else if (flag_delayed_branch)
    {
      /* We will emit a regular sibcall below, so we need to instruct
	 output_sibcall that we are in a leaf function.  */
      sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;

      /* This will cause final.c to invoke leaf_renumber_regs so we
	 must behave as if we were in a not-yet-leafified function.  */
      int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
    }
  else
    {
      /* We will emit the sibcall manually below, so we will need to
	 manually spill non-leaf registers.  */
      sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;

      /* We really are in a leaf function.  */
      int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
    }

  /* Find the "this" pointer.  Normally in %o0, but in ARCH64 if the function
     returns a structure, the structure return pointer is there instead.  */
  if (TARGET_ARCH64
      && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
  else
    this_rtx = gen_rtx_REG (Pmode, int_arg_first);

  /* Add DELTA.  When possible use a plain add, otherwise load it into
     a register first.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);

      if (! SPARC_SIMM13_P (delta))
	{
	  /* %g1 is free for use as a scratch register here.  */
	  rtx scratch = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (scratch, delta_rtx);
	  delta_rtx = scratch;
	}

      /* THIS_RTX += DELTA.  */
      emit_insn (gen_add2_insn (this_rtx, delta_rtx));
    }

  /* Add the word at address (*THIS_RTX + VCALL_OFFSET).  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx scratch = gen_rtx_REG (Pmode, 1);

      gcc_assert (vcall_offset < 0);

      /* SCRATCH = *THIS_RTX.  */
      emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));

      /* Prepare for adding VCALL_OFFSET.  The difficulty is that we
	 may not have any available scratch register at this point.  */
      if (SPARC_SIMM13_P (vcall_offset))
	;
      /* This is the case if ARCH64 (unless -ffixed-g5 is passed).  */
      else if (! fixed_regs[5]
	       /* The below sequence is made up of at least 2 insns,
		  while the default method may need only one.  */
	       && vcall_offset < -8192)
	{
	  rtx scratch2 = gen_rtx_REG (Pmode, 5);
	  emit_move_insn (scratch2, vcall_offset_rtx);
	  vcall_offset_rtx = scratch2;
	}
      else
	{
	  rtx increment = GEN_INT (-4096);

	  /* VCALL_OFFSET is a negative number whose typical range can be
	     estimated as -32768..0 in 32-bit mode.  In almost all cases
	     it is therefore cheaper to emit multiple add insns than
	     spilling and loading the constant into a register (at least
	     6 insns).  */
	  while (! SPARC_SIMM13_P (vcall_offset))
	    {
	      emit_insn (gen_add2_insn (scratch, increment));
	      vcall_offset += 4096;
	    }
	  vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
	}

      /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET).  */
      emit_move_insn (scratch, gen_rtx_MEM (Pmode,
					    gen_rtx_PLUS (Pmode,
							  scratch,
							  vcall_offset_rtx)));

      /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET).  */
      emit_insn (gen_add2_insn (this_rtx, scratch));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);

  if (flag_delayed_branch)
    {
      funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
      insn = emit_call_insn (gen_sibcall (funexp));
      SIBLING_CALL_P (insn) = 1;
    }
  else
    {
      /* The hoops we have to jump through in order to generate a sibcall
	 without using delay slots...  */
      rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);

      if (flag_pic)
	{
	  spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
	  start_sequence ();
	  load_got_register ();	 /* clobbers %o7 */
	  scratch = sparc_legitimize_pic_address (funexp, scratch);
	  seq = get_insns ();
	  end_sequence ();
	  emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
	}
      else if (TARGET_ARCH32)
	{
	  /* Load the 32-bit address with a sethi/or pair.  */
	  emit_insn (gen_rtx_SET (scratch,
				  gen_rtx_HIGH (SImode, funexp)));
	  emit_insn (gen_rtx_SET (scratch,
				  gen_rtx_LO_SUM (SImode, scratch, funexp)));
	}
      else  /* TARGET_ARCH64 */
	{
	  switch (sparc_cmodel)
	    {
	    case CM_MEDLOW:
	    case CM_MEDMID:
	      /* The destination can serve as a temporary.  */
	      sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
	      break;

	    case CM_MEDANY:
	    case CM_EMBMEDANY:
	      /* The destination cannot serve as a temporary.  */
	      spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
	      start_sequence ();
	      sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
	      seq = get_insns ();
	      end_sequence ();
	      emit_and_preserve (seq, spill_reg, 0);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}

      emit_jump_insn (gen_indirect_jump (scratch));
    }

  emit_barrier ();

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worth while.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */
  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Undo the pretence established at the top.  */
  reload_completed = 0;
  epilogue_completed = 0;
}
11517
11518 /* Return true if sparc_output_mi_thunk would be able to output the
11519 assembler code for the thunk function specified by the arguments
11520 it is passed, and false otherwise. */
11521 static bool
sparc_can_output_mi_thunk(const_tree thunk_fndecl ATTRIBUTE_UNUSED,HOST_WIDE_INT delta ATTRIBUTE_UNUSED,HOST_WIDE_INT vcall_offset,const_tree function ATTRIBUTE_UNUSED)11522 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
11523 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
11524 HOST_WIDE_INT vcall_offset,
11525 const_tree function ATTRIBUTE_UNUSED)
11526 {
11527 /* Bound the loop used in the default method above. */
11528 return (vcall_offset >= -32768 || ! fixed_regs[5]);
11529 }
11530
11531 /* How to allocate a 'struct machine_function'. */
11532
static struct machine_function *
sparc_init_machine_status (void)
{
  /* Zero-initialized, garbage-collected storage for per-function data.  */
  return ggc_cleared_alloc<machine_function> ();
}
11538
11539 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11540 We need to emit DTP-relative relocations. */
11541
11542 static void
sparc_output_dwarf_dtprel(FILE * file,int size,rtx x)11543 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
11544 {
11545 switch (size)
11546 {
11547 case 4:
11548 fputs ("\t.word\t%r_tls_dtpoff32(", file);
11549 break;
11550 case 8:
11551 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
11552 break;
11553 default:
11554 gcc_unreachable ();
11555 }
11556 output_addr_const (file, x);
11557 fputs (")", file);
11558 }
11559
11560 /* Do whatever processing is required at the end of a file. */
11561
static void
sparc_file_end (void)
{
  /* If we need to emit the special GOT helper function, do so now.  */
  if (got_helper_rtx)
    {
      const char *name = XSTR (got_helper_rtx, 0);
      const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
#ifdef DWARF2_UNWIND_INFO
      bool do_cfi;
#endif

      if (USE_HIDDEN_LINKONCE)
	{
	  /* Emit the helper as a hidden, one-only (COMDAT) function so
	     that multiple objects share a single copy at link time.  */
	  tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
				  get_identifier (name),
				  build_function_type_list (void_type_node,
                                                            NULL_TREE));
	  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
					   NULL_TREE, void_type_node);
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
	  DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
	  DECL_VISIBILITY_SPECIFIED (decl) = 1;
	  resolve_unique_section (decl, 0, flag_function_sections);
	  allocate_struct_function (decl, true);
	  cfun->is_thunk = 1;
	  current_function_decl = decl;
	  init_varasm_status ();
	  assemble_start_function (decl, name);
	}
      else
	{
	  /* Otherwise just emit a plain local label in the text section.  */
	  const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
          switch_to_section (text_section);
	  if (align > 0)
	    ASM_OUTPUT_ALIGN (asm_out_file, align);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

#ifdef DWARF2_UNWIND_INFO
      do_cfi = dwarf2out_do_cfi_asm ();
      if (do_cfi)
	fprintf (asm_out_file, "\t.cfi_startproc\n");
#endif
      /* The helper adds %o7 (the return address) into the GOT register;
	 with delayed branches the add sits in the jmp's delay slot.  */
      if (flag_delayed_branch)
	fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
		 reg_name, reg_name);
      else
	fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
		 reg_name, reg_name);
#ifdef DWARF2_UNWIND_INFO
      if (do_cfi)
	fprintf (asm_out_file, "\t.cfi_endproc\n");
#endif
    }

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();

#ifdef TARGET_SOLARIS
  solaris_file_end ();
#endif
}
11627
11628 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11629 /* Implement TARGET_MANGLE_TYPE. */
11630
11631 static const char *
sparc_mangle_type(const_tree type)11632 sparc_mangle_type (const_tree type)
11633 {
11634 if (!TARGET_64BIT
11635 && TYPE_MAIN_VARIANT (type) == long_double_type_node
11636 && TARGET_LONG_DOUBLE_128)
11637 return "g";
11638
11639 /* For all other types, use normal C++ mangling. */
11640 return NULL;
11641 }
11642 #endif
11643
11644 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
11645 and BEFORE_AFTER arguments of the form X_Y. They are two-bit masks where
11646 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
11647
void
sparc_emit_membar_for_model (enum memmodel model,
			     int load_store, int before_after)
{
  /* Bits for the MEMBAR mmask field.  */
  const int LoadLoad = 1;
  const int StoreLoad = 2;
  const int LoadStore = 4;
  const int StoreStore = 8;

  /* MM accumulates the barrier bits required by MODEL; IMPLIED collects
     the bits already guaranteed by the system memory model, which are
     masked out at the end.  */
  int mm = 0, implied = 0;

  /* NOTE: the cases below deliberately fall through — each stronger
     model also gets the guarantees of the weaker ones.  */
  switch (sparc_memory_model)
    {
    case SMM_SC:
      /* Sequential Consistency.  All memory transactions are immediately
	 visible in sequential execution order.  No barriers needed.  */
      implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
      break;

    case SMM_TSO:
      /* Total Store Ordering: all memory transactions with store semantics
	 are followed by an implied StoreStore.  */
      implied |= StoreStore;

      /* If we're not looking for a raw barrier (before+after), then atomic
	 operations get the benefit of being both load and store.  */
      if (load_store == 3 && before_after == 1)
	implied |= StoreLoad;
      /* FALLTHRU */

    case SMM_PSO:
      /* Partial Store Ordering: all memory transactions with load semantics
	 are followed by an implied LoadLoad | LoadStore.  */
      implied |= LoadLoad | LoadStore;

      /* If we're not looking for a raw barrier (before+after), then atomic
	 operations get the benefit of being both load and store.  */
      if (load_store == 3 && before_after == 2)
	implied |= StoreLoad | StoreStore;
      /* FALLTHRU */

    case SMM_RMO:
      /* Relaxed Memory Ordering: no implicit bits.  */
      break;

    default:
      gcc_unreachable ();
    }

  /* Barrier before the operation: needed for release semantics.  */
  if (before_after & 1)
    {
      if (is_mm_release (model) || is_mm_acq_rel (model)
	  || is_mm_seq_cst (model))
	{
	  if (load_store & 1)
	    mm |= LoadLoad | StoreLoad;
	  if (load_store & 2)
	    mm |= LoadStore | StoreStore;
	}
    }
  /* Barrier after the operation: needed for acquire semantics.  */
  if (before_after & 2)
    {
      if (is_mm_acquire (model) || is_mm_acq_rel (model)
	  || is_mm_seq_cst (model))
	{
	  if (load_store & 1)
	    mm |= LoadLoad | LoadStore;
	  if (load_store & 2)
	    mm |= StoreLoad | StoreStore;
	}
    }

  /* Remove the bits implied by the system memory model.  */
  mm &= ~implied;

  /* For raw barriers (before+after), always emit a barrier.
     This will become a compile-time barrier if needed.  */
  if (mm || before_after == 3)
    emit_insn (gen_membar (GEN_INT (mm)));
}
11729
11730 /* Expand code to perform a 8 or 16-bit compare and swap by doing 32-bit
11731 compare and swap on the word containing the byte or half-word. */
11732
static void
sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
				  rtx oldval, rtx newval)
{
  rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
  rtx addr = gen_reg_rtx (Pmode);
  rtx off = gen_reg_rtx (SImode);
  rtx oldv = gen_reg_rtx (SImode);
  rtx newv = gen_reg_rtx (SImode);
  rtx oldvalue = gen_reg_rtx (SImode);
  rtx newvalue = gen_reg_rtx (SImode);
  rtx res = gen_reg_rtx (SImode);
  rtx resv = gen_reg_rtx (SImode);
  rtx memsi, val, mask, cc;

  /* ADDR = the word-aligned address containing the sub-word.  */
  emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));

  /* OFF = byte offset of the sub-word within that word.  */
  if (Pmode != SImode)
    addr1 = gen_lowpart (SImode, addr1);
  emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));

  memsi = gen_rtx_MEM (SImode, addr);
  set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
  MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);

  /* VAL = current contents of the containing word.  */
  val = copy_to_reg (memsi);

  /* Turn the byte offset into a left-shift bit count for the sub-word
     (XOR with 3 resp. 2 accounts for big-endian byte numbering).  */
  emit_insn (gen_rtx_SET (off,
			  gen_rtx_XOR (SImode, off,
				       GEN_INT (GET_MODE (mem) == QImode
						? 3 : 2))));

  emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));

  /* MASK = sub-word mask shifted into position within the word.  */
  if (GET_MODE (mem) == QImode)
    mask = force_reg (SImode, GEN_INT (0xff));
  else
    mask = force_reg (SImode, GEN_INT (0xffff));

  emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));

  /* VAL = the word with the sub-word cleared out.  */
  emit_insn (gen_rtx_SET (val,
			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
				       val)));

  /* OLDV/NEWV = the old/new sub-word values shifted into position
     and masked.  */
  oldval = gen_lowpart (SImode, oldval);
  emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));

  newval = gen_lowpart_common (SImode, newval);
  emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));

  emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));

  emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));

  /* Loop: retry the word-sized CAS as long as it fails only because
     the bytes OUTSIDE the sub-word changed underneath us.  */
  rtx_code_label *end_label = gen_label_rtx ();
  rtx_code_label *loop_label = gen_label_rtx ();
  emit_label (loop_label);

  emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));

  emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));

  emit_move_insn (bool_result, const1_rtx);

  emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));

  /* Success: the whole word matched.  */
  emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);

  /* RESV = observed word with the sub-word cleared out.  */
  emit_insn (gen_rtx_SET (resv,
			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
				       res)));

  emit_move_insn (bool_result, const0_rtx);

  /* If only the surrounding bytes changed, refresh VAL and retry.  */
  cc = gen_compare_reg_1 (NE, resv, val);
  emit_insn (gen_rtx_SET (val, resv));

  /* Use cbranchcc4 to separate the compare and branch!  */
  emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
				  cc, const0_rtx, loop_label));

  emit_label (end_label);

  /* Extract the observed sub-word and return it in RESULT.  */
  emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));

  emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));

  emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
}
11823
11824 /* Expand code to perform a compare-and-swap. */
11825
11826 void
sparc_expand_compare_and_swap(rtx operands[])11827 sparc_expand_compare_and_swap (rtx operands[])
11828 {
11829 rtx bval, retval, mem, oldval, newval;
11830 machine_mode mode;
11831 enum memmodel model;
11832
11833 bval = operands[0];
11834 retval = operands[1];
11835 mem = operands[2];
11836 oldval = operands[3];
11837 newval = operands[4];
11838 model = (enum memmodel) INTVAL (operands[6]);
11839 mode = GET_MODE (mem);
11840
11841 sparc_emit_membar_for_model (model, 3, 1);
11842
11843 if (reg_overlap_mentioned_p (retval, oldval))
11844 oldval = copy_to_reg (oldval);
11845
11846 if (mode == QImode || mode == HImode)
11847 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
11848 else
11849 {
11850 rtx (*gen) (rtx, rtx, rtx, rtx);
11851 rtx x;
11852
11853 if (mode == SImode)
11854 gen = gen_atomic_compare_and_swapsi_1;
11855 else
11856 gen = gen_atomic_compare_and_swapdi_1;
11857 emit_insn (gen (retval, mem, oldval, newval));
11858
11859 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
11860 if (x != bval)
11861 convert_move (bval, x, 1);
11862 }
11863
11864 sparc_emit_membar_for_model (model, 3, 2);
11865 }
11866
/* Expand SEL, the selector of a vector permutation in mode VMODE, into a
   byte-granular mask and load it by emitting the VIS2 BMASK insn, for use
   by a subsequent BSHUFFLE insn.  Each field of SEL holds an element
   index; the code spreads those indices apart and scales them to byte
   offsets, relying on the bmask insn itself to perform the final
   addition/merge with the per-byte offset pattern in T_1 (see below).  */

void
sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
{
  rtx t_1, t_2, t_3;

  /* Work on the selector as a 64-bit quantity.  */
  sel = gen_lowpart (DImode, sel);
  switch (vmode)
    {
    case V2SImode:
      /* Two 2-bit indices A and B; each selects a 4-byte element.  */
      /* inp = xxxxxxxAxxxxxxxB */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ....xxxxxxxAxxx. */
      sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
				 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
      t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
				 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .......B */
      /* t_1 = ...A.... */
      sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
      /* sel = ...A...B */
      /* Replicate each index into 4 nibbles and scale by 4 (bytes per
	 element) in one multiplication.  */
      sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
      /* sel = AAAABBBB * 4 */
      /* Per-byte offsets 0,1,2,3 added by the bmask insn below.  */
      t_1 = force_reg (SImode, GEN_INT (0x01230123));
      /* sel = { A*4, A*4+1, A*4+2, ... } */
      break;

    case V4HImode:
      /* Four 3-bit indices A..D; each selects a 2-byte element.  */
      /* inp = xxxAxxxBxxxCxxxD */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ..xxxAxxxBxxxCxx */
      /* t_2 = ....xxxAxxxBxxxC */
      /* t_3 = ......xxxAxxxBxx */
      sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
				 GEN_INT (0x07),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
				 GEN_INT (0x0700),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
				 GEN_INT (0x070000),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
				 GEN_INT (0x07000000),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .......D */
      /* t_1 = .....C.. */
      /* t_2 = ...B.... */
      /* t_3 = .A...... */
      sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
      sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
      /* sel = .A.B.C.D */
      /* Duplicate each index into 2 nibbles and scale by 2 (bytes per
	 element) in one multiplication.  */
      sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
      /* sel = AABBCCDD * 2 */
      /* Per-byte offsets 0,1 added by the bmask insn below.  */
      t_1 = force_reg (SImode, GEN_INT (0x01010101));
      /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
      break;

    case V8QImode:
      /* Eight 4-bit indices A..H; elements are single bytes so no
	 scaling is needed, only compaction of the nibbles.  */
      /* input = xAxBxCxDxExFxGxH */
      sel = expand_simple_binop (DImode, AND, sel,
				 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
					  | 0x0f0f0f0f),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .A.B.C.D.E.F.G.H */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ..A.B.C.D.E.F.G. */
      sel = expand_simple_binop (DImode, IOR, sel, t_1,
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .AABBCCDDEEFFGGH */
      sel = expand_simple_binop (DImode, AND, sel,
				 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
					  | 0xff00ff),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ..AB..CD..EF..GH */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ....AB..CD..EF.. */
      sel = expand_simple_binop (DImode, IOR, sel, t_1,
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ..ABABCDCDEFEFGH */
      sel = expand_simple_binop (DImode, AND, sel,
				 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ....ABCD....EFGH */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ........ABCD.... */
      /* The two SImode halves are merged by the bmask addition below.  */
      sel = gen_lowpart (SImode, sel);
      t_1 = gen_lowpart (SImode, t_1);
      break;

    default:
      gcc_unreachable ();
    }

  /* Always perform the final addition/merge within the bmask insn. */
  emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
}
11973
11974 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
11975
11976 static bool
sparc_frame_pointer_required(void)11977 sparc_frame_pointer_required (void)
11978 {
11979 /* If the stack pointer is dynamically modified in the function, it cannot
11980 serve as the frame pointer. */
11981 if (cfun->calls_alloca)
11982 return true;
11983
11984 /* If the function receives nonlocal gotos, it needs to save the frame
11985 pointer in the nonlocal_goto_save_area object. */
11986 if (cfun->has_nonlocal_label)
11987 return true;
11988
11989 /* In flat mode, that's it. */
11990 if (TARGET_FLAT)
11991 return false;
11992
11993 /* Otherwise, the frame pointer is required if the function isn't leaf, but
11994 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */
11995 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
11996 }
11997
11998 /* The way this is structured, we can't eliminate SFP in favor of SP
11999 if the frame pointer is required: we want to use the SFP->HFP elimination
12000 in that case. But the test in update_eliminables doesn't know we are
12001 assuming below that we only do the former elimination. */
12002
12003 static bool
sparc_can_eliminate(const int from ATTRIBUTE_UNUSED,const int to)12004 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
12005 {
12006 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
12007 }
12008
/* Return the hard frame pointer directly to bypass the stack bias.  */

static rtx
sparc_builtin_setjmp_frame_value (void)
{
  /* Hook for __builtin_setjmp: by handing back hard_frame_pointer_rtx
     rather than the (virtual) frame pointer, the V9 stack bias never
     enters the saved value.  */
  return hard_frame_pointer_rtx;
}
12016
/* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
   they won't be allocated.  */

static void
sparc_conditional_register_usage (void)
{
  /* Adjust fixed_regs, call_used_regs, the leaf register data and
     global_regs according to the selected target options.  NOTE(review):
     a value of 2 in fixed_regs below appears to be the "conditionally
     fixed" marker from the target's default register tables, i.e. not
     overridden by the user -- confirm against sparc.h.  */
  if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      /* Reserve the PIC register whenever one is defined.  */
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  /* If the user has passed -f{fixed,call-{used,saved}}-g5 */
  /* then honor it. */
  if (TARGET_ARCH32 && fixed_regs[5])
    fixed_regs[5] = 1;
  else if (TARGET_ARCH64 && fixed_regs[5] == 2)
    fixed_regs[5] = 0;
  if (! TARGET_V9)
    {
      int regno;
      /* The upper FP registers only exist on V9; fix them all.  */
      for (regno = SPARC_FIRST_V9_FP_REG;
	   regno <= SPARC_LAST_V9_FP_REG;
	   regno++)
	fixed_regs[regno] = 1;
      /* %fcc0 is used by v8 and v9. */
      for (regno = SPARC_FIRST_V9_FCC_REG + 1;
	   regno <= SPARC_LAST_V9_FCC_REG;
	   regno++)
	fixed_regs[regno] = 1;
    }
  if (! TARGET_FPU)
    {
      int regno;
      /* No FPU: fix the FP registers and FP condition-code registers
	 so the allocator never uses them.  */
      for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
	fixed_regs[regno] = 1;
    }
  /* If the user has passed -f{fixed,call-{used,saved}}-g2 */
  /* then honor it. Likewise with g3 and g4. */
  if (fixed_regs[2] == 2)
    fixed_regs[2] = ! TARGET_APP_REGS;
  if (fixed_regs[3] == 2)
    fixed_regs[3] = ! TARGET_APP_REGS;
  if (TARGET_ARCH32 && fixed_regs[4] == 2)
    fixed_regs[4] = ! TARGET_APP_REGS;
  else if (TARGET_CM_EMBMEDANY)
    /* %g4 is reserved under -mcmodel=embmedany -- NOTE(review):
       presumably as the data segment base register; confirm.  */
    fixed_regs[4] = 1;
  else if (fixed_regs[4] == 2)
    fixed_regs[4] = 0;
  if (TARGET_FLAT)
    {
      int regno;
      /* Disable leaf functions. */
      memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	leaf_reg_remap [regno] = regno;
    }
  if (TARGET_VIS)
    /* The Graphics Status Register is implicitly live with VIS.  */
    global_regs[SPARC_GSR_REG] = 1;
}
12076
/* Implement TARGET_PREFERRED_RELOAD_CLASS:

   - We can't load constants into FP registers.
   - We can't load FP constants into integer registers when soft-float,
     because there is no soft-float pattern with a r/F constraint.
   - We can't load FP constants into integer registers for TFmode unless
     it is 0.0L, because there is no movtf pattern with a r/F constraint.
   - Try and reload integer constants (symbolic or otherwise) back into
     registers directly, rather than having them dumped to memory.  */

static reg_class_t
sparc_preferred_reload_class (rtx x, reg_class_t rclass)
{
  machine_mode mode = GET_MODE (x);
  if (CONSTANT_P (x))
    {
      /* Reject the classes enumerated in the header comment above.  */
      if (FP_REG_CLASS_P (rclass)
	  || rclass == GENERAL_OR_FP_REGS
	  || rclass == GENERAL_OR_EXTRA_FP_REGS
	  || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
	  || (mode == TFmode && ! const_zero_operand (x, mode)))
	return NO_REGS;

      if (GET_MODE_CLASS (mode) == MODE_INT)
	return GENERAL_REGS;

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  /* Vector constants are only directly loadable into FP regs,
	     and then only the all-zeros and all-ones patterns.  */
	  if (! FP_REG_CLASS_P (rclass)
	      || !(const_zero_operand (x, mode)
		   || const_all_ones_operand (x, mode)))
	    return NO_REGS;
	}
    }

  if (TARGET_VIS3
      && ! TARGET_ARCH64
      && (rclass == EXTRA_FP_REGS
	  || rclass == GENERAL_OR_EXTRA_FP_REGS))
    {
      int regno = true_regnum (x);

      /* With VIS3 on 32-bit, moves between integer registers and the
	 upper FP registers must go through the lower FP registers, so
	 steer the class away from EXTRA_FP_REGS.  */
      if (SPARC_INT_REG_P (regno))
	return (rclass == EXTRA_FP_REGS
		? FP_REGS : GENERAL_OR_FP_REGS);
    }

  return rclass;
}
12126
/* Output a wide multiply instruction in V8+ mode.  INSN is the instruction,
   OPERANDS are its operands and OPCODE is the mnemonic to be used.

   The 64-bit inputs/outputs live in 32-bit register pairs; %H and %L
   denote the high and low parts of an operand and %3/%4 are scratch
   registers -- NOTE(review): per this port's print_operand conventions,
   confirm against sparc.c/sparc.md.  The strategy is always: assemble
   each 64-bit value in a single register (sllx + or), do the multiply
   there, then split the 128 bits... rather, split the 64-bit result
   back into the %H0/%L0 pair with srlx.  which_alternative == 1 is the
   case where the inputs may be combined in place -- TODO confirm
   against the insn pattern's constraint alternatives.  */

const char *
output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
{
  char mulstr[32];

  gcc_assert (! TARGET_ARCH64);

  /* Make sure the low part of operand 1 is zero-extended.  */
  if (sparc_check_64 (operands[1], insn) <= 0)
    output_asm_insn ("srl\t%L1, 0, %L1", operands);
  if (which_alternative == 1)
    output_asm_insn ("sllx\t%H1, 32, %H1", operands);
  if (GET_CODE (operands[2]) == CONST_INT)
    {
      /* Constant second operand: no register assembly needed for it.  */
      if (which_alternative == 1)
	{
	  output_asm_insn ("or\t%L1, %H1, %H1", operands);
	  sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
	  output_asm_insn (mulstr, operands);
	  return "srlx\t%L0, 32, %H0";
	}
      else
	{
	  output_asm_insn ("sllx\t%H1, 32, %3", operands);
	  output_asm_insn ("or\t%L1, %3, %3", operands);
	  sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
	  output_asm_insn (mulstr, operands);
	  output_asm_insn ("srlx\t%3, 32, %H0", operands);
	  return "mov\t%3, %L0";
	}
    }
  else if (rtx_equal_p (operands[1], operands[2]))
    {
      /* Squaring: assemble the single input once and reuse it.  */
      if (which_alternative == 1)
	{
	  output_asm_insn ("or\t%L1, %H1, %H1", operands);
	  sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
	  output_asm_insn (mulstr, operands);
	  return "srlx\t%L0, 32, %H0";
	}
      else
	{
	  output_asm_insn ("sllx\t%H1, 32, %3", operands);
	  output_asm_insn ("or\t%L1, %3, %3", operands);
	  sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
	  output_asm_insn (mulstr, operands);
	  output_asm_insn ("srlx\t%3, 32, %H0", operands);
	  return "mov\t%3, %L0";
	}
    }
  /* General case: two distinct register operands.  Zero-extend the low
     part of operand 2 as well, then assemble both 64-bit values.  */
  if (sparc_check_64 (operands[2], insn) <= 0)
    output_asm_insn ("srl\t%L2, 0, %L2", operands);
  if (which_alternative == 1)
    {
      output_asm_insn ("or\t%L1, %H1, %H1", operands);
      output_asm_insn ("sllx\t%H2, 32, %L1", operands);
      output_asm_insn ("or\t%L2, %L1, %L1", operands);
      sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
      output_asm_insn (mulstr, operands);
      return "srlx\t%L0, 32, %H0";
    }
  else
    {
      output_asm_insn ("sllx\t%H1, 32, %3", operands);
      output_asm_insn ("sllx\t%H2, 32, %4", operands);
      output_asm_insn ("or\t%L1, %3, %3", operands);
      output_asm_insn ("or\t%L2, %4, %4", operands);
      sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
      output_asm_insn (mulstr, operands);
      output_asm_insn ("srlx\t%3, 32, %H0", operands);
      return "mov\t%3, %L0";
    }
}
12202
/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
   all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn.  MODE
   and INNER_MODE are the modes describing TARGET.  */

static void
vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
		      machine_mode inner_mode)
{
  rtx t1, final_insn, sel;
  int bmask;

  t1 = gen_reg_rtx (mode);

  /* Place ELT, zero-extended to SImode, into the low part of T1.  */
  elt = convert_modes (SImode, inner_mode, elt, true);
  emit_move_insn (gen_lowpart(SImode, t1), elt);

  /* The bmask constant replicates byte indices pointing at the element
     just stored in the low word of T1, so bshuffle (t1, t1) broadcasts
     it across all fields of TARGET.  */
  switch (mode)
    {
    case V2SImode:
      final_insn = gen_bshufflev2si_vis (target, t1, t1);
      bmask = 0x45674567;
      break;
    case V4HImode:
      final_insn = gen_bshufflev4hi_vis (target, t1, t1);
      bmask = 0x67676767;
      break;
    case V8QImode:
      final_insn = gen_bshufflev8qi_vis (target, t1, t1);
      bmask = 0x77777777;
      break;
    default:
      gcc_unreachable ();
    }

  /* Load the mask into %gsr via bmask, then emit the shuffle.  */
  sel = force_reg (SImode, GEN_INT (bmask));
  emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
  emit_insn (final_insn);
}
12241
/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
   all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn.  */

static void
vector_init_fpmerge (rtx target, rtx elt)
{
  rtx t1, t2, t2_low, t3, t3_low;

  /* Place ELT, zero-extended to SImode, into the low part of T1.  */
  t1 = gen_reg_rtx (V4QImode);
  elt = convert_modes (SImode, QImode, elt, true);
  emit_move_insn (gen_lowpart (SImode, t1), elt);

  /* Each fpmerge interleaves its V4QI operands; merging a value with
     itself doubles the number of copies of ELT, so three rounds turn
     1 copy into 8.  */
  t2 = gen_reg_rtx (V8QImode);
  t2_low = gen_lowpart (V4QImode, t2);
  emit_insn (gen_fpmerge_vis (t2, t1, t1));

  t3 = gen_reg_rtx (V8QImode);
  t3_low = gen_lowpart (V4QImode, t3);
  emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));

  emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
}
12264
/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
   all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn.  */

static void
vector_init_faligndata (rtx target, rtx elt)
{
  rtx t1 = gen_reg_rtx (V4HImode);
  int i;

  /* Place ELT, zero-extended to SImode, into the low part of T1.  */
  elt = convert_modes (SImode, HImode, elt, true);
  emit_move_insn (gen_lowpart (SImode, t1), elt);

  /* Set the alignaddr offset to 6 so faligndata shifts two bytes
     (one HImode element) per application.  */
  emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
				  force_reg (SImode, GEN_INT (6)),
				  const0_rtx));

  /* Each iteration rotates ELT into the next field of TARGET.  */
  for (i = 0; i < 4; i++)
    emit_insn (gen_faligndatav4hi_vis (target, t1, target));
}
12284
/* Emit code to initialize TARGET to values for individual fields VALS.  */

void
sparc_expand_vector_init (rtx target, rtx vals)
{
  const machine_mode mode = GET_MODE (target);
  const machine_mode inner_mode = GET_MODE_INNER (mode);
  const int n_elts = GET_MODE_NUNITS (mode);
  int i, n_var = 0;
  bool all_same = true;
  rtx mem;

  /* Count the non-constant elements and detect the splat case.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx x = XVECEXP (vals, 0, i);
      if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
	n_var++;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  /* All elements constant: emit a single constant-vector move.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* Single-element vector: a plain scalar move suffices.  */
  if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
    {
      if (GET_MODE_SIZE (inner_mode) == 4)
	{
	  emit_move_insn (gen_lowpart (SImode, target),
			  gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
	  return;
	}
      else if (GET_MODE_SIZE (inner_mode) == 8)
	{
	  emit_move_insn (gen_lowpart (DImode, target),
			  gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
	  return;
	}
    }
  /* Two word-sized elements: move them into the two halves directly.  */
  else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
	   && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
    {
      emit_move_insn (gen_highpart (word_mode, target),
		      gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
      emit_move_insn (gen_lowpart (word_mode, target),
		      gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
      return;
    }

  /* 8-byte splat: use the best VIS broadcast idiom available.  */
  if (all_same && GET_MODE_SIZE (mode) == 8)
    {
      if (TARGET_VIS2)
	{
	  vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
	  return;
	}
      if (mode == V8QImode)
	{
	  vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
	  return;
	}
      if (mode == V4HImode)
	{
	  vector_init_faligndata (target, XVECEXP (vals, 0, 0));
	  return;
	}
    }

  /* Fallback: spill the elements to a stack temporary and reload the
     whole vector in one move.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
12364
/* Implement TARGET_SECONDARY_RELOAD.  Return the register class (or
   NO_REGS plus, possibly, an icode in SRI) needed as an intermediate
   when reloading X in MODE into/out of class RCLASS_I; IN_P says which
   direction.  */

static reg_class_t
sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
			machine_mode mode, secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;

  sri->icode = CODE_FOR_nothing;
  sri->extra_cost = 0;

  /* We need a temporary when loading/storing a HImode/QImode value
     between memory and the FPU registers.  This can happen when combine puts
     a paradoxical subreg in a float/fix conversion insn.  */
  if (FP_REG_CLASS_P (rclass)
      && (mode == HImode || mode == QImode)
      && (GET_CODE (x) == MEM
	  || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
	      && true_regnum (x) == -1)))
    return GENERAL_REGS;

  /* On 32-bit we need a temporary when loading/storing a DFmode value
     between unaligned memory and the upper FPU registers.  */
  if (TARGET_ARCH32
      && rclass == EXTRA_FP_REGS
      && mode == DFmode
      && GET_CODE (x) == MEM
      && ! mem_min_alignment (x, 8))
    return FP_REGS;

  /* Symbolic operands in the medium/anywhere code models need a
     special reload pattern instead of an intermediate class.  */
  if (((TARGET_CM_MEDANY
	&& symbolic_operand (x, mode))
       || (TARGET_CM_EMBMEDANY
	   && text_segment_operand (x, mode)))
      && ! flag_pic)
    {
      if (in_p)
	sri->icode = direct_optab_handler (reload_in_optab, mode);
      else
	sri->icode = direct_optab_handler (reload_out_optab, mode);
      return NO_REGS;
    }

  if (TARGET_VIS3 && TARGET_ARCH32)
    {
      int regno = true_regnum (x);

      /* When using VIS3 fp<-->int register moves, on 32-bit we have
	 to move 8-byte values in 4-byte pieces.  This only works via
	 FP_REGS, and not via EXTRA_FP_REGS.  Therefore if we try to
	 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
	 an FP_REGS intermediate move.  */
      if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
	  || ((general_or_i64_p (rclass)
	       || rclass == GENERAL_OR_FP_REGS)
	      && SPARC_FP_REG_P (regno)))
	{
	  sri->extra_cost = 2;
	  return FP_REGS;
	}
    }

  /* No secondary reload needed.  */
  return NO_REGS;
}
12429
/* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
   OPERANDS[0] in MODE.  OPERANDS[1] is the operator of the condition.
   Return true on success, false if the expansion cannot be done.  */

bool
sparc_expand_conditional_move (machine_mode mode, rtx *operands)
{
  enum rtx_code rc = GET_CODE (operands[1]);
  machine_mode cmp_mode;
  rtx cc_reg, dst, cmp;

  cmp = operands[1];
  /* DImode comparisons need 64-bit registers.  */
  if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
    return false;

  /* Quad-float comparisons go through the library without hard quad.  */
  if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
    cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);

  cmp_mode = GET_MODE (XEXP (cmp, 0));
  rc = GET_CODE (cmp);

  dst = operands[0];
  if (! rtx_equal_p (operands[2], dst)
      && ! rtx_equal_p (operands[3], dst))
    {
      /* Neither arm is the destination: move the "else" value in first,
	 using a fresh register if DST appears in the comparison.  */
      if (reg_overlap_mentioned_p (dst, cmp))
	dst = gen_reg_rtx (mode);

      emit_move_insn (dst, operands[3]);
    }
  else if (operands[2] == dst)
    {
      /* The "then" value already is the destination: swap the arms and
	 reverse the condition so only the other value needs moving.  */
      operands[2] = operands[3];

      if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
	rc = reverse_condition_maybe_unordered (rc);
      else
	rc = reverse_condition (rc);
    }

  /* Use a V9 register-contents compare directly when possible,
     otherwise materialize a condition-code register.  */
  if (XEXP (cmp, 1) == const0_rtx
      && GET_CODE (XEXP (cmp, 0)) == REG
      && cmp_mode == DImode
      && v9_regcmp_p (rc))
    cc_reg = XEXP (cmp, 0);
  else
    cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));

  cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);

  emit_insn (gen_rtx_SET (dst,
			  gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));

  /* Copy back if we had to substitute a fresh register for DST.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  return true;
}
12487
/* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
   into OPERANDS[0] in MODE, depending on the outcome of the comparison of
   OPERANDS[4] and OPERANDS[5].  OPERANDS[3] is the operator of the condition.
   FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
   code to be used for the condition mask.  */

void
sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
{
  rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
  enum rtx_code code = GET_CODE (operands[3]);

  mask = gen_reg_rtx (Pmode);
  cop0 = operands[4];
  cop1 = operands[5];
  /* Only GT/LE comparisons are emitted directly; canonicalize LT/GE by
     swapping the comparison operands.  */
  if (code == LT || code == GE)
    {
      rtx t;

      code = swap_condition (code);
      t = cop0; cop0 = cop1; cop1 = t;
    }

  gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);

  /* The VIS compare producing the element-wise mask.  */
  fcmp = gen_rtx_UNSPEC (Pmode,
			 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
			 fcode);

  /* Conversion of the mask into a %gsr bmask value.  */
  cmask = gen_rtx_UNSPEC (DImode,
			  gen_rtvec (2, mask, gsr),
			  ccode);

  /* The final bshuffle selecting between the two source vectors
     under control of %gsr.  */
  bshuf = gen_rtx_UNSPEC (mode,
			  gen_rtvec (3, operands[1], operands[2], gsr),
			  UNSPEC_BSHUFFLE);

  emit_insn (gen_rtx_SET (mask, fcmp));
  emit_insn (gen_rtx_SET (gsr, cmask));

  emit_insn (gen_rtx_SET (operands[0], bshuf));
}
12530
12531 /* On sparc, any mode which naturally allocates into the float
12532 registers should return 4 here. */
12533
12534 unsigned int
sparc_regmode_natural_size(machine_mode mode)12535 sparc_regmode_natural_size (machine_mode mode)
12536 {
12537 int size = UNITS_PER_WORD;
12538
12539 if (TARGET_ARCH64)
12540 {
12541 enum mode_class mclass = GET_MODE_CLASS (mode);
12542
12543 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
12544 size = 4;
12545 }
12546
12547 return size;
12548 }
12549
12550 /* Return TRUE if it is a good idea to tie two pseudo registers
12551 when one has mode MODE1 and one has mode MODE2.
12552 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
12553 for any hard reg, then this must be FALSE for correct output.
12554
12555 For V9 we have to deal with the fact that only the lower 32 floating
12556 point registers are 32-bit addressable. */
12557
12558 bool
sparc_modes_tieable_p(machine_mode mode1,machine_mode mode2)12559 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
12560 {
12561 enum mode_class mclass1, mclass2;
12562 unsigned short size1, size2;
12563
12564 if (mode1 == mode2)
12565 return true;
12566
12567 mclass1 = GET_MODE_CLASS (mode1);
12568 mclass2 = GET_MODE_CLASS (mode2);
12569 if (mclass1 != mclass2)
12570 return false;
12571
12572 if (! TARGET_V9)
12573 return true;
12574
12575 /* Classes are the same and we are V9 so we have to deal with upper
12576 vs. lower floating point registers. If one of the modes is a
12577 4-byte mode, and the other is not, we have to mark them as not
12578 tieable because only the lower 32 floating point register are
12579 addressable 32-bits at a time.
12580
12581 We can't just test explicitly for SFmode, otherwise we won't
12582 cover the vector mode cases properly. */
12583
12584 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
12585 return true;
12586
12587 size1 = GET_MODE_SIZE (mode1);
12588 size2 = GET_MODE_SIZE (mode2);
12589 if ((size1 > 4 && size2 == 4)
12590 || (size2 > 4 && size1 == 4))
12591 return false;
12592
12593 return true;
12594 }
12595
12596 /* Implement TARGET_CSTORE_MODE. */
12597
12598 static machine_mode
sparc_cstore_mode(enum insn_code icode ATTRIBUTE_UNUSED)12599 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
12600 {
12601 return (TARGET_ARCH64 ? DImode : SImode);
12602 }
12603
/* Return the compound expression made of T1 and T2, i.e. a tree that
   evaluates T1 for its side effects and then T2.  */

static inline tree
compound_expr (tree t1, tree t2)
{
  return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
}
12611
/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  Fill in *HOLD,
   *CLEAR and *UPDATE with the GENERIC trees that respectively save and
   clear the FP environment, re-clear it, and restore it while raising
   any exceptions that occurred, around an atomic C11 FP assignment.  */

static void
sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  /* Nothing to do without FP state.  */
  if (!TARGET_FPU)
    return;

  /* Bit masks within the %fsr: accrued exception bits and trap enable
     bits.  */
  const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
  const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;

  /* We generate the equivalent of feholdexcept (&fenv_var):

       unsigned int fenv_var;
       __builtin_store_fsr (&fenv_var);

       unsigned int tmp1_var;
       tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);

       __builtin_load_fsr (&tmp1_var);  */

  tree fenv_var = create_tmp_var_raw (unsigned_type_node);
  /* The variables are passed by address to the FSR builtins.  */
  TREE_ADDRESSABLE (fenv_var) = 1;
  tree fenv_addr = build_fold_addr_expr (fenv_var);
  tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
  tree hold_stfsr
    = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
	      build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);

  tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (tmp1_var) = 1;
  tree masked_fenv_var
    = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
	      build_int_cst (unsigned_type_node,
			     ~(accrued_exception_mask | trap_enable_mask)));
  tree hold_mask
    = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
	      NULL_TREE, NULL_TREE);

  tree tmp1_addr = build_fold_addr_expr (tmp1_var);
  tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
  tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);

  *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);

  /* We reload the value of tmp1_var to clear the exceptions:

       __builtin_load_fsr (&tmp1_var);  */

  *clear = build_call_expr (ldfsr, 1, tmp1_addr);

  /* We generate the equivalent of feupdateenv (&fenv_var):

       unsigned int tmp2_var;
       __builtin_store_fsr (&tmp2_var);

       __builtin_load_fsr (&fenv_var);

       if (SPARC_LOW_FE_EXCEPT_VALUES)
	 tmp2_var >>= 5;
       __atomic_feraiseexcept ((int) tmp2_var);  */

  tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (tmp2_var) = 1;
  tree tmp2_addr = build_fold_addr_expr (tmp2_var);
  tree update_stfsr
    = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
	      build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);

  tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);

  tree atomic_feraiseexcept
    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  tree update_call
    = build_call_expr (atomic_feraiseexcept, 1,
		       fold_convert (integer_type_node, tmp2_var));

  /* On targets whose fe* exception macros use the low bits, shift the
     accrued exception bits down before raising them.  */
  if (SPARC_LOW_FE_EXCEPT_VALUES)
    {
      tree shifted_tmp2_var
	= build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
		  build_int_cst (unsigned_type_node, 5));
      tree update_shift
	= build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
      update_call = compound_expr (update_shift, update_call);
    }

  *update
    = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
}
12702
12703 #include "gt-sparc.h"
12704