1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2018 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "memmodel.h"
33 #include "gimple.h"
34 #include "df.h"
35 #include "tm_p.h"
36 #include "stringpool.h"
37 #include "attribs.h"
38 #include "expmed.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "diagnostic-core.h"
44 #include "alias.h"
45 #include "fold-const.h"
46 #include "stor-layout.h"
47 #include "calls.h"
48 #include "varasm.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "debug.h"
54 #include "cfgrtl.h"
55 #include "common/common-target.h"
56 #include "gimplify.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "params.h"
60 #include "tree-pass.h"
61 #include "context.h"
62 #include "builtins.h"
63 #include "tree-vector-builder.h"
64
65 /* This file should be included last. */
66 #include "target-def.h"
67
68 /* Processor costs */
69
/* Describes the cost of various operations for one processor model.
   Every field except INT_MUL_BIT_FACTOR and SHIFT_PENALTY is expressed
   in COSTS_N_INSNS units (see the per-processor tables below);
   SPARC_COSTS below points at the table for the selected processor.
   NB: the tables use positional aggregate initialization, so the field
   order here must not change.  */
struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* Integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply cost is fixed,
     and not variable.  (Raw divisor, not COSTS_N_INSNS.)  */
  const int int_mul_bit_factor;

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* Penalty for shifts, due to scheduling rules etc.
     (Raw value, not COSTS_N_INSNS.)  */
  const int shift_penalty;
};
143
/* Operation costs for the Cypress processor.  */
static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

/* Operation costs for the SuperSPARC processor.  */
static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};

/* Operation costs for the HyperSPARC processor.  */
static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

/* Operation costs for the LEON processor.  */
static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

/* Operation costs for the LEON3 processor.  */
static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

/* Operation costs for the SPARClet processor.  */
static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

/* Operation costs for the UltraSPARC processor.  */
static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};

/* Operation costs for the UltraSPARC III processor.  */
static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};

/* Operation costs for the Niagara (UltraSPARC T1) processor.  */
static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

/* Operation costs for the Niagara-2 processor.  */
static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

/* Operation costs for the Niagara-3 processor.  */
static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

/* Operation costs for the Niagara-4 processor.  */
static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

/* Operation costs for the Niagara-7 processor.  */
static const
struct processor_costs niagara7_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

/* Operation costs for the M8 processor.  */
static const
struct processor_costs m8_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (26), /* fdivs */
  COSTS_N_INSNS (30), /* fdivd */
  COSTS_N_INSNS (33), /* fsqrts */
  COSTS_N_INSNS (41), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (10), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (57), /* udiv/sdiv */
  COSTS_N_INSNS (30), /* udivx/sdivx */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

/* Cost table in effect for the target processor.  Defaults to the
   Cypress table; presumably repointed when a different -mcpu is
   selected (by sparc_option_override, declared below) -- TODO confirm,
   the assignment is not visible in this chunk.  */
static const struct processor_costs *sparc_costs = &cypress_costs;
481
#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way how to find out if
   somebody does not branch between the sethi and jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
/* Without assembler relaxation support, reserve the slot only in the
   cases where the sethi/jmp sequence cannot be used anyway: 64-bit code
   models other than medlow, or PIC code.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif
492
/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.
   Indexed by hard register number; an entry of -1 marks a register
   that may not be remapped.
   NOTE(review): plain 'char' has implementation-defined signedness, so
   the -1 sentinels rely on char being signed (or on consumers comparing
   against (char) -1) on the host compiler -- confirm at the use sites,
   which are not visible in this chunk.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,	/* %g0-%g7 pass through */
  -1, -1, -1, -1, -1, -1, 14, -1,	/* %o registers: only %sp (14) kept */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* %l registers: never in leaf funcs */
  8, 9, 10, 11, 12, 13, -1, 15,	/* %i registers -> %o, %fp blocked */

  32, 33, 34, 35, 36, 37, 38, 39,	/* FP registers map to themselves */
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};
511
/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  Mirrors the remappable entries of
   leaf_reg_remap above: %o registers other than %sp and all
   %l registers are excluded.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};
529
/* Per-function machine-dependent state, created by
   sparc_init_machine_status (declared below) and managed by the
   garbage collector (GTY).  */
struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

/* Shorthand accessors for the current function's machine-dependent data.  */
#define sparc_frame_size		cfun->machine->frame_size
#define sparc_apparent_frame_size	cfun->machine->apparent_frame_size
#define sparc_frame_base_reg		cfun->machine->frame_base_reg
#define sparc_frame_base_offset		cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs		cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p		cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p	cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p	cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;
572
/* Forward declarations of local helpers; most of these implement the
   target hooks registered in the TARGET_* macro block below.  */
static void sparc_option_override (void);
static void sparc_init_modes (void);
static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
				const_tree, bool, bool, int *, int *);

/* Scheduling cost adjustments for specific processors.  */
static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);

/* Constant loading and assembly output helpers.  */
static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx_insn *);
static void sparc_asm_function_prologue (FILE *);
static void sparc_asm_function_epilogue (FILE *);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

/* TFmode (long double) operation expanders.  */
static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_fpu_init_builtins (void);
static void sparc_vis_init_builtins (void);
static tree sparc_builtin_decl (unsigned, bool);
static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static int sparc_register_move_cost (machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static rtx sparc_struct_value_rtx (tree, int);
static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
						 int *, const_tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     machine_mode, const_tree, bool);
static void sparc_function_arg_advance (cumulative_args_t,
					machine_mode, const_tree, bool);
static rtx sparc_function_arg_1 (cumulative_args_t,
				 machine_mode, const_tree, bool, bool);
static rtx sparc_function_arg (cumulative_args_t,
			       machine_mode, const_tree, bool);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					machine_mode, const_tree, bool);
static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
static unsigned int sparc_function_arg_boundary (machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    machine_mode, tree, bool);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static rtx sparc_builtin_setjmp_frame_value (void);
static void sparc_conditional_register_usage (void);
static bool sparc_use_pseudo_pic_reg (void);
static void sparc_init_pic_reg (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static machine_mode sparc_preferred_simd_mode (scalar_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_lra_p (void);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, machine_mode, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   machine_mode,
					   secondary_reload_info *);
static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
					   reg_class_t);
static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
static unsigned int sparc_min_arithmetic_precision (void);
static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
static bool sparc_modes_tieable_p (machine_mode, machine_mode);
static bool sparc_can_change_mode_class (machine_mode, machine_mode,
					 reg_class_t);
static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);
static bool sparc_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
					    const vec_perm_indices &);
696
#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes, supplied entirely by the
   subtarget; terminated by an all-NULL sentinel entry.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       do_diagnostic, handler, exclude } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};
#endif

/* Option handling.  */

/* Parsed code-model value (presumably from -mcmodel=; the parsing code
   is not visible in this chunk -- TODO confirm).  */
enum cmodel sparc_cmodel;

/* One flag per global register %g0-%g7.  NOTE(review): the consumers of
   this array are not visible in this chunk; from the name it records
   which hard regs have already been "printed" -- confirm at use sites.  */
char sparc_hard_reg_printed[8];
714
/* Initialize the GCC target structure.  Each #undef/#define pair below
   overrides a default hook in target-def.h with the SPARC implementation
   declared above; the final TARGET_INITIALIZER (past this chunk) collects
   them into the target vector.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

/* Instruction scheduling hooks.  */
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs

/* Address legitimization hooks.  */
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

/* Builtin function hooks.  */
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sparc_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

/* Cost hooks; the address-cost hook is the trivial constant-0 helper.  */
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

/* Argument-passing hooks.  */
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

/* Varargs hooks.  */
#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

/* Vector hooks.  */
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#ifdef TARGET_THREAD_SSP_OFFSET
#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
#endif

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

/* Register allocation and reload hooks.  */
#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG sparc_init_pic_reg

#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG sparc_use_pseudo_pic_reg

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LRA_P
#define TARGET_LRA_P sparc_lra_p

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

/* Assembly output hooks.  */
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

/* The value stored by LDSTUB.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff

#undef TARGET_CSTORE_MODE
#define TARGET_CSTORE_MODE sparc_cstore_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv
922
923 #undef TARGET_FIXED_CONDITION_CODE_REGS
924 #define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs
925
926 #undef TARGET_MIN_ARITHMETIC_PRECISION
927 #define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision
928
929 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
930 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
931
932 #undef TARGET_HARD_REGNO_NREGS
933 #define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
934 #undef TARGET_HARD_REGNO_MODE_OK
935 #define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok
936
937 #undef TARGET_MODES_TIEABLE_P
938 #define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p
939
940 #undef TARGET_CAN_CHANGE_MODE_CLASS
941 #define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class
942
943 #undef TARGET_CONSTANT_ALIGNMENT
944 #define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment
945
946 #undef TARGET_VECTORIZE_VEC_PERM_CONST
947 #define TARGET_VECTORIZE_VEC_PERM_CONST sparc_vectorize_vec_perm_const
948
/* The target hook vector, filled in from the TARGET_* macros above.  */
struct gcc_target targetm = TARGET_INITIALIZER;
950
951 /* Return the memory reference contained in X if any, zero otherwise. */
952
953 static rtx
mem_ref(rtx x)954 mem_ref (rtx x)
955 {
956 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
957 x = XEXP (x, 0);
958
959 if (MEM_P (x))
960 return x;
961
962 return NULL_RTX;
963 }
964
965 /* True if any of INSN's source register(s) is REG. */
966
967 static bool
insn_uses_reg_p(rtx_insn * insn,unsigned int reg)968 insn_uses_reg_p (rtx_insn *insn, unsigned int reg)
969 {
970 extract_insn (insn);
971 return ((REG_P (recog_data.operand[1])
972 && REGNO (recog_data.operand[1]) == reg)
973 || (recog_data.n_operands == 3
974 && REG_P (recog_data.operand[2])
975 && REGNO (recog_data.operand[2]) == reg));
976 }
977
978 /* True if INSN is a floating-point division or square-root. */
979
980 static bool
div_sqrt_insn_p(rtx_insn * insn)981 div_sqrt_insn_p (rtx_insn *insn)
982 {
983 if (GET_CODE (PATTERN (insn)) != SET)
984 return false;
985
986 switch (get_attr_type (insn))
987 {
988 case TYPE_FPDIVS:
989 case TYPE_FPSQRTS:
990 case TYPE_FPDIVD:
991 case TYPE_FPSQRTD:
992 return true;
993 default:
994 return false;
995 }
996 }
997
998 /* True if INSN is a floating-point instruction. */
999
1000 static bool
fpop_insn_p(rtx_insn * insn)1001 fpop_insn_p (rtx_insn *insn)
1002 {
1003 if (GET_CODE (PATTERN (insn)) != SET)
1004 return false;
1005
1006 switch (get_attr_type (insn))
1007 {
1008 case TYPE_FPMOVE:
1009 case TYPE_FPCMOVE:
1010 case TYPE_FP:
1011 case TYPE_FPCMP:
1012 case TYPE_FPMUL:
1013 case TYPE_FPDIVS:
1014 case TYPE_FPSQRTS:
1015 case TYPE_FPDIVD:
1016 case TYPE_FPSQRTD:
1017 return true;
1018 default:
1019 return false;
1020 }
1021 }
1022
1023 /* True if INSN is an atomic instruction. */
1024
1025 static bool
atomic_insn_for_leon3_p(rtx_insn * insn)1026 atomic_insn_for_leon3_p (rtx_insn *insn)
1027 {
1028 switch (INSN_CODE (insn))
1029 {
1030 case CODE_FOR_swapsi:
1031 case CODE_FOR_ldstub:
1032 case CODE_FOR_atomic_compare_and_swap_leon3_1:
1033 return true;
1034 default:
1035 return false;
1036 }
1037 }
1038
1039 /* We use a machine specific pass to enable workarounds for errata.
1040
1041 We need to have the (essentially) final form of the insn stream in order
1042 to properly detect the various hazards. Therefore, this machine specific
1043 pass runs as late as possible. */
1044
/* True if INSN is a md pattern or asm statement, i.e. a real instruction
   rather than a USE, CLOBBER or debug annotation.  */
#define USEFUL_INSN_P(INSN)			\
  (NONDEBUG_INSN_P (INSN)			\
   && GET_CODE (PATTERN (INSN)) != USE		\
   && GET_CODE (PATTERN (INSN)) != CLOBBER)
1050
/* Main function of the errata workaround pass.  Walk the (essentially
   final) insn stream, detect the hazardous sequences of the various
   LEON/UT/AT697F errata that are enabled, and insert NOPs to break
   them.  Always returns 0 (no extra TODO flags).  */

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx_insn *insn, *next;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;
      rtx_insn *jump;
      rtx_sequence *seq;

      /* Look into the instruction in a delay slot.  For a SEQUENCE, the
	 branch is insn 0 and the delay-slot instruction is insn 1; the
	 checks below are applied to the delay-slot instruction.  */
      if (NONJUMP_INSN_P (insn)
	  && (seq = dyn_cast <rtx_sequence *> (PATTERN (insn))))
	{
	  jump = seq->insn (0);
	  insn = seq->insn (1);
	}
      else if (JUMP_P (insn))
	jump = insn;
      else
	jump = NULL;

      /* Place a NOP at the branch target of an integer branch if it is a
	 floating-point operation or a floating-point branch.  */
      if (sparc_fix_gr712rc
	  && jump
	  && jump_to_label_p (jump)
	  && get_attr_branch_type (jump) == BRANCH_TYPE_ICC)
	{
	  rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
	  if (target
	      && (fpop_insn_p (target)
		  || (JUMP_P (target)
		      && get_attr_branch_type (target) == BRANCH_TYPE_FCC)))
	    emit_insn_before (gen_nop (), target);
	}

      /* Insert a NOP between load instruction and atomic instruction.  Insert
	 a NOP at branch target if there is a load in delay slot and an atomic
	 instruction at branch target.  */
      if (sparc_fix_ut700
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && mem_ref (SET_SRC (set))
	  && REG_P (SET_DEST (set)))
	{
	  if (jump && jump_to_label_p (jump))
	    {
	      rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
	      if (target && atomic_insn_for_leon3_p (target))
		emit_insn_before (gen_nop (), target);
	    }

	  next = next_active_insn (insn);
	  if (!next)
	    break;

	  if (atomic_insn_for_leon3_p (next))
	    insert_nop = true;
	}

      /* Look for a sequence that starts with a fdiv or fsqrt instruction and
	 ends with another fdiv or fsqrt instruction with no dependencies on
	 the former, along with an appropriate pattern in between.  */
      if (sparc_fix_lost_divsqrt
	  && NONJUMP_INSN_P (insn)
	  && div_sqrt_insn_p (insn))
	{
	  int i;
	  int fp_found = 0;
	  rtx_insn *after;

	  const unsigned int dest_reg = REGNO (SET_DEST (single_set (insn)));

	  next = next_active_insn (insn);
	  if (!next)
	    break;

	  /* Scan a window of up to 4 active instructions after the
	     div/sqrt.  */
	  for (after = next, i = 0; i < 4; i++)
	    {
	      /* Count floating-point operations.  */
	      if (i != 3 && fpop_insn_p (after))
		{
		  /* If the insn uses the destination register of
		     the div/sqrt, then it cannot be problematic.  */
		  if (insn_uses_reg_p (after, dest_reg))
		    break;
		  fp_found++;
		}

	      /* Count floating-point loads.  */
	      if (i != 3
		  && (set = single_set (after)) != NULL_RTX
		  && REG_P (SET_DEST (set))
		  && REGNO (SET_DEST (set)) > 31)
		{
		  /* If the insn uses the destination register of
		     the div/sqrt, then it cannot be problematic.  */
		  if (REGNO (SET_DEST (set)) == dest_reg)
		    break;
		  fp_found++;
		}

	      /* Check if this is a problematic sequence.  */
	      if (i > 1
		  && fp_found >= 2
		  && div_sqrt_insn_p (after))
		{
		  /* If this is the short version of the problematic
		     sequence we add two NOPs in a row to also prevent
		     the long version.  */
		  if (i == 2)
		    emit_insn_before (gen_nop (), next);
		  insert_nop = true;
		  break;
		}

	      /* No need to scan past a second div/sqrt.  */
	      if (div_sqrt_insn_p (after))
		break;

	      /* Insert NOP before branch.  */
	      if (i < 3
		  && (!NONJUMP_INSN_P (after)
		      || GET_CODE (PATTERN (after)) == SEQUENCE))
		{
		  insert_nop = true;
		  break;
		}

	      after = next_active_insn (after);
	      if (!after)
		break;
	    }
	}

      /* Look for either of these two sequences:

	 Sequence A:
	 1. store of word size or less (e.g. st / stb / sth / stf)
	 2. any single instruction that is not a load or store
	 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)

	 Sequence B:
	 1. store of double word size (e.g. std / stdf)
	 2. any store instruction (e.g. st / stb / sth / stf / std / stdf)  */
      if (sparc_fix_b2bst
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && MEM_P (SET_DEST (set)))
	{
	  /* Sequence B begins with a double-word store.  */
	  bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
	  rtx_insn *after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;

	  for (after = next, i = 0; i < 2; i++)
	    {
	      /* Skip empty assembly statements.  */
	      if ((GET_CODE (PATTERN (after)) == UNSPEC_VOLATILE)
		  || (USEFUL_INSN_P (after)
		      && (asm_noperands (PATTERN (after))>=0)
		      && !strcmp (decode_asm_operands (PATTERN (after),
						       NULL, NULL, NULL,
						       NULL, NULL), "")))
		after = next_active_insn (after);
	      if (!after)
		break;

	      /* If the insn is a branch, then it cannot be problematic.  */
	      if (!NONJUMP_INSN_P (after)
		  || GET_CODE (PATTERN (after)) == SEQUENCE)
		break;

	      /* Sequence B is only two instructions long.  */
	      if (seq_b)
		{
		  /* Add NOP if followed by a store.  */
		  if ((set = single_set (after)) != NULL_RTX
		      && MEM_P (SET_DEST (set)))
		    insert_nop = true;

		  /* Otherwise it is ok.  */
		  break;
		}

	      /* If the second instruction is a load or a store,
		 then the sequence cannot be problematic.  */
	      if (i == 0)
		{
		  if ((set = single_set (after)) != NULL_RTX
		      && (MEM_P (SET_DEST (set)) || mem_ref (SET_SRC (set))))
		    break;

		  after = next_active_insn (after);
		  if (!after)
		    break;
		}

	      /* Add NOP if third instruction is a store.  */
	      if (i == 1
		  && (set = single_set (after)) != NULL_RTX
		  && MEM_P (SET_DEST (set)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load into an odd-numbered FP register.  */
      else if (sparc_fix_at697f
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && mem_ref (SET_SRC (set))
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31
	       && REGNO (SET_DEST (set)) % 2 != 0)
	{
	  /* The wrong dependency is on the enclosing double register.  */
	  const unsigned int x = REGNO (SET_DEST (set)) - 1;
	  unsigned int src1, src2, dest;
	  int code;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  extract_insn (next);
	  code = INSN_CODE (next);

	  switch (code)
	    {
	    case CODE_FOR_adddf3:
	    case CODE_FOR_subdf3:
	    case CODE_FOR_muldf3:
	    case CODE_FOR_divdf3:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      src2 = REGNO (recog_data.operand[2]);
	      if (src1 != src2)
		{
		  /* Case [1-4]:
		     ld [address], %fx+1
		     FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
		  if ((src1 == x || src2 == x)
		      && (dest == src1 || dest == src2))
		    insert_nop = true;
		}
	      else
		{
		  /* Case 5:
		     ld [address], %fx+1
		     FPOPd %fx, %fx, %fx  */
		  if (src1 == x
		      && dest == src1
		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
		    insert_nop = true;
		}
	      break;

	    case CODE_FOR_sqrtdf2:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      /* Case 6:
		 ld [address], %fx+1
		 fsqrtd %fx, %fx  */
	      if (src1 == x && dest == src1)
		insert_nop = true;
	      break;

	    default:
	      break;
	    }
	}

      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
	       && (mem_ref (SET_SRC (set)) != NULL_RTX
		   || INSN_CODE (insn) == CODE_FOR_movsi_pic_gotdata_op)
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) < 32)
	{
	  /* There is no problem if the second memory access has a data
	     dependency on the first single-cycle load.  */
	  rtx x = SET_DEST (set);

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second memory access to/from an integer register.  */
	  if ((set = single_set (next)) != NULL_RTX)
	    {
	      rtx src = SET_SRC (set);
	      rtx dest = SET_DEST (set);
	      rtx mem;

	      /* LDD is affected.  */
	      if ((mem = mem_ref (src)) != NULL_RTX
		  && REG_P (dest)
		  && REGNO (dest) < 32
		  && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;

	      /* STD is *not* affected.  */
	      else if (MEM_P (dest)
		       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
		       && (src == CONST0_RTX (GET_MODE (dest))
			   || (REG_P (src)
			       && REGNO (src) < 32
			       && REGNO (src) != REGNO (x)))
		       && !reg_mentioned_p (x, XEXP (dest, 0)))
		insert_nop = true;

	      /* GOT accesses uses LD.  */
	      else if (INSN_CODE (next) == CODE_FOR_movsi_pic_gotdata_op
		       && !reg_mentioned_p (x, XEXP (XEXP (src, 0), 1)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load/operation into an FP register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31)
	{
	  /* Number of instructions in the problematic window.  */
	  const int n_insns = 4;
	  /* The problematic combination is with the sibling FP register.  */
	  const unsigned int x = REGNO (SET_DEST (set));
	  const unsigned int y = x ^ 1;
	  rtx_insn *after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second load/operation into the sibling FP register.  */
	  if (!((set = single_set (next)) != NULL_RTX
		&& GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
		&& REG_P (SET_DEST (set))
		&& REGNO (SET_DEST (set)) == y))
	    continue;

	  /* Look for a (possible) store from the FP register in the next N
	     instructions, but bail out if it is again modified or if there
	     is a store from the sibling FP register before this store.  */
	  for (after = next, i = 0; i < n_insns; i++)
	    {
	      bool branch_p;

	      after = next_active_insn (after);
	      if (!after)
		break;

	      /* This is a branch with an empty delay slot.  */
	      if (!NONJUMP_INSN_P (after))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = NULL;
		}
	      /* This is a branch with a filled delay slot.  */
	      else if (rtx_sequence *seq =
		       dyn_cast <rtx_sequence *> (PATTERN (after)))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = seq->insn (1);
		}
	      /* This is a regular instruction.  */
	      else
		branch_p = false;

	      if (after && (set = single_set (after)) != NULL_RTX)
		{
		  const rtx src = SET_SRC (set);
		  const rtx dest = SET_DEST (set);
		  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));

		  /* If the FP register is again modified before the store,
		     then the store isn't affected.  */
		  if (REG_P (dest)
		      && (REGNO (dest) == x
			  || (REGNO (dest) == y && size == 8)))
		    break;

		  if (MEM_P (dest) && REG_P (src))
		    {
		      /* If there is a store from the sibling FP register
			 before the store, then the store is not affected.  */
		      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
			break;

		      /* Otherwise, the store is affected.  */
		      if (REGNO (src) == x && size == 4)
			{
			  insert_nop = true;
			  break;
			}
		    }
		}

	      /* If we have a branch in the first M instructions, then we
		 cannot see the (M+2)th instruction so we play safe.  */
	      if (branch_p && i <= (n_insns - 2))
		{
		  insert_nop = true;
		  break;
		}
	    }
	}

      /* No errata pattern matched: simply step to the next insn.  */
      else
	next = NEXT_INSN (insn);

      /* Insert the NOP right after the problematic insn, i.e. just
	 before NEXT.  */
      if (insert_nop)
	emit_insn_before (gen_nop (), next);
    }

  return 0;
}
1500
1501 namespace {
1502
/* Descriptor for the late machine-specific errata workaround pass.  */
const pass_data pass_data_work_around_errata =
{
  RTL_PASS, /* type */
  "errata", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
1515
class pass_work_around_errata : public rtl_opt_pass
{
public:
  pass_work_around_errata(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_work_around_errata, ctxt)
  {}

  /* opt_pass methods: */
  /* Run the pass only when at least one errata workaround is enabled.  */
  virtual bool gate (function *)
    {
      return sparc_fix_at697f || sparc_fix_ut699 || sparc_fix_b2bst
	  || sparc_fix_gr712rc || sparc_fix_ut700 || sparc_fix_lost_divsqrt;
    }

  /* Do the actual work.  */
  virtual unsigned int execute (function *)
    {
      return sparc_do_work_around_errata ();
    }

}; // class pass_work_around_errata
1536
1537 } // anon namespace
1538
1539 rtl_opt_pass *
make_pass_work_around_errata(gcc::context * ctxt)1540 make_pass_work_around_errata (gcc::context *ctxt)
1541 {
1542 return new pass_work_around_errata (ctxt);
1543 }
1544
1545 /* Helpers for TARGET_DEBUG_OPTIONS. */
1546 static void
dump_target_flag_bits(const int flags)1547 dump_target_flag_bits (const int flags)
1548 {
1549 if (flags & MASK_64BIT)
1550 fprintf (stderr, "64BIT ");
1551 if (flags & MASK_APP_REGS)
1552 fprintf (stderr, "APP_REGS ");
1553 if (flags & MASK_FASTER_STRUCTS)
1554 fprintf (stderr, "FASTER_STRUCTS ");
1555 if (flags & MASK_FLAT)
1556 fprintf (stderr, "FLAT ");
1557 if (flags & MASK_FMAF)
1558 fprintf (stderr, "FMAF ");
1559 if (flags & MASK_FSMULD)
1560 fprintf (stderr, "FSMULD ");
1561 if (flags & MASK_FPU)
1562 fprintf (stderr, "FPU ");
1563 if (flags & MASK_HARD_QUAD)
1564 fprintf (stderr, "HARD_QUAD ");
1565 if (flags & MASK_POPC)
1566 fprintf (stderr, "POPC ");
1567 if (flags & MASK_PTR64)
1568 fprintf (stderr, "PTR64 ");
1569 if (flags & MASK_STACK_BIAS)
1570 fprintf (stderr, "STACK_BIAS ");
1571 if (flags & MASK_UNALIGNED_DOUBLES)
1572 fprintf (stderr, "UNALIGNED_DOUBLES ");
1573 if (flags & MASK_V8PLUS)
1574 fprintf (stderr, "V8PLUS ");
1575 if (flags & MASK_VIS)
1576 fprintf (stderr, "VIS ");
1577 if (flags & MASK_VIS2)
1578 fprintf (stderr, "VIS2 ");
1579 if (flags & MASK_VIS3)
1580 fprintf (stderr, "VIS3 ");
1581 if (flags & MASK_VIS4)
1582 fprintf (stderr, "VIS4 ");
1583 if (flags & MASK_VIS4B)
1584 fprintf (stderr, "VIS4B ");
1585 if (flags & MASK_CBCOND)
1586 fprintf (stderr, "CBCOND ");
1587 if (flags & MASK_DEPRECATED_V8_INSNS)
1588 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1589 if (flags & MASK_SPARCLET)
1590 fprintf (stderr, "SPARCLET ");
1591 if (flags & MASK_SPARCLITE)
1592 fprintf (stderr, "SPARCLITE ");
1593 if (flags & MASK_V8)
1594 fprintf (stderr, "V8 ");
1595 if (flags & MASK_V9)
1596 fprintf (stderr, "V9 ");
1597 }
1598
1599 static void
dump_target_flags(const char * prefix,const int flags)1600 dump_target_flags (const char *prefix, const int flags)
1601 {
1602 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1603 dump_target_flag_bits (flags);
1604 fprintf(stderr, "]\n");
1605 }
1606
1607 /* Validate and override various options, and do some machine dependent
1608 initialization. */
1609
1610 static void
sparc_option_override(void)1611 sparc_option_override (void)
1612 {
1613 static struct code_model {
1614 const char *const name;
1615 const enum cmodel value;
1616 } const cmodels[] = {
1617 { "32", CM_32 },
1618 { "medlow", CM_MEDLOW },
1619 { "medmid", CM_MEDMID },
1620 { "medany", CM_MEDANY },
1621 { "embmedany", CM_EMBMEDANY },
1622 { NULL, (enum cmodel) 0 }
1623 };
1624 const struct code_model *cmodel;
1625 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1626 static struct cpu_default {
1627 const int cpu;
1628 const enum processor_type processor;
1629 } const cpu_default[] = {
1630 /* There must be one entry here for each TARGET_CPU value. */
1631 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1632 { TARGET_CPU_v8, PROCESSOR_V8 },
1633 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1634 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1635 { TARGET_CPU_leon, PROCESSOR_LEON },
1636 { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1637 { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
1638 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1639 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1640 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1641 { TARGET_CPU_v9, PROCESSOR_V9 },
1642 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1643 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1644 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1645 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1646 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1647 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1648 { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
1649 { TARGET_CPU_m8, PROCESSOR_M8 },
1650 { -1, PROCESSOR_V7 }
1651 };
1652 const struct cpu_default *def;
1653 /* Table of values for -m{cpu,tune}=. This must match the order of
1654 the enum processor_type in sparc-opts.h. */
1655 static struct cpu_table {
1656 const char *const name;
1657 const int disable;
1658 const int enable;
1659 } const cpu_table[] = {
1660 { "v7", MASK_ISA|MASK_FSMULD, 0 },
1661 { "cypress", MASK_ISA|MASK_FSMULD, 0 },
1662 { "v8", MASK_ISA, MASK_V8 },
1663 /* TI TMS390Z55 supersparc */
1664 { "supersparc", MASK_ISA, MASK_V8 },
1665 { "hypersparc", MASK_ISA, MASK_V8 },
1666 { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
1667 { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
1668 { "leon3v7", MASK_ISA|MASK_FSMULD, MASK_LEON3 },
1669 { "sparclite", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
1670 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1671 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1672 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
1673 { "f934", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
1674 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1675 { "sparclet", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
1676 /* TEMIC sparclet */
1677 { "tsc701", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
1678 { "v9", MASK_ISA, MASK_V9 },
1679 /* UltraSPARC I, II, IIi */
1680 { "ultrasparc", MASK_ISA,
1681 /* Although insns using %y are deprecated, it is a clear win. */
1682 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1683 /* UltraSPARC III */
1684 /* ??? Check if %y issue still holds true. */
1685 { "ultrasparc3", MASK_ISA,
1686 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1687 /* UltraSPARC T1 */
1688 { "niagara", MASK_ISA,
1689 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1690 /* UltraSPARC T2 */
1691 { "niagara2", MASK_ISA,
1692 MASK_V9|MASK_POPC|MASK_VIS2 },
1693 /* UltraSPARC T3 */
1694 { "niagara3", MASK_ISA,
1695 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
1696 /* UltraSPARC T4 */
1697 { "niagara4", MASK_ISA,
1698 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1699 /* UltraSPARC M7 */
1700 { "niagara7", MASK_ISA,
1701 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
1702 /* UltraSPARC M8 */
1703 { "m8", MASK_ISA,
1704 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC|MASK_VIS4B }
1705 };
1706 const struct cpu_table *cpu;
1707 unsigned int i;
1708
1709 if (sparc_debug_string != NULL)
1710 {
1711 const char *q;
1712 char *p;
1713
1714 p = ASTRDUP (sparc_debug_string);
1715 while ((q = strtok (p, ",")) != NULL)
1716 {
1717 bool invert;
1718 int mask;
1719
1720 p = NULL;
1721 if (*q == '!')
1722 {
1723 invert = true;
1724 q++;
1725 }
1726 else
1727 invert = false;
1728
1729 if (! strcmp (q, "all"))
1730 mask = MASK_DEBUG_ALL;
1731 else if (! strcmp (q, "options"))
1732 mask = MASK_DEBUG_OPTIONS;
1733 else
1734 error ("unknown -mdebug-%s switch", q);
1735
1736 if (invert)
1737 sparc_debug &= ~mask;
1738 else
1739 sparc_debug |= mask;
1740 }
1741 }
1742
1743 /* Enable the FsMULd instruction by default if not explicitly specified by
1744 the user. It may be later disabled by the CPU (explicitly or not). */
1745 if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
1746 target_flags |= MASK_FSMULD;
1747
1748 if (TARGET_DEBUG_OPTIONS)
1749 {
1750 dump_target_flags("Initial target_flags", target_flags);
1751 dump_target_flags("target_flags_explicit", target_flags_explicit);
1752 }
1753
1754 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1755 SUBTARGET_OVERRIDE_OPTIONS;
1756 #endif
1757
1758 #ifndef SPARC_BI_ARCH
1759 /* Check for unsupported architecture size. */
1760 if (!TARGET_64BIT != DEFAULT_ARCH32_P)
1761 error ("%s is not supported by this configuration",
1762 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1763 #endif
1764
1765 /* We force all 64bit archs to use 128 bit long double */
1766 if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
1767 {
1768 error ("-mlong-double-64 not allowed with -m64");
1769 target_flags |= MASK_LONG_DOUBLE_128;
1770 }
1771
1772 /* Code model selection. */
1773 sparc_cmodel = SPARC_DEFAULT_CMODEL;
1774
1775 #ifdef SPARC_BI_ARCH
1776 if (TARGET_ARCH32)
1777 sparc_cmodel = CM_32;
1778 #endif
1779
1780 if (sparc_cmodel_string != NULL)
1781 {
1782 if (TARGET_ARCH64)
1783 {
1784 for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
1785 if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
1786 break;
1787 if (cmodel->name == NULL)
1788 error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
1789 else
1790 sparc_cmodel = cmodel->value;
1791 }
1792 else
1793 error ("-mcmodel= is not supported on 32-bit systems");
1794 }
1795
1796 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1797 for (i = 8; i < 16; i++)
1798 if (!call_used_regs [i])
1799 {
1800 error ("-fcall-saved-REG is not supported for out registers");
1801 call_used_regs [i] = 1;
1802 }
1803
1804 /* Set the default CPU if no -mcpu option was specified. */
1805 if (!global_options_set.x_sparc_cpu_and_features)
1806 {
1807 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1808 if (def->cpu == TARGET_CPU_DEFAULT)
1809 break;
1810 gcc_assert (def->cpu != -1);
1811 sparc_cpu_and_features = def->processor;
1812 }
1813
1814 /* Set the default CPU if no -mtune option was specified. */
1815 if (!global_options_set.x_sparc_cpu)
1816 sparc_cpu = sparc_cpu_and_features;
1817
1818 cpu = &cpu_table[(int) sparc_cpu_and_features];
1819
1820 if (TARGET_DEBUG_OPTIONS)
1821 {
1822 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1823 dump_target_flags ("cpu->disable", cpu->disable);
1824 dump_target_flags ("cpu->enable", cpu->enable);
1825 }
1826
1827 target_flags &= ~cpu->disable;
1828 target_flags |= (cpu->enable
1829 #ifndef HAVE_AS_FMAF_HPC_VIS3
1830 & ~(MASK_FMAF | MASK_VIS3)
1831 #endif
1832 #ifndef HAVE_AS_SPARC4
1833 & ~MASK_CBCOND
1834 #endif
1835 #ifndef HAVE_AS_SPARC5_VIS4
1836 & ~(MASK_VIS4 | MASK_SUBXC)
1837 #endif
1838 #ifndef HAVE_AS_SPARC6
1839 & ~(MASK_VIS4B)
1840 #endif
1841 #ifndef HAVE_AS_LEON
1842 & ~(MASK_LEON | MASK_LEON3)
1843 #endif
1844 & ~(target_flags_explicit & MASK_FEATURES)
1845 );
1846
1847 /* -mvis2 implies -mvis. */
1848 if (TARGET_VIS2)
1849 target_flags |= MASK_VIS;
1850
1851 /* -mvis3 implies -mvis2 and -mvis. */
1852 if (TARGET_VIS3)
1853 target_flags |= MASK_VIS2 | MASK_VIS;
1854
1855 /* -mvis4 implies -mvis3, -mvis2 and -mvis. */
1856 if (TARGET_VIS4)
1857 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1858
1859 /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis */
1860 if (TARGET_VIS4B)
1861 target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1862
1863 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if
1864 FPU is disabled. */
1865 if (!TARGET_FPU)
1866 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1867 | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);
1868
1869 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1870 are available; -m64 also implies v9. */
1871 if (TARGET_VIS || TARGET_ARCH64)
1872 {
1873 target_flags |= MASK_V9;
1874 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1875 }
1876
1877 /* -mvis also implies -mv8plus on 32-bit. */
1878 if (TARGET_VIS && !TARGET_ARCH64)
1879 target_flags |= MASK_V8PLUS;
1880
1881 /* Use the deprecated v8 insns for sparc64 in 32-bit mode. */
1882 if (TARGET_V9 && TARGET_ARCH32)
1883 target_flags |= MASK_DEPRECATED_V8_INSNS;
1884
1885 /* V8PLUS requires V9 and makes no sense in 64-bit mode. */
1886 if (!TARGET_V9 || TARGET_ARCH64)
1887 target_flags &= ~MASK_V8PLUS;
1888
1889 /* Don't use stack biasing in 32-bit mode. */
1890 if (TARGET_ARCH32)
1891 target_flags &= ~MASK_STACK_BIAS;
1892
1893 /* Use LRA instead of reload, unless otherwise instructed. */
1894 if (!(target_flags_explicit & MASK_LRA))
1895 target_flags |= MASK_LRA;
1896
1897 /* Enable applicable errata workarounds for LEON3FT. */
1898 if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
1899 {
1900 sparc_fix_b2bst = 1;
1901 sparc_fix_lost_divsqrt = 1;
1902 }
1903
1904 /* Disable FsMULd for the UT699 since it doesn't work correctly. */
1905 if (sparc_fix_ut699)
1906 target_flags &= ~MASK_FSMULD;
1907
1908 /* Supply a default value for align_functions. */
1909 if (align_functions == 0)
1910 {
1911 if (sparc_cpu == PROCESSOR_ULTRASPARC
1912 || sparc_cpu == PROCESSOR_ULTRASPARC3
1913 || sparc_cpu == PROCESSOR_NIAGARA
1914 || sparc_cpu == PROCESSOR_NIAGARA2
1915 || sparc_cpu == PROCESSOR_NIAGARA3
1916 || sparc_cpu == PROCESSOR_NIAGARA4)
1917 align_functions = 32;
1918 else if (sparc_cpu == PROCESSOR_NIAGARA7
1919 || sparc_cpu == PROCESSOR_M8)
1920 align_functions = 64;
1921 }
1922
1923 /* Validate PCC_STRUCT_RETURN. */
1924 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1925 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1926
1927 /* Only use .uaxword when compiling for a 64-bit target. */
1928 if (!TARGET_ARCH64)
1929 targetm.asm_out.unaligned_op.di = NULL;
1930
1931 /* Do various machine dependent initializations. */
1932 sparc_init_modes ();
1933
1934 /* Set up function hooks. */
1935 init_machine_status = sparc_init_machine_status;
1936
1937 switch (sparc_cpu)
1938 {
1939 case PROCESSOR_V7:
1940 case PROCESSOR_CYPRESS:
1941 sparc_costs = &cypress_costs;
1942 break;
1943 case PROCESSOR_V8:
1944 case PROCESSOR_SPARCLITE:
1945 case PROCESSOR_SUPERSPARC:
1946 sparc_costs = &supersparc_costs;
1947 break;
1948 case PROCESSOR_F930:
1949 case PROCESSOR_F934:
1950 case PROCESSOR_HYPERSPARC:
1951 case PROCESSOR_SPARCLITE86X:
1952 sparc_costs = &hypersparc_costs;
1953 break;
1954 case PROCESSOR_LEON:
1955 sparc_costs = &leon_costs;
1956 break;
1957 case PROCESSOR_LEON3:
1958 case PROCESSOR_LEON3V7:
1959 sparc_costs = &leon3_costs;
1960 break;
1961 case PROCESSOR_SPARCLET:
1962 case PROCESSOR_TSC701:
1963 sparc_costs = &sparclet_costs;
1964 break;
1965 case PROCESSOR_V9:
1966 case PROCESSOR_ULTRASPARC:
1967 sparc_costs = &ultrasparc_costs;
1968 break;
1969 case PROCESSOR_ULTRASPARC3:
1970 sparc_costs = &ultrasparc3_costs;
1971 break;
1972 case PROCESSOR_NIAGARA:
1973 sparc_costs = &niagara_costs;
1974 break;
1975 case PROCESSOR_NIAGARA2:
1976 sparc_costs = &niagara2_costs;
1977 break;
1978 case PROCESSOR_NIAGARA3:
1979 sparc_costs = &niagara3_costs;
1980 break;
1981 case PROCESSOR_NIAGARA4:
1982 sparc_costs = &niagara4_costs;
1983 break;
1984 case PROCESSOR_NIAGARA7:
1985 sparc_costs = &niagara7_costs;
1986 break;
1987 case PROCESSOR_M8:
1988 sparc_costs = &m8_costs;
1989 break;
1990 case PROCESSOR_NATIVE:
1991 gcc_unreachable ();
1992 };
1993
1994 if (sparc_memory_model == SMM_DEFAULT)
1995 {
1996 /* Choose the memory model for the operating system. */
1997 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1998 if (os_default != SMM_DEFAULT)
1999 sparc_memory_model = os_default;
2000 /* Choose the most relaxed model for the processor. */
2001 else if (TARGET_V9)
2002 sparc_memory_model = SMM_RMO;
2003 else if (TARGET_LEON3)
2004 sparc_memory_model = SMM_TSO;
2005 else if (TARGET_LEON)
2006 sparc_memory_model = SMM_SC;
2007 else if (TARGET_V8)
2008 sparc_memory_model = SMM_PSO;
2009 else
2010 sparc_memory_model = SMM_SC;
2011 }
2012
2013 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
2014 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
2015 target_flags |= MASK_LONG_DOUBLE_128;
2016 #endif
2017
2018 if (TARGET_DEBUG_OPTIONS)
2019 dump_target_flags ("Final target_flags", target_flags);
2020
2021 /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that
2022 can run at the same time. More important, it is the threshold
2023 defining when additional prefetches will be dropped by the
2024 hardware.
2025
2026 The UltraSPARC-III features a documented prefetch queue with a
2027 size of 8. Additional prefetches issued in the cpu are
2028 dropped.
2029
2030 Niagara processors are different. In these processors prefetches
2031 are handled much like regular loads. The L1 miss buffer is 32
2032 entries, but prefetches start getting affected when 30 entries
2033 become occupied. That occupation could be a mix of regular loads
2034 and prefetches though. And that buffer is shared by all threads.
2035 Once the threshold is reached, if the core is running a single
2036 thread the prefetch will retry. If more than one thread is
2037 running, the prefetch will be dropped.
2038
2039 All this makes it very difficult to determine how many
2040 simultaneous prefetches can be issued simultaneously, even in a
2041 single-threaded program. Experimental results show that setting
2042 this parameter to 32 works well when the number of threads is not
2043 high. */
2044 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2045 ((sparc_cpu == PROCESSOR_ULTRASPARC
2046 || sparc_cpu == PROCESSOR_NIAGARA
2047 || sparc_cpu == PROCESSOR_NIAGARA2
2048 || sparc_cpu == PROCESSOR_NIAGARA3
2049 || sparc_cpu == PROCESSOR_NIAGARA4)
2050 ? 2
2051 : (sparc_cpu == PROCESSOR_ULTRASPARC3
2052 ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
2053 || sparc_cpu == PROCESSOR_M8)
2054 ? 32 : 3))),
2055 global_options.x_param_values,
2056 global_options_set.x_param_values);
2057
2058 /* PARAM_L1_CACHE_LINE_SIZE is the size of the L1 cache line, in
2059 bytes.
2060
2061 The Oracle SPARC Architecture (previously the UltraSPARC
2062 Architecture) specification states that when a PREFETCH[A]
2063 instruction is executed an implementation-specific amount of data
2064 is prefetched, and that it is at least 64 bytes long (aligned to
2065 at least 64 bytes).
2066
2067 However, this is not correct. The M7 (and implementations prior
2068 to that) does not guarantee a 64B prefetch into a cache if the
2069 line size is smaller. A single cache line is all that is ever
2070 prefetched. So for the M7, where the L1D$ has 32B lines and the
2071 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
2072 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
2073 is a read_n prefetch, which is the only type which allocates to
2074 the L1.) */
2075 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2076 (sparc_cpu == PROCESSOR_M8
2077 ? 64 : 32),
2078 global_options.x_param_values,
2079 global_options_set.x_param_values);
2080
2081 /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
2082 Hardvard level-1 caches) in kilobytes. Both UltraSPARC and
2083 Niagara processors feature a L1D$ of 16KB. */
2084 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2085 ((sparc_cpu == PROCESSOR_ULTRASPARC
2086 || sparc_cpu == PROCESSOR_ULTRASPARC3
2087 || sparc_cpu == PROCESSOR_NIAGARA
2088 || sparc_cpu == PROCESSOR_NIAGARA2
2089 || sparc_cpu == PROCESSOR_NIAGARA3
2090 || sparc_cpu == PROCESSOR_NIAGARA4
2091 || sparc_cpu == PROCESSOR_NIAGARA7
2092 || sparc_cpu == PROCESSOR_M8)
2093 ? 16 : 64),
2094 global_options.x_param_values,
2095 global_options_set.x_param_values);
2096
2097
2098 /* PARAM_L2_CACHE_SIZE is the size fo the L2 in kilobytes. Note
2099 that 512 is the default in params.def. */
2100 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
2101 ((sparc_cpu == PROCESSOR_NIAGARA4
2102 || sparc_cpu == PROCESSOR_M8)
2103 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
2104 ? 256 : 512)),
2105 global_options.x_param_values,
2106 global_options_set.x_param_values);
2107
2108
2109 /* Disable save slot sharing for call-clobbered registers by default.
2110 The IRA sharing algorithm works on single registers only and this
2111 pessimizes for double floating-point registers. */
2112 if (!global_options_set.x_flag_ira_share_save_slots)
2113 flag_ira_share_save_slots = 0;
2114
2115 /* Only enable REE by default in 64-bit mode where it helps to eliminate
2116 redundant 32-to-64-bit extensions. */
2117 if (!global_options_set.x_flag_ree && TARGET_ARCH32)
2118 flag_ree = 0;
2119 }
2120
2121 /* Miscellaneous utilities. */
2122
2123 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
2124 or branch on register contents instructions. */
2125
2126 int
v9_regcmp_p(enum rtx_code code)2127 v9_regcmp_p (enum rtx_code code)
2128 {
2129 return (code == EQ || code == NE || code == GE || code == LT
2130 || code == LE || code == GT);
2131 }
2132
2133 /* Nonzero if OP is a floating point constant which can
2134 be loaded into an integer register using a single
2135 sethi instruction. */
2136
2137 int
fp_sethi_p(rtx op)2138 fp_sethi_p (rtx op)
2139 {
2140 if (GET_CODE (op) == CONST_DOUBLE)
2141 {
2142 long i;
2143
2144 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2145 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
2146 }
2147
2148 return 0;
2149 }
2150
2151 /* Nonzero if OP is a floating point constant which can
2152 be loaded into an integer register using a single
2153 mov instruction. */
2154
2155 int
fp_mov_p(rtx op)2156 fp_mov_p (rtx op)
2157 {
2158 if (GET_CODE (op) == CONST_DOUBLE)
2159 {
2160 long i;
2161
2162 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2163 return SPARC_SIMM13_P (i);
2164 }
2165
2166 return 0;
2167 }
2168
2169 /* Nonzero if OP is a floating point constant which can
2170 be loaded into an integer register using a high/losum
2171 instruction sequence. */
2172
2173 int
fp_high_losum_p(rtx op)2174 fp_high_losum_p (rtx op)
2175 {
2176 /* The constraints calling this should only be in
2177 SFmode move insns, so any constant which cannot
2178 be moved using a single insn will do. */
2179 if (GET_CODE (op) == CONST_DOUBLE)
2180 {
2181 long i;
2182
2183 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2184 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
2185 }
2186
2187 return 0;
2188 }
2189
2190 /* Return true if the address of LABEL can be loaded by means of the
2191 mov{si,di}_pic_label_ref patterns in PIC mode. */
2192
2193 static bool
can_use_mov_pic_label_ref(rtx label)2194 can_use_mov_pic_label_ref (rtx label)
2195 {
2196 /* VxWorks does not impose a fixed gap between segments; the run-time
2197 gap can be different from the object-file gap. We therefore can't
2198 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
2199 are absolutely sure that X is in the same segment as the GOT.
2200 Unfortunately, the flexibility of linker scripts means that we
2201 can't be sure of that in general, so assume that GOT-relative
2202 accesses are never valid on VxWorks. */
2203 if (TARGET_VXWORKS_RTP)
2204 return false;
2205
2206 /* Similarly, if the label is non-local, it might end up being placed
2207 in a different section than the current one; now mov_pic_label_ref
2208 requires the label and the code to be in the same section. */
2209 if (LABEL_REF_NONLOCAL_P (label))
2210 return false;
2211
2212 /* Finally, if we are reordering basic blocks and partition into hot
2213 and cold sections, this might happen for any label. */
2214 if (flag_reorder_blocks_and_partition)
2215 return false;
2216
2217 return true;
2218 }
2219
/* Expand a move instruction between OPERANDS[0] (destination) and
   OPERANDS[1] (source) in MODE.  Return true if all work is done here
   (the caller emits nothing further); return false if the caller
   should fall through to the standard move pattern, possibly with the
   operands rewritten in place.  */

bool
sparc_expand_move (machine_mode mode, rtx *operands)
{
  /* Handle sets of MEM first.  */
  if (GET_CODE (operands[0]) == MEM)
    {
      /* 0 is a register (or a pair of registers) on SPARC.  */
      if (register_or_zero_operand (operands[1], mode))
	return false;

      /* Otherwise force the source into a register so the store is a
	 plain reg -> mem move.  */
      if (!reload_in_progress)
	{
	  operands[0] = validize_mem (operands[0]);
	  operands[1] = force_reg (mode, operands[1]);
	}
    }

  /* Fix up TLS cases: rewrite a TLS-referencing constant into a
     legitimate TLS address and let the caller emit the move.  */
  if (TARGET_HAVE_TLS
      && CONSTANT_P (operands[1])
      && sparc_tls_referenced_p (operands [1]))
    {
      operands[1] = sparc_legitimize_tls_address (operands[1]);
      return false;
    }

  /* Fix up PIC cases.  */
  if (flag_pic && CONSTANT_P (operands[1]))
    {
      if (pic_address_needs_scratch (operands[1]))
	operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);

      /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases.
	 The second alternative below matches LABEL_REF + CONST_INT wrapped
	 in a CONST.  */
      if ((GET_CODE (operands[1]) == LABEL_REF
	   && can_use_mov_pic_label_ref (operands[1]))
	  || (GET_CODE (operands[1]) == CONST
	      && GET_CODE (XEXP (operands[1], 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
	      && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT
	      && can_use_mov_pic_label_ref (XEXP (XEXP (operands[1], 0), 0))))
	{
	  if (mode == SImode)
	    {
	      emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }

	  if (mode == DImode)
	    {
	      emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }
	}

      if (symbolic_operand (operands[1], mode))
	{
	  /* During reload no new pseudos may be created, so reuse the
	     destination as the scratch register.  */
	  operands[1]
	    = sparc_legitimize_pic_address (operands[1],
					    reload_in_progress
					    ? operands[0] : NULL_RTX);
	  return false;
	}
    }

  /* If we are trying to toss an integer constant into FP registers,
     or loading a FP or vector constant, force it into memory.  */
  if (CONSTANT_P (operands[1])
      && REG_P (operands[0])
      && (SPARC_FP_REG_P (REGNO (operands[0]))
	  || SCALAR_FLOAT_MODE_P (mode)
	  || VECTOR_MODE_P (mode)))
    {
      /* emit_group_store will send such bogosity to us when it is
         not storing directly into memory.  So fix this up to avoid
         crashes in output_constant_pool.  */
      if (operands [1] == const0_rtx)
	operands[1] = CONST0_RTX (mode);

      /* We can clear or set to all-ones FP registers if TARGET_VIS, and
	 always other regs.  */
      if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
	  && (const_zero_operand (operands[1], mode)
	      || const_all_ones_operand (operands[1], mode)))
	return false;

      if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
	  /* We are able to build any SF constant in integer registers
	     with at most 2 instructions.  */
	  && (mode == SFmode
	      /* And any DF constant in integer registers if needed.  */
	      || (mode == DFmode && !can_create_pseudo_p ())))
	return false;

      operands[1] = force_const_mem (mode, operands[1]);
      if (!reload_in_progress)
	operands[1] = validize_mem (operands[1]);
      return false;
    }

  /* Accept non-constants and valid constants unmodified.  */
  if (!CONSTANT_P (operands[1])
      || GET_CODE (operands[1]) == HIGH
      || input_operand (operands[1], mode))
    return false;

  /* What remains is a constant that needs a multi-insn synthesis
     sequence, dispatched on the mode.  */
  switch (mode)
    {
    case E_QImode:
      /* All QImode constants require only one insn, so proceed.  */
      break;

    case E_HImode:
    case E_SImode:
      sparc_emit_set_const32 (operands[0], operands[1]);
      return true;

    case E_DImode:
      /* input_operand should have filtered out 32-bit mode.  */
      sparc_emit_set_const64 (operands[0], operands[1]);
      return true;

    case E_TImode:
      {
	rtx high, low;
	/* TImode isn't available in 32-bit mode.  */
	split_double (operands[1], &high, &low);
	emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
			      high));
	emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
			      low));
      }
      return true;

    default:
      gcc_unreachable ();
    }

  return false;
}
2361
2362 /* Load OP1, a 32-bit constant, into OP0, a register.
2363 We know it can't be done in one insn when we get
2364 here, the move expander guarantees this. */
2365
2366 static void
sparc_emit_set_const32(rtx op0,rtx op1)2367 sparc_emit_set_const32 (rtx op0, rtx op1)
2368 {
2369 machine_mode mode = GET_MODE (op0);
2370 rtx temp = op0;
2371
2372 if (can_create_pseudo_p ())
2373 temp = gen_reg_rtx (mode);
2374
2375 if (GET_CODE (op1) == CONST_INT)
2376 {
2377 gcc_assert (!small_int_operand (op1, mode)
2378 && !const_high_operand (op1, mode));
2379
2380 /* Emit them as real moves instead of a HIGH/LO_SUM,
2381 this way CSE can see everything and reuse intermediate
2382 values if it wants. */
2383 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
2384 & ~(HOST_WIDE_INT) 0x3ff)));
2385
2386 emit_insn (gen_rtx_SET (op0,
2387 gen_rtx_IOR (mode, temp,
2388 GEN_INT (INTVAL (op1) & 0x3ff))));
2389 }
2390 else
2391 {
2392 /* A symbol, emit in the traditional way. */
2393 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
2394 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
2395 }
2396 }
2397
/* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
   If TEMP is nonzero, we are forbidden to use any other scratch
   registers.  Otherwise, we are allowed to generate them as needed.

   Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
   or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns).  */

void
sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
{
  rtx cst, temp1, temp2, temp3, temp4, temp5;
  rtx ti_temp = 0;

  /* Deal with too large offsets: if OP1 is symbol+offset and the offset
     does not fit in 32 bits, build the symbol and the offset separately
     and add them.  This path may only be taken when we can create
     pseudos (hence the assert on TEMP).  */
  if (GET_CODE (op1) == CONST
      && GET_CODE (XEXP (op1, 0)) == PLUS
      && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
      && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
    {
      gcc_assert (!temp);
      temp1 = gen_reg_rtx (DImode);
      temp2 = gen_reg_rtx (DImode);
      sparc_emit_set_const64 (temp2, cst);
      sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
				       NULL_RTX);
      emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
      return;
    }

  /* A TImode scratch gives us two DImode halves; keep the TImode
     register around so we can pick the second half below if the first
     one coincides with OP0.  */
  if (temp && GET_MODE (temp) == TImode)
    {
      ti_temp = temp;
      temp = gen_rtx_REG (DImode, REGNO (temp));
    }

  /* SPARC-V9 code-model support.  */
  switch (sparc_cmodel)
    {
    case CM_MEDLOW:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 4TB of the virtual address
	 space.

	 sethi	%hi(symbol), %temp1
	 or	%temp1, %lo(symbol), %reg  */
      if (temp)
	temp1 = temp;  /* op0 is allowed.  */
      else
	temp1 = gen_reg_rtx (DImode);

      emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
      emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
      break;

    case CM_MEDMID:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 16TB of the virtual address
	 space.

	 sethi	%h44(symbol), %temp1
	 or	%temp1, %m44(symbol), %temp2
	 sllx	%temp2, 12, %temp3
	 or	%temp3, %l44(symbol), %reg  */
      if (temp)
	{
	  temp1 = op0;
	  temp2 = op0;
	  temp3 = temp;  /* op0 is allowed.  */
	}
      else
	{
	  temp1 = gen_reg_rtx (DImode);
	  temp2 = gen_reg_rtx (DImode);
	  temp3 = gen_reg_rtx (DImode);
	}

      emit_insn (gen_seth44 (temp1, op1));
      emit_insn (gen_setm44 (temp2, temp1, op1));
      emit_insn (gen_rtx_SET (temp3,
			      gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
      emit_insn (gen_setl44 (op0, temp3, op1));
      break;

    case CM_MEDANY:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable can be placed anywhere in the virtual address
	 space.

	 sethi	%hh(symbol), %temp1
	 sethi	%lm(symbol), %temp2
	 or	%temp1, %hm(symbol), %temp3
	 sllx	%temp3, 32, %temp4
	 or	%temp4, %temp2, %temp5
	 or	%temp5, %lo(symbol), %reg  */
      if (temp)
	{
	  /* It is possible that one of the registers we got for operands[2]
	     might coincide with that of operands[0] (which is why we made
	     it TImode).  Pick the other one to use as our scratch.  */
	  if (rtx_equal_p (temp, op0))
	    {
	      gcc_assert (ti_temp);
	      temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
	    }
	  temp1 = op0;
	  temp2 = temp;  /* op0 is _not_ allowed, see above.  */
	  temp3 = op0;
	  temp4 = op0;
	  temp5 = op0;
	}
      else
	{
	  temp1 = gen_reg_rtx (DImode);
	  temp2 = gen_reg_rtx (DImode);
	  temp3 = gen_reg_rtx (DImode);
	  temp4 = gen_reg_rtx (DImode);
	  temp5 = gen_reg_rtx (DImode);
	}

      emit_insn (gen_sethh (temp1, op1));
      emit_insn (gen_setlm (temp2, op1));
      emit_insn (gen_sethm (temp3, temp1, op1));
      emit_insn (gen_rtx_SET (temp4,
			      gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
      emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
      emit_insn (gen_setlo (op0, temp5, op1));
      break;

    case CM_EMBMEDANY:
      /* Old old old backwards compatibility kruft here.
	 Essentially it is MEDLOW with a fixed 64-bit
	 virtual base added to all data segment addresses.
	 Text-segment stuff is computed like MEDANY, we can't
	 reuse the code above because the relocation knobs
	 look different.

	 Data segment:	sethi	%hi(symbol), %temp1
			add	%temp1, EMBMEDANY_BASE_REG, %temp2
			or	%temp2, %lo(symbol), %reg  */
      if (data_segment_operand (op1, GET_MODE (op1)))
	{
	  if (temp)
	    {
	      temp1 = temp;  /* op0 is allowed.  */
	      temp2 = op0;
	    }
	  else
	    {
	      temp1 = gen_reg_rtx (DImode);
	      temp2 = gen_reg_rtx (DImode);
	    }

	  emit_insn (gen_embmedany_sethi (temp1, op1));
	  emit_insn (gen_embmedany_brsum (temp2, temp1));
	  emit_insn (gen_embmedany_losum (op0, temp2, op1));
	}

      /* Text segment:	sethi	%uhi(symbol), %temp1
			sethi	%hi(symbol), %temp2
			or	%temp1, %ulo(symbol), %temp3
			sllx	%temp3, 32, %temp4
			or	%temp4, %temp2, %temp5
			or	%temp5, %lo(symbol), %reg  */
      else
	{
	  if (temp)
	    {
	      /* It is possible that one of the registers we got for operands[2]
		 might coincide with that of operands[0] (which is why we made
		 it TImode).  Pick the other one to use as our scratch.  */
	      if (rtx_equal_p (temp, op0))
		{
		  gcc_assert (ti_temp);
		  temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
		}
	      temp1 = op0;
	      temp2 = temp;  /* op0 is _not_ allowed, see above.  */
	      temp3 = op0;
	      temp4 = op0;
	      temp5 = op0;
	    }
	  else
	    {
	      temp1 = gen_reg_rtx (DImode);
	      temp2 = gen_reg_rtx (DImode);
	      temp3 = gen_reg_rtx (DImode);
	      temp4 = gen_reg_rtx (DImode);
	      temp5 = gen_reg_rtx (DImode);
	    }

	  emit_insn (gen_embmedany_textuhi (temp1, op1));
	  emit_insn (gen_embmedany_texthi  (temp2, op1));
	  emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
	  emit_insn (gen_rtx_SET (temp4,
				  gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
	  emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
	  emit_insn (gen_embmedany_textlo  (op0, temp5, op1));
	}
      break;

    default:
      gcc_unreachable ();
    }
}
2614
2615 /* These avoid problems when cross compiling. If we do not
2616 go through all this hair then the optimizer will see
2617 invalid REG_EQUAL notes or in some cases none at all. */
2618 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2619 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2620 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2621 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2622
2623 /* The optimizer is not to assume anything about exactly
2624 which bits are set for a HIGH, they are unspecified.
2625 Unfortunately this leads to many missed optimizations
2626 during CSE. We mask out the non-HIGH bits, and matches
2627 a plain movdi, to alleviate this problem. */
2628 static rtx
gen_safe_HIGH64(rtx dest,HOST_WIDE_INT val)2629 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2630 {
2631 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2632 }
2633
2634 static rtx
gen_safe_SET64(rtx dest,HOST_WIDE_INT val)2635 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2636 {
2637 return gen_rtx_SET (dest, GEN_INT (val));
2638 }
2639
2640 static rtx
gen_safe_OR64(rtx src,HOST_WIDE_INT val)2641 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2642 {
2643 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2644 }
2645
2646 static rtx
gen_safe_XOR64(rtx src,HOST_WIDE_INT val)2647 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2648 {
2649 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2650 }
2651
2652 /* Worker routines for 64-bit constant formation on arch64.
2653 One of the key things to be doing in these emissions is
2654 to create as many temp REGs as possible. This makes it
2655 possible for half-built constants to be used later when
2656 such values are similar to something required later on.
2657 Without doing this, the optimizer cannot see such
2658 opportunities. */
2659
2660 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2661 unsigned HOST_WIDE_INT, int);
2662
2663 static void
sparc_emit_set_const64_quick1(rtx op0,rtx temp,unsigned HOST_WIDE_INT low_bits,int is_neg)2664 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2665 unsigned HOST_WIDE_INT low_bits, int is_neg)
2666 {
2667 unsigned HOST_WIDE_INT high_bits;
2668
2669 if (is_neg)
2670 high_bits = (~low_bits) & 0xffffffff;
2671 else
2672 high_bits = low_bits;
2673
2674 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2675 if (!is_neg)
2676 {
2677 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2678 }
2679 else
2680 {
2681 /* If we are XOR'ing with -1, then we should emit a one's complement
2682 instead. This way the combiner will notice logical operations
2683 such as ANDN later on and substitute. */
2684 if ((low_bits & 0x3ff) == 0x3ff)
2685 {
2686 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2687 }
2688 else
2689 {
2690 emit_insn (gen_rtx_SET (op0,
2691 gen_safe_XOR64 (temp,
2692 (-(HOST_WIDE_INT)0x400
2693 | (low_bits & 0x3ff)))));
2694 }
2695 }
2696 }
2697
2698 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2699 unsigned HOST_WIDE_INT, int);
2700
2701 static void
sparc_emit_set_const64_quick2(rtx op0,rtx temp,unsigned HOST_WIDE_INT high_bits,unsigned HOST_WIDE_INT low_immediate,int shift_count)2702 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2703 unsigned HOST_WIDE_INT high_bits,
2704 unsigned HOST_WIDE_INT low_immediate,
2705 int shift_count)
2706 {
2707 rtx temp2 = op0;
2708
2709 if ((high_bits & 0xfffffc00) != 0)
2710 {
2711 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2712 if ((high_bits & ~0xfffffc00) != 0)
2713 emit_insn (gen_rtx_SET (op0,
2714 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2715 else
2716 temp2 = temp;
2717 }
2718 else
2719 {
2720 emit_insn (gen_safe_SET64 (temp, high_bits));
2721 temp2 = temp;
2722 }
2723
2724 /* Now shift it up into place. */
2725 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2726 GEN_INT (shift_count))));
2727
2728 /* If there is a low immediate part piece, finish up by
2729 putting that in as well. */
2730 if (low_immediate != 0)
2731 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2732 }
2733
2734 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2735 unsigned HOST_WIDE_INT);
2736
/* Full 64-bit constant decomposition.  Even though this is the
   'worst' case, we still optimize a few things away.
   HIGH_BITS/LOW_BITS are the upper/lower 32 bits of the value;
   TEMP is a scratch register and OP0 the destination.  */
static void
sparc_emit_set_const64_longway (rtx op0, rtx temp,
				unsigned HOST_WIDE_INT high_bits,
				unsigned HOST_WIDE_INT low_bits)
{
  rtx sub_temp = op0;

  if (can_create_pseudo_p ())
    sub_temp = gen_reg_rtx (DImode);

  /* First synthesize the upper 32 bits into SUB_TEMP, either via
     sethi (+ optional OR of the low 10 bits) or a plain move.  */
  if ((high_bits & 0xfffffc00) != 0)
    {
      emit_insn (gen_safe_HIGH64 (temp, high_bits));
      if ((high_bits & ~0xfffffc00) != 0)
	emit_insn (gen_rtx_SET (sub_temp,
				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
	sub_temp = temp;
    }
  else
    {
      emit_insn (gen_safe_SET64 (temp, high_bits));
      sub_temp = temp;
    }

  if (can_create_pseudo_p ())
    {
      /* With pseudos available: build the low 32 bits independently,
	 shift the high part up by 32 and add the two halves.  */
      rtx temp2 = gen_reg_rtx (DImode);
      rtx temp3 = gen_reg_rtx (DImode);
      rtx temp4 = gen_reg_rtx (DImode);

      emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
						     GEN_INT (32))));

      emit_insn (gen_safe_HIGH64 (temp2, low_bits));
      if ((low_bits & ~0xfffffc00) != 0)
	{
	  emit_insn (gen_rtx_SET (temp3,
				  gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
	  emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
	}
    }
  else
    {
      /* No pseudos: feed the low 32 bits in as three chunks of
	 12, 12 and 8 bits, interleaved with left shifts, skipping
	 the OR for any chunk that is zero.  */
      rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
      rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
      rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
      int to_shift = 12;

      /* We are in the middle of reload, so this is really
	 painful.  However we do still make an attempt to
	 avoid emitting truly stupid code.  */
      if (low1 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
						       GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
	  sub_temp = op0;
	  to_shift = 12;
	}
      else
	{
	  to_shift += 12;
	}
      if (low2 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
						       GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
	  sub_temp = op0;
	  to_shift = 8;
	}
      else
	{
	  to_shift += 8;
	}
      emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
						   GEN_INT (to_shift))));
      if (low3 != const0_rtx)
	emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
      /* phew...  */
    }
}
2826
2827 /* Analyze a 64-bit constant for certain properties. */
2828 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2829 unsigned HOST_WIDE_INT,
2830 int *, int *, int *);
2831
2832 static void
analyze_64bit_constant(unsigned HOST_WIDE_INT high_bits,unsigned HOST_WIDE_INT low_bits,int * hbsp,int * lbsp,int * abbasp)2833 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2834 unsigned HOST_WIDE_INT low_bits,
2835 int *hbsp, int *lbsp, int *abbasp)
2836 {
2837 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2838 int i;
2839
2840 lowest_bit_set = highest_bit_set = -1;
2841 i = 0;
2842 do
2843 {
2844 if ((lowest_bit_set == -1)
2845 && ((low_bits >> i) & 1))
2846 lowest_bit_set = i;
2847 if ((highest_bit_set == -1)
2848 && ((high_bits >> (32 - i - 1)) & 1))
2849 highest_bit_set = (64 - i - 1);
2850 }
2851 while (++i < 32
2852 && ((highest_bit_set == -1)
2853 || (lowest_bit_set == -1)));
2854 if (i == 32)
2855 {
2856 i = 0;
2857 do
2858 {
2859 if ((lowest_bit_set == -1)
2860 && ((high_bits >> i) & 1))
2861 lowest_bit_set = i + 32;
2862 if ((highest_bit_set == -1)
2863 && ((low_bits >> (32 - i - 1)) & 1))
2864 highest_bit_set = 32 - i - 1;
2865 }
2866 while (++i < 32
2867 && ((highest_bit_set == -1)
2868 || (lowest_bit_set == -1)));
2869 }
2870 /* If there are no bits set this should have gone out
2871 as one instruction! */
2872 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2873 all_bits_between_are_set = 1;
2874 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2875 {
2876 if (i < 32)
2877 {
2878 if ((low_bits & (1 << i)) != 0)
2879 continue;
2880 }
2881 else
2882 {
2883 if ((high_bits & (1 << (i - 32))) != 0)
2884 continue;
2885 }
2886 all_bits_between_are_set = 0;
2887 break;
2888 }
2889 *hbsp = highest_bit_set;
2890 *lbsp = lowest_bit_set;
2891 *abbasp = all_bits_between_are_set;
2892 }
2893
2894 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2895
2896 static int
const64_is_2insns(unsigned HOST_WIDE_INT high_bits,unsigned HOST_WIDE_INT low_bits)2897 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2898 unsigned HOST_WIDE_INT low_bits)
2899 {
2900 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2901
2902 if (high_bits == 0
2903 || high_bits == 0xffffffff)
2904 return 1;
2905
2906 analyze_64bit_constant (high_bits, low_bits,
2907 &highest_bit_set, &lowest_bit_set,
2908 &all_bits_between_are_set);
2909
2910 if ((highest_bit_set == 63
2911 || lowest_bit_set == 0)
2912 && all_bits_between_are_set != 0)
2913 return 1;
2914
2915 if ((highest_bit_set - lowest_bit_set) < 21)
2916 return 1;
2917
2918 return 0;
2919 }
2920
2921 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2922 unsigned HOST_WIDE_INT,
2923 int, int);
2924
2925 static unsigned HOST_WIDE_INT
create_simple_focus_bits(unsigned HOST_WIDE_INT high_bits,unsigned HOST_WIDE_INT low_bits,int lowest_bit_set,int shift)2926 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2927 unsigned HOST_WIDE_INT low_bits,
2928 int lowest_bit_set, int shift)
2929 {
2930 HOST_WIDE_INT hi, lo;
2931
2932 if (lowest_bit_set < 32)
2933 {
2934 lo = (low_bits >> lowest_bit_set) << shift;
2935 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2936 }
2937 else
2938 {
2939 lo = 0;
2940 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2941 }
2942 gcc_assert (! (hi & lo));
2943 return (hi | lo);
2944 }
2945
/* Here we are sure to be arch64 and this is an integer constant
   being loaded into a register.  Emit the most efficient
   insn sequence possible.  Detection of all the 1-insn cases
   has been done already.  OP0 is the destination register, OP1
   the constant (or symbolic) source value.  */
static void
sparc_emit_set_const64 (rtx op0, rtx op1)
{
  unsigned HOST_WIDE_INT high_bits, low_bits;
  int lowest_bit_set, highest_bit_set;
  int all_bits_between_are_set;
  rtx temp = 0;

  /* Sanity check that we know what we are working with.  */
  gcc_assert (TARGET_ARCH64
	      && (GET_CODE (op0) == SUBREG
		  || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));

  /* After reload we cannot create new pseudos, so build the value
     directly in the destination register instead of a scratch.  */
  if (! can_create_pseudo_p ())
    temp = op0;

  if (GET_CODE (op1) != CONST_INT)
    {
      sparc_emit_set_symbolic_const64 (op0, op1, temp);
      return;
    }

  if (! temp)
    temp = gen_reg_rtx (DImode);

  /* Split the 64-bit constant into two 32-bit halves.  */
  high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
  low_bits = (INTVAL (op1) & 0xffffffff);

  /* low_bits	bits 0  --> 31
     high_bits	bits 32 --> 63  */

  analyze_64bit_constant (high_bits, low_bits,
			  &highest_bit_set, &lowest_bit_set,
			  &all_bits_between_are_set);

  /* First try for a 2-insn sequence.  */

  /* These situations are preferred because the optimizer can
   * do more things with them:
   * 1) mov	-1, %reg
   *    sllx	%reg, shift, %reg
   * 2) mov	-1, %reg
   *    srlx	%reg, shift, %reg
   * 3) mov	some_small_const, %reg
   *    sllx	%reg, shift, %reg
   */
  if (((highest_bit_set == 63
	|| lowest_bit_set == 0)
       && all_bits_between_are_set != 0)
      || ((highest_bit_set - lowest_bit_set) < 12))
    {
      HOST_WIDE_INT the_const = -1;
      int shift = lowest_bit_set;

      if ((highest_bit_set != 63
	   && lowest_bit_set != 0)
	  || all_bits_between_are_set == 0)
	{
	  /* Case 3 above: a small field of bits somewhere in the
	     middle; isolate it as the constant to load.  */
	  the_const =
	    create_simple_focus_bits (high_bits, low_bits,
				      lowest_bit_set, 0);
	}
      else if (lowest_bit_set == 0)
	/* Case 2 above: an all-ones run ending at bit 0; a negative
	   shift value encodes "srlx" below.  */
	shift = -(63 - highest_bit_set);

      gcc_assert (SPARC_SIMM13_P (the_const));
      /* shift == 0 would mean a 1-insn case, already handled by
	 our caller.  */
      gcc_assert (shift != 0);

      emit_insn (gen_safe_SET64 (temp, the_const));
      if (shift > 0)
	emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
						     GEN_INT (shift))));
      else if (shift < 0)
	emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
						       GEN_INT (-shift))));
      return;
    }

  /* Now a range of 22 or less bits set somewhere.
   * 1) sethi	%hi(focus_bits), %reg
   *    sllx	%reg, shift, %reg
   * 2) sethi	%hi(focus_bits), %reg
   *    srlx	%reg, shift, %reg
   */
  if ((highest_bit_set - lowest_bit_set) < 21)
    {
      /* Position the bit field at bit 10 so sethi can materialize it.  */
      unsigned HOST_WIDE_INT focus_bits =
	create_simple_focus_bits (high_bits, low_bits,
				  lowest_bit_set, 10);

      gcc_assert (SPARC_SETHI_P (focus_bits));
      gcc_assert (lowest_bit_set != 10);

      emit_insn (gen_safe_HIGH64 (temp, focus_bits));

      /* If lowest_bit_set == 10 then a sethi alone could have done it.  */
      if (lowest_bit_set < 10)
	emit_insn (gen_rtx_SET (op0,
				gen_rtx_LSHIFTRT (DImode, temp,
						  GEN_INT (10 - lowest_bit_set))));
      else if (lowest_bit_set > 10)
	emit_insn (gen_rtx_SET (op0,
				gen_rtx_ASHIFT (DImode, temp,
						GEN_INT (lowest_bit_set - 10))));
      return;
    }

  /* 1) sethi	%hi(low_bits), %reg
   *    or	%reg, %lo(low_bits), %reg
   * 2) sethi	%hi(~low_bits), %reg
   *	xor	%reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
   */
  if (high_bits == 0
      || high_bits == 0xffffffff)
    {
      sparc_emit_set_const64_quick1 (op0, temp, low_bits,
				     (high_bits == 0xffffffff));
      return;
    }

  /* Now, try 3-insn sequences.  */

  /* 1) sethi	%hi(high_bits), %reg
   *    or	%reg, %lo(high_bits), %reg
   *    sllx	%reg, 32, %reg
   */
  if (low_bits == 0)
    {
      sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
      return;
    }

  /* We may be able to do something quick
     when the constant is negated, so try that.  */
  if (const64_is_2insns ((~high_bits) & 0xffffffff,
			 (~low_bits) & 0xfffffc00))
    {
      /* NOTE: The trailing bits get XOR'd so we need the
	 non-negated bits, not the negated ones.  */
      unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;

      if ((((~high_bits) & 0xffffffff) == 0
	   && ((~low_bits) & 0x80000000) == 0)
	  || (((~high_bits) & 0xffffffff) == 0xffffffff
	      && ((~low_bits) & 0x80000000) != 0))
	{
	  /* The negated constant sign-extends from 32 bits, so load
	     just its low word (recursively if it needs 2 insns).  */
	  unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);

	  if ((SPARC_SETHI_P (fast_int)
	       && (~high_bits & 0xffffffff) == 0)
	      || SPARC_SIMM13_P (fast_int))
	    emit_insn (gen_safe_SET64 (temp, fast_int));
	  else
	    sparc_emit_set_const64 (temp, GEN_INT (fast_int));
	}
      else
	{
	  /* General case: recursively build the full negated value.  */
	  rtx negated_const;
	  negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
				   (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
	  sparc_emit_set_const64 (temp, negated_const);
	}

      /* If we are XOR'ing with -1, then we should emit a one's complement
	 instead.  This way the combiner will notice logical operations
	 such as ANDN later on and substitute.  */
      if (trailing_bits == 0x3ff)
	{
	  emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (op0,
				  gen_safe_XOR64 (temp,
						  (-0x400 | trailing_bits))));
	}
      return;
    }

  /* 1) sethi	%hi(xxx), %reg
   *    or	%reg, %lo(xxx), %reg
   *	sllx	%reg, yyy, %reg
   *
   * ??? This is just a generalized version of the low_bits==0
   * thing above, FIXME...
   */
  if ((highest_bit_set - lowest_bit_set) < 32)
    {
      unsigned HOST_WIDE_INT focus_bits =
	create_simple_focus_bits (high_bits, low_bits,
				  lowest_bit_set, 0);

      /* We can't get here in this state.  */
      gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);

      /* So what we know is that the set bits straddle the
	 middle of the 64-bit word.  */
      sparc_emit_set_const64_quick2 (op0, temp,
				     focus_bits, 0,
				     lowest_bit_set);
      return;
    }

  /* 1) sethi	%hi(high_bits), %reg
   *    or	%reg, %lo(high_bits), %reg
   *    sllx	%reg, 32, %reg
   *	or	%reg, low_bits, %reg
   */
  if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
    {
      sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
      return;
    }

  /* The easiest way when all else fails, is full decomposition.  */
  sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
}
3167
3168 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
3169
3170 static bool
sparc_fixed_condition_code_regs(unsigned int * p1,unsigned int * p2)3171 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3172 {
3173 *p1 = SPARC_ICC_REG;
3174 *p2 = SPARC_FCC_REG;
3175 return true;
3176 }
3177
/* Implement TARGET_MIN_ARITHMETIC_PRECISION.

   Arithmetic is always carried out in at least 32-bit precision.  */

static unsigned int
sparc_min_arithmetic_precision (void)
{
  const unsigned int min_precision = 32;
  return min_precision;
}
3185
3186 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
3187 return the mode to be used for the comparison. For floating-point,
3188 CCFP[E]mode is used. CCNZmode should be used when the first operand
3189 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
3190 processing is needed. */
3191
3192 machine_mode
select_cc_mode(enum rtx_code op,rtx x,rtx y)3193 select_cc_mode (enum rtx_code op, rtx x, rtx y)
3194 {
3195 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3196 {
3197 switch (op)
3198 {
3199 case EQ:
3200 case NE:
3201 case UNORDERED:
3202 case ORDERED:
3203 case UNLT:
3204 case UNLE:
3205 case UNGT:
3206 case UNGE:
3207 case UNEQ:
3208 case LTGT:
3209 return CCFPmode;
3210
3211 case LT:
3212 case LE:
3213 case GT:
3214 case GE:
3215 return CCFPEmode;
3216
3217 default:
3218 gcc_unreachable ();
3219 }
3220 }
3221 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
3222 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
3223 && y == const0_rtx)
3224 {
3225 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3226 return CCXNZmode;
3227 else
3228 return CCNZmode;
3229 }
3230 else
3231 {
3232 /* This is for the cmp<mode>_sne pattern. */
3233 if (GET_CODE (x) == NOT && y == constm1_rtx)
3234 {
3235 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3236 return CCXCmode;
3237 else
3238 return CCCmode;
3239 }
3240
3241 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
3242 if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
3243 {
3244 if (GET_CODE (y) == UNSPEC
3245 && (XINT (y, 1) == UNSPEC_ADDV
3246 || XINT (y, 1) == UNSPEC_SUBV
3247 || XINT (y, 1) == UNSPEC_NEGV))
3248 return CCVmode;
3249 else
3250 return CCCmode;
3251 }
3252
3253 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3254 return CCXmode;
3255 else
3256 return CCmode;
3257 }
3258 }
3259
/* Emit the compare insn and return the CC reg for a CODE comparison
   with operands X and Y.  */

static rtx
gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
{
  machine_mode mode;
  rtx cc_reg;

  /* If X is already a CC register, the comparison was emitted earlier;
     just hand it back.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return x;

  mode = SELECT_CC_MODE (code, x, y);

  /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
     fcc regs (cse can't tell they're really call clobbered regs and will
     remove a duplicate comparison even if there is an intervening function
     call - it will then try to reload the cc reg via an int reg which is why
     we need the movcc patterns).  It is possible to provide the movcc
     patterns by using the ldxfsr/stxfsr v9 insns.  I tried it: you need two
     registers (say %g1,%g5) and it takes about 6 insns.  A better fix would be
     to tell cse that CCFPE mode registers (even pseudos) are call
     clobbered.  */

  /* ??? This is an experiment.  Rather than making changes to cse which may
     or may not be easy/clean, we do our own cse.  This is possible because
     we will generate hard registers.  Cse knows they're call clobbered (it
     doesn't know the same thing about pseudos).  If we guess wrong, no big
     deal, but if we win, great!  */

  if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
#if 1 /* experiment */
    {
      int reg;
      /* We cycle through the registers to ensure they're all exercised.  */
      static int next_fcc_reg = 0;
      /* Previous x,y for each fcc reg.  */
      static rtx prev_args[4][2];

      /* Scan prev_args for x,y.  */
      for (reg = 0; reg < 4; reg++)
	if (prev_args[reg][0] == x && prev_args[reg][1] == y)
	  break;
      /* Not previously seen: allocate the next fcc register
	 round-robin and remember the operand pair.  */
      if (reg == 4)
	{
	  reg = next_fcc_reg;
	  prev_args[reg][0] = x;
	  prev_args[reg][1] = y;
	  next_fcc_reg = (next_fcc_reg + 1) & 3;
	}
      cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
    }
#else
    cc_reg = gen_reg_rtx (mode);
#endif /* ! experiment */
  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
  else
    cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);

  /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD.  If we do, this
     will only result in an unrecognizable insn so no point in asserting.  */
  emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));

  return cc_reg;
}
3326
3327
3328 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
3329
3330 rtx
gen_compare_reg(rtx cmp)3331 gen_compare_reg (rtx cmp)
3332 {
3333 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
3334 }
3335
/* This function is used for v9 only.
   DEST is the target of the Scc insn.
   CODE is the code for an Scc's comparison.
   X and Y are the values we compare.

   This function is needed to turn

	   (set (reg:SI 110)
	       (gt (reg:CCX 100 %icc)
	           (const_int 0)))
   into
	   (set (reg:SI 110)
	       (gt:DI (reg:CCX 100 %icc)
	           (const_int 0)))

   IE: The instruction recognizer needs to see the mode of the comparison to
   find the right instruction.  We could use "gt:DI" right in the
   define_expand, but leaving it out allows us to handle DI, SI, etc.

   Returns 1 if the Scc insn was emitted, 0 if the caller must fall
   back to another strategy.  */

static int
gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
{
  /* 64-bit operations need TARGET_ARCH64; give up otherwise.  */
  if (! TARGET_ARCH64
      && (GET_MODE (x) == DImode
	  || GET_MODE (dest) == DImode))
    return 0;

  /* Try to use the movrCC insns.  */
  if (TARGET_ARCH64
      && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
      && y == const0_rtx
      && v9_regcmp_p (compare_code))
    {
      rtx op0 = x;
      rtx temp;

      /* Special case for op0 != 0.  This can be done with one instruction if
	 dest == x.  */

      if (compare_code == NE
	  && GET_MODE (dest) == DImode
	  && rtx_equal_p (op0, dest))
	{
	  emit_insn (gen_rtx_SET (dest,
			      gen_rtx_IF_THEN_ELSE (DImode,
				       gen_rtx_fmt_ee (compare_code, DImode,
						       op0, const0_rtx),
				       const1_rtx,
				       dest)));
	  return 1;
	}

      if (reg_overlap_mentioned_p (dest, op0))
	{
	  /* Handle the case where dest == x.
	     We "early clobber" the result.  */
	  op0 = gen_reg_rtx (GET_MODE (x));
	  emit_move_insn (op0, x);
	}

      /* Clear DEST, then conditionally move 1 into it.  */
      emit_insn (gen_rtx_SET (dest, const0_rtx));
      if (GET_MODE (op0) != DImode)
	{
	  /* movrCC tests a full 64-bit register, so widen op0 first.  */
	  temp = gen_reg_rtx (DImode);
	  convert_move (temp, op0, 0);
	}
      else
	temp = op0;
      emit_insn (gen_rtx_SET (dest,
			  gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
				   gen_rtx_fmt_ee (compare_code, DImode,
						   temp, const0_rtx),
				   const1_rtx,
				   dest)));
      return 1;
    }
  else
    {
      /* General case: compare into a CC register, then conditionally
	 move 1 into the cleared destination on the CC result.  */
      x = gen_compare_reg_1 (compare_code, x, y);
      y = const0_rtx;

      emit_insn (gen_rtx_SET (dest, const0_rtx));
      emit_insn (gen_rtx_SET (dest,
			  gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
				   gen_rtx_fmt_ee (compare_code,
						   GET_MODE (x), x, y),
				   const1_rtx, dest)));
      return 1;
    }
}
3426
3427
/* Emit an scc insn.  For seq, sne, sgeu, and sltu, we can do this
   without jumps using the addx/subx instructions.  OPERANDS[0] is the
   destination, OPERANDS[1] the comparison rtx, OPERANDS[2]/[3] its
   operands.  Return true if a branch-free sequence was emitted,
   false if the caller should fall back to branches.  */

bool
emit_scc_insn (rtx operands[])
{
  rtx tem, x, y;
  enum rtx_code code;
  machine_mode mode;

  /* The quad-word fp compare library routines all return nonzero to indicate
     true, which is different from the equivalent libgcc routines, so we must
     handle them specially here.  */
  if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
    {
      operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
					      GET_CODE (operands[1]));
      operands[2] = XEXP (operands[1], 0);
      operands[3] = XEXP (operands[1], 1);
    }

  code = GET_CODE (operands[1]);
  x = operands[2];
  y = operands[3];
  mode = GET_MODE (x);

  /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
     more applications).  The exception to this is "reg != 0" which can
     be done in one instruction on v9 (so we do it).  */
  if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
    {
      /* Canonicalize "x op y" into "(x ^ y) op 0".  */
      if (y != const0_rtx)
	x = force_reg (mode, gen_rtx_XOR (mode, x, y));

      rtx pat = gen_rtx_SET (operands[0],
			     gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
					     x, const0_rtx));

      /* If we can use addx/subx or addxc, add a clobber for CC.  */
      if (mode == SImode || (code == NE && TARGET_VIS3))
	{
	  rtx clobber
	    = gen_rtx_CLOBBER (VOIDmode,
			       gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
					    SPARC_ICC_REG));
	  pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
	}

      emit_insn (pat);
      return true;
    }

  /* We can do LTU in DImode using the addxc instruction with VIS3.  */
  if (TARGET_ARCH64
      && mode == DImode
      && !((code == LTU || code == GTU) && TARGET_VIS3)
      && gen_v9_scc (operands[0], code, x, y))
    return true;

  /* We can do LTU and GEU using the addx/subx instructions too.  And
     for GTU/LEU, if both operands are registers swap them and fall
     back to the easy case.  */
  if (code == GTU || code == LEU)
    {
      if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
	  && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
	{
	  tem = x;
	  x = y;
	  y = tem;
	  code = swap_condition (code);
	}
    }

  if (code == LTU || code == GEU)
    {
      emit_insn (gen_rtx_SET (operands[0],
			      gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
					      gen_compare_reg_1 (code, x, y),
					      const0_rtx)));
      return true;
    }

  /* All the possibilities to use addx/subx based sequences have been
     exhausted, try for a 3 instruction sequence using v9 conditional
     moves.  */
  if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
    return true;

  /* Nope, do branches.  */
  return false;
}
3520
3521 /* Emit a conditional jump insn for the v9 architecture using comparison code
3522 CODE and jump target LABEL.
3523 This function exists to take advantage of the v9 brxx insns. */
3524
3525 static void
emit_v9_brxx_insn(enum rtx_code code,rtx op0,rtx label)3526 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3527 {
3528 emit_jump_insn (gen_rtx_SET (pc_rtx,
3529 gen_rtx_IF_THEN_ELSE (VOIDmode,
3530 gen_rtx_fmt_ee (code, GET_MODE (op0),
3531 op0, const0_rtx),
3532 gen_rtx_LABEL_REF (VOIDmode, label),
3533 pc_rtx)));
3534 }
3535
3536 /* Emit a conditional jump insn for the UA2011 architecture using
3537 comparison code CODE and jump target LABEL. This function exists
3538 to take advantage of the UA2011 Compare and Branch insns. */
3539
3540 static void
emit_cbcond_insn(enum rtx_code code,rtx op0,rtx op1,rtx label)3541 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3542 {
3543 rtx if_then_else;
3544
3545 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3546 gen_rtx_fmt_ee(code, GET_MODE(op0),
3547 op0, op1),
3548 gen_rtx_LABEL_REF (VOIDmode, label),
3549 pc_rtx);
3550
3551 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3552 }
3553
/* Expand a conditional branch.  OPERANDS[0] is the comparison rtx,
   OPERANDS[1] and OPERANDS[2] are its two operands and OPERANDS[3]
   is the jump target label.  */

void
emit_conditional_branch_insn (rtx operands[])
{
  /* The quad-word fp compare library routines all return nonzero to indicate
     true, which is different from the equivalent libgcc routines, so we must
     handle them specially here.  */
  if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
    {
      operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
					      GET_CODE (operands[0]));
      operands[1] = XEXP (operands[0], 0);
      operands[2] = XEXP (operands[0], 1);
    }

  /* If we can tell early on that the comparison is against a constant
     that won't fit in the 5-bit signed immediate field of a cbcond,
     use one of the other v9 conditional branch sequences.  */
  if (TARGET_CBCOND
      && GET_CODE (operands[1]) == REG
      && (GET_MODE (operands[1]) == SImode
	  || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
      && (GET_CODE (operands[2]) != CONST_INT
	  || SPARC_SIMM5_P (INTVAL (operands[2]))))
    {
      emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
      return;
    }

  /* A 64-bit register compared against zero can use the v9 brxx insns.  */
  if (TARGET_ARCH64 && operands[2] == const0_rtx
      && GET_CODE (operands[1]) == REG
      && GET_MODE (operands[1]) == DImode)
    {
      emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
      return;
    }

  /* Otherwise emit an explicit compare into a CC register and branch
     on the CC result.  */
  operands[1] = gen_compare_reg (operands[0]);
  operands[2] = const0_rtx;
  operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
				operands[1], operands[2]);
  emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
				  operands[3]));
}
3597
3598
3599 /* Generate a DFmode part of a hard TFmode register.
3600 REG is the TFmode hard register, LOW is 1 for the
3601 low 64bit of the register and 0 otherwise.
3602 */
3603 rtx
gen_df_reg(rtx reg,int low)3604 gen_df_reg (rtx reg, int low)
3605 {
3606 int regno = REGNO (reg);
3607
3608 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3609 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3610 return gen_rtx_REG (DFmode, regno);
3611 }
3612
/* Generate a call to FUNC with OPERANDS.  Operand 0 is the return value.
   Unlike normal calls, TFmode operands are passed by reference.  It is
   assumed that no more than 3 operands are required.  */

static void
emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
{
  rtx ret_slot = NULL, arg[3], func_sym;
  int i;

  /* We only expect to be called for conversions, unary, and binary ops.  */
  gcc_assert (nargs == 2 || nargs == 3);

  /* Turn each TFmode operand into the address it is passed by.  */
  for (i = 0; i < nargs; ++i)
    {
      rtx this_arg = operands[i];
      rtx this_slot;

      /* TFmode arguments and return values are passed by reference.  */
      if (GET_MODE (this_arg) == TFmode)
	{
	  int force_stack_temp;

	  /* NOTE(review): presumably works around _Qp_* libraries that
	     mishandle the result operand — hence the fresh stack slot
	     for operand 0 below; confirm against TARGET_BUGGY_QP_LIB.  */
	  force_stack_temp = 0;
	  if (TARGET_BUGGY_QP_LIB && i == 0)
	    force_stack_temp = 1;

	  if (GET_CODE (this_arg) == MEM
	      && ! force_stack_temp)
	    {
	      /* Already in memory: pass the address of the existing
		 object directly.  */
	      tree expr = MEM_EXPR (this_arg);
	      if (expr)
		mark_addressable (expr);
	      this_arg = XEXP (this_arg, 0);
	    }
	  else if (CONSTANT_P (this_arg)
		   && ! force_stack_temp)
	    {
	      /* Constants go into the constant pool; pass the address
		 of the pool slot.  */
	      this_slot = force_const_mem (TFmode, this_arg);
	      this_arg = XEXP (this_slot, 0);
	    }
	  else
	    {
	      /* Otherwise spill to a fresh stack temporary.  */
	      this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));

	      /* Operand 0 is the return value.  We'll copy it out later.  */
	      if (i > 0)
		emit_move_insn (this_slot, this_arg);
	      else
		ret_slot = this_slot;

	      this_arg = XEXP (this_slot, 0);
	    }
	}

      arg[i] = this_arg;
    }

  func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);

  if (GET_MODE (operands[0]) == TFmode)
    {
      /* TFmode result: the result comes back through the by-reference
	 first argument, so the call itself is emitted as void.  */
      if (nargs == 2)
	emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
			   arg[0], GET_MODE (arg[0]),
			   arg[1], GET_MODE (arg[1]));
      else
	emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
			   arg[0], GET_MODE (arg[0]),
			   arg[1], GET_MODE (arg[1]),
			   arg[2], GET_MODE (arg[2]));

      if (ret_slot)
	emit_move_insn (operands[0], ret_slot);
    }
  else
    {
      rtx ret;

      gcc_assert (nargs == 2);

      /* Non-TFmode result: an ordinary value-returning libcall.  */
      ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
				     GET_MODE (operands[0]),
				     arg[1], GET_MODE (arg[1]));

      if (ret != operands[0])
	emit_move_insn (operands[0], ret);
    }
}
3702
3703 /* Expand soft-float TFmode calls to sparc abi routines. */
3704
3705 static void
emit_soft_tfmode_binop(enum rtx_code code,rtx * operands)3706 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3707 {
3708 const char *func;
3709
3710 switch (code)
3711 {
3712 case PLUS:
3713 func = "_Qp_add";
3714 break;
3715 case MINUS:
3716 func = "_Qp_sub";
3717 break;
3718 case MULT:
3719 func = "_Qp_mul";
3720 break;
3721 case DIV:
3722 func = "_Qp_div";
3723 break;
3724 default:
3725 gcc_unreachable ();
3726 }
3727
3728 emit_soft_tfmode_libcall (func, 3, operands);
3729 }
3730
3731 static void
emit_soft_tfmode_unop(enum rtx_code code,rtx * operands)3732 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3733 {
3734 const char *func;
3735
3736 gcc_assert (code == SQRT);
3737 func = "_Qp_sqrt";
3738
3739 emit_soft_tfmode_libcall (func, 2, operands);
3740 }
3741
/* Expand a soft-float conversion to or from TFmode with rtx code CODE,
   mapping it onto the matching _Qp_* library routine.  OPERANDS[0] is
   the destination, OPERANDS[1] the source.  */

static void
emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
{
  const char *func;

  switch (code)
    {
    case FLOAT_EXTEND:
      /* Source mode selects the widening routine.  */
      switch (GET_MODE (operands[1]))
	{
	case E_SFmode:
	  func = "_Qp_stoq";
	  break;
	case E_DFmode:
	  func = "_Qp_dtoq";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case FLOAT_TRUNCATE:
      /* Destination mode selects the narrowing routine.  */
      switch (GET_MODE (operands[0]))
	{
	case E_SFmode:
	  func = "_Qp_qtos";
	  break;
	case E_DFmode:
	  func = "_Qp_qtod";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case FLOAT:
      switch (GET_MODE (operands[1]))
	{
	case E_SImode:
	  func = "_Qp_itoq";
	  /* On 64-bit, pass the 32-bit integer sign-extended.  */
	  if (TARGET_ARCH64)
	    operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
	  break;
	case E_DImode:
	  func = "_Qp_xtoq";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case UNSIGNED_FLOAT:
      switch (GET_MODE (operands[1]))
	{
	case E_SImode:
	  func = "_Qp_uitoq";
	  /* On 64-bit, pass the 32-bit integer zero-extended.  */
	  if (TARGET_ARCH64)
	    operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
	  break;
	case E_DImode:
	  func = "_Qp_uxtoq";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case FIX:
      switch (GET_MODE (operands[0]))
	{
	case E_SImode:
	  func = "_Qp_qtoi";
	  break;
	case E_DImode:
	  func = "_Qp_qtox";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case UNSIGNED_FIX:
      switch (GET_MODE (operands[0]))
	{
	case E_SImode:
	  func = "_Qp_qtoui";
	  break;
	case E_DImode:
	  func = "_Qp_qtoux";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  emit_soft_tfmode_libcall (func, 2, operands);
}
3843
3844 /* Expand a hard-float tfmode operation. All arguments must be in
3845 registers. */
3846
3847 static void
emit_hard_tfmode_operation(enum rtx_code code,rtx * operands)3848 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3849 {
3850 rtx op, dest;
3851
3852 if (GET_RTX_CLASS (code) == RTX_UNARY)
3853 {
3854 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3855 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3856 }
3857 else
3858 {
3859 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3860 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3861 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3862 operands[1], operands[2]);
3863 }
3864
3865 if (register_operand (operands[0], VOIDmode))
3866 dest = operands[0];
3867 else
3868 dest = gen_reg_rtx (GET_MODE (operands[0]));
3869
3870 emit_insn (gen_rtx_SET (dest, op));
3871
3872 if (dest != operands[0])
3873 emit_move_insn (operands[0], dest);
3874 }
3875
3876 void
emit_tfmode_binop(enum rtx_code code,rtx * operands)3877 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3878 {
3879 if (TARGET_HARD_QUAD)
3880 emit_hard_tfmode_operation (code, operands);
3881 else
3882 emit_soft_tfmode_binop (code, operands);
3883 }
3884
3885 void
emit_tfmode_unop(enum rtx_code code,rtx * operands)3886 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3887 {
3888 if (TARGET_HARD_QUAD)
3889 emit_hard_tfmode_operation (code, operands);
3890 else
3891 emit_soft_tfmode_unop (code, operands);
3892 }
3893
3894 void
emit_tfmode_cvt(enum rtx_code code,rtx * operands)3895 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3896 {
3897 if (TARGET_HARD_QUAD)
3898 emit_hard_tfmode_operation (code, operands);
3899 else
3900 emit_soft_tfmode_cvt (code, operands);
3901 }
3902
3903 /* Return nonzero if a branch/jump/call instruction will be emitting
3904 nop into its delay slot. */
3905
3906 int
empty_delay_slot(rtx_insn * insn)3907 empty_delay_slot (rtx_insn *insn)
3908 {
3909 rtx seq;
3910
3911 /* If no previous instruction (should not happen), return true. */
3912 if (PREV_INSN (insn) == NULL)
3913 return 1;
3914
3915 seq = NEXT_INSN (PREV_INSN (insn));
3916 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3917 return 0;
3918
3919 return 1;
3920 }
3921
/* Return nonzero if we should emit a nop after a cbcond instruction.
   The cbcond instruction does not have a delay slot, however there is
   a severe performance penalty if a control transfer appears right
   after a cbcond.  Therefore we emit a nop when we detect this
   situation.  */

int
emit_cbcond_nop (rtx_insn *insn)
{
  rtx next = next_active_insn (insn);

  /* Nothing follows: be conservative and emit the nop.  */
  if (!next)
    return 1;

  /* Look through a filled delay-slot SEQUENCE to the insn that will
     actually be emitted first.  */
  if (NONJUMP_INSN_P (next)
      && GET_CODE (PATTERN (next)) == SEQUENCE)
    next = XVECEXP (PATTERN (next), 0, 0);
  else if (CALL_P (next)
	   && GET_CODE (PATTERN (next)) == PARALLEL)
    {
      rtx delay = XVECEXP (PATTERN (next), 0, 1);

      if (GET_CODE (delay) == RETURN)
	{
	  /* It's a sibling call.  Do not emit the nop if we're going
	     to emit something other than the jump itself as the first
	     instruction of the sibcall sequence.  */
	  if (sparc_leaf_function_p || TARGET_FLAT)
	    return 0;
	}
    }

  /* The next emitted insn is not a control transfer: no nop needed.  */
  if (NONJUMP_INSN_P (next))
    return 0;

  return 1;
}
3959
3960 /* Return nonzero if TRIAL can go into the call delay slot. */
3961
3962 int
eligible_for_call_delay(rtx_insn * trial)3963 eligible_for_call_delay (rtx_insn *trial)
3964 {
3965 rtx pat;
3966
3967 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3968 return 0;
3969
3970 /* The only problematic cases are TLS sequences with Sun as/ld. */
3971 if ((TARGET_GNU_TLS && HAVE_GNU_LD) || !TARGET_TLS)
3972 return 1;
3973
3974 pat = PATTERN (trial);
3975
3976 /* We must reject tgd_add{32|64}, i.e.
3977 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3978 and tldm_add{32|64}, i.e.
3979 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3980 for Sun as/ld. */
3981 if (GET_CODE (pat) == SET
3982 && GET_CODE (SET_SRC (pat)) == PLUS)
3983 {
3984 rtx unspec = XEXP (SET_SRC (pat), 1);
3985
3986 if (GET_CODE (unspec) == UNSPEC
3987 && (XINT (unspec, 1) == UNSPEC_TLSGD
3988 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3989 return 0;
3990 }
3991
3992 return 1;
3993 }
3994
3995 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3996 instruction. RETURN_P is true if the v9 variant 'return' is to be
3997 considered in the test too.
3998
3999 TRIAL must be a SET whose destination is a REG appropriate for the
4000 'restore' instruction or, if RETURN_P is true, for the 'return'
4001 instruction. */
4002
static int
eligible_for_restore_insn (rtx trial, bool return_p)
{
  rtx pat = PATTERN (trial);
  rtx src = SET_SRC (pat);
  bool src_is_freg = false;
  rtx src_reg;

  /* Since we now can do moves between float and integer registers when
     VIS3 is enabled, we have to catch this case.  We can allow such
     moves when doing a 'return' however.  */
  src_reg = src;
  if (GET_CODE (src_reg) == SUBREG)
    src_reg = SUBREG_REG (src_reg);
  if (GET_CODE (src_reg) == REG
      && SPARC_FP_REG_P (REGNO (src_reg)))
    src_is_freg = true;

  /* The 'restore src,%g0,dest' pattern for word mode and below.  */
  if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
      && arith_operand (src, GET_MODE (src))
      && ! src_is_freg)
    {
      /* Double-word sources only fit when the registers are 64-bit.  */
      if (TARGET_ARCH64)
	return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
      else
	return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
    }

  /* The 'restore src,%g0,dest' pattern for double-word mode.  */
  else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
	   && arith_double_operand (src, GET_MODE (src))
	   && ! src_is_freg)
    return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);

  /* The 'restore src,%g0,dest' pattern for float if no FPU.  */
  else if (! TARGET_FPU && register_operand (src, SFmode))
    return 1;

  /* The 'restore src,%g0,dest' pattern for double if no FPU.  */
  else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
    return 1;

  /* If we have the 'return' instruction, anything that does not use
     local or output registers and can go into a delay slot wins.  */
  else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
    return 1;

  /* The 'restore src1,src2,dest' pattern for SImode.  */
  else if (GET_CODE (src) == PLUS
	   && register_operand (XEXP (src, 0), SImode)
	   && arith_operand (XEXP (src, 1), SImode))
    return 1;

  /* The 'restore src1,src2,dest' pattern for DImode.  */
  else if (GET_CODE (src) == PLUS
	   && register_operand (XEXP (src, 0), DImode)
	   && arith_double_operand (XEXP (src, 1), DImode))
    return 1;

  /* The 'restore src1,%lo(src2),dest' pattern.  */
  else if (GET_CODE (src) == LO_SUM
	   && ! TARGET_CM_MEDMID
	   && ((register_operand (XEXP (src, 0), SImode)
		&& immediate_operand (XEXP (src, 1), SImode))
	       || (TARGET_ARCH64
		   && register_operand (XEXP (src, 0), DImode)
		   && immediate_operand (XEXP (src, 1), DImode))))
    return 1;

  /* The 'restore src,src,dest' pattern: a left shift by 1 computes
     src + src, which 'restore' can express with src as both addends.  */
  else if (GET_CODE (src) == ASHIFT
	   && (register_operand (XEXP (src, 0), SImode)
	       || register_operand (XEXP (src, 0), DImode))
	   && XEXP (src, 1) == const1_rtx)
    return 1;

  return 0;
}
4082
4083 /* Return nonzero if TRIAL can go into the function return's delay slot. */
4084
int
eligible_for_return_delay (rtx_insn *trial)
{
  int regno;
  rtx pat;

  /* If the function uses __builtin_eh_return, the eh_return machinery
     occupies the delay slot.  */
  if (crtl->calls_eh_return)
    return 0;

  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
    return 0;

  /* In the case of a leaf or flat function, anything can go into the slot.  */
  if (sparc_leaf_function_p || TARGET_FLAT)
    return 1;

  if (!NONJUMP_INSN_P (trial))
    return 0;

  pat = PATTERN (trial);
  if (GET_CODE (pat) == PARALLEL)
    {
      int i;

      /* A multi-set insn can only be paired with the v9 'return'.  */
      if (! TARGET_V9)
	return 0;
      for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
	{
	  rtx expr = XVECEXP (pat, 0, i);
	  if (GET_CODE (expr) != SET)
	    return 0;
	  if (GET_CODE (SET_DEST (expr)) != REG)
	    return 0;
	  /* Reject sets of the output/local registers (regnos 8-23),
	     which cannot be combined with a restore/return.  */
	  regno = REGNO (SET_DEST (expr));
	  if (regno >= 8 && regno < 24)
	    return 0;
	}
      return !epilogue_renumber (&pat, 1);
    }

  if (GET_CODE (pat) != SET)
    return 0;

  if (GET_CODE (SET_DEST (pat)) != REG)
    return 0;

  regno = REGNO (SET_DEST (pat));

  /* Otherwise, only operations which can be done in tandem with
     a `restore' or `return' insn can go into the delay slot.  */
  if (regno >= 8 && regno < 24)
    return 0;

  /* If this instruction sets up floating point register and we have a return
     instruction, it can probably go in.  But restore will not work
     with FP_REGS.  */
  if (! SPARC_INT_REG_P (regno))
    return TARGET_V9 && !epilogue_renumber (&pat, 1);

  return eligible_for_restore_insn (trial, true);
}
4148
4149 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
4150
int
eligible_for_sibcall_delay (rtx_insn *trial)
{
  rtx pat;

  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
    return 0;

  if (!NONJUMP_INSN_P (trial))
    return 0;

  pat = PATTERN (trial);

  if (sparc_leaf_function_p || TARGET_FLAT)
    {
      /* If the tail call is done using the call instruction,
	 we have to restore %o7 in the delay slot.  */
      if (LEAF_SIBCALL_SLOT_RESERVED_P)
	return 0;

      /* %g1 is used to build the function address */
      if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
	return 0;

      return 1;
    }

  if (GET_CODE (pat) != SET)
    return 0;

  /* Otherwise, only operations which can be done in tandem with
     a `restore' insn can go into the delay slot.  The destination must
     be an integer register that is not an output/local (regnos 8-23).  */
  if (GET_CODE (SET_DEST (pat)) != REG
      || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
      || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
    return 0;

  /* If it mentions %o7, it can't go in, because sibcall will clobber it
     in most cases.  */
  if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
    return 0;

  return eligible_for_restore_insn (trial, false);
}
4195
4196 /* Determine if it's legal to put X into the constant pool. This
4197 is not possible if X contains the address of a symbol that is
4198 not constant (TLS) or not known at final link time (PIC). */
4199
4200 static bool
sparc_cannot_force_const_mem(machine_mode mode,rtx x)4201 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
4202 {
4203 switch (GET_CODE (x))
4204 {
4205 case CONST_INT:
4206 case CONST_WIDE_INT:
4207 case CONST_DOUBLE:
4208 case CONST_VECTOR:
4209 /* Accept all non-symbolic constants. */
4210 return false;
4211
4212 case LABEL_REF:
4213 /* Labels are OK iff we are non-PIC. */
4214 return flag_pic != 0;
4215
4216 case SYMBOL_REF:
4217 /* 'Naked' TLS symbol references are never OK,
4218 non-TLS symbols are OK iff we are non-PIC. */
4219 if (SYMBOL_REF_TLS_MODEL (x))
4220 return true;
4221 else
4222 return flag_pic != 0;
4223
4224 case CONST:
4225 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
4226 case PLUS:
4227 case MINUS:
4228 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
4229 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
4230 case UNSPEC:
4231 return true;
4232 default:
4233 gcc_unreachable ();
4234 }
4235 }
4236
/* Global Offset Table support.  */

/* SYMBOL_REF for _GLOBAL_OFFSET_TABLE_, created lazily by sparc_got.  */
static GTY(()) rtx got_symbol_rtx = NULL_RTX;
/* Register holding the GOT base, set up by load_got_register.  */
static GTY(()) rtx got_register_rtx = NULL_RTX;
/* SYMBOL_REF for the get-PC helper used to load the GOT register.  */
static GTY(()) rtx got_helper_rtx = NULL_RTX;

/* Set by output_load_pcrel_sym once the helper is actually emitted.  */
static GTY(()) bool got_helper_needed = false;
4243
4244 /* Return the SYMBOL_REF for the Global Offset Table. */
4245
4246 static rtx
sparc_got(void)4247 sparc_got (void)
4248 {
4249 if (!got_symbol_rtx)
4250 got_symbol_rtx = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
4251
4252 return got_symbol_rtx;
4253 }
4254
4255 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4256
4257 static rtx
gen_load_pcrel_sym(rtx op0,rtx op1,rtx op2)4258 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2)
4259 {
4260 int orig_flag_pic = flag_pic;
4261 rtx insn;
4262
4263 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4264 flag_pic = 0;
4265 if (TARGET_ARCH64)
4266 insn = gen_load_pcrel_symdi (op0, op1, op2, GEN_INT (REGNO (op0)));
4267 else
4268 insn = gen_load_pcrel_symsi (op0, op1, op2, GEN_INT (REGNO (op0)));
4269 flag_pic = orig_flag_pic;
4270
4271 return insn;
4272 }
4273
4274 /* Output the load_pcrel_sym{si,di} patterns. */
4275
4276 const char *
output_load_pcrel_sym(rtx * operands)4277 output_load_pcrel_sym (rtx *operands)
4278 {
4279 if (flag_delayed_branch)
4280 {
4281 output_asm_insn ("sethi\t%%hi(%a1-4), %0", operands);
4282 output_asm_insn ("call\t%a2", operands);
4283 output_asm_insn (" add\t%0, %%lo(%a1+4), %0", operands);
4284 }
4285 else
4286 {
4287 output_asm_insn ("sethi\t%%hi(%a1-8), %0", operands);
4288 output_asm_insn ("add\t%0, %%lo(%a1-4), %0", operands);
4289 output_asm_insn ("call\t%a2", operands);
4290 output_asm_insn (" nop", NULL);
4291 }
4292
4293 if (operands[2] == got_helper_rtx)
4294 got_helper_needed = true;
4295
4296 return "";
4297 }
4298
4299 #ifdef HAVE_GAS_HIDDEN
4300 # define USE_HIDDEN_LINKONCE 1
4301 #else
4302 # define USE_HIDDEN_LINKONCE 0
4303 #endif
4304
4305 /* Emit code to load the GOT register. */
4306
void
load_got_register (void)
{
  rtx insn;

  if (TARGET_VXWORKS_RTP)
    {
      /* On VxWorks RTP the PIC register itself holds the GOT and is
	 loaded by a dedicated pattern.  */
      if (!got_register_rtx)
	got_register_rtx = pic_offset_table_rtx;

      insn = gen_vxworks_load_got ();
    }
  else
    {
      if (!got_register_rtx)
	got_register_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);

      /* The GOT symbol is subject to a PC-relative relocation so we need a
	 helper function to add the PC value and thus get the final value.  */
      if (!got_helper_rtx)
	{
	  char name[32];

	  /* Skip the leading '%' as that cannot be used in a symbol name.  */
	  if (USE_HIDDEN_LINKONCE)
	    sprintf (name, "__sparc_get_pc_thunk.%s",
		     reg_names[REGNO (got_register_rtx)] + 1);
	  else
	    ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC",
					 REGNO (got_register_rtx));

	  got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
	}

      insn
	= gen_load_pcrel_sym (got_register_rtx, sparc_got (), got_helper_rtx);
    }

  emit_insn (insn);
}
4347
4348 /* Ensure that we are not using patterns that are not OK with PIC. */
4349
4350 int
check_pic(int i)4351 check_pic (int i)
4352 {
4353 rtx op;
4354
4355 switch (flag_pic)
4356 {
4357 case 1:
4358 op = recog_data.operand[i];
4359 gcc_assert (GET_CODE (op) != SYMBOL_REF
4360 && (GET_CODE (op) != CONST
4361 || (GET_CODE (XEXP (op, 0)) == MINUS
4362 && XEXP (XEXP (op, 0), 0) == sparc_got ()
4363 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
4364 /* fallthrough */
4365 case 2:
4366 default:
4367 return 1;
4368 }
4369 }
4370
4371 /* Return true if X is an address which needs a temporary register when
4372 reloaded while generating PIC code. */
4373
4374 int
pic_address_needs_scratch(rtx x)4375 pic_address_needs_scratch (rtx x)
4376 {
4377 /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */
4378 if (GET_CODE (x) == CONST
4379 && GET_CODE (XEXP (x, 0)) == PLUS
4380 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
4381 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4382 && !SMALL_INT (XEXP (XEXP (x, 0), 1)))
4383 return 1;
4384
4385 return 0;
4386 }
4387
4388 /* Determine if a given RTX is a valid constant. We already know this
4389 satisfies CONSTANT_P. */
4390
4391 static bool
sparc_legitimate_constant_p(machine_mode mode,rtx x)4392 sparc_legitimate_constant_p (machine_mode mode, rtx x)
4393 {
4394 switch (GET_CODE (x))
4395 {
4396 case CONST:
4397 case SYMBOL_REF:
4398 if (sparc_tls_referenced_p (x))
4399 return false;
4400 break;
4401
4402 case CONST_DOUBLE:
4403 /* Floating point constants are generally not ok.
4404 The only exception is 0.0 and all-ones in VIS. */
4405 if (TARGET_VIS
4406 && SCALAR_FLOAT_MODE_P (mode)
4407 && (const_zero_operand (x, mode)
4408 || const_all_ones_operand (x, mode)))
4409 return true;
4410
4411 return false;
4412
4413 case CONST_VECTOR:
4414 /* Vector constants are generally not ok.
4415 The only exception is 0 or -1 in VIS. */
4416 if (TARGET_VIS
4417 && (const_zero_operand (x, mode)
4418 || const_all_ones_operand (x, mode)))
4419 return true;
4420
4421 return false;
4422
4423 default:
4424 break;
4425 }
4426
4427 return true;
4428 }
4429
4430 /* Determine if a given RTX is a valid constant address. */
4431
4432 bool
constant_address_p(rtx x)4433 constant_address_p (rtx x)
4434 {
4435 switch (GET_CODE (x))
4436 {
4437 case LABEL_REF:
4438 case CONST_INT:
4439 case HIGH:
4440 return true;
4441
4442 case CONST:
4443 if (flag_pic && pic_address_needs_scratch (x))
4444 return false;
4445 return sparc_legitimate_constant_p (Pmode, x);
4446
4447 case SYMBOL_REF:
4448 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
4449
4450 default:
4451 return false;
4452 }
4453 }
4454
4455 /* Nonzero if the constant value X is a legitimate general operand
4456 when generating PIC code. It is given that flag_pic is on and
4457 that X satisfies CONSTANT_P. */
4458
4459 bool
legitimate_pic_operand_p(rtx x)4460 legitimate_pic_operand_p (rtx x)
4461 {
4462 if (pic_address_needs_scratch (x))
4463 return false;
4464 if (sparc_tls_referenced_p (x))
4465 return false;
4466 return true;
4467 }
4468
4469 /* Return true if X is a representation of the PIC register. */
4470
4471 static bool
sparc_pic_register_p(rtx x)4472 sparc_pic_register_p (rtx x)
4473 {
4474 if (!REG_P (x) || !pic_offset_table_rtx)
4475 return false;
4476
4477 if (x == pic_offset_table_rtx)
4478 return true;
4479
4480 if (!HARD_REGISTER_P (pic_offset_table_rtx)
4481 && (HARD_REGISTER_P (x) || lra_in_progress || reload_in_progress)
4482 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
4483 return true;
4484
4485 return false;
4486 }
4487
/* True if X is a CONST_INT usable as the signed 13-bit immediate offset of
   a MODE memory access, i.e. the whole access fits in [-0x1000, 0x1000).  */
#define RTX_OK_FOR_OFFSET_P(X, MODE)			\
  (CONST_INT_P (X)					\
   && INTVAL (X) >= -0x1000				\
   && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))

/* Likewise, but with the upper bound tightened to 0xc00 - size, leaving
   0x400 of headroom so that a %lo() value can still be added on top
   without overflowing the immediate field (see USE_AS_OFFSETABLE_LO10).  */
#define RTX_OK_FOR_OLO10_P(X, MODE)			\
  (CONST_INT_P (X)					\
   && INTVAL (X) >= -0x1000				\
   && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
4497
4498 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
4499
4500 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
4501 ordinarily. This changes a bit when generating PIC. */
4502
static bool
sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
{
  /* Decompose ADDR into base register RS1, optional index register RS2
     and optional immediate IMM1; validate the registers at the end.  */
  rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;

  if (REG_P (addr) || GET_CODE (addr) == SUBREG)
    rs1 = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rs1 = XEXP (addr, 0);
      rs2 = XEXP (addr, 1);

      /* Canonicalize.  REG comes first, if there are no regs,
	 LO_SUM comes first.  */
      if (!REG_P (rs1)
	  && GET_CODE (rs1) != SUBREG
	  && (REG_P (rs2)
	      || GET_CODE (rs2) == SUBREG
	      || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
	{
	  rs1 = XEXP (addr, 1);
	  rs2 = XEXP (addr, 0);
	}

      /* REG + constant: either a symbolic operand relative to the PIC
	 register under -fpic, or a plain 13-bit displacement.  */
      if ((flag_pic == 1
	   && sparc_pic_register_p (rs1)
	   && !REG_P (rs2)
	   && GET_CODE (rs2) != SUBREG
	   && GET_CODE (rs2) != LO_SUM
	   && GET_CODE (rs2) != MEM
	   && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
	   && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
	   && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
	  || ((REG_P (rs1)
	       || GET_CODE (rs1) == SUBREG)
	      && RTX_OK_FOR_OFFSET_P (rs2, mode)))
	{
	  imm1 = rs2;
	  rs2 = NULL;
	}
      else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
	       && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
	{
	  /* We prohibit REG + REG for TFmode when there are no quad move insns
	     and we consequently need to split.  We do this because REG+REG
	     is not an offsettable address.  If we get the situation in reload
	     where source and destination of a movtf pattern are both MEMs with
	     REG+REG address, then only one of them gets converted to an
	     offsettable address.  */
	  if (mode == TFmode
	      && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
	    return 0;

	  /* Likewise for TImode, but in all cases.  */
	  if (mode == TImode)
	    return 0;

	  /* We prohibit REG + REG on ARCH32 if not optimizing for
	     DFmode/DImode because then mem_min_alignment is likely to be zero
	     after reload and the  forced split would lack a matching splitter
	     pattern.  */
	  if (TARGET_ARCH32 && !optimize
	      && (mode == DFmode || mode == DImode))
	    return 0;
	}
      /* LO_SUM + small offset, folded into the %lo() displacement.  */
      else if (USE_AS_OFFSETABLE_LO10
	       && GET_CODE (rs1) == LO_SUM
	       && TARGET_ARCH64
	       && ! TARGET_CM_MEDMID
	       && RTX_OK_FOR_OLO10_P (rs2, mode))
	{
	  rs2 = NULL;
	  imm1 = XEXP (rs1, 1);
	  rs1 = XEXP (rs1, 0);
	  if (!CONSTANT_P (imm1)
	      || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
	    return 0;
	}
    }
  else if (GET_CODE (addr) == LO_SUM)
    {
      rs1 = XEXP (addr, 0);
      imm1 = XEXP (addr, 1);

      if (!CONSTANT_P (imm1)
	  || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
	return 0;

      /* We can't allow TFmode in 32-bit mode, because an offset greater
	 than the alignment (8) may cause the LO_SUM to overflow.  */
      if (mode == TFmode && TARGET_ARCH32)
	return 0;

      /* During reload, accept the HIGH+LO_SUM construct generated by
	 sparc_legitimize_reload_address.  */
      if (reload_in_progress
	  && GET_CODE (rs1) == HIGH
	  && XEXP (rs1, 0) == imm1)
	return 1;
    }
  else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
    return 1;
  else
    return 0;

  /* Now validate the base (and index) registers.  */
  if (GET_CODE (rs1) == SUBREG)
    rs1 = SUBREG_REG (rs1);
  if (!REG_P (rs1))
    return 0;

  if (rs2)
    {
      if (GET_CODE (rs2) == SUBREG)
	rs2 = SUBREG_REG (rs2);
      if (!REG_P (rs2))
	return 0;
    }

  if (strict)
    {
      if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
	  || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
	return 0;
    }
  else
    {
      /* Non-strict: also accept pseudos and the frame pointer, which may
	 still be eliminated.  */
      if ((! SPARC_INT_REG_P (REGNO (rs1))
	   && REGNO (rs1) != FRAME_POINTER_REGNUM
	   && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
	  || (rs2
	      && (! SPARC_INT_REG_P (REGNO (rs2))
		  && REGNO (rs2) != FRAME_POINTER_REGNUM
		  && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
	return 0;
    }
  return 1;
}
4640
4641 /* Return the SYMBOL_REF for the tls_get_addr function. */
4642
4643 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4644
4645 static rtx
sparc_tls_get_addr(void)4646 sparc_tls_get_addr (void)
4647 {
4648 if (!sparc_tls_symbol)
4649 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4650
4651 return sparc_tls_symbol;
4652 }
4653
4654 /* Return the Global Offset Table to be used in TLS mode. */
4655
4656 static rtx
sparc_tls_got(void)4657 sparc_tls_got (void)
4658 {
4659 /* In PIC mode, this is just the PIC offset table. */
4660 if (flag_pic)
4661 {
4662 crtl->uses_pic_offset_table = 1;
4663 return pic_offset_table_rtx;
4664 }
4665
4666 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4667 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4668 if (TARGET_SUN_TLS && TARGET_ARCH32)
4669 {
4670 load_got_register ();
4671 return got_register_rtx;
4672 }
4673
4674 /* In all other cases, we load a new pseudo with the GOT symbol. */
4675 return copy_to_reg (sparc_got ());
4676 }
4677
4678 /* Return true if X contains a thread-local symbol. */
4679
4680 static bool
sparc_tls_referenced_p(rtx x)4681 sparc_tls_referenced_p (rtx x)
4682 {
4683 if (!TARGET_HAVE_TLS)
4684 return false;
4685
4686 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4687 x = XEXP (XEXP (x, 0), 0);
4688
4689 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4690 return true;
4691
4692 /* That's all we handle in sparc_legitimize_tls_address for now. */
4693 return false;
4694 }
4695
4696 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4697 this (thread-local) address. */
4698
static rtx
sparc_legitimize_tls_address (rtx addr)
{
  rtx temp1, temp2, temp3, ret, o0, got;
  rtx_insn *insn;

  gcc_assert (can_create_pseudo_p ());

  if (GET_CODE (addr) == SYMBOL_REF)
    /* Although the various sethi/or sequences generate SImode values, many of
       them can be transformed by the linker when relaxing and, if relaxing to
       local-exec, will become a sethi/xor pair, which is signed and therefore
       a full DImode value in 64-bit mode.  Thus we must use Pmode, lest these
       values be spilled onto the stack in 64-bit mode.  */
    switch (SYMBOL_REF_TLS_MODEL (addr))
      {
      case TLS_MODEL_GLOBAL_DYNAMIC:
	/* GD: compute the GOT slot of the symbol and call __tls_get_addr,
	   whose result comes back in register 8 (%o0).  */
	start_sequence ();
	temp1 = gen_reg_rtx (Pmode);
	temp2 = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	o0 = gen_rtx_REG (Pmode, 8);
	got = sparc_tls_got ();
	if (TARGET_ARCH32)
	  {
	    emit_insn (gen_tgd_hi22si (temp1, addr));
	    emit_insn (gen_tgd_lo10si (temp2, temp1, addr));
	    emit_insn (gen_tgd_addsi (o0, got, temp2, addr));
	    insn = emit_call_insn (gen_tgd_callsi (o0, sparc_tls_get_addr (),
						   addr, const1_rtx));
	  }
	else
	  {
	    emit_insn (gen_tgd_hi22di (temp1, addr));
	    emit_insn (gen_tgd_lo10di (temp2, temp1, addr));
	    emit_insn (gen_tgd_adddi (o0, got, temp2, addr));
	    insn = emit_call_insn (gen_tgd_calldi (o0, sparc_tls_get_addr (),
						   addr, const1_rtx));
	  }
	use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
	RTL_CONST_CALL_P (insn) = 1;
	insn = get_insns ();
	end_sequence ();
	emit_libcall_block (insn, ret, o0, addr);
	break;

      case TLS_MODEL_LOCAL_DYNAMIC:
	/* LD: call __tls_get_addr once to get the module base, then add
	   the symbol's module-relative offset.  */
	start_sequence ();
	temp1 = gen_reg_rtx (Pmode);
	temp2 = gen_reg_rtx (Pmode);
	temp3 = gen_reg_rtx (Pmode);
	ret = gen_reg_rtx (Pmode);
	o0 = gen_rtx_REG (Pmode, 8);
	got = sparc_tls_got ();
	if (TARGET_ARCH32)
	  {
	    emit_insn (gen_tldm_hi22si (temp1));
	    emit_insn (gen_tldm_lo10si (temp2, temp1));
	    emit_insn (gen_tldm_addsi (o0, got, temp2));
	    insn = emit_call_insn (gen_tldm_callsi (o0, sparc_tls_get_addr (),
						    const1_rtx));
	  }
	else
	  {
	    emit_insn (gen_tldm_hi22di (temp1));
	    emit_insn (gen_tldm_lo10di (temp2, temp1));
	    emit_insn (gen_tldm_adddi (o0, got, temp2));
	    insn = emit_call_insn (gen_tldm_calldi (o0, sparc_tls_get_addr (),
						    const1_rtx));
	  }
	use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
	RTL_CONST_CALL_P (insn) = 1;
	insn = get_insns ();
	end_sequence ();
	/* Attach a unique REG_EQUAL, to allow the RTL optimizers to
	   share the LD_BASE result with other LD model accesses.  */
	emit_libcall_block (insn, temp3, o0,
			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					    UNSPEC_TLSLD_BASE));
	temp1 = gen_reg_rtx (Pmode);
	temp2 = gen_reg_rtx (Pmode);
	if (TARGET_ARCH32)
	  {
	    emit_insn (gen_tldo_hix22si (temp1, addr));
	    emit_insn (gen_tldo_lox10si (temp2, temp1, addr));
	    emit_insn (gen_tldo_addsi (ret, temp3, temp2, addr));
	  }
	else
	  {
	    emit_insn (gen_tldo_hix22di (temp1, addr));
	    emit_insn (gen_tldo_lox10di (temp2, temp1, addr));
	    emit_insn (gen_tldo_adddi (ret, temp3, temp2, addr));
	  }
	break;

      case TLS_MODEL_INITIAL_EXEC:
	/* IE: load the symbol's TP offset from the GOT, then add the
	   thread pointer (register 7, %g7 per the SPARC TLS ABI).  */
	temp1 = gen_reg_rtx (Pmode);
	temp2 = gen_reg_rtx (Pmode);
	temp3 = gen_reg_rtx (Pmode);
	got = sparc_tls_got ();
	if (TARGET_ARCH32)
	  {
	    emit_insn (gen_tie_hi22si (temp1, addr));
	    emit_insn (gen_tie_lo10si (temp2, temp1, addr));
	    emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
	  }
	else
	  {
	    emit_insn (gen_tie_hi22di (temp1, addr));
	    emit_insn (gen_tie_lo10di (temp2, temp1, addr));
	    emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
	  }
	if (TARGET_SUN_TLS)
	  {
	    /* Sun as needs the relocated add instruction.  */
	    ret = gen_reg_rtx (Pmode);
	    if (TARGET_ARCH32)
	      emit_insn (gen_tie_addsi (ret, gen_rtx_REG (Pmode, 7),
					temp3, addr));
	    else
	      emit_insn (gen_tie_adddi (ret, gen_rtx_REG (Pmode, 7),
					temp3, addr));
	  }
	else
	  ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
	break;

      case TLS_MODEL_LOCAL_EXEC:
	/* LE: the TP offset is a link-time constant; materialize it with
	   sethi/xor and add the thread pointer.  */
	temp1 = gen_reg_rtx (Pmode);
	temp2 = gen_reg_rtx (Pmode);
	if (TARGET_ARCH32)
	  {
	    emit_insn (gen_tle_hix22si (temp1, addr));
	    emit_insn (gen_tle_lox10si (temp2, temp1, addr));
	  }
	else
	  {
	    emit_insn (gen_tle_hix22di (temp1, addr));
	    emit_insn (gen_tle_lox10di (temp2, temp1, addr));
	  }
	ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
	break;

      default:
	gcc_unreachable ();
      }

  else if (GET_CODE (addr) == CONST)
    {
      /* Legitimize the symbol part of symbol+offset and re-add the
	 offset, forcing it into a register if it is not small.  */
      rtx base, offset;

      gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);

      base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
      offset = XEXP (XEXP (addr, 0), 1);

      base = force_operand (base, NULL_RTX);
      if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
	offset = force_reg (Pmode, offset);
      ret = gen_rtx_PLUS (Pmode, base, offset);
    }

  else
    gcc_unreachable ();  /* for now ... */

  return ret;
}
4865
4866 /* Legitimize PIC addresses. If the address is already position-independent,
4867 we return ORIG. Newly generated position-independent addresses go into a
4868 reg. This is REG if nonzero, otherwise we allocate register(s) as
4869 necessary. */
4870
static rtx
sparc_legitimize_pic_address (rtx orig, rtx reg)
{
  if (GET_CODE (orig) == SYMBOL_REF
      /* See the comment in sparc_expand_move.  */
      || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
    {
      bool gotdata_op = false;
      rtx pic_ref, address;
      rtx_insn *insn;

      if (!reg)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      if (flag_pic == 2)
	{
	  /* If not during reload, allocate another temp reg here for loading
	     in the address, so that these instructions can be optimized
	     properly.  */
	  rtx temp_reg = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : reg;

	  /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
	     won't get confused into thinking that these two instructions
	     are loading in the true address of the symbol.  If in the
	     future a PIC rtx exists, that should be used instead.  */
	  if (TARGET_ARCH64)
	    {
	      emit_insn (gen_movdi_high_pic (temp_reg, orig));
	      emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
	    }
	  else
	    {
	      emit_insn (gen_movsi_high_pic (temp_reg, orig));
	      emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
	    }

	  address = temp_reg;
	  gotdata_op = true;
	}
      else
	address = orig;

      crtl->uses_pic_offset_table = 1;
      if (gotdata_op)
	{
	  /* Use the gotdata patterns so the assembler/linker can relax
	     the GOT load (flag_pic == 2 path).  */
	  if (TARGET_ARCH64)
	    insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
							pic_offset_table_rtx,
							address, orig));
	  else
	    insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
							pic_offset_table_rtx,
							address, orig));
	}
      else
	{
	  /* Plain GOT load: MEM[pic_reg + address].  */
	  pic_ref
	    = gen_const_mem (Pmode,
			     gen_rtx_PLUS (Pmode,
					   pic_offset_table_rtx, address));
	  insn = emit_move_insn (reg, pic_ref);
	}

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);
      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      /* Already in the pic_register + constant form: nothing to do.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && sparc_pic_register_p (XEXP (XEXP (orig, 0), 0)))
	return orig;

      if (!reg)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      /* Legitimize both halves of symbol+offset; REG may be reused for
	 the offset only when the base did not consume it.  */
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
      base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
      offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
					     base == reg ? NULL_RTX : reg);

      if (GET_CODE (offset) == CONST_INT)
	{
	  if (SMALL_INT (offset))
	    return plus_constant (Pmode, base, INTVAL (offset));
	  else if (can_create_pseudo_p ())
	    offset = force_reg (Pmode, offset);
	  else
	    /* If we reach here, then something is seriously wrong.  */
	    gcc_unreachable ();
	}
      return gen_rtx_PLUS (Pmode, base, offset);
    }
  else if (GET_CODE (orig) == LABEL_REF)
    /* ??? We ought to be checking that the register is live instead, in case
       it is eliminated.  */
    crtl->uses_pic_offset_table = 1;

  return orig;
}
4980
4981 /* Try machine-dependent ways of modifying an illegitimate address X
4982 to be legitimate. If we find one, return the new, valid address.
4983
4984 OLDX is the address as it was before break_out_memory_refs was called.
4985 In some cases it is useful to look at this to decide what needs to be done.
4986
4987 MODE is the mode of the operand pointed to by X.
4988
4989 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4990
static rtx
sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			  machine_mode mode)
{
  rtx orig_x = x;

  /* First canonicalize PLUS trees: force MULT and nested PLUS operands
     into registers so the address becomes REG+REG or REG+imm.  */
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
    x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
		      force_operand (XEXP (x, 0), NULL_RTX));
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
		      force_operand (XEXP (x, 1), NULL_RTX));
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
    x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
		      XEXP (x, 1));
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
		      force_operand (XEXP (x, 1), NULL_RTX));

  /* If the canonicalization alone produced a valid address, stop here.  */
  if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
    return x;

  /* Otherwise, handle TLS, then PIC, then plain constant parts.  */
  if (sparc_tls_referenced_p (x))
    x = sparc_legitimize_tls_address (x);
  else if (flag_pic)
    x = sparc_legitimize_pic_address (x, NULL_RTX);
  else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
		      copy_to_mode_reg (Pmode, XEXP (x, 1)));
  else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
    x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
		      copy_to_mode_reg (Pmode, XEXP (x, 0)));
  else if (GET_CODE (x) == SYMBOL_REF
	   || GET_CODE (x) == CONST
	   || GET_CODE (x) == LABEL_REF)
    x = copy_to_suggested_reg (x, NULL_RTX, Pmode);

  return x;
}
5030
5031 /* Delegitimize an address that was legitimized by the above function. */
5032
5033 static rtx
sparc_delegitimize_address(rtx x)5034 sparc_delegitimize_address (rtx x)
5035 {
5036 x = delegitimize_mem_from_attrs (x);
5037
5038 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
5039 switch (XINT (XEXP (x, 1), 1))
5040 {
5041 case UNSPEC_MOVE_PIC:
5042 case UNSPEC_TLSLE:
5043 x = XVECEXP (XEXP (x, 1), 0, 0);
5044 gcc_assert (GET_CODE (x) == SYMBOL_REF);
5045 break;
5046 default:
5047 break;
5048 }
5049
5050 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
5051 if (GET_CODE (x) == MINUS
5052 && sparc_pic_register_p (XEXP (x, 0))
5053 && GET_CODE (XEXP (x, 1)) == LO_SUM
5054 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
5055 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
5056 {
5057 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
5058 gcc_assert (GET_CODE (x) == LABEL_REF
5059 || (GET_CODE (x) == CONST
5060 && GET_CODE (XEXP (x, 0)) == PLUS
5061 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5062 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT));
5063 }
5064
5065 return x;
5066 }
5067
5068 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
5069 replace the input X, or the original X if no replacement is called for.
5070 The output parameter *WIN is 1 if the calling macro should goto WIN,
5071 0 if it should not.
5072
5073 For SPARC, we wish to handle addresses by splitting them into
5074 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
5075 This cuts the number of extra insns by one.
5076
5077 Do nothing when generating PIC code and the address is a symbolic
5078 operand or requires a scratch register. */
5079
rtx
sparc_legitimize_reload_address (rtx x, machine_mode mode,
				 int opnum, int type,
				 int ind_levels ATTRIBUTE_UNUSED, int *win)
{
  /* Decompose SImode constants into HIGH+LO_SUM.  */
  if (CONSTANT_P (x)
      && (mode != TFmode || TARGET_ARCH64)
      && GET_MODE (x) == SImode
      && GET_CODE (x) != LO_SUM
      && GET_CODE (x) != HIGH
      && sparc_cmodel <= CM_MEDLOW
      && !(flag_pic
	   && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
    {
      /* Rewrite X as (lo_sum (high X) X) and reload the HIGH part into
	 a base register.  */
      x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type)type);
      *win = 1;
      return x;
    }

  /* We have to recognize what we have already generated above.  */
  if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
    {
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type)type);
      *win = 1;
      return x;
    }

  /* No transformation applies: tell the caller not to goto WIN.  */
  *win = 0;
  return x;
}
5116
5117 /* Return true if ADDR (a legitimate address expression)
5118 has an effect that depends on the machine mode it is used for.
5119
5120 In PIC mode,
5121
5122 (mem:HI [%l7+a])
5123
5124 is not equivalent to
5125
5126 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
5127
5128 because [%l7+a+1] is interpreted as the address of (a+1). */
5129
5130
5131 static bool
sparc_mode_dependent_address_p(const_rtx addr,addr_space_t as ATTRIBUTE_UNUSED)5132 sparc_mode_dependent_address_p (const_rtx addr,
5133 addr_space_t as ATTRIBUTE_UNUSED)
5134 {
5135 if (GET_CODE (addr) == PLUS
5136 && sparc_pic_register_p (XEXP (addr, 0))
5137 && symbolic_operand (XEXP (addr, 1), VOIDmode))
5138 return true;
5139
5140 return false;
5141 }
5142
5143 /* Emit a call instruction with the pattern given by PAT. ADDR is the
5144 address of the call target. */
5145
5146 void
sparc_emit_call_insn(rtx pat,rtx addr)5147 sparc_emit_call_insn (rtx pat, rtx addr)
5148 {
5149 rtx_insn *insn;
5150
5151 insn = emit_call_insn (pat);
5152
5153 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
5154 if (TARGET_VXWORKS_RTP
5155 && flag_pic
5156 && GET_CODE (addr) == SYMBOL_REF
5157 && (SYMBOL_REF_DECL (addr)
5158 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
5159 : !SYMBOL_REF_LOCAL_P (addr)))
5160 {
5161 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
5162 crtl->uses_pic_offset_table = 1;
5163 }
5164 }
5165
5166 /* Return 1 if RTX is a MEM which is known to be aligned to at
5167 least a DESIRED byte boundary. */
5168
5169 int
mem_min_alignment(rtx mem,int desired)5170 mem_min_alignment (rtx mem, int desired)
5171 {
5172 rtx addr, base, offset;
5173
5174 /* If it's not a MEM we can't accept it. */
5175 if (GET_CODE (mem) != MEM)
5176 return 0;
5177
5178 /* Obviously... */
5179 if (!TARGET_UNALIGNED_DOUBLES
5180 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
5181 return 1;
5182
5183 /* ??? The rest of the function predates MEM_ALIGN so
5184 there is probably a bit of redundancy. */
5185 addr = XEXP (mem, 0);
5186 base = offset = NULL_RTX;
5187 if (GET_CODE (addr) == PLUS)
5188 {
5189 if (GET_CODE (XEXP (addr, 0)) == REG)
5190 {
5191 base = XEXP (addr, 0);
5192
5193 /* What we are saying here is that if the base
5194 REG is aligned properly, the compiler will make
5195 sure any REG based index upon it will be so
5196 as well. */
5197 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
5198 offset = XEXP (addr, 1);
5199 else
5200 offset = const0_rtx;
5201 }
5202 }
5203 else if (GET_CODE (addr) == REG)
5204 {
5205 base = addr;
5206 offset = const0_rtx;
5207 }
5208
5209 if (base != NULL_RTX)
5210 {
5211 int regno = REGNO (base);
5212
5213 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
5214 {
5215 /* Check if the compiler has recorded some information
5216 about the alignment of the base REG. If reload has
5217 completed, we already matched with proper alignments.
5218 If not running global_alloc, reload might give us
5219 unaligned pointer to local stack though. */
5220 if (((cfun != 0
5221 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
5222 || (optimize && reload_completed))
5223 && (INTVAL (offset) & (desired - 1)) == 0)
5224 return 1;
5225 }
5226 else
5227 {
5228 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
5229 return 1;
5230 }
5231 }
5232 else if (! TARGET_UNALIGNED_DOUBLES
5233 || CONSTANT_P (addr)
5234 || GET_CODE (addr) == LO_SUM)
5235 {
5236 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
5237 is true, in which case we can only assume that an access is aligned if
5238 it is to a constant address, or the address involves a LO_SUM. */
5239 return 1;
5240 }
5241
5242 /* An obviously unaligned address. */
5243 return 0;
5244 }
5245
5246
5247 /* Vectors to keep interesting information about registers where it can easily
5248 be got. We used to use the actual mode value as the bit number, but there
5249 are more than 32 modes now. Instead we use two tables: one indexed by
5250 hard register number, and one indexed by mode. */
5251
5252 /* The purpose of sparc_mode_class is to shrink the range of modes so that
5253 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
5254 mapped into one sparc_mode_class mode. */
5255
/* Letter codes map to quantity sizes (see sparc_init_modes below):
   H = sub-word integer, S = 4-byte, D = 8-byte, T = 16-byte, O = 32-byte;
   the F-suffixed variants are the floating-point counterparts, and
   CC/CCFP are integer/floating-point condition codes.  */
enum sparc_mode_class {
  H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
  SF_MODE, DF_MODE, TF_MODE, OF_MODE,
  CC_MODE, CCFP_MODE
};

/* Modes for single-word and smaller quantities.  */
#define S_MODES \
  ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))

/* Modes for double-word and smaller quantities.  */
#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))

/* Modes for quad-word and smaller quantities.  */
#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))

/* Modes for 8-word and smaller quantities.  */
#define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))

/* Modes for single-float quantities.  */
#define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))

/* Modes for double-float and smaller quantities.  */
#define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))

/* Modes for quad-float and smaller quantities.  */
#define TF_MODES (DF_MODES | (1 << (int) TF_MODE))

/* Modes for quad-float pairs and smaller quantities.  */
#define OF_MODES (TF_MODES | (1 << (int) OF_MODE))

/* Modes for double-float only quantities.  */
#define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))

/* Modes for quad-float and double-float only quantities.  */
#define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))

/* Modes for quad-float pairs and double-float only quantities.  */
#define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))

/* Modes for condition codes.  */
#define CC_MODES (1 << (int) CC_MODE)
#define CCFP_MODES (1 << (int) CCFP_MODE)
5299
5300 /* Value is 1 if register/mode pair is acceptable on sparc.
5301
5302 The funny mixture of D and T modes is because integer operations
5303 do not specially operate on tetra quantities, so non-quad-aligned
5304 registers can hold quadword quantities (except %o4 and %i4 because
5305 they cross fixed registers).
5306
5307 ??? Note that, despite the settings, non-double-aligned parameter
5308 registers can hold double-word quantities in 32-bit mode. */
5309
/* This points to either the 32-bit or the 64-bit version.  */
static const int *hard_regno_mode_classes;

static const int hard_32bit_mode_classes[] = {
  /* Integer registers 0-31 (%g, %o, %l, %i groups of 8).  */
  S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,

  /* FP regs f0 to f31.  */
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,

  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,

  /* %icc, %sfp, %gsr */
  CC_MODES, 0, D_MODES
};

static const int hard_64bit_mode_classes[] = {
  /* Integer registers 0-31 (%g, %o, %l, %i groups of 8).  */
  D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,

  /* FP regs f0 to f31.  */
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,

  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,

  /* %icc, %sfp, %gsr */
  CC_MODES, 0, D_MODES
};
5362
/* Bit mask (over enum sparc_mode_class) for each machine mode, filled in
   by sparc_init_modes below.  */
static int sparc_mode_class [NUM_MACHINE_MODES];

/* Register class of each hard register, filled in by sparc_init_modes and
   used by REGNO_REG_CLASS.  */
enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
5366
5367 static void
sparc_init_modes(void)5368 sparc_init_modes (void)
5369 {
5370 int i;
5371
5372 for (i = 0; i < NUM_MACHINE_MODES; i++)
5373 {
5374 machine_mode m = (machine_mode) i;
5375 unsigned int size = GET_MODE_SIZE (m);
5376
5377 switch (GET_MODE_CLASS (m))
5378 {
5379 case MODE_INT:
5380 case MODE_PARTIAL_INT:
5381 case MODE_COMPLEX_INT:
5382 if (size < 4)
5383 sparc_mode_class[i] = 1 << (int) H_MODE;
5384 else if (size == 4)
5385 sparc_mode_class[i] = 1 << (int) S_MODE;
5386 else if (size == 8)
5387 sparc_mode_class[i] = 1 << (int) D_MODE;
5388 else if (size == 16)
5389 sparc_mode_class[i] = 1 << (int) T_MODE;
5390 else if (size == 32)
5391 sparc_mode_class[i] = 1 << (int) O_MODE;
5392 else
5393 sparc_mode_class[i] = 0;
5394 break;
5395 case MODE_VECTOR_INT:
5396 if (size == 4)
5397 sparc_mode_class[i] = 1 << (int) SF_MODE;
5398 else if (size == 8)
5399 sparc_mode_class[i] = 1 << (int) DF_MODE;
5400 else
5401 sparc_mode_class[i] = 0;
5402 break;
5403 case MODE_FLOAT:
5404 case MODE_COMPLEX_FLOAT:
5405 if (size == 4)
5406 sparc_mode_class[i] = 1 << (int) SF_MODE;
5407 else if (size == 8)
5408 sparc_mode_class[i] = 1 << (int) DF_MODE;
5409 else if (size == 16)
5410 sparc_mode_class[i] = 1 << (int) TF_MODE;
5411 else if (size == 32)
5412 sparc_mode_class[i] = 1 << (int) OF_MODE;
5413 else
5414 sparc_mode_class[i] = 0;
5415 break;
5416 case MODE_CC:
5417 if (m == CCFPmode || m == CCFPEmode)
5418 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
5419 else
5420 sparc_mode_class[i] = 1 << (int) CC_MODE;
5421 break;
5422 default:
5423 sparc_mode_class[i] = 0;
5424 break;
5425 }
5426 }
5427
5428 if (TARGET_ARCH64)
5429 hard_regno_mode_classes = hard_64bit_mode_classes;
5430 else
5431 hard_regno_mode_classes = hard_32bit_mode_classes;
5432
5433 /* Initialize the array used by REGNO_REG_CLASS. */
5434 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5435 {
5436 if (i < 16 && TARGET_V8PLUS)
5437 sparc_regno_reg_class[i] = I64_REGS;
5438 else if (i < 32 || i == FRAME_POINTER_REGNUM)
5439 sparc_regno_reg_class[i] = GENERAL_REGS;
5440 else if (i < 64)
5441 sparc_regno_reg_class[i] = FP_REGS;
5442 else if (i < 96)
5443 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
5444 else if (i < 100)
5445 sparc_regno_reg_class[i] = FPCC_REGS;
5446 else
5447 sparc_regno_reg_class[i] = NO_REGS;
5448 }
5449 }
5450
5451 /* Return whether REGNO, a global or FP register, must be saved/restored. */
5452
5453 static inline bool
save_global_or_fp_reg_p(unsigned int regno,int leaf_function ATTRIBUTE_UNUSED)5454 save_global_or_fp_reg_p (unsigned int regno,
5455 int leaf_function ATTRIBUTE_UNUSED)
5456 {
5457 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
5458 }
5459
5460 /* Return whether the return address register (%i7) is needed. */
5461
5462 static inline bool
return_addr_reg_needed_p(int leaf_function)5463 return_addr_reg_needed_p (int leaf_function)
5464 {
5465 /* If it is live, for example because of __builtin_return_address (0). */
5466 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
5467 return true;
5468
5469 /* Otherwise, it is needed as save register if %o7 is clobbered. */
5470 if (!leaf_function
5471 /* Loading the GOT register clobbers %o7. */
5472 || crtl->uses_pic_offset_table
5473 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
5474 return true;
5475
5476 return false;
5477 }
5478
5479 /* Return whether REGNO, a local or in register, must be saved/restored. */
5480
5481 static bool
save_local_or_in_reg_p(unsigned int regno,int leaf_function)5482 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5483 {
5484 /* General case: call-saved registers live at some point. */
5485 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
5486 return true;
5487
5488 /* Frame pointer register (%fp) if needed. */
5489 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5490 return true;
5491
5492 /* Return address register (%i7) if needed. */
5493 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5494 return true;
5495
5496 /* GOT register (%l7) if needed. */
5497 if (got_register_rtx && regno == REGNO (got_register_rtx))
5498 return true;
5499
5500 /* If the function accesses prior frames, the frame pointer and the return
5501 address of the previous frame must be saved on the stack. */
5502 if (crtl->accesses_prior_frames
5503 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5504 return true;
5505
5506 return false;
5507 }
5508
5509 /* Compute the frame size required by the function. This function is called
5510 during the reload pass and also by sparc_expand_prologue. */
5511
5512 HOST_WIDE_INT
sparc_compute_frame_size(HOST_WIDE_INT size,int leaf_function)5513 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5514 {
5515 HOST_WIDE_INT frame_size, apparent_frame_size;
5516 int args_size, n_global_fp_regs = 0;
5517 bool save_local_in_regs_p = false;
5518 unsigned int i;
5519
5520 /* If the function allocates dynamic stack space, the dynamic offset is
5521 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5522 if (leaf_function && !cfun->calls_alloca)
5523 args_size = 0;
5524 else
5525 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5526
5527 /* Calculate space needed for global registers. */
5528 if (TARGET_ARCH64)
5529 {
5530 for (i = 0; i < 8; i++)
5531 if (save_global_or_fp_reg_p (i, 0))
5532 n_global_fp_regs += 2;
5533 }
5534 else
5535 {
5536 for (i = 0; i < 8; i += 2)
5537 if (save_global_or_fp_reg_p (i, 0)
5538 || save_global_or_fp_reg_p (i + 1, 0))
5539 n_global_fp_regs += 2;
5540 }
5541
5542 /* In the flat window model, find out which local and in registers need to
5543 be saved. We don't reserve space in the current frame for them as they
5544 will be spilled into the register window save area of the caller's frame.
5545 However, as soon as we use this register window save area, we must create
5546 that of the current frame to make it the live one. */
5547 if (TARGET_FLAT)
5548 for (i = 16; i < 32; i++)
5549 if (save_local_or_in_reg_p (i, leaf_function))
5550 {
5551 save_local_in_regs_p = true;
5552 break;
5553 }
5554
5555 /* Calculate space needed for FP registers. */
5556 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5557 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5558 n_global_fp_regs += 2;
5559
5560 if (size == 0
5561 && n_global_fp_regs == 0
5562 && args_size == 0
5563 && !save_local_in_regs_p)
5564 frame_size = apparent_frame_size = 0;
5565 else
5566 {
5567 /* Start from the apparent frame size. */
5568 apparent_frame_size = ROUND_UP (size, 8) + n_global_fp_regs * 4;
5569
5570 /* We need to add the size of the outgoing argument area. */
5571 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5572
5573 /* And that of the register window save area. */
5574 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5575
5576 /* Finally, bump to the appropriate alignment. */
5577 frame_size = SPARC_STACK_ALIGN (frame_size);
5578 }
5579
5580 /* Set up values for use in prologue and epilogue. */
5581 sparc_frame_size = frame_size;
5582 sparc_apparent_frame_size = apparent_frame_size;
5583 sparc_n_global_fp_regs = n_global_fp_regs;
5584 sparc_save_local_in_regs_p = save_local_in_regs_p;
5585
5586 return frame_size;
5587 }
5588
5589 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5590
5591 int
sparc_initial_elimination_offset(int to)5592 sparc_initial_elimination_offset (int to)
5593 {
5594 int offset;
5595
5596 if (to == STACK_POINTER_REGNUM)
5597 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5598 else
5599 offset = 0;
5600
5601 offset += SPARC_STACK_BIAS;
5602 return offset;
5603 }
5604
5605 /* Output any necessary .register pseudo-ops. */
5606
5607 void
sparc_output_scratch_registers(FILE * file ATTRIBUTE_UNUSED)5608 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5609 {
5610 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5611 int i;
5612
5613 if (TARGET_ARCH32)
5614 return;
5615
5616 /* Check if %g[2367] were used without
5617 .register being printed for them already. */
5618 for (i = 2; i < 8; i++)
5619 {
5620 if (df_regs_ever_live_p (i)
5621 && ! sparc_hard_reg_printed [i])
5622 {
5623 sparc_hard_reg_printed [i] = 1;
5624 /* %g7 is used as TLS base register, use #ignore
5625 for it instead of #scratch. */
5626 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5627 i == 7 ? "ignore" : "scratch");
5628 }
5629 if (i == 3) i = 5;
5630 }
5631 #endif
5632 }
5633
5634 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5635
5636 #if PROBE_INTERVAL > 4096
5637 #error Cannot use indexed addressing mode for stack probing
5638 #endif
5639
5640 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5641 inclusive. These are offsets from the current stack pointer.
5642
5643 Note that we don't use the REG+REG addressing mode for the probes because
5644 of the stack bias in 64-bit mode. And it doesn't really buy us anything
5645 so the advantages of having a single code win here. */
5646
static void
sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
  /* %g1 is used as the probe address register throughout.  */
  rtx g1 = gen_rtx_REG (Pmode, 1);

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  if (size <= PROBE_INTERVAL)
    {
      emit_move_insn (g1, GEN_INT (first));
      /* G1 = SP - FIRST.  */
      emit_insn (gen_rtx_SET (g1,
			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
      /* Single probe at SP - (FIRST + SIZE).  */
      emit_stack_probe (plus_constant (Pmode, g1, -size));
    }

  /* The run-time loop is made up of 9 insns in the generic case while the
     compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
  else if (size <= 4 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i;

      emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
      /* G1 = SP - (FIRST + PROBE_INTERVAL), and probe there first.  */
      emit_insn (gen_rtx_SET (g1,
			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
      emit_stack_probe (g1);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
	 it exceeds SIZE.  If only two probes are needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	{
	  emit_insn (gen_rtx_SET (g1,
				  plus_constant (Pmode, g1, -PROBE_INTERVAL)));
	  emit_stack_probe (g1);
	}

      emit_stack_probe (plus_constant (Pmode, g1,
				       (i - PROBE_INTERVAL) - size));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      /* %g4 holds the address of the last probed interval.  */
      rtx g4 = gen_rtx_REG (Pmode, 4);

      emit_move_insn (g1, GEN_INT (first));


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
      emit_move_insn (g4, GEN_INT (rounded_size));


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_insn (gen_rtx_SET (g1,
			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));


      /* Step 3: the loop

	 while (TEST_ADDR != LAST_ADDR)
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      if (TARGET_ARCH64)
	emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
      else
	emit_insn (gen_probe_stack_rangesi (g1, g1, g4));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
5743
5744 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5745 absolute addresses. */
5746
const char *
output_probe_stack_range (rtx reg1, rtx reg2)
{
  /* Counter used to generate a unique internal label per expansion.  */
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg1;
  xops[1] = GEN_INT (-PROBE_INTERVAL);
  output_asm_insn ("add\t%0, %1, %0", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[1] = reg2;
  output_asm_insn ("cmp\t%0, %1", xops);

  /* Probe at TEST_ADDR and branch.  */
  if (TARGET_ARCH64)
    fputs ("\tbne,pt\t%xcc,", asm_out_file);
  else
    fputs ("\tbne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);
  /* Store %g0 at TEST_ADDR + SPARC_STACK_BIAS; emitted after the branch.  */
  xops[1] = GEN_INT (SPARC_STACK_BIAS);
  output_asm_insn (" st\t%%g0, [%0+%1]", xops);

  return "";
}
5780
5781 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5782 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5783 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5784 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5785 the action to be performed if it returns false. Return the new offset. */
5786
/* Predicate deciding whether register REGNO must be saved/restored, given
   the leaf-function flag.  */
typedef bool (*sorr_pred_t) (unsigned int, int);
/* Action on a register slot: do nothing, just advance the offset, save
   the register, or restore it.  */
typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5789
static int
emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
			   int offset, int leaf_function, sorr_pred_t save_p,
			   sorr_act_t action_true, sorr_act_t action_false)
{
  unsigned int i;
  rtx mem;
  rtx_insn *insn;

  /* In 64-bit mode, the integer registers (HIGH <= 32) are handled one at
     a time, each in a DImode slot.  */
  if (TARGET_ARCH64 && high <= 32)
    {
      /* Offset of the frame pointer slot, or -1 if %fp need not be
	 restored; its restore is deferred (see below).  */
      int fp_offset = -1;

      for (i = low; i < high; i++)
	{
	  if (save_p (i, leaf_function))
	    {
	      mem = gen_frame_mem (DImode, plus_constant (Pmode,
							  base, offset));
	      if (action_true == SORR_SAVE)
		{
		  insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
		  RTX_FRAME_RELATED_P (insn) = 1;
		}
	      else  /* action_true == SORR_RESTORE */
		{
		  /* The frame pointer must be restored last since its old
		     value may be used as base address for the frame.  This
		     is problematic in 64-bit mode only because of the lack
		     of double-word load instruction.  */
		  if (i == HARD_FRAME_POINTER_REGNUM)
		    fp_offset = offset;
		  else
		    emit_move_insn (gen_rtx_REG (DImode, i), mem);
		}
	      offset += 8;
	    }
	  else if (action_false == SORR_ADVANCE)
	    offset += 8;
	}

      /* Deferred restore of the frame pointer, if any (see above).  */
      if (fp_offset >= 0)
	{
	  mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
	  emit_move_insn (hard_frame_pointer_rtx, mem);
	}
    }
  else
    {
      /* Otherwise process the registers by even/odd pairs so that a pair
	 with both members to be handled uses one double-word access.  */
      for (i = low; i < high; i += 2)
	{
	  bool reg0 = save_p (i, leaf_function);
	  bool reg1 = save_p (i + 1, leaf_function);
	  machine_mode mode;
	  int regno;

	  if (reg0 && reg1)
	    {
	      /* Both registers: single double-word access.  */
	      mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode;
	      regno = i;
	    }
	  else if (reg0)
	    {
	      /* Even register only: word access at the pair's base slot.  */
	      mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
	      regno = i;
	    }
	  else if (reg1)
	    {
	      /* Odd register only: word access at the second slot.  */
	      mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
	      regno = i + 1;
	      offset += 4;
	    }
	  else
	    {
	      if (action_false == SORR_ADVANCE)
		offset += 8;
	      continue;
	    }

	  mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
	  if (action_true == SORR_SAVE)
	    {
	      insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
	      RTX_FRAME_RELATED_P (insn) = 1;
	      if (mode == DImode)
		{
		  /* Describe a DImode integer save as a pair of SImode
		     stores in the attached note.  */
		  rtx set1, set2;
		  mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
							      offset));
		  set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
		  RTX_FRAME_RELATED_P (set1) = 1;
		  mem
		    = gen_frame_mem (SImode, plus_constant (Pmode, base,
							    offset + 4));
		  set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
		  RTX_FRAME_RELATED_P (set2) = 1;
		  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
				gen_rtx_PARALLEL (VOIDmode,
						  gen_rtvec (2, set1, set2)));
		}
	    }
	  else  /* action_true == SORR_RESTORE */
	    emit_move_insn (gen_rtx_REG (mode, regno), mem);

	  /* Bump and round down to double word
	     in case we already bumped by 4.  */
	  offset = ROUND_DOWN (offset + 8, 8);
	}
    }

  return offset;
}
5902
5903 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5904
5905 static rtx
emit_adjust_base_to_offset(rtx base,int offset)5906 emit_adjust_base_to_offset (rtx base, int offset)
5907 {
5908 /* ??? This might be optimized a little as %g1 might already have a
5909 value close enough that a single add insn will do. */
5910 /* ??? Although, all of this is probably only a temporary fix because
5911 if %g1 can hold a function result, then sparc_expand_epilogue will
5912 lose (the result will be clobbered). */
5913 rtx new_base = gen_rtx_REG (Pmode, 1);
5914 emit_move_insn (new_base, GEN_INT (offset));
5915 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5916 return new_base;
5917 }
5918
5919 /* Emit code to save/restore call-saved global and FP registers. */
5920
5921 static void
emit_save_or_restore_global_fp_regs(rtx base,int offset,sorr_act_t action)5922 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5923 {
5924 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5925 {
5926 base = emit_adjust_base_to_offset (base, offset);
5927 offset = 0;
5928 }
5929
5930 offset
5931 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5932 save_global_or_fp_reg_p, action, SORR_NONE);
5933 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5934 save_global_or_fp_reg_p, action, SORR_NONE);
5935 }
5936
5937 /* Emit code to save/restore call-saved local and in registers. */
5938
5939 static void
emit_save_or_restore_local_in_regs(rtx base,int offset,sorr_act_t action)5940 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5941 {
5942 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5943 {
5944 base = emit_adjust_base_to_offset (base, offset);
5945 offset = 0;
5946 }
5947
5948 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5949 save_local_or_in_reg_p, action, SORR_ADVANCE);
5950 }
5951
5952 /* Emit a window_save insn. */
5953
static rtx_insn *
emit_window_save (rtx increment)
{
  rtx_insn *insn = emit_insn (gen_window_save (increment));
  /* Mark the insn as frame-related and attach the notes below.  */
  RTX_FRAME_RELATED_P (insn) = 1;

  /* The incoming return address (%o7) is saved in %i7.  */
  add_reg_note (insn, REG_CFA_REGISTER,
		gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
			     gen_rtx_REG (Pmode,
					  INCOMING_RETURN_ADDR_REGNUM)));

  /* The window save event.  */
  add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);

  /* The CFA is %fp, the hard frame pointer.  */
  add_reg_note (insn, REG_CFA_DEF_CFA,
		plus_constant (Pmode, hard_frame_pointer_rtx,
			       INCOMING_FRAME_SP_OFFSET));

  return insn;
}
5976
5977 /* Generate an increment for the stack pointer. */
5978
5979 static rtx
gen_stack_pointer_inc(rtx increment)5980 gen_stack_pointer_inc (rtx increment)
5981 {
5982 return gen_rtx_SET (stack_pointer_rtx,
5983 gen_rtx_PLUS (Pmode,
5984 stack_pointer_rtx,
5985 increment));
5986 }
5987
5988 /* Expand the function prologue. The prologue is responsible for reserving
5989 storage for the frame, saving the call-saved registers and loading the
5990 GOT register if needed. */
5991
void
sparc_expand_prologue (void)
{
  HOST_WIDE_INT size;
  rtx_insn *insn;

  /* Compute a snapshot of crtl->uses_only_leaf_regs.  Relying
     on the final value of the flag means deferring the prologue/epilogue
     expansion until just before the second scheduling pass, which is too
     late to emit multiple epilogues or return insns.

     Of course we are making the assumption that the value of the flag
     will not change between now and its final value.  Of the three parts
     of the formula, only the last one can reasonably vary.  Let's take a
     closer look, after assuming that the first two ones are set to true
     (otherwise the last value is effectively silenced).

     If only_leaf_regs_used returns false, the global predicate will also
     be false so the actual frame size calculated below will be positive.
     As a consequence, the save_register_window insn will be emitted in
     the instruction stream; now this insn explicitly references %fp
     which is not a leaf register so only_leaf_regs_used will always
     return false subsequently.

     If only_leaf_regs_used returns true, we hope that the subsequent
     optimization passes won't cause non-leaf registers to pop up.  For
     example, the regrename pass has special provisions to not rename to
     non-leaf registers in a leaf function.  */
  sparc_leaf_function_p
    = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();

  size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);

  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* Probe the stack frame upfront if stack checking or stack clash
     protection is in effect.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
      || flag_stack_clash_protection)
    {
      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  /* A leaf function without alloca only needs probing beyond
	     the protected zone.  */
	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
	    sparc_emit_probe_stack_range (get_stack_check_protect (),
					  size - get_stack_check_protect ());
	}
      else if (size > 0)
	sparc_emit_probe_stack_range (get_stack_check_protect (), size);
    }

  if (size == 0)
    ; /* do nothing.  */
  else if (sparc_leaf_function_p)
    {
      /* Leaf function: allocate the frame by decrementing %sp directly;
	 no register window save is emitted.  */
      rtx size_int_rtx = GEN_INT (-size);

      if (size <= 4096)
	insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
      else if (size <= 8192)
	{
	  /* The decrement is split in two steps of at most 4096 bytes,
	     presumably to fit the add instruction's immediate field --
	     the 4096 boundary matches a 13-bit signed immediate.  */
	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* %sp is still the CFA register.  */
	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
	}
      else
	{
	  /* Large frame: load the size into scratch register %g1 first.  */
	  rtx size_rtx = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (size_rtx, size_int_rtx);
	  insn = emit_insn (gen_stack_pointer_inc (size_rtx));
	  /* The insn uses a register operand, so describe the net effect
	     with a constant for the unwinder.  */
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_stack_pointer_inc (size_int_rtx));
	}

      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* Regular function: emit a 'save' instruction, which allocates the
	 frame and switches the register window in one go.  */
      rtx size_int_rtx = GEN_INT (-size);

      if (size <= 4096)
	emit_window_save (size_int_rtx);
      else if (size <= 8192)
	{
	  emit_window_save (GEN_INT (-4096));

	  /* %sp is not the CFA register anymore.  */
	  emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));

	  /* Make sure no %fp-based store is issued until after the frame is
	     established.  The offset between the frame pointer and the stack
	     pointer is calculated relative to the value of the stack pointer
	     at the end of the function prologue, and moving instructions that
	     access the stack via the frame pointer between the instructions
	     that decrement the stack pointer could result in accessing the
	     register window save area, which is volatile.  */
	  emit_insn (gen_frame_blockage ());
	}
      else
	{
	  /* Large frame: load the size into scratch register %g1 first.  */
	  rtx size_rtx = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (size_rtx, size_int_rtx);
	  emit_window_save (size_rtx);
	}
    }

  /* Record the base register and offset subsequently used to access the
     frame; they feed the save/restore helpers and the epilogue.  */
  if (sparc_leaf_function_p)
    {
      sparc_frame_base_reg = stack_pointer_rtx;
      sparc_frame_base_offset = size + SPARC_STACK_BIAS;
    }
  else
    {
      sparc_frame_base_reg = hard_frame_pointer_rtx;
      sparc_frame_base_offset = SPARC_STACK_BIAS;
    }

  /* Save the global FP registers, if the function uses any.  */
  if (sparc_n_global_fp_regs > 0)
    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
					 sparc_frame_base_offset
					   - sparc_apparent_frame_size,
					 SORR_SAVE);

  /* Advertise that the data calculated just above are now valid.  */
  sparc_prologue_data_valid_p = true;
}
6118
/* Expand the function prologue for the flat register window model.  The
   prologue is responsible for reserving storage for the frame, saving the
   call-saved registers and loading the GOT register if needed.  */
6122
void
sparc_flat_expand_prologue (void)
{
  HOST_WIDE_INT size;
  rtx_insn *insn;

  /* In the flat model there is no register window to worry about, so
     leafness only depends on optimization and crtl->is_leaf.  */
  sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;

  size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);

  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* Probe the stack frame upfront if stack checking or stack clash
     protection is in effect.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
      || flag_stack_clash_protection)
    {
      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  /* A leaf function without alloca only needs probing beyond
	     the protected zone.  */
	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
	    sparc_emit_probe_stack_range (get_stack_check_protect (),
					  size - get_stack_check_protect ());
	}
      else if (size > 0)
	sparc_emit_probe_stack_range (get_stack_check_protect (), size);
    }

  /* Save the local and in registers to the stack, if the frame layout
     requires it (no register window in the flat model).  */
  if (sparc_save_local_in_regs_p)
    emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
					SORR_SAVE);

  if (size == 0)
    ; /* do nothing.  */
  else
    {
      rtx size_int_rtx, size_rtx;

      size_rtx = size_int_rtx = GEN_INT (-size);

      /* We establish the frame (i.e. decrement the stack pointer) first, even
	 if we use a frame pointer, because we cannot clobber any call-saved
	 registers, including the frame pointer, if we haven't created a new
	 register save area, for the sake of compatibility with the ABI.  */
      if (size <= 4096)
	insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
      else if (size <= 8192 && !frame_pointer_needed)
	{
	  /* Two-step decrement; the first step is frame-related on its own
	     while %sp is still the CFA register.  */
	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
	}
      else
	{
	  /* Load -SIZE into scratch register %g1; SIZE_RTX is reused below
	     to reconstruct the frame pointer.  */
	  size_rtx = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (size_rtx, size_int_rtx);
	  insn = emit_insn (gen_stack_pointer_inc (size_rtx));
	  /* Register operand: give the unwinder the constant form.  */
	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
			gen_stack_pointer_inc (size_int_rtx));
	}
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Ensure nothing is scheduled until after the frame is established.  */
      emit_insn (gen_blockage ());

      if (frame_pointer_needed)
	{
	  /* %fp = %sp - (-SIZE), i.e. the value %sp had on entry.  */
	  insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
					 gen_rtx_MINUS (Pmode,
							stack_pointer_rtx,
							size_rtx)));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
			gen_rtx_SET (hard_frame_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    size)));
	}

      if (return_addr_reg_needed_p (sparc_leaf_function_p))
	{
	  /* Preserve the return address by copying it from the incoming
	     register into RETURN_ADDR_REGNUM.  */
	  rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
	  rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);

	  insn = emit_move_insn (i7, o7);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));

	  /* Prevent this instruction from ever being considered dead,
	     even if this function has no epilogue.  */
	  emit_use (i7);
	}
    }

  /* Record the base register and offset subsequently used to access the
     frame; they feed the save/restore helpers and the epilogue.  */
  if (frame_pointer_needed)
    {
      sparc_frame_base_reg = hard_frame_pointer_rtx;
      sparc_frame_base_offset = SPARC_STACK_BIAS;
    }
  else
    {
      sparc_frame_base_reg = stack_pointer_rtx;
      sparc_frame_base_offset = size + SPARC_STACK_BIAS;
    }

  /* Save the global FP registers, if the function uses any.  */
  if (sparc_n_global_fp_regs > 0)
    emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
					 sparc_frame_base_offset
					   - sparc_apparent_frame_size,
					 SORR_SAVE);

  /* Advertise that the data calculated just above are now valid.  */
  sparc_prologue_data_valid_p = true;
}
6236
6237 /* This function generates the assembly code for function entry, which boils
6238 down to emitting the necessary .register directives. */
6239
6240 static void
sparc_asm_function_prologue(FILE * file)6241 sparc_asm_function_prologue (FILE *file)
6242 {
6243 /* Check that the assumption we made in sparc_expand_prologue is valid. */
6244 if (!TARGET_FLAT)
6245 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
6246
6247 sparc_output_scratch_registers (file);
6248 }
6249
6250 /* Expand the function epilogue, either normal or part of a sibcall.
6251 We emit all the instructions except the return or the call. */
6252
6253 void
sparc_expand_epilogue(bool for_eh)6254 sparc_expand_epilogue (bool for_eh)
6255 {
6256 HOST_WIDE_INT size = sparc_frame_size;
6257
6258 if (cfun->calls_alloca)
6259 emit_insn (gen_frame_blockage ());
6260
6261 if (sparc_n_global_fp_regs > 0)
6262 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6263 sparc_frame_base_offset
6264 - sparc_apparent_frame_size,
6265 SORR_RESTORE);
6266
6267 if (size == 0 || for_eh)
6268 ; /* do nothing. */
6269 else if (sparc_leaf_function_p)
6270 {
6271 if (size <= 4096)
6272 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6273 else if (size <= 8192)
6274 {
6275 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6276 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6277 }
6278 else
6279 {
6280 rtx reg = gen_rtx_REG (Pmode, 1);
6281 emit_move_insn (reg, GEN_INT (size));
6282 emit_insn (gen_stack_pointer_inc (reg));
6283 }
6284 }
6285 }
6286
/* Expand the function epilogue for the flat register window model, either
   normal or part of a sibcall.  We emit all the instructions except the
   return or the call.  */
6289
6290 void
sparc_flat_expand_epilogue(bool for_eh)6291 sparc_flat_expand_epilogue (bool for_eh)
6292 {
6293 HOST_WIDE_INT size = sparc_frame_size;
6294
6295 if (sparc_n_global_fp_regs > 0)
6296 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6297 sparc_frame_base_offset
6298 - sparc_apparent_frame_size,
6299 SORR_RESTORE);
6300
6301 /* If we have a frame pointer, we'll need both to restore it before the
6302 frame is destroyed and use its current value in destroying the frame.
6303 Since we don't have an atomic way to do that in the flat window model,
6304 we save the current value into a temporary register (%g1). */
6305 if (frame_pointer_needed && !for_eh)
6306 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
6307
6308 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6309 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
6310 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
6311
6312 if (sparc_save_local_in_regs_p)
6313 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
6314 sparc_frame_base_offset,
6315 SORR_RESTORE);
6316
6317 if (size == 0 || for_eh)
6318 ; /* do nothing. */
6319 else if (frame_pointer_needed)
6320 {
6321 /* Make sure the frame is destroyed after everything else is done. */
6322 emit_insn (gen_blockage ());
6323
6324 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
6325 }
6326 else
6327 {
6328 /* Likewise. */
6329 emit_insn (gen_blockage ());
6330
6331 if (size <= 4096)
6332 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6333 else if (size <= 8192)
6334 {
6335 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6336 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6337 }
6338 else
6339 {
6340 rtx reg = gen_rtx_REG (Pmode, 1);
6341 emit_move_insn (reg, GEN_INT (size));
6342 emit_insn (gen_stack_pointer_inc (reg));
6343 }
6344 }
6345 }
6346
6347 /* Return true if it is appropriate to emit `return' instructions in the
6348 body of a function. */
6349
6350 bool
sparc_can_use_return_insn_p(void)6351 sparc_can_use_return_insn_p (void)
6352 {
6353 return sparc_prologue_data_valid_p
6354 && sparc_n_global_fp_regs == 0
6355 && TARGET_FLAT
6356 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
6357 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
6358 }
6359
6360 /* This function generates the assembly code for function exit. */
6361
6362 static void
sparc_asm_function_epilogue(FILE * file)6363 sparc_asm_function_epilogue (FILE *file)
6364 {
6365 /* If the last two instructions of a function are "call foo; dslot;"
6366 the return address might point to the first instruction in the next
6367 function and we have to output a dummy nop for the sake of sane
6368 backtraces in such cases. This is pointless for sibling calls since
6369 the return address is explicitly adjusted. */
6370
6371 rtx_insn *insn = get_last_insn ();
6372
6373 rtx last_real_insn = prev_real_insn (insn);
6374 if (last_real_insn
6375 && NONJUMP_INSN_P (last_real_insn)
6376 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
6377 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
6378
6379 if (last_real_insn
6380 && CALL_P (last_real_insn)
6381 && !SIBLING_CALL_P (last_real_insn))
6382 fputs("\tnop\n", file);
6383
6384 sparc_output_deferred_case_vectors ();
6385 }
6386
6387 /* Output a 'restore' instruction. */
6388
static void
output_restore (rtx pat)
{
  rtx operands[3];

  /* No pending insn to combine: emit a plain 'restore'.  */
  if (! pat)
    {
      fputs ("\t restore\n", asm_out_file);
      return;
    }

  gcc_assert (GET_CODE (pat) == SET);

  operands[0] = SET_DEST (pat);
  pat = SET_SRC (pat);

  /* Fold the SET into the 'restore' instruction, which can perform an
     addition into the destination as a side effect.  */
  switch (GET_CODE (pat))
    {
    case PLUS:
      operands[1] = XEXP (pat, 0);
      operands[2] = XEXP (pat, 1);
      output_asm_insn (" restore %r1, %2, %Y0", operands);
      break;
    case LO_SUM:
      operands[1] = XEXP (pat, 0);
      operands[2] = XEXP (pat, 1);
      output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
      break;
    case ASHIFT:
      /* A left shift by 1 is emitted as an addition of the operand
	 to itself.  */
      operands[1] = XEXP (pat, 0);
      gcc_assert (XEXP (pat, 1) == const1_rtx);
      output_asm_insn (" restore %r1, %r1, %Y0", operands);
      break;
    default:
      /* A plain move: add the source to %g0.  */
      operands[1] = pat;
      output_asm_insn (" restore %%g0, %1, %Y0", operands);
      break;
    }
}
6428
6429 /* Output a return. */
6430
const char *
output_return (rtx_insn *insn)
{
  if (crtl->calls_eh_return)
    {
      /* If the function uses __builtin_eh_return, the eh_return
	 machinery occupies the delay slot.  */
      gcc_assert (!final_sequence);

      if (flag_delayed_branch)
	{
	  if (!TARGET_FLAT && TARGET_V9)
	    fputs ("\treturn\t%i7+8\n", asm_out_file);
	  else
	    {
	      if (!TARGET_FLAT)
		fputs ("\trestore\n", asm_out_file);

	      fputs ("\tjmp\t%o7+8\n", asm_out_file);
	    }

	  /* The delay slot applies the stack adjustment found in %g1;
	     presumably set up by the eh_return expander -- TODO confirm
	     against the eh_return pattern.  */
	  fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
	}
      else
	{
	  if (!TARGET_FLAT)
	    fputs ("\trestore\n", asm_out_file);

	  /* No delay slots: adjust the stack before the jump and pad
	     the jump with a nop.  */
	  fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
	  fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
	}
    }
  else if (sparc_leaf_function_p || TARGET_FLAT)
    {
      /* This is a leaf or flat function so we don't have to bother restoring
	 the register window, which frees us from dealing with the convoluted
	 semantics of restore/return.  We simply output the jump to the
	 return address and the insn in the delay slot (if any).  */

      return "jmp\t%%o7+%)%#";
    }
  else
    {
      /* This is a regular function so we have to restore the register window.
	 We may have a pending insn for the delay slot, which will be either
	 combined with the 'restore' instruction or put in the delay slot of
	 the 'return' instruction.  */

      if (final_sequence)
	{
	  rtx_insn *delay;
	  rtx pat;

	  delay = NEXT_INSN (insn);
	  gcc_assert (delay);

	  pat = PATTERN (delay);

	  /* epilogue_renumber with its second argument set to 1 appears
	     to only test whether the pattern can be renumbered for the
	     caller's window; 0 performs the renumbering -- TODO confirm
	     against its definition.  */
	  if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
	    {
	      epilogue_renumber (&pat, 0);
	      return "return\t%%i7+%)%#";
	    }
	  else
	    {
	      output_asm_insn ("jmp\t%%i7+%)", NULL);

	      /* We're going to output the insn in the delay slot manually.
		 Make sure to output its source location first.  */
	      PATTERN (delay) = gen_blockage ();
	      INSN_CODE (delay) = -1;
	      final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
	      INSN_LOCATION (delay) = UNKNOWN_LOCATION;

	      /* Combine the pending insn with the 'restore'.  */
	      output_restore (pat);
	    }
	}
      else
	{
	  /* The delay slot is empty.  */
	  if (TARGET_V9)
	    return "return\t%%i7+%)\n\t nop";
	  else if (flag_delayed_branch)
	    return "jmp\t%%i7+%)\n\t restore";
	  else
	    return "restore\n\tjmp\t%%o7+%)\n\t nop";
	}
    }

  return "";
}
6522
6523 /* Output a sibling call. */
6524
const char *
output_sibcall (rtx_insn *insn, rtx call_operand)
{
  rtx operands[1];

  /* Sibcalls are only generated when delayed branches are available.  */
  gcc_assert (flag_delayed_branch);

  operands[0] = call_operand;

  if (sparc_leaf_function_p || TARGET_FLAT)
    {
      /* This is a leaf or flat function so we don't have to bother restoring
	 the register window.  We simply output the jump to the function and
	 the insn in the delay slot (if any).  */

      gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));

      if (final_sequence)
	output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
			 operands);
      else
	/* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
	   it into branch if possible.  */
	output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
			 operands);
    }
  else
    {
      /* This is a regular function so we have to restore the register window.
	 We may have a pending insn for the delay slot, which will be combined
	 with the 'restore' instruction.  */

      output_asm_insn ("call\t%a0, 0", operands);

      if (final_sequence)
	{
	  rtx_insn *delay;
	  rtx pat;

	  delay = NEXT_INSN (insn);
	  gcc_assert (delay);

	  pat = PATTERN (delay);

	  /* We're going to output the insn in the delay slot manually.
	     Make sure to output its source location first.  */
	  PATTERN (delay) = gen_blockage ();
	  INSN_CODE (delay) = -1;
	  final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
	  INSN_LOCATION (delay) = UNKNOWN_LOCATION;

	  /* Combine the pending insn with the 'restore'.  */
	  output_restore (pat);
	}
      else
	/* Empty delay slot: emit a plain 'restore'.  */
	output_restore (NULL_RTX);
    }

  return "";
}
6584
6585 /* Functions for handling argument passing.
6586
6587 For 32-bit, the first 6 args are normally in registers and the rest are
6588 pushed. Any arg that starts within the first 6 words is at least
6589 partially passed in a register unless its data type forbids.
6590
6591 For 64-bit, the argument registers are laid out as an array of 16 elements
6592 and arguments are added sequentially. The first 6 int args and up to the
6593 first 16 fp args (depending on size) are passed in regs.
6594
6595 Slot Stack Integral Float Float in structure Double Long Double
6596 ---- ----- -------- ----- ------------------ ------ -----------
6597 15 [SP+248] %f31 %f30,%f31 %d30
6598 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6599 13 [SP+232] %f27 %f26,%f27 %d26
6600 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6601 11 [SP+216] %f23 %f22,%f23 %d22
6602 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6603 9 [SP+200] %f19 %f18,%f19 %d18
6604 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6605 7 [SP+184] %f15 %f14,%f15 %d14
6606 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6607 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6608 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6609 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6610 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6611 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6612 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6613
6614 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6615
6616 Integral arguments are always passed as 64-bit quantities appropriately
6617 extended.
6618
6619 Passing of floating point values is handled as follows.
6620 If a prototype is in scope:
6621 If the value is in a named argument (i.e. not a stdarg function or a
6622 value not part of the `...') then the value is passed in the appropriate
6623 fp reg.
6624 If the value is part of the `...' and is passed in one of the first 6
6625 slots then the value is passed in the appropriate int reg.
6626 If the value is part of the `...' and is not passed in one of the first 6
6627 slots then the value is passed in memory.
6628 If a prototype is not in scope:
6629 If the value is one of the first 6 arguments the value is passed in the
6630 appropriate integer reg and the appropriate fp reg.
6631 If the value is not one of the first 6 arguments the value is passed in
6632 the appropriate fp reg and in memory.
6633
6634
6635 Summary of the calling conventions implemented by GCC on the SPARC:
6636
6637 32-bit ABI:
6638 size argument return value
6639
6640 small integer <4 int. reg. int. reg.
6641 word 4 int. reg. int. reg.
6642 double word 8 int. reg. int. reg.
6643
6644 _Complex small integer <8 int. reg. int. reg.
6645 _Complex word 8 int. reg. int. reg.
6646 _Complex double word 16 memory int. reg.
6647
6648 vector integer <=8 int. reg. FP reg.
6649 vector integer >8 memory memory
6650
6651 float 4 int. reg. FP reg.
6652 double 8 int. reg. FP reg.
6653 long double 16 memory memory
6654
6655 _Complex float 8 memory FP reg.
6656 _Complex double 16 memory FP reg.
6657 _Complex long double 32 memory FP reg.
6658
6659 vector float any memory memory
6660
6661 aggregate any memory memory
6662
6663
6664
6665 64-bit ABI:
6666 size argument return value
6667
6668 small integer <8 int. reg. int. reg.
6669 word 8 int. reg. int. reg.
6670 double word 16 int. reg. int. reg.
6671
6672 _Complex small integer <16 int. reg. int. reg.
6673 _Complex word 16 int. reg. int. reg.
6674 _Complex double word 32 memory int. reg.
6675
6676 vector integer <=16 FP reg. FP reg.
6677 vector integer 16<s<=32 memory FP reg.
6678 vector integer >32 memory memory
6679
6680 float 4 FP reg. FP reg.
6681 double 8 FP reg. FP reg.
6682 long double 16 FP reg. FP reg.
6683
6684 _Complex float 8 FP reg. FP reg.
6685 _Complex double 16 FP reg. FP reg.
6686 _Complex long double 32 memory FP reg.
6687
6688 vector float <=16 FP reg. FP reg.
6689 vector float 16<s<=32 memory FP reg.
6690 vector float >32 memory memory
6691
6692 aggregate <=16 reg. reg.
6693 aggregate 16<s<=32 memory reg.
6694 aggregate >32 memory memory
6695
6696
6697
6698 Note #1: complex floating-point types follow the extended SPARC ABIs as
6699 implemented by the Sun compiler.
6700
6701 Note #2: integral vector types follow the scalar floating-point types
6702 conventions to match what is implemented by the Sun VIS SDK.
6703
6704 Note #3: floating-point vector types follow the aggregate types
6705 conventions. */
6706
6707
6708 /* Maximum number of int regs for args. */
6709 #define SPARC_INT_ARG_MAX 6
6710 /* Maximum number of fp regs for args. */
6711 #define SPARC_FP_ARG_MAX 16
6712 /* Number of words (partially) occupied for a given size in units. */
6713 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
6714
6715 /* Handle the INIT_CUMULATIVE_ARGS macro.
6716 Initialize a variable CUM of type CUMULATIVE_ARGS
6717 for a call to a function whose data type is FNTYPE.
6718 For a library call, FNTYPE is 0. */
6719
6720 void
init_cumulative_args(struct sparc_args * cum,tree fntype,rtx,tree)6721 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6722 {
6723 cum->words = 0;
6724 cum->prototype_p = fntype && prototype_p (fntype);
6725 cum->libcall_p = !fntype;
6726 }
6727
6728 /* Handle promotion of pointer and integer arguments. */
6729
6730 static machine_mode
sparc_promote_function_mode(const_tree type,machine_mode mode,int * punsignedp,const_tree,int)6731 sparc_promote_function_mode (const_tree type, machine_mode mode,
6732 int *punsignedp, const_tree, int)
6733 {
6734 if (type && POINTER_TYPE_P (type))
6735 {
6736 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6737 return Pmode;
6738 }
6739
6740 /* Integral arguments are passed as full words, as per the ABI. */
6741 if (GET_MODE_CLASS (mode) == MODE_INT
6742 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6743 return word_mode;
6744
6745 return mode;
6746 }
6747
6748 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6749
6750 static bool
sparc_strict_argument_naming(cumulative_args_t ca ATTRIBUTE_UNUSED)6751 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6752 {
6753 return TARGET_ARCH64 ? true : false;
6754 }
6755
6756 /* Traverse the record TYPE recursively and call FUNC on its fields.
6757 NAMED is true if this is for a named parameter. DATA is passed
6758 to FUNC for each field. OFFSET is the starting position and
6759 PACKED is true if we are inside a packed record. */
6760
template <typename T, void Func (const_tree, HOST_WIDE_INT, bool, T*)>
static void
traverse_record_type (const_tree type, bool named, T *data,
		      HOST_WIDE_INT offset = 0, bool packed = false)
{
  /* The ABI obviously doesn't specify how packed structures are passed.
     These are passed in integer regs if possible, otherwise memory.  */
  if (!packed)
    for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
      if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
	{
	  /* One packed field taints the whole record (and, via the
	     recursive call below, its nested records).  */
	  packed = true;
	  break;
	}

  /* Walk the real fields, but skip those with no size or a zero size.
     ??? Fields with variable offset are handled as having zero offset.  */
  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    if (TREE_CODE (field) == FIELD_DECL)
      {
	if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
	  continue;

	/* Bit position of the field relative to the outermost record.  */
	HOST_WIDE_INT bitpos = offset;
	if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
	  bitpos += int_bit_position (field);

	tree field_type = TREE_TYPE (field);
	if (TREE_CODE (field_type) == RECORD_TYPE)
	  /* Recurse into nested records, carrying the bit position and
	     the packedness down.  */
	  traverse_record_type<T, Func> (field_type, named, data, bitpos,
					 packed);
	else
	  {
	    /* FP eligibility requires a float or vector field in a named,
	       non-packed argument, with the FPU enabled.  */
	    const bool fp_type
	      = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
	    Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
		  data);
	  }
      }
}
6801
6802 /* Handle recursive register classifying for structure layout. */
6803
typedef struct
{
  bool fp_regs;			/* true if a field eligible for FP
				   registers has been seen.  */
  bool fp_regs_in_first_word;	/* true if such a field lies within the
				   first word of the record.  */
} classify_data_t;
6809
6810 /* A subroutine of function_arg_slotno. Classify the field. */
6811
6812 inline void
classify_registers(const_tree,HOST_WIDE_INT bitpos,bool fp,classify_data_t * data)6813 classify_registers (const_tree, HOST_WIDE_INT bitpos, bool fp,
6814 classify_data_t *data)
6815 {
6816 if (fp)
6817 {
6818 data->fp_regs = true;
6819 if (bitpos < BITS_PER_WORD)
6820 data->fp_regs_in_first_word = true;
6821 }
6822 }
6823
6824 /* Compute the slot number to pass an argument in.
6825 Return the slot number or -1 if passing on the stack.
6826
6827 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6828 the preceding args and about the function being called.
6829 MODE is the argument's machine mode.
6830 TYPE is the data type of the argument (as a tree).
6831 This is null for libcalls where that information may
6832 not be available.
6833 NAMED is nonzero if this argument is a named parameter
6834 (otherwise it is an extra parameter matching an ellipsis).
6835 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6836 *PREGNO records the register number to use if scalar type.
6837 *PPADDING records the amount of padding needed in words. */
6838
static int
function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
		     const_tree type, bool named, bool incoming,
		     int *pregno, int *ppadding)
{
  /* The register base differs between the caller's and the callee's
     view of the same argument registers.  */
  int regbase = (incoming
		 ? SPARC_INCOMING_INT_ARG_FIRST
		 : SPARC_OUTGOING_INT_ARG_FIRST);
  int slotno = cum->words;
  enum mode_class mclass;
  int regno;

  *ppadding = 0;

  /* Addressable types go on the stack (return -1).  */
  if (type && TREE_ADDRESSABLE (type))
    return -1;

  /* In 32-bit mode, a BLKmode object whose alignment doesn't divide
     PARM_BOUNDARY goes on the stack.  */
  if (TARGET_ARCH32
      && mode == BLKmode
      && type
      && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
    return -1;

  /* For SPARC64, objects requiring 16-byte alignment get it.  */
  if (TARGET_ARCH64
      && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
      && (slotno & 1) != 0)
    slotno++, *ppadding = 1;

  mclass = GET_MODE_CLASS (mode);
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    {
      /* Vector types deserve special treatment because they are
	 polymorphic wrt their mode, depending upon whether VIS
	 instructions are enabled.  */
      if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
	{
	  /* The SPARC port defines no floating-point vector modes.  */
	  gcc_assert (mode == BLKmode);
	}
      else
	{
	  /* Integral vector types should either have a vector
	     mode or an integral mode, because we are guaranteed
	     by pass_by_reference that their size is not greater
	     than 16 bytes and TImode is 16-byte wide.  */
	  gcc_assert (mode != BLKmode);

	  /* Vector integers are handled like floats according to
	     the Sun VIS SDK.  */
	  mclass = MODE_FLOAT;
	}
    }

  switch (mclass)
    {
    case MODE_FLOAT:
    case MODE_COMPLEX_FLOAT:
    case MODE_VECTOR_INT:
      if (TARGET_ARCH64 && TARGET_FPU && named)
	{
	  /* If all arg slots are filled, then must pass on stack.  */
	  if (slotno >= SPARC_FP_ARG_MAX)
	    return -1;

	  /* FP registers come in pairs per slot (see the layout table
	     above this function).  */
	  regno = SPARC_FP_ARG_FIRST + slotno * 2;
	  /* Arguments filling only one single FP register are
	     right-justified in the outer double FP register.  */
	  if (GET_MODE_SIZE (mode) <= 4)
	    regno++;
	  break;
	}
      /* fallthrough */

    case MODE_INT:
    case MODE_COMPLEX_INT:
      /* If all arg slots are filled, then must pass on stack.  */
      if (slotno >= SPARC_INT_ARG_MAX)
	return -1;

      regno = regbase + slotno;
      break;

    case MODE_RANDOM:
      if (mode == VOIDmode)
	/* MODE is VOIDmode when generating the actual call.  */
	return -1;

      gcc_assert (mode == BLKmode);

      if (TARGET_ARCH32
	  || !type
	  || (TREE_CODE (type) != RECORD_TYPE
	      && TREE_CODE (type) != VECTOR_TYPE))
	{
	  /* If all arg slots are filled, then must pass on stack.  */
	  if (slotno >= SPARC_INT_ARG_MAX)
	    return -1;

	  regno = regbase + slotno;
	}
      else /* TARGET_ARCH64 && type */
	{
	  /* If all arg slots are filled, then must pass on stack.  */
	  if (slotno >= SPARC_FP_ARG_MAX)
	    return -1;

	  if (TREE_CODE (type) == RECORD_TYPE)
	    {
	      /* Walk the record to find out whether any field is
		 eligible for FP registers.  */
	      classify_data_t data = { false, false };
	      traverse_record_type<classify_data_t, classify_registers>
		(type, named, &data);

	      if (data.fp_regs)
		{
		  /* If all FP slots are filled except for the last one and
		     there is no FP field in the first word, then must pass
		     on stack.  */
		  if (slotno >= SPARC_FP_ARG_MAX - 1
		      && !data.fp_regs_in_first_word)
		    return -1;
		}
	      else
		{
		  /* If all int slots are filled, then must pass on stack.  */
		  if (slotno >= SPARC_INT_ARG_MAX)
		    return -1;
		}
	    }

	  /* PREGNO isn't set since both int and FP regs can be used.  */
	  return slotno;
	}
      break;

    default :
      gcc_unreachable ();
    }

  *pregno = regno;
  return slotno;
}
6981
6982 /* Handle recursive register counting/assigning for structure layout. */
6983
typedef struct
{
  int slotno;		/* slot number of the argument.  */
  int regbase;		/* regno of the base register.  */
  int intoffset;	/* offset of the first pending integer field,
			   or -1 if none is pending.  */
  int nregs;		/* number of words passed in registers.  */
  bool stack;		/* true if part of the argument is on the stack.  */
  rtx ret;		/* return expression being built.  */
} assign_data_t;
6993
/* A subroutine of function_arg_record_value.  Compute the number of integer
   registers to be assigned between PARMS->intoffset and BITPOS.  Return
   true if at least one integer register is assigned or false otherwise.  */

static bool
compute_int_layout (HOST_WIDE_INT bitpos, assign_data_t *data, int *pnregs)
{
  /* Nothing to do if there is no pending integer field.  */
  if (data->intoffset < 0)
    return false;

  /* Consume the pending field, resetting the sentinel.  */
  const int intoffset = data->intoffset;
  data->intoffset = -1;

  const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
  const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
  const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
  /* Number of whole words spanned by [intoffset, bitpos).  */
  int nregs = (endbit - startbit) / BITS_PER_WORD;

  if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
    {
      /* Clamp to the register slots actually remaining.  */
      nregs = SPARC_INT_ARG_MAX - this_slotno;

      /* We need to pass this field (partly) on the stack.  */
      data->stack = 1;
    }

  if (nregs <= 0)
    return false;

  *pnregs = nregs;
  return true;
}
7026
/* A subroutine of function_arg_record_value.  Compute the number and the mode
   of the FP registers to be assigned for FIELD.  Return true if at least one
   FP register is assigned or false otherwise.  */

static bool
compute_fp_layout (const_tree field, HOST_WIDE_INT bitpos,
		   assign_data_t *data,
		   int *pnregs, machine_mode *pmode)
{
  const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
  machine_mode mode = DECL_MODE (field);
  int nregs, nslots;

  /* Slots are counted as words while regs are counted as having the size of
     the (inner) mode.  */
  if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE && mode == BLKmode)
    {
      /* BLKmode vector field: one register per element, in the element
	 (inner) mode.  */
      mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
      nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
    }
  else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
    {
      /* Complex field: one register each for the real and imaginary part.  */
      mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
      nregs = 2;
    }
  else
    nregs = 1;

  nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));

  if (nslots > SPARC_FP_ARG_MAX - this_slotno)
    {
      /* Clamp to the slots that remain; the excess goes on the stack.  */
      nslots = SPARC_FP_ARG_MAX - this_slotno;
      nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);

      /* We need to pass this field (partly) on the stack.  */
      data->stack = 1;

      if (nregs <= 0)
	return false;
    }

  *pnregs = nregs;
  *pmode = mode;
  return true;
}
7073
7074 /* A subroutine of function_arg_record_value. Count the number of registers
7075 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
7076
7077 inline void
count_registers(const_tree field,HOST_WIDE_INT bitpos,bool fp,assign_data_t * data)7078 count_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
7079 assign_data_t *data)
7080 {
7081 if (fp)
7082 {
7083 int nregs;
7084 machine_mode mode;
7085
7086 if (compute_int_layout (bitpos, data, &nregs))
7087 data->nregs += nregs;
7088
7089 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
7090 data->nregs += nregs;
7091 }
7092 else
7093 {
7094 if (data->intoffset < 0)
7095 data->intoffset = bitpos;
7096 }
7097 }
7098
/* A subroutine of function_arg_record_value.  Assign the bits of the
   structure between PARMS->intoffset and BITPOS to integer registers.  */

static void
assign_int_registers (HOST_WIDE_INT bitpos, assign_data_t *data)
{
  int intoffset = data->intoffset;
  machine_mode mode;
  int nregs;

  /* Nothing to do if there is no pending integer run.  */
  if (!compute_int_layout (bitpos, data, &nregs))
    return;

  /* If this is the trailing part of a word, only load that much into
     the register.  Otherwise load the whole register.  Note that in
     the latter case we may pick up unwanted bits.  It's not a problem
     at the moment but may wish to revisit.  */
  if (intoffset % BITS_PER_WORD != 0)
    mode = smallest_int_mode_for_size (BITS_PER_WORD
				       - intoffset % BITS_PER_WORD);
  else
    mode = word_mode;

  const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
  unsigned int regno = data->regbase + this_slotno;
  intoffset /= BITS_PER_UNIT;

  /* Emit one EXPR_LIST entry per register.  After the first (possibly
     partial-word) register, continue with full words at successive
     word boundaries.  */
  do
    {
      rtx reg = gen_rtx_REG (mode, regno);
      XVECEXP (data->ret, 0, data->stack + data->nregs)
	= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
      data->nregs += 1;
      mode = word_mode;
      regno += 1;
      /* Round the byte offset up to the next word boundary.  */
      intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
    }
  while (--nregs > 0);
}
7138
/* A subroutine of function_arg_record_value.  Assign FIELD at position
   BITPOS to FP registers.  */

static void
assign_fp_registers (const_tree field, HOST_WIDE_INT bitpos,
		     assign_data_t *data)
{
  int nregs;
  machine_mode mode;

  if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
    return;

  const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
  int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
  /* A value of at most 4 bytes sitting in the second half of its word
     goes in the odd-numbered FP register.  */
  if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
    regno++;
  int pos = bitpos / BITS_PER_UNIT;

  /* Emit one EXPR_LIST entry per register, advancing by the number of
     4-byte FP registers each value occupies.  */
  do
    {
      rtx reg = gen_rtx_REG (mode, regno);
      XVECEXP (data->ret, 0, data->stack + data->nregs)
	= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
      data->nregs += 1;
      regno += GET_MODE_SIZE (mode) / 4;
      pos += GET_MODE_SIZE (mode);
    }
  while (--nregs > 0);
}
7169
7170 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
7171 the structure between PARMS->intoffset and BITPOS to registers. */
7172
7173 inline void
assign_registers(const_tree field,HOST_WIDE_INT bitpos,bool fp,assign_data_t * data)7174 assign_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
7175 assign_data_t *data)
7176 {
7177 if (fp)
7178 {
7179 assign_int_registers (bitpos, data);
7180
7181 assign_fp_registers (field, bitpos, data);
7182 }
7183 else
7184 {
7185 if (data->intoffset < 0)
7186 data->intoffset = bitpos;
7187 }
7188 }
7189
/* Used by function_arg and sparc_function_value_1 to implement the complex
   conventions of the 64-bit ABI for passing and returning structures.
   Return an expression valid as a return value for the FUNCTION_ARG
   and TARGET_FUNCTION_VALUE.

   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   MODE is the argument's machine mode.
   SLOTNO is the index number of the argument's slot in the parameter array.
   NAMED is true if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).
   REGBASE is the regno of the base register for the parameter array.  */

static rtx
function_arg_record_value (const_tree type, machine_mode mode,
			   int slotno, bool named, int regbase)
{
  HOST_WIDE_INT typesize = int_size_in_bytes (type);
  assign_data_t data;
  int nregs;

  data.slotno = slotno;
  data.regbase = regbase;

  /* First pass: count how many registers we need.  */
  data.nregs = 0;
  data.intoffset = 0;
  data.stack = false;
  traverse_record_type<assign_data_t, count_registers> (type, named, &data);

  /* Take into account pending integer fields.  */
  if (compute_int_layout (typesize * BITS_PER_UNIT, &data, &nregs))
    data.nregs += nregs;

  /* Allocate the vector and handle some annoying special cases.  */
  nregs = data.nregs;

  if (nregs == 0)
    {
      /* ??? Empty structure has no value?  Duh?  */
      if (typesize <= 0)
	{
	  /* Though there's nothing really to store, return a word register
	     anyway so the rest of gcc doesn't go nuts.  Returning a PARALLEL
	     leads to breakage due to the fact that there are zero bytes to
	     load.  */
	  return gen_rtx_REG (mode, regbase);
	}

      /* ??? C++ has structures with no fields, and yet a size.  Give up
	 for now and pass everything back in integer registers.  */
      nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
      if (nregs + slotno > SPARC_INT_ARG_MAX)
	nregs = SPARC_INT_ARG_MAX - slotno;
    }

  gcc_assert (nregs > 0);

  data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));

  /* If at least one field must be passed on the stack, generate
     (parallel [(expr_list (nil) ...) ...]) so that all fields will
     also be passed on the stack.  We can't do much better because the
     semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
     of structures for which the fields passed exclusively in registers
     are not at the beginning of the structure.  */
  if (data.stack)
    XVECEXP (data.ret, 0, 0)
      = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);

  /* Second pass: assign the registers.  */
  data.nregs = 0;
  data.intoffset = 0;
  traverse_record_type<assign_data_t, assign_registers> (type, named, &data);

  /* Assign pending integer fields.  */
  assign_int_registers (typesize * BITS_PER_UNIT, &data);

  /* The counting and assigning passes must agree.  */
  gcc_assert (data.nregs == nregs);

  return data.ret;
}
7273
7274 /* Used by function_arg and sparc_function_value_1 to implement the conventions
7275 of the 64-bit ABI for passing and returning unions.
7276 Return an expression valid as a return value for the FUNCTION_ARG
7277 and TARGET_FUNCTION_VALUE.
7278
7279 SIZE is the size in bytes of the union.
7280 MODE is the argument's machine mode.
7281 REGNO is the hard register the union will be passed in. */
7282
7283 static rtx
function_arg_union_value(int size,machine_mode mode,int slotno,int regno)7284 function_arg_union_value (int size, machine_mode mode, int slotno,
7285 int regno)
7286 {
7287 int nwords = CEIL_NWORDS (size), i;
7288 rtx regs;
7289
7290 /* See comment in previous function for empty structures. */
7291 if (nwords == 0)
7292 return gen_rtx_REG (mode, regno);
7293
7294 if (slotno == SPARC_INT_ARG_MAX - 1)
7295 nwords = 1;
7296
7297 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
7298
7299 for (i = 0; i < nwords; i++)
7300 {
7301 /* Unions are passed left-justified. */
7302 XVECEXP (regs, 0, i)
7303 = gen_rtx_EXPR_LIST (VOIDmode,
7304 gen_rtx_REG (word_mode, regno),
7305 GEN_INT (UNITS_PER_WORD * i));
7306 regno++;
7307 }
7308
7309 return regs;
7310 }
7311
7312 /* Used by function_arg and sparc_function_value_1 to implement the conventions
7313 for passing and returning BLKmode vectors.
7314 Return an expression valid as a return value for the FUNCTION_ARG
7315 and TARGET_FUNCTION_VALUE.
7316
7317 SIZE is the size in bytes of the vector.
7318 REGNO is the FP hard register the vector will be passed in. */
7319
7320 static rtx
function_arg_vector_value(int size,int regno)7321 function_arg_vector_value (int size, int regno)
7322 {
7323 const int nregs = MAX (1, size / 8);
7324 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
7325
7326 if (size < 8)
7327 XVECEXP (regs, 0, 0)
7328 = gen_rtx_EXPR_LIST (VOIDmode,
7329 gen_rtx_REG (SImode, regno),
7330 const0_rtx);
7331 else
7332 for (int i = 0; i < nregs; i++)
7333 XVECEXP (regs, 0, i)
7334 = gen_rtx_EXPR_LIST (VOIDmode,
7335 gen_rtx_REG (DImode, regno + 2*i),
7336 GEN_INT (i*8));
7337
7338 return regs;
7339 }
7340
/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   NAMED is true if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).
   INCOMING_P is false for TARGET_FUNCTION_ARG, true for
    TARGET_FUNCTION_INCOMING_ARG.  */

static rtx
sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
		      const_tree type, bool named, bool incoming)
{
  const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  /* The register base differs for incoming and outgoing arguments.  */
  int regbase = (incoming
		 ? SPARC_INCOMING_INT_ARG_FIRST
		 : SPARC_OUTGOING_INT_ARG_FIRST);
  int slotno, regno, padding;
  enum mode_class mclass = GET_MODE_CLASS (mode);

  slotno = function_arg_slotno (cum, mode, type, named, incoming,
				&regno, &padding);
  /* A slot number of -1 means the argument is passed on the stack.  */
  if (slotno == -1)
    return 0;

  /* Vector types deserve special treatment because they are polymorphic wrt
     their mode, depending upon whether VIS instructions are enabled.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      gcc_assert ((TARGET_ARCH32 && size <= 8)
		  || (TARGET_ARCH64 && size <= 16));

      if (mode == BLKmode)
	return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);

      mclass = MODE_FLOAT;
    }

  if (TARGET_ARCH32)
    return gen_rtx_REG (mode, regno);

  /* Structures up to 16 bytes in size are passed in arg slots on the stack
     and are promoted to registers if possible.  */
  if (type && TREE_CODE (type) == RECORD_TYPE)
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      gcc_assert (size <= 16);

      return function_arg_record_value (type, mode, slotno, named, regbase);
    }

  /* Unions up to 16 bytes in size are passed in integer registers.  */
  else if (type && TREE_CODE (type) == UNION_TYPE)
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      gcc_assert (size <= 16);

      return function_arg_union_value (size, mode, slotno, regno);
    }

  /* v9 fp args in reg slots beyond the int reg slots get passed in regs
     but also have the slot allocated for them.
     If no prototype is in scope fp values in register slots get passed
     in two places, either fp regs and int regs or fp regs and memory.  */
  else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
	   && SPARC_FP_REG_P (regno))
    {
      rtx reg = gen_rtx_REG (mode, regno);
      if (cum->prototype_p || cum->libcall_p)
	return reg;
      else
	{
	  rtx v0, v1;

	  if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
	    {
	      int intreg;

	      /* On incoming, we don't need to know that the value
		 is passed in %f0 and %i0, and it confuses other parts
		 causing needless spillage even on the simplest cases.  */
	      if (incoming)
		return reg;

	      /* Map the FP register back to the matching outgoing integer
		 argument register (two FP regs per int reg slot).  */
	      intreg = (SPARC_OUTGOING_INT_ARG_FIRST
			+ (regno - SPARC_FP_ARG_FIRST) / 2);

	      /* Pass the value in both the FP register and the
		 corresponding integer register.  */
	      v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
	      v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
				      const0_rtx);
	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
	    }
	  else
	    {
	      /* No integer register slot is available: pass the value in
		 the FP register and in memory (leading NULL_RTX entry).  */
	      v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
	      v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
	    }
	}
    }

  /* All other aggregate types are passed in an integer register in a mode
     corresponding to the size of the type.  */
  else if (type && AGGREGATE_TYPE_P (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      gcc_assert (size <= 16);

      mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
    }

  return gen_rtx_REG (mode, regno);
}
7462
7463 /* Handle the TARGET_FUNCTION_ARG target hook. */
7464
7465 static rtx
sparc_function_arg(cumulative_args_t cum,machine_mode mode,const_tree type,bool named)7466 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
7467 const_tree type, bool named)
7468 {
7469 return sparc_function_arg_1 (cum, mode, type, named, false);
7470 }
7471
7472 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
7473
7474 static rtx
sparc_function_incoming_arg(cumulative_args_t cum,machine_mode mode,const_tree type,bool named)7475 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
7476 const_tree type, bool named)
7477 {
7478 return sparc_function_arg_1 (cum, mode, type, named, true);
7479 }
7480
7481 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
7482
7483 static unsigned int
sparc_function_arg_boundary(machine_mode mode,const_tree type)7484 sparc_function_arg_boundary (machine_mode mode, const_tree type)
7485 {
7486 return ((TARGET_ARCH64
7487 && (GET_MODE_ALIGNMENT (mode) == 128
7488 || (type && TYPE_ALIGN (type) == 128)))
7489 ? 128
7490 : PARM_BOUNDARY);
7491 }
7492
/* For an arg passed partly in registers and partly in memory,
   this is the number of bytes of registers used.
   For args passed entirely in registers or entirely in memory, zero.

   Any arg that starts in the first 6 regs but won't entirely fit in them
   needs partial registers on v8.  On v9, structures with integer
   values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
   values that begin in the last fp reg [where "last fp reg" varies with the
   mode] will be split between that reg and memory.  */

static int
sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
			 tree type, bool named)
{
  int slotno, regno, padding;

  /* We pass false for incoming here, it doesn't matter.  */
  slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
				false, &regno, &padding);

  if (slotno == -1)
    return 0;

  if (TARGET_ARCH32)
    {
      /* Partial if the argument's words run past the last register slot.  */
      if ((slotno + (mode == BLKmode
		     ? CEIL_NWORDS (int_size_in_bytes (type))
		     : CEIL_NWORDS (GET_MODE_SIZE (mode))))
	  > SPARC_INT_ARG_MAX)
	return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
    }
  else
    {
      /* We are guaranteed by pass_by_reference that the size of the
	 argument is not greater than 16 bytes, so we only need to return
	 one word if the argument is partially passed in registers.  */

      if (type && AGGREGATE_TYPE_P (type))
	{
	  int size = int_size_in_bytes (type);

	  /* Partial only when the aggregate needs two slots but starts
	     in the very last integer or FP slot.  */
	  if (size > UNITS_PER_WORD
	      && (slotno == SPARC_INT_ARG_MAX - 1
		  || slotno == SPARC_FP_ARG_MAX - 1))
	    return UNITS_PER_WORD;
	}
      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
	       || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
		   && ! (TARGET_FPU && named)))
	{
	  /* The complex types are passed as packed types.  */
	  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
	      && slotno == SPARC_INT_ARG_MAX - 1)
	    return UNITS_PER_WORD;
	}
      else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
	{
	  /* Complex FP values split at the end of the FP slot range.  */
	  if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
	      > SPARC_FP_ARG_MAX)
	    return UNITS_PER_WORD;
	}
    }

  return 0;
}
7558
/* Handle the TARGET_PASS_BY_REFERENCE target hook.
   Specify whether to pass the argument by reference.  */

static bool
sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
			 machine_mode mode, const_tree type,
			 bool named ATTRIBUTE_UNUSED)
{
  if (TARGET_ARCH32)
    /* Original SPARC 32-bit ABI says that structures and unions,
       and quad-precision floats are passed by reference.  For Pascal,
       also pass arrays by reference.  All other base types are passed
       in registers.

       Extended ABI (as implemented by the Sun compiler) says that all
       complex floats are passed by reference.  Pass complex integers
       in registers up to 8 bytes.  More generally, enforce the 2-word
       cap for passing arguments in registers.

       Vector ABI (as implemented by the Sun VIS SDK) says that vector
       integers are passed like floats of the same size, that is in
       registers up to 8 bytes.  Pass all vector floats by reference
       like structure and unions.  */
    return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
	    /* SCmode is a complex float: by reference per extended ABI.  */
	    || mode == SCmode
	    /* Catch CDImode, TFmode, DCmode and TCmode.  */
	    || GET_MODE_SIZE (mode) > 8
	    || (type
		&& TREE_CODE (type) == VECTOR_TYPE
		&& (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
  else
    /* Original SPARC 64-bit ABI says that structures and unions
       smaller than 16 bytes are passed in registers, as well as
       all other base types.

       Extended ABI (as implemented by the Sun compiler) says that
       complex floats are passed in registers up to 16 bytes.  Pass
       all complex integers in registers up to 16 bytes.  More generally,
       enforce the 2-word cap for passing arguments in registers.

       Vector ABI (as implemented by the Sun VIS SDK) says that vector
       integers are passed like floats of the same size, that is in
       registers (up to 16 bytes).  Pass all vector floats like structure
       and unions.  */
    return ((type
	     && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
	     && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
	    /* Catch CTImode and TCmode.  */
	    || GET_MODE_SIZE (mode) > 16);
}
7609
/* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
   Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   TYPE is null for libcalls where that information may not be available.  */

static void
sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
			    const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int regno, padding;

  /* We pass false for incoming here, it doesn't matter.  */
  function_arg_slotno (cum, mode, type, named, false, &regno, &padding);

  /* If argument requires leading padding, add it.  */
  cum->words += padding;

  if (TARGET_ARCH32)
    cum->words += (mode == BLKmode
		   ? CEIL_NWORDS (int_size_in_bytes (type))
		   : CEIL_NWORDS (GET_MODE_SIZE (mode)));
  else
    {
      /* 64-bit: aggregates take one or two slots depending on size;
	 anything larger than 16 bytes is passed by reference and so
	 consumes a single pointer-sized slot.  */
      if (type && AGGREGATE_TYPE_P (type))
	{
	  int size = int_size_in_bytes (type);

	  if (size <= 8)
	    ++cum->words;
	  else if (size <= 16)
	    cum->words += 2;
	  else /* passed by reference */
	    ++cum->words;
	}
      else
	cum->words += (mode == BLKmode
		       ? CEIL_NWORDS (int_size_in_bytes (type))
		       : CEIL_NWORDS (GET_MODE_SIZE (mode)));
    }
}
7651
7652 /* Implement TARGET_FUNCTION_ARG_PADDING. For the 64-bit ABI structs
7653 are always stored left shifted in their argument slot. */
7654
7655 static pad_direction
sparc_function_arg_padding(machine_mode mode,const_tree type)7656 sparc_function_arg_padding (machine_mode mode, const_tree type)
7657 {
7658 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7659 return PAD_UPWARD;
7660
7661 /* Fall back to the default. */
7662 return default_function_arg_padding (mode, type);
7663 }
7664
/* Handle the TARGET_RETURN_IN_MEMORY target hook.
   Specify whether to return the return value in memory.  */

static bool
sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  if (TARGET_ARCH32)
    /* Original SPARC 32-bit ABI says that structures and unions,
       and quad-precision floats are returned in memory.  All other
       base types are returned in registers.

       Extended ABI (as implemented by the Sun compiler) says that
       all complex floats are returned in registers (8 FP registers
       at most for '_Complex long double').  Return all complex integers
       in registers (4 at most for '_Complex long long').

       Vector ABI (as implemented by the Sun VIS SDK) says that vector
       integers are returned like floats of the same size, that is in
       registers up to 8 bytes and in memory otherwise.  Return all
       vector floats in memory like structure and unions; note that
       they always have BLKmode like the latter.  */
    return (TYPE_MODE (type) == BLKmode
	    || TYPE_MODE (type) == TFmode
	    || (TREE_CODE (type) == VECTOR_TYPE
		&& (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
  else
    /* Original SPARC 64-bit ABI says that structures and unions
       smaller than 32 bytes are returned in registers, as well as
       all other base types.

       Extended ABI (as implemented by the Sun compiler) says that all
       complex floats are returned in registers (8 FP registers at most
       for '_Complex long double').  Return all complex integers in
       registers (4 at most for '_Complex TItype').

       Vector ABI (as implemented by the Sun VIS SDK) says that vector
       integers are returned like floats of the same size, that is in
       registers.  Return all vector floats like structure and unions;
       note that they always have BLKmode like the latter.  */
    return (TYPE_MODE (type) == BLKmode
	    && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
}
7707
/* Handle the TARGET_STRUCT_VALUE target hook.
   Return where to find the structure return value address.  */

static rtx
sparc_struct_value_rtx (tree fndecl, int incoming)
{
  /* The 64-bit ABI does not use a hidden struct-return slot.  */
  if (TARGET_ARCH64)
    return 0;
  else
    {
      rtx mem;

      /* The hidden struct-return address lives at a fixed offset from
	 the frame pointer (incoming) or the stack pointer (outgoing).  */
      if (incoming)
	mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
						   STRUCT_VALUE_OFFSET));
      else
	mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
						   STRUCT_VALUE_OFFSET));

      /* Only follow the SPARC ABI for fixed-size structure returns.
	 Variable size structure returns are handled per the normal
	 procedures in GCC.  This is enabled by -mstd-struct-return */
      if (incoming == 2
	  && sparc_std_struct_return
	  && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
	  && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
	{
	  /* We must check and adjust the return address, as it is optional
	     as to whether the return object is really provided.  */
	  rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
	  rtx scratch = gen_reg_rtx (SImode);
	  rtx_code_label *endlab = gen_label_rtx ();

	  /* Calculate the return object size.  The size is truncated to
	     12 bits to match the immediate field of the unimp insn.  */
	  tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
	  rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
	  /* Construct a temporary return value.  */
	  rtx temp_val
	    = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);

	  /* Implement SPARC 32-bit psABI callee return struct checking:

	     Fetch the instruction where we will return to and see if
	     it's an unimp instruction (the most significant 10 bits
	     will be zero).  */
	  emit_move_insn (scratch, gen_rtx_MEM (SImode,
						plus_constant (Pmode,
							       ret_reg, 8)));
	  /* Assume the size is valid and pre-adjust.  */
	  emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
	  emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
				   0, endlab);
	  /* Mismatch: undo the pre-adjustment of the return address.  */
	  emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
	  /* Write the address of the memory pointed to by temp_val into
	     the memory pointed to by mem.  */
	  emit_move_insn (mem, XEXP (temp_val, 0));
	  emit_label (endlab);
	}

      return mem;
    }
}
7770
/* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
   For v9, function return values are subject to the same rules as arguments,
   except that up to 32 bytes may be returned in registers.  */

static rtx
sparc_function_value_1 (const_tree type, machine_mode mode,
			bool outgoing)
{
  /* Beware that the two values are swapped here wrt function_arg.  */
  int regbase = (outgoing
		 ? SPARC_INCOMING_INT_ARG_FIRST
		 : SPARC_OUTGOING_INT_ARG_FIRST);
  enum mode_class mclass = GET_MODE_CLASS (mode);
  int regno;

  /* Vector types deserve special treatment because they are polymorphic wrt
     their mode, depending upon whether VIS instructions are enabled.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      gcc_assert ((TARGET_ARCH32 && size <= 8)
		  || (TARGET_ARCH64 && size <= 32));

      if (mode == BLKmode)
	return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);

      mclass = MODE_FLOAT;
    }

  if (TARGET_ARCH64 && type)
    {
      /* Structures up to 32 bytes in size are returned in registers.  */
      if (TREE_CODE (type) == RECORD_TYPE)
	{
	  HOST_WIDE_INT size = int_size_in_bytes (type);
	  gcc_assert (size <= 32);

	  return function_arg_record_value (type, mode, 0, 1, regbase);
	}

      /* Unions up to 32 bytes in size are returned in integer registers.  */
      else if (TREE_CODE (type) == UNION_TYPE)
	{
	  HOST_WIDE_INT size = int_size_in_bytes (type);
	  gcc_assert (size <= 32);

	  return function_arg_union_value (size, mode, 0, regbase);
	}

      /* Objects that require it are returned in FP registers.  */
      else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
	;

      /* All other aggregate types are returned in an integer register in a
	 mode corresponding to the size of the type.  */
      else if (AGGREGATE_TYPE_P (type))
	{
	  /* All other aggregate types are passed in an integer register
	     in a mode corresponding to the size of the type.  */
	  HOST_WIDE_INT size = int_size_in_bytes (type);
	  gcc_assert (size <= 32);

	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();

	  /* ??? We probably should have made the same ABI change in
	     3.4.0 as the one we made for unions.   The latter was
	     required by the SCD though, while the former is not
	     specified, so we favored compatibility and efficiency.

	     Now we're stuck for aggregates larger than 16 bytes,
	     because OImode vanished in the meantime.  Let's not
	     try to be unduly clever, and simply follow the ABI
	     for unions in that case.  */
	  if (mode == BLKmode)
	    return function_arg_union_value (size, mode, 0, regbase);
	  else
	    mclass = MODE_INT;
	}

      /* We should only have pointer and integer types at this point.  This
	 must match sparc_promote_function_mode.  */
      else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
	mode = word_mode;
    }

  /* We should only have pointer and integer types at this point, except with
     -freg-struct-return.  This must match sparc_promote_function_mode.  */
  else if (TARGET_ARCH32
	   && !(type && AGGREGATE_TYPE_P (type))
	   && mclass == MODE_INT
	   && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
    mode = word_mode;

  /* FP and complex FP values are returned in %f0 when an FPU exists.  */
  if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
    regno = SPARC_FP_ARG_FIRST;
  else
    regno = regbase;

  return gen_rtx_REG (mode, regno);
}
7871
7872 /* Handle TARGET_FUNCTION_VALUE.
7873 On the SPARC, the value is found in the first "output" register, but the
7874 called function leaves it in the first "input" register. */
7875
7876 static rtx
sparc_function_value(const_tree valtype,const_tree fn_decl_or_type ATTRIBUTE_UNUSED,bool outgoing)7877 sparc_function_value (const_tree valtype,
7878 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7879 bool outgoing)
7880 {
7881 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7882 }
7883
7884 /* Handle TARGET_LIBCALL_VALUE. */
7885
7886 static rtx
sparc_libcall_value(machine_mode mode,const_rtx fun ATTRIBUTE_UNUSED)7887 sparc_libcall_value (machine_mode mode,
7888 const_rtx fun ATTRIBUTE_UNUSED)
7889 {
7890 return sparc_function_value_1 (NULL_TREE, mode, false);
7891 }
7892
7893 /* Handle FUNCTION_VALUE_REGNO_P.
7894 On the SPARC, the first "output" reg is used for integer values, and the
7895 first floating point register is used for floating point values. */
7896
7897 static bool
sparc_function_value_regno_p(const unsigned int regno)7898 sparc_function_value_regno_p (const unsigned int regno)
7899 {
7900 return (regno == 8 || (TARGET_FPU && regno == 32));
7901 }
7902
/* Do what is necessary for `va_start'.  We look at the current function
   to determine if stdarg or varargs is used and return the address of
   the first unnamed parameter.  */

static rtx
sparc_builtin_saveregs (void)
{
  /* Number of argument words consumed by the named parameters, i.e. the
     index of the first register slot holding anonymous arguments.  */
  int first_reg = crtl->args.info.words;
  rtx address;
  int regno;

  /* Dump the remaining incoming integer argument registers into their
     reserved stack slots, so va_arg can walk the arguments in memory.  */
  for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
    emit_move_insn (gen_rtx_MEM (word_mode,
				 gen_rtx_PLUS (Pmode,
					       frame_pointer_rtx,
					       GEN_INT (FIRST_PARM_OFFSET (0)
							+ (UNITS_PER_WORD
							   * regno)))),
		    gen_rtx_REG (word_mode,
				 SPARC_INCOMING_INT_ARG_FIRST + regno));

  /* Address of the first anonymous argument's stack slot.  */
  address = gen_rtx_PLUS (Pmode,
			  frame_pointer_rtx,
			  GEN_INT (FIRST_PARM_OFFSET (0)
				   + UNITS_PER_WORD * first_reg));

  return address;
}
7931
7932 /* Implement `va_start' for stdarg. */
7933
7934 static void
sparc_va_start(tree valist,rtx nextarg)7935 sparc_va_start (tree valist, rtx nextarg)
7936 {
7937 nextarg = expand_builtin_saveregs ();
7938 std_expand_builtin_va_start (valist, nextarg);
7939 }
7940
/* Implement `va_arg' for stdarg.  Build GIMPLE that fetches the next
   argument of TYPE from the va_list VALIST and advances it.  Setup code
   goes on PRE_P, the post-increment of the list pointer on POST_P.  */

static tree
sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		       gimple_seq *post_p)
{
  HOST_WIDE_INT size, rsize, align;
  tree addr, incr;
  bool indirect;
  tree ptrtype = build_pointer_type (type);

  if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
    {
      /* The slot holds only a pointer to the real datum; an extra
	 indirection is added further down.  */
      indirect = true;
      size = rsize = UNITS_PER_WORD;
      align = 0;
    }
  else
    {
      indirect = false;
      size = int_size_in_bytes (type);
      /* RSIZE is the size of the stack slot: SIZE rounded up to a
	 whole number of words.  */
      rsize = ROUND_UP (size, UNITS_PER_WORD);
      align = 0;

      if (TARGET_ARCH64)
	{
	  /* For SPARC64, objects requiring 16-byte alignment get it.  */
	  if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
	    align = 2 * UNITS_PER_WORD;

	  /* SPARC-V9 ABI states that structures up to 16 bytes in size
	     are left-justified in their slots.  */
	  if (AGGREGATE_TYPE_P (type))
	    {
	      if (size == 0)
		size = rsize = UNITS_PER_WORD;
	      else
		size = rsize;
	    }
	}
    }

  /* INCR walks the va_list pointer.  Round it up first when the type
     demands more than word alignment.  */
  incr = valist;
  if (align)
    {
      incr = fold_build_pointer_plus_hwi (incr, align - 1);
      incr = fold_convert (sizetype, incr);
      incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
			  size_int (-align));
      incr = fold_convert (ptr_type_node, incr);
    }

  gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
  addr = incr;

  /* On a big-endian target a sub-word datum sits at the high-order end
     of its slot; skip the leading padding.  */
  if (BYTES_BIG_ENDIAN && size < rsize)
    addr = fold_build_pointer_plus_hwi (incr, rsize - size);

  if (indirect)
    {
      addr = fold_convert (build_pointer_type (ptrtype), addr);
      addr = build_va_arg_indirect_ref (addr);
    }

  /* If the address isn't aligned properly for the type, we need a temporary.
     FIXME: This is inefficient, usually we can do this in registers.  */
  else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
    {
      tree tmp = create_tmp_var (type, "va_arg_tmp");
      tree dest_addr = build_fold_addr_expr (tmp);
      tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
				   3, dest_addr, addr, size_int (rsize));
      TREE_ADDRESSABLE (tmp) = 1;
      gimplify_and_add (copy, pre_p);
      addr = dest_addr;
    }

  else
    addr = fold_convert (ptrtype, addr);

  /* Advance the va_list pointer past the slot just consumed.  */
  incr = fold_build_pointer_plus_hwi (incr, rsize);
  gimplify_assign (valist, incr, post_p);

  return build_va_arg_indirect_ref (addr);
}
8026
8027 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
8028 Specify whether the vector mode is supported by the hardware. */
8029
8030 static bool
sparc_vector_mode_supported_p(machine_mode mode)8031 sparc_vector_mode_supported_p (machine_mode mode)
8032 {
8033 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
8034 }
8035
8036 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
8037
8038 static machine_mode
sparc_preferred_simd_mode(scalar_mode mode)8039 sparc_preferred_simd_mode (scalar_mode mode)
8040 {
8041 if (TARGET_VIS)
8042 switch (mode)
8043 {
8044 case E_SImode:
8045 return V2SImode;
8046 case E_HImode:
8047 return V4HImode;
8048 case E_QImode:
8049 return V8QImode;
8050
8051 default:;
8052 }
8053
8054 return word_mode;
8055 }
8056
/* Return the string to output an unconditional branch to LABEL, which is
   the operand number of the label.

   DEST is the destination insn (i.e. the label), INSN is the source.
   The returned template may point to a static buffer that is reused on
   the next call.  */

const char *
output_ubranch (rtx dest, rtx_insn *insn)
{
  static char string[64];
  bool v9_form = false;
  int delta;
  char *p;

  /* Even if we are trying to use cbcond for this, evaluate
     whether we can use V9 branches as our backup plan.  */

  /* Assume an out-of-range distance when insn addresses are not
     available yet.  */
  delta = 5000000;
  if (INSN_ADDRESSES_SET_P ())
    delta = (INSN_ADDRESSES (INSN_UID (dest))
	     - INSN_ADDRESSES (INSN_UID (insn)));

  /* Leave some instructions for "slop".  */
  if (TARGET_V9 && delta >= -260000 && delta < 260000)
    v9_form = true;

  if (TARGET_CBCOND)
    {
      bool emit_nop = emit_cbcond_nop (insn);
      bool far = false;
      const char *rval;

      /* cbcond has a far shorter reach than a regular branch; fall
	 back to one of the branch forms beyond roughly +-500 bytes.  */
      if (delta < -500 || delta > 500)
	far = true;

      if (far)
	{
	  if (v9_form)
	    rval = "ba,a,pt\t%%xcc, %l0";
	  else
	    rval = "b,a\t%l0";
	}
      else
	{
	  /* "cwbe %g0, %g0" always compares equal, giving an
	     unconditional compare-and-branch.  */
	  if (emit_nop)
	    rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
	  else
	    rval = "cwbe\t%%g0, %%g0, %l0";
	}
      return rval;
    }

  if (v9_form)
    strcpy (string, "ba%*,pt\t%%xcc, ");
  else
    strcpy (string, "b%*\t");

  /* Append the label operand "%l0" and the "%(" escape by hand
     (both presumably expanded by the target's print_operand
     punctuation handling — see sparc_print_operand).  */
  p = strchr (string, '\0');
  *p++ = '%';
  *p++ = 'l';
  *p++ = '0';
  *p++ = '%';
  *p++ = '(';
  *p = '\0';

  return string;
}
8123
/* Return the string to output a conditional branch to LABEL, which is
   the operand number of the label.  OP is the conditional expression.
   XEXP (OP, 0) is assumed to be a condition code register (integer or
   floating point) and its mode specifies what kind of comparison we made.

   DEST is the destination insn (i.e. the label), INSN is the source.

   REVERSED is nonzero if we should reverse the sense of the comparison.

   ANNUL is nonzero if we should generate an annulling branch.

   The result points to a static buffer that is overwritten by the
   next call.  */

const char *
output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
		rtx_insn *insn)
{
  static char string[64];
  enum rtx_code code = GET_CODE (op);
  rtx cc_reg = XEXP (op, 0);
  machine_mode mode = GET_MODE (cc_reg);
  const char *labelno, *branch;
  int spaces = 8, far;
  char *p;

  /* v9 branches are limited to +-1MB.  If it is too far away,
     change

     bne,pt %xcc, .LC30

     to

     be,pn %xcc, .+12
      nop
     ba .LC30

     and

     fbne,a,pn %fcc2, .LC29

     to

     fbe,pt %fcc2, .+16
      nop
     ba .LC29  */

  far = TARGET_V9 && (get_attr_length (insn) >= 3);
  /* A far branch is emitted as an inverted short branch around an
     unconditional one, so "reversed" and "far" cancel each other.  */
  if (reversed ^ far)
    {
      /* Reversal of FP compares takes care -- an ordered compare
	 becomes an unordered compare and vice versa.  */
      if (mode == CCFPmode || mode == CCFPEmode)
	code = reverse_condition_maybe_unordered (code);
      else
	code = reverse_condition (code);
    }

  /* Start by writing the branch condition.  */
  if (mode == CCFPmode || mode == CCFPEmode)
    {
      switch (code)
	{
	case NE:
	  branch = "fbne";
	  break;
	case EQ:
	  branch = "fbe";
	  break;
	case GE:
	  branch = "fbge";
	  break;
	case GT:
	  branch = "fbg";
	  break;
	case LE:
	  branch = "fble";
	  break;
	case LT:
	  branch = "fbl";
	  break;
	case UNORDERED:
	  branch = "fbu";
	  break;
	case ORDERED:
	  branch = "fbo";
	  break;
	case UNGT:
	  branch = "fbug";
	  break;
	case UNLT:
	  branch = "fbul";
	  break;
	case UNEQ:
	  branch = "fbue";
	  break;
	case UNGE:
	  branch = "fbuge";
	  break;
	case UNLE:
	  branch = "fbule";
	  break;
	case LTGT:
	  branch = "fblg";
	  break;
	default:
	  gcc_unreachable ();
	}

      /* ??? !v9: FP branches cannot be preceded by another floating point
	 insn.  Because there is currently no concept of pre-delay slots,
	 we can fix this only by always emitting a nop before a floating
	 point branch.  */

      string[0] = '\0';
      if (! TARGET_V9)
	strcpy (string, "nop\n\t");
      strcat (string, branch);
    }
  else
    {
      /* Integer branch.  The special CC modes select the overflow or
	 negative flag rather than the usual comparison result.  */
      switch (code)
	{
	case NE:
	  if (mode == CCVmode || mode == CCXVmode)
	    branch = "bvs";
	  else
	    branch = "bne";
	  break;
	case EQ:
	  if (mode == CCVmode || mode == CCXVmode)
	    branch = "bvc";
	  else
	    branch = "be";
	  break;
	case GE:
	  if (mode == CCNZmode || mode == CCXNZmode)
	    branch = "bpos";
	  else
	    branch = "bge";
	  break;
	case GT:
	  branch = "bg";
	  break;
	case LE:
	  branch = "ble";
	  break;
	case LT:
	  if (mode == CCNZmode || mode == CCXNZmode)
	    branch = "bneg";
	  else
	    branch = "bl";
	  break;
	case GEU:
	  branch = "bgeu";
	  break;
	case GTU:
	  branch = "bgu";
	  break;
	case LEU:
	  branch = "bleu";
	  break;
	case LTU:
	  branch = "blu";
	  break;
	default:
	  gcc_unreachable ();
	}
      strcpy (string, branch);
    }
  /* SPACES tracks how much of the 8-column mnemonic field is left,
     to pick between '\t' and ' ' before the operands below.  */
  spaces -= strlen (branch);
  p = strchr (string, '\0');

  /* Now add the annulling, the label, and a possible noop.  */
  if (annul && ! far)
    {
      strcpy (p, ",a");
      p += 2;
      spaces -= 2;
    }

  if (TARGET_V9)
    {
      rtx note;
      int v8 = 0;

      /* V8 = 1 means we must fall back to the V8 form of the branch
	 (no %icc qualifier) because the V9 range is exceeded.  */
      if (! far && insn && INSN_ADDRESSES_SET_P ())
	{
	  int delta = (INSN_ADDRESSES (INSN_UID (dest))
		       - INSN_ADDRESSES (INSN_UID (insn)));
	  /* Leave some instructions for "slop".  */
	  if (delta < -260000 || delta >= 260000)
	    v8 = 1;
	}

      /* Pick the condition-code operand matching the CC mode.  */
      switch (mode)
	{
	case E_CCmode:
	case E_CCNZmode:
	case E_CCCmode:
	case E_CCVmode:
	  labelno = "%%icc, ";
	  if (v8)
	    labelno = "";
	  break;
	case E_CCXmode:
	case E_CCXNZmode:
	case E_CCXCmode:
	case E_CCXVmode:
	  labelno = "%%xcc, ";
	  gcc_assert (!v8);
	  break;
	case E_CCFPmode:
	case E_CCFPEmode:
	  {
	    static char v9_fcc_labelno[] = "%%fccX, ";
	    /* Set the char indicating the number of the fcc reg to use.  */
	    v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
	    labelno = v9_fcc_labelno;
	    if (v8)
	      {
		gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
		labelno = "";
	      }
	  }
	  break;
	default:
	  gcc_unreachable ();
	}

      /* Add the branch-prediction bit from the REG_BR_PROB note; a far
	 branch is inverted, so invert the prediction too.  */
      if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
	{
	  strcpy (p,
		  ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
		   >= profile_probability::even ()) ^ far)
		  ? ",pt" : ",pn");
	  p += 3;
	  spaces -= 3;
	}
    }
  else
    labelno = "";

  if (spaces > 0)
    *p++ = '\t';
  else
    *p++ = ' ';
  strcpy (p, labelno);
  p = strchr (p, '\0');
  if (far)
    {
      /* Branch around: short inverted branch, nop, then a plain "b"
	 to the real target.  */
      strcpy (p, ".+12\n\t nop\n\tb\t");
      /* Skip the next insn if requested or
	 if we know that it will be a nop.  */
      if (annul || ! final_sequence)
	p[3] = '6';
      p += 14;
    }
  /* Finally the label operand and the "%#" escape (presumably a
     delay-slot marker handled by sparc_print_operand).  */
  *p++ = '%';
  *p++ = 'l';
  *p++ = label + '0';
  *p++ = '%';
  *p++ = '#';
  *p = '\0';

  return string;
}
8388
/* Emit a library call comparison between floating point X and Y.
   COMPARISON is the operator to compare with (EQ, NE, GT, etc).
   Return the new operator to be used in the comparison sequence.

   TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
   values as arguments instead of the TFmode registers themselves,
   that's why we cannot call emit_float_lib_cmp.  */

rtx
sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
{
  const char *qpfunc;
  rtx slot0, slot1, result, tem, tem2, libfunc;
  machine_mode mode;
  enum rtx_code new_comparison;

  /* Pick the library routine: dedicated predicates for the ordered
     comparisons, the generic 3-way _Q[p]_cmp for everything that has
     to distinguish the unordered case.  */
  switch (comparison)
    {
    case EQ:
      qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
      break;

    case NE:
      qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
      break;

    case GT:
      qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
      break;

    case GE:
      qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
      break;

    case LT:
      qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
      break;

    case LE:
      qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
      break;

    case ORDERED:
    case UNORDERED:
    case UNGT:
    case UNLT:
    case UNEQ:
    case UNGE:
    case UNLE:
    case LTGT:
      qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
      break;

    default:
      gcc_unreachable ();
    }

  if (TARGET_ARCH64)
    {
      /* The 64-bit routines take the TFmode operands by reference, so
	 each operand must live in (addressable) memory.  */
      if (MEM_P (x))
	{
	  tree expr = MEM_EXPR (x);
	  if (expr)
	    mark_addressable (expr);
	  slot0 = x;
	}
      else
	{
	  slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
	  emit_move_insn (slot0, x);
	}

      if (MEM_P (y))
	{
	  tree expr = MEM_EXPR (y);
	  if (expr)
	    mark_addressable (expr);
	  slot1 = y;
	}
      else
	{
	  slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
	  emit_move_insn (slot1, y);
	}

      libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
      emit_library_call (libfunc, LCT_NORMAL,
			 DImode,
			 XEXP (slot0, 0), Pmode,
			 XEXP (slot1, 0), Pmode);
      mode = DImode;
    }
  else
    {
      /* The 32-bit routines take the TFmode values directly.  */
      libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
      emit_library_call (libfunc, LCT_NORMAL,
			 SImode,
			 x, TFmode, y, TFmode);
      mode = SImode;
    }


  /* Immediately move the result of the libcall into a pseudo
     register so reload doesn't clobber the value if it needs
     the return register for a spill reg.  */
  result = gen_reg_rtx (mode);
  emit_move_insn (result, hard_libcall_value (mode, libfunc));

  /* Decode the result.  The dedicated predicates return a boolean.
     The decoding below implies _Q[p]_cmp encodes: 0 = equal,
     1 = less, 2 = greater, 3 = unordered.  */
  switch (comparison)
    {
    default:
      return gen_rtx_NE (VOIDmode, result, const0_rtx);
    case ORDERED:
    case UNORDERED:
      new_comparison = (comparison == UNORDERED ? EQ : NE);
      return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
    case UNGT:
    case UNGE:
      new_comparison = (comparison == UNGT ? GT : NE);
      return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
    case UNLE:
      return gen_rtx_NE (VOIDmode, result, const2_rtx);
    case UNLT:
      /* UNLT: low bit set, i.e. result is 1 (less) or 3 (unordered).  */
      tem = gen_reg_rtx (mode);
      if (TARGET_ARCH32)
	emit_insn (gen_andsi3 (tem, result, const1_rtx));
      else
	emit_insn (gen_anddi3 (tem, result, const1_rtx));
      return gen_rtx_NE (VOIDmode, tem, const0_rtx);
    case UNEQ:
    case LTGT:
      /* (result + 1) & 2 is zero exactly for results 1 (less) and
	 2 (greater), i.e. nonzero for equal/unordered.  */
      tem = gen_reg_rtx (mode);
      if (TARGET_ARCH32)
	emit_insn (gen_addsi3 (tem, result, const1_rtx));
      else
	emit_insn (gen_adddi3 (tem, result, const1_rtx));
      tem2 = gen_reg_rtx (mode);
      if (TARGET_ARCH32)
	emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
      else
	emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
      new_comparison = (comparison == UNEQ ? EQ : NE);
      return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
    }

  gcc_unreachable ();
}
8536
8537 /* Generate an unsigned DImode to FP conversion. This is the same code
8538 optabs would emit if we didn't have TFmode patterns. */
8539
8540 void
sparc_emit_floatunsdi(rtx * operands,machine_mode mode)8541 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8542 {
8543 rtx i0, i1, f0, in, out;
8544
8545 out = operands[0];
8546 in = force_reg (DImode, operands[1]);
8547 rtx_code_label *neglab = gen_label_rtx ();
8548 rtx_code_label *donelab = gen_label_rtx ();
8549 i0 = gen_reg_rtx (DImode);
8550 i1 = gen_reg_rtx (DImode);
8551 f0 = gen_reg_rtx (mode);
8552
8553 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8554
8555 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8556 emit_jump_insn (gen_jump (donelab));
8557 emit_barrier ();
8558
8559 emit_label (neglab);
8560
8561 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8562 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8563 emit_insn (gen_iordi3 (i0, i0, i1));
8564 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8565 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8566
8567 emit_label (donelab);
8568 }
8569
8570 /* Generate an FP to unsigned DImode conversion. This is the same code
8571 optabs would emit if we didn't have TFmode patterns. */
8572
8573 void
sparc_emit_fixunsdi(rtx * operands,machine_mode mode)8574 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8575 {
8576 rtx i0, i1, f0, in, out, limit;
8577
8578 out = operands[0];
8579 in = force_reg (mode, operands[1]);
8580 rtx_code_label *neglab = gen_label_rtx ();
8581 rtx_code_label *donelab = gen_label_rtx ();
8582 i0 = gen_reg_rtx (DImode);
8583 i1 = gen_reg_rtx (DImode);
8584 limit = gen_reg_rtx (mode);
8585 f0 = gen_reg_rtx (mode);
8586
8587 emit_move_insn (limit,
8588 const_double_from_real_value (
8589 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8590 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8591
8592 emit_insn (gen_rtx_SET (out,
8593 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8594 emit_jump_insn (gen_jump (donelab));
8595 emit_barrier ();
8596
8597 emit_label (neglab);
8598
8599 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8600 emit_insn (gen_rtx_SET (i0,
8601 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8602 emit_insn (gen_movdi (i1, const1_rtx));
8603 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8604 emit_insn (gen_xordi3 (out, i0, i1));
8605
8606 emit_label (donelab);
8607 }
8608
/* Return the string to output a compare and branch instruction to DEST.
   DEST is the destination insn (i.e. the label), INSN is the source,
   and OP is the conditional expression.  The result points to a static
   buffer reused on the next call.  */

const char *
output_cbcond (rtx op, rtx dest, rtx_insn *insn)
{
  machine_mode mode = GET_MODE (XEXP (op, 0));
  enum rtx_code code = GET_CODE (op);
  const char *cond_str, *tmpl;
  int far, emit_nop, len;
  static char string[64];
  char size_char;

  /* Compare and Branch is limited to +-2KB.  If it is too far away,
     change

     cxbne X, Y, .LC30

     to

     cxbe X, Y, .+16
     nop
     ba,pt xcc, .LC30
     nop  */

  /* The length attribute encodes what expansion is needed:
     2 -> add a trailing nop, 4 -> use the branch-around form
     (NOTE(review): relies on the cbcond length attr in sparc.md —
     confirm against the machine description).  */
  len = get_attr_length (insn);

  far = len == 4;
  emit_nop = len == 2;

  /* The far form branches around, so it tests the opposite condition.  */
  if (far)
    code = reverse_condition (code);

  /* 'w' for 32-bit (cwb*), 'x' for 64-bit (cxb*) compares.  */
  size_char = ((mode == SImode) ? 'w' : 'x');

  switch (code)
    {
    case NE:
      cond_str = "ne";
      break;

    case EQ:
      cond_str = "e";
      break;

    case GE:
      cond_str = "ge";
      break;

    case GT:
      cond_str = "g";
      break;

    case LE:
      cond_str = "le";
      break;

    case LT:
      cond_str = "l";
      break;

    case GEU:
      cond_str = "cc";
      break;

    case GTU:
      cond_str = "gu";
      break;

    case LEU:
      cond_str = "leu";
      break;

    case LTU:
      cond_str = "cs";
      break;

    default:
      gcc_unreachable ();
    }

  if (far)
    {
      int veryfar = 1, delta;

      if (INSN_ADDRESSES_SET_P ())
	{
	  delta = (INSN_ADDRESSES (INSN_UID (dest))
		   - INSN_ADDRESSES (INSN_UID (insn)));
	  /* Leave some instructions for "slop".  */
	  if (delta >= -260000 && delta < 260000)
	    veryfar = 0;
	}

      /* Within V9 branch range use "ba,pt %xcc"; otherwise a plain "b".
	 (The templates here appear swapped relative to their obvious
	 reading — NOTE(review): verify against upstream sparc.c.)  */
      if (veryfar)
	tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
      else
	tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
    }
  else
    {
      if (emit_nop)
	tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
      else
	tmpl = "c%cb%s\t%%1, %%2, %%3";
    }

  /* Splice the size character and condition string into the template;
     the doubled '%' survive for the operand printer.  */
  snprintf (string, sizeof(string), tmpl, size_char, cond_str);

  return string;
}
8721
/* Return the string to output a conditional branch to LABEL, testing
   register REG.  LABEL is the operand number of the label; REG is the
   operand number of the reg.  OP is the conditional expression.  The mode
   of REG says what kind of comparison we made.

   DEST is the destination insn (i.e. the label), INSN is the source.

   REVERSED is nonzero if we should reverse the sense of the comparison.

   ANNUL is nonzero if we should generate an annulling branch.

   The result points to a static buffer reused on the next call.  */

const char *
output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
		 int annul, rtx_insn *insn)
{
  static char string[64];
  enum rtx_code code = GET_CODE (op);
  machine_mode mode = GET_MODE (XEXP (op, 0));
  rtx note;
  int far;
  char *p;

  /* branch on register are limited to +-128KB.  If it is too far away,
     change

     brnz,pt %g1, .LC30

     to

     brz,pn %g1, .+12
      nop
     ba,pt %xcc, .LC30

     and

     brgez,a,pn %o1, .LC29

     to

     brlz,pt %o1, .+16
      nop
     ba,pt %xcc, .LC29  */

  far = get_attr_length (insn) >= 3;

  /* If not floating-point or if EQ or NE, we can just reverse the code.  */
  /* The far form branches around with the opposite condition, so
     REVERSED and FAR cancel each other.  */
  if (reversed ^ far)
    code = reverse_condition (code);

  /* Only 64-bit versions of these instructions exist.  */
  gcc_assert (mode == DImode);

  /* Start by writing the branch condition.  */

  switch (code)
    {
    case NE:
      strcpy (string, "brnz");
      break;

    case EQ:
      strcpy (string, "brz");
      break;

    case GE:
      strcpy (string, "brgez");
      break;

    case LT:
      strcpy (string, "brlz");
      break;

    case LE:
      strcpy (string, "brlez");
      break;

    case GT:
      strcpy (string, "brgz");
      break;

    default:
      gcc_unreachable ();
    }

  p = strchr (string, '\0');

  /* Now add the annulling, reg, label, and nop.  */
  if (annul && ! far)
    {
      strcpy (p, ",a");
      p += 2;
    }

  /* Branch prediction bit from the REG_BR_PROB note, inverted when
     the branch sense itself was inverted for the far form.  */
  if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
    {
      strcpy (p,
	      ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
	       >= profile_probability::even ()) ^ far)
	      ? ",pt" : ",pn");
      p += 3;
    }

  /* Tab to the operand column if the mnemonic is short enough.  */
  *p = p < string + 8 ? '\t' : ' ';
  p++;
  *p++ = '%';
  *p++ = '0' + reg;
  *p++ = ',';
  *p++ = ' ';
  if (far)
    {
      int veryfar = 1, delta;

      if (INSN_ADDRESSES_SET_P ())
	{
	  delta = (INSN_ADDRESSES (INSN_UID (dest))
		   - INSN_ADDRESSES (INSN_UID (insn)));
	  /* Leave some instructions for "slop".  */
	  if (delta >= -260000 && delta < 260000)
	    veryfar = 0;
	}

      strcpy (p, ".+12\n\t nop\n\t");
      /* Skip the next insn if requested or
	 if we know that it will be a nop.  */
      if (annul || ! final_sequence)
	p[3] = '6';
      p += 12;
      if (veryfar)
	{
	  strcpy (p, "b\t");
	  p += 2;
	}
      else
	{
	  strcpy (p, "ba,pt\t%%xcc, ");
	  p += 13;
	}
    }
  /* Finally the label operand and the "%#" escape (presumably a
     delay-slot marker handled by sparc_print_operand).  */
  *p++ = '%';
  *p++ = 'l';
  *p++ = '0' + label;
  *p++ = '%';
  *p++ = '#';
  *p = '\0';

  return string;
}
8869
/* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7].
   Such instructions cannot be used in the delay slot of return insn on v9.
   If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.

   This is a recursive walk over the rtx *WHERE, rewriting in place.  */

static int
epilogue_renumber (register rtx *where, int test)
{
  register const char *fmt;
  register int i;
  register enum rtx_code code;

  if (*where == 0)
    return 0;

  code = GET_CODE (*where);

  switch (code)
    {
    case REG:
      /* Hard regs 8-23 are the %o and %l registers.  */
      if (REGNO (*where) >= 8 && REGNO (*where) < 24)	/* oX or lX */
	return 1;
      /* Hard regs 24-31 are %i0-%i7; map each to its %o twin.  */
      if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
	*where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
      /* fallthrough */
    case SCRATCH:
    case CC0:
    case PC:
    case CONST_INT:
    case CONST_WIDE_INT:
    case CONST_DOUBLE:
      /* Leaves of the walk: nothing to rename, nothing disallowed.  */
      return 0;

      /* Do not replace the frame pointer with the stack pointer because
	 it can cause the delayed instruction to load below the stack.
	 This occurs when instructions like:

	 (set (reg/i:SI 24 %i0)
	     (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
	                 (const_int -20 [0xffffffec])) 0))

	 are in the return delayed slot.  */
    case PLUS:
      if (GET_CODE (XEXP (*where, 0)) == REG
	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
	  && (GET_CODE (XEXP (*where, 1)) != CONST_INT
	      || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
	return 1;
      break;

    case MEM:
      /* With a stack bias, a bare %fp dereference would be below the
	 biased stack pointer after the register window shifts.  */
      if (SPARC_STACK_BIAS
	  && GET_CODE (XEXP (*where, 0)) == REG
	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
	return 1;
      break;

    default:
      break;
    }

  /* Recurse into every rtx ('e') and rtx vector ('E') operand.  */
  fmt = GET_RTX_FORMAT (code);

  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;
	  for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
	    if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
	      return 1;
	}
      else if (fmt[i] == 'e'
	       && epilogue_renumber (&(XEXP (*where, i)), test))
	return 1;
    }
  return 0;
}
8948
/* Leaf functions and non-leaf functions have different needs.  */

/* Allocation order to use when the current function is a leaf.  */
static const int
reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;

/* Allocation order to use for ordinary (non-leaf) functions.  */
static const int
reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;

/* Indexed by "is the current function a non-leaf?" (0 or 1); see
   order_regs_for_local_alloc below.  */
static const int *const reg_alloc_orders[] = {
  reg_leaf_alloc_order,
  reg_nonleaf_alloc_order};
8960
8961 void
order_regs_for_local_alloc(void)8962 order_regs_for_local_alloc (void)
8963 {
8964 static int last_order_nonleaf = 1;
8965
8966 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8967 {
8968 last_order_nonleaf = !last_order_nonleaf;
8969 memcpy ((char *) reg_alloc_order,
8970 (const char *) reg_alloc_orders[last_order_nonleaf],
8971 FIRST_PSEUDO_REGISTER * sizeof (int));
8972 }
8973 }
8974
8975 /* Return 1 if REG and MEM are legitimate enough to allow the various
8976 MEM<-->REG splits to be run. */
8977
8978 int
sparc_split_reg_mem_legitimate(rtx reg,rtx mem)8979 sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
8980 {
8981 /* Punt if we are here by mistake. */
8982 gcc_assert (reload_completed);
8983
8984 /* We must have an offsettable memory reference. */
8985 if (!offsettable_memref_p (mem))
8986 return 0;
8987
8988 /* If we have legitimate args for ldd/std, we do not want
8989 the split to happen. */
8990 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
8991 return 0;
8992
8993 /* Success. */
8994 return 1;
8995 }
8996
8997 /* Split a REG <-- MEM move into a pair of moves in MODE. */
8998
8999 void
sparc_split_reg_mem(rtx dest,rtx src,machine_mode mode)9000 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
9001 {
9002 rtx high_part = gen_highpart (mode, dest);
9003 rtx low_part = gen_lowpart (mode, dest);
9004 rtx word0 = adjust_address (src, mode, 0);
9005 rtx word1 = adjust_address (src, mode, 4);
9006
9007 if (reg_overlap_mentioned_p (high_part, word1))
9008 {
9009 emit_move_insn_1 (low_part, word1);
9010 emit_move_insn_1 (high_part, word0);
9011 }
9012 else
9013 {
9014 emit_move_insn_1 (high_part, word0);
9015 emit_move_insn_1 (low_part, word1);
9016 }
9017 }
9018
9019 /* Split a MEM <-- REG move into a pair of moves in MODE. */
9020
9021 void
sparc_split_mem_reg(rtx dest,rtx src,machine_mode mode)9022 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
9023 {
9024 rtx word0 = adjust_address (dest, mode, 0);
9025 rtx word1 = adjust_address (dest, mode, 4);
9026 rtx high_part = gen_highpart (mode, src);
9027 rtx low_part = gen_lowpart (mode, src);
9028
9029 emit_move_insn_1 (word0, high_part);
9030 emit_move_insn_1 (word1, low_part);
9031 }
9032
9033 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */
9034
9035 int
sparc_split_reg_reg_legitimate(rtx reg1,rtx reg2)9036 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
9037 {
9038 /* Punt if we are here by mistake. */
9039 gcc_assert (reload_completed);
9040
9041 if (GET_CODE (reg1) == SUBREG)
9042 reg1 = SUBREG_REG (reg1);
9043 if (GET_CODE (reg1) != REG)
9044 return 0;
9045 const int regno1 = REGNO (reg1);
9046
9047 if (GET_CODE (reg2) == SUBREG)
9048 reg2 = SUBREG_REG (reg2);
9049 if (GET_CODE (reg2) != REG)
9050 return 0;
9051 const int regno2 = REGNO (reg2);
9052
9053 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
9054 return 1;
9055
9056 if (TARGET_VIS3)
9057 {
9058 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
9059 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
9060 return 1;
9061 }
9062
9063 return 0;
9064 }
9065
9066 /* Split a REG <--> REG move into a pair of moves in MODE. */
9067
9068 void
sparc_split_reg_reg(rtx dest,rtx src,machine_mode mode)9069 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
9070 {
9071 rtx dest1 = gen_highpart (mode, dest);
9072 rtx dest2 = gen_lowpart (mode, dest);
9073 rtx src1 = gen_highpart (mode, src);
9074 rtx src2 = gen_lowpart (mode, src);
9075
9076 /* Now emit using the real source and destination we found, swapping
9077 the order if we detect overlap. */
9078 if (reg_overlap_mentioned_p (dest1, src2))
9079 {
9080 emit_move_insn_1 (dest2, src2);
9081 emit_move_insn_1 (dest1, src1);
9082 }
9083 else
9084 {
9085 emit_move_insn_1 (dest1, src1);
9086 emit_move_insn_1 (dest2, src2);
9087 }
9088 }
9089
9090 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
9091 This makes them candidates for using ldd and std insns.
9092
9093 Note reg1 and reg2 *must* be hard registers. */
9094
9095 int
registers_ok_for_ldd_peep(rtx reg1,rtx reg2)9096 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
9097 {
9098 /* We might have been passed a SUBREG. */
9099 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
9100 return 0;
9101
9102 if (REGNO (reg1) % 2 != 0)
9103 return 0;
9104
9105 /* Integer ldd is deprecated in SPARC V9 */
9106 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
9107 return 0;
9108
9109 return (REGNO (reg1) == REGNO (reg2) - 1);
9110 }
9111
/* Return 1 if the addresses in mem1 and mem2 are suitable for use in
   an ldd or std insn.

   This can only happen when addr1 and addr2, the addresses in mem1
   and mem2, are consecutive memory locations (addr1 + 4 == addr2).
   addr1 must also be aligned on a 64-bit boundary.

   Also iff dependent_reg_rtx is not null it should not be used to
   compute the address for mem1, i.e. we cannot optimize a sequence
   like:
	ld [%o0], %o0
	ld [%o0 + 4], %o1
   to
	ldd [%o0], %o0
   nor:
	ld [%g3 + 4], %g3
	ld [%g3], %g2
   to
	ldd [%g3], %g2

   But, note that the transformation from:
	ld [%g2 + 4], %g3
	ld [%g2], %g2
   to
	ldd [%g2], %g2
   is perfectly fine.  Thus, the peephole2 patterns always pass us
   the destination register of the first load, never the second one.

   For stores we don't have a similar problem, so dependent_reg_rtx is
   NULL_RTX.  */

int
mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
{
  rtx addr1, addr2;
  unsigned int reg1;
  HOST_WIDE_INT offset1;

  /* The mems cannot be volatile.  */
  if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
    return 0;

  /* MEM1 should be aligned on a 64-bit boundary.  */
  if (MEM_ALIGN (mem1) < 64)
    return 0;

  addr1 = XEXP (mem1, 0);
  addr2 = XEXP (mem2, 0);

  /* Extract a register number and offset (if used) from the first addr.  */
  if (GET_CODE (addr1) == PLUS)
    {
      /* If not a REG, return zero.  */
      if (GET_CODE (XEXP (addr1, 0)) != REG)
	return 0;
      else
	{
	  reg1 = REGNO (XEXP (addr1, 0));
	  /* The offset must be constant!  */
	  if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
	    return 0;
	  offset1 = INTVAL (XEXP (addr1, 1));
	}
    }
  else if (GET_CODE (addr1) != REG)
    return 0;
  else
    {
      reg1 = REGNO (addr1);
      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
      offset1 = 0;
    }

  /* Make sure the second address is a (mem (plus (reg) (const_int).  */
  if (GET_CODE (addr2) != PLUS)
    return 0;

  if (GET_CODE (XEXP (addr2, 0)) != REG
      || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
    return 0;

  /* Both addresses must be based on the same register.  */
  if (reg1 != REGNO (XEXP (addr2, 0)))
    return 0;

  /* The first load may have overwritten the base register; see the
     function comment above.  */
  if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
    return 0;

  /* The first offset must be evenly divisible by 8 to ensure the
     address is 64-bit aligned.  */
  if (offset1 % 8 != 0)
    return 0;

  /* The offset for the second addr must be 4 more than the first addr.  */
  if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
    return 0;

  /* All the tests passed.  addr1 and addr2 are valid for ldd and std
     instructions.  */
  return 1;
}
9212
9213 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
9214
9215 rtx
widen_mem_for_ldd_peep(rtx mem1,rtx mem2,machine_mode mode)9216 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
9217 {
9218 rtx x = widen_memory_access (mem1, mode, 0);
9219 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
9220 return x;
9221 }
9222
9223 /* Return 1 if reg is a pseudo, or is the first register in
9224 a hard register pair. This makes it suitable for use in
9225 ldd and std insns. */
9226
9227 int
register_ok_for_ldd(rtx reg)9228 register_ok_for_ldd (rtx reg)
9229 {
9230 /* We might have been passed a SUBREG. */
9231 if (!REG_P (reg))
9232 return 0;
9233
9234 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
9235 return (REGNO (reg) % 2 == 0);
9236
9237 return 1;
9238 }
9239
9240 /* Return 1 if OP, a MEM, has an address which is known to be
9241 aligned to an 8-byte boundary. */
9242
9243 int
memory_ok_for_ldd(rtx op)9244 memory_ok_for_ldd (rtx op)
9245 {
9246 /* In 64-bit mode, we assume that the address is word-aligned. */
9247 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
9248 return 0;
9249
9250 if (! can_create_pseudo_p ()
9251 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
9252 return 0;
9253
9254 return 1;
9255 }
9256
9257 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
9258
9259 static bool
sparc_print_operand_punct_valid_p(unsigned char code)9260 sparc_print_operand_punct_valid_p (unsigned char code)
9261 {
9262 if (code == '#'
9263 || code == '*'
9264 || code == '('
9265 || code == ')'
9266 || code == '_'
9267 || code == '&')
9268 return true;
9269
9270 return false;
9271 }
9272
9273 /* Implement TARGET_PRINT_OPERAND.
9274 Print operand X (an rtx) in assembler syntax to file FILE.
9275 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
9276 For `%' followed by punctuation, CODE is the punctuation and X is null. */
9277
static void
sparc_print_operand (FILE *file, rtx x, int code)
{
  const char *s;

  switch (code)
    {
    case '#':
      /* Output an insn in a delay slot.  */
      if (final_sequence)
	sparc_indent_opcode = 1;
      else
	fputs ("\n\t nop", file);
      return;
    case '*':
      /* Output an annul flag if there's nothing for the delay slot and we
	 are optimizing.  This is always used with '(' below.
	 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
	 this is a dbx bug.  So, we only do this when optimizing.
	 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
	 Always emit a nop in case the next instruction is a branch.  */
      if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
	fputs (",a", file);
      return;
    case '(':
      /* Output a 'nop' if there's nothing for the delay slot and we are
	 not optimizing.  This is always used with '*' above.  */
      if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
	fputs ("\n\t nop", file);
      else if (final_sequence)
	sparc_indent_opcode = 1;
      return;
    case ')':
      /* Output the right displacement from the saved PC on function return.
	 The caller may have placed an "unimp" insn immediately after the call
	 so we have to account for it.  This insn is used in the 32-bit ABI
	 when calling a function that returns a non zero-sized structure.  The
	 64-bit ABI doesn't have it.  Be careful to have this test be the same
	 as that for the call.  The exception is when sparc_std_struct_return
	 is enabled, the psABI is followed exactly and the adjustment is made
	 by the code in sparc_struct_value_rtx.  The call emitted is the same
	 when sparc_std_struct_return is enabled.  */
      if (!TARGET_ARCH64
	  && cfun->returns_struct
	  && !sparc_std_struct_return
	  && DECL_SIZE (DECL_RESULT (current_function_decl))
	  && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
	       == INTEGER_CST
	  && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
	fputs ("12", file);
      else
	fputc ('8', file);
      return;
    case '_':
      /* Output the Embedded Medium/Anywhere code model base register.  */
      fputs (EMBMEDANY_BASE_REG, file);
      return;
    case '&':
      /* Print some local dynamic TLS name.  */
      if (const char *name = get_some_local_dynamic_name ())
	assemble_name (file, name);
      else
	output_operand_lossage ("'%%&' used without any "
				"local dynamic TLS references");
      return;

    case 'Y':
      /* Adjust the operand to take into account a RESTORE operation.  */
      if (GET_CODE (x) == CONST_INT)
	/* Constants fall through to the generic printing code below.  */
	break;
      else if (GET_CODE (x) != REG)
	output_operand_lossage ("invalid %%Y operand");
      else if (REGNO (x) < 8)
	fputs (reg_names[REGNO (x)], file);
      else if (REGNO (x) >= 24 && REGNO (x) < 32)
	/* Map %i0-%i7 (24-31) to %o0-%o7 (8-15), their pre-RESTORE names.  */
	fputs (reg_names[REGNO (x)-16], file);
      else
	output_operand_lossage ("invalid %%Y operand");
      return;
    case 'L':
      /* Print out the low order register name of a register pair.  */
      if (WORDS_BIG_ENDIAN)
	fputs (reg_names[REGNO (x)+1], file);
      else
	fputs (reg_names[REGNO (x)], file);
      return;
    case 'H':
      /* Print out the high order register name of a register pair.  */
      if (WORDS_BIG_ENDIAN)
	fputs (reg_names[REGNO (x)], file);
      else
	fputs (reg_names[REGNO (x)+1], file);
      return;
    case 'R':
      /* Print out the second register name of a register pair or quad.
	 I.e., R (%o0) => %o1.  */
      fputs (reg_names[REGNO (x)+1], file);
      return;
    case 'S':
      /* Print out the third register name of a register quad.
	 I.e., S (%o0) => %o2.  */
      fputs (reg_names[REGNO (x)+2], file);
      return;
    case 'T':
      /* Print out the fourth register name of a register quad.
	 I.e., T (%o0) => %o3.  */
      fputs (reg_names[REGNO (x)+3], file);
      return;
    case 'x':
      /* Print a condition code register.  */
      if (REGNO (x) == SPARC_ICC_REG)
	{
	  /* The mode of the CC register selects the 32-bit or 64-bit
	     condition code field.  */
	  switch (GET_MODE (x))
	    {
	    case E_CCmode:
	    case E_CCNZmode:
	    case E_CCCmode:
	    case E_CCVmode:
	      s = "%icc";
	      break;
	    case E_CCXmode:
	    case E_CCXNZmode:
	    case E_CCXCmode:
	    case E_CCXVmode:
	      s = "%xcc";
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  fputs (s, file);
	}
      else
	/* %fccN register */
	fputs (reg_names[REGNO (x)], file);
      return;
    case 'm':
      /* Print the operand's address only.  */
      output_address (GET_MODE (x), XEXP (x, 0));
      return;
    case 'r':
      /* In this case we need a register.  Use %g0 if the
	 operand is const0_rtx.  */
      if (x == const0_rtx
	  || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
	{
	  fputs ("%g0", file);
	  return;
	}
      else
	break;

    case 'A':
      /* Print the mnemonic for a two-operand logical instruction.  */
      switch (GET_CODE (x))
	{
	case IOR:
	  s = "or";
	  break;
	case AND:
	  s = "and";
	  break;
	case XOR:
	  s = "xor";
	  break;
	default:
	  output_operand_lossage ("invalid %%A operand");
	  s = "";
	  break;
	}
      fputs (s, file);
      return;

    case 'B':
      /* Print the mnemonic for the negated form of a logical instruction.  */
      switch (GET_CODE (x))
	{
	case IOR:
	  s = "orn";
	  break;
	case AND:
	  s = "andn";
	  break;
	case XOR:
	  s = "xnor";
	  break;
	default:
	  output_operand_lossage ("invalid %%B operand");
	  s = "";
	  break;
	}
      fputs (s, file);
      return;

      /* This is used by the conditional move instructions.  */
    case 'C':
      {
	machine_mode mode = GET_MODE (XEXP (x, 0));
	switch (GET_CODE (x))
	  {
	  case NE:
	    if (mode == CCVmode || mode == CCXVmode)
	      s = "vs";
	    else
	      s = "ne";
	    break;
	  case EQ:
	    if (mode == CCVmode || mode == CCXVmode)
	      s = "vc";
	    else
	      s = "e";
	    break;
	  case GE:
	    if (mode == CCNZmode || mode == CCXNZmode)
	      s = "pos";
	    else
	      s = "ge";
	    break;
	  case GT:
	    s = "g";
	    break;
	  case LE:
	    s = "le";
	    break;
	  case LT:
	    if (mode == CCNZmode || mode == CCXNZmode)
	      s = "neg";
	    else
	      s = "l";
	    break;
	  case GEU:
	    s = "geu";
	    break;
	  case GTU:
	    s = "gu";
	    break;
	  case LEU:
	    s = "leu";
	    break;
	  case LTU:
	    s = "lu";
	    break;
	  case LTGT:
	    s = "lg";
	    break;
	  case UNORDERED:
	    s = "u";
	    break;
	  case ORDERED:
	    s = "o";
	    break;
	  case UNLT:
	    s = "ul";
	    break;
	  case UNLE:
	    s = "ule";
	    break;
	  case UNGT:
	    s = "ug";
	    break;
	  case UNGE:
	    s = "uge";
	    break;
	  case UNEQ:
	    s = "ue";
	    break;
	  default:
	    output_operand_lossage ("invalid %%C operand");
	    s = "";
	    break;
	  }
	fputs (s, file);
	return;
      }

      /* This are used by the movr instruction pattern.  */
    case 'D':
      {
	switch (GET_CODE (x))
	  {
	  case NE:
	    s = "ne";
	    break;
	  case EQ:
	    s = "e";
	    break;
	  case GE:
	    s = "gez";
	    break;
	  case LT:
	    s = "lz";
	    break;
	  case LE:
	    s = "lez";
	    break;
	  case GT:
	    s = "gz";
	    break;
	  default:
	    output_operand_lossage ("invalid %%D operand");
	    s = "";
	    break;
	  }
	fputs (s, file);
	return;
      }

    case 'b':
      {
	/* Print a sign-extended character.  */
	int i = trunc_int_for_mode (INTVAL (x), QImode);
	fprintf (file, "%d", i);
	return;
      }

    case 'f':
      /* Operand must be a MEM; write its address.  */
      if (GET_CODE (x) != MEM)
	output_operand_lossage ("invalid %%f operand");
      output_address (GET_MODE (x), XEXP (x, 0));
      return;

    case 's':
      {
	/* Print a sign-extended 32-bit value.  */
	HOST_WIDE_INT i;
	if (GET_CODE (x) == CONST_INT)
	  i = INTVAL (x);
	else
	  {
	    output_operand_lossage ("invalid %%s operand");
	    return;
	  }
	i = trunc_int_for_mode (i, SImode);
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
	return;
      }

    case 0:
      /* Do nothing special.  */
      break;

    default:
      /* Undocumented flag.  */
      output_operand_lossage ("invalid operand output code");
    }

  /* Fallen out of the switch: print the operand itself according to
     its RTL class.  */
  if (GET_CODE (x) == REG)
    fputs (reg_names[REGNO (x)], file);
  else if (GET_CODE (x) == MEM)
    {
      fputc ('[', file);
      /* Poor Sun assembler doesn't understand absolute addressing.  */
      if (CONSTANT_P (XEXP (x, 0)))
	fputs ("%g0+", file);
      output_address (GET_MODE (x), XEXP (x, 0));
      fputc (']', file);
    }
  else if (GET_CODE (x) == HIGH)
    {
      fputs ("%hi(", file);
      output_addr_const (file, XEXP (x, 0));
      fputc (')', file);
    }
  else if (GET_CODE (x) == LO_SUM)
    {
      sparc_print_operand (file, XEXP (x, 0), 0);
      if (TARGET_CM_MEDMID)
	fputs ("+%l44(", file);
      else
	fputs ("+%lo(", file);
      output_addr_const (file, XEXP (x, 1));
      fputc (')', file);
    }
  else if (GET_CODE (x) == CONST_DOUBLE)
    output_operand_lossage ("floating-point constant not a valid immediate operand");
  else
    output_addr_const (file, x);
}
9654
9655 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9656
static void
sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
{
  register rtx base, index = 0;
  int offset = 0;
  register rtx addr = x;

  if (REG_P (addr))
    fputs (reg_names[REGNO (addr)], file);
  else if (GET_CODE (addr) == PLUS)
    {
      /* Split the PLUS into a base and either a constant offset or an
	 index (register or symbolic).  */
      if (CONST_INT_P (XEXP (addr, 0)))
	offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
      else if (CONST_INT_P (XEXP (addr, 1)))
	offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
      else
	base = XEXP (addr, 0), index = XEXP (addr, 1);
      if (GET_CODE (base) == LO_SUM)
	{
	  /* (plus (lo_sum ...) (const_int)) is only valid with
	     offsettable %lo(), 64-bit, non-MEDMID code.  */
	  gcc_assert (USE_AS_OFFSETABLE_LO10
		      && TARGET_ARCH64
		      && ! TARGET_CM_MEDMID);
	  output_operand (XEXP (base, 0), 0);
	  fputs ("+%lo(", file);
	  output_address (VOIDmode, XEXP (base, 1));
	  fprintf (file, ")+%d", offset);
	}
      else
	{
	  fputs (reg_names[REGNO (base)], file);
	  if (index == 0)
	    fprintf (file, "%+d", offset);
	  else if (REG_P (index))
	    fprintf (file, "+%s", reg_names[REGNO (index)]);
	  else if (GET_CODE (index) == SYMBOL_REF
		   || GET_CODE (index) == LABEL_REF
		   || GET_CODE (index) == CONST)
	    fputc ('+', file), output_addr_const (file, index);
	  else gcc_unreachable ();
	}
    }
  else if (GET_CODE (addr) == MINUS
	   && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
    {
      /* Difference of an address and a label: emit "expr-(label-.)".  */
      output_addr_const (file, XEXP (addr, 0));
      fputs ("-(", file);
      output_addr_const (file, XEXP (addr, 1));
      fputs ("-.)", file);
    }
  else if (GET_CODE (addr) == LO_SUM)
    {
      output_operand (XEXP (addr, 0), 0);
      if (TARGET_CM_MEDMID)
	fputs ("+%l44(", file);
      else
	fputs ("+%lo(", file);
      output_address (VOIDmode, XEXP (addr, 1));
      fputc (')', file);
    }
  else if (flag_pic
	   && GET_CODE (addr) == CONST
	   && GET_CODE (XEXP (addr, 0)) == MINUS
	   && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
	   && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
	   && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
    {
      /* PIC form (const (minus SYM (const (minus LAB pc)))).  */
      addr = XEXP (addr, 0);
      output_addr_const (file, XEXP (addr, 0));
      /* Group the args of the second CONST in parenthesis.  */
      fputs ("-(", file);
      /* Skip past the second CONST--it does nothing for us.  */
      output_addr_const (file, XEXP (XEXP (addr, 1), 0));
      /* Close the parenthesis.  */
      fputc (')', file);
    }
  else
    {
      output_addr_const (file, addr);
    }
}
9737
9738 /* Target hook for assembling integer objects. The sparc version has
9739 special handling for aligned DI-mode objects. */
9740
9741 static bool
sparc_assemble_integer(rtx x,unsigned int size,int aligned_p)9742 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9743 {
9744 /* ??? We only output .xword's for symbols and only then in environments
9745 where the assembler can handle them. */
9746 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9747 {
9748 if (TARGET_V9)
9749 {
9750 assemble_integer_with_op ("\t.xword\t", x);
9751 return true;
9752 }
9753 else
9754 {
9755 assemble_aligned_integer (4, const0_rtx);
9756 assemble_aligned_integer (4, x);
9757 return true;
9758 }
9759 }
9760 return default_assemble_integer (x, size, aligned_p);
9761 }
9762
9763 /* Return the value of a code used in the .proc pseudo-op that says
9764 what kind of result this function returns. For non-C types, we pick
9765 the closest C type. */
9766
9767 #ifndef SHORT_TYPE_SIZE
9768 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9769 #endif
9770
9771 #ifndef INT_TYPE_SIZE
9772 #define INT_TYPE_SIZE BITS_PER_WORD
9773 #endif
9774
9775 #ifndef LONG_TYPE_SIZE
9776 #define LONG_TYPE_SIZE BITS_PER_WORD
9777 #endif
9778
9779 #ifndef LONG_LONG_TYPE_SIZE
9780 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9781 #endif
9782
9783 #ifndef FLOAT_TYPE_SIZE
9784 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9785 #endif
9786
9787 #ifndef DOUBLE_TYPE_SIZE
9788 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9789 #endif
9790
9791 #ifndef LONG_DOUBLE_TYPE_SIZE
9792 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9793 #endif
9794
unsigned long
sparc_type_code (register tree type)
{
  register unsigned long qualifiers = 0;
  register unsigned shift;

  /* Only the first 30 bits of the qualifier are valid.  We must refrain from
     setting more, since some assemblers will give an error for this.  Also,
     we must be careful to avoid shifts of 32 bits or more to avoid getting
     unpredictable results.  */

  /* Walk down through derived types (array-of, pointer-to, function-
     returning, ...), packing a 2-bit code per level, until a base type
     terminates the encoding.  */
  for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
    {
      switch (TREE_CODE (type))
	{
	case ERROR_MARK:
	  return qualifiers;

	case ARRAY_TYPE:
	  qualifiers |= (3 << shift);
	  break;

	case FUNCTION_TYPE:
	case METHOD_TYPE:
	  qualifiers |= (2 << shift);
	  break;

	case POINTER_TYPE:
	case REFERENCE_TYPE:
	case OFFSET_TYPE:
	  qualifiers |= (1 << shift);
	  break;

	case RECORD_TYPE:
	  return (qualifiers | 8);

	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  return (qualifiers | 9);

	case ENUMERAL_TYPE:
	  return (qualifiers | 10);

	case VOID_TYPE:
	  return (qualifiers | 16);

	case INTEGER_TYPE:
	  /* If this is a range type, consider it to be the underlying
	     type.  */
	  if (TREE_TYPE (type) != 0)
	    break;

	  /* Carefully distinguish all the standard types of C,
	     without messing up if the language is not C.  We do this by
	     testing TYPE_PRECISION and TYPE_UNSIGNED.  The old code used to
	     look at both the names and the above fields, but that's redundant.
	     Any type whose size is between two C types will be considered
	     to be the wider of the two types.  Also, we do not have a
	     special code to use for "long long", so anything wider than
	     long is treated the same.  Note that we can't distinguish
	     between "int" and "long" in this code if they are the same
	     size, but that's fine, since neither can the assembler.  */

	  if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
	    return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));

	  else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
	    return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));

	  else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
	    return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));

	  else
	    return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));

	case REAL_TYPE:
	  /* If this is a range type, consider it to be the underlying
	     type.  */
	  if (TREE_TYPE (type) != 0)
	    break;

	  /* Carefully distinguish all the standard types of C,
	     without messing up if the language is not C.  */

	  if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
	    return (qualifiers | 6);

	  else
	    return (qualifiers | 7);

	case COMPLEX_TYPE:	/* GNU Fortran COMPLEX type.  */
	  /* ??? We need to distinguish between double and float complex types,
	     but I don't know how yet because I can't reach this code from
	     existing front-ends.  */
	  return (qualifiers | 7);	/* Who knows?  */

	case VECTOR_TYPE:
	case BOOLEAN_TYPE:	/* Boolean truth value type.  */
	case LANG_TYPE:
	case NULLPTR_TYPE:
	  return qualifiers;

	default:
	  gcc_unreachable ();		/* Not a type!  */
	}
    }

  return qualifiers;
}
9904
9905 /* Nested function support. */
9906
9907 /* Emit RTL insns to initialize the variable parts of a trampoline.
9908 FNADDR is an RTX for the address of the function's pure code.
9909 CXT is an RTX for the static chain value for the function.
9910
9911 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9912 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9913 (to store insns). This is a bit excessive. Perhaps a different
9914 mechanism would be better here.
9915
9916 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9917
static void
sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
{
  /* SPARC 32-bit trampoline:

 	sethi	%hi(fn), %g1
 	sethi	%hi(static), %g2
 	jmp	%g1+%lo(fn)
 	or	%g2, %lo(static), %g2

    SETHI i,r  = 00rr rrr1 00ii iiii iiii iiii iiii iiii
    JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
   */

  /* Word 0: sethi %hi(fn), %g1 -- opcode template 0x03000000 OR'ed with
     the high 22 bits of FNADDR.  */
  emit_move_insn
    (adjust_address (m_tramp, SImode, 0),
     expand_binop (SImode, ior_optab,
		   expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
		   GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
		   NULL_RTX, 1, OPTAB_DIRECT));

  /* Word 1: sethi %hi(static), %g2 -- opcode template 0x05000000.  */
  emit_move_insn
    (adjust_address (m_tramp, SImode, 4),
     expand_binop (SImode, ior_optab,
		   expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
		   GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
		   NULL_RTX, 1, OPTAB_DIRECT));

  /* Word 2: jmp %g1+%lo(fn) -- opcode template 0x81c06000 OR'ed with
     the low 10 bits of FNADDR.  */
  emit_move_insn
    (adjust_address (m_tramp, SImode, 8),
     expand_binop (SImode, ior_optab,
		   expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
		   GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
		   NULL_RTX, 1, OPTAB_DIRECT));

  /* Word 3: or %g2, %lo(static), %g2 -- opcode template 0x8410a000.  */
  emit_move_insn
    (adjust_address (m_tramp, SImode, 12),
     expand_binop (SImode, ior_optab,
		   expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
		   GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
		   NULL_RTX, 1, OPTAB_DIRECT));

  /* On UltraSPARC a flush flushes an entire cache line.  The trampoline is
     aligned on a 16 byte boundary so one flush clears it all.  */
  emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
  if (sparc_cpu != PROCESSOR_ULTRASPARC
      && sparc_cpu != PROCESSOR_ULTRASPARC3
      && sparc_cpu != PROCESSOR_NIAGARA
      && sparc_cpu != PROCESSOR_NIAGARA2
      && sparc_cpu != PROCESSOR_NIAGARA3
      && sparc_cpu != PROCESSOR_NIAGARA4
      && sparc_cpu != PROCESSOR_NIAGARA7
      && sparc_cpu != PROCESSOR_M8)
    emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));

  /* Call __enable_execute_stack after writing onto the stack to make sure
     the stack address is accessible.  */
#ifdef HAVE_ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
#endif

}
9981
9982 /* The 64-bit version is simpler because it makes more sense to load the
9983 values as "immediate" data out of the trampoline. It's also easier since
9984 we can read the PC without clobbering a register. */
9985
static void
sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
{
  /* SPARC 64-bit trampoline:

	rd	%pc, %g1
	ldx	[%g1+24], %g5
	jmp	%g5
	ldx	[%g1+16], %g5
	+16 bytes data
   */

  /* The four fixed instruction words, then CXT and FNADDR as the
     16 bytes of data loaded by the ldx instructions above.  */
  emit_move_insn (adjust_address (m_tramp, SImode, 0),
		  GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
  emit_move_insn (adjust_address (m_tramp, SImode, 4),
		  GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
  emit_move_insn (adjust_address (m_tramp, SImode, 8),
		  GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
  emit_move_insn (adjust_address (m_tramp, SImode, 12),
		  GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
  emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
  emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
  emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));

  /* A second flush is needed on CPUs whose flush does not cover the
     whole 32-byte trampoline in one go.  */
  if (sparc_cpu != PROCESSOR_ULTRASPARC
      && sparc_cpu != PROCESSOR_ULTRASPARC3
      && sparc_cpu != PROCESSOR_NIAGARA
      && sparc_cpu != PROCESSOR_NIAGARA2
      && sparc_cpu != PROCESSOR_NIAGARA3
      && sparc_cpu != PROCESSOR_NIAGARA4
      && sparc_cpu != PROCESSOR_NIAGARA7
      && sparc_cpu != PROCESSOR_M8)
    emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));

  /* Call __enable_execute_stack after writing onto the stack to make sure
     the stack address is accessible.  */
#ifdef HAVE_ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
#endif
}
10027
10028 /* Worker for TARGET_TRAMPOLINE_INIT. */
10029
10030 static void
sparc_trampoline_init(rtx m_tramp,tree fndecl,rtx cxt)10031 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
10032 {
10033 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
10034 cxt = force_reg (Pmode, cxt);
10035 if (TARGET_ARCH64)
10036 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
10037 else
10038 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
10039 }
10040
10041 /* Adjust the cost of a scheduling dependency. Return the new cost of
10042 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
10043
static int
supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
			int cost)
{
  enum attr_type insn_type;

  /* Unrecognizable insns have no attributes; leave the cost alone.  */
  if (recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);

  if (dep_type == 0)
    {
      /* Data dependency; DEP_INSN writes a register that INSN reads some
	 cycles later.  */

      /* if a load, then the dependence must be on the memory address;
	 add an extra "cycle".  Note that the cost could be two cycles
	 if the reg was written late in an instruction group; we cannot tell
	 here.  */
      if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
	return cost + 3;

      /* Get the delay only if the address of the store is the dependence.  */
      if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
	{
	  rtx pat = PATTERN (insn);
	  rtx dep_pat = PATTERN (dep_insn);

	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    return cost;  /* This should not happen!  */

	  /* The dependency between the two instructions was on the data that
	     is being stored.  Assume that this implies that the address of the
	     store is not dependent.  */
	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
	    return cost;

	  return cost + 3;  /* An approximation.  */
	}

      /* A shift instruction cannot receive its data from an instruction
	 in the same cycle; add a one cycle penalty.  */
      if (insn_type == TYPE_SHIFT)
	return cost + 3;   /* Split before cascade into shift.  */
    }
  else
    {
      /* Anti- or output- dependency; DEP_INSN reads/writes a register that
	 INSN writes some cycles later.  */

      /* These are only significant for the fpu unit; writing a fp reg before
	 the fpu has finished with it stalls the processor.  */

      /* Reusing an integer register causes no problems.  */
      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
	return 0;
    }

  return cost;
}
10105
/* HyperSPARC/sparclite86x variant of the scheduling-cost adjustment.
   Same contract as supersparc_adjust_cost above.  */

static int
hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
			int cost)
{
  enum attr_type insn_type, dep_type;
  rtx pat = PATTERN (insn);
  rtx dep_pat = PATTERN (dep_insn);

  /* Both insns must be recognizable to have attributes.  */
  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_type = get_attr_type (dep_insn);

  switch (dtype)
    {
    case 0:
      /* Data dependency; DEP_INSN writes a register that INSN reads some
	 cycles later.  */

      switch (insn_type)
	{
	case TYPE_STORE:
	case TYPE_FPSTORE:
	  /* Get the delay iff the address of the store is the dependence.  */
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    return cost;

	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
	    return cost;
	  return cost + 3;

	case TYPE_LOAD:
	case TYPE_SLOAD:
	case TYPE_FPLOAD:
	  /* If a load, then the dependence must be on the memory address.  If
	     the addresses aren't equal, then it might be a false dependency */
	  if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
	    {
	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
		  || GET_CODE (SET_DEST (dep_pat)) != MEM
		  || GET_CODE (SET_SRC (pat)) != MEM
		  || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
				    XEXP (SET_SRC (pat), 0)))
		return cost + 2;

	      return cost + 8;
	    }
	  break;

	case TYPE_BRANCH:
	  /* Compare to branch latency is 0.  There is no benefit from
	     separating compare and branch.  */
	  if (dep_type == TYPE_COMPARE)
	    return 0;
	  /* Floating point compare to branch latency is less than
	     compare to conditional move.  */
	  if (dep_type == TYPE_FPCMP)
	    return cost - 1;
	  break;
	default:
	  break;
	}
      break;

    case REG_DEP_ANTI:
      /* Anti-dependencies only penalize the fpu unit.  */
      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
	return 0;
      break;

    default:
      break;
    }

  return cost;
}
10183
10184 static int
sparc_adjust_cost(rtx_insn * insn,int dep_type,rtx_insn * dep,int cost,unsigned int)10185 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
10186 unsigned int)
10187 {
10188 switch (sparc_cpu)
10189 {
10190 case PROCESSOR_SUPERSPARC:
10191 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
10192 break;
10193 case PROCESSOR_HYPERSPARC:
10194 case PROCESSOR_SPARCLITE86X:
10195 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
10196 break;
10197 default:
10198 break;
10199 }
10200 return cost;
10201 }
10202
/* Implement TARGET_SCHED_INIT.  Intentionally empty: SPARC needs no
   per-block scheduler state.  */

static void
sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
		  int sched_verbose ATTRIBUTE_UNUSED,
		  int max_ready ATTRIBUTE_UNUSED)
{}
10208
10209 static int
sparc_use_sched_lookahead(void)10210 sparc_use_sched_lookahead (void)
10211 {
10212 if (sparc_cpu == PROCESSOR_NIAGARA
10213 || sparc_cpu == PROCESSOR_NIAGARA2
10214 || sparc_cpu == PROCESSOR_NIAGARA3)
10215 return 0;
10216 if (sparc_cpu == PROCESSOR_NIAGARA4
10217 || sparc_cpu == PROCESSOR_NIAGARA7
10218 || sparc_cpu == PROCESSOR_M8)
10219 return 2;
10220 if (sparc_cpu == PROCESSOR_ULTRASPARC
10221 || sparc_cpu == PROCESSOR_ULTRASPARC3)
10222 return 4;
10223 if ((1 << sparc_cpu) &
10224 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
10225 (1 << PROCESSOR_SPARCLITE86X)))
10226 return 3;
10227 return 0;
10228 }
10229
10230 static int
sparc_issue_rate(void)10231 sparc_issue_rate (void)
10232 {
10233 switch (sparc_cpu)
10234 {
10235 case PROCESSOR_NIAGARA:
10236 case PROCESSOR_NIAGARA2:
10237 case PROCESSOR_NIAGARA3:
10238 default:
10239 return 1;
10240 case PROCESSOR_NIAGARA4:
10241 case PROCESSOR_NIAGARA7:
10242 case PROCESSOR_V9:
10243 /* Assume V9 processors are capable of at least dual-issue. */
10244 return 2;
10245 case PROCESSOR_SUPERSPARC:
10246 return 3;
10247 case PROCESSOR_HYPERSPARC:
10248 case PROCESSOR_SPARCLITE86X:
10249 return 2;
10250 case PROCESSOR_ULTRASPARC:
10251 case PROCESSOR_ULTRASPARC3:
10252 case PROCESSOR_M8:
10253 return 4;
10254 }
10255 }
10256
/* INSN is known to contain a single SET.  Return 1 if its result has
   the high 32 bits known to be zero, -1 if it is known to be the sign
   extension of an SImode value, and 0 if neither can be proven.  Helper
   for sparc_check_64.  */

static int
set_extends (rtx_insn *insn)
{
  register rtx pat = PATTERN (insn);

  switch (GET_CODE (SET_SRC (pat)))
    {
    /* Load and some shift instructions zero extend.  */
    case MEM:
    case ZERO_EXTEND:
    /* sethi clears the high bits */
    case HIGH:
    /* LO_SUM is used with sethi.  sethi cleared the high
       bits and the values used with lo_sum are positive */
    case LO_SUM:
    /* Store flag stores 0 or 1 */
    case LT: case LTU:
    case GT: case GTU:
    case LE: case LEU:
    case GE: case GEU:
    case EQ:
    case NE:
      return 1;
    case AND:
      {
	rtx op0 = XEXP (SET_SRC (pat), 0);
	rtx op1 = XEXP (SET_SRC (pat), 1);
	/* ANDing with a non-negative constant forces the high bits
	   to zero regardless of the other operand.  */
	if (GET_CODE (op1) == CONST_INT)
	  return INTVAL (op1) >= 0;
	if (GET_CODE (op0) != REG)
	  return 0;
	/* Otherwise it suffices for one register operand to be known
	   zero-extended.  */
	if (sparc_check_64 (op0, insn) == 1)
	  return 1;
	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
      }
    case IOR:
    case XOR:
      {
	rtx op0 = XEXP (SET_SRC (pat), 0);
	rtx op1 = XEXP (SET_SRC (pat), 1);
	/* For IOR/XOR both operands must have zero high bits.  */
	if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
	  return 0;
	if (GET_CODE (op1) == CONST_INT)
	  return INTVAL (op1) >= 0;
	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
      }
    case LSHIFTRT:
      /* A logical right shift of an SImode value zero extends.  */
      return GET_MODE (SET_SRC (pat)) == SImode;
    /* Positive integers leave the high bits zero.  */
    case CONST_INT:
      return !(INTVAL (SET_SRC (pat)) & 0x80000000);
    case ASHIFTRT:
    case SIGN_EXTEND:
      /* These sign extend when operating on SImode, hence the -1.  */
      return - (GET_MODE (SET_SRC (pat)) == SImode);
    case REG:
      /* A plain register copy inherits the source's extension state.  */
      return sparc_check_64 (SET_SRC (pat), insn);
    default:
      return 0;
    }
}
10317
/* Case vectors deferred by sparc_defer_case_vector until the end of the
   function: ADDR_DIFF_VECs accumulate on the first list, plain ADDR_VECs
   on the second.  We _ought_ to have only one kind per function, but...  */
static GTY(()) rtx sparc_addr_diff_list;
static GTY(()) rtx sparc_addr_list;
10321
10322 void
sparc_defer_case_vector(rtx lab,rtx vec,int diff)10323 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
10324 {
10325 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
10326 if (diff)
10327 sparc_addr_diff_list
10328 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
10329 else
10330 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
10331 }
10332
10333 static void
sparc_output_addr_vec(rtx vec)10334 sparc_output_addr_vec (rtx vec)
10335 {
10336 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10337 int idx, vlen = XVECLEN (body, 0);
10338
10339 #ifdef ASM_OUTPUT_ADDR_VEC_START
10340 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10341 #endif
10342
10343 #ifdef ASM_OUTPUT_CASE_LABEL
10344 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10345 NEXT_INSN (lab));
10346 #else
10347 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10348 #endif
10349
10350 for (idx = 0; idx < vlen; idx++)
10351 {
10352 ASM_OUTPUT_ADDR_VEC_ELT
10353 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10354 }
10355
10356 #ifdef ASM_OUTPUT_ADDR_VEC_END
10357 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10358 #endif
10359 }
10360
10361 static void
sparc_output_addr_diff_vec(rtx vec)10362 sparc_output_addr_diff_vec (rtx vec)
10363 {
10364 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10365 rtx base = XEXP (XEXP (body, 0), 0);
10366 int idx, vlen = XVECLEN (body, 1);
10367
10368 #ifdef ASM_OUTPUT_ADDR_VEC_START
10369 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10370 #endif
10371
10372 #ifdef ASM_OUTPUT_CASE_LABEL
10373 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10374 NEXT_INSN (lab));
10375 #else
10376 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10377 #endif
10378
10379 for (idx = 0; idx < vlen; idx++)
10380 {
10381 ASM_OUTPUT_ADDR_DIFF_ELT
10382 (asm_out_file,
10383 body,
10384 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10385 CODE_LABEL_NUMBER (base));
10386 }
10387
10388 #ifdef ASM_OUTPUT_ADDR_VEC_END
10389 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10390 #endif
10391 }
10392
10393 static void
sparc_output_deferred_case_vectors(void)10394 sparc_output_deferred_case_vectors (void)
10395 {
10396 rtx t;
10397 int align;
10398
10399 if (sparc_addr_list == NULL_RTX
10400 && sparc_addr_diff_list == NULL_RTX)
10401 return;
10402
10403 /* Align to cache line in the function's code section. */
10404 switch_to_section (current_function_section ());
10405
10406 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
10407 if (align > 0)
10408 ASM_OUTPUT_ALIGN (asm_out_file, align);
10409
10410 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
10411 sparc_output_addr_vec (XEXP (t, 0));
10412 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
10413 sparc_output_addr_diff_vec (XEXP (t, 0));
10414
10415 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
10416 }
10417
/* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
   unknown.  Return 1 if the high bits are zero, -1 if the register is
   sign extended.  X must be a REG; INSN is the point from which to scan
   backwards, or NULL to start from the last insn anywhere.  */
int
sparc_check_64 (rtx x, rtx_insn *insn)
{
  /* If a register is set only once it is safe to ignore insns this
     code does not know how to handle.  The loop will either recognize
     the single set and return the correct value or fail to recognize
     it and return 0.  */
  int set_once = 0;
  rtx y = x;

  gcc_assert (GET_CODE (x) == REG);

  /* For a DImode register, track its low SImode word instead.  */
  if (GET_MODE (x) == DImode)
    y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);

  if (flag_expensive_optimizations
      && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
    set_once = 1;

  if (insn == 0)
    {
      if (set_once)
	insn = get_last_insn_anywhere ();
      else
	return 0;
    }

  /* Scan backwards looking for the SET that defines X (or Y).  */
  while ((insn = PREV_INSN (insn)))
    {
      switch (GET_CODE (insn))
	{
	case JUMP_INSN:
	case NOTE:
	  break;
	case CODE_LABEL:
	case CALL_INSN:
	default:
	  /* Control-flow joins and calls invalidate the scan unless
	     the register is known to be set exactly once.  */
	  if (! set_once)
	    return 0;
	  break;
	case INSN:
	  {
	    rtx pat = PATTERN (insn);
	    if (GET_CODE (pat) != SET)
	      return 0;
	    if (rtx_equal_p (x, SET_DEST (pat)))
	      return set_extends (insn);
	    if (y && rtx_equal_p (y, SET_DEST (pat)))
	      return set_extends (insn);
	    /* A partial overlap clobbers our knowledge of the bits.  */
	    if (reg_overlap_mentioned_p (SET_DEST (pat), y))
	      return 0;
	  }
	}
    }
  return 0;
}
10477
10478 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
10479 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
10480
10481 const char *
output_v8plus_shift(rtx_insn * insn,rtx * operands,const char * opcode)10482 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
10483 {
10484 static char asm_code[60];
10485
10486 /* The scratch register is only required when the destination
10487 register is not a 64-bit global or out register. */
10488 if (which_alternative != 2)
10489 operands[3] = operands[0];
10490
10491 /* We can only shift by constants <= 63. */
10492 if (GET_CODE (operands[2]) == CONST_INT)
10493 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
10494
10495 if (GET_CODE (operands[1]) == CONST_INT)
10496 {
10497 output_asm_insn ("mov\t%1, %3", operands);
10498 }
10499 else
10500 {
10501 output_asm_insn ("sllx\t%H1, 32, %3", operands);
10502 if (sparc_check_64 (operands[1], insn) <= 0)
10503 output_asm_insn ("srl\t%L1, 0, %L1", operands);
10504 output_asm_insn ("or\t%L1, %3, %3", operands);
10505 }
10506
10507 strcpy (asm_code, opcode);
10508
10509 if (which_alternative != 2)
10510 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
10511 else
10512 return
10513 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
10514 }
10515
10516 /* Output rtl to increment the profiler label LABELNO
10517 for profiling a function entry. */
10518
10519 void
sparc_profile_hook(int labelno)10520 sparc_profile_hook (int labelno)
10521 {
10522 char buf[32];
10523 rtx lab, fun;
10524
10525 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
10526 if (NO_PROFILE_COUNTERS)
10527 {
10528 emit_library_call (fun, LCT_NORMAL, VOIDmode);
10529 }
10530 else
10531 {
10532 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10533 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
10534 emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode);
10535 }
10536 }
10537
10538 #ifdef TARGET_SOLARIS
10539 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
10540
10541 static void
sparc_solaris_elf_asm_named_section(const char * name,unsigned int flags,tree decl ATTRIBUTE_UNUSED)10542 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
10543 tree decl ATTRIBUTE_UNUSED)
10544 {
10545 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
10546 {
10547 solaris_elf_asm_comdat_section (name, flags, decl);
10548 return;
10549 }
10550
10551 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
10552
10553 if (!(flags & SECTION_DEBUG))
10554 fputs (",#alloc", asm_out_file);
10555 if (flags & SECTION_WRITE)
10556 fputs (",#write", asm_out_file);
10557 if (flags & SECTION_TLS)
10558 fputs (",#tls", asm_out_file);
10559 if (flags & SECTION_CODE)
10560 fputs (",#execinstr", asm_out_file);
10561
10562 if (flags & SECTION_NOTYPE)
10563 ;
10564 else if (flags & SECTION_BSS)
10565 fputs (",#nobits", asm_out_file);
10566 else
10567 fputs (",#progbits", asm_out_file);
10568
10569 fputc ('\n', asm_out_file);
10570 }
10571 #endif /* TARGET_SOLARIS */
10572
10573 /* We do not allow indirect calls to be optimized into sibling calls.
10574
10575 We cannot use sibling calls when delayed branches are disabled
10576 because they will likely require the call delay slot to be filled.
10577
10578 Also, on SPARC 32-bit we cannot emit a sibling call when the
10579 current function returns a structure. This is because the "unimp
10580 after call" convention would cause the callee to return to the
10581 wrong place. The generic code already disallows cases where the
10582 function being called returns a structure.
10583
10584 It may seem strange how this last case could occur. Usually there
10585 is code after the call which jumps to epilogue code which dumps the
10586 return value into the struct return area. That ought to invalidate
10587 the sibling call right? Well, in the C++ case we can end up passing
10588 the pointer to the struct return area to a constructor (which returns
10589 void) and then nothing else happens. Such a sibling call would look
10590 valid without the added check here.
10591
10592 VxWorks PIC PLT entries require the global pointer to be initialized
10593 on entry. We therefore can't emit sibling calls to them. */
10594 static bool
sparc_function_ok_for_sibcall(tree decl,tree exp ATTRIBUTE_UNUSED)10595 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10596 {
10597 return (decl
10598 && flag_delayed_branch
10599 && (TARGET_ARCH64 || ! cfun->returns_struct)
10600 && !(TARGET_VXWORKS_RTP
10601 && flag_pic
10602 && !targetm.binds_local_p (decl)));
10603 }
10604
/* libfunc renaming.  Implement TARGET_INIT_LIBFUNCS: rename or remove
   the runtime support routines per the 32-bit and 64-bit SPARC ABIs.  */

static void
sparc_init_libfuncs (void)
{
  if (TARGET_ARCH32)
    {
      /* Use the subroutines that Sun's library provides for integer
	 multiply and divide.  The `*' prevents an underscore from
	 being prepended by the compiler.  .umul is a little faster
	 than .mul.  */
      set_optab_libfunc (smul_optab, SImode, "*.umul");
      set_optab_libfunc (sdiv_optab, SImode, "*.div");
      set_optab_libfunc (udiv_optab, SImode, "*.udiv");
      set_optab_libfunc (smod_optab, SImode, "*.rem");
      set_optab_libfunc (umod_optab, SImode, "*.urem");

      /* TFmode arithmetic.  These names are part of the SPARC 32bit ABI.  */
      set_optab_libfunc (add_optab, TFmode, "_Q_add");
      set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
      set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
      set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
      set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");

      /* We can define the TFmode sqrt optab only if TARGET_FPU.  This
	 is because with soft-float, the SFmode and DFmode sqrt
	 instructions will be absent, and the compiler will notice and
	 try to use the TFmode sqrt instruction for calls to the
	 builtin function sqrt, but this fails.  */
      if (TARGET_FPU)
	set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");

      /* TFmode comparisons, also mandated by the 32-bit ABI.  */
      set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
      set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
      set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
      set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
      set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
      set_optab_libfunc (le_optab, TFmode, "_Q_fle");

      /* Conversions between TFmode and the narrower float modes.  */
      set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
      set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
      set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
      set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");

      /* Conversions between TFmode and 32-bit integers.  */
      set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
      set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
      set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
      set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");

      if (DITF_CONVERSION_LIBFUNCS)
	{
	  set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
	  set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
	  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
	  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
	}

      if (SUN_CONVERSION_LIBFUNCS)
	{
	  set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
	  set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
	  set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
	  set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
	}
    }
  if (TARGET_ARCH64)
    {
      /* In the SPARC 64bit ABI, SImode multiply and divide functions
	 do not exist in the library.  Make sure the compiler does not
	 emit calls to them by accident.  (It should always use the
	 hardware instructions.)  */
      set_optab_libfunc (smul_optab, SImode, 0);
      set_optab_libfunc (sdiv_optab, SImode, 0);
      set_optab_libfunc (udiv_optab, SImode, 0);
      set_optab_libfunc (smod_optab, SImode, 0);
      set_optab_libfunc (umod_optab, SImode, 0);

      if (SUN_INTEGER_MULTIPLY_64)
	{
	  set_optab_libfunc (smul_optab, DImode, "__mul64");
	  set_optab_libfunc (sdiv_optab, DImode, "__div64");
	  set_optab_libfunc (udiv_optab, DImode, "__udiv64");
	  set_optab_libfunc (smod_optab, DImode, "__rem64");
	  set_optab_libfunc (umod_optab, DImode, "__urem64");
	}

      if (SUN_CONVERSION_LIBFUNCS)
	{
	  set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
	  set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
	  set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
	  set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
	}
    }
}
10700
/* SPARC builtins.  Codes for every machine-specific builtin, grouped by
   the ISA extension that introduced it.  Used as indices into
   sparc_builtins and sparc_builtins_icode below.  */
enum sparc_builtins
{
  /* FPU builtins.  */
  SPARC_BUILTIN_LDFSR,
  SPARC_BUILTIN_STFSR,

  /* VIS 1.0 builtins.  */
  SPARC_BUILTIN_FPACK16,
  SPARC_BUILTIN_FPACK32,
  SPARC_BUILTIN_FPACKFIX,
  SPARC_BUILTIN_FEXPAND,
  SPARC_BUILTIN_FPMERGE,
  SPARC_BUILTIN_FMUL8X16,
  SPARC_BUILTIN_FMUL8X16AU,
  SPARC_BUILTIN_FMUL8X16AL,
  SPARC_BUILTIN_FMUL8SUX16,
  SPARC_BUILTIN_FMUL8ULX16,
  SPARC_BUILTIN_FMULD8SUX16,
  SPARC_BUILTIN_FMULD8ULX16,
  SPARC_BUILTIN_FALIGNDATAV4HI,
  SPARC_BUILTIN_FALIGNDATAV8QI,
  SPARC_BUILTIN_FALIGNDATAV2SI,
  SPARC_BUILTIN_FALIGNDATADI,
  SPARC_BUILTIN_WRGSR,
  SPARC_BUILTIN_RDGSR,
  SPARC_BUILTIN_ALIGNADDR,
  SPARC_BUILTIN_ALIGNADDRL,
  SPARC_BUILTIN_PDIST,
  SPARC_BUILTIN_EDGE8,
  SPARC_BUILTIN_EDGE8L,
  SPARC_BUILTIN_EDGE16,
  SPARC_BUILTIN_EDGE16L,
  SPARC_BUILTIN_EDGE32,
  SPARC_BUILTIN_EDGE32L,
  SPARC_BUILTIN_FCMPLE16,
  SPARC_BUILTIN_FCMPLE32,
  SPARC_BUILTIN_FCMPNE16,
  SPARC_BUILTIN_FCMPNE32,
  SPARC_BUILTIN_FCMPGT16,
  SPARC_BUILTIN_FCMPGT32,
  SPARC_BUILTIN_FCMPEQ16,
  SPARC_BUILTIN_FCMPEQ32,
  SPARC_BUILTIN_FPADD16,
  SPARC_BUILTIN_FPADD16S,
  SPARC_BUILTIN_FPADD32,
  SPARC_BUILTIN_FPADD32S,
  SPARC_BUILTIN_FPSUB16,
  SPARC_BUILTIN_FPSUB16S,
  SPARC_BUILTIN_FPSUB32,
  SPARC_BUILTIN_FPSUB32S,
  SPARC_BUILTIN_ARRAY8,
  SPARC_BUILTIN_ARRAY16,
  SPARC_BUILTIN_ARRAY32,

  /* VIS 2.0 builtins.  */
  SPARC_BUILTIN_EDGE8N,
  SPARC_BUILTIN_EDGE8LN,
  SPARC_BUILTIN_EDGE16N,
  SPARC_BUILTIN_EDGE16LN,
  SPARC_BUILTIN_EDGE32N,
  SPARC_BUILTIN_EDGE32LN,
  SPARC_BUILTIN_BMASK,
  SPARC_BUILTIN_BSHUFFLEV4HI,
  SPARC_BUILTIN_BSHUFFLEV8QI,
  SPARC_BUILTIN_BSHUFFLEV2SI,
  SPARC_BUILTIN_BSHUFFLEDI,

  /* VIS 3.0 builtins.  */
  SPARC_BUILTIN_CMASK8,
  SPARC_BUILTIN_CMASK16,
  SPARC_BUILTIN_CMASK32,
  SPARC_BUILTIN_FCHKSM16,
  SPARC_BUILTIN_FSLL16,
  SPARC_BUILTIN_FSLAS16,
  SPARC_BUILTIN_FSRL16,
  SPARC_BUILTIN_FSRA16,
  SPARC_BUILTIN_FSLL32,
  SPARC_BUILTIN_FSLAS32,
  SPARC_BUILTIN_FSRL32,
  SPARC_BUILTIN_FSRA32,
  SPARC_BUILTIN_PDISTN,
  SPARC_BUILTIN_FMEAN16,
  SPARC_BUILTIN_FPADD64,
  SPARC_BUILTIN_FPSUB64,
  SPARC_BUILTIN_FPADDS16,
  SPARC_BUILTIN_FPADDS16S,
  SPARC_BUILTIN_FPSUBS16,
  SPARC_BUILTIN_FPSUBS16S,
  SPARC_BUILTIN_FPADDS32,
  SPARC_BUILTIN_FPADDS32S,
  SPARC_BUILTIN_FPSUBS32,
  SPARC_BUILTIN_FPSUBS32S,
  SPARC_BUILTIN_FUCMPLE8,
  SPARC_BUILTIN_FUCMPNE8,
  SPARC_BUILTIN_FUCMPGT8,
  SPARC_BUILTIN_FUCMPEQ8,
  SPARC_BUILTIN_FHADDS,
  SPARC_BUILTIN_FHADDD,
  SPARC_BUILTIN_FHSUBS,
  SPARC_BUILTIN_FHSUBD,
  SPARC_BUILTIN_FNHADDS,
  SPARC_BUILTIN_FNHADDD,
  SPARC_BUILTIN_UMULXHI,
  SPARC_BUILTIN_XMULX,
  SPARC_BUILTIN_XMULXHI,

  /* VIS 4.0 builtins.  */
  SPARC_BUILTIN_FPADD8,
  SPARC_BUILTIN_FPADDS8,
  SPARC_BUILTIN_FPADDUS8,
  SPARC_BUILTIN_FPADDUS16,
  SPARC_BUILTIN_FPCMPLE8,
  SPARC_BUILTIN_FPCMPGT8,
  SPARC_BUILTIN_FPCMPULE16,
  SPARC_BUILTIN_FPCMPUGT16,
  SPARC_BUILTIN_FPCMPULE32,
  SPARC_BUILTIN_FPCMPUGT32,
  SPARC_BUILTIN_FPMAX8,
  SPARC_BUILTIN_FPMAX16,
  SPARC_BUILTIN_FPMAX32,
  SPARC_BUILTIN_FPMAXU8,
  SPARC_BUILTIN_FPMAXU16,
  SPARC_BUILTIN_FPMAXU32,
  SPARC_BUILTIN_FPMIN8,
  SPARC_BUILTIN_FPMIN16,
  SPARC_BUILTIN_FPMIN32,
  SPARC_BUILTIN_FPMINU8,
  SPARC_BUILTIN_FPMINU16,
  SPARC_BUILTIN_FPMINU32,
  SPARC_BUILTIN_FPSUB8,
  SPARC_BUILTIN_FPSUBS8,
  SPARC_BUILTIN_FPSUBUS8,
  SPARC_BUILTIN_FPSUBUS16,

  /* VIS 4.0B builtins.  */

  /* Note that all the DICTUNPACK* entries should be kept
     contiguous.  */
  SPARC_BUILTIN_FIRST_DICTUNPACK,
  SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
  SPARC_BUILTIN_DICTUNPACK16,
  SPARC_BUILTIN_DICTUNPACK32,
  SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,

  /* Note that all the FPCMP*SHL entries should be kept
     contiguous.  */
  SPARC_BUILTIN_FIRST_FPCMPSHL,
  SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
  SPARC_BUILTIN_FPCMPGT8SHL,
  SPARC_BUILTIN_FPCMPEQ8SHL,
  SPARC_BUILTIN_FPCMPNE8SHL,
  SPARC_BUILTIN_FPCMPLE16SHL,
  SPARC_BUILTIN_FPCMPGT16SHL,
  SPARC_BUILTIN_FPCMPEQ16SHL,
  SPARC_BUILTIN_FPCMPNE16SHL,
  SPARC_BUILTIN_FPCMPLE32SHL,
  SPARC_BUILTIN_FPCMPGT32SHL,
  SPARC_BUILTIN_FPCMPEQ32SHL,
  SPARC_BUILTIN_FPCMPNE32SHL,
  SPARC_BUILTIN_FPCMPULE8SHL,
  SPARC_BUILTIN_FPCMPUGT8SHL,
  SPARC_BUILTIN_FPCMPULE16SHL,
  SPARC_BUILTIN_FPCMPUGT16SHL,
  SPARC_BUILTIN_FPCMPULE32SHL,
  SPARC_BUILTIN_FPCMPUGT32SHL,
  SPARC_BUILTIN_FPCMPDE8SHL,
  SPARC_BUILTIN_FPCMPDE16SHL,
  SPARC_BUILTIN_FPCMPDE32SHL,
  SPARC_BUILTIN_FPCMPUR8SHL,
  SPARC_BUILTIN_FPCMPUR16SHL,
  SPARC_BUILTIN_FPCMPUR32SHL,
  SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,

  SPARC_BUILTIN_MAX
};
10877
/* Builtin function decls, indexed by sparc_builtins code; filled in by
   def_builtin.  */
static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
/* The insn code implementing each builtin, parallel to the above.  */
static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10880
10881 /* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
10882 The instruction should require a constant operand of some sort. The
10883 function prints an error if OPVAL is not valid. */
10884
10885 static int
check_constant_argument(enum insn_code icode,int opnum,rtx opval)10886 check_constant_argument (enum insn_code icode, int opnum, rtx opval)
10887 {
10888 if (GET_CODE (opval) != CONST_INT)
10889 {
10890 error ("%qs expects a constant argument", insn_data[icode].name);
10891 return false;
10892 }
10893
10894 if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
10895 {
10896 error ("constant argument out of range for %qs", insn_data[icode].name);
10897 return false;
10898 }
10899 return true;
10900 }
10901
10902 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10903 function decl or NULL_TREE if the builtin was not added. */
10904
10905 static tree
def_builtin(const char * name,enum insn_code icode,enum sparc_builtins code,tree type)10906 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10907 tree type)
10908 {
10909 tree t
10910 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10911
10912 if (t)
10913 {
10914 sparc_builtins[code] = t;
10915 sparc_builtins_icode[code] = icode;
10916 }
10917
10918 return t;
10919 }
10920
10921 /* Likewise, but also marks the function as "const". */
10922
10923 static tree
def_builtin_const(const char * name,enum insn_code icode,enum sparc_builtins code,tree type)10924 def_builtin_const (const char *name, enum insn_code icode,
10925 enum sparc_builtins code, tree type)
10926 {
10927 tree t = def_builtin (name, icode, code, type);
10928
10929 if (t)
10930 TREE_READONLY (t) = 1;
10931
10932 return t;
10933 }
10934
/* Implement the TARGET_INIT_BUILTINS target hook.
   Create builtin functions for special SPARC instructions.  */

static void
sparc_init_builtins (void)
{
  /* FSR access builtins require only the FPU.  */
  if (TARGET_FPU)
    sparc_fpu_init_builtins ();

  /* The VIS builtins additionally need the VIS extension.  */
  if (TARGET_VIS)
    sparc_vis_init_builtins ();
}
10947
10948 /* Create builtin functions for FPU instructions. */
10949
10950 static void
sparc_fpu_init_builtins(void)10951 sparc_fpu_init_builtins (void)
10952 {
10953 tree ftype
10954 = build_function_type_list (void_type_node,
10955 build_pointer_type (unsigned_type_node), 0);
10956 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10957 SPARC_BUILTIN_LDFSR, ftype);
10958 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10959 SPARC_BUILTIN_STFSR, ftype);
10960 }
10961
10962 /* Create builtin functions for VIS instructions. */
10963
10964 static void
sparc_vis_init_builtins(void)10965 sparc_vis_init_builtins (void)
10966 {
10967 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10968 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10969 tree v4hi = build_vector_type (intHI_type_node, 4);
10970 tree v2hi = build_vector_type (intHI_type_node, 2);
10971 tree v2si = build_vector_type (intSI_type_node, 2);
10972 tree v1si = build_vector_type (intSI_type_node, 1);
10973
10974 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10975 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10976 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10977 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10978 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10979 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10980 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10981 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10982 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10983 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10984 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10985 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10986 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10987 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10988 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10989 v8qi, v8qi,
10990 intDI_type_node, 0);
10991 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10992 v8qi, v8qi, 0);
10993 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10994 v8qi, v8qi, 0);
10995 tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
10996 intSI_type_node, 0);
10997 tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
10998 intSI_type_node, 0);
10999 tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
11000 intDI_type_node, 0);
11001 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
11002 intDI_type_node,
11003 intDI_type_node, 0);
11004 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
11005 intSI_type_node,
11006 intSI_type_node, 0);
11007 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
11008 ptr_type_node,
11009 intSI_type_node, 0);
11010 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
11011 ptr_type_node,
11012 intDI_type_node, 0);
11013 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
11014 ptr_type_node,
11015 ptr_type_node, 0);
11016 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
11017 ptr_type_node,
11018 ptr_type_node, 0);
11019 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
11020 v4hi, v4hi, 0);
11021 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
11022 v2si, v2si, 0);
11023 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
11024 v4hi, v4hi, 0);
11025 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
11026 v2si, v2si, 0);
11027 tree void_ftype_di = build_function_type_list (void_type_node,
11028 intDI_type_node, 0);
11029 tree di_ftype_void = build_function_type_list (intDI_type_node,
11030 void_type_node, 0);
11031 tree void_ftype_si = build_function_type_list (void_type_node,
11032 intSI_type_node, 0);
11033 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
11034 float_type_node,
11035 float_type_node, 0);
11036 tree df_ftype_df_df = build_function_type_list (double_type_node,
11037 double_type_node,
11038 double_type_node, 0);
11039
11040 /* Packing and expanding vectors. */
11041 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
11042 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
11043 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
11044 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
11045 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
11046 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
11047 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
11048 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
11049 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
11050 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
11051
11052 /* Multiplications. */
11053 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
11054 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
11055 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
11056 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
11057 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
11058 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
11059 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
11060 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
11061 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
11062 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
11063 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
11064 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
11065 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
11066 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
11067
11068 /* Data aligning. */
11069 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
11070 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
11071 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
11072 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
11073 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
11074 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
11075 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
11076 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
11077
11078 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
11079 SPARC_BUILTIN_WRGSR, void_ftype_di);
11080 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
11081 SPARC_BUILTIN_RDGSR, di_ftype_void);
11082
11083 if (TARGET_ARCH64)
11084 {
11085 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
11086 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
11087 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
11088 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
11089 }
11090 else
11091 {
11092 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
11093 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
11094 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
11095 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
11096 }
11097
11098 /* Pixel distance. */
11099 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
11100 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
11101
11102 /* Edge handling. */
11103 if (TARGET_ARCH64)
11104 {
11105 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
11106 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
11107 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
11108 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
11109 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
11110 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
11111 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
11112 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
11113 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
11114 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
11115 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
11116 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
11117 }
11118 else
11119 {
11120 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
11121 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
11122 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
11123 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
11124 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
11125 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
11126 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
11127 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
11128 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
11129 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
11130 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
11131 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
11132 }
11133
11134 /* Pixel compare. */
11135 if (TARGET_ARCH64)
11136 {
11137 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
11138 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
11139 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
11140 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
11141 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
11142 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
11143 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
11144 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
11145 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
11146 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
11147 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
11148 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
11149 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
11150 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
11151 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
11152 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
11153 }
11154 else
11155 {
11156 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
11157 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
11158 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
11159 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
11160 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
11161 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
11162 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
11163 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
11164 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
11165 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
11166 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
11167 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
11168 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
11169 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
11170 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
11171 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
11172 }
11173
11174 /* Addition and subtraction. */
11175 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
11176 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
11177 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
11178 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
11179 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
11180 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
11181 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
11182 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
11183 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
11184 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
11185 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
11186 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
11187 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
11188 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
11189 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
11190 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
11191
11192 /* Three-dimensional array addressing. */
11193 if (TARGET_ARCH64)
11194 {
11195 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
11196 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
11197 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
11198 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
11199 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
11200 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
11201 }
11202 else
11203 {
11204 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
11205 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
11206 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
11207 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
11208 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
11209 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
11210 }
11211
11212 if (TARGET_VIS2)
11213 {
11214 /* Edge handling. */
11215 if (TARGET_ARCH64)
11216 {
11217 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
11218 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
11219 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
11220 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
11221 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
11222 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
11223 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
11224 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
11225 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
11226 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
11227 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
11228 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
11229 }
11230 else
11231 {
11232 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
11233 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
11234 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
11235 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
11236 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
11237 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
11238 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
11239 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
11240 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
11241 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
11242 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
11243 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
11244 }
11245
11246 /* Byte mask and shuffle. */
11247 if (TARGET_ARCH64)
11248 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
11249 SPARC_BUILTIN_BMASK, di_ftype_di_di);
11250 else
11251 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
11252 SPARC_BUILTIN_BMASK, si_ftype_si_si);
11253 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
11254 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
11255 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
11256 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
11257 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
11258 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
11259 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
11260 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
11261 }
11262
11263 if (TARGET_VIS3)
11264 {
11265 if (TARGET_ARCH64)
11266 {
11267 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
11268 SPARC_BUILTIN_CMASK8, void_ftype_di);
11269 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
11270 SPARC_BUILTIN_CMASK16, void_ftype_di);
11271 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
11272 SPARC_BUILTIN_CMASK32, void_ftype_di);
11273 }
11274 else
11275 {
11276 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
11277 SPARC_BUILTIN_CMASK8, void_ftype_si);
11278 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
11279 SPARC_BUILTIN_CMASK16, void_ftype_si);
11280 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
11281 SPARC_BUILTIN_CMASK32, void_ftype_si);
11282 }
11283
11284 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
11285 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
11286
11287 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
11288 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
11289 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
11290 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
11291 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
11292 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
11293 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
11294 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
11295 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
11296 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
11297 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
11298 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
11299 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
11300 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
11301 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
11302 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
11303
11304 if (TARGET_ARCH64)
11305 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
11306 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
11307 else
11308 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
11309 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
11310
11311 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
11312 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
11313 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
11314 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
11315 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
11316 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
11317
11318 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
11319 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
11320 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
11321 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
11322 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
11323 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
11324 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
11325 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
11326 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
11327 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
11328 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
11329 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
11330 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
11331 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
11332 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
11333 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
11334
11335 if (TARGET_ARCH64)
11336 {
11337 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
11338 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
11339 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
11340 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
11341 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
11342 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
11343 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
11344 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
11345 }
11346 else
11347 {
11348 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
11349 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
11350 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
11351 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
11352 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
11353 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
11354 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
11355 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
11356 }
11357
11358 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
11359 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
11360 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
11361 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
11362 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
11363 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
11364 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
11365 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
11366 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
11367 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
11368 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
11369 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
11370
11371 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
11372 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
11373 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
11374 SPARC_BUILTIN_XMULX, di_ftype_di_di);
11375 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
11376 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
11377 }
11378
11379 if (TARGET_VIS4)
11380 {
11381 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
11382 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
11383 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
11384 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
11385 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
11386 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
11387 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
11388 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
11389
11390
11391 if (TARGET_ARCH64)
11392 {
11393 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
11394 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
11395 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
11396 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
11397 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
11398 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
11399 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
11400 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
11401 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
11402 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11403 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
11404 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11405 }
11406 else
11407 {
11408 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
11409 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
11410 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
11411 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
11412 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
11413 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
11414 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
11415 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
11416 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
11417 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11418 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
11419 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11420 }
11421
11422 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
11423 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
11424 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
11425 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
11426 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
11427 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
11428 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
11429 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
11430 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
11431 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
11432 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
11433 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
11434 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
11435 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
11436 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
11437 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
11438 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
11439 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
11440 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
11441 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
11442 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
11443 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
11444 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
11445 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
11446 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
11447 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
11448 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
11449 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
11450 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
11451 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
11452 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
11453 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
11454 }
11455
11456 if (TARGET_VIS4B)
11457 {
11458 def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
11459 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
11460 def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
11461 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
11462 def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
11463 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);
11464
11465 if (TARGET_ARCH64)
11466 {
11467 tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
11468 v8qi, v8qi,
11469 intSI_type_node, 0);
11470 tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
11471 v4hi, v4hi,
11472 intSI_type_node, 0);
11473 tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
11474 v2si, v2si,
11475 intSI_type_node, 0);
11476
11477 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
11478 SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
11479 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
11480 SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
11481 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
11482 SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
11483 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
11484 SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);
11485
11486 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
11487 SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
11488 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
11489 SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
11490 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
11491 SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
11492 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
11493 SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);
11494
11495 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
11496 SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
11497 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
11498 SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
11499 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
11500 SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
11501 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
11502 SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);
11503
11504
11505 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
11506 SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
11507 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
11508 SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);
11509
11510 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
11511 SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
11512 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
11513 SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);
11514
11515 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
11516 SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
11517 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
11518 SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);
11519
11520 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
11521 SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
11522 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
11523 SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
11524 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
11525 SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);
11526
11527 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
11528 SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
11529 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
11530 SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
11531 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
11532 SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
11533
11534 }
11535 else
11536 {
11537 tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
11538 v8qi, v8qi,
11539 intSI_type_node, 0);
11540 tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
11541 v4hi, v4hi,
11542 intSI_type_node, 0);
11543 tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
11544 v2si, v2si,
11545 intSI_type_node, 0);
11546
11547 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
11548 SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
11549 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
11550 SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
11551 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
11552 SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
11553 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
11554 SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);
11555
11556 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
11557 SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
11558 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
11559 SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
11560 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
11561 SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
11562 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
11563 SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);
11564
11565 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
11566 SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si);
11567 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
11568 SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
11569 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
11570 SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
11571 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
11572 SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);
11573
11574
11575 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
11576 SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
11577 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
11578 SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);
11579
11580 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
11581 SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
11582 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
11583 SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);
11584
11585 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
11586 SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
11587 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
11588 SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);
11589
11590 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
11591 SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si);
11592 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
11593 SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
11594 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
11595 SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);
11596
11597 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
11598 SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
11599 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
11600 SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
11601 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
11602 SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
11603 }
11604 }
11605 }
11606
11607 /* Implement TARGET_BUILTIN_DECL hook. */
11608
11609 static tree
sparc_builtin_decl(unsigned code,bool initialize_p ATTRIBUTE_UNUSED)11610 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11611 {
11612 if (code >= SPARC_BUILTIN_MAX)
11613 return error_mark_node;
11614
11615 return sparc_builtins[code];
11616 }
11617
/* Implement TARGET_EXPAND_BUILTIN hook.  */
11619
static rtx
sparc_expand_builtin (tree exp, rtx target,
		      rtx subtarget ATTRIBUTE_UNUSED,
		      machine_mode tmode ATTRIBUTE_UNUSED,
		      int ignore ATTRIBUTE_UNUSED)
{
  /* Recover the builtin's function code from the called declaration and
     look up the insn pattern registered for it.  */
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
  enum insn_code icode = sparc_builtins_icode[code];
  /* Builtins returning void have no result operand in the pattern.  */
  bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
  call_expr_arg_iterator iter;
  int arg_count = 0;
  rtx pat, op[4];
  tree arg;

  if (nonvoid)
    {
      /* Reuse TARGET for the result if it already has the mode the
	 pattern wants and satisfies the operand-0 predicate; otherwise
	 allocate a fresh pseudo-register.  */
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	op[0] = gen_reg_rtx (tmode);
      else
	op[0] = target;
    }

  /* Expand each call argument into op[1..], massaging it as needed to
     match the corresponding insn operand.  */
  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      const struct insn_operand_data *insn_op;
      int idx;

      if (arg == error_mark_node)
	return NULL_RTX;

      arg_count++;
      /* For void builtins the insn operands start at index 0, otherwise
	 operand 0 is the result and the arguments start at index 1.  */
      idx = arg_count - !nonvoid;
      insn_op = &insn_data[icode].operand[idx];
      op[arg_count] = expand_normal (arg);

      /* Some of the builtins require constant arguments.  We check
	 for this here.  */
      if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
	   && code <= SPARC_BUILTIN_LAST_FPCMPSHL
	   && arg_count == 3)
	  || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
	      && code <= SPARC_BUILTIN_LAST_DICTUNPACK
	      && arg_count == 2))
	{
	  if (!check_constant_argument (icode, idx, op[arg_count]))
	    return const0_rtx;
	}

      if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
	{
	  /* These builtins take a pointer; wrap it in an SImode MEM,
	     first forcing the address into a register if it is not
	     already a valid address operand.  */
	  if (!address_operand (op[arg_count], SImode))
	    {
	      op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
	      op[arg_count] = copy_addr_to_reg (op[arg_count]);
	    }
	  op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
	}

      /* Reinterpret DImode/SImode scalars as single-element vectors
	 when the pattern expects V1DImode/V1SImode operands.  */
      else if (insn_op->mode == V1DImode
	       && GET_MODE (op[arg_count]) == DImode)
	op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);

      else if (insn_op->mode == V1SImode
	       && GET_MODE (op[arg_count]) == SImode)
	op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);

      /* As a last resort, force the operand into a register of the
	 mode the pattern expects if the predicate still rejects it.  */
      if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
							insn_op->mode))
	op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
    }

  /* Generate the insn with the operand count seen above (plus the
     result operand for non-void builtins).  */
  switch (arg_count)
    {
    case 0:
      pat = GEN_FCN (icode) (op[0]);
      break;
    case 1:
      if (nonvoid)
	pat = GEN_FCN (icode) (op[0], op[1]);
      else
	pat = GEN_FCN (icode) (op[1]);
      break;
    case 2:
      pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;
    case 3:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;
    default:
      gcc_unreachable ();
    }

  /* The generator may fail (e.g. operand constraints); give up.  */
  if (!pat)
    return NULL_RTX;

  emit_insn (pat);

  return (nonvoid ? op[0] : const0_rtx);
}
11723
11724 /* Return the upper 16 bits of the 8x16 multiplication. */
11725
static int
sparc_vis_mul8x16 (int e8, int e16)
{
  /* Scale the 8x16 product down by 256 with round-to-nearest: add half
     the divisor before the (truncating) integer division.  Note this is
     a division, not a shift, so negative products truncate toward zero
     exactly as the original expression did.  */
  const int product = e8 * e16;
  return (product + 128) / 256;
}
11731
11732 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11733 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
11734
11735 static void
sparc_handle_vis_mul8x16(vec<tree> * n_elts,enum sparc_builtins fncode,tree inner_type,tree cst0,tree cst1)11736 sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode,
11737 tree inner_type, tree cst0, tree cst1)
11738 {
11739 unsigned i, num = VECTOR_CST_NELTS (cst0);
11740 int scale;
11741
11742 switch (fncode)
11743 {
11744 case SPARC_BUILTIN_FMUL8X16:
11745 for (i = 0; i < num; ++i)
11746 {
11747 int val
11748 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11749 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11750 n_elts->quick_push (build_int_cst (inner_type, val));
11751 }
11752 break;
11753
11754 case SPARC_BUILTIN_FMUL8X16AU:
11755 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11756
11757 for (i = 0; i < num; ++i)
11758 {
11759 int val
11760 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11761 scale);
11762 n_elts->quick_push (build_int_cst (inner_type, val));
11763 }
11764 break;
11765
11766 case SPARC_BUILTIN_FMUL8X16AL:
11767 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11768
11769 for (i = 0; i < num; ++i)
11770 {
11771 int val
11772 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11773 scale);
11774 n_elts->quick_push (build_int_cst (inner_type, val));
11775 }
11776 break;
11777
11778 default:
11779 gcc_unreachable ();
11780 }
11781 }
11782
11783 /* Implement TARGET_FOLD_BUILTIN hook.
11784
11785 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
11786 result of the function call is ignored. NULL_TREE is returned if the
11787 function could not be folded. */
11788
static tree
sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
		    tree *args, bool ignore)
{
  enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
  tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
  tree arg0, arg1, arg2;

  /* If the value of the call is unused, any builtin without side effects
     can be folded to zero outright.  The cases listed below touch global
     state (FSR, GSR, alignment state, condition masks) and must be kept
     even when their result is ignored.  */
  if (ignore)
    switch (code)
      {
      case SPARC_BUILTIN_LDFSR:
      case SPARC_BUILTIN_STFSR:
      case SPARC_BUILTIN_ALIGNADDR:
      case SPARC_BUILTIN_WRGSR:
      case SPARC_BUILTIN_BMASK:
      case SPARC_BUILTIN_CMASK8:
      case SPARC_BUILTIN_CMASK16:
      case SPARC_BUILTIN_CMASK32:
	break;

      default:
	return build_zero_cst (rtype);
      }

  /* Constant-fold the VIS builtins that have simple arithmetic
     semantics when all relevant arguments are constants.  */
  switch (code)
    {
    case SPARC_BUILTIN_FEXPAND:
      arg0 = args[0];
      STRIP_NOPS (arg0);

      if (TREE_CODE (arg0) == VECTOR_CST)
	{
	  tree inner_type = TREE_TYPE (rtype);
	  unsigned i;

	  /* FEXPAND widens each element to fixed point by shifting it
	     left 4 bits.  */
	  tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
	  for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
	    {
	      unsigned HOST_WIDE_INT val
		= TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i));
	      n_elts.quick_push (build_int_cst (inner_type, val << 4));
	    }
	  return n_elts.build ();
	}
      break;

    case SPARC_BUILTIN_FMUL8X16:
    case SPARC_BUILTIN_FMUL8X16AU:
    case SPARC_BUILTIN_FMUL8X16AL:
      arg0 = args[0];
      arg1 = args[1];
      STRIP_NOPS (arg0);
      STRIP_NOPS (arg1);

      if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
	{
	  tree inner_type = TREE_TYPE (rtype);
	  /* The per-variant multiply semantics are handled by the
	     sparc_handle_vis_mul8x16 helper above.  */
	  tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
	  sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1);
	  return n_elts.build ();
	}
      break;

    case SPARC_BUILTIN_FPMERGE:
      arg0 = args[0];
      arg1 = args[1];
      STRIP_NOPS (arg0);
      STRIP_NOPS (arg1);

      if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
	{
	  /* FPMERGE interleaves the elements of its two operands, so the
	     result has twice as many elements.  */
	  tree_vector_builder n_elts (rtype, 2 * VECTOR_CST_NELTS (arg0), 1);
	  unsigned i;
	  for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
	    {
	      n_elts.quick_push (VECTOR_CST_ELT (arg0, i));
	      n_elts.quick_push (VECTOR_CST_ELT (arg1, i));
	    }

	  return n_elts.build ();
	}
      break;

    case SPARC_BUILTIN_PDIST:
    case SPARC_BUILTIN_PDISTN:
      arg0 = args[0];
      arg1 = args[1];
      STRIP_NOPS (arg0);
      STRIP_NOPS (arg1);
      if (code == SPARC_BUILTIN_PDIST)
	{
	  /* PDIST accumulates into a third operand; PDISTN starts
	     the sum from zero.  */
	  arg2 = args[2];
	  STRIP_NOPS (arg2);
	}
      else
	arg2 = integer_zero_node;

      if (TREE_CODE (arg0) == VECTOR_CST
	  && TREE_CODE (arg1) == VECTOR_CST
	  && TREE_CODE (arg2) == INTEGER_CST)
	{
	  bool overflow = false;
	  widest_int result = wi::to_widest (arg2);
	  widest_int tmp;
	  unsigned i;

	  /* Sum of absolute differences of the byte elements,
	     accumulated on top of ARG2.  */
	  for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
	    {
	      tree e0 = VECTOR_CST_ELT (arg0, i);
	      tree e1 = VECTOR_CST_ELT (arg1, i);

	      bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;

	      tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
	      tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
	      if (wi::neg_p (tmp))
		tmp = wi::neg (tmp, &neg2_ovf);
	      else
		neg2_ovf = false;
	      result = wi::add (result, tmp, SIGNED, &add2_ovf);
	      overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
	    }

	  /* The widest_int arithmetic cannot overflow for byte inputs.  */
	  gcc_assert (!overflow);

	  return wide_int_to_tree (rtype, result);
	}
      /* FALLTHRU - non-constant operands are not folded.  */

    default:
      break;
    }

  return NULL_TREE;
}
11924
11925 /* ??? This duplicates information provided to the compiler by the
11926 ??? scheduler description. Some day, teach genautomata to output
11927 ??? the latencies and then CSE will just use that. */
11928
static bool
sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
		 int opno ATTRIBUTE_UNUSED,
		 int *total, bool speed ATTRIBUTE_UNUSED)
{
  int code = GET_CODE (x);
  bool float_mode_p = FLOAT_MODE_P (mode);

  switch (code)
    {
    case CONST_INT:
      /* Constants fitting in a simm13 immediate field are free.  */
      if (SMALL_INT (x))
	*total = 0;
      else
	*total = 2;
      return true;

    case CONST_WIDE_INT:
      /* Charge 2 for each half that does not fit in simm13.  */
      *total = 0;
      if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
	*total += 2;
      if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
	*total += 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      /* Symbolic addresses need a sethi/or pair.  */
      *total = 4;
      return true;

    case CONST_DOUBLE:
      *total = 8;
      return true;

    case MEM:
      /* If outer-code was a sign or zero extension, a cost
	 of COSTS_N_INSNS (1) was already added in.  This is
	 why we are subtracting it back out.  */
      if (outer_code == ZERO_EXTEND)
	{
	  *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
	}
      else if (outer_code == SIGN_EXTEND)
	{
	  *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
	}
      else if (float_mode_p)
	{
	  *total = sparc_costs->float_load;
	}
      else
	{
	  *total = sparc_costs->int_load;
	}

      return true;

    case PLUS:
    case MINUS:
      if (float_mode_p)
	*total = sparc_costs->float_plusminus;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case FMA:
      {
	rtx sub;

	/* FMA only exists for FP modes on this target.  */
	gcc_assert (float_mode_p);
	*total = sparc_costs->float_mul;

	/* A negated operand is free: the fmadd patterns absorb it, so
	   cost only the operand underneath the NEG.  */
	sub = XEXP (x, 0);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, mode, FMA, 0, speed);

	sub = XEXP (x, 2);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, mode, FMA, 2, speed);
	return true;
      }

    case MULT:
      if (float_mode_p)
	*total = sparc_costs->float_mul;
      else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
	/* Software multiply via libcall.  */
	*total = COSTS_N_INSNS (25);
      else
	{
	  int bit_cost;

	  /* Some CPUs have early-out multipliers whose latency depends
	     on the number of set bits in a constant operand.  */
	  bit_cost = 0;
	  if (sparc_costs->int_mul_bit_factor)
	    {
	      int nbits;

	      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
		{
		  unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
		  /* Population count via Kernighan's bit trick.  */
		  for (nbits = 0; value != 0; value &= value - 1)
		    nbits++;
		}
	      else
		/* Assume an average operand for a non-constant.  */
		nbits = 7;

	      if (nbits < 3)
		nbits = 3;
	      bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
	      bit_cost = COSTS_N_INSNS (bit_cost);
	    }

	  if (mode == DImode || !TARGET_HARD_MUL)
	    *total = sparc_costs->int_mulX + bit_cost;
	  else
	    *total = sparc_costs->int_mul + bit_cost;
	}
      return false;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
      return false;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (float_mode_p)
	{
	  if (mode == DFmode)
	    *total = sparc_costs->float_div_df;
	  else
	    *total = sparc_costs->float_div_sf;
	}
      else
	{
	  if (mode == DImode)
	    *total = sparc_costs->int_divX;
	  else
	    *total = sparc_costs->int_div;
	}
      return false;

    case NEG:
      if (! float_mode_p)
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}
      /* FALLTHRU */

    case ABS:
    case FLOAT:
    case UNSIGNED_FLOAT:
    case FIX:
    case UNSIGNED_FIX:
    case FLOAT_EXTEND:
    case FLOAT_TRUNCATE:
      *total = sparc_costs->float_move;
      return false;

    case SQRT:
      if (mode == DFmode)
	*total = sparc_costs->float_sqrt_df;
      else
	*total = sparc_costs->float_sqrt_sf;
      return false;

    case COMPARE:
      if (float_mode_p)
	*total = sparc_costs->float_cmp;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case IF_THEN_ELSE:
      if (float_mode_p)
	*total = sparc_costs->float_cmove;
      else
	*total = sparc_costs->int_cmove;
      return false;

    case IOR:
      /* Handle the NAND vector patterns.  */
      if (sparc_vector_mode_supported_p (mode)
	  && GET_CODE (XEXP (x, 0)) == NOT
	  && GET_CODE (XEXP (x, 1)) == NOT)
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      else
	return false;

    default:
      return false;
    }
}
12135
12136 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
12137
12138 static inline bool
general_or_i64_p(reg_class_t rclass)12139 general_or_i64_p (reg_class_t rclass)
12140 {
12141 return (rclass == GENERAL_REGS || rclass == I64_REGS);
12142 }
12143
12144 /* Implement TARGET_REGISTER_MOVE_COST. */
12145
12146 static int
sparc_register_move_cost(machine_mode mode ATTRIBUTE_UNUSED,reg_class_t from,reg_class_t to)12147 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12148 reg_class_t from, reg_class_t to)
12149 {
12150 bool need_memory = false;
12151
12152 /* This helps postreload CSE to eliminate redundant comparisons. */
12153 if (from == NO_REGS || to == NO_REGS)
12154 return 100;
12155
12156 if (from == FPCC_REGS || to == FPCC_REGS)
12157 need_memory = true;
12158 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
12159 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
12160 {
12161 if (TARGET_VIS3)
12162 {
12163 int size = GET_MODE_SIZE (mode);
12164 if (size == 8 || size == 4)
12165 {
12166 if (! TARGET_ARCH32 || size == 4)
12167 return 4;
12168 else
12169 return 6;
12170 }
12171 }
12172 need_memory = true;
12173 }
12174
12175 if (need_memory)
12176 {
12177 if (sparc_cpu == PROCESSOR_ULTRASPARC
12178 || sparc_cpu == PROCESSOR_ULTRASPARC3
12179 || sparc_cpu == PROCESSOR_NIAGARA
12180 || sparc_cpu == PROCESSOR_NIAGARA2
12181 || sparc_cpu == PROCESSOR_NIAGARA3
12182 || sparc_cpu == PROCESSOR_NIAGARA4
12183 || sparc_cpu == PROCESSOR_NIAGARA7
12184 || sparc_cpu == PROCESSOR_M8)
12185 return 12;
12186
12187 return 6;
12188 }
12189
12190 return 2;
12191 }
12192
12193 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
12194 This is achieved by means of a manual dynamic stack space allocation in
12195 the current frame. We make the assumption that SEQ doesn't contain any
12196 function calls, with the possible exception of calls to the GOT helper. */
12197
static void
emit_and_preserve (rtx seq, rtx reg, rtx reg2)
{
  /* We must preserve the lowest 16 words for the register save area.  */
  HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
  /* We really need only 2 words of fresh stack space.  */
  HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);

  /* Address of the first fresh slot, above the register save area,
     accounting for the stack bias on 64-bit.  */
  rtx slot
    = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
					     SPARC_STACK_BIAS + offset));

  /* Allocate the space, spill REG (and REG2 one word higher), run SEQ,
     then reload in reverse order and release the space.  */
  emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
  emit_insn (gen_rtx_SET (slot, reg));
  if (reg2)
    emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
			    reg2));
  emit_insn (seq);
  if (reg2)
    emit_insn (gen_rtx_SET (reg2,
			    adjust_address (slot, word_mode, UNITS_PER_WORD)));
  emit_insn (gen_rtx_SET (reg, slot));
  emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
}
12222
12223 /* Output the assembler code for a thunk function. THUNK_DECL is the
12224 declaration for the thunk function itself, FUNCTION is the decl for
12225 the target function. DELTA is an immediate constant offset to be
12226 added to THIS. If VCALL_OFFSET is nonzero, the word at address
12227 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
12228
static void
sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
		       HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		       tree function)
{
  rtx this_rtx, funexp;
  rtx_insn *insn;
  unsigned int int_arg_first;

  /* Pretend reload and the epilogue have run: we emit hard-register RTL
     directly and run final by hand at the end.  */
  reload_completed = 1;
  epilogue_completed = 1;

  emit_note (NOTE_INSN_PROLOGUE_END);

  if (TARGET_FLAT)
    {
      sparc_leaf_function_p = 1;

      int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
    }
  else if (flag_delayed_branch)
    {
      /* We will emit a regular sibcall below, so we need to instruct
	 output_sibcall that we are in a leaf function.  */
      sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;

      /* This will cause final.c to invoke leaf_renumber_regs so we
	 must behave as if we were in a not-yet-leafified function.  */
      int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
    }
  else
    {
      /* We will emit the sibcall manually below, so we will need to
	 manually spill non-leaf registers.  */
      sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;

      /* We really are in a leaf function.  */
      int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
    }

  /* Find the "this" pointer.  Normally in %o0, but in ARCH64 if the function
     returns a structure, the structure return pointer is there instead.  */
  if (TARGET_ARCH64
      && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
  else
    this_rtx = gen_rtx_REG (Pmode, int_arg_first);

  /* Add DELTA.  When possible use a plain add, otherwise load it into
     a register first.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);

      if (! SPARC_SIMM13_P (delta))
	{
	  /* %g1 is free for use as a scratch in a thunk.  */
	  rtx scratch = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (scratch, delta_rtx);
	  delta_rtx = scratch;
	}

      /* THIS_RTX += DELTA.  */
      emit_insn (gen_add2_insn (this_rtx, delta_rtx));
    }

  /* Add the word at address (*THIS_RTX + VCALL_OFFSET).  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx scratch = gen_rtx_REG (Pmode, 1);

      gcc_assert (vcall_offset < 0);

      /* SCRATCH = *THIS_RTX.  */
      emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));

      /* Prepare for adding VCALL_OFFSET.  The difficulty is that we
	 may not have any available scratch register at this point.  */
      if (SPARC_SIMM13_P (vcall_offset))
	;
      /* This is the case if ARCH64 (unless -ffixed-g5 is passed).  */
      else if (! fixed_regs[5]
	       /* The below sequence is made up of at least 2 insns,
		  while the default method may need only one.  */
	       && vcall_offset < -8192)
	{
	  rtx scratch2 = gen_rtx_REG (Pmode, 5);
	  emit_move_insn (scratch2, vcall_offset_rtx);
	  vcall_offset_rtx = scratch2;
	}
      else
	{
	  rtx increment = GEN_INT (-4096);

	  /* VCALL_OFFSET is a negative number whose typical range can be
	     estimated as -32768..0 in 32-bit mode.  In almost all cases
	     it is therefore cheaper to emit multiple add insns than
	     spilling and loading the constant into a register (at least
	     6 insns).  */
	  while (! SPARC_SIMM13_P (vcall_offset))
	    {
	      emit_insn (gen_add2_insn (scratch, increment));
	      vcall_offset += 4096;
	    }
	  vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
	}

      /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET).  */
      emit_move_insn (scratch, gen_rtx_MEM (Pmode,
					    gen_rtx_PLUS (Pmode,
							  scratch,
							  vcall_offset_rtx)));

      /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET).  */
      emit_insn (gen_add2_insn (this_rtx, scratch));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);

  if (flag_delayed_branch)
    {
      funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
      insn = emit_call_insn (gen_sibcall (funexp));
      SIBLING_CALL_P (insn) = 1;
    }
  else
    {
      /* The hoops we have to jump through in order to generate a sibcall
	 without using delay slots...  */
      rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);

      if (flag_pic)
	{
	  spill_reg = gen_rtx_REG (word_mode, 15);  /* %o7 */
	  start_sequence ();
	  load_got_register ();  /* clobbers %o7 */
	  if (!TARGET_VXWORKS_RTP)
	    pic_offset_table_rtx = got_register_rtx;
	  scratch = sparc_legitimize_pic_address (funexp, scratch);
	  seq = get_insns ();
	  end_sequence ();
	  /* Run the PIC materialization while saving/restoring %o7 and
	     the GOT pointer around it.  */
	  emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
	}
      else if (TARGET_ARCH32)
	{
	  /* sethi %hi(funexp), scratch; or scratch, %lo(funexp).  */
	  emit_insn (gen_rtx_SET (scratch,
				  gen_rtx_HIGH (SImode, funexp)));
	  emit_insn (gen_rtx_SET (scratch,
				  gen_rtx_LO_SUM (SImode, scratch, funexp)));
	}
      else  /* TARGET_ARCH64 */
	{
	  switch (sparc_cmodel)
	    {
	    case CM_MEDLOW:
	    case CM_MEDMID:
	      /* The destination can serve as a temporary.  */
	      sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
	      break;

	    case CM_MEDANY:
	    case CM_EMBMEDANY:
	      /* The destination cannot serve as a temporary.  */
	      spill_reg = gen_rtx_REG (DImode, 15);  /* %o7 */
	      start_sequence ();
	      sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
	      seq = get_insns ();
	      end_sequence ();
	      emit_and_preserve (seq, spill_reg, 0);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}

      emit_jump_insn (gen_indirect_jump (scratch));
    }

  emit_barrier ();

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worth while.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */
  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  reload_completed = 0;
  epilogue_completed = 0;
}
12429
12430 /* Return true if sparc_output_mi_thunk would be able to output the
12431 assembler code for the thunk function specified by the arguments
12432 it is passed, and false otherwise. */
12433 static bool
sparc_can_output_mi_thunk(const_tree thunk_fndecl ATTRIBUTE_UNUSED,HOST_WIDE_INT delta ATTRIBUTE_UNUSED,HOST_WIDE_INT vcall_offset,const_tree function ATTRIBUTE_UNUSED)12434 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
12435 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
12436 HOST_WIDE_INT vcall_offset,
12437 const_tree function ATTRIBUTE_UNUSED)
12438 {
12439 /* Bound the loop used in the default method above. */
12440 return (vcall_offset >= -32768 || ! fixed_regs[5]);
12441 }
12442
12443 /* How to allocate a 'struct machine_function'. */
12444
static struct machine_function *
sparc_init_machine_status (void)
{
  /* Zero-initialized, garbage-collected per-function storage.  */
  return ggc_cleared_alloc<machine_function> ();
}
12450
12451 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12452 We need to emit DTP-relative relocations. */
12453
12454 static void
sparc_output_dwarf_dtprel(FILE * file,int size,rtx x)12455 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
12456 {
12457 switch (size)
12458 {
12459 case 4:
12460 fputs ("\t.word\t%r_tls_dtpoff32(", file);
12461 break;
12462 case 8:
12463 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
12464 break;
12465 default:
12466 gcc_unreachable ();
12467 }
12468 output_addr_const (file, x);
12469 fputs (")", file);
12470 }
12471
12472 /* Do whatever processing is required at the end of a file. */
12473
static void
sparc_file_end (void)
{
  /* If we need to emit the special GOT helper function, do so now.  */
  if (got_helper_needed)
    {
      const char *name = XSTR (got_helper_rtx, 0);
#ifdef DWARF2_UNWIND_INFO
      bool do_cfi;
#endif

      if (USE_HIDDEN_LINKONCE)
	{
	  /* Emit the helper as a hidden COMDAT function so that multiple
	     objects share a single copy at link time.  */
	  tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
				  get_identifier (name),
				  build_function_type_list (void_type_node,
                                                            NULL_TREE));
	  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
					   NULL_TREE, void_type_node);
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
	  DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
	  DECL_VISIBILITY_SPECIFIED (decl) = 1;
	  resolve_unique_section (decl, 0, flag_function_sections);
	  allocate_struct_function (decl, true);
	  cfun->is_thunk = 1;
	  current_function_decl = decl;
	  init_varasm_status ();
	  assemble_start_function (decl, name);
	}
      else
	{
	  /* Plain local label in the text section.  */
	  const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
          switch_to_section (text_section);
	  if (align > 0)
	    ASM_OUTPUT_ALIGN (asm_out_file, align);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

#ifdef DWARF2_UNWIND_INFO
      do_cfi = dwarf2out_do_cfi_asm ();
      if (do_cfi)
	output_asm_insn (".cfi_startproc", NULL);
#endif
      /* The helper adds %o7 (the return address, pointing at the call)
	 to the GOT register.  With delayed branches the add sits in the
	 delay slot of the return jump.  */
      if (flag_delayed_branch)
	{
	  output_asm_insn ("jmp\t%%o7+8", NULL);
	  output_asm_insn (" add\t%%o7, %0, %0", &got_register_rtx);
	}
      else
	{
	  output_asm_insn ("add\t%%o7, %0, %0", &got_register_rtx);
	  output_asm_insn ("jmp\t%%o7+8", NULL);
	  output_asm_insn (" nop", NULL);
	}
#ifdef DWARF2_UNWIND_INFO
      if (do_cfi)
	output_asm_insn (".cfi_endproc", NULL);
#endif
    }

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();

#ifdef TARGET_SOLARIS
  solaris_file_end ();
#endif
}
12543
12544 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
12545 /* Implement TARGET_MANGLE_TYPE. */
12546
12547 static const char *
sparc_mangle_type(const_tree type)12548 sparc_mangle_type (const_tree type)
12549 {
12550 if (TARGET_ARCH32
12551 && TYPE_MAIN_VARIANT (type) == long_double_type_node
12552 && TARGET_LONG_DOUBLE_128)
12553 return "g";
12554
12555 /* For all other types, use normal C++ mangling. */
12556 return NULL;
12557 }
12558 #endif
12559
12560 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
12561 and BEFORE_AFTER arguments of the form X_Y. They are two-bit masks where
12562 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
12563
void
sparc_emit_membar_for_model (enum memmodel model,
			     int load_store, int before_after)
{
  /* Bits for the MEMBAR mmask field.  */
  const int LoadLoad = 1;
  const int StoreLoad = 2;
  const int LoadStore = 4;
  const int StoreStore = 8;

  /* MM collects the barrier bits required by MODEL; IMPLIED collects
     the bits already guaranteed by the hardware memory model, which
     can therefore be dropped.  */
  int mm = 0, implied = 0;

  /* Note the deliberate fallthroughs: each stronger model implies
     everything the weaker models below it imply.  */
  switch (sparc_memory_model)
    {
    case SMM_SC:
      /* Sequential Consistency.  All memory transactions are immediately
	 visible in sequential execution order.  No barriers needed.  */
      implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
      break;

    case SMM_TSO:
      /* Total Store Ordering: all memory transactions with store semantics
	 are followed by an implied StoreStore.  */
      implied |= StoreStore;

      /* If we're not looking for a raw barrer (before+after), then atomic
	 operations get the benefit of being both load and store.  */
      if (load_store == 3 && before_after == 1)
	implied |= StoreLoad;
      /* FALLTHRU */

    case SMM_PSO:
      /* Partial Store Ordering: all memory transactions with load semantics
	 are followed by an implied LoadLoad | LoadStore.  */
      implied |= LoadLoad | LoadStore;

      /* If we're not looking for a raw barrer (before+after), then atomic
	 operations get the benefit of being both load and store.  */
      if (load_store == 3 && before_after == 2)
	implied |= StoreLoad | StoreStore;
      /* FALLTHRU */

    case SMM_RMO:
      /* Relaxed Memory Ordering: no implicit bits.  */
      break;

    default:
      gcc_unreachable ();
    }

  /* Barrier before the operation: release-type orderings.  */
  if (before_after & 1)
    {
      if (is_mm_release (model) || is_mm_acq_rel (model)
	  || is_mm_seq_cst (model))
	{
	  if (load_store & 1)
	    mm |= LoadLoad | StoreLoad;
	  if (load_store & 2)
	    mm |= LoadStore | StoreStore;
	}
    }
  /* Barrier after the operation: acquire-type orderings.  */
  if (before_after & 2)
    {
      if (is_mm_acquire (model) || is_mm_acq_rel (model)
	  || is_mm_seq_cst (model))
	{
	  if (load_store & 1)
	    mm |= LoadLoad | LoadStore;
	  if (load_store & 2)
	    mm |= StoreLoad | StoreStore;
	}
    }

  /* Remove the bits implied by the system memory model.  */
  mm &= ~implied;

  /* For raw barriers (before+after), always emit a barrier.
     This will become a compile-time barrier if needed.  */
  if (mm || before_after == 3)
    emit_insn (gen_membar (GEN_INT (mm)));
}
12645
12646 /* Expand code to perform a 8 or 16-bit compare and swap by doing 32-bit
12647 compare and swap on the word containing the byte or half-word. */
12648
static void
sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
				  rtx oldval, rtx newval)
{
  rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
  rtx addr = gen_reg_rtx (Pmode);
  rtx off = gen_reg_rtx (SImode);
  rtx oldv = gen_reg_rtx (SImode);
  rtx newv = gen_reg_rtx (SImode);
  rtx oldvalue = gen_reg_rtx (SImode);
  rtx newvalue = gen_reg_rtx (SImode);
  rtx res = gen_reg_rtx (SImode);
  rtx resv = gen_reg_rtx (SImode);
  rtx memsi, val, mask, cc;

  /* ADDR = containing word; OFF = byte offset within that word.  */
  emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));

  if (Pmode != SImode)
    addr1 = gen_lowpart (SImode, addr1);
  emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));

  memsi = gen_rtx_MEM (SImode, addr);
  set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
  MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);

  val = copy_to_reg (memsi);

  /* Convert the byte offset into a big-endian bit shift for the
     sub-word within the SImode word.  */
  emit_insn (gen_rtx_SET (off,
			  gen_rtx_XOR (SImode, off,
				       GEN_INT (GET_MODE (mem) == QImode
						? 3 : 2))));

  emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));

  if (GET_MODE (mem) == QImode)
    mask = force_reg (SImode, GEN_INT (0xff));
  else
    mask = force_reg (SImode, GEN_INT (0xffff));

  /* MASK selects the sub-word bits inside the full word.  */
  emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));

  /* VAL = word contents with the sub-word cleared.  */
  emit_insn (gen_rtx_SET (val,
			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
				       val)));

  /* Position OLDVAL and NEWVAL at the sub-word's bit position.  */
  oldval = gen_lowpart (SImode, oldval);
  emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));

  newval = gen_lowpart_common (SImode, newval);
  emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));

  emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));

  emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));

  /* Loop: retry the 32-bit CAS until either it succeeds or the
     surrounding bytes of the word stop changing under us.  */
  rtx_code_label *end_label = gen_label_rtx ();
  rtx_code_label *loop_label = gen_label_rtx ();
  emit_label (loop_label);

  emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));

  emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));

  emit_move_insn (bool_result, const1_rtx);

  emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));

  /* CAS succeeded: the word matched our expected full-word value.  */
  emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);

  emit_insn (gen_rtx_SET (resv,
			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
				       res)));

  emit_move_insn (bool_result, const0_rtx);

  /* Retry only if the bytes outside our sub-word changed; a mismatch in
     our own sub-word is a genuine CAS failure.  */
  cc = gen_compare_reg_1 (NE, resv, val);
  emit_insn (gen_rtx_SET (val, resv));

  /* Use cbranchcc4 to separate the compare and branch!  */
  emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
				  cc, const0_rtx, loop_label));

  emit_label (end_label);

  /* Extract the sub-word result from the full word.  */
  emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));

  emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));

  emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
}
12739
12740 /* Expand code to perform a compare-and-swap. */
12741
12742 void
sparc_expand_compare_and_swap(rtx operands[])12743 sparc_expand_compare_and_swap (rtx operands[])
12744 {
12745 rtx bval, retval, mem, oldval, newval;
12746 machine_mode mode;
12747 enum memmodel model;
12748
12749 bval = operands[0];
12750 retval = operands[1];
12751 mem = operands[2];
12752 oldval = operands[3];
12753 newval = operands[4];
12754 model = (enum memmodel) INTVAL (operands[6]);
12755 mode = GET_MODE (mem);
12756
12757 sparc_emit_membar_for_model (model, 3, 1);
12758
12759 if (reg_overlap_mentioned_p (retval, oldval))
12760 oldval = copy_to_reg (oldval);
12761
12762 if (mode == QImode || mode == HImode)
12763 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12764 else
12765 {
12766 rtx (*gen) (rtx, rtx, rtx, rtx);
12767 rtx x;
12768
12769 if (mode == SImode)
12770 gen = gen_atomic_compare_and_swapsi_1;
12771 else
12772 gen = gen_atomic_compare_and_swapdi_1;
12773 emit_insn (gen (retval, mem, oldval, newval));
12774
12775 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12776 if (x != bval)
12777 convert_move (bval, x, 1);
12778 }
12779
12780 sparc_emit_membar_for_model (model, 3, 2);
12781 }
12782
/* Compact the VMODE selector vector SEL into a byte-granular selector and
   load it into the %gsr mask via the bmask instruction, for use by a
   subsequent bshuffle.  */

void
sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
{
  rtx t_1, t_2, t_3;

  sel = gen_lowpart (DImode, sel);
  switch (vmode)
    {
    case E_V2SImode:
      /* inp = xxxxxxxAxxxxxxxB */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ....xxxxxxxAxxx. */
      sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
				 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
      t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
				 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .......B */
      /* t_1 = ...A.... */
      sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
      /* sel = ...A...B */
      /* Multiplying by 0x4444 replicates each index into four nibble
	 positions at once (each SImode element covers 4 bytes).  */
      sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
      /* sel = AAAABBBB * 4 */
      t_1 = force_reg (SImode, GEN_INT (0x01230123));
      /* sel = { A*4, A*4+1, A*4+2, ... } */
      break;

    case E_V4HImode:
      /* inp = xxxAxxxBxxxCxxxD */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ..xxxAxxxBxxxCxx */
      /* t_2 = ....xxxAxxxBxxxC */
      /* t_3 = ......xxxAxxxBxx */
      sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
				 GEN_INT (0x07),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
				 GEN_INT (0x0700),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
				 GEN_INT (0x070000),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
				 GEN_INT (0x07000000),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .......D */
      /* t_1 = .....C.. */
      /* t_2 = ...B.... */
      /* t_3 = .A...... */
      sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
      sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
      /* sel = .A.B.C.D */
      /* Multiplying by 0x22 duplicates each index into the two byte
	 positions of its HImode element.  */
      sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
      /* sel = AABBCCDD * 2 */
      t_1 = force_reg (SImode, GEN_INT (0x01010101));
      /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
      break;

    case E_V8QImode:
      /* Byte elements need no scaling, only compaction of the eight
	 selector bytes into eight nibbles.  */
      /* input = xAxBxCxDxExFxGxH */
      sel = expand_simple_binop (DImode, AND, sel,
				 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
					  | 0x0f0f0f0f),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .A.B.C.D.E.F.G.H */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ..A.B.C.D.E.F.G. */
      sel = expand_simple_binop (DImode, IOR, sel, t_1,
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .AABBCCDDEEFFGGH */
      sel = expand_simple_binop (DImode, AND, sel,
				 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
					  | 0xff00ff),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ..AB..CD..EF..GH */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ....AB..CD..EF.. */
      sel = expand_simple_binop (DImode, IOR, sel, t_1,
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ..ABABCDCDEFEFGH */
      sel = expand_simple_binop (DImode, AND, sel,
				 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ....ABCD....EFGH */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ........ABCD.... */
      sel = gen_lowpart (SImode, sel);
      t_1 = gen_lowpart (SImode, t_1);
      break;

    default:
      gcc_unreachable ();
    }

  /* Always perform the final addition/merge within the bmask insn.  */
  emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
}
12889
12890 /* Implement TARGET_VEC_PERM_CONST. */
12891
12892 static bool
sparc_vectorize_vec_perm_const(machine_mode vmode,rtx target,rtx op0,rtx op1,const vec_perm_indices & sel)12893 sparc_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
12894 rtx op1, const vec_perm_indices &sel)
12895 {
12896 if (!TARGET_VIS2)
12897 return false;
12898
12899 /* All permutes are supported. */
12900 if (!target)
12901 return true;
12902
12903 /* Force target-independent code to convert constant permutations on other
12904 modes down to V8QI. Rely on this to avoid the complexity of the byte
12905 order of the permutation. */
12906 if (vmode != V8QImode)
12907 return false;
12908
12909 unsigned int i, mask;
12910 for (i = mask = 0; i < 8; ++i)
12911 mask |= (sel[i] & 0xf) << (28 - i*4);
12912 rtx mask_rtx = force_reg (SImode, gen_int_mode (mask, SImode));
12913
12914 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), mask_rtx, const0_rtx));
12915 emit_insn (gen_bshufflev8qi_vis (target, op0, op1));
12916 return true;
12917 }
12918
12919 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
12920
12921 static bool
sparc_frame_pointer_required(void)12922 sparc_frame_pointer_required (void)
12923 {
12924 /* If the stack pointer is dynamically modified in the function, it cannot
12925 serve as the frame pointer. */
12926 if (cfun->calls_alloca)
12927 return true;
12928
12929 /* If the function receives nonlocal gotos, it needs to save the frame
12930 pointer in the nonlocal_goto_save_area object. */
12931 if (cfun->has_nonlocal_label)
12932 return true;
12933
12934 /* In flat mode, that's it. */
12935 if (TARGET_FLAT)
12936 return false;
12937
12938 /* Otherwise, the frame pointer is required if the function isn't leaf, but
12939 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */
12940 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
12941 }
12942
12943 /* The way this is structured, we can't eliminate SFP in favor of SP
12944 if the frame pointer is required: we want to use the SFP->HFP elimination
12945 in that case. But the test in update_eliminables doesn't know we are
12946 assuming below that we only do the former elimination. */
12947
12948 static bool
sparc_can_eliminate(const int from ATTRIBUTE_UNUSED,const int to)12949 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
12950 {
12951 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
12952 }
12953
/* Implement TARGET_BUILTIN_SETJMP_FRAME_VALUE.

   Return the hard frame pointer directly to bypass the stack bias; the
   default hook would return virtual_stack_vars_rtx instead.  */

static rtx
sparc_builtin_setjmp_frame_value (void)
{
  return hard_frame_pointer_rtx;
}
12961
/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.

   If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
   they won't be allocated.  Also adjust the global/application registers
   according to the selected ABI and options.  A value of 2 in fixed_regs[]
   means "fixed by default, not by an explicit user option".  */

static void
sparc_conditional_register_usage (void)
{
  /* A dedicated PIC register must never be allocated for anything else.  */
  if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  /* If the user has passed -f{fixed,call-{used,saved}}-g5 */
  /* then honor it.  */
  if (TARGET_ARCH32 && fixed_regs[5])
    fixed_regs[5] = 1;
  else if (TARGET_ARCH64 && fixed_regs[5] == 2)
    /* %g5 is free for allocation on 64-bit unless the user reserved it.  */
    fixed_regs[5] = 0;
  if (! TARGET_V9)
    {
      int regno;
      /* The upper FP registers only exist on V9.  */
      for (regno = SPARC_FIRST_V9_FP_REG;
	   regno <= SPARC_LAST_V9_FP_REG;
	   regno++)
	fixed_regs[regno] = 1;
      /* %fcc0 is used by v8 and v9.  */
      for (regno = SPARC_FIRST_V9_FCC_REG + 1;
	   regno <= SPARC_LAST_V9_FCC_REG;
	   regno++)
	fixed_regs[regno] = 1;
    }
  if (! TARGET_FPU)
    {
      int regno;
      /* Fix all FP and FP condition-code registers starting at %f0 (32).
	 NOTE(review): the bound uses '<', so SPARC_LAST_V9_FCC_REG itself
	 is left unfixed here — confirm whether that exclusion is
	 intentional.  */
      for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
	fixed_regs[regno] = 1;
    }
  /* If the user has passed -f{fixed,call-{used,saved}}-g2 */
  /* then honor it.  Likewise with g3 and g4.  */
  if (fixed_regs[2] == 2)
    fixed_regs[2] = ! TARGET_APP_REGS;
  if (fixed_regs[3] == 2)
    fixed_regs[3] = ! TARGET_APP_REGS;
  if (TARGET_ARCH32 && fixed_regs[4] == 2)
    fixed_regs[4] = ! TARGET_APP_REGS;
  else if (TARGET_CM_EMBMEDANY)
    /* The embedded medium/anywhere code model reserves %g4 as the data
       segment base register.  */
    fixed_regs[4] = 1;
  else if (fixed_regs[4] == 2)
    fixed_regs[4] = 0;
  if (TARGET_FLAT)
    {
      int regno;
      /* Disable leaf functions.  */
      memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	leaf_reg_remap [regno] = regno;
    }
  if (TARGET_VIS)
    /* The GSR is implicitly read/written by VIS instructions.  */
    global_regs[SPARC_GSR_REG] = 1;
}
13021
13022 /* Implement TARGET_USE_PSEUDO_PIC_REG. */
13023
13024 static bool
sparc_use_pseudo_pic_reg(void)13025 sparc_use_pseudo_pic_reg (void)
13026 {
13027 return !TARGET_VXWORKS_RTP && flag_pic;
13028 }
13029
13030 /* Implement TARGET_INIT_PIC_REG. */
13031
13032 static void
sparc_init_pic_reg(void)13033 sparc_init_pic_reg (void)
13034 {
13035 edge entry_edge;
13036 rtx_insn *seq;
13037
13038 /* In PIC mode, we need to always initialize the PIC register if optimization
13039 is enabled, because we are called from IRA and LRA may later force things
13040 to the constant pool for optimization purposes. */
13041 if (!flag_pic || (!crtl->uses_pic_offset_table && !optimize))
13042 return;
13043
13044 start_sequence ();
13045 load_got_register ();
13046 if (!TARGET_VXWORKS_RTP)
13047 emit_move_insn (pic_offset_table_rtx, got_register_rtx);
13048 seq = get_insns ();
13049 end_sequence ();
13050
13051 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
13052 insert_insn_on_edge (seq, entry_edge);
13053 commit_one_edge_insertion (entry_edge);
13054 }
13055
/* Implement TARGET_PREFERRED_RELOAD_CLASS:

   - We can't load constants into FP registers.
   - We can't load FP constants into integer registers when soft-float,
     because there is no soft-float pattern with a r/F constraint.
   - We can't load FP constants into integer registers for TFmode unless
     it is 0.0L, because there is no movtf pattern with a r/F constraint.
   - Try and reload integer constants (symbolic or otherwise) back into
     registers directly, rather than having them dumped to memory.  */

static reg_class_t
sparc_preferred_reload_class (rtx x, reg_class_t rclass)
{
  machine_mode mode = GET_MODE (x);
  if (CONSTANT_P (x))
    {
      /* Refuse the classes enumerated in the header comment; NO_REGS makes
	 reload spill the constant to memory instead.  */
      if (FP_REG_CLASS_P (rclass)
	  || rclass == GENERAL_OR_FP_REGS
	  || rclass == GENERAL_OR_EXTRA_FP_REGS
	  || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
	  || (mode == TFmode && ! const_zero_operand (x, mode)))
	return NO_REGS;

      if (GET_MODE_CLASS (mode) == MODE_INT)
	return GENERAL_REGS;

      /* Vector-int constants can only be materialized in FP registers, and
	 only the all-zeros and all-ones patterns at that.  */
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  if (! FP_REG_CLASS_P (rclass)
	      || !(const_zero_operand (x, mode)
		   || const_all_ones_operand (x, mode)))
	    return NO_REGS;
	}
    }

  /* With VIS3 on 32-bit, fp<->int moves only work through the lower FP
     registers, so narrow classes containing the upper ones.  */
  if (TARGET_VIS3
      && ! TARGET_ARCH64
      && (rclass == EXTRA_FP_REGS
	  || rclass == GENERAL_OR_EXTRA_FP_REGS))
    {
      int regno = true_regnum (x);

      if (SPARC_INT_REG_P (regno))
	return (rclass == EXTRA_FP_REGS
		? FP_REGS : GENERAL_OR_FP_REGS);
    }

  return rclass;
}
13105
13106 /* Return true if we use LRA instead of reload pass. */
13107
13108 static bool
sparc_lra_p(void)13109 sparc_lra_p (void)
13110 {
13111 return TARGET_LRA;
13112 }
13113
/* Output a wide multiply instruction in V8+ mode.  INSN is the instruction,
   OPERANDS are its operands and OPCODE is the mnemonic to be used.

   Operand 0 is the 64-bit destination (as a %H0/%L0 register pair), while
   operands 1 and 2 are the 64-bit sources; operands 3 and 4 are scratch
   registers used by some alternatives.  NOTE(review): the meaning of
   which_alternative == 1 is determined by the matching sparc.md pattern —
   presumably the alternative whose operands already live in 64-bit-capable
   registers; confirm against the pattern.  */

const char *
output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
{
  char mulstr[32];

  gcc_assert (! TARGET_ARCH64);

  /* Clear the upper 32 bits of the low half of operand 1 unless they are
     already known to be zero.  */
  if (sparc_check_64 (operands[1], insn) <= 0)
    output_asm_insn ("srl\t%L1, 0, %L1", operands);
  if (which_alternative == 1)
    output_asm_insn ("sllx\t%H1, 32, %H1", operands);
  if (GET_CODE (operands[2]) == CONST_INT)
    {
      /* Multiplication by a (13-bit) immediate: no need to build a 64-bit
	 second operand in a register.  */
      if (which_alternative == 1)
	{
	  /* Combine the halves of operand 1 in %H1 and multiply there.  */
	  output_asm_insn ("or\t%L1, %H1, %H1", operands);
	  sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
	  output_asm_insn (mulstr, operands);
	  return "srlx\t%L0, 32, %H0";
	}
      else
	{
	  /* Build the 64-bit operand in scratch %3 and multiply there.  */
	  output_asm_insn ("sllx\t%H1, 32, %3", operands);
	  output_asm_insn ("or\t%L1, %3, %3", operands);
	  sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
	  output_asm_insn (mulstr, operands);
	  output_asm_insn ("srlx\t%3, 32, %H0", operands);
	  return "mov\t%3, %L0";
	}
    }
  else if (rtx_equal_p (operands[1], operands[2]))
    {
      /* Squaring: assemble the single source once and use it twice.  */
      if (which_alternative == 1)
	{
	  output_asm_insn ("or\t%L1, %H1, %H1", operands);
	  sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
	  output_asm_insn (mulstr, operands);
	  return "srlx\t%L0, 32, %H0";
	}
      else
	{
	  output_asm_insn ("sllx\t%H1, 32, %3", operands);
	  output_asm_insn ("or\t%L1, %3, %3", operands);
	  sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
	  output_asm_insn (mulstr, operands);
	  output_asm_insn ("srlx\t%3, 32, %H0", operands);
	  return "mov\t%3, %L0";
	}
    }
  /* General register x register case; normalize operand 2 as well.  */
  if (sparc_check_64 (operands[2], insn) <= 0)
    output_asm_insn ("srl\t%L2, 0, %L2", operands);
  if (which_alternative == 1)
    {
      /* Reuse %L1 as scratch to assemble operand 2.  */
      output_asm_insn ("or\t%L1, %H1, %H1", operands);
      output_asm_insn ("sllx\t%H2, 32, %L1", operands);
      output_asm_insn ("or\t%L2, %L1, %L1", operands);
      sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
      output_asm_insn (mulstr, operands);
      return "srlx\t%L0, 32, %H0";
    }
  else
    {
      /* Assemble both sources in scratches %3 and %4.  */
      output_asm_insn ("sllx\t%H1, 32, %3", operands);
      output_asm_insn ("sllx\t%H2, 32, %4", operands);
      output_asm_insn ("or\t%L1, %3, %3", operands);
      output_asm_insn ("or\t%L2, %4, %4", operands);
      sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
      output_asm_insn (mulstr, operands);
      output_asm_insn ("srlx\t%3, 32, %H0", operands);
      return "mov\t%3, %L0";
    }
}
13189
13190 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13191 all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE
13192 and INNER_MODE are the modes describing TARGET. */
13193
13194 static void
vector_init_bshuffle(rtx target,rtx elt,machine_mode mode,machine_mode inner_mode)13195 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
13196 machine_mode inner_mode)
13197 {
13198 rtx t1, final_insn, sel;
13199 int bmask;
13200
13201 t1 = gen_reg_rtx (mode);
13202
13203 elt = convert_modes (SImode, inner_mode, elt, true);
13204 emit_move_insn (gen_lowpart(SImode, t1), elt);
13205
13206 switch (mode)
13207 {
13208 case E_V2SImode:
13209 final_insn = gen_bshufflev2si_vis (target, t1, t1);
13210 bmask = 0x45674567;
13211 break;
13212 case E_V4HImode:
13213 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
13214 bmask = 0x67676767;
13215 break;
13216 case E_V8QImode:
13217 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
13218 bmask = 0x77777777;
13219 break;
13220 default:
13221 gcc_unreachable ();
13222 }
13223
13224 sel = force_reg (SImode, GEN_INT (bmask));
13225 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
13226 emit_insn (final_insn);
13227 }
13228
13229 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13230 all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn. */
13231
13232 static void
vector_init_fpmerge(rtx target,rtx elt)13233 vector_init_fpmerge (rtx target, rtx elt)
13234 {
13235 rtx t1, t2, t2_low, t3, t3_low;
13236
13237 t1 = gen_reg_rtx (V4QImode);
13238 elt = convert_modes (SImode, QImode, elt, true);
13239 emit_move_insn (gen_lowpart (SImode, t1), elt);
13240
13241 t2 = gen_reg_rtx (V8QImode);
13242 t2_low = gen_lowpart (V4QImode, t2);
13243 emit_insn (gen_fpmerge_vis (t2, t1, t1));
13244
13245 t3 = gen_reg_rtx (V8QImode);
13246 t3_low = gen_lowpart (V4QImode, t3);
13247 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
13248
13249 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
13250 }
13251
13252 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13253 all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn. */
13254
13255 static void
vector_init_faligndata(rtx target,rtx elt)13256 vector_init_faligndata (rtx target, rtx elt)
13257 {
13258 rtx t1 = gen_reg_rtx (V4HImode);
13259 int i;
13260
13261 elt = convert_modes (SImode, HImode, elt, true);
13262 emit_move_insn (gen_lowpart (SImode, t1), elt);
13263
13264 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
13265 force_reg (SImode, GEN_INT (6)),
13266 const0_rtx));
13267
13268 for (i = 0; i < 4; i++)
13269 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
13270 }
13271
/* Emit code to initialize TARGET to values for individual fields VALS.

   Tries, in order: a single constant-vector move, direct integer-register
   moves for single- and double-word layouts, the VIS splat helpers for
   uniform 8-byte vectors, and finally a generic element-by-element store
   through a stack temporary.  */

void
sparc_expand_vector_init (rtx target, rtx vals)
{
  const machine_mode mode = GET_MODE (target);
  const machine_mode inner_mode = GET_MODE_INNER (mode);
  const int n_elts = GET_MODE_NUNITS (mode);
  int i, n_var = 0;
  bool all_same = true;
  rtx mem;

  /* Count non-constant elements and detect the uniform case.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx x = XVECEXP (vals, 0, i);
      if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
	n_var++;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  /* Fully constant: emit a single CONST_VECTOR move.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* Single-element vector: move the element through the matching
     integer mode.  */
  if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
    {
      if (GET_MODE_SIZE (inner_mode) == 4)
	{
	  emit_move_insn (gen_lowpart (SImode, target),
			  gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
	  return;
	}
      else if (GET_MODE_SIZE (inner_mode) == 8)
	{
	  emit_move_insn (gen_lowpart (DImode, target),
			  gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
	  return;
	}
    }
  /* Two word-sized elements: move them as the high and low word parts.  */
  else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
	   && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
    {
      emit_move_insn (gen_highpart (word_mode, target),
		      gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
      emit_move_insn (gen_lowpart (word_mode, target),
		      gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
      return;
    }

  /* Uniform 8-byte vector: splat with the best available VIS insn.  */
  if (all_same && GET_MODE_SIZE (mode) == 8)
    {
      if (TARGET_VIS2)
	{
	  vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
	  return;
	}
      if (mode == V8QImode)
	{
	  vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
	  return;
	}
      if (mode == V4HImode)
	{
	  vector_init_faligndata (target, XVECEXP (vals, 0, 0));
	  return;
	}
    }

  /* Fallback: store each element into a stack temporary, then load the
     whole vector from it.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
13351
/* Implement TARGET_SECONDARY_RELOAD.

   Return the register class needed as an intermediate when moving X of
   MODE into (IN_P) or out of a register of class RCLASS_I, or NO_REGS if
   no intermediate register is needed.  SRI may be filled in with a reload
   icode and/or an extra cost instead.  */

static reg_class_t
sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
			machine_mode mode, secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;

  sri->icode = CODE_FOR_nothing;
  sri->extra_cost = 0;

  /* We need a temporary when loading/storing a HImode/QImode value
     between memory and the FPU registers.  This can happen when combine puts
     a paradoxical subreg in a float/fix conversion insn.  */
  if (FP_REG_CLASS_P (rclass)
      && (mode == HImode || mode == QImode)
      && (GET_CODE (x) == MEM
	  || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
	      && true_regnum (x) == -1)))
    return GENERAL_REGS;

  /* On 32-bit we need a temporary when loading/storing a DFmode value
     between unaligned memory and the upper FPU registers.  */
  if (TARGET_ARCH32
      && rclass == EXTRA_FP_REGS
      && mode == DFmode
      && GET_CODE (x) == MEM
      && ! mem_min_alignment (x, 8))
    return FP_REGS;

  /* Non-PIC symbolic addresses in the medium/anywhere code models need a
     special reload pattern rather than an intermediate register.  */
  if (((TARGET_CM_MEDANY
	&& symbolic_operand (x, mode))
       || (TARGET_CM_EMBMEDANY
	   && text_segment_operand (x, mode)))
      && ! flag_pic)
    {
      if (in_p)
	sri->icode = direct_optab_handler (reload_in_optab, mode);
      else
	sri->icode = direct_optab_handler (reload_out_optab, mode);
      return NO_REGS;
    }

  if (TARGET_VIS3 && TARGET_ARCH32)
    {
      int regno = true_regnum (x);

      /* When using VIS3 fp<-->int register moves, on 32-bit we have
	 to move 8-byte values in 4-byte pieces.  This only works via
	 FP_REGS, and not via EXTRA_FP_REGS.  Therefore if we try to
	 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
	 an FP_REGS intermediate move.  */
      if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
	  || ((general_or_i64_p (rclass)
	       || rclass == GENERAL_OR_FP_REGS)
	      && SPARC_FP_REG_P (regno)))
	{
	  sri->extra_cost = 2;
	  return FP_REGS;
	}
    }

  return NO_REGS;
}
13416
13417 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
13418
13419 On SPARC when not VIS3 it is not possible to directly move data
13420 between GENERAL_REGS and FP_REGS. */
13421
13422 static bool
sparc_secondary_memory_needed(machine_mode mode,reg_class_t class1,reg_class_t class2)13423 sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1,
13424 reg_class_t class2)
13425 {
13426 return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2))
13427 && (! TARGET_VIS3
13428 || GET_MODE_SIZE (mode) > 8
13429 || GET_MODE_SIZE (mode) < 4));
13430 }
13431
13432 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
13433
13434 get_secondary_mem widens its argument to BITS_PER_WORD which loses on v9
13435 because the movsi and movsf patterns don't handle r/f moves.
13436 For v8 we copy the default definition. */
13437
13438 static machine_mode
sparc_secondary_memory_needed_mode(machine_mode mode)13439 sparc_secondary_memory_needed_mode (machine_mode mode)
13440 {
13441 if (TARGET_ARCH64)
13442 {
13443 if (GET_MODE_BITSIZE (mode) < 32)
13444 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
13445 return mode;
13446 }
13447 else
13448 {
13449 if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
13450 return mode_for_size (BITS_PER_WORD,
13451 GET_MODE_CLASS (mode), 0).require ();
13452 return mode;
13453 }
13454 }
13455
/* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
   OPERANDS[0] in MODE.  OPERANDS[1] is the operator of the condition.

   Return true on success, false if the comparison cannot be handled
   (64-bit comparison without 64-bit support).  */

bool
sparc_expand_conditional_move (machine_mode mode, rtx *operands)
{
  enum rtx_code rc = GET_CODE (operands[1]);
  machine_mode cmp_mode;
  rtx cc_reg, dst, cmp;

  cmp = operands[1];
  /* A DImode comparison needs 64-bit instructions.  */
  if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
    return false;

  /* Quad-float comparisons go through a library call when there is no
     hardware quad support; this rewrites CMP.  */
  if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
    cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);

  cmp_mode = GET_MODE (XEXP (cmp, 0));
  rc = GET_CODE (cmp);

  dst = operands[0];
  if (! rtx_equal_p (operands[2], dst)
      && ! rtx_equal_p (operands[3], dst))
    {
      /* Use a fresh destination if DST feeds the comparison, lest the
	 initial move below clobber a comparison input.  */
      if (reg_overlap_mentioned_p (dst, cmp))
	dst = gen_reg_rtx (mode);

      /* Preload the "else" value; the conditional move overwrites it
	 with operands[2] when the condition holds.  */
      emit_move_insn (dst, operands[3]);
    }
  else if (operands[2] == dst)
    {
      /* DST already holds the "then" value: move the "else" value under
	 the reversed condition instead.  */
      operands[2] = operands[3];

      if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
	rc = reverse_condition_maybe_unordered (rc);
      else
	rc = reverse_condition (rc);
    }

  /* Use the V9 register-contents conditional move when the comparison
     is a supported register-vs-zero DImode test; otherwise materialize
     a condition-code register.  */
  if (XEXP (cmp, 1) == const0_rtx
      && GET_CODE (XEXP (cmp, 0)) == REG
      && cmp_mode == DImode
      && v9_regcmp_p (rc))
    cc_reg = XEXP (cmp, 0);
  else
    cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));

  cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);

  emit_insn (gen_rtx_SET (dst,
			  gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));

  /* Copy back if we had to substitute a fresh destination.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  return true;
}
13513
13514 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
13515 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
13516 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
13517 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
13518 code to be used for the condition mask. */
13519
13520 void
sparc_expand_vcond(machine_mode mode,rtx * operands,int ccode,int fcode)13521 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
13522 {
13523 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
13524 enum rtx_code code = GET_CODE (operands[3]);
13525
13526 mask = gen_reg_rtx (Pmode);
13527 cop0 = operands[4];
13528 cop1 = operands[5];
13529 if (code == LT || code == GE)
13530 {
13531 rtx t;
13532
13533 code = swap_condition (code);
13534 t = cop0; cop0 = cop1; cop1 = t;
13535 }
13536
13537 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
13538
13539 fcmp = gen_rtx_UNSPEC (Pmode,
13540 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
13541 fcode);
13542
13543 cmask = gen_rtx_UNSPEC (DImode,
13544 gen_rtvec (2, mask, gsr),
13545 ccode);
13546
13547 bshuf = gen_rtx_UNSPEC (mode,
13548 gen_rtvec (3, operands[1], operands[2], gsr),
13549 UNSPEC_BSHUFFLE);
13550
13551 emit_insn (gen_rtx_SET (mask, fcmp));
13552 emit_insn (gen_rtx_SET (gsr, cmask));
13553
13554 emit_insn (gen_rtx_SET (operands[0], bshuf));
13555 }
13556
13557 /* On sparc, any mode which naturally allocates into the float
13558 registers should return 4 here. */
13559
13560 unsigned int
sparc_regmode_natural_size(machine_mode mode)13561 sparc_regmode_natural_size (machine_mode mode)
13562 {
13563 int size = UNITS_PER_WORD;
13564
13565 if (TARGET_ARCH64)
13566 {
13567 enum mode_class mclass = GET_MODE_CLASS (mode);
13568
13569 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
13570 size = 4;
13571 }
13572
13573 return size;
13574 }
13575
13576 /* Implement TARGET_HARD_REGNO_NREGS.
13577
13578 On SPARC, ordinary registers hold 32 bits worth; this means both
13579 integer and floating point registers. On v9, integer regs hold 64
13580 bits worth; floating point regs hold 32 bits worth (this includes the
13581 new fp regs as even the odd ones are included in the hard register
13582 count). */
13583
13584 static unsigned int
sparc_hard_regno_nregs(unsigned int regno,machine_mode mode)13585 sparc_hard_regno_nregs (unsigned int regno, machine_mode mode)
13586 {
13587 if (regno == SPARC_GSR_REG)
13588 return 1;
13589 if (TARGET_ARCH64)
13590 {
13591 if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM)
13592 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13593 return CEIL (GET_MODE_SIZE (mode), 4);
13594 }
13595 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13596 }
13597
13598 /* Implement TARGET_HARD_REGNO_MODE_OK.
13599
13600 ??? Because of the funny way we pass parameters we should allow certain
13601 ??? types of float/complex values to be in integer registers during
13602 ??? RTL generation. This only matters on arch32. */
13603
13604 static bool
sparc_hard_regno_mode_ok(unsigned int regno,machine_mode mode)13605 sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
13606 {
13607 return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
13608 }
13609
13610 /* Implement TARGET_MODES_TIEABLE_P.
13611
13612 For V9 we have to deal with the fact that only the lower 32 floating
13613 point registers are 32-bit addressable. */
13614
13615 static bool
sparc_modes_tieable_p(machine_mode mode1,machine_mode mode2)13616 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
13617 {
13618 enum mode_class mclass1, mclass2;
13619 unsigned short size1, size2;
13620
13621 if (mode1 == mode2)
13622 return true;
13623
13624 mclass1 = GET_MODE_CLASS (mode1);
13625 mclass2 = GET_MODE_CLASS (mode2);
13626 if (mclass1 != mclass2)
13627 return false;
13628
13629 if (! TARGET_V9)
13630 return true;
13631
13632 /* Classes are the same and we are V9 so we have to deal with upper
13633 vs. lower floating point registers. If one of the modes is a
13634 4-byte mode, and the other is not, we have to mark them as not
13635 tieable because only the lower 32 floating point register are
13636 addressable 32-bits at a time.
13637
13638 We can't just test explicitly for SFmode, otherwise we won't
13639 cover the vector mode cases properly. */
13640
13641 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
13642 return true;
13643
13644 size1 = GET_MODE_SIZE (mode1);
13645 size2 = GET_MODE_SIZE (mode2);
13646 if ((size1 > 4 && size2 == 4)
13647 || (size2 > 4 && size1 == 4))
13648 return false;
13649
13650 return true;
13651 }
13652
13653 /* Implement TARGET_CSTORE_MODE. */
13654
13655 static scalar_int_mode
sparc_cstore_mode(enum insn_code icode ATTRIBUTE_UNUSED)13656 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
13657 {
13658 return (TARGET_ARCH64 ? DImode : SImode);
13659 }
13660
/* Return the compound expression made of T1 and T2, i.e. a GENERIC tree
   that evaluates T1 for its side effects and then T2.  */

static inline tree
compound_expr (tree t1, tree t2)
{
  return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
}
13668
/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.

   Fill in *HOLD, *CLEAR and *UPDATE with GENERIC trees that respectively
   save and mask the FP environment, clear pending exceptions, and restore
   the environment while re-raising any exceptions that occurred, all in
   terms of the target's store-fsr/load-fsr builtins.  No-op without an
   FPU.  */

static void
sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_FPU)
    return;

  /* Bit masks for the accrued-exception (aexc) and trap-enable (TEM)
     fields of the %fsr register.  */
  const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
  const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;

  /* We generate the equivalent of feholdexcept (&fenv_var):

       unsigned int fenv_var;
       __builtin_store_fsr (&fenv_var);

       unsigned int tmp1_var;
       tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);

       __builtin_load_fsr (&tmp1_var);  */

  tree fenv_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (fenv_var) = 1;
  tree fenv_addr = build_fold_addr_expr (fenv_var);
  tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
  /* TARGET_EXPR sequences the store-fsr call before fenv_var is read.  */
  tree hold_stfsr
    = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
	      build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);

  tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (tmp1_var) = 1;
  tree masked_fenv_var
    = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
	      build_int_cst (unsigned_type_node,
			     ~(accrued_exception_mask | trap_enable_mask)));
  tree hold_mask
    = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
	      NULL_TREE, NULL_TREE);

  tree tmp1_addr = build_fold_addr_expr (tmp1_var);
  tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
  tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);

  *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);

  /* We reload the value of tmp1_var to clear the exceptions:

       __builtin_load_fsr (&tmp1_var);  */

  *clear = build_call_expr (ldfsr, 1, tmp1_addr);

  /* We generate the equivalent of feupdateenv (&fenv_var):

       unsigned int tmp2_var;
       __builtin_store_fsr (&tmp2_var);

       __builtin_load_fsr (&fenv_var);

       if (SPARC_LOW_FE_EXCEPT_VALUES)
	 tmp2_var >>= 5;
       __atomic_feraiseexcept ((int) tmp2_var);  */

  tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (tmp2_var) = 1;
  tree tmp2_addr = build_fold_addr_expr (tmp2_var);
  tree update_stfsr
    = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
	      build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);

  tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);

  tree atomic_feraiseexcept
    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  tree update_call
    = build_call_expr (atomic_feraiseexcept, 1,
		       fold_convert (integer_type_node, tmp2_var));

  /* When the OS expects the exception flags in the low bits, shift the
     aexc field down before raising.  */
  if (SPARC_LOW_FE_EXCEPT_VALUES)
    {
      tree shifted_tmp2_var
	= build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
		  build_int_cst (unsigned_type_node, 5));
      tree update_shift
	= build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
      update_call = compound_expr (update_shift, update_call);
    }

  *update
    = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
}
13759
13760 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. Borrowed from the PA port.
13761
13762 SImode loads to floating-point registers are not zero-extended.
13763 The definition for LOAD_EXTEND_OP specifies that integer loads
13764 narrower than BITS_PER_WORD will be zero-extended. As a result,
13765 we inhibit changes from SImode unless they are to a mode that is
13766 identical in size.
13767
13768 Likewise for SFmode, since word-mode paradoxical subregs are
13769 problematic on big-endian architectures. */
13770
13771 static bool
sparc_can_change_mode_class(machine_mode from,machine_mode to,reg_class_t rclass)13772 sparc_can_change_mode_class (machine_mode from, machine_mode to,
13773 reg_class_t rclass)
13774 {
13775 if (TARGET_ARCH64
13776 && GET_MODE_SIZE (from) == 4
13777 && GET_MODE_SIZE (to) != 4)
13778 return !reg_classes_intersect_p (rclass, FP_REGS);
13779 return true;
13780 }
13781
13782 /* Implement TARGET_CONSTANT_ALIGNMENT. */
13783
13784 static HOST_WIDE_INT
sparc_constant_alignment(const_tree exp,HOST_WIDE_INT align)13785 sparc_constant_alignment (const_tree exp, HOST_WIDE_INT align)
13786 {
13787 if (TREE_CODE (exp) == STRING_CST)
13788 return MAX (align, FASTEST_ALIGNMENT);
13789 return align;
13790 }
13791
13792 #include "gt-sparc.h"
13793