1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2021 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "memmodel.h"
33 #include "gimple.h"
34 #include "df.h"
35 #include "tm_p.h"
36 #include "stringpool.h"
37 #include "attribs.h"
38 #include "expmed.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "diagnostic-core.h"
44 #include "alias.h"
45 #include "fold-const.h"
46 #include "stor-layout.h"
47 #include "calls.h"
48 #include "varasm.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "debug.h"
54 #include "cfgrtl.h"
55 #include "common/common-target.h"
56 #include "gimplify.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "tree-pass.h"
60 #include "context.h"
61 #include "builtins.h"
62 #include "tree-vector-builder.h"
63 #include "opts.h"
64
65 /* This file should be included last. */
66 #include "target-def.h"
67
68 /* Processor costs */
69
70 struct processor_costs {
71 /* Integer load */
72 const int int_load;
73
74 /* Integer signed load */
75 const int int_sload;
76
77 /* Integer zeroed load */
78 const int int_zload;
79
80 /* Float load */
81 const int float_load;
82
83 /* fmov, fneg, fabs */
84 const int float_move;
85
86 /* fadd, fsub */
87 const int float_plusminus;
88
89 /* fcmp */
90 const int float_cmp;
91
92 /* fmov, fmovr */
93 const int float_cmove;
94
95 /* fmul */
96 const int float_mul;
97
98 /* fdivs */
99 const int float_div_sf;
100
101 /* fdivd */
102 const int float_div_df;
103
104 /* fsqrts */
105 const int float_sqrt_sf;
106
107 /* fsqrtd */
108 const int float_sqrt_df;
109
110 /* umul/smul */
111 const int int_mul;
112
113 /* mulX */
114 const int int_mulX;
115
116 /* integer multiply cost for each bit set past the most
117 significant 3, so the formula for multiply cost becomes:
118
119 if (rs1 < 0)
120 highest_bit = highest_clear_bit(rs1);
121 else
122 highest_bit = highest_set_bit(rs1);
123 if (highest_bit < 3)
124 highest_bit = 3;
125 cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
126
127 A value of zero indicates that the multiply cost is fixed,
128 not variable. */
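/* For instance, with an int_mul_bit_factor of 2 (the UltraSPARC value below),
a multiplier whose highest set bit is bit 9 would be costed by the formula
above as int_mul + (9 - 3) / 2 = int_mul + 3.  This is purely an illustrative
reading of the formula, not a measured figure. */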
129 const int int_mul_bit_factor;
130
131 /* udiv/sdiv */
132 const int int_div;
133
134 /* divX */
135 const int int_divX;
136
137 /* movcc, movr */
138 const int int_cmove;
139
140 /* penalty for shifts, due to scheduling rules etc. */
141 const int shift_penalty;
142
143 /* cost of a (predictable) branch. */
144 const int branch_cost;
145 };
146
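/* Note: the latencies in the tables below are wrapped in COSTS_N_INSNS so
that they are expressed in the cost units expected by the RTL cost hooks
(see rtl.h); the raw numbers are approximate cycle counts. */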
147 static const
148 struct processor_costs cypress_costs = {
149 COSTS_N_INSNS (2), /* int load */
150 COSTS_N_INSNS (2), /* int signed load */
151 COSTS_N_INSNS (2), /* int zeroed load */
152 COSTS_N_INSNS (2), /* float load */
153 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
154 COSTS_N_INSNS (5), /* fadd, fsub */
155 COSTS_N_INSNS (1), /* fcmp */
156 COSTS_N_INSNS (1), /* fmov, fmovr */
157 COSTS_N_INSNS (7), /* fmul */
158 COSTS_N_INSNS (37), /* fdivs */
159 COSTS_N_INSNS (37), /* fdivd */
160 COSTS_N_INSNS (63), /* fsqrts */
161 COSTS_N_INSNS (63), /* fsqrtd */
162 COSTS_N_INSNS (1), /* imul */
163 COSTS_N_INSNS (1), /* imulX */
164 0, /* imul bit factor */
165 COSTS_N_INSNS (1), /* idiv */
166 COSTS_N_INSNS (1), /* idivX */
167 COSTS_N_INSNS (1), /* movcc/movr */
168 0, /* shift penalty */
169 3 /* branch cost */
170 };
171
172 static const
173 struct processor_costs supersparc_costs = {
174 COSTS_N_INSNS (1), /* int load */
175 COSTS_N_INSNS (1), /* int signed load */
176 COSTS_N_INSNS (1), /* int zeroed load */
177 COSTS_N_INSNS (0), /* float load */
178 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
179 COSTS_N_INSNS (3), /* fadd, fsub */
180 COSTS_N_INSNS (3), /* fcmp */
181 COSTS_N_INSNS (1), /* fmov, fmovr */
182 COSTS_N_INSNS (3), /* fmul */
183 COSTS_N_INSNS (6), /* fdivs */
184 COSTS_N_INSNS (9), /* fdivd */
185 COSTS_N_INSNS (12), /* fsqrts */
186 COSTS_N_INSNS (12), /* fsqrtd */
187 COSTS_N_INSNS (4), /* imul */
188 COSTS_N_INSNS (4), /* imulX */
189 0, /* imul bit factor */
190 COSTS_N_INSNS (4), /* idiv */
191 COSTS_N_INSNS (4), /* idivX */
192 COSTS_N_INSNS (1), /* movcc/movr */
193 1, /* shift penalty */
194 3 /* branch cost */
195 };
196
197 static const
198 struct processor_costs hypersparc_costs = {
199 COSTS_N_INSNS (1), /* int load */
200 COSTS_N_INSNS (1), /* int signed load */
201 COSTS_N_INSNS (1), /* int zeroed load */
202 COSTS_N_INSNS (1), /* float load */
203 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
204 COSTS_N_INSNS (1), /* fadd, fsub */
205 COSTS_N_INSNS (1), /* fcmp */
206 COSTS_N_INSNS (1), /* fmov, fmovr */
207 COSTS_N_INSNS (1), /* fmul */
208 COSTS_N_INSNS (8), /* fdivs */
209 COSTS_N_INSNS (12), /* fdivd */
210 COSTS_N_INSNS (17), /* fsqrts */
211 COSTS_N_INSNS (17), /* fsqrtd */
212 COSTS_N_INSNS (17), /* imul */
213 COSTS_N_INSNS (17), /* imulX */
214 0, /* imul bit factor */
215 COSTS_N_INSNS (17), /* idiv */
216 COSTS_N_INSNS (17), /* idivX */
217 COSTS_N_INSNS (1), /* movcc/movr */
218 0, /* shift penalty */
219 3 /* branch cost */
220 };
221
222 static const
223 struct processor_costs leon_costs = {
224 COSTS_N_INSNS (1), /* int load */
225 COSTS_N_INSNS (1), /* int signed load */
226 COSTS_N_INSNS (1), /* int zeroed load */
227 COSTS_N_INSNS (1), /* float load */
228 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
229 COSTS_N_INSNS (1), /* fadd, fsub */
230 COSTS_N_INSNS (1), /* fcmp */
231 COSTS_N_INSNS (1), /* fmov, fmovr */
232 COSTS_N_INSNS (1), /* fmul */
233 COSTS_N_INSNS (15), /* fdivs */
234 COSTS_N_INSNS (15), /* fdivd */
235 COSTS_N_INSNS (23), /* fsqrts */
236 COSTS_N_INSNS (23), /* fsqrtd */
237 COSTS_N_INSNS (5), /* imul */
238 COSTS_N_INSNS (5), /* imulX */
239 0, /* imul bit factor */
240 COSTS_N_INSNS (5), /* idiv */
241 COSTS_N_INSNS (5), /* idivX */
242 COSTS_N_INSNS (1), /* movcc/movr */
243 0, /* shift penalty */
244 3 /* branch cost */
245 };
246
247 static const
248 struct processor_costs leon3_costs = {
249 COSTS_N_INSNS (1), /* int load */
250 COSTS_N_INSNS (1), /* int signed load */
251 COSTS_N_INSNS (1), /* int zeroed load */
252 COSTS_N_INSNS (1), /* float load */
253 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
254 COSTS_N_INSNS (1), /* fadd, fsub */
255 COSTS_N_INSNS (1), /* fcmp */
256 COSTS_N_INSNS (1), /* fmov, fmovr */
257 COSTS_N_INSNS (1), /* fmul */
258 COSTS_N_INSNS (14), /* fdivs */
259 COSTS_N_INSNS (15), /* fdivd */
260 COSTS_N_INSNS (22), /* fsqrts */
261 COSTS_N_INSNS (23), /* fsqrtd */
262 COSTS_N_INSNS (5), /* imul */
263 COSTS_N_INSNS (5), /* imulX */
264 0, /* imul bit factor */
265 COSTS_N_INSNS (35), /* idiv */
266 COSTS_N_INSNS (35), /* idivX */
267 COSTS_N_INSNS (1), /* movcc/movr */
268 0, /* shift penalty */
269 3 /* branch cost */
270 };
271
272 static const
273 struct processor_costs sparclet_costs = {
274 COSTS_N_INSNS (3), /* int load */
275 COSTS_N_INSNS (3), /* int signed load */
276 COSTS_N_INSNS (1), /* int zeroed load */
277 COSTS_N_INSNS (1), /* float load */
278 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
279 COSTS_N_INSNS (1), /* fadd, fsub */
280 COSTS_N_INSNS (1), /* fcmp */
281 COSTS_N_INSNS (1), /* fmov, fmovr */
282 COSTS_N_INSNS (1), /* fmul */
283 COSTS_N_INSNS (1), /* fdivs */
284 COSTS_N_INSNS (1), /* fdivd */
285 COSTS_N_INSNS (1), /* fsqrts */
286 COSTS_N_INSNS (1), /* fsqrtd */
287 COSTS_N_INSNS (5), /* imul */
288 COSTS_N_INSNS (5), /* imulX */
289 0, /* imul bit factor */
290 COSTS_N_INSNS (5), /* idiv */
291 COSTS_N_INSNS (5), /* idivX */
292 COSTS_N_INSNS (1), /* movcc/movr */
293 0, /* shift penalty */
294 3 /* branch cost */
295 };
296
297 static const
298 struct processor_costs ultrasparc_costs = {
299 COSTS_N_INSNS (2), /* int load */
300 COSTS_N_INSNS (3), /* int signed load */
301 COSTS_N_INSNS (2), /* int zeroed load */
302 COSTS_N_INSNS (2), /* float load */
303 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
304 COSTS_N_INSNS (4), /* fadd, fsub */
305 COSTS_N_INSNS (1), /* fcmp */
306 COSTS_N_INSNS (2), /* fmov, fmovr */
307 COSTS_N_INSNS (4), /* fmul */
308 COSTS_N_INSNS (13), /* fdivs */
309 COSTS_N_INSNS (23), /* fdivd */
310 COSTS_N_INSNS (13), /* fsqrts */
311 COSTS_N_INSNS (23), /* fsqrtd */
312 COSTS_N_INSNS (4), /* imul */
313 COSTS_N_INSNS (4), /* imulX */
314 2, /* imul bit factor */
315 COSTS_N_INSNS (37), /* idiv */
316 COSTS_N_INSNS (68), /* idivX */
317 COSTS_N_INSNS (2), /* movcc/movr */
318 2, /* shift penalty */
319 2 /* branch cost */
320 };
321
322 static const
323 struct processor_costs ultrasparc3_costs = {
324 COSTS_N_INSNS (2), /* int load */
325 COSTS_N_INSNS (3), /* int signed load */
326 COSTS_N_INSNS (3), /* int zeroed load */
327 COSTS_N_INSNS (2), /* float load */
328 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
329 COSTS_N_INSNS (4), /* fadd, fsub */
330 COSTS_N_INSNS (5), /* fcmp */
331 COSTS_N_INSNS (3), /* fmov, fmovr */
332 COSTS_N_INSNS (4), /* fmul */
333 COSTS_N_INSNS (17), /* fdivs */
334 COSTS_N_INSNS (20), /* fdivd */
335 COSTS_N_INSNS (20), /* fsqrts */
336 COSTS_N_INSNS (29), /* fsqrtd */
337 COSTS_N_INSNS (6), /* imul */
338 COSTS_N_INSNS (6), /* imulX */
339 0, /* imul bit factor */
340 COSTS_N_INSNS (40), /* idiv */
341 COSTS_N_INSNS (71), /* idivX */
342 COSTS_N_INSNS (2), /* movcc/movr */
343 0, /* shift penalty */
344 2 /* branch cost */
345 };
346
347 static const
348 struct processor_costs niagara_costs = {
349 COSTS_N_INSNS (3), /* int load */
350 COSTS_N_INSNS (3), /* int signed load */
351 COSTS_N_INSNS (3), /* int zeroed load */
352 COSTS_N_INSNS (9), /* float load */
353 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
354 COSTS_N_INSNS (8), /* fadd, fsub */
355 COSTS_N_INSNS (26), /* fcmp */
356 COSTS_N_INSNS (8), /* fmov, fmovr */
357 COSTS_N_INSNS (29), /* fmul */
358 COSTS_N_INSNS (54), /* fdivs */
359 COSTS_N_INSNS (83), /* fdivd */
360 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
361 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
362 COSTS_N_INSNS (11), /* imul */
363 COSTS_N_INSNS (11), /* imulX */
364 0, /* imul bit factor */
365 COSTS_N_INSNS (72), /* idiv */
366 COSTS_N_INSNS (72), /* idivX */
367 COSTS_N_INSNS (1), /* movcc/movr */
368 0, /* shift penalty */
369 4 /* branch cost */
370 };
371
372 static const
373 struct processor_costs niagara2_costs = {
374 COSTS_N_INSNS (3), /* int load */
375 COSTS_N_INSNS (3), /* int signed load */
376 COSTS_N_INSNS (3), /* int zeroed load */
377 COSTS_N_INSNS (3), /* float load */
378 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
379 COSTS_N_INSNS (6), /* fadd, fsub */
380 COSTS_N_INSNS (6), /* fcmp */
381 COSTS_N_INSNS (6), /* fmov, fmovr */
382 COSTS_N_INSNS (6), /* fmul */
383 COSTS_N_INSNS (19), /* fdivs */
384 COSTS_N_INSNS (33), /* fdivd */
385 COSTS_N_INSNS (19), /* fsqrts */
386 COSTS_N_INSNS (33), /* fsqrtd */
387 COSTS_N_INSNS (5), /* imul */
388 COSTS_N_INSNS (5), /* imulX */
389 0, /* imul bit factor */
390 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
391 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
392 COSTS_N_INSNS (1), /* movcc/movr */
393 0, /* shift penalty */
394 5 /* branch cost */
395 };
396
397 static const
398 struct processor_costs niagara3_costs = {
399 COSTS_N_INSNS (3), /* int load */
400 COSTS_N_INSNS (3), /* int signed load */
401 COSTS_N_INSNS (3), /* int zeroed load */
402 COSTS_N_INSNS (3), /* float load */
403 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
404 COSTS_N_INSNS (9), /* fadd, fsub */
405 COSTS_N_INSNS (9), /* fcmp */
406 COSTS_N_INSNS (9), /* fmov, fmovr */
407 COSTS_N_INSNS (9), /* fmul */
408 COSTS_N_INSNS (23), /* fdivs */
409 COSTS_N_INSNS (37), /* fdivd */
410 COSTS_N_INSNS (23), /* fsqrts */
411 COSTS_N_INSNS (37), /* fsqrtd */
412 COSTS_N_INSNS (9), /* imul */
413 COSTS_N_INSNS (9), /* imulX */
414 0, /* imul bit factor */
415 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
416 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
417 COSTS_N_INSNS (1), /* movcc/movr */
418 0, /* shift penalty */
419 5 /* branch cost */
420 };
421
422 static const
423 struct processor_costs niagara4_costs = {
424 COSTS_N_INSNS (5), /* int load */
425 COSTS_N_INSNS (5), /* int signed load */
426 COSTS_N_INSNS (5), /* int zeroed load */
427 COSTS_N_INSNS (5), /* float load */
428 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
429 COSTS_N_INSNS (11), /* fadd, fsub */
430 COSTS_N_INSNS (11), /* fcmp */
431 COSTS_N_INSNS (11), /* fmov, fmovr */
432 COSTS_N_INSNS (11), /* fmul */
433 COSTS_N_INSNS (24), /* fdivs */
434 COSTS_N_INSNS (37), /* fdivd */
435 COSTS_N_INSNS (24), /* fsqrts */
436 COSTS_N_INSNS (37), /* fsqrtd */
437 COSTS_N_INSNS (12), /* imul */
438 COSTS_N_INSNS (12), /* imulX */
439 0, /* imul bit factor */
440 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
441 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
442 COSTS_N_INSNS (1), /* movcc/movr */
443 0, /* shift penalty */
444 2 /* branch cost */
445 };
446
447 static const
448 struct processor_costs niagara7_costs = {
449 COSTS_N_INSNS (5), /* int load */
450 COSTS_N_INSNS (5), /* int signed load */
451 COSTS_N_INSNS (5), /* int zeroed load */
452 COSTS_N_INSNS (5), /* float load */
453 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
454 COSTS_N_INSNS (11), /* fadd, fsub */
455 COSTS_N_INSNS (11), /* fcmp */
456 COSTS_N_INSNS (11), /* fmov, fmovr */
457 COSTS_N_INSNS (11), /* fmul */
458 COSTS_N_INSNS (24), /* fdivs */
459 COSTS_N_INSNS (37), /* fdivd */
460 COSTS_N_INSNS (24), /* fsqrts */
461 COSTS_N_INSNS (37), /* fsqrtd */
462 COSTS_N_INSNS (12), /* imul */
463 COSTS_N_INSNS (12), /* imulX */
464 0, /* imul bit factor */
465 COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
466 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
467 COSTS_N_INSNS (1), /* movcc/movr */
468 0, /* shift penalty */
469 1 /* branch cost */
470 };
471
472 static const
473 struct processor_costs m8_costs = {
474 COSTS_N_INSNS (3), /* int load */
475 COSTS_N_INSNS (3), /* int signed load */
476 COSTS_N_INSNS (3), /* int zeroed load */
477 COSTS_N_INSNS (3), /* float load */
478 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
479 COSTS_N_INSNS (9), /* fadd, fsub */
480 COSTS_N_INSNS (9), /* fcmp */
481 COSTS_N_INSNS (9), /* fmov, fmovr */
482 COSTS_N_INSNS (9), /* fmul */
483 COSTS_N_INSNS (26), /* fdivs */
484 COSTS_N_INSNS (30), /* fdivd */
485 COSTS_N_INSNS (33), /* fsqrts */
486 COSTS_N_INSNS (41), /* fsqrtd */
487 COSTS_N_INSNS (12), /* imul */
488 COSTS_N_INSNS (10), /* imulX */
489 0, /* imul bit factor */
490 COSTS_N_INSNS (57), /* udiv/sdiv */
491 COSTS_N_INSNS (30), /* udivx/sdivx */
492 COSTS_N_INSNS (1), /* movcc/movr */
493 0, /* shift penalty */
494 1 /* branch cost */
495 };
496
497 static const struct processor_costs *sparc_costs = &cypress_costs;
498
499 #ifdef HAVE_AS_RELAX_OPTION
500 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
501 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
502 With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out
503 whether something branches between the sethi and jmp. */
504 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
505 #else
506 #define LEAF_SIBCALL_SLOT_RESERVED_P \
507 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
508 #endif
509
510 /* Vector to say how input registers are mapped to output registers.
511 HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
512 eliminate it. You must use -fomit-frame-pointer to get that. */
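/* Concretely, the global registers and %sp keep their numbers, the in
registers %i0-%i5 (hard regs 24-29) are renamed to the out registers
%o0-%o5 (hard regs 8-13), %i7 (31) becomes %o7 (15), and every register
that may not appear in a leaf function is mapped to -1. */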
513 char leaf_reg_remap[] =
514 { 0, 1, 2, 3, 4, 5, 6, 7,
515 -1, -1, -1, -1, -1, -1, 14, -1,
516 -1, -1, -1, -1, -1, -1, -1, -1,
517 8, 9, 10, 11, 12, 13, -1, 15,
518
519 32, 33, 34, 35, 36, 37, 38, 39,
520 40, 41, 42, 43, 44, 45, 46, 47,
521 48, 49, 50, 51, 52, 53, 54, 55,
522 56, 57, 58, 59, 60, 61, 62, 63,
523 64, 65, 66, 67, 68, 69, 70, 71,
524 72, 73, 74, 75, 76, 77, 78, 79,
525 80, 81, 82, 83, 84, 85, 86, 87,
526 88, 89, 90, 91, 92, 93, 94, 95,
527 96, 97, 98, 99, 100, 101, 102};
528
529 /* Vector, indexed by hard register number, which contains 1
530 for a register that is allowable in a candidate for leaf
531 function treatment. */
532 char sparc_leaf_regs[] =
533 { 1, 1, 1, 1, 1, 1, 1, 1,
534 0, 0, 0, 0, 0, 0, 1, 0,
535 0, 0, 0, 0, 0, 0, 0, 0,
536 1, 1, 1, 1, 1, 1, 0, 1,
537 1, 1, 1, 1, 1, 1, 1, 1,
538 1, 1, 1, 1, 1, 1, 1, 1,
539 1, 1, 1, 1, 1, 1, 1, 1,
540 1, 1, 1, 1, 1, 1, 1, 1,
541 1, 1, 1, 1, 1, 1, 1, 1,
542 1, 1, 1, 1, 1, 1, 1, 1,
543 1, 1, 1, 1, 1, 1, 1, 1,
544 1, 1, 1, 1, 1, 1, 1, 1,
545 1, 1, 1, 1, 1, 1, 1};
546
547 struct GTY(()) machine_function
548 {
549 /* Size of the frame of the function. */
550 HOST_WIDE_INT frame_size;
551
552 /* Size of the frame of the function minus the register window save area
553 and the outgoing argument area. */
554 HOST_WIDE_INT apparent_frame_size;
555
556 /* Register we pretend the frame pointer is allocated to. Normally, this
557 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
558 record "offset" separately as it may be too big for (reg + disp). */
559 rtx frame_base_reg;
560 HOST_WIDE_INT frame_base_offset;
561
562 /* Number of global or FP registers to be saved (as 4-byte quantities). */
563 int n_global_fp_regs;
564
565 /* True if the current function is leaf and uses only leaf regs,
566 so that the SPARC leaf function optimization can be applied.
567 Private version of crtl->uses_only_leaf_regs, see
568 sparc_expand_prologue for the rationale. */
569 int leaf_function_p;
570
571 /* True if the prologue saves local or in registers. */
572 bool save_local_in_regs_p;
573
574 /* True if the data calculated by sparc_expand_prologue are valid. */
575 bool prologue_data_valid_p;
576 };
577
578 #define sparc_frame_size cfun->machine->frame_size
579 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size
580 #define sparc_frame_base_reg cfun->machine->frame_base_reg
581 #define sparc_frame_base_offset cfun->machine->frame_base_offset
582 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
583 #define sparc_leaf_function_p cfun->machine->leaf_function_p
584 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
585 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
586
587 /* 1 if the next opcode is to be specially indented. */
588 int sparc_indent_opcode = 0;
589
590 static void sparc_option_override (void);
591 static void sparc_init_modes (void);
592 static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
593 const_tree, bool, bool, int *, int *);
594
595 static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
596 static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
597
598 static void sparc_emit_set_const32 (rtx, rtx);
599 static void sparc_emit_set_const64 (rtx, rtx);
600 static void sparc_output_addr_vec (rtx);
601 static void sparc_output_addr_diff_vec (rtx);
602 static void sparc_output_deferred_case_vectors (void);
603 static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
604 static bool sparc_legitimate_constant_p (machine_mode, rtx);
605 static rtx sparc_builtin_saveregs (void);
606 static int epilogue_renumber (rtx *, int);
607 static bool sparc_assemble_integer (rtx, unsigned int, int);
608 static int set_extends (rtx_insn *);
609 static void sparc_asm_function_prologue (FILE *);
610 static void sparc_asm_function_epilogue (FILE *);
611 #ifdef TARGET_SOLARIS
612 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
613 tree) ATTRIBUTE_UNUSED;
614 #endif
615 static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
616 static int sparc_issue_rate (void);
617 static void sparc_sched_init (FILE *, int, int);
618 static int sparc_use_sched_lookahead (void);
619
620 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
621 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
622 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
623 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
624 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
625
626 static bool sparc_function_ok_for_sibcall (tree, tree);
627 static void sparc_init_libfuncs (void);
628 static void sparc_init_builtins (void);
629 static void sparc_fpu_init_builtins (void);
630 static void sparc_vis_init_builtins (void);
631 static tree sparc_builtin_decl (unsigned, bool);
632 static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
633 static tree sparc_fold_builtin (tree, int, tree *, bool);
634 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
635 HOST_WIDE_INT, tree);
636 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
637 HOST_WIDE_INT, const_tree);
638 static struct machine_function * sparc_init_machine_status (void);
639 static bool sparc_cannot_force_const_mem (machine_mode, rtx);
640 static rtx sparc_tls_get_addr (void);
641 static rtx sparc_tls_got (void);
642 static int sparc_register_move_cost (machine_mode,
643 reg_class_t, reg_class_t);
644 static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
645 static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
646 int *, const_tree, int);
647 static bool sparc_strict_argument_naming (cumulative_args_t);
648 static void sparc_va_start (tree, rtx);
649 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
650 static bool sparc_vector_mode_supported_p (machine_mode);
651 static bool sparc_tls_referenced_p (rtx);
652 static rtx sparc_legitimize_tls_address (rtx);
653 static rtx sparc_legitimize_pic_address (rtx, rtx);
654 static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
655 static rtx sparc_delegitimize_address (rtx);
656 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
657 static bool sparc_pass_by_reference (cumulative_args_t,
658 const function_arg_info &);
659 static void sparc_function_arg_advance (cumulative_args_t,
660 const function_arg_info &);
661 static rtx sparc_function_arg (cumulative_args_t, const function_arg_info &);
662 static rtx sparc_function_incoming_arg (cumulative_args_t,
663 const function_arg_info &);
664 static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
665 static unsigned int sparc_function_arg_boundary (machine_mode,
666 const_tree);
667 static int sparc_arg_partial_bytes (cumulative_args_t,
668 const function_arg_info &);
669 static bool sparc_return_in_memory (const_tree, const_tree);
670 static rtx sparc_struct_value_rtx (tree, int);
671 static rtx sparc_function_value (const_tree, const_tree, bool);
672 static rtx sparc_libcall_value (machine_mode, const_rtx);
673 static bool sparc_function_value_regno_p (const unsigned int);
674 static unsigned HOST_WIDE_INT sparc_asan_shadow_offset (void);
675 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
676 static void sparc_file_end (void);
677 static bool sparc_frame_pointer_required (void);
678 static bool sparc_can_eliminate (const int, const int);
679 static void sparc_conditional_register_usage (void);
680 static bool sparc_use_pseudo_pic_reg (void);
681 static void sparc_init_pic_reg (void);
682 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
683 static const char *sparc_mangle_type (const_tree);
684 #endif
685 static void sparc_trampoline_init (rtx, tree, rtx);
686 static machine_mode sparc_preferred_simd_mode (scalar_mode);
687 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
688 static bool sparc_lra_p (void);
689 static bool sparc_print_operand_punct_valid_p (unsigned char);
690 static void sparc_print_operand (FILE *, rtx, int);
691 static void sparc_print_operand_address (FILE *, machine_mode, rtx);
692 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
693 machine_mode,
694 secondary_reload_info *);
695 static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
696 reg_class_t);
697 static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
698 static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
699 static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
700 static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
701 static unsigned int sparc_min_arithmetic_precision (void);
702 static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
703 static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
704 static bool sparc_modes_tieable_p (machine_mode, machine_mode);
705 static bool sparc_can_change_mode_class (machine_mode, machine_mode,
706 reg_class_t);
707 static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);
708 static bool sparc_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
709 const vec_perm_indices &);
710 static bool sparc_can_follow_jump (const rtx_insn *, const rtx_insn *);
711 static HARD_REG_SET sparc_zero_call_used_regs (HARD_REG_SET);
712
713 #ifdef SUBTARGET_ATTRIBUTE_TABLE
714 /* Table of valid machine attributes. */
715 static const struct attribute_spec sparc_attribute_table[] =
716 {
717 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
718 do_diagnostic, handler, exclude } */
719 SUBTARGET_ATTRIBUTE_TABLE,
720 { NULL, 0, 0, false, false, false, false, NULL, NULL }
721 };
722 #endif
723
724 char sparc_hard_reg_printed[8];
725
726 /* Initialize the GCC target structure. */
727
728 /* The default is to use .half rather than .short for aligned HI objects. */
729 #undef TARGET_ASM_ALIGNED_HI_OP
730 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
731
732 #undef TARGET_ASM_UNALIGNED_HI_OP
733 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
734 #undef TARGET_ASM_UNALIGNED_SI_OP
735 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
736 #undef TARGET_ASM_UNALIGNED_DI_OP
737 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
738
739 /* The target hook has to handle DI-mode values. */
740 #undef TARGET_ASM_INTEGER
741 #define TARGET_ASM_INTEGER sparc_assemble_integer
742
743 #undef TARGET_ASM_FUNCTION_PROLOGUE
744 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
745 #undef TARGET_ASM_FUNCTION_EPILOGUE
746 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
747
748 #undef TARGET_SCHED_ADJUST_COST
749 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
750 #undef TARGET_SCHED_ISSUE_RATE
751 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
752 #undef TARGET_SCHED_INIT
753 #define TARGET_SCHED_INIT sparc_sched_init
754 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
755 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
756
757 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
758 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
759
760 #undef TARGET_INIT_LIBFUNCS
761 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
762
763 #undef TARGET_LEGITIMIZE_ADDRESS
764 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
765 #undef TARGET_DELEGITIMIZE_ADDRESS
766 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
767 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
768 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
769
770 #undef TARGET_INIT_BUILTINS
771 #define TARGET_INIT_BUILTINS sparc_init_builtins
772 #undef TARGET_BUILTIN_DECL
773 #define TARGET_BUILTIN_DECL sparc_builtin_decl
774 #undef TARGET_EXPAND_BUILTIN
775 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
776 #undef TARGET_FOLD_BUILTIN
777 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
778
779 #if TARGET_TLS
780 #undef TARGET_HAVE_TLS
781 #define TARGET_HAVE_TLS true
782 #endif
783
784 #undef TARGET_CANNOT_FORCE_CONST_MEM
785 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
786
787 #undef TARGET_ASM_OUTPUT_MI_THUNK
788 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
789 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
790 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
791
792 #undef TARGET_RTX_COSTS
793 #define TARGET_RTX_COSTS sparc_rtx_costs
794 #undef TARGET_ADDRESS_COST
795 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
796 #undef TARGET_REGISTER_MOVE_COST
797 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
798
799 #undef TARGET_PROMOTE_FUNCTION_MODE
800 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
801 #undef TARGET_STRICT_ARGUMENT_NAMING
802 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
803
804 #undef TARGET_MUST_PASS_IN_STACK
805 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
806 #undef TARGET_PASS_BY_REFERENCE
807 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
808 #undef TARGET_ARG_PARTIAL_BYTES
809 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
810 #undef TARGET_FUNCTION_ARG_ADVANCE
811 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
812 #undef TARGET_FUNCTION_ARG
813 #define TARGET_FUNCTION_ARG sparc_function_arg
814 #undef TARGET_FUNCTION_INCOMING_ARG
815 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
816 #undef TARGET_FUNCTION_ARG_PADDING
817 #define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
818 #undef TARGET_FUNCTION_ARG_BOUNDARY
819 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
820
821 #undef TARGET_RETURN_IN_MEMORY
822 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
823 #undef TARGET_STRUCT_VALUE_RTX
824 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
825 #undef TARGET_FUNCTION_VALUE
826 #define TARGET_FUNCTION_VALUE sparc_function_value
827 #undef TARGET_LIBCALL_VALUE
828 #define TARGET_LIBCALL_VALUE sparc_libcall_value
829 #undef TARGET_FUNCTION_VALUE_REGNO_P
830 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
831
832 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
833 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
834
835 #undef TARGET_ASAN_SHADOW_OFFSET
836 #define TARGET_ASAN_SHADOW_OFFSET sparc_asan_shadow_offset
837
838 #undef TARGET_EXPAND_BUILTIN_VA_START
839 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
840 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
841 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
842
843 #undef TARGET_VECTOR_MODE_SUPPORTED_P
844 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
845
846 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
847 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
848
849 #ifdef SUBTARGET_INSERT_ATTRIBUTES
850 #undef TARGET_INSERT_ATTRIBUTES
851 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
852 #endif
853
854 #ifdef SUBTARGET_ATTRIBUTE_TABLE
855 #undef TARGET_ATTRIBUTE_TABLE
856 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
857 #endif
858
859 #undef TARGET_OPTION_OVERRIDE
860 #define TARGET_OPTION_OVERRIDE sparc_option_override
861
862 #ifdef TARGET_THREAD_SSP_OFFSET
863 #undef TARGET_STACK_PROTECT_GUARD
864 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
865 #endif
866
867 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
868 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
869 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
870 #endif
871
872 #undef TARGET_ASM_FILE_END
873 #define TARGET_ASM_FILE_END sparc_file_end
874
875 #undef TARGET_FRAME_POINTER_REQUIRED
876 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
877
878 #undef TARGET_CAN_ELIMINATE
879 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
880
881 #undef TARGET_PREFERRED_RELOAD_CLASS
882 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
883
884 #undef TARGET_SECONDARY_RELOAD
885 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
886 #undef TARGET_SECONDARY_MEMORY_NEEDED
887 #define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed
888 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
889 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode
890
891 #undef TARGET_CONDITIONAL_REGISTER_USAGE
892 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
893
894 #undef TARGET_INIT_PIC_REG
895 #define TARGET_INIT_PIC_REG sparc_init_pic_reg
896
897 #undef TARGET_USE_PSEUDO_PIC_REG
898 #define TARGET_USE_PSEUDO_PIC_REG sparc_use_pseudo_pic_reg
899
900 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
901 #undef TARGET_MANGLE_TYPE
902 #define TARGET_MANGLE_TYPE sparc_mangle_type
903 #endif
904
905 #undef TARGET_LRA_P
906 #define TARGET_LRA_P sparc_lra_p
907
908 #undef TARGET_LEGITIMATE_ADDRESS_P
909 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
910
911 #undef TARGET_LEGITIMATE_CONSTANT_P
912 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
913
914 #undef TARGET_TRAMPOLINE_INIT
915 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
916
917 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
918 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
919 #undef TARGET_PRINT_OPERAND
920 #define TARGET_PRINT_OPERAND sparc_print_operand
921 #undef TARGET_PRINT_OPERAND_ADDRESS
922 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
923
924 /* The value stored by LDSTUB. */
925 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
926 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
927
928 #undef TARGET_CSTORE_MODE
929 #define TARGET_CSTORE_MODE sparc_cstore_mode
930
931 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
932 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv
933
934 #undef TARGET_FIXED_CONDITION_CODE_REGS
935 #define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs
936
937 #undef TARGET_MIN_ARITHMETIC_PRECISION
938 #define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision
939
940 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
941 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
942
943 #undef TARGET_HARD_REGNO_NREGS
944 #define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
945 #undef TARGET_HARD_REGNO_MODE_OK
946 #define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok
947
948 #undef TARGET_MODES_TIEABLE_P
949 #define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p
950
951 #undef TARGET_CAN_CHANGE_MODE_CLASS
952 #define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class
953
954 #undef TARGET_CONSTANT_ALIGNMENT
955 #define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment
956
957 #undef TARGET_VECTORIZE_VEC_PERM_CONST
958 #define TARGET_VECTORIZE_VEC_PERM_CONST sparc_vectorize_vec_perm_const
959
960 #undef TARGET_CAN_FOLLOW_JUMP
961 #define TARGET_CAN_FOLLOW_JUMP sparc_can_follow_jump
962
963 #undef TARGET_ZERO_CALL_USED_REGS
964 #define TARGET_ZERO_CALL_USED_REGS sparc_zero_call_used_regs
965
966 struct gcc_target targetm = TARGET_INITIALIZER;
967
968 /* Return the memory reference contained in X if any, zero otherwise. */
969
970 static rtx
971 mem_ref (rtx x)
972 {
973 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
974 x = XEXP (x, 0);
975
976 if (MEM_P (x))
977 return x;
978
979 return NULL_RTX;
980 }
981
982 /* True if any of INSN's source register(s) is REG. */
983
984 static bool
985 insn_uses_reg_p (rtx_insn *insn, unsigned int reg)
986 {
987 extract_insn (insn);
988 return ((REG_P (recog_data.operand[1])
989 && REGNO (recog_data.operand[1]) == reg)
990 || (recog_data.n_operands == 3
991 && REG_P (recog_data.operand[2])
992 && REGNO (recog_data.operand[2]) == reg));
993 }
994
995 /* True if INSN is a floating-point division or square-root. */
996
997 static bool
998 div_sqrt_insn_p (rtx_insn *insn)
999 {
1000 if (GET_CODE (PATTERN (insn)) != SET)
1001 return false;
1002
1003 switch (get_attr_type (insn))
1004 {
1005 case TYPE_FPDIVS:
1006 case TYPE_FPSQRTS:
1007 case TYPE_FPDIVD:
1008 case TYPE_FPSQRTD:
1009 return true;
1010 default:
1011 return false;
1012 }
1013 }
1014
1015 /* True if INSN is a floating-point instruction. */
1016
1017 static bool
1018 fpop_insn_p (rtx_insn *insn)
1019 {
1020 if (GET_CODE (PATTERN (insn)) != SET)
1021 return false;
1022
1023 switch (get_attr_type (insn))
1024 {
1025 case TYPE_FPMOVE:
1026 case TYPE_FPCMOVE:
1027 case TYPE_FP:
1028 case TYPE_FPCMP:
1029 case TYPE_FPMUL:
1030 case TYPE_FPDIVS:
1031 case TYPE_FPSQRTS:
1032 case TYPE_FPDIVD:
1033 case TYPE_FPSQRTD:
1034 return true;
1035 default:
1036 return false;
1037 }
1038 }
1039
1040 /* True if INSN is an atomic instruction. */
1041
1042 static bool
1043 atomic_insn_for_leon3_p (rtx_insn *insn)
1044 {
1045 switch (INSN_CODE (insn))
1046 {
1047 case CODE_FOR_swapsi:
1048 case CODE_FOR_ldstub:
1049 case CODE_FOR_atomic_compare_and_swap_leon3_1:
1050 return true;
1051 default:
1052 return false;
1053 }
1054 }
1055
1056 /* We use a machine specific pass to enable workarounds for errata.
1057
1058 We need to have the (essentially) final form of the insn stream in order
1059 to properly detect the various hazards. Therefore, this machine specific
1060 pass runs as late as possible. */
1061
1062 /* True if INSN is an md pattern or asm statement. */
1063 #define USEFUL_INSN_P(INSN) \
1064 (NONDEBUG_INSN_P (INSN) \
1065 && GET_CODE (PATTERN (INSN)) != USE \
1066 && GET_CODE (PATTERN (INSN)) != CLOBBER)
1067
1068 static unsigned int
1069 sparc_do_work_around_errata (void)
1070 {
1071 rtx_insn *insn, *next;
1072
1073 /* Force all instructions to be split into their final form. */
1074 split_all_insns_noflow ();
1075
1076 /* Now look for specific patterns in the insn stream. */
1077 for (insn = get_insns (); insn; insn = next)
1078 {
1079 bool insert_nop = false;
1080 rtx set;
1081 rtx_insn *jump;
1082 rtx_sequence *seq;
1083
1084 /* Look into the instruction in a delay slot. */
1085 if (NONJUMP_INSN_P (insn)
1086 && (seq = dyn_cast <rtx_sequence *> (PATTERN (insn))))
1087 {
1088 jump = seq->insn (0);
1089 insn = seq->insn (1);
1090 }
1091 else if (JUMP_P (insn))
1092 jump = insn;
1093 else
1094 jump = NULL;
1095
1096 /* Place a NOP at the branch target of an integer branch if it is a
1097 floating-point operation or a floating-point branch. */
1098 if (sparc_fix_gr712rc
1099 && jump
1100 && jump_to_label_p (jump)
1101 && get_attr_branch_type (jump) == BRANCH_TYPE_ICC)
1102 {
1103 rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
1104 if (target
1105 && (fpop_insn_p (target)
1106 || (JUMP_P (target)
1107 && get_attr_branch_type (target) == BRANCH_TYPE_FCC)))
1108 emit_insn_before (gen_nop (), target);
1109 }
1110
1111 /* Insert a NOP between a load instruction and an atomic instruction. Also
1112 insert a NOP at the branch target if there is a load in the delay slot and
1113 an atomic instruction at the branch target. */
1114 if (sparc_fix_ut700
1115 && NONJUMP_INSN_P (insn)
1116 && (set = single_set (insn)) != NULL_RTX
1117 && mem_ref (SET_SRC (set))
1118 && REG_P (SET_DEST (set)))
1119 {
1120 if (jump && jump_to_label_p (jump))
1121 {
1122 rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
1123 if (target && atomic_insn_for_leon3_p (target))
1124 emit_insn_before (gen_nop (), target);
1125 }
1126
1127 next = next_active_insn (insn);
1128 if (!next)
1129 break;
1130
1131 if (atomic_insn_for_leon3_p (next))
1132 insert_nop = true;
1133 }
1134
1135 /* Look for a sequence that starts with a fdiv or fsqrt instruction and
1136 ends with another fdiv or fsqrt instruction with no dependencies on
1137 the former, along with an appropriate pattern in between. */
1138 if (sparc_fix_lost_divsqrt
1139 && NONJUMP_INSN_P (insn)
1140 && div_sqrt_insn_p (insn))
1141 {
1142 int i;
1143 int fp_found = 0;
1144 rtx_insn *after;
1145
1146 const unsigned int dest_reg = REGNO (SET_DEST (single_set (insn)));
1147
1148 next = next_active_insn (insn);
1149 if (!next)
1150 break;
1151
1152 for (after = next, i = 0; i < 4; i++)
1153 {
1154 /* Count floating-point operations. */
1155 if (i != 3 && fpop_insn_p (after))
1156 {
1157 /* If the insn uses the destination register of
1158 the div/sqrt, then it cannot be problematic. */
1159 if (insn_uses_reg_p (after, dest_reg))
1160 break;
1161 fp_found++;
1162 }
1163
1164 /* Count floating-point loads. */
1165 if (i != 3
1166 && (set = single_set (after)) != NULL_RTX
1167 && REG_P (SET_DEST (set))
1168 && REGNO (SET_DEST (set)) > 31)
1169 {
1170 /* If the insn uses the destination register of
1171 the div/sqrt, then it cannot be problematic. */
1172 if (REGNO (SET_DEST (set)) == dest_reg)
1173 break;
1174 fp_found++;
1175 }
1176
1177 /* Check if this is a problematic sequence. */
1178 if (i > 1
1179 && fp_found >= 2
1180 && div_sqrt_insn_p (after))
1181 {
1182 /* If this is the short version of the problematic
1183 sequence we add two NOPs in a row to also prevent
1184 the long version. */
1185 if (i == 2)
1186 emit_insn_before (gen_nop (), next);
1187 insert_nop = true;
1188 break;
1189 }
1190
1191 /* No need to scan past a second div/sqrt. */
1192 if (div_sqrt_insn_p (after))
1193 break;
1194
1195 /* Insert NOP before branch. */
1196 if (i < 3
1197 && (!NONJUMP_INSN_P (after)
1198 || GET_CODE (PATTERN (after)) == SEQUENCE))
1199 {
1200 insert_nop = true;
1201 break;
1202 }
1203
1204 after = next_active_insn (after);
1205 if (!after)
1206 break;
1207 }
1208 }
1209
1210 /* Look for either of these two sequences:
1211
1212 Sequence A:
1213 1. store of word size or less (e.g. st / stb / sth / stf)
1214 2. any single instruction that is not a load or store
1215 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)
1216
1217 Sequence B:
1218 1. store of double word size (e.g. std / stdf)
1219 2. any store instruction (e.g. st / stb / sth / stf / std / stdf) */
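/* As a purely illustrative example, "stb %g1, [%o0]; add %g2, %g3, %g2;
st %g2, [%o1]" matches sequence A, so the pass inserts a NOP right after
the first store. */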
1220 if (sparc_fix_b2bst
1221 && NONJUMP_INSN_P (insn)
1222 && (set = single_set (insn)) != NULL_RTX
1223 && MEM_P (SET_DEST (set)))
1224 {
1225 /* Sequence B begins with a double-word store. */
1226 bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
1227 rtx_insn *after;
1228 int i;
1229
1230 next = next_active_insn (insn);
1231 if (!next)
1232 break;
1233
1234 for (after = next, i = 0; i < 2; i++)
1235 {
1236 /* Skip empty assembly statements. */
1237 if ((GET_CODE (PATTERN (after)) == UNSPEC_VOLATILE)
1238 || (USEFUL_INSN_P (after)
1239 && (asm_noperands (PATTERN (after)) >= 0)
1240 && !strcmp (decode_asm_operands (PATTERN (after),
1241 NULL, NULL, NULL,
1242 NULL, NULL), "")))
1243 after = next_active_insn (after);
1244 if (!after)
1245 break;
1246
1247 /* If the insn is a branch, then it cannot be problematic. */
1248 if (!NONJUMP_INSN_P (after)
1249 || GET_CODE (PATTERN (after)) == SEQUENCE)
1250 break;
1251
1252 /* Sequence B is only two instructions long. */
1253 if (seq_b)
1254 {
1255 /* Add NOP if followed by a store. */
1256 if ((set = single_set (after)) != NULL_RTX
1257 && MEM_P (SET_DEST (set)))
1258 insert_nop = true;
1259
1260 /* Otherwise it is ok. */
1261 break;
1262 }
1263
1264 /* If the second instruction is a load or a store,
1265 then the sequence cannot be problematic. */
1266 if (i == 0)
1267 {
1268 if ((set = single_set (after)) != NULL_RTX
1269 && (MEM_P (SET_DEST (set)) || mem_ref (SET_SRC (set))))
1270 break;
1271
1272 after = next_active_insn (after);
1273 if (!after)
1274 break;
1275 }
1276
1277 /* Add NOP if third instruction is a store. */
1278 if (i == 1
1279 && (set = single_set (after)) != NULL_RTX
1280 && MEM_P (SET_DEST (set)))
1281 insert_nop = true;
1282 }
1283 }
1284
1285 /* Look for a single-word load into an odd-numbered FP register. */
1286 else if (sparc_fix_at697f
1287 && NONJUMP_INSN_P (insn)
1288 && (set = single_set (insn)) != NULL_RTX
1289 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1290 && mem_ref (SET_SRC (set))
1291 && REG_P (SET_DEST (set))
1292 && REGNO (SET_DEST (set)) > 31
1293 && REGNO (SET_DEST (set)) % 2 != 0)
1294 {
1295 /* The wrong dependency is on the enclosing double register. */
1296 const unsigned int x = REGNO (SET_DEST (set)) - 1;
1297 unsigned int src1, src2, dest;
1298 int code;
1299
1300 next = next_active_insn (insn);
1301 if (!next)
1302 break;
1303 /* If the insn is a branch, then it cannot be problematic. */
1304 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1305 continue;
1306
1307 extract_insn (next);
1308 code = INSN_CODE (next);
1309
1310 switch (code)
1311 {
1312 case CODE_FOR_adddf3:
1313 case CODE_FOR_subdf3:
1314 case CODE_FOR_muldf3:
1315 case CODE_FOR_divdf3:
1316 dest = REGNO (recog_data.operand[0]);
1317 src1 = REGNO (recog_data.operand[1]);
1318 src2 = REGNO (recog_data.operand[2]);
1319 if (src1 != src2)
1320 {
1321 /* Case [1-4]:
1322 ld [address], %fx+1
1323 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
1324 if ((src1 == x || src2 == x)
1325 && (dest == src1 || dest == src2))
1326 insert_nop = true;
1327 }
1328 else
1329 {
1330 /* Case 5:
1331 ld [address], %fx+1
1332 FPOPd %fx, %fx, %fx */
1333 if (src1 == x
1334 && dest == src1
1335 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
1336 insert_nop = true;
1337 }
1338 break;
1339
1340 case CODE_FOR_sqrtdf2:
1341 dest = REGNO (recog_data.operand[0]);
1342 src1 = REGNO (recog_data.operand[1]);
1343 /* Case 6:
1344 ld [address], %fx+1
1345 fsqrtd %fx, %fx */
1346 if (src1 == x && dest == src1)
1347 insert_nop = true;
1348 break;
1349
1350 default:
1351 break;
1352 }
1353 }
1354
1355 /* Look for a single-word load into an integer register. */
1356 else if (sparc_fix_ut699
1357 && NONJUMP_INSN_P (insn)
1358 && (set = single_set (insn)) != NULL_RTX
1359 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
1360 && (mem_ref (SET_SRC (set)) != NULL_RTX
1361 || INSN_CODE (insn) == CODE_FOR_movsi_pic_gotdata_op)
1362 && REG_P (SET_DEST (set))
1363 && REGNO (SET_DEST (set)) < 32)
1364 {
1365 /* There is no problem if the second memory access has a data
1366 dependency on the first single-cycle load. */
1367 rtx x = SET_DEST (set);
1368
1369 next = next_active_insn (insn);
1370 if (!next)
1371 break;
1372 /* If the insn is a branch, then it cannot be problematic. */
1373 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1374 continue;
1375
1376 /* Look for a second memory access to/from an integer register. */
1377 if ((set = single_set (next)) != NULL_RTX)
1378 {
1379 rtx src = SET_SRC (set);
1380 rtx dest = SET_DEST (set);
1381 rtx mem;
1382
1383 /* LDD is affected. */
1384 if ((mem = mem_ref (src)) != NULL_RTX
1385 && REG_P (dest)
1386 && REGNO (dest) < 32
1387 && !reg_mentioned_p (x, XEXP (mem, 0)))
1388 insert_nop = true;
1389
1390 /* STD is *not* affected. */
1391 else if (MEM_P (dest)
1392 && GET_MODE_SIZE (GET_MODE (dest)) <= 4
1393 && (src == CONST0_RTX (GET_MODE (dest))
1394 || (REG_P (src)
1395 && REGNO (src) < 32
1396 && REGNO (src) != REGNO (x)))
1397 && !reg_mentioned_p (x, XEXP (dest, 0)))
1398 insert_nop = true;
1399
1400 /* GOT accesses use LD. */
1401 else if (INSN_CODE (next) == CODE_FOR_movsi_pic_gotdata_op
1402 && !reg_mentioned_p (x, XEXP (XEXP (src, 0), 1)))
1403 insert_nop = true;
1404 }
1405 }
1406
1407 /* Look for a single-word load/operation into an FP register. */
1408 else if (sparc_fix_ut699
1409 && NONJUMP_INSN_P (insn)
1410 && (set = single_set (insn)) != NULL_RTX
1411 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1412 && REG_P (SET_DEST (set))
1413 && REGNO (SET_DEST (set)) > 31)
1414 {
1415 /* Number of instructions in the problematic window. */
1416 const int n_insns = 4;
1417 /* The problematic combination is with the sibling FP register. */
1418 const unsigned int x = REGNO (SET_DEST (set));
1419 const unsigned int y = x ^ 1;
1420 rtx_insn *after;
1421 int i;
1422
1423 next = next_active_insn (insn);
1424 if (!next)
1425 break;
1426 /* If the insn is a branch, then it cannot be problematic. */
1427 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1428 continue;
1429
1430 /* Look for a second load/operation into the sibling FP register. */
1431 if (!((set = single_set (next)) != NULL_RTX
1432 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1433 && REG_P (SET_DEST (set))
1434 && REGNO (SET_DEST (set)) == y))
1435 continue;
1436
1437 /* Look for a (possible) store from the FP register in the next N
1438 instructions, but bail out if it is again modified or if there
1439 is a store from the sibling FP register before this store. */
1440 for (after = next, i = 0; i < n_insns; i++)
1441 {
1442 bool branch_p;
1443
1444 after = next_active_insn (after);
1445 if (!after)
1446 break;
1447
1448 /* This is a branch with an empty delay slot. */
1449 if (!NONJUMP_INSN_P (after))
1450 {
1451 if (++i == n_insns)
1452 break;
1453 branch_p = true;
1454 after = NULL;
1455 }
1456 /* This is a branch with a filled delay slot. */
1457 else if (rtx_sequence *seq =
1458 dyn_cast <rtx_sequence *> (PATTERN (after)))
1459 {
1460 if (++i == n_insns)
1461 break;
1462 branch_p = true;
1463 after = seq->insn (1);
1464 }
1465 /* This is a regular instruction. */
1466 else
1467 branch_p = false;
1468
1469 if (after && (set = single_set (after)) != NULL_RTX)
1470 {
1471 const rtx src = SET_SRC (set);
1472 const rtx dest = SET_DEST (set);
1473 const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
1474
1475 /* If the FP register is again modified before the store,
1476 then the store isn't affected. */
1477 if (REG_P (dest)
1478 && (REGNO (dest) == x
1479 || (REGNO (dest) == y && size == 8)))
1480 break;
1481
1482 if (MEM_P (dest) && REG_P (src))
1483 {
1484 /* If there is a store from the sibling FP register
1485 before the store, then the store is not affected. */
1486 if (REGNO (src) == y || (REGNO (src) == x && size == 8))
1487 break;
1488
1489 /* Otherwise, the store is affected. */
1490 if (REGNO (src) == x && size == 4)
1491 {
1492 insert_nop = true;
1493 break;
1494 }
1495 }
1496 }
1497
1498 /* If we have a branch in the first M instructions, then we
1499 cannot see the (M+2)th instruction so we play safe. */
1500 if (branch_p && i <= (n_insns - 2))
1501 {
1502 insert_nop = true;
1503 break;
1504 }
1505 }
1506 }
1507
1508 else
1509 next = NEXT_INSN (insn);
1510
1511 if (insert_nop)
1512 emit_insn_before (gen_nop (), next);
1513 }
1514
1515 return 0;
1516 }
1517
1518 namespace {
1519
1520 const pass_data pass_data_work_around_errata =
1521 {
1522 RTL_PASS, /* type */
1523 "errata", /* name */
1524 OPTGROUP_NONE, /* optinfo_flags */
1525 TV_MACH_DEP, /* tv_id */
1526 0, /* properties_required */
1527 0, /* properties_provided */
1528 0, /* properties_destroyed */
1529 0, /* todo_flags_start */
1530 0, /* todo_flags_finish */
1531 };
1532
1533 class pass_work_around_errata : public rtl_opt_pass
1534 {
1535 public:
1536 pass_work_around_errata(gcc::context *ctxt)
1537 : rtl_opt_pass(pass_data_work_around_errata, ctxt)
1538 {}
1539
1540 /* opt_pass methods: */
1541 virtual bool gate (function *)
1542 {
1543 return sparc_fix_at697f
1544 || sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc
1545 || sparc_fix_b2bst || sparc_fix_lost_divsqrt;
1546 }
1547
1548 virtual unsigned int execute (function *)
1549 {
1550 return sparc_do_work_around_errata ();
1551 }
1552
1553 }; // class pass_work_around_errata
1554
1555 } // anon namespace
1556
1557 rtl_opt_pass *
1558 make_pass_work_around_errata (gcc::context *ctxt)
1559 {
1560 return new pass_work_around_errata (ctxt);
1561 }
1562
1563 /* Helpers for TARGET_DEBUG_OPTIONS. */
1564 static void
1565 dump_target_flag_bits (const int flags)
1566 {
1567 if (flags & MASK_64BIT)
1568 fprintf (stderr, "64BIT ");
1569 if (flags & MASK_APP_REGS)
1570 fprintf (stderr, "APP_REGS ");
1571 if (flags & MASK_FASTER_STRUCTS)
1572 fprintf (stderr, "FASTER_STRUCTS ");
1573 if (flags & MASK_FLAT)
1574 fprintf (stderr, "FLAT ");
1575 if (flags & MASK_FMAF)
1576 fprintf (stderr, "FMAF ");
1577 if (flags & MASK_FSMULD)
1578 fprintf (stderr, "FSMULD ");
1579 if (flags & MASK_FPU)
1580 fprintf (stderr, "FPU ");
1581 if (flags & MASK_HARD_QUAD)
1582 fprintf (stderr, "HARD_QUAD ");
1583 if (flags & MASK_POPC)
1584 fprintf (stderr, "POPC ");
1585 if (flags & MASK_PTR64)
1586 fprintf (stderr, "PTR64 ");
1587 if (flags & MASK_STACK_BIAS)
1588 fprintf (stderr, "STACK_BIAS ");
1589 if (flags & MASK_UNALIGNED_DOUBLES)
1590 fprintf (stderr, "UNALIGNED_DOUBLES ");
1591 if (flags & MASK_V8PLUS)
1592 fprintf (stderr, "V8PLUS ");
1593 if (flags & MASK_VIS)
1594 fprintf (stderr, "VIS ");
1595 if (flags & MASK_VIS2)
1596 fprintf (stderr, "VIS2 ");
1597 if (flags & MASK_VIS3)
1598 fprintf (stderr, "VIS3 ");
1599 if (flags & MASK_VIS4)
1600 fprintf (stderr, "VIS4 ");
1601 if (flags & MASK_VIS4B)
1602 fprintf (stderr, "VIS4B ");
1603 if (flags & MASK_CBCOND)
1604 fprintf (stderr, "CBCOND ");
1605 if (flags & MASK_DEPRECATED_V8_INSNS)
1606 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1607 if (flags & MASK_SPARCLET)
1608 fprintf (stderr, "SPARCLET ");
1609 if (flags & MASK_SPARCLITE)
1610 fprintf (stderr, "SPARCLITE ");
1611 if (flags & MASK_V8)
1612 fprintf (stderr, "V8 ");
1613 if (flags & MASK_V9)
1614 fprintf (stderr, "V9 ");
1615 }
1616
1617 static void
1618 dump_target_flags (const char *prefix, const int flags)
1619 {
1620 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1621 dump_target_flag_bits (flags);
1622 fprintf (stderr, "]\n");
1623 }
1624
1625 /* Validate and override various options, and do some machine dependent
1626 initialization. */
1627
1628 static void
1629 sparc_option_override (void)
1630 {
1631 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1632 static struct cpu_default {
1633 const int cpu;
1634 const enum sparc_processor_type processor;
1635 } const cpu_default[] = {
1636 /* There must be one entry here for each TARGET_CPU value. */
1637 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1638 { TARGET_CPU_v8, PROCESSOR_V8 },
1639 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1640 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1641 { TARGET_CPU_leon, PROCESSOR_LEON },
1642 { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1643 { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
1644 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1645 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1646 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1647 { TARGET_CPU_v9, PROCESSOR_V9 },
1648 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1649 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1650 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1651 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1652 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1653 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1654 { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
1655 { TARGET_CPU_m8, PROCESSOR_M8 },
1656 { -1, PROCESSOR_V7 }
1657 };
1658 const struct cpu_default *def;
1659 /* Table of values for -m{cpu,tune}=. This must match the order of
1660 the enum processor_type in sparc-opts.h. */
1661 static struct cpu_table {
1662 const char *const name;
1663 const int disable;
1664 const int enable;
1665 } const cpu_table[] = {
1666 { "v7", MASK_ISA, 0 },
1667 { "cypress", MASK_ISA, 0 },
1668 { "v8", MASK_ISA, MASK_V8 },
1669 /* TI TMS390Z55 supersparc */
1670 { "supersparc", MASK_ISA, MASK_V8 },
1671 { "hypersparc", MASK_ISA, MASK_V8 },
1672 { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
1673 { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
1674 { "leon3v7", MASK_ISA, MASK_LEON3 },
1675 { "sparclite", MASK_ISA, MASK_SPARCLITE },
1676 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1677 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1678 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
1679 { "f934", MASK_ISA, MASK_SPARCLITE },
1680 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1681 { "sparclet", MASK_ISA, MASK_SPARCLET },
1682 /* TEMIC sparclet */
1683 { "tsc701", MASK_ISA, MASK_SPARCLET },
1684 { "v9", MASK_ISA, MASK_V9 },
1685 /* UltraSPARC I, II, IIi */
1686 { "ultrasparc", MASK_ISA,
1687 /* Although insns using %y are deprecated, it is a clear win. */
1688 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1689 /* UltraSPARC III */
1690 /* ??? Check if %y issue still holds true. */
1691 { "ultrasparc3", MASK_ISA,
1692 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1693 /* UltraSPARC T1 */
1694 { "niagara", MASK_ISA,
1695 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1696 /* UltraSPARC T2 */
1697 { "niagara2", MASK_ISA,
1698 MASK_V9|MASK_POPC|MASK_VIS2 },
1699 /* UltraSPARC T3 */
1700 { "niagara3", MASK_ISA,
1701 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
1702 /* UltraSPARC T4 */
1703 { "niagara4", MASK_ISA,
1704 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1705 /* UltraSPARC M7 */
1706 { "niagara7", MASK_ISA,
1707 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
1708 /* UltraSPARC M8 */
1709 { "m8", MASK_ISA,
1710 MASK_V9|MASK_POPC|MASK_VIS4B|MASK_FMAF|MASK_CBCOND|MASK_SUBXC }
1711 };
1712 const struct cpu_table *cpu;
1713 unsigned int i;
1714
1715 if (sparc_debug_string != NULL)
1716 {
1717 const char *q;
1718 char *p;
1719
1720 p = ASTRDUP (sparc_debug_string);
1721 while ((q = strtok (p, ",")) != NULL)
1722 {
1723 bool invert;
1724 int mask;
1725
1726 p = NULL;
1727 if (*q == '!')
1728 {
1729 invert = true;
1730 q++;
1731 }
1732 else
1733 invert = false;
1734
1735 if (! strcmp (q, "all"))
1736 mask = MASK_DEBUG_ALL;
1737 else if (! strcmp (q, "options"))
1738 mask = MASK_DEBUG_OPTIONS;
1739 else
1740 error ("unknown %<-mdebug-%s%> switch", q);
1741
1742 if (invert)
1743 sparc_debug &= ~mask;
1744 else
1745 sparc_debug |= mask;
1746 }
1747 }
1748
1749 /* Enable the FsMULd instruction by default if not explicitly specified by
1750 the user. It may later be disabled by the CPU selection (explicitly or not). */
1751 if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
1752 target_flags |= MASK_FSMULD;
1753
1754 if (TARGET_DEBUG_OPTIONS)
1755 {
1756 dump_target_flags ("Initial target_flags", target_flags);
1757 dump_target_flags ("target_flags_explicit", target_flags_explicit);
1758 }
1759
1760 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1761 SUBTARGET_OVERRIDE_OPTIONS;
1762 #endif
1763
1764 #ifndef SPARC_BI_ARCH
1765 /* Check for unsupported architecture size. */
1766 if (!TARGET_64BIT != DEFAULT_ARCH32_P)
1767 error ("%s is not supported by this configuration",
1768 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1769 #endif
1770
1771 /* We force all 64-bit archs to use 128-bit long double. */
1772 if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
1773 {
1774 error ("%<-mlong-double-64%> not allowed with %<-m64%>");
1775 target_flags |= MASK_LONG_DOUBLE_128;
1776 }
1777
1778 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1779 for (i = 8; i < 16; i++)
1780 if (!call_used_regs [i])
1781 {
1782 error ("%<-fcall-saved-REG%> is not supported for out registers");
1783 call_used_regs [i] = 1;
1784 }
1785
1786 /* Set the default CPU if no -mcpu option was specified. */
1787 if (!global_options_set.x_sparc_cpu_and_features)
1788 {
1789 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1790 if (def->cpu == TARGET_CPU_DEFAULT)
1791 break;
1792 gcc_assert (def->cpu != -1);
1793 sparc_cpu_and_features = def->processor;
1794 }
1795
1796 /* Set the default CPU if no -mtune option was specified. */
1797 if (!global_options_set.x_sparc_cpu)
1798 sparc_cpu = sparc_cpu_and_features;
1799
1800 cpu = &cpu_table[(int) sparc_cpu_and_features];
1801
1802 if (TARGET_DEBUG_OPTIONS)
1803 {
1804 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1805 dump_target_flags ("cpu->disable", cpu->disable);
1806 dump_target_flags ("cpu->enable", cpu->enable);
1807 }
1808
1809 target_flags &= ~cpu->disable;
1810 target_flags |= (cpu->enable
1811 #ifndef HAVE_AS_FMAF_HPC_VIS3
1812 & ~(MASK_FMAF | MASK_VIS3)
1813 #endif
1814 #ifndef HAVE_AS_SPARC4
1815 & ~MASK_CBCOND
1816 #endif
1817 #ifndef HAVE_AS_SPARC5_VIS4
1818 & ~(MASK_VIS4 | MASK_SUBXC)
1819 #endif
1820 #ifndef HAVE_AS_SPARC6
1821 & ~(MASK_VIS4B)
1822 #endif
1823 #ifndef HAVE_AS_LEON
1824 & ~(MASK_LEON | MASK_LEON3)
1825 #endif
1826 & ~(target_flags_explicit & MASK_FEATURES)
1827 );
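/* Illustrative note (not from the original commentary): the final term above
   means explicit user settings always win over the CPU table.  For example,
   assuming MASK_VIS is part of MASK_FEATURES, -mcpu=niagara2 -mno-vis keeps
   VIS disabled because the explicitly set bit is masked out of cpu->enable.  */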
1828
1829 /* FsMULd is a V8 instruction. */
1830 if (!TARGET_V8 && !TARGET_V9)
1831 target_flags &= ~MASK_FSMULD;
1832
1833 /* -mvis2 implies -mvis. */
1834 if (TARGET_VIS2)
1835 target_flags |= MASK_VIS;
1836
1837 /* -mvis3 implies -mvis2 and -mvis. */
1838 if (TARGET_VIS3)
1839 target_flags |= MASK_VIS2 | MASK_VIS;
1840
1841 /* -mvis4 implies -mvis3, -mvis2 and -mvis. */
1842 if (TARGET_VIS4)
1843 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1844
1845 /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis */
1846 if (TARGET_VIS4B)
1847 target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1848
1849 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if
1850 FPU is disabled. */
1851 if (!TARGET_FPU)
1852 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1853 | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);
1854
1855 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1856 are available; -m64 also implies v9. */
1857 if (TARGET_VIS || TARGET_ARCH64)
1858 {
1859 target_flags |= MASK_V9;
1860 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1861 }
1862
1863 /* -mvis also implies -mv8plus on 32-bit. */
1864 if (TARGET_VIS && !TARGET_ARCH64)
1865 target_flags |= MASK_V8PLUS;
1866
1867 /* Use the deprecated v8 insns for sparc64 in 32-bit mode. */
1868 if (TARGET_V9 && TARGET_ARCH32)
1869 target_flags |= MASK_DEPRECATED_V8_INSNS;
1870
1871 /* V8PLUS requires V9 and makes no sense in 64-bit mode. */
1872 if (!TARGET_V9 || TARGET_ARCH64)
1873 target_flags &= ~MASK_V8PLUS;
1874
1875 /* Don't use stack biasing in 32-bit mode. */
1876 if (TARGET_ARCH32)
1877 target_flags &= ~MASK_STACK_BIAS;
1878
1879 /* Use LRA instead of reload, unless otherwise instructed. */
1880 if (!(target_flags_explicit & MASK_LRA))
1881 target_flags |= MASK_LRA;
1882
1883 /* Enable applicable errata workarounds for LEON3FT. */
1884 if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
1885 {
1886 sparc_fix_b2bst = 1;
1887 sparc_fix_lost_divsqrt = 1;
1888 }
1889
1890 /* Disable FsMULd for the UT699 since it doesn't work correctly. */
1891 if (sparc_fix_ut699)
1892 target_flags &= ~MASK_FSMULD;
1893
1894 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1895 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1896 target_flags |= MASK_LONG_DOUBLE_128;
1897 #endif
1898
1899 if (TARGET_DEBUG_OPTIONS)
1900 dump_target_flags ("Final target_flags", target_flags);
1901
1902 /* Set the code model if no -mcmodel option was specified. */
1903 if (global_options_set.x_sparc_code_model)
1904 {
1905 if (TARGET_ARCH32)
1906 error ("%<-mcmodel=%> is not supported in 32-bit mode");
1907 }
1908 else
1909 {
1910 if (TARGET_ARCH32)
1911 sparc_code_model = CM_32;
1912 else
1913 sparc_code_model = SPARC_DEFAULT_CMODEL;
1914 }
1915
1916 /* Set the memory model if no -mmemory-model option was specified. */
1917 if (!global_options_set.x_sparc_memory_model)
1918 {
1919 /* Choose the memory model for the operating system. */
1920 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1921 if (os_default != SMM_DEFAULT)
1922 sparc_memory_model = os_default;
1923 /* Choose the most relaxed model for the processor. */
1924 else if (TARGET_V9)
1925 sparc_memory_model = SMM_RMO;
1926 else if (TARGET_LEON3)
1927 sparc_memory_model = SMM_TSO;
1928 else if (TARGET_LEON)
1929 sparc_memory_model = SMM_SC;
1930 else if (TARGET_V8)
1931 sparc_memory_model = SMM_PSO;
1932 else
1933 sparc_memory_model = SMM_SC;
1934 }
1935
1936 /* Supply a default value for align_functions. */
1937 if (flag_align_functions && !str_align_functions)
1938 {
1939 if (sparc_cpu == PROCESSOR_ULTRASPARC
1940 || sparc_cpu == PROCESSOR_ULTRASPARC3
1941 || sparc_cpu == PROCESSOR_NIAGARA
1942 || sparc_cpu == PROCESSOR_NIAGARA2
1943 || sparc_cpu == PROCESSOR_NIAGARA3
1944 || sparc_cpu == PROCESSOR_NIAGARA4)
1945 str_align_functions = "32";
1946 else if (sparc_cpu == PROCESSOR_NIAGARA7
1947 || sparc_cpu == PROCESSOR_M8)
1948 str_align_functions = "64";
1949 }
1950
1951 /* Validate PCC_STRUCT_RETURN. */
1952 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1953 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1954
1955 /* Only use .uaxword when compiling for a 64-bit target. */
1956 if (!TARGET_ARCH64)
1957 targetm.asm_out.unaligned_op.di = NULL;
1958
1959 /* Set the processor costs. */
1960 switch (sparc_cpu)
1961 {
1962 case PROCESSOR_V7:
1963 case PROCESSOR_CYPRESS:
1964 sparc_costs = &cypress_costs;
1965 break;
1966 case PROCESSOR_V8:
1967 case PROCESSOR_SPARCLITE:
1968 case PROCESSOR_SUPERSPARC:
1969 sparc_costs = &supersparc_costs;
1970 break;
1971 case PROCESSOR_F930:
1972 case PROCESSOR_F934:
1973 case PROCESSOR_HYPERSPARC:
1974 case PROCESSOR_SPARCLITE86X:
1975 sparc_costs = &hypersparc_costs;
1976 break;
1977 case PROCESSOR_LEON:
1978 sparc_costs = &leon_costs;
1979 break;
1980 case PROCESSOR_LEON3:
1981 case PROCESSOR_LEON3V7:
1982 sparc_costs = &leon3_costs;
1983 break;
1984 case PROCESSOR_SPARCLET:
1985 case PROCESSOR_TSC701:
1986 sparc_costs = &sparclet_costs;
1987 break;
1988 case PROCESSOR_V9:
1989 case PROCESSOR_ULTRASPARC:
1990 sparc_costs = &ultrasparc_costs;
1991 break;
1992 case PROCESSOR_ULTRASPARC3:
1993 sparc_costs = &ultrasparc3_costs;
1994 break;
1995 case PROCESSOR_NIAGARA:
1996 sparc_costs = &niagara_costs;
1997 break;
1998 case PROCESSOR_NIAGARA2:
1999 sparc_costs = &niagara2_costs;
2000 break;
2001 case PROCESSOR_NIAGARA3:
2002 sparc_costs = &niagara3_costs;
2003 break;
2004 case PROCESSOR_NIAGARA4:
2005 sparc_costs = &niagara4_costs;
2006 break;
2007 case PROCESSOR_NIAGARA7:
2008 sparc_costs = &niagara7_costs;
2009 break;
2010 case PROCESSOR_M8:
2011 sparc_costs = &m8_costs;
2012 break;
2013 case PROCESSOR_NATIVE:
2014 gcc_unreachable ();
2015 };
2016
2017 /* param_simultaneous_prefetches is the number of prefetches that
2018 can run at the same time. More important, it is the threshold
2019 defining when additional prefetches will be dropped by the
2020 hardware.
2021
2022 The UltraSPARC-III features a documented prefetch queue with a
2023 size of 8. Additional prefetches issued in the cpu are
2024 dropped.
2025
2026 Niagara processors are different. In these processors prefetches
2027 are handled much like regular loads. The L1 miss buffer is 32
2028 entries, but prefetches start getting affected when 30 entries
2029 become occupied. That occupation could be a mix of regular loads
2030 and prefetches though. And that buffer is shared by all threads.
2031 Once the threshold is reached, if the core is running a single
2032 thread the prefetch will retry. If more than one thread is
2033 running, the prefetch will be dropped.
2034
2035 All this makes it very difficult to determine how many
2036 prefetches can be issued simultaneously, even in a
2037 single-threaded program. Experimental results show that setting
2038 this parameter to 32 works well when the number of threads is not
2039 high. */
2040 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2041 param_simultaneous_prefetches,
2042 ((sparc_cpu == PROCESSOR_ULTRASPARC
2043 || sparc_cpu == PROCESSOR_NIAGARA
2044 || sparc_cpu == PROCESSOR_NIAGARA2
2045 || sparc_cpu == PROCESSOR_NIAGARA3
2046 || sparc_cpu == PROCESSOR_NIAGARA4)
2047 ? 2
2048 : (sparc_cpu == PROCESSOR_ULTRASPARC3
2049 ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
2050 || sparc_cpu == PROCESSOR_M8)
2051 ? 32 : 3))));
2052
2053 /* param_l1_cache_line_size is the size of the L1 cache line, in
2054 bytes.
2055
2056 The Oracle SPARC Architecture (previously the UltraSPARC
2057 Architecture) specification states that when a PREFETCH[A]
2058 instruction is executed an implementation-specific amount of data
2059 is prefetched, and that it is at least 64 bytes long (aligned to
2060 at least 64 bytes).
2061
2062 However, this is not correct. The M7 (and implementations prior
2063 to that) does not guarantee a 64B prefetch into a cache if the
2064 line size is smaller. A single cache line is all that is ever
2065 prefetched. So for the M7, where the L1D$ has 32B lines and the
2066 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
2067 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
2068 is a read_n prefetch, which is the only type which allocates to
2069 the L1.) */
2070 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2071 param_l1_cache_line_size,
2072 (sparc_cpu == PROCESSOR_M8 ? 64 : 32));
2073
2074 /* param_l1_cache_size is the size of the L1D$ (most SPARC chips use
2075 Harvard level-1 caches) in kilobytes. Both UltraSPARC and
2076 Niagara processors feature an L1D$ of 16KB.
2077 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2078 param_l1_cache_size,
2079 ((sparc_cpu == PROCESSOR_ULTRASPARC
2080 || sparc_cpu == PROCESSOR_ULTRASPARC3
2081 || sparc_cpu == PROCESSOR_NIAGARA
2082 || sparc_cpu == PROCESSOR_NIAGARA2
2083 || sparc_cpu == PROCESSOR_NIAGARA3
2084 || sparc_cpu == PROCESSOR_NIAGARA4
2085 || sparc_cpu == PROCESSOR_NIAGARA7
2086 || sparc_cpu == PROCESSOR_M8)
2087 ? 16 : 64));
2088
2089 /* param_l2_cache_size is the size of the L2 in kilobytes. Note
2090 that 512 is the default in params.def. */
2091 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2092 param_l2_cache_size,
2093 ((sparc_cpu == PROCESSOR_NIAGARA4
2094 || sparc_cpu == PROCESSOR_M8)
2095 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
2096 ? 256 : 512)));
2097
2098
2099 /* Disable save slot sharing for call-clobbered registers by default.
2100 The IRA sharing algorithm works on single registers only and this
2101 pessimizes for double floating-point registers. */
2102 if (!global_options_set.x_flag_ira_share_save_slots)
2103 flag_ira_share_save_slots = 0;
2104
2105 /* Only enable REE by default in 64-bit mode where it helps to eliminate
2106 redundant 32-to-64-bit extensions. */
2107 if (!global_options_set.x_flag_ree && TARGET_ARCH32)
2108 flag_ree = 0;
2109
2110 /* Do various machine dependent initializations. */
2111 sparc_init_modes ();
2112
2113 /* Set up function hooks. */
2114 init_machine_status = sparc_init_machine_status;
2115 }
2116
2117 /* Miscellaneous utilities. */
2118
2119 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
2120 or branch on register contents instructions. */
2121
2122 int
2123 v9_regcmp_p (enum rtx_code code)
2124 {
2125 return (code == EQ || code == NE || code == GE || code == LT
2126 || code == LE || code == GT);
2127 }
2128
2129 /* Nonzero if OP is a floating point constant which can
2130 be loaded into an integer register using a single
2131 sethi instruction. */
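/* For example (illustrative, not part of the original comment), 1.0f has
   the bit pattern 0x3f800000: its low 10 bits are clear and it is too big
   for a simm13, so a single sethi can materialize it.  */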
2132
2133 int
2134 fp_sethi_p (rtx op)
2135 {
2136 if (GET_CODE (op) == CONST_DOUBLE)
2137 {
2138 long i;
2139
2140 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2141 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
2142 }
2143
2144 return 0;
2145 }
2146
2147 /* Nonzero if OP is a floating point constant which can
2148 be loaded into an integer register using a single
2149 mov instruction. */
2150
2151 int
2152 fp_mov_p (rtx op)
2153 {
2154 if (GET_CODE (op) == CONST_DOUBLE)
2155 {
2156 long i;
2157
2158 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2159 return SPARC_SIMM13_P (i);
2160 }
2161
2162 return 0;
2163 }
2164
2165 /* Nonzero if OP is a floating point constant which can
2166 be loaded into an integer register using a high/losum
2167 instruction sequence. */
2168
2169 int
2170 fp_high_losum_p (rtx op)
2171 {
2172 /* The constraints calling this should only be in
2173 SFmode move insns, so any constant which cannot
2174 be moved using a single insn will do. */
2175 if (GET_CODE (op) == CONST_DOUBLE)
2176 {
2177 long i;
2178
2179 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2180 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
2181 }
2182
2183 return 0;
2184 }
2185
2186 /* Return true if the address of LABEL can be loaded by means of the
2187 mov{si,di}_pic_label_ref patterns in PIC mode. */
2188
2189 static bool
2190 can_use_mov_pic_label_ref (rtx label)
2191 {
2192 /* VxWorks does not impose a fixed gap between segments; the run-time
2193 gap can be different from the object-file gap. We therefore can't
2194 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
2195 are absolutely sure that X is in the same segment as the GOT.
2196 Unfortunately, the flexibility of linker scripts means that we
2197 can't be sure of that in general, so assume that GOT-relative
2198 accesses are never valid on VxWorks. */
2199 if (TARGET_VXWORKS_RTP)
2200 return false;
2201
2202 /* Similarly, if the label is non-local, it might end up being placed
2203 in a different section than the current one; now mov_pic_label_ref
2204 requires the label and the code to be in the same section. */
2205 if (LABEL_REF_NONLOCAL_P (label))
2206 return false;
2207
2208 /* Finally, if we are reordering basic blocks and partitioning into hot
2209 and cold sections, this might happen for any label. */
2210 if (flag_reorder_blocks_and_partition)
2211 return false;
2212
2213 return true;
2214 }
2215
2216 /* Expand a move instruction. Return true if all work is done. */
2217
2218 bool
2219 sparc_expand_move (machine_mode mode, rtx *operands)
2220 {
2221 /* Handle sets of MEM first. */
2222 if (GET_CODE (operands[0]) == MEM)
2223 {
2224 /* 0 is a register (or a pair of registers) on SPARC. */
2225 if (register_or_zero_operand (operands[1], mode))
2226 return false;
2227
2228 if (!reload_in_progress)
2229 {
2230 operands[0] = validize_mem (operands[0]);
2231 operands[1] = force_reg (mode, operands[1]);
2232 }
2233 }
2234
2235 /* Fix up TLS cases. */
2236 if (TARGET_HAVE_TLS
2237 && CONSTANT_P (operands[1])
2238 && sparc_tls_referenced_p (operands [1]))
2239 {
2240 operands[1] = sparc_legitimize_tls_address (operands[1]);
2241 return false;
2242 }
2243
2244 /* Fix up PIC cases. */
2245 if (flag_pic && CONSTANT_P (operands[1]))
2246 {
2247 if (pic_address_needs_scratch (operands[1]))
2248 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
2249
2250 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
2251 if ((GET_CODE (operands[1]) == LABEL_REF
2252 && can_use_mov_pic_label_ref (operands[1]))
2253 || (GET_CODE (operands[1]) == CONST
2254 && GET_CODE (XEXP (operands[1], 0)) == PLUS
2255 && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
2256 && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT
2257 && can_use_mov_pic_label_ref (XEXP (XEXP (operands[1], 0), 0))))
2258 {
2259 if (mode == SImode)
2260 {
2261 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
2262 return true;
2263 }
2264
2265 if (mode == DImode)
2266 {
2267 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
2268 return true;
2269 }
2270 }
2271
2272 if (symbolic_operand (operands[1], mode))
2273 {
2274 operands[1]
2275 = sparc_legitimize_pic_address (operands[1],
2276 reload_in_progress
2277 ? operands[0] : NULL_RTX);
2278 return false;
2279 }
2280 }
2281
2282 /* If we are trying to toss an integer constant into FP registers,
2283 or loading a FP or vector constant, force it into memory. */
2284 if (CONSTANT_P (operands[1])
2285 && REG_P (operands[0])
2286 && (SPARC_FP_REG_P (REGNO (operands[0]))
2287 || SCALAR_FLOAT_MODE_P (mode)
2288 || VECTOR_MODE_P (mode)))
2289 {
2290 /* emit_group_store will send such bogosity to us when it is
2291 not storing directly into memory. So fix this up to avoid
2292 crashes in output_constant_pool. */
2293 if (operands [1] == const0_rtx)
2294 operands[1] = CONST0_RTX (mode);
2295
2296 /* We can clear or set FP registers to all ones if TARGET_VIS, and can
2297 always do so for the other registers. */
2298 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
2299 && (const_zero_operand (operands[1], mode)
2300 || const_all_ones_operand (operands[1], mode)))
2301 return false;
2302
2303 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
2304 /* We are able to build any SF constant in integer registers
2305 with at most 2 instructions. */
2306 && (mode == SFmode
2307 /* And any DF constant in integer registers if needed. */
2308 || (mode == DFmode && !can_create_pseudo_p ())))
2309 return false;
2310
2311 operands[1] = force_const_mem (mode, operands[1]);
2312 if (!reload_in_progress)
2313 operands[1] = validize_mem (operands[1]);
2314 return false;
2315 }
2316
2317 /* Accept non-constants and valid constants unmodified. */
2318 if (!CONSTANT_P (operands[1])
2319 || GET_CODE (operands[1]) == HIGH
2320 || input_operand (operands[1], mode))
2321 return false;
2322
2323 switch (mode)
2324 {
2325 case E_QImode:
2326 /* All QImode constants require only one insn, so proceed. */
2327 break;
2328
2329 case E_HImode:
2330 case E_SImode:
2331 sparc_emit_set_const32 (operands[0], operands[1]);
2332 return true;
2333
2334 case E_DImode:
2335 /* input_operand should have filtered out 32-bit mode. */
2336 sparc_emit_set_const64 (operands[0], operands[1]);
2337 return true;
2338
2339 case E_TImode:
2340 {
2341 rtx high, low;
2342 /* TImode isn't available in 32-bit mode. */
2343 split_double (operands[1], &high, &low);
2344 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
2345 high));
2346 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
2347 low));
2348 }
2349 return true;
2350
2351 default:
2352 gcc_unreachable ();
2353 }
2354
2355 return false;
2356 }
2357
2358 /* Load OP1, a 32-bit constant, into OP0, a register.
2359 We know it can't be done in one insn when we get
2360 here; the move expander guarantees this. */
2361
2362 static void
2363 sparc_emit_set_const32 (rtx op0, rtx op1)
2364 {
2365 machine_mode mode = GET_MODE (op0);
2366 rtx temp = op0;
2367
2368 if (can_create_pseudo_p ())
2369 temp = gen_reg_rtx (mode);
2370
2371 if (GET_CODE (op1) == CONST_INT)
2372 {
2373 gcc_assert (!small_int_operand (op1, mode)
2374 && !const_high_operand (op1, mode));
2375
2376 /* Emit them as real moves instead of a HIGH/LO_SUM;
2377 this way CSE can see everything and reuse intermediate
2378 values if it wants. */
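/* Worked example (illustrative only): for op1 == 0x12345678 the first SET
   below loads 0x12345400 (the constant with its low 10 bits cleared) into
   temp and the IOR merges in the remaining 0x278.  */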
2379 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
2380 & ~(HOST_WIDE_INT) 0x3ff)));
2381
2382 emit_insn (gen_rtx_SET (op0,
2383 gen_rtx_IOR (mode, temp,
2384 GEN_INT (INTVAL (op1) & 0x3ff))));
2385 }
2386 else
2387 {
2388 /* A symbol, emit in the traditional way. */
2389 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
2390 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
2391 }
2392 }
2393
2394 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
2395 If TEMP is nonzero, we are forbidden to use any other scratch
2396 registers. Otherwise, we are allowed to generate them as needed.
2397
2398 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2399 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2400
2401 void
2402 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2403 {
2404 rtx cst, temp1, temp2, temp3, temp4, temp5;
2405 rtx ti_temp = 0;
2406
2407 /* Deal with too large offsets. */
2408 if (GET_CODE (op1) == CONST
2409 && GET_CODE (XEXP (op1, 0)) == PLUS
2410 && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
2411 && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
2412 {
2413 gcc_assert (!temp);
2414 temp1 = gen_reg_rtx (DImode);
2415 temp2 = gen_reg_rtx (DImode);
2416 sparc_emit_set_const64 (temp2, cst);
2417 sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
2418 NULL_RTX);
2419 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
2420 return;
2421 }
2422
2423 if (temp && GET_MODE (temp) == TImode)
2424 {
2425 ti_temp = temp;
2426 temp = gen_rtx_REG (DImode, REGNO (temp));
2427 }
2428
2429 /* SPARC-V9 code model support. */
2430 switch (sparc_code_model)
2431 {
2432 case CM_MEDLOW:
2433 /* The range spanned by all instructions in the object is less
2434 than 2^31 bytes (2GB) and the distance from any instruction
2435 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2436 than 2^31 bytes (2GB).
2437
2438 The executable must be in the low 4TB of the virtual address
2439 space.
2440
2441 sethi %hi(symbol), %temp1
2442 or %temp1, %lo(symbol), %reg */
2443 if (temp)
2444 temp1 = temp; /* op0 is allowed. */
2445 else
2446 temp1 = gen_reg_rtx (DImode);
2447
2448 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2449 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2450 break;
2451
2452 case CM_MEDMID:
2453 /* The range spanned by all instructions in the object is less
2454 than 2^31 bytes (2GB) and the distance from any instruction
2455 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2456 than 2^31 bytes (2GB).
2457
2458 The executable must be in the low 16TB of the virtual address
2459 space.
2460
2461 sethi %h44(symbol), %temp1
2462 or %temp1, %m44(symbol), %temp2
2463 sllx %temp2, 12, %temp3
2464 or %temp3, %l44(symbol), %reg */
2465 if (temp)
2466 {
2467 temp1 = op0;
2468 temp2 = op0;
2469 temp3 = temp; /* op0 is allowed. */
2470 }
2471 else
2472 {
2473 temp1 = gen_reg_rtx (DImode);
2474 temp2 = gen_reg_rtx (DImode);
2475 temp3 = gen_reg_rtx (DImode);
2476 }
2477
2478 emit_insn (gen_seth44 (temp1, op1));
2479 emit_insn (gen_setm44 (temp2, temp1, op1));
2480 emit_insn (gen_rtx_SET (temp3,
2481 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2482 emit_insn (gen_setl44 (op0, temp3, op1));
2483 break;
2484
2485 case CM_MEDANY:
2486 /* The range spanned by all instructions in the object is less
2487 than 2^31 bytes (2GB) and the distance from any instruction
2488 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2489 than 2^31 bytes (2GB).
2490
2491 The executable can be placed anywhere in the virtual address
2492 space.
2493
2494 sethi %hh(symbol), %temp1
2495 sethi %lm(symbol), %temp2
2496 or %temp1, %hm(symbol), %temp3
2497 sllx %temp3, 32, %temp4
2498 or %temp4, %temp2, %temp5
2499 or %temp5, %lo(symbol), %reg */
2500 if (temp)
2501 {
2502 /* It is possible that one of the registers we got for operands[2]
2503 might coincide with that of operands[0] (which is why we made
2504 it TImode). Pick the other one to use as our scratch. */
2505 if (rtx_equal_p (temp, op0))
2506 {
2507 gcc_assert (ti_temp);
2508 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2509 }
2510 temp1 = op0;
2511 temp2 = temp; /* op0 is _not_ allowed, see above. */
2512 temp3 = op0;
2513 temp4 = op0;
2514 temp5 = op0;
2515 }
2516 else
2517 {
2518 temp1 = gen_reg_rtx (DImode);
2519 temp2 = gen_reg_rtx (DImode);
2520 temp3 = gen_reg_rtx (DImode);
2521 temp4 = gen_reg_rtx (DImode);
2522 temp5 = gen_reg_rtx (DImode);
2523 }
2524
2525 emit_insn (gen_sethh (temp1, op1));
2526 emit_insn (gen_setlm (temp2, op1));
2527 emit_insn (gen_sethm (temp3, temp1, op1));
2528 emit_insn (gen_rtx_SET (temp4,
2529 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2530 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2531 emit_insn (gen_setlo (op0, temp5, op1));
2532 break;
2533
2534 case CM_EMBMEDANY:
2535 /* Old old old backwards compatibility kruft here.
2536 Essentially it is MEDLOW with a fixed 64-bit
2537 virtual base added to all data segment addresses.
2538 Text-segment stuff is computed like MEDANY, we can't
2539 reuse the code above because the relocation knobs
2540 look different.
2541
2542 Data segment: sethi %hi(symbol), %temp1
2543 add %temp1, EMBMEDANY_BASE_REG, %temp2
2544 or %temp2, %lo(symbol), %reg */
2545 if (data_segment_operand (op1, GET_MODE (op1)))
2546 {
2547 if (temp)
2548 {
2549 temp1 = temp; /* op0 is allowed. */
2550 temp2 = op0;
2551 }
2552 else
2553 {
2554 temp1 = gen_reg_rtx (DImode);
2555 temp2 = gen_reg_rtx (DImode);
2556 }
2557
2558 emit_insn (gen_embmedany_sethi (temp1, op1));
2559 emit_insn (gen_embmedany_brsum (temp2, temp1));
2560 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2561 }
2562
2563 /* Text segment: sethi %uhi(symbol), %temp1
2564 sethi %hi(symbol), %temp2
2565 or %temp1, %ulo(symbol), %temp3
2566 sllx %temp3, 32, %temp4
2567 or %temp4, %temp2, %temp5
2568 or %temp5, %lo(symbol), %reg */
2569 else
2570 {
2571 if (temp)
2572 {
2573 /* It is possible that one of the registers we got for operands[2]
2574 might coincide with that of operands[0] (which is why we made
2575 it TImode). Pick the other one to use as our scratch. */
2576 if (rtx_equal_p (temp, op0))
2577 {
2578 gcc_assert (ti_temp);
2579 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2580 }
2581 temp1 = op0;
2582 temp2 = temp; /* op0 is _not_ allowed, see above. */
2583 temp3 = op0;
2584 temp4 = op0;
2585 temp5 = op0;
2586 }
2587 else
2588 {
2589 temp1 = gen_reg_rtx (DImode);
2590 temp2 = gen_reg_rtx (DImode);
2591 temp3 = gen_reg_rtx (DImode);
2592 temp4 = gen_reg_rtx (DImode);
2593 temp5 = gen_reg_rtx (DImode);
2594 }
2595
2596 emit_insn (gen_embmedany_textuhi (temp1, op1));
2597 emit_insn (gen_embmedany_texthi (temp2, op1));
2598 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2599 emit_insn (gen_rtx_SET (temp4,
2600 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2601 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2602 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2603 }
2604 break;
2605
2606 default:
2607 gcc_unreachable ();
2608 }
2609 }
2610
2611 /* These avoid problems when cross compiling. If we do not
2612 go through all this hair then the optimizer will see
2613 invalid REG_EQUAL notes or in some cases none at all. */
2614 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2615 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2616 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2617 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2618
2619 /* The optimizer is not to assume anything about exactly
2620 which bits are set for a HIGH; they are unspecified.
2621 Unfortunately this leads to many missed optimizations
2622 during CSE. We mask out the non-HIGH bits so the result matches
2623 a plain movdi, to alleviate this problem. */
2624 static rtx
2625 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2626 {
2627 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2628 }
2629
2630 static rtx
2631 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2632 {
2633 return gen_rtx_SET (dest, GEN_INT (val));
2634 }
2635
2636 static rtx
2637 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2638 {
2639 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2640 }
2641
2642 static rtx
2643 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2644 {
2645 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2646 }
2647
2648 /* Worker routines for 64-bit constant formation on arch64.
2649 One of the key things to do in these emissions is
2650 to create as many temp REGs as possible. This makes it
2651 possible for half-built constants to be used later when
2652 such values are similar to something required later on.
2653 Without doing this, the optimizer cannot see such
2654 opportunities. */
2655
2656 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2657 unsigned HOST_WIDE_INT, int);
2658
2659 static void
2660 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2661 unsigned HOST_WIDE_INT low_bits, int is_neg)
2662 {
2663 unsigned HOST_WIDE_INT high_bits;
2664
2665 if (is_neg)
2666 high_bits = (~low_bits) & 0xffffffff;
2667 else
2668 high_bits = low_bits;
2669
2670 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2671 if (!is_neg)
2672 {
2673 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2674 }
2675 else
2676 {
2677 /* If we are XOR'ing with -1, then we should emit a one's complement
2678 instead. This way the combiner will notice logical operations
2679 such as ANDN later on and substitute. */
2680 if ((low_bits & 0x3ff) == 0x3ff)
2681 {
2682 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2683 }
2684 else
2685 {
2686 emit_insn (gen_rtx_SET (op0,
2687 gen_safe_XOR64 (temp,
2688 (-(HOST_WIDE_INT)0x400
2689 | (low_bits & 0x3ff)))));
2690 }
2691 }
2692 }
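/* Illustrative trace of the negated (is_neg) case above, not part of the
   original commentary: to load 0xffffffffedcba988, high_bits is computed as
   ~low_bits = 0x12345677, the HIGH64 leaves 0x12345400 in temp, and XORing
   with -0x400 | 0x188 (i.e. -632) restores the low word and sets the upper
   32 bits to all ones.  */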
2693
2694 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2695 unsigned HOST_WIDE_INT, int);
2696
2697 static void
2698 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2699 unsigned HOST_WIDE_INT high_bits,
2700 unsigned HOST_WIDE_INT low_immediate,
2701 int shift_count)
2702 {
2703 rtx temp2 = op0;
2704
2705 if ((high_bits & 0xfffffc00) != 0)
2706 {
2707 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2708 if ((high_bits & ~0xfffffc00) != 0)
2709 emit_insn (gen_rtx_SET (op0,
2710 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2711 else
2712 temp2 = temp;
2713 }
2714 else
2715 {
2716 emit_insn (gen_safe_SET64 (temp, high_bits));
2717 temp2 = temp;
2718 }
2719
2720 /* Now shift it up into place. */
2721 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2722 GEN_INT (shift_count))));
2723
2724 /* If there is a low immediate piece, finish up by
2725 putting that in as well. */
2726 if (low_immediate != 0)
2727 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2728 }
2729
2730 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2731 unsigned HOST_WIDE_INT);
2732
2733 /* Full 64-bit constant decomposition. Even though this is the
2734 'worst' case, we still optimize a few things away. */
2735 static void
2736 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2737 unsigned HOST_WIDE_INT high_bits,
2738 unsigned HOST_WIDE_INT low_bits)
2739 {
2740 rtx sub_temp = op0;
2741
2742 if (can_create_pseudo_p ())
2743 sub_temp = gen_reg_rtx (DImode);
2744
2745 if ((high_bits & 0xfffffc00) != 0)
2746 {
2747 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2748 if ((high_bits & ~0xfffffc00) != 0)
2749 emit_insn (gen_rtx_SET (sub_temp,
2750 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2751 else
2752 sub_temp = temp;
2753 }
2754 else
2755 {
2756 emit_insn (gen_safe_SET64 (temp, high_bits));
2757 sub_temp = temp;
2758 }
2759
2760 if (can_create_pseudo_p ())
2761 {
2762 rtx temp2 = gen_reg_rtx (DImode);
2763 rtx temp3 = gen_reg_rtx (DImode);
2764 rtx temp4 = gen_reg_rtx (DImode);
2765
2766 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2767 GEN_INT (32))));
2768
2769 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2770 if ((low_bits & ~0xfffffc00) != 0)
2771 {
2772 emit_insn (gen_rtx_SET (temp3,
2773 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2774 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2775 }
2776 else
2777 {
2778 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2779 }
2780 }
2781 else
2782 {
2783 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2784 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2785 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2786 int to_shift = 12;
2787
2788 /* We are in the middle of reload, so this is really
2789 painful. However we do still make an attempt to
2790 avoid emitting truly stupid code. */
2791 if (low1 != const0_rtx)
2792 {
2793 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2794 GEN_INT (to_shift))));
2795 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2796 sub_temp = op0;
2797 to_shift = 12;
2798 }
2799 else
2800 {
2801 to_shift += 12;
2802 }
2803 if (low2 != const0_rtx)
2804 {
2805 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2806 GEN_INT (to_shift))));
2807 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2808 sub_temp = op0;
2809 to_shift = 8;
2810 }
2811 else
2812 {
2813 to_shift += 8;
2814 }
2815 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2816 GEN_INT (to_shift))));
2817 if (low3 != const0_rtx)
2818 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2819 /* phew... */
2820 }
2821 }
2822
2823 /* Analyze a 64-bit constant for certain properties. */
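/* For instance (illustrative example), high_bits == 0 and low_bits ==
   0xffff0000 yields *lbsp == 16, *hbsp == 31 and *abbasp == 1, since the
   set bits form one contiguous run.  */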
2824 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2825 unsigned HOST_WIDE_INT,
2826 int *, int *, int *);
2827
2828 static void
2829 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2830 unsigned HOST_WIDE_INT low_bits,
2831 int *hbsp, int *lbsp, int *abbasp)
2832 {
2833 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2834 int i;
2835
2836 lowest_bit_set = highest_bit_set = -1;
2837 i = 0;
2838 do
2839 {
2840 if ((lowest_bit_set == -1)
2841 && ((low_bits >> i) & 1))
2842 lowest_bit_set = i;
2843 if ((highest_bit_set == -1)
2844 && ((high_bits >> (32 - i - 1)) & 1))
2845 highest_bit_set = (64 - i - 1);
2846 }
2847 while (++i < 32
2848 && ((highest_bit_set == -1)
2849 || (lowest_bit_set == -1)));
2850 if (i == 32)
2851 {
2852 i = 0;
2853 do
2854 {
2855 if ((lowest_bit_set == -1)
2856 && ((high_bits >> i) & 1))
2857 lowest_bit_set = i + 32;
2858 if ((highest_bit_set == -1)
2859 && ((low_bits >> (32 - i - 1)) & 1))
2860 highest_bit_set = 32 - i - 1;
2861 }
2862 while (++i < 32
2863 && ((highest_bit_set == -1)
2864 || (lowest_bit_set == -1)));
2865 }
2866 /* If there are no bits set this should have gone out
2867 as one instruction! */
2868 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2869 all_bits_between_are_set = 1;
2870 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2871 {
2872 if (i < 32)
2873 {
2874 if ((low_bits & (1 << i)) != 0)
2875 continue;
2876 }
2877 else
2878 {
2879 if ((high_bits & (1 << (i - 32))) != 0)
2880 continue;
2881 }
2882 all_bits_between_are_set = 0;
2883 break;
2884 }
2885 *hbsp = highest_bit_set;
2886 *lbsp = lowest_bit_set;
2887 *abbasp = all_bits_between_are_set;
2888 }
2889
2890 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2891
2892 static int
2893 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2894 unsigned HOST_WIDE_INT low_bits)
2895 {
2896 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2897
2898 if (high_bits == 0
2899 || high_bits == 0xffffffff)
2900 return 1;
2901
2902 analyze_64bit_constant (high_bits, low_bits,
2903 &highest_bit_set, &lowest_bit_set,
2904 &all_bits_between_are_set);
2905
2906 if ((highest_bit_set == 63
2907 || lowest_bit_set == 0)
2908 && all_bits_between_are_set != 0)
2909 return 1;
2910
2911 if ((highest_bit_set - lowest_bit_set) < 21)
2912 return 1;
2913
2914 return 0;
2915 }
2916
2917 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2918 unsigned HOST_WIDE_INT,
2919 int, int);
2920
2921 static unsigned HOST_WIDE_INT
2922 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2923 unsigned HOST_WIDE_INT low_bits,
2924 int lowest_bit_set, int shift)
2925 {
2926 HOST_WIDE_INT hi, lo;
2927
2928 if (lowest_bit_set < 32)
2929 {
2930 lo = (low_bits >> lowest_bit_set) << shift;
2931 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2932 }
2933 else
2934 {
2935 lo = 0;
2936 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2937 }
2938 gcc_assert (! (hi & lo));
2939 return (hi | lo);
2940 }
2941
2942 /* Here we are sure to be arch64 and this is an integer constant
2943 being loaded into a register. Emit the most efficient
2944 insn sequence possible. Detection of all the 1-insn cases
2945 has been done already. */
2946 static void
2947 sparc_emit_set_const64 (rtx op0, rtx op1)
2948 {
2949 unsigned HOST_WIDE_INT high_bits, low_bits;
2950 int lowest_bit_set, highest_bit_set;
2951 int all_bits_between_are_set;
2952 rtx temp = 0;
2953
2954 /* Sanity check that we know what we are working with. */
2955 gcc_assert (TARGET_ARCH64
2956 && (GET_CODE (op0) == SUBREG
2957 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2958
2959 if (! can_create_pseudo_p ())
2960 temp = op0;
2961
2962 if (GET_CODE (op1) != CONST_INT)
2963 {
2964 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2965 return;
2966 }
2967
2968 if (! temp)
2969 temp = gen_reg_rtx (DImode);
2970
2971 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2972 low_bits = (INTVAL (op1) & 0xffffffff);
2973
2974 /* low_bits bits 0 --> 31
2975 high_bits bits 32 --> 63 */
2976
2977 analyze_64bit_constant (high_bits, low_bits,
2978 &highest_bit_set, &lowest_bit_set,
2979 &all_bits_between_are_set);
2980
2981 /* First try for a 2-insn sequence. */
2982
2983 /* These situations are preferred because the optimizer can
2984 * do more things with them:
2985 * 1) mov -1, %reg
2986 * sllx %reg, shift, %reg
2987 * 2) mov -1, %reg
2988 * srlx %reg, shift, %reg
2989 * 3) mov some_small_const, %reg
2990 * sllx %reg, shift, %reg
2991 */
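/* A concrete instance (illustrative only): 0x3fc0000000 has bits 30..37
   set, so the_const becomes 0xff with shift == 30 and we emit
     mov  0xff, %temp
     sllx %temp, 30, %op0  */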
2992 if (((highest_bit_set == 63
2993 || lowest_bit_set == 0)
2994 && all_bits_between_are_set != 0)
2995 || ((highest_bit_set - lowest_bit_set) < 12))
2996 {
2997 HOST_WIDE_INT the_const = -1;
2998 int shift = lowest_bit_set;
2999
3000 if ((highest_bit_set != 63
3001 && lowest_bit_set != 0)
3002 || all_bits_between_are_set == 0)
3003 {
3004 the_const =
3005 create_simple_focus_bits (high_bits, low_bits,
3006 lowest_bit_set, 0);
3007 }
3008 else if (lowest_bit_set == 0)
3009 shift = -(63 - highest_bit_set);
3010
3011 gcc_assert (SPARC_SIMM13_P (the_const));
3012 gcc_assert (shift != 0);
3013
3014 emit_insn (gen_safe_SET64 (temp, the_const));
3015 if (shift > 0)
3016 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
3017 GEN_INT (shift))));
3018 else if (shift < 0)
3019 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
3020 GEN_INT (-shift))));
3021 return;
3022 }
3023
3024 /* Now a range of 22 or fewer bits set somewhere.
3025 * 1) sethi %hi(focus_bits), %reg
3026 * sllx %reg, shift, %reg
3027 * 2) sethi %hi(focus_bits), %reg
3028 * srlx %reg, shift, %reg
3029 */
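/* Example (illustrative only): 0x001ffff0 spans bits 4..20, focus_bits is
   0x07fffc00 (the run moved up to start at bit 10 so sethi can load it),
   and an srlx by 6 puts it back in place.  */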
3030 if ((highest_bit_set - lowest_bit_set) < 21)
3031 {
3032 unsigned HOST_WIDE_INT focus_bits =
3033 create_simple_focus_bits (high_bits, low_bits,
3034 lowest_bit_set, 10);
3035
3036 gcc_assert (SPARC_SETHI_P (focus_bits));
3037 gcc_assert (lowest_bit_set != 10);
3038
3039 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
3040
3041 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
3042 if (lowest_bit_set < 10)
3043 emit_insn (gen_rtx_SET (op0,
3044 gen_rtx_LSHIFTRT (DImode, temp,
3045 GEN_INT (10 - lowest_bit_set))));
3046 else if (lowest_bit_set > 10)
3047 emit_insn (gen_rtx_SET (op0,
3048 gen_rtx_ASHIFT (DImode, temp,
3049 GEN_INT (lowest_bit_set - 10))));
3050 return;
3051 }
3052
3053 /* 1) sethi %hi(low_bits), %reg
3054 * or %reg, %lo(low_bits), %reg
3055 * 2) sethi %hi(~low_bits), %reg
3056 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
3057 */
3058 if (high_bits == 0
3059 || high_bits == 0xffffffff)
3060 {
3061 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
3062 (high_bits == 0xffffffff));
3063 return;
3064 }
3065
3066 /* Now, try 3-insn sequences. */
3067
3068 /* 1) sethi %hi(high_bits), %reg
3069 * or %reg, %lo(high_bits), %reg
3070 * sllx %reg, 32, %reg
3071 */
3072 if (low_bits == 0)
3073 {
3074 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
3075 return;
3076 }
3077
3078 /* We may be able to do something quick
3079 when the constant is negated, so try that. */
3080 if (const64_is_2insns ((~high_bits) & 0xffffffff,
3081 (~low_bits) & 0xfffffc00))
3082 {
3083 /* NOTE: The trailing bits get XOR'd so we need the
3084 non-negated bits, not the negated ones. */
3085 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
3086
3087 if ((((~high_bits) & 0xffffffff) == 0
3088 && ((~low_bits) & 0x80000000) == 0)
3089 || (((~high_bits) & 0xffffffff) == 0xffffffff
3090 && ((~low_bits) & 0x80000000) != 0))
3091 {
3092 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
3093
3094 if ((SPARC_SETHI_P (fast_int)
3095 && (~high_bits & 0xffffffff) == 0)
3096 || SPARC_SIMM13_P (fast_int))
3097 emit_insn (gen_safe_SET64 (temp, fast_int));
3098 else
3099 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
3100 }
3101 else
3102 {
3103 rtx negated_const;
3104 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
3105 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
3106 sparc_emit_set_const64 (temp, negated_const);
3107 }
3108
3109 /* If we are XOR'ing with -1, then we should emit a one's complement
3110 instead. This way the combiner will notice logical operations
3111 such as ANDN later on and substitute. */
3112 if (trailing_bits == 0x3ff)
3113 {
3114 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
3115 }
3116 else
3117 {
3118 emit_insn (gen_rtx_SET (op0,
3119 gen_safe_XOR64 (temp,
3120 (-0x400 | trailing_bits))));
3121 }
3122 return;
3123 }
3124
3125 /* 1) sethi %hi(xxx), %reg
3126 * or %reg, %lo(xxx), %reg
3127 * sllx %reg, yyy, %reg
3128 *
3129 * ??? This is just a generalized version of the low_bits==0
3130 * thing above, FIXME...
3131 */
3132 if ((highest_bit_set - lowest_bit_set) < 32)
3133 {
3134 unsigned HOST_WIDE_INT focus_bits =
3135 create_simple_focus_bits (high_bits, low_bits,
3136 lowest_bit_set, 0);
3137
3138 /* We can't get here in this state. */
3139 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
3140
3141 /* So what we know is that the set bits straddle the
3142 middle of the 64-bit word. */
3143 sparc_emit_set_const64_quick2 (op0, temp,
3144 focus_bits, 0,
3145 lowest_bit_set);
3146 return;
3147 }
3148
3149 /* 1) sethi %hi(high_bits), %reg
3150 * or %reg, %lo(high_bits), %reg
3151 * sllx %reg, 32, %reg
3152 * or %reg, low_bits, %reg
3153 */
3154 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
3155 {
3156 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
3157 return;
3158 }
3159
3160 /* The easiest way, when all else fails, is full decomposition. */
3161 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
3162 }
3163
3164 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
3165
3166 static bool
3167 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3168 {
3169 *p1 = SPARC_ICC_REG;
3170 *p2 = SPARC_FCC_REG;
3171 return true;
3172 }
3173
3174 /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
3175
3176 static unsigned int
3177 sparc_min_arithmetic_precision (void)
3178 {
3179 return 32;
3180 }
3181
3182 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
3183 return the mode to be used for the comparison. For floating-point,
3184 CCFP[E]mode is used. CCNZmode should be used when the first operand
3185 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
3186 processing is needed. */
3187
3188 machine_mode
3189 select_cc_mode (enum rtx_code op, rtx x, rtx y)
3190 {
3191 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3192 {
3193 switch (op)
3194 {
3195 case EQ:
3196 case NE:
3197 case UNORDERED:
3198 case ORDERED:
3199 case UNLT:
3200 case UNLE:
3201 case UNGT:
3202 case UNGE:
3203 case UNEQ:
3204 return CCFPmode;
3205
3206 case LT:
3207 case LE:
3208 case GT:
3209 case GE:
3210 case LTGT:
3211 return CCFPEmode;
3212
3213 default:
3214 gcc_unreachable ();
3215 }
3216 }
3217 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
3218 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
3219 && y == const0_rtx)
3220 {
3221 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3222 return CCXNZmode;
3223 else
3224 return CCNZmode;
3225 }
3226 else
3227 {
3228 /* This is for the cmp<mode>_sne pattern. */
3229 if (GET_CODE (x) == NOT && y == constm1_rtx)
3230 {
3231 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3232 return CCXCmode;
3233 else
3234 return CCCmode;
3235 }
3236
3237 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
3238 if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
3239 {
3240 if (GET_CODE (y) == UNSPEC
3241 && (XINT (y, 1) == UNSPEC_ADDV
3242 || XINT (y, 1) == UNSPEC_SUBV
3243 || XINT (y, 1) == UNSPEC_NEGV))
3244 return CCVmode;
3245 else
3246 return CCCmode;
3247 }
3248
3249 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3250 return CCXmode;
3251 else
3252 return CCmode;
3253 }
3254 }
3255
3256 /* Emit the compare insn and return the CC reg for a CODE comparison
3257 with operands X and Y. */
3258
3259 static rtx
3260 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
3261 {
3262 machine_mode mode;
3263 rtx cc_reg;
3264
3265 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
3266 return x;
3267
3268 mode = SELECT_CC_MODE (code, x, y);
3269
3270 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
3271 fcc regs (cse can't tell they're really call clobbered regs and will
3272 remove a duplicate comparison even if there is an intervening function
3273 call - it will then try to reload the cc reg via an int reg which is why
3274 we need the movcc patterns). It is possible to provide the movcc
3275 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
3276 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
3277 to tell cse that CCFPE mode registers (even pseudos) are call
3278 clobbered. */
3279
3280 /* ??? This is an experiment. Rather than making changes to cse which may
3281 or may not be easy/clean, we do our own cse. This is possible because
3282 we will generate hard registers. Cse knows they're call clobbered (it
3283 doesn't know the same thing about pseudos). If we guess wrong, no big
3284 deal, but if we win, great! */
3285
3286 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3287 #if 1 /* experiment */
3288 {
3289 int reg;
3290 /* We cycle through the registers to ensure they're all exercised. */
3291 static int next_fcc_reg = 0;
3292 /* Previous x,y for each fcc reg. */
3293 static rtx prev_args[4][2];
3294
3295 /* Scan prev_args for x,y. */
3296 for (reg = 0; reg < 4; reg++)
3297 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
3298 break;
3299 if (reg == 4)
3300 {
3301 reg = next_fcc_reg;
3302 prev_args[reg][0] = x;
3303 prev_args[reg][1] = y;
3304 next_fcc_reg = (next_fcc_reg + 1) & 3;
3305 }
3306 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
3307 }
3308 #else
3309 cc_reg = gen_reg_rtx (mode);
3310 #endif /* ! experiment */
3311 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3312 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
3313 else
3314 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
3315
3316 /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD. If we do, this
3317 will only result in an unrecognizable insn so no point in asserting. */
3318 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
3319
3320 return cc_reg;
3321 }
3322
3323
3324 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
3325
3326 rtx
3327 gen_compare_reg (rtx cmp)
3328 {
3329 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
3330 }
3331
3332 /* This function is used for v9 only.
3333 DEST is the target of the Scc insn.
3334 CODE is the code for an Scc's comparison.
3335 X and Y are the values we compare.
3336
3337 This function is needed to turn
3338
3339 (set (reg:SI 110)
3340 (gt (reg:CCX 100 %icc)
3341 (const_int 0)))
3342 into
3343 (set (reg:SI 110)
3344 (gt:DI (reg:CCX 100 %icc)
3345 (const_int 0)))
3346
3347 I.e., the instruction recognizer needs to see the mode of the comparison to
3348 find the right instruction. We could use "gt:DI" right in the
3349 define_expand, but leaving it out allows us to handle DI, SI, etc. */
3350
3351 static int
3352 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
3353 {
3354 if (! TARGET_ARCH64
3355 && (GET_MODE (x) == DImode
3356 || GET_MODE (dest) == DImode))
3357 return 0;
3358
3359 /* Try to use the movrCC insns. */
3360 if (TARGET_ARCH64
3361 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
3362 && y == const0_rtx
3363 && v9_regcmp_p (compare_code))
3364 {
3365 rtx op0 = x;
3366 rtx temp;
3367
3368 /* Special case for op0 != 0. This can be done with one instruction if
3369 dest == x. */
3370
3371 if (compare_code == NE
3372 && GET_MODE (dest) == DImode
3373 && rtx_equal_p (op0, dest))
3374 {
3375 emit_insn (gen_rtx_SET (dest,
3376 gen_rtx_IF_THEN_ELSE (DImode,
3377 gen_rtx_fmt_ee (compare_code, DImode,
3378 op0, const0_rtx),
3379 const1_rtx,
3380 dest)));
3381 return 1;
3382 }
3383
3384 if (reg_overlap_mentioned_p (dest, op0))
3385 {
3386 /* Handle the case where dest == x.
3387 We "early clobber" the result. */
3388 op0 = gen_reg_rtx (GET_MODE (x));
3389 emit_move_insn (op0, x);
3390 }
3391
3392 emit_insn (gen_rtx_SET (dest, const0_rtx));
3393 if (GET_MODE (op0) != DImode)
3394 {
3395 temp = gen_reg_rtx (DImode);
3396 convert_move (temp, op0, 0);
3397 }
3398 else
3399 temp = op0;
3400 emit_insn (gen_rtx_SET (dest,
3401 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3402 gen_rtx_fmt_ee (compare_code, DImode,
3403 temp, const0_rtx),
3404 const1_rtx,
3405 dest)));
3406 return 1;
3407 }
3408 else
3409 {
3410 x = gen_compare_reg_1 (compare_code, x, y);
3411 y = const0_rtx;
3412
3413 emit_insn (gen_rtx_SET (dest, const0_rtx));
3414 emit_insn (gen_rtx_SET (dest,
3415 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3416 gen_rtx_fmt_ee (compare_code,
3417 GET_MODE (x), x, y),
3418 const1_rtx, dest)));
3419 return 1;
3420 }
3421 }
3422
3423
3424 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
3425 without jumps using the addx/subx instructions. */
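/* For instance, the classic branch-free expansion of sltu is roughly
     subcc %o0, %o1, %g0
     addx  %g0, 0, %dest
   (illustrative sketch only; the exact sequence comes from the machine
   description patterns, not from this function).  */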
3426
3427 bool
3428 emit_scc_insn (rtx operands[])
3429 {
3430 rtx tem, x, y;
3431 enum rtx_code code;
3432 machine_mode mode;
3433
3434 /* The quad-word fp compare library routines all return nonzero to indicate
3435 true, which is different from the equivalent libgcc routines, so we must
3436 handle them specially here. */
3437 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3438 {
3439 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3440 GET_CODE (operands[1]));
3441 operands[2] = XEXP (operands[1], 0);
3442 operands[3] = XEXP (operands[1], 1);
3443 }
3444
3445 code = GET_CODE (operands[1]);
3446 x = operands[2];
3447 y = operands[3];
3448 mode = GET_MODE (x);
3449
3450 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3451 more applications). The exception to this is "reg != 0" which can
3452 be done in one instruction on v9 (so we do it). */
3453 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3454 {
3455 if (y != const0_rtx)
3456 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3457
3458 rtx pat = gen_rtx_SET (operands[0],
3459 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3460 x, const0_rtx));
3461
3462 /* If we can use addx/subx or addxc, add a clobber for CC. */
3463 if (mode == SImode || (code == NE && TARGET_VIS3))
3464 {
3465 rtx clobber
3466 = gen_rtx_CLOBBER (VOIDmode,
3467 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3468 SPARC_ICC_REG));
3469 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3470 }
3471
3472 emit_insn (pat);
3473 return true;
3474 }
3475
3476 /* We can do LTU in DImode using the addxc instruction with VIS3. */
3477 if (TARGET_ARCH64
3478 && mode == DImode
3479 && !((code == LTU || code == GTU) && TARGET_VIS3)
3480 && gen_v9_scc (operands[0], code, x, y))
3481 return true;
3482
3483 /* We can do LTU and GEU using the addx/subx instructions too. And
3484 for GTU/LEU, if both operands are registers swap them and fall
3485 back to the easy case. */
3486 if (code == GTU || code == LEU)
3487 {
3488 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3489 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3490 {
3491 tem = x;
3492 x = y;
3493 y = tem;
3494 code = swap_condition (code);
3495 }
3496 }
3497
3498 if (code == LTU || code == GEU)
3499 {
3500 emit_insn (gen_rtx_SET (operands[0],
3501 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3502 gen_compare_reg_1 (code, x, y),
3503 const0_rtx)));
3504 return true;
3505 }
3506
3507 /* All the possibilities to use addx/subx-based sequences have been
3508 exhausted; try for a 3-instruction sequence using v9
3509 moves. */
3510 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3511 return true;
3512
3513 /* Nope, do branches. */
3514 return false;
3515 }
3516
3517 /* Emit a conditional jump insn for the v9 architecture using comparison code
3518 CODE and jump target LABEL.
3519 This function exists to take advantage of the v9 brxx insns. */
3520
3521 static void
3522 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3523 {
3524 emit_jump_insn (gen_rtx_SET (pc_rtx,
3525 gen_rtx_IF_THEN_ELSE (VOIDmode,
3526 gen_rtx_fmt_ee (code, GET_MODE (op0),
3527 op0, const0_rtx),
3528 gen_rtx_LABEL_REF (VOIDmode, label),
3529 pc_rtx)));
3530 }
3531
3532 /* Emit a conditional jump insn for the UA2011 architecture using
3533 comparison code CODE and jump target LABEL. This function exists
3534 to take advantage of the UA2011 Compare and Branch insns. */
3535
3536 static void
3537 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3538 {
3539 rtx if_then_else;
3540
3541 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3542 					gen_rtx_fmt_ee (code, GET_MODE (op0),
3543 op0, op1),
3544 gen_rtx_LABEL_REF (VOIDmode, label),
3545 pc_rtx);
3546
3547 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3548 }
3549
3550 void
3551 emit_conditional_branch_insn (rtx operands[])
3552 {
3553 /* The quad-word fp compare library routines all return nonzero to indicate
3554 true, which is different from the equivalent libgcc routines, so we must
3555 handle them specially here. */
3556 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3557 {
3558 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3559 GET_CODE (operands[0]));
3560 operands[1] = XEXP (operands[0], 0);
3561 operands[2] = XEXP (operands[0], 1);
3562 }
3563
3564 /* If we can tell early on that the comparison is against a constant
3565 that won't fit in the 5-bit signed immediate field of a cbcond,
3566 use one of the other v9 conditional branch sequences. */
3567 if (TARGET_CBCOND
3568 && GET_CODE (operands[1]) == REG
3569 && (GET_MODE (operands[1]) == SImode
3570 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3571 && (GET_CODE (operands[2]) != CONST_INT
3572 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3573 {
3574 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3575 return;
3576 }
3577
3578 if (TARGET_ARCH64 && operands[2] == const0_rtx
3579 && GET_CODE (operands[1]) == REG
3580 && GET_MODE (operands[1]) == DImode)
3581 {
3582 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3583 return;
3584 }
3585
3586 operands[1] = gen_compare_reg (operands[0]);
3587 operands[2] = const0_rtx;
3588 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3589 operands[1], operands[2]);
3590 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3591 operands[3]));
3592 }
3593
3594
3595 /* Generate a DFmode part of a hard TFmode register.
3596 REG is the TFmode hard register, LOW is 1 for the
3597    low 64 bits of the register and 0 otherwise.
3598 */
3599 rtx
3600 gen_df_reg (rtx reg, int low)
3601 {
3602 int regno = REGNO (reg);
3603
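  /* An integer register on 64-bit targets is 64 bits wide, so the other
     DFmode half of the TFmode value is one register away; FP registers
     are 32 bits wide, so there it is two registers away.  */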
3604 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3605 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3606 return gen_rtx_REG (DFmode, regno);
3607 }
3608
3609 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3610 Unlike normal calls, TFmode operands are passed by reference. It is
3611 assumed that no more than 3 operands are required. */
3612
3613 static void
3614 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3615 {
3616 rtx ret_slot = NULL, arg[3], func_sym;
3617 int i;
3618
3619 /* We only expect to be called for conversions, unary, and binary ops. */
3620 gcc_assert (nargs == 2 || nargs == 3);
3621
3622 for (i = 0; i < nargs; ++i)
3623 {
3624 rtx this_arg = operands[i];
3625 rtx this_slot;
3626
3627 /* TFmode arguments and return values are passed by reference. */
3628 if (GET_MODE (this_arg) == TFmode)
3629 {
3630 int force_stack_temp;
3631
3632 force_stack_temp = 0;
3633 if (TARGET_BUGGY_QP_LIB && i == 0)
3634 force_stack_temp = 1;
3635
3636 if (GET_CODE (this_arg) == MEM
3637 && ! force_stack_temp)
3638 {
3639 tree expr = MEM_EXPR (this_arg);
3640 if (expr)
3641 mark_addressable (expr);
3642 this_arg = XEXP (this_arg, 0);
3643 }
3644 else if (CONSTANT_P (this_arg)
3645 && ! force_stack_temp)
3646 {
3647 this_slot = force_const_mem (TFmode, this_arg);
3648 this_arg = XEXP (this_slot, 0);
3649 }
3650 else
3651 {
3652 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3653
3654 /* Operand 0 is the return value. We'll copy it out later. */
3655 if (i > 0)
3656 emit_move_insn (this_slot, this_arg);
3657 else
3658 ret_slot = this_slot;
3659
3660 this_arg = XEXP (this_slot, 0);
3661 }
3662 }
3663
3664 arg[i] = this_arg;
3665 }
3666
3667 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3668
3669 if (GET_MODE (operands[0]) == TFmode)
3670 {
3671 if (nargs == 2)
3672 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3673 arg[0], GET_MODE (arg[0]),
3674 arg[1], GET_MODE (arg[1]));
3675 else
3676 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3677 arg[0], GET_MODE (arg[0]),
3678 arg[1], GET_MODE (arg[1]),
3679 arg[2], GET_MODE (arg[2]));
3680
3681 if (ret_slot)
3682 emit_move_insn (operands[0], ret_slot);
3683 }
3684 else
3685 {
3686 rtx ret;
3687
3688 gcc_assert (nargs == 2);
3689
3690 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3691 GET_MODE (operands[0]),
3692 arg[1], GET_MODE (arg[1]));
3693
3694 if (ret != operands[0])
3695 emit_move_insn (operands[0], ret);
3696 }
3697 }
3698
3699 /* Expand soft-float TFmode calls to sparc abi routines. */
3700
3701 static void
3702 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3703 {
3704 const char *func;
3705
3706 switch (code)
3707 {
3708 case PLUS:
3709 func = "_Qp_add";
3710 break;
3711 case MINUS:
3712 func = "_Qp_sub";
3713 break;
3714 case MULT:
3715 func = "_Qp_mul";
3716 break;
3717 case DIV:
3718 func = "_Qp_div";
3719 break;
3720 default:
3721 gcc_unreachable ();
3722 }
3723
3724 emit_soft_tfmode_libcall (func, 3, operands);
3725 }
3726
3727 static void
3728 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3729 {
3730 const char *func;
3731
3732 gcc_assert (code == SQRT);
3733 func = "_Qp_sqrt";
3734
3735 emit_soft_tfmode_libcall (func, 2, operands);
3736 }
3737
3738 static void
3739 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3740 {
3741 const char *func;
3742
3743 switch (code)
3744 {
3745 case FLOAT_EXTEND:
3746 switch (GET_MODE (operands[1]))
3747 {
3748 case E_SFmode:
3749 func = "_Qp_stoq";
3750 break;
3751 case E_DFmode:
3752 func = "_Qp_dtoq";
3753 break;
3754 default:
3755 gcc_unreachable ();
3756 }
3757 break;
3758
3759 case FLOAT_TRUNCATE:
3760 switch (GET_MODE (operands[0]))
3761 {
3762 case E_SFmode:
3763 func = "_Qp_qtos";
3764 break;
3765 case E_DFmode:
3766 func = "_Qp_qtod";
3767 break;
3768 default:
3769 gcc_unreachable ();
3770 }
3771 break;
3772
3773 case FLOAT:
3774 switch (GET_MODE (operands[1]))
3775 {
3776 case E_SImode:
3777 func = "_Qp_itoq";
3778 if (TARGET_ARCH64)
3779 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3780 break;
3781 case E_DImode:
3782 func = "_Qp_xtoq";
3783 break;
3784 default:
3785 gcc_unreachable ();
3786 }
3787 break;
3788
3789 case UNSIGNED_FLOAT:
3790 switch (GET_MODE (operands[1]))
3791 {
3792 case E_SImode:
3793 func = "_Qp_uitoq";
3794 if (TARGET_ARCH64)
3795 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3796 break;
3797 case E_DImode:
3798 func = "_Qp_uxtoq";
3799 break;
3800 default:
3801 gcc_unreachable ();
3802 }
3803 break;
3804
3805 case FIX:
3806 switch (GET_MODE (operands[0]))
3807 {
3808 case E_SImode:
3809 func = "_Qp_qtoi";
3810 break;
3811 case E_DImode:
3812 func = "_Qp_qtox";
3813 break;
3814 default:
3815 gcc_unreachable ();
3816 }
3817 break;
3818
3819 case UNSIGNED_FIX:
3820 switch (GET_MODE (operands[0]))
3821 {
3822 case E_SImode:
3823 func = "_Qp_qtoui";
3824 break;
3825 case E_DImode:
3826 func = "_Qp_qtoux";
3827 break;
3828 default:
3829 gcc_unreachable ();
3830 }
3831 break;
3832
3833 default:
3834 gcc_unreachable ();
3835 }
3836
3837 emit_soft_tfmode_libcall (func, 2, operands);
3838 }
3839
3840 /* Expand a hard-float tfmode operation. All arguments must be in
3841 registers. */
3842
3843 static void
3844 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3845 {
3846 rtx op, dest;
3847
3848 if (GET_RTX_CLASS (code) == RTX_UNARY)
3849 {
3850 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3851 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3852 }
3853 else
3854 {
3855 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3856 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3857 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3858 operands[1], operands[2]);
3859 }
3860
3861 if (register_operand (operands[0], VOIDmode))
3862 dest = operands[0];
3863 else
3864 dest = gen_reg_rtx (GET_MODE (operands[0]));
3865
3866 emit_insn (gen_rtx_SET (dest, op));
3867
3868 if (dest != operands[0])
3869 emit_move_insn (operands[0], dest);
3870 }
3871
3872 void
3873 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3874 {
3875 if (TARGET_HARD_QUAD)
3876 emit_hard_tfmode_operation (code, operands);
3877 else
3878 emit_soft_tfmode_binop (code, operands);
3879 }
3880
3881 void
3882 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3883 {
3884 if (TARGET_HARD_QUAD)
3885 emit_hard_tfmode_operation (code, operands);
3886 else
3887 emit_soft_tfmode_unop (code, operands);
3888 }
3889
3890 void
3891 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3892 {
3893 if (TARGET_HARD_QUAD)
3894 emit_hard_tfmode_operation (code, operands);
3895 else
3896 emit_soft_tfmode_cvt (code, operands);
3897 }
3898
3899 /* Return nonzero if a branch/jump/call instruction will be emitting
3900    a nop into its delay slot.  */
3901
3902 int
3903 empty_delay_slot (rtx_insn *insn)
3904 {
3905 rtx seq;
3906
3907 /* If no previous instruction (should not happen), return true. */
3908 if (PREV_INSN (insn) == NULL)
3909 return 1;
3910
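  /* If the delay slot has been filled, the branch and its delay insn have
     been packed into a SEQUENCE which takes the branch's place in the insn
     chain, so NEXT_INSN (PREV_INSN (insn)) is that SEQUENCE.  */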
3911 seq = NEXT_INSN (PREV_INSN (insn));
3912 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3913 return 0;
3914
3915 return 1;
3916 }
3917
3918 /* Return nonzero if we should emit a nop after a cbcond instruction.
3919 The cbcond instruction does not have a delay slot, however there is
3920 a severe performance penalty if a control transfer appears right
3921 after a cbcond. Therefore we emit a nop when we detect this
3922 situation. */
3923
3924 int
3925 emit_cbcond_nop (rtx_insn *insn)
3926 {
3927 rtx next = next_active_insn (insn);
3928
3929 if (!next)
3930 return 1;
3931
3932 if (NONJUMP_INSN_P (next)
3933 && GET_CODE (PATTERN (next)) == SEQUENCE)
3934 next = XVECEXP (PATTERN (next), 0, 0);
3935 else if (CALL_P (next)
3936 && GET_CODE (PATTERN (next)) == PARALLEL)
3937 {
3938 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3939
3940 if (GET_CODE (delay) == RETURN)
3941 {
3942 /* It's a sibling call. Do not emit the nop if we're going
3943 to emit something other than the jump itself as the first
3944 instruction of the sibcall sequence. */
3945 if (sparc_leaf_function_p || TARGET_FLAT)
3946 return 0;
3947 }
3948 }
3949
3950 if (NONJUMP_INSN_P (next))
3951 return 0;
3952
3953 return 1;
3954 }
3955
3956 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3957 instruction. RETURN_P is true if the v9 variant 'return' is to be
3958 considered in the test too.
3959
3960 TRIAL must be a SET whose destination is a REG appropriate for the
3961 'restore' instruction or, if RETURN_P is true, for the 'return'
3962 instruction. */
3963
3964 static int
3965 eligible_for_restore_insn (rtx trial, bool return_p)
3966 {
3967 rtx pat = PATTERN (trial);
3968 rtx src = SET_SRC (pat);
3969 bool src_is_freg = false;
3970 rtx src_reg;
3971
3972 /* Since we now can do moves between float and integer registers when
3973 VIS3 is enabled, we have to catch this case. We can allow such
3974 moves when doing a 'return' however. */
3975 src_reg = src;
3976 if (GET_CODE (src_reg) == SUBREG)
3977 src_reg = SUBREG_REG (src_reg);
3978 if (GET_CODE (src_reg) == REG
3979 && SPARC_FP_REG_P (REGNO (src_reg)))
3980 src_is_freg = true;
3981
3982 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3983 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3984 && arith_operand (src, GET_MODE (src))
3985 && ! src_is_freg)
3986 {
3987 if (TARGET_ARCH64)
3988 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3989 else
3990 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3991 }
3992
3993 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3994 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3995 && arith_double_operand (src, GET_MODE (src))
3996 && ! src_is_freg)
3997 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3998
3999 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
4000 else if (! TARGET_FPU && register_operand (src, SFmode))
4001 return 1;
4002
4003 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
4004 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
4005 return 1;
4006
4007 /* If we have the 'return' instruction, anything that does not use
4008 local or output registers and can go into a delay slot wins. */
4009 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
4010 return 1;
4011
4012 /* The 'restore src1,src2,dest' pattern for SImode. */
4013 else if (GET_CODE (src) == PLUS
4014 && register_operand (XEXP (src, 0), SImode)
4015 && arith_operand (XEXP (src, 1), SImode))
4016 return 1;
4017
4018 /* The 'restore src1,src2,dest' pattern for DImode. */
4019 else if (GET_CODE (src) == PLUS
4020 && register_operand (XEXP (src, 0), DImode)
4021 && arith_double_operand (XEXP (src, 1), DImode))
4022 return 1;
4023
4024 /* The 'restore src1,%lo(src2),dest' pattern. */
4025 else if (GET_CODE (src) == LO_SUM
4026 && ! TARGET_CM_MEDMID
4027 && ((register_operand (XEXP (src, 0), SImode)
4028 && immediate_operand (XEXP (src, 1), SImode))
4029 || (TARGET_ARCH64
4030 && register_operand (XEXP (src, 0), DImode)
4031 && immediate_operand (XEXP (src, 1), DImode))))
4032 return 1;
4033
4034 /* The 'restore src,src,dest' pattern. */
4035 else if (GET_CODE (src) == ASHIFT
4036 && (register_operand (XEXP (src, 0), SImode)
4037 || register_operand (XEXP (src, 0), DImode))
4038 && XEXP (src, 1) == const1_rtx)
4039 return 1;
4040
4041 return 0;
4042 }
4043
4044 /* Return nonzero if TRIAL can go into the function return's delay slot. */
4045
4046 int
4047 eligible_for_return_delay (rtx_insn *trial)
4048 {
4049 int regno;
4050 rtx pat;
4051
4052 /* If the function uses __builtin_eh_return, the eh_return machinery
4053 occupies the delay slot. */
4054 if (crtl->calls_eh_return)
4055 return 0;
4056
4057 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4058 return 0;
4059
4060 /* In the case of a leaf or flat function, anything can go into the slot. */
4061 if (sparc_leaf_function_p || TARGET_FLAT)
4062 return 1;
4063
4064 if (!NONJUMP_INSN_P (trial))
4065 return 0;
4066
4067 pat = PATTERN (trial);
4068 if (GET_CODE (pat) == PARALLEL)
4069 {
4070 int i;
4071
4072 if (! TARGET_V9)
4073 return 0;
4074 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
4075 {
4076 rtx expr = XVECEXP (pat, 0, i);
4077 if (GET_CODE (expr) != SET)
4078 return 0;
4079 if (GET_CODE (SET_DEST (expr)) != REG)
4080 return 0;
4081 regno = REGNO (SET_DEST (expr));
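	  /* Hard registers 8 to 23 are the %o and %l registers, which
	     belong to the register window discarded by the restore.  */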
4082 if (regno >= 8 && regno < 24)
4083 return 0;
4084 }
4085 return !epilogue_renumber (&pat, 1);
4086 }
4087
4088 if (GET_CODE (pat) != SET)
4089 return 0;
4090
4091 if (GET_CODE (SET_DEST (pat)) != REG)
4092 return 0;
4093
4094 regno = REGNO (SET_DEST (pat));
4095
4096 /* Otherwise, only operations which can be done in tandem with
4097 a `restore' or `return' insn can go into the delay slot. */
4098 if (regno >= 8 && regno < 24)
4099 return 0;
4100
4101   /* If this instruction sets up a floating-point register and we have a return
4102 instruction, it can probably go in. But restore will not work
4103 with FP_REGS. */
4104 if (! SPARC_INT_REG_P (regno))
4105 return TARGET_V9 && !epilogue_renumber (&pat, 1);
4106
4107 return eligible_for_restore_insn (trial, true);
4108 }
4109
4110 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
4111
4112 int
4113 eligible_for_sibcall_delay (rtx_insn *trial)
4114 {
4115 rtx pat;
4116
4117 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4118 return 0;
4119
4120 if (!NONJUMP_INSN_P (trial))
4121 return 0;
4122
4123 pat = PATTERN (trial);
4124
4125 if (sparc_leaf_function_p || TARGET_FLAT)
4126 {
4127 /* If the tail call is done using the call instruction,
4128 we have to restore %o7 in the delay slot. */
4129 if (LEAF_SIBCALL_SLOT_RESERVED_P)
4130 return 0;
4131
4132       /* %g1 is used to build the function address.  */
4133 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
4134 return 0;
4135
4136 return 1;
4137 }
4138
4139 if (GET_CODE (pat) != SET)
4140 return 0;
4141
4142 /* Otherwise, only operations which can be done in tandem with
4143 a `restore' insn can go into the delay slot. */
4144 if (GET_CODE (SET_DEST (pat)) != REG
4145 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
4146 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
4147 return 0;
4148
4149 /* If it mentions %o7, it can't go in, because sibcall will clobber it
4150 in most cases. */
4151 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
4152 return 0;
4153
4154 return eligible_for_restore_insn (trial, false);
4155 }
4156
4157 /* Determine if it's legal to put X into the constant pool. This
4158 is not possible if X contains the address of a symbol that is
4159 not constant (TLS) or not known at final link time (PIC). */
4160
4161 static bool
4162 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
4163 {
4164 switch (GET_CODE (x))
4165 {
4166 case CONST_INT:
4167 case CONST_WIDE_INT:
4168 case CONST_DOUBLE:
4169 case CONST_VECTOR:
4170 /* Accept all non-symbolic constants. */
4171 return false;
4172
4173 case LABEL_REF:
4174 /* Labels are OK iff we are non-PIC. */
4175 return flag_pic != 0;
4176
4177 case SYMBOL_REF:
4178 /* 'Naked' TLS symbol references are never OK,
4179 non-TLS symbols are OK iff we are non-PIC. */
4180 if (SYMBOL_REF_TLS_MODEL (x))
4181 return true;
4182 else
4183 return flag_pic != 0;
4184
4185 case CONST:
4186 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
4187 case PLUS:
4188 case MINUS:
4189 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
4190 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
4191 case UNSPEC:
4192 return true;
4193 default:
4194 gcc_unreachable ();
4195 }
4196 }
4197
4198 /* Global Offset Table support. */
4199 static GTY(()) rtx got_symbol_rtx = NULL_RTX;
4200 static GTY(()) rtx got_register_rtx = NULL_RTX;
4201 static GTY(()) rtx got_helper_rtx = NULL_RTX;
4202
4203 static GTY(()) bool got_helper_needed = false;
4204
4205 /* Return the SYMBOL_REF for the Global Offset Table. */
4206
4207 static rtx
4208 sparc_got (void)
4209 {
4210 if (!got_symbol_rtx)
4211 got_symbol_rtx = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
4212
4213 return got_symbol_rtx;
4214 }
4215
4216 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4217
4218 static rtx
4219 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2)
4220 {
4221 int orig_flag_pic = flag_pic;
4222 rtx insn;
4223
4224 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4225 flag_pic = 0;
4226 if (TARGET_ARCH64)
4227 insn = gen_load_pcrel_symdi (op0, op1, op2, GEN_INT (REGNO (op0)));
4228 else
4229 insn = gen_load_pcrel_symsi (op0, op1, op2, GEN_INT (REGNO (op0)));
4230 flag_pic = orig_flag_pic;
4231
4232 return insn;
4233 }
4234
4235 /* Output the load_pcrel_sym{si,di} patterns. */
4236
4237 const char *
4238 output_load_pcrel_sym (rtx *operands)
4239 {
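  /* The sethi/add pair materializes the displacement from the call
     instruction to the symbol; the helper at %a2 then adds back the call's
     own address, which it receives in %o7.  The -4/+4 (resp. -8/-4) addends
     compensate for the sethi and add not being located at the call itself.  */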
4240 if (flag_delayed_branch)
4241 {
4242 output_asm_insn ("sethi\t%%hi(%a1-4), %0", operands);
4243 output_asm_insn ("call\t%a2", operands);
4244 output_asm_insn (" add\t%0, %%lo(%a1+4), %0", operands);
4245 }
4246 else
4247 {
4248 output_asm_insn ("sethi\t%%hi(%a1-8), %0", operands);
4249 output_asm_insn ("add\t%0, %%lo(%a1-4), %0", operands);
4250 output_asm_insn ("call\t%a2", operands);
4251 output_asm_insn (" nop", NULL);
4252 }
4253
4254 if (operands[2] == got_helper_rtx)
4255 got_helper_needed = true;
4256
4257 return "";
4258 }
4259
4260 #ifdef HAVE_GAS_HIDDEN
4261 # define USE_HIDDEN_LINKONCE 1
4262 #else
4263 # define USE_HIDDEN_LINKONCE 0
4264 #endif
4265
4266 /* Emit code to load the GOT register. */
4267
4268 void
4269 load_got_register (void)
4270 {
4271 rtx insn;
4272
4273 if (TARGET_VXWORKS_RTP)
4274 {
4275 if (!got_register_rtx)
4276 got_register_rtx = pic_offset_table_rtx;
4277
4278 insn = gen_vxworks_load_got ();
4279 }
4280 else
4281 {
4282 if (!got_register_rtx)
4283 got_register_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4284
4285 /* The GOT symbol is subject to a PC-relative relocation so we need a
4286 helper function to add the PC value and thus get the final value. */
4287 if (!got_helper_rtx)
4288 {
4289 char name[32];
4290
4291 /* Skip the leading '%' as that cannot be used in a symbol name. */
4292 if (USE_HIDDEN_LINKONCE)
4293 sprintf (name, "__sparc_get_pc_thunk.%s",
4294 reg_names[REGNO (got_register_rtx)] + 1);
4295 else
4296 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC",
4297 REGNO (got_register_rtx));
4298
4299 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4300 }
4301
4302 insn
4303 = gen_load_pcrel_sym (got_register_rtx, sparc_got (), got_helper_rtx);
4304 }
4305
4306 emit_insn (insn);
4307 }
4308
4309 /* Ensure that we are not using patterns that are not OK with PIC. */
4310
4311 int
4312 check_pic (int i)
4313 {
4314 rtx op;
4315
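  /* With -fpic (flag_pic == 1), a symbolic operand must already have been
     wrapped into a GOT-relative reference, so assert that no bare
     SYMBOL_REF slipped through.  */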
4316 switch (flag_pic)
4317 {
4318 case 1:
4319 op = recog_data.operand[i];
4320 gcc_assert (GET_CODE (op) != SYMBOL_REF
4321 && (GET_CODE (op) != CONST
4322 || (GET_CODE (XEXP (op, 0)) == MINUS
4323 && XEXP (XEXP (op, 0), 0) == sparc_got ()
4324 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
4325 /* fallthrough */
4326 case 2:
4327 default:
4328 return 1;
4329 }
4330 }
4331
4332 /* Return true if X is an address which needs a temporary register when
4333 reloaded while generating PIC code. */
4334
4335 int
4336 pic_address_needs_scratch (rtx x)
4337 {
4338 /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */
4339 if (GET_CODE (x) == CONST
4340 && GET_CODE (XEXP (x, 0)) == PLUS
4341 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
4342 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4343 && !SMALL_INT (XEXP (XEXP (x, 0), 1)))
4344 return 1;
4345
4346 return 0;
4347 }
4348
4349 /* Determine if a given RTX is a valid constant. We already know this
4350 satisfies CONSTANT_P. */
4351
4352 static bool
4353 sparc_legitimate_constant_p (machine_mode mode, rtx x)
4354 {
4355 switch (GET_CODE (x))
4356 {
4357 case CONST:
4358 case SYMBOL_REF:
4359 if (sparc_tls_referenced_p (x))
4360 return false;
4361 break;
4362
4363 case CONST_DOUBLE:
4364 /* Floating point constants are generally not ok.
4365 The only exception is 0.0 and all-ones in VIS. */
4366 if (TARGET_VIS
4367 && SCALAR_FLOAT_MODE_P (mode)
4368 && (const_zero_operand (x, mode)
4369 || const_all_ones_operand (x, mode)))
4370 return true;
4371
4372 return false;
4373
4374 case CONST_VECTOR:
4375 /* Vector constants are generally not ok.
4376 The only exception is 0 or -1 in VIS. */
4377 if (TARGET_VIS
4378 && (const_zero_operand (x, mode)
4379 || const_all_ones_operand (x, mode)))
4380 return true;
4381
4382 return false;
4383
4384 default:
4385 break;
4386 }
4387
4388 return true;
4389 }
4390
4391 /* Determine if a given RTX is a valid constant address. */
4392
4393 bool
4394 constant_address_p (rtx x)
4395 {
4396 switch (GET_CODE (x))
4397 {
4398 case LABEL_REF:
4399 case CONST_INT:
4400 case HIGH:
4401 return true;
4402
4403 case CONST:
4404 if (flag_pic && pic_address_needs_scratch (x))
4405 return false;
4406 return sparc_legitimate_constant_p (Pmode, x);
4407
4408 case SYMBOL_REF:
4409 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
4410
4411 default:
4412 return false;
4413 }
4414 }
4415
4416 /* Nonzero if the constant value X is a legitimate general operand
4417 when generating PIC code. It is given that flag_pic is on and
4418 that X satisfies CONSTANT_P. */
4419
4420 bool
4421 legitimate_pic_operand_p (rtx x)
4422 {
4423 if (pic_address_needs_scratch (x))
4424 return false;
4425 if (sparc_tls_referenced_p (x))
4426 return false;
4427 return true;
4428 }
4429
4430 /* Return true if X is a representation of the PIC register. */
4431
4432 static bool
4433 sparc_pic_register_p (rtx x)
4434 {
4435 if (!REG_P (x) || !pic_offset_table_rtx)
4436 return false;
4437
4438 if (x == pic_offset_table_rtx)
4439 return true;
4440
4441 if (!HARD_REGISTER_P (pic_offset_table_rtx)
4442 && (HARD_REGISTER_P (x) || lra_in_progress || reload_in_progress)
4443 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
4444 return true;
4445
4446 return false;
4447 }
4448
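/* The 13-bit signed immediate field of memory instructions covers
   [-0x1000, 0xfff]; require the entire access, not just its first byte,
   to be addressable.  For OLO10 the offset is added to a %lo() value of
   up to 0x3ff, hence the tighter 0xc00 bound.  */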
4449 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
4450 (CONST_INT_P (X) \
4451 && INTVAL (X) >= -0x1000 \
4452 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
4453
4454 #define RTX_OK_FOR_OLO10_P(X, MODE) \
4455 (CONST_INT_P (X) \
4456 && INTVAL (X) >= -0x1000 \
4457 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
4458
4459 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
4460
4461 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
4462 ordinarily. This changes a bit when generating PIC. */
4463
4464 static bool
4465 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4466 {
4467 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
4468
4469 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4470 rs1 = addr;
4471 else if (GET_CODE (addr) == PLUS)
4472 {
4473 rs1 = XEXP (addr, 0);
4474 rs2 = XEXP (addr, 1);
4475
4476 /* Canonicalize. REG comes first, if there are no regs,
4477 LO_SUM comes first. */
4478 if (!REG_P (rs1)
4479 && GET_CODE (rs1) != SUBREG
4480 && (REG_P (rs2)
4481 || GET_CODE (rs2) == SUBREG
4482 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
4483 {
4484 rs1 = XEXP (addr, 1);
4485 rs2 = XEXP (addr, 0);
4486 }
4487
4488 if ((flag_pic == 1
4489 && sparc_pic_register_p (rs1)
4490 && !REG_P (rs2)
4491 && GET_CODE (rs2) != SUBREG
4492 && GET_CODE (rs2) != LO_SUM
4493 && GET_CODE (rs2) != MEM
4494 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
4495 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
4496 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
4497 || ((REG_P (rs1)
4498 || GET_CODE (rs1) == SUBREG)
4499 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4500 {
4501 imm1 = rs2;
4502 rs2 = NULL;
4503 }
4504 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4505 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4506 {
4507 /* We prohibit REG + REG for TFmode when there are no quad move insns
4508 and we consequently need to split. We do this because REG+REG
4509 is not an offsettable address. If we get the situation in reload
4510 where source and destination of a movtf pattern are both MEMs with
4511 REG+REG address, then only one of them gets converted to an
4512 offsettable address. */
4513 if (mode == TFmode
4514 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4515 return 0;
4516
4517 /* Likewise for TImode, but in all cases. */
4518 if (mode == TImode)
4519 return 0;
4520
4521 /* We prohibit REG + REG on ARCH32 if not optimizing for
4522 DFmode/DImode because then mem_min_alignment is likely to be zero
4523 after reload and the forced split would lack a matching splitter
4524 pattern. */
4525 if (TARGET_ARCH32 && !optimize
4526 && (mode == DFmode || mode == DImode))
4527 return 0;
4528 }
4529 else if (USE_AS_OFFSETABLE_LO10
4530 && GET_CODE (rs1) == LO_SUM
4531 && TARGET_ARCH64
4532 && ! TARGET_CM_MEDMID
4533 && RTX_OK_FOR_OLO10_P (rs2, mode))
4534 {
4535 rs2 = NULL;
4536 imm1 = XEXP (rs1, 1);
4537 rs1 = XEXP (rs1, 0);
4538 if (!CONSTANT_P (imm1)
4539 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4540 return 0;
4541 }
4542 }
4543 else if (GET_CODE (addr) == LO_SUM)
4544 {
4545 rs1 = XEXP (addr, 0);
4546 imm1 = XEXP (addr, 1);
4547
4548 if (!CONSTANT_P (imm1)
4549 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4550 return 0;
4551
4552 /* We can't allow TFmode in 32-bit mode, because an offset greater
4553 than the alignment (8) may cause the LO_SUM to overflow. */
4554 if (mode == TFmode && TARGET_ARCH32)
4555 return 0;
4556
4557 /* During reload, accept the HIGH+LO_SUM construct generated by
4558 sparc_legitimize_reload_address. */
4559 if (reload_in_progress
4560 && GET_CODE (rs1) == HIGH
4561 && XEXP (rs1, 0) == imm1)
4562 return 1;
4563 }
4564 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4565 return 1;
4566 else
4567 return 0;
4568
4569 if (GET_CODE (rs1) == SUBREG)
4570 rs1 = SUBREG_REG (rs1);
4571 if (!REG_P (rs1))
4572 return 0;
4573
4574 if (rs2)
4575 {
4576 if (GET_CODE (rs2) == SUBREG)
4577 rs2 = SUBREG_REG (rs2);
4578 if (!REG_P (rs2))
4579 return 0;
4580 }
4581
4582 if (strict)
4583 {
4584 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4585 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4586 return 0;
4587 }
4588 else
4589 {
4590 if ((! SPARC_INT_REG_P (REGNO (rs1))
4591 && REGNO (rs1) != FRAME_POINTER_REGNUM
4592 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4593 || (rs2
4594 && (! SPARC_INT_REG_P (REGNO (rs2))
4595 && REGNO (rs2) != FRAME_POINTER_REGNUM
4596 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4597 return 0;
4598 }
4599 return 1;
4600 }
4601
4602 /* Return the SYMBOL_REF for the tls_get_addr function. */
4603
4604 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4605
4606 static rtx
4607 sparc_tls_get_addr (void)
4608 {
4609 if (!sparc_tls_symbol)
4610 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4611
4612 return sparc_tls_symbol;
4613 }
4614
4615 /* Return the Global Offset Table to be used in TLS mode. */
4616
4617 static rtx
4618 sparc_tls_got (void)
4619 {
4620 /* In PIC mode, this is just the PIC offset table. */
4621 if (flag_pic)
4622 {
4623 crtl->uses_pic_offset_table = 1;
4624 return pic_offset_table_rtx;
4625 }
4626
4627 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4628 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4629 if (TARGET_SUN_TLS && TARGET_ARCH32)
4630 {
4631 load_got_register ();
4632 return got_register_rtx;
4633 }
4634
4635 /* In all other cases, we load a new pseudo with the GOT symbol. */
4636 return copy_to_reg (sparc_got ());
4637 }
4638
4639 /* Return true if X contains a thread-local symbol. */
4640
4641 static bool
4642 sparc_tls_referenced_p (rtx x)
4643 {
4644 if (!TARGET_HAVE_TLS)
4645 return false;
4646
4647 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4648 x = XEXP (XEXP (x, 0), 0);
4649
4650 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4651 return true;
4652
4653 /* That's all we handle in sparc_legitimize_tls_address for now. */
4654 return false;
4655 }
4656
4657 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4658 this (thread-local) address. */
4659
4660 static rtx
4661 sparc_legitimize_tls_address (rtx addr)
4662 {
4663 rtx temp1, temp2, temp3, ret, o0, got;
4664 rtx_insn *insn;
4665
4666 gcc_assert (can_create_pseudo_p ());
4667
4668 if (GET_CODE (addr) == SYMBOL_REF)
4669 /* Although the various sethi/or sequences generate SImode values, many of
4670 them can be transformed by the linker when relaxing and, if relaxing to
4671 local-exec, will become a sethi/xor pair, which is signed and therefore
4672 a full DImode value in 64-bit mode. Thus we must use Pmode, lest these
4673 values be spilled onto the stack in 64-bit mode. */
4674 switch (SYMBOL_REF_TLS_MODEL (addr))
4675 {
4676 case TLS_MODEL_GLOBAL_DYNAMIC:
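	/* Global Dynamic: build the address of the symbol's GOT entry and
	   call __tls_get_addr on it; the result comes back in %o0.  */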
4677 start_sequence ();
4678 temp1 = gen_reg_rtx (Pmode);
4679 temp2 = gen_reg_rtx (Pmode);
4680 ret = gen_reg_rtx (Pmode);
4681 o0 = gen_rtx_REG (Pmode, 8);
4682 got = sparc_tls_got ();
4683 if (TARGET_ARCH32)
4684 {
4685 emit_insn (gen_tgd_hi22si (temp1, addr));
4686 emit_insn (gen_tgd_lo10si (temp2, temp1, addr));
4687 emit_insn (gen_tgd_addsi (o0, got, temp2, addr));
4688 insn = emit_call_insn (gen_tgd_callsi (o0, sparc_tls_get_addr (),
4689 addr, const1_rtx));
4690 }
4691 else
4692 {
4693 emit_insn (gen_tgd_hi22di (temp1, addr));
4694 emit_insn (gen_tgd_lo10di (temp2, temp1, addr));
4695 emit_insn (gen_tgd_adddi (o0, got, temp2, addr));
4696 insn = emit_call_insn (gen_tgd_calldi (o0, sparc_tls_get_addr (),
4697 addr, const1_rtx));
4698 }
4699 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4700 RTL_CONST_CALL_P (insn) = 1;
4701 insn = get_insns ();
4702 end_sequence ();
4703 emit_libcall_block (insn, ret, o0, addr);
4704 break;
4705
4706 case TLS_MODEL_LOCAL_DYNAMIC:
4707 start_sequence ();
4708 temp1 = gen_reg_rtx (Pmode);
4709 temp2 = gen_reg_rtx (Pmode);
4710 temp3 = gen_reg_rtx (Pmode);
4711 ret = gen_reg_rtx (Pmode);
4712 o0 = gen_rtx_REG (Pmode, 8);
4713 got = sparc_tls_got ();
4714 if (TARGET_ARCH32)
4715 {
4716 emit_insn (gen_tldm_hi22si (temp1));
4717 emit_insn (gen_tldm_lo10si (temp2, temp1));
4718 emit_insn (gen_tldm_addsi (o0, got, temp2));
4719 insn = emit_call_insn (gen_tldm_callsi (o0, sparc_tls_get_addr (),
4720 const1_rtx));
4721 }
4722 else
4723 {
4724 emit_insn (gen_tldm_hi22di (temp1));
4725 emit_insn (gen_tldm_lo10di (temp2, temp1));
4726 emit_insn (gen_tldm_adddi (o0, got, temp2));
4727 insn = emit_call_insn (gen_tldm_calldi (o0, sparc_tls_get_addr (),
4728 const1_rtx));
4729 }
4730 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4731 RTL_CONST_CALL_P (insn) = 1;
4732 insn = get_insns ();
4733 end_sequence ();
4734 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
4735 share the LD_BASE result with other LD model accesses. */
4736 emit_libcall_block (insn, temp3, o0,
4737 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4738 UNSPEC_TLSLD_BASE));
4739 temp1 = gen_reg_rtx (Pmode);
4740 temp2 = gen_reg_rtx (Pmode);
4741 if (TARGET_ARCH32)
4742 {
4743 emit_insn (gen_tldo_hix22si (temp1, addr));
4744 emit_insn (gen_tldo_lox10si (temp2, temp1, addr));
4745 emit_insn (gen_tldo_addsi (ret, temp3, temp2, addr));
4746 }
4747 else
4748 {
4749 emit_insn (gen_tldo_hix22di (temp1, addr));
4750 emit_insn (gen_tldo_lox10di (temp2, temp1, addr));
4751 emit_insn (gen_tldo_adddi (ret, temp3, temp2, addr));
4752 }
4753 break;
4754
4755 case TLS_MODEL_INITIAL_EXEC:
4756 temp1 = gen_reg_rtx (Pmode);
4757 temp2 = gen_reg_rtx (Pmode);
4758 temp3 = gen_reg_rtx (Pmode);
4759 got = sparc_tls_got ();
4760 if (TARGET_ARCH32)
4761 {
4762 emit_insn (gen_tie_hi22si (temp1, addr));
4763 emit_insn (gen_tie_lo10si (temp2, temp1, addr));
4764 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4765 }
4766 else
4767 {
4768 emit_insn (gen_tie_hi22di (temp1, addr));
4769 emit_insn (gen_tie_lo10di (temp2, temp1, addr));
4770 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4771 }
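      /* Register 7 is %g7, the thread pointer; the final address is the
	 thread pointer plus the offset just loaded from the GOT.  */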
4772 if (TARGET_SUN_TLS)
4773 {
4774 ret = gen_reg_rtx (Pmode);
4775 if (TARGET_ARCH32)
4776 emit_insn (gen_tie_addsi (ret, gen_rtx_REG (Pmode, 7),
4777 temp3, addr));
4778 else
4779 emit_insn (gen_tie_adddi (ret, gen_rtx_REG (Pmode, 7),
4780 temp3, addr));
4781 }
4782 else
4783 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4784 break;
4785
4786 case TLS_MODEL_LOCAL_EXEC:
4787 temp1 = gen_reg_rtx (Pmode);
4788 temp2 = gen_reg_rtx (Pmode);
4789 if (TARGET_ARCH32)
4790 {
4791 emit_insn (gen_tle_hix22si (temp1, addr));
4792 emit_insn (gen_tle_lox10si (temp2, temp1, addr));
4793 }
4794 else
4795 {
4796 emit_insn (gen_tle_hix22di (temp1, addr));
4797 emit_insn (gen_tle_lox10di (temp2, temp1, addr));
4798 }
4799 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4800 break;
4801
4802 default:
4803 gcc_unreachable ();
4804 }
4805
4806 else if (GET_CODE (addr) == CONST)
4807 {
4808 rtx base, offset;
4809
4810 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4811
4812 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4813 offset = XEXP (XEXP (addr, 0), 1);
4814
4815 base = force_operand (base, NULL_RTX);
4816 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4817 offset = force_reg (Pmode, offset);
4818 ret = gen_rtx_PLUS (Pmode, base, offset);
4819 }
4820
4821 else
4822 gcc_unreachable (); /* for now ... */
4823
4824 return ret;
4825 }
4826
4827 /* Legitimize PIC addresses. If the address is already position-independent,
4828 we return ORIG. Newly generated position-independent addresses go into a
4829 reg. This is REG if nonzero, otherwise we allocate register(s) as
4830 necessary. */
4831
4832 static rtx
4833 sparc_legitimize_pic_address (rtx orig, rtx reg)
4834 {
4835 if (GET_CODE (orig) == SYMBOL_REF
4836 /* See the comment in sparc_expand_move. */
4837 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4838 {
4839 bool gotdata_op = false;
4840 rtx pic_ref, address;
4841 rtx_insn *insn;
4842
4843 if (!reg)
4844 {
4845 gcc_assert (can_create_pseudo_p ());
4846 reg = gen_reg_rtx (Pmode);
4847 }
4848
4849 if (flag_pic == 2)
4850 {
4851 /* If not during reload, allocate another temp reg here for loading
4852 in the address, so that these instructions can be optimized
4853 properly. */
4854 rtx temp_reg = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : reg;
4855
4856 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4857 won't get confused into thinking that these two instructions
4858 are loading in the true address of the symbol. If in the
4859 future a PIC rtx exists, that should be used instead. */
4860 if (TARGET_ARCH64)
4861 {
4862 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4863 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4864 }
4865 else
4866 {
4867 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4868 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4869 }
4870
4871 address = temp_reg;
4872 gotdata_op = true;
4873 }
4874 else
4875 address = orig;
4876
4877 crtl->uses_pic_offset_table = 1;
4878 if (gotdata_op)
4879 {
4880 if (TARGET_ARCH64)
4881 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4882 pic_offset_table_rtx,
4883 address, orig));
4884 else
4885 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4886 pic_offset_table_rtx,
4887 address, orig));
4888 }
4889 else
4890 {
4891 pic_ref
4892 = gen_const_mem (Pmode,
4893 gen_rtx_PLUS (Pmode,
4894 pic_offset_table_rtx, address));
4895 insn = emit_move_insn (reg, pic_ref);
4896 }
4897
4898 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4899 by loop. */
4900 set_unique_reg_note (insn, REG_EQUAL, orig);
4901 return reg;
4902 }
4903 else if (GET_CODE (orig) == CONST)
4904 {
4905 rtx base, offset;
4906
4907 if (GET_CODE (XEXP (orig, 0)) == PLUS
4908 && sparc_pic_register_p (XEXP (XEXP (orig, 0), 0)))
4909 return orig;
4910
4911 if (!reg)
4912 {
4913 gcc_assert (can_create_pseudo_p ());
4914 reg = gen_reg_rtx (Pmode);
4915 }
4916
4917 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4918 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4919 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4920 base == reg ? NULL_RTX : reg);
4921
4922 if (GET_CODE (offset) == CONST_INT)
4923 {
4924 if (SMALL_INT (offset))
4925 return plus_constant (Pmode, base, INTVAL (offset));
4926 else if (can_create_pseudo_p ())
4927 offset = force_reg (Pmode, offset);
4928 else
4929 /* If we reach here, then something is seriously wrong. */
4930 gcc_unreachable ();
4931 }
4932 return gen_rtx_PLUS (Pmode, base, offset);
4933 }
4934 else if (GET_CODE (orig) == LABEL_REF)
4935 /* ??? We ought to be checking that the register is live instead, in case
4936 it is eliminated. */
4937 crtl->uses_pic_offset_table = 1;
4938
4939 return orig;
4940 }
4941
4942 /* Try machine-dependent ways of modifying an illegitimate address X
4943 to be legitimate. If we find one, return the new, valid address.
4944
4945 OLDX is the address as it was before break_out_memory_refs was called.
4946 In some cases it is useful to look at this to decide what needs to be done.
4947
4948 MODE is the mode of the operand pointed to by X.
4949
4950 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4951
4952 static rtx
4953 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4954 machine_mode mode)
4955 {
4956 rtx orig_x = x;
4957
4958 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4959 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4960 force_operand (XEXP (x, 0), NULL_RTX));
4961 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4962 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4963 force_operand (XEXP (x, 1), NULL_RTX));
4964 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4965 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4966 XEXP (x, 1));
4967 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4968 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4969 force_operand (XEXP (x, 1), NULL_RTX));
4970
4971 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4972 return x;
4973
4974 if (sparc_tls_referenced_p (x))
4975 x = sparc_legitimize_tls_address (x);
4976 else if (flag_pic)
4977 x = sparc_legitimize_pic_address (x, NULL_RTX);
4978 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4979 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4980 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4981 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4982 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4983 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4984 else if (GET_CODE (x) == SYMBOL_REF
4985 || GET_CODE (x) == CONST
4986 || GET_CODE (x) == LABEL_REF)
4987 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4988
4989 return x;
4990 }
4991
4992 /* Delegitimize an address that was legitimized by the above function. */
4993
4994 static rtx
4995 sparc_delegitimize_address (rtx x)
4996 {
4997 x = delegitimize_mem_from_attrs (x);
4998
4999 if (GET_CODE (x) == LO_SUM)
5000 x = XEXP (x, 1);
5001
5002 if (GET_CODE (x) == UNSPEC)
5003 switch (XINT (x, 1))
5004 {
5005 case UNSPEC_MOVE_PIC:
5006 case UNSPEC_TLSLE:
5007 x = XVECEXP (x, 0, 0);
5008 gcc_assert (GET_CODE (x) == SYMBOL_REF);
5009 break;
5010 case UNSPEC_MOVE_GOTDATA:
5011 x = XVECEXP (x, 0, 2);
5012 gcc_assert (GET_CODE (x) == SYMBOL_REF);
5013 break;
5014 default:
5015 break;
5016 }
5017
5018 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
5019 if (GET_CODE (x) == MINUS
5020 && (XEXP (x, 0) == got_register_rtx
5021 || sparc_pic_register_p (XEXP (x, 0))))
5022 {
5023 rtx y = XEXP (x, 1);
5024
5025 if (GET_CODE (y) == LO_SUM)
5026 y = XEXP (y, 1);
5027
5028 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MOVE_PIC_LABEL)
5029 {
5030 x = XVECEXP (y, 0, 0);
5031 gcc_assert (GET_CODE (x) == LABEL_REF
5032 || (GET_CODE (x) == CONST
5033 && GET_CODE (XEXP (x, 0)) == PLUS
5034 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5035 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT));
5036 }
5037 }
5038
5039 return x;
5040 }
5041
5042 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
5043 replace the input X, or the original X if no replacement is called for.
5044 The output parameter *WIN is 1 if the calling macro should goto WIN,
5045 0 if it should not.
5046
5047 For SPARC, we wish to handle addresses by splitting them into
5048 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
5049 This cuts the number of extra insns by one.
5050
5051 Do nothing when generating PIC code and the address is a symbolic
5052 operand or requires a scratch register. */
5053
5054 rtx
5055 sparc_legitimize_reload_address (rtx x, machine_mode mode,
5056 int opnum, int type,
5057 int ind_levels ATTRIBUTE_UNUSED, int *win)
5058 {
5059 /* Decompose SImode constants into HIGH+LO_SUM. */
5060 if (CONSTANT_P (x)
5061 && (mode != TFmode || TARGET_ARCH64)
5062 && GET_MODE (x) == SImode
5063 && GET_CODE (x) != LO_SUM
5064 && GET_CODE (x) != HIGH
5065 && sparc_code_model <= CM_MEDLOW
5066 && !(flag_pic
5067 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
5068 {
5069 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
5070 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
5071 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
5072 opnum, (enum reload_type)type);
5073 *win = 1;
5074 return x;
5075 }
5076
5077 /* We have to recognize what we have already generated above. */
5078 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
5079 {
5080 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
5081 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
5082 opnum, (enum reload_type)type);
5083 *win = 1;
5084 return x;
5085 }
5086
5087 *win = 0;
5088 return x;
5089 }
5090
5091 /* Return true if ADDR (a legitimate address expression)
5092 has an effect that depends on the machine mode it is used for.
5093
5094 In PIC mode,
5095
5096 (mem:HI [%l7+a])
5097
5098 is not equivalent to
5099
5100 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
5101
5102 because [%l7+a+1] is interpreted as the address of (a+1). */
5103
5104
5105 static bool
5106 sparc_mode_dependent_address_p (const_rtx addr,
5107 addr_space_t as ATTRIBUTE_UNUSED)
5108 {
5109 if (GET_CODE (addr) == PLUS
5110 && sparc_pic_register_p (XEXP (addr, 0))
5111 && symbolic_operand (XEXP (addr, 1), VOIDmode))
5112 return true;
5113
5114 return false;
5115 }
5116
5117 /* Emit a call instruction with the pattern given by PAT. ADDR is the
5118 address of the call target. */
5119
5120 void
5121 sparc_emit_call_insn (rtx pat, rtx addr)
5122 {
5123 rtx_insn *insn;
5124
5125 insn = emit_call_insn (pat);
5126
5127 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
5128 if (TARGET_VXWORKS_RTP
5129 && flag_pic
5130 && GET_CODE (addr) == SYMBOL_REF
5131 && (SYMBOL_REF_DECL (addr)
5132 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
5133 : !SYMBOL_REF_LOCAL_P (addr)))
5134 {
5135 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
5136 crtl->uses_pic_offset_table = 1;
5137 }
5138 }
5139
5140 /* Return 1 if RTX is a MEM which is known to be aligned to at
5141 least a DESIRED byte boundary. */
5142
5143 int
5144 mem_min_alignment (rtx mem, int desired)
5145 {
5146 rtx addr, base, offset;
5147
5148 /* If it's not a MEM we can't accept it. */
5149 if (GET_CODE (mem) != MEM)
5150 return 0;
5151
5152 /* Obviously... */
5153 if (!TARGET_UNALIGNED_DOUBLES
5154 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
5155 return 1;
5156
5157 /* ??? The rest of the function predates MEM_ALIGN so
5158 there is probably a bit of redundancy. */
5159 addr = XEXP (mem, 0);
5160 base = offset = NULL_RTX;
5161 if (GET_CODE (addr) == PLUS)
5162 {
5163 if (GET_CODE (XEXP (addr, 0)) == REG)
5164 {
5165 base = XEXP (addr, 0);
5166
5167 	  /* What we are saying here is that if the base
5168 	     REG is properly aligned, the compiler will make
5169 	     sure that any REG-based index added to it is
5170 	     properly aligned as well.  */
5171 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
5172 offset = XEXP (addr, 1);
5173 else
5174 offset = const0_rtx;
5175 }
5176 }
5177 else if (GET_CODE (addr) == REG)
5178 {
5179 base = addr;
5180 offset = const0_rtx;
5181 }
5182
5183 if (base != NULL_RTX)
5184 {
5185 int regno = REGNO (base);
5186
5187 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
5188 {
5189 /* Check if the compiler has recorded some information
5190 about the alignment of the base REG. If reload has
5191 completed, we already matched with proper alignments.
5192 	     If not running global_alloc, reload might give us an
5193 	     unaligned pointer to the local stack, though.  */
5194 if (((cfun != 0
5195 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
5196 || (optimize && reload_completed))
5197 && (INTVAL (offset) & (desired - 1)) == 0)
5198 return 1;
5199 }
5200 else
5201 {
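	  /* %fp and %sp are kept properly aligned, but in 64-bit mode they
	     carry the stack bias, so factor it out before checking the
	     offset.  */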
5202 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
5203 return 1;
5204 }
5205 }
5206 else if (! TARGET_UNALIGNED_DOUBLES
5207 || CONSTANT_P (addr)
5208 || GET_CODE (addr) == LO_SUM)
5209 {
5210 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
5211 is true, in which case we can only assume that an access is aligned if
5212 it is to a constant address, or the address involves a LO_SUM. */
5213 return 1;
5214 }
5215
5216 /* An obviously unaligned address. */
5217 return 0;
5218 }
5219
5220
5221 /* Vectors to keep interesting information about registers where it can easily
5222 be got. We used to use the actual mode value as the bit number, but there
5223 are more than 32 modes now. Instead we use two tables: one indexed by
5224 hard register number, and one indexed by mode. */
5225
5226 /* The purpose of sparc_mode_class is to shrink the range of modes so that
5227 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
5228 mapped into one sparc_mode_class mode. */
5229
5230 enum sparc_mode_class {
5231 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
5232 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
5233 CC_MODE, CCFP_MODE
5234 };
5235
5236 /* Modes for single-word and smaller quantities. */
5237 #define S_MODES \
5238 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
5239
5240 /* Modes for double-word and smaller quantities. */
5241 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5242
5243 /* Modes for quad-word and smaller quantities. */
5244 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
5245
5246 /* Modes for 8-word and smaller quantities. */
5247 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
5248
5249 /* Modes for single-float quantities. */
5250 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
5251
5252 /* Modes for double-float and smaller quantities. */
5253 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5254
5255 /* Modes for quad-float and smaller quantities. */
5256 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
5257
5258 /* Modes for quad-float pairs and smaller quantities. */
5259 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
5260
5261 /* Modes for double-float only quantities. */
5262 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
5263
5264 /* Modes for quad-float and double-float only quantities. */
5265 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
5266
5267 /* Modes for quad-float pairs and double-float only quantities. */
5268 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
5269
5270 /* Modes for condition codes. */
5271 #define CC_MODES (1 << (int) CC_MODE)
5272 #define CCFP_MODES (1 << (int) CCFP_MODE)
5273
5274 /* Value is 1 if register/mode pair is acceptable on sparc.
5275
5276 The funny mixture of D and T modes is because integer operations
5277 do not specially operate on tetra quantities, so non-quad-aligned
5278 registers can hold quadword quantities (except %o4 and %i4 because
5279 they cross fixed registers).
5280
5281 ??? Note that, despite the settings, non-double-aligned parameter
5282 registers can hold double-word quantities in 32-bit mode. */
5283
5284 /* This points to either the 32-bit or the 64-bit version. */
5285 static const int *hard_regno_mode_classes;
5286
5287 static const int hard_32bit_mode_classes[] = {
5288 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5289 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5290 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5291 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5292
5293 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5294 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5295 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5296 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5297
5298 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5299 and none can hold SFmode/SImode values. */
5300 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5301 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5302 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5303 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5304
5305 /* %fcc[0123] */
5306 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5307
5308 /* %icc, %sfp, %gsr */
5309 CC_MODES, 0, D_MODES
5310 };
5311
5312 static const int hard_64bit_mode_classes[] = {
5313 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5314 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5315 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5316 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5317
5318 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5319 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5320 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5321 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5322
5323 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5324 and none can hold SFmode/SImode values. */
5325 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5326 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5327 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5328 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5329
5330 /* %fcc[0123] */
5331 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5332
5333 /* %icc, %sfp, %gsr */
5334 CC_MODES, 0, D_MODES
5335 };
5336
5337 static int sparc_mode_class [NUM_MACHINE_MODES];
5338
5339 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
5340
5341 static void
5342 sparc_init_modes (void)
5343 {
5344 int i;
5345
5346 for (i = 0; i < NUM_MACHINE_MODES; i++)
5347 {
5348 machine_mode m = (machine_mode) i;
5349 unsigned int size = GET_MODE_SIZE (m);
5350
5351 switch (GET_MODE_CLASS (m))
5352 {
5353 case MODE_INT:
5354 case MODE_PARTIAL_INT:
5355 case MODE_COMPLEX_INT:
5356 if (size < 4)
5357 sparc_mode_class[i] = 1 << (int) H_MODE;
5358 else if (size == 4)
5359 sparc_mode_class[i] = 1 << (int) S_MODE;
5360 else if (size == 8)
5361 sparc_mode_class[i] = 1 << (int) D_MODE;
5362 else if (size == 16)
5363 sparc_mode_class[i] = 1 << (int) T_MODE;
5364 else if (size == 32)
5365 sparc_mode_class[i] = 1 << (int) O_MODE;
5366 else
5367 sparc_mode_class[i] = 0;
5368 break;
5369 case MODE_VECTOR_INT:
5370 if (size == 4)
5371 sparc_mode_class[i] = 1 << (int) SF_MODE;
5372 else if (size == 8)
5373 sparc_mode_class[i] = 1 << (int) DF_MODE;
5374 else
5375 sparc_mode_class[i] = 0;
5376 break;
5377 case MODE_FLOAT:
5378 case MODE_COMPLEX_FLOAT:
5379 if (size == 4)
5380 sparc_mode_class[i] = 1 << (int) SF_MODE;
5381 else if (size == 8)
5382 sparc_mode_class[i] = 1 << (int) DF_MODE;
5383 else if (size == 16)
5384 sparc_mode_class[i] = 1 << (int) TF_MODE;
5385 else if (size == 32)
5386 sparc_mode_class[i] = 1 << (int) OF_MODE;
5387 else
5388 sparc_mode_class[i] = 0;
5389 break;
5390 case MODE_CC:
5391 if (m == CCFPmode || m == CCFPEmode)
5392 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
5393 else
5394 sparc_mode_class[i] = 1 << (int) CC_MODE;
5395 break;
5396 default:
5397 sparc_mode_class[i] = 0;
5398 break;
5399 }
5400 }
5401
5402 if (TARGET_ARCH64)
5403 hard_regno_mode_classes = hard_64bit_mode_classes;
5404 else
5405 hard_regno_mode_classes = hard_32bit_mode_classes;
5406
5407 /* Initialize the array used by REGNO_REG_CLASS. */
5408 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5409 {
5410 if (i < 16 && TARGET_V8PLUS)
5411 sparc_regno_reg_class[i] = I64_REGS;
5412 else if (i < 32 || i == FRAME_POINTER_REGNUM)
5413 sparc_regno_reg_class[i] = GENERAL_REGS;
5414 else if (i < 64)
5415 sparc_regno_reg_class[i] = FP_REGS;
5416 else if (i < 96)
5417 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
5418 else if (i < 100)
5419 sparc_regno_reg_class[i] = FPCC_REGS;
5420 else
5421 sparc_regno_reg_class[i] = NO_REGS;
5422 }
5423 }
5424
5425 /* Return whether REGNO, a global or FP register, must be saved/restored. */
5426
5427 static inline bool
5428 save_global_or_fp_reg_p (unsigned int regno,
5429 int leaf_function ATTRIBUTE_UNUSED)
5430 {
5431 return !call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno);
5432 }
5433
5434 /* Return whether the return address register (%i7) is needed. */
5435
5436 static inline bool
5437 return_addr_reg_needed_p (int leaf_function)
5438 {
5439 /* If it is live, for example because of __builtin_return_address (0). */
5440 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
5441 return true;
5442
5443 /* Otherwise, it is needed as save register if %o7 is clobbered. */
5444 if (!leaf_function
5445 /* Loading the GOT register clobbers %o7. */
5446 || crtl->uses_pic_offset_table
5447 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
5448 return true;
5449
5450 return false;
5451 }
5452
5453 /* Return whether REGNO, a local or in register, must be saved/restored. */
5454
5455 static bool
5456 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5457 {
5458 /* General case: call-saved registers live at some point. */
5459 if (!call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno))
5460 return true;
5461
5462 /* Frame pointer register (%fp) if needed. */
5463 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5464 return true;
5465
5466 /* Return address register (%i7) if needed. */
5467 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5468 return true;
5469
5470 /* GOT register (%l7) if needed. */
5471 if (got_register_rtx && regno == REGNO (got_register_rtx))
5472 return true;
5473
5474 /* If the function accesses prior frames, the frame pointer and the return
5475 address of the previous frame must be saved on the stack. */
5476 if (crtl->accesses_prior_frames
5477 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5478 return true;
5479
5480 return false;
5481 }
5482
5483 /* Compute the frame size required by the function. This function is called
5484 during the reload pass and also by sparc_expand_prologue. */
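/* In the general case below, the result boils down to (a sketch of the
   computation, not a separate definition):

     SPARC_STACK_ALIGN (ROUND_UP (size, 8)
			+ n_global_fp_regs * 4
			+ ROUND_UP (args_size, 8)
			+ FIRST_PARM_OFFSET (cfun->decl))

   with args_size forced to 0 for leaf functions that do not call alloca.  */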
5485
5486 static HOST_WIDE_INT
5487 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5488 {
5489 HOST_WIDE_INT frame_size, apparent_frame_size;
5490 int args_size, n_global_fp_regs = 0;
5491 bool save_local_in_regs_p = false;
5492 unsigned int i;
5493
5494 /* If the function allocates dynamic stack space, the dynamic offset is
5495 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5496 if (leaf_function && !cfun->calls_alloca)
5497 args_size = 0;
5498 else
5499 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5500
5501 /* Calculate space needed for global registers. */
5502 if (TARGET_ARCH64)
5503 {
5504 for (i = 0; i < 8; i++)
5505 if (save_global_or_fp_reg_p (i, 0))
5506 n_global_fp_regs += 2;
5507 }
5508 else
5509 {
5510 for (i = 0; i < 8; i += 2)
5511 if (save_global_or_fp_reg_p (i, 0)
5512 || save_global_or_fp_reg_p (i + 1, 0))
5513 n_global_fp_regs += 2;
5514 }
5515
5516 /* In the flat window model, find out which local and in registers need to
5517 be saved. We don't reserve space in the current frame for them as they
5518 will be spilled into the register window save area of the caller's frame.
5519 However, as soon as we use this register window save area, we must create
5520 that of the current frame to make it the live one. */
5521 if (TARGET_FLAT)
5522 for (i = 16; i < 32; i++)
5523 if (save_local_or_in_reg_p (i, leaf_function))
5524 {
5525 save_local_in_regs_p = true;
5526 break;
5527 }
5528
5529 /* Calculate space needed for FP registers. */
5530 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5531 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5532 n_global_fp_regs += 2;
5533
5534 if (size == 0
5535 && n_global_fp_regs == 0
5536 && args_size == 0
5537 && !save_local_in_regs_p)
5538 frame_size = apparent_frame_size = 0;
5539 else
5540 {
5541 /* Start from the apparent frame size. */
5542 apparent_frame_size = ROUND_UP (size, 8) + n_global_fp_regs * 4;
5543
5544 /* We need to add the size of the outgoing argument area. */
5545 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5546
5547 /* And that of the register window save area. */
5548 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5549
5550 /* Finally, bump to the appropriate alignment. */
5551 frame_size = SPARC_STACK_ALIGN (frame_size);
5552 }
5553
5554 /* Set up values for use in prologue and epilogue. */
5555 sparc_frame_size = frame_size;
5556 sparc_apparent_frame_size = apparent_frame_size;
5557 sparc_n_global_fp_regs = n_global_fp_regs;
5558 sparc_save_local_in_regs_p = save_local_in_regs_p;
5559
5560 return frame_size;
5561 }
5562
5563 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5564
5565 int
5566 sparc_initial_elimination_offset (int to)
5567 {
5568 int offset;
5569
5570 if (to == STACK_POINTER_REGNUM)
5571 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5572 else
5573 offset = 0;
5574
5575 offset += SPARC_STACK_BIAS;
5576 return offset;
5577 }
5578
5579 /* Output any necessary .register pseudo-ops. */
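/* For instance, a 64-bit function in which %g2 and %g7 are live gets
   something like (a sketch; only %g2, %g3, %g6 and %g7 are candidates):

	.register	%g2, #scratch
	.register	%g7, #ignore

   emitted before its body.  */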
5580
5581 void
5582 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5583 {
5584 int i;
5585
5586 if (TARGET_ARCH32)
5587 return;
5588
5589 /* Check if %g[2367] were used without
5590 .register being printed for them already. */
5591 for (i = 2; i < 8; i++)
5592 {
5593 if (df_regs_ever_live_p (i)
5594 && ! sparc_hard_reg_printed [i])
5595 {
5596 sparc_hard_reg_printed [i] = 1;
5597 /* %g7 is used as TLS base register, use #ignore
5598 for it instead of #scratch. */
5599 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5600 i == 7 ? "ignore" : "scratch");
5601 }
5602 if (i == 3) i = 5;
5603 }
5604 }
5605
5606 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5607
5608 #if PROBE_INTERVAL > 4096
5609 #error Cannot use indexed addressing mode for stack probing
5610 #endif
5611
5612 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5613 inclusive. These are offsets from the current stack pointer.
5614
5615 Note that we don't use the REG+REG addressing mode for the probes because
5616    of the stack bias in 64-bit mode.  It wouldn't really buy us anything
5617    anyway, so the advantage of having a single code path wins here.  */
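/* For example, with the default PROBE_INTERVAL of 4096, a call with
   FIRST = 16384 and SIZE = 10000 ends up probing 20480, 24576 and 26384
   bytes below the stack pointer (a worked instance of the unrolled case
   handled below).  */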
5618
5619 static void
5620 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5621 {
5622 rtx g1 = gen_rtx_REG (Pmode, 1);
5623
5624 /* See if we have a constant small number of probes to generate. If so,
5625 that's the easy case. */
5626 if (size <= PROBE_INTERVAL)
5627 {
5628 emit_move_insn (g1, GEN_INT (first));
5629 emit_insn (gen_rtx_SET (g1,
5630 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5631 emit_stack_probe (plus_constant (Pmode, g1, -size));
5632 }
5633
5634 /* The run-time loop is made up of 9 insns in the generic case while the
5635 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
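  /* Worked instance of that formula: 3 intervals cost 4 + 2*(3-2) = 6 insns
     and 4 intervals cost 8, both below the 9-insn run-time loop, which is
     why the sequence is only unrolled up to 4 * PROBE_INTERVAL.  */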
5636 else if (size <= 4 * PROBE_INTERVAL)
5637 {
5638 HOST_WIDE_INT i;
5639
5640 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5641 emit_insn (gen_rtx_SET (g1,
5642 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5643 emit_stack_probe (g1);
5644
5645 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5646 it exceeds SIZE. If only two probes are needed, this will not
5647 generate any code. Then probe at FIRST + SIZE. */
5648 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5649 {
5650 emit_insn (gen_rtx_SET (g1,
5651 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5652 emit_stack_probe (g1);
5653 }
5654
5655 emit_stack_probe (plus_constant (Pmode, g1,
5656 (i - PROBE_INTERVAL) - size));
5657 }
5658
5659 /* Otherwise, do the same as above, but in a loop. Note that we must be
5660 extra careful with variables wrapping around because we might be at
5661 the very top (or the very bottom) of the address space and we have
5662 to be able to handle this case properly; in particular, we use an
5663 equality test for the loop condition. */
5664 else
5665 {
5666 HOST_WIDE_INT rounded_size;
5667 rtx g4 = gen_rtx_REG (Pmode, 4);
5668
5669 emit_move_insn (g1, GEN_INT (first));
5670
5671
5672 /* Step 1: round SIZE to the previous multiple of the interval. */
5673
5674 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5675 emit_move_insn (g4, GEN_INT (rounded_size));
5676
5677
5678 /* Step 2: compute initial and final value of the loop counter. */
5679
5680 /* TEST_ADDR = SP + FIRST. */
5681 emit_insn (gen_rtx_SET (g1,
5682 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5683
5684 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5685 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5686
5687
5688 /* Step 3: the loop
5689
5690 while (TEST_ADDR != LAST_ADDR)
5691 {
5692 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5693 probe at TEST_ADDR
5694 }
5695
5696 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5697 until it is equal to ROUNDED_SIZE. */
5698
5699 if (TARGET_ARCH64)
5700 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5701 else
5702 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5703
5704
5705 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5706 that SIZE is equal to ROUNDED_SIZE. */
5707
5708 if (size != rounded_size)
5709 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5710 }
5711
5712 /* Make sure nothing is scheduled before we are done. */
5713 emit_insn (gen_blockage ());
5714 }
5715
5716 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5717 absolute addresses. */
5718
5719 const char *
5720 output_probe_stack_range (rtx reg1, rtx reg2)
5721 {
5722 static int labelno = 0;
5723 char loop_lab[32];
5724 rtx xops[2];
5725
5726 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5727
5728 /* Loop. */
5729 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5730
5731 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5732 xops[0] = reg1;
5733 xops[1] = GEN_INT (-PROBE_INTERVAL);
5734 output_asm_insn ("add\t%0, %1, %0", xops);
5735
5736 /* Test if TEST_ADDR == LAST_ADDR. */
5737 xops[1] = reg2;
5738 output_asm_insn ("cmp\t%0, %1", xops);
5739
5740 /* Probe at TEST_ADDR and branch. */
5741 if (TARGET_ARCH64)
5742 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5743 else
5744 fputs ("\tbne\t", asm_out_file);
5745 assemble_name_raw (asm_out_file, loop_lab);
5746 fputc ('\n', asm_out_file);
5747 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5748 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5749
5750 return "";
5751 }
5752
5753 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5754 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5755 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5756 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5757 the action to be performed if it returns false. Return the new offset. */
5758
5759 typedef bool (*sorr_pred_t) (unsigned int, int);
5760 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
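/* A typical use, mirroring emit_save_or_restore_global_fp_regs below:

     emit_save_or_restore_regs (0, 8, base, offset, 0,
				save_global_or_fp_reg_p, SORR_SAVE, SORR_NONE);

   saves the live call-saved global registers among %g0-%g7 at consecutive
   offsets; SORR_NONE means that skipped registers do not reserve a slot.  */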
5761
5762 static int
5763 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5764 int offset, int leaf_function, sorr_pred_t save_p,
5765 sorr_act_t action_true, sorr_act_t action_false)
5766 {
5767 unsigned int i;
5768 rtx mem;
5769 rtx_insn *insn;
5770
5771 if (TARGET_ARCH64 && high <= 32)
5772 {
5773 int fp_offset = -1;
5774
5775 for (i = low; i < high; i++)
5776 {
5777 if (save_p (i, leaf_function))
5778 {
5779 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5780 base, offset));
5781 if (action_true == SORR_SAVE)
5782 {
5783 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5784 RTX_FRAME_RELATED_P (insn) = 1;
5785 }
5786 else /* action_true == SORR_RESTORE */
5787 {
5788 /* The frame pointer must be restored last since its old
5789 value may be used as base address for the frame. This
5790 is problematic in 64-bit mode only because of the lack
5791 of double-word load instruction. */
5792 if (i == HARD_FRAME_POINTER_REGNUM)
5793 fp_offset = offset;
5794 else
5795 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5796 }
5797 offset += 8;
5798 }
5799 else if (action_false == SORR_ADVANCE)
5800 offset += 8;
5801 }
5802
5803 if (fp_offset >= 0)
5804 {
5805 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5806 emit_move_insn (hard_frame_pointer_rtx, mem);
5807 }
5808 }
5809 else
5810 {
5811 for (i = low; i < high; i += 2)
5812 {
5813 bool reg0 = save_p (i, leaf_function);
5814 bool reg1 = save_p (i + 1, leaf_function);
5815 machine_mode mode;
5816 int regno;
5817
5818 if (reg0 && reg1)
5819 {
5820 mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode;
5821 regno = i;
5822 }
5823 else if (reg0)
5824 {
5825 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5826 regno = i;
5827 }
5828 else if (reg1)
5829 {
5830 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5831 regno = i + 1;
5832 offset += 4;
5833 }
5834 else
5835 {
5836 if (action_false == SORR_ADVANCE)
5837 offset += 8;
5838 continue;
5839 }
5840
5841 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5842 if (action_true == SORR_SAVE)
5843 {
5844 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5845 RTX_FRAME_RELATED_P (insn) = 1;
5846 if (mode == DImode)
5847 {
5848 rtx set1, set2;
5849 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5850 offset));
5851 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5852 RTX_FRAME_RELATED_P (set1) = 1;
5853 mem
5854 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5855 offset + 4));
5856 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5857 RTX_FRAME_RELATED_P (set2) = 1;
5858 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5859 gen_rtx_PARALLEL (VOIDmode,
5860 gen_rtvec (2, set1, set2)));
5861 }
5862 }
5863 else /* action_true == SORR_RESTORE */
5864 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5865
5866 /* Bump and round down to double word
5867 in case we already bumped by 4. */
5868 offset = ROUND_DOWN (offset + 8, 8);
5869 }
5870 }
5871
5872 return offset;
5873 }
5874
5875 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5876
5877 static rtx
5878 emit_adjust_base_to_offset (rtx base, int offset)
5879 {
5880 /* ??? This might be optimized a little as %g1 might already have a
5881 value close enough that a single add insn will do. */
5882 /* ??? Although, all of this is probably only a temporary fix because
5883 if %g1 can hold a function result, then sparc_expand_epilogue will
5884 lose (the result will be clobbered). */
5885 rtx new_base = gen_rtx_REG (Pmode, 1);
5886 emit_move_insn (new_base, GEN_INT (offset));
5887 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5888 return new_base;
5889 }
5890
5891 /* Emit code to save/restore call-saved global and FP registers. */
5892
5893 static void
5894 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5895 {
5896 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5897 {
5898 base = emit_adjust_base_to_offset (base, offset);
5899 offset = 0;
5900 }
5901
5902 offset
5903 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5904 save_global_or_fp_reg_p, action, SORR_NONE);
5905 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5906 save_global_or_fp_reg_p, action, SORR_NONE);
5907 }
5908
5909 /* Emit code to save/restore call-saved local and in registers. */
5910
5911 static void
5912 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5913 {
5914 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5915 {
5916 base = emit_adjust_base_to_offset (base, offset);
5917 offset = 0;
5918 }
5919
5920 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5921 save_local_or_in_reg_p, action, SORR_ADVANCE);
5922 }
5923
5924 /* Emit a window_save insn. */
5925
5926 static rtx_insn *
5927 emit_window_save (rtx increment)
5928 {
5929 rtx_insn *insn = emit_insn (gen_window_save (increment));
5930 RTX_FRAME_RELATED_P (insn) = 1;
5931
5932 /* The incoming return address (%o7) is saved in %i7. */
5933 add_reg_note (insn, REG_CFA_REGISTER,
5934 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5935 gen_rtx_REG (Pmode,
5936 INCOMING_RETURN_ADDR_REGNUM)));
5937
5938 /* The window save event. */
5939 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5940
5941 /* The CFA is %fp, the hard frame pointer. */
5942 add_reg_note (insn, REG_CFA_DEF_CFA,
5943 plus_constant (Pmode, hard_frame_pointer_rtx,
5944 INCOMING_FRAME_SP_OFFSET));
5945
5946 return insn;
5947 }
5948
5949 /* Generate an increment for the stack pointer. */
5950
5951 static rtx
5952 gen_stack_pointer_inc (rtx increment)
5953 {
5954 return gen_rtx_SET (stack_pointer_rtx,
5955 gen_rtx_PLUS (Pmode,
5956 stack_pointer_rtx,
5957 increment));
5958 }
5959
5960 /* Expand the function prologue. The prologue is responsible for reserving
5961 storage for the frame, saving the call-saved registers and loading the
5962 GOT register if needed. */
5963
5964 void
5965 sparc_expand_prologue (void)
5966 {
5967 HOST_WIDE_INT size;
5968 rtx_insn *insn;
5969
5970 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5971 on the final value of the flag means deferring the prologue/epilogue
5972 expansion until just before the second scheduling pass, which is too
5973 late to emit multiple epilogues or return insns.
5974
5975 Of course we are making the assumption that the value of the flag
5976 will not change between now and its final value. Of the three parts
5977 of the formula, only the last one can reasonably vary. Let's take a
5978 closer look, after assuming that the first two ones are set to true
5979 (otherwise the last value is effectively silenced).
5980
5981 If only_leaf_regs_used returns false, the global predicate will also
5982 be false so the actual frame size calculated below will be positive.
5983 As a consequence, the save_register_window insn will be emitted in
5984 the instruction stream; now this insn explicitly references %fp
5985 which is not a leaf register so only_leaf_regs_used will always
5986 return false subsequently.
5987
5988 If only_leaf_regs_used returns true, we hope that the subsequent
5989 optimization passes won't cause non-leaf registers to pop up. For
5990 example, the regrename pass has special provisions to not rename to
5991 non-leaf registers in a leaf function. */
5992 sparc_leaf_function_p
5993 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5994
5995 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5996
5997 if (flag_stack_usage_info)
5998 current_function_static_stack_size = size;
5999
6000 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6001 || flag_stack_clash_protection)
6002 {
6003 if (crtl->is_leaf && !cfun->calls_alloca)
6004 {
6005 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
6006 sparc_emit_probe_stack_range (get_stack_check_protect (),
6007 size - get_stack_check_protect ());
6008 }
6009 else if (size > 0)
6010 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
6011 }
6012
6013 if (size == 0)
6014 ; /* do nothing. */
6015 else if (sparc_leaf_function_p)
6016 {
6017 rtx size_int_rtx = GEN_INT (-size);
6018
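      /* A note on the thresholds below: the add immediate field is a
	 signed 13-bit value (simm13), so a decrement of up to 4096 fits
	 in a single instruction and up to 8192 in two; larger frames go
	 through the scratch register %g1.  */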
6019 if (size <= 4096)
6020 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
6021 else if (size <= 8192)
6022 {
6023 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
6024 RTX_FRAME_RELATED_P (insn) = 1;
6025
6026 /* %sp is still the CFA register. */
6027 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6028 }
6029 else
6030 {
6031 rtx size_rtx = gen_rtx_REG (Pmode, 1);
6032 emit_move_insn (size_rtx, size_int_rtx);
6033 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
6034 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6035 gen_stack_pointer_inc (size_int_rtx));
6036 }
6037
6038 RTX_FRAME_RELATED_P (insn) = 1;
6039 }
6040 else
6041 {
6042 rtx size_int_rtx = GEN_INT (-size);
6043
6044 if (size <= 4096)
6045 emit_window_save (size_int_rtx);
6046 else if (size <= 8192)
6047 {
6048 emit_window_save (GEN_INT (-4096));
6049
6050 /* %sp is not the CFA register anymore. */
6051 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6052
6053 /* Make sure no %fp-based store is issued until after the frame is
6054 established. The offset between the frame pointer and the stack
6055 pointer is calculated relative to the value of the stack pointer
6056 at the end of the function prologue, and moving instructions that
6057 access the stack via the frame pointer between the instructions
6058 that decrement the stack pointer could result in accessing the
6059 register window save area, which is volatile. */
6060 emit_insn (gen_frame_blockage ());
6061 }
6062 else
6063 {
6064 rtx size_rtx = gen_rtx_REG (Pmode, 1);
6065 emit_move_insn (size_rtx, size_int_rtx);
6066 emit_window_save (size_rtx);
6067 }
6068 }
6069
6070 if (sparc_leaf_function_p)
6071 {
6072 sparc_frame_base_reg = stack_pointer_rtx;
6073 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6074 }
6075 else
6076 {
6077 sparc_frame_base_reg = hard_frame_pointer_rtx;
6078 sparc_frame_base_offset = SPARC_STACK_BIAS;
6079 }
6080
6081 if (sparc_n_global_fp_regs > 0)
6082 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6083 sparc_frame_base_offset
6084 - sparc_apparent_frame_size,
6085 SORR_SAVE);
6086
6087 /* Advertise that the data calculated just above are now valid. */
6088 sparc_prologue_data_valid_p = true;
6089 }
6090
6091 /* Expand the function prologue in the flat window model.  The prologue is
6092    responsible for reserving storage for the frame, saving the call-saved
6093    registers and loading the GOT register if needed.  */
6094
6095 void
6096 sparc_flat_expand_prologue (void)
6097 {
6098 HOST_WIDE_INT size;
6099 rtx_insn *insn;
6100
6101 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
6102
6103 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
6104
6105 if (flag_stack_usage_info)
6106 current_function_static_stack_size = size;
6107
6108 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6109 || flag_stack_clash_protection)
6110 {
6111 if (crtl->is_leaf && !cfun->calls_alloca)
6112 {
6113 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
6114 sparc_emit_probe_stack_range (get_stack_check_protect (),
6115 size - get_stack_check_protect ());
6116 }
6117 else if (size > 0)
6118 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
6119 }
6120
6121 if (sparc_save_local_in_regs_p)
6122 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
6123 SORR_SAVE);
6124
6125 if (size == 0)
6126 ; /* do nothing. */
6127 else
6128 {
6129 rtx size_int_rtx, size_rtx;
6130
6131 size_rtx = size_int_rtx = GEN_INT (-size);
6132
6133 /* We establish the frame (i.e. decrement the stack pointer) first, even
6134 if we use a frame pointer, because we cannot clobber any call-saved
6135 registers, including the frame pointer, if we haven't created a new
6136 register save area, for the sake of compatibility with the ABI. */
6137 if (size <= 4096)
6138 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
6139 else if (size <= 8192 && !frame_pointer_needed)
6140 {
6141 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
6142 RTX_FRAME_RELATED_P (insn) = 1;
6143 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6144 }
6145 else
6146 {
6147 size_rtx = gen_rtx_REG (Pmode, 1);
6148 emit_move_insn (size_rtx, size_int_rtx);
6149 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
6150 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6151 gen_stack_pointer_inc (size_int_rtx));
6152 }
6153 RTX_FRAME_RELATED_P (insn) = 1;
6154
6155 /* Ensure nothing is scheduled until after the frame is established. */
6156 emit_insn (gen_blockage ());
6157
6158 if (frame_pointer_needed)
6159 {
6160 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
6161 gen_rtx_MINUS (Pmode,
6162 stack_pointer_rtx,
6163 size_rtx)));
6164 RTX_FRAME_RELATED_P (insn) = 1;
6165
6166 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6167 gen_rtx_SET (hard_frame_pointer_rtx,
6168 plus_constant (Pmode, stack_pointer_rtx,
6169 size)));
6170 }
6171
6172 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6173 {
6174 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
6175 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
6176
6177 insn = emit_move_insn (i7, o7);
6178 RTX_FRAME_RELATED_P (insn) = 1;
6179
6180 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
6181
6182 /* Prevent this instruction from ever being considered dead,
6183 even if this function has no epilogue. */
6184 emit_use (i7);
6185 }
6186 }
6187
6188 if (frame_pointer_needed)
6189 {
6190 sparc_frame_base_reg = hard_frame_pointer_rtx;
6191 sparc_frame_base_offset = SPARC_STACK_BIAS;
6192 }
6193 else
6194 {
6195 sparc_frame_base_reg = stack_pointer_rtx;
6196 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6197 }
6198
6199 if (sparc_n_global_fp_regs > 0)
6200 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6201 sparc_frame_base_offset
6202 - sparc_apparent_frame_size,
6203 SORR_SAVE);
6204
6205 /* Advertise that the data calculated just above are now valid. */
6206 sparc_prologue_data_valid_p = true;
6207 }
6208
6209 /* This function generates the assembly code for function entry, which boils
6210 down to emitting the necessary .register directives. */
6211
6212 static void
6213 sparc_asm_function_prologue (FILE *file)
6214 {
6215 /* Check that the assumption we made in sparc_expand_prologue is valid. */
6216 if (!TARGET_FLAT)
6217 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
6218
6219 sparc_output_scratch_registers (file);
6220 }
6221
6222 /* Expand the function epilogue, either normal or part of a sibcall.
6223 We emit all the instructions except the return or the call. */
6224
6225 void
6226 sparc_expand_epilogue (bool for_eh)
6227 {
6228 HOST_WIDE_INT size = sparc_frame_size;
6229
6230 if (cfun->calls_alloca)
6231 emit_insn (gen_frame_blockage ());
6232
6233 if (sparc_n_global_fp_regs > 0)
6234 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6235 sparc_frame_base_offset
6236 - sparc_apparent_frame_size,
6237 SORR_RESTORE);
6238
6239 if (size == 0 || for_eh)
6240 ; /* do nothing. */
6241 else if (sparc_leaf_function_p)
6242 {
6243 if (size <= 4096)
6244 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6245 else if (size <= 8192)
6246 {
6247 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6248 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6249 }
6250 else
6251 {
6252 rtx reg = gen_rtx_REG (Pmode, 1);
6253 emit_move_insn (reg, GEN_INT (size));
6254 emit_insn (gen_stack_pointer_inc (reg));
6255 }
6256 }
6257 }
6258
6259 /* Expand the function epilogue in the flat window model, either normal or
6260    part of a sibcall.  We emit all the instructions except the return or the call.  */
6261
6262 void
6263 sparc_flat_expand_epilogue (bool for_eh)
6264 {
6265 HOST_WIDE_INT size = sparc_frame_size;
6266
6267 if (sparc_n_global_fp_regs > 0)
6268 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6269 sparc_frame_base_offset
6270 - sparc_apparent_frame_size,
6271 SORR_RESTORE);
6272
6273 /* If we have a frame pointer, we'll need both to restore it before the
6274 frame is destroyed and use its current value in destroying the frame.
6275 Since we don't have an atomic way to do that in the flat window model,
6276 we save the current value into a temporary register (%g1). */
6277 if (frame_pointer_needed && !for_eh)
6278 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
6279
6280 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6281 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
6282 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
6283
6284 if (sparc_save_local_in_regs_p)
6285 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
6286 sparc_frame_base_offset,
6287 SORR_RESTORE);
6288
6289 if (size == 0 || for_eh)
6290 ; /* do nothing. */
6291 else if (frame_pointer_needed)
6292 {
6293 /* Make sure the frame is destroyed after everything else is done. */
6294 emit_insn (gen_blockage ());
6295
6296 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
6297 }
6298 else
6299 {
6300 /* Likewise. */
6301 emit_insn (gen_blockage ());
6302
6303 if (size <= 4096)
6304 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6305 else if (size <= 8192)
6306 {
6307 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6308 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6309 }
6310 else
6311 {
6312 rtx reg = gen_rtx_REG (Pmode, 1);
6313 emit_move_insn (reg, GEN_INT (size));
6314 emit_insn (gen_stack_pointer_inc (reg));
6315 }
6316 }
6317 }
6318
6319 /* Return true if it is appropriate to emit `return' instructions in the
6320 body of a function. */
6321
6322 bool
6323 sparc_can_use_return_insn_p (void)
6324 {
6325 return sparc_prologue_data_valid_p
6326 && sparc_n_global_fp_regs == 0
6327 && TARGET_FLAT
6328 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
6329 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
6330 }
6331
6332 /* This function generates the assembly code for function exit. */
6333
6334 static void
6335 sparc_asm_function_epilogue (FILE *file)
6336 {
6337 /* If the last two instructions of a function are "call foo; dslot;"
6338 the return address might point to the first instruction in the next
6339 function and we have to output a dummy nop for the sake of sane
6340 backtraces in such cases. This is pointless for sibling calls since
6341 the return address is explicitly adjusted. */
6342
6343 rtx_insn *insn = get_last_insn ();
6344
6345 rtx last_real_insn = prev_real_insn (insn);
6346 if (last_real_insn
6347 && NONJUMP_INSN_P (last_real_insn)
6348 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
6349 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
6350
6351 if (last_real_insn
6352 && CALL_P (last_real_insn)
6353 && !SIBLING_CALL_P (last_real_insn))
6354 fputs("\tnop\n", file);
6355
6356 sparc_output_deferred_case_vectors ();
6357 }
6358
6359 /* Output a 'restore' instruction. */
6360
6361 static void
6362 output_restore (rtx pat)
6363 {
6364 rtx operands[3];
6365
6366 if (! pat)
6367 {
6368 fputs ("\t restore\n", asm_out_file);
6369 return;
6370 }
6371
6372 gcc_assert (GET_CODE (pat) == SET);
6373
6374 operands[0] = SET_DEST (pat);
6375 pat = SET_SRC (pat);
6376
6377 switch (GET_CODE (pat))
6378 {
6379 case PLUS:
6380 operands[1] = XEXP (pat, 0);
6381 operands[2] = XEXP (pat, 1);
6382 output_asm_insn (" restore %r1, %2, %Y0", operands);
6383 break;
6384 case LO_SUM:
6385 operands[1] = XEXP (pat, 0);
6386 operands[2] = XEXP (pat, 1);
6387 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
6388 break;
6389 case ASHIFT:
6390 operands[1] = XEXP (pat, 0);
6391 gcc_assert (XEXP (pat, 1) == const1_rtx);
6392 output_asm_insn (" restore %r1, %r1, %Y0", operands);
6393 break;
6394 default:
6395 operands[1] = pat;
6396 output_asm_insn (" restore %%g0, %1, %Y0", operands);
6397 break;
6398 }
6399 }
6400
6401 /* Output a return. */
6402
6403 const char *
6404 output_return (rtx_insn *insn)
6405 {
6406 if (crtl->calls_eh_return)
6407 {
6408 /* If the function uses __builtin_eh_return, the eh_return
6409 machinery occupies the delay slot. */
6410 gcc_assert (!final_sequence);
6411
6412 if (flag_delayed_branch)
6413 {
6414 if (!TARGET_FLAT && TARGET_V9)
6415 fputs ("\treturn\t%i7+8\n", asm_out_file);
6416 else
6417 {
6418 if (!TARGET_FLAT)
6419 fputs ("\trestore\n", asm_out_file);
6420
6421 fputs ("\tjmp\t%o7+8\n", asm_out_file);
6422 }
6423
6424 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
6425 }
6426 else
6427 {
6428 if (!TARGET_FLAT)
6429 fputs ("\trestore\n", asm_out_file);
6430
6431 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
6432 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
6433 }
6434 }
6435 else if (sparc_leaf_function_p || TARGET_FLAT)
6436 {
6437 /* This is a leaf or flat function so we don't have to bother restoring
6438 the register window, which frees us from dealing with the convoluted
6439 semantics of restore/return. We simply output the jump to the
6440 return address and the insn in the delay slot (if any). */
6441
6442 return "jmp\t%%o7+%)%#";
6443 }
6444 else
6445 {
6446 /* This is a regular function so we have to restore the register window.
6447 We may have a pending insn for the delay slot, which will be either
6448 combined with the 'restore' instruction or put in the delay slot of
6449 the 'return' instruction. */
6450
6451 if (final_sequence)
6452 {
6453 rtx_insn *delay;
6454 rtx pat;
6455
6456 delay = NEXT_INSN (insn);
6457 gcc_assert (delay);
6458
6459 pat = PATTERN (delay);
6460
6461 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6462 {
6463 epilogue_renumber (&pat, 0);
6464 return "return\t%%i7+%)%#";
6465 }
6466 else
6467 {
6468 output_asm_insn ("jmp\t%%i7+%)", NULL);
6469
6470 /* We're going to output the insn in the delay slot manually.
6471 Make sure to output its source location first. */
6472 PATTERN (delay) = gen_blockage ();
6473 INSN_CODE (delay) = -1;
6474 final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
6475 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6476
6477 output_restore (pat);
6478 }
6479 }
6480 else
6481 {
6482 /* The delay slot is empty. */
6483 if (TARGET_V9)
6484 return "return\t%%i7+%)\n\t nop";
6485 else if (flag_delayed_branch)
6486 return "jmp\t%%i7+%)\n\t restore";
6487 else
6488 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6489 }
6490 }
6491
6492 return "";
6493 }
6494
6495 /* Output a sibling call. */
6496
6497 const char *
6498 output_sibcall (rtx_insn *insn, rtx call_operand)
6499 {
6500 rtx operands[1];
6501
6502 gcc_assert (flag_delayed_branch);
6503
6504 operands[0] = call_operand;
6505
6506 if (sparc_leaf_function_p || TARGET_FLAT)
6507 {
6508 /* This is a leaf or flat function so we don't have to bother restoring
6509 the register window. We simply output the jump to the function and
6510 the insn in the delay slot (if any). */
6511
6512 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6513
6514 if (final_sequence)
6515 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6516 operands);
6517 else
6518 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6519 	   it into a branch if possible.  */
6520 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6521 operands);
6522 }
6523 else
6524 {
6525 /* This is a regular function so we have to restore the register window.
6526 We may have a pending insn for the delay slot, which will be combined
6527 with the 'restore' instruction. */
6528
6529 output_asm_insn ("call\t%a0, 0", operands);
6530
6531 if (final_sequence)
6532 {
6533 rtx_insn *delay;
6534 rtx pat;
6535
6536 delay = NEXT_INSN (insn);
6537 gcc_assert (delay);
6538
6539 pat = PATTERN (delay);
6540
6541 /* We're going to output the insn in the delay slot manually.
6542 Make sure to output its source location first. */
6543 PATTERN (delay) = gen_blockage ();
6544 INSN_CODE (delay) = -1;
6545 final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
6546 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6547
6548 output_restore (pat);
6549 }
6550 else
6551 output_restore (NULL_RTX);
6552 }
6553
6554 return "";
6555 }
6556
6557 /* Functions for handling argument passing.
6558
6559 For 32-bit, the first 6 args are normally in registers and the rest are
6560 pushed. Any arg that starts within the first 6 words is at least
6561 partially passed in a register unless its data type forbids.
6562
6563 For 64-bit, the argument registers are laid out as an array of 16 elements
6564 and arguments are added sequentially. The first 6 int args and up to the
6565 first 16 fp args (depending on size) are passed in regs.
6566
6567 Slot Stack Integral Float Float in structure Double Long Double
6568 ---- ----- -------- ----- ------------------ ------ -----------
6569 15 [SP+248] %f31 %f30,%f31 %d30
6570 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6571 13 [SP+232] %f27 %f26,%f27 %d26
6572 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6573 11 [SP+216] %f23 %f22,%f23 %d22
6574 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6575 9 [SP+200] %f19 %f18,%f19 %d18
6576 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6577 7 [SP+184] %f15 %f14,%f15 %d14
6578 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6579 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6580 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6581 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6582 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6583 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6584 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6585
6586 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6587
6588 Integral arguments are always passed as 64-bit quantities appropriately
6589 extended.
6590
6591 Passing of floating point values is handled as follows.
6592 If a prototype is in scope:
6593 If the value is in a named argument (i.e. not a stdarg function or a
6594 value not part of the `...') then the value is passed in the appropriate
6595 fp reg.
6596 If the value is part of the `...' and is passed in one of the first 6
6597 slots then the value is passed in the appropriate int reg.
6598 If the value is part of the `...' and is not passed in one of the first 6
6599 slots then the value is passed in memory.
6600 If a prototype is not in scope:
6601 If the value is one of the first 6 arguments the value is passed in the
6602 appropriate integer reg and the appropriate fp reg.
6603 If the value is not one of the first 6 arguments the value is passed in
6604 the appropriate fp reg and in memory.
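
   As an illustration (a sketch, assuming a prototype is in scope): for a
   call to f (int a, double b, float c), A goes in slot 0 as %o0, B in
   slot 1 as %d2 and C in slot 2 as %f5, the single float being
   right-justified in the odd half of its slot (see the table above).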
6605
6606
6607 Summary of the calling conventions implemented by GCC on the SPARC:
6608
6609 32-bit ABI:
6610 size argument return value
6611
6612 small integer <4 int. reg. int. reg.
6613 word 4 int. reg. int. reg.
6614 double word 8 int. reg. int. reg.
6615
6616 _Complex small integer <8 int. reg. int. reg.
6617 _Complex word 8 int. reg. int. reg.
6618 _Complex double word 16 memory int. reg.
6619
6620 vector integer <=8 int. reg. FP reg.
6621 vector integer >8 memory memory
6622
6623 float 4 int. reg. FP reg.
6624 double 8 int. reg. FP reg.
6625 long double 16 memory memory
6626
6627 _Complex float 8 memory FP reg.
6628 _Complex double 16 memory FP reg.
6629 _Complex long double 32 memory FP reg.
6630
6631 vector float any memory memory
6632
6633 aggregate any memory memory
6634
6635
6636
6637 64-bit ABI:
6638 size argument return value
6639
6640 small integer <8 int. reg. int. reg.
6641 word 8 int. reg. int. reg.
6642 double word 16 int. reg. int. reg.
6643
6644 _Complex small integer <16 int. reg. int. reg.
6645 _Complex word 16 int. reg. int. reg.
6646 _Complex double word 32 memory int. reg.
6647
6648 vector integer <=16 FP reg. FP reg.
6649 vector integer 16<s<=32 memory FP reg.
6650 vector integer >32 memory memory
6651
6652 float 4 FP reg. FP reg.
6653 double 8 FP reg. FP reg.
6654 long double 16 FP reg. FP reg.
6655
6656 _Complex float 8 FP reg. FP reg.
6657 _Complex double 16 FP reg. FP reg.
6658 _Complex long double 32 memory FP reg.
6659
6660 vector float <=16 FP reg. FP reg.
6661 vector float 16<s<=32 memory FP reg.
6662 vector float >32 memory memory
6663
6664 aggregate <=16 reg. reg.
6665 aggregate 16<s<=32 memory reg.
6666 aggregate >32 memory memory
6667
6668
6669
6670 Note #1: complex floating-point types follow the extended SPARC ABIs as
6671 implemented by the Sun compiler.
6672
6673 Note #2: integer vector types follow the scalar floating-point types
6674 conventions to match what is implemented by the Sun VIS SDK.
6675
6676 Note #3: floating-point vector types follow the aggregate types
6677 conventions. */
6678
6679
6680 /* Maximum number of int regs for args. */
6681 #define SPARC_INT_ARG_MAX 6
6682 /* Maximum number of fp regs for args. */
6683 #define SPARC_FP_ARG_MAX 16
6684 /* Number of words (partially) occupied for a given size in units. */
6685 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
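/* E.g. CEIL_NWORDS (12) is 2 in 64-bit mode (UNITS_PER_WORD == 8): a
   12-byte object partially occupies two words.  */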
6686
6687 /* Handle the INIT_CUMULATIVE_ARGS macro.
6688 Initialize a variable CUM of type CUMULATIVE_ARGS
6689 for a call to a function whose data type is FNTYPE.
6690 For a library call, FNTYPE is 0. */
6691
6692 void
6693 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6694 {
6695 cum->words = 0;
6696 cum->prototype_p = fntype && prototype_p (fntype);
6697 cum->libcall_p = !fntype;
6698 }
6699
6700 /* Handle promotion of pointer and integer arguments. */
6701
6702 static machine_mode
6703 sparc_promote_function_mode (const_tree type, machine_mode mode,
6704 int *punsignedp, const_tree, int)
6705 {
6706 if (type && POINTER_TYPE_P (type))
6707 {
6708 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6709 return Pmode;
6710 }
6711
6712 /* Integral arguments are passed as full words, as per the ABI. */
6713 if (GET_MODE_CLASS (mode) == MODE_INT
6714 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6715 return word_mode;
6716
6717 return mode;
6718 }
6719
6720 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6721
6722 static bool
6723 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6724 {
6725 return TARGET_ARCH64 ? true : false;
6726 }
6727
6728 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
6729 Specify whether to pass the argument by reference. */
6730
6731 static bool
6732 sparc_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6733 {
6734 tree type = arg.type;
6735 machine_mode mode = arg.mode;
6736 if (TARGET_ARCH32)
6737 /* Original SPARC 32-bit ABI says that structures and unions,
6738 and quad-precision floats are passed by reference.
6739 All other base types are passed in registers.
6740
6741 Extended ABI (as implemented by the Sun compiler) says that all
6742 complex floats are passed by reference. Pass complex integers
6743 in registers up to 8 bytes. More generally, enforce the 2-word
6744 cap for passing arguments in registers.
6745
6746 Vector ABI (as implemented by the Sun VIS SDK) says that integer
6747 vectors are passed like floats of the same size, that is in
6748 registers up to 8 bytes. Pass all vector floats by reference
6749 like structure and unions. */
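      /* Concretely (a sketch of the rules above): any structure or union,
	 a long double and every complex float type go by reference here,
	 whereas a long long or an 8-byte _Complex int still travels in
	 registers.  */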
6750 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
6751 || mode == SCmode
6752 /* Catch CDImode, TFmode, DCmode and TCmode. */
6753 || GET_MODE_SIZE (mode) > 8
6754 || (type
6755 && VECTOR_TYPE_P (type)
6756 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
6757 else
6758 /* Original SPARC 64-bit ABI says that structures and unions
6759 smaller than 16 bytes are passed in registers, as well as
6760 all other base types.
6761
6762 Extended ABI (as implemented by the Sun compiler) says that
6763 complex floats are passed in registers up to 16 bytes. Pass
6764 all complex integers in registers up to 16 bytes. More generally,
6765 enforce the 2-word cap for passing arguments in registers.
6766
6767 Vector ABI (as implemented by the Sun VIS SDK) says that integer
6768 vectors are passed like floats of the same size, that is in
6769 registers (up to 16 bytes). Pass all vector floats like structure
6770 and unions. */
6771 return ((type
6772 && (AGGREGATE_TYPE_P (type) || VECTOR_TYPE_P (type))
6773 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
6774 /* Catch CTImode and TCmode. */
6775 || GET_MODE_SIZE (mode) > 16);
6776 }
6777
6778 /* Traverse the record TYPE recursively and call FUNC on its fields.
6779 NAMED is true if this is for a named parameter. DATA is passed
6780 to FUNC for each field. OFFSET is the starting position and
6781 PACKED is true if we are inside a packed record. */
6782
6783 template <typename T, void Func (const_tree, int, bool, T*)>
6784 static void
6785 traverse_record_type (const_tree type, bool named, T *data,
6786 int offset = 0, bool packed = false)
6787 {
6788 /* The ABI obviously doesn't specify how packed structures are passed.
6789 These are passed in integer regs if possible, otherwise memory. */
6790 if (!packed)
6791 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6792 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6793 {
6794 packed = true;
6795 break;
6796 }
6797
6798 /* Walk the real fields, but skip those with no size or a zero size.
6799 ??? Fields with variable offset are handled as having zero offset. */
6800 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6801 if (TREE_CODE (field) == FIELD_DECL)
6802 {
6803 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6804 continue;
6805
6806 int bitpos = offset;
6807 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6808 bitpos += int_bit_position (field);
6809
6810 tree field_type = TREE_TYPE (field);
6811 if (TREE_CODE (field_type) == RECORD_TYPE)
6812 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6813 packed);
6814 else
6815 {
6816 const bool fp_type
6817 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6818 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6819 data);
6820 }
6821 }
6822 }
6823
6824 /* Handle recursive register classifying for structure layout. */
6825
6826 typedef struct
6827 {
6828 bool fp_regs; /* true if field eligible to FP registers. */
6829 bool fp_regs_in_first_word; /* true if such field in first word. */
6830 } classify_data_t;
6831
6832 /* A subroutine of function_arg_slotno. Classify the field. */
6833
6834 inline void
6835 classify_registers (const_tree, int bitpos, bool fp, classify_data_t *data)
6836 {
6837 if (fp)
6838 {
6839 data->fp_regs = true;
6840 if (bitpos < BITS_PER_WORD)
6841 data->fp_regs_in_first_word = true;
6842 }
6843 }
6844
6845 /* Compute the slot number to pass an argument in.
6846 Return the slot number or -1 if passing on the stack.
6847
6848 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6849 the preceding args and about the function being called.
6850 MODE is the argument's machine mode.
6851 TYPE is the data type of the argument (as a tree).
6852 This is null for libcalls where that information may
6853 not be available.
6854 NAMED is nonzero if this argument is a named parameter
6855 (otherwise it is an extra parameter matching an ellipsis).
6856 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6857 *PREGNO records the register number to use if scalar type.
6858 *PPADDING records the amount of padding needed in words. */
6859
6860 static int
6861 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6862 const_tree type, bool named, bool incoming,
6863 int *pregno, int *ppadding)
6864 {
6865 const int regbase
6866 = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
6867 int slotno = cum->words, regno;
6868 enum mode_class mclass = GET_MODE_CLASS (mode);
6869
6870 /* Silence warnings in the callers. */
6871 *pregno = -1;
6872 *ppadding = -1;
6873
6874 if (type && TREE_ADDRESSABLE (type))
6875 return -1;
6876
6877 /* In 64-bit mode, objects requiring 16-byte alignment get it. */
6878 if (TARGET_ARCH64
6879 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6880 && (slotno & 1) != 0)
6881 {
6882 slotno++;
6883 *ppadding = 1;
6884 }
6885 else
6886 *ppadding = 0;
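  /* For example, a __int128 or long double argument whose natural slot
     would be the odd slot 3 is bumped to slot 4 with one word of padding,
     so that it lines up with an even-numbered register pair (the %q
     registers in the slot table above only exist at even slots).  */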
6887
6888 /* Vector types deserve special treatment because they are polymorphic wrt
6889 their mode, depending upon whether VIS instructions are enabled. */
6890 if (type && VECTOR_TYPE_P (type))
6891 {
6892 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6893 {
6894 /* The SPARC port defines no floating-point vector modes. */
6895 gcc_assert (mode == BLKmode);
6896 }
6897 else
6898 {
6899 /* Integer vector types should either have a vector
6900 mode or an integral mode, because we are guaranteed
6901 by pass_by_reference that their size is not greater
6902 than 16 bytes and TImode is 16-byte wide. */
6903 gcc_assert (mode != BLKmode);
6904
6905 /* Integer vectors are handled like floats as per
6906 the Sun VIS SDK. */
6907 mclass = MODE_FLOAT;
6908 }
6909 }
6910
6911 switch (mclass)
6912 {
6913 case MODE_FLOAT:
6914 case MODE_COMPLEX_FLOAT:
6915 case MODE_VECTOR_INT:
6916 if (TARGET_ARCH64 && TARGET_FPU && named)
6917 {
6918 /* If all arg slots are filled, then must pass on stack. */
6919 if (slotno >= SPARC_FP_ARG_MAX)
6920 return -1;
6921
6922 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6923 /* Arguments filling only one single FP register are
6924 right-justified in the outer double FP register. */
6925 if (GET_MODE_SIZE (mode) <= 4)
6926 regno++;
6927 break;
6928 }
6929 /* fallthrough */
6930
6931 case MODE_INT:
6932 case MODE_COMPLEX_INT:
6933 /* If all arg slots are filled, then must pass on stack. */
6934 if (slotno >= SPARC_INT_ARG_MAX)
6935 return -1;
6936
6937 regno = regbase + slotno;
6938 break;
6939
6940 case MODE_RANDOM:
6941 /* MODE is VOIDmode when generating the actual call. */
6942 if (mode == VOIDmode)
6943 return -1;
6944
6945 if (TARGET_64BIT && TARGET_FPU && named
6946 && type
6947 && (TREE_CODE (type) == RECORD_TYPE || VECTOR_TYPE_P (type)))
6948 {
6949 /* If all arg slots are filled, then must pass on stack. */
6950 if (slotno >= SPARC_FP_ARG_MAX)
6951 return -1;
6952
6953 if (TREE_CODE (type) == RECORD_TYPE)
6954 {
6955 classify_data_t data = { false, false };
6956 traverse_record_type<classify_data_t, classify_registers>
6957 (type, named, &data);
6958
6959 if (data.fp_regs)
6960 {
6961 /* If all FP slots are filled except for the last one and
6962 there is no FP field in the first word, then must pass
6963 on stack. */
6964 if (slotno >= SPARC_FP_ARG_MAX - 1
6965 && !data.fp_regs_in_first_word)
6966 return -1;
6967 }
6968 else
6969 {
6970 /* If all int slots are filled, then must pass on stack. */
6971 if (slotno >= SPARC_INT_ARG_MAX)
6972 return -1;
6973 }
6974
6975 /* PREGNO isn't set since both int and FP regs can be used. */
6976 return slotno;
6977 }
6978
6979 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6980 }
6981 else
6982 {
6983 /* If all arg slots are filled, then must pass on stack. */
6984 if (slotno >= SPARC_INT_ARG_MAX)
6985 return -1;
6986
6987 regno = regbase + slotno;
6988 }
6989 break;
6990
6991 default :
6992 gcc_unreachable ();
6993 }
6994
6995 *pregno = regno;
6996 return slotno;
6997 }
6998
6999 /* Handle recursive register counting/assigning for structure layout. */
7000
7001 typedef struct
7002 {
7003 int slotno; /* slot number of the argument. */
7004 int regbase; /* regno of the base register. */
7005 int intoffset; /* offset of the first pending integer field. */
7006 int nregs; /* number of words passed in registers. */
7007 bool stack; /* true if part of the argument is on the stack. */
7008 rtx ret; /* return expression being built. */
7009 } assign_data_t;
7010
7011 /* A subroutine of function_arg_record_value. Compute the number of integer
7012 registers to be assigned between PARMS->intoffset and BITPOS. Return
7013 true if at least one integer register is assigned or false otherwise. */
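/* As a worked instance (assuming 64-bit words): a pending integer region
   starting at bit 32 and a BITPOS of 128 give startbit = 0, endbit = 128
   and hence nregs = (128 - 0) / 64 = 2.  */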
7014
7015 static bool
7016 compute_int_layout (int bitpos, assign_data_t *data, int *pnregs)
7017 {
7018 if (data->intoffset < 0)
7019 return false;
7020
7021 const int intoffset = data->intoffset;
7022 data->intoffset = -1;
7023
7024 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
7025 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
7026 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
7027 int nregs = (endbit - startbit) / BITS_PER_WORD;
7028
7029 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
7030 {
7031 nregs = SPARC_INT_ARG_MAX - this_slotno;
7032
7033 /* We need to pass this field (partly) on the stack. */
7034 data->stack = 1;
7035 }
7036
7037 if (nregs <= 0)
7038 return false;
7039
7040 *pnregs = nregs;
7041 return true;
7042 }
7043
7044 /* A subroutine of function_arg_record_value. Compute the number and the mode
7045 of the FP registers to be assigned for FIELD. Return true if at least one
7046 FP register is assigned or false otherwise. */
7047
7048 static bool
7049 compute_fp_layout (const_tree field, int bitpos, assign_data_t *data,
7050 int *pnregs, machine_mode *pmode)
7051 {
7052 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
7053 machine_mode mode = DECL_MODE (field);
7054 int nregs, nslots;
7055
7056 /* Slots are counted as words while regs are counted as having the size of
7057 the (inner) mode. */
7058 if (VECTOR_TYPE_P (TREE_TYPE (field)) && mode == BLKmode)
7059 {
7060 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
7061 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
7062 }
7063 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
7064 {
7065 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
7066 nregs = 2;
7067 }
7068 else
7069 nregs = 1;
7070
7071 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
7072
7073 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
7074 {
7075 nslots = SPARC_FP_ARG_MAX - this_slotno;
7076 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
7077
7078 /* We need to pass this field (partly) on the stack. */
7079 data->stack = 1;
7080
7081 if (nregs <= 0)
7082 return false;
7083 }
7084
7085 *pnregs = nregs;
7086 *pmode = mode;
7087 return true;
7088 }
7089
7090 /* A subroutine of function_arg_record_value. Count the number of registers
7091 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
7092
7093 inline void
7094 count_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
7095 {
7096 if (fp)
7097 {
7098 int nregs;
7099 machine_mode mode;
7100
7101 if (compute_int_layout (bitpos, data, &nregs))
7102 data->nregs += nregs;
7103
7104 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
7105 data->nregs += nregs;
7106 }
7107 else
7108 {
7109 if (data->intoffset < 0)
7110 data->intoffset = bitpos;
7111 }
7112 }
7113
7114 /* A subroutine of function_arg_record_value. Assign the bits of the
7115 structure between PARMS->intoffset and BITPOS to integer registers. */
7116
7117 static void
7118 assign_int_registers (int bitpos, assign_data_t *data)
7119 {
7120 int intoffset = data->intoffset;
7121 machine_mode mode;
7122 int nregs;
7123
7124 if (!compute_int_layout (bitpos, data, &nregs))
7125 return;
7126
7127 /* If this is the trailing part of a word, only load that much into
7128 the register. Otherwise load the whole register. Note that in
7129 the latter case we may pick up unwanted bits. It's not a problem
7130 at the moment, but we may wish to revisit it. */
7131 if (intoffset % BITS_PER_WORD != 0)
7132 mode = smallest_int_mode_for_size (BITS_PER_WORD
7133 - intoffset % BITS_PER_WORD);
7134 else
7135 mode = word_mode;
7136
7137 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
7138 unsigned int regno = data->regbase + this_slotno;
7139 intoffset /= BITS_PER_UNIT;
7140
7141 do
7142 {
7143 rtx reg = gen_rtx_REG (mode, regno);
7144 XVECEXP (data->ret, 0, data->stack + data->nregs)
7145 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
7146 data->nregs += 1;
7147 mode = word_mode;
7148 regno += 1;
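/* Advance the offset to the next word boundary; only the first register
   of the run can start at a sub-word offset. */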
7149 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
7150 }
7151 while (--nregs > 0);
7152 }
7153
7154 /* A subroutine of function_arg_record_value. Assign FIELD at position
7155 BITPOS to FP registers. */
7156
7157 static void
7158 assign_fp_registers (const_tree field, int bitpos, assign_data_t *data)
7159 {
7160 int nregs;
7161 machine_mode mode;
7162
7163 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
7164 return;
7165
7166 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
7167 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
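/* Each 8-byte slot maps to a pair of 4-byte FP registers; a field of at
   most 4 bytes that lives in the second half of its slot goes into the
   odd (second) register of the pair. */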
7168 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
7169 regno++;
7170 int pos = bitpos / BITS_PER_UNIT;
7171
7172 do
7173 {
7174 rtx reg = gen_rtx_REG (mode, regno);
7175 XVECEXP (data->ret, 0, data->stack + data->nregs)
7176 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
7177 data->nregs += 1;
7178 regno += GET_MODE_SIZE (mode) / 4;
7179 pos += GET_MODE_SIZE (mode);
7180 }
7181 while (--nregs > 0);
7182 }
7183
7184 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
7185 the structure between PARMS->intoffset and BITPOS to registers. */
7186
7187 inline void
7188 assign_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
7189 {
7190 if (fp)
7191 {
7192 assign_int_registers (bitpos, data);
7193
7194 assign_fp_registers (field, bitpos, data);
7195 }
7196 else
7197 {
7198 if (data->intoffset < 0)
7199 data->intoffset = bitpos;
7200 }
7201 }
7202
7203 /* Used by function_arg and function_value to implement the complex
7204 conventions of the 64-bit ABI for passing and returning structures.
7205 Return an expression valid as a return value for the FUNCTION_ARG
7206 and TARGET_FUNCTION_VALUE.
7207
7208 TYPE is the data type of the argument (as a tree).
7209 This is null for libcalls where that information may
7210 not be available.
7211 MODE is the argument's machine mode.
7212 SLOTNO is the index number of the argument's slot in the parameter array.
7213 NAMED is true if this argument is a named parameter
7214 (otherwise it is an extra parameter matching an ellipsis).
7215 REGBASE is the regno of the base register for the parameter array. */
7216
7217 static rtx
7218 function_arg_record_value (const_tree type, machine_mode mode,
7219 int slotno, bool named, int regbase)
7220 {
7221 const int size = int_size_in_bytes (type);
7222 assign_data_t data;
7223 int nregs;
7224
7225 data.slotno = slotno;
7226 data.regbase = regbase;
7227
7228 /* Count how many registers we need. */
7229 data.nregs = 0;
7230 data.intoffset = 0;
7231 data.stack = false;
7232 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
7233
7234 /* Take into account pending integer fields. */
7235 if (compute_int_layout (size * BITS_PER_UNIT, &data, &nregs))
7236 data.nregs += nregs;
7237
7238 /* Allocate the vector and handle some annoying special cases. */
7239 nregs = data.nregs;
7240
7241 if (nregs == 0)
7242 {
7243 /* ??? Empty structure has no value? Duh? */
7244 if (size <= 0)
7245 {
7246 /* Though there's nothing really to store, return a word register
7247 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
7248 leads to breakage due to the fact that there are zero bytes to
7249 load. */
7250 return gen_rtx_REG (mode, regbase);
7251 }
7252
7253 /* ??? C++ has structures with no fields, and yet a size. Give up
7254 for now and pass everything back in integer registers. */
7255 nregs = CEIL_NWORDS (size);
7256 if (nregs + slotno > SPARC_INT_ARG_MAX)
7257 nregs = SPARC_INT_ARG_MAX - slotno;
7258 }
7259
7260 gcc_assert (nregs > 0);
7261
7262 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
7263
7264 /* If at least one field must be passed on the stack, generate
7265 (parallel [(expr_list (nil) ...) ...]) so that all fields will
7266 also be passed on the stack. We can't do much better because the
7267 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
7268 of structures for which the fields passed exclusively in registers
7269 are not at the beginning of the structure. */
7270 if (data.stack)
7271 XVECEXP (data.ret, 0, 0)
7272 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7273
7274 /* Assign the registers. */
7275 data.nregs = 0;
7276 data.intoffset = 0;
7277 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
7278
7279 /* Assign pending integer fields. */
7280 assign_int_registers (size * BITS_PER_UNIT, &data);
7281
7282 gcc_assert (data.nregs == nregs);
7283
7284 return data.ret;
7285 }
7286
7287 /* Used by function_arg and function_value to implement the conventions
7288 of the 64-bit ABI for passing and returning unions.
7289 Return an expression valid as a return value for the FUNCTION_ARG
7290 and TARGET_FUNCTION_VALUE.
7291
7292 SIZE is the size in bytes of the union.
7293 MODE is the argument's machine mode.
7294 SLOTNO is the index number of the argument's slot in the parameter array.
7295 REGNO is the hard register the union will be passed in. */
7296
7297 static rtx
7298 function_arg_union_value (int size, machine_mode mode, int slotno, int regno)
7299 {
7300 unsigned int nwords;
7301
7302 /* See comment in function_arg_record_value for empty structures. */
7303 if (size <= 0)
7304 return gen_rtx_REG (mode, regno);
7305
7306 if (slotno == SPARC_INT_ARG_MAX - 1)
7307 nwords = 1;
7308 else
7309 nwords = CEIL_NWORDS (size);
7310
7311 rtx regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
7312
7313 /* Unions are passed left-justified. */
7314 for (unsigned int i = 0; i < nwords; i++)
7315 XVECEXP (regs, 0, i)
7316 = gen_rtx_EXPR_LIST (VOIDmode,
7317 gen_rtx_REG (word_mode, regno + i),
7318 GEN_INT (UNITS_PER_WORD * i));
7319
7320 return regs;
7321 }
7322
7323 /* Used by function_arg and function_value to implement the conventions
7324 of the 64-bit ABI for passing and returning BLKmode vectors.
7325 Return an expression valid as a return value for the FUNCTION_ARG
7326 and TARGET_FUNCTION_VALUE.
7327
7328 SIZE is the size in bytes of the vector.
7329 SLOTNO is the index number of the argument's slot in the parameter array.
7330 NAMED is true if this argument is a named parameter
7331 (otherwise it is an extra parameter matching an ellipsis).
7332 REGNO is the hard register the vector will be passed in. */
7333
7334 static rtx
7335 function_arg_vector_value (int size, int slotno, bool named, int regno)
7336 {
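/* Named vector arguments go in FP registers, which are 4 bytes wide, so
   each 8-byte word spans two consecutive registers; unnamed ones go in
   word-sized integer registers, hence a stride of 1. */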
7337 const int mult = (named ? 2 : 1);
7338 unsigned int nwords;
7339
7340 if (slotno == (named ? SPARC_FP_ARG_MAX : SPARC_INT_ARG_MAX) - 1)
7341 nwords = 1;
7342 else
7343 nwords = CEIL_NWORDS (size);
7344
7345 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nwords));
7346
7347 if (size < UNITS_PER_WORD)
7348 XVECEXP (regs, 0, 0)
7349 = gen_rtx_EXPR_LIST (VOIDmode,
7350 gen_rtx_REG (SImode, regno),
7351 const0_rtx);
7352 else
7353 for (unsigned int i = 0; i < nwords; i++)
7354 XVECEXP (regs, 0, i)
7355 = gen_rtx_EXPR_LIST (VOIDmode,
7356 gen_rtx_REG (word_mode, regno + i * mult),
7357 GEN_INT (i * UNITS_PER_WORD));
7358
7359 return regs;
7360 }
7361
7362 /* Determine where to put an argument to a function.
7363 Value is zero to push the argument on the stack,
7364 or a hard register in which to store the argument.
7365
7366 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7367 the preceding args and about the function being called.
7368 ARG is a description of the argument.
7369 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
7370 TARGET_FUNCTION_INCOMING_ARG. */
7371
7372 static rtx
7373 sparc_function_arg_1 (cumulative_args_t cum_v, const function_arg_info &arg,
7374 bool incoming)
7375 {
7376 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7377 const int regbase
7378 = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
7379 int slotno, regno, padding;
7380 tree type = arg.type;
7381 machine_mode mode = arg.mode;
7382 enum mode_class mclass = GET_MODE_CLASS (mode);
7383 bool named = arg.named;
7384
7385 slotno
7386 = function_arg_slotno (cum, mode, type, named, incoming, &regno, &padding);
7387 if (slotno == -1)
7388 return 0;
7389
7390 /* Integer vectors are handled like floats as per the Sun VIS SDK. */
7391 if (type && VECTOR_INTEGER_TYPE_P (type))
7392 mclass = MODE_FLOAT;
7393
7394 if (TARGET_ARCH32)
7395 return gen_rtx_REG (mode, regno);
7396
7397 /* Structures up to 16 bytes in size are passed in arg slots on the stack
7398 and are promoted to registers if possible. */
7399 if (type && TREE_CODE (type) == RECORD_TYPE)
7400 {
7401 const int size = int_size_in_bytes (type);
7402 gcc_assert (size <= 16);
7403
7404 return function_arg_record_value (type, mode, slotno, named, regbase);
7405 }
7406
7407 /* Unions up to 16 bytes in size are passed in integer registers. */
7408 else if (type && TREE_CODE (type) == UNION_TYPE)
7409 {
7410 const int size = int_size_in_bytes (type);
7411 gcc_assert (size <= 16);
7412
7413 return function_arg_union_value (size, mode, slotno, regno);
7414 }
7415
7416 /* Floating-point vectors up to 16 bytes are passed in registers. */
7417 else if (type && VECTOR_TYPE_P (type) && mode == BLKmode)
7418 {
7419 const int size = int_size_in_bytes (type);
7420 gcc_assert (size <= 16);
7421
7422 return function_arg_vector_value (size, slotno, named, regno);
7423 }
7424
7425 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
7426 but also have the slot allocated for them.
7427 If no prototype is in scope, fp values in register slots get passed
7428 in two places: either fp regs and int regs, or fp regs and memory. */
7429 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7430 && SPARC_FP_REG_P (regno))
7431 {
7432 rtx reg = gen_rtx_REG (mode, regno);
7433 if (cum->prototype_p || cum->libcall_p)
7434 return reg;
7435 else
7436 {
7437 rtx v0, v1;
7438
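/* Each of the 6 integer argument slots covers two 4-byte FP regnos, so
   while the FP register falls within that span there is a corresponding
   integer register to pass the value in as well. */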
7439 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
7440 {
7441 int intreg;
7442
7443 /* On incoming, we don't need to know that the value
7444 is passed in %f0 and %i0, and it confuses other parts
7445 causing needless spillage even on the simplest cases. */
7446 if (incoming)
7447 return reg;
7448
7449 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
7450 + (regno - SPARC_FP_ARG_FIRST) / 2);
7451
7452 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7453 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
7454 const0_rtx);
7455 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7456 }
7457 else
7458 {
7459 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7460 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7461 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7462 }
7463 }
7464 }
7465
7466 /* All other aggregate types are passed in an integer register in a mode
7467 corresponding to the size of the type. */
7468 else if (type && AGGREGATE_TYPE_P (type))
7469 {
7470 const int size = int_size_in_bytes (type);
7471 gcc_assert (size <= 16);
7472
7473 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7474 }
7475
7476 return gen_rtx_REG (mode, regno);
7477 }
7478
7479 /* Handle the TARGET_FUNCTION_ARG target hook. */
7480
7481 static rtx
7482 sparc_function_arg (cumulative_args_t cum, const function_arg_info &arg)
7483 {
7484 return sparc_function_arg_1 (cum, arg, false);
7485 }
7486
7487 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
7488
7489 static rtx
7490 sparc_function_incoming_arg (cumulative_args_t cum,
7491 const function_arg_info &arg)
7492 {
7493 return sparc_function_arg_1 (cum, arg, true);
7494 }
7495
7496 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
7497
7498 static unsigned int
7499 sparc_function_arg_boundary (machine_mode mode, const_tree type)
7500 {
7501 return ((TARGET_ARCH64
7502 && (GET_MODE_ALIGNMENT (mode) == 128
7503 || (type && TYPE_ALIGN (type) == 128)))
7504 ? 128
7505 : PARM_BOUNDARY);
7506 }
7507
7508 /* For an arg passed partly in registers and partly in memory,
7509 this is the number of bytes of registers used.
7510 For args passed entirely in registers or entirely in memory, zero.
7511
7512 Any arg that starts in the first 6 regs but won't entirely fit in them
7513 needs partial registers on v8. On v9, structures with integer
7514 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7515 values that begin in the last fp reg [where "last fp reg" varies with the
7516 mode] will be split between that reg and memory. */
7517
7518 static int
7519 sparc_arg_partial_bytes (cumulative_args_t cum, const function_arg_info &arg)
7520 {
7521 int slotno, regno, padding;
7522
7523 /* We pass false for incoming here; it doesn't matter. */
7524 slotno = function_arg_slotno (get_cumulative_args (cum), arg.mode, arg.type,
7525 arg.named, false, &regno, &padding);
7526
7527 if (slotno == -1)
7528 return 0;
7529
7530 if (TARGET_ARCH32)
7531 {
7532 /* We are guaranteed by pass_by_reference that the size of the
7533 argument is not greater than 8 bytes, so we only need to return
7534 one word if the argument is partially passed in registers. */
7535 const int size = GET_MODE_SIZE (arg.mode);
7536
7537 if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
7538 return UNITS_PER_WORD;
7539 }
7540 else
7541 {
7542 /* We are guaranteed by pass_by_reference that the size of the
7543 argument is not greater than 16 bytes, so we only need to return
7544 one word if the argument is partially passed in registers. */
7545 if (arg.aggregate_type_p ())
7546 {
7547 const int size = int_size_in_bytes (arg.type);
7548
7549 if (size > UNITS_PER_WORD
7550 && (slotno == SPARC_INT_ARG_MAX - 1
7551 || slotno == SPARC_FP_ARG_MAX - 1))
7552 return UNITS_PER_WORD;
7553 }
7554 else if (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_INT
7555 || ((GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT
7556 || (arg.type && VECTOR_TYPE_P (arg.type)))
7557 && !(TARGET_FPU && arg.named)))
7558 {
7559 const int size = (arg.type && VECTOR_FLOAT_TYPE_P (arg.type))
7560 ? int_size_in_bytes (arg.type)
7561 : GET_MODE_SIZE (arg.mode);
7562
7563 if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
7564 return UNITS_PER_WORD;
7565 }
7566 else if (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT
7567 || (arg.type && VECTOR_TYPE_P (arg.type)))
7568 {
7569 const int size = (arg.type && VECTOR_FLOAT_TYPE_P (arg.type))
7570 ? int_size_in_bytes (arg.type)
7571 : GET_MODE_SIZE (arg.mode);
7572
7573 if (size > UNITS_PER_WORD && slotno == SPARC_FP_ARG_MAX - 1)
7574 return UNITS_PER_WORD;
7575 }
7576 }
7577
7578 return 0;
7579 }
7580
7581 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7582 Update the data in CUM to advance over argument ARG. */
7583
7584 static void
7585 sparc_function_arg_advance (cumulative_args_t cum_v,
7586 const function_arg_info &arg)
7587 {
7588 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7589 tree type = arg.type;
7590 machine_mode mode = arg.mode;
7591 int regno, padding;
7592
7593 /* We pass false for incoming here; it doesn't matter. */
7594 function_arg_slotno (cum, mode, type, arg.named, false, &regno, &padding);
7595
7596 /* If argument requires leading padding, add it. */
7597 cum->words += padding;
7598
7599 if (TARGET_ARCH32)
7600 cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
7601 else
7602 {
7603 /* For types that can have BLKmode, get the size from the type. */
7604 if (type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7605 {
7606 const int size = int_size_in_bytes (type);
7607
7608 /* See comment in function_arg_record_value for empty structures. */
7609 if (size <= 0)
7610 cum->words++;
7611 else
7612 cum->words += CEIL_NWORDS (size);
7613 }
7614 else
7615 cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
7616 }
7617 }
7618
7619 /* Implement TARGET_FUNCTION_ARG_PADDING. For the 64-bit ABI, structs
7620 are always stored left-justified in their argument slot. */
7621
7622 static pad_direction
7623 sparc_function_arg_padding (machine_mode mode, const_tree type)
7624 {
7625 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7626 return PAD_UPWARD;
7627
7628 /* Fall back to the default. */
7629 return default_function_arg_padding (mode, type);
7630 }
7631
7632 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7633 Specify whether to return the return value in memory. */
7634
7635 static bool
7636 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7637 {
7638 if (TARGET_ARCH32)
7639 /* Original SPARC 32-bit ABI says that structures and unions, and
7640 quad-precision floats are returned in memory. But note that the
7641 first part is implemented through -fpcc-struct-return being the
7642 default, so here we only implement -freg-struct-return instead.
7643 All other base types are returned in registers.
7644
7645 Extended ABI (as implemented by the Sun compiler) says that
7646 all complex floats are returned in registers (8 FP registers
7647 at most for '_Complex long double'). Return all complex integers
7648 in registers (4 at most for '_Complex long long').
7649
7650 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7651 integers are returned like floats of the same size, that is in
7652 registers up to 8 bytes and in memory otherwise. Return all
7653 vector floats in memory like structure and unions; note that
7654 they always have BLKmode like the latter. */
7655 return (TYPE_MODE (type) == BLKmode
7656 || TYPE_MODE (type) == TFmode
7657 || (TREE_CODE (type) == VECTOR_TYPE
7658 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7659 else
7660 /* Original SPARC 64-bit ABI says that structures and unions
7661 smaller than 32 bytes are returned in registers, as well as
7662 all other base types.
7663
7664 Extended ABI (as implemented by the Sun compiler) says that all
7665 complex floats are returned in registers (8 FP registers at most
7666 for '_Complex long double'). Return all complex integers in
7667 registers (4 at most for '_Complex TItype').
7668
7669 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7670 integers are returned like floats of the same size, that is in
7671 registers. Return all vector floats like structure and unions;
7672 note that they always have BLKmode like the latter. */
7673 return (TYPE_MODE (type) == BLKmode
7674 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7675 }
7676
7677 /* Handle the TARGET_STRUCT_VALUE target hook.
7678 Return where to find the structure return value address. */
7679
7680 static rtx
7681 sparc_struct_value_rtx (tree fndecl, int incoming)
7682 {
7683 if (TARGET_ARCH64)
7684 return NULL_RTX;
7685 else
7686 {
7687 rtx mem;
7688
7689 if (incoming)
7690 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7691 STRUCT_VALUE_OFFSET));
7692 else
7693 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7694 STRUCT_VALUE_OFFSET));
7695
7696 /* Only follow the SPARC ABI for fixed-size structure returns.
7697 Variable size structure returns are handled per the normal
7698 procedures in GCC. This is enabled by -mstd-struct-return. */
7699 if (incoming == 2
7700 && sparc_std_struct_return
7701 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7702 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7703 {
7704 /* We must check and adjust the return address, as it is optional
7705 as to whether the return object is really provided. */
7706 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7707 rtx scratch = gen_reg_rtx (SImode);
7708 rtx_code_label *endlab = gen_label_rtx ();
7709
7710 /* Calculate the return object size. */
7711 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7712 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7713 /* Construct a temporary return value. */
7714 rtx temp_val
7715 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7716
7717 /* Implement SPARC 32-bit psABI callee return struct checking:
7718
7719 Fetch the instruction where we will return to and see if
7720 it's an unimp instruction (the most significant 10 bits
7721 will be zero). */
7722 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7723 plus_constant (Pmode,
7724 ret_reg, 8)));
7725 /* Assume the size is valid and pre-adjust. */
7726 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7727 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7728 0, endlab);
7729 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7730 /* Write the address of the memory pointed to by temp_val into
7731 the memory pointed to by mem. */
7732 emit_move_insn (mem, XEXP (temp_val, 0));
7733 emit_label (endlab);
7734 }
7735
7736 return mem;
7737 }
7738 }
7739
7740 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7741 For v9, function return values are subject to the same rules as arguments,
7742 except that up to 32 bytes may be returned in registers. */
7743
7744 static rtx
7745 sparc_function_value_1 (const_tree type, machine_mode mode, bool outgoing)
7746 {
7747 /* Beware that the two values are swapped here wrt function_arg. */
7748 const int regbase
7749 = outgoing ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
7750 enum mode_class mclass = GET_MODE_CLASS (mode);
7751 int regno;
7752
7753 /* Integer vectors are handled like floats as per the Sun VIS SDK.
7754 Note that integer vectors larger than 16 bytes have BLKmode so
7755 they need to be handled like floating-point vectors below. */
7756 if (type && VECTOR_INTEGER_TYPE_P (type) && mode != BLKmode)
7757 mclass = MODE_FLOAT;
7758
7759 if (TARGET_ARCH64 && type)
7760 {
7761 /* Structures up to 32 bytes in size are returned in registers. */
7762 if (TREE_CODE (type) == RECORD_TYPE)
7763 {
7764 const int size = int_size_in_bytes (type);
7765 gcc_assert (size <= 32);
7766
7767 return function_arg_record_value (type, mode, 0, true, regbase);
7768 }
7769
7770 /* Unions up to 32 bytes in size are returned in integer registers. */
7771 else if (TREE_CODE (type) == UNION_TYPE)
7772 {
7773 const int size = int_size_in_bytes (type);
7774 gcc_assert (size <= 32);
7775
7776 return function_arg_union_value (size, mode, 0, regbase);
7777 }
7778
7779 /* Vectors up to 32 bytes are returned in FP registers. */
7780 else if (VECTOR_TYPE_P (type) && mode == BLKmode)
7781 {
7782 const int size = int_size_in_bytes (type);
7783 gcc_assert (size <= 32);
7784
7785 return function_arg_vector_value (size, 0, true, SPARC_FP_ARG_FIRST);
7786 }
7787
7788 /* Objects that require it are returned in FP registers. */
7789 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7790 ;
7791
7792 /* All other aggregate types are returned in an integer register in a
7793 mode corresponding to the size of the type. */
7794 else if (AGGREGATE_TYPE_P (type))
7795 {
7796 /* All other aggregate types are passed in an integer register
7797 in a mode corresponding to the size of the type. */
7798 const int size = int_size_in_bytes (type);
7799 gcc_assert (size <= 32);
7800
7801 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7802
7803 /* ??? We probably should have made the same ABI change in
7804 3.4.0 as the one we made for unions. The latter was
7805 required by the SCD though, while the former is not
7806 specified, so we favored compatibility and efficiency.
7807
7808 Now we're stuck for aggregates larger than 16 bytes,
7809 because OImode vanished in the meantime. Let's not
7810 try to be unduly clever, and simply follow the ABI
7811 for unions in that case. */
7812 if (mode == BLKmode)
7813 return function_arg_union_value (size, mode, 0, regbase);
7814 else
7815 mclass = MODE_INT;
7816 }
7817
7818 /* We should only have pointer and integer types at this point. This
7819 must match sparc_promote_function_mode. */
7820 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7821 mode = word_mode;
7822 }
7823
7824 /* We should only have pointer and integer types at this point, except with
7825 -freg-struct-return. This must match sparc_promote_function_mode. */
7826 else if (TARGET_ARCH32
7827 && !(type && AGGREGATE_TYPE_P (type))
7828 && mclass == MODE_INT
7829 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7830 mode = word_mode;
7831
7832 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7833 regno = SPARC_FP_ARG_FIRST;
7834 else
7835 regno = regbase;
7836
7837 return gen_rtx_REG (mode, regno);
7838 }
7839
7840 /* Handle TARGET_FUNCTION_VALUE.
7841 On the SPARC, the value is found in the first "output" register, but the
7842 called function leaves it in the first "input" register. */
7843
7844 static rtx
7845 sparc_function_value (const_tree valtype,
7846 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7847 bool outgoing)
7848 {
7849 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7850 }
7851
7852 /* Handle TARGET_LIBCALL_VALUE. */
7853
7854 static rtx
7855 sparc_libcall_value (machine_mode mode,
7856 const_rtx fun ATTRIBUTE_UNUSED)
7857 {
7858 return sparc_function_value_1 (NULL_TREE, mode, false);
7859 }
7860
7861 /* Handle FUNCTION_VALUE_REGNO_P.
7862 On the SPARC, the first "output" reg is used for integer values, and the
7863 first floating point register is used for floating point values. */
7864
7865 static bool
7866 sparc_function_value_regno_p (const unsigned int regno)
7867 {
7868 return (regno == 8 || (TARGET_FPU && regno == 32));
7869 }
7870
7871 /* Do what is necessary for `va_start'. We look at the current function
7872 to determine if stdarg or varargs is used and return the address of
7873 the first unnamed parameter. */
7874
7875 static rtx
7876 sparc_builtin_saveregs (void)
7877 {
7878 int first_reg = crtl->args.info.words;
7879 rtx address;
7880 int regno;
7881
7882 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7883 emit_move_insn (gen_rtx_MEM (word_mode,
7884 gen_rtx_PLUS (Pmode,
7885 frame_pointer_rtx,
7886 GEN_INT (FIRST_PARM_OFFSET (0)
7887 + (UNITS_PER_WORD
7888 * regno)))),
7889 gen_rtx_REG (word_mode,
7890 SPARC_INCOMING_INT_ARG_FIRST + regno));
7891
7892 address = gen_rtx_PLUS (Pmode,
7893 frame_pointer_rtx,
7894 GEN_INT (FIRST_PARM_OFFSET (0)
7895 + UNITS_PER_WORD * first_reg));
7896
7897 return address;
7898 }
7899
7900 /* Implement `va_start' for stdarg. */
7901
7902 static void
7903 sparc_va_start (tree valist, rtx nextarg)
7904 {
7905 nextarg = expand_builtin_saveregs ();
7906 std_expand_builtin_va_start (valist, nextarg);
7907 }
7908
7909 /* Implement `va_arg' for stdarg. */
7910
7911 static tree
7912 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7913 gimple_seq *post_p)
7914 {
7915 HOST_WIDE_INT size, rsize, align;
7916 tree addr, incr;
7917 bool indirect;
7918 tree ptrtype = build_pointer_type (type);
7919
7920 if (pass_va_arg_by_reference (type))
7921 {
7922 indirect = true;
7923 size = rsize = UNITS_PER_WORD;
7924 align = 0;
7925 }
7926 else
7927 {
7928 indirect = false;
7929 size = int_size_in_bytes (type);
7930 rsize = ROUND_UP (size, UNITS_PER_WORD);
7931 align = 0;
7932
7933 if (TARGET_ARCH64)
7934 {
7935 /* For SPARC64, objects requiring 16-byte alignment get it. */
7936 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7937 align = 2 * UNITS_PER_WORD;
7938
7939 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7940 are left-justified in their slots. */
7941 if (AGGREGATE_TYPE_P (type))
7942 {
7943 if (size == 0)
7944 size = rsize = UNITS_PER_WORD;
7945 else
7946 size = rsize;
7947 }
7948 }
7949 }
7950
7951 incr = valist;
7952 if (align)
7953 {
7954 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7955 incr = fold_convert (sizetype, incr);
7956 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7957 size_int (-align));
7958 incr = fold_convert (ptr_type_node, incr);
7959 }
7960
7961 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7962 addr = incr;
7963
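/* On a big-endian target a small value sits in the high-address end of
   its word-rounded slot, so step over the padding bytes. */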
7964 if (BYTES_BIG_ENDIAN && size < rsize)
7965 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7966
7967 if (indirect)
7968 {
7969 addr = fold_convert (build_pointer_type (ptrtype), addr);
7970 addr = build_va_arg_indirect_ref (addr);
7971 }
7972
7973 /* If the address isn't aligned properly for the type, we need a temporary.
7974 FIXME: This is inefficient, usually we can do this in registers. */
7975 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7976 {
7977 tree tmp = create_tmp_var (type, "va_arg_tmp");
7978 tree dest_addr = build_fold_addr_expr (tmp);
7979 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7980 3, dest_addr, addr, size_int (rsize));
7981 TREE_ADDRESSABLE (tmp) = 1;
7982 gimplify_and_add (copy, pre_p);
7983 addr = dest_addr;
7984 }
7985
7986 else
7987 addr = fold_convert (ptrtype, addr);
7988
7989 incr = fold_build_pointer_plus_hwi (incr, rsize);
7990 gimplify_assign (valist, incr, post_p);
7991
7992 return build_va_arg_indirect_ref (addr);
7993 }
7994
7995 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7996 Specify whether the vector mode is supported by the hardware. */
7997
7998 static bool
7999 sparc_vector_mode_supported_p (machine_mode mode)
8000 {
8001 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
8002 }
8003
8004 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
8005
8006 static machine_mode
8007 sparc_preferred_simd_mode (scalar_mode mode)
8008 {
8009 if (TARGET_VIS)
8010 switch (mode)
8011 {
8012 case E_SImode:
8013 return V2SImode;
8014 case E_HImode:
8015 return V4HImode;
8016 case E_QImode:
8017 return V8QImode;
8018
8019 default:;
8020 }
8021
8022 return word_mode;
8023 }
8024
8025 /* Implement TARGET_CAN_FOLLOW_JUMP. */
8026
8027 static bool
8028 sparc_can_follow_jump (const rtx_insn *follower, const rtx_insn *followee)
8029 {
8030 /* Do not fold unconditional jumps that have been created for crossing
8031 partition boundaries. */
8032 if (CROSSING_JUMP_P (followee) && !CROSSING_JUMP_P (follower))
8033 return false;
8034
8035 return true;
8036 }
8037
8038 /* Return the string to output an unconditional branch to LABEL, which is
8039 the operand number of the label.
8040
8041 DEST is the destination insn (i.e. the label), INSN is the source. */
8042
8043 const char *
8044 output_ubranch (rtx dest, rtx_insn *insn)
8045 {
8046 static char string[64];
8047 bool v9_form = false;
8048 int delta;
8049 char *p;
8050
8051 /* Even if we are trying to use cbcond for this, evaluate
8052 whether we can use V9 branches as our backup plan. */
8053 delta = 5000000;
8054 if (!CROSSING_JUMP_P (insn) && INSN_ADDRESSES_SET_P ())
8055 delta = (INSN_ADDRESSES (INSN_UID (dest))
8056 - INSN_ADDRESSES (INSN_UID (insn)));
8057
8058 /* Leave some instructions for "slop". */
8059 if (TARGET_V9 && delta >= -260000 && delta < 260000)
8060 v9_form = true;
8061
8062 if (TARGET_CBCOND)
8063 {
8064 bool emit_nop = emit_cbcond_nop (insn);
8065 bool far = false;
8066 const char *rval;
8067
8068 if (delta < -500 || delta > 500)
8069 far = true;
8070
8071 if (far)
8072 {
8073 if (v9_form)
8074 rval = "ba,a,pt\t%%xcc, %l0";
8075 else
8076 rval = "b,a\t%l0";
8077 }
8078 else
8079 {
8080 if (emit_nop)
8081 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
8082 else
8083 rval = "cwbe\t%%g0, %%g0, %l0";
8084 }
8085 return rval;
8086 }
8087
8088 if (v9_form)
8089 strcpy (string, "ba%*,pt\t%%xcc, ");
8090 else
8091 strcpy (string, "b%*\t");
8092
8093 p = strchr (string, '\0');
8094 *p++ = '%';
8095 *p++ = 'l';
8096 *p++ = '0';
8097 *p++ = '%';
8098 *p++ = '(';
8099 *p = '\0';
8100
8101 return string;
8102 }
8103
8104 /* Return the string to output a conditional branch to LABEL, which is
8105 the operand number of the label. OP is the conditional expression.
8106 XEXP (OP, 0) is assumed to be a condition code register (integer or
8107 floating point) and its mode specifies what kind of comparison we made.
8108
8109 DEST is the destination insn (i.e. the label), INSN is the source.
8110
8111 REVERSED is nonzero if we should reverse the sense of the comparison.
8112
8113 ANNUL is nonzero if we should generate an annulling branch. */
8114
8115 const char *
8116 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
8117 rtx_insn *insn)
8118 {
8119 static char string[64];
8120 enum rtx_code code = GET_CODE (op);
8121 rtx cc_reg = XEXP (op, 0);
8122 machine_mode mode = GET_MODE (cc_reg);
8123 const char *labelno, *branch;
8124 int spaces = 8, far;
8125 char *p;
8126
8127 /* v9 branches are limited to +-1MB. If it is too far away,
8128 change
8129
8130 bne,pt %xcc, .LC30
8131
8132 to
8133
8134 be,pn %xcc, .+12
8135 nop
8136 ba .LC30
8137
8138 and
8139
8140 fbne,a,pn %fcc2, .LC29
8141
8142 to
8143
8144 fbe,pt %fcc2, .+16
8145 nop
8146 ba .LC29 */
8147
8148 far = TARGET_V9 && (get_attr_length (insn) >= 3);
8149 if (reversed ^ far)
8150 {
8151 /* Reversal of FP compares takes care -- an ordered compare
8152 becomes an unordered compare and vice versa. */
8153 if (mode == CCFPmode || mode == CCFPEmode)
8154 code = reverse_condition_maybe_unordered (code);
8155 else
8156 code = reverse_condition (code);
8157 }
8158
8159 /* Start by writing the branch condition. */
8160 if (mode == CCFPmode || mode == CCFPEmode)
8161 {
8162 switch (code)
8163 {
8164 case NE:
8165 branch = "fbne";
8166 break;
8167 case EQ:
8168 branch = "fbe";
8169 break;
8170 case GE:
8171 branch = "fbge";
8172 break;
8173 case GT:
8174 branch = "fbg";
8175 break;
8176 case LE:
8177 branch = "fble";
8178 break;
8179 case LT:
8180 branch = "fbl";
8181 break;
8182 case UNORDERED:
8183 branch = "fbu";
8184 break;
8185 case ORDERED:
8186 branch = "fbo";
8187 break;
8188 case UNGT:
8189 branch = "fbug";
8190 break;
8191 case UNLT:
8192 branch = "fbul";
8193 break;
8194 case UNEQ:
8195 branch = "fbue";
8196 break;
8197 case UNGE:
8198 branch = "fbuge";
8199 break;
8200 case UNLE:
8201 branch = "fbule";
8202 break;
8203 case LTGT:
8204 branch = "fblg";
8205 break;
8206 default:
8207 gcc_unreachable ();
8208 }
8209
8210 /* ??? !v9: FP branches cannot be preceded by another floating point
8211 insn. Because there is currently no concept of pre-delay slots,
8212 we can fix this only by always emitting a nop before a floating
8213 point branch. */
8214
8215 string[0] = '\0';
8216 if (! TARGET_V9)
8217 strcpy (string, "nop\n\t");
8218 strcat (string, branch);
8219 }
8220 else
8221 {
8222 switch (code)
8223 {
8224 case NE:
8225 if (mode == CCVmode || mode == CCXVmode)
8226 branch = "bvs";
8227 else
8228 branch = "bne";
8229 break;
8230 case EQ:
8231 if (mode == CCVmode || mode == CCXVmode)
8232 branch = "bvc";
8233 else
8234 branch = "be";
8235 break;
8236 case GE:
8237 if (mode == CCNZmode || mode == CCXNZmode)
8238 branch = "bpos";
8239 else
8240 branch = "bge";
8241 break;
8242 case GT:
8243 branch = "bg";
8244 break;
8245 case LE:
8246 branch = "ble";
8247 break;
8248 case LT:
8249 if (mode == CCNZmode || mode == CCXNZmode)
8250 branch = "bneg";
8251 else
8252 branch = "bl";
8253 break;
8254 case GEU:
8255 branch = "bgeu";
8256 break;
8257 case GTU:
8258 branch = "bgu";
8259 break;
8260 case LEU:
8261 branch = "bleu";
8262 break;
8263 case LTU:
8264 branch = "blu";
8265 break;
8266 default:
8267 gcc_unreachable ();
8268 }
8269 strcpy (string, branch);
8270 }
8271 spaces -= strlen (branch);
8272 p = strchr (string, '\0');
8273
8274 /* Now add the annulling, the label, and a possible noop. */
8275 if (annul && ! far)
8276 {
8277 strcpy (p, ",a");
8278 p += 2;
8279 spaces -= 2;
8280 }
8281
8282 if (TARGET_V9)
8283 {
8284 rtx note;
8285 int v8 = 0;
8286
8287 if (! far && insn && INSN_ADDRESSES_SET_P ())
8288 {
8289 int delta = (INSN_ADDRESSES (INSN_UID (dest))
8290 - INSN_ADDRESSES (INSN_UID (insn)));
8291 /* Leave some instructions for "slop". */
8292 if (delta < -260000 || delta >= 260000)
8293 v8 = 1;
8294 }
8295
8296 switch (mode)
8297 {
8298 case E_CCmode:
8299 case E_CCNZmode:
8300 case E_CCCmode:
8301 case E_CCVmode:
8302 labelno = "%%icc, ";
8303 if (v8)
8304 labelno = "";
8305 break;
8306 case E_CCXmode:
8307 case E_CCXNZmode:
8308 case E_CCXCmode:
8309 case E_CCXVmode:
8310 labelno = "%%xcc, ";
8311 gcc_assert (!v8);
8312 break;
8313 case E_CCFPmode:
8314 case E_CCFPEmode:
8315 {
8316 static char v9_fcc_labelno[] = "%%fccX, ";
8317 /* Set the char indicating the number of the fcc reg to use. */
8318 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
8319 labelno = v9_fcc_labelno;
8320 if (v8)
8321 {
8322 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
8323 labelno = "";
8324 }
8325 }
8326 break;
8327 default:
8328 gcc_unreachable ();
8329 }
8330
8331 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8332 {
8333 strcpy (p,
8334 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8335 >= profile_probability::even ()) ^ far)
8336 ? ",pt" : ",pn");
8337 p += 3;
8338 spaces -= 3;
8339 }
8340 }
8341 else
8342 labelno = "";
8343
8344 if (spaces > 0)
8345 *p++ = '\t';
8346 else
8347 *p++ = ' ';
8348 strcpy (p, labelno);
8349 p = strchr (p, '\0');
8350 if (far)
8351 {
8352 strcpy (p, ".+12\n\t nop\n\tb\t");
8353 /* Skip the next insn if requested or
8354 if we know that it will be a nop. */
8355 if (annul || ! final_sequence)
8356 p[3] = '6';
8357 p += 14;
8358 }
8359 *p++ = '%';
8360 *p++ = 'l';
8361 *p++ = label + '0';
8362 *p++ = '%';
8363 *p++ = '#';
8364 *p = '\0';
8365
8366 return string;
8367 }
8368
8369 /* Emit a library call comparison between floating point X and Y.
8370 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
8371 Return the new operator to be used in the comparison sequence.
8372
8373 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
8374 values as arguments instead of the TFmode registers themselves,
8375 that's why we cannot call emit_float_lib_cmp. */
8376
8377 rtx
8378 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
8379 {
8380 const char *qpfunc;
8381 rtx slot0, slot1, result, tem, tem2, libfunc;
8382 machine_mode mode;
8383 enum rtx_code new_comparison;
8384
8385 switch (comparison)
8386 {
8387 case EQ:
8388 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
8389 break;
8390
8391 case NE:
8392 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
8393 break;
8394
8395 case GT:
8396 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
8397 break;
8398
8399 case GE:
8400 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
8401 break;
8402
8403 case LT:
8404 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
8405 break;
8406
8407 case LE:
8408 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
8409 break;
8410
8411 case ORDERED:
8412 case UNORDERED:
8413 case UNGT:
8414 case UNLT:
8415 case UNEQ:
8416 case UNGE:
8417 case UNLE:
8418 case LTGT:
8419 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
8420 break;
8421
8422 default:
8423 gcc_unreachable ();
8424 }
8425
8426 if (TARGET_ARCH64)
8427 {
8428 if (MEM_P (x))
8429 {
8430 tree expr = MEM_EXPR (x);
8431 if (expr)
8432 mark_addressable (expr);
8433 slot0 = x;
8434 }
8435 else
8436 {
8437 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8438 emit_move_insn (slot0, x);
8439 }
8440
8441 if (MEM_P (y))
8442 {
8443 tree expr = MEM_EXPR (y);
8444 if (expr)
8445 mark_addressable (expr);
8446 slot1 = y;
8447 }
8448 else
8449 {
8450 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8451 emit_move_insn (slot1, y);
8452 }
8453
8454 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8455 emit_library_call (libfunc, LCT_NORMAL,
8456 DImode,
8457 XEXP (slot0, 0), Pmode,
8458 XEXP (slot1, 0), Pmode);
8459 mode = DImode;
8460 }
8461 else
8462 {
8463 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8464 emit_library_call (libfunc, LCT_NORMAL,
8465 SImode,
8466 x, TFmode, y, TFmode);
8467 mode = SImode;
8468 }
8469
8470
8471 /* Immediately move the result of the libcall into a pseudo
8472 register so reload doesn't clobber the value if it needs
8473 the return register for a spill reg. */
8474 result = gen_reg_rtx (mode);
8475 emit_move_insn (result, hard_libcall_value (mode, libfunc));
8476
8477 switch (comparison)
8478 {
8479 default:
8480 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8481 case ORDERED:
8482 case UNORDERED:
8483 new_comparison = (comparison == UNORDERED ? EQ : NE);
8484 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8485 case UNGT:
8486 case UNGE:
8487 new_comparison = (comparison == UNGT ? GT : NE);
8488 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8489 case UNLE:
8490 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8491 case UNLT:
8492 tem = gen_reg_rtx (mode);
8493 if (TARGET_ARCH32)
8494 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8495 else
8496 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8497 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8498 case UNEQ:
8499 case LTGT:
8500 tem = gen_reg_rtx (mode);
8501 if (TARGET_ARCH32)
8502 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8503 else
8504 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8505 tem2 = gen_reg_rtx (mode);
8506 if (TARGET_ARCH32)
8507 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8508 else
8509 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8510 new_comparison = (comparison == UNEQ ? EQ : NE);
8511 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8512 }
8513
8514 gcc_unreachable ();
8515 }
8516
8517 /* Generate an unsigned DImode to FP conversion. This is the same code
8518 optabs would emit if we didn't have TFmode patterns. */
8519
8520 void
8521 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8522 {
8523 rtx i0, i1, f0, in, out;
8524
8525 out = operands[0];
8526 in = force_reg (DImode, operands[1]);
8527 rtx_code_label *neglab = gen_label_rtx ();
8528 rtx_code_label *donelab = gen_label_rtx ();
8529 i0 = gen_reg_rtx (DImode);
8530 i1 = gen_reg_rtx (DImode);
8531 f0 = gen_reg_rtx (mode);
8532
8533 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8534
8535 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8536 emit_jump_insn (gen_jump (donelab));
8537 emit_barrier ();
8538
8539 emit_label (neglab);
8540
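/* The value has its high bit set: halve it while folding the low bit in
   as a sticky rounding bit, convert, then double the result. */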
8541 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8542 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8543 emit_insn (gen_iordi3 (i0, i0, i1));
8544 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8545 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8546
8547 emit_label (donelab);
8548 }
8549
8550 /* Generate an FP to unsigned DImode conversion. This is the same code
8551 optabs would emit if we didn't have TFmode patterns. */
8552
8553 void
8554 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8555 {
8556 rtx i0, i1, f0, in, out, limit;
8557
8558 out = operands[0];
8559 in = force_reg (mode, operands[1]);
8560 rtx_code_label *neglab = gen_label_rtx ();
8561 rtx_code_label *donelab = gen_label_rtx ();
8562 i0 = gen_reg_rtx (DImode);
8563 i1 = gen_reg_rtx (DImode);
8564 limit = gen_reg_rtx (mode);
8565 f0 = gen_reg_rtx (mode);
8566
8567 emit_move_insn (limit,
8568 const_double_from_real_value (
8569 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8570 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8571
8572 emit_insn (gen_rtx_SET (out,
8573 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8574 emit_jump_insn (gen_jump (donelab));
8575 emit_barrier ();
8576
8577 emit_label (neglab);
8578
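/* The value is at least 2^63: subtract 2^63 before converting, then set
   the high bit of the integer result to compensate. */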
8579 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8580 emit_insn (gen_rtx_SET (i0,
8581 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8582 emit_insn (gen_movdi (i1, const1_rtx));
8583 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8584 emit_insn (gen_xordi3 (out, i0, i1));
8585
8586 emit_label (donelab);
8587 }
8588
8589 /* Return the string to output a compare and branch instruction to DEST.
8590 DEST is the destination insn (i.e. the label), INSN is the source,
8591 and OP is the conditional expression. */
8592
8593 const char *
8594 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8595 {
8596 machine_mode mode = GET_MODE (XEXP (op, 0));
8597 enum rtx_code code = GET_CODE (op);
8598 const char *cond_str, *tmpl;
8599 int far, emit_nop, len;
8600 static char string[64];
8601 char size_char;
8602
8603 /* Compare and Branch is limited to +-2KB. If it is too far away,
8604 change
8605
8606 cxbne X, Y, .LC30
8607
8608 to
8609
8610 cxbe X, Y, .+16
8611 nop
8612 ba,pt xcc, .LC30
8613 nop */
8614
8615 len = get_attr_length (insn);
8616
8617 far = len == 4;
8618 emit_nop = len == 2;
8619
8620 if (far)
8621 code = reverse_condition (code);
8622
8623 size_char = ((mode == SImode) ? 'w' : 'x');
8624
8625 switch (code)
8626 {
8627 case NE:
8628 cond_str = "ne";
8629 break;
8630
8631 case EQ:
8632 cond_str = "e";
8633 break;
8634
8635 case GE:
8636 cond_str = "ge";
8637 break;
8638
8639 case GT:
8640 cond_str = "g";
8641 break;
8642
8643 case LE:
8644 cond_str = "le";
8645 break;
8646
8647 case LT:
8648 cond_str = "l";
8649 break;
8650
8651 case GEU:
8652 cond_str = "cc";
8653 break;
8654
8655 case GTU:
8656 cond_str = "gu";
8657 break;
8658
8659 case LEU:
8660 cond_str = "leu";
8661 break;
8662
8663 case LTU:
8664 cond_str = "cs";
8665 break;
8666
8667 default:
8668 gcc_unreachable ();
8669 }
8670
8671 if (far)
8672 {
8673 int veryfar = 1, delta;
8674
8675 if (INSN_ADDRESSES_SET_P ())
8676 {
8677 delta = (INSN_ADDRESSES (INSN_UID (dest))
8678 - INSN_ADDRESSES (INSN_UID (insn)));
8679 /* Leave some instructions for "slop". */
8680 if (delta >= -260000 && delta < 260000)
8681 veryfar = 0;
8682 }
8683
8684 if (veryfar)
8685 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8686 else
8687 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8688 }
8689 else
8690 {
8691 if (emit_nop)
8692 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8693 else
8694 tmpl = "c%cb%s\t%%1, %%2, %%3";
8695 }
8696
8697 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8698
8699 return string;
8700 }
8701
8702 /* Return the string to output a conditional branch to LABEL, testing
8703 register REG. LABEL is the operand number of the label; REG is the
8704 operand number of the reg. OP is the conditional expression. The mode
8705 of REG says what kind of comparison we made.
8706
8707 DEST is the destination insn (i.e. the label), INSN is the source.
8708
8709 REVERSED is nonzero if we should reverse the sense of the comparison.
8710
8711 ANNUL is nonzero if we should generate an annulling branch. */
8712
8713 const char *
8714 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8715 int annul, rtx_insn *insn)
8716 {
8717 static char string[64];
8718 enum rtx_code code = GET_CODE (op);
8719 machine_mode mode = GET_MODE (XEXP (op, 0));
8720 rtx note;
8721 int far;
8722 char *p;
8723
8724 /* Branches on registers are limited to +-128KB. If it is too far away,
8725 change
8726
8727 brnz,pt %g1, .LC30
8728
8729 to
8730
8731 brz,pn %g1, .+12
8732 nop
8733 ba,pt %xcc, .LC30
8734
8735 and
8736
8737 brgez,a,pn %o1, .LC29
8738
8739 to
8740
8741 brlz,pt %o1, .+16
8742 nop
8743 ba,pt %xcc, .LC29 */
8744
8745 far = get_attr_length (insn) >= 3;
8746
8747 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8748 if (reversed ^ far)
8749 code = reverse_condition (code);
8750
8751 /* Only 64-bit versions of these instructions exist. */
8752 gcc_assert (mode == DImode);
8753
8754 /* Start by writing the branch condition. */
8755
8756 switch (code)
8757 {
8758 case NE:
8759 strcpy (string, "brnz");
8760 break;
8761
8762 case EQ:
8763 strcpy (string, "brz");
8764 break;
8765
8766 case GE:
8767 strcpy (string, "brgez");
8768 break;
8769
8770 case LT:
8771 strcpy (string, "brlz");
8772 break;
8773
8774 case LE:
8775 strcpy (string, "brlez");
8776 break;
8777
8778 case GT:
8779 strcpy (string, "brgz");
8780 break;
8781
8782 default:
8783 gcc_unreachable ();
8784 }
8785
8786 p = strchr (string, '\0');
8787
8788 /* Now add the annulling, reg, label, and nop. */
8789 if (annul && ! far)
8790 {
8791 strcpy (p, ",a");
8792 p += 2;
8793 }
8794
8795 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8796 {
8797 strcpy (p,
8798 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8799 >= profile_probability::even ()) ^ far)
8800 ? ",pt" : ",pn");
8801 p += 3;
8802 }
8803
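/* Separate the mnemonic from the operands with a tab while it still fits
   in the 8-character opcode field, otherwise with a single space. */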
8804 *p = p < string + 8 ? '\t' : ' ';
8805 p++;
8806 *p++ = '%';
8807 *p++ = '0' + reg;
8808 *p++ = ',';
8809 *p++ = ' ';
8810 if (far)
8811 {
8812 int veryfar = 1, delta;
8813
8814 if (INSN_ADDRESSES_SET_P ())
8815 {
8816 delta = (INSN_ADDRESSES (INSN_UID (dest))
8817 - INSN_ADDRESSES (INSN_UID (insn)));
8818 /* Leave some instructions for "slop". */
8819 if (delta >= -260000 && delta < 260000)
8820 veryfar = 0;
8821 }
8822
8823 strcpy (p, ".+12\n\t nop\n\t");
8824 /* Skip the next insn if requested or
8825 if we know that it will be a nop. */
8826 if (annul || ! final_sequence)
8827 p[3] = '6';
8828 p += 12;
8829 if (veryfar)
8830 {
8831 strcpy (p, "b\t");
8832 p += 2;
8833 }
8834 else
8835 {
8836 strcpy (p, "ba,pt\t%%xcc, ");
8837 p += 13;
8838 }
8839 }
8840 *p++ = '%';
8841 *p++ = 'l';
8842 *p++ = '0' + label;
8843 *p++ = '%';
8844 *p++ = '#';
8845 *p = '\0';
8846
8847 return string;
8848 }
8849
8850 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8851 Such instructions cannot be used in the delay slot of the return insn on v9.
8852 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7]
8853 counterparts. */
8854
8855 static int
8856 epilogue_renumber (rtx *where, int test)
8857 {
8858 const char *fmt;
8859 int i;
8860 enum rtx_code code;
8861
8862 if (*where == 0)
8863 return 0;
8864
8865 code = GET_CODE (*where);
8866
8867 switch (code)
8868 {
8869 case REG:
8870 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8871 return 1;
8872 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8873 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8874 /* fallthrough */
8875 case SCRATCH:
8876 case CC0:
8877 case PC:
8878 case CONST_INT:
8879 case CONST_WIDE_INT:
8880 case CONST_DOUBLE:
8881 return 0;
8882
8883 /* Do not replace the frame pointer with the stack pointer because
8884 it can cause the delayed instruction to load below the stack.
8885 This occurs when instructions like:
8886
8887 (set (reg/i:SI 24 %i0)
8888 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8889 (const_int -20 [0xffffffec])) 0))
8890
8891 are in the return delayed slot. */
8892 case PLUS:
8893 if (GET_CODE (XEXP (*where, 0)) == REG
8894 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8895 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8896 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8897 return 1;
8898 break;
8899
8900 case MEM:
8901 if (SPARC_STACK_BIAS
8902 && GET_CODE (XEXP (*where, 0)) == REG
8903 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8904 return 1;
8905 break;
8906
8907 default:
8908 break;
8909 }
8910
8911 fmt = GET_RTX_FORMAT (code);
8912
8913 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8914 {
8915 if (fmt[i] == 'E')
8916 {
8917 int j;
8918 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8919 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8920 return 1;
8921 }
8922 else if (fmt[i] == 'e'
8923 && epilogue_renumber (&(XEXP (*where, i)), test))
8924 return 1;
8925 }
8926 return 0;
8927 }
8928
8929 /* Leaf functions and non-leaf functions have different needs. */
8930
8931 static const int
8932 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8933
8934 static const int
8935 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8936
8937 static const int *const reg_alloc_orders[] = {
8938 reg_leaf_alloc_order,
8939 reg_nonleaf_alloc_order};
8940
8941 void
8942 order_regs_for_local_alloc (void)
8943 {
8944 static int last_order_nonleaf = 1;
8945
8946 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8947 {
8948 last_order_nonleaf = !last_order_nonleaf;
8949 memcpy ((char *) reg_alloc_order,
8950 (const char *) reg_alloc_orders[last_order_nonleaf],
8951 FIRST_PSEUDO_REGISTER * sizeof (int));
8952 }
8953 }
8954
8955 /* Return 1 if REG and MEM are legitimate enough to allow the various
8956 MEM<-->REG splits to be run. */
8957
8958 int
8959 sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
8960 {
8961 /* Punt if we are here by mistake. */
8962 gcc_assert (reload_completed);
8963
8964 /* We must have an offsettable memory reference. */
8965 if (!offsettable_memref_p (mem))
8966 return 0;
8967
8968 /* If we have legitimate args for ldd/std, we do not want
8969 the split to happen. */
8970 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
8971 return 0;
8972
8973 /* Success. */
8974 return 1;
8975 }
8976
8977 /* Split a REG <-- MEM move into a pair of moves in MODE. */
8978
8979 void
8980 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
8981 {
8982 rtx high_part = gen_highpart (mode, dest);
8983 rtx low_part = gen_lowpart (mode, dest);
8984 rtx word0 = adjust_address (src, mode, 0);
8985 rtx word1 = adjust_address (src, mode, 4);
8986
8987 if (reg_overlap_mentioned_p (high_part, word1))
8988 {
8989 emit_move_insn_1 (low_part, word1);
8990 emit_move_insn_1 (high_part, word0);
8991 }
8992 else
8993 {
8994 emit_move_insn_1 (high_part, word0);
8995 emit_move_insn_1 (low_part, word1);
8996 }
8997 }
8998
8999 /* Split a MEM <-- REG move into a pair of moves in MODE. */
9000
9001 void
9002 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
9003 {
9004 rtx word0 = adjust_address (dest, mode, 0);
9005 rtx word1 = adjust_address (dest, mode, 4);
9006 rtx high_part = gen_highpart (mode, src);
9007 rtx low_part = gen_lowpart (mode, src);
9008
9009 emit_move_insn_1 (word0, high_part);
9010 emit_move_insn_1 (word1, low_part);
9011 }
9012
9013 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */
9014
9015 int
9016 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
9017 {
9018 /* Punt if we are here by mistake. */
9019 gcc_assert (reload_completed);
9020
9021 if (GET_CODE (reg1) == SUBREG)
9022 reg1 = SUBREG_REG (reg1);
9023 if (GET_CODE (reg1) != REG)
9024 return 0;
9025 const int regno1 = REGNO (reg1);
9026
9027 if (GET_CODE (reg2) == SUBREG)
9028 reg2 = SUBREG_REG (reg2);
9029 if (GET_CODE (reg2) != REG)
9030 return 0;
9031 const int regno2 = REGNO (reg2);
9032
9033 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
9034 return 1;
9035
9036 if (TARGET_VIS3)
9037 {
9038 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
9039 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
9040 return 1;
9041 }
9042
9043 return 0;
9044 }
9045
9046 /* Split a REG <--> REG move into a pair of moves in MODE. */
9047
9048 void
9049 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
9050 {
9051 rtx dest1 = gen_highpart (mode, dest);
9052 rtx dest2 = gen_lowpart (mode, dest);
9053 rtx src1 = gen_highpart (mode, src);
9054 rtx src2 = gen_lowpart (mode, src);
9055
9056 /* Now emit using the real source and destination we found, swapping
9057 the order if we detect overlap. */
9058 if (reg_overlap_mentioned_p (dest1, src2))
9059 {
9060 emit_move_insn_1 (dest2, src2);
9061 emit_move_insn_1 (dest1, src1);
9062 }
9063 else
9064 {
9065 emit_move_insn_1 (dest1, src1);
9066 emit_move_insn_1 (dest2, src2);
9067 }
9068 }
9069
9070 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
9071 This makes them candidates for using ldd and std insns.
9072
9073 Note reg1 and reg2 *must* be hard registers. */
9074
9075 int
9076 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
9077 {
9078 /* We might have been passed a SUBREG. */
9079 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
9080 return 0;
9081
9082 if (REGNO (reg1) % 2 != 0)
9083 return 0;
9084
9085 /* Integer ldd is deprecated in SPARC V9 */
9086 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
9087 return 0;
9088
9089 return (REGNO (reg1) == REGNO (reg2) - 1);
9090 }
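
/* Editor's note (not in the original sources): for example, on a pre-V9
   target %o0/%o1 (hard regnos 8 and 9) pass the checks above, while %o1/%o2
   fail the even-regno test and %o0/%o2 fail the consecutive-regno test.  */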
9091
9092 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
9093 an ldd or std insn.
9094
9095 This can only happen when addr1 and addr2, the addresses in mem1
9096 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
9097 addr1 must also be aligned on a 64-bit boundary.
9098
9099 Also, if dependent_reg_rtx is not null, it should not be used to
9100 compute the address for mem1, i.e. we cannot optimize a sequence
9101 like:
9102 ld [%o0], %o0
9103 ld [%o0 + 4], %o1
9104 to
9105 ldd [%o0], %o0
9106 nor:
9107 ld [%g3 + 4], %g3
9108 ld [%g3], %g2
9109 to
9110 ldd [%g3], %g2
9111
9112 But, note that the transformation from:
9113 ld [%g2 + 4], %g3
9114 ld [%g2], %g2
9115 to
9116 ldd [%g2], %g2
9117 is perfectly fine. Thus, the peephole2 patterns always pass us
9118 the destination register of the first load, never the second one.
9119
9120 For stores we don't have a similar problem, so dependent_reg_rtx is
9121 NULL_RTX. */
9122
9123 int
9124 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
9125 {
9126 rtx addr1, addr2;
9127 unsigned int reg1;
9128 HOST_WIDE_INT offset1;
9129
9130 /* The mems cannot be volatile. */
9131 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
9132 return 0;
9133
9134 /* MEM1 should be aligned on a 64-bit boundary. */
9135 if (MEM_ALIGN (mem1) < 64)
9136 return 0;
9137
9138 addr1 = XEXP (mem1, 0);
9139 addr2 = XEXP (mem2, 0);
9140
9141 /* Extract a register number and offset (if used) from the first addr. */
9142 if (GET_CODE (addr1) == PLUS)
9143 {
9144 /* If not a REG, return zero. */
9145 if (GET_CODE (XEXP (addr1, 0)) != REG)
9146 return 0;
9147 else
9148 {
9149 reg1 = REGNO (XEXP (addr1, 0));
9150 /* The offset must be constant! */
9151 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
9152 return 0;
9153 offset1 = INTVAL (XEXP (addr1, 1));
9154 }
9155 }
9156 else if (GET_CODE (addr1) != REG)
9157 return 0;
9158 else
9159 {
9160 reg1 = REGNO (addr1);
9161 /* This was a simple (mem (reg)) expression. Offset is 0. */
9162 offset1 = 0;
9163 }
9164
9165 /* Make sure the second address is a (mem (plus (reg) (const_int))). */
9166 if (GET_CODE (addr2) != PLUS)
9167 return 0;
9168
9169 if (GET_CODE (XEXP (addr2, 0)) != REG
9170 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
9171 return 0;
9172
9173 if (reg1 != REGNO (XEXP (addr2, 0)))
9174 return 0;
9175
9176 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
9177 return 0;
9178
9179 /* The first offset must be evenly divisible by 8 to ensure the
9180 address is 64-bit aligned. */
9181 if (offset1 % 8 != 0)
9182 return 0;
9183
9184 /* The offset for the second addr must be 4 more than the first addr. */
9185 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
9186 return 0;
9187
9188 /* All the tests passed. addr1 and addr2 are valid for ldd and std
9189 instructions. */
9190 return 1;
9191 }
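
/* Worked example (editor's note, not in the original sources): given a
   64-bit-aligned mem1, the pair [%o2+8] / [%o2+12] satisfies every test
   above (same base register, offset1 % 8 == 0, second offset == first + 4),
   whereas [%o2+4] / [%o2+8] fails the divisibility test and
   [%o2+8] / [%o3+12] fails the same-base-register test.  */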
9192
9193 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
9194
9195 rtx
9196 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
9197 {
9198 rtx x = widen_memory_access (mem1, mode, 0);
9199 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
9200 return x;
9201 }
9202
9203 /* Return 1 if reg is a pseudo, or is the first register in
9204 a hard register pair. This makes it suitable for use in
9205 ldd and std insns. */
9206
9207 int
9208 register_ok_for_ldd (rtx reg)
9209 {
9210 /* We might have been passed a SUBREG. */
9211 if (!REG_P (reg))
9212 return 0;
9213
9214 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
9215 return (REGNO (reg) % 2 == 0);
9216
9217 return 1;
9218 }
9219
9220 /* Return 1 if OP, a MEM, has an address which is known to be
9221 aligned to an 8-byte boundary. */
9222
9223 int
9224 memory_ok_for_ldd (rtx op)
9225 {
9226 if (!mem_min_alignment (op, 8))
9227 return 0;
9228
9229 /* We need to perform the job of a memory constraint. */
9230 if ((reload_in_progress || reload_completed)
9231 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
9232 return 0;
9233
9234 if (lra_in_progress && !memory_address_p (Pmode, XEXP (op, 0)))
9235 return 0;
9236
9237 return 1;
9238 }
9239
9240 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
9241
9242 static bool
9243 sparc_print_operand_punct_valid_p (unsigned char code)
9244 {
9245 if (code == '#'
9246 || code == '*'
9247 || code == '('
9248 || code == ')'
9249 || code == '_'
9250 || code == '&')
9251 return true;
9252
9253 return false;
9254 }
9255
9256 /* Implement TARGET_PRINT_OPERAND.
9257 Print operand X (an rtx) in assembler syntax to file FILE.
9258 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
9259 For `%' followed by punctuation, CODE is the punctuation and X is null. */
9260
9261 static void
9262 sparc_print_operand (FILE *file, rtx x, int code)
9263 {
9264 const char *s;
9265
9266 switch (code)
9267 {
9268 case '#':
9269 /* Output an insn in a delay slot. */
9270 if (final_sequence)
9271 sparc_indent_opcode = 1;
9272 else
9273 fputs ("\n\t nop", file);
9274 return;
9275 case '*':
9276 /* Output an annul flag if there's nothing for the delay slot and we
9277 are optimizing. This is always used with '(' below.
9278 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
9279 this is a dbx bug. So, we only do this when optimizing.
9280 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
9281 Always emit a nop in case the next instruction is a branch. */
9282 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
9283 fputs (",a", file);
9284 return;
9285 case '(':
9286 /* Output a 'nop' if there's nothing for the delay slot and we are
9287 not optimizing. This is always used with '*' above. */
9288 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
9289 fputs ("\n\t nop", file);
9290 else if (final_sequence)
9291 sparc_indent_opcode = 1;
9292 return;
9293 case ')':
9294 /* Output the right displacement from the saved PC on function return.
9295 The caller may have placed an "unimp" insn immediately after the call
9296 so we have to account for it. This insn is used in the 32-bit ABI
9297 when calling a function that returns a non zero-sized structure. The
9298 64-bit ABI doesn't have it. Be careful to have this test be the same
9299 as that for the call. The exception is when sparc_std_struct_return
9300 is enabled, the psABI is followed exactly and the adjustment is made
9301 by the code in sparc_struct_value_rtx. The call emitted is the same
9302 when sparc_std_struct_return is enabled. */
9303 if (!TARGET_ARCH64
9304 && cfun->returns_struct
9305 && !sparc_std_struct_return
9306 && DECL_SIZE (DECL_RESULT (current_function_decl))
9307 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
9308 == INTEGER_CST
9309 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
9310 fputs ("12", file);
9311 else
9312 fputc ('8', file);
9313 return;
9314 case '_':
9315 /* Output the Embedded Medium/Anywhere code model base register. */
9316 fputs (EMBMEDANY_BASE_REG, file);
9317 return;
9318 case '&':
9319 /* Print some local dynamic TLS name. */
9320 if (const char *name = get_some_local_dynamic_name ())
9321 assemble_name (file, name);
9322 else
9323 output_operand_lossage ("'%%&' used without any "
9324 "local dynamic TLS references");
9325 return;
9326
9327 case 'Y':
9328 /* Adjust the operand to take into account a RESTORE operation. */
9329 if (GET_CODE (x) == CONST_INT)
9330 break;
9331 else if (GET_CODE (x) != REG)
9332 output_operand_lossage ("invalid %%Y operand");
9333 else if (REGNO (x) < 8)
9334 fputs (reg_names[REGNO (x)], file);
9335 else if (REGNO (x) >= 24 && REGNO (x) < 32)
9336 fputs (reg_names[REGNO (x)-16], file);
9337 else
9338 output_operand_lossage ("invalid %%Y operand");
9339 return;
9340 case 'L':
9341 /* Print out the low order register name of a register pair. */
9342 if (WORDS_BIG_ENDIAN)
9343 fputs (reg_names[REGNO (x)+1], file);
9344 else
9345 fputs (reg_names[REGNO (x)], file);
9346 return;
9347 case 'H':
9348 /* Print out the high order register name of a register pair. */
9349 if (WORDS_BIG_ENDIAN)
9350 fputs (reg_names[REGNO (x)], file);
9351 else
9352 fputs (reg_names[REGNO (x)+1], file);
9353 return;
9354 case 'R':
9355 /* Print out the second register name of a register pair or quad.
9356 I.e., R (%o0) => %o1. */
9357 fputs (reg_names[REGNO (x)+1], file);
9358 return;
9359 case 'S':
9360 /* Print out the third register name of a register quad.
9361 I.e., S (%o0) => %o2. */
9362 fputs (reg_names[REGNO (x)+2], file);
9363 return;
9364 case 'T':
9365 /* Print out the fourth register name of a register quad.
9366 I.e., T (%o0) => %o3. */
9367 fputs (reg_names[REGNO (x)+3], file);
9368 return;
9369 case 'x':
9370 /* Print a condition code register. */
9371 if (REGNO (x) == SPARC_ICC_REG)
9372 {
9373 switch (GET_MODE (x))
9374 {
9375 case E_CCmode:
9376 case E_CCNZmode:
9377 case E_CCCmode:
9378 case E_CCVmode:
9379 s = "%icc";
9380 break;
9381 case E_CCXmode:
9382 case E_CCXNZmode:
9383 case E_CCXCmode:
9384 case E_CCXVmode:
9385 s = "%xcc";
9386 break;
9387 default:
9388 gcc_unreachable ();
9389 }
9390 fputs (s, file);
9391 }
9392 else
9393 /* %fccN register */
9394 fputs (reg_names[REGNO (x)], file);
9395 return;
9396 case 'm':
9397 /* Print the operand's address only. */
9398 output_address (GET_MODE (x), XEXP (x, 0));
9399 return;
9400 case 'r':
9401 /* In this case we need a register. Use %g0 if the
9402 operand is const0_rtx. */
9403 if (x == const0_rtx
9404 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
9405 {
9406 fputs ("%g0", file);
9407 return;
9408 }
9409 else
9410 break;
9411
9412 case 'A':
9413 switch (GET_CODE (x))
9414 {
9415 case IOR:
9416 s = "or";
9417 break;
9418 case AND:
9419 s = "and";
9420 break;
9421 case XOR:
9422 s = "xor";
9423 break;
9424 default:
9425 output_operand_lossage ("invalid %%A operand");
9426 s = "";
9427 break;
9428 }
9429 fputs (s, file);
9430 return;
9431
9432 case 'B':
9433 switch (GET_CODE (x))
9434 {
9435 case IOR:
9436 s = "orn";
9437 break;
9438 case AND:
9439 s = "andn";
9440 break;
9441 case XOR:
9442 s = "xnor";
9443 break;
9444 default:
9445 output_operand_lossage ("invalid %%B operand");
9446 s = "";
9447 break;
9448 }
9449 fputs (s, file);
9450 return;
9451
9452 /* This is used by the conditional move instructions. */
9453 case 'C':
9454 {
9455 machine_mode mode = GET_MODE (XEXP (x, 0));
9456 switch (GET_CODE (x))
9457 {
9458 case NE:
9459 if (mode == CCVmode || mode == CCXVmode)
9460 s = "vs";
9461 else
9462 s = "ne";
9463 break;
9464 case EQ:
9465 if (mode == CCVmode || mode == CCXVmode)
9466 s = "vc";
9467 else
9468 s = "e";
9469 break;
9470 case GE:
9471 if (mode == CCNZmode || mode == CCXNZmode)
9472 s = "pos";
9473 else
9474 s = "ge";
9475 break;
9476 case GT:
9477 s = "g";
9478 break;
9479 case LE:
9480 s = "le";
9481 break;
9482 case LT:
9483 if (mode == CCNZmode || mode == CCXNZmode)
9484 s = "neg";
9485 else
9486 s = "l";
9487 break;
9488 case GEU:
9489 s = "geu";
9490 break;
9491 case GTU:
9492 s = "gu";
9493 break;
9494 case LEU:
9495 s = "leu";
9496 break;
9497 case LTU:
9498 s = "lu";
9499 break;
9500 case LTGT:
9501 s = "lg";
9502 break;
9503 case UNORDERED:
9504 s = "u";
9505 break;
9506 case ORDERED:
9507 s = "o";
9508 break;
9509 case UNLT:
9510 s = "ul";
9511 break;
9512 case UNLE:
9513 s = "ule";
9514 break;
9515 case UNGT:
9516 s = "ug";
9517 break;
9518 case UNGE:
9519 s = "uge";
9520 break;
9521 case UNEQ:
9522 s = "ue";
9523 break;
9524 default:
9525 output_operand_lossage ("invalid %%C operand");
9526 s = "";
9527 break;
9528 }
9529 fputs (s, file);
9530 return;
9531 }
9532
9533 /* These are used by the movr instruction pattern. */
9534 case 'D':
9535 {
9536 switch (GET_CODE (x))
9537 {
9538 case NE:
9539 s = "ne";
9540 break;
9541 case EQ:
9542 s = "e";
9543 break;
9544 case GE:
9545 s = "gez";
9546 break;
9547 case LT:
9548 s = "lz";
9549 break;
9550 case LE:
9551 s = "lez";
9552 break;
9553 case GT:
9554 s = "gz";
9555 break;
9556 default:
9557 output_operand_lossage ("invalid %%D operand");
9558 s = "";
9559 break;
9560 }
9561 fputs (s, file);
9562 return;
9563 }
9564
9565 case 'b':
9566 {
9567 /* Print a sign-extended character. */
9568 int i = trunc_int_for_mode (INTVAL (x), QImode);
9569 fprintf (file, "%d", i);
9570 return;
9571 }
9572
9573 case 'f':
9574 /* Operand must be a MEM; write its address. */
9575 if (GET_CODE (x) != MEM)
9576 output_operand_lossage ("invalid %%f operand");
9577 output_address (GET_MODE (x), XEXP (x, 0));
9578 return;
9579
9580 case 's':
9581 {
9582 /* Print a sign-extended 32-bit value. */
9583 HOST_WIDE_INT i;
9584 if (GET_CODE(x) == CONST_INT)
9585 i = INTVAL (x);
9586 else
9587 {
9588 output_operand_lossage ("invalid %%s operand");
9589 return;
9590 }
9591 i = trunc_int_for_mode (i, SImode);
9592 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9593 return;
9594 }
9595
9596 case 0:
9597 /* Do nothing special. */
9598 break;
9599
9600 default:
9601 /* Undocumented flag. */
9602 output_operand_lossage ("invalid operand output code");
9603 }
9604
9605 if (GET_CODE (x) == REG)
9606 fputs (reg_names[REGNO (x)], file);
9607 else if (GET_CODE (x) == MEM)
9608 {
9609 fputc ('[', file);
9610 /* Poor Sun assembler doesn't understand absolute addressing. */
9611 if (CONSTANT_P (XEXP (x, 0)))
9612 fputs ("%g0+", file);
9613 output_address (GET_MODE (x), XEXP (x, 0));
9614 fputc (']', file);
9615 }
9616 else if (GET_CODE (x) == HIGH)
9617 {
9618 fputs ("%hi(", file);
9619 output_addr_const (file, XEXP (x, 0));
9620 fputc (')', file);
9621 }
9622 else if (GET_CODE (x) == LO_SUM)
9623 {
9624 sparc_print_operand (file, XEXP (x, 0), 0);
9625 if (TARGET_CM_MEDMID)
9626 fputs ("+%l44(", file);
9627 else
9628 fputs ("+%lo(", file);
9629 output_addr_const (file, XEXP (x, 1));
9630 fputc (')', file);
9631 }
9632 else if (GET_CODE (x) == CONST_DOUBLE)
9633 output_operand_lossage ("floating-point constant not a valid immediate operand");
9634 else
9635 output_addr_const (file, x);
9636 }
9637
9638 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9639
9640 static void
9641 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9642 {
9643 rtx base, index = 0;
9644 int offset = 0;
9645 rtx addr = x;
9646
9647 if (REG_P (addr))
9648 fputs (reg_names[REGNO (addr)], file);
9649 else if (GET_CODE (addr) == PLUS)
9650 {
9651 if (CONST_INT_P (XEXP (addr, 0)))
9652 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9653 else if (CONST_INT_P (XEXP (addr, 1)))
9654 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9655 else
9656 base = XEXP (addr, 0), index = XEXP (addr, 1);
9657 if (GET_CODE (base) == LO_SUM)
9658 {
9659 gcc_assert (USE_AS_OFFSETABLE_LO10
9660 && TARGET_ARCH64
9661 && ! TARGET_CM_MEDMID);
9662 output_operand (XEXP (base, 0), 0);
9663 fputs ("+%lo(", file);
9664 output_address (VOIDmode, XEXP (base, 1));
9665 fprintf (file, ")+%d", offset);
9666 }
9667 else
9668 {
9669 fputs (reg_names[REGNO (base)], file);
9670 if (index == 0)
9671 fprintf (file, "%+d", offset);
9672 else if (REG_P (index))
9673 fprintf (file, "+%s", reg_names[REGNO (index)]);
9674 else if (GET_CODE (index) == SYMBOL_REF
9675 || GET_CODE (index) == LABEL_REF
9676 || GET_CODE (index) == CONST)
9677 fputc ('+', file), output_addr_const (file, index);
9678 else gcc_unreachable ();
9679 }
9680 }
9681 else if (GET_CODE (addr) == MINUS
9682 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9683 {
9684 output_addr_const (file, XEXP (addr, 0));
9685 fputs ("-(", file);
9686 output_addr_const (file, XEXP (addr, 1));
9687 fputs ("-.)", file);
9688 }
9689 else if (GET_CODE (addr) == LO_SUM)
9690 {
9691 output_operand (XEXP (addr, 0), 0);
9692 if (TARGET_CM_MEDMID)
9693 fputs ("+%l44(", file);
9694 else
9695 fputs ("+%lo(", file);
9696 output_address (VOIDmode, XEXP (addr, 1));
9697 fputc (')', file);
9698 }
9699 else if (flag_pic
9700 && GET_CODE (addr) == CONST
9701 && GET_CODE (XEXP (addr, 0)) == MINUS
9702 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9703 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9704 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9705 {
9706 addr = XEXP (addr, 0);
9707 output_addr_const (file, XEXP (addr, 0));
9708 /* Group the args of the second CONST in parentheses. */
9709 fputs ("-(", file);
9710 /* Skip past the second CONST--it does nothing for us. */
9711 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9712 /* Close the parenthesis. */
9713 fputc (')', file);
9714 }
9715 else
9716 {
9717 output_addr_const (file, addr);
9718 }
9719 }
9720
9721 /* Target hook for assembling integer objects. The sparc version has
9722 special handling for aligned DI-mode objects. */
9723
9724 static bool
9725 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9726 {
9727 /* ??? We only output .xword's for symbols and only then in environments
9728 where the assembler can handle them. */
9729 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9730 {
9731 if (TARGET_V9)
9732 {
9733 assemble_integer_with_op ("\t.xword\t", x);
9734 return true;
9735 }
9736 else
9737 {
9738 assemble_aligned_integer (4, const0_rtx);
9739 assemble_aligned_integer (4, x);
9740 return true;
9741 }
9742 }
9743 return default_assemble_integer (x, size, aligned_p);
9744 }
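
/* Illustrative output (editor's note, not in the original sources): for an
   aligned 8-byte reference to a symbol "foo", the hook above emits

       .xword  foo         ! with TARGET_V9

   and otherwise falls back to the equivalent big-endian pair

       .word   0
       .word   foo

   i.e. a zero high word followed by the symbol in the low word.  */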
9745
9746 /* Return the value of a code used in the .proc pseudo-op that says
9747 what kind of result this function returns. For non-C types, we pick
9748 the closest C type. */
9749
9750 #ifndef SHORT_TYPE_SIZE
9751 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9752 #endif
9753
9754 #ifndef INT_TYPE_SIZE
9755 #define INT_TYPE_SIZE BITS_PER_WORD
9756 #endif
9757
9758 #ifndef LONG_TYPE_SIZE
9759 #define LONG_TYPE_SIZE BITS_PER_WORD
9760 #endif
9761
9762 #ifndef LONG_LONG_TYPE_SIZE
9763 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9764 #endif
9765
9766 #ifndef FLOAT_TYPE_SIZE
9767 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9768 #endif
9769
9770 #ifndef DOUBLE_TYPE_SIZE
9771 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9772 #endif
9773
9774 #ifndef LONG_DOUBLE_TYPE_SIZE
9775 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9776 #endif
9777
9778 unsigned long
9779 sparc_type_code (tree type)
9780 {
9781 unsigned long qualifiers = 0;
9782 unsigned shift;
9783
9784 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9785 setting more, since some assemblers will give an error for this. Also,
9786 we must be careful to avoid shifts of 32 bits or more to avoid getting
9787 unpredictable results. */
9788
9789 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9790 {
9791 switch (TREE_CODE (type))
9792 {
9793 case ERROR_MARK:
9794 return qualifiers;
9795
9796 case ARRAY_TYPE:
9797 qualifiers |= (3 << shift);
9798 break;
9799
9800 case FUNCTION_TYPE:
9801 case METHOD_TYPE:
9802 qualifiers |= (2 << shift);
9803 break;
9804
9805 case POINTER_TYPE:
9806 case REFERENCE_TYPE:
9807 case OFFSET_TYPE:
9808 qualifiers |= (1 << shift);
9809 break;
9810
9811 case RECORD_TYPE:
9812 return (qualifiers | 8);
9813
9814 case UNION_TYPE:
9815 case QUAL_UNION_TYPE:
9816 return (qualifiers | 9);
9817
9818 case ENUMERAL_TYPE:
9819 return (qualifiers | 10);
9820
9821 case VOID_TYPE:
9822 return (qualifiers | 16);
9823
9824 case INTEGER_TYPE:
9825 /* If this is a range type, consider it to be the underlying
9826 type. */
9827 if (TREE_TYPE (type) != 0)
9828 break;
9829
9830 /* Carefully distinguish all the standard types of C,
9831 without messing up if the language is not C. We do this by
9832 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9833 look at both the names and the above fields, but that's redundant.
9834 Any type whose size is between two C types will be considered
9835 to be the wider of the two types. Also, we do not have a
9836 special code to use for "long long", so anything wider than
9837 long is treated the same. Note that we can't distinguish
9838 between "int" and "long" in this code if they are the same
9839 size, but that's fine, since neither can the assembler. */
9840
9841 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9842 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9843
9844 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9845 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9846
9847 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9848 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9849
9850 else
9851 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9852
9853 case REAL_TYPE:
9854 /* If this is a range type, consider it to be the underlying
9855 type. */
9856 if (TREE_TYPE (type) != 0)
9857 break;
9858
9859 /* Carefully distinguish all the standard types of C,
9860 without messing up if the language is not C. */
9861
9862 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9863 return (qualifiers | 6);
9864
9865 else
9866 return (qualifiers | 7);
9867
9868 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9869 /* ??? We need to distinguish between double and float complex types,
9870 but I don't know how yet because I can't reach this code from
9871 existing front-ends. */
9872 return (qualifiers | 7); /* Who knows? */
9873
9874 case VECTOR_TYPE:
9875 case BOOLEAN_TYPE: /* Boolean truth value type. */
9876 case LANG_TYPE:
9877 case NULLPTR_TYPE:
9878 return qualifiers;
9879
9880 default:
9881 gcc_unreachable (); /* Not a type! */
9882 }
9883 }
9884
9885 return qualifiers;
9886 }
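
/* Worked example (editor's note, not in the original sources): assuming
   INT_TYPE_SIZE is 32, a hypothetical argument of type "int **" is encoded
   by the loop above as

     unsigned long q = 0;
     q |= 1 << 6;    // outer POINTER_TYPE, first derived-type slot
     q |= 1 << 8;    // inner POINTER_TYPE, second derived-type slot
     q |= 4;         // signed integer wider than short: base code 4
     // q == 0x144

   so derived-type codes occupy successive 2-bit fields starting at bit 6
   and the base-type code ends up in the low-order bits.  */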
9887
9888 /* Nested function support. */
9889
9890 /* Emit RTL insns to initialize the variable parts of a trampoline.
9891 FNADDR is an RTX for the address of the function's pure code.
9892 CXT is an RTX for the static chain value for the function.
9893
9894 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9895 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9896 (to store insns). This is a bit excessive. Perhaps a different
9897 mechanism would be better here.
9898
9899 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9900
9901 static void
9902 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9903 {
9904 /* SPARC 32-bit trampoline:
9905
9906 sethi %hi(fn), %g1
9907 sethi %hi(static), %g2
9908 jmp %g1+%lo(fn)
9909 or %g2, %lo(static), %g2
9910
9911 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9912 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9913 */
9914
9915 emit_move_insn
9916 (adjust_address (m_tramp, SImode, 0),
9917 expand_binop (SImode, ior_optab,
9918 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9919 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9920 NULL_RTX, 1, OPTAB_DIRECT));
9921
9922 emit_move_insn
9923 (adjust_address (m_tramp, SImode, 4),
9924 expand_binop (SImode, ior_optab,
9925 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9926 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9927 NULL_RTX, 1, OPTAB_DIRECT));
9928
9929 emit_move_insn
9930 (adjust_address (m_tramp, SImode, 8),
9931 expand_binop (SImode, ior_optab,
9932 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9933 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9934 NULL_RTX, 1, OPTAB_DIRECT));
9935
9936 emit_move_insn
9937 (adjust_address (m_tramp, SImode, 12),
9938 expand_binop (SImode, ior_optab,
9939 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9940 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9941 NULL_RTX, 1, OPTAB_DIRECT));
9942
9943 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9944 aligned on a 16 byte boundary so one flush clears it all. */
9945 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9946 if (sparc_cpu != PROCESSOR_ULTRASPARC
9947 && sparc_cpu != PROCESSOR_ULTRASPARC3
9948 && sparc_cpu != PROCESSOR_NIAGARA
9949 && sparc_cpu != PROCESSOR_NIAGARA2
9950 && sparc_cpu != PROCESSOR_NIAGARA3
9951 && sparc_cpu != PROCESSOR_NIAGARA4
9952 && sparc_cpu != PROCESSOR_NIAGARA7
9953 && sparc_cpu != PROCESSOR_M8)
9954 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9955
9956 /* Call __enable_execute_stack after writing onto the stack to make sure
9957 the stack address is accessible. */
9958 #ifdef HAVE_ENABLE_EXECUTE_STACK
9959 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9960 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9961 #endif
9962
9963 }
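
/* Illustrative sketch (editor's note, not in the original sources): the four
   RTL moves above compute at run time the same instruction words that this
   host-side helper would compute for sample 32-bit addresses; the magic
   constants are the SETHI/JMPL/OR templates shown in the comment at the top
   of the function.

     #include <stdint.h>

     static void
     sparc32_tramp_words (uint32_t fn, uint32_t chain, uint32_t w[4])
     {
       w[0] = 0x03000000 | (fn >> 10);        // sethi %hi(fn), %g1
       w[1] = 0x05000000 | (chain >> 10);     // sethi %hi(static), %g2
       w[2] = 0x81c06000 | (fn & 0x3ff);      // jmp   %g1 + %lo(fn)
       w[3] = 0x8410a000 | (chain & 0x3ff);   // or    %g2, %lo(static), %g2
     }
*/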
9964
9965 /* The 64-bit version is simpler because it makes more sense to load the
9966 values as "immediate" data out of the trampoline. It's also easier since
9967 we can read the PC without clobbering a register. */
9968
9969 static void
9970 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9971 {
9972 /* SPARC 64-bit trampoline:
9973
9974 rd %pc, %g1
9975 ldx [%g1+24], %g5
9976 jmp %g5
9977 ldx [%g1+16], %g5
9978 +16 bytes data
9979 */
9980
9981 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9982 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9983 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9984 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9985 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9986 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9987 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9988 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9989 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9990 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9991 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9992
9993 if (sparc_cpu != PROCESSOR_ULTRASPARC
9994 && sparc_cpu != PROCESSOR_ULTRASPARC3
9995 && sparc_cpu != PROCESSOR_NIAGARA
9996 && sparc_cpu != PROCESSOR_NIAGARA2
9997 && sparc_cpu != PROCESSOR_NIAGARA3
9998 && sparc_cpu != PROCESSOR_NIAGARA4
9999 && sparc_cpu != PROCESSOR_NIAGARA7
10000 && sparc_cpu != PROCESSOR_M8)
10001 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
10002
10003 /* Call __enable_execute_stack after writing onto the stack to make sure
10004 the stack address is accessible. */
10005 #ifdef HAVE_ENABLE_EXECUTE_STACK
10006 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10007 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10008 #endif
10009 }
10010
10011 /* Worker for TARGET_TRAMPOLINE_INIT. */
10012
10013 static void
10014 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
10015 {
10016 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
10017 cxt = force_reg (Pmode, cxt);
10018 if (TARGET_ARCH64)
10019 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
10020 else
10021 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
10022 }
10023
10024 /* Adjust the cost of a scheduling dependency. Return the new cost of
10025 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
10026
10027 static int
10028 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
10029 int cost)
10030 {
10031 enum attr_type insn_type;
10032
10033 if (recog_memoized (insn) < 0)
10034 return cost;
10035
10036 insn_type = get_attr_type (insn);
10037
10038 if (dep_type == 0)
10039 {
10040 /* Data dependency; DEP_INSN writes a register that INSN reads some
10041 cycles later. */
10042
10043 /* if a load, then the dependence must be on the memory address;
10044 add an extra "cycle". Note that the cost could be two cycles
10045 if the reg was written late in an instruction group; we cannot tell
10046 here. */
10047 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
10048 return cost + 3;
10049
10050 /* Get the delay only if the address of the store is the dependence. */
10051 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
10052 {
10053 rtx pat = PATTERN(insn);
10054 rtx dep_pat = PATTERN (dep_insn);
10055
10056 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10057 return cost; /* This should not happen! */
10058
10059 /* The dependency between the two instructions was on the data that
10060 is being stored. Assume that this implies that the address of the
10061 store is not dependent. */
10062 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10063 return cost;
10064
10065 return cost + 3; /* An approximation. */
10066 }
10067
10068 /* A shift instruction cannot receive its data from an instruction
10069 in the same cycle; add a one cycle penalty. */
10070 if (insn_type == TYPE_SHIFT)
10071 return cost + 3; /* Split before cascade into shift. */
10072 }
10073 else
10074 {
10075 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
10076 INSN writes some cycles later. */
10077
10078 /* These are only significant for the fpu unit; writing a fp reg before
10079 the fpu has finished with it stalls the processor. */
10080
10081 /* Reusing an integer register causes no problems. */
10082 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10083 return 0;
10084 }
10085
10086 return cost;
10087 }
10088
10089 static int
10090 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
10091 int cost)
10092 {
10093 enum attr_type insn_type, dep_type;
10094 rtx pat = PATTERN(insn);
10095 rtx dep_pat = PATTERN (dep_insn);
10096
10097 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
10098 return cost;
10099
10100 insn_type = get_attr_type (insn);
10101 dep_type = get_attr_type (dep_insn);
10102
10103 switch (dtype)
10104 {
10105 case 0:
10106 /* Data dependency; DEP_INSN writes a register that INSN reads some
10107 cycles later. */
10108
10109 switch (insn_type)
10110 {
10111 case TYPE_STORE:
10112 case TYPE_FPSTORE:
10113 /* Get the delay only if the address of the store is the dependence. */
10114 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10115 return cost;
10116
10117 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10118 return cost;
10119 return cost + 3;
10120
10121 case TYPE_LOAD:
10122 case TYPE_SLOAD:
10123 case TYPE_FPLOAD:
10124 /* If a load, then the dependence must be on the memory address. If
10125 the addresses aren't equal, then it might be a false dependency */
10126 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
10127 {
10128 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
10129 || GET_CODE (SET_DEST (dep_pat)) != MEM
10130 || GET_CODE (SET_SRC (pat)) != MEM
10131 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
10132 XEXP (SET_SRC (pat), 0)))
10133 return cost + 2;
10134
10135 return cost + 8;
10136 }
10137 break;
10138
10139 case TYPE_BRANCH:
10140 /* Compare to branch latency is 0. There is no benefit from
10141 separating compare and branch. */
10142 if (dep_type == TYPE_COMPARE)
10143 return 0;
10144 /* Floating point compare to branch latency is less than
10145 compare to conditional move. */
10146 if (dep_type == TYPE_FPCMP)
10147 return cost - 1;
10148 break;
10149 default:
10150 break;
10151 }
10152 break;
10153
10154 case REG_DEP_ANTI:
10155 /* Anti-dependencies only penalize the fpu unit. */
10156 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10157 return 0;
10158 break;
10159
10160 default:
10161 break;
10162 }
10163
10164 return cost;
10165 }
10166
10167 static int
10168 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
10169 unsigned int)
10170 {
10171 switch (sparc_cpu)
10172 {
10173 case PROCESSOR_SUPERSPARC:
10174 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
10175 break;
10176 case PROCESSOR_HYPERSPARC:
10177 case PROCESSOR_SPARCLITE86X:
10178 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
10179 break;
10180 default:
10181 break;
10182 }
10183 return cost;
10184 }
10185
10186 static void
10187 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
10188 int sched_verbose ATTRIBUTE_UNUSED,
10189 int max_ready ATTRIBUTE_UNUSED)
10190 {}
10191
10192 static int
10193 sparc_use_sched_lookahead (void)
10194 {
10195 switch (sparc_cpu)
10196 {
10197 case PROCESSOR_ULTRASPARC:
10198 case PROCESSOR_ULTRASPARC3:
10199 return 4;
10200 case PROCESSOR_SUPERSPARC:
10201 case PROCESSOR_HYPERSPARC:
10202 case PROCESSOR_SPARCLITE86X:
10203 return 3;
10204 case PROCESSOR_NIAGARA4:
10205 case PROCESSOR_NIAGARA7:
10206 case PROCESSOR_M8:
10207 return 2;
10208 case PROCESSOR_NIAGARA:
10209 case PROCESSOR_NIAGARA2:
10210 case PROCESSOR_NIAGARA3:
10211 default:
10212 return 0;
10213 }
10214 }
10215
10216 static int
10217 sparc_issue_rate (void)
10218 {
10219 switch (sparc_cpu)
10220 {
10221 case PROCESSOR_ULTRASPARC:
10222 case PROCESSOR_ULTRASPARC3:
10223 case PROCESSOR_M8:
10224 return 4;
10225 case PROCESSOR_SUPERSPARC:
10226 return 3;
10227 case PROCESSOR_HYPERSPARC:
10228 case PROCESSOR_SPARCLITE86X:
10229 case PROCESSOR_V9:
10230 /* Assume V9 processors are capable of at least dual-issue. */
10231 case PROCESSOR_NIAGARA4:
10232 case PROCESSOR_NIAGARA7:
10233 return 2;
10234 case PROCESSOR_NIAGARA:
10235 case PROCESSOR_NIAGARA2:
10236 case PROCESSOR_NIAGARA3:
10237 default:
10238 return 1;
10239 }
10240 }
10241
10242 int
10243 sparc_branch_cost (bool speed_p, bool predictable_p)
10244 {
10245 if (!speed_p)
10246 return 2;
10247
10248 /* For pre-V9 processors we use a single value (usually 3) to take into
10249 account the potential annulling of the delay slot (which ends up being
10250 a bubble in the pipeline slot) plus a cycle to take into consideration
10251 the instruction cache effects.
10252
10253 On V9 and later processors, which have branch prediction facilities,
10254 we take into account whether the branch is (easily) predictable. */
10255 const int cost = sparc_costs->branch_cost;
10256
10257 switch (sparc_cpu)
10258 {
10259 case PROCESSOR_V9:
10260 case PROCESSOR_ULTRASPARC:
10261 case PROCESSOR_ULTRASPARC3:
10262 case PROCESSOR_NIAGARA:
10263 case PROCESSOR_NIAGARA2:
10264 case PROCESSOR_NIAGARA3:
10265 case PROCESSOR_NIAGARA4:
10266 case PROCESSOR_NIAGARA7:
10267 case PROCESSOR_M8:
10268 return cost + (predictable_p ? 0 : 2);
10269
10270 default:
10271 return cost;
10272 }
10273 }
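
/* Example (editor's note, not in the original sources): with a tuning whose
   branch_cost field is 3, a branch compiled for speed costs 3 on a pre-V9
   processor regardless of predictability, and 3 or 5 on the V9-class
   processors listed above depending on whether it is predictable; when not
   optimizing for speed, every branch is costed at 2.  */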
10274
10275 static int
10276 set_extends (rtx_insn *insn)
10277 {
10278 rtx pat = PATTERN (insn);
10279
10280 switch (GET_CODE (SET_SRC (pat)))
10281 {
10282 /* Load and some shift instructions zero extend. */
10283 case MEM:
10284 case ZERO_EXTEND:
10285 /* sethi clears the high bits */
10286 case HIGH:
10287 /* LO_SUM is used with sethi. sethi cleared the high
10288 bits and the values used with lo_sum are positive */
10289 case LO_SUM:
10290 /* Store flag stores 0 or 1 */
10291 case LT: case LTU:
10292 case GT: case GTU:
10293 case LE: case LEU:
10294 case GE: case GEU:
10295 case EQ:
10296 case NE:
10297 return 1;
10298 case AND:
10299 {
10300 rtx op0 = XEXP (SET_SRC (pat), 0);
10301 rtx op1 = XEXP (SET_SRC (pat), 1);
10302 if (GET_CODE (op1) == CONST_INT)
10303 return INTVAL (op1) >= 0;
10304 if (GET_CODE (op0) != REG)
10305 return 0;
10306 if (sparc_check_64 (op0, insn) == 1)
10307 return 1;
10308 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10309 }
10310 case IOR:
10311 case XOR:
10312 {
10313 rtx op0 = XEXP (SET_SRC (pat), 0);
10314 rtx op1 = XEXP (SET_SRC (pat), 1);
10315 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
10316 return 0;
10317 if (GET_CODE (op1) == CONST_INT)
10318 return INTVAL (op1) >= 0;
10319 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10320 }
10321 case LSHIFTRT:
10322 return GET_MODE (SET_SRC (pat)) == SImode;
10323 /* Positive integers leave the high bits zero. */
10324 case CONST_INT:
10325 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
10326 case ASHIFTRT:
10327 case SIGN_EXTEND:
10328 return - (GET_MODE (SET_SRC (pat)) == SImode);
10329 case REG:
10330 return sparc_check_64 (SET_SRC (pat), insn);
10331 default:
10332 return 0;
10333 }
10334 }
10335
10336 /* We _ought_ to have only one kind per function, but... */
10337 static GTY(()) rtx sparc_addr_diff_list;
10338 static GTY(()) rtx sparc_addr_list;
10339
10340 void
10341 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
10342 {
10343 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
10344 if (diff)
10345 sparc_addr_diff_list
10346 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
10347 else
10348 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
10349 }
10350
10351 static void
10352 sparc_output_addr_vec (rtx vec)
10353 {
10354 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10355 int idx, vlen = XVECLEN (body, 0);
10356
10357 #ifdef ASM_OUTPUT_ADDR_VEC_START
10358 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10359 #endif
10360
10361 #ifdef ASM_OUTPUT_CASE_LABEL
10362 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10363 NEXT_INSN (lab));
10364 #else
10365 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10366 #endif
10367
10368 for (idx = 0; idx < vlen; idx++)
10369 {
10370 ASM_OUTPUT_ADDR_VEC_ELT
10371 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10372 }
10373
10374 #ifdef ASM_OUTPUT_ADDR_VEC_END
10375 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10376 #endif
10377 }
10378
10379 static void
10380 sparc_output_addr_diff_vec (rtx vec)
10381 {
10382 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10383 rtx base = XEXP (XEXP (body, 0), 0);
10384 int idx, vlen = XVECLEN (body, 1);
10385
10386 #ifdef ASM_OUTPUT_ADDR_VEC_START
10387 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10388 #endif
10389
10390 #ifdef ASM_OUTPUT_CASE_LABEL
10391 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10392 NEXT_INSN (lab));
10393 #else
10394 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10395 #endif
10396
10397 for (idx = 0; idx < vlen; idx++)
10398 {
10399 ASM_OUTPUT_ADDR_DIFF_ELT
10400 (asm_out_file,
10401 body,
10402 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10403 CODE_LABEL_NUMBER (base));
10404 }
10405
10406 #ifdef ASM_OUTPUT_ADDR_VEC_END
10407 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10408 #endif
10409 }
10410
10411 static void
10412 sparc_output_deferred_case_vectors (void)
10413 {
10414 rtx t;
10415 int align;
10416
10417 if (sparc_addr_list == NULL_RTX
10418 && sparc_addr_diff_list == NULL_RTX)
10419 return;
10420
10421 /* Align to cache line in the function's code section. */
10422 switch_to_section (current_function_section ());
10423
10424 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
10425 if (align > 0)
10426 ASM_OUTPUT_ALIGN (asm_out_file, align);
10427
10428 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
10429 sparc_output_addr_vec (XEXP (t, 0));
10430 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
10431 sparc_output_addr_diff_vec (XEXP (t, 0));
10432
10433 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
10434 }
10435
10436 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
10437 unknown. Return 1 if the high bits are zero, -1 if the register is
10438 sign extended. */
10439 int
10440 sparc_check_64 (rtx x, rtx_insn *insn)
10441 {
10442 /* If a register is set only once it is safe to ignore insns this
10443 code does not know how to handle. The loop will either recognize
10444 the single set and return the correct value or fail to recognize
10445 it and return 0. */
10446 int set_once = 0;
10447 rtx y = x;
10448
10449 gcc_assert (GET_CODE (x) == REG);
10450
10451 if (GET_MODE (x) == DImode)
10452 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
10453
10454 if (flag_expensive_optimizations
10455 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
10456 set_once = 1;
10457
10458 if (insn == 0)
10459 {
10460 if (set_once)
10461 insn = get_last_insn_anywhere ();
10462 else
10463 return 0;
10464 }
10465
10466 while ((insn = PREV_INSN (insn)))
10467 {
10468 switch (GET_CODE (insn))
10469 {
10470 case JUMP_INSN:
10471 case NOTE:
10472 break;
10473 case CODE_LABEL:
10474 case CALL_INSN:
10475 default:
10476 if (! set_once)
10477 return 0;
10478 break;
10479 case INSN:
10480 {
10481 rtx pat = PATTERN (insn);
10482 if (GET_CODE (pat) != SET)
10483 return 0;
10484 if (rtx_equal_p (x, SET_DEST (pat)))
10485 return set_extends (insn);
10486 if (y && rtx_equal_p (y, SET_DEST (pat)))
10487 return set_extends (insn);
10488 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
10489 return 0;
10490 }
10491 }
10492 }
10493 return 0;
10494 }
10495
10496 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
10497 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
10498
10499 const char *
10500 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
10501 {
10502 static char asm_code[60];
10503
10504 /* The scratch register is only required when the destination
10505 register is not a 64-bit global or out register. */
10506 if (which_alternative != 2)
10507 operands[3] = operands[0];
10508
10509 /* We can only shift by constants <= 63. */
10510 if (GET_CODE (operands[2]) == CONST_INT)
10511 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
10512
10513 if (GET_CODE (operands[1]) == CONST_INT)
10514 {
10515 output_asm_insn ("mov\t%1, %3", operands);
10516 }
10517 else
10518 {
10519 output_asm_insn ("sllx\t%H1, 32, %3", operands);
10520 if (sparc_check_64 (operands[1], insn) <= 0)
10521 output_asm_insn ("srl\t%L1, 0, %L1", operands);
10522 output_asm_insn ("or\t%L1, %3, %3", operands);
10523 }
10524
10525 strcpy (asm_code, opcode);
10526
10527 if (which_alternative != 2)
10528 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
10529 else
10530 return
10531 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
10532 }
10533
10534 /* Output rtl to increment the profiler label LABELNO
10535 for profiling a function entry. */
10536
10537 void
10538 sparc_profile_hook (int labelno)
10539 {
10540 char buf[32];
10541 rtx lab, fun;
10542
10543 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
10544 if (NO_PROFILE_COUNTERS)
10545 {
10546 emit_library_call (fun, LCT_NORMAL, VOIDmode);
10547 }
10548 else
10549 {
10550 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10551 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
10552 emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode);
10553 }
10554 }
10555
10556 #ifdef TARGET_SOLARIS
10557 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
10558
10559 static void
10560 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
10561 tree decl ATTRIBUTE_UNUSED)
10562 {
10563 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
10564 {
10565 solaris_elf_asm_comdat_section (name, flags, decl);
10566 return;
10567 }
10568
10569 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
10570
10571 if (!(flags & SECTION_DEBUG))
10572 fputs (",#alloc", asm_out_file);
10573 #if HAVE_GAS_SECTION_EXCLUDE
10574 if (flags & SECTION_EXCLUDE)
10575 fputs (",#exclude", asm_out_file);
10576 #endif
10577 if (flags & SECTION_WRITE)
10578 fputs (",#write", asm_out_file);
10579 if (flags & SECTION_TLS)
10580 fputs (",#tls", asm_out_file);
10581 if (flags & SECTION_CODE)
10582 fputs (",#execinstr", asm_out_file);
10583
10584 if (flags & SECTION_NOTYPE)
10585 ;
10586 else if (flags & SECTION_BSS)
10587 fputs (",#nobits", asm_out_file);
10588 else
10589 fputs (",#progbits", asm_out_file);
10590
10591 fputc ('\n', asm_out_file);
10592 }
10593 #endif /* TARGET_SOLARIS */
10594
10595 /* We do not allow indirect calls to be optimized into sibling calls.
10596
10597 We cannot use sibling calls when delayed branches are disabled
10598 because they will likely require the call delay slot to be filled.
10599
10600 Also, on SPARC 32-bit we cannot emit a sibling call when the
10601 current function returns a structure. This is because the "unimp
10602 after call" convention would cause the callee to return to the
10603 wrong place. The generic code already disallows cases where the
10604 function being called returns a structure.
10605
10606 It may seem strange how this last case could occur. Usually there
10607 is code after the call which jumps to epilogue code which dumps the
10608 return value into the struct return area. That ought to invalidate
10609 the sibling call right? Well, in the C++ case we can end up passing
10610 the pointer to the struct return area to a constructor (which returns
10611 void) and then nothing else happens. Such a sibling call would look
10612 valid without the added check here.
10613
10614 VxWorks PIC PLT entries require the global pointer to be initialized
10615 on entry. We therefore can't emit sibling calls to them. */
10616 static bool
10617 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10618 {
10619 return (decl
10620 && flag_delayed_branch
10621 && (TARGET_ARCH64 || ! cfun->returns_struct)
10622 && !(TARGET_VXWORKS_RTP
10623 && flag_pic
10624 && !targetm.binds_local_p (decl)));
10625 }
10626
10627 /* libfunc renaming. */
10628
10629 static void
10630 sparc_init_libfuncs (void)
10631 {
10632 if (TARGET_ARCH32)
10633 {
10634 /* Use the subroutines that Sun's library provides for integer
10635 multiply and divide. The `*' prevents an underscore from
10636 being prepended by the compiler. .umul is a little faster
10637 than .mul. */
10638 set_optab_libfunc (smul_optab, SImode, "*.umul");
10639 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10640 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10641 set_optab_libfunc (smod_optab, SImode, "*.rem");
10642 set_optab_libfunc (umod_optab, SImode, "*.urem");
10643
10644 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */
10645 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10646 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10647 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10648 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10649 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10650
10651 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10652 is because with soft-float, the SFmode and DFmode sqrt
10653 instructions will be absent, and the compiler will notice and
10654 try to use the TFmode sqrt instruction for calls to the
10655 builtin function sqrt, but this fails. */
10656 if (TARGET_FPU)
10657 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10658
10659 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10660 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10661 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10662 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10663 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10664 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10665
10666 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10667 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10668 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10669 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10670
10671 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10672 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10673 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10674 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10675
10676 if (DITF_CONVERSION_LIBFUNCS)
10677 {
10678 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10679 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10680 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10681 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10682 }
10683
10684 if (SUN_CONVERSION_LIBFUNCS)
10685 {
10686 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10687 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10688 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10689 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10690 }
10691 }
10692 if (TARGET_ARCH64)
10693 {
10694 /* In the SPARC 64bit ABI, SImode multiply and divide functions
10695 do not exist in the library. Make sure the compiler does not
10696 emit calls to them by accident. (It should always use the
10697 hardware instructions.) */
10698 set_optab_libfunc (smul_optab, SImode, 0);
10699 set_optab_libfunc (sdiv_optab, SImode, 0);
10700 set_optab_libfunc (udiv_optab, SImode, 0);
10701 set_optab_libfunc (smod_optab, SImode, 0);
10702 set_optab_libfunc (umod_optab, SImode, 0);
10703
10704 if (SUN_INTEGER_MULTIPLY_64)
10705 {
10706 set_optab_libfunc (smul_optab, DImode, "__mul64");
10707 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10708 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10709 set_optab_libfunc (smod_optab, DImode, "__rem64");
10710 set_optab_libfunc (umod_optab, DImode, "__urem64");
10711 }
10712
10713 if (SUN_CONVERSION_LIBFUNCS)
10714 {
10715 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10716 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10717 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10718 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10719 }
10720 }
10721 }
10722
10723 /* SPARC builtins. */
10724 enum sparc_builtins
10725 {
10726 /* FPU builtins. */
10727 SPARC_BUILTIN_LDFSR,
10728 SPARC_BUILTIN_STFSR,
10729
10730 /* VIS 1.0 builtins. */
10731 SPARC_BUILTIN_FPACK16,
10732 SPARC_BUILTIN_FPACK32,
10733 SPARC_BUILTIN_FPACKFIX,
10734 SPARC_BUILTIN_FEXPAND,
10735 SPARC_BUILTIN_FPMERGE,
10736 SPARC_BUILTIN_FMUL8X16,
10737 SPARC_BUILTIN_FMUL8X16AU,
10738 SPARC_BUILTIN_FMUL8X16AL,
10739 SPARC_BUILTIN_FMUL8SUX16,
10740 SPARC_BUILTIN_FMUL8ULX16,
10741 SPARC_BUILTIN_FMULD8SUX16,
10742 SPARC_BUILTIN_FMULD8ULX16,
10743 SPARC_BUILTIN_FALIGNDATAV4HI,
10744 SPARC_BUILTIN_FALIGNDATAV8QI,
10745 SPARC_BUILTIN_FALIGNDATAV2SI,
10746 SPARC_BUILTIN_FALIGNDATADI,
10747 SPARC_BUILTIN_WRGSR,
10748 SPARC_BUILTIN_RDGSR,
10749 SPARC_BUILTIN_ALIGNADDR,
10750 SPARC_BUILTIN_ALIGNADDRL,
10751 SPARC_BUILTIN_PDIST,
10752 SPARC_BUILTIN_EDGE8,
10753 SPARC_BUILTIN_EDGE8L,
10754 SPARC_BUILTIN_EDGE16,
10755 SPARC_BUILTIN_EDGE16L,
10756 SPARC_BUILTIN_EDGE32,
10757 SPARC_BUILTIN_EDGE32L,
10758 SPARC_BUILTIN_FCMPLE16,
10759 SPARC_BUILTIN_FCMPLE32,
10760 SPARC_BUILTIN_FCMPNE16,
10761 SPARC_BUILTIN_FCMPNE32,
10762 SPARC_BUILTIN_FCMPGT16,
10763 SPARC_BUILTIN_FCMPGT32,
10764 SPARC_BUILTIN_FCMPEQ16,
10765 SPARC_BUILTIN_FCMPEQ32,
10766 SPARC_BUILTIN_FPADD16,
10767 SPARC_BUILTIN_FPADD16S,
10768 SPARC_BUILTIN_FPADD32,
10769 SPARC_BUILTIN_FPADD32S,
10770 SPARC_BUILTIN_FPSUB16,
10771 SPARC_BUILTIN_FPSUB16S,
10772 SPARC_BUILTIN_FPSUB32,
10773 SPARC_BUILTIN_FPSUB32S,
10774 SPARC_BUILTIN_ARRAY8,
10775 SPARC_BUILTIN_ARRAY16,
10776 SPARC_BUILTIN_ARRAY32,
10777
10778 /* VIS 2.0 builtins. */
10779 SPARC_BUILTIN_EDGE8N,
10780 SPARC_BUILTIN_EDGE8LN,
10781 SPARC_BUILTIN_EDGE16N,
10782 SPARC_BUILTIN_EDGE16LN,
10783 SPARC_BUILTIN_EDGE32N,
10784 SPARC_BUILTIN_EDGE32LN,
10785 SPARC_BUILTIN_BMASK,
10786 SPARC_BUILTIN_BSHUFFLEV4HI,
10787 SPARC_BUILTIN_BSHUFFLEV8QI,
10788 SPARC_BUILTIN_BSHUFFLEV2SI,
10789 SPARC_BUILTIN_BSHUFFLEDI,
10790
10791 /* VIS 3.0 builtins. */
10792 SPARC_BUILTIN_CMASK8,
10793 SPARC_BUILTIN_CMASK16,
10794 SPARC_BUILTIN_CMASK32,
10795 SPARC_BUILTIN_FCHKSM16,
10796 SPARC_BUILTIN_FSLL16,
10797 SPARC_BUILTIN_FSLAS16,
10798 SPARC_BUILTIN_FSRL16,
10799 SPARC_BUILTIN_FSRA16,
10800 SPARC_BUILTIN_FSLL32,
10801 SPARC_BUILTIN_FSLAS32,
10802 SPARC_BUILTIN_FSRL32,
10803 SPARC_BUILTIN_FSRA32,
10804 SPARC_BUILTIN_PDISTN,
10805 SPARC_BUILTIN_FMEAN16,
10806 SPARC_BUILTIN_FPADD64,
10807 SPARC_BUILTIN_FPSUB64,
10808 SPARC_BUILTIN_FPADDS16,
10809 SPARC_BUILTIN_FPADDS16S,
10810 SPARC_BUILTIN_FPSUBS16,
10811 SPARC_BUILTIN_FPSUBS16S,
10812 SPARC_BUILTIN_FPADDS32,
10813 SPARC_BUILTIN_FPADDS32S,
10814 SPARC_BUILTIN_FPSUBS32,
10815 SPARC_BUILTIN_FPSUBS32S,
10816 SPARC_BUILTIN_FUCMPLE8,
10817 SPARC_BUILTIN_FUCMPNE8,
10818 SPARC_BUILTIN_FUCMPGT8,
10819 SPARC_BUILTIN_FUCMPEQ8,
10820 SPARC_BUILTIN_FHADDS,
10821 SPARC_BUILTIN_FHADDD,
10822 SPARC_BUILTIN_FHSUBS,
10823 SPARC_BUILTIN_FHSUBD,
10824 SPARC_BUILTIN_FNHADDS,
10825 SPARC_BUILTIN_FNHADDD,
10826 SPARC_BUILTIN_UMULXHI,
10827 SPARC_BUILTIN_XMULX,
10828 SPARC_BUILTIN_XMULXHI,
10829
10830 /* VIS 4.0 builtins. */
10831 SPARC_BUILTIN_FPADD8,
10832 SPARC_BUILTIN_FPADDS8,
10833 SPARC_BUILTIN_FPADDUS8,
10834 SPARC_BUILTIN_FPADDUS16,
10835 SPARC_BUILTIN_FPCMPLE8,
10836 SPARC_BUILTIN_FPCMPGT8,
10837 SPARC_BUILTIN_FPCMPULE16,
10838 SPARC_BUILTIN_FPCMPUGT16,
10839 SPARC_BUILTIN_FPCMPULE32,
10840 SPARC_BUILTIN_FPCMPUGT32,
10841 SPARC_BUILTIN_FPMAX8,
10842 SPARC_BUILTIN_FPMAX16,
10843 SPARC_BUILTIN_FPMAX32,
10844 SPARC_BUILTIN_FPMAXU8,
10845 SPARC_BUILTIN_FPMAXU16,
10846 SPARC_BUILTIN_FPMAXU32,
10847 SPARC_BUILTIN_FPMIN8,
10848 SPARC_BUILTIN_FPMIN16,
10849 SPARC_BUILTIN_FPMIN32,
10850 SPARC_BUILTIN_FPMINU8,
10851 SPARC_BUILTIN_FPMINU16,
10852 SPARC_BUILTIN_FPMINU32,
10853 SPARC_BUILTIN_FPSUB8,
10854 SPARC_BUILTIN_FPSUBS8,
10855 SPARC_BUILTIN_FPSUBUS8,
10856 SPARC_BUILTIN_FPSUBUS16,
10857
10858 /* VIS 4.0B builtins. */
10859
10860 /* Note that all the DICTUNPACK* entries should be kept
10861 contiguous. */
10862 SPARC_BUILTIN_FIRST_DICTUNPACK,
10863 SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
10864 SPARC_BUILTIN_DICTUNPACK16,
10865 SPARC_BUILTIN_DICTUNPACK32,
10866 SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,
10867
10868 /* Note that all the FPCMP*SHL entries should be kept
10869 contiguous. */
10870 SPARC_BUILTIN_FIRST_FPCMPSHL,
10871 SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
10872 SPARC_BUILTIN_FPCMPGT8SHL,
10873 SPARC_BUILTIN_FPCMPEQ8SHL,
10874 SPARC_BUILTIN_FPCMPNE8SHL,
10875 SPARC_BUILTIN_FPCMPLE16SHL,
10876 SPARC_BUILTIN_FPCMPGT16SHL,
10877 SPARC_BUILTIN_FPCMPEQ16SHL,
10878 SPARC_BUILTIN_FPCMPNE16SHL,
10879 SPARC_BUILTIN_FPCMPLE32SHL,
10880 SPARC_BUILTIN_FPCMPGT32SHL,
10881 SPARC_BUILTIN_FPCMPEQ32SHL,
10882 SPARC_BUILTIN_FPCMPNE32SHL,
10883 SPARC_BUILTIN_FPCMPULE8SHL,
10884 SPARC_BUILTIN_FPCMPUGT8SHL,
10885 SPARC_BUILTIN_FPCMPULE16SHL,
10886 SPARC_BUILTIN_FPCMPUGT16SHL,
10887 SPARC_BUILTIN_FPCMPULE32SHL,
10888 SPARC_BUILTIN_FPCMPUGT32SHL,
10889 SPARC_BUILTIN_FPCMPDE8SHL,
10890 SPARC_BUILTIN_FPCMPDE16SHL,
10891 SPARC_BUILTIN_FPCMPDE32SHL,
10892 SPARC_BUILTIN_FPCMPUR8SHL,
10893 SPARC_BUILTIN_FPCMPUR16SHL,
10894 SPARC_BUILTIN_FPCMPUR32SHL,
10895 SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,
10896
10897 SPARC_BUILTIN_MAX
10898 };
10899
10900 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10901 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10902
10903 /* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
10904 The instruction should require a constant operand of some sort. The
10905 function prints an error if OPVAL is not valid. */
10906
10907 static int
10908 check_constant_argument (enum insn_code icode, int opnum, rtx opval)
10909 {
10910 if (GET_CODE (opval) != CONST_INT)
10911 {
10912 error ("%qs expects a constant argument", insn_data[icode].name);
10913 return false;
10914 }
10915
10916 if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
10917 {
10918 error ("constant argument out of range for %qs", insn_data[icode].name);
10919 return false;
10920 }
10921 return true;
10922 }
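/* For example, the VIS 4.0B builtins declared below take a small integer
   literal as their last argument; a call such as
   __builtin_vis_dictunpack8 (d, n) with a non-constant N is rejected by
   this routine when the builtin is expanded (see sparc_expand_builtin
   below).  */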
10923
10924 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10925 function decl or NULL_TREE if the builtin was not added. */
10926
10927 static tree
10928 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10929 tree type)
10930 {
10931 tree t
10932 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10933
10934 if (t)
10935 {
10936 sparc_builtins[code] = t;
10937 sparc_builtins_icode[code] = icode;
10938 }
10939
10940 return t;
10941 }
10942
10943 /* Likewise, but also marks the function as "const". */
10944
10945 static tree
10946 def_builtin_const (const char *name, enum insn_code icode,
10947 enum sparc_builtins code, tree type)
10948 {
10949 tree t = def_builtin (name, icode, code, type);
10950
10951 if (t)
10952 TREE_READONLY (t) = 1;
10953
10954 return t;
10955 }
10956
10957 /* Implement the TARGET_INIT_BUILTINS target hook.
10958 Create builtin functions for special SPARC instructions. */
10959
10960 static void
10961 sparc_init_builtins (void)
10962 {
10963 if (TARGET_FPU)
10964 sparc_fpu_init_builtins ();
10965
10966 if (TARGET_VIS)
10967 sparc_vis_init_builtins ();
10968 }
10969
10970 /* Create builtin functions for FPU instructions. */
10971
10972 static void
10973 sparc_fpu_init_builtins (void)
10974 {
10975 tree ftype
10976 = build_function_type_list (void_type_node,
10977 build_pointer_type (unsigned_type_node), 0);
10978 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10979 SPARC_BUILTIN_LDFSR, ftype);
10980 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10981 SPARC_BUILTIN_STFSR, ftype);
10982 }
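/* A minimal usage sketch of the two builtins above (illustrative only):

     unsigned int fsr;
     __builtin_store_fsr (&fsr);	copy %fsr to memory (stfsr)
     __builtin_load_fsr (&fsr);		reload %fsr from memory (ldfsr)

   Both expect a pointer to an unsigned int holding the FSR image.  */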
10983
10984 /* Create builtin functions for VIS instructions. */
10985
10986 static void
10987 sparc_vis_init_builtins (void)
10988 {
10989 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10990 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10991 tree v4hi = build_vector_type (intHI_type_node, 4);
10992 tree v2hi = build_vector_type (intHI_type_node, 2);
10993 tree v2si = build_vector_type (intSI_type_node, 2);
10994 tree v1si = build_vector_type (intSI_type_node, 1);
10995
10996 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10997 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10998 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10999 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
11000 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
11001 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
11002 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
11003 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
11004 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
11005 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
11006 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
11007 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
11008 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
11009 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
11010 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
11011 v8qi, v8qi,
11012 intDI_type_node, 0);
11013 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
11014 v8qi, v8qi, 0);
11015 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
11016 v8qi, v8qi, 0);
11017 tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
11018 intSI_type_node, 0);
11019 tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
11020 intSI_type_node, 0);
11021 tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
11022 						     intSI_type_node, 0);
11023 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
11024 intDI_type_node,
11025 intDI_type_node, 0);
11026 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
11027 intSI_type_node,
11028 intSI_type_node, 0);
11029 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
11030 ptr_type_node,
11031 intSI_type_node, 0);
11032 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
11033 ptr_type_node,
11034 intDI_type_node, 0);
11035 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
11036 ptr_type_node,
11037 ptr_type_node, 0);
11038 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
11039 ptr_type_node,
11040 ptr_type_node, 0);
11041 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
11042 v4hi, v4hi, 0);
11043 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
11044 v2si, v2si, 0);
11045 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
11046 v4hi, v4hi, 0);
11047 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
11048 v2si, v2si, 0);
11049 tree void_ftype_di = build_function_type_list (void_type_node,
11050 intDI_type_node, 0);
11051 tree di_ftype_void = build_function_type_list (intDI_type_node,
11052 void_type_node, 0);
11053 tree void_ftype_si = build_function_type_list (void_type_node,
11054 intSI_type_node, 0);
11055 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
11056 float_type_node,
11057 float_type_node, 0);
11058 tree df_ftype_df_df = build_function_type_list (double_type_node,
11059 double_type_node,
11060 double_type_node, 0);
11061
11062 /* Packing and expanding vectors. */
11063 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
11064 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
11065 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
11066 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
11067 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
11068 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
11069 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
11070 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
11071 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
11072 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
11073
11074 /* Multiplications. */
11075 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
11076 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
11077 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
11078 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
11079 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
11080 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
11081 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
11082 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
11083 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
11084 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
11085 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
11086 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
11087 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
11088 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
11089
11090 /* Data aligning. */
11091 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
11092 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
11093 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
11094 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
11095 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
11096 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
11097 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
11098 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
11099
11100 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
11101 SPARC_BUILTIN_WRGSR, void_ftype_di);
11102 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
11103 SPARC_BUILTIN_RDGSR, di_ftype_void);
11104
11105 if (TARGET_ARCH64)
11106 {
11107 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
11108 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
11109 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
11110 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
11111 }
11112 else
11113 {
11114 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
11115 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
11116 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
11117 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
11118 }
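  /* Typical use of the two groups above (illustrative): alignaddr rounds an
     address down to an 8-byte boundary and latches the discarded low-order
     bits in the GSR, after which faligndata extracts the unaligned datum
     from two adjacent aligned loads.  */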
11119
11120 /* Pixel distance. */
11121 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
11122 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
11123
11124 /* Edge handling. */
11125 if (TARGET_ARCH64)
11126 {
11127 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
11128 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
11129 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
11130 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
11131 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
11132 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
11133 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
11134 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
11135 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
11136 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
11137 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
11138 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
11139 }
11140 else
11141 {
11142 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
11143 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
11144 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
11145 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
11146 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
11147 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
11148 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
11149 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
11150 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
11151 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
11152 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
11153 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
11154 }
11155
11156 /* Pixel compare. */
11157 if (TARGET_ARCH64)
11158 {
11159 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
11160 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
11161 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
11162 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
11163 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
11164 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
11165 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
11166 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
11167 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
11168 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
11169 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
11170 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
11171 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
11172 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
11173 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
11174 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
11175 }
11176 else
11177 {
11178 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
11179 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
11180 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
11181 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
11182 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
11183 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
11184 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
11185 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
11186 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
11187 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
11188 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
11189 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
11190 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
11191 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
11192 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
11193 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
11194 }
11195
11196 /* Addition and subtraction. */
11197 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
11198 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
11199 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
11200 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
11201 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
11202 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
11203 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
11204 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
11205 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
11206 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
11207 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
11208 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
11209 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
11210 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
11211 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
11212 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
11213
11214 /* Three-dimensional array addressing. */
11215 if (TARGET_ARCH64)
11216 {
11217 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
11218 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
11219 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
11220 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
11221 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
11222 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
11223 }
11224 else
11225 {
11226 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
11227 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
11228 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
11229 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
11230 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
11231 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
11232 }
11233
11234 if (TARGET_VIS2)
11235 {
11236 /* Edge handling. */
11237 if (TARGET_ARCH64)
11238 {
11239 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
11240 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
11241 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
11242 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
11243 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
11244 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
11245 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
11246 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
11247 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
11248 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
11249 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
11250 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
11251 }
11252 else
11253 {
11254 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
11255 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
11256 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
11257 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
11258 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
11259 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
11260 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
11261 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
11262 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
11263 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
11264 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
11265 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
11266 }
11267
11268 /* Byte mask and shuffle. */
11269 if (TARGET_ARCH64)
11270 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
11271 SPARC_BUILTIN_BMASK, di_ftype_di_di);
11272 else
11273 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
11274 SPARC_BUILTIN_BMASK, si_ftype_si_si);
11275 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
11276 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
11277 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
11278 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
11279 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
11280 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
11281 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
11282 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
11283 }
11284
11285 if (TARGET_VIS3)
11286 {
11287 if (TARGET_ARCH64)
11288 {
11289 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
11290 SPARC_BUILTIN_CMASK8, void_ftype_di);
11291 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
11292 SPARC_BUILTIN_CMASK16, void_ftype_di);
11293 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
11294 SPARC_BUILTIN_CMASK32, void_ftype_di);
11295 }
11296 else
11297 {
11298 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
11299 SPARC_BUILTIN_CMASK8, void_ftype_si);
11300 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
11301 SPARC_BUILTIN_CMASK16, void_ftype_si);
11302 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
11303 SPARC_BUILTIN_CMASK32, void_ftype_si);
11304 }
11305
11306 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
11307 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
11308
11309 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
11310 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
11311 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
11312 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
11313 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
11314 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
11315 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
11316 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
11317 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
11318 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
11319 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
11320 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
11321 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
11322 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
11323 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
11324 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
11325
11326 if (TARGET_ARCH64)
11327 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
11328 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
11329 else
11330 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
11331 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
11332
11333 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
11334 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
11335 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
11336 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
11337 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
11338 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
11339
11340 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
11341 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
11342 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
11343 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
11344 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
11345 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
11346 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
11347 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
11348 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
11349 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
11350 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
11351 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
11352 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
11353 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
11354 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
11355 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
11356
11357 if (TARGET_ARCH64)
11358 {
11359 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
11360 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
11361 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
11362 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
11363 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
11364 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
11365 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
11366 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
11367 }
11368 else
11369 {
11370 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
11371 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
11372 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
11373 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
11374 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
11375 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
11376 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
11377 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
11378 }
11379
11380 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
11381 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
11382 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
11383 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
11384 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
11385 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
11386 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
11387 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
11388 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
11389 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
11390 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
11391 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
11392
11393 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
11394 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
11395 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
11396 SPARC_BUILTIN_XMULX, di_ftype_di_di);
11397 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
11398 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
11399 }
11400
11401 if (TARGET_VIS4)
11402 {
11403 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
11404 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
11405 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
11406 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
11407 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
11408 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
11409 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
11410 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
11411
11412
11413 if (TARGET_ARCH64)
11414 {
11415 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
11416 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
11417 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
11418 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
11419 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
11420 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
11421 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
11422 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
11423 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
11424 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11425 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
11426 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11427 }
11428 else
11429 {
11430 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
11431 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
11432 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
11433 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
11434 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
11435 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
11436 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
11437 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
11438 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
11439 					     SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
11440 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
11441 					     SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
11442 }
11443
11444 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
11445 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
11446 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
11447 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
11448 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
11449 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
11450 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
11451 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
11452 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
11453 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
11454 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
11455 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
11456 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
11457 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
11458 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
11459 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
11460 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
11461 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
11462 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
11463 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
11464 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
11465 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
11466 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
11467 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
11468 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
11469 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
11470 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
11471 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
11472 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
11473 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
11474 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
11475 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
11476 }
11477
11478 if (TARGET_VIS4B)
11479 {
11480 def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
11481 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
11482 def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
11483 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
11484 def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
11485 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);
11486
11487 if (TARGET_ARCH64)
11488 {
11489 tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
11490 v8qi, v8qi,
11491 intSI_type_node, 0);
11492 tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
11493 v4hi, v4hi,
11494 intSI_type_node, 0);
11495 tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
11496 v2si, v2si,
11497 intSI_type_node, 0);
11498
11499 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
11500 SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
11501 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
11502 SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
11503 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
11504 SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
11505 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
11506 SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);
11507
11508 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
11509 SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
11510 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
11511 SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
11512 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
11513 SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
11514 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
11515 SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);
11516
11517 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
11518 SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
11519 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
11520 SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
11521 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
11522 SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
11523 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
11524 SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);
11525
11526
11527 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
11528 SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
11529 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
11530 SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);
11531
11532 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
11533 SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
11534 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
11535 SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);
11536
11537 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
11538 SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
11539 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
11540 SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);
11541
11542 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
11543 SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
11544 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
11545 SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
11546 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
11547 SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);
11548
11549 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
11550 SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
11551 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
11552 SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
11553 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
11554 SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
11555
11556 }
11557 else
11558 {
11559 tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
11560 v8qi, v8qi,
11561 intSI_type_node, 0);
11562 tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
11563 v4hi, v4hi,
11564 intSI_type_node, 0);
11565 tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
11566 v2si, v2si,
11567 intSI_type_node, 0);
11568
11569 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
11570 SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
11571 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
11572 SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
11573 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
11574 SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
11575 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
11576 SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);
11577
11578 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
11579 SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
11580 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
11581 SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
11582 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
11583 SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
11584 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
11585 SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);
11586
11587 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
11588 SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si);
11589 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
11590 SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
11591 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
11592 SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
11593 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
11594 SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);
11595
11596
11597 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
11598 SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
11599 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
11600 SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);
11601
11602 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
11603 SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
11604 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
11605 SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);
11606
11607 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
11608 SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
11609 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
11610 SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);
11611
11612 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
11613 SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si);
11614 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
11615 SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
11616 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
11617 SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);
11618
11619 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
11620 SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
11621 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
11622 SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
11623 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
11624 SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
11625 }
11626 }
11627 }
11628
11629 /* Implement TARGET_BUILTIN_DECL hook. */
11630
11631 static tree
11632 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11633 {
11634 if (code >= SPARC_BUILTIN_MAX)
11635 return error_mark_node;
11636
11637 return sparc_builtins[code];
11638 }
11639
11640 /* Implement TARGET_EXPAND_BUILTIN hook. */
11641
11642 static rtx
11643 sparc_expand_builtin (tree exp, rtx target,
11644 rtx subtarget ATTRIBUTE_UNUSED,
11645 machine_mode tmode ATTRIBUTE_UNUSED,
11646 int ignore ATTRIBUTE_UNUSED)
11647 {
11648 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11649 enum sparc_builtins code
11650 = (enum sparc_builtins) DECL_MD_FUNCTION_CODE (fndecl);
11651 enum insn_code icode = sparc_builtins_icode[code];
11652 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
11653 call_expr_arg_iterator iter;
11654 int arg_count = 0;
11655 rtx pat, op[4];
11656 tree arg;
11657
11658 if (nonvoid)
11659 {
11660 machine_mode tmode = insn_data[icode].operand[0].mode;
11661 if (!target
11662 || GET_MODE (target) != tmode
11663 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11664 op[0] = gen_reg_rtx (tmode);
11665 else
11666 op[0] = target;
11667 }
11668 else
11669 op[0] = NULL_RTX;
11670
11671 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
11672 {
11673 const struct insn_operand_data *insn_op;
11674 int idx;
11675
11676 if (arg == error_mark_node)
11677 return NULL_RTX;
11678
11679 arg_count++;
11680 idx = arg_count - !nonvoid;
11681 insn_op = &insn_data[icode].operand[idx];
11682 op[arg_count] = expand_normal (arg);
11683
11684 /* Some of the builtins require constant arguments. We check
11685 for this here. */
11686 if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
11687 && code <= SPARC_BUILTIN_LAST_FPCMPSHL
11688 && arg_count == 3)
11689 || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
11690 && code <= SPARC_BUILTIN_LAST_DICTUNPACK
11691 && arg_count == 2))
11692 {
11693 if (!check_constant_argument (icode, idx, op[arg_count]))
11694 return const0_rtx;
11695 }
11696
11697 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
11698 {
11699 if (!address_operand (op[arg_count], SImode))
11700 {
11701 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
11702 op[arg_count] = copy_addr_to_reg (op[arg_count]);
11703 }
11704 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
11705 }
11706
11707 else if (insn_op->mode == V1DImode
11708 && GET_MODE (op[arg_count]) == DImode)
11709 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
11710
11711 else if (insn_op->mode == V1SImode
11712 && GET_MODE (op[arg_count]) == SImode)
11713 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
11714
11715 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
11716 insn_op->mode))
11717 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
11718 }
11719
11720 switch (arg_count)
11721 {
11722 case 0:
11723 pat = GEN_FCN (icode) (op[0]);
11724 break;
11725 case 1:
11726 if (nonvoid)
11727 pat = GEN_FCN (icode) (op[0], op[1]);
11728 else
11729 pat = GEN_FCN (icode) (op[1]);
11730 break;
11731 case 2:
11732 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
11733 break;
11734 case 3:
11735 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
11736 break;
11737 default:
11738 gcc_unreachable ();
11739 }
11740
11741 if (!pat)
11742 return NULL_RTX;
11743
11744 emit_insn (pat);
11745
11746 return (nonvoid ? op[0] : const0_rtx);
11747 }
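/* As an illustration of the expansion above: a source-level call such as

     res = __builtin_vis_fpadd16 (a, b);

   arrives here with CODE == SPARC_BUILTIN_FPADD16 and ICODE ==
   CODE_FOR_addv4hi3; the two arguments are expanded into operands 1 and 2,
   operand 0 receives the result, and a single insn is emitted.  */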
11748
11749 /* Return the upper 16 bits of the 8x16 multiplication. */
11750
11751 static int
11752 sparc_vis_mul8x16 (int e8, int e16)
11753 {
11754 return (e8 * e16 + 128) / 256;
11755 }
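/* For instance, sparc_vis_mul8x16 (5, 100) computes (500 + 128) / 256 == 2,
   i.e. the product scaled down by 256 and rounded to the nearest integer.  */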
11756
11757 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11758 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
11759
11760 static void
11761 sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode,
11762 tree inner_type, tree cst0, tree cst1)
11763 {
11764 unsigned i, num = VECTOR_CST_NELTS (cst0);
11765 int scale;
11766
11767 switch (fncode)
11768 {
11769 case SPARC_BUILTIN_FMUL8X16:
11770 for (i = 0; i < num; ++i)
11771 {
11772 int val
11773 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11774 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11775 n_elts->quick_push (build_int_cst (inner_type, val));
11776 }
11777 break;
11778
11779 case SPARC_BUILTIN_FMUL8X16AU:
11780 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11781
11782 for (i = 0; i < num; ++i)
11783 {
11784 int val
11785 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11786 scale);
11787 n_elts->quick_push (build_int_cst (inner_type, val));
11788 }
11789 break;
11790
11791 case SPARC_BUILTIN_FMUL8X16AL:
11792 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11793
11794 for (i = 0; i < num; ++i)
11795 {
11796 int val
11797 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11798 scale);
11799 n_elts->quick_push (build_int_cst (inner_type, val));
11800 }
11801 break;
11802
11803 default:
11804 gcc_unreachable ();
11805 }
11806 }
11807
11808 /* Implement TARGET_FOLD_BUILTIN hook.
11809
11810 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
11811 result of the function call is ignored. NULL_TREE is returned if the
11812 function could not be folded. */
11813
11814 static tree
11815 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
11816 tree *args, bool ignore)
11817 {
11818 enum sparc_builtins code
11819 = (enum sparc_builtins) DECL_MD_FUNCTION_CODE (fndecl);
11820 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
11821 tree arg0, arg1, arg2;
11822
11823 if (ignore)
11824 switch (code)
11825 {
11826 case SPARC_BUILTIN_LDFSR:
11827 case SPARC_BUILTIN_STFSR:
11828 case SPARC_BUILTIN_ALIGNADDR:
11829 case SPARC_BUILTIN_WRGSR:
11830 case SPARC_BUILTIN_BMASK:
11831 case SPARC_BUILTIN_CMASK8:
11832 case SPARC_BUILTIN_CMASK16:
11833 case SPARC_BUILTIN_CMASK32:
11834 break;
11835
11836 default:
11837 return build_zero_cst (rtype);
11838 }
11839
11840 switch (code)
11841 {
11842 case SPARC_BUILTIN_FEXPAND:
11843 arg0 = args[0];
11844 STRIP_NOPS (arg0);
11845
11846 if (TREE_CODE (arg0) == VECTOR_CST)
11847 {
11848 tree inner_type = TREE_TYPE (rtype);
11849 unsigned i;
11850
11851 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11852 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11853 {
11854 unsigned HOST_WIDE_INT val
11855 = TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i));
11856 n_elts.quick_push (build_int_cst (inner_type, val << 4));
11857 }
11858 return n_elts.build ();
11859 }
11860 break;
11861
11862 case SPARC_BUILTIN_FMUL8X16:
11863 case SPARC_BUILTIN_FMUL8X16AU:
11864 case SPARC_BUILTIN_FMUL8X16AL:
11865 arg0 = args[0];
11866 arg1 = args[1];
11867 STRIP_NOPS (arg0);
11868 STRIP_NOPS (arg1);
11869
11870 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11871 {
11872 tree inner_type = TREE_TYPE (rtype);
11873 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11874 sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1);
11875 return n_elts.build ();
11876 }
11877 break;
11878
11879 case SPARC_BUILTIN_FPMERGE:
11880 arg0 = args[0];
11881 arg1 = args[1];
11882 STRIP_NOPS (arg0);
11883 STRIP_NOPS (arg1);
11884
11885 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11886 {
11887 tree_vector_builder n_elts (rtype, 2 * VECTOR_CST_NELTS (arg0), 1);
11888 unsigned i;
11889 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11890 {
11891 n_elts.quick_push (VECTOR_CST_ELT (arg0, i));
11892 n_elts.quick_push (VECTOR_CST_ELT (arg1, i));
11893 }
11894
11895 return n_elts.build ();
11896 }
11897 break;
11898
11899 case SPARC_BUILTIN_PDIST:
11900 case SPARC_BUILTIN_PDISTN:
11901 arg0 = args[0];
11902 arg1 = args[1];
11903 STRIP_NOPS (arg0);
11904 STRIP_NOPS (arg1);
11905 if (code == SPARC_BUILTIN_PDIST)
11906 {
11907 arg2 = args[2];
11908 STRIP_NOPS (arg2);
11909 }
11910 else
11911 arg2 = integer_zero_node;
11912
11913 if (TREE_CODE (arg0) == VECTOR_CST
11914 && TREE_CODE (arg1) == VECTOR_CST
11915 && TREE_CODE (arg2) == INTEGER_CST)
11916 {
11917 bool overflow = false;
11918 widest_int result = wi::to_widest (arg2);
11919 widest_int tmp;
11920 unsigned i;
11921
11922 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11923 {
11924 tree e0 = VECTOR_CST_ELT (arg0, i);
11925 tree e1 = VECTOR_CST_ELT (arg1, i);
11926
11927 wi::overflow_type neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
11928
11929 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
11930 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11931 if (wi::neg_p (tmp))
11932 tmp = wi::neg (tmp, &neg2_ovf);
11933 else
11934 neg2_ovf = wi::OVF_NONE;
11935 result = wi::add (result, tmp, SIGNED, &add2_ovf);
11936 overflow |= ((neg1_ovf != wi::OVF_NONE)
11937 | (neg2_ovf != wi::OVF_NONE)
11938 | (add1_ovf != wi::OVF_NONE)
11939 | (add2_ovf != wi::OVF_NONE));
11940 }
11941
11942 gcc_assert (!overflow);
11943
11944 return wide_int_to_tree (rtype, result);
11945 }
11946
11947 default:
11948 break;
11949 }
11950
11951 return NULL_TREE;
11952 }
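/* Example of the FPMERGE folding above: with constant operands
   { a0, a1, a2, a3 } and { b0, b1, b2, b3 }, the call folds at compile time
   into the interleaved vector constant { a0, b0, a1, b1, a2, b2, a3, b3 }.  */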
11953
11954 /* ??? This duplicates information provided to the compiler by the
11955 ??? scheduler description. Some day, teach genautomata to output
11956 ??? the latencies and then CSE will just use that. */
11957
11958 static bool
11959 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
11960 int opno ATTRIBUTE_UNUSED,
11961 int *total, bool speed ATTRIBUTE_UNUSED)
11962 {
11963 int code = GET_CODE (x);
11964 bool float_mode_p = FLOAT_MODE_P (mode);
11965
11966 switch (code)
11967 {
11968 case CONST_INT:
11969 if (SMALL_INT (x))
11970 *total = 0;
11971 else
11972 *total = 2;
11973 return true;
11974
11975 case CONST_WIDE_INT:
11976 *total = 0;
11977 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
11978 *total += 2;
11979 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
11980 *total += 2;
11981 return true;
11982
11983 case HIGH:
11984 *total = 2;
11985 return true;
11986
11987 case CONST:
11988 case LABEL_REF:
11989 case SYMBOL_REF:
11990 *total = 4;
11991 return true;
11992
11993 case CONST_DOUBLE:
11994 *total = 8;
11995 return true;
11996
11997 case MEM:
11998 /* If outer-code was a sign or zero extension, a cost
11999 of COSTS_N_INSNS (1) was already added in. This is
12000 why we are subtracting it back out. */
12001 if (outer_code == ZERO_EXTEND)
12002 {
12003 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
12004 }
12005 else if (outer_code == SIGN_EXTEND)
12006 {
12007 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
12008 }
12009 else if (float_mode_p)
12010 {
12011 *total = sparc_costs->float_load;
12012 }
12013 else
12014 {
12015 *total = sparc_costs->int_load;
12016 }
12017
12018 return true;
12019
12020 case PLUS:
12021 case MINUS:
12022 if (float_mode_p)
12023 *total = sparc_costs->float_plusminus;
12024 else
12025 *total = COSTS_N_INSNS (1);
12026 return false;
12027
12028 case FMA:
12029 {
12030 rtx sub;
12031
12032 gcc_assert (float_mode_p);
12033 *total = sparc_costs->float_mul;
12034
12035 sub = XEXP (x, 0);
12036 if (GET_CODE (sub) == NEG)
12037 sub = XEXP (sub, 0);
12038 *total += rtx_cost (sub, mode, FMA, 0, speed);
12039
12040 sub = XEXP (x, 2);
12041 if (GET_CODE (sub) == NEG)
12042 sub = XEXP (sub, 0);
12043 *total += rtx_cost (sub, mode, FMA, 2, speed);
12044 return true;
12045 }
12046
12047 case MULT:
12048 if (float_mode_p)
12049 *total = sparc_costs->float_mul;
12050 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
12051 *total = COSTS_N_INSNS (25);
12052 else
12053 {
12054 int bit_cost;
12055
12056 bit_cost = 0;
12057 if (sparc_costs->int_mul_bit_factor)
12058 {
12059 int nbits;
12060
12061 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
12062 {
12063 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
12064 for (nbits = 0; value != 0; value &= value - 1)
12065 nbits++;
12066 }
12067 else
12068 nbits = 7;
12069
12070 if (nbits < 3)
12071 nbits = 3;
12072 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
12073 bit_cost = COSTS_N_INSNS (bit_cost);
12074 }
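	  /* Illustrative example (hypothetical numbers): with
	     int_mul_bit_factor == 2 and a constant multiplier having 9 bits
	     set, bit_cost == COSTS_N_INSNS ((9 - 3) / 2) == COSTS_N_INSNS (3),
	     which is added to int_mul or int_mulX below.  */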
12075
12076 if (mode == DImode || !TARGET_HARD_MUL)
12077 *total = sparc_costs->int_mulX + bit_cost;
12078 else
12079 *total = sparc_costs->int_mul + bit_cost;
12080 }
12081 return false;
12082
12083 case ASHIFT:
12084 case ASHIFTRT:
12085 case LSHIFTRT:
12086 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
12087 return false;
12088
12089 case DIV:
12090 case UDIV:
12091 case MOD:
12092 case UMOD:
12093 if (float_mode_p)
12094 {
12095 if (mode == DFmode)
12096 *total = sparc_costs->float_div_df;
12097 else
12098 *total = sparc_costs->float_div_sf;
12099 }
12100 else
12101 {
12102 if (mode == DImode)
12103 *total = sparc_costs->int_divX;
12104 else
12105 *total = sparc_costs->int_div;
12106 }
12107 return false;
12108
12109 case NEG:
12110 if (! float_mode_p)
12111 {
12112 *total = COSTS_N_INSNS (1);
12113 return false;
12114 }
12115 /* FALLTHRU */
12116
12117 case ABS:
12118 case FLOAT:
12119 case UNSIGNED_FLOAT:
12120 case FIX:
12121 case UNSIGNED_FIX:
12122 case FLOAT_EXTEND:
12123 case FLOAT_TRUNCATE:
12124 *total = sparc_costs->float_move;
12125 return false;
12126
12127 case SQRT:
12128 if (mode == DFmode)
12129 *total = sparc_costs->float_sqrt_df;
12130 else
12131 *total = sparc_costs->float_sqrt_sf;
12132 return false;
12133
12134 case COMPARE:
12135 if (float_mode_p)
12136 *total = sparc_costs->float_cmp;
12137 else
12138 *total = COSTS_N_INSNS (1);
12139 return false;
12140
12141 case IF_THEN_ELSE:
12142 if (float_mode_p)
12143 *total = sparc_costs->float_cmove;
12144 else
12145 *total = sparc_costs->int_cmove;
12146 return false;
12147
12148 case IOR:
12149 /* Handle the NAND vector patterns. */
12150 if (sparc_vector_mode_supported_p (mode)
12151 && GET_CODE (XEXP (x, 0)) == NOT
12152 && GET_CODE (XEXP (x, 1)) == NOT)
12153 {
12154 *total = COSTS_N_INSNS (1);
12155 return true;
12156 }
12157 else
12158 return false;
12159
12160 default:
12161 return false;
12162 }
12163 }
12164
12165 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
12166
12167 static inline bool
12168 general_or_i64_p (reg_class_t rclass)
12169 {
12170 return (rclass == GENERAL_REGS || rclass == I64_REGS);
12171 }
12172
12173 /* Implement TARGET_REGISTER_MOVE_COST. */
12174
12175 static int
12176 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12177 reg_class_t from, reg_class_t to)
12178 {
12179 bool need_memory = false;
12180
12181 /* This helps postreload CSE to eliminate redundant comparisons. */
12182 if (from == NO_REGS || to == NO_REGS)
12183 return 100;
12184
12185 if (from == FPCC_REGS || to == FPCC_REGS)
12186 need_memory = true;
12187 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
12188 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
12189 {
12190 if (TARGET_VIS3)
12191 {
12192 int size = GET_MODE_SIZE (mode);
12193 if (size == 8 || size == 4)
12194 {
12195 if (! TARGET_ARCH32 || size == 4)
12196 return 4;
12197 else
12198 return 6;
12199 }
12200 }
12201 need_memory = true;
12202 }
12203
12204 if (need_memory)
12205 {
12206 if (sparc_cpu == PROCESSOR_ULTRASPARC
12207 || sparc_cpu == PROCESSOR_ULTRASPARC3
12208 || sparc_cpu == PROCESSOR_NIAGARA
12209 || sparc_cpu == PROCESSOR_NIAGARA2
12210 || sparc_cpu == PROCESSOR_NIAGARA3
12211 || sparc_cpu == PROCESSOR_NIAGARA4
12212 || sparc_cpu == PROCESSOR_NIAGARA7
12213 || sparc_cpu == PROCESSOR_M8)
12214 return 12;
12215
12216 return 6;
12217 }
12218
12219 return 2;
12220 }
12221
12222 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
12223 This is achieved by means of a manual dynamic stack space allocation in
12224 the current frame. We make the assumption that SEQ doesn't contain any
12225 function calls, with the possible exception of calls to the GOT helper. */
12226
12227 static void
12228 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
12229 {
12230 /* We must preserve the lowest 16 words for the register save area. */
12231 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
12232 /* We really need only 2 words of fresh stack space. */
12233 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
12234
12235 rtx slot
12236 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
12237 SPARC_STACK_BIAS + offset));
12238
12239 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
12240 emit_insn (gen_rtx_SET (slot, reg));
12241 if (reg2)
12242 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
12243 reg2));
12244 emit_insn (seq);
12245 if (reg2)
12246 emit_insn (gen_rtx_SET (reg2,
12247 adjust_address (slot, word_mode, UNITS_PER_WORD)));
12248 emit_insn (gen_rtx_SET (reg, slot));
12249 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
12250 }
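/* Roughly, the sequence emitted above is (illustrative):

     sp -= SIZE;				allocate fresh stack space
     *(sp + BIAS + 16 words) = REG;
     *(sp + BIAS + 17 words) = REG2;		if REG2 is non-null
     ... SEQ ...
     REG2 = *(sp + BIAS + 17 words);		if REG2 is non-null
     REG = *(sp + BIAS + 16 words);
     sp += SIZE;				release the space  */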
12251
12252 /* Output the assembler code for a thunk function. THUNK_DECL is the
12253 declaration for the thunk function itself, FUNCTION is the decl for
12254 the target function. DELTA is an immediate constant offset to be
12255 added to THIS. If VCALL_OFFSET is nonzero, the word at address
12256 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
12257
12258 static void
12259 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12260 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12261 tree function)
12262 {
12263 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
12264 rtx this_rtx, funexp;
12265 rtx_insn *insn;
12266 unsigned int int_arg_first;
12267
12268 reload_completed = 1;
12269 epilogue_completed = 1;
12270
12271 emit_note (NOTE_INSN_PROLOGUE_END);
12272
12273 if (TARGET_FLAT)
12274 {
12275 sparc_leaf_function_p = 1;
12276
12277 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12278 }
12279 else if (flag_delayed_branch)
12280 {
12281 /* We will emit a regular sibcall below, so we need to instruct
12282 output_sibcall that we are in a leaf function. */
12283 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
12284
12285 /* This will cause final.c to invoke leaf_renumber_regs so we
12286 must behave as if we were in a not-yet-leafified function. */
12287 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
12288 }
12289 else
12290 {
12291 /* We will emit the sibcall manually below, so we will need to
12292 manually spill non-leaf registers. */
12293 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
12294
12295 /* We really are in a leaf function. */
12296 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12297 }
12298
12299 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
12300 returns a structure, the structure return pointer is there instead. */
12301 if (TARGET_ARCH64
12302 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12303 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
12304 else
12305 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
12306
12307 /* Add DELTA. When possible use a plain add, otherwise load it into
12308 a register first. */
12309 if (delta)
12310 {
12311 rtx delta_rtx = GEN_INT (delta);
12312
12313 if (! SPARC_SIMM13_P (delta))
12314 {
12315 rtx scratch = gen_rtx_REG (Pmode, 1);
12316 emit_move_insn (scratch, delta_rtx);
12317 delta_rtx = scratch;
12318 }
12319
12320 /* THIS_RTX += DELTA. */
12321 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
12322 }
12323
12324 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
12325 if (vcall_offset)
12326 {
12327 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
12328 rtx scratch = gen_rtx_REG (Pmode, 1);
12329
12330 gcc_assert (vcall_offset < 0);
12331
12332 /* SCRATCH = *THIS_RTX. */
12333 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
12334
12335 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
12336 may not have any available scratch register at this point. */
12337 if (SPARC_SIMM13_P (vcall_offset))
12338 ;
12339 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
12340 else if (! fixed_regs[5]
12341 /* The below sequence is made up of at least 2 insns,
12342 while the default method may need only one. */
12343 && vcall_offset < -8192)
12344 {
12345 rtx scratch2 = gen_rtx_REG (Pmode, 5);
12346 emit_move_insn (scratch2, vcall_offset_rtx);
12347 vcall_offset_rtx = scratch2;
12348 }
12349 else
12350 {
12351 rtx increment = GEN_INT (-4096);
12352
12353 /* VCALL_OFFSET is a negative number whose typical range can be
12354 estimated as -32768..0 in 32-bit mode. In almost all cases
12355 it is therefore cheaper to emit multiple add insns than
12356 spilling and loading the constant into a register (at least
12357 6 insns). */
12358 while (! SPARC_SIMM13_P (vcall_offset))
12359 {
12360 emit_insn (gen_add2_insn (scratch, increment));
12361 vcall_offset += 4096;
12362 }
12363 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
12364 }
12365
12366 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
12367 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
12368 gen_rtx_PLUS (Pmode,
12369 scratch,
12370 vcall_offset_rtx)));
12371
12372 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
12373 emit_insn (gen_add2_insn (this_rtx, scratch));
12374 }
12375
12376 /* Generate a tail call to the target function. */
12377 if (! TREE_USED (function))
12378 {
12379 assemble_external (function);
12380 TREE_USED (function) = 1;
12381 }
12382 funexp = XEXP (DECL_RTL (function), 0);
12383
12384 if (flag_delayed_branch)
12385 {
12386 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
12387 insn = emit_call_insn (gen_sibcall (funexp));
12388 SIBLING_CALL_P (insn) = 1;
12389 }
12390 else
12391 {
12392 /* The hoops we have to jump through in order to generate a sibcall
12393 without using delay slots... */
12394 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
12395
12396 if (flag_pic)
12397 {
12398 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
12399 start_sequence ();
12400 load_got_register (); /* clobbers %o7 */
12401 if (!TARGET_VXWORKS_RTP)
12402 pic_offset_table_rtx = got_register_rtx;
12403 scratch = sparc_legitimize_pic_address (funexp, scratch);
12404 seq = get_insns ();
12405 end_sequence ();
12406 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
12407 }
12408 else if (TARGET_ARCH32)
12409 {
12410 emit_insn (gen_rtx_SET (scratch,
12411 gen_rtx_HIGH (SImode, funexp)));
12412 emit_insn (gen_rtx_SET (scratch,
12413 gen_rtx_LO_SUM (SImode, scratch, funexp)));
12414 }
12415 else /* TARGET_ARCH64 */
12416 {
12417 switch (sparc_code_model)
12418 {
12419 case CM_MEDLOW:
12420 case CM_MEDMID:
12421 /* The destination can serve as a temporary. */
12422 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
12423 break;
12424
12425 case CM_MEDANY:
12426 case CM_EMBMEDANY:
12427 /* The destination cannot serve as a temporary. */
12428 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
12429 start_sequence ();
12430 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
12431 seq = get_insns ();
12432 end_sequence ();
12433 emit_and_preserve (seq, spill_reg, 0);
12434 break;
12435
12436 default:
12437 gcc_unreachable ();
12438 }
12439 }
12440
12441 emit_jump_insn (gen_indirect_jump (scratch));
12442 }
12443
12444 emit_barrier ();
12445
12446 /* Run just enough of rest_of_compilation to get the insns emitted.
12447 There's not really enough bulk here to make other passes such as
12448 instruction scheduling worthwhile. */
12449 insn = get_insns ();
12450 shorten_branches (insn);
12451 assemble_start_function (thunk_fndecl, fnname);
12452 final_start_function (insn, file, 1);
12453 final (insn, file, 1);
12454 final_end_function ();
12455 assemble_end_function (thunk_fndecl, fnname);
12456
12457 reload_completed = 0;
12458 epilogue_completed = 0;
12459 }
12460
12461 /* Return true if sparc_output_mi_thunk would be able to output the
12462 assembler code for the thunk function specified by the arguments
12463 it is passed, and false otherwise. */
12464 static bool
12465 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
12466 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
12467 HOST_WIDE_INT vcall_offset,
12468 const_tree function ATTRIBUTE_UNUSED)
12469 {
12470 /* Bound the loop used in the default method above. */
12471 return (vcall_offset >= -32768 || ! fixed_regs[5]);
12472 }
12473
12474 /* How to allocate a 'struct machine_function'. */
12475
12476 static struct machine_function *
12477 sparc_init_machine_status (void)
12478 {
12479 return ggc_cleared_alloc<machine_function> ();
12480 }
12481
12482 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
12483
12484 static unsigned HOST_WIDE_INT
12485 sparc_asan_shadow_offset (void)
12486 {
12487 return TARGET_ARCH64 ? (HOST_WIDE_INT_1 << 43) : (HOST_WIDE_INT_1 << 29);
12488 }
12489
12490 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12491 We need to emit DTP-relative relocations. */
12492
12493 static void
12494 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
12495 {
12496 switch (size)
12497 {
12498 case 4:
12499 fputs ("\t.word\t%r_tls_dtpoff32(", file);
12500 break;
12501 case 8:
12502 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
12503 break;
12504 default:
12505 gcc_unreachable ();
12506 }
12507 output_addr_const (file, x);
12508 fputs (")", file);
12509 }
12510
12511 /* Do whatever processing is required at the end of a file. */
12512
12513 static void
12514 sparc_file_end (void)
12515 {
12516 /* If we need to emit the special GOT helper function, do so now. */
12517 if (got_helper_needed)
12518 {
12519 const char *name = XSTR (got_helper_rtx, 0);
12520 #ifdef DWARF2_UNWIND_INFO
12521 bool do_cfi;
12522 #endif
12523
12524 if (USE_HIDDEN_LINKONCE)
12525 {
12526 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
12527 get_identifier (name),
12528 build_function_type_list (void_type_node,
12529 NULL_TREE));
12530 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
12531 NULL_TREE, void_type_node);
12532 TREE_PUBLIC (decl) = 1;
12533 TREE_STATIC (decl) = 1;
12534 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
12535 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
12536 DECL_VISIBILITY_SPECIFIED (decl) = 1;
12537 resolve_unique_section (decl, 0, flag_function_sections);
12538 allocate_struct_function (decl, true);
12539 cfun->is_thunk = 1;
12540 current_function_decl = decl;
12541 init_varasm_status ();
12542 assemble_start_function (decl, name);
12543 }
12544 else
12545 {
12546 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
12547 switch_to_section (text_section);
12548 if (align > 0)
12549 ASM_OUTPUT_ALIGN (asm_out_file, align);
12550 ASM_OUTPUT_LABEL (asm_out_file, name);
12551 }
12552
12553 #ifdef DWARF2_UNWIND_INFO
12554 do_cfi = dwarf2out_do_cfi_asm ();
12555 if (do_cfi)
12556 output_asm_insn (".cfi_startproc", NULL);
12557 #endif
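/* The helper adds the value of %o7 (the address of the call site) into the
   GOT register and returns; with delayed branches the add is scheduled in
   the delay slot of the return jump.  */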
12558 if (flag_delayed_branch)
12559 {
12560 output_asm_insn ("jmp\t%%o7+8", NULL);
12561 output_asm_insn (" add\t%%o7, %0, %0", &got_register_rtx);
12562 }
12563 else
12564 {
12565 output_asm_insn ("add\t%%o7, %0, %0", &got_register_rtx);
12566 output_asm_insn ("jmp\t%%o7+8", NULL);
12567 output_asm_insn (" nop", NULL);
12568 }
12569 #ifdef DWARF2_UNWIND_INFO
12570 if (do_cfi)
12571 output_asm_insn (".cfi_endproc", NULL);
12572 #endif
12573 }
12574
12575 if (NEED_INDICATE_EXEC_STACK)
12576 file_end_indicate_exec_stack ();
12577
12578 #ifdef TARGET_SOLARIS
12579 solaris_file_end ();
12580 #endif
12581 }
12582
12583 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
12584 /* Implement TARGET_MANGLE_TYPE. */
12585
12586 static const char *
12587 sparc_mangle_type (const_tree type)
12588 {
12589 if (TARGET_ARCH32
12590 && TYPE_MAIN_VARIANT (type) == long_double_type_node
12591 && TARGET_LONG_DOUBLE_128)
12592 return "g";
12593
12594 /* For all other types, use normal C++ mangling. */
12595 return NULL;
12596 }
12597 #endif
12598
12599 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
12600 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
12601 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
12602
12603 void
12604 sparc_emit_membar_for_model (enum memmodel model,
12605 int load_store, int before_after)
12606 {
12607 /* Bits for the MEMBAR mmask field. */
12608 const int LoadLoad = 1;
12609 const int StoreLoad = 2;
12610 const int LoadStore = 4;
12611 const int StoreStore = 8;
12612
12613 int mm = 0, implied = 0;
12614
12615 switch (sparc_memory_model)
12616 {
12617 case SMM_SC:
12618 /* Sequential Consistency. All memory transactions are immediately
12619 visible in sequential execution order. No barriers needed. */
12620 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
12621 break;
12622
12623 case SMM_TSO:
12624 /* Total Store Ordering: all memory transactions with store semantics
12625 are followed by an implied StoreStore. */
12626 implied |= StoreStore;
12627
12628 /* If we're not looking for a raw barrier (before+after), then atomic
12629 operations get the benefit of being both load and store. */
12630 if (load_store == 3 && before_after == 1)
12631 implied |= StoreLoad;
12632 /* FALLTHRU */
12633
12634 case SMM_PSO:
12635 /* Partial Store Ordering: all memory transactions with load semantics
12636 are followed by an implied LoadLoad | LoadStore. */
12637 implied |= LoadLoad | LoadStore;
12638
12639 /* If we're not looking for a raw barrier (before+after), then atomic
12640 operations get the benefit of being both load and store. */
12641 if (load_store == 3 && before_after == 2)
12642 implied |= StoreLoad | StoreStore;
12643 /* FALLTHRU */
12644
12645 case SMM_RMO:
12646 /* Relaxed Memory Ordering: no implicit bits. */
12647 break;
12648
12649 default:
12650 gcc_unreachable ();
12651 }
12652
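/* Now compute the barrier bits required by MODEL itself: release (and
   stronger) semantics need a barrier before the access, acquire (and
   stronger) semantics need one after it.  */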
12653 if (before_after & 1)
12654 {
12655 if (is_mm_release (model) || is_mm_acq_rel (model)
12656 || is_mm_seq_cst (model))
12657 {
12658 if (load_store & 1)
12659 mm |= LoadLoad | StoreLoad;
12660 if (load_store & 2)
12661 mm |= LoadStore | StoreStore;
12662 }
12663 }
12664 if (before_after & 2)
12665 {
12666 if (is_mm_acquire (model) || is_mm_acq_rel (model)
12667 || is_mm_seq_cst (model))
12668 {
12669 if (load_store & 1)
12670 mm |= LoadLoad | LoadStore;
12671 if (load_store & 2)
12672 mm |= StoreLoad | StoreStore;
12673 }
12674 }
12675
12676 /* Remove the bits implied by the system memory model. */
12677 mm &= ~implied;
12678
12679 /* For raw barriers (before+after), always emit a barrier.
12680 This will become a compile-time barrier if needed. */
12681 if (mm || before_after == 3)
12682 emit_insn (gen_membar (GEN_INT (mm)));
12683 }
12684
12685 /* Expand code to perform an 8 or 16-bit compare and swap by doing a 32-bit
12686 compare and swap on the word containing the byte or half-word. */
12687
12688 static void
12689 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
12690 rtx oldval, rtx newval)
12691 {
12692 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
12693 rtx addr = gen_reg_rtx (Pmode);
12694 rtx off = gen_reg_rtx (SImode);
12695 rtx oldv = gen_reg_rtx (SImode);
12696 rtx newv = gen_reg_rtx (SImode);
12697 rtx oldvalue = gen_reg_rtx (SImode);
12698 rtx newvalue = gen_reg_rtx (SImode);
12699 rtx res = gen_reg_rtx (SImode);
12700 rtx resv = gen_reg_rtx (SImode);
12701 rtx memsi, val, mask, cc;
12702
12703 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
12704
12705 if (Pmode != SImode)
12706 addr1 = gen_lowpart (SImode, addr1);
12707 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
12708
12709 memsi = gen_rtx_MEM (SImode, addr);
12710 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
12711 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
12712
12713 val = copy_to_reg (memsi);
12714
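/* Convert the byte offset into a left-shift count for the big-endian word:
   the XOR with 3 (bytes) or 2 (half-words) counts from the least significant
   end, and the shift by 3 turns bytes into bits.  */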
12715 emit_insn (gen_rtx_SET (off,
12716 gen_rtx_XOR (SImode, off,
12717 GEN_INT (GET_MODE (mem) == QImode
12718 ? 3 : 2))));
12719
12720 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
12721
12722 if (GET_MODE (mem) == QImode)
12723 mask = force_reg (SImode, GEN_INT (0xff));
12724 else
12725 mask = force_reg (SImode, GEN_INT (0xffff));
12726
12727 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
12728
12729 emit_insn (gen_rtx_SET (val,
12730 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12731 val)));
12732
12733 oldval = gen_lowpart (SImode, oldval);
12734 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
12735
12736 newval = gen_lowpart_common (SImode, newval);
12737 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
12738
12739 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
12740
12741 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
12742
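/* Loop: merge the shifted OLDV/NEWV with the bytes of the word that are not
   being swapped and retry the 32-bit CAS until those other bytes stop
   changing under us.  */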
12743 rtx_code_label *end_label = gen_label_rtx ();
12744 rtx_code_label *loop_label = gen_label_rtx ();
12745 emit_label (loop_label);
12746
12747 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
12748
12749 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
12750
12751 emit_move_insn (bool_result, const1_rtx);
12752
12753 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
12754
12755 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
12756
12757 emit_insn (gen_rtx_SET (resv,
12758 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12759 res)));
12760
12761 emit_move_insn (bool_result, const0_rtx);
12762
12763 cc = gen_compare_reg_1 (NE, resv, val);
12764 emit_insn (gen_rtx_SET (val, resv));
12765
12766 /* Use cbranchcc4 to separate the compare and branch! */
12767 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
12768 cc, const0_rtx, loop_label));
12769
12770 emit_label (end_label);
12771
12772 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
12773
12774 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
12775
12776 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
12777 }
12778
12779 /* Expand code to perform a compare-and-swap. */
12780
12781 void
12782 sparc_expand_compare_and_swap (rtx operands[])
12783 {
12784 rtx bval, retval, mem, oldval, newval;
12785 machine_mode mode;
12786 enum memmodel model;
12787
12788 bval = operands[0];
12789 retval = operands[1];
12790 mem = operands[2];
12791 oldval = operands[3];
12792 newval = operands[4];
12793 model = (enum memmodel) INTVAL (operands[6]);
12794 mode = GET_MODE (mem);
12795
12796 sparc_emit_membar_for_model (model, 3, 1);
12797
12798 if (reg_overlap_mentioned_p (retval, oldval))
12799 oldval = copy_to_reg (oldval);
12800
12801 if (mode == QImode || mode == HImode)
12802 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12803 else
12804 {
12805 rtx (*gen) (rtx, rtx, rtx, rtx);
12806 rtx x;
12807
12808 if (mode == SImode)
12809 gen = gen_atomic_compare_and_swapsi_1;
12810 else
12811 gen = gen_atomic_compare_and_swapdi_1;
12812 emit_insn (gen (retval, mem, oldval, newval));
12813
12814 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12815 if (x != bval)
12816 convert_move (bval, x, 1);
12817 }
12818
12819 sparc_emit_membar_for_model (model, 3, 2);
12820 }
12821
12822 void
12823 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
12824 {
12825 rtx t_1, t_2, t_3;
12826
12827 sel = gen_lowpart (DImode, sel);
12828 switch (vmode)
12829 {
12830 case E_V2SImode:
12831 /* inp = xxxxxxxAxxxxxxxB */
12832 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12833 NULL_RTX, 1, OPTAB_DIRECT);
12834 /* t_1 = ....xxxxxxxAxxx. */
12835 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12836 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
12837 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12838 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
12839 /* sel = .......B */
12840 /* t_1 = ...A.... */
12841 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12842 /* sel = ...A...B */
12843 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
12844 /* sel = AAAABBBB * 4 */
12845 t_1 = force_reg (SImode, GEN_INT (0x01230123));
12846 /* sel = { A*4, A*4+1, A*4+2, ... } */
12847 break;
12848
12849 case E_V4HImode:
12850 /* inp = xxxAxxxBxxxCxxxD */
12851 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12852 NULL_RTX, 1, OPTAB_DIRECT);
12853 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12854 NULL_RTX, 1, OPTAB_DIRECT);
12855 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
12856 NULL_RTX, 1, OPTAB_DIRECT);
12857 /* t_1 = ..xxxAxxxBxxxCxx */
12858 /* t_2 = ....xxxAxxxBxxxC */
12859 /* t_3 = ......xxxAxxxBxx */
12860 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12861 GEN_INT (0x07),
12862 NULL_RTX, 1, OPTAB_DIRECT);
12863 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12864 GEN_INT (0x0700),
12865 NULL_RTX, 1, OPTAB_DIRECT);
12866 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
12867 GEN_INT (0x070000),
12868 NULL_RTX, 1, OPTAB_DIRECT);
12869 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
12870 GEN_INT (0x07000000),
12871 NULL_RTX, 1, OPTAB_DIRECT);
12872 /* sel = .......D */
12873 /* t_1 = .....C.. */
12874 /* t_2 = ...B.... */
12875 /* t_3 = .A...... */
12876 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12877 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
12878 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
12879 /* sel = .A.B.C.D */
12880 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
12881 /* sel = AABBCCDD * 2 */
12882 t_1 = force_reg (SImode, GEN_INT (0x01010101));
12883 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
12884 break;
12885
12886 case E_V8QImode:
12887 /* input = xAxBxCxDxExFxGxH */
12888 sel = expand_simple_binop (DImode, AND, sel,
12889 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
12890 | 0x0f0f0f0f),
12891 NULL_RTX, 1, OPTAB_DIRECT);
12892 /* sel = .A.B.C.D.E.F.G.H */
12893 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
12894 NULL_RTX, 1, OPTAB_DIRECT);
12895 /* t_1 = ..A.B.C.D.E.F.G. */
12896 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12897 NULL_RTX, 1, OPTAB_DIRECT);
12898 /* sel = .AABBCCDDEEFFGGH */
12899 sel = expand_simple_binop (DImode, AND, sel,
12900 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
12901 | 0xff00ff),
12902 NULL_RTX, 1, OPTAB_DIRECT);
12903 /* sel = ..AB..CD..EF..GH */
12904 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12905 NULL_RTX, 1, OPTAB_DIRECT);
12906 /* t_1 = ....AB..CD..EF.. */
12907 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12908 NULL_RTX, 1, OPTAB_DIRECT);
12909 /* sel = ..ABABCDCDEFEFGH */
12910 sel = expand_simple_binop (DImode, AND, sel,
12911 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
12912 NULL_RTX, 1, OPTAB_DIRECT);
12913 /* sel = ....ABCD....EFGH */
12914 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12915 NULL_RTX, 1, OPTAB_DIRECT);
12916 /* t_1 = ........ABCD.... */
12917 sel = gen_lowpart (SImode, sel);
12918 t_1 = gen_lowpart (SImode, t_1);
12919 break;
12920
12921 default:
12922 gcc_unreachable ();
12923 }
12924
12925 /* Always perform the final addition/merge within the bmask insn. */
12926 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
12927 }
12928
12929 /* Implement TARGET_VEC_PERM_CONST. */
12930
12931 static bool
12932 sparc_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
12933 rtx op1, const vec_perm_indices &sel)
12934 {
12935 if (!TARGET_VIS2)
12936 return false;
12937
12938 /* All permutes are supported. */
12939 if (!target)
12940 return true;
12941
12942 /* Force target-independent code to convert constant permutations on other
12943 modes down to V8QI. Rely on this to avoid the complexity of the byte
12944 order of the permutation. */
12945 if (vmode != V8QImode)
12946 return false;
12947
12948 rtx nop0 = force_reg (vmode, op0);
12949 if (op0 == op1)
12950 op1 = nop0;
12951 op0 = nop0;
12952 op1 = force_reg (vmode, op1);
12953
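/* Pack the eight 4-bit selector values into the 32-bit bmask operand,
   element 0 in the most significant nibble.  */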
12954 unsigned int i, mask;
12955 for (i = mask = 0; i < 8; ++i)
12956 mask |= (sel[i] & 0xf) << (28 - i*4);
12957 rtx mask_rtx = force_reg (SImode, gen_int_mode (mask, SImode));
12958
12959 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), mask_rtx, const0_rtx));
12960 emit_insn (gen_bshufflev8qi_vis (target, op0, op1));
12961 return true;
12962 }
12963
12964 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
12965
12966 static bool
12967 sparc_frame_pointer_required (void)
12968 {
12969 /* If the stack pointer is dynamically modified in the function, it cannot
12970 serve as the frame pointer. */
12971 if (cfun->calls_alloca)
12972 return true;
12973
12974 /* If the function receives nonlocal gotos, it needs to save the frame
12975 pointer in the nonlocal_goto_save_area object. */
12976 if (cfun->has_nonlocal_label)
12977 return true;
12978
12979 /* In flat mode, that's it. */
12980 if (TARGET_FLAT)
12981 return false;
12982
12983 /* Otherwise, the frame pointer is required if the function isn't leaf, but
12984 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */
12985 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
12986 }
12987
12988 /* The way this is structured, we can't eliminate SFP in favor of SP
12989 if the frame pointer is required: we want to use the SFP->HFP elimination
12990 in that case. But the test in update_eliminables doesn't know we are
12991 assuming below that we only do the former elimination. */
12992
12993 static bool
12994 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
12995 {
12996 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
12997 }
12998
12999 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
13000 they won't be allocated. */
13001
13002 static void
13003 sparc_conditional_register_usage (void)
13004 {
13005 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
13006 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13007 /* If the user has passed -f{fixed,call-{used,saved}}-g5,
13008 then honor it. */
13009 if (TARGET_ARCH32 && fixed_regs[5])
13010 fixed_regs[5] = 1;
13011 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
13012 fixed_regs[5] = 0;
13013 if (! TARGET_V9)
13014 {
13015 int regno;
13016 for (regno = SPARC_FIRST_V9_FP_REG;
13017 regno <= SPARC_LAST_V9_FP_REG;
13018 regno++)
13019 fixed_regs[regno] = 1;
13020 /* %fcc0 is used by v8 and v9. */
13021 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
13022 regno <= SPARC_LAST_V9_FCC_REG;
13023 regno++)
13024 fixed_regs[regno] = 1;
13025 }
13026 if (! TARGET_FPU)
13027 {
13028 int regno;
13029 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
13030 fixed_regs[regno] = 1;
13031 }
13032 /* If the user has passed -f{fixed,call-{used,saved}}-g2, then honor it.
13033 Likewise with g3 and g4. */
13034 if (fixed_regs[2] == 2)
13035 fixed_regs[2] = ! TARGET_APP_REGS;
13036 if (fixed_regs[3] == 2)
13037 fixed_regs[3] = ! TARGET_APP_REGS;
13038 if (TARGET_ARCH32 && fixed_regs[4] == 2)
13039 fixed_regs[4] = ! TARGET_APP_REGS;
13040 else if (TARGET_CM_EMBMEDANY)
13041 fixed_regs[4] = 1;
13042 else if (fixed_regs[4] == 2)
13043 fixed_regs[4] = 0;
13044 if (TARGET_FLAT)
13045 {
13046 int regno;
13047 /* Disable leaf functions. */
13048 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
13049 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13050 leaf_reg_remap [regno] = regno;
13051 }
13052 if (TARGET_VIS)
13053 global_regs[SPARC_GSR_REG] = 1;
13054 }
13055
13056 /* Implement TARGET_USE_PSEUDO_PIC_REG. */
13057
13058 static bool
13059 sparc_use_pseudo_pic_reg (void)
13060 {
13061 return !TARGET_VXWORKS_RTP && flag_pic;
13062 }
13063
13064 /* Implement TARGET_INIT_PIC_REG. */
13065
13066 static void
13067 sparc_init_pic_reg (void)
13068 {
13069 edge entry_edge;
13070 rtx_insn *seq;
13071
13072 /* In PIC mode, we need to always initialize the PIC register if optimization
13073 is enabled, because we are called from IRA and LRA may later force things
13074 to the constant pool for optimization purposes. */
13075 if (!flag_pic || (!crtl->uses_pic_offset_table && !optimize))
13076 return;
13077
13078 start_sequence ();
13079 load_got_register ();
13080 if (!TARGET_VXWORKS_RTP)
13081 emit_move_insn (pic_offset_table_rtx, got_register_rtx);
13082 seq = get_insns ();
13083 end_sequence ();
13084
13085 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
13086 insert_insn_on_edge (seq, entry_edge);
13087 commit_one_edge_insertion (entry_edge);
13088 }
13089
13090 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
13091
13092 - We can't load constants into FP registers.
13093 - We can't load FP constants into integer registers when soft-float,
13094 because there is no soft-float pattern with a r/F constraint.
13095 - We can't load FP constants into integer registers for TFmode unless
13096 it is 0.0L, because there is no movtf pattern with a r/F constraint.
13097 - Try and reload integer constants (symbolic or otherwise) back into
13098 registers directly, rather than having them dumped to memory. */
13099
13100 static reg_class_t
13101 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
13102 {
13103 machine_mode mode = GET_MODE (x);
13104 if (CONSTANT_P (x))
13105 {
13106 if (FP_REG_CLASS_P (rclass)
13107 || rclass == GENERAL_OR_FP_REGS
13108 || rclass == GENERAL_OR_EXTRA_FP_REGS
13109 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
13110 || (mode == TFmode && ! const_zero_operand (x, mode)))
13111 return NO_REGS;
13112
13113 if (GET_MODE_CLASS (mode) == MODE_INT)
13114 return GENERAL_REGS;
13115
13116 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13117 {
13118 if (! FP_REG_CLASS_P (rclass)
13119 || !(const_zero_operand (x, mode)
13120 || const_all_ones_operand (x, mode)))
13121 return NO_REGS;
13122 }
13123 }
13124
13125 if (TARGET_VIS3
13126 && ! TARGET_ARCH64
13127 && (rclass == EXTRA_FP_REGS
13128 || rclass == GENERAL_OR_EXTRA_FP_REGS))
13129 {
13130 int regno = true_regnum (x);
13131
13132 if (SPARC_INT_REG_P (regno))
13133 return (rclass == EXTRA_FP_REGS
13134 ? FP_REGS : GENERAL_OR_FP_REGS);
13135 }
13136
13137 return rclass;
13138 }
13139
13140 /* Return true if we use LRA instead of the reload pass. */
13141
13142 static bool
13143 sparc_lra_p (void)
13144 {
13145 return TARGET_LRA;
13146 }
13147
13148 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
13149 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
13150
13151 const char *
13152 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
13153 {
13154 char mulstr[32];
13155
13156 gcc_assert (! TARGET_ARCH64);
13157
13158 if (sparc_check_64 (operands[1], insn) <= 0)
13159 output_asm_insn ("srl\t%L1, 0, %L1", operands);
13160 if (which_alternative == 1)
13161 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
13162 if (GET_CODE (operands[2]) == CONST_INT)
13163 {
13164 if (which_alternative == 1)
13165 {
13166 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13167 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
13168 output_asm_insn (mulstr, operands);
13169 return "srlx\t%L0, 32, %H0";
13170 }
13171 else
13172 {
13173 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13174 output_asm_insn ("or\t%L1, %3, %3", operands);
13175 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
13176 output_asm_insn (mulstr, operands);
13177 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13178 return "mov\t%3, %L0";
13179 }
13180 }
13181 else if (rtx_equal_p (operands[1], operands[2]))
13182 {
13183 if (which_alternative == 1)
13184 {
13185 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13186 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
13187 output_asm_insn (mulstr, operands);
13188 return "srlx\t%L0, 32, %H0";
13189 }
13190 else
13191 {
13192 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13193 output_asm_insn ("or\t%L1, %3, %3", operands);
13194 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
13195 output_asm_insn (mulstr, operands);
13196 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13197 return "mov\t%3, %L0";
13198 }
13199 }
13200 if (sparc_check_64 (operands[2], insn) <= 0)
13201 output_asm_insn ("srl\t%L2, 0, %L2", operands);
13202 if (which_alternative == 1)
13203 {
13204 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13205 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
13206 output_asm_insn ("or\t%L2, %L1, %L1", operands);
13207 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
13208 output_asm_insn (mulstr, operands);
13209 return "srlx\t%L0, 32, %H0";
13210 }
13211 else
13212 {
13213 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13214 output_asm_insn ("sllx\t%H2, 32, %4", operands);
13215 output_asm_insn ("or\t%L1, %3, %3", operands);
13216 output_asm_insn ("or\t%L2, %4, %4", operands);
13217 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
13218 output_asm_insn (mulstr, operands);
13219 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13220 return "mov\t%3, %L0";
13221 }
13222 }
13223
13224 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13225 all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE
13226 and INNER_MODE are the modes describing TARGET. */
13227
13228 static void
13229 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
13230 machine_mode inner_mode)
13231 {
13232 rtx t1, final_insn, sel;
13233 int bmask;
13234
13235 t1 = gen_reg_rtx (mode);
13236
13237 elt = convert_modes (SImode, inner_mode, elt, true);
13238 emit_move_insn (gen_lowpart(SImode, t1), elt);
13239
13240 switch (mode)
13241 {
13242 case E_V2SImode:
13243 final_insn = gen_bshufflev2si_vis (target, t1, t1);
13244 bmask = 0x45674567;
13245 break;
13246 case E_V4HImode:
13247 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
13248 bmask = 0x67676767;
13249 break;
13250 case E_V8QImode:
13251 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
13252 bmask = 0x77777777;
13253 break;
13254 default:
13255 gcc_unreachable ();
13256 }
13257
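/* T1 holds the element in its low part; the bmask picks those bytes so that
   BSHUFFLE replicates them into every field of TARGET.  */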
13258 sel = force_reg (SImode, GEN_INT (bmask));
13259 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
13260 emit_insn (final_insn);
13261 }
13262
13263 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13264 all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn. */
13265
13266 static void
13267 vector_init_fpmerge (rtx target, rtx elt)
13268 {
13269 rtx t1, t2, t2_low, t3, t3_low;
13270
13271 t1 = gen_reg_rtx (V4QImode);
13272 elt = convert_modes (SImode, QImode, elt, true);
13273 emit_move_insn (gen_lowpart (SImode, t1), elt);
13274
13275 t2 = gen_reg_rtx (V8QImode);
13276 t2_low = gen_lowpart (V4QImode, t2);
13277 emit_insn (gen_fpmerge_vis (t2, t1, t1));
13278
13279 t3 = gen_reg_rtx (V8QImode);
13280 t3_low = gen_lowpart (V4QImode, t3);
13281 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
13282
13283 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
13284 }
13285
13286 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13287 all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn. */
13288
13289 static void
13290 vector_init_faligndata (rtx target, rtx elt)
13291 {
13292 rtx t1 = gen_reg_rtx (V4HImode);
13293 int i;
13294
13295 elt = convert_modes (SImode, HImode, elt, true);
13296 emit_move_insn (gen_lowpart (SImode, t1), elt);
13297
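/* Program the GSR alignment offset to 6; each FALIGNDATA below then shifts
   the element half-word from T1 into TARGET, so four iterations replicate it
   across all four fields.  */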
13298 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
13299 force_reg (SImode, GEN_INT (6)),
13300 const0_rtx));
13301
13302 for (i = 0; i < 4; i++)
13303 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
13304 }
13305
13306 /* Emit code to initialize the fields of vector TARGET to the values given in VALS. */
13307
13308 void
13309 sparc_expand_vector_init (rtx target, rtx vals)
13310 {
13311 const machine_mode mode = GET_MODE (target);
13312 const machine_mode inner_mode = GET_MODE_INNER (mode);
13313 const int n_elts = GET_MODE_NUNITS (mode);
13314 int i, n_var = 0;
13315 bool all_same = true;
13316 rtx mem;
13317
13318 for (i = 0; i < n_elts; i++)
13319 {
13320 rtx x = XVECEXP (vals, 0, i);
13321 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
13322 n_var++;
13323
13324 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13325 all_same = false;
13326 }
13327
13328 if (n_var == 0)
13329 {
13330 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
13331 return;
13332 }
13333
13334 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
13335 {
13336 if (GET_MODE_SIZE (inner_mode) == 4)
13337 {
13338 emit_move_insn (gen_lowpart (SImode, target),
13339 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
13340 return;
13341 }
13342 else if (GET_MODE_SIZE (inner_mode) == 8)
13343 {
13344 emit_move_insn (gen_lowpart (DImode, target),
13345 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
13346 return;
13347 }
13348 }
13349 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
13350 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
13351 {
13352 emit_move_insn (gen_highpart (word_mode, target),
13353 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
13354 emit_move_insn (gen_lowpart (word_mode, target),
13355 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
13356 return;
13357 }
13358
13359 if (all_same && GET_MODE_SIZE (mode) == 8)
13360 {
13361 if (TARGET_VIS2)
13362 {
13363 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
13364 return;
13365 }
13366 if (mode == V8QImode)
13367 {
13368 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
13369 return;
13370 }
13371 if (mode == V4HImode)
13372 {
13373 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
13374 return;
13375 }
13376 }
13377
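/* Otherwise, spill the elements to a stack temporary and load the whole
   vector back from memory.  */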
13378 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13379 for (i = 0; i < n_elts; i++)
13380 emit_move_insn (adjust_address_nv (mem, inner_mode,
13381 i * GET_MODE_SIZE (inner_mode)),
13382 XVECEXP (vals, 0, i));
13383 emit_move_insn (target, mem);
13384 }
13385
13386 /* Implement TARGET_SECONDARY_RELOAD. */
13387
13388 static reg_class_t
13389 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13390 machine_mode mode, secondary_reload_info *sri)
13391 {
13392 enum reg_class rclass = (enum reg_class) rclass_i;
13393
13394 sri->icode = CODE_FOR_nothing;
13395 sri->extra_cost = 0;
13396
13397 /* We need a temporary when loading/storing a HImode/QImode value
13398 between memory and the FPU registers. This can happen when combine puts
13399 a paradoxical subreg in a float/fix conversion insn. */
13400 if (FP_REG_CLASS_P (rclass)
13401 && (mode == HImode || mode == QImode)
13402 && (GET_CODE (x) == MEM
13403 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
13404 && true_regnum (x) == -1)))
13405 return GENERAL_REGS;
13406
13407 /* On 32-bit we need a temporary when loading/storing a DFmode value
13408 between unaligned memory and the upper FPU registers. */
13409 if (TARGET_ARCH32
13410 && rclass == EXTRA_FP_REGS
13411 && mode == DFmode
13412 && GET_CODE (x) == MEM
13413 && ! mem_min_alignment (x, 8))
13414 return FP_REGS;
13415
13416 if (((TARGET_CM_MEDANY
13417 && symbolic_operand (x, mode))
13418 || (TARGET_CM_EMBMEDANY
13419 && text_segment_operand (x, mode)))
13420 && ! flag_pic)
13421 {
13422 if (in_p)
13423 sri->icode = direct_optab_handler (reload_in_optab, mode);
13424 else
13425 sri->icode = direct_optab_handler (reload_out_optab, mode);
13426 return NO_REGS;
13427 }
13428
13429 if (TARGET_VIS3 && TARGET_ARCH32)
13430 {
13431 int regno = true_regnum (x);
13432
13433 /* When using VIS3 fp<-->int register moves, on 32-bit we have
13434 to move 8-byte values in 4-byte pieces. This only works via
13435 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
13436 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
13437 an FP_REGS intermediate move. */
13438 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
13439 || ((general_or_i64_p (rclass)
13440 || rclass == GENERAL_OR_FP_REGS)
13441 && SPARC_FP_REG_P (regno)))
13442 {
13443 sri->extra_cost = 2;
13444 return FP_REGS;
13445 }
13446 }
13447
13448 return NO_REGS;
13449 }
13450
13451 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
13452
13453 On SPARC when not VIS3 it is not possible to directly move data
13454 between GENERAL_REGS and FP_REGS. */
13455
13456 static bool
13457 sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1,
13458 reg_class_t class2)
13459 {
13460 return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2))
13461 && (! TARGET_VIS3
13462 || GET_MODE_SIZE (mode) > 8
13463 || GET_MODE_SIZE (mode) < 4));
13464 }
13465
13466 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
13467
13468 get_secondary_mem widens its argument to BITS_PER_WORD which loses on v9
13469 because the movsi and movsf patterns don't handle r/f moves.
13470 For v8 we copy the default definition. */
13471
13472 static machine_mode
13473 sparc_secondary_memory_needed_mode (machine_mode mode)
13474 {
13475 if (TARGET_ARCH64)
13476 {
13477 if (GET_MODE_BITSIZE (mode) < 32)
13478 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
13479 return mode;
13480 }
13481 else
13482 {
13483 if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
13484 return mode_for_size (BITS_PER_WORD,
13485 GET_MODE_CLASS (mode), 0).require ();
13486 return mode;
13487 }
13488 }
13489
13490 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
13491 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
13492
13493 bool
13494 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
13495 {
13496 enum rtx_code rc = GET_CODE (operands[1]);
13497 machine_mode cmp_mode;
13498 rtx cc_reg, dst, cmp;
13499
13500 cmp = operands[1];
13501 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
13502 return false;
13503
13504 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
13505 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
13506
13507 cmp_mode = GET_MODE (XEXP (cmp, 0));
13508 rc = GET_CODE (cmp);
13509
13510 dst = operands[0];
13511 if (! rtx_equal_p (operands[2], dst)
13512 && ! rtx_equal_p (operands[3], dst))
13513 {
13514 if (reg_overlap_mentioned_p (dst, cmp))
13515 dst = gen_reg_rtx (mode);
13516
13517 emit_move_insn (dst, operands[3]);
13518 }
13519 else if (operands[2] == dst)
13520 {
13521 operands[2] = operands[3];
13522
13523 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
13524 rc = reverse_condition_maybe_unordered (rc);
13525 else
13526 rc = reverse_condition (rc);
13527 }
13528
13529 if (XEXP (cmp, 1) == const0_rtx
13530 && GET_CODE (XEXP (cmp, 0)) == REG
13531 && cmp_mode == DImode
13532 && v9_regcmp_p (rc))
13533 cc_reg = XEXP (cmp, 0);
13534 else
13535 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
13536
13537 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
13538
13539 emit_insn (gen_rtx_SET (dst,
13540 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
13541
13542 if (dst != operands[0])
13543 emit_move_insn (operands[0], dst);
13544
13545 return true;
13546 }
13547
13548 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
13549 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
13550 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
13551 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
13552 code to be used for the condition mask. */
13553
13554 void
13555 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
13556 {
13557 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
13558 enum rtx_code code = GET_CODE (operands[3]);
13559
13560 mask = gen_reg_rtx (Pmode);
13561 cop0 = operands[4];
13562 cop1 = operands[5];
13563 if (code == LT || code == GE)
13564 {
13565 rtx t;
13566
13567 code = swap_condition (code);
13568 t = cop0; cop0 = cop1; cop1 = t;
13569 }
13570
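/* The vector comparison (the FCODE unspec) produces a mask in MASK; the CCODE
   unspec converts it into a selection mask in the GSR, and BSHUFFLE then picks
   each field of the result from OPERANDS[1] or OPERANDS[2] accordingly.  */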
13571 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
13572
13573 fcmp = gen_rtx_UNSPEC (Pmode,
13574 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
13575 fcode);
13576
13577 cmask = gen_rtx_UNSPEC (DImode,
13578 gen_rtvec (2, mask, gsr),
13579 ccode);
13580
13581 bshuf = gen_rtx_UNSPEC (mode,
13582 gen_rtvec (3, operands[1], operands[2], gsr),
13583 UNSPEC_BSHUFFLE);
13584
13585 emit_insn (gen_rtx_SET (mask, fcmp));
13586 emit_insn (gen_rtx_SET (gsr, cmask));
13587
13588 emit_insn (gen_rtx_SET (operands[0], bshuf));
13589 }
13590
13591 /* On the SPARC, any mode which naturally allocates into the single float
13592 registers should return 4 here. */
13593
13594 unsigned int
13595 sparc_regmode_natural_size (machine_mode mode)
13596 {
13597 const enum mode_class cl = GET_MODE_CLASS (mode);
13598
13599 if ((cl == MODE_FLOAT || cl == MODE_VECTOR_INT) && GET_MODE_SIZE (mode) <= 4)
13600 return 4;
13601
13602 return UNITS_PER_WORD;
13603 }
13604
13605 /* Implement TARGET_HARD_REGNO_NREGS.
13606
13607 On SPARC, ordinary registers hold 32 bits worth; this means both
13608 integer and floating point registers. On v9, integer regs hold 64
13609 bits worth; floating point regs hold 32 bits worth (this includes the
13610 new fp regs as even the odd ones are included in the hard register
13611 count). */
13612
13613 static unsigned int
13614 sparc_hard_regno_nregs (unsigned int regno, machine_mode mode)
13615 {
13616 if (regno == SPARC_GSR_REG)
13617 return 1;
13618 if (TARGET_ARCH64)
13619 {
13620 if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM)
13621 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13622 return CEIL (GET_MODE_SIZE (mode), 4);
13623 }
13624 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13625 }
13626
13627 /* Implement TARGET_HARD_REGNO_MODE_OK.
13628
13629 ??? Because of the funny way we pass parameters we should allow certain
13630 ??? types of float/complex values to be in integer registers during
13631 ??? RTL generation. This only matters on arch32. */
13632
13633 static bool
13634 sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
13635 {
13636 return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
13637 }
13638
13639 /* Implement TARGET_MODES_TIEABLE_P.
13640
13641 For V9 we have to deal with the fact that only the lower 32 floating
13642 point registers are 32-bit addressable. */
13643
13644 static bool
13645 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
13646 {
13647 enum mode_class mclass1, mclass2;
13648 unsigned short size1, size2;
13649
13650 if (mode1 == mode2)
13651 return true;
13652
13653 mclass1 = GET_MODE_CLASS (mode1);
13654 mclass2 = GET_MODE_CLASS (mode2);
13655 if (mclass1 != mclass2)
13656 return false;
13657
13658 if (! TARGET_V9)
13659 return true;
13660
13661 /* Classes are the same and we are V9 so we have to deal with upper
13662 vs. lower floating point registers. If one of the modes is a
13663 4-byte mode, and the other is not, we have to mark them as not
13664 tieable because only the lower 32 floating point registers are
13665 addressable 32-bits at a time.
13666
13667 We can't just test explicitly for SFmode, otherwise we won't
13668 cover the vector mode cases properly. */
13669
13670 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
13671 return true;
13672
13673 size1 = GET_MODE_SIZE (mode1);
13674 size2 = GET_MODE_SIZE (mode2);
13675 if ((size1 > 4 && size2 == 4)
13676 || (size2 > 4 && size1 == 4))
13677 return false;
13678
13679 return true;
13680 }
13681
13682 /* Implement TARGET_CSTORE_MODE. */
13683
13684 static scalar_int_mode
13685 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
13686 {
13687 return (TARGET_ARCH64 ? DImode : SImode);
13688 }
13689
13690 /* Return the compound expression made of T1 and T2. */
13691
13692 static inline tree
13693 compound_expr (tree t1, tree t2)
13694 {
13695 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
13696 }
13697
13698 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
13699
13700 static void
13701 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
13702 {
13703 if (!TARGET_FPU)
13704 return;
13705
13706 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
13707 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
13708
13709 /* We generate the equivalent of feholdexcept (&fenv_var):
13710
13711 unsigned int fenv_var;
13712 __builtin_store_fsr (&fenv_var);
13713
13714 unsigned int tmp1_var;
13715 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
13716
13717 __builtin_load_fsr (&tmp1_var); */
13718
13719 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
13720 TREE_ADDRESSABLE (fenv_var) = 1;
13721 tree fenv_addr = build_fold_addr_expr (fenv_var);
13722 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
13723 tree hold_stfsr
13724 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
13725 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
13726
13727 tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
13728 TREE_ADDRESSABLE (tmp1_var) = 1;
13729 tree masked_fenv_var
13730 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
13731 build_int_cst (unsigned_type_node,
13732 ~(accrued_exception_mask | trap_enable_mask)));
13733 tree hold_mask
13734 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
13735 NULL_TREE, NULL_TREE);
13736
13737 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
13738 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
13739 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
13740
13741 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
13742
13743 /* We reload the value of tmp1_var to clear the exceptions:
13744
13745 __builtin_load_fsr (&tmp1_var); */
13746
13747 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
13748
13749 /* We generate the equivalent of feupdateenv (&fenv_var):
13750
13751 unsigned int tmp2_var;
13752 __builtin_store_fsr (&tmp2_var);
13753
13754 __builtin_load_fsr (&fenv_var);
13755
13756 if (SPARC_LOW_FE_EXCEPT_VALUES)
13757 tmp2_var >>= 5;
13758 __atomic_feraiseexcept ((int) tmp2_var); */
13759
13760 tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
13761 TREE_ADDRESSABLE (tmp2_var) = 1;
13762 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
13763 tree update_stfsr
13764 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
13765 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
13766
13767 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
13768
13769 tree atomic_feraiseexcept
13770 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
13771 tree update_call
13772 = build_call_expr (atomic_feraiseexcept, 1,
13773 fold_convert (integer_type_node, tmp2_var));
13774
13775 if (SPARC_LOW_FE_EXCEPT_VALUES)
13776 {
13777 tree shifted_tmp2_var
13778 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
13779 build_int_cst (unsigned_type_node, 5));
13780 tree update_shift
13781 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
13782 update_call = compound_expr (update_shift, update_call);
13783 }
13784
13785 *update
13786 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
13787 }
13788
13789 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. Borrowed from the PA port.
13790
13791 SImode loads to floating-point registers are not zero-extended.
13792 The definition for LOAD_EXTEND_OP specifies that integer loads
13793 narrower than BITS_PER_WORD will be zero-extended. As a result,
13794 we inhibit changes from SImode unless they are to a mode that is
13795 identical in size.
13796
13797 Likewise for SFmode, since word-mode paradoxical subregs are
13798 problematic on big-endian architectures. */
13799
13800 static bool
13801 sparc_can_change_mode_class (machine_mode from, machine_mode to,
13802 reg_class_t rclass)
13803 {
13804 if (TARGET_ARCH64
13805 && GET_MODE_SIZE (from) == 4
13806 && GET_MODE_SIZE (to) != 4)
13807 return !reg_classes_intersect_p (rclass, FP_REGS);
13808 return true;
13809 }
13810
13811 /* Implement TARGET_CONSTANT_ALIGNMENT. */
13812
13813 static HOST_WIDE_INT
13814 sparc_constant_alignment (const_tree exp, HOST_WIDE_INT align)
13815 {
13816 if (TREE_CODE (exp) == STRING_CST)
13817 return MAX (align, FASTEST_ALIGNMENT);
13818 return align;
13819 }
13820
13821 /* Implement TARGET_ZERO_CALL_USED_REGS.
13822
13823 Generate a sequence of instructions that zero registers specified by
13824 NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually
13825 zeroed. */
13826
13827 static HARD_REG_SET
13828 sparc_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
13829 {
13830 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13831 if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
13832 {
13833 /* Do not touch the CC registers or the FP registers if no VIS. */
13834 if (regno >= SPARC_FCC_REG
13835 || (regno >= SPARC_FIRST_FP_REG && !TARGET_VIS))
13836 CLEAR_HARD_REG_BIT (need_zeroed_hardregs, regno);
13837
13838 /* Do not access the odd upper FP registers individually. */
13839 else if (regno >= SPARC_FIRST_V9_FP_REG && (regno & 1))
13840 ;
13841
13842 /* Use the most natural mode for the registers, which is not given by
13843 regno_reg_rtx/reg_raw_mode for the FP registers on the SPARC. */
13844 else
13845 {
13846 machine_mode mode;
13847 rtx reg;
13848
13849 if (regno < SPARC_FIRST_FP_REG)
13850 {
13851 reg = regno_reg_rtx[regno];
13852 mode = GET_MODE (reg);
13853 }
13854 else
13855 {
13856 mode = regno < SPARC_FIRST_V9_FP_REG ? SFmode : DFmode;
13857 reg = gen_raw_REG (mode, regno);
13858 }
13859
13860 emit_move_insn (reg, CONST0_RTX (mode));
13861 }
13862 }
13863
13864 return need_zeroed_hardregs;
13865 }
13866
13867 #include "gt-sparc.h"
13868