1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2020 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "memmodel.h"
33 #include "gimple.h"
34 #include "df.h"
35 #include "tm_p.h"
36 #include "stringpool.h"
37 #include "attribs.h"
38 #include "expmed.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "diagnostic-core.h"
44 #include "alias.h"
45 #include "fold-const.h"
46 #include "stor-layout.h"
47 #include "calls.h"
48 #include "varasm.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "debug.h"
54 #include "cfgrtl.h"
55 #include "common/common-target.h"
56 #include "gimplify.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "tree-pass.h"
60 #include "context.h"
61 #include "builtins.h"
62 #include "tree-vector-builder.h"
63 #include "opts.h"
64
65 /* This file should be included last. */
66 #include "target-def.h"
67
68 /* Processor costs */
69
70 struct processor_costs {
71 /* Integer load */
72 const int int_load;
73
74 /* Integer signed load */
75 const int int_sload;
76
77 /* Integer zeroed load */
78 const int int_zload;
79
80 /* Float load */
81 const int float_load;
82
83 /* fmov, fneg, fabs */
84 const int float_move;
85
86 /* fadd, fsub */
87 const int float_plusminus;
88
89 /* fcmp */
90 const int float_cmp;
91
92 /* fmov, fmovr */
93 const int float_cmove;
94
95 /* fmul */
96 const int float_mul;
97
98 /* fdivs */
99 const int float_div_sf;
100
101 /* fdivd */
102 const int float_div_df;
103
104 /* fsqrts */
105 const int float_sqrt_sf;
106
107 /* fsqrtd */
108 const int float_sqrt_df;
109
110 /* umul/smul */
111 const int int_mul;
112
113 /* mulX */
114 const int int_mulX;
115
116 /* integer multiply cost for each bit set past the most
117 significant 3, so the formula for multiply cost becomes:
118
119 if (rs1 < 0)
120 highest_bit = highest_clear_bit(rs1);
121 else
122 highest_bit = highest_set_bit(rs1);
123 if (highest_bit < 3)
124 highest_bit = 3;
125 cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
126
127      A value of zero indicates that the multiply cost is fixed,
128 and not variable. */
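  /* As an illustration of the formula above, assuming int_mul_bit_factor
     were 2 and the highest set bit of the multiplier were bit 11, the
     estimated cost would be int_mul{,X} + (11 - 3) / 2, i.e. 4 extra
     units on top of the base multiply cost.  */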
129 const int int_mul_bit_factor;
130
131 /* udiv/sdiv */
132 const int int_div;
133
134 /* divX */
135 const int int_divX;
136
137 /* movcc, movr */
138 const int int_cmove;
139
140 /* penalty for shifts, due to scheduling rules etc. */
141 const int shift_penalty;
142
143 /* cost of a (predictable) branch. */
144 const int branch_cost;
145 };
146
147 static const
148 struct processor_costs cypress_costs = {
149 COSTS_N_INSNS (2), /* int load */
150 COSTS_N_INSNS (2), /* int signed load */
151 COSTS_N_INSNS (2), /* int zeroed load */
152 COSTS_N_INSNS (2), /* float load */
153 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
154 COSTS_N_INSNS (5), /* fadd, fsub */
155 COSTS_N_INSNS (1), /* fcmp */
156 COSTS_N_INSNS (1), /* fmov, fmovr */
157 COSTS_N_INSNS (7), /* fmul */
158 COSTS_N_INSNS (37), /* fdivs */
159 COSTS_N_INSNS (37), /* fdivd */
160 COSTS_N_INSNS (63), /* fsqrts */
161 COSTS_N_INSNS (63), /* fsqrtd */
162 COSTS_N_INSNS (1), /* imul */
163 COSTS_N_INSNS (1), /* imulX */
164 0, /* imul bit factor */
165 COSTS_N_INSNS (1), /* idiv */
166 COSTS_N_INSNS (1), /* idivX */
167 COSTS_N_INSNS (1), /* movcc/movr */
168 0, /* shift penalty */
169 3 /* branch cost */
170 };
171
172 static const
173 struct processor_costs supersparc_costs = {
174 COSTS_N_INSNS (1), /* int load */
175 COSTS_N_INSNS (1), /* int signed load */
176 COSTS_N_INSNS (1), /* int zeroed load */
177 COSTS_N_INSNS (0), /* float load */
178 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
179 COSTS_N_INSNS (3), /* fadd, fsub */
180 COSTS_N_INSNS (3), /* fcmp */
181 COSTS_N_INSNS (1), /* fmov, fmovr */
182 COSTS_N_INSNS (3), /* fmul */
183 COSTS_N_INSNS (6), /* fdivs */
184 COSTS_N_INSNS (9), /* fdivd */
185 COSTS_N_INSNS (12), /* fsqrts */
186 COSTS_N_INSNS (12), /* fsqrtd */
187 COSTS_N_INSNS (4), /* imul */
188 COSTS_N_INSNS (4), /* imulX */
189 0, /* imul bit factor */
190 COSTS_N_INSNS (4), /* idiv */
191 COSTS_N_INSNS (4), /* idivX */
192 COSTS_N_INSNS (1), /* movcc/movr */
193 1, /* shift penalty */
194 3 /* branch cost */
195 };
196
197 static const
198 struct processor_costs hypersparc_costs = {
199 COSTS_N_INSNS (1), /* int load */
200 COSTS_N_INSNS (1), /* int signed load */
201 COSTS_N_INSNS (1), /* int zeroed load */
202 COSTS_N_INSNS (1), /* float load */
203 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
204 COSTS_N_INSNS (1), /* fadd, fsub */
205 COSTS_N_INSNS (1), /* fcmp */
206 COSTS_N_INSNS (1), /* fmov, fmovr */
207 COSTS_N_INSNS (1), /* fmul */
208 COSTS_N_INSNS (8), /* fdivs */
209 COSTS_N_INSNS (12), /* fdivd */
210 COSTS_N_INSNS (17), /* fsqrts */
211 COSTS_N_INSNS (17), /* fsqrtd */
212 COSTS_N_INSNS (17), /* imul */
213 COSTS_N_INSNS (17), /* imulX */
214 0, /* imul bit factor */
215 COSTS_N_INSNS (17), /* idiv */
216 COSTS_N_INSNS (17), /* idivX */
217 COSTS_N_INSNS (1), /* movcc/movr */
218 0, /* shift penalty */
219 3 /* branch cost */
220 };
221
222 static const
223 struct processor_costs leon_costs = {
224 COSTS_N_INSNS (1), /* int load */
225 COSTS_N_INSNS (1), /* int signed load */
226 COSTS_N_INSNS (1), /* int zeroed load */
227 COSTS_N_INSNS (1), /* float load */
228 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
229 COSTS_N_INSNS (1), /* fadd, fsub */
230 COSTS_N_INSNS (1), /* fcmp */
231 COSTS_N_INSNS (1), /* fmov, fmovr */
232 COSTS_N_INSNS (1), /* fmul */
233 COSTS_N_INSNS (15), /* fdivs */
234 COSTS_N_INSNS (15), /* fdivd */
235 COSTS_N_INSNS (23), /* fsqrts */
236 COSTS_N_INSNS (23), /* fsqrtd */
237 COSTS_N_INSNS (5), /* imul */
238 COSTS_N_INSNS (5), /* imulX */
239 0, /* imul bit factor */
240 COSTS_N_INSNS (5), /* idiv */
241 COSTS_N_INSNS (5), /* idivX */
242 COSTS_N_INSNS (1), /* movcc/movr */
243 0, /* shift penalty */
244 3 /* branch cost */
245 };
246
247 static const
248 struct processor_costs leon3_costs = {
249 COSTS_N_INSNS (1), /* int load */
250 COSTS_N_INSNS (1), /* int signed load */
251 COSTS_N_INSNS (1), /* int zeroed load */
252 COSTS_N_INSNS (1), /* float load */
253 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
254 COSTS_N_INSNS (1), /* fadd, fsub */
255 COSTS_N_INSNS (1), /* fcmp */
256 COSTS_N_INSNS (1), /* fmov, fmovr */
257 COSTS_N_INSNS (1), /* fmul */
258 COSTS_N_INSNS (14), /* fdivs */
259 COSTS_N_INSNS (15), /* fdivd */
260 COSTS_N_INSNS (22), /* fsqrts */
261 COSTS_N_INSNS (23), /* fsqrtd */
262 COSTS_N_INSNS (5), /* imul */
263 COSTS_N_INSNS (5), /* imulX */
264 0, /* imul bit factor */
265 COSTS_N_INSNS (35), /* idiv */
266 COSTS_N_INSNS (35), /* idivX */
267 COSTS_N_INSNS (1), /* movcc/movr */
268 0, /* shift penalty */
269 3 /* branch cost */
270 };
271
272 static const
273 struct processor_costs leon5_costs = {
274 COSTS_N_INSNS (1), /* int load */
275 COSTS_N_INSNS (1), /* int signed load */
276 COSTS_N_INSNS (1), /* int zeroed load */
277 COSTS_N_INSNS (1), /* float load */
278 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
279 COSTS_N_INSNS (1), /* fadd, fsub */
280 COSTS_N_INSNS (1), /* fcmp */
281 COSTS_N_INSNS (1), /* fmov, fmovr */
282 COSTS_N_INSNS (1), /* fmul */
283 COSTS_N_INSNS (17), /* fdivs */
284 COSTS_N_INSNS (18), /* fdivd */
285 COSTS_N_INSNS (25), /* fsqrts */
286 COSTS_N_INSNS (26), /* fsqrtd */
287 COSTS_N_INSNS (4), /* imul */
288 COSTS_N_INSNS (4), /* imulX */
289 0, /* imul bit factor */
290 COSTS_N_INSNS (35), /* idiv */
291 COSTS_N_INSNS (35), /* idivX */
292 COSTS_N_INSNS (1), /* movcc/movr */
293 0, /* shift penalty */
294 3 /* branch cost */
295 };
296
297 static const
298 struct processor_costs sparclet_costs = {
299 COSTS_N_INSNS (3), /* int load */
300 COSTS_N_INSNS (3), /* int signed load */
301 COSTS_N_INSNS (1), /* int zeroed load */
302 COSTS_N_INSNS (1), /* float load */
303 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
304 COSTS_N_INSNS (1), /* fadd, fsub */
305 COSTS_N_INSNS (1), /* fcmp */
306 COSTS_N_INSNS (1), /* fmov, fmovr */
307 COSTS_N_INSNS (1), /* fmul */
308 COSTS_N_INSNS (1), /* fdivs */
309 COSTS_N_INSNS (1), /* fdivd */
310 COSTS_N_INSNS (1), /* fsqrts */
311 COSTS_N_INSNS (1), /* fsqrtd */
312 COSTS_N_INSNS (5), /* imul */
313 COSTS_N_INSNS (5), /* imulX */
314 0, /* imul bit factor */
315 COSTS_N_INSNS (5), /* idiv */
316 COSTS_N_INSNS (5), /* idivX */
317 COSTS_N_INSNS (1), /* movcc/movr */
318 0, /* shift penalty */
319 3 /* branch cost */
320 };
321
322 static const
323 struct processor_costs ultrasparc_costs = {
324 COSTS_N_INSNS (2), /* int load */
325 COSTS_N_INSNS (3), /* int signed load */
326 COSTS_N_INSNS (2), /* int zeroed load */
327 COSTS_N_INSNS (2), /* float load */
328 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
329 COSTS_N_INSNS (4), /* fadd, fsub */
330 COSTS_N_INSNS (1), /* fcmp */
331 COSTS_N_INSNS (2), /* fmov, fmovr */
332 COSTS_N_INSNS (4), /* fmul */
333 COSTS_N_INSNS (13), /* fdivs */
334 COSTS_N_INSNS (23), /* fdivd */
335 COSTS_N_INSNS (13), /* fsqrts */
336 COSTS_N_INSNS (23), /* fsqrtd */
337 COSTS_N_INSNS (4), /* imul */
338 COSTS_N_INSNS (4), /* imulX */
339 2, /* imul bit factor */
340 COSTS_N_INSNS (37), /* idiv */
341 COSTS_N_INSNS (68), /* idivX */
342 COSTS_N_INSNS (2), /* movcc/movr */
343 2, /* shift penalty */
344 2 /* branch cost */
345 };
346
347 static const
348 struct processor_costs ultrasparc3_costs = {
349 COSTS_N_INSNS (2), /* int load */
350 COSTS_N_INSNS (3), /* int signed load */
351 COSTS_N_INSNS (3), /* int zeroed load */
352 COSTS_N_INSNS (2), /* float load */
353 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
354 COSTS_N_INSNS (4), /* fadd, fsub */
355 COSTS_N_INSNS (5), /* fcmp */
356 COSTS_N_INSNS (3), /* fmov, fmovr */
357 COSTS_N_INSNS (4), /* fmul */
358 COSTS_N_INSNS (17), /* fdivs */
359 COSTS_N_INSNS (20), /* fdivd */
360 COSTS_N_INSNS (20), /* fsqrts */
361 COSTS_N_INSNS (29), /* fsqrtd */
362 COSTS_N_INSNS (6), /* imul */
363 COSTS_N_INSNS (6), /* imulX */
364 0, /* imul bit factor */
365 COSTS_N_INSNS (40), /* idiv */
366 COSTS_N_INSNS (71), /* idivX */
367 COSTS_N_INSNS (2), /* movcc/movr */
368 0, /* shift penalty */
369 2 /* branch cost */
370 };
371
372 static const
373 struct processor_costs niagara_costs = {
374 COSTS_N_INSNS (3), /* int load */
375 COSTS_N_INSNS (3), /* int signed load */
376 COSTS_N_INSNS (3), /* int zeroed load */
377 COSTS_N_INSNS (9), /* float load */
378 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
379 COSTS_N_INSNS (8), /* fadd, fsub */
380 COSTS_N_INSNS (26), /* fcmp */
381 COSTS_N_INSNS (8), /* fmov, fmovr */
382 COSTS_N_INSNS (29), /* fmul */
383 COSTS_N_INSNS (54), /* fdivs */
384 COSTS_N_INSNS (83), /* fdivd */
385 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
386 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
387 COSTS_N_INSNS (11), /* imul */
388 COSTS_N_INSNS (11), /* imulX */
389 0, /* imul bit factor */
390 COSTS_N_INSNS (72), /* idiv */
391 COSTS_N_INSNS (72), /* idivX */
392 COSTS_N_INSNS (1), /* movcc/movr */
393 0, /* shift penalty */
394 4 /* branch cost */
395 };
396
397 static const
398 struct processor_costs niagara2_costs = {
399 COSTS_N_INSNS (3), /* int load */
400 COSTS_N_INSNS (3), /* int signed load */
401 COSTS_N_INSNS (3), /* int zeroed load */
402 COSTS_N_INSNS (3), /* float load */
403 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
404 COSTS_N_INSNS (6), /* fadd, fsub */
405 COSTS_N_INSNS (6), /* fcmp */
406 COSTS_N_INSNS (6), /* fmov, fmovr */
407 COSTS_N_INSNS (6), /* fmul */
408 COSTS_N_INSNS (19), /* fdivs */
409 COSTS_N_INSNS (33), /* fdivd */
410 COSTS_N_INSNS (19), /* fsqrts */
411 COSTS_N_INSNS (33), /* fsqrtd */
412 COSTS_N_INSNS (5), /* imul */
413 COSTS_N_INSNS (5), /* imulX */
414 0, /* imul bit factor */
415 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
416 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
417 COSTS_N_INSNS (1), /* movcc/movr */
418 0, /* shift penalty */
419 5 /* branch cost */
420 };
421
422 static const
423 struct processor_costs niagara3_costs = {
424 COSTS_N_INSNS (3), /* int load */
425 COSTS_N_INSNS (3), /* int signed load */
426 COSTS_N_INSNS (3), /* int zeroed load */
427 COSTS_N_INSNS (3), /* float load */
428 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
429 COSTS_N_INSNS (9), /* fadd, fsub */
430 COSTS_N_INSNS (9), /* fcmp */
431 COSTS_N_INSNS (9), /* fmov, fmovr */
432 COSTS_N_INSNS (9), /* fmul */
433 COSTS_N_INSNS (23), /* fdivs */
434 COSTS_N_INSNS (37), /* fdivd */
435 COSTS_N_INSNS (23), /* fsqrts */
436 COSTS_N_INSNS (37), /* fsqrtd */
437 COSTS_N_INSNS (9), /* imul */
438 COSTS_N_INSNS (9), /* imulX */
439 0, /* imul bit factor */
440 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
441 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
442 COSTS_N_INSNS (1), /* movcc/movr */
443 0, /* shift penalty */
444 5 /* branch cost */
445 };
446
447 static const
448 struct processor_costs niagara4_costs = {
449 COSTS_N_INSNS (5), /* int load */
450 COSTS_N_INSNS (5), /* int signed load */
451 COSTS_N_INSNS (5), /* int zeroed load */
452 COSTS_N_INSNS (5), /* float load */
453 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
454 COSTS_N_INSNS (11), /* fadd, fsub */
455 COSTS_N_INSNS (11), /* fcmp */
456 COSTS_N_INSNS (11), /* fmov, fmovr */
457 COSTS_N_INSNS (11), /* fmul */
458 COSTS_N_INSNS (24), /* fdivs */
459 COSTS_N_INSNS (37), /* fdivd */
460 COSTS_N_INSNS (24), /* fsqrts */
461 COSTS_N_INSNS (37), /* fsqrtd */
462 COSTS_N_INSNS (12), /* imul */
463 COSTS_N_INSNS (12), /* imulX */
464 0, /* imul bit factor */
465 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
466 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
467 COSTS_N_INSNS (1), /* movcc/movr */
468 0, /* shift penalty */
469 2 /* branch cost */
470 };
471
472 static const
473 struct processor_costs niagara7_costs = {
474 COSTS_N_INSNS (5), /* int load */
475 COSTS_N_INSNS (5), /* int signed load */
476 COSTS_N_INSNS (5), /* int zeroed load */
477 COSTS_N_INSNS (5), /* float load */
478 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
479 COSTS_N_INSNS (11), /* fadd, fsub */
480 COSTS_N_INSNS (11), /* fcmp */
481 COSTS_N_INSNS (11), /* fmov, fmovr */
482 COSTS_N_INSNS (11), /* fmul */
483 COSTS_N_INSNS (24), /* fdivs */
484 COSTS_N_INSNS (37), /* fdivd */
485 COSTS_N_INSNS (24), /* fsqrts */
486 COSTS_N_INSNS (37), /* fsqrtd */
487 COSTS_N_INSNS (12), /* imul */
488 COSTS_N_INSNS (12), /* imulX */
489 0, /* imul bit factor */
490 COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
491 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
492 COSTS_N_INSNS (1), /* movcc/movr */
493 0, /* shift penalty */
494 1 /* branch cost */
495 };
496
497 static const
498 struct processor_costs m8_costs = {
499 COSTS_N_INSNS (3), /* int load */
500 COSTS_N_INSNS (3), /* int signed load */
501 COSTS_N_INSNS (3), /* int zeroed load */
502 COSTS_N_INSNS (3), /* float load */
503 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
504 COSTS_N_INSNS (9), /* fadd, fsub */
505 COSTS_N_INSNS (9), /* fcmp */
506 COSTS_N_INSNS (9), /* fmov, fmovr */
507 COSTS_N_INSNS (9), /* fmul */
508 COSTS_N_INSNS (26), /* fdivs */
509 COSTS_N_INSNS (30), /* fdivd */
510 COSTS_N_INSNS (33), /* fsqrts */
511 COSTS_N_INSNS (41), /* fsqrtd */
512 COSTS_N_INSNS (12), /* imul */
513 COSTS_N_INSNS (10), /* imulX */
514 0, /* imul bit factor */
515 COSTS_N_INSNS (57), /* udiv/sdiv */
516 COSTS_N_INSNS (30), /* udivx/sdivx */
517 COSTS_N_INSNS (1), /* movcc/movr */
518 0, /* shift penalty */
519 1 /* branch cost */
520 };
521
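/* Cost table consulted by the cost hooks below.  It defaults to the Cypress
   table and is repointed to the table of the processor selected for tuning
   (see sparc_option_override).  */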
522 static const struct processor_costs *sparc_costs = &cypress_costs;
523
524 #ifdef HAVE_AS_RELAX_OPTION
525 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
526 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
527    With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out
528    whether somebody branches between the sethi and jmp.  */
529 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
530 #else
531 #define LEAF_SIBCALL_SLOT_RESERVED_P \
532 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
533 #endif
534
535 /* Vector to say how input registers are mapped to output registers.
536 HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
537 eliminate it. You must use -fomit-frame-pointer to get that. */
538 char leaf_reg_remap[] =
539 { 0, 1, 2, 3, 4, 5, 6, 7,
540 -1, -1, -1, -1, -1, -1, 14, -1,
541 -1, -1, -1, -1, -1, -1, -1, -1,
542 8, 9, 10, 11, 12, 13, -1, 15,
543
544 32, 33, 34, 35, 36, 37, 38, 39,
545 40, 41, 42, 43, 44, 45, 46, 47,
546 48, 49, 50, 51, 52, 53, 54, 55,
547 56, 57, 58, 59, 60, 61, 62, 63,
548 64, 65, 66, 67, 68, 69, 70, 71,
549 72, 73, 74, 75, 76, 77, 78, 79,
550 80, 81, 82, 83, 84, 85, 86, 87,
551 88, 89, 90, 91, 92, 93, 94, 95,
552 96, 97, 98, 99, 100, 101, 102};
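/* For example, under this remapping the incoming argument register %i0
   (hard register 24) becomes %o0 (hard register 8) in a leaf function,
   while the frame pointer %i6 (hard register 30) is marked -1 since, as
   noted above, it cannot be remapped away here.  */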
553
554 /* Vector, indexed by hard register number, which contains 1
555 for a register that is allowable in a candidate for leaf
556 function treatment. */
557 char sparc_leaf_regs[] =
558 { 1, 1, 1, 1, 1, 1, 1, 1,
559 0, 0, 0, 0, 0, 0, 1, 0,
560 0, 0, 0, 0, 0, 0, 0, 0,
561 1, 1, 1, 1, 1, 1, 0, 1,
562 1, 1, 1, 1, 1, 1, 1, 1,
563 1, 1, 1, 1, 1, 1, 1, 1,
564 1, 1, 1, 1, 1, 1, 1, 1,
565 1, 1, 1, 1, 1, 1, 1, 1,
566 1, 1, 1, 1, 1, 1, 1, 1,
567 1, 1, 1, 1, 1, 1, 1, 1,
568 1, 1, 1, 1, 1, 1, 1, 1,
569 1, 1, 1, 1, 1, 1, 1, 1,
570 1, 1, 1, 1, 1, 1, 1};
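/* For example, the incoming argument registers %i0-%i5 may appear in a leaf
   candidate and are later renamed to %o0-%o5 by leaf_reg_remap, whereas the
   local registers and the out registers other than %sp may not appear.  */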
571
572 struct GTY(()) machine_function
573 {
574 /* Size of the frame of the function. */
575 HOST_WIDE_INT frame_size;
576
577 /* Size of the frame of the function minus the register window save area
578 and the outgoing argument area. */
579 HOST_WIDE_INT apparent_frame_size;
580
581 /* Register we pretend the frame pointer is allocated to. Normally, this
582 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
583 record "offset" separately as it may be too big for (reg + disp). */
584 rtx frame_base_reg;
585 HOST_WIDE_INT frame_base_offset;
586
587 /* Number of global or FP registers to be saved (as 4-byte quantities). */
588 int n_global_fp_regs;
589
590 /* True if the current function is leaf and uses only leaf regs,
591 so that the SPARC leaf function optimization can be applied.
592 Private version of crtl->uses_only_leaf_regs, see
593 sparc_expand_prologue for the rationale. */
594 int leaf_function_p;
595
596 /* True if the prologue saves local or in registers. */
597 bool save_local_in_regs_p;
598
599 /* True if the data calculated by sparc_expand_prologue are valid. */
600 bool prologue_data_valid_p;
601 };
602
603 #define sparc_frame_size cfun->machine->frame_size
604 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size
605 #define sparc_frame_base_reg cfun->machine->frame_base_reg
606 #define sparc_frame_base_offset cfun->machine->frame_base_offset
607 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
608 #define sparc_leaf_function_p cfun->machine->leaf_function_p
609 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
610 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
611
612 /* 1 if the next opcode is to be specially indented. */
613 int sparc_indent_opcode = 0;
614
615 static void sparc_option_override (void);
616 static void sparc_init_modes (void);
617 static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
618 const_tree, bool, bool, int *, int *);
619
620 static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
621 static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
622 static int leon5_adjust_cost (rtx_insn *, int, rtx_insn *, int);
623
624 static void sparc_emit_set_const32 (rtx, rtx);
625 static void sparc_emit_set_const64 (rtx, rtx);
626 static void sparc_output_addr_vec (rtx);
627 static void sparc_output_addr_diff_vec (rtx);
628 static void sparc_output_deferred_case_vectors (void);
629 static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
630 static bool sparc_legitimate_constant_p (machine_mode, rtx);
631 static rtx sparc_builtin_saveregs (void);
632 static int epilogue_renumber (rtx *, int);
633 static bool sparc_assemble_integer (rtx, unsigned int, int);
634 static int set_extends (rtx_insn *);
635 static void sparc_asm_function_prologue (FILE *);
636 static void sparc_asm_function_epilogue (FILE *);
637 #ifdef TARGET_SOLARIS
638 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
639 tree) ATTRIBUTE_UNUSED;
640 #endif
641 static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
642 static int sparc_issue_rate (void);
643 static void sparc_sched_init (FILE *, int, int);
644 static int sparc_use_sched_lookahead (void);
645
646 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
647 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
648 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
649 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
650 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
651
652 static bool sparc_function_ok_for_sibcall (tree, tree);
653 static void sparc_init_libfuncs (void);
654 static void sparc_init_builtins (void);
655 static void sparc_fpu_init_builtins (void);
656 static void sparc_vis_init_builtins (void);
657 static tree sparc_builtin_decl (unsigned, bool);
658 static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
659 static tree sparc_fold_builtin (tree, int, tree *, bool);
660 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
661 HOST_WIDE_INT, tree);
662 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
663 HOST_WIDE_INT, const_tree);
664 static struct machine_function * sparc_init_machine_status (void);
665 static bool sparc_cannot_force_const_mem (machine_mode, rtx);
666 static rtx sparc_tls_get_addr (void);
667 static rtx sparc_tls_got (void);
668 static int sparc_register_move_cost (machine_mode,
669 reg_class_t, reg_class_t);
670 static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
671 static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
672 int *, const_tree, int);
673 static bool sparc_strict_argument_naming (cumulative_args_t);
674 static void sparc_va_start (tree, rtx);
675 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
676 static bool sparc_vector_mode_supported_p (machine_mode);
677 static bool sparc_tls_referenced_p (rtx);
678 static rtx sparc_legitimize_tls_address (rtx);
679 static rtx sparc_legitimize_pic_address (rtx, rtx);
680 static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
681 static rtx sparc_delegitimize_address (rtx);
682 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
683 static bool sparc_pass_by_reference (cumulative_args_t,
684 const function_arg_info &);
685 static void sparc_function_arg_advance (cumulative_args_t,
686 const function_arg_info &);
687 static rtx sparc_function_arg (cumulative_args_t, const function_arg_info &);
688 static rtx sparc_function_incoming_arg (cumulative_args_t,
689 const function_arg_info &);
690 static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
691 static unsigned int sparc_function_arg_boundary (machine_mode,
692 const_tree);
693 static int sparc_arg_partial_bytes (cumulative_args_t,
694 const function_arg_info &);
695 static bool sparc_return_in_memory (const_tree, const_tree);
696 static rtx sparc_struct_value_rtx (tree, int);
697 static rtx sparc_function_value (const_tree, const_tree, bool);
698 static rtx sparc_libcall_value (machine_mode, const_rtx);
699 static bool sparc_function_value_regno_p (const unsigned int);
700 static unsigned HOST_WIDE_INT sparc_asan_shadow_offset (void);
701 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
702 static void sparc_file_end (void);
703 static bool sparc_frame_pointer_required (void);
704 static bool sparc_can_eliminate (const int, const int);
705 static void sparc_conditional_register_usage (void);
706 static bool sparc_use_pseudo_pic_reg (void);
707 static void sparc_init_pic_reg (void);
708 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
709 static const char *sparc_mangle_type (const_tree);
710 #endif
711 static void sparc_trampoline_init (rtx, tree, rtx);
712 static machine_mode sparc_preferred_simd_mode (scalar_mode);
713 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
714 static bool sparc_lra_p (void);
715 static bool sparc_print_operand_punct_valid_p (unsigned char);
716 static void sparc_print_operand (FILE *, rtx, int);
717 static void sparc_print_operand_address (FILE *, machine_mode, rtx);
718 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
719 machine_mode,
720 secondary_reload_info *);
721 static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
722 reg_class_t);
723 static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
724 static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
725 static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
726 static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
727 static unsigned int sparc_min_arithmetic_precision (void);
728 static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
729 static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
730 static bool sparc_modes_tieable_p (machine_mode, machine_mode);
731 static bool sparc_can_change_mode_class (machine_mode, machine_mode,
732 reg_class_t);
733 static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);
734 static bool sparc_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
735 const vec_perm_indices &);
736 static bool sparc_can_follow_jump (const rtx_insn *, const rtx_insn *);
737
738 #ifdef SUBTARGET_ATTRIBUTE_TABLE
739 /* Table of valid machine attributes. */
740 static const struct attribute_spec sparc_attribute_table[] =
741 {
742 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
743 do_diagnostic, handler, exclude } */
744 SUBTARGET_ATTRIBUTE_TABLE,
745 { NULL, 0, 0, false, false, false, false, NULL, NULL }
746 };
747 #endif
748
749 char sparc_hard_reg_printed[8];
750
751 /* Initialize the GCC target structure. */
752
753 /* The default is to use .half rather than .short for aligned HI objects. */
754 #undef TARGET_ASM_ALIGNED_HI_OP
755 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
756
757 #undef TARGET_ASM_UNALIGNED_HI_OP
758 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
759 #undef TARGET_ASM_UNALIGNED_SI_OP
760 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
761 #undef TARGET_ASM_UNALIGNED_DI_OP
762 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
763
764 /* The target hook has to handle DI-mode values. */
765 #undef TARGET_ASM_INTEGER
766 #define TARGET_ASM_INTEGER sparc_assemble_integer
767
768 #undef TARGET_ASM_FUNCTION_PROLOGUE
769 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
770 #undef TARGET_ASM_FUNCTION_EPILOGUE
771 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
772
773 #undef TARGET_SCHED_ADJUST_COST
774 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
775 #undef TARGET_SCHED_ISSUE_RATE
776 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
777 #undef TARGET_SCHED_INIT
778 #define TARGET_SCHED_INIT sparc_sched_init
779 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
780 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
781
782 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
783 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
784
785 #undef TARGET_INIT_LIBFUNCS
786 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
787
788 #undef TARGET_LEGITIMIZE_ADDRESS
789 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
790 #undef TARGET_DELEGITIMIZE_ADDRESS
791 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
792 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
793 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
794
795 #undef TARGET_INIT_BUILTINS
796 #define TARGET_INIT_BUILTINS sparc_init_builtins
797 #undef TARGET_BUILTIN_DECL
798 #define TARGET_BUILTIN_DECL sparc_builtin_decl
799 #undef TARGET_EXPAND_BUILTIN
800 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
801 #undef TARGET_FOLD_BUILTIN
802 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
803
804 #if TARGET_TLS
805 #undef TARGET_HAVE_TLS
806 #define TARGET_HAVE_TLS true
807 #endif
808
809 #undef TARGET_CANNOT_FORCE_CONST_MEM
810 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
811
812 #undef TARGET_ASM_OUTPUT_MI_THUNK
813 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
814 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
815 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
816
817 #undef TARGET_RTX_COSTS
818 #define TARGET_RTX_COSTS sparc_rtx_costs
819 #undef TARGET_ADDRESS_COST
820 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
821 #undef TARGET_REGISTER_MOVE_COST
822 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
823
824 #undef TARGET_PROMOTE_FUNCTION_MODE
825 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
826 #undef TARGET_STRICT_ARGUMENT_NAMING
827 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
828
829 #undef TARGET_MUST_PASS_IN_STACK
830 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
831 #undef TARGET_PASS_BY_REFERENCE
832 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
833 #undef TARGET_ARG_PARTIAL_BYTES
834 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
835 #undef TARGET_FUNCTION_ARG_ADVANCE
836 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
837 #undef TARGET_FUNCTION_ARG
838 #define TARGET_FUNCTION_ARG sparc_function_arg
839 #undef TARGET_FUNCTION_INCOMING_ARG
840 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
841 #undef TARGET_FUNCTION_ARG_PADDING
842 #define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
843 #undef TARGET_FUNCTION_ARG_BOUNDARY
844 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
845
846 #undef TARGET_RETURN_IN_MEMORY
847 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
848 #undef TARGET_STRUCT_VALUE_RTX
849 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
850 #undef TARGET_FUNCTION_VALUE
851 #define TARGET_FUNCTION_VALUE sparc_function_value
852 #undef TARGET_LIBCALL_VALUE
853 #define TARGET_LIBCALL_VALUE sparc_libcall_value
854 #undef TARGET_FUNCTION_VALUE_REGNO_P
855 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
856
857 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
858 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
859
860 #undef TARGET_ASAN_SHADOW_OFFSET
861 #define TARGET_ASAN_SHADOW_OFFSET sparc_asan_shadow_offset
862
863 #undef TARGET_EXPAND_BUILTIN_VA_START
864 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
865 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
866 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
867
868 #undef TARGET_VECTOR_MODE_SUPPORTED_P
869 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
870
871 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
872 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
873
874 #ifdef SUBTARGET_INSERT_ATTRIBUTES
875 #undef TARGET_INSERT_ATTRIBUTES
876 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
877 #endif
878
879 #ifdef SUBTARGET_ATTRIBUTE_TABLE
880 #undef TARGET_ATTRIBUTE_TABLE
881 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
882 #endif
883
884 #undef TARGET_OPTION_OVERRIDE
885 #define TARGET_OPTION_OVERRIDE sparc_option_override
886
887 #ifdef TARGET_THREAD_SSP_OFFSET
888 #undef TARGET_STACK_PROTECT_GUARD
889 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
890 #endif
891
892 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
893 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
894 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
895 #endif
896
897 #undef TARGET_ASM_FILE_END
898 #define TARGET_ASM_FILE_END sparc_file_end
899
900 #undef TARGET_FRAME_POINTER_REQUIRED
901 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
902
903 #undef TARGET_CAN_ELIMINATE
904 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
905
906 #undef TARGET_PREFERRED_RELOAD_CLASS
907 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
908
909 #undef TARGET_SECONDARY_RELOAD
910 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
911 #undef TARGET_SECONDARY_MEMORY_NEEDED
912 #define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed
913 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
914 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode
915
916 #undef TARGET_CONDITIONAL_REGISTER_USAGE
917 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
918
919 #undef TARGET_INIT_PIC_REG
920 #define TARGET_INIT_PIC_REG sparc_init_pic_reg
921
922 #undef TARGET_USE_PSEUDO_PIC_REG
923 #define TARGET_USE_PSEUDO_PIC_REG sparc_use_pseudo_pic_reg
924
925 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
926 #undef TARGET_MANGLE_TYPE
927 #define TARGET_MANGLE_TYPE sparc_mangle_type
928 #endif
929
930 #undef TARGET_LRA_P
931 #define TARGET_LRA_P sparc_lra_p
932
933 #undef TARGET_LEGITIMATE_ADDRESS_P
934 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
935
936 #undef TARGET_LEGITIMATE_CONSTANT_P
937 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
938
939 #undef TARGET_TRAMPOLINE_INIT
940 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
941
942 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
943 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
944 #undef TARGET_PRINT_OPERAND
945 #define TARGET_PRINT_OPERAND sparc_print_operand
946 #undef TARGET_PRINT_OPERAND_ADDRESS
947 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
948
949 /* The value stored by LDSTUB. */
950 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
951 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
952
953 #undef TARGET_CSTORE_MODE
954 #define TARGET_CSTORE_MODE sparc_cstore_mode
955
956 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
957 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv
958
959 #undef TARGET_FIXED_CONDITION_CODE_REGS
960 #define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs
961
962 #undef TARGET_MIN_ARITHMETIC_PRECISION
963 #define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision
964
965 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
966 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
967
968 #undef TARGET_HARD_REGNO_NREGS
969 #define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
970 #undef TARGET_HARD_REGNO_MODE_OK
971 #define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok
972
973 #undef TARGET_MODES_TIEABLE_P
974 #define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p
975
976 #undef TARGET_CAN_CHANGE_MODE_CLASS
977 #define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class
978
979 #undef TARGET_CONSTANT_ALIGNMENT
980 #define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment
981
982 #undef TARGET_VECTORIZE_VEC_PERM_CONST
983 #define TARGET_VECTORIZE_VEC_PERM_CONST sparc_vectorize_vec_perm_const
984
985 #undef TARGET_CAN_FOLLOW_JUMP
986 #define TARGET_CAN_FOLLOW_JUMP sparc_can_follow_jump
987
988 struct gcc_target targetm = TARGET_INITIALIZER;
989
990 /* Return the memory reference contained in X if any, zero otherwise. */
991
992 static rtx
993 mem_ref (rtx x)
994 {
995 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
996 x = XEXP (x, 0);
997
998 if (MEM_P (x))
999 return x;
1000
1001 return NULL_RTX;
1002 }
1003
1004 /* True if any of INSN's source register(s) is REG. */
1005
1006 static bool
1007 insn_uses_reg_p (rtx_insn *insn, unsigned int reg)
1008 {
1009 extract_insn (insn);
1010 return ((REG_P (recog_data.operand[1])
1011 && REGNO (recog_data.operand[1]) == reg)
1012 || (recog_data.n_operands == 3
1013 && REG_P (recog_data.operand[2])
1014 && REGNO (recog_data.operand[2]) == reg));
1015 }
1016
1017 /* True if INSN is a floating-point division or square-root. */
1018
1019 static bool
1020 div_sqrt_insn_p (rtx_insn *insn)
1021 {
1022 if (GET_CODE (PATTERN (insn)) != SET)
1023 return false;
1024
1025 switch (get_attr_type (insn))
1026 {
1027 case TYPE_FPDIVS:
1028 case TYPE_FPSQRTS:
1029 case TYPE_FPDIVD:
1030 case TYPE_FPSQRTD:
1031 return true;
1032 default:
1033 return false;
1034 }
1035 }
1036
1037 /* True if INSN is a floating-point instruction. */
1038
1039 static bool
1040 fpop_insn_p (rtx_insn *insn)
1041 {
1042 if (GET_CODE (PATTERN (insn)) != SET)
1043 return false;
1044
1045 switch (get_attr_type (insn))
1046 {
1047 case TYPE_FPMOVE:
1048 case TYPE_FPCMOVE:
1049 case TYPE_FP:
1050 case TYPE_FPCMP:
1051 case TYPE_FPMUL:
1052 case TYPE_FPDIVS:
1053 case TYPE_FPSQRTS:
1054 case TYPE_FPDIVD:
1055 case TYPE_FPSQRTD:
1056 return true;
1057 default:
1058 return false;
1059 }
1060 }
1061
1062 /* True if INSN is an atomic instruction. */
1063
1064 static bool
1065 atomic_insn_for_leon3_p (rtx_insn *insn)
1066 {
1067 switch (INSN_CODE (insn))
1068 {
1069 case CODE_FOR_swapsi:
1070 case CODE_FOR_ldstub:
1071 case CODE_FOR_atomic_compare_and_swap_leon3_1:
1072 return true;
1073 default:
1074 return false;
1075 }
1076 }
1077
1078 /* True if INSN is a store instruction. */
1079
1080 static bool
1081 store_insn_p (rtx_insn *insn)
1082 {
1083 if (GET_CODE (PATTERN (insn)) != SET)
1084 return false;
1085
1086 switch (get_attr_type (insn))
1087 {
1088 case TYPE_STORE:
1089 case TYPE_FPSTORE:
1090 return true;
1091 default:
1092 return false;
1093 }
1094 }
1095
1096 /* True if INSN is a load instruction. */
1097
1098 static bool
1099 load_insn_p (rtx_insn *insn)
1100 {
1101 if (GET_CODE (PATTERN (insn)) != SET)
1102 return false;
1103
1104 switch (get_attr_type (insn))
1105 {
1106 case TYPE_LOAD:
1107 case TYPE_SLOAD:
1108 case TYPE_FPLOAD:
1109 return true;
1110 default:
1111 return false;
1112 }
1113 }
1114
1115 /* We use a machine specific pass to enable workarounds for errata.
1116
1117 We need to have the (essentially) final form of the insn stream in order
1118 to properly detect the various hazards. Therefore, this machine specific
1119 pass runs as late as possible. */
1120
1121 /* True if INSN is an md pattern or asm statement.  */
1122 #define USEFUL_INSN_P(INSN) \
1123 (NONDEBUG_INSN_P (INSN) \
1124 && GET_CODE (PATTERN (INSN)) != USE \
1125 && GET_CODE (PATTERN (INSN)) != CLOBBER)
1126
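/* Return the next active instruction after INSN, skipping insns whose
   pattern is an UNSPEC_VOLATILE or an ASM_INPUT as well as inline asm
   statements with an empty template.  */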
1127 rtx_insn *
1128 next_active_non_empty_insn (rtx_insn *insn)
1129 {
1130 insn = next_active_insn (insn);
1131
1132 while (insn
1133 && (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
1134 || GET_CODE (PATTERN (insn)) == ASM_INPUT
1135 || (USEFUL_INSN_P (insn)
1136 && (asm_noperands (PATTERN (insn)) >= 0)
1137 && !strcmp (decode_asm_operands (PATTERN (insn),
1138 NULL, NULL, NULL,
1139 NULL, NULL), ""))))
1140 insn = next_active_insn (insn);
1141
1142 return insn;
1143 }
1144
1145 static unsigned int
1146 sparc_do_work_around_errata (void)
1147 {
1148 rtx_insn *insn, *next;
1149 bool find_first_useful = true;
1150
1151 /* Force all instructions to be split into their final form. */
1152 split_all_insns_noflow ();
1153
1154 /* Now look for specific patterns in the insn stream. */
1155 for (insn = get_insns (); insn; insn = next)
1156 {
1157 bool insert_nop = false;
1158 rtx set;
1159 rtx_insn *jump;
1160 rtx_sequence *seq;
1161
1162 /* Look into the instruction in a delay slot. */
1163 if (NONJUMP_INSN_P (insn)
1164 && (seq = dyn_cast <rtx_sequence *> (PATTERN (insn))))
1165 {
1166 jump = seq->insn (0);
1167 insn = seq->insn (1);
1168 }
1169 else if (JUMP_P (insn))
1170 jump = insn;
1171 else
1172 jump = NULL;
1173
1174 /* Do not begin function with atomic instruction. */
1175 if (sparc_fix_ut700
1176 && find_first_useful
1177 && USEFUL_INSN_P (insn))
1178 {
1179 find_first_useful = false;
1180 if (atomic_insn_for_leon3_p (insn))
1181 emit_insn_before (gen_nop (), insn);
1182 }
1183
1184 /* Place a NOP at the branch target of an integer branch if it is a
1185 floating-point operation or a floating-point branch. */
1186 if (sparc_fix_gr712rc
1187 && jump
1188 && jump_to_label_p (jump)
1189 && get_attr_branch_type (jump) == BRANCH_TYPE_ICC)
1190 {
1191 rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
1192 if (target
1193 && (fpop_insn_p (target)
1194 || (JUMP_P (target)
1195 && get_attr_branch_type (target) == BRANCH_TYPE_FCC)))
1196 emit_insn_before (gen_nop (), target);
1197 }
1198
1199 /* Insert a NOP between load instruction and atomic instruction. Insert
1200 a NOP at branch target if there is a load in delay slot and an atomic
1201 instruction at branch target. */
1202 if (sparc_fix_ut700
1203 && NONJUMP_INSN_P (insn)
1204 && load_insn_p (insn))
1205 {
1206 if (jump && jump_to_label_p (jump))
1207 {
1208 rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
1209 if (target && atomic_insn_for_leon3_p (target))
1210 emit_insn_before (gen_nop (), target);
1211 }
1212
1213 next = next_active_non_empty_insn (insn);
1214 if (!next)
1215 break;
1216
1217 if (atomic_insn_for_leon3_p (next))
1218 insert_nop = true;
1219 }
1220
1221 /* Look for a sequence that starts with a fdiv or fsqrt instruction and
1222 ends with another fdiv or fsqrt instruction with no dependencies on
1223 the former, along with an appropriate pattern in between. */
1224 if (sparc_fix_lost_divsqrt
1225 && NONJUMP_INSN_P (insn)
1226 && div_sqrt_insn_p (insn))
1227 {
1228 int i;
1229 int fp_found = 0;
1230 rtx_insn *after;
1231
1232 const unsigned int dest_reg = REGNO (SET_DEST (single_set (insn)));
1233
1234 next = next_active_insn (insn);
1235 if (!next)
1236 break;
1237
1238 for (after = next, i = 0; i < 4; i++)
1239 {
1240 /* Count floating-point operations. */
1241 if (i != 3 && fpop_insn_p (after))
1242 {
1243 /* If the insn uses the destination register of
1244 the div/sqrt, then it cannot be problematic. */
1245 if (insn_uses_reg_p (after, dest_reg))
1246 break;
1247 fp_found++;
1248 }
1249
1250 /* Count floating-point loads. */
1251 if (i != 3
1252 && (set = single_set (after)) != NULL_RTX
1253 && REG_P (SET_DEST (set))
1254 && REGNO (SET_DEST (set)) > 31)
1255 {
1256 /* If the insn uses the destination register of
1257 the div/sqrt, then it cannot be problematic. */
1258 if (REGNO (SET_DEST (set)) == dest_reg)
1259 break;
1260 fp_found++;
1261 }
1262
1263 /* Check if this is a problematic sequence. */
1264 if (i > 1
1265 && fp_found >= 2
1266 && div_sqrt_insn_p (after))
1267 {
1268 /* If this is the short version of the problematic
1269 sequence we add two NOPs in a row to also prevent
1270 the long version. */
1271 if (i == 2)
1272 emit_insn_before (gen_nop (), next);
1273 insert_nop = true;
1274 break;
1275 }
1276
1277 /* No need to scan past a second div/sqrt. */
1278 if (div_sqrt_insn_p (after))
1279 break;
1280
1281 /* Insert NOP before branch. */
1282 if (i < 3
1283 && (!NONJUMP_INSN_P (after)
1284 || GET_CODE (PATTERN (after)) == SEQUENCE))
1285 {
1286 insert_nop = true;
1287 break;
1288 }
1289
1290 after = next_active_insn (after);
1291 if (!after)
1292 break;
1293 }
1294 }
1295
1296 /* Look for either of these two sequences:
1297
1298 Sequence A:
1299 1. store of word size or less (e.g. st / stb / sth / stf)
1300 2. any single instruction that is not a load or store
1301 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)
1302
1303 Sequence B:
1304 1. store of double word size (e.g. std / stdf)
1305 2. any store instruction (e.g. st / stb / sth / stf / std / stdf) */
1306 if (sparc_fix_b2bst
1307 && NONJUMP_INSN_P (insn)
1308 && (set = single_set (insn)) != NULL_RTX
1309 && store_insn_p (insn))
1310 {
1311 /* Sequence B begins with a double-word store. */
1312 bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
1313 rtx_insn *after;
1314 int i;
1315
1316 next = next_active_non_empty_insn (insn);
1317 if (!next)
1318 break;
1319
1320 for (after = next, i = 0; i < 2; i++)
1321 {
1322 /* If the insn is a branch, then it cannot be problematic. */
1323 if (!NONJUMP_INSN_P (after)
1324 || GET_CODE (PATTERN (after)) == SEQUENCE)
1325 break;
1326
1327 /* Sequence B is only two instructions long. */
1328 if (seq_b)
1329 {
1330 /* Add NOP if followed by a store. */
1331 if (store_insn_p (after))
1332 insert_nop = true;
1333
1334 /* Otherwise it is ok. */
1335 break;
1336 }
1337
1338 /* If the second instruction is a load or a store,
1339 then the sequence cannot be problematic. */
1340 if (i == 0)
1341 {
1342 if ((set = single_set (after)) != NULL_RTX
1343 && (MEM_P (SET_DEST (set)) || mem_ref (SET_SRC (set))))
1344 break;
1345
1346 after = next_active_non_empty_insn (after);
1347 if (!after)
1348 break;
1349 }
1350
1351 /* Add NOP if third instruction is a store. */
1352 if (i == 1
1353 && store_insn_p (after))
1354 insert_nop = true;
1355 }
1356 }
1357
1358 /* Look for a single-word load into an odd-numbered FP register. */
1359 else if (sparc_fix_at697f
1360 && NONJUMP_INSN_P (insn)
1361 && (set = single_set (insn)) != NULL_RTX
1362 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1363 && mem_ref (SET_SRC (set))
1364 && REG_P (SET_DEST (set))
1365 && REGNO (SET_DEST (set)) > 31
1366 && REGNO (SET_DEST (set)) % 2 != 0)
1367 {
1368 /* The wrong dependency is on the enclosing double register. */
1369 const unsigned int x = REGNO (SET_DEST (set)) - 1;
1370 unsigned int src1, src2, dest;
1371 int code;
1372
1373 next = next_active_insn (insn);
1374 if (!next)
1375 break;
1376 /* If the insn is a branch, then it cannot be problematic. */
1377 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1378 continue;
1379
1380 extract_insn (next);
1381 code = INSN_CODE (next);
1382
1383 switch (code)
1384 {
1385 case CODE_FOR_adddf3:
1386 case CODE_FOR_subdf3:
1387 case CODE_FOR_muldf3:
1388 case CODE_FOR_divdf3:
1389 dest = REGNO (recog_data.operand[0]);
1390 src1 = REGNO (recog_data.operand[1]);
1391 src2 = REGNO (recog_data.operand[2]);
1392 if (src1 != src2)
1393 {
1394 /* Case [1-4]:
1395 ld [address], %fx+1
1396 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
1397 if ((src1 == x || src2 == x)
1398 && (dest == src1 || dest == src2))
1399 insert_nop = true;
1400 }
1401 else
1402 {
1403 /* Case 5:
1404 ld [address], %fx+1
1405 FPOPd %fx, %fx, %fx */
1406 if (src1 == x
1407 && dest == src1
1408 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
1409 insert_nop = true;
1410 }
1411 break;
1412
1413 case CODE_FOR_sqrtdf2:
1414 dest = REGNO (recog_data.operand[0]);
1415 src1 = REGNO (recog_data.operand[1]);
1416 /* Case 6:
1417 ld [address], %fx+1
1418 fsqrtd %fx, %fx */
1419 if (src1 == x && dest == src1)
1420 insert_nop = true;
1421 break;
1422
1423 default:
1424 break;
1425 }
1426 }
1427
1428 /* Look for a single-word load into an integer register. */
1429 else if (sparc_fix_ut699
1430 && NONJUMP_INSN_P (insn)
1431 && (set = single_set (insn)) != NULL_RTX
1432 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
1433 && (mem_ref (SET_SRC (set)) != NULL_RTX
1434 || INSN_CODE (insn) == CODE_FOR_movsi_pic_gotdata_op)
1435 && REG_P (SET_DEST (set))
1436 && REGNO (SET_DEST (set)) < 32)
1437 {
1438 /* There is no problem if the second memory access has a data
1439 dependency on the first single-cycle load. */
1440 rtx x = SET_DEST (set);
1441
1442 next = next_active_insn (insn);
1443 if (!next)
1444 break;
1445 /* If the insn is a branch, then it cannot be problematic. */
1446 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1447 continue;
1448
1449 /* Look for a second memory access to/from an integer register. */
1450 if ((set = single_set (next)) != NULL_RTX)
1451 {
1452 rtx src = SET_SRC (set);
1453 rtx dest = SET_DEST (set);
1454 rtx mem;
1455
1456 /* LDD is affected. */
1457 if ((mem = mem_ref (src)) != NULL_RTX
1458 && REG_P (dest)
1459 && REGNO (dest) < 32
1460 && !reg_mentioned_p (x, XEXP (mem, 0)))
1461 insert_nop = true;
1462
1463 /* STD is *not* affected. */
1464 else if (MEM_P (dest)
1465 && GET_MODE_SIZE (GET_MODE (dest)) <= 4
1466 && (src == CONST0_RTX (GET_MODE (dest))
1467 || (REG_P (src)
1468 && REGNO (src) < 32
1469 && REGNO (src) != REGNO (x)))
1470 && !reg_mentioned_p (x, XEXP (dest, 0)))
1471 insert_nop = true;
1472
1473 	      /* GOT accesses use LD.  */
1474 else if (INSN_CODE (next) == CODE_FOR_movsi_pic_gotdata_op
1475 && !reg_mentioned_p (x, XEXP (XEXP (src, 0), 1)))
1476 insert_nop = true;
1477 }
1478 }
1479
1480 /* Look for a single-word load/operation into an FP register. */
1481 else if (sparc_fix_ut699
1482 && NONJUMP_INSN_P (insn)
1483 && (set = single_set (insn)) != NULL_RTX
1484 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1485 && REG_P (SET_DEST (set))
1486 && REGNO (SET_DEST (set)) > 31)
1487 {
1488 /* Number of instructions in the problematic window. */
1489 const int n_insns = 4;
1490 /* The problematic combination is with the sibling FP register. */
1491 const unsigned int x = REGNO (SET_DEST (set));
1492 const unsigned int y = x ^ 1;
1493 rtx_insn *after;
1494 int i;
1495
1496 next = next_active_insn (insn);
1497 if (!next)
1498 break;
1499 /* If the insn is a branch, then it cannot be problematic. */
1500 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1501 continue;
1502
1503 /* Look for a second load/operation into the sibling FP register. */
1504 if (!((set = single_set (next)) != NULL_RTX
1505 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1506 && REG_P (SET_DEST (set))
1507 && REGNO (SET_DEST (set)) == y))
1508 continue;
1509
1510 /* Look for a (possible) store from the FP register in the next N
1511 instructions, but bail out if it is again modified or if there
1512 is a store from the sibling FP register before this store. */
1513 for (after = next, i = 0; i < n_insns; i++)
1514 {
1515 bool branch_p;
1516
1517 after = next_active_insn (after);
1518 if (!after)
1519 break;
1520
1521 /* This is a branch with an empty delay slot. */
1522 if (!NONJUMP_INSN_P (after))
1523 {
1524 if (++i == n_insns)
1525 break;
1526 branch_p = true;
1527 after = NULL;
1528 }
1529 /* This is a branch with a filled delay slot. */
1530 else if (rtx_sequence *seq =
1531 dyn_cast <rtx_sequence *> (PATTERN (after)))
1532 {
1533 if (++i == n_insns)
1534 break;
1535 branch_p = true;
1536 after = seq->insn (1);
1537 }
1538 /* This is a regular instruction. */
1539 else
1540 branch_p = false;
1541
1542 if (after && (set = single_set (after)) != NULL_RTX)
1543 {
1544 const rtx src = SET_SRC (set);
1545 const rtx dest = SET_DEST (set);
1546 const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
1547
1548 /* If the FP register is again modified before the store,
1549 then the store isn't affected. */
1550 if (REG_P (dest)
1551 && (REGNO (dest) == x
1552 || (REGNO (dest) == y && size == 8)))
1553 break;
1554
1555 if (MEM_P (dest) && REG_P (src))
1556 {
1557 /* If there is a store from the sibling FP register
1558 before the store, then the store is not affected. */
1559 if (REGNO (src) == y || (REGNO (src) == x && size == 8))
1560 break;
1561
1562 /* Otherwise, the store is affected. */
1563 if (REGNO (src) == x && size == 4)
1564 {
1565 insert_nop = true;
1566 break;
1567 }
1568 }
1569 }
1570
1571 /* If we have a branch in the first M instructions, then we
1572 cannot see the (M+2)th instruction so we play safe. */
1573 if (branch_p && i <= (n_insns - 2))
1574 {
1575 insert_nop = true;
1576 break;
1577 }
1578 }
1579 }
1580
1581 else
1582 next = NEXT_INSN (insn);
1583
1584 if (insert_nop)
1585 emit_insn_before (gen_nop (), next);
1586 }
1587
1588 return 0;
1589 }
1590
1591 namespace {
1592
1593 const pass_data pass_data_work_around_errata =
1594 {
1595 RTL_PASS, /* type */
1596 "errata", /* name */
1597 OPTGROUP_NONE, /* optinfo_flags */
1598 TV_MACH_DEP, /* tv_id */
1599 0, /* properties_required */
1600 0, /* properties_provided */
1601 0, /* properties_destroyed */
1602 0, /* todo_flags_start */
1603 0, /* todo_flags_finish */
1604 };
1605
1606 class pass_work_around_errata : public rtl_opt_pass
1607 {
1608 public:
1609   pass_work_around_errata(gcc::context *ctxt)
1610 : rtl_opt_pass(pass_data_work_around_errata, ctxt)
1611 {}
1612
1613 /* opt_pass methods: */
1614   virtual bool gate (function *)
1615 {
1616 return sparc_fix_at697f
1617 || sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc
1618 || sparc_fix_b2bst || sparc_fix_lost_divsqrt;
1619 }
1620
1621   virtual unsigned int execute (function *)
1622 {
1623 return sparc_do_work_around_errata ();
1624 }
1625
1626 }; // class pass_work_around_errata
1627
1628 } // anon namespace
1629
1630 rtl_opt_pass *
1631 make_pass_work_around_errata (gcc::context *ctxt)
1632 {
1633 return new pass_work_around_errata (ctxt);
1634 }
1635
1636 /* Helpers for TARGET_DEBUG_OPTIONS. */
1637 static void
1638 dump_target_flag_bits (const int flags)
1639 {
1640 if (flags & MASK_64BIT)
1641 fprintf (stderr, "64BIT ");
1642 if (flags & MASK_APP_REGS)
1643 fprintf (stderr, "APP_REGS ");
1644 if (flags & MASK_FASTER_STRUCTS)
1645 fprintf (stderr, "FASTER_STRUCTS ");
1646 if (flags & MASK_FLAT)
1647 fprintf (stderr, "FLAT ");
1648 if (flags & MASK_FMAF)
1649 fprintf (stderr, "FMAF ");
1650 if (flags & MASK_FSMULD)
1651 fprintf (stderr, "FSMULD ");
1652 if (flags & MASK_FPU)
1653 fprintf (stderr, "FPU ");
1654 if (flags & MASK_HARD_QUAD)
1655 fprintf (stderr, "HARD_QUAD ");
1656 if (flags & MASK_POPC)
1657 fprintf (stderr, "POPC ");
1658 if (flags & MASK_PTR64)
1659 fprintf (stderr, "PTR64 ");
1660 if (flags & MASK_STACK_BIAS)
1661 fprintf (stderr, "STACK_BIAS ");
1662 if (flags & MASK_UNALIGNED_DOUBLES)
1663 fprintf (stderr, "UNALIGNED_DOUBLES ");
1664 if (flags & MASK_V8PLUS)
1665 fprintf (stderr, "V8PLUS ");
1666 if (flags & MASK_VIS)
1667 fprintf (stderr, "VIS ");
1668 if (flags & MASK_VIS2)
1669 fprintf (stderr, "VIS2 ");
1670 if (flags & MASK_VIS3)
1671 fprintf (stderr, "VIS3 ");
1672 if (flags & MASK_VIS4)
1673 fprintf (stderr, "VIS4 ");
1674 if (flags & MASK_VIS4B)
1675 fprintf (stderr, "VIS4B ");
1676 if (flags & MASK_CBCOND)
1677 fprintf (stderr, "CBCOND ");
1678 if (flags & MASK_DEPRECATED_V8_INSNS)
1679 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1680 if (flags & MASK_LEON)
1681 fprintf (stderr, "LEON ");
1682 if (flags & MASK_LEON3)
1683 fprintf (stderr, "LEON3 ");
1684 if (flags & MASK_SPARCLET)
1685 fprintf (stderr, "SPARCLET ");
1686 if (flags & MASK_SPARCLITE)
1687 fprintf (stderr, "SPARCLITE ");
1688 if (flags & MASK_V8)
1689 fprintf (stderr, "V8 ");
1690 if (flags & MASK_V9)
1691 fprintf (stderr, "V9 ");
1692 }
1693
1694 static void
1695 dump_target_flags (const char *prefix, const int flags)
1696 {
1697 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1698 dump_target_flag_bits (flags);
1699   fprintf (stderr, "]\n");
1700 }
1701
1702 /* Validate and override various options, and do some machine dependent
1703 initialization. */
1704
1705 static void
1706 sparc_option_override (void)
1707 {
1708 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1709 static struct cpu_default {
1710 const int cpu;
1711 const enum sparc_processor_type processor;
1712 } const cpu_default[] = {
1713 /* There must be one entry here for each TARGET_CPU value. */
1714 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1715 { TARGET_CPU_v8, PROCESSOR_V8 },
1716 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1717 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1718 { TARGET_CPU_leon, PROCESSOR_LEON },
1719 { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1720 { TARGET_CPU_leon5, PROCESSOR_LEON5 },
1721 { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
1722 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1723 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1724 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1725 { TARGET_CPU_v9, PROCESSOR_V9 },
1726 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1727 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1728 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1729 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1730 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1731 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1732 { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
1733 { TARGET_CPU_m8, PROCESSOR_M8 },
1734 { -1, PROCESSOR_V7 }
1735 };
1736 const struct cpu_default *def;
1737 /* Table of values for -m{cpu,tune}=. This must match the order of
1738 the enum processor_type in sparc-opts.h. */
1739 static struct cpu_table {
1740 const char *const name;
1741 const int disable;
1742 const int enable;
1743 } const cpu_table[] = {
1744 { "v7", MASK_ISA, 0 },
1745 { "cypress", MASK_ISA, 0 },
1746 { "v8", MASK_ISA, MASK_V8 },
1747 /* TI TMS390Z55 supersparc */
1748 { "supersparc", MASK_ISA, MASK_V8 },
1749 { "hypersparc", MASK_ISA, MASK_V8 },
1750 { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
1751 { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
1752 { "leon5", MASK_ISA, MASK_V8|MASK_LEON3 },
1753 { "leon3v7", MASK_ISA, MASK_LEON3 },
1754 { "sparclite", MASK_ISA, MASK_SPARCLITE },
1755 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1756 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1757     /* The Fujitsu MB86934 is a more recent sparclite chip, with an FPU.  */
1758 { "f934", MASK_ISA, MASK_SPARCLITE },
1759 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1760 { "sparclet", MASK_ISA, MASK_SPARCLET },
1761 /* TEMIC sparclet */
1762 { "tsc701", MASK_ISA, MASK_SPARCLET },
1763 { "v9", MASK_ISA, MASK_V9 },
1764 /* UltraSPARC I, II, IIi */
1765 { "ultrasparc", MASK_ISA,
1766 /* Although insns using %y are deprecated, it is a clear win. */
1767 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1768 /* UltraSPARC III */
1769 /* ??? Check if %y issue still holds true. */
1770 { "ultrasparc3", MASK_ISA,
1771 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1772 /* UltraSPARC T1 */
1773 { "niagara", MASK_ISA,
1774 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1775 /* UltraSPARC T2 */
1776 { "niagara2", MASK_ISA,
1777 MASK_V9|MASK_POPC|MASK_VIS2 },
1778 /* UltraSPARC T3 */
1779 { "niagara3", MASK_ISA,
1780 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
1781 /* UltraSPARC T4 */
1782 { "niagara4", MASK_ISA,
1783 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1784 /* UltraSPARC M7 */
1785 { "niagara7", MASK_ISA,
1786 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
1787 /* UltraSPARC M8 */
1788 { "m8", MASK_ISA,
1789 MASK_V9|MASK_POPC|MASK_VIS4B|MASK_FMAF|MASK_CBCOND|MASK_SUBXC }
1790 };
1791 const struct cpu_table *cpu;
1792 unsigned int i;
1793
1794 if (sparc_debug_string != NULL)
1795 {
1796 const char *q;
1797 char *p;
1798
1799 p = ASTRDUP (sparc_debug_string);
1800 while ((q = strtok (p, ",")) != NULL)
1801 {
1802 bool invert;
1803 int mask;
1804
1805 p = NULL;
1806 if (*q == '!')
1807 {
1808 invert = true;
1809 q++;
1810 }
1811 else
1812 invert = false;
1813
1814 if (! strcmp (q, "all"))
1815 mask = MASK_DEBUG_ALL;
1816 else if (! strcmp (q, "options"))
1817 mask = MASK_DEBUG_OPTIONS;
1818 else
1819 error ("unknown %<-mdebug-%s%> switch", q);
1820
1821 if (invert)
1822 sparc_debug &= ~mask;
1823 else
1824 sparc_debug |= mask;
1825 }
1826 }
1827
1828 /* Enable the FsMULd instruction by default if not explicitly specified by
1829 the user. It may be later disabled by the CPU (explicitly or not). */
1830 if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
1831 target_flags |= MASK_FSMULD;
1832
1833 if (TARGET_DEBUG_OPTIONS)
1834 {
1835       dump_target_flags ("Initial target_flags", target_flags);
1836       dump_target_flags ("target_flags_explicit", target_flags_explicit);
1837 }
1838
1839 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1840 SUBTARGET_OVERRIDE_OPTIONS;
1841 #endif
1842
1843 #ifndef SPARC_BI_ARCH
1844 /* Check for unsupported architecture size. */
1845 if (!TARGET_64BIT != DEFAULT_ARCH32_P)
1846 error ("%s is not supported by this configuration",
1847 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1848 #endif
1849
1850   /* We force all 64-bit archs to use a 128-bit long double.  */
1851 if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
1852 {
1853 error ("%<-mlong-double-64%> not allowed with %<-m64%>");
1854 target_flags |= MASK_LONG_DOUBLE_128;
1855 }
1856
1857 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1858 for (i = 8; i < 16; i++)
1859 if (!call_used_regs [i])
1860 {
1861 error ("%<-fcall-saved-REG%> is not supported for out registers");
1862 call_used_regs [i] = 1;
1863 }
1864
1865 /* Set the default CPU if no -mcpu option was specified. */
1866 if (!global_options_set.x_sparc_cpu_and_features)
1867 {
1868 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1869 if (def->cpu == TARGET_CPU_DEFAULT)
1870 break;
1871 gcc_assert (def->cpu != -1);
1872 sparc_cpu_and_features = def->processor;
1873 }
1874
1875 /* Set the default CPU if no -mtune option was specified. */
1876 if (!global_options_set.x_sparc_cpu)
1877 sparc_cpu = sparc_cpu_and_features;
1878
1879 cpu = &cpu_table[(int) sparc_cpu_and_features];
1880
1881 if (TARGET_DEBUG_OPTIONS)
1882 {
1883 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1884 dump_target_flags ("cpu->disable", cpu->disable);
1885 dump_target_flags ("cpu->enable", cpu->enable);
1886 }
1887
1888 target_flags &= ~cpu->disable;
1889 target_flags |= (cpu->enable
1890 #ifndef HAVE_AS_FMAF_HPC_VIS3
1891 & ~(MASK_FMAF | MASK_VIS3)
1892 #endif
1893 #ifndef HAVE_AS_SPARC4
1894 & ~MASK_CBCOND
1895 #endif
1896 #ifndef HAVE_AS_SPARC5_VIS4
1897 & ~(MASK_VIS4 | MASK_SUBXC)
1898 #endif
1899 #ifndef HAVE_AS_SPARC6
1900 & ~(MASK_VIS4B)
1901 #endif
1902 #ifndef HAVE_AS_LEON
1903 & ~(MASK_LEON | MASK_LEON3)
1904 #endif
1905 & ~(target_flags_explicit & MASK_FEATURES)
1906 );
1907
1908 /* FsMULd is a V8 instruction. */
1909 if (!TARGET_V8 && !TARGET_V9)
1910 target_flags &= ~MASK_FSMULD;
1911
1912 /* -mvis2 implies -mvis. */
1913 if (TARGET_VIS2)
1914 target_flags |= MASK_VIS;
1915
1916 /* -mvis3 implies -mvis2 and -mvis. */
1917 if (TARGET_VIS3)
1918 target_flags |= MASK_VIS2 | MASK_VIS;
1919
1920 /* -mvis4 implies -mvis3, -mvis2 and -mvis. */
1921 if (TARGET_VIS4)
1922 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1923
1924 /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis */
1925 if (TARGET_VIS4B)
1926 target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1927
1928 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if
1929 FPU is disabled. */
1930 if (!TARGET_FPU)
1931 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1932 | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);
1933
1934 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1935 are available; -m64 also implies v9. */
1936 if (TARGET_VIS || TARGET_ARCH64)
1937 {
1938 target_flags |= MASK_V9;
1939 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1940 }
1941
1942 /* -mvis also implies -mv8plus on 32-bit. */
1943 if (TARGET_VIS && !TARGET_ARCH64)
1944 target_flags |= MASK_V8PLUS;
1945
1946 /* Use the deprecated v8 insns for sparc64 in 32-bit mode. */
1947 if (TARGET_V9 && TARGET_ARCH32)
1948 target_flags |= MASK_DEPRECATED_V8_INSNS;
1949
1950 /* V8PLUS requires V9 and makes no sense in 64-bit mode. */
1951 if (!TARGET_V9 || TARGET_ARCH64)
1952 target_flags &= ~MASK_V8PLUS;
1953
1954 /* Don't use stack biasing in 32-bit mode. */
1955 if (TARGET_ARCH32)
1956 target_flags &= ~MASK_STACK_BIAS;
1957
1958 /* Use LRA instead of reload, unless otherwise instructed. */
1959 if (!(target_flags_explicit & MASK_LRA))
1960 target_flags |= MASK_LRA;
1961
1962 /* Enable applicable errata workarounds for LEON3FT. */
1963 if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
1964 {
1965 sparc_fix_b2bst = 1;
1966 sparc_fix_lost_divsqrt = 1;
1967 }
1968
1969 /* Disable FsMULd for the UT699 since it doesn't work correctly. */
1970 if (sparc_fix_ut699)
1971 target_flags &= ~MASK_FSMULD;
1972
1973 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1974 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1975 target_flags |= MASK_LONG_DOUBLE_128;
1976 #endif
1977
1978 if (TARGET_DEBUG_OPTIONS)
1979 dump_target_flags ("Final target_flags", target_flags);
1980
1981 /* Set the code model if no -mcmodel option was specified. */
1982 if (global_options_set.x_sparc_code_model)
1983 {
1984 if (TARGET_ARCH32)
1985 error ("%<-mcmodel=%> is not supported in 32-bit mode");
1986 }
1987 else
1988 {
1989 if (TARGET_ARCH32)
1990 sparc_code_model = CM_32;
1991 else
1992 sparc_code_model = SPARC_DEFAULT_CMODEL;
1993 }
1994
1995 /* Set the memory model if no -mmemory-model option was specified. */
1996 if (!global_options_set.x_sparc_memory_model)
1997 {
1998 /* Choose the memory model for the operating system. */
1999 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
2000 if (os_default != SMM_DEFAULT)
2001 sparc_memory_model = os_default;
2002 /* Choose the most relaxed model for the processor. */
2003 else if (TARGET_V9)
2004 sparc_memory_model = SMM_RMO;
2005 else if (TARGET_LEON3)
2006 sparc_memory_model = SMM_TSO;
2007 else if (TARGET_LEON)
2008 sparc_memory_model = SMM_SC;
2009 else if (TARGET_V8)
2010 sparc_memory_model = SMM_PSO;
2011 else
2012 sparc_memory_model = SMM_SC;
2013 }
2014
2015 /* Supply a default value for align_functions. */
2016 if (flag_align_functions && !str_align_functions)
2017 {
2018 if (sparc_cpu == PROCESSOR_ULTRASPARC
2019 || sparc_cpu == PROCESSOR_ULTRASPARC3
2020 || sparc_cpu == PROCESSOR_NIAGARA
2021 || sparc_cpu == PROCESSOR_NIAGARA2
2022 || sparc_cpu == PROCESSOR_NIAGARA3
2023 || sparc_cpu == PROCESSOR_NIAGARA4)
2024 str_align_functions = "32";
2025 else if (sparc_cpu == PROCESSOR_NIAGARA7
2026 || sparc_cpu == PROCESSOR_M8)
2027 str_align_functions = "64";
2028 }
2029
2030 /* Validate PCC_STRUCT_RETURN. */
2031 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
2032 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
2033
2034 /* Only use .uaxword when compiling for a 64-bit target. */
2035 if (!TARGET_ARCH64)
2036 targetm.asm_out.unaligned_op.di = NULL;
2037
2038 /* Set the processor costs. */
2039 switch (sparc_cpu)
2040 {
2041 case PROCESSOR_V7:
2042 case PROCESSOR_CYPRESS:
2043 sparc_costs = &cypress_costs;
2044 break;
2045 case PROCESSOR_V8:
2046 case PROCESSOR_SPARCLITE:
2047 case PROCESSOR_SUPERSPARC:
2048 sparc_costs = &supersparc_costs;
2049 break;
2050 case PROCESSOR_F930:
2051 case PROCESSOR_F934:
2052 case PROCESSOR_HYPERSPARC:
2053 case PROCESSOR_SPARCLITE86X:
2054 sparc_costs = &hypersparc_costs;
2055 break;
2056 case PROCESSOR_LEON:
2057 sparc_costs = &leon_costs;
2058 break;
2059 case PROCESSOR_LEON3:
2060 case PROCESSOR_LEON3V7:
2061 sparc_costs = &leon3_costs;
2062 break;
2063 case PROCESSOR_LEON5:
2064 sparc_costs = &leon5_costs;
2065 break;
2066 case PROCESSOR_SPARCLET:
2067 case PROCESSOR_TSC701:
2068 sparc_costs = &sparclet_costs;
2069 break;
2070 case PROCESSOR_V9:
2071 case PROCESSOR_ULTRASPARC:
2072 sparc_costs = &ultrasparc_costs;
2073 break;
2074 case PROCESSOR_ULTRASPARC3:
2075 sparc_costs = &ultrasparc3_costs;
2076 break;
2077 case PROCESSOR_NIAGARA:
2078 sparc_costs = &niagara_costs;
2079 break;
2080 case PROCESSOR_NIAGARA2:
2081 sparc_costs = &niagara2_costs;
2082 break;
2083 case PROCESSOR_NIAGARA3:
2084 sparc_costs = &niagara3_costs;
2085 break;
2086 case PROCESSOR_NIAGARA4:
2087 sparc_costs = &niagara4_costs;
2088 break;
2089 case PROCESSOR_NIAGARA7:
2090 sparc_costs = &niagara7_costs;
2091 break;
2092 case PROCESSOR_M8:
2093 sparc_costs = &m8_costs;
2094 break;
2095 case PROCESSOR_NATIVE:
2096 gcc_unreachable ();
2097 };
2098
2099 /* param_simultaneous_prefetches is the number of prefetches that
2100 can run at the same time. More important, it is the threshold
2101 defining when additional prefetches will be dropped by the
2102 hardware.
2103
2104 The UltraSPARC-III features a documented prefetch queue with a
2105 size of 8. Additional prefetches issued in the cpu are
2106 dropped.
2107
2108 Niagara processors are different. In these processors prefetches
2109 are handled much like regular loads. The L1 miss buffer is 32
2110 entries, but prefetches start getting affected when 30 entries
2111 become occupied. That occupation could be a mix of regular loads
2112 and prefetches though. And that buffer is shared by all threads.
2113 Once the threshold is reached, if the core is running a single
2114 thread the prefetch will retry. If more than one thread is
2115 running, the prefetch will be dropped.
2116
2117      All this makes it very difficult to determine how many
2118      prefetches can be issued simultaneously, even in a
2119 single-threaded program. Experimental results show that setting
2120 this parameter to 32 works well when the number of threads is not
2121 high. */
2122 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2123 param_simultaneous_prefetches,
2124 ((sparc_cpu == PROCESSOR_ULTRASPARC
2125 || sparc_cpu == PROCESSOR_NIAGARA
2126 || sparc_cpu == PROCESSOR_NIAGARA2
2127 || sparc_cpu == PROCESSOR_NIAGARA3
2128 || sparc_cpu == PROCESSOR_NIAGARA4)
2129 ? 2
2130 : (sparc_cpu == PROCESSOR_ULTRASPARC3
2131 ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
2132 || sparc_cpu == PROCESSOR_M8)
2133 ? 32 : 3))));
2134
2135 /* param_l1_cache_line_size is the size of the L1 cache line, in
2136 bytes.
2137
2138 The Oracle SPARC Architecture (previously the UltraSPARC
2139 Architecture) specification states that when a PREFETCH[A]
2140 instruction is executed an implementation-specific amount of data
2141 is prefetched, and that it is at least 64 bytes long (aligned to
2142 at least 64 bytes).
2143
2144 However, this is not correct. The M7 (and implementations prior
2145 to that) does not guarantee a 64B prefetch into a cache if the
2146 line size is smaller. A single cache line is all that is ever
2147 prefetched. So for the M7, where the L1D$ has 32B lines and the
2148 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
2149 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
2150 is a read_n prefetch, which is the only type which allocates to
2151 the L1.) */
2152 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2153 param_l1_cache_line_size,
2154 (sparc_cpu == PROCESSOR_M8 ? 64 : 32));
2155
2156 /* param_l1_cache_size is the size of the L1D$ (most SPARC chips use
2157      Harvard level-1 caches) in kilobytes.  Both UltraSPARC and
2158      Niagara processors feature an L1D$ of 16KB.  */
2159 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2160 param_l1_cache_size,
2161 ((sparc_cpu == PROCESSOR_ULTRASPARC
2162 || sparc_cpu == PROCESSOR_ULTRASPARC3
2163 || sparc_cpu == PROCESSOR_NIAGARA
2164 || sparc_cpu == PROCESSOR_NIAGARA2
2165 || sparc_cpu == PROCESSOR_NIAGARA3
2166 || sparc_cpu == PROCESSOR_NIAGARA4
2167 || sparc_cpu == PROCESSOR_NIAGARA7
2168 || sparc_cpu == PROCESSOR_M8)
2169 ? 16 : 64));
2170
2171   /* param_l2_cache_size is the size of the L2 in kilobytes.  Note
2172 that 512 is the default in params.def. */
2173 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2174 param_l2_cache_size,
2175 ((sparc_cpu == PROCESSOR_NIAGARA4
2176 || sparc_cpu == PROCESSOR_M8)
2177 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
2178 ? 256 : 512)));
2179
2180
2181 /* Disable save slot sharing for call-clobbered registers by default.
2182 The IRA sharing algorithm works on single registers only and this
2183 pessimizes for double floating-point registers. */
2184 if (!global_options_set.x_flag_ira_share_save_slots)
2185 flag_ira_share_save_slots = 0;
2186
2187 /* Only enable REE by default in 64-bit mode where it helps to eliminate
2188 redundant 32-to-64-bit extensions. */
2189 if (!global_options_set.x_flag_ree && TARGET_ARCH32)
2190 flag_ree = 0;
2191
2192 /* Do various machine dependent initializations. */
2193 sparc_init_modes ();
2194
2195 /* Set up function hooks. */
2196 init_machine_status = sparc_init_machine_status;
2197 }
2198
2199 /* Miscellaneous utilities. */
2200
2201 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
2202 or branch on register contents instructions. */
2203
2204 int
2205 v9_regcmp_p (enum rtx_code code)
2206 {
2207 return (code == EQ || code == NE || code == GE || code == LT
2208 || code == LE || code == GT);
2209 }
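/* Illustrative note (added, not from the original sources): these six codes
   are the ones that the V9 branch-on-register-contents and conditional
   move-on-register instructions can test directly, e.g. brz/movrz (EQ),
   brnz/movrnz (NE), brgez/movrgez (GE), brlz/movrlz (LT), brlez/movrlez (LE)
   and brgz/movrgz (GT).  */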
2210
2211 /* Nonzero if OP is a floating point constant which can
2212 be loaded into an integer register using a single
2213 sethi instruction. */
2214
2215 int
2216 fp_sethi_p (rtx op)
2217 {
2218 if (GET_CODE (op) == CONST_DOUBLE)
2219 {
2220 long i;
2221
2222 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2223 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
2224 }
2225
2226 return 0;
2227 }
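/* Worked example (added for illustration): the SFmode constant 1.0f has the
   bit pattern 0x3f800000.  Its low 10 bits are zero, so SPARC_SETHI_P holds
   while SPARC_SIMM13_P does not, and the value can be loaded into an integer
   register with a single sethi.  */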
2228
2229 /* Nonzero if OP is a floating point constant which can
2230 be loaded into an integer register using a single
2231 mov instruction. */
2232
2233 int
2234 fp_mov_p (rtx op)
2235 {
2236 if (GET_CODE (op) == CONST_DOUBLE)
2237 {
2238 long i;
2239
2240 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2241 return SPARC_SIMM13_P (i);
2242 }
2243
2244 return 0;
2245 }
2246
2247 /* Nonzero if OP is a floating point constant which can
2248 be loaded into an integer register using a high/losum
2249 instruction sequence. */
2250
2251 int
2252 fp_high_losum_p (rtx op)
2253 {
2254 /* The constraints calling this should only be in
2255 SFmode move insns, so any constant which cannot
2256 be moved using a single insn will do. */
2257 if (GET_CODE (op) == CONST_DOUBLE)
2258 {
2259 long i;
2260
2261 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2262 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
2263 }
2264
2265 return 0;
2266 }
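/* Worked example (added for illustration): a bit pattern such as 0x3f800001
   (the SFmode value just above 1.0f) has nonzero low 10 bits and is too large
   for a simm13, so neither fp_mov_p nor fp_sethi_p accepts it; it needs the
   two-insn sethi/or (high/losum) sequence that this predicate matches.  */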
2267
2268 /* Return true if the address of LABEL can be loaded by means of the
2269 mov{si,di}_pic_label_ref patterns in PIC mode. */
2270
2271 static bool
2272 can_use_mov_pic_label_ref (rtx label)
2273 {
2274 /* VxWorks does not impose a fixed gap between segments; the run-time
2275 gap can be different from the object-file gap. We therefore can't
2276 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
2277 are absolutely sure that X is in the same segment as the GOT.
2278 Unfortunately, the flexibility of linker scripts means that we
2279 can't be sure of that in general, so assume that GOT-relative
2280 accesses are never valid on VxWorks. */
2281 if (TARGET_VXWORKS_RTP)
2282 return false;
2283
2284 /* Similarly, if the label is non-local, it might end up being placed
2285 in a different section than the current one; now mov_pic_label_ref
2286 requires the label and the code to be in the same section. */
2287 if (LABEL_REF_NONLOCAL_P (label))
2288 return false;
2289
2290   /* Finally, if we are reordering basic blocks and partitioning into hot
2291 and cold sections, this might happen for any label. */
2292 if (flag_reorder_blocks_and_partition)
2293 return false;
2294
2295 return true;
2296 }
2297
2298 /* Expand a move instruction. Return true if all work is done. */
2299
2300 bool
2301 sparc_expand_move (machine_mode mode, rtx *operands)
2302 {
2303 /* Handle sets of MEM first. */
2304 if (GET_CODE (operands[0]) == MEM)
2305 {
2306 /* 0 is a register (or a pair of registers) on SPARC. */
2307 if (register_or_zero_operand (operands[1], mode))
2308 return false;
2309
2310 if (!reload_in_progress)
2311 {
2312 operands[0] = validize_mem (operands[0]);
2313 operands[1] = force_reg (mode, operands[1]);
2314 }
2315 }
2316
2317 /* Fix up TLS cases. */
2318 if (TARGET_HAVE_TLS
2319 && CONSTANT_P (operands[1])
2320 && sparc_tls_referenced_p (operands [1]))
2321 {
2322 operands[1] = sparc_legitimize_tls_address (operands[1]);
2323 return false;
2324 }
2325
2326 /* Fix up PIC cases. */
2327 if (flag_pic && CONSTANT_P (operands[1]))
2328 {
2329 if (pic_address_needs_scratch (operands[1]))
2330 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
2331
2332 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
2333 if ((GET_CODE (operands[1]) == LABEL_REF
2334 && can_use_mov_pic_label_ref (operands[1]))
2335 || (GET_CODE (operands[1]) == CONST
2336 && GET_CODE (XEXP (operands[1], 0)) == PLUS
2337 && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
2338 && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT
2339 && can_use_mov_pic_label_ref (XEXP (XEXP (operands[1], 0), 0))))
2340 {
2341 if (mode == SImode)
2342 {
2343 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
2344 return true;
2345 }
2346
2347 if (mode == DImode)
2348 {
2349 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
2350 return true;
2351 }
2352 }
2353
2354 if (symbolic_operand (operands[1], mode))
2355 {
2356 operands[1]
2357 = sparc_legitimize_pic_address (operands[1],
2358 reload_in_progress
2359 ? operands[0] : NULL_RTX);
2360 return false;
2361 }
2362 }
2363
2364 /* If we are trying to toss an integer constant into FP registers,
2365 or loading a FP or vector constant, force it into memory. */
2366 if (CONSTANT_P (operands[1])
2367 && REG_P (operands[0])
2368 && (SPARC_FP_REG_P (REGNO (operands[0]))
2369 || SCALAR_FLOAT_MODE_P (mode)
2370 || VECTOR_MODE_P (mode)))
2371 {
2372 /* emit_group_store will send such bogosity to us when it is
2373 not storing directly into memory. So fix this up to avoid
2374 crashes in output_constant_pool. */
2375 if (operands [1] == const0_rtx)
2376 operands[1] = CONST0_RTX (mode);
2377
2378       /* We can clear FP registers or set them to all ones if TARGET_VIS,
2379 	 and can always do so for the other regs.  */
2380 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
2381 && (const_zero_operand (operands[1], mode)
2382 || const_all_ones_operand (operands[1], mode)))
2383 return false;
2384
2385 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
2386 /* We are able to build any SF constant in integer registers
2387 with at most 2 instructions. */
2388 && (mode == SFmode
2389 /* And any DF constant in integer registers if needed. */
2390 || (mode == DFmode && !can_create_pseudo_p ())))
2391 return false;
2392
2393 operands[1] = force_const_mem (mode, operands[1]);
2394 if (!reload_in_progress)
2395 operands[1] = validize_mem (operands[1]);
2396 return false;
2397 }
2398
2399 /* Accept non-constants and valid constants unmodified. */
2400 if (!CONSTANT_P (operands[1])
2401 || GET_CODE (operands[1]) == HIGH
2402 || input_operand (operands[1], mode))
2403 return false;
2404
2405 switch (mode)
2406 {
2407 case E_QImode:
2408 /* All QImode constants require only one insn, so proceed. */
2409 break;
2410
2411 case E_HImode:
2412 case E_SImode:
2413 sparc_emit_set_const32 (operands[0], operands[1]);
2414 return true;
2415
2416 case E_DImode:
2417 /* input_operand should have filtered out 32-bit mode. */
2418 sparc_emit_set_const64 (operands[0], operands[1]);
2419 return true;
2420
2421 case E_TImode:
2422 {
2423 rtx high, low;
2424 /* TImode isn't available in 32-bit mode. */
2425 split_double (operands[1], &high, &low);
2426 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
2427 high));
2428 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
2429 low));
2430 }
2431 return true;
2432
2433 default:
2434 gcc_unreachable ();
2435 }
2436
2437 return false;
2438 }
2439
2440 /* Load OP1, a 32-bit constant, into OP0, a register.
2441 We know it can't be done in one insn when we get
2442    here; the move expander guarantees this.  */
2443
2444 static void
2445 sparc_emit_set_const32 (rtx op0, rtx op1)
2446 {
2447 machine_mode mode = GET_MODE (op0);
2448 rtx temp = op0;
2449
2450 if (can_create_pseudo_p ())
2451 temp = gen_reg_rtx (mode);
2452
2453 if (GET_CODE (op1) == CONST_INT)
2454 {
2455 gcc_assert (!small_int_operand (op1, mode)
2456 && !const_high_operand (op1, mode));
2457
2458 /* Emit them as real moves instead of a HIGH/LO_SUM,
2459 this way CSE can see everything and reuse intermediate
2460 values if it wants. */
2461 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
2462 & ~(HOST_WIDE_INT) 0x3ff)));
2463
2464 emit_insn (gen_rtx_SET (op0,
2465 gen_rtx_IOR (mode, temp,
2466 GEN_INT (INTVAL (op1) & 0x3ff))));
2467 }
2468 else
2469 {
2470 /* A symbol, emit in the traditional way. */
2471 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
2472 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
2473 }
2474 }
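/* Worked example (added for illustration): for op1 = 0x12345678 this emits

     sethi  %hi(0x12345678), %temp    ! %temp = 0x12345400
     or     %temp, 0x278, %op0        ! 0x278 = 0x12345678 & 0x3ff

   so CSE can still see and reuse the intermediate value 0x12345400.  */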
2475
2476 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
2477 If TEMP is nonzero, we are forbidden to use any other scratch
2478 registers. Otherwise, we are allowed to generate them as needed.
2479
2480 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2481 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2482
2483 void
2484 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2485 {
2486 rtx cst, temp1, temp2, temp3, temp4, temp5;
2487 rtx ti_temp = 0;
2488
2489 /* Deal with too large offsets. */
2490 if (GET_CODE (op1) == CONST
2491 && GET_CODE (XEXP (op1, 0)) == PLUS
2492 && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
2493 && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
2494 {
2495 gcc_assert (!temp);
2496 temp1 = gen_reg_rtx (DImode);
2497 temp2 = gen_reg_rtx (DImode);
2498 sparc_emit_set_const64 (temp2, cst);
2499 sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
2500 NULL_RTX);
2501 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
2502 return;
2503 }
2504
2505 if (temp && GET_MODE (temp) == TImode)
2506 {
2507 ti_temp = temp;
2508 temp = gen_rtx_REG (DImode, REGNO (temp));
2509 }
2510
2511 /* SPARC-V9 code model support. */
2512 switch (sparc_code_model)
2513 {
2514 case CM_MEDLOW:
2515 /* The range spanned by all instructions in the object is less
2516 than 2^31 bytes (2GB) and the distance from any instruction
2517 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2518 than 2^31 bytes (2GB).
2519
2520 The executable must be in the low 4TB of the virtual address
2521 space.
2522
2523 sethi %hi(symbol), %temp1
2524 or %temp1, %lo(symbol), %reg */
2525 if (temp)
2526 temp1 = temp; /* op0 is allowed. */
2527 else
2528 temp1 = gen_reg_rtx (DImode);
2529
2530 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2531 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2532 break;
2533
2534 case CM_MEDMID:
2535 /* The range spanned by all instructions in the object is less
2536 than 2^31 bytes (2GB) and the distance from any instruction
2537 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2538 than 2^31 bytes (2GB).
2539
2540 The executable must be in the low 16TB of the virtual address
2541 space.
2542
2543 sethi %h44(symbol), %temp1
2544 or %temp1, %m44(symbol), %temp2
2545 sllx %temp2, 12, %temp3
2546 or %temp3, %l44(symbol), %reg */
2547 if (temp)
2548 {
2549 temp1 = op0;
2550 temp2 = op0;
2551 temp3 = temp; /* op0 is allowed. */
2552 }
2553 else
2554 {
2555 temp1 = gen_reg_rtx (DImode);
2556 temp2 = gen_reg_rtx (DImode);
2557 temp3 = gen_reg_rtx (DImode);
2558 }
2559
2560 emit_insn (gen_seth44 (temp1, op1));
2561 emit_insn (gen_setm44 (temp2, temp1, op1));
2562 emit_insn (gen_rtx_SET (temp3,
2563 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2564 emit_insn (gen_setl44 (op0, temp3, op1));
2565 break;
2566
2567 case CM_MEDANY:
2568 /* The range spanned by all instructions in the object is less
2569 than 2^31 bytes (2GB) and the distance from any instruction
2570 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2571 than 2^31 bytes (2GB).
2572
2573 The executable can be placed anywhere in the virtual address
2574 space.
2575
2576 sethi %hh(symbol), %temp1
2577 sethi %lm(symbol), %temp2
2578 or %temp1, %hm(symbol), %temp3
2579 sllx %temp3, 32, %temp4
2580 or %temp4, %temp2, %temp5
2581 or %temp5, %lo(symbol), %reg */
2582 if (temp)
2583 {
2584 /* It is possible that one of the registers we got for operands[2]
2585 might coincide with that of operands[0] (which is why we made
2586 it TImode). Pick the other one to use as our scratch. */
2587 if (rtx_equal_p (temp, op0))
2588 {
2589 gcc_assert (ti_temp);
2590 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2591 }
2592 temp1 = op0;
2593 temp2 = temp; /* op0 is _not_ allowed, see above. */
2594 temp3 = op0;
2595 temp4 = op0;
2596 temp5 = op0;
2597 }
2598 else
2599 {
2600 temp1 = gen_reg_rtx (DImode);
2601 temp2 = gen_reg_rtx (DImode);
2602 temp3 = gen_reg_rtx (DImode);
2603 temp4 = gen_reg_rtx (DImode);
2604 temp5 = gen_reg_rtx (DImode);
2605 }
2606
2607 emit_insn (gen_sethh (temp1, op1));
2608 emit_insn (gen_setlm (temp2, op1));
2609 emit_insn (gen_sethm (temp3, temp1, op1));
2610 emit_insn (gen_rtx_SET (temp4,
2611 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2612 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2613 emit_insn (gen_setlo (op0, temp5, op1));
2614 break;
2615
2616 case CM_EMBMEDANY:
2617 /* Old old old backwards compatibility kruft here.
2618 Essentially it is MEDLOW with a fixed 64-bit
2619 virtual base added to all data segment addresses.
2620 Text-segment stuff is computed like MEDANY, we can't
2621 reuse the code above because the relocation knobs
2622 look different.
2623
2624 Data segment: sethi %hi(symbol), %temp1
2625 add %temp1, EMBMEDANY_BASE_REG, %temp2
2626 or %temp2, %lo(symbol), %reg */
2627 if (data_segment_operand (op1, GET_MODE (op1)))
2628 {
2629 if (temp)
2630 {
2631 temp1 = temp; /* op0 is allowed. */
2632 temp2 = op0;
2633 }
2634 else
2635 {
2636 temp1 = gen_reg_rtx (DImode);
2637 temp2 = gen_reg_rtx (DImode);
2638 }
2639
2640 emit_insn (gen_embmedany_sethi (temp1, op1));
2641 emit_insn (gen_embmedany_brsum (temp2, temp1));
2642 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2643 }
2644
2645 /* Text segment: sethi %uhi(symbol), %temp1
2646 sethi %hi(symbol), %temp2
2647 or %temp1, %ulo(symbol), %temp3
2648 sllx %temp3, 32, %temp4
2649 or %temp4, %temp2, %temp5
2650 or %temp5, %lo(symbol), %reg */
2651 else
2652 {
2653 if (temp)
2654 {
2655 /* It is possible that one of the registers we got for operands[2]
2656 might coincide with that of operands[0] (which is why we made
2657 it TImode). Pick the other one to use as our scratch. */
2658 if (rtx_equal_p (temp, op0))
2659 {
2660 gcc_assert (ti_temp);
2661 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2662 }
2663 temp1 = op0;
2664 temp2 = temp; /* op0 is _not_ allowed, see above. */
2665 temp3 = op0;
2666 temp4 = op0;
2667 temp5 = op0;
2668 }
2669 else
2670 {
2671 temp1 = gen_reg_rtx (DImode);
2672 temp2 = gen_reg_rtx (DImode);
2673 temp3 = gen_reg_rtx (DImode);
2674 temp4 = gen_reg_rtx (DImode);
2675 temp5 = gen_reg_rtx (DImode);
2676 }
2677
2678 emit_insn (gen_embmedany_textuhi (temp1, op1));
2679 emit_insn (gen_embmedany_texthi (temp2, op1));
2680 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2681 emit_insn (gen_rtx_SET (temp4,
2682 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2683 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2684 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2685 }
2686 break;
2687
2688 default:
2689 gcc_unreachable ();
2690 }
2691 }
2692
2693 /* These avoid problems when cross compiling. If we do not
2694 go through all this hair then the optimizer will see
2695 invalid REG_EQUAL notes or in some cases none at all. */
2696 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2697 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2698 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2699 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2700
2701 /* The optimizer is not to assume anything about exactly
2702    which bits are set for a HIGH; they are unspecified.
2703    Unfortunately this leads to many missed optimizations
2704    during CSE.  We mask out the non-HIGH bits and emit a set
2705    that matches a plain movdi, to alleviate this problem.  */
2706 static rtx
2707 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2708 {
2709 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2710 }
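/* For example (added for illustration), gen_safe_HIGH64 (reg, 0x12345678)
   yields (set reg (const_int 0x12345400)), i.e. the value with its low 10
   bits cleared, which matches a plain 64-bit move instead of a HIGH.  */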
2711
2712 static rtx
2713 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2714 {
2715 return gen_rtx_SET (dest, GEN_INT (val));
2716 }
2717
2718 static rtx
2719 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2720 {
2721 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2722 }
2723
2724 static rtx
2725 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2726 {
2727 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2728 }
2729
2730 /* Worker routines for 64-bit constant formation on arch64.
2731 One of the key things to be doing in these emissions is
2732 to create as many temp REGs as possible. This makes it
2733 possible for half-built constants to be used later when
2734 such values are similar to something required later on.
2735 Without doing this, the optimizer cannot see such
2736 opportunities. */
2737
2738 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2739 unsigned HOST_WIDE_INT, int);
2740
2741 static void
2742 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2743 unsigned HOST_WIDE_INT low_bits, int is_neg)
2744 {
2745 unsigned HOST_WIDE_INT high_bits;
2746
2747 if (is_neg)
2748 high_bits = (~low_bits) & 0xffffffff;
2749 else
2750 high_bits = low_bits;
2751
2752 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2753 if (!is_neg)
2754 {
2755 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2756 }
2757 else
2758 {
2759 /* If we are XOR'ing with -1, then we should emit a one's complement
2760 instead. This way the combiner will notice logical operations
2761 such as ANDN later on and substitute. */
2762 if ((low_bits & 0x3ff) == 0x3ff)
2763 {
2764 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2765 }
2766 else
2767 {
2768 emit_insn (gen_rtx_SET (op0,
2769 gen_safe_XOR64 (temp,
2770 (-(HOST_WIDE_INT)0x400
2771 | (low_bits & 0x3ff)))));
2772 }
2773 }
2774 }
2775
2776 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2777 unsigned HOST_WIDE_INT, int);
2778
2779 static void
2780 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2781 unsigned HOST_WIDE_INT high_bits,
2782 unsigned HOST_WIDE_INT low_immediate,
2783 int shift_count)
2784 {
2785 rtx temp2 = op0;
2786
2787 if ((high_bits & 0xfffffc00) != 0)
2788 {
2789 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2790 if ((high_bits & ~0xfffffc00) != 0)
2791 emit_insn (gen_rtx_SET (op0,
2792 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2793 else
2794 temp2 = temp;
2795 }
2796 else
2797 {
2798 emit_insn (gen_safe_SET64 (temp, high_bits));
2799 temp2 = temp;
2800 }
2801
2802 /* Now shift it up into place. */
2803 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2804 GEN_INT (shift_count))));
2805
2806 /* If there is a low immediate part piece, finish up by
2807 putting that in as well. */
2808 if (low_immediate != 0)
2809 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2810 }
2811
2812 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2813 unsigned HOST_WIDE_INT);
2814
2815 /* Full 64-bit constant decomposition. Even though this is the
2816 'worst' case, we still optimize a few things away. */
2817 static void
2818 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2819 unsigned HOST_WIDE_INT high_bits,
2820 unsigned HOST_WIDE_INT low_bits)
2821 {
2822 rtx sub_temp = op0;
2823
2824 if (can_create_pseudo_p ())
2825 sub_temp = gen_reg_rtx (DImode);
2826
2827 if ((high_bits & 0xfffffc00) != 0)
2828 {
2829 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2830 if ((high_bits & ~0xfffffc00) != 0)
2831 emit_insn (gen_rtx_SET (sub_temp,
2832 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2833 else
2834 sub_temp = temp;
2835 }
2836 else
2837 {
2838 emit_insn (gen_safe_SET64 (temp, high_bits));
2839 sub_temp = temp;
2840 }
2841
2842 if (can_create_pseudo_p ())
2843 {
2844 rtx temp2 = gen_reg_rtx (DImode);
2845 rtx temp3 = gen_reg_rtx (DImode);
2846 rtx temp4 = gen_reg_rtx (DImode);
2847
2848 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2849 GEN_INT (32))));
2850
2851 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2852 if ((low_bits & ~0xfffffc00) != 0)
2853 {
2854 emit_insn (gen_rtx_SET (temp3,
2855 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2856 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2857 }
2858 else
2859 {
2860 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2861 }
2862 }
2863 else
2864 {
2865 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2866 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2867 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2868 int to_shift = 12;
2869
2870 /* We are in the middle of reload, so this is really
2871 painful. However we do still make an attempt to
2872 avoid emitting truly stupid code. */
2873 if (low1 != const0_rtx)
2874 {
2875 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2876 GEN_INT (to_shift))));
2877 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2878 sub_temp = op0;
2879 to_shift = 12;
2880 }
2881 else
2882 {
2883 to_shift += 12;
2884 }
2885 if (low2 != const0_rtx)
2886 {
2887 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2888 GEN_INT (to_shift))));
2889 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2890 sub_temp = op0;
2891 to_shift = 8;
2892 }
2893 else
2894 {
2895 to_shift += 8;
2896 }
2897 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2898 GEN_INT (to_shift))));
2899 if (low3 != const0_rtx)
2900 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2901 /* phew... */
2902 }
2903 }
2904
2905 /* Analyze a 64-bit constant for certain properties. */
2906 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2907 unsigned HOST_WIDE_INT,
2908 int *, int *, int *);
2909
2910 static void
2911 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2912 unsigned HOST_WIDE_INT low_bits,
2913 int *hbsp, int *lbsp, int *abbasp)
2914 {
2915 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2916 int i;
2917
2918 lowest_bit_set = highest_bit_set = -1;
2919 i = 0;
2920 do
2921 {
2922 if ((lowest_bit_set == -1)
2923 && ((low_bits >> i) & 1))
2924 lowest_bit_set = i;
2925 if ((highest_bit_set == -1)
2926 && ((high_bits >> (32 - i - 1)) & 1))
2927 highest_bit_set = (64 - i - 1);
2928 }
2929 while (++i < 32
2930 && ((highest_bit_set == -1)
2931 || (lowest_bit_set == -1)));
2932 if (i == 32)
2933 {
2934 i = 0;
2935 do
2936 {
2937 if ((lowest_bit_set == -1)
2938 && ((high_bits >> i) & 1))
2939 lowest_bit_set = i + 32;
2940 if ((highest_bit_set == -1)
2941 && ((low_bits >> (32 - i - 1)) & 1))
2942 highest_bit_set = 32 - i - 1;
2943 }
2944 while (++i < 32
2945 && ((highest_bit_set == -1)
2946 || (lowest_bit_set == -1)));
2947 }
2948 /* If there are no bits set this should have gone out
2949 as one instruction! */
2950 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2951 all_bits_between_are_set = 1;
2952 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2953 {
2954 if (i < 32)
2955 {
2956 if ((low_bits & (1 << i)) != 0)
2957 continue;
2958 }
2959 else
2960 {
2961 if ((high_bits & (1 << (i - 32))) != 0)
2962 continue;
2963 }
2964 all_bits_between_are_set = 0;
2965 break;
2966 }
2967 *hbsp = highest_bit_set;
2968 *lbsp = lowest_bit_set;
2969 *abbasp = all_bits_between_are_set;
2970 }
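/* Worked example (added for illustration): for the constant 0x0000000000ff0000
   we have high_bits = 0 and low_bits = 0x00ff0000, so this routine reports
   lowest_bit_set = 16, highest_bit_set = 23 and all_bits_between_are_set = 1,
   which makes the constant eligible for the short "mov/sethi + shift"
   sequences used below.  */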
2971
2972 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2973
2974 static int
2975 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2976 unsigned HOST_WIDE_INT low_bits)
2977 {
2978 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2979
2980 if (high_bits == 0
2981 || high_bits == 0xffffffff)
2982 return 1;
2983
2984 analyze_64bit_constant (high_bits, low_bits,
2985 &highest_bit_set, &lowest_bit_set,
2986 &all_bits_between_are_set);
2987
2988 if ((highest_bit_set == 63
2989 || lowest_bit_set == 0)
2990 && all_bits_between_are_set != 0)
2991 return 1;
2992
2993 if ((highest_bit_set - lowest_bit_set) < 21)
2994 return 1;
2995
2996 return 0;
2997 }
2998
2999 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
3000 unsigned HOST_WIDE_INT,
3001 int, int);
3002
3003 static unsigned HOST_WIDE_INT
3004 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
3005 unsigned HOST_WIDE_INT low_bits,
3006 int lowest_bit_set, int shift)
3007 {
3008 HOST_WIDE_INT hi, lo;
3009
3010 if (lowest_bit_set < 32)
3011 {
3012 lo = (low_bits >> lowest_bit_set) << shift;
3013 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
3014 }
3015 else
3016 {
3017 lo = 0;
3018 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
3019 }
3020 gcc_assert (! (hi & lo));
3021 return (hi | lo);
3022 }
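/* Worked example (added for illustration): with high_bits = 0,
   low_bits = 0x00ff0000, lowest_bit_set = 16 and shift = 10, the result is
   (0x00ff0000 >> 16) << 10 = 0x3fc00, i.e. the interesting bits repositioned
   so that a sethi can materialize them before they are shifted into place.  */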
3023
3024 /* Here we are sure to be arch64 and this is an integer constant
3025 being loaded into a register. Emit the most efficient
3026 insn sequence possible. Detection of all the 1-insn cases
3027 has been done already. */
3028 static void
3029 sparc_emit_set_const64 (rtx op0, rtx op1)
3030 {
3031 unsigned HOST_WIDE_INT high_bits, low_bits;
3032 int lowest_bit_set, highest_bit_set;
3033 int all_bits_between_are_set;
3034 rtx temp = 0;
3035
3036 /* Sanity check that we know what we are working with. */
3037 gcc_assert (TARGET_ARCH64
3038 && (GET_CODE (op0) == SUBREG
3039 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
3040
3041 if (! can_create_pseudo_p ())
3042 temp = op0;
3043
3044 if (GET_CODE (op1) != CONST_INT)
3045 {
3046 sparc_emit_set_symbolic_const64 (op0, op1, temp);
3047 return;
3048 }
3049
3050 if (! temp)
3051 temp = gen_reg_rtx (DImode);
3052
3053 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
3054 low_bits = (INTVAL (op1) & 0xffffffff);
3055
3056 /* low_bits bits 0 --> 31
3057 high_bits bits 32 --> 63 */
3058
3059 analyze_64bit_constant (high_bits, low_bits,
3060 &highest_bit_set, &lowest_bit_set,
3061 &all_bits_between_are_set);
3062
3063 /* First try for a 2-insn sequence. */
3064
3065 /* These situations are preferred because the optimizer can
3066 * do more things with them:
3067 * 1) mov -1, %reg
3068 * sllx %reg, shift, %reg
3069 * 2) mov -1, %reg
3070 * srlx %reg, shift, %reg
3071 * 3) mov some_small_const, %reg
3072 * sllx %reg, shift, %reg
3073 */
3074 if (((highest_bit_set == 63
3075 || lowest_bit_set == 0)
3076 && all_bits_between_are_set != 0)
3077 || ((highest_bit_set - lowest_bit_set) < 12))
3078 {
3079 HOST_WIDE_INT the_const = -1;
3080 int shift = lowest_bit_set;
3081
3082 if ((highest_bit_set != 63
3083 && lowest_bit_set != 0)
3084 || all_bits_between_are_set == 0)
3085 {
3086 the_const =
3087 create_simple_focus_bits (high_bits, low_bits,
3088 lowest_bit_set, 0);
3089 }
3090 else if (lowest_bit_set == 0)
3091 shift = -(63 - highest_bit_set);
3092
3093 gcc_assert (SPARC_SIMM13_P (the_const));
3094 gcc_assert (shift != 0);
3095
3096 emit_insn (gen_safe_SET64 (temp, the_const));
3097 if (shift > 0)
3098 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
3099 GEN_INT (shift))));
3100 else if (shift < 0)
3101 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
3102 GEN_INT (-shift))));
3103 return;
3104 }
3105
3106 /* Now a range of 22 or less bits set somewhere.
3107 * 1) sethi %hi(focus_bits), %reg
3108 * sllx %reg, shift, %reg
3109 * 2) sethi %hi(focus_bits), %reg
3110 * srlx %reg, shift, %reg
3111 */
3112 if ((highest_bit_set - lowest_bit_set) < 21)
3113 {
3114 unsigned HOST_WIDE_INT focus_bits =
3115 create_simple_focus_bits (high_bits, low_bits,
3116 lowest_bit_set, 10);
3117
3118 gcc_assert (SPARC_SETHI_P (focus_bits));
3119 gcc_assert (lowest_bit_set != 10);
3120
3121 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
3122
3123 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
3124 if (lowest_bit_set < 10)
3125 emit_insn (gen_rtx_SET (op0,
3126 gen_rtx_LSHIFTRT (DImode, temp,
3127 GEN_INT (10 - lowest_bit_set))));
3128 else if (lowest_bit_set > 10)
3129 emit_insn (gen_rtx_SET (op0,
3130 gen_rtx_ASHIFT (DImode, temp,
3131 GEN_INT (lowest_bit_set - 10))));
3132 return;
3133 }
3134
3135 /* 1) sethi %hi(low_bits), %reg
3136 * or %reg, %lo(low_bits), %reg
3137 * 2) sethi %hi(~low_bits), %reg
3138 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
3139 */
3140 if (high_bits == 0
3141 || high_bits == 0xffffffff)
3142 {
3143 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
3144 (high_bits == 0xffffffff));
3145 return;
3146 }
3147
3148 /* Now, try 3-insn sequences. */
3149
3150 /* 1) sethi %hi(high_bits), %reg
3151 * or %reg, %lo(high_bits), %reg
3152 * sllx %reg, 32, %reg
3153 */
3154 if (low_bits == 0)
3155 {
3156 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
3157 return;
3158 }
3159
3160 /* We may be able to do something quick
3161 when the constant is negated, so try that. */
3162 if (const64_is_2insns ((~high_bits) & 0xffffffff,
3163 (~low_bits) & 0xfffffc00))
3164 {
3165 /* NOTE: The trailing bits get XOR'd so we need the
3166 non-negated bits, not the negated ones. */
3167 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
3168
3169 if ((((~high_bits) & 0xffffffff) == 0
3170 && ((~low_bits) & 0x80000000) == 0)
3171 || (((~high_bits) & 0xffffffff) == 0xffffffff
3172 && ((~low_bits) & 0x80000000) != 0))
3173 {
3174 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
3175
3176 if ((SPARC_SETHI_P (fast_int)
3177 && (~high_bits & 0xffffffff) == 0)
3178 || SPARC_SIMM13_P (fast_int))
3179 emit_insn (gen_safe_SET64 (temp, fast_int));
3180 else
3181 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
3182 }
3183 else
3184 {
3185 rtx negated_const;
3186 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
3187 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
3188 sparc_emit_set_const64 (temp, negated_const);
3189 }
3190
3191 /* If we are XOR'ing with -1, then we should emit a one's complement
3192 instead. This way the combiner will notice logical operations
3193 such as ANDN later on and substitute. */
3194 if (trailing_bits == 0x3ff)
3195 {
3196 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
3197 }
3198 else
3199 {
3200 emit_insn (gen_rtx_SET (op0,
3201 gen_safe_XOR64 (temp,
3202 (-0x400 | trailing_bits))));
3203 }
3204 return;
3205 }
3206
3207 /* 1) sethi %hi(xxx), %reg
3208 * or %reg, %lo(xxx), %reg
3209 * sllx %reg, yyy, %reg
3210 *
3211 * ??? This is just a generalized version of the low_bits==0
3212 * thing above, FIXME...
3213 */
3214 if ((highest_bit_set - lowest_bit_set) < 32)
3215 {
3216 unsigned HOST_WIDE_INT focus_bits =
3217 create_simple_focus_bits (high_bits, low_bits,
3218 lowest_bit_set, 0);
3219
3220 /* We can't get here in this state. */
3221 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
3222
3223 /* So what we know is that the set bits straddle the
3224 middle of the 64-bit word. */
3225 sparc_emit_set_const64_quick2 (op0, temp,
3226 focus_bits, 0,
3227 lowest_bit_set);
3228 return;
3229 }
3230
3231 /* 1) sethi %hi(high_bits), %reg
3232 * or %reg, %lo(high_bits), %reg
3233 * sllx %reg, 32, %reg
3234 * or %reg, low_bits, %reg
3235 */
3236 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
3237 {
3238 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
3239 return;
3240 }
3241
3242 /* The easiest way when all else fails, is full decomposition. */
3243 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
3244 }
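/* Worked example (added for illustration, tracing the code above): loading
   0x1234567800000000 falls into the low_bits == 0 case and becomes

     sethi  %hi(0x12345678), %reg
     or     %reg, 0x278, %reg
     sllx   %reg, 32, %reg

   while a small constant such as 0xff fits in a simm13 and is handled as a
   single insn by the move expander, so it never reaches this routine.  */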
3245
3246 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
3247
3248 static bool
3249 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3250 {
3251 *p1 = SPARC_ICC_REG;
3252 *p2 = SPARC_FCC_REG;
3253 return true;
3254 }
3255
3256 /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
3257
3258 static unsigned int
3259 sparc_min_arithmetic_precision (void)
3260 {
3261 return 32;
3262 }
3263
3264 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
3265 return the mode to be used for the comparison. For floating-point,
3266 CCFP[E]mode is used. CCNZmode should be used when the first operand
3267 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
3268 processing is needed. */
3269
3270 machine_mode
3271 select_cc_mode (enum rtx_code op, rtx x, rtx y)
3272 {
3273 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3274 {
3275 switch (op)
3276 {
3277 case EQ:
3278 case NE:
3279 case UNORDERED:
3280 case ORDERED:
3281 case UNLT:
3282 case UNLE:
3283 case UNGT:
3284 case UNGE:
3285 case UNEQ:
3286 return CCFPmode;
3287
3288 case LT:
3289 case LE:
3290 case GT:
3291 case GE:
3292 case LTGT:
3293 return CCFPEmode;
3294
3295 default:
3296 gcc_unreachable ();
3297 }
3298 }
3299 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
3300 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
3301 && y == const0_rtx)
3302 {
3303 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3304 return CCXNZmode;
3305 else
3306 return CCNZmode;
3307 }
3308 else
3309 {
3310 /* This is for the cmp<mode>_sne pattern. */
3311 if (GET_CODE (x) == NOT && y == constm1_rtx)
3312 {
3313 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3314 return CCXCmode;
3315 else
3316 return CCCmode;
3317 }
3318
3319 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
3320 if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
3321 {
3322 if (GET_CODE (y) == UNSPEC
3323 && (XINT (y, 1) == UNSPEC_ADDV
3324 || XINT (y, 1) == UNSPEC_SUBV
3325 || XINT (y, 1) == UNSPEC_NEGV))
3326 return CCVmode;
3327 else
3328 return CCCmode;
3329 }
3330
3331 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3332 return CCXmode;
3333 else
3334 return CCmode;
3335 }
3336 }
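/* Example (added for illustration): comparing (plus:SI x y) against zero
   selects CCNZmode, so the comparison can be folded into an addcc and only
   the N and Z condition bits are relied upon; a plain (compare:SI x y)
   against an arbitrary value selects CCmode instead.  */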
3337
3338 /* Emit the compare insn and return the CC reg for a CODE comparison
3339 with operands X and Y. */
3340
3341 static rtx
3342 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
3343 {
3344 machine_mode mode;
3345 rtx cc_reg;
3346
3347 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
3348 return x;
3349
3350 mode = SELECT_CC_MODE (code, x, y);
3351
3352 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
3353 fcc regs (cse can't tell they're really call clobbered regs and will
3354 remove a duplicate comparison even if there is an intervening function
3355 call - it will then try to reload the cc reg via an int reg which is why
3356 we need the movcc patterns). It is possible to provide the movcc
3357 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
3358 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
3359 to tell cse that CCFPE mode registers (even pseudos) are call
3360 clobbered. */
3361
3362 /* ??? This is an experiment. Rather than making changes to cse which may
3363 or may not be easy/clean, we do our own cse. This is possible because
3364 we will generate hard registers. Cse knows they're call clobbered (it
3365 doesn't know the same thing about pseudos). If we guess wrong, no big
3366 deal, but if we win, great! */
3367
3368 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3369 #if 1 /* experiment */
3370 {
3371 int reg;
3372 /* We cycle through the registers to ensure they're all exercised. */
3373 static int next_fcc_reg = 0;
3374 /* Previous x,y for each fcc reg. */
3375 static rtx prev_args[4][2];
3376
3377 /* Scan prev_args for x,y. */
3378 for (reg = 0; reg < 4; reg++)
3379 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
3380 break;
3381 if (reg == 4)
3382 {
3383 reg = next_fcc_reg;
3384 prev_args[reg][0] = x;
3385 prev_args[reg][1] = y;
3386 next_fcc_reg = (next_fcc_reg + 1) & 3;
3387 }
3388 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
3389 }
3390 #else
3391 cc_reg = gen_reg_rtx (mode);
3392 #endif /* ! experiment */
3393 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3394 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
3395 else
3396 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
3397
3398 /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD. If we do, this
3399 will only result in an unrecognizable insn so no point in asserting. */
3400 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
3401
3402 return cc_reg;
3403 }
3404
3405
3406 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
3407
3408 rtx
3409 gen_compare_reg (rtx cmp)
3410 {
3411 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
3412 }
3413
3414 /* This function is used for v9 only.
3415 DEST is the target of the Scc insn.
3416 CODE is the code for an Scc's comparison.
3417 X and Y are the values we compare.
3418
3419 This function is needed to turn
3420
3421 (set (reg:SI 110)
3422 (gt (reg:CCX 100 %icc)
3423 (const_int 0)))
3424 into
3425 (set (reg:SI 110)
3426 (gt:DI (reg:CCX 100 %icc)
3427 (const_int 0)))
3428
3429 IE: The instruction recognizer needs to see the mode of the comparison to
3430 find the right instruction. We could use "gt:DI" right in the
3431 define_expand, but leaving it out allows us to handle DI, SI, etc. */
3432
3433 static int
3434 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
3435 {
3436 if (! TARGET_ARCH64
3437 && (GET_MODE (x) == DImode
3438 || GET_MODE (dest) == DImode))
3439 return 0;
3440
3441 /* Try to use the movrCC insns. */
3442 if (TARGET_ARCH64
3443 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
3444 && y == const0_rtx
3445 && v9_regcmp_p (compare_code))
3446 {
3447 rtx op0 = x;
3448 rtx temp;
3449
3450 /* Special case for op0 != 0. This can be done with one instruction if
3451 dest == x. */
3452
3453 if (compare_code == NE
3454 && GET_MODE (dest) == DImode
3455 && rtx_equal_p (op0, dest))
3456 {
3457 emit_insn (gen_rtx_SET (dest,
3458 gen_rtx_IF_THEN_ELSE (DImode,
3459 gen_rtx_fmt_ee (compare_code, DImode,
3460 op0, const0_rtx),
3461 const1_rtx,
3462 dest)));
3463 return 1;
3464 }
3465
3466 if (reg_overlap_mentioned_p (dest, op0))
3467 {
3468 /* Handle the case where dest == x.
3469 We "early clobber" the result. */
3470 op0 = gen_reg_rtx (GET_MODE (x));
3471 emit_move_insn (op0, x);
3472 }
3473
3474 emit_insn (gen_rtx_SET (dest, const0_rtx));
3475 if (GET_MODE (op0) != DImode)
3476 {
3477 temp = gen_reg_rtx (DImode);
3478 convert_move (temp, op0, 0);
3479 }
3480 else
3481 temp = op0;
3482 emit_insn (gen_rtx_SET (dest,
3483 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3484 gen_rtx_fmt_ee (compare_code, DImode,
3485 temp, const0_rtx),
3486 const1_rtx,
3487 dest)));
3488 return 1;
3489 }
3490 else
3491 {
3492 x = gen_compare_reg_1 (compare_code, x, y);
3493 y = const0_rtx;
3494
3495 emit_insn (gen_rtx_SET (dest, const0_rtx));
3496 emit_insn (gen_rtx_SET (dest,
3497 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3498 gen_rtx_fmt_ee (compare_code,
3499 GET_MODE (x), x, y),
3500 const1_rtx, dest)));
3501 return 1;
3502 }
3503 }
3504
3505
3506 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
3507 without jumps using the addx/subx instructions. */
3508
3509 bool
3510 emit_scc_insn (rtx operands[])
3511 {
3512 rtx tem, x, y;
3513 enum rtx_code code;
3514 machine_mode mode;
3515
3516 /* The quad-word fp compare library routines all return nonzero to indicate
3517 true, which is different from the equivalent libgcc routines, so we must
3518 handle them specially here. */
3519 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3520 {
3521 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3522 GET_CODE (operands[1]));
3523 operands[2] = XEXP (operands[1], 0);
3524 operands[3] = XEXP (operands[1], 1);
3525 }
3526
3527 code = GET_CODE (operands[1]);
3528 x = operands[2];
3529 y = operands[3];
3530 mode = GET_MODE (x);
3531
3532 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3533 more applications). The exception to this is "reg != 0" which can
3534 be done in one instruction on v9 (so we do it). */
3535 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3536 {
3537 if (y != const0_rtx)
3538 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3539
3540 rtx pat = gen_rtx_SET (operands[0],
3541 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3542 x, const0_rtx));
3543
3544 /* If we can use addx/subx or addxc, add a clobber for CC. */
3545 if (mode == SImode || (code == NE && TARGET_VIS3))
3546 {
3547 rtx clobber
3548 = gen_rtx_CLOBBER (VOIDmode,
3549 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3550 SPARC_ICC_REG));
3551 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3552 }
3553
3554 emit_insn (pat);
3555 return true;
3556 }
3557
3558 /* We can do LTU in DImode using the addxc instruction with VIS3. */
3559 if (TARGET_ARCH64
3560 && mode == DImode
3561 && !((code == LTU || code == GTU) && TARGET_VIS3)
3562 && gen_v9_scc (operands[0], code, x, y))
3563 return true;
3564
3565   /* We can do LTU and GEU using the addx/subx instructions too.  And
3566      for GTU/LEU, if both operands are registers, swap them and fall
3567      back to the easy case.  */
3568 if (code == GTU || code == LEU)
3569 {
3570 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3571 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3572 {
3573 tem = x;
3574 x = y;
3575 y = tem;
3576 code = swap_condition (code);
3577 }
3578 }
3579
3580 if (code == LTU || code == GEU)
3581 {
3582 emit_insn (gen_rtx_SET (operands[0],
3583 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3584 gen_compare_reg_1 (code, x, y),
3585 const0_rtx)));
3586 return true;
3587 }
3588
3589   /* All the possibilities to use addx/subx-based sequences have been
3590      exhausted, so try for a 3-instruction sequence using v9 conditional
3591      moves.  */
3592 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3593 return true;
3594
3595 /* Nope, do branches. */
3596 return false;
3597 }
3598
3599 /* Emit a conditional jump insn for the v9 architecture using comparison code
3600 CODE and jump target LABEL.
3601 This function exists to take advantage of the v9 brxx insns. */
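/* For illustration only (hand-written, not taken from compiler output):
   the brxx family tests a 64-bit register directly against zero, e.g.

        brz,pn  %o0, .Llabel    ! branch if %o0 == 0
        brgez   %o1, .Llabel    ! branch if %o1 >= 0 (signed)

   so no separate compare against %icc/%xcc is needed.  */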
3602
3603 static void
3604 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3605 {
3606 emit_jump_insn (gen_rtx_SET (pc_rtx,
3607 gen_rtx_IF_THEN_ELSE (VOIDmode,
3608 gen_rtx_fmt_ee (code, GET_MODE (op0),
3609 op0, const0_rtx),
3610 gen_rtx_LABEL_REF (VOIDmode, label),
3611 pc_rtx)));
3612 }
3613
3614 /* Emit a conditional jump insn for the UA2011 architecture using
3615 comparison code CODE and jump target LABEL. This function exists
3616 to take advantage of the UA2011 Compare and Branch insns. */
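/* For illustration only (assembler mnemonics may vary): a cbcond
   instruction fuses the comparison and the branch, along the lines of

        cwbne   %o0, 5, .Llabel   ! compare word, branch if %o0 != 5

   where an immediate operand must fit in the 5-bit signed field that
   SPARC_SIMM5_P checks elsewhere in this file.  */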
3617
3618 static void
3619 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3620 {
3621 rtx if_then_else;
3622
3623 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3624 gen_rtx_fmt_ee(code, GET_MODE(op0),
3625 op0, op1),
3626 gen_rtx_LABEL_REF (VOIDmode, label),
3627 pc_rtx);
3628
3629 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3630 }
3631
3632 void
3633 emit_conditional_branch_insn (rtx operands[])
3634 {
3635 /* The quad-word fp compare library routines all return nonzero to indicate
3636 true, which is different from the equivalent libgcc routines, so we must
3637 handle them specially here. */
3638 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3639 {
3640 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3641 GET_CODE (operands[0]));
3642 operands[1] = XEXP (operands[0], 0);
3643 operands[2] = XEXP (operands[0], 1);
3644 }
3645
3646 /* If we can tell early on that the comparison is against a constant
3647 that won't fit in the 5-bit signed immediate field of a cbcond,
3648 use one of the other v9 conditional branch sequences. */
3649 if (TARGET_CBCOND
3650 && GET_CODE (operands[1]) == REG
3651 && (GET_MODE (operands[1]) == SImode
3652 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3653 && (GET_CODE (operands[2]) != CONST_INT
3654 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3655 {
3656 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3657 return;
3658 }
3659
3660 if (TARGET_ARCH64 && operands[2] == const0_rtx
3661 && GET_CODE (operands[1]) == REG
3662 && GET_MODE (operands[1]) == DImode)
3663 {
3664 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3665 return;
3666 }
3667
3668 operands[1] = gen_compare_reg (operands[0]);
3669 operands[2] = const0_rtx;
3670 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3671 operands[1], operands[2]);
3672 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3673 operands[3]));
3674 }
3675
3676
3677 /* Generate a DFmode part of a hard TFmode register.
3678 REG is the TFmode hard register, LOW is 1 for the
3679    low 64 bits of the register and 0 otherwise.
3680 */
3681 rtx
3682 gen_df_reg (rtx reg, int low)
3683 {
3684 int regno = REGNO (reg);
3685
3686 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3687 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3688 return gen_rtx_REG (DFmode, regno);
3689 }
3690
3691 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3692 Unlike normal calls, TFmode operands are passed by reference. It is
3693 assumed that no more than 3 operands are required. */
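/* Viewed from C, the _Qp_* routines used by the callers below roughly
   follow the Sun quad-precision ABI, with the result and the TFmode
   arguments all passed by pointer; a sketch of one such prototype is

     void _Qp_add (long double *result, const long double *a,
                   const long double *b);

   which is why operand 0 may need the stack temporary set up below.  */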
3694
3695 static void
3696 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3697 {
3698 rtx ret_slot = NULL, arg[3], func_sym;
3699 int i;
3700
3701 /* We only expect to be called for conversions, unary, and binary ops. */
3702 gcc_assert (nargs == 2 || nargs == 3);
3703
3704 for (i = 0; i < nargs; ++i)
3705 {
3706 rtx this_arg = operands[i];
3707 rtx this_slot;
3708
3709 /* TFmode arguments and return values are passed by reference. */
3710 if (GET_MODE (this_arg) == TFmode)
3711 {
3712 int force_stack_temp;
3713
3714 force_stack_temp = 0;
3715 if (TARGET_BUGGY_QP_LIB && i == 0)
3716 force_stack_temp = 1;
3717
3718 if (GET_CODE (this_arg) == MEM
3719 && ! force_stack_temp)
3720 {
3721 tree expr = MEM_EXPR (this_arg);
3722 if (expr)
3723 mark_addressable (expr);
3724 this_arg = XEXP (this_arg, 0);
3725 }
3726 else if (CONSTANT_P (this_arg)
3727 && ! force_stack_temp)
3728 {
3729 this_slot = force_const_mem (TFmode, this_arg);
3730 this_arg = XEXP (this_slot, 0);
3731 }
3732 else
3733 {
3734 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3735
3736 /* Operand 0 is the return value. We'll copy it out later. */
3737 if (i > 0)
3738 emit_move_insn (this_slot, this_arg);
3739 else
3740 ret_slot = this_slot;
3741
3742 this_arg = XEXP (this_slot, 0);
3743 }
3744 }
3745
3746 arg[i] = this_arg;
3747 }
3748
3749 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3750
3751 if (GET_MODE (operands[0]) == TFmode)
3752 {
3753 if (nargs == 2)
3754 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3755 arg[0], GET_MODE (arg[0]),
3756 arg[1], GET_MODE (arg[1]));
3757 else
3758 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3759 arg[0], GET_MODE (arg[0]),
3760 arg[1], GET_MODE (arg[1]),
3761 arg[2], GET_MODE (arg[2]));
3762
3763 if (ret_slot)
3764 emit_move_insn (operands[0], ret_slot);
3765 }
3766 else
3767 {
3768 rtx ret;
3769
3770 gcc_assert (nargs == 2);
3771
3772 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3773 GET_MODE (operands[0]),
3774 arg[1], GET_MODE (arg[1]));
3775
3776 if (ret != operands[0])
3777 emit_move_insn (operands[0], ret);
3778 }
3779 }
3780
3781 /* Expand soft-float TFmode calls to sparc abi routines. */
3782
3783 static void
3784 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3785 {
3786 const char *func;
3787
3788 switch (code)
3789 {
3790 case PLUS:
3791 func = "_Qp_add";
3792 break;
3793 case MINUS:
3794 func = "_Qp_sub";
3795 break;
3796 case MULT:
3797 func = "_Qp_mul";
3798 break;
3799 case DIV:
3800 func = "_Qp_div";
3801 break;
3802 default:
3803 gcc_unreachable ();
3804 }
3805
3806 emit_soft_tfmode_libcall (func, 3, operands);
3807 }
3808
3809 static void
3810 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3811 {
3812 const char *func;
3813
3814 gcc_assert (code == SQRT);
3815 func = "_Qp_sqrt";
3816
3817 emit_soft_tfmode_libcall (func, 2, operands);
3818 }
3819
3820 static void
3821 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3822 {
3823 const char *func;
3824
3825 switch (code)
3826 {
3827 case FLOAT_EXTEND:
3828 switch (GET_MODE (operands[1]))
3829 {
3830 case E_SFmode:
3831 func = "_Qp_stoq";
3832 break;
3833 case E_DFmode:
3834 func = "_Qp_dtoq";
3835 break;
3836 default:
3837 gcc_unreachable ();
3838 }
3839 break;
3840
3841 case FLOAT_TRUNCATE:
3842 switch (GET_MODE (operands[0]))
3843 {
3844 case E_SFmode:
3845 func = "_Qp_qtos";
3846 break;
3847 case E_DFmode:
3848 func = "_Qp_qtod";
3849 break;
3850 default:
3851 gcc_unreachable ();
3852 }
3853 break;
3854
3855 case FLOAT:
3856 switch (GET_MODE (operands[1]))
3857 {
3858 case E_SImode:
3859 func = "_Qp_itoq";
3860 if (TARGET_ARCH64)
3861 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3862 break;
3863 case E_DImode:
3864 func = "_Qp_xtoq";
3865 break;
3866 default:
3867 gcc_unreachable ();
3868 }
3869 break;
3870
3871 case UNSIGNED_FLOAT:
3872 switch (GET_MODE (operands[1]))
3873 {
3874 case E_SImode:
3875 func = "_Qp_uitoq";
3876 if (TARGET_ARCH64)
3877 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3878 break;
3879 case E_DImode:
3880 func = "_Qp_uxtoq";
3881 break;
3882 default:
3883 gcc_unreachable ();
3884 }
3885 break;
3886
3887 case FIX:
3888 switch (GET_MODE (operands[0]))
3889 {
3890 case E_SImode:
3891 func = "_Qp_qtoi";
3892 break;
3893 case E_DImode:
3894 func = "_Qp_qtox";
3895 break;
3896 default:
3897 gcc_unreachable ();
3898 }
3899 break;
3900
3901 case UNSIGNED_FIX:
3902 switch (GET_MODE (operands[0]))
3903 {
3904 case E_SImode:
3905 func = "_Qp_qtoui";
3906 break;
3907 case E_DImode:
3908 func = "_Qp_qtoux";
3909 break;
3910 default:
3911 gcc_unreachable ();
3912 }
3913 break;
3914
3915 default:
3916 gcc_unreachable ();
3917 }
3918
3919 emit_soft_tfmode_libcall (func, 2, operands);
3920 }
3921
3922 /* Expand a hard-float TFmode operation.  All arguments must be in
3923 registers. */
3924
3925 static void
3926 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3927 {
3928 rtx op, dest;
3929
3930 if (GET_RTX_CLASS (code) == RTX_UNARY)
3931 {
3932 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3933 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3934 }
3935 else
3936 {
3937 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3938 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3939 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3940 operands[1], operands[2]);
3941 }
3942
3943 if (register_operand (operands[0], VOIDmode))
3944 dest = operands[0];
3945 else
3946 dest = gen_reg_rtx (GET_MODE (operands[0]));
3947
3948 emit_insn (gen_rtx_SET (dest, op));
3949
3950 if (dest != operands[0])
3951 emit_move_insn (operands[0], dest);
3952 }
3953
3954 void
3955 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3956 {
3957 if (TARGET_HARD_QUAD)
3958 emit_hard_tfmode_operation (code, operands);
3959 else
3960 emit_soft_tfmode_binop (code, operands);
3961 }
3962
3963 void
3964 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3965 {
3966 if (TARGET_HARD_QUAD)
3967 emit_hard_tfmode_operation (code, operands);
3968 else
3969 emit_soft_tfmode_unop (code, operands);
3970 }
3971
3972 void
3973 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3974 {
3975 if (TARGET_HARD_QUAD)
3976 emit_hard_tfmode_operation (code, operands);
3977 else
3978 emit_soft_tfmode_cvt (code, operands);
3979 }
3980
3981 /* Return nonzero if a branch/jump/call instruction will be emitting
3982    a nop into its delay slot.  */
3983
3984 int
3985 empty_delay_slot (rtx_insn *insn)
3986 {
3987 rtx seq;
3988
3989 /* If no previous instruction (should not happen), return true. */
3990 if (PREV_INSN (insn) == NULL)
3991 return 1;
3992
3993 seq = NEXT_INSN (PREV_INSN (insn));
3994 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3995 return 0;
3996
3997 return 1;
3998 }
3999
4000 /* Return nonzero if we should emit a nop after a cbcond instruction.
4001    The cbcond instruction does not have a delay slot; however, there is
4002 a severe performance penalty if a control transfer appears right
4003 after a cbcond. Therefore we emit a nop when we detect this
4004 situation. */
4005
4006 int
4007 emit_cbcond_nop (rtx_insn *insn)
4008 {
4009 rtx next = next_active_insn (insn);
4010
4011 if (!next)
4012 return 1;
4013
4014 if (NONJUMP_INSN_P (next)
4015 && GET_CODE (PATTERN (next)) == SEQUENCE)
4016 next = XVECEXP (PATTERN (next), 0, 0);
4017 else if (CALL_P (next)
4018 && GET_CODE (PATTERN (next)) == PARALLEL)
4019 {
4020 rtx delay = XVECEXP (PATTERN (next), 0, 1);
4021
4022 if (GET_CODE (delay) == RETURN)
4023 {
4024 /* It's a sibling call. Do not emit the nop if we're going
4025 to emit something other than the jump itself as the first
4026 instruction of the sibcall sequence. */
4027 if (sparc_leaf_function_p || TARGET_FLAT)
4028 return 0;
4029 }
4030 }
4031
4032 if (NONJUMP_INSN_P (next))
4033 return 0;
4034
4035 return 1;
4036 }
4037
4038 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
4039 instruction. RETURN_P is true if the v9 variant 'return' is to be
4040 considered in the test too.
4041
4042 TRIAL must be a SET whose destination is a REG appropriate for the
4043 'restore' instruction or, if RETURN_P is true, for the 'return'
4044 instruction. */
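/* For illustration only (the register-window renaming is handled by the
   output routines): the point of the tests below is to let the final
   value-producing insn be folded into the epilogue, so that e.g.
   "return a + b" in a function with a register window can come out as

        ret
        restore %i0, %i1, %o0

   where the addition is performed by the restore itself.  */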
4045
4046 static int
4047 eligible_for_restore_insn (rtx trial, bool return_p)
4048 {
4049 rtx pat = PATTERN (trial);
4050 rtx src = SET_SRC (pat);
4051 bool src_is_freg = false;
4052 rtx src_reg;
4053
4054 /* Since we now can do moves between float and integer registers when
4055 VIS3 is enabled, we have to catch this case. We can allow such
4056 moves when doing a 'return' however. */
4057 src_reg = src;
4058 if (GET_CODE (src_reg) == SUBREG)
4059 src_reg = SUBREG_REG (src_reg);
4060 if (GET_CODE (src_reg) == REG
4061 && SPARC_FP_REG_P (REGNO (src_reg)))
4062 src_is_freg = true;
4063
4064 /* The 'restore src,%g0,dest' pattern for word mode and below. */
4065 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
4066 && arith_operand (src, GET_MODE (src))
4067 && ! src_is_freg)
4068 {
4069 if (TARGET_ARCH64)
4070 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
4071 else
4072 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
4073 }
4074
4075 /* The 'restore src,%g0,dest' pattern for double-word mode. */
4076 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
4077 && arith_double_operand (src, GET_MODE (src))
4078 && ! src_is_freg)
4079 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
4080
4081 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
4082 else if (! TARGET_FPU && register_operand (src, SFmode))
4083 return 1;
4084
4085 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
4086 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
4087 return 1;
4088
4089 /* If we have the 'return' instruction, anything that does not use
4090 local or output registers and can go into a delay slot wins. */
4091 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
4092 return 1;
4093
4094 /* The 'restore src1,src2,dest' pattern for SImode. */
4095 else if (GET_CODE (src) == PLUS
4096 && register_operand (XEXP (src, 0), SImode)
4097 && arith_operand (XEXP (src, 1), SImode))
4098 return 1;
4099
4100 /* The 'restore src1,src2,dest' pattern for DImode. */
4101 else if (GET_CODE (src) == PLUS
4102 && register_operand (XEXP (src, 0), DImode)
4103 && arith_double_operand (XEXP (src, 1), DImode))
4104 return 1;
4105
4106 /* The 'restore src1,%lo(src2),dest' pattern. */
4107 else if (GET_CODE (src) == LO_SUM
4108 && ! TARGET_CM_MEDMID
4109 && ((register_operand (XEXP (src, 0), SImode)
4110 && immediate_operand (XEXP (src, 1), SImode))
4111 || (TARGET_ARCH64
4112 && register_operand (XEXP (src, 0), DImode)
4113 && immediate_operand (XEXP (src, 1), DImode))))
4114 return 1;
4115
4116   /* The 'restore src,src,dest' pattern (a left shift by 1 done as an add).  */
4117 else if (GET_CODE (src) == ASHIFT
4118 && (register_operand (XEXP (src, 0), SImode)
4119 || register_operand (XEXP (src, 0), DImode))
4120 && XEXP (src, 1) == const1_rtx)
4121 return 1;
4122
4123 return 0;
4124 }
4125
4126 /* Return nonzero if TRIAL can go into the function return's delay slot. */
4127
4128 int
4129 eligible_for_return_delay (rtx_insn *trial)
4130 {
4131 int regno;
4132 rtx pat;
4133
4134 /* If the function uses __builtin_eh_return, the eh_return machinery
4135 occupies the delay slot. */
4136 if (crtl->calls_eh_return)
4137 return 0;
4138
4139 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4140 return 0;
4141
4142 /* In the case of a leaf or flat function, anything can go into the slot. */
4143 if (sparc_leaf_function_p || TARGET_FLAT)
4144 return 1;
4145
4146 if (!NONJUMP_INSN_P (trial))
4147 return 0;
4148
4149 pat = PATTERN (trial);
4150 if (GET_CODE (pat) == PARALLEL)
4151 {
4152 int i;
4153
4154 if (! TARGET_V9)
4155 return 0;
4156 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
4157 {
4158 rtx expr = XVECEXP (pat, 0, i);
4159 if (GET_CODE (expr) != SET)
4160 return 0;
4161 if (GET_CODE (SET_DEST (expr)) != REG)
4162 return 0;
4163 regno = REGNO (SET_DEST (expr));
4164 if (regno >= 8 && regno < 24)
4165 return 0;
4166 }
4167 return !epilogue_renumber (&pat, 1);
4168 }
4169
4170 if (GET_CODE (pat) != SET)
4171 return 0;
4172
4173 if (GET_CODE (SET_DEST (pat)) != REG)
4174 return 0;
4175
4176 regno = REGNO (SET_DEST (pat));
4177
4178 /* Otherwise, only operations which can be done in tandem with
4179 a `restore' or `return' insn can go into the delay slot. */
4180 if (regno >= 8 && regno < 24)
4181 return 0;
4182
4183   /* If this instruction sets up a floating-point register and we have a
4184      return instruction, it can probably go in.  But a restore will not
4185      work with FP_REGS.  */
4186 if (! SPARC_INT_REG_P (regno))
4187 return TARGET_V9 && !epilogue_renumber (&pat, 1);
4188
4189 return eligible_for_restore_insn (trial, true);
4190 }
4191
4192 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
4193
4194 int
4195 eligible_for_sibcall_delay (rtx_insn *trial)
4196 {
4197 rtx pat;
4198
4199 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4200 return 0;
4201
4202 if (!NONJUMP_INSN_P (trial))
4203 return 0;
4204
4205 pat = PATTERN (trial);
4206
4207 if (sparc_leaf_function_p || TARGET_FLAT)
4208 {
4209 /* If the tail call is done using the call instruction,
4210 we have to restore %o7 in the delay slot. */
4211 if (LEAF_SIBCALL_SLOT_RESERVED_P)
4212 return 0;
4213
4214 /* %g1 is used to build the function address */
4215 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
4216 return 0;
4217
4218 return 1;
4219 }
4220
4221 if (GET_CODE (pat) != SET)
4222 return 0;
4223
4224 /* Otherwise, only operations which can be done in tandem with
4225 a `restore' insn can go into the delay slot. */
4226 if (GET_CODE (SET_DEST (pat)) != REG
4227 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
4228 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
4229 return 0;
4230
4231 /* If it mentions %o7, it can't go in, because sibcall will clobber it
4232 in most cases. */
4233 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
4234 return 0;
4235
4236 return eligible_for_restore_insn (trial, false);
4237 }
4238
4239 /* Determine if it's legal to put X into the constant pool. This
4240 is not possible if X contains the address of a symbol that is
4241 not constant (TLS) or not known at final link time (PIC). */
4242
4243 static bool
4244 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
4245 {
4246 switch (GET_CODE (x))
4247 {
4248 case CONST_INT:
4249 case CONST_WIDE_INT:
4250 case CONST_DOUBLE:
4251 case CONST_VECTOR:
4252 /* Accept all non-symbolic constants. */
4253 return false;
4254
4255 case LABEL_REF:
4256 /* Labels are OK iff we are non-PIC. */
4257 return flag_pic != 0;
4258
4259 case SYMBOL_REF:
4260 /* 'Naked' TLS symbol references are never OK,
4261 non-TLS symbols are OK iff we are non-PIC. */
4262 if (SYMBOL_REF_TLS_MODEL (x))
4263 return true;
4264 else
4265 return flag_pic != 0;
4266
4267 case CONST:
4268 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
4269 case PLUS:
4270 case MINUS:
4271 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
4272 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
4273 case UNSPEC:
4274 return true;
4275 default:
4276 gcc_unreachable ();
4277 }
4278 }
4279
4280 /* Global Offset Table support. */
4281 static GTY(()) rtx got_symbol_rtx = NULL_RTX;
4282 static GTY(()) rtx got_register_rtx = NULL_RTX;
4283 static GTY(()) rtx got_helper_rtx = NULL_RTX;
4284
4285 static GTY(()) bool got_helper_needed = false;
4286
4287 /* Return the SYMBOL_REF for the Global Offset Table. */
4288
4289 static rtx
4290 sparc_got (void)
4291 {
4292 if (!got_symbol_rtx)
4293 got_symbol_rtx = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
4294
4295 return got_symbol_rtx;
4296 }
4297
4298 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4299
4300 static rtx
4301 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2)
4302 {
4303 int orig_flag_pic = flag_pic;
4304 rtx insn;
4305
4306 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4307 flag_pic = 0;
4308 if (TARGET_ARCH64)
4309 insn = gen_load_pcrel_symdi (op0, op1, op2, GEN_INT (REGNO (op0)));
4310 else
4311 insn = gen_load_pcrel_symsi (op0, op1, op2, GEN_INT (REGNO (op0)));
4312 flag_pic = orig_flag_pic;
4313
4314 return insn;
4315 }
4316
4317 /* Output the load_pcrel_sym{si,di} patterns. */
4318
4319 const char *
4320 output_load_pcrel_sym (rtx *operands)
4321 {
4322 if (flag_delayed_branch)
4323 {
4324 output_asm_insn ("sethi\t%%hi(%a1-4), %0", operands);
4325 output_asm_insn ("call\t%a2", operands);
4326 output_asm_insn (" add\t%0, %%lo(%a1+4), %0", operands);
4327 }
4328 else
4329 {
4330 output_asm_insn ("sethi\t%%hi(%a1-8), %0", operands);
4331 output_asm_insn ("add\t%0, %%lo(%a1-4), %0", operands);
4332 output_asm_insn ("call\t%a2", operands);
4333 output_asm_insn (" nop", NULL);
4334 }
4335
4336 if (operands[2] == got_helper_rtx)
4337 got_helper_needed = true;
4338
4339 return "";
4340 }
4341
4342 #ifdef HAVE_GAS_HIDDEN
4343 # define USE_HIDDEN_LINKONCE 1
4344 #else
4345 # define USE_HIDDEN_LINKONCE 0
4346 #endif
4347
4348 /* Emit code to load the GOT register. */
4349
4350 void
4351 load_got_register (void)
4352 {
4353 rtx insn;
4354
4355 if (TARGET_VXWORKS_RTP)
4356 {
4357 if (!got_register_rtx)
4358 got_register_rtx = pic_offset_table_rtx;
4359
4360 insn = gen_vxworks_load_got ();
4361 }
4362 else
4363 {
4364 if (!got_register_rtx)
4365 got_register_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4366
4367 /* The GOT symbol is subject to a PC-relative relocation so we need a
4368 helper function to add the PC value and thus get the final value. */
4369 if (!got_helper_rtx)
4370 {
4371 char name[32];
4372
4373 /* Skip the leading '%' as that cannot be used in a symbol name. */
4374 if (USE_HIDDEN_LINKONCE)
4375 sprintf (name, "__sparc_get_pc_thunk.%s",
4376 reg_names[REGNO (got_register_rtx)] + 1);
4377 else
4378 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC",
4379 REGNO (got_register_rtx));
4380
4381 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4382 }
4383
4384 insn
4385 = gen_load_pcrel_sym (got_register_rtx, sparc_got (), got_helper_rtx);
4386 }
4387
4388 emit_insn (insn);
4389 }
4390
4391 /* Ensure that we are not using patterns that are not OK with PIC. */
4392
4393 int
4394 check_pic (int i)
4395 {
4396 rtx op;
4397
4398 switch (flag_pic)
4399 {
4400 case 1:
4401 op = recog_data.operand[i];
4402 gcc_assert (GET_CODE (op) != SYMBOL_REF
4403 && (GET_CODE (op) != CONST
4404 || (GET_CODE (XEXP (op, 0)) == MINUS
4405 && XEXP (XEXP (op, 0), 0) == sparc_got ()
4406 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
4407 /* fallthrough */
4408 case 2:
4409 default:
4410 return 1;
4411 }
4412 }
4413
4414 /* Return true if X is an address which needs a temporary register when
4415 reloaded while generating PIC code. */
4416
4417 int
4418 pic_address_needs_scratch (rtx x)
4419 {
4420   /* An address which is a symbol plus a non-SMALL_INT offset needs a temp reg.  */
4421 if (GET_CODE (x) == CONST
4422 && GET_CODE (XEXP (x, 0)) == PLUS
4423 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
4424 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4425 && !SMALL_INT (XEXP (XEXP (x, 0), 1)))
4426 return 1;
4427
4428 return 0;
4429 }
4430
4431 /* Determine if a given RTX is a valid constant. We already know this
4432 satisfies CONSTANT_P. */
4433
4434 static bool
4435 sparc_legitimate_constant_p (machine_mode mode, rtx x)
4436 {
4437 switch (GET_CODE (x))
4438 {
4439 case CONST:
4440 case SYMBOL_REF:
4441 if (sparc_tls_referenced_p (x))
4442 return false;
4443 break;
4444
4445 case CONST_DOUBLE:
4446 /* Floating point constants are generally not ok.
4447 The only exception is 0.0 and all-ones in VIS. */
4448 if (TARGET_VIS
4449 && SCALAR_FLOAT_MODE_P (mode)
4450 && (const_zero_operand (x, mode)
4451 || const_all_ones_operand (x, mode)))
4452 return true;
4453
4454 return false;
4455
4456 case CONST_VECTOR:
4457 /* Vector constants are generally not ok.
4458 The only exception is 0 or -1 in VIS. */
4459 if (TARGET_VIS
4460 && (const_zero_operand (x, mode)
4461 || const_all_ones_operand (x, mode)))
4462 return true;
4463
4464 return false;
4465
4466 default:
4467 break;
4468 }
4469
4470 return true;
4471 }
4472
4473 /* Determine if a given RTX is a valid constant address. */
4474
4475 bool
4476 constant_address_p (rtx x)
4477 {
4478 switch (GET_CODE (x))
4479 {
4480 case LABEL_REF:
4481 case CONST_INT:
4482 case HIGH:
4483 return true;
4484
4485 case CONST:
4486 if (flag_pic && pic_address_needs_scratch (x))
4487 return false;
4488 return sparc_legitimate_constant_p (Pmode, x);
4489
4490 case SYMBOL_REF:
4491 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
4492
4493 default:
4494 return false;
4495 }
4496 }
4497
4498 /* Nonzero if the constant value X is a legitimate general operand
4499 when generating PIC code. It is given that flag_pic is on and
4500 that X satisfies CONSTANT_P. */
4501
4502 bool
4503 legitimate_pic_operand_p (rtx x)
4504 {
4505 if (pic_address_needs_scratch (x))
4506 return false;
4507 if (sparc_tls_referenced_p (x))
4508 return false;
4509 return true;
4510 }
4511
4512 /* Return true if X is a representation of the PIC register. */
4513
4514 static bool
4515 sparc_pic_register_p (rtx x)
4516 {
4517 if (!REG_P (x) || !pic_offset_table_rtx)
4518 return false;
4519
4520 if (x == pic_offset_table_rtx)
4521 return true;
4522
4523 if (!HARD_REGISTER_P (pic_offset_table_rtx)
4524 && (HARD_REGISTER_P (x) || lra_in_progress || reload_in_progress)
4525 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
4526 return true;
4527
4528 return false;
4529 }
4530
4531 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
4532 (CONST_INT_P (X) \
4533 && INTVAL (X) >= -0x1000 \
4534 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
4535
4536 #define RTX_OK_FOR_OLO10_P(X, MODE) \
4537 (CONST_INT_P (X) \
4538 && INTVAL (X) >= -0x1000 \
4539 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
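/* Both macros stay within the 13-bit signed immediate field of the memory
   instructions.  As a rough sanity check for an 8-byte access:
   RTX_OK_FOR_OFFSET_P accepts -4096 .. 4088, while RTX_OK_FOR_OLO10_P
   stops at 3064, presumably so that the up-to-10-bit %lo() displacement
   added by the OLO10 form still fits in the field.  */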
4540
4541 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
4542
4543 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
4544 ordinarily. This changes a bit when generating PIC. */
4545
4546 static bool
4547 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4548 {
4549 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
4550
4551 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4552 rs1 = addr;
4553 else if (GET_CODE (addr) == PLUS)
4554 {
4555 rs1 = XEXP (addr, 0);
4556 rs2 = XEXP (addr, 1);
4557
4558 /* Canonicalize. REG comes first, if there are no regs,
4559 LO_SUM comes first. */
4560 if (!REG_P (rs1)
4561 && GET_CODE (rs1) != SUBREG
4562 && (REG_P (rs2)
4563 || GET_CODE (rs2) == SUBREG
4564 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
4565 {
4566 rs1 = XEXP (addr, 1);
4567 rs2 = XEXP (addr, 0);
4568 }
4569
4570 if ((flag_pic == 1
4571 && sparc_pic_register_p (rs1)
4572 && !REG_P (rs2)
4573 && GET_CODE (rs2) != SUBREG
4574 && GET_CODE (rs2) != LO_SUM
4575 && GET_CODE (rs2) != MEM
4576 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
4577 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
4578 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
4579 || ((REG_P (rs1)
4580 || GET_CODE (rs1) == SUBREG)
4581 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4582 {
4583 imm1 = rs2;
4584 rs2 = NULL;
4585 }
4586 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4587 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4588 {
4589 /* We prohibit REG + REG for TFmode when there are no quad move insns
4590 and we consequently need to split. We do this because REG+REG
4591 is not an offsettable address. If we get the situation in reload
4592 where source and destination of a movtf pattern are both MEMs with
4593 REG+REG address, then only one of them gets converted to an
4594 offsettable address. */
4595 if (mode == TFmode
4596 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4597 return 0;
4598
4599 /* Likewise for TImode, but in all cases. */
4600 if (mode == TImode)
4601 return 0;
4602
4603 /* We prohibit REG + REG on ARCH32 if not optimizing for
4604 DFmode/DImode because then mem_min_alignment is likely to be zero
4605 after reload and the forced split would lack a matching splitter
4606 pattern. */
4607 if (TARGET_ARCH32 && !optimize
4608 && (mode == DFmode || mode == DImode))
4609 return 0;
4610 }
4611 else if (USE_AS_OFFSETABLE_LO10
4612 && GET_CODE (rs1) == LO_SUM
4613 && TARGET_ARCH64
4614 && ! TARGET_CM_MEDMID
4615 && RTX_OK_FOR_OLO10_P (rs2, mode))
4616 {
4617 rs2 = NULL;
4618 imm1 = XEXP (rs1, 1);
4619 rs1 = XEXP (rs1, 0);
4620 if (!CONSTANT_P (imm1)
4621 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4622 return 0;
4623 }
4624 }
4625 else if (GET_CODE (addr) == LO_SUM)
4626 {
4627 rs1 = XEXP (addr, 0);
4628 imm1 = XEXP (addr, 1);
4629
4630 if (!CONSTANT_P (imm1)
4631 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4632 return 0;
4633
4634 /* We can't allow TFmode in 32-bit mode, because an offset greater
4635 than the alignment (8) may cause the LO_SUM to overflow. */
4636 if (mode == TFmode && TARGET_ARCH32)
4637 return 0;
4638
4639 /* During reload, accept the HIGH+LO_SUM construct generated by
4640 sparc_legitimize_reload_address. */
4641 if (reload_in_progress
4642 && GET_CODE (rs1) == HIGH
4643 && XEXP (rs1, 0) == imm1)
4644 return 1;
4645 }
4646 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4647 return 1;
4648 else
4649 return 0;
4650
4651 if (GET_CODE (rs1) == SUBREG)
4652 rs1 = SUBREG_REG (rs1);
4653 if (!REG_P (rs1))
4654 return 0;
4655
4656 if (rs2)
4657 {
4658 if (GET_CODE (rs2) == SUBREG)
4659 rs2 = SUBREG_REG (rs2);
4660 if (!REG_P (rs2))
4661 return 0;
4662 }
4663
4664 if (strict)
4665 {
4666 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4667 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4668 return 0;
4669 }
4670 else
4671 {
4672 if ((! SPARC_INT_REG_P (REGNO (rs1))
4673 && REGNO (rs1) != FRAME_POINTER_REGNUM
4674 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4675 || (rs2
4676 && (! SPARC_INT_REG_P (REGNO (rs2))
4677 && REGNO (rs2) != FRAME_POINTER_REGNUM
4678 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4679 return 0;
4680 }
4681 return 1;
4682 }
4683
4684 /* Return the SYMBOL_REF for the tls_get_addr function. */
4685
4686 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4687
4688 static rtx
4689 sparc_tls_get_addr (void)
4690 {
4691 if (!sparc_tls_symbol)
4692 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4693
4694 return sparc_tls_symbol;
4695 }
4696
4697 /* Return the Global Offset Table to be used in TLS mode. */
4698
4699 static rtx
4700 sparc_tls_got (void)
4701 {
4702 /* In PIC mode, this is just the PIC offset table. */
4703 if (flag_pic)
4704 {
4705 crtl->uses_pic_offset_table = 1;
4706 return pic_offset_table_rtx;
4707 }
4708
4709 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4710 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4711 if (TARGET_SUN_TLS && TARGET_ARCH32)
4712 {
4713 load_got_register ();
4714 return got_register_rtx;
4715 }
4716
4717 /* In all other cases, we load a new pseudo with the GOT symbol. */
4718 return copy_to_reg (sparc_got ());
4719 }
4720
4721 /* Return true if X contains a thread-local symbol. */
4722
4723 static bool
4724 sparc_tls_referenced_p (rtx x)
4725 {
4726 if (!TARGET_HAVE_TLS)
4727 return false;
4728
4729 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4730 x = XEXP (XEXP (x, 0), 0);
4731
4732 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4733 return true;
4734
4735 /* That's all we handle in sparc_legitimize_tls_address for now. */
4736 return false;
4737 }
4738
4739 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4740 this (thread-local) address. */
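/* For illustration only (a sketch mirroring the patterns emitted below):
   in the local-exec model the offset of the variable from the thread
   pointer %g7 is a link-time constant, so the address is materialized
   roughly as

        sethi   %tle_hix22(sym), %t1
        xor     %t1, %tle_lox10(sym), %t2
        add     %g7, %t2, %result

   The other models go through the GOT and/or a call to __tls_get_addr.  */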
4741
4742 static rtx
4743 sparc_legitimize_tls_address (rtx addr)
4744 {
4745 rtx temp1, temp2, temp3, ret, o0, got;
4746 rtx_insn *insn;
4747
4748 gcc_assert (can_create_pseudo_p ());
4749
4750 if (GET_CODE (addr) == SYMBOL_REF)
4751 /* Although the various sethi/or sequences generate SImode values, many of
4752 them can be transformed by the linker when relaxing and, if relaxing to
4753 local-exec, will become a sethi/xor pair, which is signed and therefore
4754 a full DImode value in 64-bit mode. Thus we must use Pmode, lest these
4755 values be spilled onto the stack in 64-bit mode. */
4756 switch (SYMBOL_REF_TLS_MODEL (addr))
4757 {
4758 case TLS_MODEL_GLOBAL_DYNAMIC:
4759 start_sequence ();
4760 temp1 = gen_reg_rtx (Pmode);
4761 temp2 = gen_reg_rtx (Pmode);
4762 ret = gen_reg_rtx (Pmode);
4763 o0 = gen_rtx_REG (Pmode, 8);
4764 got = sparc_tls_got ();
4765 if (TARGET_ARCH32)
4766 {
4767 emit_insn (gen_tgd_hi22si (temp1, addr));
4768 emit_insn (gen_tgd_lo10si (temp2, temp1, addr));
4769 emit_insn (gen_tgd_addsi (o0, got, temp2, addr));
4770 insn = emit_call_insn (gen_tgd_callsi (o0, sparc_tls_get_addr (),
4771 addr, const1_rtx));
4772 }
4773 else
4774 {
4775 emit_insn (gen_tgd_hi22di (temp1, addr));
4776 emit_insn (gen_tgd_lo10di (temp2, temp1, addr));
4777 emit_insn (gen_tgd_adddi (o0, got, temp2, addr));
4778 insn = emit_call_insn (gen_tgd_calldi (o0, sparc_tls_get_addr (),
4779 addr, const1_rtx));
4780 }
4781 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4782 RTL_CONST_CALL_P (insn) = 1;
4783 insn = get_insns ();
4784 end_sequence ();
4785 emit_libcall_block (insn, ret, o0, addr);
4786 break;
4787
4788 case TLS_MODEL_LOCAL_DYNAMIC:
4789 start_sequence ();
4790 temp1 = gen_reg_rtx (Pmode);
4791 temp2 = gen_reg_rtx (Pmode);
4792 temp3 = gen_reg_rtx (Pmode);
4793 ret = gen_reg_rtx (Pmode);
4794 o0 = gen_rtx_REG (Pmode, 8);
4795 got = sparc_tls_got ();
4796 if (TARGET_ARCH32)
4797 {
4798 emit_insn (gen_tldm_hi22si (temp1));
4799 emit_insn (gen_tldm_lo10si (temp2, temp1));
4800 emit_insn (gen_tldm_addsi (o0, got, temp2));
4801 insn = emit_call_insn (gen_tldm_callsi (o0, sparc_tls_get_addr (),
4802 const1_rtx));
4803 }
4804 else
4805 {
4806 emit_insn (gen_tldm_hi22di (temp1));
4807 emit_insn (gen_tldm_lo10di (temp2, temp1));
4808 emit_insn (gen_tldm_adddi (o0, got, temp2));
4809 insn = emit_call_insn (gen_tldm_calldi (o0, sparc_tls_get_addr (),
4810 const1_rtx));
4811 }
4812 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4813 RTL_CONST_CALL_P (insn) = 1;
4814 insn = get_insns ();
4815 end_sequence ();
4816 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
4817 share the LD_BASE result with other LD model accesses. */
4818 emit_libcall_block (insn, temp3, o0,
4819 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4820 UNSPEC_TLSLD_BASE));
4821 temp1 = gen_reg_rtx (Pmode);
4822 temp2 = gen_reg_rtx (Pmode);
4823 if (TARGET_ARCH32)
4824 {
4825 emit_insn (gen_tldo_hix22si (temp1, addr));
4826 emit_insn (gen_tldo_lox10si (temp2, temp1, addr));
4827 emit_insn (gen_tldo_addsi (ret, temp3, temp2, addr));
4828 }
4829 else
4830 {
4831 emit_insn (gen_tldo_hix22di (temp1, addr));
4832 emit_insn (gen_tldo_lox10di (temp2, temp1, addr));
4833 emit_insn (gen_tldo_adddi (ret, temp3, temp2, addr));
4834 }
4835 break;
4836
4837 case TLS_MODEL_INITIAL_EXEC:
4838 temp1 = gen_reg_rtx (Pmode);
4839 temp2 = gen_reg_rtx (Pmode);
4840 temp3 = gen_reg_rtx (Pmode);
4841 got = sparc_tls_got ();
4842 if (TARGET_ARCH32)
4843 {
4844 emit_insn (gen_tie_hi22si (temp1, addr));
4845 emit_insn (gen_tie_lo10si (temp2, temp1, addr));
4846 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4847 }
4848 else
4849 {
4850 emit_insn (gen_tie_hi22di (temp1, addr));
4851 emit_insn (gen_tie_lo10di (temp2, temp1, addr));
4852 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4853 }
4854 if (TARGET_SUN_TLS)
4855 {
4856 ret = gen_reg_rtx (Pmode);
4857 if (TARGET_ARCH32)
4858 emit_insn (gen_tie_addsi (ret, gen_rtx_REG (Pmode, 7),
4859 temp3, addr));
4860 else
4861 emit_insn (gen_tie_adddi (ret, gen_rtx_REG (Pmode, 7),
4862 temp3, addr));
4863 }
4864 else
4865 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4866 break;
4867
4868 case TLS_MODEL_LOCAL_EXEC:
4869 temp1 = gen_reg_rtx (Pmode);
4870 temp2 = gen_reg_rtx (Pmode);
4871 if (TARGET_ARCH32)
4872 {
4873 emit_insn (gen_tle_hix22si (temp1, addr));
4874 emit_insn (gen_tle_lox10si (temp2, temp1, addr));
4875 }
4876 else
4877 {
4878 emit_insn (gen_tle_hix22di (temp1, addr));
4879 emit_insn (gen_tle_lox10di (temp2, temp1, addr));
4880 }
4881 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4882 break;
4883
4884 default:
4885 gcc_unreachable ();
4886 }
4887
4888 else if (GET_CODE (addr) == CONST)
4889 {
4890 rtx base, offset;
4891
4892 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4893
4894 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4895 offset = XEXP (XEXP (addr, 0), 1);
4896
4897 base = force_operand (base, NULL_RTX);
4898 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4899 offset = force_reg (Pmode, offset);
4900 ret = gen_rtx_PLUS (Pmode, base, offset);
4901 }
4902
4903 else
4904 gcc_unreachable (); /* for now ... */
4905
4906 return ret;
4907 }
4908
4909 /* Legitimize PIC addresses. If the address is already position-independent,
4910 we return ORIG. Newly generated position-independent addresses go into a
4911 reg. This is REG if nonzero, otherwise we allocate register(s) as
4912 necessary. */
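/* For illustration only: with -fPIC (flag_pic == 2) a global symbol is
   typically reached by building the GOT slot offset in a temporary and
   loading through the GOT pointer, conceptually

        sethi   %hi(slot), %tmp
        or      %tmp, %lo(slot), %tmp
        ld      [%l7 + %tmp], %reg      ! %l7 = PIC register

   possibly annotated with GOTDATA_OP relocations so the linker can relax
   the load.  The UNSPEC wrappers used below keep CSE from treating the
   intermediate values as the symbol's real address.  */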
4913
4914 static rtx
4915 sparc_legitimize_pic_address (rtx orig, rtx reg)
4916 {
4917 if (GET_CODE (orig) == SYMBOL_REF
4918 /* See the comment in sparc_expand_move. */
4919 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4920 {
4921 bool gotdata_op = false;
4922 rtx pic_ref, address;
4923 rtx_insn *insn;
4924
4925 if (!reg)
4926 {
4927 gcc_assert (can_create_pseudo_p ());
4928 reg = gen_reg_rtx (Pmode);
4929 }
4930
4931 if (flag_pic == 2)
4932 {
4933 /* If not during reload, allocate another temp reg here for loading
4934 in the address, so that these instructions can be optimized
4935 properly. */
4936 rtx temp_reg = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : reg;
4937
4938 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4939 won't get confused into thinking that these two instructions
4940 are loading in the true address of the symbol. If in the
4941 future a PIC rtx exists, that should be used instead. */
4942 if (TARGET_ARCH64)
4943 {
4944 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4945 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4946 }
4947 else
4948 {
4949 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4950 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4951 }
4952
4953 address = temp_reg;
4954 gotdata_op = true;
4955 }
4956 else
4957 address = orig;
4958
4959 crtl->uses_pic_offset_table = 1;
4960 if (gotdata_op)
4961 {
4962 if (TARGET_ARCH64)
4963 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4964 pic_offset_table_rtx,
4965 address, orig));
4966 else
4967 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4968 pic_offset_table_rtx,
4969 address, orig));
4970 }
4971 else
4972 {
4973 pic_ref
4974 = gen_const_mem (Pmode,
4975 gen_rtx_PLUS (Pmode,
4976 pic_offset_table_rtx, address));
4977 insn = emit_move_insn (reg, pic_ref);
4978 }
4979
4980 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4981 by loop. */
4982 set_unique_reg_note (insn, REG_EQUAL, orig);
4983 return reg;
4984 }
4985 else if (GET_CODE (orig) == CONST)
4986 {
4987 rtx base, offset;
4988
4989 if (GET_CODE (XEXP (orig, 0)) == PLUS
4990 && sparc_pic_register_p (XEXP (XEXP (orig, 0), 0)))
4991 return orig;
4992
4993 if (!reg)
4994 {
4995 gcc_assert (can_create_pseudo_p ());
4996 reg = gen_reg_rtx (Pmode);
4997 }
4998
4999 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5000 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
5001 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
5002 base == reg ? NULL_RTX : reg);
5003
5004 if (GET_CODE (offset) == CONST_INT)
5005 {
5006 if (SMALL_INT (offset))
5007 return plus_constant (Pmode, base, INTVAL (offset));
5008 else if (can_create_pseudo_p ())
5009 offset = force_reg (Pmode, offset);
5010 else
5011 /* If we reach here, then something is seriously wrong. */
5012 gcc_unreachable ();
5013 }
5014 return gen_rtx_PLUS (Pmode, base, offset);
5015 }
5016 else if (GET_CODE (orig) == LABEL_REF)
5017 /* ??? We ought to be checking that the register is live instead, in case
5018 it is eliminated. */
5019 crtl->uses_pic_offset_table = 1;
5020
5021 return orig;
5022 }
5023
5024 /* Try machine-dependent ways of modifying an illegitimate address X
5025 to be legitimate. If we find one, return the new, valid address.
5026
5027 OLDX is the address as it was before break_out_memory_refs was called.
5028 In some cases it is useful to look at this to decide what needs to be done.
5029
5030 MODE is the mode of the operand pointed to by X.
5031
5032 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
5033
5034 static rtx
5035 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
5036 machine_mode mode)
5037 {
5038 rtx orig_x = x;
5039
5040 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
5041 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
5042 force_operand (XEXP (x, 0), NULL_RTX));
5043 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
5044 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
5045 force_operand (XEXP (x, 1), NULL_RTX));
5046 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
5047 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
5048 XEXP (x, 1));
5049 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
5050 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
5051 force_operand (XEXP (x, 1), NULL_RTX));
5052
5053 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
5054 return x;
5055
5056 if (sparc_tls_referenced_p (x))
5057 x = sparc_legitimize_tls_address (x);
5058 else if (flag_pic)
5059 x = sparc_legitimize_pic_address (x, NULL_RTX);
5060 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
5061 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
5062 copy_to_mode_reg (Pmode, XEXP (x, 1)));
5063 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
5064 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
5065 copy_to_mode_reg (Pmode, XEXP (x, 0)));
5066 else if (GET_CODE (x) == SYMBOL_REF
5067 || GET_CODE (x) == CONST
5068 || GET_CODE (x) == LABEL_REF)
5069 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
5070
5071 return x;
5072 }
5073
5074 /* Delegitimize an address that was legitimized by the above function. */
5075
5076 static rtx
5077 sparc_delegitimize_address (rtx x)
5078 {
5079 x = delegitimize_mem_from_attrs (x);
5080
5081 if (GET_CODE (x) == LO_SUM)
5082 x = XEXP (x, 1);
5083
5084 if (GET_CODE (x) == UNSPEC)
5085 switch (XINT (x, 1))
5086 {
5087 case UNSPEC_MOVE_PIC:
5088 case UNSPEC_TLSLE:
5089 x = XVECEXP (x, 0, 0);
5090 gcc_assert (GET_CODE (x) == SYMBOL_REF);
5091 break;
5092 case UNSPEC_MOVE_GOTDATA:
5093 x = XVECEXP (x, 0, 2);
5094 gcc_assert (GET_CODE (x) == SYMBOL_REF);
5095 break;
5096 default:
5097 break;
5098 }
5099
5100 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
5101 if (GET_CODE (x) == MINUS
5102 && (XEXP (x, 0) == got_register_rtx
5103 || sparc_pic_register_p (XEXP (x, 0))))
5104 {
5105 rtx y = XEXP (x, 1);
5106
5107 if (GET_CODE (y) == LO_SUM)
5108 y = XEXP (y, 1);
5109
5110 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MOVE_PIC_LABEL)
5111 {
5112 x = XVECEXP (y, 0, 0);
5113 gcc_assert (GET_CODE (x) == LABEL_REF
5114 || (GET_CODE (x) == CONST
5115 && GET_CODE (XEXP (x, 0)) == PLUS
5116 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5117 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT));
5118 }
5119 }
5120
5121 return x;
5122 }
5123
5124 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
5125 replace the input X, or the original X if no replacement is called for.
5126 The output parameter *WIN is 1 if the calling macro should goto WIN,
5127 0 if it should not.
5128
5129 For SPARC, we wish to handle addresses by splitting them into
5130 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
5131 This cuts the number of extra insns by one.
5132
5133 Do nothing when generating PIC code and the address is a symbolic
5134 operand or requires a scratch register. */
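/* For illustration only: keeping the LO_SUM inside the memory reference
   means a reloaded constant address comes out roughly as

        sethi   %hi(sym), %tmp           ! the reloaded HIGH part
        ld      [%tmp + %lo(sym)], %reg  ! LO_SUM kept in the MEM

   instead of materializing the full address first and then doing a
   register-indirect load.  */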
5135
5136 rtx
5137 sparc_legitimize_reload_address (rtx x, machine_mode mode,
5138 int opnum, int type,
5139 int ind_levels ATTRIBUTE_UNUSED, int *win)
5140 {
5141 /* Decompose SImode constants into HIGH+LO_SUM. */
5142 if (CONSTANT_P (x)
5143 && (mode != TFmode || TARGET_ARCH64)
5144 && GET_MODE (x) == SImode
5145 && GET_CODE (x) != LO_SUM
5146 && GET_CODE (x) != HIGH
5147 && sparc_code_model <= CM_MEDLOW
5148 && !(flag_pic
5149 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
5150 {
5151 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
5152 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
5153 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
5154 opnum, (enum reload_type)type);
5155 *win = 1;
5156 return x;
5157 }
5158
5159 /* We have to recognize what we have already generated above. */
5160 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
5161 {
5162 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
5163 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
5164 opnum, (enum reload_type)type);
5165 *win = 1;
5166 return x;
5167 }
5168
5169 *win = 0;
5170 return x;
5171 }
5172
5173 /* Return true if ADDR (a legitimate address expression)
5174 has an effect that depends on the machine mode it is used for.
5175
5176 In PIC mode,
5177
5178 (mem:HI [%l7+a])
5179
5180 is not equivalent to
5181
5182 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
5183
5184 because [%l7+a+1] is interpreted as the address of (a+1). */
5185
5186
5187 static bool
5188 sparc_mode_dependent_address_p (const_rtx addr,
5189 addr_space_t as ATTRIBUTE_UNUSED)
5190 {
5191 if (GET_CODE (addr) == PLUS
5192 && sparc_pic_register_p (XEXP (addr, 0))
5193 && symbolic_operand (XEXP (addr, 1), VOIDmode))
5194 return true;
5195
5196 return false;
5197 }
5198
5199 /* Emit a call instruction with the pattern given by PAT. ADDR is the
5200 address of the call target. */
5201
5202 void
5203 sparc_emit_call_insn (rtx pat, rtx addr)
5204 {
5205 rtx_insn *insn;
5206
5207 insn = emit_call_insn (pat);
5208
5209 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
5210 if (TARGET_VXWORKS_RTP
5211 && flag_pic
5212 && GET_CODE (addr) == SYMBOL_REF
5213 && (SYMBOL_REF_DECL (addr)
5214 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
5215 : !SYMBOL_REF_LOCAL_P (addr)))
5216 {
5217 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
5218 crtl->uses_pic_offset_table = 1;
5219 }
5220 }
5221
5222 /* Return 1 if RTX is a MEM which is known to be aligned to at
5223 least a DESIRED byte boundary. */
5224
5225 int
5226 mem_min_alignment (rtx mem, int desired)
5227 {
5228 rtx addr, base, offset;
5229
5230 /* If it's not a MEM we can't accept it. */
5231 if (GET_CODE (mem) != MEM)
5232 return 0;
5233
5234 /* Obviously... */
5235 if (!TARGET_UNALIGNED_DOUBLES
5236 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
5237 return 1;
5238
5239 /* ??? The rest of the function predates MEM_ALIGN so
5240 there is probably a bit of redundancy. */
5241 addr = XEXP (mem, 0);
5242 base = offset = NULL_RTX;
5243 if (GET_CODE (addr) == PLUS)
5244 {
5245 if (GET_CODE (XEXP (addr, 0)) == REG)
5246 {
5247 base = XEXP (addr, 0);
5248
5249 /* What we are saying here is that if the base
5250 REG is aligned properly, the compiler will make
5251 sure any REG based index upon it will be so
5252 as well. */
5253 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
5254 offset = XEXP (addr, 1);
5255 else
5256 offset = const0_rtx;
5257 }
5258 }
5259 else if (GET_CODE (addr) == REG)
5260 {
5261 base = addr;
5262 offset = const0_rtx;
5263 }
5264
5265 if (base != NULL_RTX)
5266 {
5267 int regno = REGNO (base);
5268
5269 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
5270 {
5271 /* Check if the compiler has recorded some information
5272 about the alignment of the base REG. If reload has
5273 completed, we already matched with proper alignments.
5274 	     If not running global_alloc, reload might give us an
5275 	     unaligned pointer to the local stack, though.  */
5276 if (((cfun != 0
5277 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
5278 || (optimize && reload_completed))
5279 && (INTVAL (offset) & (desired - 1)) == 0)
5280 return 1;
5281 }
5282 else
5283 {
5284 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
5285 return 1;
5286 }
5287 }
5288 else if (! TARGET_UNALIGNED_DOUBLES
5289 || CONSTANT_P (addr)
5290 || GET_CODE (addr) == LO_SUM)
5291 {
5292 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
5293 is true, in which case we can only assume that an access is aligned if
5294 it is to a constant address, or the address involves a LO_SUM. */
5295 return 1;
5296 }
5297
5298 /* An obviously unaligned address. */
5299 return 0;
5300 }
5301
5302
5303 /* Vectors to keep interesting information about registers where it can easily
5304 be got. We used to use the actual mode value as the bit number, but there
5305 are more than 32 modes now. Instead we use two tables: one indexed by
5306 hard register number, and one indexed by mode. */
5307
5308 /* The purpose of sparc_mode_class is to shrink the range of modes so that
5309 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
5310 mapped into one sparc_mode_class mode. */
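/* For example (a sketch, not an exhaustive mapping): SImode is classed as
   S_MODE and DFmode as DF_MODE, and since sparc_mode_class[] below stores
   a single-bit mask per mode, a "can register REGNO hold mode M" query
   presumably reduces to a bitwise AND of sparc_mode_class[M] with
   hard_regno_mode_classes[REGNO].  */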
5311
5312 enum sparc_mode_class {
5313 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
5314 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
5315 CC_MODE, CCFP_MODE
5316 };
5317
5318 /* Modes for single-word and smaller quantities. */
5319 #define S_MODES \
5320 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
5321
5322 /* Modes for double-word and smaller quantities. */
5323 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5324
5325 /* Modes for quad-word and smaller quantities. */
5326 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
5327
5328 /* Modes for 8-word and smaller quantities. */
5329 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
5330
5331 /* Modes for single-float quantities. */
5332 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
5333
5334 /* Modes for double-float and smaller quantities. */
5335 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5336
5337 /* Modes for quad-float and smaller quantities. */
5338 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
5339
5340 /* Modes for quad-float pairs and smaller quantities. */
5341 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
5342
5343 /* Modes for double-float only quantities. */
5344 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
5345
5346 /* Modes for quad-float and double-float only quantities. */
5347 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
5348
5349 /* Modes for quad-float pairs and double-float only quantities. */
5350 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
5351
5352 /* Modes for condition codes. */
5353 #define CC_MODES (1 << (int) CC_MODE)
5354 #define CCFP_MODES (1 << (int) CCFP_MODE)
5355
5356 /* Value is 1 if register/mode pair is acceptable on sparc.
5357
5358 The funny mixture of D and T modes is because integer operations
5359 do not specially operate on tetra quantities, so non-quad-aligned
5360 registers can hold quadword quantities (except %o4 and %i4 because
5361 they cross fixed registers).
5362
5363 ??? Note that, despite the settings, non-double-aligned parameter
5364 registers can hold double-word quantities in 32-bit mode. */
5365
5366 /* This points to either the 32-bit or the 64-bit version. */
5367 static const int *hard_regno_mode_classes;
5368
5369 static const int hard_32bit_mode_classes[] = {
5370 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5371 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5372 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5373 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5374
5375 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5376 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5377 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5378 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5379
5380 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5381 and none can hold SFmode/SImode values. */
5382 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5383 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5384 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5385 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5386
5387 /* %fcc[0123] */
5388 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5389
5390 /* %icc, %sfp, %gsr */
5391 CC_MODES, 0, D_MODES
5392 };
5393
5394 static const int hard_64bit_mode_classes[] = {
5395 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5396 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5397 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5398 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5399
5400 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5401 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5402 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5403 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5404
5405 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5406 and none can hold SFmode/SImode values. */
5407 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5408 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5409 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5410 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5411
5412 /* %fcc[0123] */
5413 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5414
5415 /* %icc, %sfp, %gsr */
5416 CC_MODES, 0, D_MODES
5417 };
5418
5419 static int sparc_mode_class [NUM_MACHINE_MODES];
5420
5421 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
5422
5423 static void
5424 sparc_init_modes (void)
5425 {
5426 int i;
5427
5428 for (i = 0; i < NUM_MACHINE_MODES; i++)
5429 {
5430 machine_mode m = (machine_mode) i;
5431 unsigned int size = GET_MODE_SIZE (m);
5432
5433 switch (GET_MODE_CLASS (m))
5434 {
5435 case MODE_INT:
5436 case MODE_PARTIAL_INT:
5437 case MODE_COMPLEX_INT:
5438 if (size < 4)
5439 sparc_mode_class[i] = 1 << (int) H_MODE;
5440 else if (size == 4)
5441 sparc_mode_class[i] = 1 << (int) S_MODE;
5442 else if (size == 8)
5443 sparc_mode_class[i] = 1 << (int) D_MODE;
5444 else if (size == 16)
5445 sparc_mode_class[i] = 1 << (int) T_MODE;
5446 else if (size == 32)
5447 sparc_mode_class[i] = 1 << (int) O_MODE;
5448 else
5449 sparc_mode_class[i] = 0;
5450 break;
5451 case MODE_VECTOR_INT:
5452 if (size == 4)
5453 sparc_mode_class[i] = 1 << (int) SF_MODE;
5454 else if (size == 8)
5455 sparc_mode_class[i] = 1 << (int) DF_MODE;
5456 else
5457 sparc_mode_class[i] = 0;
5458 break;
5459 case MODE_FLOAT:
5460 case MODE_COMPLEX_FLOAT:
5461 if (size == 4)
5462 sparc_mode_class[i] = 1 << (int) SF_MODE;
5463 else if (size == 8)
5464 sparc_mode_class[i] = 1 << (int) DF_MODE;
5465 else if (size == 16)
5466 sparc_mode_class[i] = 1 << (int) TF_MODE;
5467 else if (size == 32)
5468 sparc_mode_class[i] = 1 << (int) OF_MODE;
5469 else
5470 sparc_mode_class[i] = 0;
5471 break;
5472 case MODE_CC:
5473 if (m == CCFPmode || m == CCFPEmode)
5474 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
5475 else
5476 sparc_mode_class[i] = 1 << (int) CC_MODE;
5477 break;
5478 default:
5479 sparc_mode_class[i] = 0;
5480 break;
5481 }
5482 }
5483
5484 if (TARGET_ARCH64)
5485 hard_regno_mode_classes = hard_64bit_mode_classes;
5486 else
5487 hard_regno_mode_classes = hard_32bit_mode_classes;
5488
5489 /* Initialize the array used by REGNO_REG_CLASS. */
5490 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5491 {
5492 if (i < 16 && TARGET_V8PLUS)
5493 sparc_regno_reg_class[i] = I64_REGS;
5494 else if (i < 32 || i == FRAME_POINTER_REGNUM)
5495 sparc_regno_reg_class[i] = GENERAL_REGS;
5496 else if (i < 64)
5497 sparc_regno_reg_class[i] = FP_REGS;
5498 else if (i < 96)
5499 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
5500 else if (i < 100)
5501 sparc_regno_reg_class[i] = FPCC_REGS;
5502 else
5503 sparc_regno_reg_class[i] = NO_REGS;
5504 }
5505 }
5506
5507 /* Return whether REGNO, a global or FP register, must be saved/restored. */
5508
5509 static inline bool
5510 save_global_or_fp_reg_p (unsigned int regno,
5511 int leaf_function ATTRIBUTE_UNUSED)
5512 {
5513 return !call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno);
5514 }
5515
5516 /* Return whether the return address register (%i7) is needed. */
5517
5518 static inline bool
5519 return_addr_reg_needed_p (int leaf_function)
5520 {
5521 /* If it is live, for example because of __builtin_return_address (0). */
5522 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
5523 return true;
5524
5525 /* Otherwise, it is needed as save register if %o7 is clobbered. */
5526 if (!leaf_function
5527 /* Loading the GOT register clobbers %o7. */
5528 || crtl->uses_pic_offset_table
5529 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
5530 return true;
5531
5532 return false;
5533 }
5534
5535 /* Return whether REGNO, a local or in register, must be saved/restored. */
5536
5537 static bool
5538 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5539 {
5540 /* General case: call-saved registers live at some point. */
5541 if (!call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno))
5542 return true;
5543
5544 /* Frame pointer register (%fp) if needed. */
5545 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5546 return true;
5547
5548 /* Return address register (%i7) if needed. */
5549 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5550 return true;
5551
5552 /* GOT register (%l7) if needed. */
5553 if (got_register_rtx && regno == REGNO (got_register_rtx))
5554 return true;
5555
5556 /* If the function accesses prior frames, the frame pointer and the return
5557 address of the previous frame must be saved on the stack. */
5558 if (crtl->accesses_prior_frames
5559 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5560 return true;
5561
5562 return false;
5563 }
5564
5565 /* Compute the frame size required by the function. This function is called
5566 during the reload pass and also by sparc_expand_prologue. */
5567
5568 static HOST_WIDE_INT
5569 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5570 {
5571 HOST_WIDE_INT frame_size, apparent_frame_size;
5572 int args_size, n_global_fp_regs = 0;
5573 bool save_local_in_regs_p = false;
5574 unsigned int i;
5575
5576 /* If the function allocates dynamic stack space, the dynamic offset is
5577 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5578 if (leaf_function && !cfun->calls_alloca)
5579 args_size = 0;
5580 else
5581 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5582
5583 /* Calculate space needed for global registers. */
5584 if (TARGET_ARCH64)
5585 {
5586 for (i = 0; i < 8; i++)
5587 if (save_global_or_fp_reg_p (i, 0))
5588 n_global_fp_regs += 2;
5589 }
5590 else
5591 {
5592 for (i = 0; i < 8; i += 2)
5593 if (save_global_or_fp_reg_p (i, 0)
5594 || save_global_or_fp_reg_p (i + 1, 0))
5595 n_global_fp_regs += 2;
5596 }
5597
5598 /* In the flat window model, find out which local and in registers need to
5599 be saved. We don't reserve space in the current frame for them as they
5600 will be spilled into the register window save area of the caller's frame.
5601 However, as soon as we use this register window save area, we must create
5602 that of the current frame to make it the live one. */
5603 if (TARGET_FLAT)
5604 for (i = 16; i < 32; i++)
5605 if (save_local_or_in_reg_p (i, leaf_function))
5606 {
5607 save_local_in_regs_p = true;
5608 break;
5609 }
5610
5611 /* Calculate space needed for FP registers. */
5612 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5613 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5614 n_global_fp_regs += 2;
5615
5616 if (size == 0
5617 && n_global_fp_regs == 0
5618 && args_size == 0
5619 && !save_local_in_regs_p)
5620 frame_size = apparent_frame_size = 0;
5621 else
5622 {
5623 /* Start from the apparent frame size. */
5624 apparent_frame_size = ROUND_UP (size, 8) + n_global_fp_regs * 4;
5625
5626 /* We need to add the size of the outgoing argument area. */
5627 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5628
5629 /* And that of the register window save area. */
5630 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5631
5632 /* Finally, bump to the appropriate alignment. */
5633 frame_size = SPARC_STACK_ALIGN (frame_size);
5634 }
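  /* For example (figures purely illustrative): 20 bytes of locals round up
     to 24 and two call-saved global/FP register words add 2 * 4 = 8, giving
     an apparent frame size of 32; the rounded outgoing argument area and
     FIRST_PARM_OFFSET are then layered on top and the total is aligned with
     SPARC_STACK_ALIGN to yield the final frame size.  */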
5635
5636 /* Set up values for use in prologue and epilogue. */
5637 sparc_frame_size = frame_size;
5638 sparc_apparent_frame_size = apparent_frame_size;
5639 sparc_n_global_fp_regs = n_global_fp_regs;
5640 sparc_save_local_in_regs_p = save_local_in_regs_p;
5641
5642 return frame_size;
5643 }
5644
5645 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5646
5647 int
5648 sparc_initial_elimination_offset (int to)
5649 {
5650 int offset;
5651
5652 if (to == STACK_POINTER_REGNUM)
5653 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5654 else
5655 offset = 0;
5656
5657 offset += SPARC_STACK_BIAS;
5658 return offset;
5659 }
5660
5661 /* Output any necessary .register pseudo-ops. */
5662
5663 void
5664 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5665 {
5666 int i;
5667
5668 if (TARGET_ARCH32)
5669 return;
5670
5671 /* Check if %g[2367] were used without
5672 .register being printed for them already. */
5673 for (i = 2; i < 8; i++)
5674 {
5675 if (df_regs_ever_live_p (i)
5676 && ! sparc_hard_reg_printed [i])
5677 {
5678 sparc_hard_reg_printed [i] = 1;
5679 /* %g7 is used as TLS base register, use #ignore
5680 for it instead of #scratch. */
5681 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5682 i == 7 ? "ignore" : "scratch");
5683 }
5684 if (i == 3) i = 5;
5685 }
5686 }
5687
5688 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5689
5690 #if PROBE_INTERVAL > 4096
5691 #error Cannot use indexed addressing mode for stack probing
5692 #endif
5693
5694 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5695 inclusive. These are offsets from the current stack pointer.
5696
5697 Note that we don't use the REG+REG addressing mode for the probes because
5698 of the stack bias in 64-bit mode. And it doesn't really buy us anything
5699 so the advantages of having a single code path win here. */
5700
5701 static void
5702 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5703 {
5704 rtx g1 = gen_rtx_REG (Pmode, 1);
5705
5706 /* See if we have a constant small number of probes to generate. If so,
5707 that's the easy case. */
5708 if (size <= PROBE_INTERVAL)
5709 {
5710 emit_move_insn (g1, GEN_INT (first));
5711 emit_insn (gen_rtx_SET (g1,
5712 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5713 emit_stack_probe (plus_constant (Pmode, g1, -size));
5714 }
5715
5716 /* The run-time loop is made up of 9 insns in the generic case while the
5717 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
5718 else if (size <= 4 * PROBE_INTERVAL)
5719 {
5720 HOST_WIDE_INT i;
5721
5722 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5723 emit_insn (gen_rtx_SET (g1,
5724 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5725 emit_stack_probe (g1);
5726
5727 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5728 it exceeds SIZE. If only two probes are needed, this will not
5729 generate any code. Then probe at FIRST + SIZE. */
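      /* For instance, with a 4096-byte probe interval and SIZE of, say,
	 10000 bytes, probes end up at FIRST + 4096 (emitted above),
	 FIRST + 8192 (by this loop) and FIRST + 10000 (below).  */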
5730 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5731 {
5732 emit_insn (gen_rtx_SET (g1,
5733 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5734 emit_stack_probe (g1);
5735 }
5736
5737 emit_stack_probe (plus_constant (Pmode, g1,
5738 (i - PROBE_INTERVAL) - size));
5739 }
5740
5741 /* Otherwise, do the same as above, but in a loop. Note that we must be
5742 extra careful with variables wrapping around because we might be at
5743 the very top (or the very bottom) of the address space and we have
5744 to be able to handle this case properly; in particular, we use an
5745 equality test for the loop condition. */
5746 else
5747 {
5748 HOST_WIDE_INT rounded_size;
5749 rtx g4 = gen_rtx_REG (Pmode, 4);
5750
5751 emit_move_insn (g1, GEN_INT (first));
5752
5753
5754 /* Step 1: round SIZE to the previous multiple of the interval. */
5755
5756 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5757 emit_move_insn (g4, GEN_INT (rounded_size));
5758
5759
5760 /* Step 2: compute initial and final value of the loop counter. */
5761
5762 /* TEST_ADDR = SP + FIRST. */
5763 emit_insn (gen_rtx_SET (g1,
5764 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5765
5766 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5767 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5768
5769
5770 /* Step 3: the loop
5771
5772 while (TEST_ADDR != LAST_ADDR)
5773 {
5774 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5775 probe at TEST_ADDR
5776 }
5777
5778 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5779 until it is equal to ROUNDED_SIZE. */
5780
5781 if (TARGET_ARCH64)
5782 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5783 else
5784 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5785
5786
5787 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5788 that SIZE is equal to ROUNDED_SIZE. */
5789
5790 if (size != rounded_size)
5791 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5792 }
5793
5794 /* Make sure nothing is scheduled before we are done. */
5795 emit_insn (gen_blockage ());
5796 }
5797
5798 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5799 absolute addresses. */
5800
5801 const char *
5802 output_probe_stack_range (rtx reg1, rtx reg2)
5803 {
5804 static int labelno = 0;
5805 char loop_lab[32];
5806 rtx xops[2];
5807
5808 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5809
5810 /* Loop. */
5811 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5812
5813 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5814 xops[0] = reg1;
5815 xops[1] = GEN_INT (-PROBE_INTERVAL);
5816 output_asm_insn ("add\t%0, %1, %0", xops);
5817
5818 /* Test if TEST_ADDR == LAST_ADDR. */
5819 xops[1] = reg2;
5820 output_asm_insn ("cmp\t%0, %1", xops);
5821
5822 /* Probe at TEST_ADDR and branch. */
5823 if (TARGET_ARCH64)
5824 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5825 else
5826 fputs ("\tbne\t", asm_out_file);
5827 assemble_name_raw (asm_out_file, loop_lab);
5828 fputc ('\n', asm_out_file);
5829 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5830 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5831
5832 return "";
5833 }
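/* With a 4096-byte probe interval and in 32-bit mode (no stack bias), the
   routine above would emit roughly the following loop; this is illustrative
   only, since the register numbers come from the caller and the label from
   LABELNO:

	.LPSRL0:
		add	%g1, -4096, %g1
		cmp	%g1, %g4
		bne	.LPSRL0
		 st	%g0, [%g1+0]  */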
5834
5835 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5836 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5837 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5838 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5839 the action to be performed if it returns false. Return the new offset. */
5840
5841 typedef bool (*sorr_pred_t) (unsigned int, int);
5842 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5843
5844 static int
5845 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5846 int offset, int leaf_function, sorr_pred_t save_p,
5847 sorr_act_t action_true, sorr_act_t action_false)
5848 {
5849 unsigned int i;
5850 rtx mem;
5851 rtx_insn *insn;
5852
5853 if (TARGET_ARCH64 && high <= 32)
5854 {
5855 int fp_offset = -1;
5856
5857 for (i = low; i < high; i++)
5858 {
5859 if (save_p (i, leaf_function))
5860 {
5861 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5862 base, offset));
5863 if (action_true == SORR_SAVE)
5864 {
5865 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5866 RTX_FRAME_RELATED_P (insn) = 1;
5867 }
5868 else /* action_true == SORR_RESTORE */
5869 {
5870 /* The frame pointer must be restored last since its old
5871 value may be used as base address for the frame. This
5872 is problematic in 64-bit mode only because of the lack
5873 of double-word load instruction. */
5874 if (i == HARD_FRAME_POINTER_REGNUM)
5875 fp_offset = offset;
5876 else
5877 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5878 }
5879 offset += 8;
5880 }
5881 else if (action_false == SORR_ADVANCE)
5882 offset += 8;
5883 }
5884
5885 if (fp_offset >= 0)
5886 {
5887 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5888 emit_move_insn (hard_frame_pointer_rtx, mem);
5889 }
5890 }
5891 else
5892 {
5893 for (i = low; i < high; i += 2)
5894 {
5895 bool reg0 = save_p (i, leaf_function);
5896 bool reg1 = save_p (i + 1, leaf_function);
5897 machine_mode mode;
5898 int regno;
5899
5900 if (reg0 && reg1)
5901 {
5902 mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode;
5903 regno = i;
5904 }
5905 else if (reg0)
5906 {
5907 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5908 regno = i;
5909 }
5910 else if (reg1)
5911 {
5912 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5913 regno = i + 1;
5914 offset += 4;
5915 }
5916 else
5917 {
5918 if (action_false == SORR_ADVANCE)
5919 offset += 8;
5920 continue;
5921 }
5922
5923 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5924 if (action_true == SORR_SAVE)
5925 {
5926 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5927 RTX_FRAME_RELATED_P (insn) = 1;
5928 if (mode == DImode)
5929 {
5930 rtx set1, set2;
5931 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5932 offset));
5933 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5934 RTX_FRAME_RELATED_P (set1) = 1;
5935 mem
5936 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5937 offset + 4));
5938 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5939 RTX_FRAME_RELATED_P (set2) = 1;
5940 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5941 gen_rtx_PARALLEL (VOIDmode,
5942 gen_rtvec (2, set1, set2)));
5943 }
5944 }
5945 else /* action_true == SORR_RESTORE */
5946 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5947
5948 /* Bump and round down to double word
5949 in case we already bumped by 4. */
5950 offset = ROUND_DOWN (offset + 8, 8);
5951 }
5952 }
5953
5954 return offset;
5955 }
5956
5957 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5958
5959 static rtx
5960 emit_adjust_base_to_offset (rtx base, int offset)
5961 {
5962 /* ??? This might be optimized a little as %g1 might already have a
5963 value close enough that a single add insn will do. */
5964 /* ??? Although, all of this is probably only a temporary fix because
5965 if %g1 can hold a function result, then sparc_expand_epilogue will
5966 lose (the result will be clobbered). */
5967 rtx new_base = gen_rtx_REG (Pmode, 1);
5968 emit_move_insn (new_base, GEN_INT (offset));
5969 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5970 return new_base;
5971 }
5972
5973 /* Emit code to save/restore call-saved global and FP registers. */
5974
5975 static void
5976 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5977 {
5978 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5979 {
5980 base = emit_adjust_base_to_offset (base, offset);
5981 offset = 0;
5982 }
5983
5984 offset
5985 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5986 save_global_or_fp_reg_p, action, SORR_NONE);
5987 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5988 save_global_or_fp_reg_p, action, SORR_NONE);
5989 }
5990
5991 /* Emit code to save/restore call-saved local and in registers. */
5992
5993 static void
5994 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5995 {
5996 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5997 {
5998 base = emit_adjust_base_to_offset (base, offset);
5999 offset = 0;
6000 }
6001
6002 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
6003 save_local_or_in_reg_p, action, SORR_ADVANCE);
6004 }
6005
6006 /* Emit a window_save insn. */
6007
6008 static rtx_insn *
6009 emit_window_save (rtx increment)
6010 {
6011 rtx_insn *insn = emit_insn (gen_window_save (increment));
6012 RTX_FRAME_RELATED_P (insn) = 1;
6013
6014 /* The incoming return address (%o7) is saved in %i7. */
6015 add_reg_note (insn, REG_CFA_REGISTER,
6016 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
6017 gen_rtx_REG (Pmode,
6018 INCOMING_RETURN_ADDR_REGNUM)));
6019
6020 /* The window save event. */
6021 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
6022
6023 /* The CFA is %fp, the hard frame pointer. */
6024 add_reg_note (insn, REG_CFA_DEF_CFA,
6025 plus_constant (Pmode, hard_frame_pointer_rtx,
6026 INCOMING_FRAME_SP_OFFSET));
6027
6028 return insn;
6029 }
6030
6031 /* Generate an increment for the stack pointer. */
6032
6033 static rtx
6034 gen_stack_pointer_inc (rtx increment)
6035 {
6036 return gen_rtx_SET (stack_pointer_rtx,
6037 gen_rtx_PLUS (Pmode,
6038 stack_pointer_rtx,
6039 increment));
6040 }
6041
6042 /* Expand the function prologue. The prologue is responsible for reserving
6043 storage for the frame, saving the call-saved registers and loading the
6044 GOT register if needed. */
6045
6046 void
6047 sparc_expand_prologue (void)
6048 {
6049 HOST_WIDE_INT size;
6050 rtx_insn *insn;
6051
6052 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
6053 on the final value of the flag means deferring the prologue/epilogue
6054 expansion until just before the second scheduling pass, which is too
6055 late to emit multiple epilogues or return insns.
6056
6057 Of course we are making the assumption that the value of the flag
6058 will not change between now and its final value. Of the three parts
6059 of the formula, only the last one can reasonably vary. Let's take a
6060 closer look, after assuming that the first two ones are set to true
6061 (otherwise the last value is effectively silenced).
6062
6063 If only_leaf_regs_used returns false, the global predicate will also
6064 be false so the actual frame size calculated below will be positive.
6065 As a consequence, the save_register_window insn will be emitted in
6066 the instruction stream; now this insn explicitly references %fp
6067 which is not a leaf register so only_leaf_regs_used will always
6068 return false subsequently.
6069
6070 If only_leaf_regs_used returns true, we hope that the subsequent
6071 optimization passes won't cause non-leaf registers to pop up. For
6072 example, the regrename pass has special provisions to not rename to
6073 non-leaf registers in a leaf function. */
6074 sparc_leaf_function_p
6075 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
6076
6077 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
6078
6079 if (flag_stack_usage_info)
6080 current_function_static_stack_size = size;
6081
6082 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6083 || flag_stack_clash_protection)
6084 {
6085 if (crtl->is_leaf && !cfun->calls_alloca)
6086 {
6087 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
6088 sparc_emit_probe_stack_range (get_stack_check_protect (),
6089 size - get_stack_check_protect ());
6090 }
6091 else if (size > 0)
6092 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
6093 }
6094
6095 if (size == 0)
6096 ; /* do nothing. */
6097 else if (sparc_leaf_function_p)
6098 {
6099 rtx size_int_rtx = GEN_INT (-size);
6100
6101 if (size <= 4096)
6102 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
6103 else if (size <= 8192)
6104 {
6105 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
6106 RTX_FRAME_RELATED_P (insn) = 1;
6107
6108 /* %sp is still the CFA register. */
6109 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6110 }
6111 else
6112 {
6113 rtx size_rtx = gen_rtx_REG (Pmode, 1);
6114 emit_move_insn (size_rtx, size_int_rtx);
6115 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
6116 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6117 gen_stack_pointer_inc (size_int_rtx));
6118 }
6119
6120 RTX_FRAME_RELATED_P (insn) = 1;
6121 }
6122 else
6123 {
6124 rtx size_int_rtx = GEN_INT (-size);
6125
6126 if (size <= 4096)
6127 emit_window_save (size_int_rtx);
6128 else if (size <= 8192)
6129 {
6130 emit_window_save (GEN_INT (-4096));
6131
6132 /* %sp is not the CFA register anymore. */
6133 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6134
6135 /* Make sure no %fp-based store is issued until after the frame is
6136 established. The offset between the frame pointer and the stack
6137 pointer is calculated relative to the value of the stack pointer
6138 at the end of the function prologue, and moving instructions that
6139 access the stack via the frame pointer between the instructions
6140 that decrement the stack pointer could result in accessing the
6141 register window save area, which is volatile. */
6142 emit_insn (gen_frame_blockage ());
6143 }
6144 else
6145 {
6146 rtx size_rtx = gen_rtx_REG (Pmode, 1);
6147 emit_move_insn (size_rtx, size_int_rtx);
6148 emit_window_save (size_rtx);
6149 }
6150 }
6151
6152 if (sparc_leaf_function_p)
6153 {
6154 sparc_frame_base_reg = stack_pointer_rtx;
6155 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6156 }
6157 else
6158 {
6159 sparc_frame_base_reg = hard_frame_pointer_rtx;
6160 sparc_frame_base_offset = SPARC_STACK_BIAS;
6161 }
6162
6163 if (sparc_n_global_fp_regs > 0)
6164 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6165 sparc_frame_base_offset
6166 - sparc_apparent_frame_size,
6167 SORR_SAVE);
6168
6169 /* Advertise that the data calculated just above are now valid. */
6170 sparc_prologue_data_valid_p = true;
6171 }
6172
6173 /* Expand the function prologue in the flat register window model. The prologue
6174 is responsible for reserving storage for the frame, saving the call-saved
6175 registers and loading the GOT register if needed. */
6176
6177 void
6178 sparc_flat_expand_prologue (void)
6179 {
6180 HOST_WIDE_INT size;
6181 rtx_insn *insn;
6182
6183 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
6184
6185 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
6186
6187 if (flag_stack_usage_info)
6188 current_function_static_stack_size = size;
6189
6190 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6191 || flag_stack_clash_protection)
6192 {
6193 if (crtl->is_leaf && !cfun->calls_alloca)
6194 {
6195 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
6196 sparc_emit_probe_stack_range (get_stack_check_protect (),
6197 size - get_stack_check_protect ());
6198 }
6199 else if (size > 0)
6200 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
6201 }
6202
6203 if (sparc_save_local_in_regs_p)
6204 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
6205 SORR_SAVE);
6206
6207 if (size == 0)
6208 ; /* do nothing. */
6209 else
6210 {
6211 rtx size_int_rtx, size_rtx;
6212
6213 size_rtx = size_int_rtx = GEN_INT (-size);
6214
6215 /* We establish the frame (i.e. decrement the stack pointer) first, even
6216 if we use a frame pointer, because we cannot clobber any call-saved
6217 registers, including the frame pointer, if we haven't created a new
6218 register save area, for the sake of compatibility with the ABI. */
6219 if (size <= 4096)
6220 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
6221 else if (size <= 8192 && !frame_pointer_needed)
6222 {
6223 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
6224 RTX_FRAME_RELATED_P (insn) = 1;
6225 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6226 }
6227 else
6228 {
6229 size_rtx = gen_rtx_REG (Pmode, 1);
6230 emit_move_insn (size_rtx, size_int_rtx);
6231 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
6232 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6233 gen_stack_pointer_inc (size_int_rtx));
6234 }
6235 RTX_FRAME_RELATED_P (insn) = 1;
6236
6237 /* Ensure nothing is scheduled until after the frame is established. */
6238 emit_insn (gen_blockage ());
6239
6240 if (frame_pointer_needed)
6241 {
6242 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
6243 gen_rtx_MINUS (Pmode,
6244 stack_pointer_rtx,
6245 size_rtx)));
6246 RTX_FRAME_RELATED_P (insn) = 1;
6247
6248 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6249 gen_rtx_SET (hard_frame_pointer_rtx,
6250 plus_constant (Pmode, stack_pointer_rtx,
6251 size)));
6252 }
6253
6254 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6255 {
6256 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
6257 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
6258
6259 insn = emit_move_insn (i7, o7);
6260 RTX_FRAME_RELATED_P (insn) = 1;
6261
6262 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
6263
6264 /* Prevent this instruction from ever being considered dead,
6265 even if this function has no epilogue. */
6266 emit_use (i7);
6267 }
6268 }
6269
6270 if (frame_pointer_needed)
6271 {
6272 sparc_frame_base_reg = hard_frame_pointer_rtx;
6273 sparc_frame_base_offset = SPARC_STACK_BIAS;
6274 }
6275 else
6276 {
6277 sparc_frame_base_reg = stack_pointer_rtx;
6278 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6279 }
6280
6281 if (sparc_n_global_fp_regs > 0)
6282 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6283 sparc_frame_base_offset
6284 - sparc_apparent_frame_size,
6285 SORR_SAVE);
6286
6287 /* Advertise that the data calculated just above are now valid. */
6288 sparc_prologue_data_valid_p = true;
6289 }
6290
6291 /* This function generates the assembly code for function entry, which boils
6292 down to emitting the necessary .register directives. */
6293
6294 static void
6295 sparc_asm_function_prologue (FILE *file)
6296 {
6297 /* Check that the assumption we made in sparc_expand_prologue is valid. */
6298 if (!TARGET_FLAT)
6299 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
6300
6301 sparc_output_scratch_registers (file);
6302 }
6303
6304 /* Expand the function epilogue, either normal or part of a sibcall.
6305 We emit all the instructions except the return or the call. */
6306
6307 void
6308 sparc_expand_epilogue (bool for_eh)
6309 {
6310 HOST_WIDE_INT size = sparc_frame_size;
6311
6312 if (cfun->calls_alloca)
6313 emit_insn (gen_frame_blockage ());
6314
6315 if (sparc_n_global_fp_regs > 0)
6316 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6317 sparc_frame_base_offset
6318 - sparc_apparent_frame_size,
6319 SORR_RESTORE);
6320
6321 if (size == 0 || for_eh)
6322 ; /* do nothing. */
6323 else if (sparc_leaf_function_p)
6324 {
6325 if (size <= 4096)
6326 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6327 else if (size <= 8192)
6328 {
6329 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6330 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6331 }
6332 else
6333 {
6334 rtx reg = gen_rtx_REG (Pmode, 1);
6335 emit_move_insn (reg, GEN_INT (size));
6336 emit_insn (gen_stack_pointer_inc (reg));
6337 }
6338 }
6339 }
6340
6341 /* Expand the function epilogue in the flat register window model, either normal
6342 or part of a sibcall. We emit all the instructions except the return or the call. */
6343
6344 void
6345 sparc_flat_expand_epilogue (bool for_eh)
6346 {
6347 HOST_WIDE_INT size = sparc_frame_size;
6348
6349 if (sparc_n_global_fp_regs > 0)
6350 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6351 sparc_frame_base_offset
6352 - sparc_apparent_frame_size,
6353 SORR_RESTORE);
6354
6355 /* If we have a frame pointer, we'll need both to restore it before the
6356 frame is destroyed and use its current value in destroying the frame.
6357 Since we don't have an atomic way to do that in the flat window model,
6358 we save the current value into a temporary register (%g1). */
6359 if (frame_pointer_needed && !for_eh)
6360 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
6361
6362 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6363 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
6364 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
6365
6366 if (sparc_save_local_in_regs_p)
6367 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
6368 sparc_frame_base_offset,
6369 SORR_RESTORE);
6370
6371 if (size == 0 || for_eh)
6372 ; /* do nothing. */
6373 else if (frame_pointer_needed)
6374 {
6375 /* Make sure the frame is destroyed after everything else is done. */
6376 emit_insn (gen_blockage ());
6377
6378 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
6379 }
6380 else
6381 {
6382 /* Likewise. */
6383 emit_insn (gen_blockage ());
6384
6385 if (size <= 4096)
6386 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6387 else if (size <= 8192)
6388 {
6389 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6390 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6391 }
6392 else
6393 {
6394 rtx reg = gen_rtx_REG (Pmode, 1);
6395 emit_move_insn (reg, GEN_INT (size));
6396 emit_insn (gen_stack_pointer_inc (reg));
6397 }
6398 }
6399 }
6400
6401 /* Return true if it is appropriate to emit `return' instructions in the
6402 body of a function. */
6403
6404 bool
6405 sparc_can_use_return_insn_p (void)
6406 {
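  /* Note that the && chain below binds more tightly than ?:, so it forms
     the condition of the conditional expression as a whole.  */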
6407 return sparc_prologue_data_valid_p
6408 && sparc_n_global_fp_regs == 0
6409 && TARGET_FLAT
6410 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
6411 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
6412 }
6413
6414 /* This function generates the assembly code for function exit. */
6415
6416 static void
6417 sparc_asm_function_epilogue (FILE *file)
6418 {
6419 /* If the last two instructions of a function are "call foo; dslot;"
6420 the return address might point to the first instruction in the next
6421 function and we have to output a dummy nop for the sake of sane
6422 backtraces in such cases. This is pointless for sibling calls since
6423 the return address is explicitly adjusted. */
6424
6425 rtx_insn *insn = get_last_insn ();
6426
6427 rtx last_real_insn = prev_real_insn (insn);
6428 if (last_real_insn
6429 && NONJUMP_INSN_P (last_real_insn)
6430 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
6431 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
6432
6433 if (last_real_insn
6434 && CALL_P (last_real_insn)
6435 && !SIBLING_CALL_P (last_real_insn))
6436 fputs("\tnop\n", file);
6437
6438 sparc_output_deferred_case_vectors ();
6439 }
6440
6441 /* Output a 'restore' instruction. */
6442
6443 static void
6444 output_restore (rtx pat)
6445 {
6446 rtx operands[3];
6447
6448 if (! pat)
6449 {
6450 fputs ("\t restore\n", asm_out_file);
6451 return;
6452 }
6453
6454 gcc_assert (GET_CODE (pat) == SET);
6455
6456 operands[0] = SET_DEST (pat);
6457 pat = SET_SRC (pat);
6458
6459 switch (GET_CODE (pat))
6460 {
6461 case PLUS:
6462 operands[1] = XEXP (pat, 0);
6463 operands[2] = XEXP (pat, 1);
6464 output_asm_insn (" restore %r1, %2, %Y0", operands);
6465 break;
6466 case LO_SUM:
6467 operands[1] = XEXP (pat, 0);
6468 operands[2] = XEXP (pat, 1);
6469 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
6470 break;
6471 case ASHIFT:
6472 operands[1] = XEXP (pat, 0);
6473 gcc_assert (XEXP (pat, 1) == const1_rtx);
6474 output_asm_insn (" restore %r1, %r1, %Y0", operands);
6475 break;
6476 default:
6477 operands[1] = pat;
6478 output_asm_insn (" restore %%g0, %1, %Y0", operands);
6479 break;
6480 }
6481 }
6482
6483 /* Output a return. */
6484
6485 const char *
6486 output_return (rtx_insn *insn)
6487 {
6488 if (crtl->calls_eh_return)
6489 {
6490 /* If the function uses __builtin_eh_return, the eh_return
6491 machinery occupies the delay slot. */
6492 gcc_assert (!final_sequence);
6493
6494 if (flag_delayed_branch)
6495 {
6496 if (!TARGET_FLAT && TARGET_V9)
6497 fputs ("\treturn\t%i7+8\n", asm_out_file);
6498 else
6499 {
6500 if (!TARGET_FLAT)
6501 fputs ("\trestore\n", asm_out_file);
6502
6503 fputs ("\tjmp\t%o7+8\n", asm_out_file);
6504 }
6505
6506 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
6507 }
6508 else
6509 {
6510 if (!TARGET_FLAT)
6511 fputs ("\trestore\n", asm_out_file);
6512
6513 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
6514 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
6515 }
6516 }
6517 else if (sparc_leaf_function_p || TARGET_FLAT)
6518 {
6519 /* This is a leaf or flat function so we don't have to bother restoring
6520 the register window, which frees us from dealing with the convoluted
6521 semantics of restore/return. We simply output the jump to the
6522 return address and the insn in the delay slot (if any). */
6523
6524 return "jmp\t%%o7+%)%#";
6525 }
6526 else
6527 {
6528 /* This is a regular function so we have to restore the register window.
6529 We may have a pending insn for the delay slot, which will be either
6530 combined with the 'restore' instruction or put in the delay slot of
6531 the 'return' instruction. */
6532
6533 if (final_sequence)
6534 {
6535 rtx_insn *delay;
6536 rtx pat;
6537
6538 delay = NEXT_INSN (insn);
6539 gcc_assert (delay);
6540
6541 pat = PATTERN (delay);
6542
6543 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6544 {
6545 epilogue_renumber (&pat, 0);
6546 return "return\t%%i7+%)%#";
6547 }
6548 else
6549 {
6550 output_asm_insn ("jmp\t%%i7+%)", NULL);
6551
6552 /* We're going to output the insn in the delay slot manually.
6553 Make sure to output its source location first. */
6554 PATTERN (delay) = gen_blockage ();
6555 INSN_CODE (delay) = -1;
6556 final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
6557 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6558
6559 output_restore (pat);
6560 }
6561 }
6562 else
6563 {
6564 /* The delay slot is empty. */
6565 if (TARGET_V9)
6566 return "return\t%%i7+%)\n\t nop";
6567 else if (flag_delayed_branch)
6568 return "jmp\t%%i7+%)\n\t restore";
6569 else
6570 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6571 }
6572 }
6573
6574 return "";
6575 }
6576
6577 /* Output a sibling call. */
6578
6579 const char *
6580 output_sibcall (rtx_insn *insn, rtx call_operand)
6581 {
6582 rtx operands[1];
6583
6584 gcc_assert (flag_delayed_branch);
6585
6586 operands[0] = call_operand;
6587
6588 if (sparc_leaf_function_p || TARGET_FLAT)
6589 {
6590 /* This is a leaf or flat function so we don't have to bother restoring
6591 the register window. We simply output the jump to the function and
6592 the insn in the delay slot (if any). */
6593
6594 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6595
6596 if (final_sequence)
6597 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6598 operands);
6599 else
6600 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6601 it into a branch if possible. */
6602 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6603 operands);
6604 }
6605 else
6606 {
6607 /* This is a regular function so we have to restore the register window.
6608 We may have a pending insn for the delay slot, which will be combined
6609 with the 'restore' instruction. */
6610
6611 output_asm_insn ("call\t%a0, 0", operands);
6612
6613 if (final_sequence)
6614 {
6615 rtx_insn *delay;
6616 rtx pat;
6617
6618 delay = NEXT_INSN (insn);
6619 gcc_assert (delay);
6620
6621 pat = PATTERN (delay);
6622
6623 /* We're going to output the insn in the delay slot manually.
6624 Make sure to output its source location first. */
6625 PATTERN (delay) = gen_blockage ();
6626 INSN_CODE (delay) = -1;
6627 final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
6628 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6629
6630 output_restore (pat);
6631 }
6632 else
6633 output_restore (NULL_RTX);
6634 }
6635
6636 return "";
6637 }
6638
6639 /* Functions for handling argument passing.
6640
6641 For 32-bit, the first 6 args are normally in registers and the rest are
6642 pushed. Any arg that starts within the first 6 words is at least
6643 partially passed in a register unless its data type forbids.
6644
6645 For 64-bit, the argument registers are laid out as an array of 16 elements
6646 and arguments are added sequentially. The first 6 int args and up to the
6647 first 16 fp args (depending on size) are passed in regs.
6648
6649 Slot Stack Integral Float Float in structure Double Long Double
6650 ---- ----- -------- ----- ------------------ ------ -----------
6651 15 [SP+248] %f31 %f30,%f31 %d30
6652 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6653 13 [SP+232] %f27 %f26,%f27 %d26
6654 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6655 11 [SP+216] %f23 %f22,%f23 %d22
6656 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6657 9 [SP+200] %f19 %f18,%f19 %d18
6658 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6659 7 [SP+184] %f15 %f14,%f15 %d14
6660 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6661 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6662 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6663 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6664 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6665 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6666 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6667
6668 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6669
6670 Integral arguments are always passed as 64-bit quantities appropriately
6671 extended.
6672
6673 Passing of floating point values is handled as follows.
6674 If a prototype is in scope:
6675 If the value is in a named argument (i.e. not a stdarg function or a
6676 value not part of the `...') then the value is passed in the appropriate
6677 fp reg.
6678 If the value is part of the `...' and is passed in one of the first 6
6679 slots then the value is passed in the appropriate int reg.
6680 If the value is part of the `...' and is not passed in one of the first 6
6681 slots then the value is passed in memory.
6682 If a prototype is not in scope:
6683 If the value is one of the first 6 arguments the value is passed in the
6684 appropriate integer reg and the appropriate fp reg.
6685 If the value is not one of the first 6 arguments the value is passed in
6686 the appropriate fp reg and in memory.
6687
6688
6689 Summary of the calling conventions implemented by GCC on the SPARC:
6690
6691 32-bit ABI:
6692 size argument return value
6693
6694 small integer <4 int. reg. int. reg.
6695 word 4 int. reg. int. reg.
6696 double word 8 int. reg. int. reg.
6697
6698 _Complex small integer <8 int. reg. int. reg.
6699 _Complex word 8 int. reg. int. reg.
6700 _Complex double word 16 memory int. reg.
6701
6702 vector integer <=8 int. reg. FP reg.
6703 vector integer >8 memory memory
6704
6705 float 4 int. reg. FP reg.
6706 double 8 int. reg. FP reg.
6707 long double 16 memory memory
6708
6709 _Complex float 8 memory FP reg.
6710 _Complex double 16 memory FP reg.
6711 _Complex long double 32 memory FP reg.
6712
6713 vector float any memory memory
6714
6715 aggregate any memory memory
6716
6717
6718
6719 64-bit ABI:
6720 size argument return value
6721
6722 small integer <8 int. reg. int. reg.
6723 word 8 int. reg. int. reg.
6724 double word 16 int. reg. int. reg.
6725
6726 _Complex small integer <16 int. reg. int. reg.
6727 _Complex word 16 int. reg. int. reg.
6728 _Complex double word 32 memory int. reg.
6729
6730 vector integer <=16 FP reg. FP reg.
6731 vector integer 16<s<=32 memory FP reg.
6732 vector integer >32 memory memory
6733
6734 float 4 FP reg. FP reg.
6735 double 8 FP reg. FP reg.
6736 long double 16 FP reg. FP reg.
6737
6738 _Complex float 8 FP reg. FP reg.
6739 _Complex double 16 FP reg. FP reg.
6740 _Complex long double 32 memory FP reg.
6741
6742 vector float <=16 FP reg. FP reg.
6743 vector float 16<s<=32 memory FP reg.
6744 vector float >32 memory memory
6745
6746 aggregate <=16 reg. reg.
6747 aggregate 16<s<=32 memory reg.
6748 aggregate >32 memory memory
6749
6750
6751
6752 Note #1: complex floating-point types follow the extended SPARC ABIs as
6753 implemented by the Sun compiler.
6754
6755 Note #2: integer vector types follow the scalar floating-point types
6756 conventions to match what is implemented by the Sun VIS SDK.
6757
6758 Note #3: floating-point vector types follow the aggregate types
6759 conventions. */
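/* As a concrete reading of the 64-bit table above (illustrative only): for a
   prototyped call such as f (int a, double b, float c), 'a' occupies slot 0
   and is passed in %o0, 'b' occupies slot 1 and is passed in %d2, and 'c'
   occupies slot 2 and is passed in %f5, the single float being
   right-justified in its slot.  */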
6760
6761
6762 /* Maximum number of int regs for args. */
6763 #define SPARC_INT_ARG_MAX 6
6764 /* Maximum number of fp regs for args. */
6765 #define SPARC_FP_ARG_MAX 16
6766 /* Number of words (partially) occupied for a given size in units. */
6767 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
6768
6769 /* Handle the INIT_CUMULATIVE_ARGS macro.
6770 Initialize a variable CUM of type CUMULATIVE_ARGS
6771 for a call to a function whose data type is FNTYPE.
6772 For a library call, FNTYPE is 0. */
6773
6774 void
6775 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6776 {
6777 cum->words = 0;
6778 cum->prototype_p = fntype && prototype_p (fntype);
6779 cum->libcall_p = !fntype;
6780 }
6781
6782 /* Handle promotion of pointer and integer arguments. */
6783
6784 static machine_mode
6785 sparc_promote_function_mode (const_tree type, machine_mode mode,
6786 int *punsignedp, const_tree, int)
6787 {
6788 if (type && POINTER_TYPE_P (type))
6789 {
6790 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6791 return Pmode;
6792 }
6793
6794 /* Integral arguments are passed as full words, as per the ABI. */
6795 if (GET_MODE_CLASS (mode) == MODE_INT
6796 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6797 return word_mode;
6798
6799 return mode;
6800 }
6801
6802 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6803
6804 static bool
6805 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6806 {
6807 return TARGET_ARCH64 ? true : false;
6808 }
6809
6810 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
6811 Specify whether to pass the argument by reference. */
6812
6813 static bool
6814 sparc_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6815 {
6816 tree type = arg.type;
6817 machine_mode mode = arg.mode;
6818 if (TARGET_ARCH32)
6819 /* Original SPARC 32-bit ABI says that structures and unions,
6820 and quad-precision floats are passed by reference.
6821 All other base types are passed in registers.
6822
6823 Extended ABI (as implemented by the Sun compiler) says that all
6824 complex floats are passed by reference. Pass complex integers
6825 in registers up to 8 bytes. More generally, enforce the 2-word
6826 cap for passing arguments in registers.
6827
6828 Vector ABI (as implemented by the Sun VIS SDK) says that integer
6829 vectors are passed like floats of the same size, that is in
6830 registers up to 8 bytes. Pass all vector floats by reference
6831 like structure and unions. */
6832 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
6833 || mode == SCmode
6834 /* Catch CDImode, TFmode, DCmode and TCmode. */
6835 || GET_MODE_SIZE (mode) > 8
6836 || (type
6837 && VECTOR_TYPE_P (type)
6838 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
6839 else
6840 /* Original SPARC 64-bit ABI says that structures and unions
6841 smaller than 16 bytes are passed in registers, as well as
6842 all other base types.
6843
6844 Extended ABI (as implemented by the Sun compiler) says that
6845 complex floats are passed in registers up to 16 bytes. Pass
6846 all complex integers in registers up to 16 bytes. More generally,
6847 enforce the 2-word cap for passing arguments in registers.
6848
6849 Vector ABI (as implemented by the Sun VIS SDK) says that integer
6850 vectors are passed like floats of the same size, that is in
6851 registers (up to 16 bytes). Pass all vector floats like structure
6852 and unions. */
6853 return ((type
6854 && (AGGREGATE_TYPE_P (type) || VECTOR_TYPE_P (type))
6855 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
6856 /* Catch CTImode and TCmode. */
6857 || GET_MODE_SIZE (mode) > 16);
6858 }
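/* For example, following the rules above: in 32-bit mode a
   'struct { int a, b; }', a '_Complex float' and a 'long double' are all
   passed by reference, while a 'long long' is passed by value; in 64-bit
   mode the same 8-byte structure and the 16-byte 'long double' are passed
   by value.  */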
6859
6860 /* Traverse the record TYPE recursively and call FUNC on its fields.
6861 NAMED is true if this is for a named parameter. DATA is passed
6862 to FUNC for each field. OFFSET is the starting position and
6863 PACKED is true if we are inside a packed record. */
6864
6865 template <typename T, void Func (const_tree, int, bool, T*)>
6866 static void
6867 traverse_record_type (const_tree type, bool named, T *data,
6868 int offset = 0, bool packed = false)
6869 {
6870 /* The ABI obviously doesn't specify how packed structures are passed.
6871 These are passed in integer regs if possible, otherwise memory. */
6872 if (!packed)
6873 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6874 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6875 {
6876 packed = true;
6877 break;
6878 }
6879
6880 /* Walk the real fields, but skip those with no size or a zero size.
6881 ??? Fields with variable offset are handled as having zero offset. */
6882 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6883 if (TREE_CODE (field) == FIELD_DECL)
6884 {
6885 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6886 continue;
6887
6888 int bitpos = offset;
6889 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6890 bitpos += int_bit_position (field);
6891
6892 tree field_type = TREE_TYPE (field);
6893 if (TREE_CODE (field_type) == RECORD_TYPE)
6894 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6895 packed);
6896 else
6897 {
6898 const bool fp_type
6899 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6900 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6901 data);
6902 }
6903 }
6904 }
6905
6906 /* Handle recursive register classifying for structure layout. */
6907
6908 typedef struct
6909 {
6910 bool fp_regs; /* true if field eligible to FP registers. */
6911 bool fp_regs_in_first_word; /* true if such field in first word. */
6912 } classify_data_t;
6913
6914 /* A subroutine of function_arg_slotno. Classify the field. */
6915
6916 inline void
6917 classify_registers (const_tree, int bitpos, bool fp, classify_data_t *data)
6918 {
6919 if (fp)
6920 {
6921 data->fp_regs = true;
6922 if (bitpos < BITS_PER_WORD)
6923 data->fp_regs_in_first_word = true;
6924 }
6925 }
6926
6927 /* Compute the slot number to pass an argument in.
6928 Return the slot number or -1 if passing on the stack.
6929
6930 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6931 the preceding args and about the function being called.
6932 MODE is the argument's machine mode.
6933 TYPE is the data type of the argument (as a tree).
6934 This is null for libcalls where that information may
6935 not be available.
6936 NAMED is nonzero if this argument is a named parameter
6937 (otherwise it is an extra parameter matching an ellipsis).
6938 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6939 *PREGNO records the register number to use if scalar type.
6940 *PPADDING records the amount of padding needed in words. */
6941
6942 static int
6943 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6944 const_tree type, bool named, bool incoming,
6945 int *pregno, int *ppadding)
6946 {
6947 const int regbase
6948 = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
6949 int slotno = cum->words, regno;
6950 enum mode_class mclass = GET_MODE_CLASS (mode);
6951
6952 /* Silence warnings in the callers. */
6953 *pregno = -1;
6954 *ppadding = -1;
6955
6956 if (type && TREE_ADDRESSABLE (type))
6957 return -1;
6958
6959 /* In 64-bit mode, objects requiring 16-byte alignment get it. */
6960 if (TARGET_ARCH64
6961 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6962 && (slotno & 1) != 0)
6963 {
6964 slotno++;
6965 *ppadding = 1;
6966 }
6967 else
6968 *ppadding = 0;
6969
6970 /* Vector types deserve special treatment because they are polymorphic wrt
6971 their mode, depending upon whether VIS instructions are enabled. */
6972 if (type && VECTOR_TYPE_P (type))
6973 {
6974 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6975 {
6976 /* The SPARC port defines no floating-point vector modes. */
6977 gcc_assert (mode == BLKmode);
6978 }
6979 else
6980 {
6981 /* Integer vector types should either have a vector
6982 mode or an integral mode, because we are guaranteed
6983 by pass_by_reference that their size is not greater
6984 than 16 bytes and TImode is 16-byte wide. */
6985 gcc_assert (mode != BLKmode);
6986
6987 /* Integer vectors are handled like floats as per
6988 the Sun VIS SDK. */
6989 mclass = MODE_FLOAT;
6990 }
6991 }
6992
6993 switch (mclass)
6994 {
6995 case MODE_FLOAT:
6996 case MODE_COMPLEX_FLOAT:
6997 case MODE_VECTOR_INT:
6998 if (TARGET_ARCH64 && TARGET_FPU && named)
6999 {
7000 /* If all arg slots are filled, then must pass on stack. */
7001 if (slotno >= SPARC_FP_ARG_MAX)
7002 return -1;
7003
7004 regno = SPARC_FP_ARG_FIRST + slotno * 2;
7005 /* Arguments filling only one single FP register are
7006 right-justified in the outer double FP register. */
7007 if (GET_MODE_SIZE (mode) <= 4)
7008 regno++;
7009 break;
7010 }
7011 /* fallthrough */
7012
7013 case MODE_INT:
7014 case MODE_COMPLEX_INT:
7015 /* If all arg slots are filled, then must pass on stack. */
7016 if (slotno >= SPARC_INT_ARG_MAX)
7017 return -1;
7018
7019 regno = regbase + slotno;
7020 break;
7021
7022 case MODE_RANDOM:
7023 /* MODE is VOIDmode when generating the actual call. */
7024 if (mode == VOIDmode)
7025 return -1;
7026
7027 if (TARGET_ARCH64 && TARGET_FPU && named
7028 && type
7029 && (TREE_CODE (type) == RECORD_TYPE || VECTOR_TYPE_P (type)))
7030 {
7031 /* If all arg slots are filled, then must pass on stack. */
7032 if (slotno >= SPARC_FP_ARG_MAX)
7033 return -1;
7034
7035 if (TREE_CODE (type) == RECORD_TYPE)
7036 {
7037 classify_data_t data = { false, false };
7038 traverse_record_type<classify_data_t, classify_registers>
7039 (type, named, &data);
7040
7041 if (data.fp_regs)
7042 {
7043 /* If all FP slots are filled except for the last one and
7044 there is no FP field in the first word, then must pass
7045 on stack. */
7046 if (slotno >= SPARC_FP_ARG_MAX - 1
7047 && !data.fp_regs_in_first_word)
7048 return -1;
7049 }
7050 else
7051 {
7052 /* If all int slots are filled, then must pass on stack. */
7053 if (slotno >= SPARC_INT_ARG_MAX)
7054 return -1;
7055 }
7056
7057 /* PREGNO isn't set since both int and FP regs can be used. */
7058 return slotno;
7059 }
7060
7061 regno = SPARC_FP_ARG_FIRST + slotno * 2;
7062 }
7063 else
7064 {
7065 /* If all arg slots are filled, then must pass on stack. */
7066 if (slotno >= SPARC_INT_ARG_MAX)
7067 return -1;
7068
7069 regno = regbase + slotno;
7070 }
7071 break;
7072
7073 default :
7074 gcc_unreachable ();
7075 }
7076
7077 *pregno = regno;
7078 return slotno;
7079 }
7080
7081 /* Handle recursive register counting/assigning for structure layout. */
7082
7083 typedef struct
7084 {
7085 int slotno; /* slot number of the argument. */
7086 int regbase; /* regno of the base register. */
7087 int intoffset; /* offset of the first pending integer field. */
7088 int nregs; /* number of words passed in registers. */
7089 bool stack; /* true if part of the argument is on the stack. */
7090 rtx ret; /* return expression being built. */
7091 } assign_data_t;
7092
7093 /* A subroutine of function_arg_record_value. Compute the number of integer
7094 registers to be assigned between PARMS->intoffset and BITPOS. Return
7095 true if at least one integer register is assigned or false otherwise. */
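/* For instance, with 64-bit words: if the pending integer run starts at bit
   32 and BITPOS is 128, STARTBIT rounds down to 0 and ENDBIT rounds up to
   128, so two integer registers are assigned, unless that would overrun
   SPARC_INT_ARG_MAX starting from THIS_SLOTNO.  */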
7096
7097 static bool
7098 compute_int_layout (int bitpos, assign_data_t *data, int *pnregs)
7099 {
7100 if (data->intoffset < 0)
7101 return false;
7102
7103 const int intoffset = data->intoffset;
7104 data->intoffset = -1;
7105
7106 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
7107 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
7108 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
7109 int nregs = (endbit - startbit) / BITS_PER_WORD;
7110
7111 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
7112 {
7113 nregs = SPARC_INT_ARG_MAX - this_slotno;
7114
7115 /* We need to pass this field (partly) on the stack. */
7116 data->stack = 1;
7117 }
7118
7119 if (nregs <= 0)
7120 return false;
7121
7122 *pnregs = nregs;
7123 return true;
7124 }
7125
7126 /* A subroutine of function_arg_record_value. Compute the number and the mode
7127 of the FP registers to be assigned for FIELD. Return true if at least one
7128 FP register is assigned or false otherwise. */
7129
7130 static bool
7131 compute_fp_layout (const_tree field, int bitpos, assign_data_t *data,
7132 int *pnregs, machine_mode *pmode)
7133 {
7134 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
7135 machine_mode mode = DECL_MODE (field);
7136 int nregs, nslots;
7137
7138 /* Slots are counted as words while regs are counted as having the size of
7139 the (inner) mode. */
7140 if (VECTOR_TYPE_P (TREE_TYPE (field)) && mode == BLKmode)
7141 {
7142 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
7143 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
7144 }
7145 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
7146 {
7147 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
7148 nregs = 2;
7149 }
7150 else
7151 nregs = 1;
7152
7153 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
7154
7155 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
7156 {
7157 nslots = SPARC_FP_ARG_MAX - this_slotno;
7158 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
7159
7160 /* We need to pass this field (partly) on the stack. */
7161 data->stack = 1;
7162
7163 if (nregs <= 0)
7164 return false;
7165 }
7166
7167 *pnregs = nregs;
7168 *pmode = mode;
7169 return true;
7170 }
7171
7172 /* A subroutine of function_arg_record_value. Count the number of registers
7173 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
7174
7175 inline void
7176 count_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
7177 {
7178 if (fp)
7179 {
7180 int nregs;
7181 machine_mode mode;
7182
7183 if (compute_int_layout (bitpos, data, &nregs))
7184 data->nregs += nregs;
7185
7186 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
7187 data->nregs += nregs;
7188 }
7189 else
7190 {
7191 if (data->intoffset < 0)
7192 data->intoffset = bitpos;
7193 }
7194 }
7195
7196 /* A subroutine of function_arg_record_value. Assign the bits of the
7197 structure between PARMS->intoffset and BITPOS to integer registers. */
7198
7199 static void
7200 assign_int_registers (int bitpos, assign_data_t *data)
7201 {
7202 int intoffset = data->intoffset;
7203 machine_mode mode;
7204 int nregs;
7205
7206 if (!compute_int_layout (bitpos, data, &nregs))
7207 return;
7208
7209 /* If this is the trailing part of a word, only load that much into
7210 the register. Otherwise load the whole register. Note that in
7211 the latter case we may pick up unwanted bits. It's not a problem
7212 at the moment but may wish to revisit. */
7213 if (intoffset % BITS_PER_WORD != 0)
7214 mode = smallest_int_mode_for_size (BITS_PER_WORD
7215 - intoffset % BITS_PER_WORD);
7216 else
7217 mode = word_mode;
7218
7219 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
7220 unsigned int regno = data->regbase + this_slotno;
7221 intoffset /= BITS_PER_UNIT;
7222
7223 do
7224 {
7225 rtx reg = gen_rtx_REG (mode, regno);
7226 XVECEXP (data->ret, 0, data->stack + data->nregs)
7227 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
7228 data->nregs += 1;
7229 mode = word_mode;
7230 regno += 1;
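      /* Round the offset up to the next word boundary; any following word
         of this run starts word-aligned.  */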
7231 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
7232 }
7233 while (--nregs > 0);
7234 }
7235
7236 /* A subroutine of function_arg_record_value. Assign FIELD at position
7237 BITPOS to FP registers. */
7238
7239 static void
7240 assign_fp_registers (const_tree field, int bitpos, assign_data_t *data)
7241 {
7242 int nregs;
7243 machine_mode mode;
7244
7245 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
7246 return;
7247
7248 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
7249 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
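  /* A value of at most 4 bytes that sits in the second (higher-addressed)
     half of its 8-byte slot goes into the odd register of the FP pair.  */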
7250 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
7251 regno++;
7252 int pos = bitpos / BITS_PER_UNIT;
7253
7254 do
7255 {
7256 rtx reg = gen_rtx_REG (mode, regno);
7257 XVECEXP (data->ret, 0, data->stack + data->nregs)
7258 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
7259 data->nregs += 1;
7260 regno += GET_MODE_SIZE (mode) / 4;
7261 pos += GET_MODE_SIZE (mode);
7262 }
7263 while (--nregs > 0);
7264 }
7265
7266 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
7267 the structure between PARMS->intoffset and BITPOS to registers. */
7268
7269 inline void
7270 assign_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
7271 {
7272 if (fp)
7273 {
7274 assign_int_registers (bitpos, data);
7275
7276 assign_fp_registers (field, bitpos, data);
7277 }
7278 else
7279 {
7280 if (data->intoffset < 0)
7281 data->intoffset = bitpos;
7282 }
7283 }
7284
7285 /* Used by function_arg and function_value to implement the complex
7286 conventions of the 64-bit ABI for passing and returning structures.
7287 Return an expression valid as a return value for the FUNCTION_ARG
7288 and TARGET_FUNCTION_VALUE.
7289
7290 TYPE is the data type of the argument (as a tree).
7291 This is null for libcalls where that information may
7292 not be available.
7293 MODE is the argument's machine mode.
7294 SLOTNO is the index number of the argument's slot in the parameter array.
7295 NAMED is true if this argument is a named parameter
7296 (otherwise it is an extra parameter matching an ellipsis).
7297 REGBASE is the regno of the base register for the parameter array. */
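
/* For illustration only (a sketch of the expected shape, not verbatim from
   the sources): on SPARC64 a struct { int i; float f; } passed in the first
   slot should yield a PARALLEL along the lines of

     (parallel [(expr_list (reg:DI %o0) (const_int 0))
                (expr_list (reg:SF %f1) (const_int 4))])

   i.e. the whole first word in an integer register and the single-precision
   field right-justified in the odd FP register of that slot.  */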
7298
7299 static rtx
7300 function_arg_record_value (const_tree type, machine_mode mode,
7301 int slotno, bool named, int regbase)
7302 {
7303 const int size = int_size_in_bytes (type);
7304 assign_data_t data;
7305 int nregs;
7306
7307 data.slotno = slotno;
7308 data.regbase = regbase;
7309
7310 /* Count how many registers we need. */
7311 data.nregs = 0;
7312 data.intoffset = 0;
7313 data.stack = false;
7314 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
7315
7316 /* Take into account pending integer fields. */
7317 if (compute_int_layout (size * BITS_PER_UNIT, &data, &nregs))
7318 data.nregs += nregs;
7319
7320 /* Allocate the vector and handle some annoying special cases. */
7321 nregs = data.nregs;
7322
7323 if (nregs == 0)
7324 {
7325 /* ??? Empty structure has no value? Duh? */
7326 if (size <= 0)
7327 {
7328 /* Though there's nothing really to store, return a word register
7329 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
7330 leads to breakage due to the fact that there are zero bytes to
7331 load. */
7332 return gen_rtx_REG (mode, regbase);
7333 }
7334
7335 /* ??? C++ has structures with no fields, and yet a size. Give up
7336 for now and pass everything back in integer registers. */
7337 nregs = CEIL_NWORDS (size);
7338 if (nregs + slotno > SPARC_INT_ARG_MAX)
7339 nregs = SPARC_INT_ARG_MAX - slotno;
7340 }
7341
7342 gcc_assert (nregs > 0);
7343
7344 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
7345
7346 /* If at least one field must be passed on the stack, generate
7347 (parallel [(expr_list (nil) ...) ...]) so that all fields will
7348 also be passed on the stack. We can't do much better because the
7349 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
7350 of structures for which the fields passed exclusively in registers
7351 are not at the beginning of the structure. */
7352 if (data.stack)
7353 XVECEXP (data.ret, 0, 0)
7354 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7355
7356 /* Assign the registers. */
7357 data.nregs = 0;
7358 data.intoffset = 0;
7359 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
7360
7361 /* Assign pending integer fields. */
7362 assign_int_registers (size * BITS_PER_UNIT, &data);
7363
7364 gcc_assert (data.nregs == nregs);
7365
7366 return data.ret;
7367 }
7368
7369 /* Used by function_arg and function_value to implement the conventions
7370 of the 64-bit ABI for passing and returning unions.
7371 Return an expression valid as a return value for the FUNCTION_ARG
7372 and TARGET_FUNCTION_VALUE.
7373
7374 SIZE is the size in bytes of the union.
7375 MODE is the argument's machine mode.
7376 SLOTNO is the index number of the argument's slot in the parameter array.
7377 REGNO is the hard register the union will be passed in. */
7378
7379 static rtx
7380 function_arg_union_value (int size, machine_mode mode, int slotno, int regno)
7381 {
7382 unsigned int nwords;
7383
7384 /* See comment in function_arg_record_value for empty structures. */
7385 if (size <= 0)
7386 return gen_rtx_REG (mode, regno);
7387
7388 if (slotno == SPARC_INT_ARG_MAX - 1)
7389 nwords = 1;
7390 else
7391 nwords = CEIL_NWORDS (size);
7392
7393 rtx regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
7394
7395 /* Unions are passed left-justified. */
7396 for (unsigned int i = 0; i < nwords; i++)
7397 XVECEXP (regs, 0, i)
7398 = gen_rtx_EXPR_LIST (VOIDmode,
7399 gen_rtx_REG (word_mode, regno + i),
7400 GEN_INT (UNITS_PER_WORD * i));
7401
7402 return regs;
7403 }
7404
7405 /* Used by function_arg and function_value to implement the conventions
7406 of the 64-bit ABI for passing and returning BLKmode vectors.
7407 Return an expression valid as a return value for the FUNCTION_ARG
7408 and TARGET_FUNCTION_VALUE.
7409
7410 SIZE is the size in bytes of the vector.
7411 SLOTNO is the index number of the argument's slot in the parameter array.
7412 NAMED is true if this argument is a named parameter
7413 (otherwise it is an extra parameter matching an ellipsis).
7414 REGNO is the hard register the vector will be passed in. */
7415
7416 static rtx
7417 function_arg_vector_value (int size, int slotno, bool named, int regno)
7418 {
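  /* Named vector arguments go in FP registers, where each 8-byte word spans
     a pair of 4-byte registers, hence the stride of 2; unnamed ones fall
     back to integer registers with a stride of 1.  */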
7419 const int mult = (named ? 2 : 1);
7420 unsigned int nwords;
7421
7422 if (slotno == (named ? SPARC_FP_ARG_MAX : SPARC_INT_ARG_MAX) - 1)
7423 nwords = 1;
7424 else
7425 nwords = CEIL_NWORDS (size);
7426
7427 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nwords));
7428
7429 if (size < UNITS_PER_WORD)
7430 XVECEXP (regs, 0, 0)
7431 = gen_rtx_EXPR_LIST (VOIDmode,
7432 gen_rtx_REG (SImode, regno),
7433 const0_rtx);
7434 else
7435 for (unsigned int i = 0; i < nwords; i++)
7436 XVECEXP (regs, 0, i)
7437 = gen_rtx_EXPR_LIST (VOIDmode,
7438 gen_rtx_REG (word_mode, regno + i * mult),
7439 GEN_INT (i * UNITS_PER_WORD));
7440
7441 return regs;
7442 }
7443
7444 /* Determine where to put an argument to a function.
7445 Value is zero to push the argument on the stack,
7446 or a hard register in which to store the argument.
7447
7448 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7449 the preceding args and about the function being called.
7450 ARG is a description of the argument.
7451 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
7452 TARGET_FUNCTION_INCOMING_ARG. */
7453
7454 static rtx
7455 sparc_function_arg_1 (cumulative_args_t cum_v, const function_arg_info &arg,
7456 bool incoming)
7457 {
7458 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7459 const int regbase
7460 = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
7461 int slotno, regno, padding;
7462 tree type = arg.type;
7463 machine_mode mode = arg.mode;
7464 enum mode_class mclass = GET_MODE_CLASS (mode);
7465 bool named = arg.named;
7466
7467 slotno
7468     = function_arg_slotno (cum, mode, type, named, incoming, &regno, &padding);
7469 if (slotno == -1)
7470 return 0;
7471
7472 /* Integer vectors are handled like floats as per the Sun VIS SDK. */
7473 if (type && VECTOR_INTEGER_TYPE_P (type))
7474 mclass = MODE_FLOAT;
7475
7476 if (TARGET_ARCH32)
7477 return gen_rtx_REG (mode, regno);
7478
7479 /* Structures up to 16 bytes in size are passed in arg slots on the stack
7480 and are promoted to registers if possible. */
7481 if (type && TREE_CODE (type) == RECORD_TYPE)
7482 {
7483 const int size = int_size_in_bytes (type);
7484 gcc_assert (size <= 16);
7485
7486 return function_arg_record_value (type, mode, slotno, named, regbase);
7487 }
7488
7489 /* Unions up to 16 bytes in size are passed in integer registers. */
7490 else if (type && TREE_CODE (type) == UNION_TYPE)
7491 {
7492 const int size = int_size_in_bytes (type);
7493 gcc_assert (size <= 16);
7494
7495 return function_arg_union_value (size, mode, slotno, regno);
7496 }
7497
7498 /* Floating-point vectors up to 16 bytes are passed in registers. */
7499 else if (type && VECTOR_TYPE_P (type) && mode == BLKmode)
7500 {
7501 const int size = int_size_in_bytes (type);
7502 gcc_assert (size <= 16);
7503
7504 return function_arg_vector_value (size, slotno, named, regno);
7505 }
7506
7507 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
7508 but also have the slot allocated for them.
7509 If no prototype is in scope fp values in register slots get passed
7510 in two places, either fp regs and int regs or fp regs and memory. */
7511 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7512 && SPARC_FP_REG_P (regno))
7513 {
7514 rtx reg = gen_rtx_REG (mode, regno);
7515 if (cum->prototype_p || cum->libcall_p)
7516 return reg;
7517 else
7518 {
7519 rtx v0, v1;
7520
7521 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
7522 {
7523 int intreg;
7524
7525 /* On incoming, we don't need to know that the value
7526 is passed in %f0 and %i0, and it confuses other parts
7527 causing needless spillage even on the simplest cases. */
7528 if (incoming)
7529 return reg;
7530
7531 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
7532 + (regno - SPARC_FP_ARG_FIRST) / 2);
7533
7534 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7535 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
7536 const0_rtx);
7537 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7538 }
7539 else
7540 {
7541 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7542 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7543 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7544 }
7545 }
7546 }
7547
7548 /* All other aggregate types are passed in an integer register in a mode
7549 corresponding to the size of the type. */
7550 else if (type && AGGREGATE_TYPE_P (type))
7551 {
7552 const int size = int_size_in_bytes (type);
7553 gcc_assert (size <= 16);
7554
7555 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7556 }
7557
7558 return gen_rtx_REG (mode, regno);
7559 }
7560
7561 /* Handle the TARGET_FUNCTION_ARG target hook. */
7562
7563 static rtx
7564 sparc_function_arg (cumulative_args_t cum, const function_arg_info &arg)
7565 {
7566 return sparc_function_arg_1 (cum, arg, false);
7567 }
7568
7569 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
7570
7571 static rtx
7572 sparc_function_incoming_arg (cumulative_args_t cum,
7573 const function_arg_info &arg)
7574 {
7575 return sparc_function_arg_1 (cum, arg, true);
7576 }
7577
7578 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
7579
7580 static unsigned int
7581 sparc_function_arg_boundary (machine_mode mode, const_tree type)
7582 {
7583 return ((TARGET_ARCH64
7584 && (GET_MODE_ALIGNMENT (mode) == 128
7585 || (type && TYPE_ALIGN (type) == 128)))
7586 ? 128
7587 : PARM_BOUNDARY);
7588 }
7589
7590 /* For an arg passed partly in registers and partly in memory,
7591 this is the number of bytes of registers used.
7592 For args passed entirely in registers or entirely in memory, zero.
7593
7594 Any arg that starts in the first 6 regs but won't entirely fit in them
7595 needs partial registers on v8. On v9, structures with integer
7596 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7597 values that begin in the last fp reg [where "last fp reg" varies with the
7598 mode] will be split between that reg and memory. */
7599
7600 static int
7601 sparc_arg_partial_bytes (cumulative_args_t cum, const function_arg_info &arg)
7602 {
7603 int slotno, regno, padding;
7604
7605 /* We pass false for incoming here, it doesn't matter. */
7606 slotno = function_arg_slotno (get_cumulative_args (cum), arg.mode, arg.type,
7607 				arg.named, false, &regno, &padding);
7608
7609 if (slotno == -1)
7610 return 0;
7611
7612 if (TARGET_ARCH32)
7613 {
7614 /* We are guaranteed by pass_by_reference that the size of the
7615 argument is not greater than 8 bytes, so we only need to return
7616 one word if the argument is partially passed in registers. */
7617 const int size = GET_MODE_SIZE (arg.mode);
7618
7619 if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
7620 return UNITS_PER_WORD;
7621 }
7622 else
7623 {
7624 /* We are guaranteed by pass_by_reference that the size of the
7625 argument is not greater than 16 bytes, so we only need to return
7626 one word if the argument is partially passed in registers. */
7627 if (arg.aggregate_type_p ())
7628 {
7629 const int size = int_size_in_bytes (arg.type);
7630
7631 if (size > UNITS_PER_WORD
7632 && (slotno == SPARC_INT_ARG_MAX - 1
7633 || slotno == SPARC_FP_ARG_MAX - 1))
7634 return UNITS_PER_WORD;
7635 }
7636 else if (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_INT
7637 || ((GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT
7638 || (arg.type && VECTOR_TYPE_P (arg.type)))
7639 && !(TARGET_FPU && arg.named)))
7640 {
7641 const int size = (arg.type && VECTOR_FLOAT_TYPE_P (arg.type))
7642 ? int_size_in_bytes (arg.type)
7643 : GET_MODE_SIZE (arg.mode);
7644
7645 if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
7646 return UNITS_PER_WORD;
7647 }
7648 else if (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT
7649 || (arg.type && VECTOR_TYPE_P (arg.type)))
7650 {
7651 const int size = (arg.type && VECTOR_FLOAT_TYPE_P (arg.type))
7652 ? int_size_in_bytes (arg.type)
7653 : GET_MODE_SIZE (arg.mode);
7654
7655 if (size > UNITS_PER_WORD && slotno == SPARC_FP_ARG_MAX - 1)
7656 return UNITS_PER_WORD;
7657 }
7658 }
7659
7660 return 0;
7661 }
7662
7663 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7664 Update the data in CUM to advance over argument ARG. */
7665
7666 static void
7667 sparc_function_arg_advance (cumulative_args_t cum_v,
7668 const function_arg_info &arg)
7669 {
7670 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7671 tree type = arg.type;
7672 machine_mode mode = arg.mode;
7673 int regno, padding;
7674
7675 /* We pass false for incoming here, it doesn't matter. */
7676   function_arg_slotno (cum, mode, type, arg.named, false, &regno, &padding);
7677
7678 /* If argument requires leading padding, add it. */
7679 cum->words += padding;
7680
7681 if (TARGET_ARCH32)
7682 cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
7683 else
7684 {
7685 /* For types that can have BLKmode, get the size from the type. */
7686 if (type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7687 {
7688 const int size = int_size_in_bytes (type);
7689
7690 /* See comment in function_arg_record_value for empty structures. */
7691 if (size <= 0)
7692 cum->words++;
7693 else
7694 cum->words += CEIL_NWORDS (size);
7695 }
7696 else
7697 cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
7698 }
7699 }
7700
7701 /* Implement TARGET_FUNCTION_ARG_PADDING. For the 64-bit ABI structs
7702 are always stored left shifted in their argument slot. */
7703
7704 static pad_direction
7705 sparc_function_arg_padding (machine_mode mode, const_tree type)
7706 {
7707 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7708 return PAD_UPWARD;
7709
7710 /* Fall back to the default. */
7711 return default_function_arg_padding (mode, type);
7712 }
7713
7714 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7715 Specify whether to return the return value in memory. */
7716
7717 static bool
7718 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7719 {
7720 if (TARGET_ARCH32)
7721 /* Original SPARC 32-bit ABI says that structures and unions, and
7722 quad-precision floats are returned in memory. But note that the
7723 first part is implemented through -fpcc-struct-return being the
7724 default, so here we only implement -freg-struct-return instead.
7725 All other base types are returned in registers.
7726
7727 Extended ABI (as implemented by the Sun compiler) says that
7728 all complex floats are returned in registers (8 FP registers
7729 at most for '_Complex long double'). Return all complex integers
7730 in registers (4 at most for '_Complex long long').
7731
7732 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7733 integers are returned like floats of the same size, that is in
7734 registers up to 8 bytes and in memory otherwise. Return all
7735 vector floats in memory like structure and unions; note that
7736 they always have BLKmode like the latter. */
7737 return (TYPE_MODE (type) == BLKmode
7738 || TYPE_MODE (type) == TFmode
7739 || (TREE_CODE (type) == VECTOR_TYPE
7740 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7741 else
7742 /* Original SPARC 64-bit ABI says that structures and unions
7743 smaller than 32 bytes are returned in registers, as well as
7744 all other base types.
7745
7746 Extended ABI (as implemented by the Sun compiler) says that all
7747 complex floats are returned in registers (8 FP registers at most
7748 for '_Complex long double'). Return all complex integers in
7749 registers (4 at most for '_Complex TItype').
7750
7751 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7752 integers are returned like floats of the same size, that is in
7753 registers. Return all vector floats like structure and unions;
7754 note that they always have BLKmode like the latter. */
7755 return (TYPE_MODE (type) == BLKmode
7756 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7757 }
7758
7759 /* Handle the TARGET_STRUCT_VALUE target hook.
7760 Return where to find the structure return value address. */
7761
7762 static rtx
7763 sparc_struct_value_rtx (tree fndecl, int incoming)
7764 {
7765 if (TARGET_ARCH64)
7766 return NULL_RTX;
7767 else
7768 {
7769 rtx mem;
7770
7771 if (incoming)
7772 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7773 STRUCT_VALUE_OFFSET));
7774 else
7775 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7776 STRUCT_VALUE_OFFSET));
7777
7778 /* Only follow the SPARC ABI for fixed-size structure returns.
7779 Variable size structure returns are handled per the normal
7780 procedures in GCC. This is enabled by -mstd-struct-return */
7781 if (incoming == 2
7782 && sparc_std_struct_return
7783 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7784 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7785 {
7786 /* We must check and adjust the return address, as it is optional
7787 as to whether the return object is really provided. */
7788 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7789 rtx scratch = gen_reg_rtx (SImode);
7790 rtx_code_label *endlab = gen_label_rtx ();
7791
7792 /* Calculate the return object size. */
7793 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7794 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7795 /* Construct a temporary return value. */
7796 rtx temp_val
7797 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7798
7799 /* Implement SPARC 32-bit psABI callee return struct checking:
7800
7801 Fetch the instruction where we will return to and see if
7802 it's an unimp instruction (the most significant 10 bits
7803 will be zero). */
7804 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7805 plus_constant (Pmode,
7806 ret_reg, 8)));
7807 /* Assume the size is valid and pre-adjust. */
7808 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7809 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7810 0, endlab);
7811 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7812 /* Write the address of the memory pointed to by temp_val into
7813 the memory pointed to by mem. */
7814 emit_move_insn (mem, XEXP (temp_val, 0));
7815 emit_label (endlab);
7816 }
7817
7818 return mem;
7819 }
7820 }
7821
7822 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7823 For v9, function return values are subject to the same rules as arguments,
7824 except that up to 32 bytes may be returned in registers. */
7825
7826 static rtx
7827 sparc_function_value_1 (const_tree type, machine_mode mode, bool outgoing)
7828 {
7829 /* Beware that the two values are swapped here wrt function_arg. */
7830 const int regbase
7831 = outgoing ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
7832 enum mode_class mclass = GET_MODE_CLASS (mode);
7833 int regno;
7834
7835 /* Integer vectors are handled like floats as per the Sun VIS SDK.
7836 Note that integer vectors larger than 16 bytes have BLKmode so
7837 they need to be handled like floating-point vectors below. */
7838 if (type && VECTOR_INTEGER_TYPE_P (type) && mode != BLKmode)
7839 mclass = MODE_FLOAT;
7840
7841 if (TARGET_ARCH64 && type)
7842 {
7843 /* Structures up to 32 bytes in size are returned in registers. */
7844 if (TREE_CODE (type) == RECORD_TYPE)
7845 {
7846 const int size = int_size_in_bytes (type);
7847 gcc_assert (size <= 32);
7848
7849 return function_arg_record_value (type, mode, 0, true, regbase);
7850 }
7851
7852 /* Unions up to 32 bytes in size are returned in integer registers. */
7853 else if (TREE_CODE (type) == UNION_TYPE)
7854 {
7855 const int size = int_size_in_bytes (type);
7856 gcc_assert (size <= 32);
7857
7858 return function_arg_union_value (size, mode, 0, regbase);
7859 }
7860
7861 /* Vectors up to 32 bytes are returned in FP registers. */
7862 else if (VECTOR_TYPE_P (type) && mode == BLKmode)
7863 {
7864 const int size = int_size_in_bytes (type);
7865 gcc_assert (size <= 32);
7866
7867 return function_arg_vector_value (size, 0, true, SPARC_FP_ARG_FIRST);
7868 }
7869
7870 /* Objects that require it are returned in FP registers. */
7871 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7872 ;
7873
7874 /* All other aggregate types are returned in an integer register in a
7875 mode corresponding to the size of the type. */
7876 else if (AGGREGATE_TYPE_P (type))
7877 {
7878 /* All other aggregate types are passed in an integer register
7879 in a mode corresponding to the size of the type. */
7880 const int size = int_size_in_bytes (type);
7881 gcc_assert (size <= 32);
7882
7883 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7884
7885 /* ??? We probably should have made the same ABI change in
7886 3.4.0 as the one we made for unions. The latter was
7887 required by the SCD though, while the former is not
7888 specified, so we favored compatibility and efficiency.
7889
7890 Now we're stuck for aggregates larger than 16 bytes,
7891 because OImode vanished in the meantime. Let's not
7892 try to be unduly clever, and simply follow the ABI
7893 for unions in that case. */
7894 if (mode == BLKmode)
7895 return function_arg_union_value (size, mode, 0, regbase);
7896 else
7897 mclass = MODE_INT;
7898 }
7899
7900 /* We should only have pointer and integer types at this point. This
7901 must match sparc_promote_function_mode. */
7902 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7903 mode = word_mode;
7904 }
7905
7906 /* We should only have pointer and integer types at this point, except with
7907 -freg-struct-return. This must match sparc_promote_function_mode. */
7908 else if (TARGET_ARCH32
7909 && !(type && AGGREGATE_TYPE_P (type))
7910 && mclass == MODE_INT
7911 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7912 mode = word_mode;
7913
7914 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7915 regno = SPARC_FP_ARG_FIRST;
7916 else
7917 regno = regbase;
7918
7919 return gen_rtx_REG (mode, regno);
7920 }
7921
7922 /* Handle TARGET_FUNCTION_VALUE.
7923 On the SPARC, the value is found in the first "output" register, but the
7924 called function leaves it in the first "input" register. */
7925
7926 static rtx
7927 sparc_function_value (const_tree valtype,
7928 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7929 bool outgoing)
7930 {
7931 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7932 }
7933
7934 /* Handle TARGET_LIBCALL_VALUE. */
7935
7936 static rtx
7937 sparc_libcall_value (machine_mode mode,
7938 const_rtx fun ATTRIBUTE_UNUSED)
7939 {
7940 return sparc_function_value_1 (NULL_TREE, mode, false);
7941 }
7942
7943 /* Handle FUNCTION_VALUE_REGNO_P.
7944 On the SPARC, the first "output" reg is used for integer values, and the
7945 first floating point register is used for floating point values. */
7946
7947 static bool
7948 sparc_function_value_regno_p (const unsigned int regno)
7949 {
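  /* Hard register 8 is %o0 and hard register 32 is %f0.  */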
7950 return (regno == 8 || (TARGET_FPU && regno == 32));
7951 }
7952
7953 /* Do what is necessary for `va_start'. We look at the current function
7954 to determine if stdarg or varargs is used and return the address of
7955 the first unnamed parameter. */
7956
7957 static rtx
7958 sparc_builtin_saveregs (void)
7959 {
7960 int first_reg = crtl->args.info.words;
7961 rtx address;
7962 int regno;
7963
7964 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7965 emit_move_insn (gen_rtx_MEM (word_mode,
7966 gen_rtx_PLUS (Pmode,
7967 frame_pointer_rtx,
7968 GEN_INT (FIRST_PARM_OFFSET (0)
7969 + (UNITS_PER_WORD
7970 * regno)))),
7971 gen_rtx_REG (word_mode,
7972 SPARC_INCOMING_INT_ARG_FIRST + regno));
7973
7974 address = gen_rtx_PLUS (Pmode,
7975 frame_pointer_rtx,
7976 GEN_INT (FIRST_PARM_OFFSET (0)
7977 + UNITS_PER_WORD * first_reg));
7978
7979 return address;
7980 }
7981
7982 /* Implement `va_start' for stdarg. */
7983
7984 static void
7985 sparc_va_start (tree valist, rtx nextarg)
7986 {
7987 nextarg = expand_builtin_saveregs ();
7988 std_expand_builtin_va_start (valist, nextarg);
7989 }
7990
7991 /* Implement `va_arg' for stdarg. */
7992
7993 static tree
7994 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7995 gimple_seq *post_p)
7996 {
7997 HOST_WIDE_INT size, rsize, align;
7998 tree addr, incr;
7999 bool indirect;
8000 tree ptrtype = build_pointer_type (type);
8001
8002 if (pass_va_arg_by_reference (type))
8003 {
8004 indirect = true;
8005 size = rsize = UNITS_PER_WORD;
8006 align = 0;
8007 }
8008 else
8009 {
8010 indirect = false;
8011 size = int_size_in_bytes (type);
8012 rsize = ROUND_UP (size, UNITS_PER_WORD);
8013 align = 0;
8014
8015 if (TARGET_ARCH64)
8016 {
8017 /* For SPARC64, objects requiring 16-byte alignment get it. */
8018 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
8019 align = 2 * UNITS_PER_WORD;
8020
8021 /* SPARC-V9 ABI states that structures up to 16 bytes in size
8022 are left-justified in their slots. */
8023 if (AGGREGATE_TYPE_P (type))
8024 {
8025 if (size == 0)
8026 size = rsize = UNITS_PER_WORD;
8027 else
8028 size = rsize;
8029 }
8030 }
8031 }
8032
8033 incr = valist;
8034 if (align)
8035 {
8036 incr = fold_build_pointer_plus_hwi (incr, align - 1);
8037 incr = fold_convert (sizetype, incr);
8038 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
8039 size_int (-align));
8040 incr = fold_convert (ptr_type_node, incr);
8041 }
8042
8043 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
8044 addr = incr;
8045
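  /* Smaller-than-slot scalars are right-justified in their slot on this
     big-endian target, so step past the leading padding.  */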
8046 if (BYTES_BIG_ENDIAN && size < rsize)
8047 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
8048
8049 if (indirect)
8050 {
8051 addr = fold_convert (build_pointer_type (ptrtype), addr);
8052 addr = build_va_arg_indirect_ref (addr);
8053 }
8054
8055 /* If the address isn't aligned properly for the type, we need a temporary.
8056 FIXME: This is inefficient, usually we can do this in registers. */
8057 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
8058 {
8059 tree tmp = create_tmp_var (type, "va_arg_tmp");
8060 tree dest_addr = build_fold_addr_expr (tmp);
8061 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8062 3, dest_addr, addr, size_int (rsize));
8063 TREE_ADDRESSABLE (tmp) = 1;
8064 gimplify_and_add (copy, pre_p);
8065 addr = dest_addr;
8066 }
8067
8068 else
8069 addr = fold_convert (ptrtype, addr);
8070
8071 incr = fold_build_pointer_plus_hwi (incr, rsize);
8072 gimplify_assign (valist, incr, post_p);
8073
8074 return build_va_arg_indirect_ref (addr);
8075 }
8076
8077 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
8078 Specify whether the vector mode is supported by the hardware. */
8079
8080 static bool
8081 sparc_vector_mode_supported_p (machine_mode mode)
8082 {
8083 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
8084 }
8085
8086 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
8087
8088 static machine_mode
8089 sparc_preferred_simd_mode (scalar_mode mode)
8090 {
8091 if (TARGET_VIS)
8092 switch (mode)
8093 {
8094 case E_SImode:
8095 return V2SImode;
8096 case E_HImode:
8097 return V4HImode;
8098 case E_QImode:
8099 return V8QImode;
8100
8101 default:;
8102 }
8103
8104 return word_mode;
8105 }
8106
8107 /* Implement TARGET_CAN_FOLLOW_JUMP. */
8108
8109 static bool
8110 sparc_can_follow_jump (const rtx_insn *follower, const rtx_insn *followee)
8111 {
8112 /* Do not fold unconditional jumps that have been created for crossing
8113 partition boundaries. */
8114 if (CROSSING_JUMP_P (followee) && !CROSSING_JUMP_P (follower))
8115 return false;
8116
8117 return true;
8118 }
8119
8120 /* Return the string to output an unconditional branch to LABEL, which is
8121 the operand number of the label.
8122
8123 DEST is the destination insn (i.e. the label), INSN is the source. */
8124
8125 const char *
8126 output_ubranch (rtx dest, rtx_insn *insn)
8127 {
8128 static char string[64];
8129 bool v9_form = false;
8130 int delta;
8131 char *p;
8132
8133 /* Even if we are trying to use cbcond for this, evaluate
8134 whether we can use V9 branches as our backup plan. */
8135 delta = 5000000;
8136 if (!CROSSING_JUMP_P (insn) && INSN_ADDRESSES_SET_P ())
8137 delta = (INSN_ADDRESSES (INSN_UID (dest))
8138 - INSN_ADDRESSES (INSN_UID (insn)));
8139
8140 /* Leave some instructions for "slop". */
8141 if (TARGET_V9 && delta >= -260000 && delta < 260000)
8142 v9_form = true;
8143
8144 if (TARGET_CBCOND)
8145 {
8146 bool emit_nop = emit_cbcond_nop (insn);
8147 bool far = false;
8148 const char *rval;
8149
8150 if (delta < -500 || delta > 500)
8151 far = true;
8152
8153 if (far)
8154 {
8155 if (v9_form)
8156 rval = "ba,a,pt\t%%xcc, %l0";
8157 else
8158 rval = "b,a\t%l0";
8159 }
8160 else
8161 {
8162 if (emit_nop)
8163 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
8164 else
8165 rval = "cwbe\t%%g0, %%g0, %l0";
8166 }
8167 return rval;
8168 }
8169
8170 if (v9_form)
8171 strcpy (string, "ba%*,pt\t%%xcc, ");
8172 else
8173 strcpy (string, "b%*\t");
8174
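  /* Append the label operand plus the '%(' print-operand code; as handled by
     sparc_print_operand, '%*' above may add an annul flag and '%(' emits a
     filler nop when the delay slot ends up empty.  */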
8175 p = strchr (string, '\0');
8176 *p++ = '%';
8177 *p++ = 'l';
8178 *p++ = '0';
8179 *p++ = '%';
8180 *p++ = '(';
8181 *p = '\0';
8182
8183 return string;
8184 }
8185
8186 /* Return the string to output a conditional branch to LABEL, which is
8187 the operand number of the label. OP is the conditional expression.
8188 XEXP (OP, 0) is assumed to be a condition code register (integer or
8189 floating point) and its mode specifies what kind of comparison we made.
8190
8191 DEST is the destination insn (i.e. the label), INSN is the source.
8192
8193 REVERSED is nonzero if we should reverse the sense of the comparison.
8194
8195 ANNUL is nonzero if we should generate an annulling branch. */
8196
8197 const char *
8198 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
8199 rtx_insn *insn)
8200 {
8201 static char string[64];
8202 enum rtx_code code = GET_CODE (op);
8203 rtx cc_reg = XEXP (op, 0);
8204 machine_mode mode = GET_MODE (cc_reg);
8205 const char *labelno, *branch;
8206 int spaces = 8, far;
8207 char *p;
8208
8209 /* v9 branches are limited to +-1MB. If it is too far away,
8210 change
8211
8212 bne,pt %xcc, .LC30
8213
8214 to
8215
8216 be,pn %xcc, .+12
8217 nop
8218 ba .LC30
8219
8220 and
8221
8222 fbne,a,pn %fcc2, .LC29
8223
8224 to
8225
8226 fbe,pt %fcc2, .+16
8227 nop
8228 ba .LC29 */
8229
8230 far = TARGET_V9 && (get_attr_length (insn) >= 3);
8231 if (reversed ^ far)
8232 {
8233 /* Reversal of FP compares takes care -- an ordered compare
8234 becomes an unordered compare and vice versa. */
8235 if (mode == CCFPmode || mode == CCFPEmode)
8236 code = reverse_condition_maybe_unordered (code);
8237 else
8238 code = reverse_condition (code);
8239 }
8240
8241 /* Start by writing the branch condition. */
8242 if (mode == CCFPmode || mode == CCFPEmode)
8243 {
8244 switch (code)
8245 {
8246 case NE:
8247 branch = "fbne";
8248 break;
8249 case EQ:
8250 branch = "fbe";
8251 break;
8252 case GE:
8253 branch = "fbge";
8254 break;
8255 case GT:
8256 branch = "fbg";
8257 break;
8258 case LE:
8259 branch = "fble";
8260 break;
8261 case LT:
8262 branch = "fbl";
8263 break;
8264 case UNORDERED:
8265 branch = "fbu";
8266 break;
8267 case ORDERED:
8268 branch = "fbo";
8269 break;
8270 case UNGT:
8271 branch = "fbug";
8272 break;
8273 case UNLT:
8274 branch = "fbul";
8275 break;
8276 case UNEQ:
8277 branch = "fbue";
8278 break;
8279 case UNGE:
8280 branch = "fbuge";
8281 break;
8282 case UNLE:
8283 branch = "fbule";
8284 break;
8285 case LTGT:
8286 branch = "fblg";
8287 break;
8288 default:
8289 gcc_unreachable ();
8290 }
8291
8292 /* ??? !v9: FP branches cannot be preceded by another floating point
8293 insn. Because there is currently no concept of pre-delay slots,
8294 we can fix this only by always emitting a nop before a floating
8295 point branch. */
8296
8297 string[0] = '\0';
8298 if (! TARGET_V9)
8299 strcpy (string, "nop\n\t");
8300 strcat (string, branch);
8301 }
8302 else
8303 {
8304 switch (code)
8305 {
8306 case NE:
8307 if (mode == CCVmode || mode == CCXVmode)
8308 branch = "bvs";
8309 else
8310 branch = "bne";
8311 break;
8312 case EQ:
8313 if (mode == CCVmode || mode == CCXVmode)
8314 branch = "bvc";
8315 else
8316 branch = "be";
8317 break;
8318 case GE:
8319 if (mode == CCNZmode || mode == CCXNZmode)
8320 branch = "bpos";
8321 else
8322 branch = "bge";
8323 break;
8324 case GT:
8325 branch = "bg";
8326 break;
8327 case LE:
8328 branch = "ble";
8329 break;
8330 case LT:
8331 if (mode == CCNZmode || mode == CCXNZmode)
8332 branch = "bneg";
8333 else
8334 branch = "bl";
8335 break;
8336 case GEU:
8337 branch = "bgeu";
8338 break;
8339 case GTU:
8340 branch = "bgu";
8341 break;
8342 case LEU:
8343 branch = "bleu";
8344 break;
8345 case LTU:
8346 branch = "blu";
8347 break;
8348 default:
8349 gcc_unreachable ();
8350 }
8351 strcpy (string, branch);
8352 }
8353 spaces -= strlen (branch);
8354 p = strchr (string, '\0');
8355
8356 /* Now add the annulling, the label, and a possible noop. */
8357 if (annul && ! far)
8358 {
8359 strcpy (p, ",a");
8360 p += 2;
8361 spaces -= 2;
8362 }
8363
8364 if (TARGET_V9)
8365 {
8366 rtx note;
8367 int v8 = 0;
8368
8369 if (! far && insn && INSN_ADDRESSES_SET_P ())
8370 {
8371 int delta = (INSN_ADDRESSES (INSN_UID (dest))
8372 - INSN_ADDRESSES (INSN_UID (insn)));
8373 /* Leave some instructions for "slop". */
8374 if (delta < -260000 || delta >= 260000)
8375 v8 = 1;
8376 }
8377
8378 switch (mode)
8379 {
8380 case E_CCmode:
8381 case E_CCNZmode:
8382 case E_CCCmode:
8383 case E_CCVmode:
8384 labelno = "%%icc, ";
8385 if (v8)
8386 labelno = "";
8387 break;
8388 case E_CCXmode:
8389 case E_CCXNZmode:
8390 case E_CCXCmode:
8391 case E_CCXVmode:
8392 labelno = "%%xcc, ";
8393 gcc_assert (!v8);
8394 break;
8395 case E_CCFPmode:
8396 case E_CCFPEmode:
8397 {
8398 static char v9_fcc_labelno[] = "%%fccX, ";
8399 /* Set the char indicating the number of the fcc reg to use. */
8400 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
8401 labelno = v9_fcc_labelno;
8402 if (v8)
8403 {
8404 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
8405 labelno = "";
8406 }
8407 }
8408 break;
8409 default:
8410 gcc_unreachable ();
8411 }
8412
8413 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8414 {
8415 strcpy (p,
8416 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8417 >= profile_probability::even ()) ^ far)
8418 ? ",pt" : ",pn");
8419 p += 3;
8420 spaces -= 3;
8421 }
8422 }
8423 else
8424 labelno = "";
8425
8426 if (spaces > 0)
8427 *p++ = '\t';
8428 else
8429 *p++ = ' ';
8430 strcpy (p, labelno);
8431 p = strchr (p, '\0');
8432 if (far)
8433 {
8434 strcpy (p, ".+12\n\t nop\n\tb\t");
8435 /* Skip the next insn if requested or
8436 if we know that it will be a nop. */
8437 if (annul || ! final_sequence)
8438 p[3] = '6';
8439 p += 14;
8440 }
8441 *p++ = '%';
8442 *p++ = 'l';
8443 *p++ = label + '0';
8444 *p++ = '%';
8445 *p++ = '#';
8446 *p = '\0';
8447
8448 return string;
8449 }
8450
8451 /* Emit a library call comparison between floating point X and Y.
8452 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
8453 Return the new operator to be used in the comparison sequence.
8454
8455 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
8456 values as arguments instead of the TFmode registers themselves,
8457 that's why we cannot call emit_float_lib_cmp. */
8458
8459 rtx
8460 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
8461 {
8462 const char *qpfunc;
8463 rtx slot0, slot1, result, tem, tem2, libfunc;
8464 machine_mode mode;
8465 enum rtx_code new_comparison;
8466
8467 switch (comparison)
8468 {
8469 case EQ:
8470 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
8471 break;
8472
8473 case NE:
8474 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
8475 break;
8476
8477 case GT:
8478 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
8479 break;
8480
8481 case GE:
8482 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
8483 break;
8484
8485 case LT:
8486 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
8487 break;
8488
8489 case LE:
8490 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
8491 break;
8492
8493 case ORDERED:
8494 case UNORDERED:
8495 case UNGT:
8496 case UNLT:
8497 case UNEQ:
8498 case UNGE:
8499 case UNLE:
8500 case LTGT:
8501 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
8502 break;
8503
8504 default:
8505 gcc_unreachable ();
8506 }
8507
8508 if (TARGET_ARCH64)
8509 {
8510 if (MEM_P (x))
8511 {
8512 tree expr = MEM_EXPR (x);
8513 if (expr)
8514 mark_addressable (expr);
8515 slot0 = x;
8516 }
8517 else
8518 {
8519 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8520 emit_move_insn (slot0, x);
8521 }
8522
8523 if (MEM_P (y))
8524 {
8525 tree expr = MEM_EXPR (y);
8526 if (expr)
8527 mark_addressable (expr);
8528 slot1 = y;
8529 }
8530 else
8531 {
8532 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8533 emit_move_insn (slot1, y);
8534 }
8535
8536 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8537 emit_library_call (libfunc, LCT_NORMAL,
8538 DImode,
8539 XEXP (slot0, 0), Pmode,
8540 XEXP (slot1, 0), Pmode);
8541 mode = DImode;
8542 }
8543 else
8544 {
8545 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8546 emit_library_call (libfunc, LCT_NORMAL,
8547 SImode,
8548 x, TFmode, y, TFmode);
8549 mode = SImode;
8550 }
8551
8552
8553 /* Immediately move the result of the libcall into a pseudo
8554 register so reload doesn't clobber the value if it needs
8555 the return register for a spill reg. */
8556 result = gen_reg_rtx (mode);
8557 emit_move_insn (result, hard_libcall_value (mode, libfunc));
8558
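  /* The _Q_cmp/_Qp_cmp routines return 0 for equal, 1 for less, 2 for
     greater and 3 for unordered, which is what the constants tested
     against below encode.  */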
8559 switch (comparison)
8560 {
8561 default:
8562 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8563 case ORDERED:
8564 case UNORDERED:
8565 new_comparison = (comparison == UNORDERED ? EQ : NE);
8566 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8567 case UNGT:
8568 case UNGE:
8569 new_comparison = (comparison == UNGT ? GT : NE);
8570 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8571 case UNLE:
8572 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8573 case UNLT:
8574 tem = gen_reg_rtx (mode);
8575 if (TARGET_ARCH32)
8576 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8577 else
8578 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8579 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8580 case UNEQ:
8581 case LTGT:
8582 tem = gen_reg_rtx (mode);
8583 if (TARGET_ARCH32)
8584 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8585 else
8586 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8587 tem2 = gen_reg_rtx (mode);
8588 if (TARGET_ARCH32)
8589 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8590 else
8591 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8592 new_comparison = (comparison == UNEQ ? EQ : NE);
8593 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8594 }
8595
8596 gcc_unreachable ();
8597 }
8598
8599 /* Generate an unsigned DImode to FP conversion. This is the same code
8600 optabs would emit if we didn't have TFmode patterns. */
8601
8602 void
8603 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8604 {
8605 rtx i0, i1, f0, in, out;
8606
8607 out = operands[0];
8608 in = force_reg (DImode, operands[1]);
8609 rtx_code_label *neglab = gen_label_rtx ();
8610 rtx_code_label *donelab = gen_label_rtx ();
8611 i0 = gen_reg_rtx (DImode);
8612 i1 = gen_reg_rtx (DImode);
8613 f0 = gen_reg_rtx (mode);
8614
8615 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8616
8617 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8618 emit_jump_insn (gen_jump (donelab));
8619 emit_barrier ();
8620
8621 emit_label (neglab);
8622
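  /* The value has its most significant bit set: shift it right by one while
     folding the discarded low bit back in (to preserve rounding), convert
     the halved value, then double the result.  */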
8623 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8624 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8625 emit_insn (gen_iordi3 (i0, i0, i1));
8626 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8627 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8628
8629 emit_label (donelab);
8630 }
8631
8632 /* Generate an FP to unsigned DImode conversion. This is the same code
8633 optabs would emit if we didn't have TFmode patterns. */
8634
8635 void
8636 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8637 {
8638 rtx i0, i1, f0, in, out, limit;
8639
8640 out = operands[0];
8641 in = force_reg (mode, operands[1]);
8642 rtx_code_label *neglab = gen_label_rtx ();
8643 rtx_code_label *donelab = gen_label_rtx ();
8644 i0 = gen_reg_rtx (DImode);
8645 i1 = gen_reg_rtx (DImode);
8646 limit = gen_reg_rtx (mode);
8647 f0 = gen_reg_rtx (mode);
8648
8649 emit_move_insn (limit,
8650 const_double_from_real_value (
8651 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8652 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8653
8654 emit_insn (gen_rtx_SET (out,
8655 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8656 emit_jump_insn (gen_jump (donelab));
8657 emit_barrier ();
8658
8659 emit_label (neglab);
8660
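  /* The value is at least 2**63: subtract 2**63, convert the difference,
     then set the sign bit of the integer result by XORing in 1 << 63.  */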
8661 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8662 emit_insn (gen_rtx_SET (i0,
8663 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8664 emit_insn (gen_movdi (i1, const1_rtx));
8665 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8666 emit_insn (gen_xordi3 (out, i0, i1));
8667
8668 emit_label (donelab);
8669 }
8670
8671 /* Return the string to output a compare and branch instruction to DEST.
8672 DEST is the destination insn (i.e. the label), INSN is the source,
8673 and OP is the conditional expression. */
8674
8675 const char *
8676 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8677 {
8678 machine_mode mode = GET_MODE (XEXP (op, 0));
8679 enum rtx_code code = GET_CODE (op);
8680 const char *cond_str, *tmpl;
8681 int far, emit_nop, len;
8682 static char string[64];
8683 char size_char;
8684
8685 /* Compare and Branch is limited to +-2KB. If it is too far away,
8686 change
8687
8688 cxbne X, Y, .LC30
8689
8690 to
8691
8692 cxbe X, Y, .+16
8693 nop
8694 ba,pt xcc, .LC30
8695 nop */
8696
8697 len = get_attr_length (insn);
8698
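  /* The length attribute counts 4-byte instructions: a length of 2 should
     mean the cbcond needs a trailing nop, and 4 that the target is out of
     range and the far sequence below must be used.  */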
8699 far = len == 4;
8700 emit_nop = len == 2;
8701
8702 if (far)
8703 code = reverse_condition (code);
8704
8705 size_char = ((mode == SImode) ? 'w' : 'x');
8706
8707 switch (code)
8708 {
8709 case NE:
8710 cond_str = "ne";
8711 break;
8712
8713 case EQ:
8714 cond_str = "e";
8715 break;
8716
8717 case GE:
8718 cond_str = "ge";
8719 break;
8720
8721 case GT:
8722 cond_str = "g";
8723 break;
8724
8725 case LE:
8726 cond_str = "le";
8727 break;
8728
8729 case LT:
8730 cond_str = "l";
8731 break;
8732
8733 case GEU:
8734 cond_str = "cc";
8735 break;
8736
8737 case GTU:
8738 cond_str = "gu";
8739 break;
8740
8741 case LEU:
8742 cond_str = "leu";
8743 break;
8744
8745 case LTU:
8746 cond_str = "cs";
8747 break;
8748
8749 default:
8750 gcc_unreachable ();
8751 }
8752
8753 if (far)
8754 {
8755 int veryfar = 1, delta;
8756
8757 if (INSN_ADDRESSES_SET_P ())
8758 {
8759 delta = (INSN_ADDRESSES (INSN_UID (dest))
8760 - INSN_ADDRESSES (INSN_UID (insn)));
8761 /* Leave some instructions for "slop". */
8762 if (delta >= -260000 && delta < 260000)
8763 veryfar = 0;
8764 }
8765
8766 if (veryfar)
8767 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8768 else
8769 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8770 }
8771 else
8772 {
8773 if (emit_nop)
8774 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8775 else
8776 tmpl = "c%cb%s\t%%1, %%2, %%3";
8777 }
8778
8779 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8780
8781 return string;
8782 }
8783
8784 /* Return the string to output a conditional branch to LABEL, testing
8785 register REG. LABEL is the operand number of the label; REG is the
8786 operand number of the reg. OP is the conditional expression. The mode
8787 of REG says what kind of comparison we made.
8788
8789 DEST is the destination insn (i.e. the label), INSN is the source.
8790
8791 REVERSED is nonzero if we should reverse the sense of the comparison.
8792
8793 ANNUL is nonzero if we should generate an annulling branch. */
8794
8795 const char *
8796 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8797 int annul, rtx_insn *insn)
8798 {
8799 static char string[64];
8800 enum rtx_code code = GET_CODE (op);
8801 machine_mode mode = GET_MODE (XEXP (op, 0));
8802 rtx note;
8803 int far;
8804 char *p;
8805
8806   /* Branches on a register are limited to +-128KB.  If the target is too far away,
8807 change
8808
8809 brnz,pt %g1, .LC30
8810
8811 to
8812
8813 brz,pn %g1, .+12
8814 nop
8815 ba,pt %xcc, .LC30
8816
8817 and
8818
8819 brgez,a,pn %o1, .LC29
8820
8821 to
8822
8823 brlz,pt %o1, .+16
8824 nop
8825 ba,pt %xcc, .LC29 */
8826
8827 far = get_attr_length (insn) >= 3;
8828
8829 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8830 if (reversed ^ far)
8831 code = reverse_condition (code);
8832
8833 /* Only 64-bit versions of these instructions exist. */
8834 gcc_assert (mode == DImode);
8835
8836 /* Start by writing the branch condition. */
8837
8838 switch (code)
8839 {
8840 case NE:
8841 strcpy (string, "brnz");
8842 break;
8843
8844 case EQ:
8845 strcpy (string, "brz");
8846 break;
8847
8848 case GE:
8849 strcpy (string, "brgez");
8850 break;
8851
8852 case LT:
8853 strcpy (string, "brlz");
8854 break;
8855
8856 case LE:
8857 strcpy (string, "brlez");
8858 break;
8859
8860 case GT:
8861 strcpy (string, "brgz");
8862 break;
8863
8864 default:
8865 gcc_unreachable ();
8866 }
8867
8868 p = strchr (string, '\0');
8869
8870 /* Now add the annulling, reg, label, and nop. */
8871 if (annul && ! far)
8872 {
8873 strcpy (p, ",a");
8874 p += 2;
8875 }
8876
8877 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8878 {
8879 strcpy (p,
8880 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8881 >= profile_probability::even ()) ^ far)
8882 ? ",pt" : ",pn");
8883 p += 3;
8884 }
8885
8886 *p = p < string + 8 ? '\t' : ' ';
8887 p++;
8888 *p++ = '%';
8889 *p++ = '0' + reg;
8890 *p++ = ',';
8891 *p++ = ' ';
8892 if (far)
8893 {
8894 int veryfar = 1, delta;
8895
8896 if (INSN_ADDRESSES_SET_P ())
8897 {
8898 delta = (INSN_ADDRESSES (INSN_UID (dest))
8899 - INSN_ADDRESSES (INSN_UID (insn)));
8900 /* Leave some instructions for "slop". */
8901 if (delta >= -260000 && delta < 260000)
8902 veryfar = 0;
8903 }
8904
8905 strcpy (p, ".+12\n\t nop\n\t");
8906 /* Skip the next insn if requested or
8907 if we know that it will be a nop. */
8908 if (annul || ! final_sequence)
8909 p[3] = '6';
8910 p += 12;
8911 if (veryfar)
8912 {
8913 strcpy (p, "b\t");
8914 p += 2;
8915 }
8916 else
8917 {
8918 strcpy (p, "ba,pt\t%%xcc, ");
8919 p += 13;
8920 }
8921 }
8922 *p++ = '%';
8923 *p++ = 'l';
8924 *p++ = '0' + label;
8925 *p++ = '%';
8926 *p++ = '#';
8927 *p = '\0';
8928
8929 return string;
8930 }
8931
8932 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8933    Such instructions cannot be used in the delay slot of a return insn on v9.
8934    If TEST is 0, also rename all %i[0-7] registers to their %o[0-7]
8935    counterparts.  */
8936
8937 static int
8938 epilogue_renumber (register rtx *where, int test)
8939 {
8940 register const char *fmt;
8941 register int i;
8942 register enum rtx_code code;
8943
8944 if (*where == 0)
8945 return 0;
8946
8947 code = GET_CODE (*where);
8948
8949 switch (code)
8950 {
8951 case REG:
8952 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8953 return 1;
8954 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8955 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8956 /* fallthrough */
8957 case SCRATCH:
8958 case CC0:
8959 case PC:
8960 case CONST_INT:
8961 case CONST_WIDE_INT:
8962 case CONST_DOUBLE:
8963 return 0;
8964
8965 /* Do not replace the frame pointer with the stack pointer because
8966 it can cause the delayed instruction to load below the stack.
8967 This occurs when instructions like:
8968
8969 (set (reg/i:SI 24 %i0)
8970 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8971 (const_int -20 [0xffffffec])) 0))
8972
8973 are in the return delayed slot. */
8974 case PLUS:
8975 if (GET_CODE (XEXP (*where, 0)) == REG
8976 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8977 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8978 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8979 return 1;
8980 break;
8981
8982 case MEM:
8983 if (SPARC_STACK_BIAS
8984 && GET_CODE (XEXP (*where, 0)) == REG
8985 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8986 return 1;
8987 break;
8988
8989 default:
8990 break;
8991 }
8992
8993 fmt = GET_RTX_FORMAT (code);
8994
8995 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8996 {
8997 if (fmt[i] == 'E')
8998 {
8999 register int j;
9000 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
9001 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
9002 return 1;
9003 }
9004 else if (fmt[i] == 'e'
9005 && epilogue_renumber (&(XEXP (*where, i)), test))
9006 return 1;
9007 }
9008 return 0;
9009 }
9010
9011 /* Leaf functions and non-leaf functions have different needs. */
9012
9013 static const int
9014 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
9015
9016 static const int
9017 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
9018
9019 static const int *const reg_alloc_orders[] = {
9020 reg_leaf_alloc_order,
9021 reg_nonleaf_alloc_order};
9022
9023 void
9024 order_regs_for_local_alloc (void)
9025 {
9026 static int last_order_nonleaf = 1;
9027
9028 if (df_regs_ever_live_p (15) != last_order_nonleaf)
9029 {
9030 last_order_nonleaf = !last_order_nonleaf;
9031 memcpy ((char *) reg_alloc_order,
9032 (const char *) reg_alloc_orders[last_order_nonleaf],
9033 FIRST_PSEUDO_REGISTER * sizeof (int));
9034 }
9035 }
9036
9037 /* Return 1 if REG and MEM are legitimate enough to allow the various
9038 MEM<-->REG splits to be run. */
9039
9040 int
9041 sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
9042 {
9043 /* Punt if we are here by mistake. */
9044 gcc_assert (reload_completed);
9045
9046 /* We must have an offsettable memory reference. */
9047 if (!offsettable_memref_p (mem))
9048 return 0;
9049
9050 /* If we have legitimate args for ldd/std, we do not want
9051 the split to happen. */
9052 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
9053 return 0;
9054
9055 /* Success. */
9056 return 1;
9057 }
9058
9059 /* Split a REG <-- MEM move into a pair of moves in MODE. */
9060
9061 void
9062 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
9063 {
9064 rtx high_part = gen_highpart (mode, dest);
9065 rtx low_part = gen_lowpart (mode, dest);
9066 rtx word0 = adjust_address (src, mode, 0);
9067 rtx word1 = adjust_address (src, mode, 4);
9068
9069 if (reg_overlap_mentioned_p (high_part, word1))
9070 {
9071 emit_move_insn_1 (low_part, word1);
9072 emit_move_insn_1 (high_part, word0);
9073 }
9074 else
9075 {
9076 emit_move_insn_1 (high_part, word0);
9077 emit_move_insn_1 (low_part, word1);
9078 }
9079 }
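/* Illustrative example (hypothetical operands, not from the original
   sources): splitting (set (reg:DI %o0) (mem:DI [%fp-16])) with MODE ==
   SImode produces the two word moves %o0 <- [%fp-16] and %o1 <- [%fp-12];
   had the high part also been the base register of the second word, the
   two moves would have been emitted in the opposite order so the base is
   not clobbered before it is used.  */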
9080
9081 /* Split a MEM <-- REG move into a pair of moves in MODE. */
9082
9083 void
9084 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
9085 {
9086 rtx word0 = adjust_address (dest, mode, 0);
9087 rtx word1 = adjust_address (dest, mode, 4);
9088 rtx high_part = gen_highpart (mode, src);
9089 rtx low_part = gen_lowpart (mode, src);
9090
9091 emit_move_insn_1 (word0, high_part);
9092 emit_move_insn_1 (word1, low_part);
9093 }
9094
9095 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */
9096
9097 int
9098 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
9099 {
9100 /* Punt if we are here by mistake. */
9101 gcc_assert (reload_completed);
9102
9103 if (GET_CODE (reg1) == SUBREG)
9104 reg1 = SUBREG_REG (reg1);
9105 if (GET_CODE (reg1) != REG)
9106 return 0;
9107 const int regno1 = REGNO (reg1);
9108
9109 if (GET_CODE (reg2) == SUBREG)
9110 reg2 = SUBREG_REG (reg2);
9111 if (GET_CODE (reg2) != REG)
9112 return 0;
9113 const int regno2 = REGNO (reg2);
9114
9115 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
9116 return 1;
9117
9118 if (TARGET_VIS3)
9119 {
9120 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
9121 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
9122 return 1;
9123 }
9124
9125 return 0;
9126 }
9127
9128 /* Split a REG <--> REG move into a pair of moves in MODE. */
9129
9130 void
9131 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
9132 {
9133 rtx dest1 = gen_highpart (mode, dest);
9134 rtx dest2 = gen_lowpart (mode, dest);
9135 rtx src1 = gen_highpart (mode, src);
9136 rtx src2 = gen_lowpart (mode, src);
9137
9138 /* Now emit using the real source and destination we found, swapping
9139 the order if we detect overlap. */
9140 if (reg_overlap_mentioned_p (dest1, src2))
9141 {
9142 emit_move_insn_1 (dest2, src2);
9143 emit_move_insn_1 (dest1, src1);
9144 }
9145 else
9146 {
9147 emit_move_insn_1 (dest1, src1);
9148 emit_move_insn_1 (dest2, src2);
9149 }
9150 }
9151
9152 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
9153 This makes them candidates for using ldd and std insns.
9154
9155 Note reg1 and reg2 *must* be hard registers. */
9156
9157 int
9158 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
9159 {
9160 /* We might have been passed a SUBREG. */
9161 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
9162 return 0;
9163
9164 if (REGNO (reg1) % 2 != 0)
9165 return 0;
9166
9167 /* Integer ldd is deprecated in SPARC V9 */
9168 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
9169 return 0;
9170
9171 return (REGNO (reg1) == REGNO (reg2) - 1);
9172 }
9173
9174 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
9175 an ldd or std insn.
9176
9177 This can only happen when addr1 and addr2, the addresses in mem1
9178 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
9179 addr1 must also be aligned on a 64-bit boundary.
9180
9181 Also if dependent_reg_rtx is not null it should not be used to
9182 compute the address for mem1, i.e. we cannot optimize a sequence
9183 like:
9184 ld [%o0], %o0
9185 ld [%o0 + 4], %o1
9186 to
9187 ldd [%o0], %o0
9188 nor:
9189 ld [%g3 + 4], %g3
9190 ld [%g3], %g2
9191 to
9192 ldd [%g3], %g2
9193
9194 But, note that the transformation from:
9195 ld [%g2 + 4], %g3
9196 ld [%g2], %g2
9197 to
9198 ldd [%g2], %g2
9199 is perfectly fine. Thus, the peephole2 patterns always pass us
9200 the destination register of the first load, never the second one.
9201
9202 For stores we don't have a similar problem, so dependent_reg_rtx is
9203 NULL_RTX. */
9204
9205 int
9206 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
9207 {
9208 rtx addr1, addr2;
9209 unsigned int reg1;
9210 HOST_WIDE_INT offset1;
9211
9212 /* The mems cannot be volatile. */
9213 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
9214 return 0;
9215
9216 /* MEM1 should be aligned on a 64-bit boundary. */
9217 if (MEM_ALIGN (mem1) < 64)
9218 return 0;
9219
9220 addr1 = XEXP (mem1, 0);
9221 addr2 = XEXP (mem2, 0);
9222
9223 /* Extract a register number and offset (if used) from the first addr. */
9224 if (GET_CODE (addr1) == PLUS)
9225 {
9226 /* If not a REG, return zero. */
9227 if (GET_CODE (XEXP (addr1, 0)) != REG)
9228 return 0;
9229 else
9230 {
9231 reg1 = REGNO (XEXP (addr1, 0));
9232 /* The offset must be constant! */
9233 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
9234 return 0;
9235 offset1 = INTVAL (XEXP (addr1, 1));
9236 }
9237 }
9238 else if (GET_CODE (addr1) != REG)
9239 return 0;
9240 else
9241 {
9242 reg1 = REGNO (addr1);
9243 /* This was a simple (mem (reg)) expression. Offset is 0. */
9244 offset1 = 0;
9245 }
9246
9247 /* Make sure the second address is a (mem (plus (reg) (const_int))). */
9248 if (GET_CODE (addr2) != PLUS)
9249 return 0;
9250
9251 if (GET_CODE (XEXP (addr2, 0)) != REG
9252 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
9253 return 0;
9254
9255 if (reg1 != REGNO (XEXP (addr2, 0)))
9256 return 0;
9257
9258 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
9259 return 0;
9260
9261 /* The first offset must be evenly divisible by 8 to ensure the
9262 address is 64-bit aligned. */
9263 if (offset1 % 8 != 0)
9264 return 0;
9265
9266 /* The offset for the second addr must be 4 more than the first addr. */
9267 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
9268 return 0;
9269
9270 /* All the tests passed. addr1 and addr2 are valid for ldd and std
9271 instructions. */
9272 return 1;
9273 }
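/* Illustrative example (hypothetical addresses, not from the original
   sources): assuming both MEMs are known 64-bit aligned and not volatile,
   the pair [%o2+8] / [%o2+12] passes the checks above (same base register,
   first offset divisible by 8, second offset exactly 4 larger), whereas
   [%o2+4] / [%o2+8] fails the alignment test on the first offset and
   [%o0] / [%o1+4] fails because the base registers differ.  */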
9274
9275 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
9276
9277 rtx
9278 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
9279 {
9280 rtx x = widen_memory_access (mem1, mode, 0);
9281 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
9282 return x;
9283 }
9284
9285 /* Return 1 if reg is a pseudo, or is the first register in
9286 a hard register pair. This makes it suitable for use in
9287 ldd and std insns. */
9288
9289 int
9290 register_ok_for_ldd (rtx reg)
9291 {
9292 /* We might have been passed a SUBREG. */
9293 if (!REG_P (reg))
9294 return 0;
9295
9296 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
9297 return (REGNO (reg) % 2 == 0);
9298
9299 return 1;
9300 }
9301
9302 /* Return 1 if OP, a MEM, has an address which is known to be
9303 aligned to an 8-byte boundary. */
9304
9305 int
9306 memory_ok_for_ldd (rtx op)
9307 {
9308 /* In 64-bit mode, we assume that the address is word-aligned. */
9309 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
9310 return 0;
9311
9312 if (! can_create_pseudo_p ()
9313 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
9314 return 0;
9315
9316 return 1;
9317 }
9318
9319 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
9320
9321 static bool
9322 sparc_print_operand_punct_valid_p (unsigned char code)
9323 {
9324 if (code == '#'
9325 || code == '*'
9326 || code == '('
9327 || code == ')'
9328 || code == '_'
9329 || code == '&')
9330 return true;
9331
9332 return false;
9333 }
9334
9335 /* Implement TARGET_PRINT_OPERAND.
9336 Print operand X (an rtx) in assembler syntax to file FILE.
9337 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
9338 For `%' followed by punctuation, CODE is the punctuation and X is null. */
9339
9340 static void
9341 sparc_print_operand (FILE *file, rtx x, int code)
9342 {
9343 const char *s;
9344
9345 switch (code)
9346 {
9347 case '#':
9348 /* Output an insn in a delay slot. */
9349 if (final_sequence)
9350 sparc_indent_opcode = 1;
9351 else
9352 fputs ("\n\t nop", file);
9353 return;
9354 case '*':
9355 /* Output an annul flag if there's nothing for the delay slot and we
9356 are optimizing. This is always used with '(' below.
9357 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
9358 this is a dbx bug. So, we only do this when optimizing.
9359 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
9360 Always emit a nop in case the next instruction is a branch. */
9361 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
9362 fputs (",a", file);
9363 return;
9364 case '(':
9365 /* Output a 'nop' if there's nothing for the delay slot and we are
9366 not optimizing. This is always used with '*' above. */
9367 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
9368 fputs ("\n\t nop", file);
9369 else if (final_sequence)
9370 sparc_indent_opcode = 1;
9371 return;
9372 case ')':
9373 /* Output the right displacement from the saved PC on function return.
9374 The caller may have placed an "unimp" insn immediately after the call
9375 so we have to account for it. This insn is used in the 32-bit ABI
9376 when calling a function that returns a non zero-sized structure. The
9377 64-bit ABI doesn't have it. Be careful to have this test be the same
9378 as that for the call. The exception is when sparc_std_struct_return
9379 is enabled, the psABI is followed exactly and the adjustment is made
9380 by the code in sparc_struct_value_rtx. The call emitted is the same
9381 when sparc_std_struct_return is enabled. */
9382 if (!TARGET_ARCH64
9383 && cfun->returns_struct
9384 && !sparc_std_struct_return
9385 && DECL_SIZE (DECL_RESULT (current_function_decl))
9386 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
9387 == INTEGER_CST
9388 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
9389 fputs ("12", file);
9390 else
9391 fputc ('8', file);
9392 return;
9393 case '_':
9394 /* Output the Embedded Medium/Anywhere code model base register. */
9395 fputs (EMBMEDANY_BASE_REG, file);
9396 return;
9397 case '&':
9398 /* Print some local dynamic TLS name. */
9399 if (const char *name = get_some_local_dynamic_name ())
9400 assemble_name (file, name);
9401 else
9402 output_operand_lossage ("'%%&' used without any "
9403 "local dynamic TLS references");
9404 return;
9405
9406 case 'Y':
9407 /* Adjust the operand to take into account a RESTORE operation. */
9408 if (GET_CODE (x) == CONST_INT)
9409 break;
9410 else if (GET_CODE (x) != REG)
9411 output_operand_lossage ("invalid %%Y operand");
9412 else if (REGNO (x) < 8)
9413 fputs (reg_names[REGNO (x)], file);
9414 else if (REGNO (x) >= 24 && REGNO (x) < 32)
9415 fputs (reg_names[REGNO (x)-16], file);
9416 else
9417 output_operand_lossage ("invalid %%Y operand");
9418 return;
9419 case 'L':
9420 /* Print out the low order register name of a register pair. */
9421 if (WORDS_BIG_ENDIAN)
9422 fputs (reg_names[REGNO (x)+1], file);
9423 else
9424 fputs (reg_names[REGNO (x)], file);
9425 return;
9426 case 'H':
9427 /* Print out the high order register name of a register pair. */
9428 if (WORDS_BIG_ENDIAN)
9429 fputs (reg_names[REGNO (x)], file);
9430 else
9431 fputs (reg_names[REGNO (x)+1], file);
9432 return;
9433 case 'R':
9434 /* Print out the second register name of a register pair or quad.
9435 I.e., R (%o0) => %o1. */
9436 fputs (reg_names[REGNO (x)+1], file);
9437 return;
9438 case 'S':
9439 /* Print out the third register name of a register quad.
9440 I.e., S (%o0) => %o2. */
9441 fputs (reg_names[REGNO (x)+2], file);
9442 return;
9443 case 'T':
9444 /* Print out the fourth register name of a register quad.
9445 I.e., T (%o0) => %o3. */
9446 fputs (reg_names[REGNO (x)+3], file);
9447 return;
9448 case 'x':
9449 /* Print a condition code register. */
9450 if (REGNO (x) == SPARC_ICC_REG)
9451 {
9452 switch (GET_MODE (x))
9453 {
9454 case E_CCmode:
9455 case E_CCNZmode:
9456 case E_CCCmode:
9457 case E_CCVmode:
9458 s = "%icc";
9459 break;
9460 case E_CCXmode:
9461 case E_CCXNZmode:
9462 case E_CCXCmode:
9463 case E_CCXVmode:
9464 s = "%xcc";
9465 break;
9466 default:
9467 gcc_unreachable ();
9468 }
9469 fputs (s, file);
9470 }
9471 else
9472 /* %fccN register */
9473 fputs (reg_names[REGNO (x)], file);
9474 return;
9475 case 'm':
9476 /* Print the operand's address only. */
9477 output_address (GET_MODE (x), XEXP (x, 0));
9478 return;
9479 case 'r':
9480 /* In this case we need a register. Use %g0 if the
9481 operand is const0_rtx. */
9482 if (x == const0_rtx
9483 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
9484 {
9485 fputs ("%g0", file);
9486 return;
9487 }
9488 else
9489 break;
9490
9491 case 'A':
9492 switch (GET_CODE (x))
9493 {
9494 case IOR:
9495 s = "or";
9496 break;
9497 case AND:
9498 s = "and";
9499 break;
9500 case XOR:
9501 s = "xor";
9502 break;
9503 default:
9504 output_operand_lossage ("invalid %%A operand");
9505 s = "";
9506 break;
9507 }
9508 fputs (s, file);
9509 return;
9510
9511 case 'B':
9512 switch (GET_CODE (x))
9513 {
9514 case IOR:
9515 s = "orn";
9516 break;
9517 case AND:
9518 s = "andn";
9519 break;
9520 case XOR:
9521 s = "xnor";
9522 break;
9523 default:
9524 output_operand_lossage ("invalid %%B operand");
9525 s = "";
9526 break;
9527 }
9528 fputs (s, file);
9529 return;
9530
9531 /* This is used by the conditional move instructions. */
9532 case 'C':
9533 {
9534 machine_mode mode = GET_MODE (XEXP (x, 0));
9535 switch (GET_CODE (x))
9536 {
9537 case NE:
9538 if (mode == CCVmode || mode == CCXVmode)
9539 s = "vs";
9540 else
9541 s = "ne";
9542 break;
9543 case EQ:
9544 if (mode == CCVmode || mode == CCXVmode)
9545 s = "vc";
9546 else
9547 s = "e";
9548 break;
9549 case GE:
9550 if (mode == CCNZmode || mode == CCXNZmode)
9551 s = "pos";
9552 else
9553 s = "ge";
9554 break;
9555 case GT:
9556 s = "g";
9557 break;
9558 case LE:
9559 s = "le";
9560 break;
9561 case LT:
9562 if (mode == CCNZmode || mode == CCXNZmode)
9563 s = "neg";
9564 else
9565 s = "l";
9566 break;
9567 case GEU:
9568 s = "geu";
9569 break;
9570 case GTU:
9571 s = "gu";
9572 break;
9573 case LEU:
9574 s = "leu";
9575 break;
9576 case LTU:
9577 s = "lu";
9578 break;
9579 case LTGT:
9580 s = "lg";
9581 break;
9582 case UNORDERED:
9583 s = "u";
9584 break;
9585 case ORDERED:
9586 s = "o";
9587 break;
9588 case UNLT:
9589 s = "ul";
9590 break;
9591 case UNLE:
9592 s = "ule";
9593 break;
9594 case UNGT:
9595 s = "ug";
9596 break;
9597 case UNGE:
9598 s = "uge"
9599 ; break;
9600 case UNEQ:
9601 s = "ue";
9602 break;
9603 default:
9604 output_operand_lossage ("invalid %%C operand");
9605 s = "";
9606 break;
9607 }
9608 fputs (s, file);
9609 return;
9610 }
9611
9612 /* These are used by the movr instruction pattern. */
9613 case 'D':
9614 {
9615 switch (GET_CODE (x))
9616 {
9617 case NE:
9618 s = "ne";
9619 break;
9620 case EQ:
9621 s = "e";
9622 break;
9623 case GE:
9624 s = "gez";
9625 break;
9626 case LT:
9627 s = "lz";
9628 break;
9629 case LE:
9630 s = "lez";
9631 break;
9632 case GT:
9633 s = "gz";
9634 break;
9635 default:
9636 output_operand_lossage ("invalid %%D operand");
9637 s = "";
9638 break;
9639 }
9640 fputs (s, file);
9641 return;
9642 }
9643
9644 case 'b':
9645 {
9646 /* Print a sign-extended character. */
9647 int i = trunc_int_for_mode (INTVAL (x), QImode);
9648 fprintf (file, "%d", i);
9649 return;
9650 }
9651
9652 case 'f':
9653 /* Operand must be a MEM; write its address. */
9654 if (GET_CODE (x) != MEM)
9655 output_operand_lossage ("invalid %%f operand");
9656 output_address (GET_MODE (x), XEXP (x, 0));
9657 return;
9658
9659 case 's':
9660 {
9661 /* Print a sign-extended 32-bit value. */
9662 HOST_WIDE_INT i;
9663 if (GET_CODE(x) == CONST_INT)
9664 i = INTVAL (x);
9665 else
9666 {
9667 output_operand_lossage ("invalid %%s operand");
9668 return;
9669 }
9670 i = trunc_int_for_mode (i, SImode);
9671 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9672 return;
9673 }
9674
9675 case 0:
9676 /* Do nothing special. */
9677 break;
9678
9679 default:
9680 /* Undocumented flag. */
9681 output_operand_lossage ("invalid operand output code");
9682 }
9683
9684 if (GET_CODE (x) == REG)
9685 fputs (reg_names[REGNO (x)], file);
9686 else if (GET_CODE (x) == MEM)
9687 {
9688 fputc ('[', file);
9689 /* Poor Sun assembler doesn't understand absolute addressing. */
9690 if (CONSTANT_P (XEXP (x, 0)))
9691 fputs ("%g0+", file);
9692 output_address (GET_MODE (x), XEXP (x, 0));
9693 fputc (']', file);
9694 }
9695 else if (GET_CODE (x) == HIGH)
9696 {
9697 fputs ("%hi(", file);
9698 output_addr_const (file, XEXP (x, 0));
9699 fputc (')', file);
9700 }
9701 else if (GET_CODE (x) == LO_SUM)
9702 {
9703 sparc_print_operand (file, XEXP (x, 0), 0);
9704 if (TARGET_CM_MEDMID)
9705 fputs ("+%l44(", file);
9706 else
9707 fputs ("+%lo(", file);
9708 output_addr_const (file, XEXP (x, 1));
9709 fputc (')', file);
9710 }
9711 else if (GET_CODE (x) == CONST_DOUBLE)
9712 output_operand_lossage ("floating-point constant not a valid immediate operand");
9713 else
9714 output_addr_const (file, x);
9715 }
9716
9717 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9718
9719 static void
9720 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9721 {
9722 register rtx base, index = 0;
9723 int offset = 0;
9724 register rtx addr = x;
9725
9726 if (REG_P (addr))
9727 fputs (reg_names[REGNO (addr)], file);
9728 else if (GET_CODE (addr) == PLUS)
9729 {
9730 if (CONST_INT_P (XEXP (addr, 0)))
9731 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9732 else if (CONST_INT_P (XEXP (addr, 1)))
9733 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9734 else
9735 base = XEXP (addr, 0), index = XEXP (addr, 1);
9736 if (GET_CODE (base) == LO_SUM)
9737 {
9738 gcc_assert (USE_AS_OFFSETABLE_LO10
9739 && TARGET_ARCH64
9740 && ! TARGET_CM_MEDMID);
9741 output_operand (XEXP (base, 0), 0);
9742 fputs ("+%lo(", file);
9743 output_address (VOIDmode, XEXP (base, 1));
9744 fprintf (file, ")+%d", offset);
9745 }
9746 else
9747 {
9748 fputs (reg_names[REGNO (base)], file);
9749 if (index == 0)
9750 fprintf (file, "%+d", offset);
9751 else if (REG_P (index))
9752 fprintf (file, "+%s", reg_names[REGNO (index)]);
9753 else if (GET_CODE (index) == SYMBOL_REF
9754 || GET_CODE (index) == LABEL_REF
9755 || GET_CODE (index) == CONST)
9756 fputc ('+', file), output_addr_const (file, index);
9757 else gcc_unreachable ();
9758 }
9759 }
9760 else if (GET_CODE (addr) == MINUS
9761 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9762 {
9763 output_addr_const (file, XEXP (addr, 0));
9764 fputs ("-(", file);
9765 output_addr_const (file, XEXP (addr, 1));
9766 fputs ("-.)", file);
9767 }
9768 else if (GET_CODE (addr) == LO_SUM)
9769 {
9770 output_operand (XEXP (addr, 0), 0);
9771 if (TARGET_CM_MEDMID)
9772 fputs ("+%l44(", file);
9773 else
9774 fputs ("+%lo(", file);
9775 output_address (VOIDmode, XEXP (addr, 1));
9776 fputc (')', file);
9777 }
9778 else if (flag_pic
9779 && GET_CODE (addr) == CONST
9780 && GET_CODE (XEXP (addr, 0)) == MINUS
9781 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9782 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9783 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9784 {
9785 addr = XEXP (addr, 0);
9786 output_addr_const (file, XEXP (addr, 0));
9787 /* Group the args of the second CONST in parentheses. */
9788 fputs ("-(", file);
9789 /* Skip past the second CONST--it does nothing for us. */
9790 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9791 /* Close the parenthesis. */
9792 fputc (')', file);
9793 }
9794 else
9795 {
9796 output_addr_const (file, addr);
9797 }
9798 }
9799
9800 /* Target hook for assembling integer objects. The sparc version has
9801 special handling for aligned DI-mode objects. */
9802
9803 static bool
9804 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9805 {
9806 /* ??? We only output .xword's for symbols and only then in environments
9807 where the assembler can handle them. */
9808 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9809 {
9810 if (TARGET_V9)
9811 {
9812 assemble_integer_with_op ("\t.xword\t", x);
9813 return true;
9814 }
9815 else
9816 {
9817 assemble_aligned_integer (4, const0_rtx);
9818 assemble_aligned_integer (4, x);
9819 return true;
9820 }
9821 }
9822 return default_assemble_integer (x, size, aligned_p);
9823 }
9824
9825 /* Return the value of a code used in the .proc pseudo-op that says
9826 what kind of result this function returns. For non-C types, we pick
9827 the closest C type. */
9828
9829 #ifndef SHORT_TYPE_SIZE
9830 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9831 #endif
9832
9833 #ifndef INT_TYPE_SIZE
9834 #define INT_TYPE_SIZE BITS_PER_WORD
9835 #endif
9836
9837 #ifndef LONG_TYPE_SIZE
9838 #define LONG_TYPE_SIZE BITS_PER_WORD
9839 #endif
9840
9841 #ifndef LONG_LONG_TYPE_SIZE
9842 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9843 #endif
9844
9845 #ifndef FLOAT_TYPE_SIZE
9846 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9847 #endif
9848
9849 #ifndef DOUBLE_TYPE_SIZE
9850 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9851 #endif
9852
9853 #ifndef LONG_DOUBLE_TYPE_SIZE
9854 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9855 #endif
9856
9857 unsigned long
9858 sparc_type_code (register tree type)
9859 {
9860 register unsigned long qualifiers = 0;
9861 register unsigned shift;
9862
9863 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9864 setting more, since some assemblers will give an error for this. Also,
9865 we must be careful to avoid shifts of 32 bits or more to avoid getting
9866 unpredictable results. */
9867
9868 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9869 {
9870 switch (TREE_CODE (type))
9871 {
9872 case ERROR_MARK:
9873 return qualifiers;
9874
9875 case ARRAY_TYPE:
9876 qualifiers |= (3 << shift);
9877 break;
9878
9879 case FUNCTION_TYPE:
9880 case METHOD_TYPE:
9881 qualifiers |= (2 << shift);
9882 break;
9883
9884 case POINTER_TYPE:
9885 case REFERENCE_TYPE:
9886 case OFFSET_TYPE:
9887 qualifiers |= (1 << shift);
9888 break;
9889
9890 case RECORD_TYPE:
9891 return (qualifiers | 8);
9892
9893 case UNION_TYPE:
9894 case QUAL_UNION_TYPE:
9895 return (qualifiers | 9);
9896
9897 case ENUMERAL_TYPE:
9898 return (qualifiers | 10);
9899
9900 case VOID_TYPE:
9901 return (qualifiers | 16);
9902
9903 case INTEGER_TYPE:
9904 /* If this is a range type, consider it to be the underlying
9905 type. */
9906 if (TREE_TYPE (type) != 0)
9907 break;
9908
9909 /* Carefully distinguish all the standard types of C,
9910 without messing up if the language is not C. We do this by
9911 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9912 look at both the names and the above fields, but that's redundant.
9913 Any type whose size is between two C types will be considered
9914 to be the wider of the two types. Also, we do not have a
9915 special code to use for "long long", so anything wider than
9916 long is treated the same. Note that we can't distinguish
9917 between "int" and "long" in this code if they are the same
9918 size, but that's fine, since neither can the assembler. */
9919
9920 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9921 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9922
9923 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9924 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9925
9926 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9927 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9928
9929 else
9930 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9931
9932 case REAL_TYPE:
9933 /* If this is a range type, consider it to be the underlying
9934 type. */
9935 if (TREE_TYPE (type) != 0)
9936 break;
9937
9938 /* Carefully distinguish all the standard types of C,
9939 without messing up if the language is not C. */
9940
9941 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9942 return (qualifiers | 6);
9943
9944 else
9945 return (qualifiers | 7);
9946
9947 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9948 /* ??? We need to distinguish between double and float complex types,
9949 but I don't know how yet because I can't reach this code from
9950 existing front-ends. */
9951 return (qualifiers | 7); /* Who knows? */
9952
9953 case VECTOR_TYPE:
9954 case BOOLEAN_TYPE: /* Boolean truth value type. */
9955 case LANG_TYPE:
9956 case NULLPTR_TYPE:
9957 return qualifiers;
9958
9959 default:
9960 gcc_unreachable (); /* Not a type! */
9961 }
9962 }
9963
9964 return qualifiers;
9965 }
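/* Illustrative example (not from the original sources): for the C type
   "int *" the first iteration records the pointer qualifier (1 << 6) and
   the second iteration returns on the INTEGER_TYPE, giving 0x40 | 4 =
   0x44; a plain "unsigned short" has no qualifier bits and yields 13.  */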
9966
9967 /* Nested function support. */
9968
9969 /* Emit RTL insns to initialize the variable parts of a trampoline.
9970 FNADDR is an RTX for the address of the function's pure code.
9971 CXT is an RTX for the static chain value for the function.
9972
9973 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9974 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9975 (to store insns). This is a bit excessive. Perhaps a different
9976 mechanism would be better here.
9977
9978 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9979
9980 static void
9981 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9982 {
9983 /* SPARC 32-bit trampoline:
9984
9985 sethi %hi(fn), %g1
9986 sethi %hi(static), %g2
9987 jmp %g1+%lo(fn)
9988 or %g2, %lo(static), %g2
9989
9990 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9991 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9992 */
9993
9994 emit_move_insn
9995 (adjust_address (m_tramp, SImode, 0),
9996 expand_binop (SImode, ior_optab,
9997 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9998 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9999 NULL_RTX, 1, OPTAB_DIRECT));
10000
10001 emit_move_insn
10002 (adjust_address (m_tramp, SImode, 4),
10003 expand_binop (SImode, ior_optab,
10004 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
10005 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
10006 NULL_RTX, 1, OPTAB_DIRECT));
10007
10008 emit_move_insn
10009 (adjust_address (m_tramp, SImode, 8),
10010 expand_binop (SImode, ior_optab,
10011 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
10012 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
10013 NULL_RTX, 1, OPTAB_DIRECT));
10014
10015 emit_move_insn
10016 (adjust_address (m_tramp, SImode, 12),
10017 expand_binop (SImode, ior_optab,
10018 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
10019 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
10020 NULL_RTX, 1, OPTAB_DIRECT));
10021
10022 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
10023 aligned on a 16 byte boundary so one flush clears it all. */
10024 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
10025 if (sparc_cpu != PROCESSOR_ULTRASPARC
10026 && sparc_cpu != PROCESSOR_ULTRASPARC3
10027 && sparc_cpu != PROCESSOR_NIAGARA
10028 && sparc_cpu != PROCESSOR_NIAGARA2
10029 && sparc_cpu != PROCESSOR_NIAGARA3
10030 && sparc_cpu != PROCESSOR_NIAGARA4
10031 && sparc_cpu != PROCESSOR_NIAGARA7
10032 && sparc_cpu != PROCESSOR_M8)
10033 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
10034
10035 /* Call __enable_execute_stack after writing onto the stack to make sure
10036 the stack address is accessible. */
10037 #ifdef HAVE_ENABLE_EXECUTE_STACK
10038 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10039 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10040 #endif
10041
10042 }
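/* Illustrative example (hypothetical address, not from the original
   sources): if the nested function lives at 0x12345678, the first
   trampoline word becomes 0x03000000 | (0x12345678 >> 10) = 0x03048d15,
   i.e. "sethi %hi(0x12345678), %g1", and the third word becomes
   0x81c06000 | (0x12345678 & 0x3ff) = 0x81c06278, i.e.
   "jmp %g1+%lo(0x12345678)".  */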
10043
10044 /* The 64-bit version is simpler because it makes more sense to load the
10045 values as "immediate" data out of the trampoline. It's also easier since
10046 we can read the PC without clobbering a register. */
10047
10048 static void
10049 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
10050 {
10051 /* SPARC 64-bit trampoline:
10052
10053 rd %pc, %g1
10054 ldx [%g1+24], %g5
10055 jmp %g5
10056 ldx [%g1+16], %g5
10057 +16 bytes data
10058 */
10059
10060 emit_move_insn (adjust_address (m_tramp, SImode, 0),
10061 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
10062 emit_move_insn (adjust_address (m_tramp, SImode, 4),
10063 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
10064 emit_move_insn (adjust_address (m_tramp, SImode, 8),
10065 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
10066 emit_move_insn (adjust_address (m_tramp, SImode, 12),
10067 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
10068 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
10069 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
10070 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
10071
10072 if (sparc_cpu != PROCESSOR_ULTRASPARC
10073 && sparc_cpu != PROCESSOR_ULTRASPARC3
10074 && sparc_cpu != PROCESSOR_NIAGARA
10075 && sparc_cpu != PROCESSOR_NIAGARA2
10076 && sparc_cpu != PROCESSOR_NIAGARA3
10077 && sparc_cpu != PROCESSOR_NIAGARA4
10078 && sparc_cpu != PROCESSOR_NIAGARA7
10079 && sparc_cpu != PROCESSOR_M8)
10080 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
10081
10082 /* Call __enable_execute_stack after writing onto the stack to make sure
10083 the stack address is accessible. */
10084 #ifdef HAVE_ENABLE_EXECUTE_STACK
10085 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10086 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10087 #endif
10088 }
10089
10090 /* Worker for TARGET_TRAMPOLINE_INIT. */
10091
10092 static void
10093 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
10094 {
10095 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
10096 cxt = force_reg (Pmode, cxt);
10097 if (TARGET_ARCH64)
10098 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
10099 else
10100 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
10101 }
10102
10103 /* Adjust the cost of a scheduling dependency. Return the new cost of
10104 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
10105
10106 static int
10107 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
10108 int cost)
10109 {
10110 enum attr_type insn_type;
10111
10112 if (recog_memoized (insn) < 0)
10113 return cost;
10114
10115 insn_type = get_attr_type (insn);
10116
10117 if (dep_type == 0)
10118 {
10119 /* Data dependency; DEP_INSN writes a register that INSN reads some
10120 cycles later. */
10121
10122 /* If a load, then the dependence must be on the memory address;
10123 add an extra "cycle". Note that the cost could be two cycles
10124 if the reg was written late in an instruction group; we can't tell
10125 here. */
10126 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
10127 return cost + 3;
10128
10129 /* Get the delay only if the address of the store is the dependence. */
10130 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
10131 {
10132 rtx pat = PATTERN(insn);
10133 rtx dep_pat = PATTERN (dep_insn);
10134
10135 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10136 return cost; /* This should not happen! */
10137
10138 /* The dependency between the two instructions was on the data that
10139 is being stored. Assume that this implies that the address of the
10140 store is not dependent. */
10141 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10142 return cost;
10143
10144 return cost + 3; /* An approximation. */
10145 }
10146
10147 /* A shift instruction cannot receive its data from an instruction
10148 in the same cycle; add a one cycle penalty. */
10149 if (insn_type == TYPE_SHIFT)
10150 return cost + 3; /* Split before cascade into shift. */
10151 }
10152 else
10153 {
10154 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
10155 INSN writes some cycles later. */
10156
10157 /* These are only significant for the fpu unit; writing a fp reg before
10158 the fpu has finished with it stalls the processor. */
10159
10160 /* Reusing an integer register causes no problems. */
10161 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10162 return 0;
10163 }
10164
10165 return cost;
10166 }
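/* Illustrative example (hypothetical insns, not from the original
   sources): on SuperSPARC, if DEP_INSN computes %o2 and INSN is
   "ld [%o2+4], %o3", the true dependence is necessarily on the address,
   so the pair is reported as COST + 3 cycles; the same register feeding
   a plain "add" leaves COST unchanged.  */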
10167
10168 static int
10169 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
10170 int cost)
10171 {
10172 enum attr_type insn_type, dep_type;
10173 rtx pat = PATTERN(insn);
10174 rtx dep_pat = PATTERN (dep_insn);
10175
10176 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
10177 return cost;
10178
10179 insn_type = get_attr_type (insn);
10180 dep_type = get_attr_type (dep_insn);
10181
10182 switch (dtype)
10183 {
10184 case 0:
10185 /* Data dependency; DEP_INSN writes a register that INSN reads some
10186 cycles later. */
10187
10188 switch (insn_type)
10189 {
10190 case TYPE_STORE:
10191 case TYPE_FPSTORE:
10192 /* Get the delay iff the address of the store is the dependence. */
10193 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10194 return cost;
10195
10196 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10197 return cost;
10198 return cost + 3;
10199
10200 case TYPE_LOAD:
10201 case TYPE_SLOAD:
10202 case TYPE_FPLOAD:
10203 /* If a load, then the dependence must be on the memory address. If
10204 the addresses aren't equal, then it might be a false dependency */
10205 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
10206 {
10207 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
10208 || GET_CODE (SET_DEST (dep_pat)) != MEM
10209 || GET_CODE (SET_SRC (pat)) != MEM
10210 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
10211 XEXP (SET_SRC (pat), 0)))
10212 return cost + 2;
10213
10214 return cost + 8;
10215 }
10216 break;
10217
10218 case TYPE_BRANCH:
10219 /* Compare to branch latency is 0. There is no benefit from
10220 separating compare and branch. */
10221 if (dep_type == TYPE_COMPARE)
10222 return 0;
10223 /* Floating point compare to branch latency is less than
10224 compare to conditional move. */
10225 if (dep_type == TYPE_FPCMP)
10226 return cost - 1;
10227 break;
10228 default:
10229 break;
10230 }
10231 break;
10232
10233 case REG_DEP_ANTI:
10234 /* Anti-dependencies only penalize the fpu unit. */
10235 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10236 return 0;
10237 break;
10238
10239 default:
10240 break;
10241 }
10242
10243 return cost;
10244 }
10245
10246 static int
10247 leon5_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
10248 int cost)
10249 {
10250 enum attr_type insn_type, dep_type;
10251 rtx pat = PATTERN (insn);
10252 rtx dep_pat = PATTERN (dep_insn);
10253
10254 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
10255 return cost;
10256
10257 insn_type = get_attr_type (insn);
10258 dep_type = get_attr_type (dep_insn);
10259
10260 switch (dtype)
10261 {
10262 case REG_DEP_TRUE:
10263 /* Data dependency; DEP_INSN writes a register that INSN reads some
10264 cycles later. */
10265
10266 switch (insn_type)
10267 {
10268 case TYPE_STORE:
10269 /* Try to schedule three instructions between the store and
10270 the ALU instruction that generated the data. */
10271 if (dep_type == TYPE_IALU || dep_type == TYPE_SHIFT)
10272 {
10273 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10274 break;
10275
10276 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10277 return 4;
10278 }
10279 break;
10280 default:
10281 break;
10282 }
10283 break;
10284 case REG_DEP_ANTI:
10285 /* Penalize anti-dependencies for FPU instructions. */
10286 if (fpop_insn_p (insn) || insn_type == TYPE_FPLOAD)
10287 return 4;
10288 break;
10289 default:
10290 break;
10291 }
10292
10293 return cost;
10294 }
10295
10296 static int
10297 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
10298 unsigned int)
10299 {
10300 switch (sparc_cpu)
10301 {
10302 case PROCESSOR_LEON5:
10303 cost = leon5_adjust_cost (insn, dep_type, dep, cost);
10304 break;
10305 case PROCESSOR_SUPERSPARC:
10306 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
10307 break;
10308 case PROCESSOR_HYPERSPARC:
10309 case PROCESSOR_SPARCLITE86X:
10310 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
10311 break;
10312 default:
10313 break;
10314 }
10315 return cost;
10316 }
10317
10318 static void
10319 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
10320 int sched_verbose ATTRIBUTE_UNUSED,
10321 int max_ready ATTRIBUTE_UNUSED)
10322 {}
10323
10324 static int
10325 sparc_use_sched_lookahead (void)
10326 {
10327 switch (sparc_cpu)
10328 {
10329 case PROCESSOR_ULTRASPARC:
10330 case PROCESSOR_ULTRASPARC3:
10331 return 4;
10332 case PROCESSOR_SUPERSPARC:
10333 case PROCESSOR_HYPERSPARC:
10334 case PROCESSOR_SPARCLITE86X:
10335 return 3;
10336 case PROCESSOR_NIAGARA4:
10337 case PROCESSOR_NIAGARA7:
10338 case PROCESSOR_M8:
10339 return 2;
10340 case PROCESSOR_NIAGARA:
10341 case PROCESSOR_NIAGARA2:
10342 case PROCESSOR_NIAGARA3:
10343 default:
10344 return 0;
10345 }
10346 }
10347
10348 static int
10349 sparc_issue_rate (void)
10350 {
10351 switch (sparc_cpu)
10352 {
10353 case PROCESSOR_ULTRASPARC:
10354 case PROCESSOR_ULTRASPARC3:
10355 case PROCESSOR_M8:
10356 return 4;
10357 case PROCESSOR_SUPERSPARC:
10358 return 3;
10359 case PROCESSOR_HYPERSPARC:
10360 case PROCESSOR_SPARCLITE86X:
10361 case PROCESSOR_V9:
10362 /* Assume V9 processors are capable of at least dual-issue. */
10363 case PROCESSOR_NIAGARA4:
10364 case PROCESSOR_NIAGARA7:
10365 return 2;
10366 case PROCESSOR_NIAGARA:
10367 case PROCESSOR_NIAGARA2:
10368 case PROCESSOR_NIAGARA3:
10369 default:
10370 return 1;
10371 }
10372 }
10373
10374 int
10375 sparc_branch_cost (bool speed_p, bool predictable_p)
10376 {
10377 if (!speed_p)
10378 return 2;
10379
10380 /* For pre-V9 processors we use a single value (usually 3) to take into
10381 account the potential annulling of the delay slot (which ends up being
10382 a bubble in the pipeline slot) plus a cycle to take into consideration
10383 the instruction cache effects.
10384
10385 On V9 and later processors, which have branch prediction facilities,
10386 we take into account whether the branch is (easily) predictable. */
10387 const int cost = sparc_costs->branch_cost;
10388
10389 switch (sparc_cpu)
10390 {
10391 case PROCESSOR_V9:
10392 case PROCESSOR_ULTRASPARC:
10393 case PROCESSOR_ULTRASPARC3:
10394 case PROCESSOR_NIAGARA:
10395 case PROCESSOR_NIAGARA2:
10396 case PROCESSOR_NIAGARA3:
10397 case PROCESSOR_NIAGARA4:
10398 case PROCESSOR_NIAGARA7:
10399 case PROCESSOR_M8:
10400 return cost + (predictable_p ? 0 : 2);
10401
10402 default:
10403 return cost;
10404 }
10405 }
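/* Illustrative example (not from the original sources): when optimizing
   for size (!speed_p) the cost is always 2; on a V9-class processor whose
   cost table has branch_cost == 3, a predictable branch costs 3 and an
   unpredictable one 5; on other processors the table value is returned
   unchanged.  */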
10406
10407 static int
10408 set_extends (rtx_insn *insn)
10409 {
10410 register rtx pat = PATTERN (insn);
10411
10412 switch (GET_CODE (SET_SRC (pat)))
10413 {
10414 /* Load and some shift instructions zero extend. */
10415 case MEM:
10416 case ZERO_EXTEND:
10417 /* sethi clears the high bits */
10418 case HIGH:
10419 /* LO_SUM is used with sethi. sethi cleared the high
10420 bits and the values used with lo_sum are positive */
10421 case LO_SUM:
10422 /* Store flag stores 0 or 1 */
10423 case LT: case LTU:
10424 case GT: case GTU:
10425 case LE: case LEU:
10426 case GE: case GEU:
10427 case EQ:
10428 case NE:
10429 return 1;
10430 case AND:
10431 {
10432 rtx op0 = XEXP (SET_SRC (pat), 0);
10433 rtx op1 = XEXP (SET_SRC (pat), 1);
10434 if (GET_CODE (op1) == CONST_INT)
10435 return INTVAL (op1) >= 0;
10436 if (GET_CODE (op0) != REG)
10437 return 0;
10438 if (sparc_check_64 (op0, insn) == 1)
10439 return 1;
10440 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10441 }
10442 case IOR:
10443 case XOR:
10444 {
10445 rtx op0 = XEXP (SET_SRC (pat), 0);
10446 rtx op1 = XEXP (SET_SRC (pat), 1);
10447 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
10448 return 0;
10449 if (GET_CODE (op1) == CONST_INT)
10450 return INTVAL (op1) >= 0;
10451 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10452 }
10453 case LSHIFTRT:
10454 return GET_MODE (SET_SRC (pat)) == SImode;
10455 /* Positive integers leave the high bits zero. */
10456 case CONST_INT:
10457 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
10458 case ASHIFTRT:
10459 case SIGN_EXTEND:
10460 return - (GET_MODE (SET_SRC (pat)) == SImode);
10461 case REG:
10462 return sparc_check_64 (SET_SRC (pat), insn);
10463 default:
10464 return 0;
10465 }
10466 }
10467
10468 /* We _ought_ to have only one kind per function, but... */
10469 static GTY(()) rtx sparc_addr_diff_list;
10470 static GTY(()) rtx sparc_addr_list;
10471
10472 void
10473 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
10474 {
10475 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
10476 if (diff)
10477 sparc_addr_diff_list
10478 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
10479 else
10480 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
10481 }
10482
10483 static void
10484 sparc_output_addr_vec (rtx vec)
10485 {
10486 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10487 int idx, vlen = XVECLEN (body, 0);
10488
10489 #ifdef ASM_OUTPUT_ADDR_VEC_START
10490 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10491 #endif
10492
10493 #ifdef ASM_OUTPUT_CASE_LABEL
10494 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10495 NEXT_INSN (lab));
10496 #else
10497 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10498 #endif
10499
10500 for (idx = 0; idx < vlen; idx++)
10501 {
10502 ASM_OUTPUT_ADDR_VEC_ELT
10503 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10504 }
10505
10506 #ifdef ASM_OUTPUT_ADDR_VEC_END
10507 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10508 #endif
10509 }
10510
10511 static void
10512 sparc_output_addr_diff_vec (rtx vec)
10513 {
10514 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10515 rtx base = XEXP (XEXP (body, 0), 0);
10516 int idx, vlen = XVECLEN (body, 1);
10517
10518 #ifdef ASM_OUTPUT_ADDR_VEC_START
10519 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10520 #endif
10521
10522 #ifdef ASM_OUTPUT_CASE_LABEL
10523 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10524 NEXT_INSN (lab));
10525 #else
10526 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10527 #endif
10528
10529 for (idx = 0; idx < vlen; idx++)
10530 {
10531 ASM_OUTPUT_ADDR_DIFF_ELT
10532 (asm_out_file,
10533 body,
10534 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10535 CODE_LABEL_NUMBER (base));
10536 }
10537
10538 #ifdef ASM_OUTPUT_ADDR_VEC_END
10539 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10540 #endif
10541 }
10542
10543 static void
10544 sparc_output_deferred_case_vectors (void)
10545 {
10546 rtx t;
10547 int align;
10548
10549 if (sparc_addr_list == NULL_RTX
10550 && sparc_addr_diff_list == NULL_RTX)
10551 return;
10552
10553 /* Align to cache line in the function's code section. */
10554 switch_to_section (current_function_section ());
10555
10556 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
10557 if (align > 0)
10558 ASM_OUTPUT_ALIGN (asm_out_file, align);
10559
10560 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
10561 sparc_output_addr_vec (XEXP (t, 0));
10562 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
10563 sparc_output_addr_diff_vec (XEXP (t, 0));
10564
10565 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
10566 }
10567
10568 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
10569 unknown. Return 1 if the high bits are zero, -1 if the register is
10570 sign extended. */
10571 int
10572 sparc_check_64 (rtx x, rtx_insn *insn)
10573 {
10574 /* If a register is set only once it is safe to ignore insns this
10575 code does not know how to handle. The loop will either recognize
10576 the single set and return the correct value or fail to recognize
10577 it and return 0. */
10578 int set_once = 0;
10579 rtx y = x;
10580
10581 gcc_assert (GET_CODE (x) == REG);
10582
10583 if (GET_MODE (x) == DImode)
10584 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
10585
10586 if (flag_expensive_optimizations
10587 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
10588 set_once = 1;
10589
10590 if (insn == 0)
10591 {
10592 if (set_once)
10593 insn = get_last_insn_anywhere ();
10594 else
10595 return 0;
10596 }
10597
10598 while ((insn = PREV_INSN (insn)))
10599 {
10600 switch (GET_CODE (insn))
10601 {
10602 case JUMP_INSN:
10603 case NOTE:
10604 break;
10605 case CODE_LABEL:
10606 case CALL_INSN:
10607 default:
10608 if (! set_once)
10609 return 0;
10610 break;
10611 case INSN:
10612 {
10613 rtx pat = PATTERN (insn);
10614 if (GET_CODE (pat) != SET)
10615 return 0;
10616 if (rtx_equal_p (x, SET_DEST (pat)))
10617 return set_extends (insn);
10618 if (y && rtx_equal_p (y, SET_DEST (pat)))
10619 return set_extends (insn);
10620 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
10621 return 0;
10622 }
10623 }
10624 }
10625 return 0;
10626 }
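/* Illustrative example (hypothetical insns, not from the original
   sources): if the only reaching definition of %o1 is
   "sethi %hi(sym), %o1", the SET_SRC is a HIGH and sparc_check_64
   returns 1 (upper 32 bits known to be zero); if it is
   "sra %o2, 3, %o1", the SImode ASHIFTRT makes it return -1
   (sign-extended); a value coming back from a call yields 0 (unknown).  */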
10627
10628 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
10629 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
10630
10631 const char *
10632 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
10633 {
10634 static char asm_code[60];
10635
10636 /* The scratch register is only required when the destination
10637 register is not a 64-bit global or out register. */
10638 if (which_alternative != 2)
10639 operands[3] = operands[0];
10640
10641 /* We can only shift by constants <= 63. */
10642 if (GET_CODE (operands[2]) == CONST_INT)
10643 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
10644
10645 if (GET_CODE (operands[1]) == CONST_INT)
10646 {
10647 output_asm_insn ("mov\t%1, %3", operands);
10648 }
10649 else
10650 {
10651 output_asm_insn ("sllx\t%H1, 32, %3", operands);
10652 if (sparc_check_64 (operands[1], insn) <= 0)
10653 output_asm_insn ("srl\t%L1, 0, %L1", operands);
10654 output_asm_insn ("or\t%L1, %3, %3", operands);
10655 }
10656
10657 strcpy (asm_code, opcode);
10658
10659 if (which_alternative != 2)
10660 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
10661 else
10662 return
10663 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
10664 }
10665
10666 /* Output rtl to increment the profiler label LABELNO
10667 for profiling a function entry. */
10668
10669 void
10670 sparc_profile_hook (int labelno)
10671 {
10672 char buf[32];
10673 rtx lab, fun;
10674
10675 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
10676 if (NO_PROFILE_COUNTERS)
10677 {
10678 emit_library_call (fun, LCT_NORMAL, VOIDmode);
10679 }
10680 else
10681 {
10682 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10683 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
10684 emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode);
10685 }
10686 }
10687
10688 #ifdef TARGET_SOLARIS
10689 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
10690
10691 static void
10692 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
10693 tree decl ATTRIBUTE_UNUSED)
10694 {
10695 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
10696 {
10697 solaris_elf_asm_comdat_section (name, flags, decl);
10698 return;
10699 }
10700
10701 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
10702
10703 if (!(flags & SECTION_DEBUG))
10704 fputs (",#alloc", asm_out_file);
10705 #if HAVE_GAS_SECTION_EXCLUDE
10706 if (flags & SECTION_EXCLUDE)
10707 fputs (",#exclude", asm_out_file);
10708 #endif
10709 if (flags & SECTION_WRITE)
10710 fputs (",#write", asm_out_file);
10711 if (flags & SECTION_TLS)
10712 fputs (",#tls", asm_out_file);
10713 if (flags & SECTION_CODE)
10714 fputs (",#execinstr", asm_out_file);
10715
10716 if (flags & SECTION_NOTYPE)
10717 ;
10718 else if (flags & SECTION_BSS)
10719 fputs (",#nobits", asm_out_file);
10720 else
10721 fputs (",#progbits", asm_out_file);
10722
10723 fputc ('\n', asm_out_file);
10724 }
10725 #endif /* TARGET_SOLARIS */
10726
10727 /* We do not allow indirect calls to be optimized into sibling calls.
10728
10729 We cannot use sibling calls when delayed branches are disabled
10730 because they will likely require the call delay slot to be filled.
10731
10732 Also, on SPARC 32-bit we cannot emit a sibling call when the
10733 current function returns a structure. This is because the "unimp
10734 after call" convention would cause the callee to return to the
10735 wrong place. The generic code already disallows cases where the
10736 function being called returns a structure.
10737
10738 It may seem strange how this last case could occur. Usually there
10739 is code after the call which jumps to epilogue code which dumps the
10740 return value into the struct return area. That ought to invalidate
10741 the sibling call right? Well, in the C++ case we can end up passing
10742 the pointer to the struct return area to a constructor (which returns
10743 void) and then nothing else happens. Such a sibling call would look
10744 valid without the added check here.
10745
10746 VxWorks PIC PLT entries require the global pointer to be initialized
10747 on entry. We therefore can't emit sibling calls to them. */
10748 static bool
10749 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10750 {
10751 return (decl
10752 && flag_delayed_branch
10753 && (TARGET_ARCH64 || ! cfun->returns_struct)
10754 && !(TARGET_VXWORKS_RTP
10755 && flag_pic
10756 && !targetm.binds_local_p (decl)));
10757 }
10758
10759 /* libfunc renaming. */
10760
10761 static void
10762 sparc_init_libfuncs (void)
10763 {
10764 if (TARGET_ARCH32)
10765 {
10766 /* Use the subroutines that Sun's library provides for integer
10767 multiply and divide. The `*' prevents an underscore from
10768 being prepended by the compiler. .umul is a little faster
10769 than .mul. */
10770 set_optab_libfunc (smul_optab, SImode, "*.umul");
10771 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10772 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10773 set_optab_libfunc (smod_optab, SImode, "*.rem");
10774 set_optab_libfunc (umod_optab, SImode, "*.urem");
10775
10776 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */
10777 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10778 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10779 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10780 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10781 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10782
10783 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10784 is because with soft-float, the SFmode and DFmode sqrt
10785 instructions will be absent, and the compiler will notice and
10786 try to use the TFmode sqrt instruction for calls to the
10787 builtin function sqrt, but this fails. */
10788 if (TARGET_FPU)
10789 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10790
10791 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10792 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10793 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10794 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10795 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10796 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10797
10798 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10799 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10800 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10801 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10802
10803 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10804 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10805 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10806 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10807
10808 if (DITF_CONVERSION_LIBFUNCS)
10809 {
10810 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10811 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10812 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10813 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10814 }
10815
10816 if (SUN_CONVERSION_LIBFUNCS)
10817 {
10818 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10819 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10820 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10821 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10822 }
10823 }
10824 if (TARGET_ARCH64)
10825 {
10826 /* In the SPARC 64bit ABI, SImode multiply and divide functions
10827 do not exist in the library. Make sure the compiler does not
10828 emit calls to them by accident. (It should always use the
10829 hardware instructions.) */
10830 set_optab_libfunc (smul_optab, SImode, 0);
10831 set_optab_libfunc (sdiv_optab, SImode, 0);
10832 set_optab_libfunc (udiv_optab, SImode, 0);
10833 set_optab_libfunc (smod_optab, SImode, 0);
10834 set_optab_libfunc (umod_optab, SImode, 0);
10835
10836 if (SUN_INTEGER_MULTIPLY_64)
10837 {
10838 set_optab_libfunc (smul_optab, DImode, "__mul64");
10839 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10840 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10841 set_optab_libfunc (smod_optab, DImode, "__rem64");
10842 set_optab_libfunc (umod_optab, DImode, "__urem64");
10843 }
10844
10845 if (SUN_CONVERSION_LIBFUNCS)
10846 {
10847 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10848 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10849 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10850 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10851 }
10852 }
10853 }
10854
10855 /* SPARC builtins. */
10856 enum sparc_builtins
10857 {
10858 /* FPU builtins. */
10859 SPARC_BUILTIN_LDFSR,
10860 SPARC_BUILTIN_STFSR,
10861
10862 /* VIS 1.0 builtins. */
10863 SPARC_BUILTIN_FPACK16,
10864 SPARC_BUILTIN_FPACK32,
10865 SPARC_BUILTIN_FPACKFIX,
10866 SPARC_BUILTIN_FEXPAND,
10867 SPARC_BUILTIN_FPMERGE,
10868 SPARC_BUILTIN_FMUL8X16,
10869 SPARC_BUILTIN_FMUL8X16AU,
10870 SPARC_BUILTIN_FMUL8X16AL,
10871 SPARC_BUILTIN_FMUL8SUX16,
10872 SPARC_BUILTIN_FMUL8ULX16,
10873 SPARC_BUILTIN_FMULD8SUX16,
10874 SPARC_BUILTIN_FMULD8ULX16,
10875 SPARC_BUILTIN_FALIGNDATAV4HI,
10876 SPARC_BUILTIN_FALIGNDATAV8QI,
10877 SPARC_BUILTIN_FALIGNDATAV2SI,
10878 SPARC_BUILTIN_FALIGNDATADI,
10879 SPARC_BUILTIN_WRGSR,
10880 SPARC_BUILTIN_RDGSR,
10881 SPARC_BUILTIN_ALIGNADDR,
10882 SPARC_BUILTIN_ALIGNADDRL,
10883 SPARC_BUILTIN_PDIST,
10884 SPARC_BUILTIN_EDGE8,
10885 SPARC_BUILTIN_EDGE8L,
10886 SPARC_BUILTIN_EDGE16,
10887 SPARC_BUILTIN_EDGE16L,
10888 SPARC_BUILTIN_EDGE32,
10889 SPARC_BUILTIN_EDGE32L,
10890 SPARC_BUILTIN_FCMPLE16,
10891 SPARC_BUILTIN_FCMPLE32,
10892 SPARC_BUILTIN_FCMPNE16,
10893 SPARC_BUILTIN_FCMPNE32,
10894 SPARC_BUILTIN_FCMPGT16,
10895 SPARC_BUILTIN_FCMPGT32,
10896 SPARC_BUILTIN_FCMPEQ16,
10897 SPARC_BUILTIN_FCMPEQ32,
10898 SPARC_BUILTIN_FPADD16,
10899 SPARC_BUILTIN_FPADD16S,
10900 SPARC_BUILTIN_FPADD32,
10901 SPARC_BUILTIN_FPADD32S,
10902 SPARC_BUILTIN_FPSUB16,
10903 SPARC_BUILTIN_FPSUB16S,
10904 SPARC_BUILTIN_FPSUB32,
10905 SPARC_BUILTIN_FPSUB32S,
10906 SPARC_BUILTIN_ARRAY8,
10907 SPARC_BUILTIN_ARRAY16,
10908 SPARC_BUILTIN_ARRAY32,
10909
10910 /* VIS 2.0 builtins. */
10911 SPARC_BUILTIN_EDGE8N,
10912 SPARC_BUILTIN_EDGE8LN,
10913 SPARC_BUILTIN_EDGE16N,
10914 SPARC_BUILTIN_EDGE16LN,
10915 SPARC_BUILTIN_EDGE32N,
10916 SPARC_BUILTIN_EDGE32LN,
10917 SPARC_BUILTIN_BMASK,
10918 SPARC_BUILTIN_BSHUFFLEV4HI,
10919 SPARC_BUILTIN_BSHUFFLEV8QI,
10920 SPARC_BUILTIN_BSHUFFLEV2SI,
10921 SPARC_BUILTIN_BSHUFFLEDI,
10922
10923 /* VIS 3.0 builtins. */
10924 SPARC_BUILTIN_CMASK8,
10925 SPARC_BUILTIN_CMASK16,
10926 SPARC_BUILTIN_CMASK32,
10927 SPARC_BUILTIN_FCHKSM16,
10928 SPARC_BUILTIN_FSLL16,
10929 SPARC_BUILTIN_FSLAS16,
10930 SPARC_BUILTIN_FSRL16,
10931 SPARC_BUILTIN_FSRA16,
10932 SPARC_BUILTIN_FSLL32,
10933 SPARC_BUILTIN_FSLAS32,
10934 SPARC_BUILTIN_FSRL32,
10935 SPARC_BUILTIN_FSRA32,
10936 SPARC_BUILTIN_PDISTN,
10937 SPARC_BUILTIN_FMEAN16,
10938 SPARC_BUILTIN_FPADD64,
10939 SPARC_BUILTIN_FPSUB64,
10940 SPARC_BUILTIN_FPADDS16,
10941 SPARC_BUILTIN_FPADDS16S,
10942 SPARC_BUILTIN_FPSUBS16,
10943 SPARC_BUILTIN_FPSUBS16S,
10944 SPARC_BUILTIN_FPADDS32,
10945 SPARC_BUILTIN_FPADDS32S,
10946 SPARC_BUILTIN_FPSUBS32,
10947 SPARC_BUILTIN_FPSUBS32S,
10948 SPARC_BUILTIN_FUCMPLE8,
10949 SPARC_BUILTIN_FUCMPNE8,
10950 SPARC_BUILTIN_FUCMPGT8,
10951 SPARC_BUILTIN_FUCMPEQ8,
10952 SPARC_BUILTIN_FHADDS,
10953 SPARC_BUILTIN_FHADDD,
10954 SPARC_BUILTIN_FHSUBS,
10955 SPARC_BUILTIN_FHSUBD,
10956 SPARC_BUILTIN_FNHADDS,
10957 SPARC_BUILTIN_FNHADDD,
10958 SPARC_BUILTIN_UMULXHI,
10959 SPARC_BUILTIN_XMULX,
10960 SPARC_BUILTIN_XMULXHI,
10961
10962 /* VIS 4.0 builtins. */
10963 SPARC_BUILTIN_FPADD8,
10964 SPARC_BUILTIN_FPADDS8,
10965 SPARC_BUILTIN_FPADDUS8,
10966 SPARC_BUILTIN_FPADDUS16,
10967 SPARC_BUILTIN_FPCMPLE8,
10968 SPARC_BUILTIN_FPCMPGT8,
10969 SPARC_BUILTIN_FPCMPULE16,
10970 SPARC_BUILTIN_FPCMPUGT16,
10971 SPARC_BUILTIN_FPCMPULE32,
10972 SPARC_BUILTIN_FPCMPUGT32,
10973 SPARC_BUILTIN_FPMAX8,
10974 SPARC_BUILTIN_FPMAX16,
10975 SPARC_BUILTIN_FPMAX32,
10976 SPARC_BUILTIN_FPMAXU8,
10977 SPARC_BUILTIN_FPMAXU16,
10978 SPARC_BUILTIN_FPMAXU32,
10979 SPARC_BUILTIN_FPMIN8,
10980 SPARC_BUILTIN_FPMIN16,
10981 SPARC_BUILTIN_FPMIN32,
10982 SPARC_BUILTIN_FPMINU8,
10983 SPARC_BUILTIN_FPMINU16,
10984 SPARC_BUILTIN_FPMINU32,
10985 SPARC_BUILTIN_FPSUB8,
10986 SPARC_BUILTIN_FPSUBS8,
10987 SPARC_BUILTIN_FPSUBUS8,
10988 SPARC_BUILTIN_FPSUBUS16,
10989
10990 /* VIS 4.0B builtins. */
10991
10992 /* Note that all the DICTUNPACK* entries should be kept
10993 contiguous. */
10994 SPARC_BUILTIN_FIRST_DICTUNPACK,
10995 SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
10996 SPARC_BUILTIN_DICTUNPACK16,
10997 SPARC_BUILTIN_DICTUNPACK32,
10998 SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,
10999
11000 /* Note that all the FPCMP*SHL entries should be kept
11001 contiguous. */
11002 SPARC_BUILTIN_FIRST_FPCMPSHL,
11003 SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
11004 SPARC_BUILTIN_FPCMPGT8SHL,
11005 SPARC_BUILTIN_FPCMPEQ8SHL,
11006 SPARC_BUILTIN_FPCMPNE8SHL,
11007 SPARC_BUILTIN_FPCMPLE16SHL,
11008 SPARC_BUILTIN_FPCMPGT16SHL,
11009 SPARC_BUILTIN_FPCMPEQ16SHL,
11010 SPARC_BUILTIN_FPCMPNE16SHL,
11011 SPARC_BUILTIN_FPCMPLE32SHL,
11012 SPARC_BUILTIN_FPCMPGT32SHL,
11013 SPARC_BUILTIN_FPCMPEQ32SHL,
11014 SPARC_BUILTIN_FPCMPNE32SHL,
11015 SPARC_BUILTIN_FPCMPULE8SHL,
11016 SPARC_BUILTIN_FPCMPUGT8SHL,
11017 SPARC_BUILTIN_FPCMPULE16SHL,
11018 SPARC_BUILTIN_FPCMPUGT16SHL,
11019 SPARC_BUILTIN_FPCMPULE32SHL,
11020 SPARC_BUILTIN_FPCMPUGT32SHL,
11021 SPARC_BUILTIN_FPCMPDE8SHL,
11022 SPARC_BUILTIN_FPCMPDE16SHL,
11023 SPARC_BUILTIN_FPCMPDE32SHL,
11024 SPARC_BUILTIN_FPCMPUR8SHL,
11025 SPARC_BUILTIN_FPCMPUR16SHL,
11026 SPARC_BUILTIN_FPCMPUR32SHL,
11027 SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,
11028
11029 SPARC_BUILTIN_MAX
11030 };
11031
11032 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
11033 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
11034
11035 /* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
11036 The instruction should require a constant operand of some sort. The
11037 function prints an error if OPVAL is not valid. */
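/* It is used, for instance, to validate the immediate operands of the
   VIS 4.0B DICTUNPACK and FPCMP*SHL builtins; see sparc_expand_builtin.  */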
11038
11039 static int
11040 check_constant_argument (enum insn_code icode, int opnum, rtx opval)
11041 {
11042 if (GET_CODE (opval) != CONST_INT)
11043 {
11044 error ("%qs expects a constant argument", insn_data[icode].name);
11045 return false;
11046 }
11047
11048 if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
11049 {
11050 error ("constant argument out of range for %qs", insn_data[icode].name);
11051 return false;
11052 }
11053 return true;
11054 }
11055
11056 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
11057 function decl or NULL_TREE if the builtin was not added. */
11058
11059 static tree
11060 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
11061 tree type)
11062 {
11063 tree t
11064 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
11065
11066 if (t)
11067 {
11068 sparc_builtins[code] = t;
11069 sparc_builtins_icode[code] = icode;
11070 }
11071
11072 return t;
11073 }
11074
11075 /* Likewise, but also marks the function as "const". */
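/* TREE_READONLY on the decl corresponds to the "const" function attribute:
   the builtin's result depends only on its arguments, so the middle-end may
   CSE or hoist identical calls.  */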
11076
11077 static tree
11078 def_builtin_const (const char *name, enum insn_code icode,
11079 enum sparc_builtins code, tree type)
11080 {
11081 tree t = def_builtin (name, icode, code, type);
11082
11083 if (t)
11084 TREE_READONLY (t) = 1;
11085
11086 return t;
11087 }
11088
11089 /* Implement the TARGET_INIT_BUILTINS target hook.
11090 Create builtin functions for special SPARC instructions. */
11091
11092 static void
11093 sparc_init_builtins (void)
11094 {
11095 if (TARGET_FPU)
11096 sparc_fpu_init_builtins ();
11097
11098 if (TARGET_VIS)
11099 sparc_vis_init_builtins ();
11100 }
11101
11102 /* Create builtin functions for FPU instructions. */
11103
11104 static void
11105 sparc_fpu_init_builtins (void)
11106 {
11107 tree ftype
11108 = build_function_type_list (void_type_node,
11109 build_pointer_type (unsigned_type_node), 0);
11110 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
11111 SPARC_BUILTIN_LDFSR, ftype);
11112 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
11113 SPARC_BUILTIN_STFSR, ftype);
11114 }
11115
11116 /* Create builtin functions for VIS instructions. */
11117
11118 static void
11119 sparc_vis_init_builtins (void)
11120 {
11121 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
11122 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
11123 tree v4hi = build_vector_type (intHI_type_node, 4);
11124 tree v2hi = build_vector_type (intHI_type_node, 2);
11125 tree v2si = build_vector_type (intSI_type_node, 2);
11126 tree v1si = build_vector_type (intSI_type_node, 1);
11127
11128 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
11129 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
11130 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
11131 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
11132 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
11133 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
11134 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
11135 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
11136 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
11137 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
11138 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
11139 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
11140 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
11141 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
11142 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
11143 v8qi, v8qi,
11144 intDI_type_node, 0);
11145 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
11146 v8qi, v8qi, 0);
11147 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
11148 v8qi, v8qi, 0);
11149 tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
11150 intSI_type_node, 0);
11151 tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
11152 intSI_type_node, 0);
11153 tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
11154 						     intSI_type_node, 0);
11155 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
11156 intDI_type_node,
11157 intDI_type_node, 0);
11158 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
11159 intSI_type_node,
11160 intSI_type_node, 0);
11161 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
11162 ptr_type_node,
11163 intSI_type_node, 0);
11164 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
11165 ptr_type_node,
11166 intDI_type_node, 0);
11167 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
11168 ptr_type_node,
11169 ptr_type_node, 0);
11170 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
11171 ptr_type_node,
11172 ptr_type_node, 0);
11173 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
11174 v4hi, v4hi, 0);
11175 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
11176 v2si, v2si, 0);
11177 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
11178 v4hi, v4hi, 0);
11179 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
11180 v2si, v2si, 0);
11181 tree void_ftype_di = build_function_type_list (void_type_node,
11182 intDI_type_node, 0);
11183 tree di_ftype_void = build_function_type_list (intDI_type_node,
11184 void_type_node, 0);
11185 tree void_ftype_si = build_function_type_list (void_type_node,
11186 intSI_type_node, 0);
11187 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
11188 float_type_node,
11189 float_type_node, 0);
11190 tree df_ftype_df_df = build_function_type_list (double_type_node,
11191 double_type_node,
11192 double_type_node, 0);
11193
11194 /* Packing and expanding vectors. */
11195 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
11196 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
11197 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
11198 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
11199 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
11200 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
11201 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
11202 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
11203 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
11204 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
11205
11206 /* Multiplications. */
11207 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
11208 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
11209 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
11210 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
11211 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
11212 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
11213 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
11214 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
11215 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
11216 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
11217 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
11218 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
11219 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
11220 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
11221
11222 /* Data aligning. */
11223 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
11224 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
11225 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
11226 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
11227 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
11228 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
11229 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
11230 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
11231
11232 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
11233 SPARC_BUILTIN_WRGSR, void_ftype_di);
11234 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
11235 SPARC_BUILTIN_RDGSR, di_ftype_void);
11236
11237 if (TARGET_ARCH64)
11238 {
11239 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
11240 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
11241 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
11242 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
11243 }
11244 else
11245 {
11246 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
11247 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
11248 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
11249 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
11250 }
11251
11252 /* Pixel distance. */
11253 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
11254 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
11255
11256 /* Edge handling. */
11257 if (TARGET_ARCH64)
11258 {
11259 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
11260 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
11261 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
11262 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
11263 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
11264 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
11265 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
11266 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
11267 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
11268 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
11269 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
11270 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
11271 }
11272 else
11273 {
11274 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
11275 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
11276 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
11277 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
11278 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
11279 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
11280 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
11281 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
11282 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
11283 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
11284 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
11285 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
11286 }
11287
11288 /* Pixel compare. */
11289 if (TARGET_ARCH64)
11290 {
11291 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
11292 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
11293 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
11294 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
11295 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
11296 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
11297 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
11298 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
11299 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
11300 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
11301 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
11302 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
11303 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
11304 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
11305 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
11306 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
11307 }
11308 else
11309 {
11310 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
11311 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
11312 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
11313 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
11314 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
11315 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
11316 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
11317 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
11318 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
11319 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
11320 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
11321 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
11322 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
11323 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
11324 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
11325 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
11326 }
11327
11328 /* Addition and subtraction. */
11329 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
11330 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
11331 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
11332 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
11333 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
11334 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
11335 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
11336 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
11337 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
11338 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
11339 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
11340 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
11341 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
11342 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
11343 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
11344 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
11345
11346 /* Three-dimensional array addressing. */
11347 if (TARGET_ARCH64)
11348 {
11349 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
11350 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
11351 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
11352 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
11353 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
11354 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
11355 }
11356 else
11357 {
11358 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
11359 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
11360 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
11361 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
11362 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
11363 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
11364 }
11365
11366 if (TARGET_VIS2)
11367 {
11368 /* Edge handling. */
11369 if (TARGET_ARCH64)
11370 {
11371 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
11372 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
11373 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
11374 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
11375 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
11376 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
11377 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
11378 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
11379 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
11380 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
11381 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
11382 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
11383 }
11384 else
11385 {
11386 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
11387 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
11388 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
11389 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
11390 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
11391 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
11392 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
11393 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
11394 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
11395 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
11396 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
11397 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
11398 }
11399
11400 /* Byte mask and shuffle. */
11401 if (TARGET_ARCH64)
11402 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
11403 SPARC_BUILTIN_BMASK, di_ftype_di_di);
11404 else
11405 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
11406 SPARC_BUILTIN_BMASK, si_ftype_si_si);
11407 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
11408 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
11409 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
11410 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
11411 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
11412 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
11413 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
11414 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
11415 }
11416
11417 if (TARGET_VIS3)
11418 {
11419 if (TARGET_ARCH64)
11420 {
11421 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
11422 SPARC_BUILTIN_CMASK8, void_ftype_di);
11423 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
11424 SPARC_BUILTIN_CMASK16, void_ftype_di);
11425 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
11426 SPARC_BUILTIN_CMASK32, void_ftype_di);
11427 }
11428 else
11429 {
11430 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
11431 SPARC_BUILTIN_CMASK8, void_ftype_si);
11432 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
11433 SPARC_BUILTIN_CMASK16, void_ftype_si);
11434 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
11435 SPARC_BUILTIN_CMASK32, void_ftype_si);
11436 }
11437
11438 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
11439 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
11440
11441 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
11442 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
11443 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
11444 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
11445 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
11446 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
11447 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
11448 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
11449 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
11450 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
11451 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
11452 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
11453 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
11454 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
11455 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
11456 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
11457
11458 if (TARGET_ARCH64)
11459 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
11460 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
11461 else
11462 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
11463 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
11464
11465 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
11466 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
11467 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
11468 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
11469 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
11470 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
11471
11472 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
11473 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
11474 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
11475 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
11476 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
11477 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
11478 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
11479 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
11480 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
11481 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
11482 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
11483 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
11484 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
11485 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
11486 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
11487 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
11488
11489 if (TARGET_ARCH64)
11490 {
11491 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
11492 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
11493 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
11494 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
11495 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
11496 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
11497 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
11498 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
11499 }
11500 else
11501 {
11502 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
11503 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
11504 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
11505 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
11506 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
11507 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
11508 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
11509 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
11510 }
11511
11512 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
11513 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
11514 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
11515 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
11516 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
11517 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
11518 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
11519 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
11520 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
11521 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
11522 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
11523 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
11524
11525 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
11526 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
11527 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
11528 SPARC_BUILTIN_XMULX, di_ftype_di_di);
11529 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
11530 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
11531 }
11532
11533 if (TARGET_VIS4)
11534 {
11535 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
11536 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
11537 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
11538 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
11539 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
11540 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
11541 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
11542 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
11543
11544
11545 if (TARGET_ARCH64)
11546 {
11547 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
11548 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
11549 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
11550 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
11551 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
11552 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
11553 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
11554 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
11555 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
11556 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11557 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
11558 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11559 }
11560 else
11561 {
11562 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
11563 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
11564 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
11565 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
11566 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
11567 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
11568 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
11569 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
11570 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
11571 			     SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
11572 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
11573 			     SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
11574 }
11575
11576 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
11577 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
11578 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
11579 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
11580 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
11581 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
11582 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
11583 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
11584 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
11585 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
11586 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
11587 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
11588 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
11589 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
11590 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
11591 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
11592 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
11593 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
11594 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
11595 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
11596 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
11597 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
11598 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
11599 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
11600 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
11601 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
11602 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
11603 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
11604 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
11605 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
11606 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
11607 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
11608 }
11609
11610 if (TARGET_VIS4B)
11611 {
11612 def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
11613 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
11614 def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
11615 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
11616 def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
11617 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);
11618
11619 if (TARGET_ARCH64)
11620 {
11621 tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
11622 v8qi, v8qi,
11623 intSI_type_node, 0);
11624 tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
11625 v4hi, v4hi,
11626 intSI_type_node, 0);
11627 tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
11628 v2si, v2si,
11629 intSI_type_node, 0);
11630
11631 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
11632 SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
11633 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
11634 SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
11635 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
11636 SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
11637 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
11638 SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);
11639
11640 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
11641 SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
11642 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
11643 SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
11644 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
11645 SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
11646 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
11647 SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);
11648
11649 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
11650 SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
11651 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
11652 SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
11653 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
11654 SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
11655 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
11656 SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);
11657
11658
11659 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
11660 SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
11661 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
11662 SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);
11663
11664 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
11665 SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
11666 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
11667 SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);
11668
11669 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
11670 SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
11671 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
11672 SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);
11673
11674 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
11675 SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
11676 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
11677 SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
11678 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
11679 SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);
11680
11681 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
11682 SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
11683 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
11684 SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
11685 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
11686 SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
11687
11688 }
11689 else
11690 {
11691 tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
11692 v8qi, v8qi,
11693 intSI_type_node, 0);
11694 tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
11695 v4hi, v4hi,
11696 intSI_type_node, 0);
11697 tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
11698 v2si, v2si,
11699 intSI_type_node, 0);
11700
11701 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
11702 SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
11703 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
11704 SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
11705 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
11706 SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
11707 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
11708 SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);
11709
11710 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
11711 SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
11712 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
11713 SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
11714 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
11715 SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
11716 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
11717 SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);
11718
11719 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
11720 SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si);
11721 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
11722 SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
11723 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
11724 SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
11725 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
11726 SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);
11727
11728
11729 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
11730 SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
11731 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
11732 SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);
11733
11734 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
11735 SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
11736 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
11737 SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);
11738
11739 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
11740 SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
11741 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
11742 SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);
11743
11744 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
11745 SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si);
11746 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
11747 SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
11748 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
11749 SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);
11750
11751 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
11752 SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
11753 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
11754 SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
11755 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
11756 SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
11757 }
11758 }
11759 }
11760
11761 /* Implement TARGET_BUILTIN_DECL hook. */
11762
11763 static tree
11764 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11765 {
11766 if (code >= SPARC_BUILTIN_MAX)
11767 return error_mark_node;
11768
11769 return sparc_builtins[code];
11770 }
11771
11772 /* Implement TARGET_EXPAND_BUILTIN hook.  */
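/* For illustration, a source-level use such as

     typedef short v4hi __attribute__ ((vector_size (8)));
     v4hi sum (v4hi a, v4hi b) { return __builtin_vis_fpadd16 (a, b); }

   reaches this hook with the decl registered above for SPARC_BUILTIN_FPADD16
   and is expanded through CODE_FOR_addv4hi3.  */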
11773
11774 static rtx
11775 sparc_expand_builtin (tree exp, rtx target,
11776 rtx subtarget ATTRIBUTE_UNUSED,
11777 machine_mode tmode ATTRIBUTE_UNUSED,
11778 int ignore ATTRIBUTE_UNUSED)
11779 {
11780 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11781 enum sparc_builtins code
11782 = (enum sparc_builtins) DECL_MD_FUNCTION_CODE (fndecl);
11783 enum insn_code icode = sparc_builtins_icode[code];
11784 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
11785 call_expr_arg_iterator iter;
11786 int arg_count = 0;
11787 rtx pat, op[4];
11788 tree arg;
11789
11790 if (nonvoid)
11791 {
11792 machine_mode tmode = insn_data[icode].operand[0].mode;
11793 if (!target
11794 || GET_MODE (target) != tmode
11795 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11796 op[0] = gen_reg_rtx (tmode);
11797 else
11798 op[0] = target;
11799 }
11800 else
11801 op[0] = NULL_RTX;
11802
11803 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
11804 {
11805 const struct insn_operand_data *insn_op;
11806 int idx;
11807
11808 if (arg == error_mark_node)
11809 return NULL_RTX;
11810
11811 arg_count++;
11812 idx = arg_count - !nonvoid;
11813 insn_op = &insn_data[icode].operand[idx];
11814 op[arg_count] = expand_normal (arg);
11815
11816 /* Some of the builtins require constant arguments. We check
11817 for this here. */
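	  /* Specifically, the third argument (the shift count) of the
	     FPCMP*SHL builtins and the second argument of the DICTUNPACK
	     builtins must be literal constants.  */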
11818 if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
11819 && code <= SPARC_BUILTIN_LAST_FPCMPSHL
11820 && arg_count == 3)
11821 || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
11822 && code <= SPARC_BUILTIN_LAST_DICTUNPACK
11823 && arg_count == 2))
11824 {
11825 if (!check_constant_argument (icode, idx, op[arg_count]))
11826 return const0_rtx;
11827 }
11828
11829 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
11830 {
11831 if (!address_operand (op[arg_count], SImode))
11832 {
11833 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
11834 op[arg_count] = copy_addr_to_reg (op[arg_count]);
11835 }
11836 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
11837 }
11838
11839 else if (insn_op->mode == V1DImode
11840 && GET_MODE (op[arg_count]) == DImode)
11841 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
11842
11843 else if (insn_op->mode == V1SImode
11844 && GET_MODE (op[arg_count]) == SImode)
11845 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
11846
11847 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
11848 insn_op->mode))
11849 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
11850 }
11851
11852 switch (arg_count)
11853 {
11854 case 0:
11855 pat = GEN_FCN (icode) (op[0]);
11856 break;
11857 case 1:
11858 if (nonvoid)
11859 pat = GEN_FCN (icode) (op[0], op[1]);
11860 else
11861 pat = GEN_FCN (icode) (op[1]);
11862 break;
11863 case 2:
11864 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
11865 break;
11866 case 3:
11867 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
11868 break;
11869 default:
11870 gcc_unreachable ();
11871 }
11872
11873 if (!pat)
11874 return NULL_RTX;
11875
11876 emit_insn (pat);
11877
11878 return (nonvoid ? op[0] : const0_rtx);
11879 }
11880
11881 /* Return the upper 16 bits of the 8x16 multiplication. */
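/* Adding 128 before dividing by 256 rounds the 24-bit product to the
   nearest multiple of 256 for non-negative values; e.g. with e8 == 200 and
   e16 == 1000 the raw product is 200000 and (200000 + 128) / 256 == 781.  */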
11882
11883 static int
11884 sparc_vis_mul8x16 (int e8, int e16)
11885 {
11886 return (e8 * e16 + 128) / 256;
11887 }
11888
11889 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11890 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
11891
11892 static void
11893 sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode,
11894 tree inner_type, tree cst0, tree cst1)
11895 {
11896 unsigned i, num = VECTOR_CST_NELTS (cst0);
11897 int scale;
11898
11899 switch (fncode)
11900 {
11901 case SPARC_BUILTIN_FMUL8X16:
11902 for (i = 0; i < num; ++i)
11903 {
11904 int val
11905 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11906 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11907 n_elts->quick_push (build_int_cst (inner_type, val));
11908 }
11909 break;
11910
11911 case SPARC_BUILTIN_FMUL8X16AU:
11912 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11913
11914 for (i = 0; i < num; ++i)
11915 {
11916 int val
11917 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11918 scale);
11919 n_elts->quick_push (build_int_cst (inner_type, val));
11920 }
11921 break;
11922
11923 case SPARC_BUILTIN_FMUL8X16AL:
11924 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11925
11926 for (i = 0; i < num; ++i)
11927 {
11928 int val
11929 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11930 scale);
11931 n_elts->quick_push (build_int_cst (inner_type, val));
11932 }
11933 break;
11934
11935 default:
11936 gcc_unreachable ();
11937 }
11938 }
11939
11940 /* Implement TARGET_FOLD_BUILTIN hook.
11941
11942 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
11943 result of the function call is ignored. NULL_TREE is returned if the
11944 function could not be folded. */
11945
11946 static tree
11947 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
11948 tree *args, bool ignore)
11949 {
11950 enum sparc_builtins code
11951 = (enum sparc_builtins) DECL_MD_FUNCTION_CODE (fndecl);
11952 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
11953 tree arg0, arg1, arg2;
11954
11955 if (ignore)
11956 switch (code)
11957 {
11958 case SPARC_BUILTIN_LDFSR:
11959 case SPARC_BUILTIN_STFSR:
11960 case SPARC_BUILTIN_ALIGNADDR:
11961 case SPARC_BUILTIN_WRGSR:
11962 case SPARC_BUILTIN_BMASK:
11963 case SPARC_BUILTIN_CMASK8:
11964 case SPARC_BUILTIN_CMASK16:
11965 case SPARC_BUILTIN_CMASK32:
11966 break;
11967
11968 default:
11969 return build_zero_cst (rtype);
11970 }
11971
11972 switch (code)
11973 {
11974 case SPARC_BUILTIN_FEXPAND:
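      /* fexpand converts each unsigned 8-bit element into a 16-bit
	 fixed-point value by placing it in bits <11:4> of the result,
	 hence the left shift by 4 below.  */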
11975 arg0 = args[0];
11976 STRIP_NOPS (arg0);
11977
11978 if (TREE_CODE (arg0) == VECTOR_CST)
11979 {
11980 tree inner_type = TREE_TYPE (rtype);
11981 unsigned i;
11982
11983 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11984 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11985 {
11986 unsigned HOST_WIDE_INT val
11987 = TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i));
11988 n_elts.quick_push (build_int_cst (inner_type, val << 4));
11989 }
11990 return n_elts.build ();
11991 }
11992 break;
11993
11994 case SPARC_BUILTIN_FMUL8X16:
11995 case SPARC_BUILTIN_FMUL8X16AU:
11996 case SPARC_BUILTIN_FMUL8X16AL:
11997 arg0 = args[0];
11998 arg1 = args[1];
11999 STRIP_NOPS (arg0);
12000 STRIP_NOPS (arg1);
12001
12002 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
12003 {
12004 tree inner_type = TREE_TYPE (rtype);
12005 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
12006 sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1);
12007 return n_elts.build ();
12008 }
12009 break;
12010
12011 case SPARC_BUILTIN_FPMERGE:
12012 arg0 = args[0];
12013 arg1 = args[1];
12014 STRIP_NOPS (arg0);
12015 STRIP_NOPS (arg1);
12016
12017 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
12018 {
12019 tree_vector_builder n_elts (rtype, 2 * VECTOR_CST_NELTS (arg0), 1);
12020 unsigned i;
12021 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
12022 {
12023 n_elts.quick_push (VECTOR_CST_ELT (arg0, i));
12024 n_elts.quick_push (VECTOR_CST_ELT (arg1, i));
12025 }
12026
12027 return n_elts.build ();
12028 }
12029 break;
12030
12031 case SPARC_BUILTIN_PDIST:
12032 case SPARC_BUILTIN_PDISTN:
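      /* Both builtins compute the sum of the absolute differences of the
	 eight byte elements; PDIST additionally accumulates into its third
	 argument, whereas PDISTN starts from zero.  */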
12033 arg0 = args[0];
12034 arg1 = args[1];
12035 STRIP_NOPS (arg0);
12036 STRIP_NOPS (arg1);
12037 if (code == SPARC_BUILTIN_PDIST)
12038 {
12039 arg2 = args[2];
12040 STRIP_NOPS (arg2);
12041 }
12042 else
12043 arg2 = integer_zero_node;
12044
12045 if (TREE_CODE (arg0) == VECTOR_CST
12046 && TREE_CODE (arg1) == VECTOR_CST
12047 && TREE_CODE (arg2) == INTEGER_CST)
12048 {
12049 bool overflow = false;
12050 widest_int result = wi::to_widest (arg2);
12051 widest_int tmp;
12052 unsigned i;
12053
12054 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
12055 {
12056 tree e0 = VECTOR_CST_ELT (arg0, i);
12057 tree e1 = VECTOR_CST_ELT (arg1, i);
12058
12059 wi::overflow_type neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
12060
12061 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
12062 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
12063 if (wi::neg_p (tmp))
12064 tmp = wi::neg (tmp, &neg2_ovf);
12065 else
12066 neg2_ovf = wi::OVF_NONE;
12067 result = wi::add (result, tmp, SIGNED, &add2_ovf);
12068 overflow |= ((neg1_ovf != wi::OVF_NONE)
12069 | (neg2_ovf != wi::OVF_NONE)
12070 | (add1_ovf != wi::OVF_NONE)
12071 | (add2_ovf != wi::OVF_NONE));
12072 }
12073
12074 gcc_assert (!overflow);
12075
12076 return wide_int_to_tree (rtype, result);
12077 }
12078
12079 default:
12080 break;
12081 }
12082
12083 return NULL_TREE;
12084 }
12085
12086 /* ??? This duplicates information provided to the compiler by the
12087 ??? scheduler description. Some day, teach genautomata to output
12088 ??? the latencies and then CSE will just use that. */
12089
12090 static bool
12091 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
12092 int opno ATTRIBUTE_UNUSED,
12093 int *total, bool speed ATTRIBUTE_UNUSED)
12094 {
12095 int code = GET_CODE (x);
12096 bool float_mode_p = FLOAT_MODE_P (mode);
12097
12098 switch (code)
12099 {
12100 case CONST_INT:
12101 if (SMALL_INT (x))
12102 *total = 0;
12103 else
12104 *total = 2;
12105 return true;
12106
12107 case CONST_WIDE_INT:
12108 *total = 0;
12109 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
12110 *total += 2;
12111 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
12112 *total += 2;
12113 return true;
12114
12115 case HIGH:
12116 *total = 2;
12117 return true;
12118
12119 case CONST:
12120 case LABEL_REF:
12121 case SYMBOL_REF:
12122 *total = 4;
12123 return true;
12124
12125 case CONST_DOUBLE:
12126 *total = 8;
12127 return true;
12128
12129 case MEM:
12130 /* If outer-code was a sign or zero extension, a cost
12131 of COSTS_N_INSNS (1) was already added in. This is
12132 why we are subtracting it back out. */
12133 if (outer_code == ZERO_EXTEND)
12134 {
12135 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
12136 }
12137 else if (outer_code == SIGN_EXTEND)
12138 {
12139 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
12140 }
12141 else if (float_mode_p)
12142 {
12143 *total = sparc_costs->float_load;
12144 }
12145 else
12146 {
12147 *total = sparc_costs->int_load;
12148 }
12149
12150 return true;
12151
12152 case PLUS:
12153 case MINUS:
12154 if (float_mode_p)
12155 *total = sparc_costs->float_plusminus;
12156 else
12157 *total = COSTS_N_INSNS (1);
12158 return false;
12159
12160 case FMA:
12161 {
12162 rtx sub;
12163
12164 gcc_assert (float_mode_p);
12165 *total = sparc_costs->float_mul;
12166
12167 sub = XEXP (x, 0);
12168 if (GET_CODE (sub) == NEG)
12169 sub = XEXP (sub, 0);
12170 *total += rtx_cost (sub, mode, FMA, 0, speed);
12171
12172 sub = XEXP (x, 2);
12173 if (GET_CODE (sub) == NEG)
12174 sub = XEXP (sub, 0);
12175 *total += rtx_cost (sub, mode, FMA, 2, speed);
12176 return true;
12177 }
12178
12179 case MULT:
12180 if (float_mode_p)
12181 *total = sparc_costs->float_mul;
12182 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
12183 *total = COSTS_N_INSNS (25);
12184 else
12185 {
12186 int bit_cost;
12187
12188 bit_cost = 0;
12189 if (sparc_costs->int_mul_bit_factor)
12190 {
12191 int nbits;
12192
12193 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
12194 {
12195 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
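	      /* Count the bits set in the constant multiplier; each
		 iteration of "value &= value - 1" clears the lowest one.  */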
12196 for (nbits = 0; value != 0; value &= value - 1)
12197 nbits++;
12198 }
12199 else
12200 nbits = 7;
12201
12202 if (nbits < 3)
12203 nbits = 3;
12204 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
12205 bit_cost = COSTS_N_INSNS (bit_cost);
12206 }
12207
12208 if (mode == DImode || !TARGET_HARD_MUL)
12209 *total = sparc_costs->int_mulX + bit_cost;
12210 else
12211 *total = sparc_costs->int_mul + bit_cost;
12212 }
12213 return false;
12214
12215 case ASHIFT:
12216 case ASHIFTRT:
12217 case LSHIFTRT:
12218 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
12219 return false;
12220
12221 case DIV:
12222 case UDIV:
12223 case MOD:
12224 case UMOD:
12225 if (float_mode_p)
12226 {
12227 if (mode == DFmode)
12228 *total = sparc_costs->float_div_df;
12229 else
12230 *total = sparc_costs->float_div_sf;
12231 }
12232 else
12233 {
12234 if (mode == DImode)
12235 *total = sparc_costs->int_divX;
12236 else
12237 *total = sparc_costs->int_div;
12238 }
12239 return false;
12240
12241 case NEG:
12242 if (! float_mode_p)
12243 {
12244 *total = COSTS_N_INSNS (1);
12245 return false;
12246 }
12247 /* FALLTHRU */
12248
12249 case ABS:
12250 case FLOAT:
12251 case UNSIGNED_FLOAT:
12252 case FIX:
12253 case UNSIGNED_FIX:
12254 case FLOAT_EXTEND:
12255 case FLOAT_TRUNCATE:
12256 *total = sparc_costs->float_move;
12257 return false;
12258
12259 case SQRT:
12260 if (mode == DFmode)
12261 *total = sparc_costs->float_sqrt_df;
12262 else
12263 *total = sparc_costs->float_sqrt_sf;
12264 return false;
12265
12266 case COMPARE:
12267 if (float_mode_p)
12268 *total = sparc_costs->float_cmp;
12269 else
12270 *total = COSTS_N_INSNS (1);
12271 return false;
12272
12273 case IF_THEN_ELSE:
12274 if (float_mode_p)
12275 *total = sparc_costs->float_cmove;
12276 else
12277 *total = sparc_costs->int_cmove;
12278 return false;
12279
12280 case IOR:
12281 /* Handle the NAND vector patterns. */
12282 if (sparc_vector_mode_supported_p (mode)
12283 && GET_CODE (XEXP (x, 0)) == NOT
12284 && GET_CODE (XEXP (x, 1)) == NOT)
12285 {
12286 *total = COSTS_N_INSNS (1);
12287 return true;
12288 }
12289 else
12290 return false;
12291
12292 default:
12293 return false;
12294 }
12295 }
12296
12297 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
12298
12299 static inline bool
12300 general_or_i64_p (reg_class_t rclass)
12301 {
12302 return (rclass == GENERAL_REGS || rclass == I64_REGS);
12303 }
12304
12305 /* Implement TARGET_REGISTER_MOVE_COST. */
12306
12307 static int
12308 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12309 reg_class_t from, reg_class_t to)
12310 {
12311 bool need_memory = false;
12312
12313 /* This helps postreload CSE to eliminate redundant comparisons. */
12314 if (from == NO_REGS || to == NO_REGS)
12315 return 100;
12316
12317 if (from == FPCC_REGS || to == FPCC_REGS)
12318 need_memory = true;
12319 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
12320 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
12321 {
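      /* VIS 3.0 has direct move instructions between the integer and
	 floating-point register files (movwtos, movstouw, movxtod, movdtox),
	 so 4- and 8-byte values need not be bounced through memory.  */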
12322 if (TARGET_VIS3)
12323 {
12324 int size = GET_MODE_SIZE (mode);
12325 if (size == 8 || size == 4)
12326 {
12327 if (! TARGET_ARCH32 || size == 4)
12328 return 4;
12329 else
12330 return 6;
12331 }
12332 }
12333 need_memory = true;
12334 }
12335
12336 if (need_memory)
12337 {
12338 if (sparc_cpu == PROCESSOR_ULTRASPARC
12339 || sparc_cpu == PROCESSOR_ULTRASPARC3
12340 || sparc_cpu == PROCESSOR_NIAGARA
12341 || sparc_cpu == PROCESSOR_NIAGARA2
12342 || sparc_cpu == PROCESSOR_NIAGARA3
12343 || sparc_cpu == PROCESSOR_NIAGARA4
12344 || sparc_cpu == PROCESSOR_NIAGARA7
12345 || sparc_cpu == PROCESSOR_M8)
12346 return 12;
12347
12348 return 6;
12349 }
12350
12351 return 2;
12352 }
12353
12354 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
12355 This is achieved by means of a manual dynamic stack space allocation in
12356 the current frame. We make the assumption that SEQ doesn't contain any
12357 function calls, with the possible exception of calls to the GOT helper. */
12358
12359 static void
12360 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
12361 {
12362 /* We must preserve the lowest 16 words for the register save area. */
12363 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
12364 /* We really need only 2 words of fresh stack space. */
12365 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
12366
12367 rtx slot
12368 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
12369 SPARC_STACK_BIAS + offset));
12370
12371 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
12372 emit_insn (gen_rtx_SET (slot, reg));
12373 if (reg2)
12374 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
12375 reg2));
12376 emit_insn (seq);
12377 if (reg2)
12378 emit_insn (gen_rtx_SET (reg2,
12379 adjust_address (slot, word_mode, UNITS_PER_WORD)));
12380 emit_insn (gen_rtx_SET (reg, slot));
12381 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
12382 }
12383
12384 /* Output the assembler code for a thunk function. THUNK_DECL is the
12385 declaration for the thunk function itself, FUNCTION is the decl for
12386 the target function. DELTA is an immediate constant offset to be
12387 added to THIS. If VCALL_OFFSET is nonzero, the word at address
12388 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
12389
12390 static void
12391 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12392 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12393 tree function)
12394 {
12395 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
12396 rtx this_rtx, funexp;
12397 rtx_insn *insn;
12398 unsigned int int_arg_first;
12399
12400 reload_completed = 1;
12401 epilogue_completed = 1;
12402
12403 emit_note (NOTE_INSN_PROLOGUE_END);
12404
12405 if (TARGET_FLAT)
12406 {
12407 sparc_leaf_function_p = 1;
12408
12409 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12410 }
12411 else if (flag_delayed_branch)
12412 {
12413 /* We will emit a regular sibcall below, so we need to instruct
12414 output_sibcall that we are in a leaf function. */
12415 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
12416
12417 /* This will cause final.c to invoke leaf_renumber_regs so we
12418 must behave as if we were in a not-yet-leafified function. */
12419 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
12420 }
12421 else
12422 {
12423 /* We will emit the sibcall manually below, so we will need to
12424 manually spill non-leaf registers. */
12425 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
12426
12427 /* We really are in a leaf function. */
12428 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12429 }
12430
12431 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
12432 returns a structure, the structure return pointer is there instead. */
12433 if (TARGET_ARCH64
12434 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12435 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
12436 else
12437 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
12438
12439 /* Add DELTA. When possible use a plain add, otherwise load it into
12440 a register first. */
12441 if (delta)
12442 {
12443 rtx delta_rtx = GEN_INT (delta);
12444
12445 if (! SPARC_SIMM13_P (delta))
12446 {
12447 rtx scratch = gen_rtx_REG (Pmode, 1);
12448 emit_move_insn (scratch, delta_rtx);
12449 delta_rtx = scratch;
12450 }
12451
12452 /* THIS_RTX += DELTA. */
12453 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
12454 }
12455
12456 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
12457 if (vcall_offset)
12458 {
12459 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
12460 rtx scratch = gen_rtx_REG (Pmode, 1);
12461
12462 gcc_assert (vcall_offset < 0);
12463
12464 /* SCRATCH = *THIS_RTX. */
12465 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
12466
12467 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
12468 may not have any available scratch register at this point. */
12469 if (SPARC_SIMM13_P (vcall_offset))
12470 ;
12471 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
12472 else if (! fixed_regs[5]
12473 /* The below sequence is made up of at least 2 insns,
12474 while the default method may need only one. */
12475 && vcall_offset < -8192)
12476 {
12477 rtx scratch2 = gen_rtx_REG (Pmode, 5);
12478 emit_move_insn (scratch2, vcall_offset_rtx);
12479 vcall_offset_rtx = scratch2;
12480 }
12481 else
12482 {
12483 rtx increment = GEN_INT (-4096);
12484
12485 /* VCALL_OFFSET is a negative number whose typical range can be
12486 estimated as -32768..0 in 32-bit mode. In almost all cases
12487 it is therefore cheaper to emit multiple add insns than
12488 spilling and loading the constant into a register (at least
12489 6 insns). */
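/* For instance, with a VCALL_OFFSET of -20000 the loop below emits four
   add insns of -4096 each, leaving -3616, which then fits in the simm13
   displacement used by the memory access further down.  */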
12490 while (! SPARC_SIMM13_P (vcall_offset))
12491 {
12492 emit_insn (gen_add2_insn (scratch, increment));
12493 vcall_offset += 4096;
12494 }
12495 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
12496 }
12497
12498 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
12499 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
12500 gen_rtx_PLUS (Pmode,
12501 scratch,
12502 vcall_offset_rtx)));
12503
12504 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
12505 emit_insn (gen_add2_insn (this_rtx, scratch));
12506 }
12507
12508 /* Generate a tail call to the target function. */
12509 if (! TREE_USED (function))
12510 {
12511 assemble_external (function);
12512 TREE_USED (function) = 1;
12513 }
12514 funexp = XEXP (DECL_RTL (function), 0);
12515
12516 if (flag_delayed_branch)
12517 {
12518 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
12519 insn = emit_call_insn (gen_sibcall (funexp));
12520 SIBLING_CALL_P (insn) = 1;
12521 }
12522 else
12523 {
12524 /* The hoops we have to jump through in order to generate a sibcall
12525 without using delay slots... */
12526 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
12527
12528 if (flag_pic)
12529 {
12530 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
12531 start_sequence ();
12532 load_got_register (); /* clobbers %o7 */
12533 if (!TARGET_VXWORKS_RTP)
12534 pic_offset_table_rtx = got_register_rtx;
12535 scratch = sparc_legitimize_pic_address (funexp, scratch);
12536 seq = get_insns ();
12537 end_sequence ();
12538 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
12539 }
12540 else if (TARGET_ARCH32)
12541 {
12542 emit_insn (gen_rtx_SET (scratch,
12543 gen_rtx_HIGH (SImode, funexp)));
12544 emit_insn (gen_rtx_SET (scratch,
12545 gen_rtx_LO_SUM (SImode, scratch, funexp)));
12546 }
12547 else /* TARGET_ARCH64 */
12548 {
12549 switch (sparc_code_model)
12550 {
12551 case CM_MEDLOW:
12552 case CM_MEDMID:
12553 /* The destination can serve as a temporary. */
12554 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
12555 break;
12556
12557 case CM_MEDANY:
12558 case CM_EMBMEDANY:
12559 /* The destination cannot serve as a temporary. */
12560 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
12561 start_sequence ();
12562 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
12563 seq = get_insns ();
12564 end_sequence ();
12565 emit_and_preserve (seq, spill_reg, 0);
12566 break;
12567
12568 default:
12569 gcc_unreachable ();
12570 }
12571 }
12572
12573 emit_jump_insn (gen_indirect_jump (scratch));
12574 }
12575
12576 emit_barrier ();
12577
12578 /* Run just enough of rest_of_compilation to get the insns emitted.
12579 There's not really enough bulk here to make other passes such as
12580 instruction scheduling worthwhile. */
12581 insn = get_insns ();
12582 shorten_branches (insn);
12583 assemble_start_function (thunk_fndecl, fnname);
12584 final_start_function (insn, file, 1);
12585 final (insn, file, 1);
12586 final_end_function ();
12587 assemble_end_function (thunk_fndecl, fnname);
12588
12589 reload_completed = 0;
12590 epilogue_completed = 0;
12591 }
12592
12593 /* Return true if sparc_output_mi_thunk would be able to output the
12594 assembler code for the thunk function specified by the arguments
12595 it is passed, and false otherwise. */
12596 static bool
12597 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
12598 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
12599 HOST_WIDE_INT vcall_offset,
12600 const_tree function ATTRIBUTE_UNUSED)
12601 {
12602 /* Bound the loop used in the default method above. */
12603 return (vcall_offset >= -32768 || ! fixed_regs[5]);
12604 }
12605
12606 /* How to allocate a 'struct machine_function'. */
12607
12608 static struct machine_function *
12609 sparc_init_machine_status (void)
12610 {
12611 return ggc_cleared_alloc<machine_function> ();
12612 }
12613
12614 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
12615
12616 static unsigned HOST_WIDE_INT
12617 sparc_asan_shadow_offset (void)
12618 {
12619 return TARGET_ARCH64 ? (HOST_WIDE_INT_1 << 43) : (HOST_WIDE_INT_1 << 29);
12620 }
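/* Assuming the usual ASan mapping with a shadow scale of 3, i.e.
   shadow = (address >> 3) + offset, this places the shadow memory at
   1 << 43 for 64-bit code and 1 << 29 for 32-bit code.  */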
12621
12622 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12623 We need to emit DTP-relative relocations. */
12624
12625 static void
12626 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
12627 {
12628 switch (size)
12629 {
12630 case 4:
12631 fputs ("\t.word\t%r_tls_dtpoff32(", file);
12632 break;
12633 case 8:
12634 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
12635 break;
12636 default:
12637 gcc_unreachable ();
12638 }
12639 output_addr_const (file, x);
12640 fputs (")", file);
12641 }
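/* For example, a 4-byte entry referring to a (hypothetical) symbol foo
   comes out as "\t.word\t%r_tls_dtpoff32(foo)", directing the assembler
   to emit a DTP-relative relocation against foo.  */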
12642
12643 /* Do whatever processing is required at the end of a file. */
12644
12645 static void
12646 sparc_file_end (void)
12647 {
12648 /* If we need to emit the special GOT helper function, do so now. */
12649 if (got_helper_needed)
12650 {
12651 const char *name = XSTR (got_helper_rtx, 0);
12652 #ifdef DWARF2_UNWIND_INFO
12653 bool do_cfi;
12654 #endif
12655
12656 if (USE_HIDDEN_LINKONCE)
12657 {
12658 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
12659 get_identifier (name),
12660 build_function_type_list (void_type_node,
12661 NULL_TREE));
12662 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
12663 NULL_TREE, void_type_node);
12664 TREE_PUBLIC (decl) = 1;
12665 TREE_STATIC (decl) = 1;
12666 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
12667 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
12668 DECL_VISIBILITY_SPECIFIED (decl) = 1;
12669 resolve_unique_section (decl, 0, flag_function_sections);
12670 allocate_struct_function (decl, true);
12671 cfun->is_thunk = 1;
12672 current_function_decl = decl;
12673 init_varasm_status ();
12674 assemble_start_function (decl, name);
12675 }
12676 else
12677 {
12678 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
12679 switch_to_section (text_section);
12680 if (align > 0)
12681 ASM_OUTPUT_ALIGN (asm_out_file, align);
12682 ASM_OUTPUT_LABEL (asm_out_file, name);
12683 }
12684
12685 #ifdef DWARF2_UNWIND_INFO
12686 do_cfi = dwarf2out_do_cfi_asm ();
12687 if (do_cfi)
12688 output_asm_insn (".cfi_startproc", NULL);
12689 #endif
12690 if (flag_delayed_branch)
12691 {
12692 output_asm_insn ("jmp\t%%o7+8", NULL);
12693 output_asm_insn (" add\t%%o7, %0, %0", &got_register_rtx);
12694 }
12695 else
12696 {
12697 output_asm_insn ("add\t%%o7, %0, %0", &got_register_rtx);
12698 output_asm_insn ("jmp\t%%o7+8", NULL);
12699 output_asm_insn (" nop", NULL);
12700 }
12701 #ifdef DWARF2_UNWIND_INFO
12702 if (do_cfi)
12703 output_asm_insn (".cfi_endproc", NULL);
12704 #endif
12705 }
12706
12707 if (NEED_INDICATE_EXEC_STACK)
12708 file_end_indicate_exec_stack ();
12709
12710 #ifdef TARGET_SOLARIS
12711 solaris_file_end ();
12712 #endif
12713 }
12714
12715 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
12716 /* Implement TARGET_MANGLE_TYPE. */
12717
12718 static const char *
12719 sparc_mangle_type (const_tree type)
12720 {
12721 if (TARGET_ARCH32
12722 && TYPE_MAIN_VARIANT (type) == long_double_type_node
12723 && TARGET_LONG_DOUBLE_128)
12724 return "g";
12725
12726 /* For all other types, use normal C++ mangling. */
12727 return NULL;
12728 }
12729 #endif
12730
12731 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
12732 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
12733 bit 0 indicates that X is true, and bit 1 indicates that Y is true. */
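/* As a worked example of the logic below: a seq_cst atomic operation is
   expanded with a leading call of (model, 3, 1) and a trailing call of
   (model, 3, 2), as in sparc_expand_compare_and_swap further down.  Under
   TSO, the leading call computes mm = LoadLoad|StoreLoad|LoadStore|StoreStore,
   but all four bits also end up in IMPLIED for a combined load+store
   operation, so no membar is emitted.  */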
12734
12735 void
12736 sparc_emit_membar_for_model (enum memmodel model,
12737 int load_store, int before_after)
12738 {
12739 /* Bits for the MEMBAR mmask field. */
12740 const int LoadLoad = 1;
12741 const int StoreLoad = 2;
12742 const int LoadStore = 4;
12743 const int StoreStore = 8;
12744
12745 int mm = 0, implied = 0;
12746
12747 switch (sparc_memory_model)
12748 {
12749 case SMM_SC:
12750 /* Sequential Consistency. All memory transactions are immediately
12751 visible in sequential execution order. No barriers needed. */
12752 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
12753 break;
12754
12755 case SMM_TSO:
12756 /* Total Store Ordering: all memory transactions with store semantics
12757 are followed by an implied StoreStore. */
12758 implied |= StoreStore;
12759
12760 /* If we're not looking for a raw barrier (before+after), then atomic
12761 operations get the benefit of being both load and store. */
12762 if (load_store == 3 && before_after == 1)
12763 implied |= StoreLoad;
12764 /* FALLTHRU */
12765
12766 case SMM_PSO:
12767 /* Partial Store Ordering: all memory transactions with load semantics
12768 are followed by an implied LoadLoad | LoadStore. */
12769 implied |= LoadLoad | LoadStore;
12770
12771 /* If we're not looking for a raw barrier (before+after), then atomic
12772 operations get the benefit of being both load and store. */
12773 if (load_store == 3 && before_after == 2)
12774 implied |= StoreLoad | StoreStore;
12775 /* FALLTHRU */
12776
12777 case SMM_RMO:
12778 /* Relaxed Memory Ordering: no implicit bits. */
12779 break;
12780
12781 default:
12782 gcc_unreachable ();
12783 }
12784
12785 if (before_after & 1)
12786 {
12787 if (is_mm_release (model) || is_mm_acq_rel (model)
12788 || is_mm_seq_cst (model))
12789 {
12790 if (load_store & 1)
12791 mm |= LoadLoad | StoreLoad;
12792 if (load_store & 2)
12793 mm |= LoadStore | StoreStore;
12794 }
12795 }
12796 if (before_after & 2)
12797 {
12798 if (is_mm_acquire (model) || is_mm_acq_rel (model)
12799 || is_mm_seq_cst (model))
12800 {
12801 if (load_store & 1)
12802 mm |= LoadLoad | LoadStore;
12803 if (load_store & 2)
12804 mm |= StoreLoad | StoreStore;
12805 }
12806 }
12807
12808 /* Remove the bits implied by the system memory model. */
12809 mm &= ~implied;
12810
12811 /* For raw barriers (before+after), always emit a barrier.
12812 This will become a compile-time barrier if needed. */
12813 if (mm || before_after == 3)
12814 emit_insn (gen_membar (GEN_INT (mm)));
12815 }
12816
12817 /* Expand code to perform an 8 or 16-bit compare and swap by doing a 32-bit
12818 compare and swap on the word containing the byte or half-word. */
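/* For instance, for a QImode access whose address ends in ...01, ADDR is
   rounded down to the containing word, OFF becomes (1 ^ 3) << 3 = 16, and
   MASK becomes 0xff << 16 = 0x00ff0000, i.e. bits 23..16 of the word,
   which is where byte 1 lives on a big-endian SPARC.  */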
12819
12820 static void
12821 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
12822 rtx oldval, rtx newval)
12823 {
12824 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
12825 rtx addr = gen_reg_rtx (Pmode);
12826 rtx off = gen_reg_rtx (SImode);
12827 rtx oldv = gen_reg_rtx (SImode);
12828 rtx newv = gen_reg_rtx (SImode);
12829 rtx oldvalue = gen_reg_rtx (SImode);
12830 rtx newvalue = gen_reg_rtx (SImode);
12831 rtx res = gen_reg_rtx (SImode);
12832 rtx resv = gen_reg_rtx (SImode);
12833 rtx memsi, val, mask, cc;
12834
12835 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
12836
12837 if (Pmode != SImode)
12838 addr1 = gen_lowpart (SImode, addr1);
12839 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
12840
12841 memsi = gen_rtx_MEM (SImode, addr);
12842 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
12843 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
12844
12845 val = copy_to_reg (memsi);
12846
12847 emit_insn (gen_rtx_SET (off,
12848 gen_rtx_XOR (SImode, off,
12849 GEN_INT (GET_MODE (mem) == QImode
12850 ? 3 : 2))));
12851
12852 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
12853
12854 if (GET_MODE (mem) == QImode)
12855 mask = force_reg (SImode, GEN_INT (0xff));
12856 else
12857 mask = force_reg (SImode, GEN_INT (0xffff));
12858
12859 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
12860
12861 emit_insn (gen_rtx_SET (val,
12862 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12863 val)));
12864
12865 oldval = gen_lowpart (SImode, oldval);
12866 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
12867
12868 newval = gen_lowpart_common (SImode, newval);
12869 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
12870
12871 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
12872
12873 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
12874
12875 rtx_code_label *end_label = gen_label_rtx ();
12876 rtx_code_label *loop_label = gen_label_rtx ();
12877 emit_label (loop_label);
12878
12879 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
12880
12881 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
12882
12883 emit_move_insn (bool_result, const1_rtx);
12884
12885 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
12886
12887 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
12888
12889 emit_insn (gen_rtx_SET (resv,
12890 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12891 res)));
12892
12893 emit_move_insn (bool_result, const0_rtx);
12894
12895 cc = gen_compare_reg_1 (NE, resv, val);
12896 emit_insn (gen_rtx_SET (val, resv));
12897
12898 /* Use cbranchcc4 to separate the compare and branch! */
12899 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
12900 cc, const0_rtx, loop_label));
12901
12902 emit_label (end_label);
12903
12904 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
12905
12906 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
12907
12908 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
12909 }
12910
12911 /* Expand code to perform a compare-and-swap. */
12912
12913 void
12914 sparc_expand_compare_and_swap (rtx operands[])
12915 {
12916 rtx bval, retval, mem, oldval, newval;
12917 machine_mode mode;
12918 enum memmodel model;
12919
12920 bval = operands[0];
12921 retval = operands[1];
12922 mem = operands[2];
12923 oldval = operands[3];
12924 newval = operands[4];
12925 model = (enum memmodel) INTVAL (operands[6]);
12926 mode = GET_MODE (mem);
12927
12928 sparc_emit_membar_for_model (model, 3, 1);
12929
12930 if (reg_overlap_mentioned_p (retval, oldval))
12931 oldval = copy_to_reg (oldval);
12932
12933 if (mode == QImode || mode == HImode)
12934 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12935 else
12936 {
12937 rtx (*gen) (rtx, rtx, rtx, rtx);
12938 rtx x;
12939
12940 if (mode == SImode)
12941 gen = gen_atomic_compare_and_swapsi_1;
12942 else
12943 gen = gen_atomic_compare_and_swapdi_1;
12944 emit_insn (gen (retval, mem, oldval, newval));
12945
12946 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12947 if (x != bval)
12948 convert_move (bval, x, 1);
12949 }
12950
12951 sparc_emit_membar_for_model (model, 3, 2);
12952 }
12953
12954 void
12955 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
12956 {
12957 rtx t_1, t_2, t_3;
12958
12959 sel = gen_lowpart (DImode, sel);
12960 switch (vmode)
12961 {
12962 case E_V2SImode:
12963 /* inp = xxxxxxxAxxxxxxxB */
12964 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12965 NULL_RTX, 1, OPTAB_DIRECT);
12966 /* t_1 = ....xxxxxxxAxxx. */
12967 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12968 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
12969 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12970 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
12971 /* sel = .......B */
12972 /* t_1 = ...A.... */
12973 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12974 /* sel = ...A...B */
12975 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
12976 /* sel = AAAABBBB * 4 */
12977 t_1 = force_reg (SImode, GEN_INT (0x01230123));
12978 /* sel = { A*4, A*4+1, A*4+2, ... } */
12979 break;
12980
12981 case E_V4HImode:
12982 /* inp = xxxAxxxBxxxCxxxD */
12983 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12984 NULL_RTX, 1, OPTAB_DIRECT);
12985 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12986 NULL_RTX, 1, OPTAB_DIRECT);
12987 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
12988 NULL_RTX, 1, OPTAB_DIRECT);
12989 /* t_1 = ..xxxAxxxBxxxCxx */
12990 /* t_2 = ....xxxAxxxBxxxC */
12991 /* t_3 = ......xxxAxxxBxx */
12992 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12993 GEN_INT (0x07),
12994 NULL_RTX, 1, OPTAB_DIRECT);
12995 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12996 GEN_INT (0x0700),
12997 NULL_RTX, 1, OPTAB_DIRECT);
12998 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
12999 GEN_INT (0x070000),
13000 NULL_RTX, 1, OPTAB_DIRECT);
13001 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
13002 GEN_INT (0x07000000),
13003 NULL_RTX, 1, OPTAB_DIRECT);
13004 /* sel = .......D */
13005 /* t_1 = .....C.. */
13006 /* t_2 = ...B.... */
13007 /* t_3 = .A...... */
13008 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
13009 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
13010 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
13011 /* sel = .A.B.C.D */
13012 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
13013 /* sel = AABBCCDD * 2 */
13014 t_1 = force_reg (SImode, GEN_INT (0x01010101));
13015 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
13016 break;
13017
13018 case E_V8QImode:
13019 /* input = xAxBxCxDxExFxGxH */
13020 sel = expand_simple_binop (DImode, AND, sel,
13021 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
13022 | 0x0f0f0f0f),
13023 NULL_RTX, 1, OPTAB_DIRECT);
13024 /* sel = .A.B.C.D.E.F.G.H */
13025 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
13026 NULL_RTX, 1, OPTAB_DIRECT);
13027 /* t_1 = ..A.B.C.D.E.F.G. */
13028 sel = expand_simple_binop (DImode, IOR, sel, t_1,
13029 NULL_RTX, 1, OPTAB_DIRECT);
13030 /* sel = .AABBCCDDEEFFGGH */
13031 sel = expand_simple_binop (DImode, AND, sel,
13032 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
13033 | 0xff00ff),
13034 NULL_RTX, 1, OPTAB_DIRECT);
13035 /* sel = ..AB..CD..EF..GH */
13036 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
13037 NULL_RTX, 1, OPTAB_DIRECT);
13038 /* t_1 = ....AB..CD..EF.. */
13039 sel = expand_simple_binop (DImode, IOR, sel, t_1,
13040 NULL_RTX, 1, OPTAB_DIRECT);
13041 /* sel = ..ABABCDCDEFEFGH */
13042 sel = expand_simple_binop (DImode, AND, sel,
13043 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
13044 NULL_RTX, 1, OPTAB_DIRECT);
13045 /* sel = ....ABCD....EFGH */
13046 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
13047 NULL_RTX, 1, OPTAB_DIRECT);
13048 /* t_1 = ........ABCD.... */
13049 sel = gen_lowpart (SImode, sel);
13050 t_1 = gen_lowpart (SImode, t_1);
13051 break;
13052
13053 default:
13054 gcc_unreachable ();
13055 }
13056
13057 /* Always perform the final addition/merge within the bmask insn. */
13058 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
13059 }
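/* At the end of the V8QImode case above, SEL holds the nibbles
   ....ABCD....EFGH and T_1 holds ........ABCD...., so taking the low
   32 bits of each and letting the bmask insn add them yields ABCDEFGH,
   i.e. all eight byte indices merged for the bshuffle that consumes
   the GSR mask.  */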
13060
13061 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
13062
13063 static bool
13064 sparc_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
13065 rtx op1, const vec_perm_indices &sel)
13066 {
13067 if (!TARGET_VIS2)
13068 return false;
13069
13070 /* All permutes are supported. */
13071 if (!target)
13072 return true;
13073
13074 /* Force target-independent code to convert constant permutations on other
13075 modes down to V8QI. Rely on this to avoid the complexity of the byte
13076 order of the permutation. */
13077 if (vmode != V8QImode)
13078 return false;
13079
13080 unsigned int i, mask;
13081 for (i = mask = 0; i < 8; ++i)
13082 mask |= (sel[i] & 0xf) << (28 - i*4);
13083 rtx mask_rtx = force_reg (SImode, gen_int_mode (mask, SImode));
13084
13085 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), mask_rtx, const0_rtx));
13086 emit_insn (gen_bshufflev8qi_vis (target, op0, op1));
13087 return true;
13088 }
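/* For example, the byte permutation { 1, 0, 3, 2, 5, 4, 7, 6 } is encoded
   above as the 32-bit mask 0x10325476, one nibble per destination byte with
   element 0 in the most significant nibble; the bmask insn loads it into
   the GSR and bshuffle then performs the pairwise swap.  */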
13089
13090 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
13091
13092 static bool
13093 sparc_frame_pointer_required (void)
13094 {
13095 /* If the stack pointer is dynamically modified in the function, it cannot
13096 serve as the frame pointer. */
13097 if (cfun->calls_alloca)
13098 return true;
13099
13100 /* If the function receives nonlocal gotos, it needs to save the frame
13101 pointer in the nonlocal_goto_save_area object. */
13102 if (cfun->has_nonlocal_label)
13103 return true;
13104
13105 /* In flat mode, that's it. */
13106 if (TARGET_FLAT)
13107 return false;
13108
13109 /* Otherwise, the frame pointer is required if the function isn't leaf, but
13110 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */
13111 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
13112 }
13113
13114 /* The way this is structured, we can't eliminate SFP in favor of SP
13115 if the frame pointer is required: we want to use the SFP->HFP elimination
13116 in that case. But the test in update_eliminables doesn't know we are
13117 assuming below that we only do the former elimination. */
13118
13119 static bool
13120 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
13121 {
13122 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
13123 }
13124
13125 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
13126 they won't be allocated. */
13127
13128 static void
13129 sparc_conditional_register_usage (void)
13130 {
13131 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
13132 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13133 /* If the user has passed -f{fixed,call-{used,saved}}-g5 */
13134 /* then honor it. */
13135 if (TARGET_ARCH32 && fixed_regs[5])
13136 fixed_regs[5] = 1;
13137 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
13138 fixed_regs[5] = 0;
13139 if (! TARGET_V9)
13140 {
13141 int regno;
13142 for (regno = SPARC_FIRST_V9_FP_REG;
13143 regno <= SPARC_LAST_V9_FP_REG;
13144 regno++)
13145 fixed_regs[regno] = 1;
13146 /* %fcc0 is used by v8 and v9. */
13147 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
13148 regno <= SPARC_LAST_V9_FCC_REG;
13149 regno++)
13150 fixed_regs[regno] = 1;
13151 }
13152 if (! TARGET_FPU)
13153 {
13154 int regno;
13155 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
13156 fixed_regs[regno] = 1;
13157 }
13158 /* If the user has passed -f{fixed,call-{used,saved}}-g2 */
13159 /* then honor it. Likewise with g3 and g4. */
13160 if (fixed_regs[2] == 2)
13161 fixed_regs[2] = ! TARGET_APP_REGS;
13162 if (fixed_regs[3] == 2)
13163 fixed_regs[3] = ! TARGET_APP_REGS;
13164 if (TARGET_ARCH32 && fixed_regs[4] == 2)
13165 fixed_regs[4] = ! TARGET_APP_REGS;
13166 else if (TARGET_CM_EMBMEDANY)
13167 fixed_regs[4] = 1;
13168 else if (fixed_regs[4] == 2)
13169 fixed_regs[4] = 0;
13170 if (TARGET_FLAT)
13171 {
13172 int regno;
13173 /* Disable leaf functions. */
13174 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
13175 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13176 leaf_reg_remap [regno] = regno;
13177 }
13178 if (TARGET_VIS)
13179 global_regs[SPARC_GSR_REG] = 1;
13180 }
13181
13182 /* Implement TARGET_USE_PSEUDO_PIC_REG. */
13183
13184 static bool
13185 sparc_use_pseudo_pic_reg (void)
13186 {
13187 return !TARGET_VXWORKS_RTP && flag_pic;
13188 }
13189
13190 /* Implement TARGET_INIT_PIC_REG. */
13191
13192 static void
13193 sparc_init_pic_reg (void)
13194 {
13195 edge entry_edge;
13196 rtx_insn *seq;
13197
13198 /* In PIC mode, we need to always initialize the PIC register if optimization
13199 is enabled, because we are called from IRA and LRA may later force things
13200 to the constant pool for optimization purposes. */
13201 if (!flag_pic || (!crtl->uses_pic_offset_table && !optimize))
13202 return;
13203
13204 start_sequence ();
13205 load_got_register ();
13206 if (!TARGET_VXWORKS_RTP)
13207 emit_move_insn (pic_offset_table_rtx, got_register_rtx);
13208 seq = get_insns ();
13209 end_sequence ();
13210
13211 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
13212 insert_insn_on_edge (seq, entry_edge);
13213 commit_one_edge_insertion (entry_edge);
13214 }
13215
13216 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
13217
13218 - We can't load constants into FP registers.
13219 - We can't load FP constants into integer registers when soft-float,
13220 because there is no soft-float pattern with a r/F constraint.
13221 - We can't load FP constants into integer registers for TFmode unless
13222 it is 0.0L, because there is no movtf pattern with a r/F constraint.
13223 - Try and reload integer constants (symbolic or otherwise) back into
13224 registers directly, rather than having them dumped to memory. */
13225
13226 static reg_class_t
13227 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
13228 {
13229 machine_mode mode = GET_MODE (x);
13230 if (CONSTANT_P (x))
13231 {
13232 if (FP_REG_CLASS_P (rclass)
13233 || rclass == GENERAL_OR_FP_REGS
13234 || rclass == GENERAL_OR_EXTRA_FP_REGS
13235 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
13236 || (mode == TFmode && ! const_zero_operand (x, mode)))
13237 return NO_REGS;
13238
13239 if (GET_MODE_CLASS (mode) == MODE_INT)
13240 return GENERAL_REGS;
13241
13242 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13243 {
13244 if (! FP_REG_CLASS_P (rclass)
13245 || !(const_zero_operand (x, mode)
13246 || const_all_ones_operand (x, mode)))
13247 return NO_REGS;
13248 }
13249 }
13250
13251 if (TARGET_VIS3
13252 && ! TARGET_ARCH64
13253 && (rclass == EXTRA_FP_REGS
13254 || rclass == GENERAL_OR_EXTRA_FP_REGS))
13255 {
13256 int regno = true_regnum (x);
13257
13258 if (SPARC_INT_REG_P (regno))
13259 return (rclass == EXTRA_FP_REGS
13260 ? FP_REGS : GENERAL_OR_FP_REGS);
13261 }
13262
13263 return rclass;
13264 }
13265
13266 /* Return true if we use LRA instead of the reload pass. */
13267
13268 static bool
13269 sparc_lra_p (void)
13270 {
13271 return TARGET_LRA;
13272 }
13273
13274 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
13275 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
13276
13277 const char *
13278 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
13279 {
13280 char mulstr[32];
13281
13282 gcc_assert (! TARGET_ARCH64);
13283
13284 if (sparc_check_64 (operands[1], insn) <= 0)
13285 output_asm_insn ("srl\t%L1, 0, %L1", operands);
13286 if (which_alternative == 1)
13287 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
13288 if (GET_CODE (operands[2]) == CONST_INT)
13289 {
13290 if (which_alternative == 1)
13291 {
13292 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13293 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
13294 output_asm_insn (mulstr, operands);
13295 return "srlx\t%L0, 32, %H0";
13296 }
13297 else
13298 {
13299 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13300 output_asm_insn ("or\t%L1, %3, %3", operands);
13301 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
13302 output_asm_insn (mulstr, operands);
13303 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13304 return "mov\t%3, %L0";
13305 }
13306 }
13307 else if (rtx_equal_p (operands[1], operands[2]))
13308 {
13309 if (which_alternative == 1)
13310 {
13311 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13312 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
13313 output_asm_insn (mulstr, operands);
13314 return "srlx\t%L0, 32, %H0";
13315 }
13316 else
13317 {
13318 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13319 output_asm_insn ("or\t%L1, %3, %3", operands);
13320 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
13321 output_asm_insn (mulstr, operands);
13322 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13323 return "mov\t%3, %L0";
13324 }
13325 }
13326 if (sparc_check_64 (operands[2], insn) <= 0)
13327 output_asm_insn ("srl\t%L2, 0, %L2", operands);
13328 if (which_alternative == 1)
13329 {
13330 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13331 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
13332 output_asm_insn ("or\t%L2, %L1, %L1", operands);
13333 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
13334 output_asm_insn (mulstr, operands);
13335 return "srlx\t%L0, 32, %H0";
13336 }
13337 else
13338 {
13339 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13340 output_asm_insn ("sllx\t%H2, 32, %4", operands);
13341 output_asm_insn ("or\t%L1, %3, %3", operands);
13342 output_asm_insn ("or\t%L2, %4, %4", operands);
13343 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
13344 output_asm_insn (mulstr, operands);
13345 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13346 return "mov\t%3, %L0";
13347 }
13348 }
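/* In the general register/register case handled by the last alternative
   above, and ignoring the zero-extending srl insns that may precede it,
   the emitted sequence is roughly:

	sllx	%H1, 32, %3
	sllx	%H2, 32, %4
	or	%L1, %3, %3
	or	%L2, %4, %4
	<op>	%3, %4, %3
	srlx	%3, 32, %H0
	mov	%3, %L0

   i.e. each 64-bit input is reassembled from its 32-bit halves into a
   scratch register, the multiply is done there, and the 64-bit result is
   split back into the output register pair.  */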
13349
13350 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13351 all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE
13352 and INNER_MODE are the modes describing TARGET. */
13353
13354 static void
13355 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
13356 machine_mode inner_mode)
13357 {
13358 rtx t1, final_insn, sel;
13359 int bmask;
13360
13361 t1 = gen_reg_rtx (mode);
13362
13363 elt = convert_modes (SImode, inner_mode, elt, true);
13364 emit_move_insn (gen_lowpart(SImode, t1), elt);
13365
13366 switch (mode)
13367 {
13368 case E_V2SImode:
13369 final_insn = gen_bshufflev2si_vis (target, t1, t1);
13370 bmask = 0x45674567;
13371 break;
13372 case E_V4HImode:
13373 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
13374 bmask = 0x67676767;
13375 break;
13376 case E_V8QImode:
13377 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
13378 bmask = 0x77777777;
13379 break;
13380 default:
13381 gcc_unreachable ();
13382 }
13383
13384 sel = force_reg (SImode, GEN_INT (bmask));
13385 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
13386 emit_insn (final_insn);
13387 }
13388
13389 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13390 all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn. */
13391
13392 static void
13393 vector_init_fpmerge (rtx target, rtx elt)
13394 {
13395 rtx t1, t2, t2_low, t3, t3_low;
13396
13397 t1 = gen_reg_rtx (V4QImode);
13398 elt = convert_modes (SImode, QImode, elt, true);
13399 emit_move_insn (gen_lowpart (SImode, t1), elt);
13400
13401 t2 = gen_reg_rtx (V8QImode);
13402 t2_low = gen_lowpart (V4QImode, t2);
13403 emit_insn (gen_fpmerge_vis (t2, t1, t1));
13404
13405 t3 = gen_reg_rtx (V8QImode);
13406 t3_low = gen_lowpart (V4QImode, t3);
13407 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
13408
13409 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
13410 }
13411
13412 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13413 all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn. */
13414
13415 static void
13416 vector_init_faligndata (rtx target, rtx elt)
13417 {
13418 rtx t1 = gen_reg_rtx (V4HImode);
13419 int i;
13420
13421 elt = convert_modes (SImode, HImode, elt, true);
13422 emit_move_insn (gen_lowpart (SImode, t1), elt);
13423
13424 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
13425 force_reg (SImode, GEN_INT (6)),
13426 const0_rtx));
13427
13428 for (i = 0; i < 4; i++)
13429 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
13430 }
13431
13432 /* Emit code to initialize TARGET to values for individual fields VALS. */
13433
13434 void
13435 sparc_expand_vector_init (rtx target, rtx vals)
13436 {
13437 const machine_mode mode = GET_MODE (target);
13438 const machine_mode inner_mode = GET_MODE_INNER (mode);
13439 const int n_elts = GET_MODE_NUNITS (mode);
13440 int i, n_var = 0;
13441 bool all_same = true;
13442 rtx mem;
13443
13444 for (i = 0; i < n_elts; i++)
13445 {
13446 rtx x = XVECEXP (vals, 0, i);
13447 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
13448 n_var++;
13449
13450 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13451 all_same = false;
13452 }
13453
13454 if (n_var == 0)
13455 {
13456 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
13457 return;
13458 }
13459
13460 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
13461 {
13462 if (GET_MODE_SIZE (inner_mode) == 4)
13463 {
13464 emit_move_insn (gen_lowpart (SImode, target),
13465 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
13466 return;
13467 }
13468 else if (GET_MODE_SIZE (inner_mode) == 8)
13469 {
13470 emit_move_insn (gen_lowpart (DImode, target),
13471 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
13472 return;
13473 }
13474 }
13475 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
13476 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
13477 {
13478 emit_move_insn (gen_highpart (word_mode, target),
13479 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
13480 emit_move_insn (gen_lowpart (word_mode, target),
13481 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
13482 return;
13483 }
13484
13485 if (all_same && GET_MODE_SIZE (mode) == 8)
13486 {
13487 if (TARGET_VIS2)
13488 {
13489 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
13490 return;
13491 }
13492 if (mode == V8QImode)
13493 {
13494 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
13495 return;
13496 }
13497 if (mode == V4HImode)
13498 {
13499 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
13500 return;
13501 }
13502 }
13503
13504 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13505 for (i = 0; i < n_elts; i++)
13506 emit_move_insn (adjust_address_nv (mem, inner_mode,
13507 i * GET_MODE_SIZE (inner_mode)),
13508 XVECEXP (vals, 0, i));
13509 emit_move_insn (target, mem);
13510 }
13511
13512 /* Implement TARGET_SECONDARY_RELOAD. */
13513
13514 static reg_class_t
13515 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13516 machine_mode mode, secondary_reload_info *sri)
13517 {
13518 enum reg_class rclass = (enum reg_class) rclass_i;
13519
13520 sri->icode = CODE_FOR_nothing;
13521 sri->extra_cost = 0;
13522
13523 /* We need a temporary when loading/storing a HImode/QImode value
13524 between memory and the FPU registers. This can happen when combine puts
13525 a paradoxical subreg in a float/fix conversion insn. */
13526 if (FP_REG_CLASS_P (rclass)
13527 && (mode == HImode || mode == QImode)
13528 && (GET_CODE (x) == MEM
13529 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
13530 && true_regnum (x) == -1)))
13531 return GENERAL_REGS;
13532
13533 /* On 32-bit we need a temporary when loading/storing a DFmode value
13534 between unaligned memory and the upper FPU registers. */
13535 if (TARGET_ARCH32
13536 && rclass == EXTRA_FP_REGS
13537 && mode == DFmode
13538 && GET_CODE (x) == MEM
13539 && ! mem_min_alignment (x, 8))
13540 return FP_REGS;
13541
13542 if (((TARGET_CM_MEDANY
13543 && symbolic_operand (x, mode))
13544 || (TARGET_CM_EMBMEDANY
13545 && text_segment_operand (x, mode)))
13546 && ! flag_pic)
13547 {
13548 if (in_p)
13549 sri->icode = direct_optab_handler (reload_in_optab, mode);
13550 else
13551 sri->icode = direct_optab_handler (reload_out_optab, mode);
13552 return NO_REGS;
13553 }
13554
13555 if (TARGET_VIS3 && TARGET_ARCH32)
13556 {
13557 int regno = true_regnum (x);
13558
13559 /* When using VIS3 fp<-->int register moves, on 32-bit we have
13560 to move 8-byte values in 4-byte pieces. This only works via
13561 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
13562 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
13563 an FP_REGS intermediate move. */
13564 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
13565 || ((general_or_i64_p (rclass)
13566 || rclass == GENERAL_OR_FP_REGS)
13567 && SPARC_FP_REG_P (regno)))
13568 {
13569 sri->extra_cost = 2;
13570 return FP_REGS;
13571 }
13572 }
13573
13574 return NO_REGS;
13575 }
13576
13577 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
13578
13579 On SPARC when not VIS3 it is not possible to directly move data
13580 between GENERAL_REGS and FP_REGS. */
13581
13582 static bool
13583 sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1,
13584 reg_class_t class2)
13585 {
13586 return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2))
13587 && (! TARGET_VIS3
13588 || GET_MODE_SIZE (mode) > 8
13589 || GET_MODE_SIZE (mode) < 4));
13590 }
13591
13592 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
13593
13594 get_secondary_mem widens its argument to BITS_PER_WORD which loses on v9
13595 because the movsi and movsf patterns don't handle r/f moves.
13596 For v8 we copy the default definition. */
13597
13598 static machine_mode
13599 sparc_secondary_memory_needed_mode (machine_mode mode)
13600 {
13601 if (TARGET_ARCH64)
13602 {
13603 if (GET_MODE_BITSIZE (mode) < 32)
13604 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
13605 return mode;
13606 }
13607 else
13608 {
13609 if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
13610 return mode_for_size (BITS_PER_WORD,
13611 GET_MODE_CLASS (mode), 0).require ();
13612 return mode;
13613 }
13614 }
13615
13616 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
13617 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
13618
13619 bool
13620 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
13621 {
13622 enum rtx_code rc = GET_CODE (operands[1]);
13623 machine_mode cmp_mode;
13624 rtx cc_reg, dst, cmp;
13625
13626 cmp = operands[1];
13627 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
13628 return false;
13629
13630 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
13631 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
13632
13633 cmp_mode = GET_MODE (XEXP (cmp, 0));
13634 rc = GET_CODE (cmp);
13635
13636 dst = operands[0];
13637 if (! rtx_equal_p (operands[2], dst)
13638 && ! rtx_equal_p (operands[3], dst))
13639 {
13640 if (reg_overlap_mentioned_p (dst, cmp))
13641 dst = gen_reg_rtx (mode);
13642
13643 emit_move_insn (dst, operands[3]);
13644 }
13645 else if (operands[2] == dst)
13646 {
13647 operands[2] = operands[3];
13648
13649 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
13650 rc = reverse_condition_maybe_unordered (rc);
13651 else
13652 rc = reverse_condition (rc);
13653 }
13654
13655 if (XEXP (cmp, 1) == const0_rtx
13656 && GET_CODE (XEXP (cmp, 0)) == REG
13657 && cmp_mode == DImode
13658 && v9_regcmp_p (rc))
13659 cc_reg = XEXP (cmp, 0);
13660 else
13661 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
13662
13663 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
13664
13665 emit_insn (gen_rtx_SET (dst,
13666 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
13667
13668 if (dst != operands[0])
13669 emit_move_insn (operands[0], dst);
13670
13671 return true;
13672 }
13673
13674 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
13675 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
13676 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
13677 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
13678 code to be used for the condition mask. */
13679
13680 void
13681 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
13682 {
13683 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
13684 enum rtx_code code = GET_CODE (operands[3]);
13685
13686 mask = gen_reg_rtx (Pmode);
13687 cop0 = operands[4];
13688 cop1 = operands[5];
13689 if (code == LT || code == GE)
13690 {
13691 rtx t;
13692
13693 code = swap_condition (code);
13694 t = cop0; cop0 = cop1; cop1 = t;
13695 }
13696
13697 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
13698
13699 fcmp = gen_rtx_UNSPEC (Pmode,
13700 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
13701 fcode);
13702
13703 cmask = gen_rtx_UNSPEC (DImode,
13704 gen_rtvec (2, mask, gsr),
13705 ccode);
13706
13707 bshuf = gen_rtx_UNSPEC (mode,
13708 gen_rtvec (3, operands[1], operands[2], gsr),
13709 UNSPEC_BSHUFFLE);
13710
13711 emit_insn (gen_rtx_SET (mask, fcmp));
13712 emit_insn (gen_rtx_SET (gsr, cmask));
13713
13714 emit_insn (gen_rtx_SET (operands[0], bshuf));
13715 }
13716
13717 /* On sparc, any mode which naturally allocates into the float
13718 registers should return 4 here. */
13719
13720 unsigned int
13721 sparc_regmode_natural_size (machine_mode mode)
13722 {
13723 int size = UNITS_PER_WORD;
13724
13725 if (TARGET_ARCH64)
13726 {
13727 enum mode_class mclass = GET_MODE_CLASS (mode);
13728
13729 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
13730 size = 4;
13731 }
13732
13733 return size;
13734 }
13735
13736 /* Implement TARGET_HARD_REGNO_NREGS.
13737
13738 On SPARC, ordinary registers hold 32 bits worth; this means both
13739 integer and floating point registers. On v9, integer regs hold 64
13740 bits worth; floating point regs hold 32 bits worth (this includes the
13741 new fp regs as even the odd ones are included in the hard register
13742 count). */
13743
13744 static unsigned int
13745 sparc_hard_regno_nregs (unsigned int regno, machine_mode mode)
13746 {
13747 if (regno == SPARC_GSR_REG)
13748 return 1;
13749 if (TARGET_ARCH64)
13750 {
13751 if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM)
13752 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13753 return CEIL (GET_MODE_SIZE (mode), 4);
13754 }
13755 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13756 }
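/* For example, a DFmode value occupies a single integer register in 64-bit
   mode but two floating-point registers (an even/odd pair), and two
   registers of either kind in 32-bit mode.  */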
13757
13758 /* Implement TARGET_HARD_REGNO_MODE_OK.
13759
13760 ??? Because of the funny way we pass parameters we should allow certain
13761 ??? types of float/complex values to be in integer registers during
13762 ??? RTL generation. This only matters on arch32. */
13763
13764 static bool
13765 sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
13766 {
13767 return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
13768 }
13769
13770 /* Implement TARGET_MODES_TIEABLE_P.
13771
13772 For V9 we have to deal with the fact that only the lower 32 floating
13773 point registers are 32-bit addressable. */
13774
13775 static bool
13776 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
13777 {
13778 enum mode_class mclass1, mclass2;
13779 unsigned short size1, size2;
13780
13781 if (mode1 == mode2)
13782 return true;
13783
13784 mclass1 = GET_MODE_CLASS (mode1);
13785 mclass2 = GET_MODE_CLASS (mode2);
13786 if (mclass1 != mclass2)
13787 return false;
13788
13789 if (! TARGET_V9)
13790 return true;
13791
13792 /* Classes are the same and we are V9 so we have to deal with upper
13793 vs. lower floating point registers. If one of the modes is a
13794 4-byte mode, and the other is not, we have to mark them as not
13795 tieable because only the lower 32 floating point register are
13796 addressable 32-bits at a time.
13797
13798 We can't just test explicitly for SFmode, otherwise we won't
13799 cover the vector mode cases properly. */
13800
13801 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
13802 return true;
13803
13804 size1 = GET_MODE_SIZE (mode1);
13805 size2 = GET_MODE_SIZE (mode2);
13806 if ((size1 > 4 && size2 == 4)
13807 || (size2 > 4 && size1 == 4))
13808 return false;
13809
13810 return true;
13811 }
13812
13813 /* Implement TARGET_CSTORE_MODE. */
13814
13815 static scalar_int_mode
13816 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
13817 {
13818 return (TARGET_ARCH64 ? DImode : SImode);
13819 }
13820
13821 /* Return the compound expression made of T1 and T2. */
13822
13823 static inline tree
13824 compound_expr (tree t1, tree t2)
13825 {
13826 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
13827 }
13828
13829 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
13830
13831 static void
13832 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
13833 {
13834 if (!TARGET_FPU)
13835 return;
13836
13837 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
13838 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
13839
13840 /* We generate the equivalent of feholdexcept (&fenv_var):
13841
13842 unsigned int fenv_var;
13843 __builtin_store_fsr (&fenv_var);
13844
13845 unsigned int tmp1_var;
13846 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
13847
13848 __builtin_load_fsr (&tmp1_var); */
13849
13850 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
13851 TREE_ADDRESSABLE (fenv_var) = 1;
13852 tree fenv_addr = build_fold_addr_expr (fenv_var);
13853 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
13854 tree hold_stfsr
13855 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
13856 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
13857
13858 tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
13859 TREE_ADDRESSABLE (tmp1_var) = 1;
13860 tree masked_fenv_var
13861 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
13862 build_int_cst (unsigned_type_node,
13863 ~(accrued_exception_mask | trap_enable_mask)));
13864 tree hold_mask
13865 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
13866 NULL_TREE, NULL_TREE);
13867
13868 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
13869 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
13870 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
13871
13872 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
13873
13874 /* We reload the value of tmp1_var to clear the exceptions:
13875
13876 __builtin_load_fsr (&tmp1_var); */
13877
13878 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
13879
13880 /* We generate the equivalent of feupdateenv (&fenv_var):
13881
13882 unsigned int tmp2_var;
13883 __builtin_store_fsr (&tmp2_var);
13884
13885 __builtin_load_fsr (&fenv_var);
13886
13887 if (SPARC_LOW_FE_EXCEPT_VALUES)
13888 tmp2_var >>= 5;
13889 __atomic_feraiseexcept ((int) tmp2_var); */
13890
13891 tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
13892 TREE_ADDRESSABLE (tmp2_var) = 1;
13893 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
13894 tree update_stfsr
13895 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
13896 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
13897
13898 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
13899
13900 tree atomic_feraiseexcept
13901 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
13902 tree update_call
13903 = build_call_expr (atomic_feraiseexcept, 1,
13904 fold_convert (integer_type_node, tmp2_var));
13905
13906 if (SPARC_LOW_FE_EXCEPT_VALUES)
13907 {
13908 tree shifted_tmp2_var
13909 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
13910 build_int_cst (unsigned_type_node, 5));
13911 tree update_shift
13912 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
13913 update_call = compound_expr (update_shift, update_call);
13914 }
13915
13916 *update
13917 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
13918 }
13919
13920 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. Borrowed from the PA port.
13921
13922 SImode loads to floating-point registers are not zero-extended.
13923 The definition for LOAD_EXTEND_OP specifies that integer loads
13924 narrower than BITS_PER_WORD will be zero-extended. As a result,
13925 we inhibit changes from SImode unless they are to a mode that is
13926 identical in size.
13927
13928 Likewise for SFmode, since word-mode paradoxical subregs are
13929 problematic on big-endian architectures. */
13930
13931 static bool
13932 sparc_can_change_mode_class (machine_mode from, machine_mode to,
13933 reg_class_t rclass)
13934 {
13935 if (TARGET_ARCH64
13936 && GET_MODE_SIZE (from) == 4
13937 && GET_MODE_SIZE (to) != 4)
13938 return !reg_classes_intersect_p (rclass, FP_REGS);
13939 return true;
13940 }
13941
13942 /* Implement TARGET_CONSTANT_ALIGNMENT. */
13943
13944 static HOST_WIDE_INT
13945 sparc_constant_alignment (const_tree exp, HOST_WIDE_INT align)
13946 {
13947 if (TREE_CODE (exp) == STRING_CST)
13948 return MAX (align, FASTEST_ALIGNMENT);
13949 return align;
13950 }
13951
13952 #include "gt-sparc.h"
13953