1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2020 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "memmodel.h"
33 #include "gimple.h"
34 #include "df.h"
35 #include "tm_p.h"
36 #include "stringpool.h"
37 #include "attribs.h"
38 #include "expmed.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "diagnostic-core.h"
44 #include "alias.h"
45 #include "fold-const.h"
46 #include "stor-layout.h"
47 #include "calls.h"
48 #include "varasm.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "debug.h"
54 #include "cfgrtl.h"
55 #include "common/common-target.h"
56 #include "gimplify.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "tree-pass.h"
60 #include "context.h"
61 #include "builtins.h"
62 #include "tree-vector-builder.h"
63 #include "opts.h"
64
65 /* This file should be included last. */
66 #include "target-def.h"
67
68 /* Processor costs */
69
70 struct processor_costs {
71 /* Integer load */
72 const int int_load;
73
74 /* Integer signed load */
75 const int int_sload;
76
77 /* Integer zeroed load */
78 const int int_zload;
79
80 /* Float load */
81 const int float_load;
82
83 /* fmov, fneg, fabs */
84 const int float_move;
85
86 /* fadd, fsub */
87 const int float_plusminus;
88
89 /* fcmp */
90 const int float_cmp;
91
92 /* fmov, fmovr */
93 const int float_cmove;
94
95 /* fmul */
96 const int float_mul;
97
98 /* fdivs */
99 const int float_div_sf;
100
101 /* fdivd */
102 const int float_div_df;
103
104 /* fsqrts */
105 const int float_sqrt_sf;
106
107 /* fsqrtd */
108 const int float_sqrt_df;
109
110 /* umul/smul */
111 const int int_mul;
112
113 /* mulX */
114 const int int_mulX;
115
116 /* integer multiply cost for each bit set past the most
117 significant 3, so the formula for multiply cost becomes:
118
119 if (rs1 < 0)
120 highest_bit = highest_clear_bit(rs1);
121 else
122 highest_bit = highest_set_bit(rs1);
123 if (highest_bit < 3)
124 highest_bit = 3;
125 cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
126
127 A value of zero indicates that the multiply cost is fixed,
128 and not variable. */
129 const int int_mul_bit_factor;
130
131 /* udiv/sdiv */
132 const int int_div;
133
134 /* divX */
135 const int int_divX;
136
137 /* movcc, movr */
138 const int int_cmove;
139
140 /* penalty for shifts, due to scheduling rules etc. */
141 const int shift_penalty;
142
143 /* cost of a (predictable) branch. */
144 const int branch_cost;
145 };
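/* As a rough worked example of the formula above (illustrative numbers only,
   taken from the ultrasparc table below): with int_mul equal to
   COSTS_N_INSNS (4) and int_mul_bit_factor equal to 2, a multiply whose
   operand has its highest set bit at position 11 would be costed at
   COSTS_N_INSNS (4) + (11 - 3) / 2 under this formula, i.e. the base
   latency plus 4 extra units.  */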
146
147 static const
148 struct processor_costs cypress_costs = {
149 COSTS_N_INSNS (2), /* int load */
150 COSTS_N_INSNS (2), /* int signed load */
151 COSTS_N_INSNS (2), /* int zeroed load */
152 COSTS_N_INSNS (2), /* float load */
153 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
154 COSTS_N_INSNS (5), /* fadd, fsub */
155 COSTS_N_INSNS (1), /* fcmp */
156 COSTS_N_INSNS (1), /* fmov, fmovr */
157 COSTS_N_INSNS (7), /* fmul */
158 COSTS_N_INSNS (37), /* fdivs */
159 COSTS_N_INSNS (37), /* fdivd */
160 COSTS_N_INSNS (63), /* fsqrts */
161 COSTS_N_INSNS (63), /* fsqrtd */
162 COSTS_N_INSNS (1), /* imul */
163 COSTS_N_INSNS (1), /* imulX */
164 0, /* imul bit factor */
165 COSTS_N_INSNS (1), /* idiv */
166 COSTS_N_INSNS (1), /* idivX */
167 COSTS_N_INSNS (1), /* movcc/movr */
168 0, /* shift penalty */
169 3 /* branch cost */
170 };
171
172 static const
173 struct processor_costs supersparc_costs = {
174 COSTS_N_INSNS (1), /* int load */
175 COSTS_N_INSNS (1), /* int signed load */
176 COSTS_N_INSNS (1), /* int zeroed load */
177 COSTS_N_INSNS (0), /* float load */
178 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
179 COSTS_N_INSNS (3), /* fadd, fsub */
180 COSTS_N_INSNS (3), /* fcmp */
181 COSTS_N_INSNS (1), /* fmov, fmovr */
182 COSTS_N_INSNS (3), /* fmul */
183 COSTS_N_INSNS (6), /* fdivs */
184 COSTS_N_INSNS (9), /* fdivd */
185 COSTS_N_INSNS (12), /* fsqrts */
186 COSTS_N_INSNS (12), /* fsqrtd */
187 COSTS_N_INSNS (4), /* imul */
188 COSTS_N_INSNS (4), /* imulX */
189 0, /* imul bit factor */
190 COSTS_N_INSNS (4), /* idiv */
191 COSTS_N_INSNS (4), /* idivX */
192 COSTS_N_INSNS (1), /* movcc/movr */
193 1, /* shift penalty */
194 3 /* branch cost */
195 };
196
197 static const
198 struct processor_costs hypersparc_costs = {
199 COSTS_N_INSNS (1), /* int load */
200 COSTS_N_INSNS (1), /* int signed load */
201 COSTS_N_INSNS (1), /* int zeroed load */
202 COSTS_N_INSNS (1), /* float load */
203 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
204 COSTS_N_INSNS (1), /* fadd, fsub */
205 COSTS_N_INSNS (1), /* fcmp */
206 COSTS_N_INSNS (1), /* fmov, fmovr */
207 COSTS_N_INSNS (1), /* fmul */
208 COSTS_N_INSNS (8), /* fdivs */
209 COSTS_N_INSNS (12), /* fdivd */
210 COSTS_N_INSNS (17), /* fsqrts */
211 COSTS_N_INSNS (17), /* fsqrtd */
212 COSTS_N_INSNS (17), /* imul */
213 COSTS_N_INSNS (17), /* imulX */
214 0, /* imul bit factor */
215 COSTS_N_INSNS (17), /* idiv */
216 COSTS_N_INSNS (17), /* idivX */
217 COSTS_N_INSNS (1), /* movcc/movr */
218 0, /* shift penalty */
219 3 /* branch cost */
220 };
221
222 static const
223 struct processor_costs leon_costs = {
224 COSTS_N_INSNS (1), /* int load */
225 COSTS_N_INSNS (1), /* int signed load */
226 COSTS_N_INSNS (1), /* int zeroed load */
227 COSTS_N_INSNS (1), /* float load */
228 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
229 COSTS_N_INSNS (1), /* fadd, fsub */
230 COSTS_N_INSNS (1), /* fcmp */
231 COSTS_N_INSNS (1), /* fmov, fmovr */
232 COSTS_N_INSNS (1), /* fmul */
233 COSTS_N_INSNS (15), /* fdivs */
234 COSTS_N_INSNS (15), /* fdivd */
235 COSTS_N_INSNS (23), /* fsqrts */
236 COSTS_N_INSNS (23), /* fsqrtd */
237 COSTS_N_INSNS (5), /* imul */
238 COSTS_N_INSNS (5), /* imulX */
239 0, /* imul bit factor */
240 COSTS_N_INSNS (5), /* idiv */
241 COSTS_N_INSNS (5), /* idivX */
242 COSTS_N_INSNS (1), /* movcc/movr */
243 0, /* shift penalty */
244 3 /* branch cost */
245 };
246
247 static const
248 struct processor_costs leon3_costs = {
249 COSTS_N_INSNS (1), /* int load */
250 COSTS_N_INSNS (1), /* int signed load */
251 COSTS_N_INSNS (1), /* int zeroed load */
252 COSTS_N_INSNS (1), /* float load */
253 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
254 COSTS_N_INSNS (1), /* fadd, fsub */
255 COSTS_N_INSNS (1), /* fcmp */
256 COSTS_N_INSNS (1), /* fmov, fmovr */
257 COSTS_N_INSNS (1), /* fmul */
258 COSTS_N_INSNS (14), /* fdivs */
259 COSTS_N_INSNS (15), /* fdivd */
260 COSTS_N_INSNS (22), /* fsqrts */
261 COSTS_N_INSNS (23), /* fsqrtd */
262 COSTS_N_INSNS (5), /* imul */
263 COSTS_N_INSNS (5), /* imulX */
264 0, /* imul bit factor */
265 COSTS_N_INSNS (35), /* idiv */
266 COSTS_N_INSNS (35), /* idivX */
267 COSTS_N_INSNS (1), /* movcc/movr */
268 0, /* shift penalty */
269 3 /* branch cost */
270 };
271
272 static const
273 struct processor_costs leon5_costs = {
274 COSTS_N_INSNS (1), /* int load */
275 COSTS_N_INSNS (1), /* int signed load */
276 COSTS_N_INSNS (1), /* int zeroed load */
277 COSTS_N_INSNS (1), /* float load */
278 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
279 COSTS_N_INSNS (1), /* fadd, fsub */
280 COSTS_N_INSNS (1), /* fcmp */
281 COSTS_N_INSNS (1), /* fmov, fmovr */
282 COSTS_N_INSNS (1), /* fmul */
283 COSTS_N_INSNS (17), /* fdivs */
284 COSTS_N_INSNS (18), /* fdivd */
285 COSTS_N_INSNS (25), /* fsqrts */
286 COSTS_N_INSNS (26), /* fsqrtd */
287 COSTS_N_INSNS (4), /* imul */
288 COSTS_N_INSNS (4), /* imulX */
289 0, /* imul bit factor */
290 COSTS_N_INSNS (35), /* idiv */
291 COSTS_N_INSNS (35), /* idivX */
292 COSTS_N_INSNS (1), /* movcc/movr */
293 0, /* shift penalty */
294 3 /* branch cost */
295 };
296
297 static const
298 struct processor_costs sparclet_costs = {
299 COSTS_N_INSNS (3), /* int load */
300 COSTS_N_INSNS (3), /* int signed load */
301 COSTS_N_INSNS (1), /* int zeroed load */
302 COSTS_N_INSNS (1), /* float load */
303 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
304 COSTS_N_INSNS (1), /* fadd, fsub */
305 COSTS_N_INSNS (1), /* fcmp */
306 COSTS_N_INSNS (1), /* fmov, fmovr */
307 COSTS_N_INSNS (1), /* fmul */
308 COSTS_N_INSNS (1), /* fdivs */
309 COSTS_N_INSNS (1), /* fdivd */
310 COSTS_N_INSNS (1), /* fsqrts */
311 COSTS_N_INSNS (1), /* fsqrtd */
312 COSTS_N_INSNS (5), /* imul */
313 COSTS_N_INSNS (5), /* imulX */
314 0, /* imul bit factor */
315 COSTS_N_INSNS (5), /* idiv */
316 COSTS_N_INSNS (5), /* idivX */
317 COSTS_N_INSNS (1), /* movcc/movr */
318 0, /* shift penalty */
319 3 /* branch cost */
320 };
321
322 static const
323 struct processor_costs ultrasparc_costs = {
324 COSTS_N_INSNS (2), /* int load */
325 COSTS_N_INSNS (3), /* int signed load */
326 COSTS_N_INSNS (2), /* int zeroed load */
327 COSTS_N_INSNS (2), /* float load */
328 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
329 COSTS_N_INSNS (4), /* fadd, fsub */
330 COSTS_N_INSNS (1), /* fcmp */
331 COSTS_N_INSNS (2), /* fmov, fmovr */
332 COSTS_N_INSNS (4), /* fmul */
333 COSTS_N_INSNS (13), /* fdivs */
334 COSTS_N_INSNS (23), /* fdivd */
335 COSTS_N_INSNS (13), /* fsqrts */
336 COSTS_N_INSNS (23), /* fsqrtd */
337 COSTS_N_INSNS (4), /* imul */
338 COSTS_N_INSNS (4), /* imulX */
339 2, /* imul bit factor */
340 COSTS_N_INSNS (37), /* idiv */
341 COSTS_N_INSNS (68), /* idivX */
342 COSTS_N_INSNS (2), /* movcc/movr */
343 2, /* shift penalty */
344 2 /* branch cost */
345 };
346
347 static const
348 struct processor_costs ultrasparc3_costs = {
349 COSTS_N_INSNS (2), /* int load */
350 COSTS_N_INSNS (3), /* int signed load */
351 COSTS_N_INSNS (3), /* int zeroed load */
352 COSTS_N_INSNS (2), /* float load */
353 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
354 COSTS_N_INSNS (4), /* fadd, fsub */
355 COSTS_N_INSNS (5), /* fcmp */
356 COSTS_N_INSNS (3), /* fmov, fmovr */
357 COSTS_N_INSNS (4), /* fmul */
358 COSTS_N_INSNS (17), /* fdivs */
359 COSTS_N_INSNS (20), /* fdivd */
360 COSTS_N_INSNS (20), /* fsqrts */
361 COSTS_N_INSNS (29), /* fsqrtd */
362 COSTS_N_INSNS (6), /* imul */
363 COSTS_N_INSNS (6), /* imulX */
364 0, /* imul bit factor */
365 COSTS_N_INSNS (40), /* idiv */
366 COSTS_N_INSNS (71), /* idivX */
367 COSTS_N_INSNS (2), /* movcc/movr */
368 0, /* shift penalty */
369 2 /* branch cost */
370 };
371
372 static const
373 struct processor_costs niagara_costs = {
374 COSTS_N_INSNS (3), /* int load */
375 COSTS_N_INSNS (3), /* int signed load */
376 COSTS_N_INSNS (3), /* int zeroed load */
377 COSTS_N_INSNS (9), /* float load */
378 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
379 COSTS_N_INSNS (8), /* fadd, fsub */
380 COSTS_N_INSNS (26), /* fcmp */
381 COSTS_N_INSNS (8), /* fmov, fmovr */
382 COSTS_N_INSNS (29), /* fmul */
383 COSTS_N_INSNS (54), /* fdivs */
384 COSTS_N_INSNS (83), /* fdivd */
385 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
386 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
387 COSTS_N_INSNS (11), /* imul */
388 COSTS_N_INSNS (11), /* imulX */
389 0, /* imul bit factor */
390 COSTS_N_INSNS (72), /* idiv */
391 COSTS_N_INSNS (72), /* idivX */
392 COSTS_N_INSNS (1), /* movcc/movr */
393 0, /* shift penalty */
394 4 /* branch cost */
395 };
396
397 static const
398 struct processor_costs niagara2_costs = {
399 COSTS_N_INSNS (3), /* int load */
400 COSTS_N_INSNS (3), /* int signed load */
401 COSTS_N_INSNS (3), /* int zeroed load */
402 COSTS_N_INSNS (3), /* float load */
403 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
404 COSTS_N_INSNS (6), /* fadd, fsub */
405 COSTS_N_INSNS (6), /* fcmp */
406 COSTS_N_INSNS (6), /* fmov, fmovr */
407 COSTS_N_INSNS (6), /* fmul */
408 COSTS_N_INSNS (19), /* fdivs */
409 COSTS_N_INSNS (33), /* fdivd */
410 COSTS_N_INSNS (19), /* fsqrts */
411 COSTS_N_INSNS (33), /* fsqrtd */
412 COSTS_N_INSNS (5), /* imul */
413 COSTS_N_INSNS (5), /* imulX */
414 0, /* imul bit factor */
415 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
416 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
417 COSTS_N_INSNS (1), /* movcc/movr */
418 0, /* shift penalty */
419 5 /* branch cost */
420 };
421
422 static const
423 struct processor_costs niagara3_costs = {
424 COSTS_N_INSNS (3), /* int load */
425 COSTS_N_INSNS (3), /* int signed load */
426 COSTS_N_INSNS (3), /* int zeroed load */
427 COSTS_N_INSNS (3), /* float load */
428 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
429 COSTS_N_INSNS (9), /* fadd, fsub */
430 COSTS_N_INSNS (9), /* fcmp */
431 COSTS_N_INSNS (9), /* fmov, fmovr */
432 COSTS_N_INSNS (9), /* fmul */
433 COSTS_N_INSNS (23), /* fdivs */
434 COSTS_N_INSNS (37), /* fdivd */
435 COSTS_N_INSNS (23), /* fsqrts */
436 COSTS_N_INSNS (37), /* fsqrtd */
437 COSTS_N_INSNS (9), /* imul */
438 COSTS_N_INSNS (9), /* imulX */
439 0, /* imul bit factor */
440 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
441 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
442 COSTS_N_INSNS (1), /* movcc/movr */
443 0, /* shift penalty */
444 5 /* branch cost */
445 };
446
447 static const
448 struct processor_costs niagara4_costs = {
449 COSTS_N_INSNS (5), /* int load */
450 COSTS_N_INSNS (5), /* int signed load */
451 COSTS_N_INSNS (5), /* int zeroed load */
452 COSTS_N_INSNS (5), /* float load */
453 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
454 COSTS_N_INSNS (11), /* fadd, fsub */
455 COSTS_N_INSNS (11), /* fcmp */
456 COSTS_N_INSNS (11), /* fmov, fmovr */
457 COSTS_N_INSNS (11), /* fmul */
458 COSTS_N_INSNS (24), /* fdivs */
459 COSTS_N_INSNS (37), /* fdivd */
460 COSTS_N_INSNS (24), /* fsqrts */
461 COSTS_N_INSNS (37), /* fsqrtd */
462 COSTS_N_INSNS (12), /* imul */
463 COSTS_N_INSNS (12), /* imulX */
464 0, /* imul bit factor */
465 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
466 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
467 COSTS_N_INSNS (1), /* movcc/movr */
468 0, /* shift penalty */
469 2 /* branch cost */
470 };
471
472 static const
473 struct processor_costs niagara7_costs = {
474 COSTS_N_INSNS (5), /* int load */
475 COSTS_N_INSNS (5), /* int signed load */
476 COSTS_N_INSNS (5), /* int zeroed load */
477 COSTS_N_INSNS (5), /* float load */
478 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
479 COSTS_N_INSNS (11), /* fadd, fsub */
480 COSTS_N_INSNS (11), /* fcmp */
481 COSTS_N_INSNS (11), /* fmov, fmovr */
482 COSTS_N_INSNS (11), /* fmul */
483 COSTS_N_INSNS (24), /* fdivs */
484 COSTS_N_INSNS (37), /* fdivd */
485 COSTS_N_INSNS (24), /* fsqrts */
486 COSTS_N_INSNS (37), /* fsqrtd */
487 COSTS_N_INSNS (12), /* imul */
488 COSTS_N_INSNS (12), /* imulX */
489 0, /* imul bit factor */
490 COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
491 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
492 COSTS_N_INSNS (1), /* movcc/movr */
493 0, /* shift penalty */
494 1 /* branch cost */
495 };
496
497 static const
498 struct processor_costs m8_costs = {
499 COSTS_N_INSNS (3), /* int load */
500 COSTS_N_INSNS (3), /* int signed load */
501 COSTS_N_INSNS (3), /* int zeroed load */
502 COSTS_N_INSNS (3), /* float load */
503 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
504 COSTS_N_INSNS (9), /* fadd, fsub */
505 COSTS_N_INSNS (9), /* fcmp */
506 COSTS_N_INSNS (9), /* fmov, fmovr */
507 COSTS_N_INSNS (9), /* fmul */
508 COSTS_N_INSNS (26), /* fdivs */
509 COSTS_N_INSNS (30), /* fdivd */
510 COSTS_N_INSNS (33), /* fsqrts */
511 COSTS_N_INSNS (41), /* fsqrtd */
512 COSTS_N_INSNS (12), /* imul */
513 COSTS_N_INSNS (10), /* imulX */
514 0, /* imul bit factor */
515 COSTS_N_INSNS (57), /* udiv/sdiv */
516 COSTS_N_INSNS (30), /* udivx/sdivx */
517 COSTS_N_INSNS (1), /* movcc/movr */
518 0, /* shift penalty */
519 1 /* branch cost */
520 };
521
522 static const struct processor_costs *sparc_costs = &cypress_costs;
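/* cypress_costs is only the build-time default; the -mcpu/-mtune handling in
   sparc_option_override is responsible for repointing sparc_costs to the
   table matching the selected processor (e.g. -mtune=ultrasparc would pick
   ultrasparc_costs).  */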
523
524 #ifdef HAVE_AS_RELAX_OPTION
525 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
526 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
527 With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out
528 whether somebody branches between the sethi and the jmp. */
529 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
530 #else
531 #define LEAF_SIBCALL_SLOT_RESERVED_P \
532 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
533 #endif
534
535 /* Vector to say how input registers are mapped to output registers.
536 HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
537 eliminate it. You must use -fomit-frame-pointer to get that. */
538 char leaf_reg_remap[] =
539 { 0, 1, 2, 3, 4, 5, 6, 7,
540 -1, -1, -1, -1, -1, -1, 14, -1,
541 -1, -1, -1, -1, -1, -1, -1, -1,
542 8, 9, 10, 11, 12, 13, -1, 15,
543
544 32, 33, 34, 35, 36, 37, 38, 39,
545 40, 41, 42, 43, 44, 45, 46, 47,
546 48, 49, 50, 51, 52, 53, 54, 55,
547 56, 57, 58, 59, 60, 61, 62, 63,
548 64, 65, 66, 67, 68, 69, 70, 71,
549 72, 73, 74, 75, 76, 77, 78, 79,
550 80, 81, 82, 83, 84, 85, 86, 87,
551 88, 89, 90, 91, 92, 93, 94, 95,
552 96, 97, 98, 99, 100, 101, 102};
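/* For example, entry 24 above is 8: in a leaf function the incoming argument
   register %i0 (hard reg 24) is renamed to the caller's %o0 (hard reg 8),
   and likewise %i7 (31) to %o7 (15), because no register window is
   allocated.  Entries of -1 have no remapping and so cannot appear in a
   function given the leaf treatment (compare sparc_leaf_regs below).  */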
553
554 /* Vector, indexed by hard register number, which contains 1
555 for a register that is allowable in a candidate for leaf
556 function treatment. */
557 char sparc_leaf_regs[] =
558 { 1, 1, 1, 1, 1, 1, 1, 1,
559 0, 0, 0, 0, 0, 0, 1, 0,
560 0, 0, 0, 0, 0, 0, 0, 0,
561 1, 1, 1, 1, 1, 1, 0, 1,
562 1, 1, 1, 1, 1, 1, 1, 1,
563 1, 1, 1, 1, 1, 1, 1, 1,
564 1, 1, 1, 1, 1, 1, 1, 1,
565 1, 1, 1, 1, 1, 1, 1, 1,
566 1, 1, 1, 1, 1, 1, 1, 1,
567 1, 1, 1, 1, 1, 1, 1, 1,
568 1, 1, 1, 1, 1, 1, 1, 1,
569 1, 1, 1, 1, 1, 1, 1, 1,
570 1, 1, 1, 1, 1, 1, 1};
571
572 struct GTY(()) machine_function
573 {
574 /* Size of the frame of the function. */
575 HOST_WIDE_INT frame_size;
576
577 /* Size of the frame of the function minus the register window save area
578 and the outgoing argument area. */
579 HOST_WIDE_INT apparent_frame_size;
580
581 /* Register we pretend the frame pointer is allocated to. Normally, this
582 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
583 record "offset" separately as it may be too big for (reg + disp). */
584 rtx frame_base_reg;
585 HOST_WIDE_INT frame_base_offset;
586
587 /* Number of global or FP registers to be saved (as 4-byte quantities). */
588 int n_global_fp_regs;
589
590 /* True if the current function is leaf and uses only leaf regs,
591 so that the SPARC leaf function optimization can be applied.
592 Private version of crtl->uses_only_leaf_regs, see
593 sparc_expand_prologue for the rationale. */
594 int leaf_function_p;
595
596 /* True if the prologue saves local or in registers. */
597 bool save_local_in_regs_p;
598
599 /* True if the data calculated by sparc_expand_prologue are valid. */
600 bool prologue_data_valid_p;
601 };
602
603 #define sparc_frame_size cfun->machine->frame_size
604 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size
605 #define sparc_frame_base_reg cfun->machine->frame_base_reg
606 #define sparc_frame_base_offset cfun->machine->frame_base_offset
607 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
608 #define sparc_leaf_function_p cfun->machine->leaf_function_p
609 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
610 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
611
612 /* 1 if the next opcode is to be specially indented. */
613 int sparc_indent_opcode = 0;
614
615 static void sparc_option_override (void);
616 static void sparc_init_modes (void);
617 static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
618 const_tree, bool, bool, int *, int *);
619
620 static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
621 static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
622 static int leon5_adjust_cost (rtx_insn *, int, rtx_insn *, int);
623
624 static void sparc_emit_set_const32 (rtx, rtx);
625 static void sparc_emit_set_const64 (rtx, rtx);
626 static void sparc_output_addr_vec (rtx);
627 static void sparc_output_addr_diff_vec (rtx);
628 static void sparc_output_deferred_case_vectors (void);
629 static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
630 static bool sparc_legitimate_constant_p (machine_mode, rtx);
631 static rtx sparc_builtin_saveregs (void);
632 static int epilogue_renumber (rtx *, int);
633 static bool sparc_assemble_integer (rtx, unsigned int, int);
634 static int set_extends (rtx_insn *);
635 static void sparc_asm_function_prologue (FILE *);
636 static void sparc_asm_function_epilogue (FILE *);
637 #ifdef TARGET_SOLARIS
638 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
639 tree) ATTRIBUTE_UNUSED;
640 #endif
641 static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
642 static int sparc_issue_rate (void);
643 static void sparc_sched_init (FILE *, int, int);
644 static int sparc_use_sched_lookahead (void);
645
646 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
647 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
648 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
649 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
650 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
651
652 static bool sparc_function_ok_for_sibcall (tree, tree);
653 static void sparc_init_libfuncs (void);
654 static void sparc_init_builtins (void);
655 static void sparc_fpu_init_builtins (void);
656 static void sparc_vis_init_builtins (void);
657 static tree sparc_builtin_decl (unsigned, bool);
658 static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
659 static tree sparc_fold_builtin (tree, int, tree *, bool);
660 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
661 HOST_WIDE_INT, tree);
662 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
663 HOST_WIDE_INT, const_tree);
664 static struct machine_function * sparc_init_machine_status (void);
665 static bool sparc_cannot_force_const_mem (machine_mode, rtx);
666 static rtx sparc_tls_get_addr (void);
667 static rtx sparc_tls_got (void);
668 static int sparc_register_move_cost (machine_mode,
669 reg_class_t, reg_class_t);
670 static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
671 static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
672 int *, const_tree, int);
673 static bool sparc_strict_argument_naming (cumulative_args_t);
674 static void sparc_va_start (tree, rtx);
675 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
676 static bool sparc_vector_mode_supported_p (machine_mode);
677 static bool sparc_tls_referenced_p (rtx);
678 static rtx sparc_legitimize_tls_address (rtx);
679 static rtx sparc_legitimize_pic_address (rtx, rtx);
680 static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
681 static rtx sparc_delegitimize_address (rtx);
682 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
683 static bool sparc_pass_by_reference (cumulative_args_t,
684 const function_arg_info &);
685 static void sparc_function_arg_advance (cumulative_args_t,
686 const function_arg_info &);
687 static rtx sparc_function_arg (cumulative_args_t, const function_arg_info &);
688 static rtx sparc_function_incoming_arg (cumulative_args_t,
689 const function_arg_info &);
690 static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
691 static unsigned int sparc_function_arg_boundary (machine_mode,
692 const_tree);
693 static int sparc_arg_partial_bytes (cumulative_args_t,
694 const function_arg_info &);
695 static bool sparc_return_in_memory (const_tree, const_tree);
696 static rtx sparc_struct_value_rtx (tree, int);
697 static rtx sparc_function_value (const_tree, const_tree, bool);
698 static rtx sparc_libcall_value (machine_mode, const_rtx);
699 static bool sparc_function_value_regno_p (const unsigned int);
700 static unsigned HOST_WIDE_INT sparc_asan_shadow_offset (void);
701 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
702 static void sparc_file_end (void);
703 static bool sparc_frame_pointer_required (void);
704 static bool sparc_can_eliminate (const int, const int);
705 static void sparc_conditional_register_usage (void);
706 static bool sparc_use_pseudo_pic_reg (void);
707 static void sparc_init_pic_reg (void);
708 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
709 static const char *sparc_mangle_type (const_tree);
710 #endif
711 static void sparc_trampoline_init (rtx, tree, rtx);
712 static machine_mode sparc_preferred_simd_mode (scalar_mode);
713 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
714 static bool sparc_lra_p (void);
715 static bool sparc_print_operand_punct_valid_p (unsigned char);
716 static void sparc_print_operand (FILE *, rtx, int);
717 static void sparc_print_operand_address (FILE *, machine_mode, rtx);
718 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
719 machine_mode,
720 secondary_reload_info *);
721 static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
722 reg_class_t);
723 static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
724 static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
725 static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
726 static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
727 static unsigned int sparc_min_arithmetic_precision (void);
728 static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
729 static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
730 static bool sparc_modes_tieable_p (machine_mode, machine_mode);
731 static bool sparc_can_change_mode_class (machine_mode, machine_mode,
732 reg_class_t);
733 static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);
734 static bool sparc_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
735 const vec_perm_indices &);
736 static bool sparc_can_follow_jump (const rtx_insn *, const rtx_insn *);
737
738 #ifdef SUBTARGET_ATTRIBUTE_TABLE
739 /* Table of valid machine attributes. */
740 static const struct attribute_spec sparc_attribute_table[] =
741 {
742 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
743 do_diagnostic, handler, exclude } */
744 SUBTARGET_ATTRIBUTE_TABLE,
745 { NULL, 0, 0, false, false, false, false, NULL, NULL }
746 };
747 #endif
748
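/* Presumably tracks which global registers have already had a ".register"
   directive printed, so that the directive is emitted at most once per
   register (see the V9 #scratch register handling elsewhere in this file).  */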
749 char sparc_hard_reg_printed[8];
750
751 /* Initialize the GCC target structure. */
752
753 /* The default is to use .half rather than .short for aligned HI objects. */
754 #undef TARGET_ASM_ALIGNED_HI_OP
755 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
756
757 #undef TARGET_ASM_UNALIGNED_HI_OP
758 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
759 #undef TARGET_ASM_UNALIGNED_SI_OP
760 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
761 #undef TARGET_ASM_UNALIGNED_DI_OP
762 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
763
764 /* The target hook has to handle DI-mode values. */
765 #undef TARGET_ASM_INTEGER
766 #define TARGET_ASM_INTEGER sparc_assemble_integer
767
768 #undef TARGET_ASM_FUNCTION_PROLOGUE
769 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
770 #undef TARGET_ASM_FUNCTION_EPILOGUE
771 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
772
773 #undef TARGET_SCHED_ADJUST_COST
774 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
775 #undef TARGET_SCHED_ISSUE_RATE
776 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
777 #undef TARGET_SCHED_INIT
778 #define TARGET_SCHED_INIT sparc_sched_init
779 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
780 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
781
782 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
783 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
784
785 #undef TARGET_INIT_LIBFUNCS
786 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
787
788 #undef TARGET_LEGITIMIZE_ADDRESS
789 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
790 #undef TARGET_DELEGITIMIZE_ADDRESS
791 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
792 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
793 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
794
795 #undef TARGET_INIT_BUILTINS
796 #define TARGET_INIT_BUILTINS sparc_init_builtins
797 #undef TARGET_BUILTIN_DECL
798 #define TARGET_BUILTIN_DECL sparc_builtin_decl
799 #undef TARGET_EXPAND_BUILTIN
800 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
801 #undef TARGET_FOLD_BUILTIN
802 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
803
804 #if TARGET_TLS
805 #undef TARGET_HAVE_TLS
806 #define TARGET_HAVE_TLS true
807 #endif
808
809 #undef TARGET_CANNOT_FORCE_CONST_MEM
810 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
811
812 #undef TARGET_ASM_OUTPUT_MI_THUNK
813 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
814 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
815 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
816
817 #undef TARGET_RTX_COSTS
818 #define TARGET_RTX_COSTS sparc_rtx_costs
819 #undef TARGET_ADDRESS_COST
820 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
821 #undef TARGET_REGISTER_MOVE_COST
822 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
823
824 #undef TARGET_PROMOTE_FUNCTION_MODE
825 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
826 #undef TARGET_STRICT_ARGUMENT_NAMING
827 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
828
829 #undef TARGET_MUST_PASS_IN_STACK
830 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
831 #undef TARGET_PASS_BY_REFERENCE
832 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
833 #undef TARGET_ARG_PARTIAL_BYTES
834 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
835 #undef TARGET_FUNCTION_ARG_ADVANCE
836 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
837 #undef TARGET_FUNCTION_ARG
838 #define TARGET_FUNCTION_ARG sparc_function_arg
839 #undef TARGET_FUNCTION_INCOMING_ARG
840 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
841 #undef TARGET_FUNCTION_ARG_PADDING
842 #define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
843 #undef TARGET_FUNCTION_ARG_BOUNDARY
844 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
845
846 #undef TARGET_RETURN_IN_MEMORY
847 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
848 #undef TARGET_STRUCT_VALUE_RTX
849 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
850 #undef TARGET_FUNCTION_VALUE
851 #define TARGET_FUNCTION_VALUE sparc_function_value
852 #undef TARGET_LIBCALL_VALUE
853 #define TARGET_LIBCALL_VALUE sparc_libcall_value
854 #undef TARGET_FUNCTION_VALUE_REGNO_P
855 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
856
857 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
858 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
859
860 #undef TARGET_ASAN_SHADOW_OFFSET
861 #define TARGET_ASAN_SHADOW_OFFSET sparc_asan_shadow_offset
862
863 #undef TARGET_EXPAND_BUILTIN_VA_START
864 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
865 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
866 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
867
868 #undef TARGET_VECTOR_MODE_SUPPORTED_P
869 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
870
871 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
872 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
873
874 #ifdef SUBTARGET_INSERT_ATTRIBUTES
875 #undef TARGET_INSERT_ATTRIBUTES
876 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
877 #endif
878
879 #ifdef SUBTARGET_ATTRIBUTE_TABLE
880 #undef TARGET_ATTRIBUTE_TABLE
881 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
882 #endif
883
884 #undef TARGET_OPTION_OVERRIDE
885 #define TARGET_OPTION_OVERRIDE sparc_option_override
886
887 #ifdef TARGET_THREAD_SSP_OFFSET
888 #undef TARGET_STACK_PROTECT_GUARD
889 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
890 #endif
891
892 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
893 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
894 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
895 #endif
896
897 #undef TARGET_ASM_FILE_END
898 #define TARGET_ASM_FILE_END sparc_file_end
899
900 #undef TARGET_FRAME_POINTER_REQUIRED
901 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
902
903 #undef TARGET_CAN_ELIMINATE
904 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
905
906 #undef TARGET_PREFERRED_RELOAD_CLASS
907 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
908
909 #undef TARGET_SECONDARY_RELOAD
910 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
911 #undef TARGET_SECONDARY_MEMORY_NEEDED
912 #define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed
913 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
914 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode
915
916 #undef TARGET_CONDITIONAL_REGISTER_USAGE
917 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
918
919 #undef TARGET_INIT_PIC_REG
920 #define TARGET_INIT_PIC_REG sparc_init_pic_reg
921
922 #undef TARGET_USE_PSEUDO_PIC_REG
923 #define TARGET_USE_PSEUDO_PIC_REG sparc_use_pseudo_pic_reg
924
925 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
926 #undef TARGET_MANGLE_TYPE
927 #define TARGET_MANGLE_TYPE sparc_mangle_type
928 #endif
929
930 #undef TARGET_LRA_P
931 #define TARGET_LRA_P sparc_lra_p
932
933 #undef TARGET_LEGITIMATE_ADDRESS_P
934 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
935
936 #undef TARGET_LEGITIMATE_CONSTANT_P
937 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
938
939 #undef TARGET_TRAMPOLINE_INIT
940 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
941
942 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
943 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
944 #undef TARGET_PRINT_OPERAND
945 #define TARGET_PRINT_OPERAND sparc_print_operand
946 #undef TARGET_PRINT_OPERAND_ADDRESS
947 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
948
949 /* The value stored by LDSTUB. */
950 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
951 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
952
953 #undef TARGET_CSTORE_MODE
954 #define TARGET_CSTORE_MODE sparc_cstore_mode
955
956 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
957 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv
958
959 #undef TARGET_FIXED_CONDITION_CODE_REGS
960 #define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs
961
962 #undef TARGET_MIN_ARITHMETIC_PRECISION
963 #define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision
964
965 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
966 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
967
968 #undef TARGET_HARD_REGNO_NREGS
969 #define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
970 #undef TARGET_HARD_REGNO_MODE_OK
971 #define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok
972
973 #undef TARGET_MODES_TIEABLE_P
974 #define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p
975
976 #undef TARGET_CAN_CHANGE_MODE_CLASS
977 #define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class
978
979 #undef TARGET_CONSTANT_ALIGNMENT
980 #define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment
981
982 #undef TARGET_VECTORIZE_VEC_PERM_CONST
983 #define TARGET_VECTORIZE_VEC_PERM_CONST sparc_vectorize_vec_perm_const
984
985 #undef TARGET_CAN_FOLLOW_JUMP
986 #define TARGET_CAN_FOLLOW_JUMP sparc_can_follow_jump
987
988 struct gcc_target targetm = TARGET_INITIALIZER;
989
990 /* Return the memory reference contained in X if any, zero otherwise. */
991
992 static rtx
993 mem_ref (rtx x)
994 {
995 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
996 x = XEXP (x, 0);
997
998 if (MEM_P (x))
999 return x;
1000
1001 return NULL_RTX;
1002 }
1003
1004 /* True if any of INSN's source register(s) is REG. */
1005
1006 static bool
1007 insn_uses_reg_p (rtx_insn *insn, unsigned int reg)
1008 {
1009 extract_insn (insn);
1010 return ((REG_P (recog_data.operand[1])
1011 && REGNO (recog_data.operand[1]) == reg)
1012 || (recog_data.n_operands == 3
1013 && REG_P (recog_data.operand[2])
1014 && REGNO (recog_data.operand[2]) == reg));
1015 }
1016
1017 /* True if INSN is a floating-point division or square-root. */
1018
1019 static bool
1020 div_sqrt_insn_p (rtx_insn *insn)
1021 {
1022 if (GET_CODE (PATTERN (insn)) != SET)
1023 return false;
1024
1025 switch (get_attr_type (insn))
1026 {
1027 case TYPE_FPDIVS:
1028 case TYPE_FPSQRTS:
1029 case TYPE_FPDIVD:
1030 case TYPE_FPSQRTD:
1031 return true;
1032 default:
1033 return false;
1034 }
1035 }
1036
1037 /* True if INSN is a floating-point instruction. */
1038
1039 static bool
1040 fpop_insn_p (rtx_insn *insn)
1041 {
1042 if (GET_CODE (PATTERN (insn)) != SET)
1043 return false;
1044
1045 switch (get_attr_type (insn))
1046 {
1047 case TYPE_FPMOVE:
1048 case TYPE_FPCMOVE:
1049 case TYPE_FP:
1050 case TYPE_FPCMP:
1051 case TYPE_FPMUL:
1052 case TYPE_FPDIVS:
1053 case TYPE_FPSQRTS:
1054 case TYPE_FPDIVD:
1055 case TYPE_FPSQRTD:
1056 return true;
1057 default:
1058 return false;
1059 }
1060 }
1061
1062 /* True if INSN is an atomic instruction. */
1063
1064 static bool
1065 atomic_insn_for_leon3_p (rtx_insn *insn)
1066 {
1067 switch (INSN_CODE (insn))
1068 {
1069 case CODE_FOR_swapsi:
1070 case CODE_FOR_ldstub:
1071 case CODE_FOR_atomic_compare_and_swap_leon3_1:
1072 return true;
1073 default:
1074 return false;
1075 }
1076 }
1077
1078 /* True if INSN is a store instruction. */
1079
1080 static bool
1081 store_insn_p (rtx_insn *insn)
1082 {
1083 if (GET_CODE (PATTERN (insn)) != SET)
1084 return false;
1085
1086 switch (get_attr_type (insn))
1087 {
1088 case TYPE_STORE:
1089 case TYPE_FPSTORE:
1090 return true;
1091 default:
1092 return false;
1093 }
1094 }
1095
1096 /* True if INSN is a load instruction. */
1097
1098 static bool
1099 load_insn_p (rtx_insn *insn)
1100 {
1101 if (GET_CODE (PATTERN (insn)) != SET)
1102 return false;
1103
1104 switch (get_attr_type (insn))
1105 {
1106 case TYPE_LOAD:
1107 case TYPE_SLOAD:
1108 case TYPE_FPLOAD:
1109 return true;
1110 default:
1111 return false;
1112 }
1113 }
1114
1115 /* We use a machine specific pass to enable workarounds for errata.
1116
1117 We need to have the (essentially) final form of the insn stream in order
1118 to properly detect the various hazards. Therefore, this machine specific
1119 pass runs as late as possible. */
1120
1121 /* True if INSN is a md pattern or asm statement. */
1122 #define USEFUL_INSN_P(INSN) \
1123 (NONDEBUG_INSN_P (INSN) \
1124 && GET_CODE (PATTERN (INSN)) != USE \
1125 && GET_CODE (PATTERN (INSN)) != CLOBBER)
1126
1127 rtx_insn *
1128 next_active_non_empty_insn (rtx_insn *insn)
1129 {
1130 insn = next_active_insn (insn);
1131
1132 while (insn
1133 && (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
1134 || GET_CODE (PATTERN (insn)) == ASM_INPUT
1135 || (USEFUL_INSN_P (insn)
1136 && (asm_noperands (PATTERN (insn)) >= 0)
1137 && !strcmp (decode_asm_operands (PATTERN (insn),
1138 NULL, NULL, NULL,
1139 NULL, NULL), ""))))
1140 insn = next_active_insn (insn);
1141
1142 return insn;
1143 }
1144
1145 static unsigned int
1146 sparc_do_work_around_errata (void)
1147 {
1148 rtx_insn *insn, *next;
1149 bool find_first_useful = true;
1150
1151 /* Force all instructions to be split into their final form. */
1152 split_all_insns_noflow ();
1153
1154 /* Now look for specific patterns in the insn stream. */
1155 for (insn = get_insns (); insn; insn = next)
1156 {
1157 bool insert_nop = false;
1158 rtx set;
1159 rtx_insn *jump;
1160 rtx_sequence *seq;
1161
1162 /* Look into the instruction in a delay slot. */
1163 if (NONJUMP_INSN_P (insn)
1164 && (seq = dyn_cast <rtx_sequence *> (PATTERN (insn))))
1165 {
1166 jump = seq->insn (0);
1167 insn = seq->insn (1);
1168 }
1169 else if (JUMP_P (insn))
1170 jump = insn;
1171 else
1172 jump = NULL;
1173
1174 /* Do not begin function with atomic instruction. */
1175 if (sparc_fix_ut700
1176 && find_first_useful
1177 && USEFUL_INSN_P (insn))
1178 {
1179 find_first_useful = false;
1180 if (atomic_insn_for_leon3_p (insn))
1181 emit_insn_before (gen_nop (), insn);
1182 }
1183
1184 /* Place a NOP at the branch target of an integer branch if it is a
1185 floating-point operation or a floating-point branch. */
1186 if (sparc_fix_gr712rc
1187 && jump
1188 && jump_to_label_p (jump)
1189 && get_attr_branch_type (jump) == BRANCH_TYPE_ICC)
1190 {
1191 rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
1192 if (target
1193 && (fpop_insn_p (target)
1194 || (JUMP_P (target)
1195 && get_attr_branch_type (target) == BRANCH_TYPE_FCC)))
1196 emit_insn_before (gen_nop (), target);
1197 }
1198
1199 /* Insert a NOP between load instruction and atomic instruction. Insert
1200 a NOP at branch target if there is a load in delay slot and an atomic
1201 instruction at branch target. */
1202 if (sparc_fix_ut700
1203 && NONJUMP_INSN_P (insn)
1204 && load_insn_p (insn))
1205 {
1206 if (jump && jump_to_label_p (jump))
1207 {
1208 rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
1209 if (target && atomic_insn_for_leon3_p (target))
1210 emit_insn_before (gen_nop (), target);
1211 }
1212
1213 next = next_active_non_empty_insn (insn);
1214 if (!next)
1215 break;
1216
1217 if (atomic_insn_for_leon3_p (next))
1218 insert_nop = true;
1219 }
1220
1221 /* Look for a sequence that starts with a fdiv or fsqrt instruction and
1222 ends with another fdiv or fsqrt instruction with no dependencies on
1223 the former, along with an appropriate pattern in between. */
1224 if (sparc_fix_lost_divsqrt
1225 && NONJUMP_INSN_P (insn)
1226 && div_sqrt_insn_p (insn))
1227 {
1228 int i;
1229 int fp_found = 0;
1230 rtx_insn *after;
1231
1232 const unsigned int dest_reg = REGNO (SET_DEST (single_set (insn)));
1233
1234 next = next_active_insn (insn);
1235 if (!next)
1236 break;
1237
1238 for (after = next, i = 0; i < 4; i++)
1239 {
1240 /* Count floating-point operations. */
1241 if (i != 3 && fpop_insn_p (after))
1242 {
1243 /* If the insn uses the destination register of
1244 the div/sqrt, then it cannot be problematic. */
1245 if (insn_uses_reg_p (after, dest_reg))
1246 break;
1247 fp_found++;
1248 }
1249
1250 /* Count floating-point loads. */
1251 if (i != 3
1252 && (set = single_set (after)) != NULL_RTX
1253 && REG_P (SET_DEST (set))
1254 && REGNO (SET_DEST (set)) > 31)
1255 {
1256 /* If the insn uses the destination register of
1257 the div/sqrt, then it cannot be problematic. */
1258 if (REGNO (SET_DEST (set)) == dest_reg)
1259 break;
1260 fp_found++;
1261 }
1262
1263 /* Check if this is a problematic sequence. */
1264 if (i > 1
1265 && fp_found >= 2
1266 && div_sqrt_insn_p (after))
1267 {
1268 /* If this is the short version of the problematic
1269 sequence we add two NOPs in a row to also prevent
1270 the long version. */
1271 if (i == 2)
1272 emit_insn_before (gen_nop (), next);
1273 insert_nop = true;
1274 break;
1275 }
1276
1277 /* No need to scan past a second div/sqrt. */
1278 if (div_sqrt_insn_p (after))
1279 break;
1280
1281 /* Insert NOP before branch. */
1282 if (i < 3
1283 && (!NONJUMP_INSN_P (after)
1284 || GET_CODE (PATTERN (after)) == SEQUENCE))
1285 {
1286 insert_nop = true;
1287 break;
1288 }
1289
1290 after = next_active_insn (after);
1291 if (!after)
1292 break;
1293 }
1294 }
1295
1296 /* Look for either of these two sequences:
1297
1298 Sequence A:
1299 1. store of word size or less (e.g. st / stb / sth / stf)
1300 2. any single instruction that is not a load or store
1301 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)
1302
1303 Sequence B:
1304 1. store of double word size (e.g. std / stdf)
1305 2. any store instruction (e.g. st / stb / sth / stf / std / stdf) */
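      /* As a concrete illustration (hypothetical operands, not taken from an
	 errata document): a "std %f0, [%o0]" immediately followed by a
	 "st %g1, [%o1]" matches sequence B, so a NOP is inserted between the
	 two stores.  */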
1306 if (sparc_fix_b2bst
1307 && NONJUMP_INSN_P (insn)
1308 && (set = single_set (insn)) != NULL_RTX
1309 && store_insn_p (insn))
1310 {
1311 /* Sequence B begins with a double-word store. */
1312 bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
1313 rtx_insn *after;
1314 int i;
1315
1316 next = next_active_non_empty_insn (insn);
1317 if (!next)
1318 break;
1319
1320 for (after = next, i = 0; i < 2; i++)
1321 {
1322 /* If the insn is a branch, then it cannot be problematic. */
1323 if (!NONJUMP_INSN_P (after)
1324 || GET_CODE (PATTERN (after)) == SEQUENCE)
1325 break;
1326
1327 /* Sequence B is only two instructions long. */
1328 if (seq_b)
1329 {
1330 /* Add NOP if followed by a store. */
1331 if (store_insn_p (after))
1332 insert_nop = true;
1333
1334 /* Otherwise it is ok. */
1335 break;
1336 }
1337
1338 /* If the second instruction is a load or a store,
1339 then the sequence cannot be problematic. */
1340 if (i == 0)
1341 {
1342 if ((set = single_set (after)) != NULL_RTX
1343 && (MEM_P (SET_DEST (set)) || mem_ref (SET_SRC (set))))
1344 break;
1345
1346 after = next_active_non_empty_insn (after);
1347 if (!after)
1348 break;
1349 }
1350
1351 /* Add NOP if third instruction is a store. */
1352 if (i == 1
1353 && store_insn_p (after))
1354 insert_nop = true;
1355 }
1356 }
1357
1358 /* Look for a single-word load into an odd-numbered FP register. */
1359 else if (sparc_fix_at697f
1360 && NONJUMP_INSN_P (insn)
1361 && (set = single_set (insn)) != NULL_RTX
1362 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1363 && mem_ref (SET_SRC (set))
1364 && REG_P (SET_DEST (set))
1365 && REGNO (SET_DEST (set)) > 31
1366 && REGNO (SET_DEST (set)) % 2 != 0)
1367 {
1368 /* The wrong dependency is on the enclosing double register. */
1369 const unsigned int x = REGNO (SET_DEST (set)) - 1;
1370 unsigned int src1, src2, dest;
1371 int code;
1372
1373 next = next_active_insn (insn);
1374 if (!next)
1375 break;
1376 /* If the insn is a branch, then it cannot be problematic. */
1377 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1378 continue;
1379
1380 extract_insn (next);
1381 code = INSN_CODE (next);
1382
1383 switch (code)
1384 {
1385 case CODE_FOR_adddf3:
1386 case CODE_FOR_subdf3:
1387 case CODE_FOR_muldf3:
1388 case CODE_FOR_divdf3:
1389 dest = REGNO (recog_data.operand[0]);
1390 src1 = REGNO (recog_data.operand[1]);
1391 src2 = REGNO (recog_data.operand[2]);
1392 if (src1 != src2)
1393 {
1394 /* Case [1-4]:
1395 ld [address], %fx+1
1396 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
1397 if ((src1 == x || src2 == x)
1398 && (dest == src1 || dest == src2))
1399 insert_nop = true;
1400 }
1401 else
1402 {
1403 /* Case 5:
1404 ld [address], %fx+1
1405 FPOPd %fx, %fx, %fx */
1406 if (src1 == x
1407 && dest == src1
1408 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
1409 insert_nop = true;
1410 }
1411 break;
1412
1413 case CODE_FOR_sqrtdf2:
1414 dest = REGNO (recog_data.operand[0]);
1415 src1 = REGNO (recog_data.operand[1]);
1416 /* Case 6:
1417 ld [address], %fx+1
1418 fsqrtd %fx, %fx */
1419 if (src1 == x && dest == src1)
1420 insert_nop = true;
1421 break;
1422
1423 default:
1424 break;
1425 }
1426 }
1427
1428 /* Look for a single-word load into an integer register. */
1429 else if (sparc_fix_ut699
1430 && NONJUMP_INSN_P (insn)
1431 && (set = single_set (insn)) != NULL_RTX
1432 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
1433 && (mem_ref (SET_SRC (set)) != NULL_RTX
1434 || INSN_CODE (insn) == CODE_FOR_movsi_pic_gotdata_op)
1435 && REG_P (SET_DEST (set))
1436 && REGNO (SET_DEST (set)) < 32)
1437 {
1438 /* There is no problem if the second memory access has a data
1439 dependency on the first single-cycle load. */
1440 rtx x = SET_DEST (set);
1441
1442 next = next_active_insn (insn);
1443 if (!next)
1444 break;
1445 /* If the insn is a branch, then it cannot be problematic. */
1446 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1447 continue;
1448
1449 /* Look for a second memory access to/from an integer register. */
1450 if ((set = single_set (next)) != NULL_RTX)
1451 {
1452 rtx src = SET_SRC (set);
1453 rtx dest = SET_DEST (set);
1454 rtx mem;
1455
1456 /* LDD is affected. */
1457 if ((mem = mem_ref (src)) != NULL_RTX
1458 && REG_P (dest)
1459 && REGNO (dest) < 32
1460 && !reg_mentioned_p (x, XEXP (mem, 0)))
1461 insert_nop = true;
1462
1463 /* STD is *not* affected. */
1464 else if (MEM_P (dest)
1465 && GET_MODE_SIZE (GET_MODE (dest)) <= 4
1466 && (src == CONST0_RTX (GET_MODE (dest))
1467 || (REG_P (src)
1468 && REGNO (src) < 32
1469 && REGNO (src) != REGNO (x)))
1470 && !reg_mentioned_p (x, XEXP (dest, 0)))
1471 insert_nop = true;
1472
1473 /* GOT accesses use LD. */
1474 else if (INSN_CODE (next) == CODE_FOR_movsi_pic_gotdata_op
1475 && !reg_mentioned_p (x, XEXP (XEXP (src, 0), 1)))
1476 insert_nop = true;
1477 }
1478 }
1479
1480 /* Look for a single-word load/operation into an FP register. */
1481 else if (sparc_fix_ut699
1482 && NONJUMP_INSN_P (insn)
1483 && (set = single_set (insn)) != NULL_RTX
1484 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1485 && REG_P (SET_DEST (set))
1486 && REGNO (SET_DEST (set)) > 31)
1487 {
1488 /* Number of instructions in the problematic window. */
1489 const int n_insns = 4;
1490 /* The problematic combination is with the sibling FP register. */
1491 const unsigned int x = REGNO (SET_DEST (set));
1492 const unsigned int y = x ^ 1;
1493 rtx_insn *after;
1494 int i;
1495
1496 next = next_active_insn (insn);
1497 if (!next)
1498 break;
1499 /* If the insn is a branch, then it cannot be problematic. */
1500 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1501 continue;
1502
1503 /* Look for a second load/operation into the sibling FP register. */
1504 if (!((set = single_set (next)) != NULL_RTX
1505 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1506 && REG_P (SET_DEST (set))
1507 && REGNO (SET_DEST (set)) == y))
1508 continue;
1509
1510 /* Look for a (possible) store from the FP register in the next N
1511 instructions, but bail out if it is again modified or if there
1512 is a store from the sibling FP register before this store. */
1513 for (after = next, i = 0; i < n_insns; i++)
1514 {
1515 bool branch_p;
1516
1517 after = next_active_insn (after);
1518 if (!after)
1519 break;
1520
1521 /* This is a branch with an empty delay slot. */
1522 if (!NONJUMP_INSN_P (after))
1523 {
1524 if (++i == n_insns)
1525 break;
1526 branch_p = true;
1527 after = NULL;
1528 }
1529 /* This is a branch with a filled delay slot. */
1530 else if (rtx_sequence *seq =
1531 dyn_cast <rtx_sequence *> (PATTERN (after)))
1532 {
1533 if (++i == n_insns)
1534 break;
1535 branch_p = true;
1536 after = seq->insn (1);
1537 }
1538 /* This is a regular instruction. */
1539 else
1540 branch_p = false;
1541
1542 if (after && (set = single_set (after)) != NULL_RTX)
1543 {
1544 const rtx src = SET_SRC (set);
1545 const rtx dest = SET_DEST (set);
1546 const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
1547
1548 /* If the FP register is again modified before the store,
1549 then the store isn't affected. */
1550 if (REG_P (dest)
1551 && (REGNO (dest) == x
1552 || (REGNO (dest) == y && size == 8)))
1553 break;
1554
1555 if (MEM_P (dest) && REG_P (src))
1556 {
1557 /* If there is a store from the sibling FP register
1558 before the store, then the store is not affected. */
1559 if (REGNO (src) == y || (REGNO (src) == x && size == 8))
1560 break;
1561
1562 /* Otherwise, the store is affected. */
1563 if (REGNO (src) == x && size == 4)
1564 {
1565 insert_nop = true;
1566 break;
1567 }
1568 }
1569 }
1570
1571 /* If we have a branch in the first M instructions, then we
1572 cannot see the (M+2)th instruction so we play safe. */
1573 if (branch_p && i <= (n_insns - 2))
1574 {
1575 insert_nop = true;
1576 break;
1577 }
1578 }
1579 }
1580
1581 else
1582 next = NEXT_INSN (insn);
1583
1584 if (insert_nop)
1585 emit_insn_before (gen_nop (), next);
1586 }
1587
1588 return 0;
1589 }
1590
1591 namespace {
1592
1593 const pass_data pass_data_work_around_errata =
1594 {
1595 RTL_PASS, /* type */
1596 "errata", /* name */
1597 OPTGROUP_NONE, /* optinfo_flags */
1598 TV_MACH_DEP, /* tv_id */
1599 0, /* properties_required */
1600 0, /* properties_provided */
1601 0, /* properties_destroyed */
1602 0, /* todo_flags_start */
1603 0, /* todo_flags_finish */
1604 };
1605
1606 class pass_work_around_errata : public rtl_opt_pass
1607 {
1608 public:
1609   pass_work_around_errata(gcc::context *ctxt)
1610 : rtl_opt_pass(pass_data_work_around_errata, ctxt)
1611 {}
1612
1613 /* opt_pass methods: */
1614   virtual bool gate (function *)
1615 {
1616 return sparc_fix_at697f
1617 || sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc
1618 || sparc_fix_b2bst || sparc_fix_lost_divsqrt;
1619 }
1620
1621   virtual unsigned int execute (function *)
1622 {
1623 return sparc_do_work_around_errata ();
1624 }
1625
1626 }; // class pass_work_around_errata
1627
1628 } // anon namespace
1629
1630 rtl_opt_pass *
1631 make_pass_work_around_errata (gcc::context *ctxt)
1632 {
1633 return new pass_work_around_errata (ctxt);
1634 }
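/* The pass above still has to be registered with the pass manager; that is
   done elsewhere in the backend.  A minimal sketch of such a registration
   (the reference pass name "dbr" and the insertion point are assumptions
   here, not taken from this file):

     struct register_pass_info insert_pass_work_around_errata =
       {
	 make_pass_work_around_errata (g),  pass to insert
	 "dbr",                             reference pass name
	 1,                                 instance number of reference pass
	 PASS_POS_INSERT_AFTER              insert after the reference pass
       };
     register_pass (&insert_pass_work_around_errata);  */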
1635
1636 /* Helpers for TARGET_DEBUG_OPTIONS. */
1637 static void
1638 dump_target_flag_bits (const int flags)
1639 {
1640 if (flags & MASK_64BIT)
1641 fprintf (stderr, "64BIT ");
1642 if (flags & MASK_APP_REGS)
1643 fprintf (stderr, "APP_REGS ");
1644 if (flags & MASK_FASTER_STRUCTS)
1645 fprintf (stderr, "FASTER_STRUCTS ");
1646 if (flags & MASK_FLAT)
1647 fprintf (stderr, "FLAT ");
1648 if (flags & MASK_FMAF)
1649 fprintf (stderr, "FMAF ");
1650 if (flags & MASK_FSMULD)
1651 fprintf (stderr, "FSMULD ");
1652 if (flags & MASK_FPU)
1653 fprintf (stderr, "FPU ");
1654 if (flags & MASK_HARD_QUAD)
1655 fprintf (stderr, "HARD_QUAD ");
1656 if (flags & MASK_POPC)
1657 fprintf (stderr, "POPC ");
1658 if (flags & MASK_PTR64)
1659 fprintf (stderr, "PTR64 ");
1660 if (flags & MASK_STACK_BIAS)
1661 fprintf (stderr, "STACK_BIAS ");
1662 if (flags & MASK_UNALIGNED_DOUBLES)
1663 fprintf (stderr, "UNALIGNED_DOUBLES ");
1664 if (flags & MASK_V8PLUS)
1665 fprintf (stderr, "V8PLUS ");
1666 if (flags & MASK_VIS)
1667 fprintf (stderr, "VIS ");
1668 if (flags & MASK_VIS2)
1669 fprintf (stderr, "VIS2 ");
1670 if (flags & MASK_VIS3)
1671 fprintf (stderr, "VIS3 ");
1672 if (flags & MASK_VIS4)
1673 fprintf (stderr, "VIS4 ");
1674 if (flags & MASK_VIS4B)
1675 fprintf (stderr, "VIS4B ");
1676 if (flags & MASK_CBCOND)
1677 fprintf (stderr, "CBCOND ");
1678 if (flags & MASK_DEPRECATED_V8_INSNS)
1679 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1680 if (flags & MASK_LEON)
1681 fprintf (stderr, "LEON ");
1682 if (flags & MASK_LEON3)
1683 fprintf (stderr, "LEON3 ");
1684 if (flags & MASK_SPARCLET)
1685 fprintf (stderr, "SPARCLET ");
1686 if (flags & MASK_SPARCLITE)
1687 fprintf (stderr, "SPARCLITE ");
1688 if (flags & MASK_V8)
1689 fprintf (stderr, "V8 ");
1690 if (flags & MASK_V9)
1691 fprintf (stderr, "V9 ");
1692 }
1693
1694 static void
1695 dump_target_flags (const char *prefix, const int flags)
1696 {
1697 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1698 dump_target_flag_bits (flags);
1699 fprintf(stderr, "]\n");
1700 }
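/* For example, a call such as
   dump_target_flags ("sparc_option_override: target flags", target_flags)
   prints the prefix, the raw flag word in hex and the names of the bits
   that are set, along the lines of
   "sparc_option_override: target flags: (........) [ FPU V8 ]"
   (hex digits elided here since they depend on the MASK_* values).  */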
1701
1702 /* Validate and override various options, and do some machine dependent
1703 initialization. */
1704
1705 static void
1706 sparc_option_override (void)
1707 {
1708 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1709 static struct cpu_default {
1710 const int cpu;
1711 const enum sparc_processor_type processor;
1712 } const cpu_default[] = {
1713 /* There must be one entry here for each TARGET_CPU value. */
1714 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1715 { TARGET_CPU_v8, PROCESSOR_V8 },
1716 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1717 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1718 { TARGET_CPU_leon, PROCESSOR_LEON },
1719 { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1720 { TARGET_CPU_leon5, PROCESSOR_LEON5 },
1721 { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
1722 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1723 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1724 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1725 { TARGET_CPU_v9, PROCESSOR_V9 },
1726 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1727 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1728 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1729 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1730 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1731 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1732 { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
1733 { TARGET_CPU_m8, PROCESSOR_M8 },
1734 { -1, PROCESSOR_V7 }
1735 };
1736 const struct cpu_default *def;
1737 /* Table of values for -m{cpu,tune}=. This must match the order of
1738 the enum processor_type in sparc-opts.h. */
1739 static struct cpu_table {
1740 const char *const name;
1741 const int disable;
1742 const int enable;
1743 } const cpu_table[] = {
1744 { "v7", MASK_ISA, 0 },
1745 { "cypress", MASK_ISA, 0 },
1746 { "v8", MASK_ISA, MASK_V8 },
1747 /* TI TMS390Z55 supersparc */
1748 { "supersparc", MASK_ISA, MASK_V8 },
1749 { "hypersparc", MASK_ISA, MASK_V8 },
1750 { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
1751 { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
1752 { "leon5", MASK_ISA, MASK_V8|MASK_LEON3 },
1753 { "leon3v7", MASK_ISA, MASK_LEON3 },
1754 { "sparclite", MASK_ISA, MASK_SPARCLITE },
1755 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1756 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1757 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
1758 { "f934", MASK_ISA, MASK_SPARCLITE },
1759 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1760 { "sparclet", MASK_ISA, MASK_SPARCLET },
1761 /* TEMIC sparclet */
1762 { "tsc701", MASK_ISA, MASK_SPARCLET },
1763 { "v9", MASK_ISA, MASK_V9 },
1764 /* UltraSPARC I, II, IIi */
1765 { "ultrasparc", MASK_ISA,
1766 /* Although insns using %y are deprecated, it is a clear win. */
1767 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1768 /* UltraSPARC III */
1769 /* ??? Check if %y issue still holds true. */
1770 { "ultrasparc3", MASK_ISA,
1771 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1772 /* UltraSPARC T1 */
1773 { "niagara", MASK_ISA,
1774 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1775 /* UltraSPARC T2 */
1776 { "niagara2", MASK_ISA,
1777 MASK_V9|MASK_POPC|MASK_VIS2 },
1778 /* UltraSPARC T3 */
1779 { "niagara3", MASK_ISA,
1780 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
1781 /* UltraSPARC T4 */
1782 { "niagara4", MASK_ISA,
1783 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1784 /* UltraSPARC M7 */
1785 { "niagara7", MASK_ISA,
1786 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
1787 /* UltraSPARC M8 */
1788 { "m8", MASK_ISA,
1789 MASK_V9|MASK_POPC|MASK_VIS4B|MASK_FMAF|MASK_CBCOND|MASK_SUBXC }
1790 };
1791 const struct cpu_table *cpu;
1792 unsigned int i;
1793
1794 if (sparc_debug_string != NULL)
1795 {
1796 const char *q;
1797 char *p;
1798
1799 p = ASTRDUP (sparc_debug_string);
1800 while ((q = strtok (p, ",")) != NULL)
1801 {
1802 bool invert;
1803 int mask;
1804
1805 p = NULL;
1806 if (*q == '!')
1807 {
1808 invert = true;
1809 q++;
1810 }
1811 else
1812 invert = false;
1813
1814 if (! strcmp (q, "all"))
1815 mask = MASK_DEBUG_ALL;
1816 else if (! strcmp (q, "options"))
1817 mask = MASK_DEBUG_OPTIONS;
1818 else
1819 error ("unknown %<-mdebug-%s%> switch", q);
1820
1821 if (invert)
1822 sparc_debug &= ~mask;
1823 else
1824 sparc_debug |= mask;
1825 }
1826 }
1827
1828 /* Enable the FsMULd instruction by default if not explicitly specified by
1829 the user. It may be later disabled by the CPU (explicitly or not). */
1830 if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
1831 target_flags |= MASK_FSMULD;
1832
1833 if (TARGET_DEBUG_OPTIONS)
1834 {
1835 dump_target_flags ("Initial target_flags", target_flags);
1836 dump_target_flags ("target_flags_explicit", target_flags_explicit);
1837 }
1838
1839 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1840 SUBTARGET_OVERRIDE_OPTIONS;
1841 #endif
1842
1843 #ifndef SPARC_BI_ARCH
1844 /* Check for unsupported architecture size. */
1845 if (!TARGET_64BIT != DEFAULT_ARCH32_P)
1846 error ("%s is not supported by this configuration",
1847 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1848 #endif
1849
1850 /* We force all 64-bit archs to use 128-bit long double.  */
1851 if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
1852 {
1853 error ("%<-mlong-double-64%> not allowed with %<-m64%>");
1854 target_flags |= MASK_LONG_DOUBLE_128;
1855 }
1856
1857 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1858 for (i = 8; i < 16; i++)
1859 if (!call_used_regs [i])
1860 {
1861 error ("%<-fcall-saved-REG%> is not supported for out registers");
1862 call_used_regs [i] = 1;
1863 }
1864
1865 /* Set the default CPU if no -mcpu option was specified. */
1866 if (!global_options_set.x_sparc_cpu_and_features)
1867 {
1868 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1869 if (def->cpu == TARGET_CPU_DEFAULT)
1870 break;
1871 gcc_assert (def->cpu != -1);
1872 sparc_cpu_and_features = def->processor;
1873 }
1874
1875 /* Set the default CPU if no -mtune option was specified. */
1876 if (!global_options_set.x_sparc_cpu)
1877 sparc_cpu = sparc_cpu_and_features;
1878
1879 cpu = &cpu_table[(int) sparc_cpu_and_features];
1880
1881 if (TARGET_DEBUG_OPTIONS)
1882 {
1883 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1884 dump_target_flags ("cpu->disable", cpu->disable);
1885 dump_target_flags ("cpu->enable", cpu->enable);
1886 }
1887
1888 target_flags &= ~cpu->disable;
1889 target_flags |= (cpu->enable
1890 #ifndef HAVE_AS_FMAF_HPC_VIS3
1891 & ~(MASK_FMAF | MASK_VIS3)
1892 #endif
1893 #ifndef HAVE_AS_SPARC4
1894 & ~MASK_CBCOND
1895 #endif
1896 #ifndef HAVE_AS_SPARC5_VIS4
1897 & ~(MASK_VIS4 | MASK_SUBXC)
1898 #endif
1899 #ifndef HAVE_AS_SPARC6
1900 & ~(MASK_VIS4B)
1901 #endif
1902 #ifndef HAVE_AS_LEON
1903 & ~(MASK_LEON | MASK_LEON3)
1904 #endif
1905 & ~(target_flags_explicit & MASK_FEATURES)
1906 );
1907
1908 /* FsMULd is a V8 instruction. */
1909 if (!TARGET_V8 && !TARGET_V9)
1910 target_flags &= ~MASK_FSMULD;
1911
1912 /* -mvis2 implies -mvis. */
1913 if (TARGET_VIS2)
1914 target_flags |= MASK_VIS;
1915
1916 /* -mvis3 implies -mvis2 and -mvis. */
1917 if (TARGET_VIS3)
1918 target_flags |= MASK_VIS2 | MASK_VIS;
1919
1920 /* -mvis4 implies -mvis3, -mvis2 and -mvis. */
1921 if (TARGET_VIS4)
1922 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1923
1924 /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis */
1925 if (TARGET_VIS4B)
1926 target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1927
1928 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if
1929 FPU is disabled. */
1930 if (!TARGET_FPU)
1931 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1932 | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);
1933
1934 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1935 are available; -m64 also implies v9. */
1936 if (TARGET_VIS || TARGET_ARCH64)
1937 {
1938 target_flags |= MASK_V9;
1939 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1940 }
1941
1942 /* -mvis also implies -mv8plus on 32-bit. */
1943 if (TARGET_VIS && !TARGET_ARCH64)
1944 target_flags |= MASK_V8PLUS;
1945
1946 /* Use the deprecated v8 insns for sparc64 in 32-bit mode. */
1947 if (TARGET_V9 && TARGET_ARCH32)
1948 target_flags |= MASK_DEPRECATED_V8_INSNS;
1949
1950 /* V8PLUS requires V9 and makes no sense in 64-bit mode. */
1951 if (!TARGET_V9 || TARGET_ARCH64)
1952 target_flags &= ~MASK_V8PLUS;
1953
1954 /* Don't use stack biasing in 32-bit mode. */
1955 if (TARGET_ARCH32)
1956 target_flags &= ~MASK_STACK_BIAS;
1957
1958 /* Use LRA instead of reload, unless otherwise instructed. */
1959 if (!(target_flags_explicit & MASK_LRA))
1960 target_flags |= MASK_LRA;
1961
1962 /* Enable applicable errata workarounds for LEON3FT. */
1963 if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
1964 {
1965 sparc_fix_b2bst = 1;
1966 sparc_fix_lost_divsqrt = 1;
1967 }
1968
1969 /* Disable FsMULd for the UT699 since it doesn't work correctly. */
1970 if (sparc_fix_ut699)
1971 target_flags &= ~MASK_FSMULD;
1972
1973 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1974 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1975 target_flags |= MASK_LONG_DOUBLE_128;
1976 #endif
1977
1978 if (TARGET_DEBUG_OPTIONS)
1979 dump_target_flags ("Final target_flags", target_flags);
1980
1981 /* Set the code model if no -mcmodel option was specified. */
1982 if (global_options_set.x_sparc_code_model)
1983 {
1984 if (TARGET_ARCH32)
1985 error ("%<-mcmodel=%> is not supported in 32-bit mode");
1986 }
1987 else
1988 {
1989 if (TARGET_ARCH32)
1990 sparc_code_model = CM_32;
1991 else
1992 sparc_code_model = SPARC_DEFAULT_CMODEL;
1993 }
1994
1995 /* Set the memory model if no -mmemory-model option was specified. */
1996 if (!global_options_set.x_sparc_memory_model)
1997 {
1998 /* Choose the memory model for the operating system. */
1999 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
2000 if (os_default != SMM_DEFAULT)
2001 sparc_memory_model = os_default;
2002 /* Choose the most relaxed model for the processor. */
2003 else if (TARGET_V9)
2004 sparc_memory_model = SMM_RMO;
2005 else if (TARGET_LEON3)
2006 sparc_memory_model = SMM_TSO;
2007 else if (TARGET_LEON)
2008 sparc_memory_model = SMM_SC;
2009 else if (TARGET_V8)
2010 sparc_memory_model = SMM_PSO;
2011 else
2012 sparc_memory_model = SMM_SC;
2013 }
2014
2015 /* Supply a default value for align_functions. */
2016 if (flag_align_functions && !str_align_functions)
2017 {
2018 if (sparc_cpu == PROCESSOR_ULTRASPARC
2019 || sparc_cpu == PROCESSOR_ULTRASPARC3
2020 || sparc_cpu == PROCESSOR_NIAGARA
2021 || sparc_cpu == PROCESSOR_NIAGARA2
2022 || sparc_cpu == PROCESSOR_NIAGARA3
2023 || sparc_cpu == PROCESSOR_NIAGARA4)
2024 str_align_functions = "32";
2025 else if (sparc_cpu == PROCESSOR_NIAGARA7
2026 || sparc_cpu == PROCESSOR_M8)
2027 str_align_functions = "64";
2028 }
2029
2030 /* Validate PCC_STRUCT_RETURN. */
2031 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
2032 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
2033
2034 /* Only use .uaxword when compiling for a 64-bit target. */
2035 if (!TARGET_ARCH64)
2036 targetm.asm_out.unaligned_op.di = NULL;
2037
2038 /* Set the processor costs. */
2039 switch (sparc_cpu)
2040 {
2041 case PROCESSOR_V7:
2042 case PROCESSOR_CYPRESS:
2043 sparc_costs = &cypress_costs;
2044 break;
2045 case PROCESSOR_V8:
2046 case PROCESSOR_SPARCLITE:
2047 case PROCESSOR_SUPERSPARC:
2048 sparc_costs = &supersparc_costs;
2049 break;
2050 case PROCESSOR_F930:
2051 case PROCESSOR_F934:
2052 case PROCESSOR_HYPERSPARC:
2053 case PROCESSOR_SPARCLITE86X:
2054 sparc_costs = &hypersparc_costs;
2055 break;
2056 case PROCESSOR_LEON:
2057 sparc_costs = &leon_costs;
2058 break;
2059 case PROCESSOR_LEON3:
2060 case PROCESSOR_LEON3V7:
2061 sparc_costs = &leon3_costs;
2062 break;
2063 case PROCESSOR_LEON5:
2064 sparc_costs = &leon5_costs;
2065 break;
2066 case PROCESSOR_SPARCLET:
2067 case PROCESSOR_TSC701:
2068 sparc_costs = &sparclet_costs;
2069 break;
2070 case PROCESSOR_V9:
2071 case PROCESSOR_ULTRASPARC:
2072 sparc_costs = &ultrasparc_costs;
2073 break;
2074 case PROCESSOR_ULTRASPARC3:
2075 sparc_costs = &ultrasparc3_costs;
2076 break;
2077 case PROCESSOR_NIAGARA:
2078 sparc_costs = &niagara_costs;
2079 break;
2080 case PROCESSOR_NIAGARA2:
2081 sparc_costs = &niagara2_costs;
2082 break;
2083 case PROCESSOR_NIAGARA3:
2084 sparc_costs = &niagara3_costs;
2085 break;
2086 case PROCESSOR_NIAGARA4:
2087 sparc_costs = &niagara4_costs;
2088 break;
2089 case PROCESSOR_NIAGARA7:
2090 sparc_costs = &niagara7_costs;
2091 break;
2092 case PROCESSOR_M8:
2093 sparc_costs = &m8_costs;
2094 break;
2095 case PROCESSOR_NATIVE:
2096 gcc_unreachable ();
2097 };
2098
2099 /* param_simultaneous_prefetches is the number of prefetches that
2100 can run at the same time. More important, it is the threshold
2101 defining when additional prefetches will be dropped by the
2102 hardware.
2103
2104 The UltraSPARC-III features a documented prefetch queue with a
2105 size of 8. Additional prefetches issued in the cpu are
2106 dropped.
2107
2108 Niagara processors are different. In these processors prefetches
2109 are handled much like regular loads. The L1 miss buffer is 32
2110 entries, but prefetches start getting affected when 30 entries
2111 become occupied. That occupation could be a mix of regular loads
2112 and prefetches though. And that buffer is shared by all threads.
2113 Once the threshold is reached, if the core is running a single
2114 thread the prefetch will retry. If more than one thread is
2115 running, the prefetch will be dropped.
2116
2117 All this makes it very difficult to determine how many
2118 prefetches can be issued simultaneously, even in a
2119 single-threaded program. Experimental results show that setting
2120 this parameter to 32 works well when the number of threads is not
2121 high. */
2122 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2123 param_simultaneous_prefetches,
2124 ((sparc_cpu == PROCESSOR_ULTRASPARC
2125 || sparc_cpu == PROCESSOR_NIAGARA
2126 || sparc_cpu == PROCESSOR_NIAGARA2
2127 || sparc_cpu == PROCESSOR_NIAGARA3
2128 || sparc_cpu == PROCESSOR_NIAGARA4)
2129 ? 2
2130 : (sparc_cpu == PROCESSOR_ULTRASPARC3
2131 ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
2132 || sparc_cpu == PROCESSOR_M8)
2133 ? 32 : 3))));
2134
2135 /* param_l1_cache_line_size is the size of the L1 cache line, in
2136 bytes.
2137
2138 The Oracle SPARC Architecture (previously the UltraSPARC
2139 Architecture) specification states that when a PREFETCH[A]
2140 instruction is executed an implementation-specific amount of data
2141 is prefetched, and that it is at least 64 bytes long (aligned to
2142 at least 64 bytes).
2143
2144 However, this is not correct. The M7 (and implementations prior
2145 to that) does not guarantee a 64B prefetch into a cache if the
2146 line size is smaller. A single cache line is all that is ever
2147 prefetched. So for the M7, where the L1D$ has 32B lines and the
2148 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
2149 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
2150 is a read_n prefetch, which is the only type which allocates to
2151 the L1.) */
2152 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2153 param_l1_cache_line_size,
2154 (sparc_cpu == PROCESSOR_M8 ? 64 : 32));
2155
2156 /* param_l1_cache_size is the size of the L1D$ (most SPARC chips use
2157 Harvard level-1 caches) in kilobytes. Both UltraSPARC and
2158 Niagara processors feature an L1D$ of 16KB. */
2159 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2160 param_l1_cache_size,
2161 ((sparc_cpu == PROCESSOR_ULTRASPARC
2162 || sparc_cpu == PROCESSOR_ULTRASPARC3
2163 || sparc_cpu == PROCESSOR_NIAGARA
2164 || sparc_cpu == PROCESSOR_NIAGARA2
2165 || sparc_cpu == PROCESSOR_NIAGARA3
2166 || sparc_cpu == PROCESSOR_NIAGARA4
2167 || sparc_cpu == PROCESSOR_NIAGARA7
2168 || sparc_cpu == PROCESSOR_M8)
2169 ? 16 : 64));
2170
2171 /* param_l2_cache_size is the size of the L2 in kilobytes. Note
2172 that 512 is the default in params.def. */
2173 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2174 param_l2_cache_size,
2175 ((sparc_cpu == PROCESSOR_NIAGARA4
2176 || sparc_cpu == PROCESSOR_M8)
2177 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
2178 ? 256 : 512)));
2179
2180
2181 /* Disable save slot sharing for call-clobbered registers by default.
2182 The IRA sharing algorithm works on single registers only and this
2183 pessimizes for double floating-point registers. */
2184 if (!global_options_set.x_flag_ira_share_save_slots)
2185 flag_ira_share_save_slots = 0;
2186
2187 /* Only enable REE by default in 64-bit mode where it helps to eliminate
2188 redundant 32-to-64-bit extensions. */
2189 if (!global_options_set.x_flag_ree && TARGET_ARCH32)
2190 flag_ree = 0;
2191
2192 /* Do various machine dependent initializations. */
2193 sparc_init_modes ();
2194
2195 /* Set up function hooks. */
2196 init_machine_status = sparc_init_machine_status;
2197 }
2198
2199 /* Miscellaneous utilities. */
2200
2201 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
2202 or branch on register contents instructions. */
2203
2204 int
2205 v9_regcmp_p (enum rtx_code code)
2206 {
2207 return (code == EQ || code == NE || code == GE || code == LT
2208 || code == LE || code == GT);
2209 }
2210
2211 /* Nonzero if OP is a floating point constant which can
2212 be loaded into an integer register using a single
2213 sethi instruction. */
2214
2215 int
2216 fp_sethi_p (rtx op)
2217 {
2218 if (GET_CODE (op) == CONST_DOUBLE)
2219 {
2220 long i;
2221
2222 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2223 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
2224 }
2225
2226 return 0;
2227 }
2228
2229 /* Nonzero if OP is a floating point constant which can
2230 be loaded into an integer register using a single
2231 mov instruction. */
2232
2233 int
2234 fp_mov_p (rtx op)
2235 {
2236 if (GET_CODE (op) == CONST_DOUBLE)
2237 {
2238 long i;
2239
2240 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2241 return SPARC_SIMM13_P (i);
2242 }
2243
2244 return 0;
2245 }
2246
2247 /* Nonzero if OP is a floating point constant which can
2248 be loaded into an integer register using a high/losum
2249 instruction sequence. */
2250
2251 int
2252 fp_high_losum_p (rtx op)
2253 {
2254 /* The constraints calling this should only be in
2255 SFmode move insns, so any constant which cannot
2256 be moved using a single insn will do. */
2257 if (GET_CODE (op) == CONST_DOUBLE)
2258 {
2259 long i;
2260
2261 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2262 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
2263 }
2264
2265 return 0;
2266 }
2267
2268 /* Return true if the address of LABEL can be loaded by means of the
2269 mov{si,di}_pic_label_ref patterns in PIC mode. */
2270
2271 static bool
2272 can_use_mov_pic_label_ref (rtx label)
2273 {
2274 /* VxWorks does not impose a fixed gap between segments; the run-time
2275 gap can be different from the object-file gap. We therefore can't
2276 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
2277 are absolutely sure that X is in the same segment as the GOT.
2278 Unfortunately, the flexibility of linker scripts means that we
2279 can't be sure of that in general, so assume that GOT-relative
2280 accesses are never valid on VxWorks. */
2281 if (TARGET_VXWORKS_RTP)
2282 return false;
2283
2284 /* Similarly, if the label is non-local, it might end up being placed
2285 in a different section than the current one; now mov_pic_label_ref
2286 requires the label and the code to be in the same section. */
2287 if (LABEL_REF_NONLOCAL_P (label))
2288 return false;
2289
2290 /* Finally, if we are reordering basic blocks and partitioning into hot
2291 and cold sections, this might happen for any label. */
2292 if (flag_reorder_blocks_and_partition)
2293 return false;
2294
2295 return true;
2296 }
2297
2298 /* Expand a move instruction. Return true if all work is done. */
2299
2300 bool
2301 sparc_expand_move (machine_mode mode, rtx *operands)
2302 {
2303 /* Handle sets of MEM first. */
2304 if (GET_CODE (operands[0]) == MEM)
2305 {
2306 /* 0 is a register (or a pair of registers) on SPARC. */
2307 if (register_or_zero_operand (operands[1], mode))
2308 return false;
2309
2310 if (!reload_in_progress)
2311 {
2312 operands[0] = validize_mem (operands[0]);
2313 operands[1] = force_reg (mode, operands[1]);
2314 }
2315 }
2316
2317 /* Fix up TLS cases. */
2318 if (TARGET_HAVE_TLS
2319 && CONSTANT_P (operands[1])
2320 && sparc_tls_referenced_p (operands [1]))
2321 {
2322 operands[1] = sparc_legitimize_tls_address (operands[1]);
2323 return false;
2324 }
2325
2326 /* Fix up PIC cases. */
2327 if (flag_pic && CONSTANT_P (operands[1]))
2328 {
2329 if (pic_address_needs_scratch (operands[1]))
2330 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
2331
2332 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
2333 if ((GET_CODE (operands[1]) == LABEL_REF
2334 && can_use_mov_pic_label_ref (operands[1]))
2335 || (GET_CODE (operands[1]) == CONST
2336 && GET_CODE (XEXP (operands[1], 0)) == PLUS
2337 && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
2338 && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT
2339 && can_use_mov_pic_label_ref (XEXP (XEXP (operands[1], 0), 0))))
2340 {
2341 if (mode == SImode)
2342 {
2343 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
2344 return true;
2345 }
2346
2347 if (mode == DImode)
2348 {
2349 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
2350 return true;
2351 }
2352 }
2353
2354 if (symbolic_operand (operands[1], mode))
2355 {
2356 operands[1]
2357 = sparc_legitimize_pic_address (operands[1],
2358 reload_in_progress
2359 ? operands[0] : NULL_RTX);
2360 return false;
2361 }
2362 }
2363
2364 /* If we are trying to toss an integer constant into FP registers,
2365 or loading a FP or vector constant, force it into memory. */
2366 if (CONSTANT_P (operands[1])
2367 && REG_P (operands[0])
2368 && (SPARC_FP_REG_P (REGNO (operands[0]))
2369 || SCALAR_FLOAT_MODE_P (mode)
2370 || VECTOR_MODE_P (mode)))
2371 {
2372 /* emit_group_store will send such bogosity to us when it is
2373 not storing directly into memory. So fix this up to avoid
2374 crashes in output_constant_pool. */
2375 if (operands [1] == const0_rtx)
2376 operands[1] = CONST0_RTX (mode);
2377
2378 /* We can clear or set to all-ones FP registers if TARGET_VIS, and
2379 always other regs. */
2380 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
2381 && (const_zero_operand (operands[1], mode)
2382 || const_all_ones_operand (operands[1], mode)))
2383 return false;
2384
2385 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
2386 /* We are able to build any SF constant in integer registers
2387 with at most 2 instructions. */
2388 && (mode == SFmode
2389 /* And any DF constant in integer registers if needed. */
2390 || (mode == DFmode && !can_create_pseudo_p ())))
2391 return false;
2392
2393 operands[1] = force_const_mem (mode, operands[1]);
2394 if (!reload_in_progress)
2395 operands[1] = validize_mem (operands[1]);
2396 return false;
2397 }
2398
2399 /* Accept non-constants and valid constants unmodified. */
2400 if (!CONSTANT_P (operands[1])
2401 || GET_CODE (operands[1]) == HIGH
2402 || input_operand (operands[1], mode))
2403 return false;
2404
2405 switch (mode)
2406 {
2407 case E_QImode:
2408 /* All QImode constants require only one insn, so proceed. */
2409 break;
2410
2411 case E_HImode:
2412 case E_SImode:
2413 sparc_emit_set_const32 (operands[0], operands[1]);
2414 return true;
2415
2416 case E_DImode:
2417 /* input_operand should have filtered out 32-bit mode. */
2418 sparc_emit_set_const64 (operands[0], operands[1]);
2419 return true;
2420
2421 case E_TImode:
2422 {
2423 rtx high, low;
2424 /* TImode isn't available in 32-bit mode. */
2425 split_double (operands[1], &high, &low);
2426 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
2427 high));
2428 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
2429 low));
2430 }
2431 return true;
2432
2433 default:
2434 gcc_unreachable ();
2435 }
2436
2437 return false;
2438 }
2439
2440 /* Load OP1, a 32-bit constant, into OP0, a register.
2441 We know it can't be done in one insn when we get
2442 here, the move expander guarantees this. */
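/* Illustrative example (hypothetical constant, not from the original
   sources): 0x12345678 is neither a simm13 nor a %hi-only value, so it
   would be emitted as the pair
     sethi %hi(0x12345678), %temp   ! %temp = 0x12345400
     or    %temp, 0x278, %reg       ! 0x12345678 & 0x3ff = 0x278
   which is exactly the SET/IOR pair generated below for CONST_INTs.  */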
2443
2444 static void
2445 sparc_emit_set_const32 (rtx op0, rtx op1)
2446 {
2447 machine_mode mode = GET_MODE (op0);
2448 rtx temp = op0;
2449
2450 if (can_create_pseudo_p ())
2451 temp = gen_reg_rtx (mode);
2452
2453 if (GET_CODE (op1) == CONST_INT)
2454 {
2455 gcc_assert (!small_int_operand (op1, mode)
2456 && !const_high_operand (op1, mode));
2457
2458 /* Emit them as real moves instead of a HIGH/LO_SUM,
2459 this way CSE can see everything and reuse intermediate
2460 values if it wants. */
2461 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
2462 & ~(HOST_WIDE_INT) 0x3ff)));
2463
2464 emit_insn (gen_rtx_SET (op0,
2465 gen_rtx_IOR (mode, temp,
2466 GEN_INT (INTVAL (op1) & 0x3ff))));
2467 }
2468 else
2469 {
2470 /* A symbol, emit in the traditional way. */
2471 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
2472 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
2473 }
2474 }
2475
2476 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
2477 If TEMP is nonzero, we are forbidden to use any other scratch
2478 registers. Otherwise, we are allowed to generate them as needed.
2479
2480 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2481 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2482
2483 void
2484 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2485 {
2486 rtx cst, temp1, temp2, temp3, temp4, temp5;
2487 rtx ti_temp = 0;
2488
2489 /* Deal with too large offsets. */
2490 if (GET_CODE (op1) == CONST
2491 && GET_CODE (XEXP (op1, 0)) == PLUS
2492 && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
2493 && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
2494 {
2495 gcc_assert (!temp);
2496 temp1 = gen_reg_rtx (DImode);
2497 temp2 = gen_reg_rtx (DImode);
2498 sparc_emit_set_const64 (temp2, cst);
2499 sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
2500 NULL_RTX);
2501 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
2502 return;
2503 }
2504
2505 if (temp && GET_MODE (temp) == TImode)
2506 {
2507 ti_temp = temp;
2508 temp = gen_rtx_REG (DImode, REGNO (temp));
2509 }
2510
2511 /* SPARC-V9 code model support. */
2512 switch (sparc_code_model)
2513 {
2514 case CM_MEDLOW:
2515 /* The range spanned by all instructions in the object is less
2516 than 2^31 bytes (2GB) and the distance from any instruction
2517 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2518 than 2^31 bytes (2GB).
2519
2520 The executable must be in the low 4TB of the virtual address
2521 space.
2522
2523 sethi %hi(symbol), %temp1
2524 or %temp1, %lo(symbol), %reg */
2525 if (temp)
2526 temp1 = temp; /* op0 is allowed. */
2527 else
2528 temp1 = gen_reg_rtx (DImode);
2529
2530 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2531 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2532 break;
2533
2534 case CM_MEDMID:
2535 /* The range spanned by all instructions in the object is less
2536 than 2^31 bytes (2GB) and the distance from any instruction
2537 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2538 than 2^31 bytes (2GB).
2539
2540 The executable must be in the low 16TB of the virtual address
2541 space.
2542
2543 sethi %h44(symbol), %temp1
2544 or %temp1, %m44(symbol), %temp2
2545 sllx %temp2, 12, %temp3
2546 or %temp3, %l44(symbol), %reg */
2547 if (temp)
2548 {
2549 temp1 = op0;
2550 temp2 = op0;
2551 temp3 = temp; /* op0 is allowed. */
2552 }
2553 else
2554 {
2555 temp1 = gen_reg_rtx (DImode);
2556 temp2 = gen_reg_rtx (DImode);
2557 temp3 = gen_reg_rtx (DImode);
2558 }
2559
2560 emit_insn (gen_seth44 (temp1, op1));
2561 emit_insn (gen_setm44 (temp2, temp1, op1));
2562 emit_insn (gen_rtx_SET (temp3,
2563 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2564 emit_insn (gen_setl44 (op0, temp3, op1));
2565 break;
2566
2567 case CM_MEDANY:
2568 /* The range spanned by all instructions in the object is less
2569 than 2^31 bytes (2GB) and the distance from any instruction
2570 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2571 than 2^31 bytes (2GB).
2572
2573 The executable can be placed anywhere in the virtual address
2574 space.
2575
2576 sethi %hh(symbol), %temp1
2577 sethi %lm(symbol), %temp2
2578 or %temp1, %hm(symbol), %temp3
2579 sllx %temp3, 32, %temp4
2580 or %temp4, %temp2, %temp5
2581 or %temp5, %lo(symbol), %reg */
2582 if (temp)
2583 {
2584 /* It is possible that one of the registers we got for operands[2]
2585 might coincide with that of operands[0] (which is why we made
2586 it TImode). Pick the other one to use as our scratch. */
2587 if (rtx_equal_p (temp, op0))
2588 {
2589 gcc_assert (ti_temp);
2590 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2591 }
2592 temp1 = op0;
2593 temp2 = temp; /* op0 is _not_ allowed, see above. */
2594 temp3 = op0;
2595 temp4 = op0;
2596 temp5 = op0;
2597 }
2598 else
2599 {
2600 temp1 = gen_reg_rtx (DImode);
2601 temp2 = gen_reg_rtx (DImode);
2602 temp3 = gen_reg_rtx (DImode);
2603 temp4 = gen_reg_rtx (DImode);
2604 temp5 = gen_reg_rtx (DImode);
2605 }
2606
2607 emit_insn (gen_sethh (temp1, op1));
2608 emit_insn (gen_setlm (temp2, op1));
2609 emit_insn (gen_sethm (temp3, temp1, op1));
2610 emit_insn (gen_rtx_SET (temp4,
2611 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2612 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2613 emit_insn (gen_setlo (op0, temp5, op1));
2614 break;
2615
2616 case CM_EMBMEDANY:
2617 /* Old old old backwards compatibility kruft here.
2618 Essentially it is MEDLOW with a fixed 64-bit
2619 virtual base added to all data segment addresses.
2620 Text-segment stuff is computed like MEDANY, we can't
2621 reuse the code above because the relocation knobs
2622 look different.
2623
2624 Data segment: sethi %hi(symbol), %temp1
2625 add %temp1, EMBMEDANY_BASE_REG, %temp2
2626 or %temp2, %lo(symbol), %reg */
2627 if (data_segment_operand (op1, GET_MODE (op1)))
2628 {
2629 if (temp)
2630 {
2631 temp1 = temp; /* op0 is allowed. */
2632 temp2 = op0;
2633 }
2634 else
2635 {
2636 temp1 = gen_reg_rtx (DImode);
2637 temp2 = gen_reg_rtx (DImode);
2638 }
2639
2640 emit_insn (gen_embmedany_sethi (temp1, op1));
2641 emit_insn (gen_embmedany_brsum (temp2, temp1));
2642 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2643 }
2644
2645 /* Text segment: sethi %uhi(symbol), %temp1
2646 sethi %hi(symbol), %temp2
2647 or %temp1, %ulo(symbol), %temp3
2648 sllx %temp3, 32, %temp4
2649 or %temp4, %temp2, %temp5
2650 or %temp5, %lo(symbol), %reg */
2651 else
2652 {
2653 if (temp)
2654 {
2655 /* It is possible that one of the registers we got for operands[2]
2656 might coincide with that of operands[0] (which is why we made
2657 it TImode). Pick the other one to use as our scratch. */
2658 if (rtx_equal_p (temp, op0))
2659 {
2660 gcc_assert (ti_temp);
2661 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2662 }
2663 temp1 = op0;
2664 temp2 = temp; /* op0 is _not_ allowed, see above. */
2665 temp3 = op0;
2666 temp4 = op0;
2667 temp5 = op0;
2668 }
2669 else
2670 {
2671 temp1 = gen_reg_rtx (DImode);
2672 temp2 = gen_reg_rtx (DImode);
2673 temp3 = gen_reg_rtx (DImode);
2674 temp4 = gen_reg_rtx (DImode);
2675 temp5 = gen_reg_rtx (DImode);
2676 }
2677
2678 emit_insn (gen_embmedany_textuhi (temp1, op1));
2679 emit_insn (gen_embmedany_texthi (temp2, op1));
2680 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2681 emit_insn (gen_rtx_SET (temp4,
2682 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2683 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2684 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2685 }
2686 break;
2687
2688 default:
2689 gcc_unreachable ();
2690 }
2691 }
2692
2693 /* These avoid problems when cross compiling. If we do not
2694 go through all this hair then the optimizer will see
2695 invalid REG_EQUAL notes or in some cases none at all. */
2696 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2697 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2698 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2699 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2700
2701 /* The optimizer is not to assume anything about exactly
2702 which bits are set for a HIGH, they are unspecified.
2703 Unfortunately this leads to many missed optimizations
2704 during CSE. We mask out the non-HIGH bits and match
2705 a plain movdi, to alleviate this problem. */
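/* Illustrative example (hypothetical value): gen_safe_HIGH64 (reg, 0x12345678)
   produces (set reg (const_int 0x12345400)), i.e. the %hi part with the low
   10 bits already cleared, so CSE sees an ordinary constant move.  */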
2706 static rtx
2707 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2708 {
2709 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2710 }
2711
2712 static rtx
2713 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2714 {
2715 return gen_rtx_SET (dest, GEN_INT (val));
2716 }
2717
2718 static rtx
2719 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2720 {
2721 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2722 }
2723
2724 static rtx
2725 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2726 {
2727 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2728 }
2729
2730 /* Worker routines for 64-bit constant formation on arch64.
2731 One of the key things to be doing in these emissions is
2732 to create as many temp REGs as possible. This makes it
2733 possible for half-built constants to be used later when
2734 such values are similar to something required later on.
2735 Without doing this, the optimizer cannot see such
2736 opportunities. */
2737
2738 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2739 unsigned HOST_WIDE_INT, int);
2740
2741 static void
2742 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2743 unsigned HOST_WIDE_INT low_bits, int is_neg)
2744 {
2745 unsigned HOST_WIDE_INT high_bits;
2746
2747 if (is_neg)
2748 high_bits = (~low_bits) & 0xffffffff;
2749 else
2750 high_bits = low_bits;
2751
2752 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2753 if (!is_neg)
2754 {
2755 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2756 }
2757 else
2758 {
2759 /* If we are XOR'ing with -1, then we should emit a one's complement
2760 instead. This way the combiner will notice logical operations
2761 such as ANDN later on and substitute. */
2762 if ((low_bits & 0x3ff) == 0x3ff)
2763 {
2764 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2765 }
2766 else
2767 {
2768 emit_insn (gen_rtx_SET (op0,
2769 gen_safe_XOR64 (temp,
2770 (-(HOST_WIDE_INT)0x400
2771 | (low_bits & 0x3ff)))));
2772 }
2773 }
2774 }
2775
2776 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2777 unsigned HOST_WIDE_INT, int);
2778
2779 static void
2780 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2781 unsigned HOST_WIDE_INT high_bits,
2782 unsigned HOST_WIDE_INT low_immediate,
2783 int shift_count)
2784 {
2785 rtx temp2 = op0;
2786
2787 if ((high_bits & 0xfffffc00) != 0)
2788 {
2789 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2790 if ((high_bits & ~0xfffffc00) != 0)
2791 emit_insn (gen_rtx_SET (op0,
2792 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2793 else
2794 temp2 = temp;
2795 }
2796 else
2797 {
2798 emit_insn (gen_safe_SET64 (temp, high_bits));
2799 temp2 = temp;
2800 }
2801
2802 /* Now shift it up into place. */
2803 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2804 GEN_INT (shift_count))));
2805
2806 /* If there is a low immediate part piece, finish up by
2807 putting that in as well. */
2808 if (low_immediate != 0)
2809 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2810 }
2811
2812 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2813 unsigned HOST_WIDE_INT);
2814
2815 /* Full 64-bit constant decomposition. Even though this is the
2816 'worst' case, we still optimize a few things away. */
2817 static void
2818 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2819 unsigned HOST_WIDE_INT high_bits,
2820 unsigned HOST_WIDE_INT low_bits)
2821 {
2822 rtx sub_temp = op0;
2823
2824 if (can_create_pseudo_p ())
2825 sub_temp = gen_reg_rtx (DImode);
2826
2827 if ((high_bits & 0xfffffc00) != 0)
2828 {
2829 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2830 if ((high_bits & ~0xfffffc00) != 0)
2831 emit_insn (gen_rtx_SET (sub_temp,
2832 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2833 else
2834 sub_temp = temp;
2835 }
2836 else
2837 {
2838 emit_insn (gen_safe_SET64 (temp, high_bits));
2839 sub_temp = temp;
2840 }
2841
2842 if (can_create_pseudo_p ())
2843 {
2844 rtx temp2 = gen_reg_rtx (DImode);
2845 rtx temp3 = gen_reg_rtx (DImode);
2846 rtx temp4 = gen_reg_rtx (DImode);
2847
2848 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2849 GEN_INT (32))));
2850
2851 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2852 if ((low_bits & ~0xfffffc00) != 0)
2853 {
2854 emit_insn (gen_rtx_SET (temp3,
2855 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2856 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2857 }
2858 else
2859 {
2860 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2861 }
2862 }
2863 else
2864 {
2865 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2866 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2867 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2868 int to_shift = 12;
2869
2870 /* We are in the middle of reload, so this is really
2871 painful. However we do still make an attempt to
2872 avoid emitting truly stupid code. */
2873 if (low1 != const0_rtx)
2874 {
2875 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2876 GEN_INT (to_shift))));
2877 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2878 sub_temp = op0;
2879 to_shift = 12;
2880 }
2881 else
2882 {
2883 to_shift += 12;
2884 }
2885 if (low2 != const0_rtx)
2886 {
2887 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2888 GEN_INT (to_shift))));
2889 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2890 sub_temp = op0;
2891 to_shift = 8;
2892 }
2893 else
2894 {
2895 to_shift += 8;
2896 }
2897 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2898 GEN_INT (to_shift))));
2899 if (low3 != const0_rtx)
2900 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2901 /* phew... */
2902 }
2903 }
2904
2905 /* Analyze a 64-bit constant for certain properties. */
2906 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2907 unsigned HOST_WIDE_INT,
2908 int *, int *, int *);
2909
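/* Worked example (hypothetical constant): for 0x000000f000000000, i.e.
   high_bits = 0x000000f0 and low_bits = 0, the loops below compute
   lowest_bit_set = 36 and highest_bit_set = 39, and since bits 36..39
   are contiguous, all_bits_between_are_set = 1.  */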
2910 static void
2911 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2912 unsigned HOST_WIDE_INT low_bits,
2913 int *hbsp, int *lbsp, int *abbasp)
2914 {
2915 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2916 int i;
2917
2918 lowest_bit_set = highest_bit_set = -1;
2919 i = 0;
2920 do
2921 {
2922 if ((lowest_bit_set == -1)
2923 && ((low_bits >> i) & 1))
2924 lowest_bit_set = i;
2925 if ((highest_bit_set == -1)
2926 && ((high_bits >> (32 - i - 1)) & 1))
2927 highest_bit_set = (64 - i - 1);
2928 }
2929 while (++i < 32
2930 && ((highest_bit_set == -1)
2931 || (lowest_bit_set == -1)));
2932 if (i == 32)
2933 {
2934 i = 0;
2935 do
2936 {
2937 if ((lowest_bit_set == -1)
2938 && ((high_bits >> i) & 1))
2939 lowest_bit_set = i + 32;
2940 if ((highest_bit_set == -1)
2941 && ((low_bits >> (32 - i - 1)) & 1))
2942 highest_bit_set = 32 - i - 1;
2943 }
2944 while (++i < 32
2945 && ((highest_bit_set == -1)
2946 || (lowest_bit_set == -1)));
2947 }
2948 /* If there are no bits set this should have gone out
2949 as one instruction! */
2950 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2951 all_bits_between_are_set = 1;
2952 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2953 {
2954 if (i < 32)
2955 {
2956 if ((low_bits & (1 << i)) != 0)
2957 continue;
2958 }
2959 else
2960 {
2961 if ((high_bits & (1 << (i - 32))) != 0)
2962 continue;
2963 }
2964 all_bits_between_are_set = 0;
2965 break;
2966 }
2967 *hbsp = highest_bit_set;
2968 *lbsp = lowest_bit_set;
2969 *abbasp = all_bits_between_are_set;
2970 }
2971
2972 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2973
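/* Illustrative cases (hypothetical constants): 0x00000000000012ab
   (high_bits == 0) and 0xfffffffffffff000 (high_bits == 0xffffffff) are
   trivially 2-insn constants; so is any value whose highest and lowest
   set bits are fewer than 21 positions apart, e.g. 0x0003ffffc0000000.  */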
2974 static int
2975 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2976 unsigned HOST_WIDE_INT low_bits)
2977 {
2978 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2979
2980 if (high_bits == 0
2981 || high_bits == 0xffffffff)
2982 return 1;
2983
2984 analyze_64bit_constant (high_bits, low_bits,
2985 &highest_bit_set, &lowest_bit_set,
2986 &all_bits_between_are_set);
2987
2988 if ((highest_bit_set == 63
2989 || lowest_bit_set == 0)
2990 && all_bits_between_are_set != 0)
2991 return 1;
2992
2993 if ((highest_bit_set - lowest_bit_set) < 21)
2994 return 1;
2995
2996 return 0;
2997 }
2998
2999 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
3000 unsigned HOST_WIDE_INT,
3001 int, int);
3002
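/* Illustrative example (hypothetical constant): for 0x000000f000000000 with
   lowest_bit_set = 36 and shift = 10, the set bits are moved down so that
   the lowest one lands at bit 10, yielding 0x3c00 -- a value a single sethi
   can materialize before it is shifted back into place.  */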
3003 static unsigned HOST_WIDE_INT
3004 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
3005 unsigned HOST_WIDE_INT low_bits,
3006 int lowest_bit_set, int shift)
3007 {
3008 HOST_WIDE_INT hi, lo;
3009
3010 if (lowest_bit_set < 32)
3011 {
3012 lo = (low_bits >> lowest_bit_set) << shift;
3013 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
3014 }
3015 else
3016 {
3017 lo = 0;
3018 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
3019 }
3020 gcc_assert (! (hi & lo));
3021 return (hi | lo);
3022 }
3023
3024 /* Here we are sure to be arch64 and this is an integer constant
3025 being loaded into a register. Emit the most efficient
3026 insn sequence possible. Detection of all the 1-insn cases
3027 has been done already. */
3028 static void
3029 sparc_emit_set_const64 (rtx op0, rtx op1)
3030 {
3031 unsigned HOST_WIDE_INT high_bits, low_bits;
3032 int lowest_bit_set, highest_bit_set;
3033 int all_bits_between_are_set;
3034 rtx temp = 0;
3035
3036 /* Sanity check that we know what we are working with. */
3037 gcc_assert (TARGET_ARCH64
3038 && (GET_CODE (op0) == SUBREG
3039 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
3040
3041 if (! can_create_pseudo_p ())
3042 temp = op0;
3043
3044 if (GET_CODE (op1) != CONST_INT)
3045 {
3046 sparc_emit_set_symbolic_const64 (op0, op1, temp);
3047 return;
3048 }
3049
3050 if (! temp)
3051 temp = gen_reg_rtx (DImode);
3052
3053 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
3054 low_bits = (INTVAL (op1) & 0xffffffff);
3055
3056 /* low_bits bits 0 --> 31
3057 high_bits bits 32 --> 63 */
3058
3059 analyze_64bit_constant (high_bits, low_bits,
3060 &highest_bit_set, &lowest_bit_set,
3061 &all_bits_between_are_set);
3062
3063 /* First try for a 2-insn sequence. */
3064
3065 /* These situations are preferred because the optimizer can
3066 * do more things with them:
3067 * 1) mov -1, %reg
3068 * sllx %reg, shift, %reg
3069 * 2) mov -1, %reg
3070 * srlx %reg, shift, %reg
3071 * 3) mov some_small_const, %reg
3072 * sllx %reg, shift, %reg
3073 */
3074 if (((highest_bit_set == 63
3075 || lowest_bit_set == 0)
3076 && all_bits_between_are_set != 0)
3077 || ((highest_bit_set - lowest_bit_set) < 12))
3078 {
3079 HOST_WIDE_INT the_const = -1;
3080 int shift = lowest_bit_set;
3081
3082 if ((highest_bit_set != 63
3083 && lowest_bit_set != 0)
3084 || all_bits_between_are_set == 0)
3085 {
3086 the_const =
3087 create_simple_focus_bits (high_bits, low_bits,
3088 lowest_bit_set, 0);
3089 }
3090 else if (lowest_bit_set == 0)
3091 shift = -(63 - highest_bit_set);
3092
3093 gcc_assert (SPARC_SIMM13_P (the_const));
3094 gcc_assert (shift != 0);
3095
3096 emit_insn (gen_safe_SET64 (temp, the_const));
3097 if (shift > 0)
3098 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
3099 GEN_INT (shift))));
3100 else if (shift < 0)
3101 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
3102 GEN_INT (-shift))));
3103 return;
3104 }
3105
3106 /* Now a range of 22 or less bits set somewhere.
3107 * 1) sethi %hi(focus_bits), %reg
3108 * sllx %reg, shift, %reg
3109 * 2) sethi %hi(focus_bits), %reg
3110 * srlx %reg, shift, %reg
3111 */
3112 if ((highest_bit_set - lowest_bit_set) < 21)
3113 {
3114 unsigned HOST_WIDE_INT focus_bits =
3115 create_simple_focus_bits (high_bits, low_bits,
3116 lowest_bit_set, 10);
3117
3118 gcc_assert (SPARC_SETHI_P (focus_bits));
3119 gcc_assert (lowest_bit_set != 10);
3120
3121 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
3122
3123 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
3124 if (lowest_bit_set < 10)
3125 emit_insn (gen_rtx_SET (op0,
3126 gen_rtx_LSHIFTRT (DImode, temp,
3127 GEN_INT (10 - lowest_bit_set))));
3128 else if (lowest_bit_set > 10)
3129 emit_insn (gen_rtx_SET (op0,
3130 gen_rtx_ASHIFT (DImode, temp,
3131 GEN_INT (lowest_bit_set - 10))));
3132 return;
3133 }
3134
3135 /* 1) sethi %hi(low_bits), %reg
3136 * or %reg, %lo(low_bits), %reg
3137 * 2) sethi %hi(~low_bits), %reg
3138 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
3139 */
3140 if (high_bits == 0
3141 || high_bits == 0xffffffff)
3142 {
3143 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
3144 (high_bits == 0xffffffff));
3145 return;
3146 }
3147
3148 /* Now, try 3-insn sequences. */
3149
3150 /* 1) sethi %hi(high_bits), %reg
3151 * or %reg, %lo(high_bits), %reg
3152 * sllx %reg, 32, %reg
3153 */
3154 if (low_bits == 0)
3155 {
3156 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
3157 return;
3158 }
3159
3160 /* We may be able to do something quick
3161 when the constant is negated, so try that. */
3162 if (const64_is_2insns ((~high_bits) & 0xffffffff,
3163 (~low_bits) & 0xfffffc00))
3164 {
3165 /* NOTE: The trailing bits get XOR'd so we need the
3166 non-negated bits, not the negated ones. */
3167 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
3168
3169 if ((((~high_bits) & 0xffffffff) == 0
3170 && ((~low_bits) & 0x80000000) == 0)
3171 || (((~high_bits) & 0xffffffff) == 0xffffffff
3172 && ((~low_bits) & 0x80000000) != 0))
3173 {
3174 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
3175
3176 if ((SPARC_SETHI_P (fast_int)
3177 && (~high_bits & 0xffffffff) == 0)
3178 || SPARC_SIMM13_P (fast_int))
3179 emit_insn (gen_safe_SET64 (temp, fast_int));
3180 else
3181 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
3182 }
3183 else
3184 {
3185 rtx negated_const;
3186 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
3187 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
3188 sparc_emit_set_const64 (temp, negated_const);
3189 }
3190
3191 /* If we are XOR'ing with -1, then we should emit a one's complement
3192 instead. This way the combiner will notice logical operations
3193 such as ANDN later on and substitute. */
3194 if (trailing_bits == 0x3ff)
3195 {
3196 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
3197 }
3198 else
3199 {
3200 emit_insn (gen_rtx_SET (op0,
3201 gen_safe_XOR64 (temp,
3202 (-0x400 | trailing_bits))));
3203 }
3204 return;
3205 }
3206
3207 /* 1) sethi %hi(xxx), %reg
3208 * or %reg, %lo(xxx), %reg
3209 * sllx %reg, yyy, %reg
3210 *
3211 * ??? This is just a generalized version of the low_bits==0
3212 * thing above, FIXME...
3213 */
3214 if ((highest_bit_set - lowest_bit_set) < 32)
3215 {
3216 unsigned HOST_WIDE_INT focus_bits =
3217 create_simple_focus_bits (high_bits, low_bits,
3218 lowest_bit_set, 0);
3219
3220 /* We can't get here in this state. */
3221 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
3222
3223 /* So what we know is that the set bits straddle the
3224 middle of the 64-bit word. */
3225 sparc_emit_set_const64_quick2 (op0, temp,
3226 focus_bits, 0,
3227 lowest_bit_set);
3228 return;
3229 }
3230
3231 /* 1) sethi %hi(high_bits), %reg
3232 * or %reg, %lo(high_bits), %reg
3233 * sllx %reg, 32, %reg
3234 * or %reg, low_bits, %reg
3235 */
3236 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
3237 {
3238 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
3239 return;
3240 }
3241
3242 /* The easiest way when all else fails, is full decomposition. */
3243 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
3244 }
3245
3246 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
3247
3248 static bool
3249 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3250 {
3251 *p1 = SPARC_ICC_REG;
3252 *p2 = SPARC_FCC_REG;
3253 return true;
3254 }
3255
3256 /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
3257
3258 static unsigned int
3259 sparc_min_arithmetic_precision (void)
3260 {
3261 return 32;
3262 }
3263
3264 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
3265 return the mode to be used for the comparison. For floating-point,
3266 CCFP[E]mode is used. CCNZmode should be used when the first operand
3267 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
3268 processing is needed. */
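/* For example (illustrative), comparing (plus:SI x y) against zero selects
   CCNZmode, meaning only the N and Z condition bits are relied upon, so the
   flags can come directly from an addcc; a plain SImode register comparison
   falls through to CCmode.  */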
3269
3270 machine_mode
3271 select_cc_mode (enum rtx_code op, rtx x, rtx y)
3272 {
3273 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3274 {
3275 switch (op)
3276 {
3277 case EQ:
3278 case NE:
3279 case UNORDERED:
3280 case ORDERED:
3281 case UNLT:
3282 case UNLE:
3283 case UNGT:
3284 case UNGE:
3285 case UNEQ:
3286 return CCFPmode;
3287
3288 case LT:
3289 case LE:
3290 case GT:
3291 case GE:
3292 case LTGT:
3293 return CCFPEmode;
3294
3295 default:
3296 gcc_unreachable ();
3297 }
3298 }
3299 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
3300 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
3301 && y == const0_rtx)
3302 {
3303 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3304 return CCXNZmode;
3305 else
3306 return CCNZmode;
3307 }
3308 else
3309 {
3310 /* This is for the cmp<mode>_sne pattern. */
3311 if (GET_CODE (x) == NOT && y == constm1_rtx)
3312 {
3313 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3314 return CCXCmode;
3315 else
3316 return CCCmode;
3317 }
3318
3319 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
3320 if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
3321 {
3322 if (GET_CODE (y) == UNSPEC
3323 && (XINT (y, 1) == UNSPEC_ADDV
3324 || XINT (y, 1) == UNSPEC_SUBV
3325 || XINT (y, 1) == UNSPEC_NEGV))
3326 return CCVmode;
3327 else
3328 return CCCmode;
3329 }
3330
3331 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3332 return CCXmode;
3333 else
3334 return CCmode;
3335 }
3336 }
3337
3338 /* Emit the compare insn and return the CC reg for a CODE comparison
3339 with operands X and Y. */
3340
3341 static rtx
3342 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
3343 {
3344 machine_mode mode;
3345 rtx cc_reg;
3346
3347 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
3348 return x;
3349
3350 mode = SELECT_CC_MODE (code, x, y);
3351
3352 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
3353 fcc regs (cse can't tell they're really call clobbered regs and will
3354 remove a duplicate comparison even if there is an intervening function
3355 call - it will then try to reload the cc reg via an int reg which is why
3356 we need the movcc patterns). It is possible to provide the movcc
3357 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
3358 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
3359 to tell cse that CCFPE mode registers (even pseudos) are call
3360 clobbered. */
3361
3362 /* ??? This is an experiment. Rather than making changes to cse which may
3363 or may not be easy/clean, we do our own cse. This is possible because
3364 we will generate hard registers. Cse knows they're call clobbered (it
3365 doesn't know the same thing about pseudos). If we guess wrong, no big
3366 deal, but if we win, great! */
3367
3368 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3369 #if 1 /* experiment */
3370 {
3371 int reg;
3372 /* We cycle through the registers to ensure they're all exercised. */
3373 static int next_fcc_reg = 0;
3374 /* Previous x,y for each fcc reg. */
3375 static rtx prev_args[4][2];
3376
3377 /* Scan prev_args for x,y. */
3378 for (reg = 0; reg < 4; reg++)
3379 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
3380 break;
3381 if (reg == 4)
3382 {
3383 reg = next_fcc_reg;
3384 prev_args[reg][0] = x;
3385 prev_args[reg][1] = y;
3386 next_fcc_reg = (next_fcc_reg + 1) & 3;
3387 }
3388 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
3389 }
3390 #else
3391 cc_reg = gen_reg_rtx (mode);
3392 #endif /* ! experiment */
3393 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3394 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
3395 else
3396 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
3397
3398 /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD. If we do, this
3399 will only result in an unrecognizable insn so no point in asserting. */
3400 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
3401
3402 return cc_reg;
3403 }
3404
3405
3406 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
3407
3408 rtx
3409 gen_compare_reg (rtx cmp)
3410 {
3411 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
3412 }
3413
3414 /* This function is used for v9 only.
3415 DEST is the target of the Scc insn.
3416 CODE is the code for an Scc's comparison.
3417 X and Y are the values we compare.
3418
3419 This function is needed to turn
3420
3421 (set (reg:SI 110)
3422 (gt (reg:CCX 100 %icc)
3423 (const_int 0)))
3424 into
3425 (set (reg:SI 110)
3426 (gt:DI (reg:CCX 100 %icc)
3427 (const_int 0)))
3428
3429 IE: The instruction recognizer needs to see the mode of the comparison to
3430 find the right instruction. We could use "gt:DI" right in the
3431 define_expand, but leaving it out allows us to handle DI, SI, etc. */
3432
3433 static int
3434 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
3435 {
3436 if (! TARGET_ARCH64
3437 && (GET_MODE (x) == DImode
3438 || GET_MODE (dest) == DImode))
3439 return 0;
3440
3441 /* Try to use the movrCC insns. */
3442 if (TARGET_ARCH64
3443 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
3444 && y == const0_rtx
3445 && v9_regcmp_p (compare_code))
3446 {
3447 rtx op0 = x;
3448 rtx temp;
3449
3450 /* Special case for op0 != 0. This can be done with one instruction if
3451 dest == x. */
3452
3453 if (compare_code == NE
3454 && GET_MODE (dest) == DImode
3455 && rtx_equal_p (op0, dest))
3456 {
3457 emit_insn (gen_rtx_SET (dest,
3458 gen_rtx_IF_THEN_ELSE (DImode,
3459 gen_rtx_fmt_ee (compare_code, DImode,
3460 op0, const0_rtx),
3461 const1_rtx,
3462 dest)));
3463 return 1;
3464 }
3465
3466 if (reg_overlap_mentioned_p (dest, op0))
3467 {
3468 /* Handle the case where dest == x.
3469 We "early clobber" the result. */
3470 op0 = gen_reg_rtx (GET_MODE (x));
3471 emit_move_insn (op0, x);
3472 }
3473
3474 emit_insn (gen_rtx_SET (dest, const0_rtx));
3475 if (GET_MODE (op0) != DImode)
3476 {
3477 temp = gen_reg_rtx (DImode);
3478 convert_move (temp, op0, 0);
3479 }
3480 else
3481 temp = op0;
3482 emit_insn (gen_rtx_SET (dest,
3483 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3484 gen_rtx_fmt_ee (compare_code, DImode,
3485 temp, const0_rtx),
3486 const1_rtx,
3487 dest)));
3488 return 1;
3489 }
3490 else
3491 {
3492 x = gen_compare_reg_1 (compare_code, x, y);
3493 y = const0_rtx;
3494
3495 emit_insn (gen_rtx_SET (dest, const0_rtx));
3496 emit_insn (gen_rtx_SET (dest,
3497 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3498 gen_rtx_fmt_ee (compare_code,
3499 GET_MODE (x), x, y),
3500 const1_rtx, dest)));
3501 return 1;
3502 }
3503 }
3504
3505
3506 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
3507 without jumps using the addx/subx instructions. */
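/* For instance (illustrative assembly, not emitted literally here), "sltu"
   can be computed as
     subcc %o0, %o1, %g0    ! sets the carry flag iff %o0 < %o1 unsigned
     addx  %g0, 0, %o2      ! %o2 = carry, i.e. the LTU result
   with "sgeu" using the complementary subx form.  */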
3508
3509 bool
3510 emit_scc_insn (rtx operands[])
3511 {
3512 rtx tem, x, y;
3513 enum rtx_code code;
3514 machine_mode mode;
3515
3516 /* The quad-word fp compare library routines all return nonzero to indicate
3517 true, which is different from the equivalent libgcc routines, so we must
3518 handle them specially here. */
3519 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3520 {
3521 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3522 GET_CODE (operands[1]));
3523 operands[2] = XEXP (operands[1], 0);
3524 operands[3] = XEXP (operands[1], 1);
3525 }
3526
3527 code = GET_CODE (operands[1]);
3528 x = operands[2];
3529 y = operands[3];
3530 mode = GET_MODE (x);
3531
3532 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3533 more applications). The exception to this is "reg != 0" which can
3534 be done in one instruction on v9 (so we do it). */
3535 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3536 {
3537 if (y != const0_rtx)
3538 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3539
3540 rtx pat = gen_rtx_SET (operands[0],
3541 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3542 x, const0_rtx));
3543
3544 /* If we can use addx/subx or addxc, add a clobber for CC. */
3545 if (mode == SImode || (code == NE && TARGET_VIS3))
3546 {
3547 rtx clobber
3548 = gen_rtx_CLOBBER (VOIDmode,
3549 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3550 SPARC_ICC_REG));
3551 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3552 }
3553
3554 emit_insn (pat);
3555 return true;
3556 }
3557
3558 /* We can do LTU in DImode using the addxc instruction with VIS3. */
3559 if (TARGET_ARCH64
3560 && mode == DImode
3561 && !((code == LTU || code == GTU) && TARGET_VIS3)
3562 && gen_v9_scc (operands[0], code, x, y))
3563 return true;
3564
3565 /* We can do LTU and GEU using the addx/subx instructions too. And
3566 for GTU/LEU, if both operands are registers swap them and fall
3567 back to the easy case. */
3568 if (code == GTU || code == LEU)
3569 {
3570 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3571 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3572 {
3573 tem = x;
3574 x = y;
3575 y = tem;
3576 code = swap_condition (code);
3577 }
3578 }
3579
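  /* For LTU the comparison leaves the desired result in the carry flag, and
     GEU is simply its complement, so the matching insn patterns can
     materialize the value with a single addx/subx instead of a branch.  */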
3580 if (code == LTU || code == GEU)
3581 {
3582 emit_insn (gen_rtx_SET (operands[0],
3583 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3584 gen_compare_reg_1 (code, x, y),
3585 const0_rtx)));
3586 return true;
3587 }
3588
3589   /* All the possibilities to use addx/subx-based sequences have been
3590      exhausted, so try for a 3-instruction sequence using v9 conditional
3591      moves.  */
3592 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3593 return true;
3594
3595 /* Nope, do branches. */
3596 return false;
3597 }
3598
3599 /* Emit a conditional jump insn for the v9 architecture using comparison code
3600 CODE and jump target LABEL.
3601 This function exists to take advantage of the v9 brxx insns. */
3602
3603 static void
3604 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3605 {
3606 emit_jump_insn (gen_rtx_SET (pc_rtx,
3607 gen_rtx_IF_THEN_ELSE (VOIDmode,
3608 gen_rtx_fmt_ee (code, GET_MODE (op0),
3609 op0, const0_rtx),
3610 gen_rtx_LABEL_REF (VOIDmode, label),
3611 pc_rtx)));
3612 }
3613
3614 /* Emit a conditional jump insn for the UA2011 architecture using
3615 comparison code CODE and jump target LABEL. This function exists
3616 to take advantage of the UA2011 Compare and Branch insns. */
3617
3618 static void
3619 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3620 {
3621 rtx if_then_else;
3622
3623 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3624 gen_rtx_fmt_ee(code, GET_MODE(op0),
3625 op0, op1),
3626 gen_rtx_LABEL_REF (VOIDmode, label),
3627 pc_rtx);
3628
3629 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3630 }
3631
3632 void
3633 emit_conditional_branch_insn (rtx operands[])
3634 {
3635 /* The quad-word fp compare library routines all return nonzero to indicate
3636 true, which is different from the equivalent libgcc routines, so we must
3637 handle them specially here. */
3638 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3639 {
3640 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3641 GET_CODE (operands[0]));
3642 operands[1] = XEXP (operands[0], 0);
3643 operands[2] = XEXP (operands[0], 1);
3644 }
3645
3646 /* If we can tell early on that the comparison is against a constant
3647 that won't fit in the 5-bit signed immediate field of a cbcond,
3648 use one of the other v9 conditional branch sequences. */
3649 if (TARGET_CBCOND
3650 && GET_CODE (operands[1]) == REG
3651 && (GET_MODE (operands[1]) == SImode
3652 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3653 && (GET_CODE (operands[2]) != CONST_INT
3654 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3655 {
3656 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3657 return;
3658 }
3659
3660 if (TARGET_ARCH64 && operands[2] == const0_rtx
3661 && GET_CODE (operands[1]) == REG
3662 && GET_MODE (operands[1]) == DImode)
3663 {
3664 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3665 return;
3666 }
3667
3668 operands[1] = gen_compare_reg (operands[0]);
3669 operands[2] = const0_rtx;
3670 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3671 operands[1], operands[2]);
3672 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3673 operands[3]));
3674 }
3675
3676
3677 /* Generate a DFmode part of a hard TFmode register.  REG is the TFmode
3678    hard register, LOW is 1 for the low 64 bits of the register and 0
3679    otherwise.  */
3681 rtx
3682 gen_df_reg (rtx reg, int low)
3683 {
3684 int regno = REGNO (reg);
3685
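  /* In 64-bit mode an integer register holds 8 bytes, so the other DFmode
     half of the TFmode value lives in the adjacent register; FP registers
     hold 4 bytes each, so there it lives two registers away.  */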
3686 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3687 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3688 return gen_rtx_REG (DFmode, regno);
3689 }
3690
3691 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3692 Unlike normal calls, TFmode operands are passed by reference. It is
3693 assumed that no more than 3 operands are required. */
3694
3695 static void
3696 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3697 {
3698 rtx ret_slot = NULL, arg[3], func_sym;
3699 int i;
3700
3701 /* We only expect to be called for conversions, unary, and binary ops. */
3702 gcc_assert (nargs == 2 || nargs == 3);
3703
3704 for (i = 0; i < nargs; ++i)
3705 {
3706 rtx this_arg = operands[i];
3707 rtx this_slot;
3708
3709 /* TFmode arguments and return values are passed by reference. */
3710 if (GET_MODE (this_arg) == TFmode)
3711 {
3712 int force_stack_temp;
3713
3714 force_stack_temp = 0;
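	  /* When the _Qp_* library is known to be buggy, play it safe and
	     always return the result through a fresh stack temporary instead
	     of directly into operand 0.  */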
3715 if (TARGET_BUGGY_QP_LIB && i == 0)
3716 force_stack_temp = 1;
3717
3718 if (GET_CODE (this_arg) == MEM
3719 && ! force_stack_temp)
3720 {
3721 tree expr = MEM_EXPR (this_arg);
3722 if (expr)
3723 mark_addressable (expr);
3724 this_arg = XEXP (this_arg, 0);
3725 }
3726 else if (CONSTANT_P (this_arg)
3727 && ! force_stack_temp)
3728 {
3729 this_slot = force_const_mem (TFmode, this_arg);
3730 this_arg = XEXP (this_slot, 0);
3731 }
3732 else
3733 {
3734 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3735
3736 /* Operand 0 is the return value. We'll copy it out later. */
3737 if (i > 0)
3738 emit_move_insn (this_slot, this_arg);
3739 else
3740 ret_slot = this_slot;
3741
3742 this_arg = XEXP (this_slot, 0);
3743 }
3744 }
3745
3746 arg[i] = this_arg;
3747 }
3748
3749 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3750
3751 if (GET_MODE (operands[0]) == TFmode)
3752 {
3753 if (nargs == 2)
3754 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3755 arg[0], GET_MODE (arg[0]),
3756 arg[1], GET_MODE (arg[1]));
3757 else
3758 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3759 arg[0], GET_MODE (arg[0]),
3760 arg[1], GET_MODE (arg[1]),
3761 arg[2], GET_MODE (arg[2]));
3762
3763 if (ret_slot)
3764 emit_move_insn (operands[0], ret_slot);
3765 }
3766 else
3767 {
3768 rtx ret;
3769
3770 gcc_assert (nargs == 2);
3771
3772 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3773 GET_MODE (operands[0]),
3774 arg[1], GET_MODE (arg[1]));
3775
3776 if (ret != operands[0])
3777 emit_move_insn (operands[0], ret);
3778 }
3779 }
3780
3781 /* Expand soft-float TFmode calls to sparc abi routines. */
3782
3783 static void
3784 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3785 {
3786 const char *func;
3787
3788 switch (code)
3789 {
3790 case PLUS:
3791 func = "_Qp_add";
3792 break;
3793 case MINUS:
3794 func = "_Qp_sub";
3795 break;
3796 case MULT:
3797 func = "_Qp_mul";
3798 break;
3799 case DIV:
3800 func = "_Qp_div";
3801 break;
3802 default:
3803 gcc_unreachable ();
3804 }
3805
3806 emit_soft_tfmode_libcall (func, 3, operands);
3807 }
3808
3809 static void
3810 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3811 {
3812 const char *func;
3813
3814 gcc_assert (code == SQRT);
3815 func = "_Qp_sqrt";
3816
3817 emit_soft_tfmode_libcall (func, 2, operands);
3818 }
3819
3820 static void
3821 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3822 {
3823 const char *func;
3824
3825 switch (code)
3826 {
3827 case FLOAT_EXTEND:
3828 switch (GET_MODE (operands[1]))
3829 {
3830 case E_SFmode:
3831 func = "_Qp_stoq";
3832 break;
3833 case E_DFmode:
3834 func = "_Qp_dtoq";
3835 break;
3836 default:
3837 gcc_unreachable ();
3838 }
3839 break;
3840
3841 case FLOAT_TRUNCATE:
3842 switch (GET_MODE (operands[0]))
3843 {
3844 case E_SFmode:
3845 func = "_Qp_qtos";
3846 break;
3847 case E_DFmode:
3848 func = "_Qp_qtod";
3849 break;
3850 default:
3851 gcc_unreachable ();
3852 }
3853 break;
3854
3855 case FLOAT:
3856 switch (GET_MODE (operands[1]))
3857 {
3858 case E_SImode:
3859 func = "_Qp_itoq";
3860 if (TARGET_ARCH64)
3861 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3862 break;
3863 case E_DImode:
3864 func = "_Qp_xtoq";
3865 break;
3866 default:
3867 gcc_unreachable ();
3868 }
3869 break;
3870
3871 case UNSIGNED_FLOAT:
3872 switch (GET_MODE (operands[1]))
3873 {
3874 case E_SImode:
3875 func = "_Qp_uitoq";
3876 if (TARGET_ARCH64)
3877 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3878 break;
3879 case E_DImode:
3880 func = "_Qp_uxtoq";
3881 break;
3882 default:
3883 gcc_unreachable ();
3884 }
3885 break;
3886
3887 case FIX:
3888 switch (GET_MODE (operands[0]))
3889 {
3890 case E_SImode:
3891 func = "_Qp_qtoi";
3892 break;
3893 case E_DImode:
3894 func = "_Qp_qtox";
3895 break;
3896 default:
3897 gcc_unreachable ();
3898 }
3899 break;
3900
3901 case UNSIGNED_FIX:
3902 switch (GET_MODE (operands[0]))
3903 {
3904 case E_SImode:
3905 func = "_Qp_qtoui";
3906 break;
3907 case E_DImode:
3908 func = "_Qp_qtoux";
3909 break;
3910 default:
3911 gcc_unreachable ();
3912 }
3913 break;
3914
3915 default:
3916 gcc_unreachable ();
3917 }
3918
3919 emit_soft_tfmode_libcall (func, 2, operands);
3920 }
3921
3922 /* Expand a hard-float tfmode operation. All arguments must be in
3923 registers. */
3924
3925 static void
3926 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3927 {
3928 rtx op, dest;
3929
3930 if (GET_RTX_CLASS (code) == RTX_UNARY)
3931 {
3932 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3933 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3934 }
3935 else
3936 {
3937 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3938 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3939 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3940 operands[1], operands[2]);
3941 }
3942
3943 if (register_operand (operands[0], VOIDmode))
3944 dest = operands[0];
3945 else
3946 dest = gen_reg_rtx (GET_MODE (operands[0]));
3947
3948 emit_insn (gen_rtx_SET (dest, op));
3949
3950 if (dest != operands[0])
3951 emit_move_insn (operands[0], dest);
3952 }
3953
3954 void
3955 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3956 {
3957 if (TARGET_HARD_QUAD)
3958 emit_hard_tfmode_operation (code, operands);
3959 else
3960 emit_soft_tfmode_binop (code, operands);
3961 }
3962
3963 void
3964 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3965 {
3966 if (TARGET_HARD_QUAD)
3967 emit_hard_tfmode_operation (code, operands);
3968 else
3969 emit_soft_tfmode_unop (code, operands);
3970 }
3971
3972 void
3973 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3974 {
3975 if (TARGET_HARD_QUAD)
3976 emit_hard_tfmode_operation (code, operands);
3977 else
3978 emit_soft_tfmode_cvt (code, operands);
3979 }
3980
3981 /* Return nonzero if a branch/jump/call instruction will be emitting a
3982    nop into its delay slot.  */
3983
3984 int
3985 empty_delay_slot (rtx_insn *insn)
3986 {
3987 rtx seq;
3988
3989 /* If no previous instruction (should not happen), return true. */
3990 if (PREV_INSN (insn) == NULL)
3991 return 1;
3992
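  /* If the delay slot has been filled, delayed-branch scheduling will have
     wrapped this insn and its delay insn into a SEQUENCE.  */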
3993 seq = NEXT_INSN (PREV_INSN (insn));
3994 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3995 return 0;
3996
3997 return 1;
3998 }
3999
4000 /* Return nonzero if we should emit a nop after a cbcond instruction.
4001 The cbcond instruction does not have a delay slot, however there is
4002 a severe performance penalty if a control transfer appears right
4003 after a cbcond. Therefore we emit a nop when we detect this
4004 situation. */
4005
4006 int
4007 emit_cbcond_nop (rtx_insn *insn)
4008 {
4009 rtx next = next_active_insn (insn);
4010
4011 if (!next)
4012 return 1;
4013
4014 if (NONJUMP_INSN_P (next)
4015 && GET_CODE (PATTERN (next)) == SEQUENCE)
4016 next = XVECEXP (PATTERN (next), 0, 0);
4017 else if (CALL_P (next)
4018 && GET_CODE (PATTERN (next)) == PARALLEL)
4019 {
4020 rtx delay = XVECEXP (PATTERN (next), 0, 1);
4021
4022 if (GET_CODE (delay) == RETURN)
4023 {
4024 /* It's a sibling call. Do not emit the nop if we're going
4025 to emit something other than the jump itself as the first
4026 instruction of the sibcall sequence. */
4027 if (sparc_leaf_function_p || TARGET_FLAT)
4028 return 0;
4029 }
4030 }
4031
4032 if (NONJUMP_INSN_P (next))
4033 return 0;
4034
4035 return 1;
4036 }
4037
4038 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
4039 instruction. RETURN_P is true if the v9 variant 'return' is to be
4040 considered in the test too.
4041
4042 TRIAL must be a SET whose destination is a REG appropriate for the
4043 'restore' instruction or, if RETURN_P is true, for the 'return'
4044 instruction. */
4045
4046 static int
4047 eligible_for_restore_insn (rtx trial, bool return_p)
4048 {
4049 rtx pat = PATTERN (trial);
4050 rtx src = SET_SRC (pat);
4051 bool src_is_freg = false;
4052 rtx src_reg;
4053
4054 /* Since we now can do moves between float and integer registers when
4055 VIS3 is enabled, we have to catch this case. We can allow such
4056 moves when doing a 'return' however. */
4057 src_reg = src;
4058 if (GET_CODE (src_reg) == SUBREG)
4059 src_reg = SUBREG_REG (src_reg);
4060 if (GET_CODE (src_reg) == REG
4061 && SPARC_FP_REG_P (REGNO (src_reg)))
4062 src_is_freg = true;
4063
4064 /* The 'restore src,%g0,dest' pattern for word mode and below. */
4065 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
4066 && arith_operand (src, GET_MODE (src))
4067 && ! src_is_freg)
4068 {
4069 if (TARGET_ARCH64)
4070 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
4071 else
4072 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
4073 }
4074
4075 /* The 'restore src,%g0,dest' pattern for double-word mode. */
4076 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
4077 && arith_double_operand (src, GET_MODE (src))
4078 && ! src_is_freg)
4079 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
4080
4081 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
4082 else if (! TARGET_FPU && register_operand (src, SFmode))
4083 return 1;
4084
4085 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
4086 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
4087 return 1;
4088
4089 /* If we have the 'return' instruction, anything that does not use
4090 local or output registers and can go into a delay slot wins. */
4091 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
4092 return 1;
4093
4094 /* The 'restore src1,src2,dest' pattern for SImode. */
4095 else if (GET_CODE (src) == PLUS
4096 && register_operand (XEXP (src, 0), SImode)
4097 && arith_operand (XEXP (src, 1), SImode))
4098 return 1;
4099
4100 /* The 'restore src1,src2,dest' pattern for DImode. */
4101 else if (GET_CODE (src) == PLUS
4102 && register_operand (XEXP (src, 0), DImode)
4103 && arith_double_operand (XEXP (src, 1), DImode))
4104 return 1;
4105
4106 /* The 'restore src1,%lo(src2),dest' pattern. */
4107 else if (GET_CODE (src) == LO_SUM
4108 && ! TARGET_CM_MEDMID
4109 && ((register_operand (XEXP (src, 0), SImode)
4110 && immediate_operand (XEXP (src, 1), SImode))
4111 || (TARGET_ARCH64
4112 && register_operand (XEXP (src, 0), DImode)
4113 && immediate_operand (XEXP (src, 1), DImode))))
4114 return 1;
4115
4116   /* The 'restore src,src,dest' pattern, i.e. src << 1 expressed as src + src.  */
4117 else if (GET_CODE (src) == ASHIFT
4118 && (register_operand (XEXP (src, 0), SImode)
4119 || register_operand (XEXP (src, 0), DImode))
4120 && XEXP (src, 1) == const1_rtx)
4121 return 1;
4122
4123 return 0;
4124 }
4125
4126 /* Return nonzero if TRIAL can go into the function return's delay slot. */
4127
4128 int
4129 eligible_for_return_delay (rtx_insn *trial)
4130 {
4131 int regno;
4132 rtx pat;
4133
4134 /* If the function uses __builtin_eh_return, the eh_return machinery
4135 occupies the delay slot. */
4136 if (crtl->calls_eh_return)
4137 return 0;
4138
4139 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4140 return 0;
4141
4142 /* In the case of a leaf or flat function, anything can go into the slot. */
4143 if (sparc_leaf_function_p || TARGET_FLAT)
4144 return 1;
4145
4146 if (!NONJUMP_INSN_P (trial))
4147 return 0;
4148
4149 pat = PATTERN (trial);
4150 if (GET_CODE (pat) == PARALLEL)
4151 {
4152 int i;
4153
4154 if (! TARGET_V9)
4155 return 0;
4156 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
4157 {
4158 rtx expr = XVECEXP (pat, 0, i);
4159 if (GET_CODE (expr) != SET)
4160 return 0;
4161 if (GET_CODE (SET_DEST (expr)) != REG)
4162 return 0;
4163 regno = REGNO (SET_DEST (expr));
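	  /* The %o and %l registers (8..23) are remapped or discarded by the
	     register window shift of the 'restore'/'return', so the delay
	     insn must not set them.  */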
4164 if (regno >= 8 && regno < 24)
4165 return 0;
4166 }
4167 return !epilogue_renumber (&pat, 1);
4168 }
4169
4170 if (GET_CODE (pat) != SET)
4171 return 0;
4172
4173 if (GET_CODE (SET_DEST (pat)) != REG)
4174 return 0;
4175
4176 regno = REGNO (SET_DEST (pat));
4177
4178 /* Otherwise, only operations which can be done in tandem with
4179 a `restore' or `return' insn can go into the delay slot. */
4180 if (regno >= 8 && regno < 24)
4181 return 0;
4182
4183   /* If this instruction sets up a floating-point register and we have a
4184      return instruction, it can probably go in.  But a restore will not
4185      work with FP_REGS.  */
4186 if (! SPARC_INT_REG_P (regno))
4187 return TARGET_V9 && !epilogue_renumber (&pat, 1);
4188
4189 return eligible_for_restore_insn (trial, true);
4190 }
4191
4192 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
4193
4194 int
4195 eligible_for_sibcall_delay (rtx_insn *trial)
4196 {
4197 rtx pat;
4198
4199 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4200 return 0;
4201
4202 if (!NONJUMP_INSN_P (trial))
4203 return 0;
4204
4205 pat = PATTERN (trial);
4206
4207 if (sparc_leaf_function_p || TARGET_FLAT)
4208 {
4209 /* If the tail call is done using the call instruction,
4210 we have to restore %o7 in the delay slot. */
4211 if (LEAF_SIBCALL_SLOT_RESERVED_P)
4212 return 0;
4213
4214 /* %g1 is used to build the function address */
4215 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
4216 return 0;
4217
4218 return 1;
4219 }
4220
4221 if (GET_CODE (pat) != SET)
4222 return 0;
4223
4224 /* Otherwise, only operations which can be done in tandem with
4225 a `restore' insn can go into the delay slot. */
4226 if (GET_CODE (SET_DEST (pat)) != REG
4227 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
4228 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
4229 return 0;
4230
4231 /* If it mentions %o7, it can't go in, because sibcall will clobber it
4232 in most cases. */
4233 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
4234 return 0;
4235
4236 return eligible_for_restore_insn (trial, false);
4237 }
4238
4239 /* Determine if it's legal to put X into the constant pool. This
4240 is not possible if X contains the address of a symbol that is
4241 not constant (TLS) or not known at final link time (PIC). */
4242
4243 static bool
4244 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
4245 {
4246 switch (GET_CODE (x))
4247 {
4248 case CONST_INT:
4249 case CONST_WIDE_INT:
4250 case CONST_DOUBLE:
4251 case CONST_VECTOR:
4252 /* Accept all non-symbolic constants. */
4253 return false;
4254
4255 case LABEL_REF:
4256 /* Labels are OK iff we are non-PIC. */
4257 return flag_pic != 0;
4258
4259 case SYMBOL_REF:
4260 /* 'Naked' TLS symbol references are never OK,
4261 non-TLS symbols are OK iff we are non-PIC. */
4262 if (SYMBOL_REF_TLS_MODEL (x))
4263 return true;
4264 else
4265 return flag_pic != 0;
4266
4267 case CONST:
4268 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
4269 case PLUS:
4270 case MINUS:
4271 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
4272 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
4273 case UNSPEC:
4274 return true;
4275 default:
4276 gcc_unreachable ();
4277 }
4278 }
4279
4280 /* Global Offset Table support. */
4281 static GTY(()) rtx got_symbol_rtx = NULL_RTX;
4282 static GTY(()) rtx got_register_rtx = NULL_RTX;
4283 static GTY(()) rtx got_helper_rtx = NULL_RTX;
4284
4285 static GTY(()) bool got_helper_needed = false;
4286
4287 /* Return the SYMBOL_REF for the Global Offset Table. */
4288
4289 static rtx
4290 sparc_got (void)
4291 {
4292 if (!got_symbol_rtx)
4293 got_symbol_rtx = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
4294
4295 return got_symbol_rtx;
4296 }
4297
4298 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4299
4300 static rtx
4301 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2)
4302 {
4303 int orig_flag_pic = flag_pic;
4304 rtx insn;
4305
4306 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4307 flag_pic = 0;
4308 if (TARGET_ARCH64)
4309 insn = gen_load_pcrel_symdi (op0, op1, op2, GEN_INT (REGNO (op0)));
4310 else
4311 insn = gen_load_pcrel_symsi (op0, op1, op2, GEN_INT (REGNO (op0)));
4312 flag_pic = orig_flag_pic;
4313
4314 return insn;
4315 }
4316
4317 /* Output the load_pcrel_sym{si,di} patterns. */
4318
4319 const char *
4320 output_load_pcrel_sym (rtx *operands)
4321 {
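  /* The call leaves its own address in %o7 and the helper in operand 2 adds
     %o7 into operand 0, so the small biases on %a1 below compensate for the
     distance between the sethi/add pair and the call itself, which depends
     on whether the add can sit in the delay slot.  */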
4322 if (flag_delayed_branch)
4323 {
4324 output_asm_insn ("sethi\t%%hi(%a1-4), %0", operands);
4325 output_asm_insn ("call\t%a2", operands);
4326 output_asm_insn (" add\t%0, %%lo(%a1+4), %0", operands);
4327 }
4328 else
4329 {
4330 output_asm_insn ("sethi\t%%hi(%a1-8), %0", operands);
4331 output_asm_insn ("add\t%0, %%lo(%a1-4), %0", operands);
4332 output_asm_insn ("call\t%a2", operands);
4333 output_asm_insn (" nop", NULL);
4334 }
4335
4336 if (operands[2] == got_helper_rtx)
4337 got_helper_needed = true;
4338
4339 return "";
4340 }
4341
4342 #ifdef HAVE_GAS_HIDDEN
4343 # define USE_HIDDEN_LINKONCE 1
4344 #else
4345 # define USE_HIDDEN_LINKONCE 0
4346 #endif
4347
4348 /* Emit code to load the GOT register. */
4349
4350 void
4351 load_got_register (void)
4352 {
4353 rtx insn;
4354
4355 if (TARGET_VXWORKS_RTP)
4356 {
4357 if (!got_register_rtx)
4358 got_register_rtx = pic_offset_table_rtx;
4359
4360 insn = gen_vxworks_load_got ();
4361 }
4362 else
4363 {
4364 if (!got_register_rtx)
4365 got_register_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4366
4367 /* The GOT symbol is subject to a PC-relative relocation so we need a
4368 helper function to add the PC value and thus get the final value. */
4369 if (!got_helper_rtx)
4370 {
4371 char name[32];
4372
4373 /* Skip the leading '%' as that cannot be used in a symbol name. */
4374 if (USE_HIDDEN_LINKONCE)
4375 sprintf (name, "__sparc_get_pc_thunk.%s",
4376 reg_names[REGNO (got_register_rtx)] + 1);
4377 else
4378 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC",
4379 REGNO (got_register_rtx));
4380
4381 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4382 }
4383
4384 insn
4385 = gen_load_pcrel_sym (got_register_rtx, sparc_got (), got_helper_rtx);
4386 }
4387
4388 emit_insn (insn);
4389 }
4390
4391 /* Ensure that we are not using patterns that are not OK with PIC. */
4392
4393 int
4394 check_pic (int i)
4395 {
4396 rtx op;
4397
4398 switch (flag_pic)
4399 {
4400 case 1:
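      /* Under -fpic every symbolic operand should already have been wrapped
	 into a GOT reference by the legitimization code, so a bare
	 SYMBOL_REF here indicates a pattern that bypassed it.  */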
4401 op = recog_data.operand[i];
4402 gcc_assert (GET_CODE (op) != SYMBOL_REF
4403 && (GET_CODE (op) != CONST
4404 || (GET_CODE (XEXP (op, 0)) == MINUS
4405 && XEXP (XEXP (op, 0), 0) == sparc_got ()
4406 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
4407 /* fallthrough */
4408 case 2:
4409 default:
4410 return 1;
4411 }
4412 }
4413
4414 /* Return true if X is an address which needs a temporary register when
4415 reloaded while generating PIC code. */
4416
4417 int
4418 pic_address_needs_scratch (rtx x)
4419 {
4420 /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */
4421 if (GET_CODE (x) == CONST
4422 && GET_CODE (XEXP (x, 0)) == PLUS
4423 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
4424 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4425 && !SMALL_INT (XEXP (XEXP (x, 0), 1)))
4426 return 1;
4427
4428 return 0;
4429 }
4430
4431 /* Determine if a given RTX is a valid constant. We already know this
4432 satisfies CONSTANT_P. */
4433
4434 static bool
4435 sparc_legitimate_constant_p (machine_mode mode, rtx x)
4436 {
4437 switch (GET_CODE (x))
4438 {
4439 case CONST:
4440 case SYMBOL_REF:
4441 if (sparc_tls_referenced_p (x))
4442 return false;
4443 break;
4444
4445 case CONST_DOUBLE:
4446 /* Floating point constants are generally not ok.
4447 The only exception is 0.0 and all-ones in VIS. */
4448 if (TARGET_VIS
4449 && SCALAR_FLOAT_MODE_P (mode)
4450 && (const_zero_operand (x, mode)
4451 || const_all_ones_operand (x, mode)))
4452 return true;
4453
4454 return false;
4455
4456 case CONST_VECTOR:
4457 /* Vector constants are generally not ok.
4458 The only exception is 0 or -1 in VIS. */
4459 if (TARGET_VIS
4460 && (const_zero_operand (x, mode)
4461 || const_all_ones_operand (x, mode)))
4462 return true;
4463
4464 return false;
4465
4466 default:
4467 break;
4468 }
4469
4470 return true;
4471 }
4472
4473 /* Determine if a given RTX is a valid constant address. */
4474
4475 bool
4476 constant_address_p (rtx x)
4477 {
4478 switch (GET_CODE (x))
4479 {
4480 case LABEL_REF:
4481 case CONST_INT:
4482 case HIGH:
4483 return true;
4484
4485 case CONST:
4486 if (flag_pic && pic_address_needs_scratch (x))
4487 return false;
4488 return sparc_legitimate_constant_p (Pmode, x);
4489
4490 case SYMBOL_REF:
4491 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
4492
4493 default:
4494 return false;
4495 }
4496 }
4497
4498 /* Nonzero if the constant value X is a legitimate general operand
4499 when generating PIC code. It is given that flag_pic is on and
4500 that X satisfies CONSTANT_P. */
4501
4502 bool
4503 legitimate_pic_operand_p (rtx x)
4504 {
4505 if (pic_address_needs_scratch (x))
4506 return false;
4507 if (sparc_tls_referenced_p (x))
4508 return false;
4509 return true;
4510 }
4511
4512 /* Return true if X is a representation of the PIC register. */
4513
4514 static bool
4515 sparc_pic_register_p (rtx x)
4516 {
4517 if (!REG_P (x) || !pic_offset_table_rtx)
4518 return false;
4519
4520 if (x == pic_offset_table_rtx)
4521 return true;
4522
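  /* Once the PIC pseudo has been assigned to a hard register by reload/LRA,
     recognize that hard register by the original register number recorded
     on it.  */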
4523 if (!HARD_REGISTER_P (pic_offset_table_rtx)
4524 && (HARD_REGISTER_P (x) || lra_in_progress || reload_in_progress)
4525 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
4526 return true;
4527
4528 return false;
4529 }
4530
4531 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
4532 (CONST_INT_P (X) \
4533 && INTVAL (X) >= -0x1000 \
4534 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
4535
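/* For the OLO10 case the low 10 bits contributed by %lo() must still fit in
   the 13-bit signed immediate field together with the explicit offset, hence
   the tighter 0xc00 upper bound below.  */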
4536 #define RTX_OK_FOR_OLO10_P(X, MODE) \
4537 (CONST_INT_P (X) \
4538 && INTVAL (X) >= -0x1000 \
4539 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
4540
4541 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
4542
4543 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
4544 ordinarily. This changes a bit when generating PIC. */
4545
4546 static bool
4547 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4548 {
4549 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
4550
4551 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4552 rs1 = addr;
4553 else if (GET_CODE (addr) == PLUS)
4554 {
4555 rs1 = XEXP (addr, 0);
4556 rs2 = XEXP (addr, 1);
4557
4558 /* Canonicalize. REG comes first, if there are no regs,
4559 LO_SUM comes first. */
4560 if (!REG_P (rs1)
4561 && GET_CODE (rs1) != SUBREG
4562 && (REG_P (rs2)
4563 || GET_CODE (rs2) == SUBREG
4564 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
4565 {
4566 rs1 = XEXP (addr, 1);
4567 rs2 = XEXP (addr, 0);
4568 }
4569
4570 if ((flag_pic == 1
4571 && sparc_pic_register_p (rs1)
4572 && !REG_P (rs2)
4573 && GET_CODE (rs2) != SUBREG
4574 && GET_CODE (rs2) != LO_SUM
4575 && GET_CODE (rs2) != MEM
4576 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
4577 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
4578 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
4579 || ((REG_P (rs1)
4580 || GET_CODE (rs1) == SUBREG)
4581 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4582 {
4583 imm1 = rs2;
4584 rs2 = NULL;
4585 }
4586 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4587 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4588 {
4589 /* We prohibit REG + REG for TFmode when there are no quad move insns
4590 and we consequently need to split. We do this because REG+REG
4591 is not an offsettable address. If we get the situation in reload
4592 where source and destination of a movtf pattern are both MEMs with
4593 REG+REG address, then only one of them gets converted to an
4594 offsettable address. */
4595 if (mode == TFmode
4596 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4597 return 0;
4598
4599 /* Likewise for TImode, but in all cases. */
4600 if (mode == TImode)
4601 return 0;
4602
4603 /* We prohibit REG + REG on ARCH32 if not optimizing for
4604 DFmode/DImode because then mem_min_alignment is likely to be zero
4605 after reload and the forced split would lack a matching splitter
4606 pattern. */
4607 if (TARGET_ARCH32 && !optimize
4608 && (mode == DFmode || mode == DImode))
4609 return 0;
4610 }
4611 else if (USE_AS_OFFSETABLE_LO10
4612 && GET_CODE (rs1) == LO_SUM
4613 && TARGET_ARCH64
4614 && ! TARGET_CM_MEDMID
4615 && RTX_OK_FOR_OLO10_P (rs2, mode))
4616 {
4617 rs2 = NULL;
4618 imm1 = XEXP (rs1, 1);
4619 rs1 = XEXP (rs1, 0);
4620 if (!CONSTANT_P (imm1)
4621 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4622 return 0;
4623 }
4624 }
4625 else if (GET_CODE (addr) == LO_SUM)
4626 {
4627 rs1 = XEXP (addr, 0);
4628 imm1 = XEXP (addr, 1);
4629
4630 if (!CONSTANT_P (imm1)
4631 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4632 return 0;
4633
4634 /* We can't allow TFmode in 32-bit mode, because an offset greater
4635 than the alignment (8) may cause the LO_SUM to overflow. */
4636 if (mode == TFmode && TARGET_ARCH32)
4637 return 0;
4638
4639 /* During reload, accept the HIGH+LO_SUM construct generated by
4640 sparc_legitimize_reload_address. */
4641 if (reload_in_progress
4642 && GET_CODE (rs1) == HIGH
4643 && XEXP (rs1, 0) == imm1)
4644 return 1;
4645 }
4646 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4647 return 1;
4648 else
4649 return 0;
4650
4651 if (GET_CODE (rs1) == SUBREG)
4652 rs1 = SUBREG_REG (rs1);
4653 if (!REG_P (rs1))
4654 return 0;
4655
4656 if (rs2)
4657 {
4658 if (GET_CODE (rs2) == SUBREG)
4659 rs2 = SUBREG_REG (rs2);
4660 if (!REG_P (rs2))
4661 return 0;
4662 }
4663
4664 if (strict)
4665 {
4666 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4667 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4668 return 0;
4669 }
4670 else
4671 {
4672 if ((! SPARC_INT_REG_P (REGNO (rs1))
4673 && REGNO (rs1) != FRAME_POINTER_REGNUM
4674 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4675 || (rs2
4676 && (! SPARC_INT_REG_P (REGNO (rs2))
4677 && REGNO (rs2) != FRAME_POINTER_REGNUM
4678 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4679 return 0;
4680 }
4681 return 1;
4682 }
4683
4684 /* Return the SYMBOL_REF for the tls_get_addr function. */
4685
4686 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4687
4688 static rtx
4689 sparc_tls_get_addr (void)
4690 {
4691 if (!sparc_tls_symbol)
4692 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4693
4694 return sparc_tls_symbol;
4695 }
4696
4697 /* Return the Global Offset Table to be used in TLS mode. */
4698
4699 static rtx
4700 sparc_tls_got (void)
4701 {
4702 /* In PIC mode, this is just the PIC offset table. */
4703 if (flag_pic)
4704 {
4705 crtl->uses_pic_offset_table = 1;
4706 return pic_offset_table_rtx;
4707 }
4708
4709 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4710 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4711 if (TARGET_SUN_TLS && TARGET_ARCH32)
4712 {
4713 load_got_register ();
4714 return got_register_rtx;
4715 }
4716
4717 /* In all other cases, we load a new pseudo with the GOT symbol. */
4718 return copy_to_reg (sparc_got ());
4719 }
4720
4721 /* Return true if X contains a thread-local symbol. */
4722
4723 static bool
4724 sparc_tls_referenced_p (rtx x)
4725 {
4726 if (!TARGET_HAVE_TLS)
4727 return false;
4728
4729 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4730 x = XEXP (XEXP (x, 0), 0);
4731
4732 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4733 return true;
4734
4735 /* That's all we handle in sparc_legitimize_tls_address for now. */
4736 return false;
4737 }
4738
4739 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4740 this (thread-local) address. */
4741
4742 static rtx
4743 sparc_legitimize_tls_address (rtx addr)
4744 {
4745 rtx temp1, temp2, temp3, ret, o0, got;
4746 rtx_insn *insn;
4747
4748 gcc_assert (can_create_pseudo_p ());
4749
4750 if (GET_CODE (addr) == SYMBOL_REF)
4751 /* Although the various sethi/or sequences generate SImode values, many of
4752 them can be transformed by the linker when relaxing and, if relaxing to
4753 local-exec, will become a sethi/xor pair, which is signed and therefore
4754 a full DImode value in 64-bit mode. Thus we must use Pmode, lest these
4755 values be spilled onto the stack in 64-bit mode. */
4756 switch (SYMBOL_REF_TLS_MODEL (addr))
4757 {
4758 case TLS_MODEL_GLOBAL_DYNAMIC:
4759 start_sequence ();
4760 temp1 = gen_reg_rtx (Pmode);
4761 temp2 = gen_reg_rtx (Pmode);
4762 ret = gen_reg_rtx (Pmode);
4763 o0 = gen_rtx_REG (Pmode, 8);
4764 got = sparc_tls_got ();
4765 if (TARGET_ARCH32)
4766 {
4767 emit_insn (gen_tgd_hi22si (temp1, addr));
4768 emit_insn (gen_tgd_lo10si (temp2, temp1, addr));
4769 emit_insn (gen_tgd_addsi (o0, got, temp2, addr));
4770 insn = emit_call_insn (gen_tgd_callsi (o0, sparc_tls_get_addr (),
4771 addr, const1_rtx));
4772 }
4773 else
4774 {
4775 emit_insn (gen_tgd_hi22di (temp1, addr));
4776 emit_insn (gen_tgd_lo10di (temp2, temp1, addr));
4777 emit_insn (gen_tgd_adddi (o0, got, temp2, addr));
4778 insn = emit_call_insn (gen_tgd_calldi (o0, sparc_tls_get_addr (),
4779 addr, const1_rtx));
4780 }
4781 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4782 RTL_CONST_CALL_P (insn) = 1;
4783 insn = get_insns ();
4784 end_sequence ();
4785 emit_libcall_block (insn, ret, o0, addr);
4786 break;
4787
4788 case TLS_MODEL_LOCAL_DYNAMIC:
4789 start_sequence ();
4790 temp1 = gen_reg_rtx (Pmode);
4791 temp2 = gen_reg_rtx (Pmode);
4792 temp3 = gen_reg_rtx (Pmode);
4793 ret = gen_reg_rtx (Pmode);
4794 o0 = gen_rtx_REG (Pmode, 8);
4795 got = sparc_tls_got ();
4796 if (TARGET_ARCH32)
4797 {
4798 emit_insn (gen_tldm_hi22si (temp1));
4799 emit_insn (gen_tldm_lo10si (temp2, temp1));
4800 emit_insn (gen_tldm_addsi (o0, got, temp2));
4801 insn = emit_call_insn (gen_tldm_callsi (o0, sparc_tls_get_addr (),
4802 const1_rtx));
4803 }
4804 else
4805 {
4806 emit_insn (gen_tldm_hi22di (temp1));
4807 emit_insn (gen_tldm_lo10di (temp2, temp1));
4808 emit_insn (gen_tldm_adddi (o0, got, temp2));
4809 insn = emit_call_insn (gen_tldm_calldi (o0, sparc_tls_get_addr (),
4810 const1_rtx));
4811 }
4812 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4813 RTL_CONST_CALL_P (insn) = 1;
4814 insn = get_insns ();
4815 end_sequence ();
4816 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
4817 share the LD_BASE result with other LD model accesses. */
4818 emit_libcall_block (insn, temp3, o0,
4819 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4820 UNSPEC_TLSLD_BASE));
4821 temp1 = gen_reg_rtx (Pmode);
4822 temp2 = gen_reg_rtx (Pmode);
4823 if (TARGET_ARCH32)
4824 {
4825 emit_insn (gen_tldo_hix22si (temp1, addr));
4826 emit_insn (gen_tldo_lox10si (temp2, temp1, addr));
4827 emit_insn (gen_tldo_addsi (ret, temp3, temp2, addr));
4828 }
4829 else
4830 {
4831 emit_insn (gen_tldo_hix22di (temp1, addr));
4832 emit_insn (gen_tldo_lox10di (temp2, temp1, addr));
4833 emit_insn (gen_tldo_adddi (ret, temp3, temp2, addr));
4834 }
4835 break;
4836
4837 case TLS_MODEL_INITIAL_EXEC:
4838 temp1 = gen_reg_rtx (Pmode);
4839 temp2 = gen_reg_rtx (Pmode);
4840 temp3 = gen_reg_rtx (Pmode);
4841 got = sparc_tls_got ();
4842 if (TARGET_ARCH32)
4843 {
4844 emit_insn (gen_tie_hi22si (temp1, addr));
4845 emit_insn (gen_tie_lo10si (temp2, temp1, addr));
4846 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4847 }
4848 else
4849 {
4850 emit_insn (gen_tie_hi22di (temp1, addr));
4851 emit_insn (gen_tie_lo10di (temp2, temp1, addr));
4852 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4853 }
4854 if (TARGET_SUN_TLS)
4855 {
4856 ret = gen_reg_rtx (Pmode);
4857 if (TARGET_ARCH32)
4858 emit_insn (gen_tie_addsi (ret, gen_rtx_REG (Pmode, 7),
4859 temp3, addr));
4860 else
4861 emit_insn (gen_tie_adddi (ret, gen_rtx_REG (Pmode, 7),
4862 temp3, addr));
4863 }
4864 else
4865 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4866 break;
4867
4868 case TLS_MODEL_LOCAL_EXEC:
4869 temp1 = gen_reg_rtx (Pmode);
4870 temp2 = gen_reg_rtx (Pmode);
4871 if (TARGET_ARCH32)
4872 {
4873 emit_insn (gen_tle_hix22si (temp1, addr));
4874 emit_insn (gen_tle_lox10si (temp2, temp1, addr));
4875 }
4876 else
4877 {
4878 emit_insn (gen_tle_hix22di (temp1, addr));
4879 emit_insn (gen_tle_lox10di (temp2, temp1, addr));
4880 }
4881 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4882 break;
4883
4884 default:
4885 gcc_unreachable ();
4886 }
4887
4888 else if (GET_CODE (addr) == CONST)
4889 {
4890 rtx base, offset;
4891
4892 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4893
4894 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4895 offset = XEXP (XEXP (addr, 0), 1);
4896
4897 base = force_operand (base, NULL_RTX);
4898 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4899 offset = force_reg (Pmode, offset);
4900 ret = gen_rtx_PLUS (Pmode, base, offset);
4901 }
4902
4903 else
4904 gcc_unreachable (); /* for now ... */
4905
4906 return ret;
4907 }
4908
4909 /* Legitimize PIC addresses. If the address is already position-independent,
4910 we return ORIG. Newly generated position-independent addresses go into a
4911 reg. This is REG if nonzero, otherwise we allocate register(s) as
4912 necessary. */
4913
4914 static rtx
4915 sparc_legitimize_pic_address (rtx orig, rtx reg)
4916 {
4917 if (GET_CODE (orig) == SYMBOL_REF
4918 /* See the comment in sparc_expand_move. */
4919 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4920 {
4921 bool gotdata_op = false;
4922 rtx pic_ref, address;
4923 rtx_insn *insn;
4924
4925 if (!reg)
4926 {
4927 gcc_assert (can_create_pseudo_p ());
4928 reg = gen_reg_rtx (Pmode);
4929 }
4930
4931 if (flag_pic == 2)
4932 {
4933 /* If not during reload, allocate another temp reg here for loading
4934 in the address, so that these instructions can be optimized
4935 properly. */
4936 rtx temp_reg = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : reg;
4937
4938 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4939 won't get confused into thinking that these two instructions
4940 are loading in the true address of the symbol. If in the
4941 future a PIC rtx exists, that should be used instead. */
4942 if (TARGET_ARCH64)
4943 {
4944 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4945 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4946 }
4947 else
4948 {
4949 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4950 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4951 }
4952
4953 address = temp_reg;
4954 gotdata_op = true;
4955 }
4956 else
4957 address = orig;
4958
4959 crtl->uses_pic_offset_table = 1;
4960 if (gotdata_op)
4961 {
4962 if (TARGET_ARCH64)
4963 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4964 pic_offset_table_rtx,
4965 address, orig));
4966 else
4967 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4968 pic_offset_table_rtx,
4969 address, orig));
4970 }
4971 else
4972 {
4973 pic_ref
4974 = gen_const_mem (Pmode,
4975 gen_rtx_PLUS (Pmode,
4976 pic_offset_table_rtx, address));
4977 insn = emit_move_insn (reg, pic_ref);
4978 }
4979
4980 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4981 by loop. */
4982 set_unique_reg_note (insn, REG_EQUAL, orig);
4983 return reg;
4984 }
4985 else if (GET_CODE (orig) == CONST)
4986 {
4987 rtx base, offset;
4988
4989 if (GET_CODE (XEXP (orig, 0)) == PLUS
4990 && sparc_pic_register_p (XEXP (XEXP (orig, 0), 0)))
4991 return orig;
4992
4993 if (!reg)
4994 {
4995 gcc_assert (can_create_pseudo_p ());
4996 reg = gen_reg_rtx (Pmode);
4997 }
4998
4999 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5000 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
5001 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
5002 base == reg ? NULL_RTX : reg);
5003
5004 if (GET_CODE (offset) == CONST_INT)
5005 {
5006 if (SMALL_INT (offset))
5007 return plus_constant (Pmode, base, INTVAL (offset));
5008 else if (can_create_pseudo_p ())
5009 offset = force_reg (Pmode, offset);
5010 else
5011 /* If we reach here, then something is seriously wrong. */
5012 gcc_unreachable ();
5013 }
5014 return gen_rtx_PLUS (Pmode, base, offset);
5015 }
5016 else if (GET_CODE (orig) == LABEL_REF)
5017 /* ??? We ought to be checking that the register is live instead, in case
5018 it is eliminated. */
5019 crtl->uses_pic_offset_table = 1;
5020
5021 return orig;
5022 }
5023
5024 /* Try machine-dependent ways of modifying an illegitimate address X
5025 to be legitimate. If we find one, return the new, valid address.
5026
5027 OLDX is the address as it was before break_out_memory_refs was called.
5028 In some cases it is useful to look at this to decide what needs to be done.
5029
5030 MODE is the mode of the operand pointed to by X.
5031
5032 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
5033
5034 static rtx
5035 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
5036 machine_mode mode)
5037 {
5038 rtx orig_x = x;
5039
5040 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
5041 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
5042 force_operand (XEXP (x, 0), NULL_RTX));
5043 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
5044 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
5045 force_operand (XEXP (x, 1), NULL_RTX));
5046 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
5047 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
5048 XEXP (x, 1));
5049 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
5050 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
5051 force_operand (XEXP (x, 1), NULL_RTX));
5052
5053 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
5054 return x;
5055
5056 if (sparc_tls_referenced_p (x))
5057 x = sparc_legitimize_tls_address (x);
5058 else if (flag_pic)
5059 x = sparc_legitimize_pic_address (x, NULL_RTX);
5060 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
5061 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
5062 copy_to_mode_reg (Pmode, XEXP (x, 1)));
5063 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
5064 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
5065 copy_to_mode_reg (Pmode, XEXP (x, 0)));
5066 else if (GET_CODE (x) == SYMBOL_REF
5067 || GET_CODE (x) == CONST
5068 || GET_CODE (x) == LABEL_REF)
5069 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
5070
5071 return x;
5072 }
5073
5074 /* Delegitimize an address that was legitimized by the above function. */
5075
5076 static rtx
5077 sparc_delegitimize_address (rtx x)
5078 {
5079 x = delegitimize_mem_from_attrs (x);
5080
5081 if (GET_CODE (x) == LO_SUM)
5082 x = XEXP (x, 1);
5083
5084 if (GET_CODE (x) == UNSPEC)
5085 switch (XINT (x, 1))
5086 {
5087 case UNSPEC_MOVE_PIC:
5088 case UNSPEC_TLSLE:
5089 x = XVECEXP (x, 0, 0);
5090 gcc_assert (GET_CODE (x) == SYMBOL_REF);
5091 break;
5092 case UNSPEC_MOVE_GOTDATA:
5093 x = XVECEXP (x, 0, 2);
5094 gcc_assert (GET_CODE (x) == SYMBOL_REF);
5095 break;
5096 default:
5097 break;
5098 }
5099
5100 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
5101 if (GET_CODE (x) == MINUS
5102 && (XEXP (x, 0) == got_register_rtx
5103 || sparc_pic_register_p (XEXP (x, 0))))
5104 {
5105 rtx y = XEXP (x, 1);
5106
5107 if (GET_CODE (y) == LO_SUM)
5108 y = XEXP (y, 1);
5109
5110 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MOVE_PIC_LABEL)
5111 {
5112 x = XVECEXP (y, 0, 0);
5113 gcc_assert (GET_CODE (x) == LABEL_REF
5114 || (GET_CODE (x) == CONST
5115 && GET_CODE (XEXP (x, 0)) == PLUS
5116 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5117 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT));
5118 }
5119 }
5120
5121 return x;
5122 }
5123
5124 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
5125 replace the input X, or the original X if no replacement is called for.
5126 The output parameter *WIN is 1 if the calling macro should goto WIN,
5127 0 if it should not.
5128
5129 For SPARC, we wish to handle addresses by splitting them into
5130 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
5131 This cuts the number of extra insns by one.
5132
5133 Do nothing when generating PIC code and the address is a symbolic
5134 operand or requires a scratch register. */
5135
5136 rtx
5137 sparc_legitimize_reload_address (rtx x, machine_mode mode,
5138 int opnum, int type,
5139 int ind_levels ATTRIBUTE_UNUSED, int *win)
5140 {
5141 /* Decompose SImode constants into HIGH+LO_SUM. */
5142 if (CONSTANT_P (x)
5143 && (mode != TFmode || TARGET_ARCH64)
5144 && GET_MODE (x) == SImode
5145 && GET_CODE (x) != LO_SUM
5146 && GET_CODE (x) != HIGH
5147 && sparc_code_model <= CM_MEDLOW
5148 && !(flag_pic
5149 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
5150 {
5151 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
5152 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
5153 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
5154 opnum, (enum reload_type)type);
5155 *win = 1;
5156 return x;
5157 }
5158
5159 /* We have to recognize what we have already generated above. */
5160 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
5161 {
5162 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
5163 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
5164 opnum, (enum reload_type)type);
5165 *win = 1;
5166 return x;
5167 }
5168
5169 *win = 0;
5170 return x;
5171 }
5172
5173 /* Return true if ADDR (a legitimate address expression)
5174 has an effect that depends on the machine mode it is used for.
5175
5176 In PIC mode,
5177
5178 (mem:HI [%l7+a])
5179
5180 is not equivalent to
5181
5182 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
5183
5184 because [%l7+a+1] is interpreted as the address of (a+1). */
5185
5186
5187 static bool
5188 sparc_mode_dependent_address_p (const_rtx addr,
5189 addr_space_t as ATTRIBUTE_UNUSED)
5190 {
5191 if (GET_CODE (addr) == PLUS
5192 && sparc_pic_register_p (XEXP (addr, 0))
5193 && symbolic_operand (XEXP (addr, 1), VOIDmode))
5194 return true;
5195
5196 return false;
5197 }
5198
5199 /* Emit a call instruction with the pattern given by PAT. ADDR is the
5200 address of the call target. */
5201
5202 void
5203 sparc_emit_call_insn (rtx pat, rtx addr)
5204 {
5205 rtx_insn *insn;
5206
5207 insn = emit_call_insn (pat);
5208
5209 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
5210 if (TARGET_VXWORKS_RTP
5211 && flag_pic
5212 && GET_CODE (addr) == SYMBOL_REF
5213 && (SYMBOL_REF_DECL (addr)
5214 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
5215 : !SYMBOL_REF_LOCAL_P (addr)))
5216 {
5217 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
5218 crtl->uses_pic_offset_table = 1;
5219 }
5220 }
5221
5222 /* Return 1 if RTX is a MEM which is known to be aligned to at
5223 least a DESIRED byte boundary. */
5224
5225 int
5226 mem_min_alignment (rtx mem, int desired)
5227 {
5228 rtx addr, base, offset;
5229
5230 /* If it's not a MEM we can't accept it. */
5231 if (GET_CODE (mem) != MEM)
5232 return 0;
5233
5234 /* Obviously... */
5235 if (!TARGET_UNALIGNED_DOUBLES
5236 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
5237 return 1;
5238
5239 /* ??? The rest of the function predates MEM_ALIGN so
5240 there is probably a bit of redundancy. */
5241 addr = XEXP (mem, 0);
5242 base = offset = NULL_RTX;
5243 if (GET_CODE (addr) == PLUS)
5244 {
5245 if (GET_CODE (XEXP (addr, 0)) == REG)
5246 {
5247 base = XEXP (addr, 0);
5248
5249 /* What we are saying here is that if the base
5250 REG is aligned properly, the compiler will make
5251 sure any REG based index upon it will be so
5252 as well. */
5253 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
5254 offset = XEXP (addr, 1);
5255 else
5256 offset = const0_rtx;
5257 }
5258 }
5259 else if (GET_CODE (addr) == REG)
5260 {
5261 base = addr;
5262 offset = const0_rtx;
5263 }
5264
5265 if (base != NULL_RTX)
5266 {
5267 int regno = REGNO (base);
5268
5269 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
5270 {
5271 /* Check if the compiler has recorded some information
5272 about the alignment of the base REG. If reload has
5273 completed, we already matched with proper alignments.
5274 	     If not running global_alloc, reload might give us an
5275 	     unaligned pointer to the local stack, though.  */
5276 if (((cfun != 0
5277 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
5278 || (optimize && reload_completed))
5279 && (INTVAL (offset) & (desired - 1)) == 0)
5280 return 1;
5281 }
5282 else
5283 {
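	  /* The stack and frame pointers are biased by SPARC_STACK_BIAS
	     (2047 in 64-bit mode, 0 otherwise), so undo the bias before
	     testing the alignment of the offset.  */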
5284 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
5285 return 1;
5286 }
5287 }
5288 else if (! TARGET_UNALIGNED_DOUBLES
5289 || CONSTANT_P (addr)
5290 || GET_CODE (addr) == LO_SUM)
5291 {
5292 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
5293 is true, in which case we can only assume that an access is aligned if
5294 it is to a constant address, or the address involves a LO_SUM. */
5295 return 1;
5296 }
5297
5298 /* An obviously unaligned address. */
5299 return 0;
5300 }
5301
5302
5303 /* Vectors to keep interesting information about registers where it can easily
5304 be got. We used to use the actual mode value as the bit number, but there
5305 are more than 32 modes now. Instead we use two tables: one indexed by
5306 hard register number, and one indexed by mode. */
5307
5308 /* The purpose of sparc_mode_class is to shrink the range of modes so that
5309 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
5310 mapped into one sparc_mode_class mode. */
5311
5312 enum sparc_mode_class {
5313 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
5314 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
5315 CC_MODE, CCFP_MODE
5316 };
5317
5318 /* Modes for single-word and smaller quantities. */
5319 #define S_MODES \
5320 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
5321
5322 /* Modes for double-word and smaller quantities. */
5323 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5324
5325 /* Modes for quad-word and smaller quantities. */
5326 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
5327
5328 /* Modes for 8-word and smaller quantities. */
5329 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
5330
5331 /* Modes for single-float quantities. */
5332 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
5333
5334 /* Modes for double-float and smaller quantities. */
5335 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5336
5337 /* Modes for quad-float and smaller quantities. */
5338 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
5339
5340 /* Modes for quad-float pairs and smaller quantities. */
5341 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
5342
5343 /* Modes for double-float only quantities. */
5344 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
5345
5346 /* Modes for quad-float and double-float only quantities. */
5347 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
5348
5349 /* Modes for quad-float pairs and double-float only quantities. */
5350 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
5351
5352 /* Modes for condition codes. */
5353 #define CC_MODES (1 << (int) CC_MODE)
5354 #define CCFP_MODES (1 << (int) CCFP_MODE)
5355
5356 /* Value is 1 if register/mode pair is acceptable on sparc.
5357
5358 The funny mixture of D and T modes is because integer operations
5359 do not specially operate on tetra quantities, so non-quad-aligned
5360 registers can hold quadword quantities (except %o4 and %i4 because
5361 they cross fixed registers).
5362
5363 ??? Note that, despite the settings, non-double-aligned parameter
5364 registers can hold double-word quantities in 32-bit mode. */
5365
5366 /* This points to either the 32-bit or the 64-bit version. */
5367 static const int *hard_regno_mode_classes;
5368
5369 static const int hard_32bit_mode_classes[] = {
5370 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5371 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5372 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5373 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5374
5375 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5376 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5377 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5378 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5379
5380 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5381 and none can hold SFmode/SImode values. */
5382 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5383 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5384 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5385 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5386
5387 /* %fcc[0123] */
5388 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5389
5390 /* %icc, %sfp, %gsr */
5391 CC_MODES, 0, D_MODES
5392 };
5393
5394 static const int hard_64bit_mode_classes[] = {
5395 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5396 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5397 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5398 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5399
5400 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5401 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5402 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5403 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5404
5405 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5406 and none can hold SFmode/SImode values. */
5407 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5408 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5409 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5410 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5411
5412 /* %fcc[0123] */
5413 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5414
5415 /* %icc, %sfp, %gsr */
5416 CC_MODES, 0, D_MODES
5417 };
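/* A minimal sketch (illustrative only, not taken from the surrounding code)
   of how the two tables above combine with sparc_mode_class: a hard
   register/mode check boils down to

     (hard_regno_mode_classes[regno] & sparc_mode_class[(int) mode]) != 0

   so e.g. DFmode is acceptable in %f0, whose mask is OF_MODES, but not in
   %f33, whose mask is 0 because the odd upper FP registers do not exist.  */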
5418
5419 static int sparc_mode_class [NUM_MACHINE_MODES];
5420
5421 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
5422
5423 static void
5424 sparc_init_modes (void)
5425 {
5426 int i;
5427
5428 for (i = 0; i < NUM_MACHINE_MODES; i++)
5429 {
5430 machine_mode m = (machine_mode) i;
5431 unsigned int size = GET_MODE_SIZE (m);
5432
5433 switch (GET_MODE_CLASS (m))
5434 {
5435 case MODE_INT:
5436 case MODE_PARTIAL_INT:
5437 case MODE_COMPLEX_INT:
5438 if (size < 4)
5439 sparc_mode_class[i] = 1 << (int) H_MODE;
5440 else if (size == 4)
5441 sparc_mode_class[i] = 1 << (int) S_MODE;
5442 else if (size == 8)
5443 sparc_mode_class[i] = 1 << (int) D_MODE;
5444 else if (size == 16)
5445 sparc_mode_class[i] = 1 << (int) T_MODE;
5446 else if (size == 32)
5447 sparc_mode_class[i] = 1 << (int) O_MODE;
5448 else
5449 sparc_mode_class[i] = 0;
5450 break;
5451 case MODE_VECTOR_INT:
5452 if (size == 4)
5453 sparc_mode_class[i] = 1 << (int) SF_MODE;
5454 else if (size == 8)
5455 sparc_mode_class[i] = 1 << (int) DF_MODE;
5456 else
5457 sparc_mode_class[i] = 0;
5458 break;
5459 case MODE_FLOAT:
5460 case MODE_COMPLEX_FLOAT:
5461 if (size == 4)
5462 sparc_mode_class[i] = 1 << (int) SF_MODE;
5463 else if (size == 8)
5464 sparc_mode_class[i] = 1 << (int) DF_MODE;
5465 else if (size == 16)
5466 sparc_mode_class[i] = 1 << (int) TF_MODE;
5467 else if (size == 32)
5468 sparc_mode_class[i] = 1 << (int) OF_MODE;
5469 else
5470 sparc_mode_class[i] = 0;
5471 break;
5472 case MODE_CC:
5473 if (m == CCFPmode || m == CCFPEmode)
5474 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
5475 else
5476 sparc_mode_class[i] = 1 << (int) CC_MODE;
5477 break;
5478 default:
5479 sparc_mode_class[i] = 0;
5480 break;
5481 }
5482 }
5483
5484 if (TARGET_ARCH64)
5485 hard_regno_mode_classes = hard_64bit_mode_classes;
5486 else
5487 hard_regno_mode_classes = hard_32bit_mode_classes;
5488
5489 /* Initialize the array used by REGNO_REG_CLASS. */
5490 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5491 {
5492 if (i < 16 && TARGET_V8PLUS)
5493 sparc_regno_reg_class[i] = I64_REGS;
5494 else if (i < 32 || i == FRAME_POINTER_REGNUM)
5495 sparc_regno_reg_class[i] = GENERAL_REGS;
5496 else if (i < 64)
5497 sparc_regno_reg_class[i] = FP_REGS;
5498 else if (i < 96)
5499 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
5500 else if (i < 100)
5501 sparc_regno_reg_class[i] = FPCC_REGS;
5502 else
5503 sparc_regno_reg_class[i] = NO_REGS;
5504 }
5505 }
5506
5507 /* Return whether REGNO, a global or FP register, must be saved/restored. */
5508
5509 static inline bool
5510 save_global_or_fp_reg_p (unsigned int regno,
5511 int leaf_function ATTRIBUTE_UNUSED)
5512 {
5513 return !call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno);
5514 }
5515
5516 /* Return whether the return address register (%i7) is needed. */
5517
5518 static inline bool
5519 return_addr_reg_needed_p (int leaf_function)
5520 {
5521 /* If it is live, for example because of __builtin_return_address (0). */
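  /* For instance (illustrative example), a function such as
       void *f (void) { return __builtin_return_address (0); }
     keeps RETURN_ADDR_REGNUM live even in a leaf function.  */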
5522 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
5523 return true;
5524
5525 /* Otherwise, it is needed as save register if %o7 is clobbered. */
5526 if (!leaf_function
5527 /* Loading the GOT register clobbers %o7. */
5528 || crtl->uses_pic_offset_table
5529 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
5530 return true;
5531
5532 return false;
5533 }
5534
5535 /* Return whether REGNO, a local or in register, must be saved/restored. */
5536
5537 static bool
5538 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5539 {
5540 /* General case: call-saved registers live at some point. */
5541 if (!call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno))
5542 return true;
5543
5544 /* Frame pointer register (%fp) if needed. */
5545 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5546 return true;
5547
5548 /* Return address register (%i7) if needed. */
5549 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5550 return true;
5551
5552 /* GOT register (%l7) if needed. */
5553 if (got_register_rtx && regno == REGNO (got_register_rtx))
5554 return true;
5555
5556 /* If the function accesses prior frames, the frame pointer and the return
5557 address of the previous frame must be saved on the stack. */
5558 if (crtl->accesses_prior_frames
5559 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5560 return true;
5561
5562 return false;
5563 }
5564
5565 /* Compute the frame size required by the function. This function is called
5566 during the reload pass and also by sparc_expand_prologue. */
5567
5568 static HOST_WIDE_INT
5569 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5570 {
5571 HOST_WIDE_INT frame_size, apparent_frame_size;
5572 int args_size, n_global_fp_regs = 0;
5573 bool save_local_in_regs_p = false;
5574 unsigned int i;
5575
5576 /* If the function allocates dynamic stack space, the dynamic offset is
5577 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5578 if (leaf_function && !cfun->calls_alloca)
5579 args_size = 0;
5580 else
5581 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5582
5583 /* Calculate space needed for global registers. */
5584 if (TARGET_ARCH64)
5585 {
5586 for (i = 0; i < 8; i++)
5587 if (save_global_or_fp_reg_p (i, 0))
5588 n_global_fp_regs += 2;
5589 }
5590 else
5591 {
5592 for (i = 0; i < 8; i += 2)
5593 if (save_global_or_fp_reg_p (i, 0)
5594 || save_global_or_fp_reg_p (i + 1, 0))
5595 n_global_fp_regs += 2;
5596 }
5597
5598 /* In the flat window model, find out which local and in registers need to
5599 be saved. We don't reserve space in the current frame for them as they
5600 will be spilled into the register window save area of the caller's frame.
5601 However, as soon as we use this register window save area, we must create
5602 that of the current frame to make it the live one. */
5603 if (TARGET_FLAT)
5604 for (i = 16; i < 32; i++)
5605 if (save_local_or_in_reg_p (i, leaf_function))
5606 {
5607 save_local_in_regs_p = true;
5608 break;
5609 }
5610
5611 /* Calculate space needed for FP registers. */
5612 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5613 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5614 n_global_fp_regs += 2;
5615
5616 if (size == 0
5617 && n_global_fp_regs == 0
5618 && args_size == 0
5619 && !save_local_in_regs_p)
5620 frame_size = apparent_frame_size = 0;
5621 else
5622 {
5623 /* Start from the apparent frame size. */
5624 apparent_frame_size = ROUND_UP (size, 8) + n_global_fp_regs * 4;
5625
5626 /* We need to add the size of the outgoing argument area. */
5627 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5628
5629 /* And that of the register window save area. */
5630 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5631
5632 /* Finally, bump to the appropriate alignment. */
5633 frame_size = SPARC_STACK_ALIGN (frame_size);
5634 }
5635
5636 /* Set up values for use in prologue and epilogue. */
5637 sparc_frame_size = frame_size;
5638 sparc_apparent_frame_size = apparent_frame_size;
5639 sparc_n_global_fp_regs = n_global_fp_regs;
5640 sparc_save_local_in_regs_p = save_local_in_regs_p;
5641
5642 return frame_size;
5643 }
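/* A worked example with illustrative figures: for 40 bytes of local
   variables, 24 bytes of outgoing arguments and no global or FP registers
   to save, the computation above gives
     apparent_frame_size = ROUND_UP (40, 8) + 0 * 4 = 40
     frame_size = 40 + ROUND_UP (24, 8) + FIRST_PARM_OFFSET (cfun->decl)
   and the result is then rounded up by SPARC_STACK_ALIGN.  */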
5644
5645 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5646
5647 int
5648 sparc_initial_elimination_offset (int to)
5649 {
5650 int offset;
5651
5652 if (to == STACK_POINTER_REGNUM)
5653 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5654 else
5655 offset = 0;
5656
5657 offset += SPARC_STACK_BIAS;
5658 return offset;
5659 }
5660
5661 /* Output any necessary .register pseudo-ops. */
5662
5663 void
5664 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5665 {
5666 int i;
5667
5668 if (TARGET_ARCH32)
5669 return;
5670
5671 /* Check if %g[2367] were used without
5672 .register being printed for them already. */
5673 for (i = 2; i < 8; i++)
5674 {
5675 if (df_regs_ever_live_p (i)
5676 && ! sparc_hard_reg_printed [i])
5677 {
5678 sparc_hard_reg_printed [i] = 1;
5679 /* %g7 is used as TLS base register, use #ignore
5680 for it instead of #scratch. */
5681 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5682 i == 7 ? "ignore" : "scratch");
5683 }
5684 if (i == 3) i = 5;
5685 }
5686 }
5687
5688 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5689
5690 #if PROBE_INTERVAL > 4096
5691 #error Cannot use indexed addressing mode for stack probing
5692 #endif
5693
5694 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5695 inclusive. These are offsets from the current stack pointer.
5696
5697 Note that we don't use the REG+REG addressing mode for the probes because
5698 of the stack bias in 64-bit mode. And it doesn't really buy us anything,
5699 so the advantages of having a single code path win here. */
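/* For illustration, assuming the default PROBE_INTERVAL of 4096: a call
   with FIRST = 4096 and SIZE = 10000 takes the second case below and emits
   probes at SP - 8192, SP - 12288 and finally SP - 14096, i.e. at
   FIRST + PROBE_INTERVAL, FIRST + 2 * PROBE_INTERVAL and FIRST + SIZE
   below the stack pointer.  */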
5700
5701 static void
5702 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5703 {
5704 rtx g1 = gen_rtx_REG (Pmode, 1);
5705
5706 /* See if we have a constant small number of probes to generate. If so,
5707 that's the easy case. */
5708 if (size <= PROBE_INTERVAL)
5709 {
5710 emit_move_insn (g1, GEN_INT (first));
5711 emit_insn (gen_rtx_SET (g1,
5712 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5713 emit_stack_probe (plus_constant (Pmode, g1, -size));
5714 }
5715
5716 /* The run-time loop is made up of 9 insns in the generic case while the
5717 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
5718 else if (size <= 4 * PROBE_INTERVAL)
5719 {
5720 HOST_WIDE_INT i;
5721
5722 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5723 emit_insn (gen_rtx_SET (g1,
5724 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5725 emit_stack_probe (g1);
5726
5727 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5728 it exceeds SIZE. If only two probes are needed, this will not
5729 generate any code. Then probe at FIRST + SIZE. */
5730 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5731 {
5732 emit_insn (gen_rtx_SET (g1,
5733 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5734 emit_stack_probe (g1);
5735 }
5736
5737 emit_stack_probe (plus_constant (Pmode, g1,
5738 (i - PROBE_INTERVAL) - size));
5739 }
5740
5741 /* Otherwise, do the same as above, but in a loop. Note that we must be
5742 extra careful with variables wrapping around because we might be at
5743 the very top (or the very bottom) of the address space and we have
5744 to be able to handle this case properly; in particular, we use an
5745 equality test for the loop condition. */
5746 else
5747 {
5748 HOST_WIDE_INT rounded_size;
5749 rtx g4 = gen_rtx_REG (Pmode, 4);
5750
5751 emit_move_insn (g1, GEN_INT (first));
5752
5753
5754 /* Step 1: round SIZE to the previous multiple of the interval. */
5755
5756 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5757 emit_move_insn (g4, GEN_INT (rounded_size));
5758
5759
5760 /* Step 2: compute initial and final value of the loop counter. */
5761
5762 /* TEST_ADDR = SP + FIRST. */
5763 emit_insn (gen_rtx_SET (g1,
5764 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5765
5766 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5767 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5768
5769
5770 /* Step 3: the loop
5771
5772 while (TEST_ADDR != LAST_ADDR)
5773 {
5774 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5775 probe at TEST_ADDR
5776 }
5777
5778 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5779 until it is equal to ROUNDED_SIZE. */
5780
5781 if (TARGET_ARCH64)
5782 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5783 else
5784 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5785
5786
5787 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5788 that SIZE is equal to ROUNDED_SIZE. */
5789
5790 if (size != rounded_size)
5791 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5792 }
5793
5794 /* Make sure nothing is scheduled before we are done. */
5795 emit_insn (gen_blockage ());
5796 }
5797
5798 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5799 absolute addresses. */
5800
5801 const char *
5802 output_probe_stack_range (rtx reg1, rtx reg2)
5803 {
5804 static int labelno = 0;
5805 char loop_lab[32];
5806 rtx xops[2];
5807
5808 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5809
5810 /* Loop. */
5811 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5812
5813 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5814 xops[0] = reg1;
5815 xops[1] = GEN_INT (-PROBE_INTERVAL);
5816 output_asm_insn ("add\t%0, %1, %0", xops);
5817
5818 /* Test if TEST_ADDR == LAST_ADDR. */
5819 xops[1] = reg2;
5820 output_asm_insn ("cmp\t%0, %1", xops);
5821
5822 /* Probe at TEST_ADDR and branch. */
5823 if (TARGET_ARCH64)
5824 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5825 else
5826 fputs ("\tbne\t", asm_out_file);
5827 assemble_name_raw (asm_out_file, loop_lab);
5828 fputc ('\n', asm_out_file);
5829 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5830 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5831
5832 return "";
5833 }
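/* Illustrative output of the sequence above in 64-bit mode (label name and
   the 2047 stack bias are shown for concreteness only):

	.LPSRL0:
		add	%g1, -4096, %g1
		cmp	%g1, %g4
		bne,pt	%xcc,.LPSRL0
		 st	%g0, [%g1+2047]
*/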
5834
5835 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5836 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5837 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5838 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5839 the action to be performed if it returns false. Return the new offset. */
5840
5841 typedef bool (*sorr_pred_t) (unsigned int, int);
5842 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5843
5844 static int
5845 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5846 int offset, int leaf_function, sorr_pred_t save_p,
5847 sorr_act_t action_true, sorr_act_t action_false)
5848 {
5849 unsigned int i;
5850 rtx mem;
5851 rtx_insn *insn;
5852
5853 if (TARGET_ARCH64 && high <= 32)
5854 {
5855 int fp_offset = -1;
5856
5857 for (i = low; i < high; i++)
5858 {
5859 if (save_p (i, leaf_function))
5860 {
5861 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5862 base, offset));
5863 if (action_true == SORR_SAVE)
5864 {
5865 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5866 RTX_FRAME_RELATED_P (insn) = 1;
5867 }
5868 else /* action_true == SORR_RESTORE */
5869 {
5870 /* The frame pointer must be restored last since its old
5871 value may be used as base address for the frame. This
5872 is problematic in 64-bit mode only because of the lack
5873 of a double-word load instruction. */
5874 if (i == HARD_FRAME_POINTER_REGNUM)
5875 fp_offset = offset;
5876 else
5877 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5878 }
5879 offset += 8;
5880 }
5881 else if (action_false == SORR_ADVANCE)
5882 offset += 8;
5883 }
5884
5885 if (fp_offset >= 0)
5886 {
5887 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5888 emit_move_insn (hard_frame_pointer_rtx, mem);
5889 }
5890 }
5891 else
5892 {
5893 for (i = low; i < high; i += 2)
5894 {
5895 bool reg0 = save_p (i, leaf_function);
5896 bool reg1 = save_p (i + 1, leaf_function);
5897 machine_mode mode;
5898 int regno;
5899
5900 if (reg0 && reg1)
5901 {
5902 mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode;
5903 regno = i;
5904 }
5905 else if (reg0)
5906 {
5907 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5908 regno = i;
5909 }
5910 else if (reg1)
5911 {
5912 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5913 regno = i + 1;
5914 offset += 4;
5915 }
5916 else
5917 {
5918 if (action_false == SORR_ADVANCE)
5919 offset += 8;
5920 continue;
5921 }
5922
5923 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5924 if (action_true == SORR_SAVE)
5925 {
5926 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5927 RTX_FRAME_RELATED_P (insn) = 1;
5928 if (mode == DImode)
5929 {
5930 rtx set1, set2;
5931 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5932 offset));
5933 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5934 RTX_FRAME_RELATED_P (set1) = 1;
5935 mem
5936 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5937 offset + 4));
5938 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5939 RTX_FRAME_RELATED_P (set2) = 1;
5940 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5941 gen_rtx_PARALLEL (VOIDmode,
5942 gen_rtvec (2, set1, set2)));
5943 }
5944 }
5945 else /* action_true == SORR_RESTORE */
5946 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5947
5948 /* Bump and round down to double word
5949 in case we already bumped by 4. */
5950 offset = ROUND_DOWN (offset + 8, 8);
5951 }
5952 }
5953
5954 return offset;
5955 }
5956
5957 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5958
5959 static rtx
5960 emit_adjust_base_to_offset (rtx base, int offset)
5961 {
5962 /* ??? This might be optimized a little as %g1 might already have a
5963 value close enough that a single add insn will do. */
5964 /* ??? Although, all of this is probably only a temporary fix because
5965 if %g1 can hold a function result, then sparc_expand_epilogue will
5966 lose (the result will be clobbered). */
5967 rtx new_base = gen_rtx_REG (Pmode, 1);
5968 emit_move_insn (new_base, GEN_INT (offset));
5969 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5970 return new_base;
5971 }
5972
5973 /* Emit code to save/restore call-saved global and FP registers. */
5974
5975 static void
5976 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5977 {
5978 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5979 {
5980 base = emit_adjust_base_to_offset (base, offset);
5981 offset = 0;
5982 }
5983
5984 offset
5985 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5986 save_global_or_fp_reg_p, action, SORR_NONE);
5987 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5988 save_global_or_fp_reg_p, action, SORR_NONE);
5989 }
5990
5991 /* Emit code to save/restore call-saved local and in registers. */
5992
5993 static void
5994 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5995 {
5996 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5997 {
5998 base = emit_adjust_base_to_offset (base, offset);
5999 offset = 0;
6000 }
6001
6002 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
6003 save_local_or_in_reg_p, action, SORR_ADVANCE);
6004 }
6005
6006 /* Emit a window_save insn. */
6007
6008 static rtx_insn *
6009 emit_window_save (rtx increment)
6010 {
6011 rtx_insn *insn = emit_insn (gen_window_save (increment));
6012 RTX_FRAME_RELATED_P (insn) = 1;
6013
6014 /* The incoming return address (%o7) is saved in %i7. */
6015 add_reg_note (insn, REG_CFA_REGISTER,
6016 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
6017 gen_rtx_REG (Pmode,
6018 INCOMING_RETURN_ADDR_REGNUM)));
6019
6020 /* The window save event. */
6021 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
6022
6023 /* The CFA is %fp, the hard frame pointer. */
6024 add_reg_note (insn, REG_CFA_DEF_CFA,
6025 plus_constant (Pmode, hard_frame_pointer_rtx,
6026 INCOMING_FRAME_SP_OFFSET));
6027
6028 return insn;
6029 }
6030
6031 /* Generate an increment for the stack pointer. */
6032
6033 static rtx
6034 gen_stack_pointer_inc (rtx increment)
6035 {
6036 return gen_rtx_SET (stack_pointer_rtx,
6037 gen_rtx_PLUS (Pmode,
6038 stack_pointer_rtx,
6039 increment));
6040 }
6041
6042 /* Expand the function prologue. The prologue is responsible for reserving
6043 storage for the frame, saving the call-saved registers and loading the
6044 GOT register if needed. */
6045
6046 void
6047 sparc_expand_prologue (void)
6048 {
6049 HOST_WIDE_INT size;
6050 rtx_insn *insn;
6051
6052 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
6053 on the final value of the flag means deferring the prologue/epilogue
6054 expansion until just before the second scheduling pass, which is too
6055 late to emit multiple epilogues or return insns.
6056
6057 Of course we are making the assumption that the value of the flag
6058 will not change between now and its final value. Of the three parts
6059 of the formula, only the last one can reasonably vary. Let's take a
6060 closer look, after assuming that the first two are set to true
6061 (otherwise the last value is effectively silenced).
6062
6063 If only_leaf_regs_used returns false, the global predicate will also
6064 be false so the actual frame size calculated below will be positive.
6065 As a consequence, the save_register_window insn will be emitted in
6066 the instruction stream; now this insn explicitly references %fp
6067 which is not a leaf register so only_leaf_regs_used will always
6068 return false subsequently.
6069
6070 If only_leaf_regs_used returns true, we hope that the subsequent
6071 optimization passes won't cause non-leaf registers to pop up. For
6072 example, the regrename pass has special provisions to not rename to
6073 non-leaf registers in a leaf function. */
6074 sparc_leaf_function_p
6075 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
6076
6077 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
6078
6079 if (flag_stack_usage_info)
6080 current_function_static_stack_size = size;
6081
6082 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6083 || flag_stack_clash_protection)
6084 {
6085 if (crtl->is_leaf && !cfun->calls_alloca)
6086 {
6087 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
6088 sparc_emit_probe_stack_range (get_stack_check_protect (),
6089 size - get_stack_check_protect ());
6090 }
6091 else if (size > 0)
6092 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
6093 }
6094
6095 if (size == 0)
6096 ; /* do nothing. */
6097 else if (sparc_leaf_function_p)
6098 {
6099 rtx size_int_rtx = GEN_INT (-size);
6100
6101 if (size <= 4096)
6102 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
6103 else if (size <= 8192)
6104 {
6105 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
6106 RTX_FRAME_RELATED_P (insn) = 1;
6107
6108 /* %sp is still the CFA register. */
6109 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6110 }
6111 else
6112 {
6113 rtx size_rtx = gen_rtx_REG (Pmode, 1);
6114 emit_move_insn (size_rtx, size_int_rtx);
6115 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
6116 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6117 gen_stack_pointer_inc (size_int_rtx));
6118 }
6119
6120 RTX_FRAME_RELATED_P (insn) = 1;
6121
6122 /* Ensure no memory access is done before the frame is established. */
6123 emit_insn (gen_frame_blockage ());
6124 }
6125 else
6126 {
6127 rtx size_int_rtx = GEN_INT (-size);
6128
6129 if (size <= 4096)
6130 emit_window_save (size_int_rtx);
6131 else if (size <= 8192)
6132 {
6133 emit_window_save (GEN_INT (-4096));
6134
6135 /* %sp is not the CFA register anymore. */
6136 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6137
6138 /* Likewise. */
6139 emit_insn (gen_frame_blockage ());
6140 }
6141 else
6142 {
6143 rtx size_rtx = gen_rtx_REG (Pmode, 1);
6144 emit_move_insn (size_rtx, size_int_rtx);
6145 emit_window_save (size_rtx);
6146 }
6147 }
6148
6149 if (sparc_leaf_function_p)
6150 {
6151 sparc_frame_base_reg = stack_pointer_rtx;
6152 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6153 }
6154 else
6155 {
6156 sparc_frame_base_reg = hard_frame_pointer_rtx;
6157 sparc_frame_base_offset = SPARC_STACK_BIAS;
6158 }
6159
6160 if (sparc_n_global_fp_regs > 0)
6161 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6162 sparc_frame_base_offset
6163 - sparc_apparent_frame_size,
6164 SORR_SAVE);
6165
6166 /* Advertise that the data calculated just above are now valid. */
6167 sparc_prologue_data_valid_p = true;
6168 }
6169
6170 /* Expand the function prologue in the flat window model. The prologue is
6171 responsible for reserving storage for the frame, saving the call-saved
6172 registers and loading the GOT register if needed. */
6173
6174 void
6175 sparc_flat_expand_prologue (void)
6176 {
6177 HOST_WIDE_INT size;
6178 rtx_insn *insn;
6179
6180 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
6181
6182 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
6183
6184 if (flag_stack_usage_info)
6185 current_function_static_stack_size = size;
6186
6187 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6188 || flag_stack_clash_protection)
6189 {
6190 if (crtl->is_leaf && !cfun->calls_alloca)
6191 {
6192 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
6193 sparc_emit_probe_stack_range (get_stack_check_protect (),
6194 size - get_stack_check_protect ());
6195 }
6196 else if (size > 0)
6197 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
6198 }
6199
6200 if (sparc_save_local_in_regs_p)
6201 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
6202 SORR_SAVE);
6203
6204 if (size == 0)
6205 ; /* do nothing. */
6206 else
6207 {
6208 rtx size_int_rtx, size_rtx;
6209
6210 size_rtx = size_int_rtx = GEN_INT (-size);
6211
6212 /* We establish the frame (i.e. decrement the stack pointer) first, even
6213 if we use a frame pointer, because we cannot clobber any call-saved
6214 registers, including the frame pointer, if we haven't created a new
6215 register save area, for the sake of compatibility with the ABI. */
6216 if (size <= 4096)
6217 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
6218 else if (size <= 8192 && !frame_pointer_needed)
6219 {
6220 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
6221 RTX_FRAME_RELATED_P (insn) = 1;
6222 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6223 }
6224 else
6225 {
6226 size_rtx = gen_rtx_REG (Pmode, 1);
6227 emit_move_insn (size_rtx, size_int_rtx);
6228 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
6229 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6230 gen_stack_pointer_inc (size_int_rtx));
6231 }
6232 RTX_FRAME_RELATED_P (insn) = 1;
6233
6234 /* Ensure no memory access is done before the frame is established. */
6235 emit_insn (gen_frame_blockage ());
6236
6237 if (frame_pointer_needed)
6238 {
6239 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
6240 gen_rtx_MINUS (Pmode,
6241 stack_pointer_rtx,
6242 size_rtx)));
6243 RTX_FRAME_RELATED_P (insn) = 1;
6244
6245 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6246 gen_rtx_SET (hard_frame_pointer_rtx,
6247 plus_constant (Pmode, stack_pointer_rtx,
6248 size)));
6249 }
6250
6251 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6252 {
6253 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
6254 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
6255
6256 insn = emit_move_insn (i7, o7);
6257 RTX_FRAME_RELATED_P (insn) = 1;
6258
6259 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
6260
6261 /* Prevent this instruction from ever being considered dead,
6262 even if this function has no epilogue. */
6263 emit_use (i7);
6264 }
6265 }
6266
6267 if (frame_pointer_needed)
6268 {
6269 sparc_frame_base_reg = hard_frame_pointer_rtx;
6270 sparc_frame_base_offset = SPARC_STACK_BIAS;
6271 }
6272 else
6273 {
6274 sparc_frame_base_reg = stack_pointer_rtx;
6275 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6276 }
6277
6278 if (sparc_n_global_fp_regs > 0)
6279 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6280 sparc_frame_base_offset
6281 - sparc_apparent_frame_size,
6282 SORR_SAVE);
6283
6284 /* Advertise that the data calculated just above are now valid. */
6285 sparc_prologue_data_valid_p = true;
6286 }
6287
6288 /* This function generates the assembly code for function entry, which boils
6289 down to emitting the necessary .register directives. */
6290
6291 static void
6292 sparc_asm_function_prologue (FILE *file)
6293 {
6294 /* Check that the assumption we made in sparc_expand_prologue is valid. */
6295 if (!TARGET_FLAT)
6296 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
6297
6298 sparc_output_scratch_registers (file);
6299 }
6300
6301 /* Expand the function epilogue, either normal or part of a sibcall.
6302 We emit all the instructions except the return or the call. */
6303
6304 void
6305 sparc_expand_epilogue (bool for_eh)
6306 {
6307 HOST_WIDE_INT size = sparc_frame_size;
6308
6309 if (cfun->calls_alloca)
6310 emit_insn (gen_frame_blockage ());
6311
6312 if (sparc_n_global_fp_regs > 0)
6313 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6314 sparc_frame_base_offset
6315 - sparc_apparent_frame_size,
6316 SORR_RESTORE);
6317
6318 if (size == 0 || for_eh)
6319 ; /* do nothing. */
6320 else if (sparc_leaf_function_p)
6321 {
6322 /* Ensure no memory access is done after the frame is destroyed. */
6323 emit_insn (gen_frame_blockage ());
6324
6325 if (size <= 4096)
6326 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6327 else if (size <= 8192)
6328 {
6329 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6330 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6331 }
6332 else
6333 {
6334 rtx reg = gen_rtx_REG (Pmode, 1);
6335 emit_move_insn (reg, GEN_INT (size));
6336 emit_insn (gen_stack_pointer_inc (reg));
6337 }
6338 }
6339 }
6340
6341 /* Expand the function epilogue in the flat window model, either normal or
6342 part of a sibcall. We emit all the instructions except the return or the call. */
6343
6344 void
6345 sparc_flat_expand_epilogue (bool for_eh)
6346 {
6347 HOST_WIDE_INT size = sparc_frame_size;
6348
6349 if (sparc_n_global_fp_regs > 0)
6350 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6351 sparc_frame_base_offset
6352 - sparc_apparent_frame_size,
6353 SORR_RESTORE);
6354
6355 /* If we have a frame pointer, we need both to restore it before the
6356 frame is destroyed and to use its current value in destroying the frame.
6357 Since we don't have an atomic way to do that in the flat window model,
6358 we save the current value into a temporary register (%g1). */
6359 if (frame_pointer_needed && !for_eh)
6360 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
6361
6362 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6363 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
6364 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
6365
6366 if (sparc_save_local_in_regs_p)
6367 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
6368 sparc_frame_base_offset,
6369 SORR_RESTORE);
6370
6371 if (size == 0 || for_eh)
6372 ; /* do nothing. */
6373 else if (frame_pointer_needed)
6374 {
6375 /* Ensure no memory access is done after the frame is destroyed. */
6376 emit_insn (gen_frame_blockage ());
6377
6378 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
6379 }
6380 else
6381 {
6382 /* Likewise. */
6383 emit_insn (gen_frame_blockage ());
6384
6385 if (size <= 4096)
6386 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6387 else if (size <= 8192)
6388 {
6389 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6390 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6391 }
6392 else
6393 {
6394 rtx reg = gen_rtx_REG (Pmode, 1);
6395 emit_move_insn (reg, GEN_INT (size));
6396 emit_insn (gen_stack_pointer_inc (reg));
6397 }
6398 }
6399 }
6400
6401 /* Return true if it is appropriate to emit `return' instructions in the
6402 body of a function. */
6403
6404 bool
6405 sparc_can_use_return_insn_p (void)
6406 {
6407 return sparc_prologue_data_valid_p
6408 && sparc_n_global_fp_regs == 0
6409 && TARGET_FLAT
6410 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
6411 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
6412 }
6413
6414 /* This function generates the assembly code for function exit. */
6415
6416 static void
6417 sparc_asm_function_epilogue (FILE *file)
6418 {
6419 /* If the last two instructions of a function are "call foo; dslot;"
6420 the return address might point to the first instruction in the next
6421 function and we have to output a dummy nop for the sake of sane
6422 backtraces in such cases. This is pointless for sibling calls since
6423 the return address is explicitly adjusted. */
6424
6425 rtx_insn *insn = get_last_insn ();
6426
6427 rtx last_real_insn = prev_real_insn (insn);
6428 if (last_real_insn
6429 && NONJUMP_INSN_P (last_real_insn)
6430 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
6431 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
6432
6433 if (last_real_insn
6434 && CALL_P (last_real_insn)
6435 && !SIBLING_CALL_P (last_real_insn))
6436 fputs("\tnop\n", file);
6437
6438 sparc_output_deferred_case_vectors ();
6439 }
6440
6441 /* Output a 'restore' instruction. */
6442
6443 static void
6444 output_restore (rtx pat)
6445 {
6446 rtx operands[3];
6447
6448 if (! pat)
6449 {
6450 fputs ("\t restore\n", asm_out_file);
6451 return;
6452 }
6453
6454 gcc_assert (GET_CODE (pat) == SET);
6455
6456 operands[0] = SET_DEST (pat);
6457 pat = SET_SRC (pat);
6458
6459 switch (GET_CODE (pat))
6460 {
6461 case PLUS:
6462 operands[1] = XEXP (pat, 0);
6463 operands[2] = XEXP (pat, 1);
6464 output_asm_insn (" restore %r1, %2, %Y0", operands);
6465 break;
6466 case LO_SUM:
6467 operands[1] = XEXP (pat, 0);
6468 operands[2] = XEXP (pat, 1);
6469 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
6470 break;
6471 case ASHIFT:
6472 operands[1] = XEXP (pat, 0);
6473 gcc_assert (XEXP (pat, 1) == const1_rtx);
6474 output_asm_insn (" restore %r1, %r1, %Y0", operands);
6475 break;
6476 default:
6477 operands[1] = pat;
6478 output_asm_insn (" restore %%g0, %1, %Y0", operands);
6479 break;
6480 }
6481 }
6482
6483 /* Output a return. */
6484
6485 const char *
6486 output_return (rtx_insn *insn)
6487 {
6488 if (crtl->calls_eh_return)
6489 {
6490 /* If the function uses __builtin_eh_return, the eh_return
6491 machinery occupies the delay slot. */
6492 gcc_assert (!final_sequence);
6493
6494 if (flag_delayed_branch)
6495 {
6496 if (!TARGET_FLAT && TARGET_V9)
6497 fputs ("\treturn\t%i7+8\n", asm_out_file);
6498 else
6499 {
6500 if (!TARGET_FLAT)
6501 fputs ("\trestore\n", asm_out_file);
6502
6503 fputs ("\tjmp\t%o7+8\n", asm_out_file);
6504 }
6505
6506 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
6507 }
6508 else
6509 {
6510 if (!TARGET_FLAT)
6511 fputs ("\trestore\n", asm_out_file);
6512
6513 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
6514 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
6515 }
6516 }
6517 else if (sparc_leaf_function_p || TARGET_FLAT)
6518 {
6519 /* This is a leaf or flat function so we don't have to bother restoring
6520 the register window, which frees us from dealing with the convoluted
6521 semantics of restore/return. We simply output the jump to the
6522 return address and the insn in the delay slot (if any). */
6523
6524 return "jmp\t%%o7+%)%#";
6525 }
6526 else
6527 {
6528 /* This is a regular function so we have to restore the register window.
6529 We may have a pending insn for the delay slot, which will be either
6530 combined with the 'restore' instruction or put in the delay slot of
6531 the 'return' instruction. */
6532
6533 if (final_sequence)
6534 {
6535 rtx_insn *delay;
6536 rtx pat;
6537
6538 delay = NEXT_INSN (insn);
6539 gcc_assert (delay);
6540
6541 pat = PATTERN (delay);
6542
6543 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6544 {
6545 epilogue_renumber (&pat, 0);
6546 return "return\t%%i7+%)%#";
6547 }
6548 else
6549 {
6550 output_asm_insn ("jmp\t%%i7+%)", NULL);
6551
6552 /* We're going to output the insn in the delay slot manually.
6553 Make sure to output its source location first. */
6554 PATTERN (delay) = gen_blockage ();
6555 INSN_CODE (delay) = -1;
6556 final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
6557 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6558
6559 output_restore (pat);
6560 }
6561 }
6562 else
6563 {
6564 /* The delay slot is empty. */
6565 if (TARGET_V9)
6566 return "return\t%%i7+%)\n\t nop";
6567 else if (flag_delayed_branch)
6568 return "jmp\t%%i7+%)\n\t restore";
6569 else
6570 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6571 }
6572 }
6573
6574 return "";
6575 }
6576
6577 /* Output a sibling call. */
6578
6579 const char *
6580 output_sibcall (rtx_insn *insn, rtx call_operand)
6581 {
6582 rtx operands[1];
6583
6584 gcc_assert (flag_delayed_branch);
6585
6586 operands[0] = call_operand;
6587
6588 if (sparc_leaf_function_p || TARGET_FLAT)
6589 {
6590 /* This is a leaf or flat function so we don't have to bother restoring
6591 the register window. We simply output the jump to the function and
6592 the insn in the delay slot (if any). */
6593
6594 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6595
6596 if (final_sequence)
6597 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6598 operands);
6599 else
6600 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6601 it into a branch if possible. */
6602 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6603 operands);
6604 }
6605 else
6606 {
6607 /* This is a regular function so we have to restore the register window.
6608 We may have a pending insn for the delay slot, which will be combined
6609 with the 'restore' instruction. */
6610
6611 output_asm_insn ("call\t%a0, 0", operands);
6612
6613 if (final_sequence)
6614 {
6615 rtx_insn *delay;
6616 rtx pat;
6617
6618 delay = NEXT_INSN (insn);
6619 gcc_assert (delay);
6620
6621 pat = PATTERN (delay);
6622
6623 /* We're going to output the insn in the delay slot manually.
6624 Make sure to output its source location first. */
6625 PATTERN (delay) = gen_blockage ();
6626 INSN_CODE (delay) = -1;
6627 final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
6628 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6629
6630 output_restore (pat);
6631 }
6632 else
6633 output_restore (NULL_RTX);
6634 }
6635
6636 return "";
6637 }
6638
6639 /* Functions for handling argument passing.
6640
6641 For 32-bit, the first 6 args are normally in registers and the rest are
6642 pushed. Any arg that starts within the first 6 words is at least
6643 partially passed in a register unless its data type forbids.
6644
6645 For 64-bit, the argument registers are laid out as an array of 16 elements
6646 and arguments are added sequentially. The first 6 int args and up to the
6647 first 16 fp args (depending on size) are passed in regs.
6648
6649 Slot Stack Integral Float Float in structure Double Long Double
6650 ---- ----- -------- ----- ------------------ ------ -----------
6651 15 [SP+248] %f31 %f30,%f31 %d30
6652 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6653 13 [SP+232] %f27 %f26,%f27 %d26
6654 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6655 11 [SP+216] %f23 %f22,%f23 %d22
6656 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6657 9 [SP+200] %f19 %f18,%f19 %d18
6658 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6659 7 [SP+184] %f15 %f14,%f15 %d14
6660 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6661 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6662 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6663 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6664 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6665 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6666 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6667
6668 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6669
6670 Integral arguments are always passed as 64-bit quantities appropriately
6671 extended.
6672
6673 Passing of floating point values is handled as follows.
6674 If a prototype is in scope:
6675 If the value is in a named argument (i.e. not a stdarg function or a
6676 value not part of the `...') then the value is passed in the appropriate
6677 fp reg.
6678 If the value is part of the `...' and is passed in one of the first 6
6679 slots then the value is passed in the appropriate int reg.
6680 If the value is part of the `...' and is not passed in one of the first 6
6681 slots then the value is passed in memory.
6682 If a prototype is not in scope:
6683 If the value is one of the first 6 arguments the value is passed in the
6684 appropriate integer reg and the appropriate fp reg.
6685 If the value is not one of the first 6 arguments the value is passed in
6686 the appropriate fp reg and in memory.
6687
6688
6689 Summary of the calling conventions implemented by GCC on the SPARC:
6690
6691 32-bit ABI:
6692 size argument return value
6693
6694 small integer <4 int. reg. int. reg.
6695 word 4 int. reg. int. reg.
6696 double word 8 int. reg. int. reg.
6697
6698 _Complex small integer <8 int. reg. int. reg.
6699 _Complex word 8 int. reg. int. reg.
6700 _Complex double word 16 memory int. reg.
6701
6702 vector integer <=8 int. reg. FP reg.
6703 vector integer >8 memory memory
6704
6705 float 4 int. reg. FP reg.
6706 double 8 int. reg. FP reg.
6707 long double 16 memory memory
6708
6709 _Complex float 8 memory FP reg.
6710 _Complex double 16 memory FP reg.
6711 _Complex long double 32 memory FP reg.
6712
6713 vector float any memory memory
6714
6715 aggregate any memory memory
6716
6717
6718
6719 64-bit ABI:
6720 size argument return value
6721
6722 small integer <8 int. reg. int. reg.
6723 word 8 int. reg. int. reg.
6724 double word 16 int. reg. int. reg.
6725
6726 _Complex small integer <16 int. reg. int. reg.
6727 _Complex word 16 int. reg. int. reg.
6728 _Complex double word 32 memory int. reg.
6729
6730 vector integer <=16 FP reg. FP reg.
6731 vector integer 16<s<=32 memory FP reg.
6732 vector integer >32 memory memory
6733
6734 float 4 FP reg. FP reg.
6735 double 8 FP reg. FP reg.
6736 long double 16 FP reg. FP reg.
6737
6738 _Complex float 8 FP reg. FP reg.
6739 _Complex double 16 FP reg. FP reg.
6740 _Complex long double 32 memory FP reg.
6741
6742 vector float <=16 FP reg. FP reg.
6743 vector float 16<s<=32 memory FP reg.
6744 vector float >32 memory memory
6745
6746 aggregate <=16 reg. reg.
6747 aggregate 16<s<=32 memory reg.
6748 aggregate >32 memory memory
6749
6750
6751
6752 Note #1: complex floating-point types follow the extended SPARC ABIs as
6753 implemented by the Sun compiler.
6754
6755 Note #2: integer vector types follow the scalar floating-point types
6756 conventions to match what is implemented by the Sun VIS SDK.
6757
6758 Note #3: floating-point vector types follow the aggregate types
6759 conventions. */
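/* An illustrative reading of the 64-bit tables above, for a hypothetical
   prototype (not taken from the sources):

     double f (int a, double b, struct { float x; float y; } s);

   A is passed in %o0 (slot 0), B in %d2 (slot 1) and S, an 8-byte
   aggregate with FP fields, in %f4/%f5 (slot 2), matching the
   "Float in structure" column of the slot table.  */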
6760
6761
6762 /* Maximum number of int regs for args. */
6763 #define SPARC_INT_ARG_MAX 6
6764 /* Maximum number of fp regs for args. */
6765 #define SPARC_FP_ARG_MAX 16
6766 /* Number of words (partially) occupied for a given size in units. */
6767 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
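/* For instance, CEIL_NWORDS (4) is 1 and CEIL_NWORDS (12) is 2 when
   UNITS_PER_WORD is 8 (illustrative values only).  */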
6768
6769 /* Handle the INIT_CUMULATIVE_ARGS macro.
6770 Initialize a variable CUM of type CUMULATIVE_ARGS
6771 for a call to a function whose data type is FNTYPE.
6772 For a library call, FNTYPE is 0. */
6773
6774 void
6775 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6776 {
6777 cum->words = 0;
6778 cum->prototype_p = fntype && prototype_p (fntype);
6779 cum->libcall_p = !fntype;
6780 }
6781
6782 /* Handle promotion of pointer and integer arguments. */
6783
6784 static machine_mode
6785 sparc_promote_function_mode (const_tree type, machine_mode mode,
6786 int *punsignedp, const_tree, int)
6787 {
6788 if (type && POINTER_TYPE_P (type))
6789 {
6790 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6791 return Pmode;
6792 }
6793
6794 /* Integral arguments are passed as full words, as per the ABI. */
6795 if (GET_MODE_CLASS (mode) == MODE_INT
6796 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6797 return word_mode;
6798
6799 return mode;
6800 }
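/* For example, a 'short' argument is promoted and passed as a full word,
   i.e. SImode in 32-bit mode and DImode in 64-bit mode, while a pointer
   argument is extended to Pmode (illustrative consequences of the hook).  */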
6801
6802 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6803
6804 static bool
6805 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6806 {
6807 return TARGET_ARCH64 ? true : false;
6808 }
6809
6810 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
6811 Specify whether to pass the argument by reference. */
6812
6813 static bool
6814 sparc_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6815 {
6816 tree type = arg.type;
6817 machine_mode mode = arg.mode;
6818 if (TARGET_ARCH32)
6819 /* Original SPARC 32-bit ABI says that structures and unions,
6820 and quad-precision floats are passed by reference.
6821 All other base types are passed in registers.
6822
6823 Extended ABI (as implemented by the Sun compiler) says that all
6824 complex floats are passed by reference. Pass complex integers
6825 in registers up to 8 bytes. More generally, enforce the 2-word
6826 cap for passing arguments in registers.
6827
6828 Vector ABI (as implemented by the Sun VIS SDK) says that integer
6829 vectors are passed like floats of the same size, that is in
6830 registers up to 8 bytes. Pass all vector floats by reference
6831 like structure and unions. */
6832 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
6833 || mode == SCmode
6834 /* Catch CDImode, TFmode, DCmode and TCmode. */
6835 || GET_MODE_SIZE (mode) > 8
6836 || (type
6837 && VECTOR_TYPE_P (type)
6838 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
6839 else
6840 /* Original SPARC 64-bit ABI says that structures and unions
6841 smaller than 16 bytes are passed in registers, as well as
6842 all other base types.
6843
6844 Extended ABI (as implemented by the Sun compiler) says that
6845 complex floats are passed in registers up to 16 bytes. Pass
6846 all complex integers in registers up to 16 bytes. More generally,
6847 enforce the 2-word cap for passing arguments in registers.
6848
6849 Vector ABI (as implemented by the Sun VIS SDK) says that integer
6850 vectors are passed like floats of the same size, that is in
6851 registers (up to 16 bytes). Pass all vector floats like structure
6852 and unions. */
6853 return ((type
6854 && (AGGREGATE_TYPE_P (type) || VECTOR_TYPE_P (type))
6855 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
6856 /* Catch CTImode and TCmode. */
6857 || GET_MODE_SIZE (mode) > 16);
6858 }
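/* Two illustrative consequences of the rules above: a 'long double'
   (TFmode, 16 bytes) is passed by reference in 32-bit mode but by value
   in 64-bit mode, while a 20-byte aggregate is passed by reference in
   both modes.  */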
6859
6860 /* Traverse the record TYPE recursively and call FUNC on its fields.
6861 NAMED is true if this is for a named parameter. DATA is passed
6862 to FUNC for each field. OFFSET is the starting position and
6863 PACKED is true if we are inside a packed record. */
6864
6865 template <typename T, void Func (const_tree, int, bool, T*)>
6866 static void
6867 traverse_record_type (const_tree type, bool named, T *data,
6868 int offset = 0, bool packed = false)
6869 {
6870 /* The ABI obviously doesn't specify how packed structures are passed.
6871 These are passed in integer regs if possible, otherwise memory. */
6872 if (!packed)
6873 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6874 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6875 {
6876 packed = true;
6877 break;
6878 }
6879
6880 /* Walk the real fields, but skip those with no size or a zero size.
6881 ??? Fields with variable offset are handled as having zero offset. */
6882 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6883 if (TREE_CODE (field) == FIELD_DECL)
6884 {
6885 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6886 continue;
6887
6888 int bitpos = offset;
6889 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6890 bitpos += int_bit_position (field);
6891
6892 tree field_type = TREE_TYPE (field);
6893 if (TREE_CODE (field_type) == RECORD_TYPE)
6894 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6895 packed);
6896 else
6897 {
6898 const bool fp_type
6899 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6900 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6901 data);
6902 }
6903 }
6904 }
6905
6906 /* Handle recursive register classifying for structure layout. */
6907
6908 typedef struct
6909 {
6910 bool fp_regs; /* true if field eligible to FP registers. */
6911 bool fp_regs_in_first_word; /* true if such field in first word. */
6912 } classify_data_t;
6913
6914 /* A subroutine of function_arg_slotno. Classify the field. */
6915
6916 inline void
6917 classify_registers (const_tree, int bitpos, bool fp, classify_data_t *data)
6918 {
6919 if (fp)
6920 {
6921 data->fp_regs = true;
6922 if (bitpos < BITS_PER_WORD)
6923 data->fp_regs_in_first_word = true;
6924 }
6925 }
6926
6927 /* Compute the slot number to pass an argument in.
6928 Return the slot number or -1 if passing on the stack.
6929
6930 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6931 the preceding args and about the function being called.
6932 MODE is the argument's machine mode.
6933 TYPE is the data type of the argument (as a tree).
6934 This is null for libcalls where that information may
6935 not be available.
6936 NAMED is nonzero if this argument is a named parameter
6937 (otherwise it is an extra parameter matching an ellipsis).
6938 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6939 *PREGNO records the register number to use if scalar type.
6940 *PPADDING records the amount of padding needed in words. */
6941
6942 static int
6943 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6944 const_tree type, bool named, bool incoming,
6945 int *pregno, int *ppadding)
6946 {
6947 const int regbase
6948 = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
6949 int slotno = cum->words, regno;
6950 enum mode_class mclass = GET_MODE_CLASS (mode);
6951
6952 /* Silence warnings in the callers. */
6953 *pregno = -1;
6954 *ppadding = -1;
6955
6956 if (type && TREE_ADDRESSABLE (type))
6957 return -1;
6958
6959 /* In 64-bit mode, objects requiring 16-byte alignment get it. */
6960 if (TARGET_ARCH64
6961 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6962 && (slotno & 1) != 0)
6963 {
6964 slotno++;
6965 *ppadding = 1;
6966 }
6967 else
6968 *ppadding = 0;
6969
6970 /* Vector types deserve special treatment because they are polymorphic wrt
6971 their mode, depending upon whether VIS instructions are enabled. */
6972 if (type && VECTOR_TYPE_P (type))
6973 {
6974 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6975 {
6976 /* The SPARC port defines no floating-point vector modes. */
6977 gcc_assert (mode == BLKmode);
6978 }
6979 else
6980 {
6981 /* Integer vector types should either have a vector
6982 mode or an integral mode, because we are guaranteed
6983 by pass_by_reference that their size is not greater
6984 than 16 bytes and TImode is 16-byte wide. */
6985 gcc_assert (mode != BLKmode);
6986
6987 /* Integer vectors are handled like floats as per
6988 the Sun VIS SDK. */
6989 mclass = MODE_FLOAT;
6990 }
6991 }
6992
6993 switch (mclass)
6994 {
6995 case MODE_FLOAT:
6996 case MODE_COMPLEX_FLOAT:
6997 case MODE_VECTOR_INT:
6998 if (TARGET_ARCH64 && TARGET_FPU && named)
6999 {
7000 /* If all arg slots are filled, then must pass on stack. */
7001 if (slotno >= SPARC_FP_ARG_MAX)
7002 return -1;
7003
7004 regno = SPARC_FP_ARG_FIRST + slotno * 2;
7005 /* Arguments filling only a single FP register are
7006 right-justified in the outer double FP register. */
7007 if (GET_MODE_SIZE (mode) <= 4)
7008 regno++;
7009 break;
7010 }
7011 /* fallthrough */
7012
7013 case MODE_INT:
7014 case MODE_COMPLEX_INT:
7015 /* If all arg slots are filled, then must pass on stack. */
7016 if (slotno >= SPARC_INT_ARG_MAX)
7017 return -1;
7018
7019 regno = regbase + slotno;
7020 break;
7021
7022 case MODE_RANDOM:
7023 /* MODE is VOIDmode when generating the actual call. */
7024 if (mode == VOIDmode)
7025 return -1;
7026
7027 if (TARGET_64BIT && TARGET_FPU && named
7028 && type
7029 && (TREE_CODE (type) == RECORD_TYPE || VECTOR_TYPE_P (type)))
7030 {
7031 /* If all arg slots are filled, then must pass on stack. */
7032 if (slotno >= SPARC_FP_ARG_MAX)
7033 return -1;
7034
7035 if (TREE_CODE (type) == RECORD_TYPE)
7036 {
7037 classify_data_t data = { false, false };
7038 traverse_record_type<classify_data_t, classify_registers>
7039 (type, named, &data);
7040
7041 if (data.fp_regs)
7042 {
7043 /* If all FP slots are filled except for the last one and
7044 there is no FP field in the first word, then must pass
7045 on stack. */
7046 if (slotno >= SPARC_FP_ARG_MAX - 1
7047 && !data.fp_regs_in_first_word)
7048 return -1;
7049 }
7050 else
7051 {
7052 /* If all int slots are filled, then must pass on stack. */
7053 if (slotno >= SPARC_INT_ARG_MAX)
7054 return -1;
7055 }
7056
7057 /* PREGNO isn't set since both int and FP regs can be used. */
7058 return slotno;
7059 }
7060
7061 regno = SPARC_FP_ARG_FIRST + slotno * 2;
7062 }
7063 else
7064 {
7065 /* If all arg slots are filled, then must pass on stack. */
7066 if (slotno >= SPARC_INT_ARG_MAX)
7067 return -1;
7068
7069 regno = regbase + slotno;
7070 }
7071 break;
7072
7073 default :
7074 gcc_unreachable ();
7075 }
7076
7077 *pregno = regno;
7078 return slotno;
7079 }
7080
7081 /* Handle recursive register counting/assigning for structure layout. */
7082
7083 typedef struct
7084 {
7085 int slotno; /* slot number of the argument. */
7086 int regbase; /* regno of the base register. */
7087 int intoffset; /* offset of the first pending integer field. */
7088 int nregs; /* number of words passed in registers. */
7089 bool stack; /* true if part of the argument is on the stack. */
7090 rtx ret; /* return expression being built. */
7091 } assign_data_t;
7092
7093 /* A subroutine of function_arg_record_value. Compute the number of integer
7094 registers to be assigned between PARMS->intoffset and BITPOS. Return
7095 true if at least one integer register is assigned or false otherwise. */
7096
7097 static bool
7098 compute_int_layout (int bitpos, assign_data_t *data, int *pnregs)
7099 {
7100 if (data->intoffset < 0)
7101 return false;
7102
7103 const int intoffset = data->intoffset;
7104 data->intoffset = -1;
7105
7106 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
7107 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
7108 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
7109 int nregs = (endbit - startbit) / BITS_PER_WORD;
7110
7111 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
7112 {
7113 nregs = SPARC_INT_ARG_MAX - this_slotno;
7114
7115 /* We need to pass this field (partly) on the stack. */
7116 data->stack = 1;
7117 }
7118
7119 if (nregs <= 0)
7120 return false;
7121
7122 *pnregs = nregs;
7123 return true;
7124 }
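/* A worked example with illustrative numbers: for intoffset = 8 and
   bitpos = 96 on a 64-bit target, startbit = 0, endbit = 128 and
   nregs = (128 - 0) / 64 = 2, i.e. two integer registers cover the span
   even though it neither starts nor ends on a word boundary.  */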
7125
7126 /* A subroutine of function_arg_record_value. Compute the number and the mode
7127 of the FP registers to be assigned for FIELD. Return true if at least one
7128 FP register is assigned or false otherwise. */
7129
7130 static bool
7131 compute_fp_layout (const_tree field, int bitpos, assign_data_t *data,
7132 int *pnregs, machine_mode *pmode)
7133 {
7134 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
7135 machine_mode mode = DECL_MODE (field);
7136 int nregs, nslots;
7137
7138 /* Slots are counted as words while regs are counted as having the size of
7139 the (inner) mode. */
7140 if (VECTOR_TYPE_P (TREE_TYPE (field)) && mode == BLKmode)
7141 {
7142 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
7143 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
7144 }
7145 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
7146 {
7147 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
7148 nregs = 2;
7149 }
7150 else
7151 nregs = 1;
7152
7153 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
7154
7155 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
7156 {
7157 nslots = SPARC_FP_ARG_MAX - this_slotno;
7158 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
7159
7160 /* We need to pass this field (partly) on the stack. */
7161 data->stack = 1;
7162
7163 if (nregs <= 0)
7164 return false;
7165 }
7166
7167 *pnregs = nregs;
7168 *pmode = mode;
7169 return true;
7170 }
7171
7172 /* A subroutine of function_arg_record_value. Count the number of registers
7173 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
7174
7175 inline void
7176 count_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
7177 {
7178 if (fp)
7179 {
7180 int nregs;
7181 machine_mode mode;
7182
7183 if (compute_int_layout (bitpos, data, &nregs))
7184 data->nregs += nregs;
7185
7186 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
7187 data->nregs += nregs;
7188 }
7189 else
7190 {
7191 if (data->intoffset < 0)
7192 data->intoffset = bitpos;
7193 }
7194 }
7195
7196 /* A subroutine of function_arg_record_value. Assign the bits of the
7197 structure between PARMS->intoffset and BITPOS to integer registers. */
7198
7199 static void
7200 assign_int_registers (int bitpos, assign_data_t *data)
7201 {
7202 int intoffset = data->intoffset;
7203 machine_mode mode;
7204 int nregs;
7205
7206 if (!compute_int_layout (bitpos, data, &nregs))
7207 return;
7208
7209 /* If this is the trailing part of a word, only load that much into
7210 the register. Otherwise load the whole register. Note that in
7211 the latter case we may pick up unwanted bits. It's not a problem
7212      at the moment, but we may wish to revisit it.  */
7213 if (intoffset % BITS_PER_WORD != 0)
7214 mode = smallest_int_mode_for_size (BITS_PER_WORD
7215 - intoffset % BITS_PER_WORD);
7216 else
7217 mode = word_mode;
7218
7219 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
7220 unsigned int regno = data->regbase + this_slotno;
7221 intoffset /= BITS_PER_UNIT;
7222
7223 do
7224 {
7225 rtx reg = gen_rtx_REG (mode, regno);
7226 XVECEXP (data->ret, 0, data->stack + data->nregs)
7227 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
7228 data->nregs += 1;
7229 mode = word_mode;
7230 regno += 1;
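      /* Round the byte offset up to the next word boundary; e.g. with
	 UNITS_PER_WORD == 8, an offset of 5 becomes 8.  */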
7231 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
7232 }
7233 while (--nregs > 0);
7234 }
7235
7236 /* A subroutine of function_arg_record_value. Assign FIELD at position
7237 BITPOS to FP registers. */
7238
7239 static void
7240 assign_fp_registers (const_tree field, int bitpos, assign_data_t *data)
7241 {
7242 int nregs;
7243 machine_mode mode;
7244
7245 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
7246 return;
7247
7248 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
7249 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
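  /* A sub-word FP field that sits in the second (upper) half of its 64-bit
     slot goes in the odd register of the FP pair.  */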
7250 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
7251 regno++;
7252 int pos = bitpos / BITS_PER_UNIT;
7253
7254 do
7255 {
7256 rtx reg = gen_rtx_REG (mode, regno);
7257 XVECEXP (data->ret, 0, data->stack + data->nregs)
7258 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
7259 data->nregs += 1;
7260 regno += GET_MODE_SIZE (mode) / 4;
7261 pos += GET_MODE_SIZE (mode);
7262 }
7263 while (--nregs > 0);
7264 }
7265
7266 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
7267 the structure between PARMS->intoffset and BITPOS to registers. */
7268
7269 inline void
7270 assign_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
7271 {
7272 if (fp)
7273 {
7274 assign_int_registers (bitpos, data);
7275
7276 assign_fp_registers (field, bitpos, data);
7277 }
7278 else
7279 {
7280 if (data->intoffset < 0)
7281 data->intoffset = bitpos;
7282 }
7283 }
7284
7285 /* Used by function_arg and function_value to implement the complex
7286 conventions of the 64-bit ABI for passing and returning structures.
7287 Return an expression valid as a return value for the FUNCTION_ARG
7288 and TARGET_FUNCTION_VALUE.
7289
7290 TYPE is the data type of the argument (as a tree).
7291 This is null for libcalls where that information may
7292 not be available.
7293 MODE is the argument's machine mode.
7294 SLOTNO is the index number of the argument's slot in the parameter array.
7295 NAMED is true if this argument is a named parameter
7296 (otherwise it is an extra parameter matching an ellipsis).
7297 REGBASE is the regno of the base register for the parameter array. */
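/* As a purely illustrative sketch of the outgoing case: a record such as
   struct { double d; long l; } passed in the first slot would typically
   yield something like
     (parallel [(expr_list (reg:DF %f0) (const_int 0))
		(expr_list (reg:DI %o1) (const_int 8))])
   i.e. the FP field in an FP register and the integer field in the
   corresponding integer argument register, each tagged with its byte
   offset within the record.  */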
7298
7299 static rtx
7300 function_arg_record_value (const_tree type, machine_mode mode,
7301 int slotno, bool named, int regbase)
7302 {
7303 const int size = int_size_in_bytes (type);
7304 assign_data_t data;
7305 int nregs;
7306
7307 data.slotno = slotno;
7308 data.regbase = regbase;
7309
7310 /* Count how many registers we need. */
7311 data.nregs = 0;
7312 data.intoffset = 0;
7313 data.stack = false;
7314 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
7315
7316 /* Take into account pending integer fields. */
7317 if (compute_int_layout (size * BITS_PER_UNIT, &data, &nregs))
7318 data.nregs += nregs;
7319
7320 /* Allocate the vector and handle some annoying special cases. */
7321 nregs = data.nregs;
7322
7323 if (nregs == 0)
7324 {
7325 /* ??? Empty structure has no value? Duh? */
7326 if (size <= 0)
7327 {
7328 /* Though there's nothing really to store, return a word register
7329 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
7330 leads to breakage due to the fact that there are zero bytes to
7331 load. */
7332 return gen_rtx_REG (mode, regbase);
7333 }
7334
7335 /* ??? C++ has structures with no fields, and yet a size. Give up
7336 for now and pass everything back in integer registers. */
7337 nregs = CEIL_NWORDS (size);
7338 if (nregs + slotno > SPARC_INT_ARG_MAX)
7339 nregs = SPARC_INT_ARG_MAX - slotno;
7340 }
7341
7342 gcc_assert (nregs > 0);
7343
7344 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
7345
7346 /* If at least one field must be passed on the stack, generate
7347 (parallel [(expr_list (nil) ...) ...]) so that all fields will
7348 also be passed on the stack. We can't do much better because the
7349 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
7350 of structures for which the fields passed exclusively in registers
7351 are not at the beginning of the structure. */
7352 if (data.stack)
7353 XVECEXP (data.ret, 0, 0)
7354 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7355
7356 /* Assign the registers. */
7357 data.nregs = 0;
7358 data.intoffset = 0;
7359 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
7360
7361 /* Assign pending integer fields. */
7362 assign_int_registers (size * BITS_PER_UNIT, &data);
7363
7364 gcc_assert (data.nregs == nregs);
7365
7366 return data.ret;
7367 }
7368
7369 /* Used by function_arg and function_value to implement the conventions
7370 of the 64-bit ABI for passing and returning unions.
7371 Return an expression valid as a return value for the FUNCTION_ARG
7372 and TARGET_FUNCTION_VALUE.
7373
7374 SIZE is the size in bytes of the union.
7375 MODE is the argument's machine mode.
7376 SLOTNO is the index number of the argument's slot in the parameter array.
7377 REGNO is the hard register the union will be passed in. */
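/* For instance (illustrative only): a 12-byte union passed in slot 0 of the
   outgoing registers would yield
     (parallel [(expr_list (reg:DI %o0) (const_int 0))
		(expr_list (reg:DI %o1) (const_int 8))])
   i.e. two left-justified words.  */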
7378
7379 static rtx
7380 function_arg_union_value (int size, machine_mode mode, int slotno, int regno)
7381 {
7382 unsigned int nwords;
7383
7384 /* See comment in function_arg_record_value for empty structures. */
7385 if (size <= 0)
7386 return gen_rtx_REG (mode, regno);
7387
7388 if (slotno == SPARC_INT_ARG_MAX - 1)
7389 nwords = 1;
7390 else
7391 nwords = CEIL_NWORDS (size);
7392
7393 rtx regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
7394
7395 /* Unions are passed left-justified. */
7396 for (unsigned int i = 0; i < nwords; i++)
7397 XVECEXP (regs, 0, i)
7398 = gen_rtx_EXPR_LIST (VOIDmode,
7399 gen_rtx_REG (word_mode, regno + i),
7400 GEN_INT (UNITS_PER_WORD * i));
7401
7402 return regs;
7403 }
7404
7405 /* Used by function_arg and function_value to implement the conventions
7406 of the 64-bit ABI for passing and returning BLKmode vectors.
7407 Return an expression valid as a return value for the FUNCTION_ARG
7408 and TARGET_FUNCTION_VALUE.
7409
7410 SIZE is the size in bytes of the vector.
7411 SLOTNO is the index number of the argument's slot in the parameter array.
7412 NAMED is true if this argument is a named parameter
7413 (otherwise it is an extra parameter matching an ellipsis).
7414 REGNO is the hard register the vector will be passed in. */
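/* For instance (illustrative only): a named 16-byte vector starting at FP
   slot 0 would yield
     (parallel [(expr_list (reg:DI %f0) (const_int 0))
		(expr_list (reg:DI %f2) (const_int 8))])
   each 8-byte word occupying an even/odd FP register pair, hence the
   stride of 2 for named arguments.  */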
7415
7416 static rtx
7417 function_arg_vector_value (int size, int slotno, bool named, int regno)
7418 {
7419 const int mult = (named ? 2 : 1);
7420 unsigned int nwords;
7421
7422 if (slotno == (named ? SPARC_FP_ARG_MAX : SPARC_INT_ARG_MAX) - 1)
7423 nwords = 1;
7424 else
7425 nwords = CEIL_NWORDS (size);
7426
7427 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nwords));
7428
7429 if (size < UNITS_PER_WORD)
7430 XVECEXP (regs, 0, 0)
7431 = gen_rtx_EXPR_LIST (VOIDmode,
7432 gen_rtx_REG (SImode, regno),
7433 const0_rtx);
7434 else
7435 for (unsigned int i = 0; i < nwords; i++)
7436 XVECEXP (regs, 0, i)
7437 = gen_rtx_EXPR_LIST (VOIDmode,
7438 gen_rtx_REG (word_mode, regno + i * mult),
7439 GEN_INT (i * UNITS_PER_WORD));
7440
7441 return regs;
7442 }
7443
7444 /* Determine where to put an argument to a function.
7445 Value is zero to push the argument on the stack,
7446 or a hard register in which to store the argument.
7447
7448 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7449 the preceding args and about the function being called.
7450 ARG is a description of the argument.
7451 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
7452 TARGET_FUNCTION_INCOMING_ARG. */
7453
7454 static rtx
7455 sparc_function_arg_1 (cumulative_args_t cum_v, const function_arg_info &arg,
7456 bool incoming)
7457 {
7458 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7459 const int regbase
7460 = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
7461 int slotno, regno, padding;
7462 tree type = arg.type;
7463 machine_mode mode = arg.mode;
7464 enum mode_class mclass = GET_MODE_CLASS (mode);
7465 bool named = arg.named;
7466
7467 slotno
7468     = function_arg_slotno (cum, mode, type, named, incoming, &regno, &padding);
7469 if (slotno == -1)
7470 return 0;
7471
7472 /* Integer vectors are handled like floats as per the Sun VIS SDK. */
7473 if (type && VECTOR_INTEGER_TYPE_P (type))
7474 mclass = MODE_FLOAT;
7475
7476 if (TARGET_ARCH32)
7477 return gen_rtx_REG (mode, regno);
7478
7479 /* Structures up to 16 bytes in size are passed in arg slots on the stack
7480 and are promoted to registers if possible. */
7481 if (type && TREE_CODE (type) == RECORD_TYPE)
7482 {
7483 const int size = int_size_in_bytes (type);
7484 gcc_assert (size <= 16);
7485
7486 return function_arg_record_value (type, mode, slotno, named, regbase);
7487 }
7488
7489 /* Unions up to 16 bytes in size are passed in integer registers. */
7490 else if (type && TREE_CODE (type) == UNION_TYPE)
7491 {
7492 const int size = int_size_in_bytes (type);
7493 gcc_assert (size <= 16);
7494
7495 return function_arg_union_value (size, mode, slotno, regno);
7496 }
7497
7498 /* Floating-point vectors up to 16 bytes are passed in registers. */
7499 else if (type && VECTOR_TYPE_P (type) && mode == BLKmode)
7500 {
7501 const int size = int_size_in_bytes (type);
7502 gcc_assert (size <= 16);
7503
7504 return function_arg_vector_value (size, slotno, named, regno);
7505 }
7506
7507 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
7508 but also have the slot allocated for them.
7509      If no prototype is in scope, fp values in register slots get passed
7510      in two places: either in fp regs and int regs, or in fp regs and memory.  */
7511 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7512 && SPARC_FP_REG_P (regno))
7513 {
7514 rtx reg = gen_rtx_REG (mode, regno);
7515 if (cum->prototype_p || cum->libcall_p)
7516 return reg;
7517 else
7518 {
7519 rtx v0, v1;
7520
7521 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
7522 {
7523 int intreg;
7524
7525 /* On incoming, we don't need to know that the value
7526 is passed in %f0 and %i0, and it confuses other parts
7527 causing needless spillage even on the simplest cases. */
7528 if (incoming)
7529 return reg;
7530
7531 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
7532 + (regno - SPARC_FP_ARG_FIRST) / 2);
7533
7534 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7535 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
7536 const0_rtx);
7537 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7538 }
7539 else
7540 {
7541 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7542 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7543 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7544 }
7545 }
7546 }
7547
7548 /* All other aggregate types are passed in an integer register in a mode
7549 corresponding to the size of the type. */
7550 else if (type && AGGREGATE_TYPE_P (type))
7551 {
7552 const int size = int_size_in_bytes (type);
7553 gcc_assert (size <= 16);
7554
7555 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7556 }
7557
7558 return gen_rtx_REG (mode, regno);
7559 }
7560
7561 /* Handle the TARGET_FUNCTION_ARG target hook. */
7562
7563 static rtx
7564 sparc_function_arg (cumulative_args_t cum, const function_arg_info &arg)
7565 {
7566 return sparc_function_arg_1 (cum, arg, false);
7567 }
7568
7569 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
7570
7571 static rtx
7572 sparc_function_incoming_arg (cumulative_args_t cum,
7573 const function_arg_info &arg)
7574 {
7575 return sparc_function_arg_1 (cum, arg, true);
7576 }
7577
7578 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
7579
7580 static unsigned int
7581 sparc_function_arg_boundary (machine_mode mode, const_tree type)
7582 {
7583 return ((TARGET_ARCH64
7584 && (GET_MODE_ALIGNMENT (mode) == 128
7585 || (type && TYPE_ALIGN (type) == 128)))
7586 ? 128
7587 : PARM_BOUNDARY);
7588 }
7589
7590 /* For an arg passed partly in registers and partly in memory,
7591 this is the number of bytes of registers used.
7592 For args passed entirely in registers or entirely in memory, zero.
7593
7594 Any arg that starts in the first 6 regs but won't entirely fit in them
7595 needs partial registers on v8. On v9, structures with integer
7596 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7597 values that begin in the last fp reg [where "last fp reg" varies with the
7598 mode] will be split between that reg and memory. */
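/* For instance (illustrative only): on V8, a DImode argument landing in the
   last integer slot (%o5) would return UNITS_PER_WORD here, since one word
   goes in %o5 and the other on the stack.  */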
7599
7600 static int
7601 sparc_arg_partial_bytes (cumulative_args_t cum, const function_arg_info &arg)
7602 {
7603 int slotno, regno, padding;
7604
7605   /* We pass false for incoming here; it doesn't matter.  */
7606 slotno = function_arg_slotno (get_cumulative_args (cum), arg.mode, arg.type,
7607                                 arg.named, false, &regno, &padding);
7608
7609 if (slotno == -1)
7610 return 0;
7611
7612 if (TARGET_ARCH32)
7613 {
7614 /* We are guaranteed by pass_by_reference that the size of the
7615 argument is not greater than 8 bytes, so we only need to return
7616 one word if the argument is partially passed in registers. */
7617 const int size = GET_MODE_SIZE (arg.mode);
7618
7619 if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
7620 return UNITS_PER_WORD;
7621 }
7622 else
7623 {
7624 /* We are guaranteed by pass_by_reference that the size of the
7625 argument is not greater than 16 bytes, so we only need to return
7626 one word if the argument is partially passed in registers. */
7627 if (arg.aggregate_type_p ())
7628 {
7629 const int size = int_size_in_bytes (arg.type);
7630
7631 if (size > UNITS_PER_WORD
7632 && (slotno == SPARC_INT_ARG_MAX - 1
7633 || slotno == SPARC_FP_ARG_MAX - 1))
7634 return UNITS_PER_WORD;
7635 }
7636 else if (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_INT
7637 || ((GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT
7638 || (arg.type && VECTOR_TYPE_P (arg.type)))
7639 && !(TARGET_FPU && arg.named)))
7640 {
7641 const int size = (arg.type && VECTOR_FLOAT_TYPE_P (arg.type))
7642 ? int_size_in_bytes (arg.type)
7643 : GET_MODE_SIZE (arg.mode);
7644
7645 if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
7646 return UNITS_PER_WORD;
7647 }
7648 else if (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT
7649 || (arg.type && VECTOR_TYPE_P (arg.type)))
7650 {
7651 const int size = (arg.type && VECTOR_FLOAT_TYPE_P (arg.type))
7652 ? int_size_in_bytes (arg.type)
7653 : GET_MODE_SIZE (arg.mode);
7654
7655 if (size > UNITS_PER_WORD && slotno == SPARC_FP_ARG_MAX - 1)
7656 return UNITS_PER_WORD;
7657 }
7658 }
7659
7660 return 0;
7661 }
7662
7663 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7664 Update the data in CUM to advance over argument ARG. */
7665
7666 static void
7667 sparc_function_arg_advance (cumulative_args_t cum_v,
7668 const function_arg_info &arg)
7669 {
7670 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7671 tree type = arg.type;
7672 machine_mode mode = arg.mode;
7673 int regno, padding;
7674
7675   /* We pass false for incoming here; it doesn't matter.  */
7676   function_arg_slotno (cum, mode, type, arg.named, false, &regno, &padding);
7677
7678 /* If argument requires leading padding, add it. */
7679 cum->words += padding;
7680
7681 if (TARGET_ARCH32)
7682 cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
7683 else
7684 {
7685 /* For types that can have BLKmode, get the size from the type. */
7686 if (type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7687 {
7688 const int size = int_size_in_bytes (type);
7689
7690 /* See comment in function_arg_record_value for empty structures. */
7691 if (size <= 0)
7692 cum->words++;
7693 else
7694 cum->words += CEIL_NWORDS (size);
7695 }
7696 else
7697 cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
7698 }
7699 }
7700
7701 /* Implement TARGET_FUNCTION_ARG_PADDING.  For the 64-bit ABI, structs
7702    are always stored left-shifted in their argument slot.  */
7703
7704 static pad_direction
7705 sparc_function_arg_padding (machine_mode mode, const_tree type)
7706 {
7707 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7708 return PAD_UPWARD;
7709
7710 /* Fall back to the default. */
7711 return default_function_arg_padding (mode, type);
7712 }
7713
7714 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7715 Specify whether to return the return value in memory. */
7716
7717 static bool
7718 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7719 {
7720 if (TARGET_ARCH32)
7721 /* Original SPARC 32-bit ABI says that structures and unions, and
7722 quad-precision floats are returned in memory. But note that the
7723 first part is implemented through -fpcc-struct-return being the
7724 default, so here we only implement -freg-struct-return instead.
7725 All other base types are returned in registers.
7726
7727 Extended ABI (as implemented by the Sun compiler) says that
7728 all complex floats are returned in registers (8 FP registers
7729 at most for '_Complex long double'). Return all complex integers
7730 in registers (4 at most for '_Complex long long').
7731
7732 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7733 integers are returned like floats of the same size, that is in
7734 registers up to 8 bytes and in memory otherwise. Return all
7735 vector floats in memory like structure and unions; note that
7736 they always have BLKmode like the latter. */
7737 return (TYPE_MODE (type) == BLKmode
7738 || TYPE_MODE (type) == TFmode
7739 || (TREE_CODE (type) == VECTOR_TYPE
7740 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7741 else
7742 /* Original SPARC 64-bit ABI says that structures and unions
7743 smaller than 32 bytes are returned in registers, as well as
7744 all other base types.
7745
7746 Extended ABI (as implemented by the Sun compiler) says that all
7747 complex floats are returned in registers (8 FP registers at most
7748 for '_Complex long double'). Return all complex integers in
7749 registers (4 at most for '_Complex TItype').
7750
7751 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7752 integers are returned like floats of the same size, that is in
7753 registers. Return all vector floats like structure and unions;
7754 note that they always have BLKmode like the latter. */
7755 return (TYPE_MODE (type) == BLKmode
7756 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7757 }
7758
7759 /* Handle the TARGET_STRUCT_VALUE target hook.
7760 Return where to find the structure return value address. */
7761
7762 static rtx
7763 sparc_struct_value_rtx (tree fndecl, int incoming)
7764 {
7765 if (TARGET_ARCH64)
7766 return NULL_RTX;
7767 else
7768 {
7769 rtx mem;
7770
7771 if (incoming)
7772 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7773 STRUCT_VALUE_OFFSET));
7774 else
7775 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7776 STRUCT_VALUE_OFFSET));
7777
7778 /* Only follow the SPARC ABI for fixed-size structure returns.
7779 Variable size structure returns are handled per the normal
7780 	 procedures in GCC.  This is enabled by -mstd-struct-return.  */
7781 if (incoming == 2
7782 && sparc_std_struct_return
7783 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7784 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7785 {
7786 /* We must check and adjust the return address, as it is optional
7787 as to whether the return object is really provided. */
7788 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7789 rtx scratch = gen_reg_rtx (SImode);
7790 rtx_code_label *endlab = gen_label_rtx ();
7791
7792 /* Calculate the return object size. */
7793 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7794 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7795 /* Construct a temporary return value. */
7796 rtx temp_val
7797 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7798
7799 /* Implement SPARC 32-bit psABI callee return struct checking:
7800
7801 Fetch the instruction where we will return to and see if
7802 it's an unimp instruction (the most significant 10 bits
7803 will be zero). */
7804 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7805 plus_constant (Pmode,
7806 ret_reg, 8)));
7807 /* Assume the size is valid and pre-adjust. */
7808 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7809 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7810 0, endlab);
7811 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7812 /* Write the address of the memory pointed to by temp_val into
7813 the memory pointed to by mem. */
7814 emit_move_insn (mem, XEXP (temp_val, 0));
7815 emit_label (endlab);
7816 }
7817
7818 return mem;
7819 }
7820 }
7821
7822 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7823 For v9, function return values are subject to the same rules as arguments,
7824 except that up to 32 bytes may be returned in registers. */
7825
7826 static rtx
7827 sparc_function_value_1 (const_tree type, machine_mode mode, bool outgoing)
7828 {
7829 /* Beware that the two values are swapped here wrt function_arg. */
7830 const int regbase
7831 = outgoing ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
7832 enum mode_class mclass = GET_MODE_CLASS (mode);
7833 int regno;
7834
7835 /* Integer vectors are handled like floats as per the Sun VIS SDK.
7836 Note that integer vectors larger than 16 bytes have BLKmode so
7837 they need to be handled like floating-point vectors below. */
7838 if (type && VECTOR_INTEGER_TYPE_P (type) && mode != BLKmode)
7839 mclass = MODE_FLOAT;
7840
7841 if (TARGET_ARCH64 && type)
7842 {
7843 /* Structures up to 32 bytes in size are returned in registers. */
7844 if (TREE_CODE (type) == RECORD_TYPE)
7845 {
7846 const int size = int_size_in_bytes (type);
7847 gcc_assert (size <= 32);
7848
7849 return function_arg_record_value (type, mode, 0, true, regbase);
7850 }
7851
7852 /* Unions up to 32 bytes in size are returned in integer registers. */
7853 else if (TREE_CODE (type) == UNION_TYPE)
7854 {
7855 const int size = int_size_in_bytes (type);
7856 gcc_assert (size <= 32);
7857
7858 return function_arg_union_value (size, mode, 0, regbase);
7859 }
7860
7861 /* Vectors up to 32 bytes are returned in FP registers. */
7862 else if (VECTOR_TYPE_P (type) && mode == BLKmode)
7863 {
7864 const int size = int_size_in_bytes (type);
7865 gcc_assert (size <= 32);
7866
7867 return function_arg_vector_value (size, 0, true, SPARC_FP_ARG_FIRST);
7868 }
7869
7870 /* Objects that require it are returned in FP registers. */
7871 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7872 ;
7873
7874 /* All other aggregate types are returned in an integer register in a
7875 mode corresponding to the size of the type. */
7876 else if (AGGREGATE_TYPE_P (type))
7877 {
7878 /* All other aggregate types are passed in an integer register
7879 in a mode corresponding to the size of the type. */
7880 const int size = int_size_in_bytes (type);
7881 gcc_assert (size <= 32);
7882
7883 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7884
7885 /* ??? We probably should have made the same ABI change in
7886 3.4.0 as the one we made for unions. The latter was
7887 required by the SCD though, while the former is not
7888 specified, so we favored compatibility and efficiency.
7889
7890 Now we're stuck for aggregates larger than 16 bytes,
7891 because OImode vanished in the meantime. Let's not
7892 try to be unduly clever, and simply follow the ABI
7893 for unions in that case. */
7894 if (mode == BLKmode)
7895 return function_arg_union_value (size, mode, 0, regbase);
7896 else
7897 mclass = MODE_INT;
7898 }
7899
7900 /* We should only have pointer and integer types at this point. This
7901 must match sparc_promote_function_mode. */
7902 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7903 mode = word_mode;
7904 }
7905
7906 /* We should only have pointer and integer types at this point, except with
7907 -freg-struct-return. This must match sparc_promote_function_mode. */
7908 else if (TARGET_ARCH32
7909 && !(type && AGGREGATE_TYPE_P (type))
7910 && mclass == MODE_INT
7911 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7912 mode = word_mode;
7913
7914 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7915 regno = SPARC_FP_ARG_FIRST;
7916 else
7917 regno = regbase;
7918
7919 return gen_rtx_REG (mode, regno);
7920 }
7921
7922 /* Handle TARGET_FUNCTION_VALUE.
7923 On the SPARC, the value is found in the first "output" register, but the
7924 called function leaves it in the first "input" register. */
7925
7926 static rtx
7927 sparc_function_value (const_tree valtype,
7928 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7929 bool outgoing)
7930 {
7931 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7932 }
7933
7934 /* Handle TARGET_LIBCALL_VALUE. */
7935
7936 static rtx
7937 sparc_libcall_value (machine_mode mode,
7938 const_rtx fun ATTRIBUTE_UNUSED)
7939 {
7940 return sparc_function_value_1 (NULL_TREE, mode, false);
7941 }
7942
7943 /* Handle FUNCTION_VALUE_REGNO_P.
7944 On the SPARC, the first "output" reg is used for integer values, and the
7945 first floating point register is used for floating point values. */
7946
7947 static bool
7948 sparc_function_value_regno_p (const unsigned int regno)
7949 {
7950 return (regno == 8 || (TARGET_FPU && regno == 32));
7951 }
7952
7953 /* Do what is necessary for `va_start'. We look at the current function
7954 to determine if stdarg or varargs is used and return the address of
7955 the first unnamed parameter. */
7956
7957 static rtx
7958 sparc_builtin_saveregs (void)
7959 {
7960 int first_reg = crtl->args.info.words;
7961 rtx address;
7962 int regno;
7963
7964 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7965 emit_move_insn (gen_rtx_MEM (word_mode,
7966 gen_rtx_PLUS (Pmode,
7967 frame_pointer_rtx,
7968 GEN_INT (FIRST_PARM_OFFSET (0)
7969 + (UNITS_PER_WORD
7970 * regno)))),
7971 gen_rtx_REG (word_mode,
7972 SPARC_INCOMING_INT_ARG_FIRST + regno));
7973
7974 address = gen_rtx_PLUS (Pmode,
7975 frame_pointer_rtx,
7976 GEN_INT (FIRST_PARM_OFFSET (0)
7977 + UNITS_PER_WORD * first_reg));
7978
7979 return address;
7980 }
7981
7982 /* Implement `va_start' for stdarg. */
7983
7984 static void
7985 sparc_va_start (tree valist, rtx nextarg)
7986 {
7987 nextarg = expand_builtin_saveregs ();
7988 std_expand_builtin_va_start (valist, nextarg);
7989 }
7990
7991 /* Implement `va_arg' for stdarg. */
7992
7993 static tree
7994 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7995 gimple_seq *post_p)
7996 {
7997 HOST_WIDE_INT size, rsize, align;
7998 tree addr, incr;
7999 bool indirect;
8000 tree ptrtype = build_pointer_type (type);
8001
8002 if (pass_va_arg_by_reference (type))
8003 {
8004 indirect = true;
8005 size = rsize = UNITS_PER_WORD;
8006 align = 0;
8007 }
8008 else
8009 {
8010 indirect = false;
8011 size = int_size_in_bytes (type);
8012 rsize = ROUND_UP (size, UNITS_PER_WORD);
8013 align = 0;
8014
8015 if (TARGET_ARCH64)
8016 {
8017 /* For SPARC64, objects requiring 16-byte alignment get it. */
8018 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
8019 align = 2 * UNITS_PER_WORD;
8020
8021 /* SPARC-V9 ABI states that structures up to 16 bytes in size
8022 are left-justified in their slots. */
8023 if (AGGREGATE_TYPE_P (type))
8024 {
8025 if (size == 0)
8026 size = rsize = UNITS_PER_WORD;
8027 else
8028 size = rsize;
8029 }
8030 }
8031 }
8032
8033 incr = valist;
8034 if (align)
8035 {
8036 incr = fold_build_pointer_plus_hwi (incr, align - 1);
8037 incr = fold_convert (sizetype, incr);
8038 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
8039 size_int (-align));
8040 incr = fold_convert (ptr_type_node, incr);
8041 }
8042
8043 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
8044 addr = incr;
8045
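  /* On this big-endian target, a value smaller than its slot is
     right-justified within it, so step the address past the padding.  */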
8046 if (BYTES_BIG_ENDIAN && size < rsize)
8047 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
8048
8049 if (indirect)
8050 {
8051 addr = fold_convert (build_pointer_type (ptrtype), addr);
8052 addr = build_va_arg_indirect_ref (addr);
8053 }
8054
8055 /* If the address isn't aligned properly for the type, we need a temporary.
8056 FIXME: This is inefficient, usually we can do this in registers. */
8057 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
8058 {
8059 tree tmp = create_tmp_var (type, "va_arg_tmp");
8060 tree dest_addr = build_fold_addr_expr (tmp);
8061 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8062 3, dest_addr, addr, size_int (rsize));
8063 TREE_ADDRESSABLE (tmp) = 1;
8064 gimplify_and_add (copy, pre_p);
8065 addr = dest_addr;
8066 }
8067
8068 else
8069 addr = fold_convert (ptrtype, addr);
8070
8071 incr = fold_build_pointer_plus_hwi (incr, rsize);
8072 gimplify_assign (valist, incr, post_p);
8073
8074 return build_va_arg_indirect_ref (addr);
8075 }
8076
8077 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
8078 Specify whether the vector mode is supported by the hardware. */
8079
8080 static bool
8081 sparc_vector_mode_supported_p (machine_mode mode)
8082 {
8083   return TARGET_VIS && VECTOR_MODE_P (mode);
8084 }
8085
8086 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
8087
8088 static machine_mode
8089 sparc_preferred_simd_mode (scalar_mode mode)
8090 {
8091 if (TARGET_VIS)
8092 switch (mode)
8093 {
8094 case E_SImode:
8095 return V2SImode;
8096 case E_HImode:
8097 return V4HImode;
8098 case E_QImode:
8099 return V8QImode;
8100
8101 default:;
8102 }
8103
8104 return word_mode;
8105 }
8106
8107 /* Implement TARGET_CAN_FOLLOW_JUMP. */
8108
8109 static bool
8110 sparc_can_follow_jump (const rtx_insn *follower, const rtx_insn *followee)
8111 {
8112 /* Do not fold unconditional jumps that have been created for crossing
8113 partition boundaries. */
8114 if (CROSSING_JUMP_P (followee) && !CROSSING_JUMP_P (follower))
8115 return false;
8116
8117 return true;
8118 }
8119
8120 /* Return the string to output an unconditional branch to LABEL, which is
8121 the operand number of the label.
8122
8123 DEST is the destination insn (i.e. the label), INSN is the source. */
8124
8125 const char *
8126 output_ubranch (rtx dest, rtx_insn *insn)
8127 {
8128 static char string[64];
8129 bool v9_form = false;
8130 int delta;
8131 char *p;
8132
8133 /* Even if we are trying to use cbcond for this, evaluate
8134 whether we can use V9 branches as our backup plan. */
8135 delta = 5000000;
8136 if (!CROSSING_JUMP_P (insn) && INSN_ADDRESSES_SET_P ())
8137 delta = (INSN_ADDRESSES (INSN_UID (dest))
8138 - INSN_ADDRESSES (INSN_UID (insn)));
8139
8140 /* Leave some instructions for "slop". */
8141 if (TARGET_V9 && delta >= -260000 && delta < 260000)
8142 v9_form = true;
8143
8144 if (TARGET_CBCOND)
8145 {
8146 bool emit_nop = emit_cbcond_nop (insn);
8147 bool far = false;
8148 const char *rval;
8149
8150 if (delta < -500 || delta > 500)
8151 far = true;
8152
8153 if (far)
8154 {
8155 if (v9_form)
8156 rval = "ba,a,pt\t%%xcc, %l0";
8157 else
8158 rval = "b,a\t%l0";
8159 }
8160 else
8161 {
8162 if (emit_nop)
8163 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
8164 else
8165 rval = "cwbe\t%%g0, %%g0, %l0";
8166 }
8167 return rval;
8168 }
8169
8170 if (v9_form)
8171 strcpy (string, "ba%*,pt\t%%xcc, ");
8172 else
8173 strcpy (string, "b%*\t");
8174
8175 p = strchr (string, '\0');
8176 *p++ = '%';
8177 *p++ = 'l';
8178 *p++ = '0';
8179 *p++ = '%';
8180 *p++ = '(';
8181 *p = '\0';
8182
8183 return string;
8184 }
8185
8186 /* Return the string to output a conditional branch to LABEL, which is
8187 the operand number of the label. OP is the conditional expression.
8188 XEXP (OP, 0) is assumed to be a condition code register (integer or
8189 floating point) and its mode specifies what kind of comparison we made.
8190
8191 DEST is the destination insn (i.e. the label), INSN is the source.
8192
8193 REVERSED is nonzero if we should reverse the sense of the comparison.
8194
8195 ANNUL is nonzero if we should generate an annulling branch. */
8196
8197 const char *
8198 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
8199 rtx_insn *insn)
8200 {
8201 static char string[64];
8202 enum rtx_code code = GET_CODE (op);
8203 rtx cc_reg = XEXP (op, 0);
8204 machine_mode mode = GET_MODE (cc_reg);
8205 const char *labelno, *branch;
8206 int spaces = 8, far;
8207 char *p;
8208
8209 /* v9 branches are limited to +-1MB. If it is too far away,
8210 change
8211
8212 bne,pt %xcc, .LC30
8213
8214 to
8215
8216 be,pn %xcc, .+12
8217 nop
8218 ba .LC30
8219
8220 and
8221
8222 fbne,a,pn %fcc2, .LC29
8223
8224 to
8225
8226 fbe,pt %fcc2, .+16
8227 nop
8228 ba .LC29 */
8229
8230 far = TARGET_V9 && (get_attr_length (insn) >= 3);
8231 if (reversed ^ far)
8232 {
8233 /* Reversal of FP compares takes care -- an ordered compare
8234 becomes an unordered compare and vice versa. */
8235 if (mode == CCFPmode || mode == CCFPEmode)
8236 code = reverse_condition_maybe_unordered (code);
8237 else
8238 code = reverse_condition (code);
8239 }
8240
8241 /* Start by writing the branch condition. */
8242 if (mode == CCFPmode || mode == CCFPEmode)
8243 {
8244 switch (code)
8245 {
8246 case NE:
8247 branch = "fbne";
8248 break;
8249 case EQ:
8250 branch = "fbe";
8251 break;
8252 case GE:
8253 branch = "fbge";
8254 break;
8255 case GT:
8256 branch = "fbg";
8257 break;
8258 case LE:
8259 branch = "fble";
8260 break;
8261 case LT:
8262 branch = "fbl";
8263 break;
8264 case UNORDERED:
8265 branch = "fbu";
8266 break;
8267 case ORDERED:
8268 branch = "fbo";
8269 break;
8270 case UNGT:
8271 branch = "fbug";
8272 break;
8273 case UNLT:
8274 branch = "fbul";
8275 break;
8276 case UNEQ:
8277 branch = "fbue";
8278 break;
8279 case UNGE:
8280 branch = "fbuge";
8281 break;
8282 case UNLE:
8283 branch = "fbule";
8284 break;
8285 case LTGT:
8286 branch = "fblg";
8287 break;
8288 default:
8289 gcc_unreachable ();
8290 }
8291
8292 /* ??? !v9: FP branches cannot be preceded by another floating point
8293 insn. Because there is currently no concept of pre-delay slots,
8294 we can fix this only by always emitting a nop before a floating
8295 point branch. */
8296
8297 string[0] = '\0';
8298 if (! TARGET_V9)
8299 strcpy (string, "nop\n\t");
8300 strcat (string, branch);
8301 }
8302 else
8303 {
8304 switch (code)
8305 {
8306 case NE:
8307 if (mode == CCVmode || mode == CCXVmode)
8308 branch = "bvs";
8309 else
8310 branch = "bne";
8311 break;
8312 case EQ:
8313 if (mode == CCVmode || mode == CCXVmode)
8314 branch = "bvc";
8315 else
8316 branch = "be";
8317 break;
8318 case GE:
8319 if (mode == CCNZmode || mode == CCXNZmode)
8320 branch = "bpos";
8321 else
8322 branch = "bge";
8323 break;
8324 case GT:
8325 branch = "bg";
8326 break;
8327 case LE:
8328 branch = "ble";
8329 break;
8330 case LT:
8331 if (mode == CCNZmode || mode == CCXNZmode)
8332 branch = "bneg";
8333 else
8334 branch = "bl";
8335 break;
8336 case GEU:
8337 branch = "bgeu";
8338 break;
8339 case GTU:
8340 branch = "bgu";
8341 break;
8342 case LEU:
8343 branch = "bleu";
8344 break;
8345 case LTU:
8346 branch = "blu";
8347 break;
8348 default:
8349 gcc_unreachable ();
8350 }
8351 strcpy (string, branch);
8352 }
8353 spaces -= strlen (branch);
8354 p = strchr (string, '\0');
8355
8356 /* Now add the annulling, the label, and a possible noop. */
8357 if (annul && ! far)
8358 {
8359 strcpy (p, ",a");
8360 p += 2;
8361 spaces -= 2;
8362 }
8363
8364 if (TARGET_V9)
8365 {
8366 rtx note;
8367 int v8 = 0;
8368
8369 if (! far && insn && INSN_ADDRESSES_SET_P ())
8370 {
8371 int delta = (INSN_ADDRESSES (INSN_UID (dest))
8372 - INSN_ADDRESSES (INSN_UID (insn)));
8373 /* Leave some instructions for "slop". */
8374 if (delta < -260000 || delta >= 260000)
8375 v8 = 1;
8376 }
8377
8378 switch (mode)
8379 {
8380 case E_CCmode:
8381 case E_CCNZmode:
8382 case E_CCCmode:
8383 case E_CCVmode:
8384 labelno = "%%icc, ";
8385 if (v8)
8386 labelno = "";
8387 break;
8388 case E_CCXmode:
8389 case E_CCXNZmode:
8390 case E_CCXCmode:
8391 case E_CCXVmode:
8392 labelno = "%%xcc, ";
8393 gcc_assert (!v8);
8394 break;
8395 case E_CCFPmode:
8396 case E_CCFPEmode:
8397 {
8398 static char v9_fcc_labelno[] = "%%fccX, ";
8399 /* Set the char indicating the number of the fcc reg to use. */
8400 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
8401 labelno = v9_fcc_labelno;
8402 if (v8)
8403 {
8404 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
8405 labelno = "";
8406 }
8407 }
8408 break;
8409 default:
8410 gcc_unreachable ();
8411 }
8412
8413 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8414 {
8415 strcpy (p,
8416 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8417 >= profile_probability::even ()) ^ far)
8418 ? ",pt" : ",pn");
8419 p += 3;
8420 spaces -= 3;
8421 }
8422 }
8423 else
8424 labelno = "";
8425
8426 if (spaces > 0)
8427 *p++ = '\t';
8428 else
8429 *p++ = ' ';
8430 strcpy (p, labelno);
8431 p = strchr (p, '\0');
8432 if (far)
8433 {
8434 strcpy (p, ".+12\n\t nop\n\tb\t");
8435 /* Skip the next insn if requested or
8436 if we know that it will be a nop. */
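      /* (Patching p[3] turns the ".+12" offset into ".+16", so the far
	 branch's delay slot, which is then either annulled or a nop, is
	 stepped over as well.)  */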
8437 if (annul || ! final_sequence)
8438 p[3] = '6';
8439 p += 14;
8440 }
8441 *p++ = '%';
8442 *p++ = 'l';
8443 *p++ = label + '0';
8444 *p++ = '%';
8445 *p++ = '#';
8446 *p = '\0';
8447
8448 return string;
8449 }
8450
8451 /* Emit a library call comparison between floating point X and Y.
8452 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
8453 Return the new operator to be used in the comparison sequence.
8454
8455 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
8456 values as arguments instead of the TFmode registers themselves,
8457 that's why we cannot call emit_float_lib_cmp. */
8458
8459 rtx
8460 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
8461 {
8462 const char *qpfunc;
8463 rtx slot0, slot1, result, tem, tem2, libfunc;
8464 machine_mode mode;
8465 enum rtx_code new_comparison;
8466
8467 switch (comparison)
8468 {
8469 case EQ:
8470 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
8471 break;
8472
8473 case NE:
8474 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
8475 break;
8476
8477 case GT:
8478 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
8479 break;
8480
8481 case GE:
8482 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
8483 break;
8484
8485 case LT:
8486 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
8487 break;
8488
8489 case LE:
8490 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
8491 break;
8492
8493 case ORDERED:
8494 case UNORDERED:
8495 case UNGT:
8496 case UNLT:
8497 case UNEQ:
8498 case UNGE:
8499 case UNLE:
8500 case LTGT:
8501 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
8502 break;
8503
8504 default:
8505 gcc_unreachable ();
8506 }
8507
8508 if (TARGET_ARCH64)
8509 {
8510 if (MEM_P (x))
8511 {
8512 tree expr = MEM_EXPR (x);
8513 if (expr)
8514 mark_addressable (expr);
8515 slot0 = x;
8516 }
8517 else
8518 {
8519 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8520 emit_move_insn (slot0, x);
8521 }
8522
8523 if (MEM_P (y))
8524 {
8525 tree expr = MEM_EXPR (y);
8526 if (expr)
8527 mark_addressable (expr);
8528 slot1 = y;
8529 }
8530 else
8531 {
8532 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8533 emit_move_insn (slot1, y);
8534 }
8535
8536 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8537 emit_library_call (libfunc, LCT_NORMAL,
8538 DImode,
8539 XEXP (slot0, 0), Pmode,
8540 XEXP (slot1, 0), Pmode);
8541 mode = DImode;
8542 }
8543 else
8544 {
8545 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8546 emit_library_call (libfunc, LCT_NORMAL,
8547 SImode,
8548 x, TFmode, y, TFmode);
8549 mode = SImode;
8550 }
8551
8552
8553 /* Immediately move the result of the libcall into a pseudo
8554 register so reload doesn't clobber the value if it needs
8555 the return register for a spill reg. */
8556 result = gen_reg_rtx (mode);
8557 emit_move_insn (result, hard_libcall_value (mode, libfunc));
8558
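  /* For the _Qp_cmp/_Q_cmp functions the result is assumed to encode the
     relation as 0 = equal, 1 = less, 2 = greater and 3 = unordered; the
     tests below decode that.  The other functions simply return a nonzero
     value when the relation holds.  */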
8559 switch (comparison)
8560 {
8561 default:
8562 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8563 case ORDERED:
8564 case UNORDERED:
8565 new_comparison = (comparison == UNORDERED ? EQ : NE);
8566 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8567 case UNGT:
8568 case UNGE:
8569 new_comparison = (comparison == UNGT ? GT : NE);
8570 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8571 case UNLE:
8572 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8573 case UNLT:
8574 tem = gen_reg_rtx (mode);
8575 if (TARGET_ARCH32)
8576 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8577 else
8578 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8579 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8580 case UNEQ:
8581 case LTGT:
8582 tem = gen_reg_rtx (mode);
8583 if (TARGET_ARCH32)
8584 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8585 else
8586 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8587 tem2 = gen_reg_rtx (mode);
8588 if (TARGET_ARCH32)
8589 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8590 else
8591 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8592 new_comparison = (comparison == UNEQ ? EQ : NE);
8593 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8594 }
8595
8596 gcc_unreachable ();
8597 }
8598
8599 /* Generate an unsigned DImode to FP conversion. This is the same code
8600 optabs would emit if we didn't have TFmode patterns. */
8601
8602 void
8603 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8604 {
8605 rtx i0, i1, f0, in, out;
8606
8607 out = operands[0];
8608 in = force_reg (DImode, operands[1]);
8609 rtx_code_label *neglab = gen_label_rtx ();
8610 rtx_code_label *donelab = gen_label_rtx ();
8611 i0 = gen_reg_rtx (DImode);
8612 i1 = gen_reg_rtx (DImode);
8613 f0 = gen_reg_rtx (mode);
8614
8615 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8616
8617 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8618 emit_jump_insn (gen_jump (donelab));
8619 emit_barrier ();
8620
8621 emit_label (neglab);
8622
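  /* IN has its high bit set, so it cannot be converted directly as a signed
     DImode value: halve it (ORing the low bit back in so rounding stays
     correct), convert, and then double the result.  */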
8623 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8624 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8625 emit_insn (gen_iordi3 (i0, i0, i1));
8626 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8627 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8628
8629 emit_label (donelab);
8630 }
8631
8632 /* Generate an FP to unsigned DImode conversion. This is the same code
8633 optabs would emit if we didn't have TFmode patterns. */
8634
8635 void
8636 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8637 {
8638 rtx i0, i1, f0, in, out, limit;
8639
8640 out = operands[0];
8641 in = force_reg (mode, operands[1]);
8642 rtx_code_label *neglab = gen_label_rtx ();
8643 rtx_code_label *donelab = gen_label_rtx ();
8644 i0 = gen_reg_rtx (DImode);
8645 i1 = gen_reg_rtx (DImode);
8646 limit = gen_reg_rtx (mode);
8647 f0 = gen_reg_rtx (mode);
8648
8649 emit_move_insn (limit,
8650 const_double_from_real_value (
8651 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8652 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8653
8654 emit_insn (gen_rtx_SET (out,
8655 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8656 emit_jump_insn (gen_jump (donelab));
8657 emit_barrier ();
8658
8659 emit_label (neglab);
8660
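  /* The value is at least 2^63, so it does not fit in a signed DImode:
     subtract 2^63 before the signed conversion and set the sign bit of the
     result afterwards with the XOR.  */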
8661 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8662 emit_insn (gen_rtx_SET (i0,
8663 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8664 emit_insn (gen_movdi (i1, const1_rtx));
8665 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8666 emit_insn (gen_xordi3 (out, i0, i1));
8667
8668 emit_label (donelab);
8669 }
8670
8671 /* Return the string to output a compare and branch instruction to DEST.
8672 DEST is the destination insn (i.e. the label), INSN is the source,
8673 and OP is the conditional expression. */
8674
8675 const char *
8676 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8677 {
8678 machine_mode mode = GET_MODE (XEXP (op, 0));
8679 enum rtx_code code = GET_CODE (op);
8680 const char *cond_str, *tmpl;
8681 int far, emit_nop, len;
8682 static char string[64];
8683 char size_char;
8684
8685 /* Compare and Branch is limited to +-2KB. If it is too far away,
8686 change
8687
8688 cxbne X, Y, .LC30
8689
8690 to
8691
8692 cxbe X, Y, .+16
8693 nop
8694 ba,pt xcc, .LC30
8695 nop */
8696
8697 len = get_attr_length (insn);
8698
8699 far = len == 4;
8700 emit_nop = len == 2;
8701
8702 if (far)
8703 code = reverse_condition (code);
8704
8705 size_char = ((mode == SImode) ? 'w' : 'x');
8706
8707 switch (code)
8708 {
8709 case NE:
8710 cond_str = "ne";
8711 break;
8712
8713 case EQ:
8714 cond_str = "e";
8715 break;
8716
8717 case GE:
8718 cond_str = "ge";
8719 break;
8720
8721 case GT:
8722 cond_str = "g";
8723 break;
8724
8725 case LE:
8726 cond_str = "le";
8727 break;
8728
8729 case LT:
8730 cond_str = "l";
8731 break;
8732
8733 case GEU:
8734 cond_str = "cc";
8735 break;
8736
8737 case GTU:
8738 cond_str = "gu";
8739 break;
8740
8741 case LEU:
8742 cond_str = "leu";
8743 break;
8744
8745 case LTU:
8746 cond_str = "cs";
8747 break;
8748
8749 default:
8750 gcc_unreachable ();
8751 }
8752
8753 if (far)
8754 {
8755 int veryfar = 1, delta;
8756
8757 if (INSN_ADDRESSES_SET_P ())
8758 {
8759 delta = (INSN_ADDRESSES (INSN_UID (dest))
8760 - INSN_ADDRESSES (INSN_UID (insn)));
8761 /* Leave some instructions for "slop". */
8762 if (delta >= -260000 && delta < 260000)
8763 veryfar = 0;
8764 }
8765
8766 if (veryfar)
8767 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8768 else
8769 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8770 }
8771 else
8772 {
8773 if (emit_nop)
8774 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8775 else
8776 tmpl = "c%cb%s\t%%1, %%2, %%3";
8777 }
8778
8779 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8780
8781 return string;
8782 }
8783
8784 /* Return the string to output a conditional branch to LABEL, testing
8785 register REG. LABEL is the operand number of the label; REG is the
8786 operand number of the reg. OP is the conditional expression. The mode
8787 of REG says what kind of comparison we made.
8788
8789 DEST is the destination insn (i.e. the label), INSN is the source.
8790
8791 REVERSED is nonzero if we should reverse the sense of the comparison.
8792
8793 ANNUL is nonzero if we should generate an annulling branch. */
8794
8795 const char *
8796 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8797 int annul, rtx_insn *insn)
8798 {
8799 static char string[64];
8800 enum rtx_code code = GET_CODE (op);
8801 machine_mode mode = GET_MODE (XEXP (op, 0));
8802 rtx note;
8803 int far;
8804 char *p;
8805
8806   /* Branches on a register are limited to +-128KB.  If it is too far away,
8807 change
8808
8809 brnz,pt %g1, .LC30
8810
8811 to
8812
8813 brz,pn %g1, .+12
8814 nop
8815 ba,pt %xcc, .LC30
8816
8817 and
8818
8819 brgez,a,pn %o1, .LC29
8820
8821 to
8822
8823 brlz,pt %o1, .+16
8824 nop
8825 ba,pt %xcc, .LC29 */
8826
8827 far = get_attr_length (insn) >= 3;
8828
8829 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8830 if (reversed ^ far)
8831 code = reverse_condition (code);
8832
8833 /* Only 64-bit versions of these instructions exist. */
8834 gcc_assert (mode == DImode);
8835
8836 /* Start by writing the branch condition. */
8837
8838 switch (code)
8839 {
8840 case NE:
8841 strcpy (string, "brnz");
8842 break;
8843
8844 case EQ:
8845 strcpy (string, "brz");
8846 break;
8847
8848 case GE:
8849 strcpy (string, "brgez");
8850 break;
8851
8852 case LT:
8853 strcpy (string, "brlz");
8854 break;
8855
8856 case LE:
8857 strcpy (string, "brlez");
8858 break;
8859
8860 case GT:
8861 strcpy (string, "brgz");
8862 break;
8863
8864 default:
8865 gcc_unreachable ();
8866 }
8867
8868 p = strchr (string, '\0');
8869
8870 /* Now add the annulling, reg, label, and nop. */
8871 if (annul && ! far)
8872 {
8873 strcpy (p, ",a");
8874 p += 2;
8875 }
8876
8877 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8878 {
8879 strcpy (p,
8880 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8881 >= profile_probability::even ()) ^ far)
8882 ? ",pt" : ",pn");
8883 p += 3;
8884 }
8885
8886 *p = p < string + 8 ? '\t' : ' ';
8887 p++;
8888 *p++ = '%';
8889 *p++ = '0' + reg;
8890 *p++ = ',';
8891 *p++ = ' ';
8892 if (far)
8893 {
8894 int veryfar = 1, delta;
8895
8896 if (INSN_ADDRESSES_SET_P ())
8897 {
8898 delta = (INSN_ADDRESSES (INSN_UID (dest))
8899 - INSN_ADDRESSES (INSN_UID (insn)));
8900 /* Leave some instructions for "slop". */
8901 if (delta >= -260000 && delta < 260000)
8902 veryfar = 0;
8903 }
8904
8905 strcpy (p, ".+12\n\t nop\n\t");
8906 /* Skip the next insn if requested or
8907 if we know that it will be a nop. */
8908 if (annul || ! final_sequence)
8909 p[3] = '6';
8910 p += 12;
8911 if (veryfar)
8912 {
8913 strcpy (p, "b\t");
8914 p += 2;
8915 }
8916 else
8917 {
8918 strcpy (p, "ba,pt\t%%xcc, ");
8919 p += 13;
8920 }
8921 }
8922 *p++ = '%';
8923 *p++ = 'l';
8924 *p++ = '0' + label;
8925 *p++ = '%';
8926 *p++ = '#';
8927 *p = '\0';
8928
8929 return string;
8930 }
8931
8932 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8933    Such instructions cannot be used in the delay slot of a return insn on V9.
8934    If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8935 */
8936
8937 static int
8938 epilogue_renumber (register rtx *where, int test)
8939 {
8940 register const char *fmt;
8941 register int i;
8942 register enum rtx_code code;
8943
8944 if (*where == 0)
8945 return 0;
8946
8947 code = GET_CODE (*where);
8948
8949 switch (code)
8950 {
8951 case REG:
8952 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8953 return 1;
8954 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8955 {
8956 if (ORIGINAL_REGNO (*where))
8957 {
8958 rtx n = gen_raw_REG (GET_MODE (*where),
8959 OUTGOING_REGNO (REGNO (*where)));
8960 ORIGINAL_REGNO (n) = ORIGINAL_REGNO (*where);
8961 *where = n;
8962 }
8963 else
8964 *where = gen_rtx_REG (GET_MODE (*where),
8965 OUTGOING_REGNO (REGNO (*where)));
8966 }
8967 return 0;
8968
8969 case SCRATCH:
8970 case CC0:
8971 case PC:
8972 case CONST_INT:
8973 case CONST_WIDE_INT:
8974 case CONST_DOUBLE:
8975 return 0;
8976
8977 /* Do not replace the frame pointer with the stack pointer because
8978 it can cause the delayed instruction to load below the stack.
8979 This occurs when instructions like:
8980
8981 (set (reg/i:SI 24 %i0)
8982 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8983 (const_int -20 [0xffffffec])) 0))
8984
8985 are in the return delayed slot. */
8986 case PLUS:
8987 if (GET_CODE (XEXP (*where, 0)) == REG
8988 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8989 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8990 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8991 return 1;
8992 break;
8993
8994 case MEM:
8995 if (SPARC_STACK_BIAS
8996 && GET_CODE (XEXP (*where, 0)) == REG
8997 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8998 return 1;
8999 break;
9000
9001 default:
9002 break;
9003 }
9004
9005 fmt = GET_RTX_FORMAT (code);
9006
9007 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
9008 {
9009 if (fmt[i] == 'E')
9010 {
9011 register int j;
9012 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
9013 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
9014 return 1;
9015 }
9016 else if (fmt[i] == 'e'
9017 && epilogue_renumber (&(XEXP (*where, i)), test))
9018 return 1;
9019 }
9020 return 0;
9021 }
9022
9023 /* Leaf functions and non-leaf functions have different needs. */
9024
9025 static const int
9026 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
9027
9028 static const int
9029 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
9030
9031 static const int *const reg_alloc_orders[] = {
9032 reg_leaf_alloc_order,
9033 reg_nonleaf_alloc_order};
9034
9035 void
9036 order_regs_for_local_alloc (void)
9037 {
9038 static int last_order_nonleaf = 1;
9039
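/* Hard register 15 is %o7, which call instructions clobber with the return
   address; its liveness is therefore used here as the leaf/non-leaf test.  */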
9040 if (df_regs_ever_live_p (15) != last_order_nonleaf)
9041 {
9042 last_order_nonleaf = !last_order_nonleaf;
9043 memcpy ((char *) reg_alloc_order,
9044 (const char *) reg_alloc_orders[last_order_nonleaf],
9045 FIRST_PSEUDO_REGISTER * sizeof (int));
9046 }
9047 }
9048
9049 /* Return 1 if REG and MEM are legitimate enough to allow the various
9050 MEM<-->REG splits to be run. */
9051
9052 int
9053 sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
9054 {
9055 /* Punt if we are here by mistake. */
9056 gcc_assert (reload_completed);
9057
9058 /* We must have an offsettable memory reference. */
9059 if (!offsettable_memref_p (mem))
9060 return 0;
9061
9062 /* If we have legitimate args for ldd/std, we do not want
9063 the split to happen. */
9064 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
9065 return 0;
9066
9067 /* Success. */
9068 return 1;
9069 }
9070
9071 /* Split a REG <-- MEM move into a pair of moves in MODE. */
9072
9073 void
9074 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
9075 {
9076 rtx high_part = gen_highpart (mode, dest);
9077 rtx low_part = gen_lowpart (mode, dest);
9078 rtx word0 = adjust_address (src, mode, 0);
9079 rtx word1 = adjust_address (src, mode, 4);
9080
9081 if (reg_overlap_mentioned_p (high_part, word1))
9082 {
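/* The register holding the high part is mentioned in the address of
   word1, so load the low part first to avoid clobbering that address
   register before it is used.  */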
9083 emit_move_insn_1 (low_part, word1);
9084 emit_move_insn_1 (high_part, word0);
9085 }
9086 else
9087 {
9088 emit_move_insn_1 (high_part, word0);
9089 emit_move_insn_1 (low_part, word1);
9090 }
9091 }
9092
9093 /* Split a MEM <-- REG move into a pair of moves in MODE. */
9094
9095 void
9096 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
9097 {
9098 rtx word0 = adjust_address (dest, mode, 0);
9099 rtx word1 = adjust_address (dest, mode, 4);
9100 rtx high_part = gen_highpart (mode, src);
9101 rtx low_part = gen_lowpart (mode, src);
9102
9103 emit_move_insn_1 (word0, high_part);
9104 emit_move_insn_1 (word1, low_part);
9105 }
9106
9107 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */
9108
9109 int
9110 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
9111 {
9112 /* Punt if we are here by mistake. */
9113 gcc_assert (reload_completed);
9114
9115 if (GET_CODE (reg1) == SUBREG)
9116 reg1 = SUBREG_REG (reg1);
9117 if (GET_CODE (reg1) != REG)
9118 return 0;
9119 const int regno1 = REGNO (reg1);
9120
9121 if (GET_CODE (reg2) == SUBREG)
9122 reg2 = SUBREG_REG (reg2);
9123 if (GET_CODE (reg2) != REG)
9124 return 0;
9125 const int regno2 = REGNO (reg2);
9126
9127 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
9128 return 1;
9129
9130 if (TARGET_VIS3)
9131 {
9132 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
9133 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
9134 return 1;
9135 }
9136
9137 return 0;
9138 }
9139
9140 /* Split a REG <--> REG move into a pair of moves in MODE. */
9141
9142 void
9143 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
9144 {
9145 rtx dest1 = gen_highpart (mode, dest);
9146 rtx dest2 = gen_lowpart (mode, dest);
9147 rtx src1 = gen_highpart (mode, src);
9148 rtx src2 = gen_lowpart (mode, src);
9149
9150 /* Now emit using the real source and destination we found, swapping
9151 the order if we detect overlap. */
9152 if (reg_overlap_mentioned_p (dest1, src2))
9153 {
9154 emit_move_insn_1 (dest2, src2);
9155 emit_move_insn_1 (dest1, src1);
9156 }
9157 else
9158 {
9159 emit_move_insn_1 (dest1, src1);
9160 emit_move_insn_1 (dest2, src2);
9161 }
9162 }
9163
9164 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
9165 This makes them candidates for using ldd and std insns.
9166
9167 Note reg1 and reg2 *must* be hard registers. */
9168
9169 int
9170 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
9171 {
9172 /* We might have been passed a SUBREG. */
9173 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
9174 return 0;
9175
9176 if (REGNO (reg1) % 2 != 0)
9177 return 0;
9178
9179 /* Integer ldd is deprecated in SPARC V9. */
9180 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
9181 return 0;
9182
9183 return (REGNO (reg1) == REGNO (reg2) - 1);
9184 }
9185
9186 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
9187 an ldd or std insn.
9188
9189 This can only happen when addr1 and addr2, the addresses in mem1
9190 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
9191 addr1 must also be aligned on a 64-bit boundary.
9192
9193 Also, if dependent_reg_rtx is not null, it should not be used to
9194 compute the address for mem1, i.e. we cannot optimize a sequence
9195 like:
9196 ld [%o0], %o0
9197 ld [%o0 + 4], %o1
9198 to
9199 ldd [%o0], %o0
9200 nor:
9201 ld [%g3 + 4], %g3
9202 ld [%g3], %g2
9203 to
9204 ldd [%g3], %g2
9205
9206 But, note that the transformation from:
9207 ld [%g2 + 4], %g3
9208 ld [%g2], %g2
9209 to
9210 ldd [%g2], %g2
9211 is perfectly fine. Thus, the peephole2 patterns always pass us
9212 the destination register of the first load, never the second one.
9213
9214 For stores we don't have a similar problem, so dependent_reg_rtx is
9215 NULL_RTX. */
9216
9217 int
9218 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
9219 {
9220 rtx addr1, addr2;
9221 unsigned int reg1;
9222 HOST_WIDE_INT offset1;
9223
9224 /* The mems cannot be volatile. */
9225 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
9226 return 0;
9227
9228 /* MEM1 should be aligned on a 64-bit boundary. */
9229 if (MEM_ALIGN (mem1) < 64)
9230 return 0;
9231
9232 addr1 = XEXP (mem1, 0);
9233 addr2 = XEXP (mem2, 0);
9234
9235 /* Extract a register number and offset (if used) from the first addr. */
9236 if (GET_CODE (addr1) == PLUS)
9237 {
9238 /* If not a REG, return zero. */
9239 if (GET_CODE (XEXP (addr1, 0)) != REG)
9240 return 0;
9241 else
9242 {
9243 reg1 = REGNO (XEXP (addr1, 0));
9244 /* The offset must be constant! */
9245 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
9246 return 0;
9247 offset1 = INTVAL (XEXP (addr1, 1));
9248 }
9249 }
9250 else if (GET_CODE (addr1) != REG)
9251 return 0;
9252 else
9253 {
9254 reg1 = REGNO (addr1);
9255 /* This was a simple (mem (reg)) expression. Offset is 0. */
9256 offset1 = 0;
9257 }
9258
9259 /* Make sure the second address is a (mem (plus (reg) (const_int))). */
9260 if (GET_CODE (addr2) != PLUS)
9261 return 0;
9262
9263 if (GET_CODE (XEXP (addr2, 0)) != REG
9264 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
9265 return 0;
9266
9267 if (reg1 != REGNO (XEXP (addr2, 0)))
9268 return 0;
9269
9270 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
9271 return 0;
9272
9273 /* The first offset must be evenly divisible by 8 to ensure the
9274 address is 64-bit aligned. */
9275 if (offset1 % 8 != 0)
9276 return 0;
9277
9278 /* The offset for the second addr must be 4 more than the first addr. */
9279 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
9280 return 0;
9281
9282 /* All the tests passed. addr1 and addr2 are valid for ldd and std
9283 instructions. */
9284 return 1;
9285 }
9286
9287 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
9288
9289 rtx
9290 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
9291 {
9292 rtx x = widen_memory_access (mem1, mode, 0);
9293 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
9294 return x;
9295 }
9296
9297 /* Return 1 if reg is a pseudo, or is the first register in
9298 a hard register pair. This makes it suitable for use in
9299 ldd and std insns. */
9300
9301 int
9302 register_ok_for_ldd (rtx reg)
9303 {
9304 /* We might have been passed a SUBREG. */
9305 if (!REG_P (reg))
9306 return 0;
9307
9308 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
9309 return (REGNO (reg) % 2 == 0);
9310
9311 return 1;
9312 }
9313
9314 /* Return 1 if OP, a MEM, has an address which is known to be
9315 aligned to an 8-byte boundary. */
9316
9317 int
9318 memory_ok_for_ldd (rtx op)
9319 {
9320 /* In 64-bit mode, we assume that the address is word-aligned. */
9321 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
9322 return 0;
9323
9324 if (! can_create_pseudo_p ()
9325 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
9326 return 0;
9327
9328 return 1;
9329 }
9330
9331 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
9332
9333 static bool
9334 sparc_print_operand_punct_valid_p (unsigned char code)
9335 {
9336 if (code == '#'
9337 || code == '*'
9338 || code == '('
9339 || code == ')'
9340 || code == '_'
9341 || code == '&')
9342 return true;
9343
9344 return false;
9345 }
9346
9347 /* Implement TARGET_PRINT_OPERAND.
9348 Print operand X (an rtx) in assembler syntax to file FILE.
9349 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
9350 For `%' followed by punctuation, CODE is the punctuation and X is null. */
9351
9352 static void
9353 sparc_print_operand (FILE *file, rtx x, int code)
9354 {
9355 const char *s;
9356
9357 switch (code)
9358 {
9359 case '#':
9360 /* Output an insn in a delay slot. */
9361 if (final_sequence)
9362 sparc_indent_opcode = 1;
9363 else
9364 fputs ("\n\t nop", file);
9365 return;
9366 case '*':
9367 /* Output an annul flag if there's nothing for the delay slot and we
9368 are optimizing. This is always used with '(' below.
9369 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
9370 this is a dbx bug. So, we only do this when optimizing.
9371 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
9372 Always emit a nop in case the next instruction is a branch. */
9373 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
9374 fputs (",a", file);
9375 return;
9376 case '(':
9377 /* Output a 'nop' if there's nothing for the delay slot and we are
9378 not optimizing. This is always used with '*' above. */
9379 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
9380 fputs ("\n\t nop", file);
9381 else if (final_sequence)
9382 sparc_indent_opcode = 1;
9383 return;
9384 case ')':
9385 /* Output the right displacement from the saved PC on function return.
9386 The caller may have placed an "unimp" insn immediately after the call
9387 so we have to account for it. This insn is used in the 32-bit ABI
9388 when calling a function that returns a non zero-sized structure. The
9389 64-bit ABI doesn't have it. Be careful to have this test be the same
9390 as that for the call. The exception is when sparc_std_struct_return
9391 is enabled, the psABI is followed exactly and the adjustment is made
9392 by the code in sparc_struct_value_rtx. The call emitted is the same
9393 when sparc_std_struct_return is enabled. */
9394 if (!TARGET_ARCH64
9395 && cfun->returns_struct
9396 && !sparc_std_struct_return
9397 && DECL_SIZE (DECL_RESULT (current_function_decl))
9398 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
9399 == INTEGER_CST
9400 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
9401 fputs ("12", file);
9402 else
9403 fputc ('8', file);
9404 return;
9405 case '_':
9406 /* Output the Embedded Medium/Anywhere code model base register. */
9407 fputs (EMBMEDANY_BASE_REG, file);
9408 return;
9409 case '&':
9410 /* Print some local dynamic TLS name. */
9411 if (const char *name = get_some_local_dynamic_name ())
9412 assemble_name (file, name);
9413 else
9414 output_operand_lossage ("'%%&' used without any "
9415 "local dynamic TLS references");
9416 return;
9417
9418 case 'Y':
9419 /* Adjust the operand to take into account a RESTORE operation. */
9420 if (GET_CODE (x) == CONST_INT)
9421 break;
9422 else if (GET_CODE (x) != REG)
9423 output_operand_lossage ("invalid %%Y operand");
9424 else if (REGNO (x) < 8)
9425 fputs (reg_names[REGNO (x)], file);
9426 else if (REGNO (x) >= 24 && REGNO (x) < 32)
9427 fputs (reg_names[REGNO (x)-16], file);
9428 else
9429 output_operand_lossage ("invalid %%Y operand");
9430 return;
9431 case 'L':
9432 /* Print out the low order register name of a register pair. */
9433 if (WORDS_BIG_ENDIAN)
9434 fputs (reg_names[REGNO (x)+1], file);
9435 else
9436 fputs (reg_names[REGNO (x)], file);
9437 return;
9438 case 'H':
9439 /* Print out the high order register name of a register pair. */
9440 if (WORDS_BIG_ENDIAN)
9441 fputs (reg_names[REGNO (x)], file);
9442 else
9443 fputs (reg_names[REGNO (x)+1], file);
9444 return;
9445 case 'R':
9446 /* Print out the second register name of a register pair or quad.
9447 I.e., R (%o0) => %o1. */
9448 fputs (reg_names[REGNO (x)+1], file);
9449 return;
9450 case 'S':
9451 /* Print out the third register name of a register quad.
9452 I.e., S (%o0) => %o2. */
9453 fputs (reg_names[REGNO (x)+2], file);
9454 return;
9455 case 'T':
9456 /* Print out the fourth register name of a register quad.
9457 I.e., T (%o0) => %o3. */
9458 fputs (reg_names[REGNO (x)+3], file);
9459 return;
9460 case 'x':
9461 /* Print a condition code register. */
9462 if (REGNO (x) == SPARC_ICC_REG)
9463 {
9464 switch (GET_MODE (x))
9465 {
9466 case E_CCmode:
9467 case E_CCNZmode:
9468 case E_CCCmode:
9469 case E_CCVmode:
9470 s = "%icc";
9471 break;
9472 case E_CCXmode:
9473 case E_CCXNZmode:
9474 case E_CCXCmode:
9475 case E_CCXVmode:
9476 s = "%xcc";
9477 break;
9478 default:
9479 gcc_unreachable ();
9480 }
9481 fputs (s, file);
9482 }
9483 else
9484 /* %fccN register */
9485 fputs (reg_names[REGNO (x)], file);
9486 return;
9487 case 'm':
9488 /* Print the operand's address only. */
9489 output_address (GET_MODE (x), XEXP (x, 0));
9490 return;
9491 case 'r':
9492 /* In this case we need a register. Use %g0 if the
9493 operand is const0_rtx. */
9494 if (x == const0_rtx
9495 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
9496 {
9497 fputs ("%g0", file);
9498 return;
9499 }
9500 else
9501 break;
9502
9503 case 'A':
9504 switch (GET_CODE (x))
9505 {
9506 case IOR:
9507 s = "or";
9508 break;
9509 case AND:
9510 s = "and";
9511 break;
9512 case XOR:
9513 s = "xor";
9514 break;
9515 default:
9516 output_operand_lossage ("invalid %%A operand");
9517 s = "";
9518 break;
9519 }
9520 fputs (s, file);
9521 return;
9522
9523 case 'B':
9524 switch (GET_CODE (x))
9525 {
9526 case IOR:
9527 s = "orn";
9528 break;
9529 case AND:
9530 s = "andn";
9531 break;
9532 case XOR:
9533 s = "xnor";
9534 break;
9535 default:
9536 output_operand_lossage ("invalid %%B operand");
9537 s = "";
9538 break;
9539 }
9540 fputs (s, file);
9541 return;
9542
9543 /* This is used by the conditional move instructions. */
9544 case 'C':
9545 {
9546 machine_mode mode = GET_MODE (XEXP (x, 0));
9547 switch (GET_CODE (x))
9548 {
9549 case NE:
9550 if (mode == CCVmode || mode == CCXVmode)
9551 s = "vs";
9552 else
9553 s = "ne";
9554 break;
9555 case EQ:
9556 if (mode == CCVmode || mode == CCXVmode)
9557 s = "vc";
9558 else
9559 s = "e";
9560 break;
9561 case GE:
9562 if (mode == CCNZmode || mode == CCXNZmode)
9563 s = "pos";
9564 else
9565 s = "ge";
9566 break;
9567 case GT:
9568 s = "g";
9569 break;
9570 case LE:
9571 s = "le";
9572 break;
9573 case LT:
9574 if (mode == CCNZmode || mode == CCXNZmode)
9575 s = "neg";
9576 else
9577 s = "l";
9578 break;
9579 case GEU:
9580 s = "geu";
9581 break;
9582 case GTU:
9583 s = "gu";
9584 break;
9585 case LEU:
9586 s = "leu";
9587 break;
9588 case LTU:
9589 s = "lu";
9590 break;
9591 case LTGT:
9592 s = "lg";
9593 break;
9594 case UNORDERED:
9595 s = "u";
9596 break;
9597 case ORDERED:
9598 s = "o";
9599 break;
9600 case UNLT:
9601 s = "ul";
9602 break;
9603 case UNLE:
9604 s = "ule";
9605 break;
9606 case UNGT:
9607 s = "ug";
9608 break;
9609 case UNGE:
9610 s = "uge";
9611 break;
9612 case UNEQ:
9613 s = "ue";
9614 break;
9615 default:
9616 output_operand_lossage ("invalid %%C operand");
9617 s = "";
9618 break;
9619 }
9620 fputs (s, file);
9621 return;
9622 }
9623
9624 /* These are used by the movr instruction pattern. */
9625 case 'D':
9626 {
9627 switch (GET_CODE (x))
9628 {
9629 case NE:
9630 s = "ne";
9631 break;
9632 case EQ:
9633 s = "e";
9634 break;
9635 case GE:
9636 s = "gez";
9637 break;
9638 case LT:
9639 s = "lz";
9640 break;
9641 case LE:
9642 s = "lez";
9643 break;
9644 case GT:
9645 s = "gz";
9646 break;
9647 default:
9648 output_operand_lossage ("invalid %%D operand");
9649 s = "";
9650 break;
9651 }
9652 fputs (s, file);
9653 return;
9654 }
9655
9656 case 'b':
9657 {
9658 /* Print a sign-extended character. */
9659 int i = trunc_int_for_mode (INTVAL (x), QImode);
9660 fprintf (file, "%d", i);
9661 return;
9662 }
9663
9664 case 'f':
9665 /* Operand must be a MEM; write its address. */
9666 if (GET_CODE (x) != MEM)
9667 output_operand_lossage ("invalid %%f operand");
9668 output_address (GET_MODE (x), XEXP (x, 0));
9669 return;
9670
9671 case 's':
9672 {
9673 /* Print a sign-extended 32-bit value. */
9674 HOST_WIDE_INT i;
9675 if (GET_CODE(x) == CONST_INT)
9676 i = INTVAL (x);
9677 else
9678 {
9679 output_operand_lossage ("invalid %%s operand");
9680 return;
9681 }
9682 i = trunc_int_for_mode (i, SImode);
9683 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9684 return;
9685 }
9686
9687 case 0:
9688 /* Do nothing special. */
9689 break;
9690
9691 default:
9692 /* Undocumented flag. */
9693 output_operand_lossage ("invalid operand output code");
9694 }
9695
9696 if (GET_CODE (x) == REG)
9697 fputs (reg_names[REGNO (x)], file);
9698 else if (GET_CODE (x) == MEM)
9699 {
9700 fputc ('[', file);
9701 /* Poor Sun assembler doesn't understand absolute addressing. */
9702 if (CONSTANT_P (XEXP (x, 0)))
9703 fputs ("%g0+", file);
9704 output_address (GET_MODE (x), XEXP (x, 0));
9705 fputc (']', file);
9706 }
9707 else if (GET_CODE (x) == HIGH)
9708 {
9709 fputs ("%hi(", file);
9710 output_addr_const (file, XEXP (x, 0));
9711 fputc (')', file);
9712 }
9713 else if (GET_CODE (x) == LO_SUM)
9714 {
9715 sparc_print_operand (file, XEXP (x, 0), 0);
9716 if (TARGET_CM_MEDMID)
9717 fputs ("+%l44(", file);
9718 else
9719 fputs ("+%lo(", file);
9720 output_addr_const (file, XEXP (x, 1));
9721 fputc (')', file);
9722 }
9723 else if (GET_CODE (x) == CONST_DOUBLE)
9724 output_operand_lossage ("floating-point constant not a valid immediate operand");
9725 else
9726 output_addr_const (file, x);
9727 }
9728
9729 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9730
9731 static void
9732 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9733 {
9734 register rtx base, index = 0;
9735 int offset = 0;
9736 register rtx addr = x;
9737
9738 if (REG_P (addr))
9739 fputs (reg_names[REGNO (addr)], file);
9740 else if (GET_CODE (addr) == PLUS)
9741 {
9742 if (CONST_INT_P (XEXP (addr, 0)))
9743 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9744 else if (CONST_INT_P (XEXP (addr, 1)))
9745 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9746 else
9747 base = XEXP (addr, 0), index = XEXP (addr, 1);
9748 if (GET_CODE (base) == LO_SUM)
9749 {
9750 gcc_assert (USE_AS_OFFSETABLE_LO10
9751 && TARGET_ARCH64
9752 && ! TARGET_CM_MEDMID);
9753 output_operand (XEXP (base, 0), 0);
9754 fputs ("+%lo(", file);
9755 output_address (VOIDmode, XEXP (base, 1));
9756 fprintf (file, ")+%d", offset);
9757 }
9758 else
9759 {
9760 fputs (reg_names[REGNO (base)], file);
9761 if (index == 0)
9762 fprintf (file, "%+d", offset);
9763 else if (REG_P (index))
9764 fprintf (file, "+%s", reg_names[REGNO (index)]);
9765 else if (GET_CODE (index) == SYMBOL_REF
9766 || GET_CODE (index) == LABEL_REF
9767 || GET_CODE (index) == CONST)
9768 fputc ('+', file), output_addr_const (file, index);
9769 else gcc_unreachable ();
9770 }
9771 }
9772 else if (GET_CODE (addr) == MINUS
9773 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9774 {
9775 output_addr_const (file, XEXP (addr, 0));
9776 fputs ("-(", file);
9777 output_addr_const (file, XEXP (addr, 1));
9778 fputs ("-.)", file);
9779 }
9780 else if (GET_CODE (addr) == LO_SUM)
9781 {
9782 output_operand (XEXP (addr, 0), 0);
9783 if (TARGET_CM_MEDMID)
9784 fputs ("+%l44(", file);
9785 else
9786 fputs ("+%lo(", file);
9787 output_address (VOIDmode, XEXP (addr, 1));
9788 fputc (')', file);
9789 }
9790 else if (flag_pic
9791 && GET_CODE (addr) == CONST
9792 && GET_CODE (XEXP (addr, 0)) == MINUS
9793 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9794 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9795 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9796 {
9797 addr = XEXP (addr, 0);
9798 output_addr_const (file, XEXP (addr, 0));
9799 /* Group the args of the second CONST in parenthesis. */
9800 fputs ("-(", file);
9801 /* Skip past the second CONST--it does nothing for us. */
9802 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9803 /* Close the parenthesis. */
9804 fputc (')', file);
9805 }
9806 else
9807 {
9808 output_addr_const (file, addr);
9809 }
9810 }
9811
9812 /* Target hook for assembling integer objects. The sparc version has
9813 special handling for aligned DI-mode objects. */
9814
9815 static bool
9816 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9817 {
9818 /* ??? We only output .xword's for symbols and only then in environments
9819 where the assembler can handle them. */
9820 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9821 {
9822 if (TARGET_V9)
9823 {
9824 assemble_integer_with_op ("\t.xword\t", x);
9825 return true;
9826 }
9827 else
9828 {
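/* Without V9 support in the assembler there is no .xword, so emit the
   aligned 64-bit object as a zero high word followed by the 32-bit
   value.  */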
9829 assemble_aligned_integer (4, const0_rtx);
9830 assemble_aligned_integer (4, x);
9831 return true;
9832 }
9833 }
9834 return default_assemble_integer (x, size, aligned_p);
9835 }
9836
9837 /* Return the value of a code used in the .proc pseudo-op that says
9838 what kind of result this function returns. For non-C types, we pick
9839 the closest C type. */
9840
9841 #ifndef SHORT_TYPE_SIZE
9842 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9843 #endif
9844
9845 #ifndef INT_TYPE_SIZE
9846 #define INT_TYPE_SIZE BITS_PER_WORD
9847 #endif
9848
9849 #ifndef LONG_TYPE_SIZE
9850 #define LONG_TYPE_SIZE BITS_PER_WORD
9851 #endif
9852
9853 #ifndef LONG_LONG_TYPE_SIZE
9854 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9855 #endif
9856
9857 #ifndef FLOAT_TYPE_SIZE
9858 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9859 #endif
9860
9861 #ifndef DOUBLE_TYPE_SIZE
9862 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9863 #endif
9864
9865 #ifndef LONG_DOUBLE_TYPE_SIZE
9866 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9867 #endif
9868
9869 unsigned long
9870 sparc_type_code (register tree type)
9871 {
9872 register unsigned long qualifiers = 0;
9873 register unsigned shift;
9874
9875 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9876 setting more, since some assemblers will give an error for this. Also,
9877 we must be careful to avoid shifts of 32 bits or more to avoid getting
9878 unpredictable results. */
9879
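/* The resulting encoding: the low bits hold the code for the fundamental
   type, and each level of derivation (1 = pointer, 2 = function,
   3 = array) occupies the next two bits, starting at bit 6.  */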
9880 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9881 {
9882 switch (TREE_CODE (type))
9883 {
9884 case ERROR_MARK:
9885 return qualifiers;
9886
9887 case ARRAY_TYPE:
9888 qualifiers |= (3 << shift);
9889 break;
9890
9891 case FUNCTION_TYPE:
9892 case METHOD_TYPE:
9893 qualifiers |= (2 << shift);
9894 break;
9895
9896 case POINTER_TYPE:
9897 case REFERENCE_TYPE:
9898 case OFFSET_TYPE:
9899 qualifiers |= (1 << shift);
9900 break;
9901
9902 case RECORD_TYPE:
9903 return (qualifiers | 8);
9904
9905 case UNION_TYPE:
9906 case QUAL_UNION_TYPE:
9907 return (qualifiers | 9);
9908
9909 case ENUMERAL_TYPE:
9910 return (qualifiers | 10);
9911
9912 case VOID_TYPE:
9913 return (qualifiers | 16);
9914
9915 case INTEGER_TYPE:
9916 /* If this is a range type, consider it to be the underlying
9917 type. */
9918 if (TREE_TYPE (type) != 0)
9919 break;
9920
9921 /* Carefully distinguish all the standard types of C,
9922 without messing up if the language is not C. We do this by
9923 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9924 look at both the names and the above fields, but that's redundant.
9925 Any type whose size is between two C types will be considered
9926 to be the wider of the two types. Also, we do not have a
9927 special code to use for "long long", so anything wider than
9928 long is treated the same. Note that we can't distinguish
9929 between "int" and "long" in this code if they are the same
9930 size, but that's fine, since neither can the assembler. */
9931
9932 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9933 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9934
9935 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9936 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9937
9938 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9939 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9940
9941 else
9942 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9943
9944 case REAL_TYPE:
9945 /* If this is a range type, consider it to be the underlying
9946 type. */
9947 if (TREE_TYPE (type) != 0)
9948 break;
9949
9950 /* Carefully distinguish all the standard types of C,
9951 without messing up if the language is not C. */
9952
9953 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9954 return (qualifiers | 6);
9955
9956 else
9957 return (qualifiers | 7);
9958
9959 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9960 /* ??? We need to distinguish between double and float complex types,
9961 but I don't know how yet because I can't reach this code from
9962 existing front-ends. */
9963 return (qualifiers | 7); /* Who knows? */
9964
9965 case VECTOR_TYPE:
9966 case BOOLEAN_TYPE: /* Boolean truth value type. */
9967 case LANG_TYPE:
9968 case NULLPTR_TYPE:
9969 return qualifiers;
9970
9971 default:
9972 gcc_unreachable (); /* Not a type! */
9973 }
9974 }
9975
9976 return qualifiers;
9977 }
9978
9979 /* Nested function support. */
9980
9981 /* Emit RTL insns to initialize the variable parts of a trampoline.
9982 FNADDR is an RTX for the address of the function's pure code.
9983 CXT is an RTX for the static chain value for the function.
9984
9985 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9986 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9987 (to store insns). This is a bit excessive. Perhaps a different
9988 mechanism would be better here.
9989
9990 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9991
9992 static void
9993 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9994 {
9995 /* SPARC 32-bit trampoline:
9996
9997 sethi %hi(fn), %g1
9998 sethi %hi(static), %g2
9999 jmp %g1+%lo(fn)
10000 or %g2, %lo(static), %g2
10001
10002 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
10003 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
10004 */
10005
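/* Each word below is an opcode template with the address bits OR'ed in:
   0x03000000 is "sethi %hi(0), %g1", 0x05000000 is "sethi %hi(0), %g2",
   0x81c06000 is "jmp %g1+%lo(0)" and 0x8410a000 is "or %g2, %lo(0), %g2".
   The top 22 bits of an address go into the sethi imm22 field, the low
   10 bits into the simm13 field.  */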
10006 emit_move_insn
10007 (adjust_address (m_tramp, SImode, 0),
10008 expand_binop (SImode, ior_optab,
10009 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
10010 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
10011 NULL_RTX, 1, OPTAB_DIRECT));
10012
10013 emit_move_insn
10014 (adjust_address (m_tramp, SImode, 4),
10015 expand_binop (SImode, ior_optab,
10016 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
10017 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
10018 NULL_RTX, 1, OPTAB_DIRECT));
10019
10020 emit_move_insn
10021 (adjust_address (m_tramp, SImode, 8),
10022 expand_binop (SImode, ior_optab,
10023 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
10024 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
10025 NULL_RTX, 1, OPTAB_DIRECT));
10026
10027 emit_move_insn
10028 (adjust_address (m_tramp, SImode, 12),
10029 expand_binop (SImode, ior_optab,
10030 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
10031 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
10032 NULL_RTX, 1, OPTAB_DIRECT));
10033
10034 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
10035 aligned on a 16 byte boundary so one flush clears it all. */
10036 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
10037 if (sparc_cpu != PROCESSOR_ULTRASPARC
10038 && sparc_cpu != PROCESSOR_ULTRASPARC3
10039 && sparc_cpu != PROCESSOR_NIAGARA
10040 && sparc_cpu != PROCESSOR_NIAGARA2
10041 && sparc_cpu != PROCESSOR_NIAGARA3
10042 && sparc_cpu != PROCESSOR_NIAGARA4
10043 && sparc_cpu != PROCESSOR_NIAGARA7
10044 && sparc_cpu != PROCESSOR_M8)
10045 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
10046
10047 /* Call __enable_execute_stack after writing onto the stack to make sure
10048 the stack address is accessible. */
10049 #ifdef HAVE_ENABLE_EXECUTE_STACK
10050 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10051 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10052 #endif
10053
10054 }
10055
10056 /* The 64-bit version is simpler because it makes more sense to load the
10057 values as "immediate" data out of the trampoline. It's also easier since
10058 we can read the PC without clobbering a register. */
10059
10060 static void
10061 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
10062 {
10063 /* SPARC 64-bit trampoline:
10064
10065 rd %pc, %g1
10066 ldx [%g1+24], %g5
10067 jmp %g5
10068 ldx [%g1+16], %g5
10069 +16 bytes data
10070 */
10071
10072 emit_move_insn (adjust_address (m_tramp, SImode, 0),
10073 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
10074 emit_move_insn (adjust_address (m_tramp, SImode, 4),
10075 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
10076 emit_move_insn (adjust_address (m_tramp, SImode, 8),
10077 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
10078 emit_move_insn (adjust_address (m_tramp, SImode, 12),
10079 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
10080 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
10081 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
10082 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
10083
10084 if (sparc_cpu != PROCESSOR_ULTRASPARC
10085 && sparc_cpu != PROCESSOR_ULTRASPARC3
10086 && sparc_cpu != PROCESSOR_NIAGARA
10087 && sparc_cpu != PROCESSOR_NIAGARA2
10088 && sparc_cpu != PROCESSOR_NIAGARA3
10089 && sparc_cpu != PROCESSOR_NIAGARA4
10090 && sparc_cpu != PROCESSOR_NIAGARA7
10091 && sparc_cpu != PROCESSOR_M8)
10092 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
10093
10094 /* Call __enable_execute_stack after writing onto the stack to make sure
10095 the stack address is accessible. */
10096 #ifdef HAVE_ENABLE_EXECUTE_STACK
10097 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10098 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10099 #endif
10100 }
10101
10102 /* Worker for TARGET_TRAMPOLINE_INIT. */
10103
10104 static void
10105 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
10106 {
10107 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
10108 cxt = force_reg (Pmode, cxt);
10109 if (TARGET_ARCH64)
10110 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
10111 else
10112 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
10113 }
10114
10115 /* Adjust the cost of a scheduling dependency. Return the new cost of
10116 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
10117
10118 static int
10119 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
10120 int cost)
10121 {
10122 enum attr_type insn_type;
10123
10124 if (recog_memoized (insn) < 0)
10125 return cost;
10126
10127 insn_type = get_attr_type (insn);
10128
10129 if (dep_type == 0)
10130 {
10131 /* Data dependency; DEP_INSN writes a register that INSN reads some
10132 cycles later. */
10133
10134 /* If a load, then the dependence must be on the memory address;
10135 add an extra "cycle". Note that the cost could be two cycles
10136 if the reg was written late in an instruction group; we can't tell
10137 here. */
10138 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
10139 return cost + 3;
10140
10141 /* Get the delay only if the address of the store is the dependence. */
10142 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
10143 {
10144 rtx pat = PATTERN(insn);
10145 rtx dep_pat = PATTERN (dep_insn);
10146
10147 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10148 return cost; /* This should not happen! */
10149
10150 /* The dependency between the two instructions was on the data that
10151 is being stored. Assume that this implies that the address of the
10152 store is not dependent. */
10153 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10154 return cost;
10155
10156 return cost + 3; /* An approximation. */
10157 }
10158
10159 /* A shift instruction cannot receive its data from an instruction
10160 in the same cycle; add a one cycle penalty. */
10161 if (insn_type == TYPE_SHIFT)
10162 return cost + 3; /* Split before cascade into shift. */
10163 }
10164 else
10165 {
10166 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
10167 INSN writes some cycles later. */
10168
10169 /* These are only significant for the fpu unit; writing a fp reg before
10170 the fpu has finished with it stalls the processor. */
10171
10172 /* Reusing an integer register causes no problems. */
10173 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10174 return 0;
10175 }
10176
10177 return cost;
10178 }
10179
10180 static int
10181 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
10182 int cost)
10183 {
10184 enum attr_type insn_type, dep_type;
10185 rtx pat = PATTERN(insn);
10186 rtx dep_pat = PATTERN (dep_insn);
10187
10188 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
10189 return cost;
10190
10191 insn_type = get_attr_type (insn);
10192 dep_type = get_attr_type (dep_insn);
10193
10194 switch (dtype)
10195 {
10196 case 0:
10197 /* Data dependency; DEP_INSN writes a register that INSN reads some
10198 cycles later. */
10199
10200 switch (insn_type)
10201 {
10202 case TYPE_STORE:
10203 case TYPE_FPSTORE:
10204 /* Get the delay iff the address of the store is the dependence. */
10205 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10206 return cost;
10207
10208 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10209 return cost;
10210 return cost + 3;
10211
10212 case TYPE_LOAD:
10213 case TYPE_SLOAD:
10214 case TYPE_FPLOAD:
10215 /* If a load, then the dependence must be on the memory address. If
10216 the addresses aren't equal, then it might be a false dependency. */
10217 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
10218 {
10219 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
10220 || GET_CODE (SET_DEST (dep_pat)) != MEM
10221 || GET_CODE (SET_SRC (pat)) != MEM
10222 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
10223 XEXP (SET_SRC (pat), 0)))
10224 return cost + 2;
10225
10226 return cost + 8;
10227 }
10228 break;
10229
10230 case TYPE_BRANCH:
10231 /* Compare to branch latency is 0. There is no benefit from
10232 separating compare and branch. */
10233 if (dep_type == TYPE_COMPARE)
10234 return 0;
10235 /* Floating point compare to branch latency is less than
10236 compare to conditional move. */
10237 if (dep_type == TYPE_FPCMP)
10238 return cost - 1;
10239 break;
10240 default:
10241 break;
10242 }
10243 break;
10244
10245 case REG_DEP_ANTI:
10246 /* Anti-dependencies only penalize the fpu unit. */
10247 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10248 return 0;
10249 break;
10250
10251 default:
10252 break;
10253 }
10254
10255 return cost;
10256 }
10257
10258 static int
10259 leon5_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
10260 int cost)
10261 {
10262 enum attr_type insn_type, dep_type;
10263 rtx pat = PATTERN (insn);
10264 rtx dep_pat = PATTERN (dep_insn);
10265
10266 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
10267 return cost;
10268
10269 insn_type = get_attr_type (insn);
10270 dep_type = get_attr_type (dep_insn);
10271
10272 switch (dtype)
10273 {
10274 case REG_DEP_TRUE:
10275 /* Data dependency; DEP_INSN writes a register that INSN reads some
10276 cycles later. */
10277
10278 switch (insn_type)
10279 {
10280 case TYPE_STORE:
10281 /* Try to schedule three instructions between the store and
10282 the ALU instruction that generated the data. */
10283 if (dep_type == TYPE_IALU || dep_type == TYPE_SHIFT)
10284 {
10285 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10286 break;
10287
10288 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10289 return 4;
10290 }
10291 break;
10292 default:
10293 break;
10294 }
10295 break;
10296 case REG_DEP_ANTI:
10297 /* Penalize anti-dependencies for FPU instructions. */
10298 if (fpop_insn_p (insn) || insn_type == TYPE_FPLOAD)
10299 return 4;
10300 break;
10301 default:
10302 break;
10303 }
10304
10305 return cost;
10306 }
10307
10308 static int
10309 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
10310 unsigned int)
10311 {
10312 switch (sparc_cpu)
10313 {
10314 case PROCESSOR_LEON5:
10315 cost = leon5_adjust_cost (insn, dep_type, dep, cost);
10316 break;
10317 case PROCESSOR_SUPERSPARC:
10318 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
10319 break;
10320 case PROCESSOR_HYPERSPARC:
10321 case PROCESSOR_SPARCLITE86X:
10322 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
10323 break;
10324 default:
10325 break;
10326 }
10327 return cost;
10328 }
10329
10330 static void
10331 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
10332 int sched_verbose ATTRIBUTE_UNUSED,
10333 int max_ready ATTRIBUTE_UNUSED)
10334 {}
10335
10336 static int
10337 sparc_use_sched_lookahead (void)
10338 {
10339 switch (sparc_cpu)
10340 {
10341 case PROCESSOR_ULTRASPARC:
10342 case PROCESSOR_ULTRASPARC3:
10343 return 4;
10344 case PROCESSOR_SUPERSPARC:
10345 case PROCESSOR_HYPERSPARC:
10346 case PROCESSOR_SPARCLITE86X:
10347 return 3;
10348 case PROCESSOR_NIAGARA4:
10349 case PROCESSOR_NIAGARA7:
10350 case PROCESSOR_M8:
10351 return 2;
10352 case PROCESSOR_NIAGARA:
10353 case PROCESSOR_NIAGARA2:
10354 case PROCESSOR_NIAGARA3:
10355 default:
10356 return 0;
10357 }
10358 }
10359
10360 static int
10361 sparc_issue_rate (void)
10362 {
10363 switch (sparc_cpu)
10364 {
10365 case PROCESSOR_ULTRASPARC:
10366 case PROCESSOR_ULTRASPARC3:
10367 case PROCESSOR_M8:
10368 return 4;
10369 case PROCESSOR_SUPERSPARC:
10370 return 3;
10371 case PROCESSOR_HYPERSPARC:
10372 case PROCESSOR_SPARCLITE86X:
10373 case PROCESSOR_V9:
10374 /* Assume V9 processors are capable of at least dual-issue. */
10375 case PROCESSOR_NIAGARA4:
10376 case PROCESSOR_NIAGARA7:
10377 return 2;
10378 case PROCESSOR_NIAGARA:
10379 case PROCESSOR_NIAGARA2:
10380 case PROCESSOR_NIAGARA3:
10381 default:
10382 return 1;
10383 }
10384 }
10385
10386 int
10387 sparc_branch_cost (bool speed_p, bool predictable_p)
10388 {
10389 if (!speed_p)
10390 return 2;
10391
10392 /* For pre-V9 processors we use a single value (usually 3) to take into
10393 account the potential annulling of the delay slot (which ends up being
10394 a bubble in the pipeline slot) plus a cycle to take into consideration
10395 the instruction cache effects.
10396
10397 On V9 and later processors, which have branch prediction facilities,
10398 we take into account whether the branch is (easily) predictable. */
10399 const int cost = sparc_costs->branch_cost;
10400
10401 switch (sparc_cpu)
10402 {
10403 case PROCESSOR_V9:
10404 case PROCESSOR_ULTRASPARC:
10405 case PROCESSOR_ULTRASPARC3:
10406 case PROCESSOR_NIAGARA:
10407 case PROCESSOR_NIAGARA2:
10408 case PROCESSOR_NIAGARA3:
10409 case PROCESSOR_NIAGARA4:
10410 case PROCESSOR_NIAGARA7:
10411 case PROCESSOR_M8:
10412 return cost + (predictable_p ? 0 : 2);
10413
10414 default:
10415 return cost;
10416 }
10417 }
10418
10419 static int
10420 set_extends (rtx_insn *insn)
10421 {
10422 register rtx pat = PATTERN (insn);
10423
10424 switch (GET_CODE (SET_SRC (pat)))
10425 {
10426 /* Load and some shift instructions zero extend. */
10427 case MEM:
10428 case ZERO_EXTEND:
10429 /* sethi clears the high bits */
10430 case HIGH:
10431 /* LO_SUM is used with sethi. sethi cleared the high
10432 bits and the values used with lo_sum are positive */
10433 case LO_SUM:
10434 /* Store flag stores 0 or 1 */
10435 case LT: case LTU:
10436 case GT: case GTU:
10437 case LE: case LEU:
10438 case GE: case GEU:
10439 case EQ:
10440 case NE:
10441 return 1;
10442 case AND:
10443 {
10444 rtx op0 = XEXP (SET_SRC (pat), 0);
10445 rtx op1 = XEXP (SET_SRC (pat), 1);
10446 if (GET_CODE (op1) == CONST_INT)
10447 return INTVAL (op1) >= 0;
10448 if (GET_CODE (op0) != REG)
10449 return 0;
10450 if (sparc_check_64 (op0, insn) == 1)
10451 return 1;
10452 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10453 }
10454 case IOR:
10455 case XOR:
10456 {
10457 rtx op0 = XEXP (SET_SRC (pat), 0);
10458 rtx op1 = XEXP (SET_SRC (pat), 1);
10459 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
10460 return 0;
10461 if (GET_CODE (op1) == CONST_INT)
10462 return INTVAL (op1) >= 0;
10463 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10464 }
10465 case LSHIFTRT:
10466 return GET_MODE (SET_SRC (pat)) == SImode;
10467 /* Positive integers leave the high bits zero. */
10468 case CONST_INT:
10469 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
10470 case ASHIFTRT:
10471 case SIGN_EXTEND:
10472 return - (GET_MODE (SET_SRC (pat)) == SImode);
10473 case REG:
10474 return sparc_check_64 (SET_SRC (pat), insn);
10475 default:
10476 return 0;
10477 }
10478 }
10479
10480 /* We _ought_ to have only one kind per function, but... */
10481 static GTY(()) rtx sparc_addr_diff_list;
10482 static GTY(()) rtx sparc_addr_list;
10483
10484 void
10485 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
10486 {
10487 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
10488 if (diff)
10489 sparc_addr_diff_list
10490 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
10491 else
10492 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
10493 }
10494
10495 static void
10496 sparc_output_addr_vec (rtx vec)
10497 {
10498 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10499 int idx, vlen = XVECLEN (body, 0);
10500
10501 #ifdef ASM_OUTPUT_ADDR_VEC_START
10502 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10503 #endif
10504
10505 #ifdef ASM_OUTPUT_CASE_LABEL
10506 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10507 NEXT_INSN (lab));
10508 #else
10509 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10510 #endif
10511
10512 for (idx = 0; idx < vlen; idx++)
10513 {
10514 ASM_OUTPUT_ADDR_VEC_ELT
10515 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10516 }
10517
10518 #ifdef ASM_OUTPUT_ADDR_VEC_END
10519 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10520 #endif
10521 }
10522
10523 static void
10524 sparc_output_addr_diff_vec (rtx vec)
10525 {
10526 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10527 rtx base = XEXP (XEXP (body, 0), 0);
10528 int idx, vlen = XVECLEN (body, 1);
10529
10530 #ifdef ASM_OUTPUT_ADDR_VEC_START
10531 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10532 #endif
10533
10534 #ifdef ASM_OUTPUT_CASE_LABEL
10535 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10536 NEXT_INSN (lab));
10537 #else
10538 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10539 #endif
10540
10541 for (idx = 0; idx < vlen; idx++)
10542 {
10543 ASM_OUTPUT_ADDR_DIFF_ELT
10544 (asm_out_file,
10545 body,
10546 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10547 CODE_LABEL_NUMBER (base));
10548 }
10549
10550 #ifdef ASM_OUTPUT_ADDR_VEC_END
10551 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10552 #endif
10553 }
10554
10555 static void
10556 sparc_output_deferred_case_vectors (void)
10557 {
10558 rtx t;
10559 int align;
10560
10561 if (sparc_addr_list == NULL_RTX
10562 && sparc_addr_diff_list == NULL_RTX)
10563 return;
10564
10565 /* Align to cache line in the function's code section. */
10566 switch_to_section (current_function_section ());
10567
10568 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
10569 if (align > 0)
10570 ASM_OUTPUT_ALIGN (asm_out_file, align);
10571
10572 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
10573 sparc_output_addr_vec (XEXP (t, 0));
10574 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
10575 sparc_output_addr_diff_vec (XEXP (t, 0));
10576
10577 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
10578 }
10579
10580 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
10581 unknown. Return 1 if the high bits are zero, -1 if the register is
10582 sign extended. */
10583 int
10584 sparc_check_64 (rtx x, rtx_insn *insn)
10585 {
10586 /* If a register is set only once it is safe to ignore insns this
10587 code does not know how to handle. The loop will either recognize
10588 the single set and return the correct value or fail to recognize
10589 it and return 0. */
10590 int set_once = 0;
10591 rtx y = x;
10592
10593 gcc_assert (GET_CODE (x) == REG);
10594
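/* For a DImode register, look at the register holding the low-order word;
   with WORDS_BIG_ENDIAN set (as on SPARC) that is the second register of
   the pair.  */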
10595 if (GET_MODE (x) == DImode)
10596 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
10597
10598 if (flag_expensive_optimizations
10599 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
10600 set_once = 1;
10601
10602 if (insn == 0)
10603 {
10604 if (set_once)
10605 insn = get_last_insn_anywhere ();
10606 else
10607 return 0;
10608 }
10609
10610 while ((insn = PREV_INSN (insn)))
10611 {
10612 switch (GET_CODE (insn))
10613 {
10614 case JUMP_INSN:
10615 case NOTE:
10616 break;
10617 case CODE_LABEL:
10618 case CALL_INSN:
10619 default:
10620 if (! set_once)
10621 return 0;
10622 break;
10623 case INSN:
10624 {
10625 rtx pat = PATTERN (insn);
10626 if (GET_CODE (pat) != SET)
10627 return 0;
10628 if (rtx_equal_p (x, SET_DEST (pat)))
10629 return set_extends (insn);
10630 if (y && rtx_equal_p (y, SET_DEST (pat)))
10631 return set_extends (insn);
10632 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
10633 return 0;
10634 }
10635 }
10636 }
10637 return 0;
10638 }
10639
10640 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
10641 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
10642
10643 const char *
10644 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
10645 {
10646 static char asm_code[60];
10647
10648 /* The scratch register is only required when the destination
10649 register is not a 64-bit global or out register. */
10650 if (which_alternative != 2)
10651 operands[3] = operands[0];
10652
10653 /* We can only shift by constants <= 63. */
10654 if (GET_CODE (operands[2]) == CONST_INT)
10655 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
10656
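/* Get the full 64-bit source value into %3: a constant can be moved
   directly; otherwise shift the high word into the upper half with sllx
   and OR in the low word, zero-extending it first unless sparc_check_64
   proves its upper 32 bits are already clear.  */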
10657 if (GET_CODE (operands[1]) == CONST_INT)
10658 {
10659 output_asm_insn ("mov\t%1, %3", operands);
10660 }
10661 else
10662 {
10663 output_asm_insn ("sllx\t%H1, 32, %3", operands);
10664 if (sparc_check_64 (operands[1], insn) <= 0)
10665 output_asm_insn ("srl\t%L1, 0, %L1", operands);
10666 output_asm_insn ("or\t%L1, %3, %3", operands);
10667 }
10668
10669 strcpy (asm_code, opcode);
10670
10671 if (which_alternative != 2)
10672 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
10673 else
10674 return
10675 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
10676 }
10677
10678 /* Output rtl to increment the profiler label LABELNO
10679 for profiling a function entry. */
10680
10681 void
10682 sparc_profile_hook (int labelno)
10683 {
10684 char buf[32];
10685 rtx lab, fun;
10686
10687 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
10688 if (NO_PROFILE_COUNTERS)
10689 {
10690 emit_library_call (fun, LCT_NORMAL, VOIDmode);
10691 }
10692 else
10693 {
10694 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10695 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
10696 emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode);
10697 }
10698 }
10699
10700 #ifdef TARGET_SOLARIS
10701 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
10702
10703 static void
10704 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
10705 tree decl ATTRIBUTE_UNUSED)
10706 {
10707 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
10708 {
10709 solaris_elf_asm_comdat_section (name, flags, decl);
10710 return;
10711 }
10712
10713 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
10714
10715 if (!(flags & SECTION_DEBUG))
10716 fputs (",#alloc", asm_out_file);
10717 #if HAVE_GAS_SECTION_EXCLUDE
10718 if (flags & SECTION_EXCLUDE)
10719 fputs (",#exclude", asm_out_file);
10720 #endif
10721 if (flags & SECTION_WRITE)
10722 fputs (",#write", asm_out_file);
10723 if (flags & SECTION_TLS)
10724 fputs (",#tls", asm_out_file);
10725 if (flags & SECTION_CODE)
10726 fputs (",#execinstr", asm_out_file);
10727
10728 if (flags & SECTION_NOTYPE)
10729 ;
10730 else if (flags & SECTION_BSS)
10731 fputs (",#nobits", asm_out_file);
10732 else
10733 fputs (",#progbits", asm_out_file);
10734
10735 fputc ('\n', asm_out_file);
10736 }
10737 #endif /* TARGET_SOLARIS */
10738
10739 /* We do not allow indirect calls to be optimized into sibling calls.
10740
10741 We cannot use sibling calls when delayed branches are disabled
10742 because they will likely require the call delay slot to be filled.
10743
10744 Also, on SPARC 32-bit we cannot emit a sibling call when the
10745 current function returns a structure. This is because the "unimp
10746 after call" convention would cause the callee to return to the
10747 wrong place. The generic code already disallows cases where the
10748 function being called returns a structure.
10749
10750 It may seem strange how this last case could occur. Usually there
10751 is code after the call which jumps to epilogue code which dumps the
10752 return value into the struct return area. That ought to invalidate
10753 the sibling call right? Well, in the C++ case we can end up passing
10754 the pointer to the struct return area to a constructor (which returns
10755 void) and then nothing else happens. Such a sibling call would look
10756 valid without the added check here.
10757
10758 VxWorks PIC PLT entries require the global pointer to be initialized
10759 on entry. We therefore can't emit sibling calls to them. */
10760 static bool
10761 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10762 {
10763 return (decl
10764 && flag_delayed_branch
10765 && (TARGET_ARCH64 || ! cfun->returns_struct)
10766 && !(TARGET_VXWORKS_RTP
10767 && flag_pic
10768 && !targetm.binds_local_p (decl)));
10769 }
10770
10771 /* libfunc renaming. */
10772
10773 static void
10774 sparc_init_libfuncs (void)
10775 {
10776 if (TARGET_ARCH32)
10777 {
10778 /* Use the subroutines that Sun's library provides for integer
10779 multiply and divide. The `*' prevents an underscore from
10780 being prepended by the compiler. .umul is a little faster
10781 than .mul. */
10782 set_optab_libfunc (smul_optab, SImode, "*.umul");
10783 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10784 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10785 set_optab_libfunc (smod_optab, SImode, "*.rem");
10786 set_optab_libfunc (umod_optab, SImode, "*.urem");
10787
10788 /* TFmode arithmetic. These names are part of the SPARC 32-bit ABI. */
10789 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10790 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10791 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10792 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10793 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10794
10795 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10796 is because with soft-float, the SFmode and DFmode sqrt
10797 instructions will be absent, and the compiler will notice and
10798 try to use the TFmode sqrt instruction for calls to the
10799 builtin function sqrt, but this fails. */
10800 if (TARGET_FPU)
10801 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10802
10803 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10804 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10805 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10806 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10807 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10808 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10809
10810 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10811 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10812 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10813 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10814
10815 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10816 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10817 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10818 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10819
10820 if (DITF_CONVERSION_LIBFUNCS)
10821 {
10822 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10823 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10824 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10825 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10826 }
10827
10828 if (SUN_CONVERSION_LIBFUNCS)
10829 {
10830 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10831 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10832 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10833 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10834 }
10835 }
10836 if (TARGET_ARCH64)
10837 {
10838 /* In the SPARC 64-bit ABI, SImode multiply and divide functions
10839 do not exist in the library. Make sure the compiler does not
10840 emit calls to them by accident. (It should always use the
10841 hardware instructions.) */
10842 set_optab_libfunc (smul_optab, SImode, 0);
10843 set_optab_libfunc (sdiv_optab, SImode, 0);
10844 set_optab_libfunc (udiv_optab, SImode, 0);
10845 set_optab_libfunc (smod_optab, SImode, 0);
10846 set_optab_libfunc (umod_optab, SImode, 0);
10847
10848 if (SUN_INTEGER_MULTIPLY_64)
10849 {
10850 set_optab_libfunc (smul_optab, DImode, "__mul64");
10851 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10852 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10853 set_optab_libfunc (smod_optab, DImode, "__rem64");
10854 set_optab_libfunc (umod_optab, DImode, "__urem64");
10855 }
10856
10857 if (SUN_CONVERSION_LIBFUNCS)
10858 {
10859 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10860 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10861 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10862 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10863 }
10864 }
10865 }
10866
10867 /* SPARC builtins. */
10868 enum sparc_builtins
10869 {
10870 /* FPU builtins. */
10871 SPARC_BUILTIN_LDFSR,
10872 SPARC_BUILTIN_STFSR,
10873
10874 /* VIS 1.0 builtins. */
10875 SPARC_BUILTIN_FPACK16,
10876 SPARC_BUILTIN_FPACK32,
10877 SPARC_BUILTIN_FPACKFIX,
10878 SPARC_BUILTIN_FEXPAND,
10879 SPARC_BUILTIN_FPMERGE,
10880 SPARC_BUILTIN_FMUL8X16,
10881 SPARC_BUILTIN_FMUL8X16AU,
10882 SPARC_BUILTIN_FMUL8X16AL,
10883 SPARC_BUILTIN_FMUL8SUX16,
10884 SPARC_BUILTIN_FMUL8ULX16,
10885 SPARC_BUILTIN_FMULD8SUX16,
10886 SPARC_BUILTIN_FMULD8ULX16,
10887 SPARC_BUILTIN_FALIGNDATAV4HI,
10888 SPARC_BUILTIN_FALIGNDATAV8QI,
10889 SPARC_BUILTIN_FALIGNDATAV2SI,
10890 SPARC_BUILTIN_FALIGNDATADI,
10891 SPARC_BUILTIN_WRGSR,
10892 SPARC_BUILTIN_RDGSR,
10893 SPARC_BUILTIN_ALIGNADDR,
10894 SPARC_BUILTIN_ALIGNADDRL,
10895 SPARC_BUILTIN_PDIST,
10896 SPARC_BUILTIN_EDGE8,
10897 SPARC_BUILTIN_EDGE8L,
10898 SPARC_BUILTIN_EDGE16,
10899 SPARC_BUILTIN_EDGE16L,
10900 SPARC_BUILTIN_EDGE32,
10901 SPARC_BUILTIN_EDGE32L,
10902 SPARC_BUILTIN_FCMPLE16,
10903 SPARC_BUILTIN_FCMPLE32,
10904 SPARC_BUILTIN_FCMPNE16,
10905 SPARC_BUILTIN_FCMPNE32,
10906 SPARC_BUILTIN_FCMPGT16,
10907 SPARC_BUILTIN_FCMPGT32,
10908 SPARC_BUILTIN_FCMPEQ16,
10909 SPARC_BUILTIN_FCMPEQ32,
10910 SPARC_BUILTIN_FPADD16,
10911 SPARC_BUILTIN_FPADD16S,
10912 SPARC_BUILTIN_FPADD32,
10913 SPARC_BUILTIN_FPADD32S,
10914 SPARC_BUILTIN_FPSUB16,
10915 SPARC_BUILTIN_FPSUB16S,
10916 SPARC_BUILTIN_FPSUB32,
10917 SPARC_BUILTIN_FPSUB32S,
10918 SPARC_BUILTIN_ARRAY8,
10919 SPARC_BUILTIN_ARRAY16,
10920 SPARC_BUILTIN_ARRAY32,
10921
10922 /* VIS 2.0 builtins. */
10923 SPARC_BUILTIN_EDGE8N,
10924 SPARC_BUILTIN_EDGE8LN,
10925 SPARC_BUILTIN_EDGE16N,
10926 SPARC_BUILTIN_EDGE16LN,
10927 SPARC_BUILTIN_EDGE32N,
10928 SPARC_BUILTIN_EDGE32LN,
10929 SPARC_BUILTIN_BMASK,
10930 SPARC_BUILTIN_BSHUFFLEV4HI,
10931 SPARC_BUILTIN_BSHUFFLEV8QI,
10932 SPARC_BUILTIN_BSHUFFLEV2SI,
10933 SPARC_BUILTIN_BSHUFFLEDI,
10934
10935 /* VIS 3.0 builtins. */
10936 SPARC_BUILTIN_CMASK8,
10937 SPARC_BUILTIN_CMASK16,
10938 SPARC_BUILTIN_CMASK32,
10939 SPARC_BUILTIN_FCHKSM16,
10940 SPARC_BUILTIN_FSLL16,
10941 SPARC_BUILTIN_FSLAS16,
10942 SPARC_BUILTIN_FSRL16,
10943 SPARC_BUILTIN_FSRA16,
10944 SPARC_BUILTIN_FSLL32,
10945 SPARC_BUILTIN_FSLAS32,
10946 SPARC_BUILTIN_FSRL32,
10947 SPARC_BUILTIN_FSRA32,
10948 SPARC_BUILTIN_PDISTN,
10949 SPARC_BUILTIN_FMEAN16,
10950 SPARC_BUILTIN_FPADD64,
10951 SPARC_BUILTIN_FPSUB64,
10952 SPARC_BUILTIN_FPADDS16,
10953 SPARC_BUILTIN_FPADDS16S,
10954 SPARC_BUILTIN_FPSUBS16,
10955 SPARC_BUILTIN_FPSUBS16S,
10956 SPARC_BUILTIN_FPADDS32,
10957 SPARC_BUILTIN_FPADDS32S,
10958 SPARC_BUILTIN_FPSUBS32,
10959 SPARC_BUILTIN_FPSUBS32S,
10960 SPARC_BUILTIN_FUCMPLE8,
10961 SPARC_BUILTIN_FUCMPNE8,
10962 SPARC_BUILTIN_FUCMPGT8,
10963 SPARC_BUILTIN_FUCMPEQ8,
10964 SPARC_BUILTIN_FHADDS,
10965 SPARC_BUILTIN_FHADDD,
10966 SPARC_BUILTIN_FHSUBS,
10967 SPARC_BUILTIN_FHSUBD,
10968 SPARC_BUILTIN_FNHADDS,
10969 SPARC_BUILTIN_FNHADDD,
10970 SPARC_BUILTIN_UMULXHI,
10971 SPARC_BUILTIN_XMULX,
10972 SPARC_BUILTIN_XMULXHI,
10973
10974 /* VIS 4.0 builtins. */
10975 SPARC_BUILTIN_FPADD8,
10976 SPARC_BUILTIN_FPADDS8,
10977 SPARC_BUILTIN_FPADDUS8,
10978 SPARC_BUILTIN_FPADDUS16,
10979 SPARC_BUILTIN_FPCMPLE8,
10980 SPARC_BUILTIN_FPCMPGT8,
10981 SPARC_BUILTIN_FPCMPULE16,
10982 SPARC_BUILTIN_FPCMPUGT16,
10983 SPARC_BUILTIN_FPCMPULE32,
10984 SPARC_BUILTIN_FPCMPUGT32,
10985 SPARC_BUILTIN_FPMAX8,
10986 SPARC_BUILTIN_FPMAX16,
10987 SPARC_BUILTIN_FPMAX32,
10988 SPARC_BUILTIN_FPMAXU8,
10989 SPARC_BUILTIN_FPMAXU16,
10990 SPARC_BUILTIN_FPMAXU32,
10991 SPARC_BUILTIN_FPMIN8,
10992 SPARC_BUILTIN_FPMIN16,
10993 SPARC_BUILTIN_FPMIN32,
10994 SPARC_BUILTIN_FPMINU8,
10995 SPARC_BUILTIN_FPMINU16,
10996 SPARC_BUILTIN_FPMINU32,
10997 SPARC_BUILTIN_FPSUB8,
10998 SPARC_BUILTIN_FPSUBS8,
10999 SPARC_BUILTIN_FPSUBUS8,
11000 SPARC_BUILTIN_FPSUBUS16,
11001
11002 /* VIS 4.0B builtins. */
11003
11004 /* Note that all the DICTUNPACK* entries should be kept
11005 contiguous. */
11006 SPARC_BUILTIN_FIRST_DICTUNPACK,
11007 SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
11008 SPARC_BUILTIN_DICTUNPACK16,
11009 SPARC_BUILTIN_DICTUNPACK32,
11010 SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,
11011
11012 /* Note that all the FPCMP*SHL entries should be kept
11013 contiguous. */
11014 SPARC_BUILTIN_FIRST_FPCMPSHL,
11015 SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
11016 SPARC_BUILTIN_FPCMPGT8SHL,
11017 SPARC_BUILTIN_FPCMPEQ8SHL,
11018 SPARC_BUILTIN_FPCMPNE8SHL,
11019 SPARC_BUILTIN_FPCMPLE16SHL,
11020 SPARC_BUILTIN_FPCMPGT16SHL,
11021 SPARC_BUILTIN_FPCMPEQ16SHL,
11022 SPARC_BUILTIN_FPCMPNE16SHL,
11023 SPARC_BUILTIN_FPCMPLE32SHL,
11024 SPARC_BUILTIN_FPCMPGT32SHL,
11025 SPARC_BUILTIN_FPCMPEQ32SHL,
11026 SPARC_BUILTIN_FPCMPNE32SHL,
11027 SPARC_BUILTIN_FPCMPULE8SHL,
11028 SPARC_BUILTIN_FPCMPUGT8SHL,
11029 SPARC_BUILTIN_FPCMPULE16SHL,
11030 SPARC_BUILTIN_FPCMPUGT16SHL,
11031 SPARC_BUILTIN_FPCMPULE32SHL,
11032 SPARC_BUILTIN_FPCMPUGT32SHL,
11033 SPARC_BUILTIN_FPCMPDE8SHL,
11034 SPARC_BUILTIN_FPCMPDE16SHL,
11035 SPARC_BUILTIN_FPCMPDE32SHL,
11036 SPARC_BUILTIN_FPCMPUR8SHL,
11037 SPARC_BUILTIN_FPCMPUR16SHL,
11038 SPARC_BUILTIN_FPCMPUR32SHL,
11039 SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,
11040
11041 SPARC_BUILTIN_MAX
11042 };
11043
11044 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
11045 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
11046
11047 /* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
11048 The instruction should require a constant operand of some sort. The
11049 function prints an error if OPVAL is not valid. */
11050
11051 static int
11052 check_constant_argument (enum insn_code icode, int opnum, rtx opval)
11053 {
11054 if (GET_CODE (opval) != CONST_INT)
11055 {
11056 error ("%qs expects a constant argument", insn_data[icode].name);
11057 return false;
11058 }
11059
11060 if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
11061 {
11062 error ("constant argument out of range for %qs", insn_data[icode].name);
11063 return false;
11064 }
11065 return true;
11066 }
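
/* Example (added for exposition, not part of the original sources): only the
   VIS 4.0B builtins are routed through this check by sparc_expand_builtin
   below, so a hypothetical user-level call such as

     res = __builtin_vis_dictunpack8 (dict, 7);      // accepted: 7 is a literal
     res = __builtin_vis_dictunpack8 (dict, shift);  // rejected with an error

   is diagnosed here when the trailing argument is not a CONST_INT.  */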
11067
11068 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
11069 function decl or NULL_TREE if the builtin was not added. */
11070
11071 static tree
11072 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
11073 tree type)
11074 {
11075 tree t
11076 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
11077
11078 if (t)
11079 {
11080 sparc_builtins[code] = t;
11081 sparc_builtins_icode[code] = icode;
11082 }
11083
11084 return t;
11085 }
11086
11087 /* Likewise, but also marks the function as "const". */
11088
11089 static tree
11090 def_builtin_const (const char *name, enum insn_code icode,
11091 enum sparc_builtins code, tree type)
11092 {
11093 tree t = def_builtin (name, icode, code, type);
11094
11095 if (t)
11096 TREE_READONLY (t) = 1;
11097
11098 return t;
11099 }
11100
11101 /* Implement the TARGET_INIT_BUILTINS target hook.
11102 Create builtin functions for special SPARC instructions. */
11103
11104 static void
11105 sparc_init_builtins (void)
11106 {
11107 if (TARGET_FPU)
11108 sparc_fpu_init_builtins ();
11109
11110 if (TARGET_VIS)
11111 sparc_vis_init_builtins ();
11112 }
11113
11114 /* Create builtin functions for FPU instructions. */
11115
11116 static void
11117 sparc_fpu_init_builtins (void)
11118 {
11119 tree ftype
11120 = build_function_type_list (void_type_node,
11121 build_pointer_type (unsigned_type_node), 0);
11122 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
11123 SPARC_BUILTIN_LDFSR, ftype);
11124 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
11125 SPARC_BUILTIN_STFSR, ftype);
11126 }
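
/* Usage sketch (illustrative only; the variable name is made up): both
   builtins take a pointer to an unsigned int holding an image of the %fsr
   register, matching the ldfsr/stfsr patterns bound above:

     unsigned int fsr;
     __builtin_store_fsr (&fsr);   // copy the current %fsr into fsr
     fsr &= ~0x1fu;                // hypothetical manipulation of the image
     __builtin_load_fsr (&fsr);    // write the modified image back to %fsr
*/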
11127
11128 /* Create builtin functions for VIS instructions. */
11129
11130 static void
11131 sparc_vis_init_builtins (void)
11132 {
11133 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
11134 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
11135 tree v4hi = build_vector_type (intHI_type_node, 4);
11136 tree v2hi = build_vector_type (intHI_type_node, 2);
11137 tree v2si = build_vector_type (intSI_type_node, 2);
11138 tree v1si = build_vector_type (intSI_type_node, 1);
11139
11140 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
11141 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
11142 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
11143 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
11144 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
11145 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
11146 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
11147 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
11148 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
11149 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
11150 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
11151 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
11152 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
11153 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
11154 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
11155 v8qi, v8qi,
11156 intDI_type_node, 0);
11157 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
11158 v8qi, v8qi, 0);
11159 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
11160 v8qi, v8qi, 0);
11161 tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
11162 intSI_type_node, 0);
11163 tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
11164 intSI_type_node, 0);
11165 tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
11166 						    intSI_type_node, 0);
11167 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
11168 intDI_type_node,
11169 intDI_type_node, 0);
11170 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
11171 intSI_type_node,
11172 intSI_type_node, 0);
11173 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
11174 ptr_type_node,
11175 intSI_type_node, 0);
11176 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
11177 ptr_type_node,
11178 intDI_type_node, 0);
11179 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
11180 ptr_type_node,
11181 ptr_type_node, 0);
11182 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
11183 ptr_type_node,
11184 ptr_type_node, 0);
11185 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
11186 v4hi, v4hi, 0);
11187 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
11188 v2si, v2si, 0);
11189 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
11190 v4hi, v4hi, 0);
11191 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
11192 v2si, v2si, 0);
11193 tree void_ftype_di = build_function_type_list (void_type_node,
11194 intDI_type_node, 0);
11195 tree di_ftype_void = build_function_type_list (intDI_type_node,
11196 void_type_node, 0);
11197 tree void_ftype_si = build_function_type_list (void_type_node,
11198 intSI_type_node, 0);
11199 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
11200 float_type_node,
11201 float_type_node, 0);
11202 tree df_ftype_df_df = build_function_type_list (double_type_node,
11203 double_type_node,
11204 double_type_node, 0);
11205
11206 /* Packing and expanding vectors. */
11207 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
11208 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
11209 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
11210 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
11211 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
11212 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
11213 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
11214 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
11215 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
11216 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
11217
11218 /* Multiplications. */
11219 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
11220 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
11221 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
11222 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
11223 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
11224 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
11225 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
11226 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
11227 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
11228 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
11229 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
11230 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
11231 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
11232 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
11233
11234 /* Data aligning. */
11235 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
11236 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
11237 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
11238 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
11239 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
11240 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
11241 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
11242 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
11243
11244 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
11245 SPARC_BUILTIN_WRGSR, void_ftype_di);
11246 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
11247 SPARC_BUILTIN_RDGSR, di_ftype_void);
11248
11249 if (TARGET_ARCH64)
11250 {
11251 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
11252 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
11253 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
11254 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
11255 }
11256 else
11257 {
11258 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
11259 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
11260 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
11261 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
11262 }
11263
11264 /* Pixel distance. */
11265 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
11266 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
11267
11268 /* Edge handling. */
11269 if (TARGET_ARCH64)
11270 {
11271 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
11272 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
11273 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
11274 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
11275 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
11276 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
11277 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
11278 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
11279 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
11280 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
11281 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
11282 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
11283 }
11284 else
11285 {
11286 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
11287 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
11288 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
11289 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
11290 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
11291 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
11292 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
11293 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
11294 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
11295 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
11296 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
11297 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
11298 }
11299
11300 /* Pixel compare. */
11301 if (TARGET_ARCH64)
11302 {
11303 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
11304 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
11305 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
11306 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
11307 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
11308 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
11309 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
11310 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
11311 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
11312 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
11313 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
11314 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
11315 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
11316 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
11317 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
11318 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
11319 }
11320 else
11321 {
11322 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
11323 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
11324 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
11325 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
11326 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
11327 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
11328 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
11329 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
11330 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
11331 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
11332 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
11333 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
11334 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
11335 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
11336 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
11337 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
11338 }
11339
11340 /* Addition and subtraction. */
11341 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
11342 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
11343 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
11344 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
11345 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
11346 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
11347 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
11348 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
11349 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
11350 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
11351 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
11352 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
11353 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
11354 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
11355 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
11356 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
11357
11358 /* Three-dimensional array addressing. */
11359 if (TARGET_ARCH64)
11360 {
11361 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
11362 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
11363 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
11364 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
11365 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
11366 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
11367 }
11368 else
11369 {
11370 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
11371 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
11372 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
11373 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
11374 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
11375 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
11376 }
11377
11378 if (TARGET_VIS2)
11379 {
11380 /* Edge handling. */
11381 if (TARGET_ARCH64)
11382 {
11383 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
11384 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
11385 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
11386 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
11387 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
11388 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
11389 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
11390 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
11391 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
11392 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
11393 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
11394 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
11395 }
11396 else
11397 {
11398 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
11399 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
11400 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
11401 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
11402 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
11403 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
11404 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
11405 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
11406 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
11407 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
11408 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
11409 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
11410 }
11411
11412 /* Byte mask and shuffle. */
11413 if (TARGET_ARCH64)
11414 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
11415 SPARC_BUILTIN_BMASK, di_ftype_di_di);
11416 else
11417 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
11418 SPARC_BUILTIN_BMASK, si_ftype_si_si);
11419 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
11420 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
11421 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
11422 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
11423 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
11424 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
11425 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
11426 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
11427 }
11428
11429 if (TARGET_VIS3)
11430 {
11431 if (TARGET_ARCH64)
11432 {
11433 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
11434 SPARC_BUILTIN_CMASK8, void_ftype_di);
11435 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
11436 SPARC_BUILTIN_CMASK16, void_ftype_di);
11437 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
11438 SPARC_BUILTIN_CMASK32, void_ftype_di);
11439 }
11440 else
11441 {
11442 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
11443 SPARC_BUILTIN_CMASK8, void_ftype_si);
11444 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
11445 SPARC_BUILTIN_CMASK16, void_ftype_si);
11446 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
11447 SPARC_BUILTIN_CMASK32, void_ftype_si);
11448 }
11449
11450 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
11451 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
11452
11453 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
11454 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
11455 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
11456 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
11457 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
11458 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
11459 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
11460 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
11461 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
11462 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
11463 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
11464 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
11465 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
11466 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
11467 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
11468 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
11469
11470 if (TARGET_ARCH64)
11471 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
11472 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
11473 else
11474 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
11475 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
11476
11477 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
11478 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
11479 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
11480 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
11481 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
11482 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
11483
11484 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
11485 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
11486 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
11487 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
11488 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
11489 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
11490 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
11491 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
11492 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
11493 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
11494 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
11495 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
11496 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
11497 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
11498 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
11499 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
11500
11501 if (TARGET_ARCH64)
11502 {
11503 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
11504 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
11505 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
11506 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
11507 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
11508 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
11509 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
11510 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
11511 }
11512 else
11513 {
11514 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
11515 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
11516 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
11517 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
11518 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
11519 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
11520 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
11521 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
11522 }
11523
11524 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
11525 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
11526 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
11527 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
11528 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
11529 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
11530 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
11531 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
11532 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
11533 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
11534 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
11535 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
11536
11537 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
11538 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
11539 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
11540 SPARC_BUILTIN_XMULX, di_ftype_di_di);
11541 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
11542 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
11543 }
11544
11545 if (TARGET_VIS4)
11546 {
11547 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
11548 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
11549 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
11550 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
11551 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
11552 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
11553 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
11554 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
11555
11556
11557 if (TARGET_ARCH64)
11558 {
11559 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
11560 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
11561 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
11562 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
11563 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
11564 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
11565 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
11566 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
11567 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
11568 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11569 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
11570 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11571 }
11572 else
11573 {
11574 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
11575 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
11576 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
11577 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
11578 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
11579 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
11580 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
11581 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
11582 	  def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
11583 			     SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
11584 	  def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
11585 			     SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
11586 }
11587
11588 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
11589 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
11590 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
11591 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
11592 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
11593 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
11594 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
11595 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
11596 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
11597 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
11598 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
11599 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
11600 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
11601 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
11602 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
11603 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
11604 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
11605 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
11606 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
11607 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
11608 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
11609 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
11610 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
11611 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
11612 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
11613 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
11614 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
11615 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
11616 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
11617 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
11618 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
11619 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
11620 }
11621
11622 if (TARGET_VIS4B)
11623 {
11624 def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
11625 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
11626 def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
11627 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
11628 def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
11629 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);
11630
11631 if (TARGET_ARCH64)
11632 {
11633 tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
11634 v8qi, v8qi,
11635 intSI_type_node, 0);
11636 tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
11637 v4hi, v4hi,
11638 intSI_type_node, 0);
11639 tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
11640 v2si, v2si,
11641 intSI_type_node, 0);
11642
11643 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
11644 SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
11645 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
11646 SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
11647 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
11648 SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
11649 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
11650 SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);
11651
11652 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
11653 SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
11654 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
11655 SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
11656 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
11657 SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
11658 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
11659 SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);
11660
11661 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
11662 SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
11663 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
11664 SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
11665 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
11666 SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
11667 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
11668 SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);
11669
11670
11671 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
11672 SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
11673 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
11674 SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);
11675
11676 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
11677 SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
11678 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
11679 SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);
11680
11681 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
11682 SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
11683 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
11684 SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);
11685
11686 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
11687 SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
11688 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
11689 SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
11690 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
11691 SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);
11692
11693 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
11694 SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
11695 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
11696 SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
11697 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
11698 SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
11699
11700 }
11701 else
11702 {
11703 tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
11704 v8qi, v8qi,
11705 intSI_type_node, 0);
11706 tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
11707 v4hi, v4hi,
11708 intSI_type_node, 0);
11709 tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
11710 v2si, v2si,
11711 intSI_type_node, 0);
11712
11713 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
11714 SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
11715 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
11716 SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
11717 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
11718 SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
11719 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
11720 SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);
11721
11722 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
11723 SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
11724 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
11725 SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
11726 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
11727 SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
11728 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
11729 SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);
11730
11731 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
11732 SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si);
11733 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
11734 SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
11735 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
11736 SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
11737 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
11738 SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);
11739
11740
11741 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
11742 SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
11743 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
11744 SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);
11745
11746 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
11747 SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
11748 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
11749 SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);
11750
11751 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
11752 SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
11753 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
11754 SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);
11755
11756 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
11757 SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si);
11758 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
11759 SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
11760 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
11761 SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);
11762
11763 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
11764 SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
11765 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
11766 SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
11767 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
11768 SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
11769 }
11770 }
11771 }
11772
11773 /* Implement TARGET_BUILTIN_DECL hook. */
11774
11775 static tree
11776 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11777 {
11778 if (code >= SPARC_BUILTIN_MAX)
11779 return error_mark_node;
11780
11781 return sparc_builtins[code];
11782 }
11783
11784 /* Implement TARGET_EXPAND_BUILTIN hook. */
11785
11786 static rtx
11787 sparc_expand_builtin (tree exp, rtx target,
11788 rtx subtarget ATTRIBUTE_UNUSED,
11789 machine_mode tmode ATTRIBUTE_UNUSED,
11790 int ignore ATTRIBUTE_UNUSED)
11791 {
11792 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11793 enum sparc_builtins code
11794 = (enum sparc_builtins) DECL_MD_FUNCTION_CODE (fndecl);
11795 enum insn_code icode = sparc_builtins_icode[code];
11796 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
11797 call_expr_arg_iterator iter;
11798 int arg_count = 0;
11799 rtx pat, op[4];
11800 tree arg;
11801
11802 if (nonvoid)
11803 {
11804 machine_mode tmode = insn_data[icode].operand[0].mode;
11805 if (!target
11806 || GET_MODE (target) != tmode
11807 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11808 op[0] = gen_reg_rtx (tmode);
11809 else
11810 op[0] = target;
11811 }
11812 else
11813 op[0] = NULL_RTX;
11814
11815 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
11816 {
11817 const struct insn_operand_data *insn_op;
11818 int idx;
11819
11820 if (arg == error_mark_node)
11821 return NULL_RTX;
11822
11823 arg_count++;
11824 idx = arg_count - !nonvoid;
11825 insn_op = &insn_data[icode].operand[idx];
11826 op[arg_count] = expand_normal (arg);
11827
11828 /* Some of the builtins require constant arguments. We check
11829 for this here. */
11830 if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
11831 && code <= SPARC_BUILTIN_LAST_FPCMPSHL
11832 && arg_count == 3)
11833 || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
11834 && code <= SPARC_BUILTIN_LAST_DICTUNPACK
11835 && arg_count == 2))
11836 {
11837 if (!check_constant_argument (icode, idx, op[arg_count]))
11838 return const0_rtx;
11839 }
11840
11841 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
11842 {
11843 if (!address_operand (op[arg_count], SImode))
11844 {
11845 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
11846 op[arg_count] = copy_addr_to_reg (op[arg_count]);
11847 }
11848 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
11849 }
11850
11851 else if (insn_op->mode == V1DImode
11852 && GET_MODE (op[arg_count]) == DImode)
11853 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
11854
11855 else if (insn_op->mode == V1SImode
11856 && GET_MODE (op[arg_count]) == SImode)
11857 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
11858
11859 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
11860 insn_op->mode))
11861 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
11862 }
11863
11864 switch (arg_count)
11865 {
11866 case 0:
11867 pat = GEN_FCN (icode) (op[0]);
11868 break;
11869 case 1:
11870 if (nonvoid)
11871 pat = GEN_FCN (icode) (op[0], op[1]);
11872 else
11873 pat = GEN_FCN (icode) (op[1]);
11874 break;
11875 case 2:
11876 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
11877 break;
11878 case 3:
11879 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
11880 break;
11881 default:
11882 gcc_unreachable ();
11883 }
11884
11885 if (!pat)
11886 return NULL_RTX;
11887
11888 emit_insn (pat);
11889
11890 return (nonvoid ? op[0] : const0_rtx);
11891 }
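
/* Operand-numbering note (added for clarity): for a value-returning builtin,
   op[0] is the destination and the K-th call argument maps to insn operand K;
   for a void builtin such as __builtin_vis_write_gsr, op[0] is NULL_RTX and
   the K-th argument maps to insn operand K-1.  This is what the
   "idx = arg_count - !nonvoid" computation above encodes.  */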
11892
11893 /* Return the upper 16 bits of the 8x16 multiplication. */
11894
11895 static int
11896 sparc_vis_mul8x16 (int e8, int e16)
11897 {
11898 return (e8 * e16 + 128) / 256;
11899 }
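
/* Worked example (added): with e8 = 250 and e16 = 256 (i.e. 1.0 in 8.8 fixed
   point), the product is 64000; adding 128 for rounding and dividing by 256
   returns 250, the upper 16 bits of the rounded 24-bit product.  */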
11900
11901 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11902 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
11903
11904 static void
11905 sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode,
11906 tree inner_type, tree cst0, tree cst1)
11907 {
11908 unsigned i, num = VECTOR_CST_NELTS (cst0);
11909 int scale;
11910
11911 switch (fncode)
11912 {
11913 case SPARC_BUILTIN_FMUL8X16:
11914 for (i = 0; i < num; ++i)
11915 {
11916 int val
11917 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11918 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11919 n_elts->quick_push (build_int_cst (inner_type, val));
11920 }
11921 break;
11922
11923 case SPARC_BUILTIN_FMUL8X16AU:
11924 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11925
11926 for (i = 0; i < num; ++i)
11927 {
11928 int val
11929 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11930 scale);
11931 n_elts->quick_push (build_int_cst (inner_type, val));
11932 }
11933 break;
11934
11935 case SPARC_BUILTIN_FMUL8X16AL:
11936 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11937
11938 for (i = 0; i < num; ++i)
11939 {
11940 int val
11941 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11942 scale);
11943 n_elts->quick_push (build_int_cst (inner_type, val));
11944 }
11945 break;
11946
11947 default:
11948 gcc_unreachable ();
11949 }
11950 }
11951
11952 /* Implement TARGET_FOLD_BUILTIN hook.
11953
11954 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
11955 result of the function call is ignored. NULL_TREE is returned if the
11956 function could not be folded. */
11957
11958 static tree
11959 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
11960 tree *args, bool ignore)
11961 {
11962 enum sparc_builtins code
11963 = (enum sparc_builtins) DECL_MD_FUNCTION_CODE (fndecl);
11964 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
11965 tree arg0, arg1, arg2;
11966
11967 if (ignore)
11968 switch (code)
11969 {
11970 case SPARC_BUILTIN_LDFSR:
11971 case SPARC_BUILTIN_STFSR:
11972 case SPARC_BUILTIN_ALIGNADDR:
11973 case SPARC_BUILTIN_WRGSR:
11974 case SPARC_BUILTIN_BMASK:
11975 case SPARC_BUILTIN_CMASK8:
11976 case SPARC_BUILTIN_CMASK16:
11977 case SPARC_BUILTIN_CMASK32:
11978 break;
11979
11980 default:
11981 return build_zero_cst (rtype);
11982 }
11983
11984 switch (code)
11985 {
11986 case SPARC_BUILTIN_FEXPAND:
11987 arg0 = args[0];
11988 STRIP_NOPS (arg0);
11989
11990 if (TREE_CODE (arg0) == VECTOR_CST)
11991 {
11992 tree inner_type = TREE_TYPE (rtype);
11993 unsigned i;
11994
11995 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11996 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11997 {
11998 unsigned HOST_WIDE_INT val
11999 = TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i));
12000 n_elts.quick_push (build_int_cst (inner_type, val << 4));
12001 }
12002 return n_elts.build ();
12003 }
12004 break;
12005
12006 case SPARC_BUILTIN_FMUL8X16:
12007 case SPARC_BUILTIN_FMUL8X16AU:
12008 case SPARC_BUILTIN_FMUL8X16AL:
12009 arg0 = args[0];
12010 arg1 = args[1];
12011 STRIP_NOPS (arg0);
12012 STRIP_NOPS (arg1);
12013
12014 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
12015 {
12016 tree inner_type = TREE_TYPE (rtype);
12017 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
12018 sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1);
12019 return n_elts.build ();
12020 }
12021 break;
12022
12023 case SPARC_BUILTIN_FPMERGE:
12024 arg0 = args[0];
12025 arg1 = args[1];
12026 STRIP_NOPS (arg0);
12027 STRIP_NOPS (arg1);
12028
12029 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
12030 {
12031 tree_vector_builder n_elts (rtype, 2 * VECTOR_CST_NELTS (arg0), 1);
12032 unsigned i;
12033 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
12034 {
12035 n_elts.quick_push (VECTOR_CST_ELT (arg0, i));
12036 n_elts.quick_push (VECTOR_CST_ELT (arg1, i));
12037 }
12038
12039 return n_elts.build ();
12040 }
12041 break;
12042
12043 case SPARC_BUILTIN_PDIST:
12044 case SPARC_BUILTIN_PDISTN:
12045 arg0 = args[0];
12046 arg1 = args[1];
12047 STRIP_NOPS (arg0);
12048 STRIP_NOPS (arg1);
12049 if (code == SPARC_BUILTIN_PDIST)
12050 {
12051 arg2 = args[2];
12052 STRIP_NOPS (arg2);
12053 }
12054 else
12055 arg2 = integer_zero_node;
12056
12057 if (TREE_CODE (arg0) == VECTOR_CST
12058 && TREE_CODE (arg1) == VECTOR_CST
12059 && TREE_CODE (arg2) == INTEGER_CST)
12060 {
12061 bool overflow = false;
12062 widest_int result = wi::to_widest (arg2);
12063 widest_int tmp;
12064 unsigned i;
12065
12066 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
12067 {
12068 tree e0 = VECTOR_CST_ELT (arg0, i);
12069 tree e1 = VECTOR_CST_ELT (arg1, i);
12070
12071 wi::overflow_type neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
12072
12073 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
12074 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
12075 if (wi::neg_p (tmp))
12076 tmp = wi::neg (tmp, &neg2_ovf);
12077 else
12078 neg2_ovf = wi::OVF_NONE;
12079 result = wi::add (result, tmp, SIGNED, &add2_ovf);
12080 overflow |= ((neg1_ovf != wi::OVF_NONE)
12081 | (neg2_ovf != wi::OVF_NONE)
12082 | (add1_ovf != wi::OVF_NONE)
12083 | (add2_ovf != wi::OVF_NONE));
12084 }
12085
12086 gcc_assert (!overflow);
12087
12088 return wide_int_to_tree (rtype, result);
12089 }
12090
12091 default:
12092 break;
12093 }
12094
12095 return NULL_TREE;
12096 }
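
/* Folding example (illustrative; the vector type name is made up): given a
   constant operand, a call like

     __v4qi x = { 0x12, 0x34, 0x56, 0x78 };
     __builtin_vis_fexpand (x)

   is replaced at compile time by the vector constant
   { 0x120, 0x340, 0x560, 0x780 }, i.e. each 8-bit element shifted left by
   4 bits into a 16-bit lane, exactly as the FEXPAND case above computes.  */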
12097
12098 /* ??? This duplicates information provided to the compiler by the
12099 ??? scheduler description. Some day, teach genautomata to output
12100 ??? the latencies and then CSE will just use that. */
12101
12102 static bool
12103 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
12104 int opno ATTRIBUTE_UNUSED,
12105 int *total, bool speed ATTRIBUTE_UNUSED)
12106 {
12107 int code = GET_CODE (x);
12108 bool float_mode_p = FLOAT_MODE_P (mode);
12109
12110 switch (code)
12111 {
12112 case CONST_INT:
12113 if (SMALL_INT (x))
12114 *total = 0;
12115 else
12116 *total = 2;
12117 return true;
12118
12119 case CONST_WIDE_INT:
12120 *total = 0;
12121 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
12122 *total += 2;
12123 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
12124 *total += 2;
12125 return true;
12126
12127 case HIGH:
12128 *total = 2;
12129 return true;
12130
12131 case CONST:
12132 case LABEL_REF:
12133 case SYMBOL_REF:
12134 *total = 4;
12135 return true;
12136
12137 case CONST_DOUBLE:
12138 *total = 8;
12139 return true;
12140
12141 case MEM:
12142 /* If outer-code was a sign or zero extension, a cost
12143 of COSTS_N_INSNS (1) was already added in. This is
12144 why we are subtracting it back out. */
12145 if (outer_code == ZERO_EXTEND)
12146 {
12147 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
12148 }
12149 else if (outer_code == SIGN_EXTEND)
12150 {
12151 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
12152 }
12153 else if (float_mode_p)
12154 {
12155 *total = sparc_costs->float_load;
12156 }
12157 else
12158 {
12159 *total = sparc_costs->int_load;
12160 }
12161
12162 return true;
12163
12164 case PLUS:
12165 case MINUS:
12166 if (float_mode_p)
12167 *total = sparc_costs->float_plusminus;
12168 else
12169 *total = COSTS_N_INSNS (1);
12170 return false;
12171
12172 case FMA:
12173 {
12174 rtx sub;
12175
12176 gcc_assert (float_mode_p);
12177 *total = sparc_costs->float_mul;
12178
12179 sub = XEXP (x, 0);
12180 if (GET_CODE (sub) == NEG)
12181 sub = XEXP (sub, 0);
12182 *total += rtx_cost (sub, mode, FMA, 0, speed);
12183
12184 sub = XEXP (x, 2);
12185 if (GET_CODE (sub) == NEG)
12186 sub = XEXP (sub, 0);
12187 *total += rtx_cost (sub, mode, FMA, 2, speed);
12188 return true;
12189 }
12190
12191 case MULT:
12192 if (float_mode_p)
12193 *total = sparc_costs->float_mul;
12194 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
12195 *total = COSTS_N_INSNS (25);
12196 else
12197 {
12198 int bit_cost;
12199
12200 bit_cost = 0;
12201 if (sparc_costs->int_mul_bit_factor)
12202 {
12203 int nbits;
12204
12205 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
12206 {
12207 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
12208 for (nbits = 0; value != 0; value &= value - 1)
12209 nbits++;
12210 }
12211 else
12212 nbits = 7;
12213
12214 if (nbits < 3)
12215 nbits = 3;
12216 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
12217 bit_cost = COSTS_N_INSNS (bit_cost);
12218 }
12219
12220 if (mode == DImode || !TARGET_HARD_MUL)
12221 *total = sparc_costs->int_mulX + bit_cost;
12222 else
12223 *total = sparc_costs->int_mul + bit_cost;
12224 }
12225 return false;
12226
12227 case ASHIFT:
12228 case ASHIFTRT:
12229 case LSHIFTRT:
12230 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
12231 return false;
12232
12233 case DIV:
12234 case UDIV:
12235 case MOD:
12236 case UMOD:
12237 if (float_mode_p)
12238 {
12239 if (mode == DFmode)
12240 *total = sparc_costs->float_div_df;
12241 else
12242 *total = sparc_costs->float_div_sf;
12243 }
12244 else
12245 {
12246 if (mode == DImode)
12247 *total = sparc_costs->int_divX;
12248 else
12249 *total = sparc_costs->int_div;
12250 }
12251 return false;
12252
12253 case NEG:
12254 if (! float_mode_p)
12255 {
12256 *total = COSTS_N_INSNS (1);
12257 return false;
12258 }
12259 /* FALLTHRU */
12260
12261 case ABS:
12262 case FLOAT:
12263 case UNSIGNED_FLOAT:
12264 case FIX:
12265 case UNSIGNED_FIX:
12266 case FLOAT_EXTEND:
12267 case FLOAT_TRUNCATE:
12268 *total = sparc_costs->float_move;
12269 return false;
12270
12271 case SQRT:
12272 if (mode == DFmode)
12273 *total = sparc_costs->float_sqrt_df;
12274 else
12275 *total = sparc_costs->float_sqrt_sf;
12276 return false;
12277
12278 case COMPARE:
12279 if (float_mode_p)
12280 *total = sparc_costs->float_cmp;
12281 else
12282 *total = COSTS_N_INSNS (1);
12283 return false;
12284
12285 case IF_THEN_ELSE:
12286 if (float_mode_p)
12287 *total = sparc_costs->float_cmove;
12288 else
12289 *total = sparc_costs->int_cmove;
12290 return false;
12291
12292 case IOR:
12293 /* Handle the NAND vector patterns. */
12294 if (sparc_vector_mode_supported_p (mode)
12295 && GET_CODE (XEXP (x, 0)) == NOT
12296 && GET_CODE (XEXP (x, 1)) == NOT)
12297 {
12298 *total = COSTS_N_INSNS (1);
12299 return true;
12300 }
12301 else
12302 return false;
12303
12304 default:
12305 return false;
12306 }
12307 }
12308
12309 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
12310
12311 static inline bool
12312 general_or_i64_p (reg_class_t rclass)
12313 {
12314 return (rclass == GENERAL_REGS || rclass == I64_REGS);
12315 }
12316
12317 /* Implement TARGET_REGISTER_MOVE_COST. */
12318
12319 static int
12320 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12321 reg_class_t from, reg_class_t to)
12322 {
12323 bool need_memory = false;
12324
12325 /* This helps postreload CSE to eliminate redundant comparisons. */
12326 if (from == NO_REGS || to == NO_REGS)
12327 return 100;
12328
12329 if (from == FPCC_REGS || to == FPCC_REGS)
12330 need_memory = true;
12331 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
12332 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
12333 {
12334 if (TARGET_VIS3)
12335 {
12336 int size = GET_MODE_SIZE (mode);
12337 if (size == 8 || size == 4)
12338 {
12339 if (! TARGET_ARCH32 || size == 4)
12340 return 4;
12341 else
12342 return 6;
12343 }
12344 }
12345 need_memory = true;
12346 }
12347
12348 if (need_memory)
12349 {
12350 if (sparc_cpu == PROCESSOR_ULTRASPARC
12351 || sparc_cpu == PROCESSOR_ULTRASPARC3
12352 || sparc_cpu == PROCESSOR_NIAGARA
12353 || sparc_cpu == PROCESSOR_NIAGARA2
12354 || sparc_cpu == PROCESSOR_NIAGARA3
12355 || sparc_cpu == PROCESSOR_NIAGARA4
12356 || sparc_cpu == PROCESSOR_NIAGARA7
12357 || sparc_cpu == PROCESSOR_M8)
12358 return 12;
12359
12360 return 6;
12361 }
12362
12363 return 2;
12364 }
12365
12366 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
12367 This is achieved by means of a manual dynamic stack space allocation in
12368 the current frame. We make the assumption that SEQ doesn't contain any
12369 function calls, with the possible exception of calls to the GOT helper. */
12370
12371 static void
12372 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
12373 {
12374 /* We must preserve the lowest 16 words for the register save area. */
12375 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
12376 /* We really need only 2 words of fresh stack space. */
12377 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
12378
12379 rtx slot
12380 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
12381 SPARC_STACK_BIAS + offset));
12382
12383 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
12384 emit_insn (gen_rtx_SET (slot, reg));
12385 if (reg2)
12386 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
12387 reg2));
12388 emit_insn (seq);
12389 if (reg2)
12390 emit_insn (gen_rtx_SET (reg2,
12391 adjust_address (slot, word_mode, UNITS_PER_WORD)));
12392 emit_insn (gen_rtx_SET (reg, slot));
12393 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
12394 }
12395
12396 /* Output the assembler code for a thunk function. THUNK_DECL is the
12397 declaration for the thunk function itself, FUNCTION is the decl for
12398 the target function. DELTA is an immediate constant offset to be
12399 added to THIS. If VCALL_OFFSET is nonzero, the word at address
12400 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
12401
12402 static void
12403 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12404 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12405 tree function)
12406 {
12407 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
12408 rtx this_rtx, funexp;
12409 rtx_insn *insn;
12410 unsigned int int_arg_first;
12411
12412 reload_completed = 1;
12413 epilogue_completed = 1;
12414
12415 emit_note (NOTE_INSN_PROLOGUE_END);
12416
12417 if (TARGET_FLAT)
12418 {
12419 sparc_leaf_function_p = 1;
12420
12421 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12422 }
12423 else if (flag_delayed_branch)
12424 {
12425 /* We will emit a regular sibcall below, so we need to instruct
12426 output_sibcall that we are in a leaf function. */
12427 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
12428
12429 /* This will cause final.c to invoke leaf_renumber_regs so we
12430 must behave as if we were in a not-yet-leafified function. */
12431 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
12432 }
12433 else
12434 {
12435 /* We will emit the sibcall manually below, so we will need to
12436 manually spill non-leaf registers. */
12437 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
12438
12439 /* We really are in a leaf function. */
12440 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12441 }
12442
12443 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
12444 returns a structure, the structure return pointer is there instead. */
12445 if (TARGET_ARCH64
12446 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12447 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
12448 else
12449 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
12450
12451 /* Add DELTA. When possible use a plain add, otherwise load it into
12452 a register first. */
12453 if (delta)
12454 {
12455 rtx delta_rtx = GEN_INT (delta);
12456
12457 if (! SPARC_SIMM13_P (delta))
12458 {
12459 rtx scratch = gen_rtx_REG (Pmode, 1);
12460 emit_move_insn (scratch, delta_rtx);
12461 delta_rtx = scratch;
12462 }
12463
12464 /* THIS_RTX += DELTA. */
12465 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
12466 }
12467
12468 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
12469 if (vcall_offset)
12470 {
12471 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
12472 rtx scratch = gen_rtx_REG (Pmode, 1);
12473
12474 gcc_assert (vcall_offset < 0);
12475
12476 /* SCRATCH = *THIS_RTX. */
12477 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
12478
12479 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
12480 may not have any available scratch register at this point. */
12481 if (SPARC_SIMM13_P (vcall_offset))
12482 ;
12483 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
12484 else if (! fixed_regs[5]
12485 /* The below sequence is made up of at least 2 insns,
12486 while the default method may need only one. */
12487 && vcall_offset < -8192)
12488 {
12489 rtx scratch2 = gen_rtx_REG (Pmode, 5);
12490 emit_move_insn (scratch2, vcall_offset_rtx);
12491 vcall_offset_rtx = scratch2;
12492 }
12493 else
12494 {
12495 rtx increment = GEN_INT (-4096);
12496
12497 /* VCALL_OFFSET is a negative number whose typical range can be
12498 estimated as -32768..0 in 32-bit mode. In almost all cases
12499 it is therefore cheaper to emit multiple add insns than to
12500 spill and load the constant into a register (at least
12501 6 insns). */
12502 while (! SPARC_SIMM13_P (vcall_offset))
12503 {
12504 emit_insn (gen_add2_insn (scratch, increment));
12505 vcall_offset += 4096;
12506 }
12507 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
12508 }
12509
12510 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
12511 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
12512 gen_rtx_PLUS (Pmode,
12513 scratch,
12514 vcall_offset_rtx)));
12515
12516 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
12517 emit_insn (gen_add2_insn (this_rtx, scratch));
12518 }
12519
12520 /* Generate a tail call to the target function. */
12521 if (! TREE_USED (function))
12522 {
12523 assemble_external (function);
12524 TREE_USED (function) = 1;
12525 }
12526 funexp = XEXP (DECL_RTL (function), 0);
12527
12528 if (flag_delayed_branch)
12529 {
12530 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
12531 insn = emit_call_insn (gen_sibcall (funexp));
12532 SIBLING_CALL_P (insn) = 1;
12533 }
12534 else
12535 {
12536 /* The hoops we have to jump through in order to generate a sibcall
12537 without using delay slots... */
12538 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
12539
12540 if (flag_pic)
12541 {
12542 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
12543 start_sequence ();
12544 load_got_register (); /* clobbers %o7 */
12545 if (!TARGET_VXWORKS_RTP)
12546 pic_offset_table_rtx = got_register_rtx;
12547 scratch = sparc_legitimize_pic_address (funexp, scratch);
12548 seq = get_insns ();
12549 end_sequence ();
12550 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
12551 }
12552 else if (TARGET_ARCH32)
12553 {
12554 emit_insn (gen_rtx_SET (scratch,
12555 gen_rtx_HIGH (SImode, funexp)));
12556 emit_insn (gen_rtx_SET (scratch,
12557 gen_rtx_LO_SUM (SImode, scratch, funexp)));
12558 }
12559 else /* TARGET_ARCH64 */
12560 {
12561 switch (sparc_code_model)
12562 {
12563 case CM_MEDLOW:
12564 case CM_MEDMID:
12565 /* The destination can serve as a temporary. */
12566 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
12567 break;
12568
12569 case CM_MEDANY:
12570 case CM_EMBMEDANY:
12571 /* The destination cannot serve as a temporary. */
12572 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
12573 start_sequence ();
12574 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
12575 seq = get_insns ();
12576 end_sequence ();
12577 emit_and_preserve (seq, spill_reg, 0);
12578 break;
12579
12580 default:
12581 gcc_unreachable ();
12582 }
12583 }
12584
12585 emit_jump_insn (gen_indirect_jump (scratch));
12586 }
12587
12588 emit_barrier ();
12589
12590 /* Run just enough of rest_of_compilation to get the insns emitted.
12591 There's not really enough bulk here to make other passes such as
12592 instruction scheduling worth while. */
12593 insn = get_insns ();
12594 shorten_branches (insn);
12595 assemble_start_function (thunk_fndecl, fnname);
12596 final_start_function (insn, file, 1);
12597 final (insn, file, 1);
12598 final_end_function ();
12599 assemble_end_function (thunk_fndecl, fnname);
12600
12601 reload_completed = 0;
12602 epilogue_completed = 0;
12603 }
12604
12605 /* Return true if sparc_output_mi_thunk would be able to output the
12606 assembler code for the thunk function specified by the arguments
12607 it is passed, and false otherwise. */
12608 static bool
12609 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
12610 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
12611 HOST_WIDE_INT vcall_offset,
12612 const_tree function ATTRIBUTE_UNUSED)
12613 {
12614 /* Bound the loop used in the default method above. */
12615 return (vcall_offset >= -32768 || ! fixed_regs[5]);
12616 }
12617
12618 /* How to allocate a 'struct machine_function'. */
12619
12620 static struct machine_function *
12621 sparc_init_machine_status (void)
12622 {
12623 return ggc_cleared_alloc<machine_function> ();
12624 }
12625
12626 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
12627
12628 static unsigned HOST_WIDE_INT
12629 sparc_asan_shadow_offset (void)
12630 {
12631 return TARGET_ARCH64 ? (HOST_WIDE_INT_1 << 43) : (HOST_WIDE_INT_1 << 29);
12632 }
12633
12634 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12635 We need to emit DTP-relative relocations. */
12636
12637 static void
12638 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
12639 {
12640 switch (size)
12641 {
12642 case 4:
12643 fputs ("\t.word\t%r_tls_dtpoff32(", file);
12644 break;
12645 case 8:
12646 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
12647 break;
12648 default:
12649 gcc_unreachable ();
12650 }
12651 output_addr_const (file, x);
12652 fputs (")", file);
12653 }
12654
12655 /* Do whatever processing is required at the end of a file. */
12656
12657 static void
12658 sparc_file_end (void)
12659 {
12660 /* If we need to emit the special GOT helper function, do so now. */
12661 if (got_helper_needed)
12662 {
12663 const char *name = XSTR (got_helper_rtx, 0);
12664 #ifdef DWARF2_UNWIND_INFO
12665 bool do_cfi;
12666 #endif
12667
12668 if (USE_HIDDEN_LINKONCE)
12669 {
12670 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
12671 get_identifier (name),
12672 build_function_type_list (void_type_node,
12673 NULL_TREE));
12674 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
12675 NULL_TREE, void_type_node);
12676 TREE_PUBLIC (decl) = 1;
12677 TREE_STATIC (decl) = 1;
12678 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
12679 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
12680 DECL_VISIBILITY_SPECIFIED (decl) = 1;
12681 resolve_unique_section (decl, 0, flag_function_sections);
12682 allocate_struct_function (decl, true);
12683 cfun->is_thunk = 1;
12684 current_function_decl = decl;
12685 init_varasm_status ();
12686 assemble_start_function (decl, name);
12687 }
12688 else
12689 {
12690 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
12691 switch_to_section (text_section);
12692 if (align > 0)
12693 ASM_OUTPUT_ALIGN (asm_out_file, align);
12694 ASM_OUTPUT_LABEL (asm_out_file, name);
12695 }
12696
12697 #ifdef DWARF2_UNWIND_INFO
12698 do_cfi = dwarf2out_do_cfi_asm ();
12699 if (do_cfi)
12700 output_asm_insn (".cfi_startproc", NULL);
12701 #endif
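      /* The caller materializes the PC-relative offset of the GOT in the GOT
	 register before calling this helper; the helper adds the address of
	 the call site (%o7) to it and returns.  */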
12702 if (flag_delayed_branch)
12703 {
12704 output_asm_insn ("jmp\t%%o7+8", NULL);
12705 output_asm_insn (" add\t%%o7, %0, %0", &got_register_rtx);
12706 }
12707 else
12708 {
12709 output_asm_insn ("add\t%%o7, %0, %0", &got_register_rtx);
12710 output_asm_insn ("jmp\t%%o7+8", NULL);
12711 output_asm_insn (" nop", NULL);
12712 }
12713 #ifdef DWARF2_UNWIND_INFO
12714 if (do_cfi)
12715 output_asm_insn (".cfi_endproc", NULL);
12716 #endif
12717 }
12718
12719 if (NEED_INDICATE_EXEC_STACK)
12720 file_end_indicate_exec_stack ();
12721
12722 #ifdef TARGET_SOLARIS
12723 solaris_file_end ();
12724 #endif
12725 }
12726
12727 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
12728 /* Implement TARGET_MANGLE_TYPE. */
12729
12730 static const char *
12731 sparc_mangle_type (const_tree type)
12732 {
12733 if (TARGET_ARCH32
12734 && TYPE_MAIN_VARIANT (type) == long_double_type_node
12735 && TARGET_LONG_DOUBLE_128)
12736 return "g";
12737
12738 /* For all other types, use normal C++ mangling. */
12739 return NULL;
12740 }
12741 #endif
12742
12743 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
12744 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
12745 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
12746
12747 void
12748 sparc_emit_membar_for_model (enum memmodel model,
12749 int load_store, int before_after)
12750 {
12751 /* Bits for the MEMBAR mmask field. */
12752 const int LoadLoad = 1;
12753 const int StoreLoad = 2;
12754 const int LoadStore = 4;
12755 const int StoreStore = 8;
12756
12757 int mm = 0, implied = 0;
12758
12759 switch (sparc_memory_model)
12760 {
12761 case SMM_SC:
12762 /* Sequential Consistency. All memory transactions are immediately
12763 visible in sequential execution order. No barriers needed. */
12764 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
12765 break;
12766
12767 case SMM_TSO:
12768 /* Total Store Ordering: all memory transactions with store semantics
12769 are followed by an implied StoreStore. */
12770 implied |= StoreStore;
12771
12772 /* If we're not looking for a raw barrier (before+after), then atomic
12773 operations get the benefit of being both load and store. */
12774 if (load_store == 3 && before_after == 1)
12775 implied |= StoreLoad;
12776 /* FALLTHRU */
12777
12778 case SMM_PSO:
12779 /* Partial Store Ordering: all memory transactions with load semantics
12780 are followed by an implied LoadLoad | LoadStore. */
12781 implied |= LoadLoad | LoadStore;
12782
12783 /* If we're not looking for a raw barrier (before+after), then atomic
12784 operations get the benefit of being both load and store. */
12785 if (load_store == 3 && before_after == 2)
12786 implied |= StoreLoad | StoreStore;
12787 /* FALLTHRU */
12788
12789 case SMM_RMO:
12790 /* Relaxed Memory Ordering: no implicit bits. */
12791 break;
12792
12793 default:
12794 gcc_unreachable ();
12795 }
12796
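  /* A barrier before the access is required for release semantics (and
     stronger): earlier loads and stores may not be reordered past it.  */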
12797 if (before_after & 1)
12798 {
12799 if (is_mm_release (model) || is_mm_acq_rel (model)
12800 || is_mm_seq_cst (model))
12801 {
12802 if (load_store & 1)
12803 mm |= LoadLoad | StoreLoad;
12804 if (load_store & 2)
12805 mm |= LoadStore | StoreStore;
12806 }
12807 }
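  /* A barrier after the access is required for acquire semantics (and
     stronger): later loads and stores may not be reordered before it.  */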
12808 if (before_after & 2)
12809 {
12810 if (is_mm_acquire (model) || is_mm_acq_rel (model)
12811 || is_mm_seq_cst (model))
12812 {
12813 if (load_store & 1)
12814 mm |= LoadLoad | LoadStore;
12815 if (load_store & 2)
12816 mm |= StoreLoad | StoreStore;
12817 }
12818 }
12819
12820 /* Remove the bits implied by the system memory model. */
12821 mm &= ~implied;
12822
12823 /* For raw barriers (before+after), always emit a barrier.
12824 This will become a compile-time barrier if needed. */
12825 if (mm || before_after == 3)
12826 emit_insn (gen_membar (GEN_INT (mm)));
12827 }
12828
12829 /* Expand code to perform an 8 or 16-bit compare and swap by doing 32-bit
12830 compare and swap on the word containing the byte or half-word. */
12831
12832 static void
12833 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
12834 rtx oldval, rtx newval)
12835 {
12836 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
12837 rtx addr = gen_reg_rtx (Pmode);
12838 rtx off = gen_reg_rtx (SImode);
12839 rtx oldv = gen_reg_rtx (SImode);
12840 rtx newv = gen_reg_rtx (SImode);
12841 rtx oldvalue = gen_reg_rtx (SImode);
12842 rtx newvalue = gen_reg_rtx (SImode);
12843 rtx res = gen_reg_rtx (SImode);
12844 rtx resv = gen_reg_rtx (SImode);
12845 rtx memsi, val, mask, cc;
12846
12847 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
12848
12849 if (Pmode != SImode)
12850 addr1 = gen_lowpart (SImode, addr1);
12851 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
12852
12853 memsi = gen_rtx_MEM (SImode, addr);
12854 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
12855 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
12856
12857 val = copy_to_reg (memsi);
12858
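  /* SPARC is big-endian, so turn the byte offset into the left-shift amount
     that puts the 8/16-bit datum at its position within the word.  */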
12859 emit_insn (gen_rtx_SET (off,
12860 gen_rtx_XOR (SImode, off,
12861 GEN_INT (GET_MODE (mem) == QImode
12862 ? 3 : 2))));
12863
12864 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
12865
12866 if (GET_MODE (mem) == QImode)
12867 mask = force_reg (SImode, GEN_INT (0xff));
12868 else
12869 mask = force_reg (SImode, GEN_INT (0xffff));
12870
12871 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
12872
12873 emit_insn (gen_rtx_SET (val,
12874 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12875 val)));
12876
12877 oldval = gen_lowpart (SImode, oldval);
12878 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
12879
12880 newval = gen_lowpart_common (SImode, newval);
12881 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
12882
12883 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
12884
12885 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
12886
12887 rtx_code_label *end_label = gen_label_rtx ();
12888 rtx_code_label *loop_label = gen_label_rtx ();
12889 emit_label (loop_label);
12890
12891 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
12892
12893 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
12894
12895 emit_move_insn (bool_result, const1_rtx);
12896
12897 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
12898
12899 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
12900
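  /* The word-wide CAS did not succeed.  If only the bytes outside the datum
     changed, refresh VAL with them and retry; otherwise the datum itself
     differed and the operation fails with BOOL_RESULT cleared.  */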
12901 emit_insn (gen_rtx_SET (resv,
12902 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12903 res)));
12904
12905 emit_move_insn (bool_result, const0_rtx);
12906
12907 cc = gen_compare_reg_1 (NE, resv, val);
12908 emit_insn (gen_rtx_SET (val, resv));
12909
12910 /* Use cbranchcc4 to separate the compare and branch! */
12911 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
12912 cc, const0_rtx, loop_label));
12913
12914 emit_label (end_label);
12915
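  /* Extract the datum from the word and return it in RESULT's mode.  */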
12916 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
12917
12918 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
12919
12920 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
12921 }
12922
12923 /* Expand code to perform a compare-and-swap. */
12924
12925 void
12926 sparc_expand_compare_and_swap (rtx operands[])
12927 {
12928 rtx bval, retval, mem, oldval, newval;
12929 machine_mode mode;
12930 enum memmodel model;
12931
12932 bval = operands[0];
12933 retval = operands[1];
12934 mem = operands[2];
12935 oldval = operands[3];
12936 newval = operands[4];
12937 model = (enum memmodel) INTVAL (operands[6]);
12938 mode = GET_MODE (mem);
12939
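  /* Emit the barrier required before the operation by the memory model;
     the barrier required after it is emitted at the bottom.  */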
12940 sparc_emit_membar_for_model (model, 3, 1);
12941
12942 if (reg_overlap_mentioned_p (retval, oldval))
12943 oldval = copy_to_reg (oldval);
12944
12945 if (mode == QImode || mode == HImode)
12946 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12947 else
12948 {
12949 rtx (*gen) (rtx, rtx, rtx, rtx);
12950 rtx x;
12951
12952 if (mode == SImode)
12953 gen = gen_atomic_compare_and_swapsi_1;
12954 else
12955 gen = gen_atomic_compare_and_swapdi_1;
12956 emit_insn (gen (retval, mem, oldval, newval));
12957
12958 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12959 if (x != bval)
12960 convert_move (bval, x, 1);
12961 }
12962
12963 sparc_emit_membar_for_model (model, 3, 2);
12964 }
12965
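/* Expand the selector computation for a variable vector permutation: convert
   the element indices in SEL into the byte-selector form expected by the VIS
   BMASK instruction and emit the bmask, which writes the %gsr mask used by a
   subsequent BSHUFFLE.  */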
12966 void
12967 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
12968 {
12969 rtx t_1, t_2, t_3;
12970
12971 sel = gen_lowpart (DImode, sel);
12972 switch (vmode)
12973 {
12974 case E_V2SImode:
12975 /* inp = xxxxxxxAxxxxxxxB */
12976 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12977 NULL_RTX, 1, OPTAB_DIRECT);
12978 /* t_1 = ....xxxxxxxAxxx. */
12979 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12980 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
12981 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12982 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
12983 /* sel = .......B */
12984 /* t_1 = ...A.... */
12985 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12986 /* sel = ...A...B */
12987 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
12988 /* sel = AAAABBBB * 4 */
12989 t_1 = force_reg (SImode, GEN_INT (0x01230123));
12990 /* sel = { A*4, A*4+1, A*4+2, ... } */
12991 break;
12992
12993 case E_V4HImode:
12994 /* inp = xxxAxxxBxxxCxxxD */
12995 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12996 NULL_RTX, 1, OPTAB_DIRECT);
12997 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12998 NULL_RTX, 1, OPTAB_DIRECT);
12999 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
13000 NULL_RTX, 1, OPTAB_DIRECT);
13001 /* t_1 = ..xxxAxxxBxxxCxx */
13002 /* t_2 = ....xxxAxxxBxxxC */
13003 /* t_3 = ......xxxAxxxBxx */
13004 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
13005 GEN_INT (0x07),
13006 NULL_RTX, 1, OPTAB_DIRECT);
13007 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
13008 GEN_INT (0x0700),
13009 NULL_RTX, 1, OPTAB_DIRECT);
13010 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
13011 GEN_INT (0x070000),
13012 NULL_RTX, 1, OPTAB_DIRECT);
13013 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
13014 GEN_INT (0x07000000),
13015 NULL_RTX, 1, OPTAB_DIRECT);
13016 /* sel = .......D */
13017 /* t_1 = .....C.. */
13018 /* t_2 = ...B.... */
13019 /* t_3 = .A...... */
13020 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
13021 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
13022 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
13023 /* sel = .A.B.C.D */
13024 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
13025 /* sel = AABBCCDD * 2 */
13026 t_1 = force_reg (SImode, GEN_INT (0x01010101));
13027 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
13028 break;
13029
13030 case E_V8QImode:
13031 /* input = xAxBxCxDxExFxGxH */
13032 sel = expand_simple_binop (DImode, AND, sel,
13033 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
13034 | 0x0f0f0f0f),
13035 NULL_RTX, 1, OPTAB_DIRECT);
13036 /* sel = .A.B.C.D.E.F.G.H */
13037 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
13038 NULL_RTX, 1, OPTAB_DIRECT);
13039 /* t_1 = ..A.B.C.D.E.F.G. */
13040 sel = expand_simple_binop (DImode, IOR, sel, t_1,
13041 NULL_RTX, 1, OPTAB_DIRECT);
13042 /* sel = .AABBCCDDEEFFGGH */
13043 sel = expand_simple_binop (DImode, AND, sel,
13044 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
13045 | 0xff00ff),
13046 NULL_RTX, 1, OPTAB_DIRECT);
13047 /* sel = ..AB..CD..EF..GH */
13048 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
13049 NULL_RTX, 1, OPTAB_DIRECT);
13050 /* t_1 = ....AB..CD..EF.. */
13051 sel = expand_simple_binop (DImode, IOR, sel, t_1,
13052 NULL_RTX, 1, OPTAB_DIRECT);
13053 /* sel = ..ABABCDCDEFEFGH */
13054 sel = expand_simple_binop (DImode, AND, sel,
13055 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
13056 NULL_RTX, 1, OPTAB_DIRECT);
13057 /* sel = ....ABCD....EFGH */
13058 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
13059 NULL_RTX, 1, OPTAB_DIRECT);
13060 /* t_1 = ........ABCD.... */
13061 sel = gen_lowpart (SImode, sel);
13062 t_1 = gen_lowpart (SImode, t_1);
13063 break;
13064
13065 default:
13066 gcc_unreachable ();
13067 }
13068
13069 /* Always perform the final addition/merge within the bmask insn. */
13070 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
13071 }
13072
13073 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
13074
13075 static bool
13076 sparc_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
13077 rtx op1, const vec_perm_indices &sel)
13078 {
13079 if (!TARGET_VIS2)
13080 return false;
13081
13082 /* All 8-byte permutes are supported. */
13083 if (!target)
13084 return GET_MODE_SIZE (vmode) == 8;
13085
13086 /* Force target-independent code to convert constant permutations on other
13087 modes down to V8QI. Rely on this to avoid the complexity of the byte
13088 order of the permutation. */
13089 if (vmode != V8QImode)
13090 return false;
13091
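  /* Pack the eight byte selectors, most significant nibble first, into the
     32-bit BMASK operand, then permute the bytes with BSHUFFLE.  */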
13092 unsigned int i, mask;
13093 for (i = mask = 0; i < 8; ++i)
13094 mask |= (sel[i] & 0xf) << (28 - i*4);
13095 rtx mask_rtx = force_reg (SImode, gen_int_mode (mask, SImode));
13096
13097 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), mask_rtx, const0_rtx));
13098 emit_insn (gen_bshufflev8qi_vis (target, op0, op1));
13099 return true;
13100 }
13101
13102 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
13103
13104 static bool
13105 sparc_frame_pointer_required (void)
13106 {
13107 /* If the stack pointer is dynamically modified in the function, it cannot
13108 serve as the frame pointer. */
13109 if (cfun->calls_alloca)
13110 return true;
13111
13112 /* If the function receives nonlocal gotos, it needs to save the frame
13113 pointer in the nonlocal_goto_save_area object. */
13114 if (cfun->has_nonlocal_label)
13115 return true;
13116
13117 /* In flat mode, that's it. */
13118 if (TARGET_FLAT)
13119 return false;
13120
13121 /* Otherwise, the frame pointer is required if the function isn't leaf, but
13122 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */
13123 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
13124 }
13125
13126 /* The way this is structured, we can't eliminate SFP in favor of SP
13127 if the frame pointer is required: we want to use the SFP->HFP elimination
13128 in that case. But the test in update_eliminables doesn't know we are
13129 assuming below that we only do the former elimination. */
13130
13131 static bool
13132 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
13133 {
13134 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
13135 }
13136
13137 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
13138 they won't be allocated. */
13139
13140 static void
13141 sparc_conditional_register_usage (void)
13142 {
13143 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
13144 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13145 /* If the user has passed -f{fixed,call-{used,saved}}-g5 */
13146 /* then honor it. */
13147 if (TARGET_ARCH32 && fixed_regs[5])
13148 fixed_regs[5] = 1;
13149 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
13150 fixed_regs[5] = 0;
13151 if (! TARGET_V9)
13152 {
13153 int regno;
13154 for (regno = SPARC_FIRST_V9_FP_REG;
13155 regno <= SPARC_LAST_V9_FP_REG;
13156 regno++)
13157 fixed_regs[regno] = 1;
13158 /* %fcc0 is used by v8 and v9. */
13159 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
13160 regno <= SPARC_LAST_V9_FCC_REG;
13161 regno++)
13162 fixed_regs[regno] = 1;
13163 }
13164 if (! TARGET_FPU)
13165 {
13166 int regno;
13167 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
13168 fixed_regs[regno] = 1;
13169 }
13170 /* If the user has passed -f{fixed,call-{used,saved}}-g2 */
13171 /* then honor it. Likewise with g3 and g4. */
13172 if (fixed_regs[2] == 2)
13173 fixed_regs[2] = ! TARGET_APP_REGS;
13174 if (fixed_regs[3] == 2)
13175 fixed_regs[3] = ! TARGET_APP_REGS;
13176 if (TARGET_ARCH32 && fixed_regs[4] == 2)
13177 fixed_regs[4] = ! TARGET_APP_REGS;
13178 else if (TARGET_CM_EMBMEDANY)
13179 fixed_regs[4] = 1;
13180 else if (fixed_regs[4] == 2)
13181 fixed_regs[4] = 0;
13182 if (TARGET_FLAT)
13183 {
13184 int regno;
13185 /* Disable leaf functions. */
13186 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
13187 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13188 leaf_reg_remap [regno] = regno;
13189 }
13190 if (TARGET_VIS)
13191 global_regs[SPARC_GSR_REG] = 1;
13192 }
13193
13194 /* Implement TARGET_USE_PSEUDO_PIC_REG. */
13195
13196 static bool
13197 sparc_use_pseudo_pic_reg (void)
13198 {
13199 return !TARGET_VXWORKS_RTP && flag_pic;
13200 }
13201
13202 /* Implement TARGET_INIT_PIC_REG. */
13203
13204 static void
13205 sparc_init_pic_reg (void)
13206 {
13207 edge entry_edge;
13208 rtx_insn *seq;
13209
13210 /* In PIC mode, we need to always initialize the PIC register if optimization
13211 is enabled, because we are called from IRA and LRA may later force things
13212 to the constant pool for optimization purposes. */
13213 if (!flag_pic || (!crtl->uses_pic_offset_table && !optimize))
13214 return;
13215
13216 start_sequence ();
13217 load_got_register ();
13218 if (!TARGET_VXWORKS_RTP)
13219 emit_move_insn (pic_offset_table_rtx, got_register_rtx);
13220 seq = get_insns ();
13221 end_sequence ();
13222
13223 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
13224 insert_insn_on_edge (seq, entry_edge);
13225 commit_one_edge_insertion (entry_edge);
13226 }
13227
13228 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
13229
13230 - We can't load constants into FP registers.
13231 - We can't load FP constants into integer registers when soft-float,
13232 because there is no soft-float pattern with a r/F constraint.
13233 - We can't load FP constants into integer registers for TFmode unless
13234 it is 0.0L, because there is no movtf pattern with a r/F constraint.
13235 - Try and reload integer constants (symbolic or otherwise) back into
13236 registers directly, rather than having them dumped to memory. */
13237
13238 static reg_class_t
13239 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
13240 {
13241 machine_mode mode = GET_MODE (x);
13242 if (CONSTANT_P (x))
13243 {
13244 if (FP_REG_CLASS_P (rclass)
13245 || rclass == GENERAL_OR_FP_REGS
13246 || rclass == GENERAL_OR_EXTRA_FP_REGS
13247 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
13248 || (mode == TFmode && ! const_zero_operand (x, mode)))
13249 return NO_REGS;
13250
13251 if (GET_MODE_CLASS (mode) == MODE_INT)
13252 return GENERAL_REGS;
13253
13254 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13255 {
13256 if (! FP_REG_CLASS_P (rclass)
13257 || !(const_zero_operand (x, mode)
13258 || const_all_ones_operand (x, mode)))
13259 return NO_REGS;
13260 }
13261 }
13262
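  /* On 32-bit with VIS3, moves between integer and FP registers only work
     through the lower FP registers, so steer such reloads away from
     EXTRA_FP_REGS.  */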
13263 if (TARGET_VIS3
13264 && ! TARGET_ARCH64
13265 && (rclass == EXTRA_FP_REGS
13266 || rclass == GENERAL_OR_EXTRA_FP_REGS))
13267 {
13268 int regno = true_regnum (x);
13269
13270 if (SPARC_INT_REG_P (regno))
13271 return (rclass == EXTRA_FP_REGS
13272 ? FP_REGS : GENERAL_OR_FP_REGS);
13273 }
13274
13275 return rclass;
13276 }
13277
13278 /* Return true if we use LRA instead of reload pass. */
13279
13280 static bool
13281 sparc_lra_p (void)
13282 {
13283 return TARGET_LRA;
13284 }
13285
13286 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
13287 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
13288
13289 const char *
13290 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
13291 {
13292 char mulstr[32];
13293
13294 gcc_assert (! TARGET_ARCH64);
13295
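  /* The 64-bit operands live in pairs of 32-bit registers.  Zero-extend the
     low words where needed, glue each pair together with sllx/or, do the
     64-bit multiply, and split the 64-bit product back into %H0/%L0.  */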
13296 if (sparc_check_64 (operands[1], insn) <= 0)
13297 output_asm_insn ("srl\t%L1, 0, %L1", operands);
13298 if (which_alternative == 1)
13299 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
13300 if (GET_CODE (operands[2]) == CONST_INT)
13301 {
13302 if (which_alternative == 1)
13303 {
13304 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13305 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
13306 output_asm_insn (mulstr, operands);
13307 return "srlx\t%L0, 32, %H0";
13308 }
13309 else
13310 {
13311 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13312 output_asm_insn ("or\t%L1, %3, %3", operands);
13313 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
13314 output_asm_insn (mulstr, operands);
13315 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13316 return "mov\t%3, %L0";
13317 }
13318 }
13319 else if (rtx_equal_p (operands[1], operands[2]))
13320 {
13321 if (which_alternative == 1)
13322 {
13323 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13324 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
13325 output_asm_insn (mulstr, operands);
13326 return "srlx\t%L0, 32, %H0";
13327 }
13328 else
13329 {
13330 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13331 output_asm_insn ("or\t%L1, %3, %3", operands);
13332 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
13333 output_asm_insn (mulstr, operands);
13334 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13335 return "mov\t%3, %L0";
13336 }
13337 }
13338 if (sparc_check_64 (operands[2], insn) <= 0)
13339 output_asm_insn ("srl\t%L2, 0, %L2", operands);
13340 if (which_alternative == 1)
13341 {
13342 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13343 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
13344 output_asm_insn ("or\t%L2, %L1, %L1", operands);
13345 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
13346 output_asm_insn (mulstr, operands);
13347 return "srlx\t%L0, 32, %H0";
13348 }
13349 else
13350 {
13351 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13352 output_asm_insn ("sllx\t%H2, 32, %4", operands);
13353 output_asm_insn ("or\t%L1, %3, %3", operands);
13354 output_asm_insn ("or\t%L2, %4, %4", operands);
13355 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
13356 output_asm_insn (mulstr, operands);
13357 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13358 return "mov\t%3, %L0";
13359 }
13360 }
13361
13362 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13363 all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE
13364 and INNER_MODE are the modes describing TARGET. */
13365
13366 static void
13367 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
13368 machine_mode inner_mode)
13369 {
13370 rtx t1, final_insn, sel;
13371 int bmask;
13372
13373 t1 = gen_reg_rtx (mode);
13374
13375 elt = convert_modes (SImode, inner_mode, elt, true);
13376 emit_move_insn (gen_lowpart(SImode, t1), elt);
13377
13378 switch (mode)
13379 {
13380 case E_V2SImode:
13381 final_insn = gen_bshufflev2si_vis (target, t1, t1);
13382 bmask = 0x45674567;
13383 break;
13384 case E_V4HImode:
13385 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
13386 bmask = 0x67676767;
13387 break;
13388 case E_V8QImode:
13389 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
13390 bmask = 0x77777777;
13391 break;
13392 default:
13393 gcc_unreachable ();
13394 }
13395
13396 sel = force_reg (SImode, GEN_INT (bmask));
13397 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
13398 emit_insn (final_insn);
13399 }
13400
13401 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13402 all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn. */
13403
13404 static void
13405 vector_init_fpmerge (rtx target, rtx elt)
13406 {
13407 rtx t1, t2, t2_low, t3, t3_low;
13408
13409 t1 = gen_reg_rtx (V4QImode);
13410 elt = convert_modes (SImode, QImode, elt, true);
13411 emit_move_insn (gen_lowpart (SImode, t1), elt);
13412
13413 t2 = gen_reg_rtx (V8QImode);
13414 t2_low = gen_lowpart (V4QImode, t2);
13415 emit_insn (gen_fpmerge_vis (t2, t1, t1));
13416
13417 t3 = gen_reg_rtx (V8QImode);
13418 t3_low = gen_lowpart (V4QImode, t3);
13419 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
13420
13421 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
13422 }
13423
13424 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13425 all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn. */
13426
13427 static void
13428 vector_init_faligndata (rtx target, rtx elt)
13429 {
13430 rtx t1 = gen_reg_rtx (V4HImode);
13431 int i;
13432
13433 elt = convert_modes (SImode, HImode, elt, true);
13434 emit_move_insn (gen_lowpart (SImode, t1), elt);
13435
13436 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
13437 force_reg (SImode, GEN_INT (6)),
13438 const0_rtx));
13439
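  /* With the GSR alignment set to 6 above, each FALIGNDATA shifts one more
     16-bit copy of ELT into TARGET, so four iterations fill all fields.  */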
13440 for (i = 0; i < 4; i++)
13441 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
13442 }
13443
13444 /* Emit code to initialize TARGET to values for individual fields VALS. */
13445
13446 void
13447 sparc_expand_vector_init (rtx target, rtx vals)
13448 {
13449 const machine_mode mode = GET_MODE (target);
13450 const machine_mode inner_mode = GET_MODE_INNER (mode);
13451 const int n_elts = GET_MODE_NUNITS (mode);
13452 int i, n_var = 0;
13453 bool all_same = true;
13454 rtx mem;
13455
13456 for (i = 0; i < n_elts; i++)
13457 {
13458 rtx x = XVECEXP (vals, 0, i);
13459 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
13460 n_var++;
13461
13462 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13463 all_same = false;
13464 }
13465
13466 if (n_var == 0)
13467 {
13468 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
13469 return;
13470 }
13471
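  /* A vector with a single element reduces to a scalar move.  */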
13472 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
13473 {
13474 if (GET_MODE_SIZE (inner_mode) == 4)
13475 {
13476 emit_move_insn (gen_lowpart (SImode, target),
13477 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
13478 return;
13479 }
13480 else if (GET_MODE_SIZE (inner_mode) == 8)
13481 {
13482 emit_move_insn (gen_lowpart (DImode, target),
13483 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
13484 return;
13485 }
13486 }
13487 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
13488 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
13489 {
13490 emit_move_insn (gen_highpart (word_mode, target),
13491 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
13492 emit_move_insn (gen_lowpart (word_mode, target),
13493 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
13494 return;
13495 }
13496
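  /* All elements identical: splat the element with a VIS idiom if one is
     available for this mode.  */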
13497 if (all_same && GET_MODE_SIZE (mode) == 8)
13498 {
13499 if (TARGET_VIS2)
13500 {
13501 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
13502 return;
13503 }
13504 if (mode == V8QImode)
13505 {
13506 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
13507 return;
13508 }
13509 if (mode == V4HImode)
13510 {
13511 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
13512 return;
13513 }
13514 }
13515
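  /* Fall back to storing the elements into a stack temporary and loading the
     vector back as a whole.  */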
13516 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13517 for (i = 0; i < n_elts; i++)
13518 emit_move_insn (adjust_address_nv (mem, inner_mode,
13519 i * GET_MODE_SIZE (inner_mode)),
13520 XVECEXP (vals, 0, i));
13521 emit_move_insn (target, mem);
13522 }
13523
13524 /* Implement TARGET_SECONDARY_RELOAD. */
13525
13526 static reg_class_t
13527 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13528 machine_mode mode, secondary_reload_info *sri)
13529 {
13530 enum reg_class rclass = (enum reg_class) rclass_i;
13531
13532 sri->icode = CODE_FOR_nothing;
13533 sri->extra_cost = 0;
13534
13535 /* We need a temporary when loading/storing a HImode/QImode value
13536 between memory and the FPU registers. This can happen when combine puts
13537 a paradoxical subreg in a float/fix conversion insn. */
13538 if (FP_REG_CLASS_P (rclass)
13539 && (mode == HImode || mode == QImode)
13540 && (GET_CODE (x) == MEM
13541 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
13542 && true_regnum (x) == -1)))
13543 return GENERAL_REGS;
13544
13545 /* On 32-bit we need a temporary when loading/storing a DFmode value
13546 between unaligned memory and the upper FPU registers. */
13547 if (TARGET_ARCH32
13548 && rclass == EXTRA_FP_REGS
13549 && mode == DFmode
13550 && GET_CODE (x) == MEM
13551 && ! mem_min_alignment (x, 8))
13552 return FP_REGS;
13553
13554 if (((TARGET_CM_MEDANY
13555 && symbolic_operand (x, mode))
13556 || (TARGET_CM_EMBMEDANY
13557 && text_segment_operand (x, mode)))
13558 && ! flag_pic)
13559 {
13560 if (in_p)
13561 sri->icode = direct_optab_handler (reload_in_optab, mode);
13562 else
13563 sri->icode = direct_optab_handler (reload_out_optab, mode);
13564 return NO_REGS;
13565 }
13566
13567 if (TARGET_VIS3 && TARGET_ARCH32)
13568 {
13569 int regno = true_regnum (x);
13570
13571 /* When using VIS3 fp<-->int register moves, on 32-bit we have
13572 to move 8-byte values in 4-byte pieces. This only works via
13573 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
13574 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
13575 an FP_REGS intermediate move. */
13576 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
13577 || ((general_or_i64_p (rclass)
13578 || rclass == GENERAL_OR_FP_REGS)
13579 && SPARC_FP_REG_P (regno)))
13580 {
13581 sri->extra_cost = 2;
13582 return FP_REGS;
13583 }
13584 }
13585
13586 return NO_REGS;
13587 }
13588
13589 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
13590
13591 On SPARC when not VIS3 it is not possible to directly move data
13592 between GENERAL_REGS and FP_REGS. */
13593
13594 static bool
13595 sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1,
13596 reg_class_t class2)
13597 {
13598 return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2))
13599 && (! TARGET_VIS3
13600 || GET_MODE_SIZE (mode) > 8
13601 || GET_MODE_SIZE (mode) < 4));
13602 }
13603
13604 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
13605
13606 get_secondary_mem widens its argument to BITS_PER_WORD which loses on v9
13607 because the movsi and movsf patterns don't handle r/f moves.
13608 For v8 we copy the default definition. */
13609
13610 static machine_mode
13611 sparc_secondary_memory_needed_mode (machine_mode mode)
13612 {
13613 if (TARGET_ARCH64)
13614 {
13615 if (GET_MODE_BITSIZE (mode) < 32)
13616 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
13617 return mode;
13618 }
13619 else
13620 {
13621 if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
13622 return mode_for_size (BITS_PER_WORD,
13623 GET_MODE_CLASS (mode), 0).require ();
13624 return mode;
13625 }
13626 }
13627
13628 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
13629 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
13630
13631 bool
13632 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
13633 {
13634 enum rtx_code rc = GET_CODE (operands[1]);
13635 machine_mode cmp_mode;
13636 rtx cc_reg, dst, cmp;
13637
13638 cmp = operands[1];
13639 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
13640 return false;
13641
13642 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
13643 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
13644
13645 cmp_mode = GET_MODE (XEXP (cmp, 0));
13646 rc = GET_CODE (cmp);
13647
13648 dst = operands[0];
13649 if (! rtx_equal_p (operands[2], dst)
13650 && ! rtx_equal_p (operands[3], dst))
13651 {
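      /* Neither source is already in DST: load OPERANDS[3] into DST and
	 conditionally overwrite it with OPERANDS[2] below.  */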
13652 if (reg_overlap_mentioned_p (dst, cmp))
13653 dst = gen_reg_rtx (mode);
13654
13655 emit_move_insn (dst, operands[3]);
13656 }
13657 else if (operands[2] == dst)
13658 {
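      /* OPERANDS[2] already lives in DST, so conditionally move OPERANDS[3]
	 under the inverted condition instead.  */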
13659 operands[2] = operands[3];
13660
13661 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
13662 rc = reverse_condition_maybe_unordered (rc);
13663 else
13664 rc = reverse_condition (rc);
13665 }
13666
13667 if (XEXP (cmp, 1) == const0_rtx
13668 && GET_CODE (XEXP (cmp, 0)) == REG
13669 && cmp_mode == DImode
13670 && v9_regcmp_p (rc))
13671 cc_reg = XEXP (cmp, 0);
13672 else
13673 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
13674
13675 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
13676
13677 emit_insn (gen_rtx_SET (dst,
13678 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
13679
13680 if (dst != operands[0])
13681 emit_move_insn (operands[0], dst);
13682
13683 return true;
13684 }
13685
13686 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
13687 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
13688 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
13689 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
13690 code to be used for the condition mask. */
13691
13692 void
13693 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
13694 {
13695 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
13696 enum rtx_code code = GET_CODE (operands[3]);
13697
13698 mask = gen_reg_rtx (Pmode);
13699 cop0 = operands[4];
13700 cop1 = operands[5];
13701 if (code == LT || code == GE)
13702 {
13703 rtx t;
13704
13705 code = swap_condition (code);
13706 t = cop0; cop0 = cop1; cop1 = t;
13707 }
13708
13709 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
13710
13711 fcmp = gen_rtx_UNSPEC (Pmode,
13712 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
13713 fcode);
13714
13715 cmask = gen_rtx_UNSPEC (DImode,
13716 gen_rtvec (2, mask, gsr),
13717 ccode);
13718
13719 bshuf = gen_rtx_UNSPEC (mode,
13720 gen_rtvec (3, operands[1], operands[2], gsr),
13721 UNSPEC_BSHUFFLE);
13722
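  /* MASK <- the element-wise comparison result, %gsr <- the condition mask
     derived from it, OPERANDS[0] <- the bytes of OPERANDS[1]/OPERANDS[2]
     selected by BSHUFFLE under that mask.  */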
13723 emit_insn (gen_rtx_SET (mask, fcmp));
13724 emit_insn (gen_rtx_SET (gsr, cmask));
13725
13726 emit_insn (gen_rtx_SET (operands[0], bshuf));
13727 }
13728
13729 /* On sparc, any mode which naturally allocates into the float
13730 registers should return 4 here. */
13731
13732 unsigned int
13733 sparc_regmode_natural_size (machine_mode mode)
13734 {
13735 int size = UNITS_PER_WORD;
13736
13737 if (TARGET_ARCH64)
13738 {
13739 enum mode_class mclass = GET_MODE_CLASS (mode);
13740
13741 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
13742 size = 4;
13743 }
13744
13745 return size;
13746 }
13747
13748 /* Implement TARGET_HARD_REGNO_NREGS.
13749
13750 On SPARC, ordinary registers hold 32 bits worth; this means both
13751 integer and floating point registers. On v9, integer regs hold 64
13752 bits worth; floating point regs hold 32 bits worth (this includes the
13753 new fp regs as even the odd ones are included in the hard register
13754 count). */
13755
13756 static unsigned int
13757 sparc_hard_regno_nregs (unsigned int regno, machine_mode mode)
13758 {
13759 if (regno == SPARC_GSR_REG)
13760 return 1;
13761 if (TARGET_ARCH64)
13762 {
13763 if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM)
13764 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13765 return CEIL (GET_MODE_SIZE (mode), 4);
13766 }
13767 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13768 }
13769
13770 /* Implement TARGET_HARD_REGNO_MODE_OK.
13771
13772 ??? Because of the funny way we pass parameters we should allow certain
13773 ??? types of float/complex values to be in integer registers during
13774 ??? RTL generation. This only matters on arch32. */
13775
13776 static bool
13777 sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
13778 {
13779 return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
13780 }
13781
13782 /* Implement TARGET_MODES_TIEABLE_P.
13783
13784 For V9 we have to deal with the fact that only the lower 32 floating
13785 point registers are 32-bit addressable. */
13786
13787 static bool
13788 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
13789 {
13790 enum mode_class mclass1, mclass2;
13791 unsigned short size1, size2;
13792
13793 if (mode1 == mode2)
13794 return true;
13795
13796 mclass1 = GET_MODE_CLASS (mode1);
13797 mclass2 = GET_MODE_CLASS (mode2);
13798 if (mclass1 != mclass2)
13799 return false;
13800
13801 if (! TARGET_V9)
13802 return true;
13803
13804 /* Classes are the same and we are V9 so we have to deal with upper
13805 vs. lower floating point registers. If one of the modes is a
13806 4-byte mode, and the other is not, we have to mark them as not
13807 tieable because only the lower 32 floating point register are
13808 addressable 32-bits at a time.
13809
13810 We can't just test explicitly for SFmode, otherwise we won't
13811 cover the vector mode cases properly. */
13812
13813 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
13814 return true;
13815
13816 size1 = GET_MODE_SIZE (mode1);
13817 size2 = GET_MODE_SIZE (mode2);
13818 if ((size1 > 4 && size2 == 4)
13819 || (size2 > 4 && size1 == 4))
13820 return false;
13821
13822 return true;
13823 }
13824
13825 /* Implement TARGET_CSTORE_MODE. */
13826
13827 static scalar_int_mode
13828 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
13829 {
13830 return (TARGET_ARCH64 ? DImode : SImode);
13831 }
13832
13833 /* Return the compound expression made of T1 and T2. */
13834
13835 static inline tree
13836 compound_expr (tree t1, tree t2)
13837 {
13838 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
13839 }
13840
13841 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
13842
13843 static void
13844 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
13845 {
13846 if (!TARGET_FPU)
13847 return;
13848
13849 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
13850 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
13851
13852 /* We generate the equivalent of feholdexcept (&fenv_var):
13853
13854 unsigned int fenv_var;
13855 __builtin_store_fsr (&fenv_var);
13856
13857 unsigned int tmp1_var;
13858 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
13859
13860 __builtin_load_fsr (&tmp1_var); */
13861
13862 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
13863 TREE_ADDRESSABLE (fenv_var) = 1;
13864 tree fenv_addr = build_fold_addr_expr (fenv_var);
13865 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
13866 tree hold_stfsr
13867 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
13868 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
13869
13870 tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
13871 TREE_ADDRESSABLE (tmp1_var) = 1;
13872 tree masked_fenv_var
13873 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
13874 build_int_cst (unsigned_type_node,
13875 ~(accrued_exception_mask | trap_enable_mask)));
13876 tree hold_mask
13877 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
13878 NULL_TREE, NULL_TREE);
13879
13880 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
13881 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
13882 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
13883
13884 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
13885
13886 /* We reload the value of tmp1_var to clear the exceptions:
13887
13888 __builtin_load_fsr (&tmp1_var); */
13889
13890 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
13891
13892 /* We generate the equivalent of feupdateenv (&fenv_var):
13893
13894 unsigned int tmp2_var;
13895 __builtin_store_fsr (&tmp2_var);
13896
13897 __builtin_load_fsr (&fenv_var);
13898
13899 if (SPARC_LOW_FE_EXCEPT_VALUES)
13900 tmp2_var >>= 5;
13901 __atomic_feraiseexcept ((int) tmp2_var); */
13902
13903 tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
13904 TREE_ADDRESSABLE (tmp2_var) = 1;
13905 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
13906 tree update_stfsr
13907 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
13908 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
13909
13910 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
13911
13912 tree atomic_feraiseexcept
13913 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
13914 tree update_call
13915 = build_call_expr (atomic_feraiseexcept, 1,
13916 fold_convert (integer_type_node, tmp2_var));
13917
13918 if (SPARC_LOW_FE_EXCEPT_VALUES)
13919 {
13920 tree shifted_tmp2_var
13921 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
13922 build_int_cst (unsigned_type_node, 5));
13923 tree update_shift
13924 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
13925 update_call = compound_expr (update_shift, update_call);
13926 }
13927
13928 *update
13929 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
13930 }
13931
13932 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. Borrowed from the PA port.
13933
13934 SImode loads to floating-point registers are not zero-extended.
13935 The definition for LOAD_EXTEND_OP specifies that integer loads
13936 narrower than BITS_PER_WORD will be zero-extended. As a result,
13937 we inhibit changes from SImode unless they are to a mode that is
13938 identical in size.
13939
13940 Likewise for SFmode, since word-mode paradoxical subregs are
13941 problematic on big-endian architectures. */
13942
13943 static bool
13944 sparc_can_change_mode_class (machine_mode from, machine_mode to,
13945 reg_class_t rclass)
13946 {
13947 if (TARGET_ARCH64
13948 && GET_MODE_SIZE (from) == 4
13949 && GET_MODE_SIZE (to) != 4)
13950 return !reg_classes_intersect_p (rclass, FP_REGS);
13951 return true;
13952 }
13953
13954 /* Implement TARGET_CONSTANT_ALIGNMENT. */
13955
13956 static HOST_WIDE_INT
13957 sparc_constant_alignment (const_tree exp, HOST_WIDE_INT align)
13958 {
13959 if (TREE_CODE (exp) == STRING_CST)
13960 return MAX (align, FASTEST_ALIGNMENT);
13961 return align;
13962 }
13963
13964 #include "gt-sparc.h"
13965