/* Subroutines for insn-output.c for SPARC.
   Copyright (C) 1987-2016 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "explow.h"
#include "expr.h"
#include "debug.h"
#include "common/common-target.h"
#include "gimplify.h"
#include "langhooks.h"
#include "reload.h"
#include "params.h"
#include "tree-pass.h"
#include "context.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"

/* Processor costs */

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
     A value of zero indicates that the multiply cost is fixed
     rather than variable.  */
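  /* For instance, ultrasparc_costs below has int_mul = COSTS_N_INSNS (4)
     and int_mul_bit_factor = 2, so a multiply whose rs1 operand has its
     highest set bit at position 11 costs an extra (11 - 3) / 2 = 4 units
     on top of int_mul.  */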
  const int int_mul_bit_factor;

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;
};

static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara7_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const struct processor_costs *sparc_costs = &cypress_costs;

#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, always
   use "or %o7,%g0,X; call Y; or X,%g0,%o7" so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out whether
   somebody branches between the sethi and the jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif

/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};
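
/* Concretely: in a leaf function the incoming registers %i0-%i5 (hard
   regs 24-29) are remapped to the outgoing registers %o0-%o5 (hard regs
   8-13), %i7 (31) to %o7 (15), %sp (14) is kept, and the global and FP
   registers map to themselves; -1 marks registers that may not appear.  */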

/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};

struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

#define sparc_frame_size		cfun->machine->frame_size
#define sparc_apparent_frame_size	cfun->machine->apparent_frame_size
#define sparc_frame_base_reg		cfun->machine->frame_base_reg
#define sparc_frame_base_offset		cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs		cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p		cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p	cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p	cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;

static void sparc_option_override (void);
static void sparc_init_modes (void);
static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
				const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int hypersparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx_insn *);
static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_fpu_init_builtins (void);
static void sparc_vis_init_builtins (void);
static tree sparc_builtin_decl (unsigned, bool);
static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static int sparc_register_move_cost (machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static rtx sparc_struct_value_rtx (tree, int);
static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
						      int *, const_tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     machine_mode, const_tree, bool);
static void sparc_function_arg_advance (cumulative_args_t,
					machine_mode, const_tree, bool);
static rtx sparc_function_arg_1 (cumulative_args_t,
				 machine_mode, const_tree, bool, bool);
static rtx sparc_function_arg (cumulative_args_t,
			       machine_mode, const_tree, bool);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					machine_mode, const_tree, bool);
static unsigned int sparc_function_arg_boundary (machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    machine_mode, tree, bool);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static rtx sparc_builtin_setjmp_frame_value (void);
static void sparc_conditional_register_usage (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static machine_mode sparc_preferred_simd_mode (machine_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, machine_mode, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   machine_mode,
					   secondary_reload_info *);
static machine_mode sparc_cstore_mode (enum insn_code icode);
static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);

#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       do_diagnostic } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL,        0, 0, false, false, false, NULL, false }
};
#endif

/* Option handling.  */

/* Parsed value.  */
enum cmodel sparc_cmodel;

char sparc_hard_reg_printed[8];

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sparc_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef  TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

/* The value stored by LDSTUB.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff

#undef TARGET_CSTORE_MODE
#define TARGET_CSTORE_MODE sparc_cstore_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return the memory reference contained in X if any, zero otherwise.  */

static rtx
mem_ref (rtx x)
{
  if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
    x = XEXP (x, 0);

  if (MEM_P (x))
    return x;

  return NULL_RTX;
}

/* We use a machine specific pass to enable workarounds for errata.
   We need to have the (essentially) final form of the insn stream in order
   to properly detect the various hazards.  Therefore, this machine specific
   pass runs as late as possible.  The pass is inserted in the pass pipeline
   at the end of sparc_option_override.  */

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx_insn *insn, *next;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn))
	if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
	  insn = seq->insn (1);

      /* Look for a single-word load into an odd-numbered FP register.  */
      if (sparc_fix_at697f
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	  && MEM_P (SET_SRC (set))
	  && REG_P (SET_DEST (set))
	  && REGNO (SET_DEST (set)) > 31
	  && REGNO (SET_DEST (set)) % 2 != 0)
	{
	  /* The wrong dependency is on the enclosing double register.  */
	  const unsigned int x = REGNO (SET_DEST (set)) - 1;
	  unsigned int src1, src2, dest;
	  int code;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  extract_insn (next);
	  code = INSN_CODE (next);

	  switch (code)
	    {
	    case CODE_FOR_adddf3:
	    case CODE_FOR_subdf3:
	    case CODE_FOR_muldf3:
	    case CODE_FOR_divdf3:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      src2 = REGNO (recog_data.operand[2]);
	      if (src1 != src2)
		{
		  /* Case [1-4]:
				 ld [address], %fx+1
				 FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
		  if ((src1 == x || src2 == x)
		      && (dest == src1 || dest == src2))
		    insert_nop = true;
		}
	      else
		{
		  /* Case 5:
			     ld [address], %fx+1
			     FPOPd %fx, %fx, %fx  */
		  if (src1 == x
		      && dest == src1
		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
		    insert_nop = true;
		}
	      break;

	    case CODE_FOR_sqrtdf2:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      /* Case 6:
			 ld [address], %fx+1
			 fsqrtd %fx, %fx  */
	      if (src1 == x && dest == src1)
		insert_nop = true;
	      break;

	    default:
	      break;
	    }
	}

      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
	       && (mem_ref (SET_SRC (set)) != NULL_RTX
		   || INSN_CODE (insn) == CODE_FOR_movsi_pic_gotdata_op)
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) < 32)
	{
	  /* There is no problem if the second memory access has a data
	     dependency on the first single-cycle load.  */
	  rtx x = SET_DEST (set);

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second memory access to/from an integer register.  */
	  if ((set = single_set (next)) != NULL_RTX)
	    {
	      rtx src = SET_SRC (set);
	      rtx dest = SET_DEST (set);
	      rtx mem;

	      /* LDD is affected.  */
	      if ((mem = mem_ref (src)) != NULL_RTX
		  && REG_P (dest)
		  && REGNO (dest) < 32
		  && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;

	      /* STD is *not* affected.  */
	      else if (MEM_P (dest)
		       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
		       && (src == CONST0_RTX (GET_MODE (dest))
			   || (REG_P (src)
			       && REGNO (src) < 32
			       && REGNO (src) != REGNO (x)))
		       && !reg_mentioned_p (x, XEXP (dest, 0)))
		insert_nop = true;
	      /* GOT accesses use LD.  */
	      else if (INSN_CODE (next) == CODE_FOR_movsi_pic_gotdata_op
		       && !reg_mentioned_p (x, XEXP (XEXP (src, 0), 1)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load/operation into an FP register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31)
	{
	  /* Number of instructions in the problematic window.  */
	  const int n_insns = 4;
	  /* The problematic combination is with the sibling FP register.  */
	  const unsigned int x = REGNO (SET_DEST (set));
	  const unsigned int y = x ^ 1;
	  rtx_insn *after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second load/operation into the sibling FP register.  */
	  if (!((set = single_set (next)) != NULL_RTX
		&& GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
		&& REG_P (SET_DEST (set))
		&& REGNO (SET_DEST (set)) == y))
	    continue;

	  /* Look for a (possible) store from the FP register in the next N
	     instructions, but bail out if it is again modified or if there
	     is a store from the sibling FP register before this store.  */
	  for (after = next, i = 0; i < n_insns; i++)
	    {
	      bool branch_p;

	      after = next_active_insn (after);
	      if (!after)
		break;

	      /* This is a branch with an empty delay slot.  */
	      if (!NONJUMP_INSN_P (after))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = NULL;
		}
	      /* This is a branch with a filled delay slot.  */
	      else if (rtx_sequence *seq =
		         dyn_cast <rtx_sequence *> (PATTERN (after)))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = seq->insn (1);
		}
	      /* This is a regular instruction.  */
	      else
		branch_p = false;

	      if (after && (set = single_set (after)) != NULL_RTX)
		{
		  const rtx src = SET_SRC (set);
		  const rtx dest = SET_DEST (set);
		  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));

		  /* If the FP register is again modified before the store,
		     then the store isn't affected.  */
		  if (REG_P (dest)
		      && (REGNO (dest) == x
			  || (REGNO (dest) == y && size == 8)))
		    break;

		  if (MEM_P (dest) && REG_P (src))
		    {
		      /* If there is a store from the sibling FP register
			 before the store, then the store is not affected.  */
		      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
			break;

		      /* Otherwise, the store is affected.  */
		      if (REGNO (src) == x && size == 4)
			{
			  insert_nop = true;
			  break;
			}
		    }
		}

	      /* If we have a branch in the first M instructions, then we
		 cannot see the (M+2)th instruction so we play safe.  */
	      if (branch_p && i <= (n_insns - 2))
		{
		  insert_nop = true;
		  break;
		}
	    }
	}

      else
	next = NEXT_INSN (insn);

      if (insert_nop)
	emit_insn_before (gen_nop (), next);
    }

  return 0;
}

namespace {

const pass_data pass_data_work_around_errata =
{
  RTL_PASS, /* type */
  "errata", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_work_around_errata : public rtl_opt_pass
{
public:
  pass_work_around_errata(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_work_around_errata, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      /* The only errata we handle are those of the AT697F and UT699.  */
      return sparc_fix_at697f != 0 || sparc_fix_ut699 != 0;
    }

  virtual unsigned int execute (function *)
    {
      return sparc_do_work_around_errata ();
    }

}; // class pass_work_around_errata

} // anon namespace

rtl_opt_pass *
make_pass_work_around_errata (gcc::context *ctxt)
{
  return new pass_work_around_errata (ctxt);
}
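
/* A sketch of how this factory is typically wired up (the actual
   registration happens at the end of sparc_option_override; the "mach"
   reference pass name here is an assumption, not gospel):

     opt_pass *errata_pass = make_pass_work_around_errata (g);
     struct register_pass_info insert_pass_work_around_errata =
       { errata_pass, "mach", 1, PASS_POS_INSERT_AFTER };
     register_pass (&insert_pass_work_around_errata);  */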

/* Helpers for TARGET_DEBUG_OPTIONS.  */
static void
dump_target_flag_bits (const int flags)
{
  if (flags & MASK_64BIT)
    fprintf (stderr, "64BIT ");
  if (flags & MASK_APP_REGS)
    fprintf (stderr, "APP_REGS ");
  if (flags & MASK_FASTER_STRUCTS)
    fprintf (stderr, "FASTER_STRUCTS ");
  if (flags & MASK_FLAT)
    fprintf (stderr, "FLAT ");
  if (flags & MASK_FMAF)
    fprintf (stderr, "FMAF ");
  if (flags & MASK_FPU)
    fprintf (stderr, "FPU ");
  if (flags & MASK_HARD_QUAD)
    fprintf (stderr, "HARD_QUAD ");
  if (flags & MASK_POPC)
    fprintf (stderr, "POPC ");
  if (flags & MASK_PTR64)
    fprintf (stderr, "PTR64 ");
  if (flags & MASK_STACK_BIAS)
    fprintf (stderr, "STACK_BIAS ");
  if (flags & MASK_UNALIGNED_DOUBLES)
    fprintf (stderr, "UNALIGNED_DOUBLES ");
  if (flags & MASK_V8PLUS)
    fprintf (stderr, "V8PLUS ");
  if (flags & MASK_VIS)
    fprintf (stderr, "VIS ");
  if (flags & MASK_VIS2)
    fprintf (stderr, "VIS2 ");
  if (flags & MASK_VIS3)
    fprintf (stderr, "VIS3 ");
  if (flags & MASK_VIS4)
    fprintf (stderr, "VIS4 ");
  if (flags & MASK_CBCOND)
    fprintf (stderr, "CBCOND ");
  if (flags & MASK_DEPRECATED_V8_INSNS)
    fprintf (stderr, "DEPRECATED_V8_INSNS ");
  if (flags & MASK_SPARCLET)
    fprintf (stderr, "SPARCLET ");
  if (flags & MASK_SPARCLITE)
    fprintf (stderr, "SPARCLITE ");
  if (flags & MASK_V8)
    fprintf (stderr, "V8 ");
  if (flags & MASK_V9)
    fprintf (stderr, "V9 ");
}

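/* For instance, with MASK_FPU and MASK_V9 set, dump_target_flags prints
   a line of the form (the hex value depends on the mask layout):

     Final target_flags: (xxxxxxxx) [ FPU V9 ]  */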
static void
dump_target_flags (const char *prefix, const int flags)
{
  fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
  dump_target_flag_bits (flags);
  fprintf (stderr, "]\n");
}

/* Validate and override various options, and do some machine dependent
   initialization.  */

static void
sparc_option_override (void)
{
  static struct code_model {
    const char *const name;
    const enum cmodel value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { NULL, (enum cmodel) 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const enum processor_type processor;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
    { TARGET_CPU_v8, PROCESSOR_V8 },
    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
    { TARGET_CPU_leon, PROCESSOR_LEON },
    { TARGET_CPU_leon3, PROCESSOR_LEON3 },
    { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
    { TARGET_CPU_sparclite, PROCESSOR_F930 },
    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
    { TARGET_CPU_v9, PROCESSOR_V9 },
    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
    { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
    { -1, PROCESSOR_V7 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  This must match the order of
     the enum processor_type in sparc-opts.h.  */
  static struct cpu_table {
    const char *const name;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7",		MASK_ISA, 0 },
    { "cypress",	MASK_ISA, 0 },
    { "v8",		MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc",	MASK_ISA, MASK_V8 },
    { "hypersparc",	MASK_ISA, MASK_V8|MASK_FPU },
    { "leon",		MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU },
    { "leon3",		MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU },
    { "leon3v7",	MASK_ISA, MASK_LEON3|MASK_FPU },
    { "sparclite",	MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930",		MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU.  */
    { "f934",		MASK_ISA, MASK_SPARCLITE|MASK_FPU },
    { "sparclite86x",	MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "sparclet",	MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701",		MASK_ISA, MASK_SPARCLET },
    { "v9",		MASK_ISA, MASK_V9 },
    /* UltraSPARC I, II, IIi */
    { "ultrasparc",	MASK_ISA,
    /* Although insns using %y are deprecated, it is a clear win.  */
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC III */
    /* ??? Check if %y issue still holds true.  */
    { "ultrasparc3",	MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
    /* UltraSPARC T1 */
    { "niagara",	MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC T2 */
    { "niagara2",	MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2 },
    /* UltraSPARC T3 */
    { "niagara3",	MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
    /* UltraSPARC T4 */
    { "niagara4",	MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
    /* UltraSPARC M7 */
    { "niagara7",	MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_VIS4|MASK_FMAF|MASK_CBCOND },
  };
  const struct cpu_table *cpu;
  unsigned int i;
  int fpu;

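  /* Parse the -mdebug= switches; e.g. -mdebug=all,!options first sets all
     debug bits and then clears MASK_DEBUG_OPTIONS, since the comma-separated
     switches are processed from left to right.  */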
  if (sparc_debug_string != NULL)
    {
      const char *q;
      char *p;

      p = ASTRDUP (sparc_debug_string);
      while ((q = strtok (p, ",")) != NULL)
	{
	  bool invert;
	  int mask;

	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (! strcmp (q, "all"))
	    mask = MASK_DEBUG_ALL;
	  else if (! strcmp (q, "options"))
	    mask = MASK_DEBUG_OPTIONS;
	  else
	    error ("unknown -mdebug-%s switch", q);

	  if (invert)
	    sparc_debug &= ~mask;
	  else
	    sparc_debug |= mask;
	}
    }

  if (TARGET_DEBUG_OPTIONS)
    {
      dump_target_flags ("Initial target_flags", target_flags);
      dump_target_flags ("target_flags_explicit", target_flags_explicit);
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64-bit archs to use a 128-bit long double.  */
1383   if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
1384     {
1385       error ("-mlong-double-64 not allowed with -m64");
1386       target_flags |= MASK_LONG_DOUBLE_128;
1387     }
1388 
1389   /* Code model selection.  */
1390   sparc_cmodel = SPARC_DEFAULT_CMODEL;
1391 
1392 #ifdef SPARC_BI_ARCH
1393   if (TARGET_ARCH32)
1394     sparc_cmodel = CM_32;
1395 #endif
1396 
1397   if (sparc_cmodel_string != NULL)
1398     {
1399       if (TARGET_ARCH64)
1400 	{
1401 	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
1402 	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
1403 	      break;
1404 	  if (cmodel->name == NULL)
1405 	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
1406 	  else
1407 	    sparc_cmodel = cmodel->value;
1408 	}
1409       else
1410 	error ("-mcmodel= is not supported on 32 bit systems");
1411     }
1412 
1413   /* Check that -fcall-saved-REG wasn't specified for out registers.  */
1414   for (i = 8; i < 16; i++)
1415     if (!call_used_regs [i])
1416       {
1417 	error ("-fcall-saved-REG is not supported for out registers");
1418         call_used_regs [i] = 1;
1419       }
1420 
1421   fpu = target_flags & MASK_FPU; /* save current -mfpu status */
1422 
1423   /* Set the default CPU.  */
1424   if (!global_options_set.x_sparc_cpu_and_features)
1425     {
1426       for (def = &cpu_default[0]; def->cpu != -1; ++def)
1427 	if (def->cpu == TARGET_CPU_DEFAULT)
1428 	  break;
1429       gcc_assert (def->cpu != -1);
1430       sparc_cpu_and_features = def->processor;
1431     }
1432 
1433   if (!global_options_set.x_sparc_cpu)
1434     sparc_cpu = sparc_cpu_and_features;
1435 
1436   cpu = &cpu_table[(int) sparc_cpu_and_features];
1437 
1438   if (TARGET_DEBUG_OPTIONS)
1439     {
1440       fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1441       fprintf (stderr, "sparc_cpu: %s\n",
1442 	       cpu_table[(int) sparc_cpu].name);
1443       dump_target_flags ("cpu->disable", cpu->disable);
1444       dump_target_flags ("cpu->enable", cpu->enable);
1445     }
1446 
1447   target_flags &= ~cpu->disable;
1448   target_flags |= (cpu->enable
1449 #ifndef HAVE_AS_FMAF_HPC_VIS3
1450 		   & ~(MASK_FMAF | MASK_VIS3)
1451 #endif
1452 #ifndef HAVE_AS_SPARC4
1453 		   & ~MASK_CBCOND
1454 #endif
1455 #ifndef HAVE_AS_SPARC5_VIS4
1456 		   & ~MASK_VIS4
1457 #endif
1458 #ifndef HAVE_AS_LEON
1459 		   & ~(MASK_LEON | MASK_LEON3)
1460 #endif
1461 		   );
1462 
1463   /* If -mfpu or -mno-fpu was explicitly used, don't override with
1464      the processor default.  */
1465   if (target_flags_explicit & MASK_FPU)
1466     target_flags = (target_flags & ~MASK_FPU) | fpu;
1467 
1468   /* -mvis2 implies -mvis */
1469   if (TARGET_VIS2)
1470     target_flags |= MASK_VIS;
1471 
1472   /* -mvis3 implies -mvis2 and -mvis */
1473   if (TARGET_VIS3)
1474     target_flags |= MASK_VIS2 | MASK_VIS;
1475 
1476   /* -mvis4 implies -mvis3, -mvis2 and -mvis */
1477   if (TARGET_VIS4)
1478     target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1479 
1480   /* Don't allow -mvis, -mvis2, -mvis3, -mvis4 or -mfmaf if FPU is
1481      disabled.  */
1482   if (! TARGET_FPU)
1483     target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1484 		      | MASK_FMAF);
1485 
1486   /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1487      are available.
1488      -m64 also implies v9.  */
1489   if (TARGET_VIS || TARGET_ARCH64)
1490     {
1491       target_flags |= MASK_V9;
1492       target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1493     }
1494 
1495   /* -mvis also implies -mv8plus on 32-bit */
1496   if (TARGET_VIS && ! TARGET_ARCH64)
1497     target_flags |= MASK_V8PLUS;
1498 
1499   /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
1500   if (TARGET_V9 && TARGET_ARCH32)
1501     target_flags |= MASK_DEPRECATED_V8_INSNS;
1502 
1503   /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
1504   if (! TARGET_V9 || TARGET_ARCH64)
1505     target_flags &= ~MASK_V8PLUS;
1506 
1507   /* Don't use stack biasing in 32 bit mode.  */
1508   if (TARGET_ARCH32)
1509     target_flags &= ~MASK_STACK_BIAS;
1510 
1511   /* Supply a default value for align_functions.  */
1512   if (align_functions == 0)
1513     {
1514       if (sparc_cpu == PROCESSOR_ULTRASPARC
1515 	  || sparc_cpu == PROCESSOR_ULTRASPARC3
1516 	  || sparc_cpu == PROCESSOR_NIAGARA
1517 	  || sparc_cpu == PROCESSOR_NIAGARA2
1518 	  || sparc_cpu == PROCESSOR_NIAGARA3
1519 	  || sparc_cpu == PROCESSOR_NIAGARA4)
1520 	align_functions = 32;
1521       else if (sparc_cpu == PROCESSOR_NIAGARA7)
1522 	align_functions = 64;
1523     }
1524 
1525   /* Validate PCC_STRUCT_RETURN.  */
1526   if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1527     flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1528 
1529   /* Only use .uaxword when compiling for a 64-bit target.  */
1530   if (!TARGET_ARCH64)
1531     targetm.asm_out.unaligned_op.di = NULL;
1532 
1533   /* Do various machine dependent initializations.  */
1534   sparc_init_modes ();
1535 
1536   /* Set up function hooks.  */
1537   init_machine_status = sparc_init_machine_status;
1538 
  switch (sparc_cpu)
    {
    case PROCESSOR_V7:
    case PROCESSOR_CYPRESS:
      sparc_costs = &cypress_costs;
      break;
    case PROCESSOR_V8:
    case PROCESSOR_SPARCLITE:
    case PROCESSOR_SUPERSPARC:
      sparc_costs = &supersparc_costs;
      break;
    case PROCESSOR_F930:
    case PROCESSOR_F934:
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      sparc_costs = &hypersparc_costs;
      break;
    case PROCESSOR_LEON:
      sparc_costs = &leon_costs;
      break;
    case PROCESSOR_LEON3:
    case PROCESSOR_LEON3V7:
      sparc_costs = &leon3_costs;
      break;
    case PROCESSOR_SPARCLET:
    case PROCESSOR_TSC701:
      sparc_costs = &sparclet_costs;
      break;
    case PROCESSOR_V9:
    case PROCESSOR_ULTRASPARC:
      sparc_costs = &ultrasparc_costs;
      break;
    case PROCESSOR_ULTRASPARC3:
      sparc_costs = &ultrasparc3_costs;
      break;
    case PROCESSOR_NIAGARA:
      sparc_costs = &niagara_costs;
      break;
    case PROCESSOR_NIAGARA2:
      sparc_costs = &niagara2_costs;
      break;
    case PROCESSOR_NIAGARA3:
      sparc_costs = &niagara3_costs;
      break;
    case PROCESSOR_NIAGARA4:
      sparc_costs = &niagara4_costs;
      break;
    case PROCESSOR_NIAGARA7:
      sparc_costs = &niagara7_costs;
      break;
    case PROCESSOR_NATIVE:
      gcc_unreachable ();
    }

  if (sparc_memory_model == SMM_DEFAULT)
    {
      /* Choose the memory model for the operating system.  */
      enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
      if (os_default != SMM_DEFAULT)
	sparc_memory_model = os_default;
      /* Choose the most relaxed model for the processor.  */
      else if (TARGET_V9)
	sparc_memory_model = SMM_RMO;
      else if (TARGET_LEON3)
	sparc_memory_model = SMM_TSO;
      else if (TARGET_LEON)
	sparc_memory_model = SMM_SC;
      else if (TARGET_V8)
	sparc_memory_model = SMM_PSO;
      else
	sparc_memory_model = SMM_SC;
    }

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (TARGET_DEBUG_OPTIONS)
    dump_target_flags ("Final target_flags", target_flags);

  /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that
     can run at the same time.  More importantly, it is the threshold
     defining when additional prefetches will be dropped by the
     hardware.

     The UltraSPARC-III features a documented prefetch queue with a
     size of 8.  Additional prefetches issued in the cpu are
     dropped.

     Niagara processors are different.  In these processors prefetches
     are handled much like regular loads.  The L1 miss buffer is 32
     entries, but prefetches start getting affected when 30 entries
     become occupied.  That occupation could be a mix of regular loads
     and prefetches though.  And that buffer is shared by all threads.
     Once the threshold is reached, if the core is running a single
     thread the prefetch will retry.  If more than one thread is
     running, the prefetch will be dropped.

     All this makes it very difficult to determine how many
     prefetches can be issued simultaneously, even in a
     single-threaded program.  Experimental results show that setting
     this parameter to 32 works well when the number of threads is not
     high.  */
  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 2
			  : (sparc_cpu == PROCESSOR_ULTRASPARC3
			     ? 8 : (sparc_cpu == PROCESSOR_NIAGARA7
				    ? 32 : 3))),
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* For PARAM_L1_CACHE_LINE_SIZE we use the default 32 bytes (see
     params.def), so no maybe_set_param_value is needed.

     The Oracle SPARC Architecture (previously the UltraSPARC
     Architecture) specification states that when a PREFETCH[A]
     instruction is executed an implementation-specific amount of data
     is prefetched, and that it is at least 64 bytes long (aligned to
     at least 64 bytes).

     However, this is not correct.  The M7 (and implementations prior
     to that) does not guarantee a 64B prefetch into a cache if the
     line size is smaller.  A single cache line is all that is ever
     prefetched.  So for the M7, where the L1D$ has 32B lines and the
     L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
     L2 and L3, but only 32B are brought into the L1D$.  (Assuming it
     is a read_n prefetch, which is the only type which allocates to
     the L1.)  */

  /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
     Harvard level-1 caches) in kilobytes.  Both UltraSPARC and
     Niagara processors feature an L1D$ of 16KB.  */
  maybe_set_param_value (PARAM_L1_CACHE_SIZE,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_ULTRASPARC3
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4
			   || sparc_cpu == PROCESSOR_NIAGARA7)
			  ? 16 : 64),
			 global_options.x_param_values,
			 global_options_set.x_param_values);


  /* PARAM_L2_CACHE_SIZE is the size of the L2 in kilobytes.  Note
     that 512 is the default in params.def.  */
  maybe_set_param_value (PARAM_L2_CACHE_SIZE,
			 (sparc_cpu == PROCESSOR_NIAGARA4
			  ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
				   ? 256 : 512)),
			 global_options.x_param_values,
			 global_options_set.x_param_values);


  /* Disable save slot sharing for call-clobbered registers by default.
     The IRA sharing algorithm works on single registers only and this
     pessimizes for double floating-point registers.  */
  if (!global_options_set.x_flag_ira_share_save_slots)
    flag_ira_share_save_slots = 0;

  /* We register a machine specific pass to work around errata, if any.
     The pass must be scheduled as late as possible so that we have the
     (essentially) final form of the insn stream to work on.
     Registering the pass must be done at start up.  It's convenient to
     do it here.  */
  opt_pass *errata_pass = make_pass_work_around_errata (g);
  struct register_pass_info insert_pass_work_around_errata =
    {
      errata_pass,		/* pass */
      "dbr",			/* reference_pass_name */
      1,			/* ref_pass_instance_number */
      PASS_POS_INSERT_AFTER	/* pos_op */
    };
  register_pass (&insert_pass_work_around_errata);
}

/* Miscellaneous utilities.  */

/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
   or branch on register contents instructions.  */

int
v9_regcmp_p (enum rtx_code code)
{
  return (code == EQ || code == NE || code == GE || code == LT
	  || code == LE || code == GT);
}
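
/* These are exactly the conditions handled by the v9
   branch-on-register-contents instructions (brz, brlez, brlz, brnz,
   brgz, brgez) and their movr counterparts, which test a register
   directly against zero without going through the condition codes.  */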

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   sethi instruction.  */

int
fp_sethi_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      long i;

      REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
      return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   mov instruction.  */

int
fp_mov_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      long i;

      REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
      return SPARC_SIMM13_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a high/losum
   instruction sequence.  */

int
fp_high_losum_p (rtx op)
{
  /* The constraints calling this should only be in
     SFmode move insns, so any constant which cannot
     be moved using a single insn will do.  */
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      long i;

      REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
      return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
    }

  return 0;
}
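
/* To illustrate the three predicates above (bit patterns chosen for
   exposition): the SFmode constant 1.0f has target image 0x3f800000;
   that is too big for a simm13 but has its low 10 bits clear, so
   fp_sethi_p accepts it.  An image that fits in 13 signed bits, say
   0x123 (a tiny denormal), satisfies fp_mov_p instead.  Something
   like 0x3f800001 fails both tests and needs the two-insn high/losum
   sequence, which is what fp_high_losum_p accepts.  */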

/* Return true if the address of LABEL can be loaded by means of the
   mov{si,di}_pic_label_ref patterns in PIC mode.  */

static bool
can_use_mov_pic_label_ref (rtx label)
{
  /* VxWorks does not impose a fixed gap between segments; the run-time
     gap can be different from the object-file gap.  We therefore can't
     assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
     are absolutely sure that X is in the same segment as the GOT.
     Unfortunately, the flexibility of linker scripts means that we
     can't be sure of that in general, so assume that GOT-relative
     accesses are never valid on VxWorks.  */
  if (TARGET_VXWORKS_RTP)
    return false;

  /* Similarly, if the label is non-local, it might end up being placed
     in a different section than the current one; mov_pic_label_ref
     requires the label and the code to be in the same section.  */
  if (LABEL_REF_NONLOCAL_P (label))
    return false;

  /* Finally, if we are reordering basic blocks and partitioning into
     hot and cold sections, this might happen for any label.  */
  if (flag_reorder_blocks_and_partition)
    return false;

  return true;
}

/* Expand a move instruction.  Return true if all work is done.  */

bool
sparc_expand_move (machine_mode mode, rtx *operands)
{
  /* Handle sets of MEM first.  */
  if (GET_CODE (operands[0]) == MEM)
    {
      /* 0 is a register (or a pair of registers) on SPARC.  */
      if (register_or_zero_operand (operands[1], mode))
	return false;

      if (!reload_in_progress)
	{
	  operands[0] = validize_mem (operands[0]);
	  operands[1] = force_reg (mode, operands[1]);
	}
    }

  /* Fix up TLS cases.  */
  if (TARGET_HAVE_TLS
      && CONSTANT_P (operands[1])
      && sparc_tls_referenced_p (operands[1]))
    {
      operands[1] = sparc_legitimize_tls_address (operands[1]);
      return false;
    }

  /* Fix up PIC cases.  */
  if (flag_pic && CONSTANT_P (operands[1]))
    {
      if (pic_address_needs_scratch (operands[1]))
	operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);

      /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases.  */
      if ((GET_CODE (operands[1]) == LABEL_REF
	   && can_use_mov_pic_label_ref (operands[1]))
	  || (GET_CODE (operands[1]) == CONST
	      && GET_CODE (XEXP (operands[1], 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
	      && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT
	      && can_use_mov_pic_label_ref (XEXP (XEXP (operands[1], 0), 0))))
	{
	  if (mode == SImode)
	    {
	      emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }

	  if (mode == DImode)
	    {
	      emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }
	}

      if (symbolic_operand (operands[1], mode))
	{
	  operands[1]
	    = sparc_legitimize_pic_address (operands[1],
					    reload_in_progress
					    ? operands[0] : NULL_RTX);
	  return false;
	}
    }

  /* If we are trying to toss an integer constant into FP registers,
     or to load an FP or vector constant, force it into memory.  */
  if (CONSTANT_P (operands[1])
      && REG_P (operands[0])
      && (SPARC_FP_REG_P (REGNO (operands[0]))
	  || SCALAR_FLOAT_MODE_P (mode)
	  || VECTOR_MODE_P (mode)))
    {
      /* emit_group_store will send such bogosity to us when it is
         not storing directly into memory.  So fix this up to avoid
         crashes in output_constant_pool.  */
      if (operands[1] == const0_rtx)
	operands[1] = CONST0_RTX (mode);

      /* We can clear or set FP registers to all-zeros or all-ones if
	 TARGET_VIS, and we can always do so for the other registers.  */
      if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
	  && (const_zero_operand (operands[1], mode)
	      || const_all_ones_operand (operands[1], mode)))
	return false;

      if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
	  /* We are able to build any SF constant in integer registers
	     with at most 2 instructions.  */
	  && (mode == SFmode
	      /* And any DF constant in integer registers, but only
		 when pseudos can no longer be created.  */
	      || (mode == DFmode
		  && ! can_create_pseudo_p ())))
	return false;

      operands[1] = force_const_mem (mode, operands[1]);
      if (!reload_in_progress)
	operands[1] = validize_mem (operands[1]);
      return false;
    }

  /* Accept non-constants and valid constants unmodified.  */
  if (!CONSTANT_P (operands[1])
      || GET_CODE (operands[1]) == HIGH
      || input_operand (operands[1], mode))
    return false;

  switch (mode)
    {
    case QImode:
      /* All QImode constants require only one insn, so proceed.  */
      break;

    case HImode:
    case SImode:
      sparc_emit_set_const32 (operands[0], operands[1]);
      return true;

    case DImode:
      /* input_operand should have filtered out 32-bit mode.  */
      sparc_emit_set_const64 (operands[0], operands[1]);
      return true;

    case TImode:
      {
	rtx high, low;
	/* TImode isn't available in 32-bit mode.  */
	split_double (operands[1], &high, &low);
	emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
			      high));
	emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
			      low));
      }
      return true;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Load OP1, a 32-bit constant, into OP0, a register.
   We know it can't be done in one insn when we get
   here; the move expander guarantees this.  */

static void
sparc_emit_set_const32 (rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);
  rtx temp = op0;

  if (can_create_pseudo_p ())
    temp = gen_reg_rtx (mode);

  if (GET_CODE (op1) == CONST_INT)
    {
      gcc_assert (!small_int_operand (op1, mode)
		  && !const_high_operand (op1, mode));

      /* Emit them as real moves instead of a HIGH/LO_SUM,
	 this way CSE can see everything and reuse intermediate
	 values if it wants.  */
      emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
					     & ~(HOST_WIDE_INT) 0x3ff)));

      emit_insn (gen_rtx_SET (op0,
			      gen_rtx_IOR (mode, temp,
					   GEN_INT (INTVAL (op1) & 0x3ff))));
    }
  else
    {
      /* A symbol, emit in the traditional way.  */
      emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
      emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
    }
}
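
/* For example (constant chosen for exposition), op1 ==
   (const_int 0x12345678) is neither a simm13 nor a sethi-only value,
   so the CONST_INT arm above emits

	(set temp (const_int 0x12345400))	-- 0x12345678 & ~0x3ff
	(set op0 (ior temp (const_int 0x278)))	-- 0x12345678 & 0x3ff

   which the output patterns render as a sethi/or pair.  */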

/* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
   If TEMP is nonzero, we are forbidden to use any other scratch
   registers.  Otherwise, we are allowed to generate them as needed.

   Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
   or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns).  */

void
sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
{
  rtx temp1, temp2, temp3, temp4, temp5;
  rtx ti_temp = 0;

  if (temp && GET_MODE (temp) == TImode)
    {
      ti_temp = temp;
      temp = gen_rtx_REG (DImode, REGNO (temp));
    }

  /* SPARC-V9 code-model support.  */
  switch (sparc_cmodel)
    {
    case CM_MEDLOW:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 4TB of the virtual address
	 space.

	 sethi	%hi(symbol), %temp1
	 or	%temp1, %lo(symbol), %reg  */
      if (temp)
	temp1 = temp;  /* op0 is allowed.  */
      else
	temp1 = gen_reg_rtx (DImode);

      emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
      emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
      break;

    case CM_MEDMID:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 16TB of the virtual address
	 space.

	 sethi	%h44(symbol), %temp1
	 or	%temp1, %m44(symbol), %temp2
	 sllx	%temp2, 12, %temp3
	 or	%temp3, %l44(symbol), %reg  */
      if (temp)
	{
	  temp1 = op0;
	  temp2 = op0;
	  temp3 = temp;  /* op0 is allowed.  */
	}
      else
	{
	  temp1 = gen_reg_rtx (DImode);
	  temp2 = gen_reg_rtx (DImode);
	  temp3 = gen_reg_rtx (DImode);
	}

      emit_insn (gen_seth44 (temp1, op1));
      emit_insn (gen_setm44 (temp2, temp1, op1));
      emit_insn (gen_rtx_SET (temp3,
			      gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
      emit_insn (gen_setl44 (op0, temp3, op1));
      break;

    case CM_MEDANY:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable can be placed anywhere in the virtual address
	 space.

	 sethi	%hh(symbol), %temp1
	 sethi	%lm(symbol), %temp2
	 or	%temp1, %hm(symbol), %temp3
	 sllx	%temp3, 32, %temp4
	 or	%temp4, %temp2, %temp5
	 or	%temp5, %lo(symbol), %reg  */
      if (temp)
	{
	  /* It is possible that one of the registers we got for operands[2]
	     might coincide with that of operands[0] (which is why we made
	     it TImode).  Pick the other one to use as our scratch.  */
	  if (rtx_equal_p (temp, op0))
	    {
	      gcc_assert (ti_temp);
	      temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
	    }
	  temp1 = op0;
	  temp2 = temp;  /* op0 is _not_ allowed, see above.  */
	  temp3 = op0;
	  temp4 = op0;
	  temp5 = op0;
	}
      else
	{
	  temp1 = gen_reg_rtx (DImode);
	  temp2 = gen_reg_rtx (DImode);
	  temp3 = gen_reg_rtx (DImode);
	  temp4 = gen_reg_rtx (DImode);
	  temp5 = gen_reg_rtx (DImode);
	}

      emit_insn (gen_sethh (temp1, op1));
      emit_insn (gen_setlm (temp2, op1));
      emit_insn (gen_sethm (temp3, temp1, op1));
      emit_insn (gen_rtx_SET (temp4,
			      gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
      emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
      emit_insn (gen_setlo (op0, temp5, op1));
      break;

    case CM_EMBMEDANY:
      /* Old old old backwards compatibility cruft here.
	 Essentially it is MEDLOW with a fixed 64-bit
	 virtual base added to all data segment addresses.
	 Text-segment stuff is computed like MEDANY; we can't
	 reuse the code above because the relocation knobs
	 look different.

	 Data segment:	sethi	%hi(symbol), %temp1
			add	%temp1, EMBMEDANY_BASE_REG, %temp2
			or	%temp2, %lo(symbol), %reg  */
      if (data_segment_operand (op1, GET_MODE (op1)))
	{
	  if (temp)
	    {
	      temp1 = temp;  /* op0 is allowed.  */
	      temp2 = op0;
	    }
	  else
	    {
	      temp1 = gen_reg_rtx (DImode);
	      temp2 = gen_reg_rtx (DImode);
	    }

	  emit_insn (gen_embmedany_sethi (temp1, op1));
	  emit_insn (gen_embmedany_brsum (temp2, temp1));
	  emit_insn (gen_embmedany_losum (op0, temp2, op1));
	}

      /* Text segment:	sethi	%uhi(symbol), %temp1
			sethi	%hi(symbol), %temp2
			or	%temp1, %ulo(symbol), %temp3
			sllx	%temp3, 32, %temp4
			or	%temp4, %temp2, %temp5
			or	%temp5, %lo(symbol), %reg  */
      else
	{
	  if (temp)
	    {
	      /* It is possible that one of the registers we got for operands[2]
		 might coincide with that of operands[0] (which is why we made
		 it TImode).  Pick the other one to use as our scratch.  */
	      if (rtx_equal_p (temp, op0))
		{
		  gcc_assert (ti_temp);
		  temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
		}
	      temp1 = op0;
	      temp2 = temp;  /* op0 is _not_ allowed, see above.  */
	      temp3 = op0;
	      temp4 = op0;
	      temp5 = op0;
	    }
	  else
	    {
	      temp1 = gen_reg_rtx (DImode);
	      temp2 = gen_reg_rtx (DImode);
	      temp3 = gen_reg_rtx (DImode);
	      temp4 = gen_reg_rtx (DImode);
	      temp5 = gen_reg_rtx (DImode);
	    }

	  emit_insn (gen_embmedany_textuhi (temp1, op1));
	  emit_insn (gen_embmedany_texthi  (temp2, op1));
	  emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
	  emit_insn (gen_rtx_SET (temp4,
				  gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
	  emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
	  emit_insn (gen_embmedany_textlo  (op0, temp5, op1));
	}
      break;

    default:
      gcc_unreachable ();
    }
}

/* These avoid problems when cross compiling.  If we do not
   go through all this hair then the optimizer will see
   invalid REG_EQUAL notes or in some cases none at all.  */
static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);

/* The optimizer is not allowed to assume anything about exactly
   which bits are set for a HIGH; they are unspecified.
   Unfortunately this leads to many missed optimizations
   during CSE.  We mask out the non-HIGH bits and match
   a plain movdi to alleviate this problem.  */
static rtx
gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
{
  return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
}

static rtx
gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
{
  return gen_rtx_SET (dest, GEN_INT (val));
}

static rtx
gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
{
  return gen_rtx_IOR (DImode, src, GEN_INT (val));
}

static rtx
gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
{
  return gen_rtx_XOR (DImode, src, GEN_INT (val));
}
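
/* E.g. gen_safe_HIGH64 (reg, 0x12345678) yields
   (set reg (const_int 0x12345400)), the value with its low 10 bits
   masked off; CSE then knows the exact bits in REG, which a bare
   (high:DI ...) would leave unspecified.  */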

/* Worker routines for 64-bit constant formation on arch64.
   One of the key things to do in these emissions is
   to create as many temp REGs as possible.  This makes it
   possible for half-built constants to be used later when
   such values are similar to something required later on.
   Without doing this, the optimizer cannot see such
   opportunities.  */

static void sparc_emit_set_const64_quick1 (rtx, rtx,
					   unsigned HOST_WIDE_INT, int);

static void
sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
			       unsigned HOST_WIDE_INT low_bits, int is_neg)
{
  unsigned HOST_WIDE_INT high_bits;

  if (is_neg)
    high_bits = (~low_bits) & 0xffffffff;
  else
    high_bits = low_bits;

  emit_insn (gen_safe_HIGH64 (temp, high_bits));
  if (!is_neg)
    {
      emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
    }
  else
    {
      /* If we are XOR'ing with -1, then we should emit a one's complement
	 instead.  This way the combiner will notice logical operations
	 such as ANDN later on and substitute.  */
      if ((low_bits & 0x3ff) == 0x3ff)
	{
	  emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (op0,
				  gen_safe_XOR64 (temp,
						  (-(HOST_WIDE_INT)0x400
						   | (low_bits & 0x3ff)))));
	}
    }
}
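
/* Worked example (constant chosen for exposition): loading
   0xffffffff80001234 takes the is_neg path with low_bits ==
   0x80001234, so high_bits == ~low_bits & 0xffffffff == 0x7fffedcb
   and the emitted sequence is roughly

	sethi	%hi(0x7fffec00), %temp
	xor	%temp, -460, %op0	! -460 == -0x400 | 0x234

   the xor sign-extends its immediate, flipping the upper word to
   all-ones while recreating the low word exactly.  */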

static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
					   unsigned HOST_WIDE_INT, int);

static void
sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
			       unsigned HOST_WIDE_INT high_bits,
			       unsigned HOST_WIDE_INT low_immediate,
			       int shift_count)
{
  rtx temp2 = op0;

  if ((high_bits & 0xfffffc00) != 0)
    {
      emit_insn (gen_safe_HIGH64 (temp, high_bits));
      if ((high_bits & ~0xfffffc00) != 0)
	emit_insn (gen_rtx_SET (op0,
				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
	temp2 = temp;
    }
  else
    {
      emit_insn (gen_safe_SET64 (temp, high_bits));
      temp2 = temp;
    }

  /* Now shift it up into place.  */
  emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
					       GEN_INT (shift_count))));

  /* If there is a low immediate part piece, finish up by
     putting that in as well.  */
  if (low_immediate != 0)
    emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
}
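
/* E.g. loading 0x1234567800000000 arrives here with high_bits ==
   0x12345678, low_immediate == 0 and shift_count == 32, giving

	sethi	%hi(0x12345678), %temp
	or	%temp, 0x278, %op0
	sllx	%op0, 32, %op0

   with a final or emitted only when low_immediate is nonzero.  */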

static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
					    unsigned HOST_WIDE_INT);

/* Full 64-bit constant decomposition.  Even though this is the
   'worst' case, we still optimize a few things away.  */
static void
sparc_emit_set_const64_longway (rtx op0, rtx temp,
				unsigned HOST_WIDE_INT high_bits,
				unsigned HOST_WIDE_INT low_bits)
{
  rtx sub_temp = op0;

  if (can_create_pseudo_p ())
    sub_temp = gen_reg_rtx (DImode);

  if ((high_bits & 0xfffffc00) != 0)
    {
      emit_insn (gen_safe_HIGH64 (temp, high_bits));
      if ((high_bits & ~0xfffffc00) != 0)
	emit_insn (gen_rtx_SET (sub_temp,
				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
	sub_temp = temp;
    }
  else
    {
      emit_insn (gen_safe_SET64 (temp, high_bits));
      sub_temp = temp;
    }

  if (can_create_pseudo_p ())
    {
      rtx temp2 = gen_reg_rtx (DImode);
      rtx temp3 = gen_reg_rtx (DImode);
      rtx temp4 = gen_reg_rtx (DImode);

      emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
						     GEN_INT (32))));

      emit_insn (gen_safe_HIGH64 (temp2, low_bits));
      if ((low_bits & ~0xfffffc00) != 0)
	{
	  emit_insn (gen_rtx_SET (temp3,
				  gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
	  emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
	}
    }
  else
    {
      rtx low1 = GEN_INT ((low_bits >> (32 - 12))          & 0xfff);
      rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12))     & 0xfff);
      rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
      int to_shift = 12;

      /* We are in the middle of reload, so this is really
	 painful.  However we do still make an attempt to
	 avoid emitting truly stupid code.  */
      if (low1 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
						       GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
	  sub_temp = op0;
	  to_shift = 12;
	}
      else
	{
	  to_shift += 12;
	}
      if (low2 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
						       GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
	  sub_temp = op0;
	  to_shift = 8;
	}
      else
	{
	  to_shift += 8;
	}
      emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
						   GEN_INT (to_shift))));
      if (low3 != const0_rtx)
	emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
      /* phew...  */
    }
}

/* Analyze a 64-bit constant for certain properties.  */
static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT,
				    int *, int *, int *);

static void
analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
			unsigned HOST_WIDE_INT low_bits,
			int *hbsp, int *lbsp, int *abbasp)
{
  int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
  int i;

  lowest_bit_set = highest_bit_set = -1;
  i = 0;
  do
    {
      if ((lowest_bit_set == -1)
	  && ((low_bits >> i) & 1))
	lowest_bit_set = i;
      if ((highest_bit_set == -1)
	  && ((high_bits >> (32 - i - 1)) & 1))
	highest_bit_set = (64 - i - 1);
    }
  while (++i < 32
	 && ((highest_bit_set == -1)
	     || (lowest_bit_set == -1)));
  if (i == 32)
    {
      i = 0;
      do
	{
	  if ((lowest_bit_set == -1)
	      && ((high_bits >> i) & 1))
	    lowest_bit_set = i + 32;
	  if ((highest_bit_set == -1)
	      && ((low_bits >> (32 - i - 1)) & 1))
	    highest_bit_set = 32 - i - 1;
	}
      while (++i < 32
	     && ((highest_bit_set == -1)
		 || (lowest_bit_set == -1)));
    }
  /* If there are no bits set this should have gone out
     as one instruction!  */
  gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
  all_bits_between_are_set = 1;
  for (i = lowest_bit_set; i <= highest_bit_set; i++)
    {
      if (i < 32)
	{
	  if ((low_bits & (1 << i)) != 0)
	    continue;
	}
      else
	{
	  if ((high_bits & (1 << (i - 32))) != 0)
	    continue;
	}
      all_bits_between_are_set = 0;
      break;
    }
  *hbsp = highest_bit_set;
  *lbsp = lowest_bit_set;
  *abbasp = all_bits_between_are_set;
}
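
/* For instance, for the constant 0x000000000000f000 (high_bits == 0,
   low_bits == 0xf000) this finds *lbsp == 12 and *hbsp == 15 and sets
   *abbasp to 1, since bits 12..15 form one contiguous run.  */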

static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);

static int
const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
		   unsigned HOST_WIDE_INT low_bits)
{
  int highest_bit_set, lowest_bit_set, all_bits_between_are_set;

  if (high_bits == 0
      || high_bits == 0xffffffff)
    return 1;

  analyze_64bit_constant (high_bits, low_bits,
			  &highest_bit_set, &lowest_bit_set,
			  &all_bits_between_are_set);

  if ((highest_bit_set == 63
       || lowest_bit_set == 0)
      && all_bits_between_are_set != 0)
    return 1;

  if ((highest_bit_set - lowest_bit_set) < 21)
    return 1;

  return 0;
}
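
/* E.g. 0x00000003ffffc000 has its set bits in the span 14..33, a
   range of 19 < 21, so this returns 1: the constant can be built
   with a sethi of the shifted-down bits followed by one shift (see
   create_simple_focus_bits below).  */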

static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
							unsigned HOST_WIDE_INT,
							int, int);

static unsigned HOST_WIDE_INT
create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
			  unsigned HOST_WIDE_INT low_bits,
			  int lowest_bit_set, int shift)
{
  HOST_WIDE_INT hi, lo;

  if (lowest_bit_set < 32)
    {
      lo = (low_bits >> lowest_bit_set) << shift;
      hi = ((high_bits << (32 - lowest_bit_set)) << shift);
    }
  else
    {
      lo = 0;
      hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
    }
  gcc_assert (! (hi & lo));
  return (hi | lo);
}
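
/* Continuing the example above: for 0x00000003ffffc000 with
   lowest_bit_set == 14 and shift == 10 this computes
   lo == (0xffffc000 >> 14) << 10 == 0xffffc00 and
   hi == (0x3 << 18) << 10 == 0x30000000, i.e. focus bits of
   0x3ffffc00 -- a valid sethi immediate which, shifted left by
   lowest_bit_set - 10 == 4, recreates the constant.  */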

/* Here we are sure to be arch64 and this is an integer constant
   being loaded into a register.  Emit the most efficient
   insn sequence possible.  Detection of all the 1-insn cases
   has been done already.  */
static void
sparc_emit_set_const64 (rtx op0, rtx op1)
{
  unsigned HOST_WIDE_INT high_bits, low_bits;
  int lowest_bit_set, highest_bit_set;
  int all_bits_between_are_set;
  rtx temp = 0;

  /* Sanity check that we know what we are working with.  */
  gcc_assert (TARGET_ARCH64
	      && (GET_CODE (op0) == SUBREG
		  || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));

  if (! can_create_pseudo_p ())
    temp = op0;

  if (GET_CODE (op1) != CONST_INT)
    {
      sparc_emit_set_symbolic_const64 (op0, op1, temp);
      return;
    }

  if (! temp)
    temp = gen_reg_rtx (DImode);

  high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
  low_bits = (INTVAL (op1) & 0xffffffff);

  /* low_bits	bits 0  --> 31
     high_bits	bits 32 --> 63  */

  analyze_64bit_constant (high_bits, low_bits,
			  &highest_bit_set, &lowest_bit_set,
			  &all_bits_between_are_set);

  /* First try for a 2-insn sequence.  */

  /* These situations are preferred because the optimizer can
   * do more things with them:
   * 1) mov	-1, %reg
   *    sllx	%reg, shift, %reg
   * 2) mov	-1, %reg
   *    srlx	%reg, shift, %reg
   * 3) mov	some_small_const, %reg
   *    sllx	%reg, shift, %reg
   */
  if (((highest_bit_set == 63
	|| lowest_bit_set == 0)
       && all_bits_between_are_set != 0)
      || ((highest_bit_set - lowest_bit_set) < 12))
    {
      HOST_WIDE_INT the_const = -1;
      int shift = lowest_bit_set;

      if ((highest_bit_set != 63
	   && lowest_bit_set != 0)
	  || all_bits_between_are_set == 0)
	{
	  the_const =
	    create_simple_focus_bits (high_bits, low_bits,
				      lowest_bit_set, 0);
	}
      else if (lowest_bit_set == 0)
	shift = -(63 - highest_bit_set);

      gcc_assert (SPARC_SIMM13_P (the_const));
      gcc_assert (shift != 0);

      emit_insn (gen_safe_SET64 (temp, the_const));
      if (shift > 0)
	emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
						     GEN_INT (shift))));
      else if (shift < 0)
	emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
						       GEN_INT (-shift))));
      return;
    }

  /* Now a span of 21 or fewer bits set somewhere.
   * 1) sethi	%hi(focus_bits), %reg
   *    sllx	%reg, shift, %reg
   * 2) sethi	%hi(focus_bits), %reg
   *    srlx	%reg, shift, %reg
   */
  if ((highest_bit_set - lowest_bit_set) < 21)
    {
      unsigned HOST_WIDE_INT focus_bits =
	create_simple_focus_bits (high_bits, low_bits,
				  lowest_bit_set, 10);

      gcc_assert (SPARC_SETHI_P (focus_bits));
      gcc_assert (lowest_bit_set != 10);

      emit_insn (gen_safe_HIGH64 (temp, focus_bits));

      /* If lowest_bit_set == 10 then a sethi alone could have done it.  */
      if (lowest_bit_set < 10)
	emit_insn (gen_rtx_SET (op0,
				gen_rtx_LSHIFTRT (DImode, temp,
						  GEN_INT (10 - lowest_bit_set))));
      else if (lowest_bit_set > 10)
	emit_insn (gen_rtx_SET (op0,
				gen_rtx_ASHIFT (DImode, temp,
						GEN_INT (lowest_bit_set - 10))));
      return;
    }

  /* 1) sethi	%hi(low_bits), %reg
   *    or	%reg, %lo(low_bits), %reg
   * 2) sethi	%hi(~low_bits), %reg
   *	xor	%reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
   */
  if (high_bits == 0
      || high_bits == 0xffffffff)
    {
      sparc_emit_set_const64_quick1 (op0, temp, low_bits,
				     (high_bits == 0xffffffff));
      return;
    }

  /* Now, try 3-insn sequences.  */

  /* 1) sethi	%hi(high_bits), %reg
   *    or	%reg, %lo(high_bits), %reg
   *    sllx	%reg, 32, %reg
   */
  if (low_bits == 0)
    {
      sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
      return;
    }

  /* We may be able to do something quick
     when the constant is negated, so try that.  */
  if (const64_is_2insns ((~high_bits) & 0xffffffff,
			 (~low_bits) & 0xfffffc00))
    {
      /* NOTE: The trailing bits get XOR'd so we need the
	 non-negated bits, not the negated ones.  */
      unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;

      if ((((~high_bits) & 0xffffffff) == 0
	   && ((~low_bits) & 0x80000000) == 0)
	  || (((~high_bits) & 0xffffffff) == 0xffffffff
	      && ((~low_bits) & 0x80000000) != 0))
	{
	  unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);

	  if ((SPARC_SETHI_P (fast_int)
	       && (~high_bits & 0xffffffff) == 0)
	      || SPARC_SIMM13_P (fast_int))
	    emit_insn (gen_safe_SET64 (temp, fast_int));
	  else
	    sparc_emit_set_const64 (temp, GEN_INT (fast_int));
	}
      else
	{
	  rtx negated_const;
	  negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
				   (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
	  sparc_emit_set_const64 (temp, negated_const);
	}

      /* If we are XOR'ing with -1, then we should emit a one's complement
	 instead.  This way the combiner will notice logical operations
	 such as ANDN later on and substitute.  */
      if (trailing_bits == 0x3ff)
	{
	  emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (op0,
				  gen_safe_XOR64 (temp,
						  (-0x400 | trailing_bits))));
	}
      return;
    }

  /* 1) sethi	%hi(xxx), %reg
   *    or	%reg, %lo(xxx), %reg
   *	sllx	%reg, yyy, %reg
   *
   * ??? This is just a generalized version of the low_bits==0
   * thing above, FIXME...
   */
  if ((highest_bit_set - lowest_bit_set) < 32)
    {
      unsigned HOST_WIDE_INT focus_bits =
	create_simple_focus_bits (high_bits, low_bits,
				  lowest_bit_set, 0);

      /* We can't get here in this state.  */
      gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);

      /* So what we know is that the set bits straddle the
	 middle of the 64-bit word.  */
      sparc_emit_set_const64_quick2 (op0, temp,
				     focus_bits, 0,
				     lowest_bit_set);
      return;
    }

  /* 1) sethi	%hi(high_bits), %reg
   *    or	%reg, %lo(high_bits), %reg
   *    sllx	%reg, 32, %reg
   *	or	%reg, low_bits, %reg
   */
  if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
    {
      sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
      return;
    }

  /* The easiest way, when all else fails, is full decomposition.  */
  sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
}

/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
   return the mode to be used for the comparison.  For floating-point,
   CCFP[E]mode is used.  CC_NOOVmode should be used when the first operand
   is a PLUS, MINUS, NEG, or ASHIFT.  CCmode should be used when no special
   processing is needed.  */

machine_mode
select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (op)
	{
	case EQ:
	case NE:
	case UNORDERED:
	case ORDERED:
	case UNLT:
	case UNLE:
	case UNGT:
	case UNGE:
	case UNEQ:
	case LTGT:
	  return CCFPmode;

	case LT:
	case LE:
	case GT:
	case GE:
	  return CCFPEmode;

	default:
	  gcc_unreachable ();
	}
    }
  else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
	   || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
    {
      if (TARGET_ARCH64 && GET_MODE (x) == DImode)
	return CCX_NOOVmode;
      else
	return CC_NOOVmode;
    }
  else
    {
      if (TARGET_ARCH64 && GET_MODE (x) == DImode)
	return CCXmode;
      else
	return CCmode;
    }
}
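
/* Examples: an EQ between two SImode registers gets CCmode; EQ on
   (plus:SI a b) against zero gets CC_NOOVmode, recording that the
   overflow bit cannot be trusted for the comparison; the same
   comparisons in DImode on TARGET_ARCH64 get CCXmode and
   CCX_NOOVmode; for floats, LT needs CCFPEmode (which signals on
   unordered operands) while UNLT only needs CCFPmode.  */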

/* Emit the compare insn and return the CC reg for a CODE comparison
   with operands X and Y.  */

static rtx
gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
{
  machine_mode mode;
  rtx cc_reg;

  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return x;

  mode = SELECT_CC_MODE (code, x, y);

  /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
     fcc regs (cse can't tell they're really call clobbered regs and will
     remove a duplicate comparison even if there is an intervening function
     call - it will then try to reload the cc reg via an int reg which is why
     we need the movcc patterns).  It is possible to provide the movcc
     patterns by using the ldxfsr/stxfsr v9 insns.  I tried it: you need two
     registers (say %g1,%g5) and it takes about 6 insns.  A better fix would be
     to tell cse that CCFPE mode registers (even pseudos) are call
     clobbered.  */

  /* ??? This is an experiment.  Rather than making changes to cse which may
     or may not be easy/clean, we do our own cse.  This is possible because
     we will generate hard registers.  Cse knows they're call clobbered (it
     doesn't know the same thing about pseudos).  If we guess wrong, no big
     deal, but if we win, great!  */

  if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
#if 1 /* experiment */
    {
      int reg;
      /* We cycle through the registers to ensure they're all exercised.  */
      static int next_fcc_reg = 0;
      /* Previous x,y for each fcc reg.  */
      static rtx prev_args[4][2];

      /* Scan prev_args for x,y.  */
      for (reg = 0; reg < 4; reg++)
	if (prev_args[reg][0] == x && prev_args[reg][1] == y)
	  break;
      if (reg == 4)
	{
	  reg = next_fcc_reg;
	  prev_args[reg][0] = x;
	  prev_args[reg][1] = y;
	  next_fcc_reg = (next_fcc_reg + 1) & 3;
	}
      cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
    }
#else
    cc_reg = gen_reg_rtx (mode);
#endif /* ! experiment */
  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
  else
    cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);

  /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD.  If we do, this
     will only result in an unrecognizable insn so no point in asserting.  */
  emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));

  return cc_reg;
}


/* Emit the compare insn and return the CC reg for the comparison in CMP.  */

rtx
gen_compare_reg (rtx cmp)
{
  return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
}

/* This function is used for v9 only.
   DEST is the target of the Scc insn.
   CODE is the code for an Scc's comparison.
   X and Y are the values we compare.

   This function is needed to turn

	   (set (reg:SI 110)
	       (gt (reg:CCX 100 %icc)
	           (const_int 0)))
   into
	   (set (reg:SI 110)
	       (gt:DI (reg:CCX 100 %icc)
	           (const_int 0)))

   I.e. the instruction recognizer needs to see the mode of the comparison to
   find the right instruction.  We could use "gt:DI" right in the
   define_expand, but leaving it out allows us to handle DI, SI, etc.  */

static int
gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
{
  if (! TARGET_ARCH64
      && (GET_MODE (x) == DImode
	  || GET_MODE (dest) == DImode))
    return 0;

  /* Try to use the movrCC insns.  */
  if (TARGET_ARCH64
      && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
      && y == const0_rtx
      && v9_regcmp_p (compare_code))
    {
      rtx op0 = x;
      rtx temp;

      /* Special case for op0 != 0.  This can be done with one instruction if
	 dest == x.  */

      if (compare_code == NE
	  && GET_MODE (dest) == DImode
	  && rtx_equal_p (op0, dest))
	{
	  emit_insn (gen_rtx_SET (dest,
			      gen_rtx_IF_THEN_ELSE (DImode,
				       gen_rtx_fmt_ee (compare_code, DImode,
						       op0, const0_rtx),
				       const1_rtx,
				       dest)));
	  return 1;
	}

      if (reg_overlap_mentioned_p (dest, op0))
	{
	  /* Handle the case where dest == x.
	     We "early clobber" the result.  */
	  op0 = gen_reg_rtx (GET_MODE (x));
	  emit_move_insn (op0, x);
	}

      emit_insn (gen_rtx_SET (dest, const0_rtx));
      if (GET_MODE (op0) != DImode)
	{
	  temp = gen_reg_rtx (DImode);
	  convert_move (temp, op0, 0);
	}
      else
	temp = op0;
      emit_insn (gen_rtx_SET (dest,
			  gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
				   gen_rtx_fmt_ee (compare_code, DImode,
						   temp, const0_rtx),
				   const1_rtx,
				   dest)));
      return 1;
    }
  else
    {
      x = gen_compare_reg_1 (compare_code, x, y);
      y = const0_rtx;

      gcc_assert (GET_MODE (x) != CC_NOOVmode
		  && GET_MODE (x) != CCX_NOOVmode);

      emit_insn (gen_rtx_SET (dest, const0_rtx));
      emit_insn (gen_rtx_SET (dest,
			  gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
				   gen_rtx_fmt_ee (compare_code,
						   GET_MODE (x), x, y),
				   const1_rtx, dest)));
      return 1;
    }
}
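
/* E.g. the one-instruction special case above turns the DImode
   computation dest = (dest != 0) into a single conditional move on
   register contents, roughly

	movrnz	%dest, 1, %dest

   when dest is zero nothing is moved, otherwise dest becomes 1.  */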


/* Emit an scc insn.  For seq, sne, sgeu, and sltu, we can do this
   without jumps using the addx/subx instructions.  */

bool
emit_scc_insn (rtx operands[])
{
  rtx tem;
  rtx x;
  rtx y;
  enum rtx_code code;

  /* The quad-word fp compare library routines all return nonzero to indicate
     true, which is different from the equivalent libgcc routines, so we must
     handle them specially here.  */
  if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
    {
      operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
					      GET_CODE (operands[1]));
      operands[2] = XEXP (operands[1], 0);
      operands[3] = XEXP (operands[1], 1);
    }

  code = GET_CODE (operands[1]);
  x = operands[2];
  y = operands[3];

  /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
     more applications).  The exception to this is "reg != 0" which can
     be done in one instruction on v9 (so we do it).  */
  if (code == EQ)
    {
      if (GET_MODE (x) == SImode)
        {
	  rtx pat;
	  if (TARGET_ARCH64)
	    pat = gen_seqsidi_special (operands[0], x, y);
	  else
	    pat = gen_seqsisi_special (operands[0], x, y);
          emit_insn (pat);
          return true;
        }
      else if (GET_MODE (x) == DImode)
        {
	  rtx pat = gen_seqdi_special (operands[0], x, y);
          emit_insn (pat);
          return true;
        }
    }

  if (code == NE)
    {
      if (GET_MODE (x) == SImode)
        {
          rtx pat;
	  if (TARGET_ARCH64)
	    pat = gen_snesidi_special (operands[0], x, y);
	  else
	    pat = gen_snesisi_special (operands[0], x, y);
          emit_insn (pat);
          return true;
        }
      else if (GET_MODE (x) == DImode)
        {
	  rtx pat;
	  if (TARGET_VIS3)
	    pat = gen_snedi_special_vis3 (operands[0], x, y);
	  else
	    pat = gen_snedi_special (operands[0], x, y);
          emit_insn (pat);
          return true;
        }
    }

  if (TARGET_V9
      && TARGET_ARCH64
      && GET_MODE (x) == DImode
      && !(TARGET_VIS3
	   && (code == GTU || code == LTU))
      && gen_v9_scc (operands[0], code, x, y))
    return true;

  /* We can do LTU and GEU using the addx/subx instructions too.  And
     for GTU/LEU, if both operands are registers, swap them and fall
     back to the easy case.  */
  if (code == GTU || code == LEU)
    {
      if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
          && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
        {
          tem = x;
          x = y;
          y = tem;
          code = swap_condition (code);
        }
    }

  if (code == LTU
      || (!TARGET_VIS3 && code == GEU))
    {
      emit_insn (gen_rtx_SET (operands[0],
			      gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
					      gen_compare_reg_1 (code, x, y),
					      const0_rtx)));
      return true;
    }

  /* All the possibilities to use addx/subx-based sequences have been
     exhausted, try for a 3-instruction sequence using v9 conditional
     moves.  */
  if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
    return true;

  /* Nope, do branches.  */
  return false;
}
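
/* The addx/subx trick referred to above computes, e.g. for LTU,

	subcc	%x, %y, %g0	! set carry iff x <u y
	addx	%g0, 0, %dest	! dest = carry bit

   so an unsigned set-on-less-than needs no branch at all; GEU uses
   subx similarly to recover the complemented carry.  */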

/* Emit a conditional jump insn for the v9 architecture using comparison code
   CODE and jump target LABEL.
   This function exists to take advantage of the v9 brxx insns.  */

static void
emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
{
  emit_jump_insn (gen_rtx_SET (pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode,
				    gen_rtx_fmt_ee (code, GET_MODE (op0),
						    op0, const0_rtx),
				    gen_rtx_LABEL_REF (VOIDmode, label),
				    pc_rtx)));
}
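
/* The pattern built above matches the v9 branch-on-register-contents
   insns, so e.g. code == NE against zero becomes a single

	brnz	%o0, label

   with no condition code register involved (register chosen here for
   illustration).  */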

/* Emit a conditional jump insn for the UA2011 architecture using
   comparison code CODE and jump target LABEL.  This function exists
   to take advantage of the UA2011 Compare and Branch insns.  */

static void
emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
{
  rtx if_then_else;

  if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
				       gen_rtx_fmt_ee (code, GET_MODE (op0),
						       op0, op1),
				       gen_rtx_LABEL_REF (VOIDmode, label),
				       pc_rtx);

  emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
}
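
/* On UA2011 the pattern above matches the compare-and-branch
   patterns, so a word comparison x != 5 followed by a branch
   assembles to something like

	cwbne	%o1, 5, label

   fusing the compare and the branch into a single instruction
   (operands here are illustrative).  */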

void
emit_conditional_branch_insn (rtx operands[])
{
  /* The quad-word fp compare library routines all return nonzero to indicate
     true, which is different from the equivalent libgcc routines, so we must
     handle them specially here.  */
  if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
    {
      operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
					      GET_CODE (operands[0]));
      operands[1] = XEXP (operands[0], 0);
      operands[2] = XEXP (operands[0], 1);
    }

  /* If we can tell early on that the comparison is against a constant
     that won't fit in the 5-bit signed immediate field of a cbcond,
     use one of the other v9 conditional branch sequences.  */
  if (TARGET_CBCOND
      && GET_CODE (operands[1]) == REG
      && (GET_MODE (operands[1]) == SImode
	  || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
      && (GET_CODE (operands[2]) != CONST_INT
	  || SPARC_SIMM5_P (INTVAL (operands[2]))))
    {
      emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2],
			operands[3]);
      return;
    }

  if (TARGET_ARCH64 && operands[2] == const0_rtx
      && GET_CODE (operands[1]) == REG
      && GET_MODE (operands[1]) == DImode)
    {
      emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
      return;
    }

  operands[1] = gen_compare_reg (operands[0]);
  operands[2] = const0_rtx;
  operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
				operands[1], operands[2]);
  emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
				  operands[3]));
}


/* Generate a DFmode part of a hard TFmode register.
   REG is the TFmode hard register, LOW is 1 for the
   low 64 bits of the register and 0 otherwise.  */

rtx
gen_df_reg (rtx reg, int low)
{
  int regno = REGNO (reg);

  if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
    regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
  return gen_rtx_REG (DFmode, regno);
}
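
/* E.g. for a TFmode value in the register pair starting at %f0 on
   this big-endian port, gen_df_reg (reg, 0) returns %f0 (the high
   64 bits) and gen_df_reg (reg, 1) returns %f2 (the low 64 bits).  */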

/* Generate a call to FUNC with OPERANDS.  Operand 0 is the return value.
   Unlike normal calls, TFmode operands are passed by reference.  It is
   assumed that no more than 3 operands are required.  */

static void
emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
{
  rtx ret_slot = NULL, arg[3], func_sym;
  int i;

  /* We only expect to be called for conversions, unary, and binary ops.  */
  gcc_assert (nargs == 2 || nargs == 3);

  for (i = 0; i < nargs; ++i)
    {
      rtx this_arg = operands[i];
      rtx this_slot;

      /* TFmode arguments and return values are passed by reference.  */
      if (GET_MODE (this_arg) == TFmode)
	{
	  int force_stack_temp;

	  force_stack_temp = 0;
	  if (TARGET_BUGGY_QP_LIB && i == 0)
	    force_stack_temp = 1;

	  if (GET_CODE (this_arg) == MEM
	      && ! force_stack_temp)
	    {
	      tree expr = MEM_EXPR (this_arg);
	      if (expr)
		mark_addressable (expr);
	      this_arg = XEXP (this_arg, 0);
	    }
	  else if (CONSTANT_P (this_arg)
		   && ! force_stack_temp)
	    {
	      this_slot = force_const_mem (TFmode, this_arg);
	      this_arg = XEXP (this_slot, 0);
	    }
	  else
	    {
	      this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));

	      /* Operand 0 is the return value.  We'll copy it out later.  */
	      if (i > 0)
		emit_move_insn (this_slot, this_arg);
	      else
		ret_slot = this_slot;

	      this_arg = XEXP (this_slot, 0);
	    }
	}

      arg[i] = this_arg;
    }

  func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);

  if (GET_MODE (operands[0]) == TFmode)
    {
      if (nargs == 2)
	emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
			   arg[0], GET_MODE (arg[0]),
			   arg[1], GET_MODE (arg[1]));
      else
	emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
			   arg[0], GET_MODE (arg[0]),
			   arg[1], GET_MODE (arg[1]),
			   arg[2], GET_MODE (arg[2]));

      if (ret_slot)
	emit_move_insn (operands[0], ret_slot);
    }
  else
    {
      rtx ret;

      gcc_assert (nargs == 2);

      ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
				     GET_MODE (operands[0]), 1,
				     arg[1], GET_MODE (arg[1]));

      if (ret != operands[0])
	emit_move_insn (operands[0], ret);
    }
}
3274 
3275 /* Expand soft-float TFmode calls to the SPARC ABI _Qp_* routines.  */
3276 
3277 static void
3278 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3279 {
3280   const char *func;
3281 
3282   switch (code)
3283     {
3284     case PLUS:
3285       func = "_Qp_add";
3286       break;
3287     case MINUS:
3288       func = "_Qp_sub";
3289       break;
3290     case MULT:
3291       func = "_Qp_mul";
3292       break;
3293     case DIV:
3294       func = "_Qp_div";
3295       break;
3296     default:
3297       gcc_unreachable ();
3298     }
3299 
3300   emit_soft_tfmode_libcall (func, 3, operands);
3301 }
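
/* Added note (editorial): for a TFmode addition the libcall emitted above
   is in effect

     _Qp_add (&dest, &op1, &op2);

   i.e. all three TFmode values are communicated by reference, following
   the SPARC V9 ABI conventions for the _Qp_* soft-quad routines.  */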
3302 
3303 static void
3304 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3305 {
3306   const char *func;
3307 
3308   gcc_assert (code == SQRT);
3309   func = "_Qp_sqrt";
3310 
3311   emit_soft_tfmode_libcall (func, 2, operands);
3312 }
3313 
3314 static void
3315 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3316 {
3317   const char *func;
3318 
3319   switch (code)
3320     {
3321     case FLOAT_EXTEND:
3322       switch (GET_MODE (operands[1]))
3323 	{
3324 	case SFmode:
3325 	  func = "_Qp_stoq";
3326 	  break;
3327 	case DFmode:
3328 	  func = "_Qp_dtoq";
3329 	  break;
3330 	default:
3331 	  gcc_unreachable ();
3332 	}
3333       break;
3334 
3335     case FLOAT_TRUNCATE:
3336       switch (GET_MODE (operands[0]))
3337 	{
3338 	case SFmode:
3339 	  func = "_Qp_qtos";
3340 	  break;
3341 	case DFmode:
3342 	  func = "_Qp_qtod";
3343 	  break;
3344 	default:
3345 	  gcc_unreachable ();
3346 	}
3347       break;
3348 
3349     case FLOAT:
3350       switch (GET_MODE (operands[1]))
3351 	{
3352 	case SImode:
3353 	  func = "_Qp_itoq";
3354 	  if (TARGET_ARCH64)
3355 	    operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3356 	  break;
3357 	case DImode:
3358 	  func = "_Qp_xtoq";
3359 	  break;
3360 	default:
3361 	  gcc_unreachable ();
3362 	}
3363       break;
3364 
3365     case UNSIGNED_FLOAT:
3366       switch (GET_MODE (operands[1]))
3367 	{
3368 	case SImode:
3369 	  func = "_Qp_uitoq";
3370 	  if (TARGET_ARCH64)
3371 	    operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3372 	  break;
3373 	case DImode:
3374 	  func = "_Qp_uxtoq";
3375 	  break;
3376 	default:
3377 	  gcc_unreachable ();
3378 	}
3379       break;
3380 
3381     case FIX:
3382       switch (GET_MODE (operands[0]))
3383 	{
3384 	case SImode:
3385 	  func = "_Qp_qtoi";
3386 	  break;
3387 	case DImode:
3388 	  func = "_Qp_qtox";
3389 	  break;
3390 	default:
3391 	  gcc_unreachable ();
3392 	}
3393       break;
3394 
3395     case UNSIGNED_FIX:
3396       switch (GET_MODE (operands[0]))
3397 	{
3398 	case SImode:
3399 	  func = "_Qp_qtoui";
3400 	  break;
3401 	case DImode:
3402 	  func = "_Qp_qtoux";
3403 	  break;
3404 	default:
3405 	  gcc_unreachable ();
3406 	}
3407       break;
3408 
3409     default:
3410       gcc_unreachable ();
3411     }
3412 
3413   emit_soft_tfmode_libcall (func, 2, operands);
3414 }
3415 
3416 /* Expand a hard-float TFmode operation.  All arguments must be in
3417    registers.  */
3418 
3419 static void
3420 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3421 {
3422   rtx op, dest;
3423 
3424   if (GET_RTX_CLASS (code) == RTX_UNARY)
3425     {
3426       operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3427       op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3428     }
3429   else
3430     {
3431       operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3432       operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3433       op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3434 			   operands[1], operands[2]);
3435     }
3436 
3437   if (register_operand (operands[0], VOIDmode))
3438     dest = operands[0];
3439   else
3440     dest = gen_reg_rtx (GET_MODE (operands[0]));
3441 
3442   emit_insn (gen_rtx_SET (dest, op));
3443 
3444   if (dest != operands[0])
3445     emit_move_insn (operands[0], dest);
3446 }
3447 
3448 void
3449 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3450 {
3451   if (TARGET_HARD_QUAD)
3452     emit_hard_tfmode_operation (code, operands);
3453   else
3454     emit_soft_tfmode_binop (code, operands);
3455 }
3456 
3457 void
3458 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3459 {
3460   if (TARGET_HARD_QUAD)
3461     emit_hard_tfmode_operation (code, operands);
3462   else
3463     emit_soft_tfmode_unop (code, operands);
3464 }
3465 
3466 void
3467 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3468 {
3469   if (TARGET_HARD_QUAD)
3470     emit_hard_tfmode_operation (code, operands);
3471   else
3472     emit_soft_tfmode_cvt (code, operands);
3473 }
3474 
3475 /* Return nonzero if a branch/jump/call instruction will emit
3476    a nop into its delay slot.  */
3477 
3478 int
3479 empty_delay_slot (rtx_insn *insn)
3480 {
3481   rtx seq;
3482 
3483   /* If no previous instruction (should not happen), return true.  */
3484   if (PREV_INSN (insn) == NULL)
3485     return 1;
3486 
3487   seq = NEXT_INSN (PREV_INSN (insn));
3488   if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3489     return 0;
3490 
3491   return 1;
3492 }
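
/* Added note (editorial): after the delayed-branch scheduling pass, an
   insn whose delay slot was filled is wrapped together with the slot insn
   in a SEQUENCE, so the absence of a SEQUENCE here means the slot is empty
   and a nop will be emitted into it.  */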
3493 
3494 /* Return nonzero if we should emit a nop after a cbcond instruction.
3495    The cbcond instruction does not have a delay slot, however there is
3496    a severe performance penalty if a control transfer appears right
3497    after a cbcond.  Therefore we emit a nop when we detect this
3498    situation.  */
3499 
3500 int
3501 emit_cbcond_nop (rtx insn)
3502 {
3503   rtx next = next_active_insn (insn);
3504 
3505   if (!next)
3506     return 1;
3507 
3508   if (NONJUMP_INSN_P (next)
3509       && GET_CODE (PATTERN (next)) == SEQUENCE)
3510     next = XVECEXP (PATTERN (next), 0, 0);
3511   else if (CALL_P (next)
3512 	   && GET_CODE (PATTERN (next)) == PARALLEL)
3513     {
3514       rtx delay = XVECEXP (PATTERN (next), 0, 1);
3515 
3516       if (GET_CODE (delay) == RETURN)
3517 	{
3518 	  /* It's a sibling call.  Do not emit the nop if we're going
3519 	     to emit something other than the jump itself as the first
3520 	     instruction of the sibcall sequence.  */
3521 	  if (sparc_leaf_function_p || TARGET_FLAT)
3522 	    return 0;
3523 	}
3524     }
3525 
3526   if (NONJUMP_INSN_P (next))
3527     return 0;
3528 
3529   return 1;
3530 }
3531 
3532 /* Return nonzero if TRIAL can go into the call delay slot.  */
3533 
3534 int
3535 eligible_for_call_delay (rtx_insn *trial)
3536 {
3537   rtx pat;
3538 
3539   if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3540     return 0;
3541 
3542   /* Binutils allows
3543        call __tls_get_addr, %tgd_call (foo)
3544         add %l7, %o0, %o0, %tgd_add (foo)
3545      while Sun as/ld does not.  */
3546   if (TARGET_GNU_TLS || !TARGET_TLS)
3547     return 1;
3548 
3549   pat = PATTERN (trial);
3550 
3551   /* We must reject tgd_add{32|64}, i.e.
3552        (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3553      and tldm_add{32|64}, i.e.
3554        (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3555      for Sun as/ld.  */
3556   if (GET_CODE (pat) == SET
3557       && GET_CODE (SET_SRC (pat)) == PLUS)
3558     {
3559       rtx unspec = XEXP (SET_SRC (pat), 1);
3560 
3561       if (GET_CODE (unspec) == UNSPEC
3562 	  && (XINT (unspec, 1) == UNSPEC_TLSGD
3563 	      || XINT (unspec, 1) == UNSPEC_TLSLDM))
3564 	return 0;
3565     }
3566 
3567   return 1;
3568 }
3569 
3570 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3571    instruction.  RETURN_P is true if the v9 variant 'return' is to be
3572    considered in the test too.
3573 
3574    TRIAL must be a SET whose destination is a REG appropriate for the
3575    'restore' instruction or, if RETURN_P is true, for the 'return'
3576    instruction.  */
3577 
3578 static int
3579 eligible_for_restore_insn (rtx trial, bool return_p)
3580 {
3581   rtx pat = PATTERN (trial);
3582   rtx src = SET_SRC (pat);
3583   bool src_is_freg = false;
3584   rtx src_reg;
3585 
3586   /* Since we now can do moves between float and integer registers when
3587      VIS3 is enabled, we have to catch this case.  We can allow such
3588      moves when doing a 'return' however.  */
3589   src_reg = src;
3590   if (GET_CODE (src_reg) == SUBREG)
3591     src_reg = SUBREG_REG (src_reg);
3592   if (GET_CODE (src_reg) == REG
3593       && SPARC_FP_REG_P (REGNO (src_reg)))
3594     src_is_freg = true;
3595 
3596   /* The 'restore src,%g0,dest' pattern for word mode and below.  */
3597   if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3598       && arith_operand (src, GET_MODE (src))
3599       && ! src_is_freg)
3600     {
3601       if (TARGET_ARCH64)
3602         return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3603       else
3604         return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3605     }
3606 
3607   /* The 'restore src,%g0,dest' pattern for double-word mode.  */
3608   else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3609 	   && arith_double_operand (src, GET_MODE (src))
3610 	   && ! src_is_freg)
3611     return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3612 
3613   /* The 'restore src,%g0,dest' pattern for float if no FPU.  */
3614   else if (! TARGET_FPU && register_operand (src, SFmode))
3615     return 1;
3616 
3617   /* The 'restore src,%g0,dest' pattern for double if no FPU.  */
3618   else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3619     return 1;
3620 
3621   /* If we have the 'return' instruction, anything that does not use
3622      local or output registers and can go into a delay slot wins.  */
3623   else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
3624     return 1;
3625 
3626   /* The 'restore src1,src2,dest' pattern for SImode.  */
3627   else if (GET_CODE (src) == PLUS
3628 	   && register_operand (XEXP (src, 0), SImode)
3629 	   && arith_operand (XEXP (src, 1), SImode))
3630     return 1;
3631 
3632   /* The 'restore src1,src2,dest' pattern for DImode.  */
3633   else if (GET_CODE (src) == PLUS
3634 	   && register_operand (XEXP (src, 0), DImode)
3635 	   && arith_double_operand (XEXP (src, 1), DImode))
3636     return 1;
3637 
3638   /* The 'restore src1,%lo(src2),dest' pattern.  */
3639   else if (GET_CODE (src) == LO_SUM
3640 	   && ! TARGET_CM_MEDMID
3641 	   && ((register_operand (XEXP (src, 0), SImode)
3642 	        && immediate_operand (XEXP (src, 1), SImode))
3643 	       || (TARGET_ARCH64
3644 		   && register_operand (XEXP (src, 0), DImode)
3645 		   && immediate_operand (XEXP (src, 1), DImode))))
3646     return 1;
3647 
3648   /* The 'restore src,src,dest' pattern.  */
3649   else if (GET_CODE (src) == ASHIFT
3650 	   && (register_operand (XEXP (src, 0), SImode)
3651 	       || register_operand (XEXP (src, 0), DImode))
3652 	   && XEXP (src, 1) == const1_rtx)
3653     return 1;
3654 
3655   return 0;
3656 }
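
/* Added example (editorial sketch): under the rules above, a delay-slot
   candidate like

     (set (reg:SI %i0) (plus:SI (reg:SI %i1) (const_int 5)))

   is eligible and can be folded with the register-window restore into the
   single instruction 'restore %i1, 5, %o0', whose destination names the
   register as seen in the caller's window.  */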
3657 
3658 /* Return nonzero if TRIAL can go into the function return's delay slot.  */
3659 
3660 int
3661 eligible_for_return_delay (rtx_insn *trial)
3662 {
3663   int regno;
3664   rtx pat;
3665 
3666   /* If the function uses __builtin_eh_return, the eh_return machinery
3667      occupies the delay slot.  */
3668   if (crtl->calls_eh_return)
3669     return 0;
3670 
3671   if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3672     return 0;
3673 
3674   /* In the case of a leaf or flat function, anything can go into the slot.  */
3675   if (sparc_leaf_function_p || TARGET_FLAT)
3676     return 1;
3677 
3678   if (!NONJUMP_INSN_P (trial))
3679     return 0;
3680 
3681   pat = PATTERN (trial);
3682   if (GET_CODE (pat) == PARALLEL)
3683     {
3684       int i;
3685 
3686       if (! TARGET_V9)
3687 	return 0;
3688       for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3689 	{
3690 	  rtx expr = XVECEXP (pat, 0, i);
3691 	  if (GET_CODE (expr) != SET)
3692 	    return 0;
3693 	  if (GET_CODE (SET_DEST (expr)) != REG)
3694 	    return 0;
3695 	  regno = REGNO (SET_DEST (expr));
3696 	  if (regno >= 8 && regno < 24)
3697 	    return 0;
3698 	}
3699       return !epilogue_renumber (&pat, 1);
3700     }
3701 
3702   if (GET_CODE (pat) != SET)
3703     return 0;
3704 
3705   if (GET_CODE (SET_DEST (pat)) != REG)
3706     return 0;
3707 
3708   regno = REGNO (SET_DEST (pat));
3709 
3710   /* Otherwise, only operations which can be done in tandem with
3711      a `restore' or `return' insn can go into the delay slot.  */
3712   if (regno >= 8 && regno < 24)
3713     return 0;
3714 
3715   /* If this instruction sets up a floating-point register and we have a
3716      return instruction, it can probably go in.  But restore will not work
3717      with FP_REGS.  */
3718   if (! SPARC_INT_REG_P (regno))
3719     return TARGET_V9 && !epilogue_renumber (&pat, 1);
3720 
3721   return eligible_for_restore_insn (trial, true);
3722 }
3723 
3724 /* Return nonzero if TRIAL can go into the sibling call's delay slot.  */
3725 
3726 int
3727 eligible_for_sibcall_delay (rtx_insn *trial)
3728 {
3729   rtx pat;
3730 
3731   if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3732     return 0;
3733 
3734   if (!NONJUMP_INSN_P (trial))
3735     return 0;
3736 
3737   pat = PATTERN (trial);
3738 
3739   if (sparc_leaf_function_p || TARGET_FLAT)
3740     {
3741       /* If the tail call is done using the call instruction,
3742 	 we have to restore %o7 in the delay slot.  */
3743       if (LEAF_SIBCALL_SLOT_RESERVED_P)
3744 	return 0;
3745 
3746       /* %g1 is used to build the function address.  */
3747       if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3748 	return 0;
3749 
3750       return 1;
3751     }
3752 
3753   if (GET_CODE (pat) != SET)
3754     return 0;
3755 
3756   /* Otherwise, only operations which can be done in tandem with
3757      a `restore' insn can go into the delay slot.  */
3758   if (GET_CODE (SET_DEST (pat)) != REG
3759       || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3760       || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3761     return 0;
3762 
3763   /* If it mentions %o7, it can't go in, because sibcall will clobber it
3764      in most cases.  */
3765   if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3766     return 0;
3767 
3768   return eligible_for_restore_insn (trial, false);
3769 }
3770 
3771 /* Determine if it's legal to put X into the constant pool.  This
3772    is not possible if X contains the address of a symbol that is
3773    not constant (TLS) or not known at final link time (PIC).  */
3774 
3775 static bool
3776 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
3777 {
3778   switch (GET_CODE (x))
3779     {
3780     case CONST_INT:
3781     case CONST_WIDE_INT:
3782     case CONST_DOUBLE:
3783     case CONST_VECTOR:
3784       /* Accept all non-symbolic constants.  */
3785       return false;
3786 
3787     case LABEL_REF:
3788       /* Labels are OK iff we are non-PIC.  */
3789       return flag_pic != 0;
3790 
3791     case SYMBOL_REF:
3792       /* 'Naked' TLS symbol references are never OK,
3793 	 non-TLS symbols are OK iff we are non-PIC.  */
3794       if (SYMBOL_REF_TLS_MODEL (x))
3795 	return true;
3796       else
3797 	return flag_pic != 0;
3798 
3799     case CONST:
3800       return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3801     case PLUS:
3802     case MINUS:
3803       return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3804          || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3805     case UNSPEC:
3806       return true;
3807     default:
3808       gcc_unreachable ();
3809     }
3810 }
3811 
3812 /* Global Offset Table support.  */
3813 static GTY(()) rtx got_helper_rtx = NULL_RTX;
3814 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
3815 
3816 /* Return the SYMBOL_REF for the Global Offset Table.  */
3817 
3818 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
3819 
3820 static rtx
3821 sparc_got (void)
3822 {
3823   if (!sparc_got_symbol)
3824     sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3825 
3826   return sparc_got_symbol;
3827 }
3828 
3829 /* Ensure that we are not using patterns that are not OK with PIC.  */
3830 
3831 int
3832 check_pic (int i)
3833 {
3834   rtx op;
3835 
3836   switch (flag_pic)
3837     {
3838     case 1:
3839       op = recog_data.operand[i];
3840       gcc_assert (GET_CODE (op) != SYMBOL_REF
3841 	  	  && (GET_CODE (op) != CONST
3842 		      || (GET_CODE (XEXP (op, 0)) == MINUS
3843 			  && XEXP (XEXP (op, 0), 0) == sparc_got ()
3844 			  && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
3845     case 2:
3846     default:
3847       return 1;
3848     }
3849 }
3850 
3851 /* Return true if X is an address which needs a temporary register when
3852    reloaded while generating PIC code.  */
3853 
3854 int
3855 pic_address_needs_scratch (rtx x)
3856 {
3857   /* An address that is a symbol plus a non-SMALL_INT offset needs a temp reg.  */
3858   if (GET_CODE (x) == CONST
3859       && GET_CODE (XEXP (x, 0)) == PLUS
3860       && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
3861       && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3862       && !SMALL_INT (XEXP (XEXP (x, 0), 1)))
3863     return 1;
3864 
3865   return 0;
3866 }
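
/* Added example (editorial): (const (plus (symbol_ref "foo")
   (const_int 0x2000))) needs a scratch register, because 0x2000 does not
   fit in the 13-bit signed immediate field checked by SMALL_INT, whereas a
   small offset such as 8 would not need one.  */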
3867 
3868 /* Determine if a given RTX is a valid constant.  We already know this
3869    satisfies CONSTANT_P.  */
3870 
3871 static bool
3872 sparc_legitimate_constant_p (machine_mode mode, rtx x)
3873 {
3874   switch (GET_CODE (x))
3875     {
3876     case CONST:
3877     case SYMBOL_REF:
3878       if (sparc_tls_referenced_p (x))
3879 	return false;
3880       break;
3881 
3882     case CONST_DOUBLE:
3883       /* Floating point constants are generally not ok.
3884 	 The only exception is 0.0 and all-ones in VIS.  */
3885       if (TARGET_VIS
3886 	  && SCALAR_FLOAT_MODE_P (mode)
3887 	  && (const_zero_operand (x, mode)
3888 	      || const_all_ones_operand (x, mode)))
3889 	return true;
3890 
3891       return false;
3892 
3893     case CONST_VECTOR:
3894       /* Vector constants are generally not ok.
3895 	 The only exception is 0 or -1 in VIS.  */
3896       if (TARGET_VIS
3897 	  && (const_zero_operand (x, mode)
3898 	      || const_all_ones_operand (x, mode)))
3899 	return true;
3900 
3901       return false;
3902 
3903     default:
3904       break;
3905     }
3906 
3907   return true;
3908 }
3909 
3910 /* Determine if a given RTX is a valid constant address.  */
3911 
3912 bool
3913 constant_address_p (rtx x)
3914 {
3915   switch (GET_CODE (x))
3916     {
3917     case LABEL_REF:
3918     case CONST_INT:
3919     case HIGH:
3920       return true;
3921 
3922     case CONST:
3923       if (flag_pic && pic_address_needs_scratch (x))
3924 	return false;
3925       return sparc_legitimate_constant_p (Pmode, x);
3926 
3927     case SYMBOL_REF:
3928       return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
3929 
3930     default:
3931       return false;
3932     }
3933 }
3934 
3935 /* Nonzero if the constant value X is a legitimate general operand
3936    when generating PIC code.  It is given that flag_pic is on and
3937    that X satisfies CONSTANT_P.  */
3938 
3939 bool
3940 legitimate_pic_operand_p (rtx x)
3941 {
3942   if (pic_address_needs_scratch (x))
3943     return false;
3944   if (sparc_tls_referenced_p (x))
3945     return false;
3946   return true;
3947 }
3948 
3949 #define RTX_OK_FOR_OFFSET_P(X, MODE)			\
3950   (CONST_INT_P (X)					\
3951    && INTVAL (X) >= -0x1000				\
3952    && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
3953 
3954 #define RTX_OK_FOR_OLO10_P(X, MODE)			\
3955   (CONST_INT_P (X)					\
3956    && INTVAL (X) >= -0x1000				\
3957    && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
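
/* Added note (editorial): both macros also account for the access size so
   that the last byte is still addressable, e.g. for DImode (size 8)
   RTX_OK_FOR_OFFSET_P accepts -4096 .. 4088, keeping offset + 7 within the
   simm13 range, and RTX_OK_FOR_OLO10_P caps the offset at 0xc00 - 8 so
   that it can still be combined with a 10-bit %lo() value without
   overflowing simm13.  */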
3958 
3959 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
3960 
3961    On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
3962    ordinarily.  This changes a bit when generating PIC.  */
3963 
3964 static bool
3965 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
3966 {
3967   rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
3968 
3969   if (REG_P (addr) || GET_CODE (addr) == SUBREG)
3970     rs1 = addr;
3971   else if (GET_CODE (addr) == PLUS)
3972     {
3973       rs1 = XEXP (addr, 0);
3974       rs2 = XEXP (addr, 1);
3975 
3976       /* Canonicalize.  REG comes first, if there are no regs,
3977 	 LO_SUM comes first.  */
3978       if (!REG_P (rs1)
3979 	  && GET_CODE (rs1) != SUBREG
3980 	  && (REG_P (rs2)
3981 	      || GET_CODE (rs2) == SUBREG
3982 	      || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
3983 	{
3984 	  rs1 = XEXP (addr, 1);
3985 	  rs2 = XEXP (addr, 0);
3986 	}
3987 
3988       if ((flag_pic == 1
3989 	   && rs1 == pic_offset_table_rtx
3990 	   && !REG_P (rs2)
3991 	   && GET_CODE (rs2) != SUBREG
3992 	   && GET_CODE (rs2) != LO_SUM
3993 	   && GET_CODE (rs2) != MEM
3994 	   && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
3995 	   && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
3996 	   && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
3997 	  || ((REG_P (rs1)
3998 	       || GET_CODE (rs1) == SUBREG)
3999 	      && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4000 	{
4001 	  imm1 = rs2;
4002 	  rs2 = NULL;
4003 	}
4004       else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4005 	       && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4006 	{
4007 	  /* We prohibit REG + REG for TFmode when there are no quad move insns
4008 	     and we consequently need to split.  We do this because REG+REG
4009 	     is not an offsettable address.  If we get the situation in reload
4010 	     where source and destination of a movtf pattern are both MEMs with
4011 	     REG+REG address, then only one of them gets converted to an
4012 	     offsettable address.  */
4013 	  if (mode == TFmode
4014 	      && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4015 	    return 0;
4016 
4017 	  /* Likewise for TImode, but in all cases.  */
4018 	  if (mode == TImode)
4019 	    return 0;
4020 
4021 	  /* We prohibit REG + REG for DFmode/DImode on ARCH32 when not
4022 	     optimizing, because mem_min_alignment is then likely to be zero
4023 	     after reload and the forced split would lack a matching splitter
4024 	     pattern.  */
4025 	  if (TARGET_ARCH32 && !optimize
4026 	      && (mode == DFmode || mode == DImode))
4027 	    return 0;
4028 	}
4029       else if (USE_AS_OFFSETABLE_LO10
4030 	       && GET_CODE (rs1) == LO_SUM
4031 	       && TARGET_ARCH64
4032 	       && ! TARGET_CM_MEDMID
4033 	       && RTX_OK_FOR_OLO10_P (rs2, mode))
4034 	{
4035 	  rs2 = NULL;
4036 	  imm1 = XEXP (rs1, 1);
4037 	  rs1 = XEXP (rs1, 0);
4038 	  if (!CONSTANT_P (imm1)
4039 	      || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4040 	    return 0;
4041 	}
4042     }
4043   else if (GET_CODE (addr) == LO_SUM)
4044     {
4045       rs1 = XEXP (addr, 0);
4046       imm1 = XEXP (addr, 1);
4047 
4048       if (!CONSTANT_P (imm1)
4049 	  || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4050 	return 0;
4051 
4052       /* We can't allow TFmode in 32-bit mode, because an offset greater
4053 	 than the alignment (8) may cause the LO_SUM to overflow.  */
4054       if (mode == TFmode && TARGET_ARCH32)
4055 	return 0;
4056     }
4057   else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4058     return 1;
4059   else
4060     return 0;
4061 
4062   if (GET_CODE (rs1) == SUBREG)
4063     rs1 = SUBREG_REG (rs1);
4064   if (!REG_P (rs1))
4065     return 0;
4066 
4067   if (rs2)
4068     {
4069       if (GET_CODE (rs2) == SUBREG)
4070 	rs2 = SUBREG_REG (rs2);
4071       if (!REG_P (rs2))
4072 	return 0;
4073     }
4074 
4075   if (strict)
4076     {
4077       if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4078 	  || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4079 	return 0;
4080     }
4081   else
4082     {
4083       if ((! SPARC_INT_REG_P (REGNO (rs1))
4084 	   && REGNO (rs1) != FRAME_POINTER_REGNUM
4085 	   && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4086 	  || (rs2
4087 	      && (! SPARC_INT_REG_P (REGNO (rs2))
4088 		  && REGNO (rs2) != FRAME_POINTER_REGNUM
4089 		  && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4090 	return 0;
4091     }
4092   return 1;
4093 }
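
/* Added examples (editorial): under the checks above,
   (plus (reg:SI %l1) (reg:SI %l2)) is a legitimate SImode address, whereas
   (plus (reg) (const_int 0x2000)) is not (the offset exceeds the simm13
   range) and REG + REG is always rejected for TImode.  */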
4094 
4095 /* Return the SYMBOL_REF for the tls_get_addr function.  */
4096 
4097 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4098 
4099 static rtx
4100 sparc_tls_get_addr (void)
4101 {
4102   if (!sparc_tls_symbol)
4103     sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4104 
4105   return sparc_tls_symbol;
4106 }
4107 
4108 /* Return the Global Offset Table to be used in TLS mode.  */
4109 
4110 static rtx
4111 sparc_tls_got (void)
4112 {
4113   /* In PIC mode, this is just the PIC offset table.  */
4114   if (flag_pic)
4115     {
4116       crtl->uses_pic_offset_table = 1;
4117       return pic_offset_table_rtx;
4118     }
4119 
4120   /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4121      the GOT symbol with the 32-bit ABI, so we reload the GOT register.  */
4122   if (TARGET_SUN_TLS && TARGET_ARCH32)
4123     {
4124       load_got_register ();
4125       return global_offset_table_rtx;
4126     }
4127 
4128   /* In all other cases, we load a new pseudo with the GOT symbol.  */
4129   return copy_to_reg (sparc_got ());
4130 }
4131 
4132 /* Return true if X contains a thread-local symbol.  */
4133 
4134 static bool
4135 sparc_tls_referenced_p (rtx x)
4136 {
4137   if (!TARGET_HAVE_TLS)
4138     return false;
4139 
4140   if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4141     x = XEXP (XEXP (x, 0), 0);
4142 
4143   if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4144     return true;
4145 
4146   /* That's all we handle in sparc_legitimize_tls_address for now.  */
4147   return false;
4148 }
4149 
4150 /* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
4151    this (thread-local) address.  */
4152 
4153 static rtx
4154 sparc_legitimize_tls_address (rtx addr)
4155 {
4156   rtx temp1, temp2, temp3, ret, o0, got;
4157   rtx_insn *insn;
4158 
4159   gcc_assert (can_create_pseudo_p ());
4160 
4161   if (GET_CODE (addr) == SYMBOL_REF)
4162     switch (SYMBOL_REF_TLS_MODEL (addr))
4163       {
4164       case TLS_MODEL_GLOBAL_DYNAMIC:
4165 	start_sequence ();
4166 	temp1 = gen_reg_rtx (SImode);
4167 	temp2 = gen_reg_rtx (SImode);
4168 	ret = gen_reg_rtx (Pmode);
4169 	o0 = gen_rtx_REG (Pmode, 8);
4170 	got = sparc_tls_got ();
4171 	emit_insn (gen_tgd_hi22 (temp1, addr));
4172 	emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4173 	if (TARGET_ARCH32)
4174 	  {
4175 	    emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4176 	    insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4177 						   addr, const1_rtx));
4178 	  }
4179 	else
4180 	  {
4181 	    emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4182 	    insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4183 						   addr, const1_rtx));
4184 	  }
4185 	use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4186 	insn = get_insns ();
4187 	end_sequence ();
4188 	emit_libcall_block (insn, ret, o0, addr);
4189 	break;
4190 
4191       case TLS_MODEL_LOCAL_DYNAMIC:
4192 	start_sequence ();
4193 	temp1 = gen_reg_rtx (SImode);
4194 	temp2 = gen_reg_rtx (SImode);
4195 	temp3 = gen_reg_rtx (Pmode);
4196 	ret = gen_reg_rtx (Pmode);
4197 	o0 = gen_rtx_REG (Pmode, 8);
4198 	got = sparc_tls_got ();
4199 	emit_insn (gen_tldm_hi22 (temp1));
4200 	emit_insn (gen_tldm_lo10 (temp2, temp1));
4201 	if (TARGET_ARCH32)
4202 	  {
4203 	    emit_insn (gen_tldm_add32 (o0, got, temp2));
4204 	    insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4205 						    const1_rtx));
4206 	  }
4207 	else
4208 	  {
4209 	    emit_insn (gen_tldm_add64 (o0, got, temp2));
4210 	    insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4211 						    const1_rtx));
4212 	  }
4213 	use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4214 	insn = get_insns ();
4215 	end_sequence ();
4216 	emit_libcall_block (insn, temp3, o0,
4217 			    gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4218 					    UNSPEC_TLSLD_BASE));
4219 	temp1 = gen_reg_rtx (SImode);
4220 	temp2 = gen_reg_rtx (SImode);
4221 	emit_insn (gen_tldo_hix22 (temp1, addr));
4222 	emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4223 	if (TARGET_ARCH32)
4224 	  emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4225 	else
4226 	  emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4227 	break;
4228 
4229       case TLS_MODEL_INITIAL_EXEC:
4230 	temp1 = gen_reg_rtx (SImode);
4231 	temp2 = gen_reg_rtx (SImode);
4232 	temp3 = gen_reg_rtx (Pmode);
4233 	got = sparc_tls_got ();
4234 	emit_insn (gen_tie_hi22 (temp1, addr));
4235 	emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4236 	if (TARGET_ARCH32)
4237 	  emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4238 	else
4239 	  emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4240         if (TARGET_SUN_TLS)
4241 	  {
4242 	    ret = gen_reg_rtx (Pmode);
4243 	    if (TARGET_ARCH32)
4244 	      emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4245 					temp3, addr));
4246 	    else
4247 	      emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4248 					temp3, addr));
4249 	  }
4250 	else
4251 	  ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4252 	break;
4253 
4254       case TLS_MODEL_LOCAL_EXEC:
4255 	temp1 = gen_reg_rtx (Pmode);
4256 	temp2 = gen_reg_rtx (Pmode);
4257 	if (TARGET_ARCH32)
4258 	  {
4259 	    emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4260 	    emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4261 	  }
4262 	else
4263 	  {
4264 	    emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4265 	    emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4266 	  }
4267 	ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4268 	break;
4269 
4270       default:
4271 	gcc_unreachable ();
4272       }
4273 
4274   else if (GET_CODE (addr) == CONST)
4275     {
4276       rtx base, offset;
4277 
4278       gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4279 
4280       base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4281       offset = XEXP (XEXP (addr, 0), 1);
4282 
4283       base = force_operand (base, NULL_RTX);
4284       if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4285 	offset = force_reg (Pmode, offset);
4286       ret = gen_rtx_PLUS (Pmode, base, offset);
4287     }
4288 
4289   else
4290     gcc_unreachable ();  /* for now ... */
4291 
4292   return ret;
4293 }
4294 
4295 /* Legitimize PIC addresses.  If the address is already position-independent,
4296    we return ORIG.  Newly generated position-independent addresses go into a
4297    reg.  This is REG if nonzero, otherwise we allocate register(s) as
4298    necessary.  */
4299 
4300 static rtx
4301 sparc_legitimize_pic_address (rtx orig, rtx reg)
4302 {
4303   if (GET_CODE (orig) == SYMBOL_REF
4304       /* See the comment in sparc_expand_move.  */
4305       || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4306     {
4307       bool gotdata_op = false;
4308       rtx pic_ref, address;
4309       rtx_insn *insn;
4310 
4311       if (!reg)
4312 	{
4313 	  gcc_assert (can_create_pseudo_p ());
4314 	  reg = gen_reg_rtx (Pmode);
4315 	}
4316 
4317       if (flag_pic == 2)
4318 	{
4319 	  /* If not during reload, allocate another temp reg here for loading
4320 	     in the address, so that these instructions can be optimized
4321 	     properly.  */
4322 	  rtx temp_reg = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : reg;
4323 
4324 	  /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4325 	     won't get confused into thinking that these two instructions
4326 	     are loading in the true address of the symbol.  If in the
4327 	     future a PIC rtx exists, that should be used instead.  */
4328 	  if (TARGET_ARCH64)
4329 	    {
4330 	      emit_insn (gen_movdi_high_pic (temp_reg, orig));
4331 	      emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4332 	    }
4333 	  else
4334 	    {
4335 	      emit_insn (gen_movsi_high_pic (temp_reg, orig));
4336 	      emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4337 	    }
4338 
4339 	  address = temp_reg;
4340 	  gotdata_op = true;
4341 	}
4342       else
4343 	address = orig;
4344 
4345       crtl->uses_pic_offset_table = 1;
4346       if (gotdata_op)
4347 	{
4348 	  if (TARGET_ARCH64)
4349 	    insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4350 							pic_offset_table_rtx,
4351 							address, orig));
4352 	  else
4353 	    insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4354 							pic_offset_table_rtx,
4355 							address, orig));
4356 	}
4357       else
4358 	{
4359 	  pic_ref
4360 	    = gen_const_mem (Pmode,
4361 			     gen_rtx_PLUS (Pmode,
4362 					   pic_offset_table_rtx, address));
4363 	  insn = emit_move_insn (reg, pic_ref);
4364 	}
4365 
4366       /* Put a REG_EQUAL note on this insn, so that it can be optimized
4367 	 by loop.  */
4368       set_unique_reg_note (insn, REG_EQUAL, orig);
4369       return reg;
4370     }
4371   else if (GET_CODE (orig) == CONST)
4372     {
4373       rtx base, offset;
4374 
4375       if (GET_CODE (XEXP (orig, 0)) == PLUS
4376 	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4377 	return orig;
4378 
4379       if (!reg)
4380 	{
4381 	  gcc_assert (can_create_pseudo_p ());
4382 	  reg = gen_reg_rtx (Pmode);
4383 	}
4384 
4385       gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4386       base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4387       offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4388 			 		     base == reg ? NULL_RTX : reg);
4389 
4390       if (GET_CODE (offset) == CONST_INT)
4391 	{
4392 	  if (SMALL_INT (offset))
4393 	    return plus_constant (Pmode, base, INTVAL (offset));
4394 	  else if (can_create_pseudo_p ())
4395 	    offset = force_reg (Pmode, offset);
4396 	  else
4397 	    /* If we reach here, then something is seriously wrong.  */
4398 	    gcc_unreachable ();
4399 	}
4400       return gen_rtx_PLUS (Pmode, base, offset);
4401     }
4402   else if (GET_CODE (orig) == LABEL_REF)
4403     /* ??? We ought to be checking that the register is live instead, in case
4404        it is eliminated.  */
4405     crtl->uses_pic_offset_table = 1;
4406 
4407   return orig;
4408 }
4409 
4410 /* Try machine-dependent ways of modifying an illegitimate address X
4411    to be legitimate.  If we find one, return the new, valid address.
4412 
4413    OLDX is the address as it was before break_out_memory_refs was called.
4414    In some cases it is useful to look at this to decide what needs to be done.
4415 
4416    MODE is the mode of the operand pointed to by X.
4417 
4418    On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG.  */
4419 
4420 static rtx
4421 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4422 			  machine_mode mode)
4423 {
4424   rtx orig_x = x;
4425 
4426   if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4427     x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4428 		      force_operand (XEXP (x, 0), NULL_RTX));
4429   if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4430     x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4431 		      force_operand (XEXP (x, 1), NULL_RTX));
4432   if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4433     x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4434 		      XEXP (x, 1));
4435   if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4436     x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4437 		      force_operand (XEXP (x, 1), NULL_RTX));
4438 
4439   if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4440     return x;
4441 
4442   if (sparc_tls_referenced_p (x))
4443     x = sparc_legitimize_tls_address (x);
4444   else if (flag_pic)
4445     x = sparc_legitimize_pic_address (x, NULL_RTX);
4446   else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4447     x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4448 		      copy_to_mode_reg (Pmode, XEXP (x, 1)));
4449   else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4450     x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4451 		      copy_to_mode_reg (Pmode, XEXP (x, 0)));
4452   else if (GET_CODE (x) == SYMBOL_REF
4453 	   || GET_CODE (x) == CONST
4454 	   || GET_CODE (x) == LABEL_REF)
4455     x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4456 
4457   return x;
4458 }
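
/* Added example (editorial): given a non-PIC, non-TLS address such as
   (plus (reg:DI %i0) (const_int 0x12345)), the constant term does not fit
   in simm13, so the code above rewrites the address as REG + REG by first
   copying the constant into a fresh register.  */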
4459 
4460 /* Delegitimize an address that was legitimized by the above function.  */
4461 
4462 static rtx
4463 sparc_delegitimize_address (rtx x)
4464 {
4465   x = delegitimize_mem_from_attrs (x);
4466 
4467   if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4468     switch (XINT (XEXP (x, 1), 1))
4469       {
4470       case UNSPEC_MOVE_PIC:
4471       case UNSPEC_TLSLE:
4472 	x = XVECEXP (XEXP (x, 1), 0, 0);
4473 	gcc_assert (GET_CODE (x) == SYMBOL_REF);
4474 	break;
4475       default:
4476 	break;
4477       }
4478 
4479   /* This is generated by mov{si,di}_pic_label_ref in PIC mode.  */
4480   if (GET_CODE (x) == MINUS
4481       && REG_P (XEXP (x, 0))
4482       && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4483       && GET_CODE (XEXP (x, 1)) == LO_SUM
4484       && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4485       && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4486     {
4487       x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4488       gcc_assert (GET_CODE (x) == LABEL_REF
4489 		  || (GET_CODE (x) == CONST
4490 		      && GET_CODE (XEXP (x, 0)) == PLUS
4491 		      && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4492 		      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT));
4493     }
4494 
4495   return x;
4496 }
4497 
4498 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS.  Returns a value to
4499    replace the input X, or the original X if no replacement is called for.
4500    The output parameter *WIN is 1 if the calling macro should goto WIN,
4501    0 if it should not.
4502 
4503    For SPARC, we wish to handle addresses by splitting them into
4504    HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4505    This cuts the number of extra insns by one.
4506 
4507    Do nothing when generating PIC code and the address is a symbolic
4508    operand or requires a scratch register.  */
4509 
4510 rtx
4511 sparc_legitimize_reload_address (rtx x, machine_mode mode,
4512 				 int opnum, int type,
4513 				 int ind_levels ATTRIBUTE_UNUSED, int *win)
4514 {
4515   /* Decompose SImode constants into HIGH+LO_SUM.  */
4516   if (CONSTANT_P (x)
4517       && (mode != TFmode || TARGET_ARCH64)
4518       && GET_MODE (x) == SImode
4519       && GET_CODE (x) != LO_SUM
4520       && GET_CODE (x) != HIGH
4521       && sparc_cmodel <= CM_MEDLOW
4522       && !(flag_pic
4523 	   && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4524     {
4525       x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4526       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4527 		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4528 		   opnum, (enum reload_type)type);
4529       *win = 1;
4530       return x;
4531     }
4532 
4533   /* We have to recognize what we have already generated above.  */
4534   if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4535     {
4536       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4537 		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4538 		   opnum, (enum reload_type)type);
4539       *win = 1;
4540       return x;
4541     }
4542 
4543   *win = 0;
4544   return x;
4545 }
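
/* Added example (editorial): for a SImode constant address 'sym' the
   transformation above yields (lo_sum (high sym) sym), which assembles to
   the classic two-instruction sequence

     sethi %hi(sym), %tmp
     ld    [%tmp + %lo(sym)], %reg

   with the LO_SUM retained inside the memory reference.  */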
4546 
4547 /* Return true if ADDR (a legitimate address expression)
4548    has an effect that depends on the machine mode it is used for.
4549 
4550    In PIC mode,
4551 
4552       (mem:HI [%l7+a])
4553 
4554    is not equivalent to
4555 
4556       (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4557 
4558    because [%l7+a+1] is interpreted as the address of (a+1).  */
4559 
4560 
4561 static bool
4562 sparc_mode_dependent_address_p (const_rtx addr,
4563 				addr_space_t as ATTRIBUTE_UNUSED)
4564 {
4565   if (flag_pic && GET_CODE (addr) == PLUS)
4566     {
4567       rtx op0 = XEXP (addr, 0);
4568       rtx op1 = XEXP (addr, 1);
4569       if (op0 == pic_offset_table_rtx
4570 	  && symbolic_operand (op1, VOIDmode))
4571 	return true;
4572     }
4573 
4574   return false;
4575 }
4576 
4577 #ifdef HAVE_GAS_HIDDEN
4578 # define USE_HIDDEN_LINKONCE 1
4579 #else
4580 # define USE_HIDDEN_LINKONCE 0
4581 #endif
4582 
4583 static void
4584 get_pc_thunk_name (char name[32], unsigned int regno)
4585 {
4586   const char *reg_name = reg_names[regno];
4587 
4588   /* Skip the leading '%' as that cannot be used in a
4589      symbol name.  */
4590   reg_name += 1;
4591 
4592   if (USE_HIDDEN_LINKONCE)
4593     sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4594   else
4595     ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4596 }
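
/* Added example (editorial): with GLOBAL_OFFSET_TABLE_REGNUM naming %l7,
   the USE_HIDDEN_LINKONCE variant produces "__sparc_get_pc_thunk.l7", a
   name that can be shared across objects; otherwise a file-local LADDPC
   label is generated instead.  */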
4597 
4598 /* Wrapper around the load_pcrel_sym{si,di} patterns.  */
4599 
4600 static rtx
4601 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4602 {
4603   int orig_flag_pic = flag_pic;
4604   rtx insn;
4605 
4606   /* The load_pcrel_sym{si,di} patterns require absolute addressing.  */
4607   flag_pic = 0;
4608   if (TARGET_ARCH64)
4609     insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4610   else
4611     insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4612   flag_pic = orig_flag_pic;
4613 
4614   return insn;
4615 }
4616 
4617 /* Emit code to load the GOT register.  */
4618 
4619 void
4620 load_got_register (void)
4621 {
4622   /* In PIC mode, this will retrieve pic_offset_table_rtx.  */
4623   if (!global_offset_table_rtx)
4624     global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4625 
4626   if (TARGET_VXWORKS_RTP)
4627     emit_insn (gen_vxworks_load_got ());
4628   else
4629     {
4630       /* The GOT symbol is subject to a PC-relative relocation so we need a
4631 	 helper function to add the PC value and thus get the final value.  */
4632       if (!got_helper_rtx)
4633 	{
4634 	  char name[32];
4635 	  get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4636 	  got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4637 	}
4638 
4639       emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4640 				     got_helper_rtx,
4641 				     GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4642     }
4643 
4644   /* Need to emit this whether or not we obey regdecls,
4645      since setjmp/longjmp can cause life info to screw up.
4646      ??? In the case where we don't obey regdecls, this is not sufficient
4647      since we may not fall out the bottom.  */
4648   emit_use (global_offset_table_rtx);
4649 }
4650 
4651 /* Emit a call instruction with the pattern given by PAT.  ADDR is the
4652    address of the call target.  */
4653 
4654 void
4655 sparc_emit_call_insn (rtx pat, rtx addr)
4656 {
4657   rtx_insn *insn;
4658 
4659   insn = emit_call_insn (pat);
4660 
4661   /* The PIC register is live on entry to VxWorks PIC PLT entries.  */
4662   if (TARGET_VXWORKS_RTP
4663       && flag_pic
4664       && GET_CODE (addr) == SYMBOL_REF
4665       && (SYMBOL_REF_DECL (addr)
4666 	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4667 	  : !SYMBOL_REF_LOCAL_P (addr)))
4668     {
4669       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4670       crtl->uses_pic_offset_table = 1;
4671     }
4672 }
4673 
4674 /* Return 1 if RTX is a MEM which is known to be aligned to at
4675    least a DESIRED byte boundary.  */
4676 
4677 int
4678 mem_min_alignment (rtx mem, int desired)
4679 {
4680   rtx addr, base, offset;
4681 
4682   /* If it's not a MEM we can't accept it.  */
4683   if (GET_CODE (mem) != MEM)
4684     return 0;
4685 
4686   /* Obviously...  */
4687   if (!TARGET_UNALIGNED_DOUBLES
4688       && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4689     return 1;
4690 
4691   /* ??? The rest of the function predates MEM_ALIGN so
4692      there is probably a bit of redundancy.  */
4693   addr = XEXP (mem, 0);
4694   base = offset = NULL_RTX;
4695   if (GET_CODE (addr) == PLUS)
4696     {
4697       if (GET_CODE (XEXP (addr, 0)) == REG)
4698 	{
4699 	  base = XEXP (addr, 0);
4700 
4701 	  /* What we are saying here is that if the base
4702 	     REG is aligned properly, the compiler will make
4703 	     sure any REG based index upon it will be so
4704 	     as well.  */
4705 	  if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4706 	    offset = XEXP (addr, 1);
4707 	  else
4708 	    offset = const0_rtx;
4709 	}
4710     }
4711   else if (GET_CODE (addr) == REG)
4712     {
4713       base = addr;
4714       offset = const0_rtx;
4715     }
4716 
4717   if (base != NULL_RTX)
4718     {
4719       int regno = REGNO (base);
4720 
4721       if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4722 	{
4723 	  /* Check if the compiler has recorded some information
4724 	     about the alignment of the base REG.  If reload has
4725 	     completed, we already matched with proper alignments.
4726 	     If not running global_alloc, reload might give us
4727 	     unaligned pointer to local stack though.  */
4728 	  if (((cfun != 0
4729 		&& REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4730 	       || (optimize && reload_completed))
4731 	      && (INTVAL (offset) & (desired - 1)) == 0)
4732 	    return 1;
4733 	}
4734       else
4735 	{
4736 	  if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4737 	    return 1;
4738 	}
4739     }
4740   else if (! TARGET_UNALIGNED_DOUBLES
4741 	   || CONSTANT_P (addr)
4742 	   || GET_CODE (addr) == LO_SUM)
4743     {
4744       /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4745 	 is true, in which case we can only assume that an access is aligned if
4746 	 it is to a constant address, or the address involves a LO_SUM.  */
4747       return 1;
4748     }
4749 
4750   /* An obviously unaligned address.  */
4751   return 0;
4752 }
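
/* Added example (editorial): with the default -mno-unaligned-doubles, a
   stack slot addressed as (plus (reg %sp) (const_int SPARC_STACK_BIAS + 16))
   passes mem_min_alignment (mem, 8), since the offset is 0 modulo 8 once
   the stack bias is subtracted.  */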
4753 
4754 
4755 /* Vectors to keep interesting information about registers where it can easily
4756    be obtained.  We used to use the actual mode value as the bit number, but there
4757    are more than 32 modes now.  Instead we use two tables: one indexed by
4758    hard register number, and one indexed by mode.  */
4759 
4760 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4761    they all fit (as bit numbers) in a 32-bit word (again).  Each real mode is
4762    mapped into one sparc_mode_class mode.  */
4763 
4764 enum sparc_mode_class {
4765   H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4766   SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4767   CC_MODE, CCFP_MODE
4768 };
4769 
4770 /* Modes for single-word and smaller quantities.  */
4771 #define S_MODES \
4772   ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4773 
4774 /* Modes for double-word and smaller quantities.  */
4775 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4776 
4777 /* Modes for quad-word and smaller quantities.  */
4778 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4779 
4780 /* Modes for 8-word and smaller quantities.  */
4781 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4782 
4783 /* Modes for single-float quantities.  */
4784 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4785 
4786 /* Modes for double-float and smaller quantities.  */
4787 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
4788 
4789 /* Modes for quad-float and smaller quantities.  */
4790 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4791 
4792 /* Modes for quad-float pairs and smaller quantities.  */
4793 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4794 
4795 /* Modes for double-float only quantities.  */
4796 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4797 
4798 /* Modes for quad-float and double-float only quantities.  */
4799 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4800 
4801 /* Modes for quad-float pairs and double-float only quantities.  */
4802 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
4803 
4804 /* Modes for condition codes.  */
4805 #define CC_MODES (1 << (int) CC_MODE)
4806 #define CCFP_MODES (1 << (int) CCFP_MODE)
4807 
4808 /* Value is 1 if register/mode pair is acceptable on sparc.
4809 
4810    The funny mixture of D and T modes is because integer operations
4811    do not specially operate on tetra quantities, so non-quad-aligned
4812    registers can hold quadword quantities (except %o4 and %i4 because
4813    they cross fixed registers).
4814 
4815    ??? Note that, despite the settings, non-double-aligned parameter
4816    registers can hold double-word quantities in 32-bit mode.  */
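
/* Added note (editorial): these tables are consulted as bitmask
   intersections; a register/mode pair is acceptable roughly when

     (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0

   which is what the HARD_REGNO_MODE_OK test in sparc.h boils down to.  */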
4817 
4818 /* This points to either the 32 bit or the 64 bit version.  */
4819 const int *hard_regno_mode_classes;
4820 
4821 static const int hard_32bit_mode_classes[] = {
4822   S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4823   T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4824   T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
4825   T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
4826 
4827   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4828   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4829   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4830   OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4831 
4832   /* FP regs f32 to f63.  Only the even numbered registers actually exist,
4833      and none can hold SFmode/SImode values.  */
4834   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4835   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4836   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4837   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4838 
4839   /* %fcc[0123] */
4840   CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4841 
4842   /* %icc, %sfp, %gsr */
4843   CC_MODES, 0, D_MODES
4844 };
4845 
4846 static const int hard_64bit_mode_classes[] = {
4847   D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4848   O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4849   T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4850   O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
4851 
4852   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4853   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4854   OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
4855   OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
4856 
4857   /* FP regs f32 to f63.  Only the even numbered registers actually exist,
4858      and none can hold SFmode/SImode values.  */
4859   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4860   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4861   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4862   OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
4863 
4864   /* %fcc[0123] */
4865   CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
4866 
4867   /* %icc, %sfp, %gsr */
4868   CC_MODES, 0, D_MODES
4869 };
4870 
4871 int sparc_mode_class [NUM_MACHINE_MODES];
4872 
4873 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
4874 
4875 static void
4876 sparc_init_modes (void)
4877 {
4878   int i;
4879 
4880   for (i = 0; i < NUM_MACHINE_MODES; i++)
4881     {
4882       machine_mode m = (machine_mode) i;
4883       unsigned int size = GET_MODE_SIZE (m);
4884 
4885       switch (GET_MODE_CLASS (m))
4886 	{
4887 	case MODE_INT:
4888 	case MODE_PARTIAL_INT:
4889 	case MODE_COMPLEX_INT:
4890 	  if (size < 4)
4891 	    sparc_mode_class[i] = 1 << (int) H_MODE;
4892 	  else if (size == 4)
4893 	    sparc_mode_class[i] = 1 << (int) S_MODE;
4894 	  else if (size == 8)
4895 	    sparc_mode_class[i] = 1 << (int) D_MODE;
4896 	  else if (size == 16)
4897 	    sparc_mode_class[i] = 1 << (int) T_MODE;
4898 	  else if (size == 32)
4899 	    sparc_mode_class[i] = 1 << (int) O_MODE;
4900 	  else
4901 	    sparc_mode_class[i] = 0;
4902 	  break;
4903 	case MODE_VECTOR_INT:
4904 	  if (size == 4)
4905 	    sparc_mode_class[i] = 1 << (int) SF_MODE;
4906 	  else if (size == 8)
4907 	    sparc_mode_class[i] = 1 << (int) DF_MODE;
4908 	  else
4909 	    sparc_mode_class[i] = 0;
4910 	  break;
4911 	case MODE_FLOAT:
4912 	case MODE_COMPLEX_FLOAT:
4913 	  if (size == 4)
4914 	    sparc_mode_class[i] = 1 << (int) SF_MODE;
4915 	  else if (size == 8)
4916 	    sparc_mode_class[i] = 1 << (int) DF_MODE;
4917 	  else if (size == 16)
4918 	    sparc_mode_class[i] = 1 << (int) TF_MODE;
4919 	  else if (size == 32)
4920 	    sparc_mode_class[i] = 1 << (int) OF_MODE;
4921 	  else
4922 	    sparc_mode_class[i] = 0;
4923 	  break;
4924 	case MODE_CC:
4925 	  if (m == CCFPmode || m == CCFPEmode)
4926 	    sparc_mode_class[i] = 1 << (int) CCFP_MODE;
4927 	  else
4928 	    sparc_mode_class[i] = 1 << (int) CC_MODE;
4929 	  break;
4930 	default:
4931 	  sparc_mode_class[i] = 0;
4932 	  break;
4933 	}
4934     }
4935 
4936   if (TARGET_ARCH64)
4937     hard_regno_mode_classes = hard_64bit_mode_classes;
4938   else
4939     hard_regno_mode_classes = hard_32bit_mode_classes;
4940 
4941   /* Initialize the array used by REGNO_REG_CLASS.  */
4942   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4943     {
4944       if (i < 16 && TARGET_V8PLUS)
4945 	sparc_regno_reg_class[i] = I64_REGS;
4946       else if (i < 32 || i == FRAME_POINTER_REGNUM)
4947 	sparc_regno_reg_class[i] = GENERAL_REGS;
4948       else if (i < 64)
4949 	sparc_regno_reg_class[i] = FP_REGS;
4950       else if (i < 96)
4951 	sparc_regno_reg_class[i] = EXTRA_FP_REGS;
4952       else if (i < 100)
4953 	sparc_regno_reg_class[i] = FPCC_REGS;
4954       else
4955 	sparc_regno_reg_class[i] = NO_REGS;
4956     }
4957 }
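
/* A minimal sketch (not compiled into GCC) of how the two tables above are
   meant to be combined: sparc_mode_class[MODE] holds a single class bit and
   hard_regno_mode_classes[REGNO] a mask of acceptable class bits, so a
   register/mode pair is valid iff the bit is in the mask.  This mirrors the
   HARD_REGNO_MODE_OK test in sparc.h.  */
#if 0
static int
sketch_hard_regno_mode_ok (int regno, int mode)
{
  return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
}
#endif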
4958 
4959 /* Return whether REGNO, a global or FP register, must be saved/restored.  */
4960 
4961 static inline bool
4962 save_global_or_fp_reg_p (unsigned int regno,
4963 			 int leaf_function ATTRIBUTE_UNUSED)
4964 {
4965   return !call_used_regs[regno] && df_regs_ever_live_p (regno);
4966 }
4967 
4968 /* Return whether the return address register (%i7) is needed.  */
4969 
4970 static inline bool
4971 return_addr_reg_needed_p (int leaf_function)
4972 {
4973   /* If it is live, for example because of __builtin_return_address (0).  */
4974   if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
4975     return true;
4976 
4977   /* Otherwise, it is needed as save register if %o7 is clobbered.  */
4978   if (!leaf_function
4979       /* Loading the GOT register clobbers %o7.  */
4980       || crtl->uses_pic_offset_table
4981       || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
4982     return true;
4983 
4984   return false;
4985 }
4986 
4987 /* Return whether REGNO, a local or in register, must be saved/restored.  */
4988 
4989 static bool
4990 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
4991 {
4992   /* General case: call-saved registers live at some point.  */
4993   if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
4994     return true;
4995 
4996   /* Frame pointer register (%fp) if needed.  */
4997   if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
4998     return true;
4999 
5000   /* Return address register (%i7) if needed.  */
5001   if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5002     return true;
5003 
5004   /* GOT register (%l7) if needed.  */
5005   if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
5006     return true;
5007 
5008   /* If the function accesses prior frames, the frame pointer and the return
5009      address of the previous frame must be saved on the stack.  */
5010   if (crtl->accesses_prior_frames
5011       && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5012     return true;
5013 
5014   return false;
5015 }
5016 
5017 /* Compute the frame size required by the function.  This function is called
5018    during the reload pass and also by sparc_expand_prologue.  */
5019 
5020 HOST_WIDE_INT
5021 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5022 {
5023   HOST_WIDE_INT frame_size, apparent_frame_size;
5024   int args_size, n_global_fp_regs = 0;
5025   bool save_local_in_regs_p = false;
5026   unsigned int i;
5027 
5028   /* If the function allocates dynamic stack space, the dynamic offset is
5029      computed early and contains REG_PARM_STACK_SPACE, so we need to cope.  */
5030   if (leaf_function && !cfun->calls_alloca)
5031     args_size = 0;
5032   else
5033     args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5034 
5035   /* Calculate space needed for global registers.  */
5036   if (TARGET_ARCH64)
5037     {
5038       for (i = 0; i < 8; i++)
5039 	if (save_global_or_fp_reg_p (i, 0))
5040 	  n_global_fp_regs += 2;
5041     }
5042   else
5043     {
5044       for (i = 0; i < 8; i += 2)
5045 	if (save_global_or_fp_reg_p (i, 0)
5046 	    || save_global_or_fp_reg_p (i + 1, 0))
5047 	  n_global_fp_regs += 2;
5048     }
5049 
5050   /* In the flat window model, find out which local and in registers need to
5051      be saved.  We don't reserve space in the current frame for them as they
5052      will be spilled into the register window save area of the caller's frame.
5053      However, as soon as we use this register window save area, we must create
5054      that of the current frame to make it the live one.  */
5055   if (TARGET_FLAT)
5056     for (i = 16; i < 32; i++)
5057       if (save_local_or_in_reg_p (i, leaf_function))
5058 	{
5059 	 save_local_in_regs_p = true;
5060 	 break;
5061 	}
5062 
5063   /* Calculate space needed for FP registers.  */
5064   for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5065     if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5066       n_global_fp_regs += 2;
5067 
5068   if (size == 0
5069       && n_global_fp_regs == 0
5070       && args_size == 0
5071       && !save_local_in_regs_p)
5072     frame_size = apparent_frame_size = 0;
5073   else
5074     {
5075       /* We subtract STARTING_FRAME_OFFSET, remember it's negative.  */
5076       apparent_frame_size = ROUND_UP (size - STARTING_FRAME_OFFSET, 8);
5077       apparent_frame_size += n_global_fp_regs * 4;
5078 
5079       /* We need to add the size of the outgoing argument area.  */
5080       frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5081 
5082       /* And that of the register window save area.  */
5083       frame_size += FIRST_PARM_OFFSET (cfun->decl);
5084 
5085       /* Finally, bump to the appropriate alignment.  */
5086       frame_size = SPARC_STACK_ALIGN (frame_size);
5087     }
5088 
5089   /* Set up values for use in prologue and epilogue.  */
5090   sparc_frame_size = frame_size;
5091   sparc_apparent_frame_size = apparent_frame_size;
5092   sparc_n_global_fp_regs = n_global_fp_regs;
5093   sparc_save_local_in_regs_p = save_local_in_regs_p;
5094 
5095   return frame_size;
5096 }
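
/* Illustrative sketch, not compiled: the arithmetic above boiled down to
   plain C for a 32-bit non-leaf frame, with hypothetical values
   STARTING_FRAME_OFFSET == 0, FIRST_PARM_OFFSET == 68 (the 16-word register
   window save area plus the hidden struct-return slot) and an 8-byte
   SPARC_STACK_ALIGN.  */
#if 0
static long
sketch_frame_size (long locals, long args, int n_global_fp_regs)
{
  long apparent, frame;

  apparent = (locals + 7) & -8;		/* ROUND_UP (size, 8) */
  apparent += n_global_fp_regs * 4;	/* global/FP register save area */
  frame = apparent + ((args + 7) & -8);	/* outgoing argument area */
  frame += 68;				/* register window save area */
  return (frame + 7) & -8;		/* SPARC_STACK_ALIGN */
}
#endif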
5097 
5098 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET.  */
5099 
5100 int
5101 sparc_initial_elimination_offset (int to)
5102 {
5103   int offset;
5104 
5105   if (to == STACK_POINTER_REGNUM)
5106     offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5107   else
5108     offset = 0;
5109 
5110   offset += SPARC_STACK_BIAS;
5111   return offset;
5112 }
5113 
5114 /* Output any necessary .register pseudo-ops.  */
5115 
5116 void
5117 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5118 {
5119 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5120   int i;
5121 
5122   if (TARGET_ARCH32)
5123     return;
5124 
5125   /* Check if %g[2367] were used without
5126      .register being printed for them already.  */
5127   for (i = 2; i < 8; i++)
5128     {
5129       if (df_regs_ever_live_p (i)
5130 	  && ! sparc_hard_reg_printed [i])
5131 	{
5132 	  sparc_hard_reg_printed [i] = 1;
5133 	  /* %g7 is used as TLS base register, use #ignore
5134 	     for it instead of #scratch.  */
5135 	  fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5136 		   i == 7 ? "ignore" : "scratch");
5137 	}
5138       if (i == 3) i = 5;
5139     }
5140 #endif
5141 }
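
/* For example, a 64-bit function that clobbers %g2 and %g7 without any
   .register directive printed for them yet would get:

	.register	%g2, #scratch
	.register	%g7, #ignore
*/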
5142 
5143 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5144 
5145 #if PROBE_INTERVAL > 4096
5146 #error Cannot use indexed addressing mode for stack probing
5147 #endif
5148 
5149 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5150    inclusive.  These are offsets from the current stack pointer.
5151 
5152    Note that we don't use the REG+REG addressing mode for the probes because
5153    of the stack bias in 64-bit mode.  And it doesn't really buy us anything
5154    anyway, so the advantage of having a single code path wins here.  */
5155 
5156 static void
5157 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5158 {
5159   rtx g1 = gen_rtx_REG (Pmode, 1);
5160 
5161   /* See if we have a constant small number of probes to generate.  If so,
5162      that's the easy case.  */
5163   if (size <= PROBE_INTERVAL)
5164     {
5165       emit_move_insn (g1, GEN_INT (first));
5166       emit_insn (gen_rtx_SET (g1,
5167 			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5168       emit_stack_probe (plus_constant (Pmode, g1, -size));
5169     }
5170 
5171   /* The run-time loop is made up of 9 insns in the generic case while the
5172      compile-time loop is made up of 4+2*(n-2) insns for n intervals.  */
5173   else if (size <= 4 * PROBE_INTERVAL)
5174     {
5175       HOST_WIDE_INT i;
5176 
5177       emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5178       emit_insn (gen_rtx_SET (g1,
5179 			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5180       emit_stack_probe (g1);
5181 
5182       /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5183 	 it exceeds SIZE.  If only two probes are needed, this will not
5184 	 generate any code.  Then probe at FIRST + SIZE.  */
5185       for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5186 	{
5187 	  emit_insn (gen_rtx_SET (g1,
5188 				  plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5189 	  emit_stack_probe (g1);
5190 	}
5191 
5192       emit_stack_probe (plus_constant (Pmode, g1,
5193 				       (i - PROBE_INTERVAL) - size));
5194     }
5195 
5196   /* Otherwise, do the same as above, but in a loop.  Note that we must be
5197      extra careful with variables wrapping around because we might be at
5198      the very top (or the very bottom) of the address space and we have
5199      to be able to handle this case properly; in particular, we use an
5200      equality test for the loop condition.  */
5201   else
5202     {
5203       HOST_WIDE_INT rounded_size;
5204       rtx g4 = gen_rtx_REG (Pmode, 4);
5205 
5206       emit_move_insn (g1, GEN_INT (first));
5207 
5208 
5209       /* Step 1: round SIZE to the previous multiple of the interval.  */
5210 
5211       rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5212       emit_move_insn (g4, GEN_INT (rounded_size));
5213 
5214 
5215       /* Step 2: compute initial and final value of the loop counter.  */
5216 
5217       /* TEST_ADDR = SP + FIRST.  */
5218       emit_insn (gen_rtx_SET (g1,
5219 			      gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5220 
5221       /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
5222       emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5223 
5224 
5225       /* Step 3: the loop
5226 
5227 	 while (TEST_ADDR != LAST_ADDR)
5228 	   {
5229 	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5230 	     probe at TEST_ADDR
5231 	   }
5232 
5233 	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5234 	 until it is equal to ROUNDED_SIZE.  */
5235 
5236       if (TARGET_ARCH64)
5237 	emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5238       else
5239 	emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5240 
5241 
5242       /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5243 	 that SIZE is equal to ROUNDED_SIZE.  */
5244 
5245       if (size != rounded_size)
5246 	emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5247     }
5248 
5249   /* Make sure nothing is scheduled before we are done.  */
5250   emit_insn (gen_blockage ());
5251 }
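
/* Illustrative sketch, not compiled: the offsets below %sp probed by the
   unrolled case above, e.g. FIRST == 16384 and SIZE == 12000 with the
   default PROBE_INTERVAL of 4096.  The probes land at SP-20480, SP-24576
   and finally SP-28384, i.e. one probe per interval past the first and a
   trailing probe at exactly FIRST + SIZE.  */
#if 0
static void
sketch_probe_offsets (long first, long size, void (*probe) (long))
{
  const long interval = 4096;
  long i;

  probe (-(first + interval));
  for (i = 2 * interval; i < size; i += interval)
    probe (-(first + i));
  probe (-(first + size));
}
#endif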
5252 
5253 /* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
5254    absolute addresses.  */
5255 
5256 const char *
5257 output_probe_stack_range (rtx reg1, rtx reg2)
5258 {
5259   static int labelno = 0;
5260   char loop_lab[32];
5261   rtx xops[2];
5262 
5263   ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5264 
5265   /* Loop.  */
5266   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5267 
5268   /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
5269   xops[0] = reg1;
5270   xops[1] = GEN_INT (-PROBE_INTERVAL);
5271   output_asm_insn ("add\t%0, %1, %0", xops);
5272 
5273   /* Test if TEST_ADDR == LAST_ADDR.  */
5274   xops[1] = reg2;
5275   output_asm_insn ("cmp\t%0, %1", xops);
5276 
5277   /* Probe at TEST_ADDR and branch.  */
5278   if (TARGET_ARCH64)
5279     fputs ("\tbne,pt\t%xcc,", asm_out_file);
5280   else
5281     fputs ("\tbne\t", asm_out_file);
5282   assemble_name_raw (asm_out_file, loop_lab);
5283   fputc ('\n', asm_out_file);
5284   xops[1] = GEN_INT (SPARC_STACK_BIAS);
5285   output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5286 
5287   return "";
5288 }
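
/* For reference, with the default 4096-byte interval the loop emitted above
   looks like this in 64-bit mode, where SPARC_STACK_BIAS is 2047 (in 32-bit
   mode the bias is 0 and a plain bne is used):

	.LPSRL0:
		add	%g1, -4096, %g1
		cmp	%g1, %g4
		bne,pt	%xcc, .LPSRL0
		 st	%g0, [%g1+2047]
*/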
5289 
5290 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5291    needed.  LOW is supposed to be double-word aligned for 32-bit registers.
5292    SAVE_P decides whether a register must be saved/restored.  ACTION_TRUE
5293    is the action to be performed if SAVE_P returns true and ACTION_FALSE
5294    the action to be performed if it returns false.  Return the new offset.  */
5295 
5296 typedef bool (*sorr_pred_t) (unsigned int, int);
5297 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5298 
5299 static int
5300 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5301 			   int offset, int leaf_function, sorr_pred_t save_p,
5302 			   sorr_act_t action_true, sorr_act_t action_false)
5303 {
5304   unsigned int i;
5305   rtx mem;
5306   rtx_insn *insn;
5307 
5308   if (TARGET_ARCH64 && high <= 32)
5309     {
5310       int fp_offset = -1;
5311 
5312       for (i = low; i < high; i++)
5313 	{
5314 	  if (save_p (i, leaf_function))
5315 	    {
5316 	      mem = gen_frame_mem (DImode, plus_constant (Pmode,
5317 							  base, offset));
5318 	      if (action_true == SORR_SAVE)
5319 		{
5320 		  insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5321 		  RTX_FRAME_RELATED_P (insn) = 1;
5322 		}
5323 	      else  /* action_true == SORR_RESTORE */
5324 		{
5325 		  /* The frame pointer must be restored last since its old
5326 		     value may be used as base address for the frame.  This
5327 		     is problematic in 64-bit mode only because of the lack
5328 		     of double-word load instruction.  */
5329 		  if (i == HARD_FRAME_POINTER_REGNUM)
5330 		    fp_offset = offset;
5331 		  else
5332 		    emit_move_insn (gen_rtx_REG (DImode, i), mem);
5333 		}
5334 	      offset += 8;
5335 	    }
5336 	  else if (action_false == SORR_ADVANCE)
5337 	    offset += 8;
5338 	}
5339 
5340       if (fp_offset >= 0)
5341 	{
5342 	  mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5343 	  emit_move_insn (hard_frame_pointer_rtx, mem);
5344 	}
5345     }
5346   else
5347     {
5348       for (i = low; i < high; i += 2)
5349 	{
5350 	  bool reg0 = save_p (i, leaf_function);
5351 	  bool reg1 = save_p (i + 1, leaf_function);
5352 	  machine_mode mode;
5353 	  int regno;
5354 
5355 	  if (reg0 && reg1)
5356 	    {
5357 	      mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
5358 	      regno = i;
5359 	    }
5360 	  else if (reg0)
5361 	    {
5362 	      mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5363 	      regno = i;
5364 	    }
5365 	  else if (reg1)
5366 	    {
5367 	      mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
5368 	      regno = i + 1;
5369 	      offset += 4;
5370 	    }
5371 	  else
5372 	    {
5373 	      if (action_false == SORR_ADVANCE)
5374 		offset += 8;
5375 	      continue;
5376 	    }
5377 
5378 	  mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5379 	  if (action_true == SORR_SAVE)
5380 	    {
5381 	      insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5382 	      RTX_FRAME_RELATED_P (insn) = 1;
5383 	      if (mode == DImode)
5384 		{
5385 		  rtx set1, set2;
5386 		  mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5387 							      offset));
5388 		  set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5389 		  RTX_FRAME_RELATED_P (set1) = 1;
5390 		  mem
5391 		    = gen_frame_mem (SImode, plus_constant (Pmode, base,
5392 							    offset + 4));
5393 		  set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5394 		  RTX_FRAME_RELATED_P (set2) = 1;
5395 		  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5396 				gen_rtx_PARALLEL (VOIDmode,
5397 						  gen_rtvec (2, set1, set2)));
5398 		}
5399 	    }
5400 	  else  /* action_true == SORR_RESTORE */
5401 	    emit_move_insn (gen_rtx_REG (mode, regno), mem);
5402 
5403 	  /* Bump and round down to double word
5404 	     in case we already bumped by 4.  */
5405 	  offset = ROUND_DOWN (offset + 8, 8);
5406 	}
5407     }
5408 
5409   return offset;
5410 }
5411 
5412 /* Emit code to adjust BASE to OFFSET.  Return the new base.  */
5413 
5414 static rtx
5415 emit_adjust_base_to_offset (rtx base, int offset)
5416 {
5417   /* ??? This might be optimized a little as %g1 might already have a
5418      value close enough that a single add insn will do.  */
5419   /* ??? Although, all of this is probably only a temporary fix because
5420      if %g1 can hold a function result, then sparc_expand_epilogue will
5421      lose (the result will be clobbered).  */
5422   rtx new_base = gen_rtx_REG (Pmode, 1);
5423   emit_move_insn (new_base, GEN_INT (offset));
5424   emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5425   return new_base;
5426 }
5427 
5428 /* Emit code to save/restore call-saved global and FP registers.  */
5429 
5430 static void
5431 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5432 {
5433   if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5434     {
5435       base = emit_adjust_base_to_offset (base, offset);
5436       offset = 0;
5437     }
5438 
5439   offset
5440     = emit_save_or_restore_regs (0, 8, base, offset, 0,
5441 				 save_global_or_fp_reg_p, action, SORR_NONE);
5442   emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5443 			     save_global_or_fp_reg_p, action, SORR_NONE);
5444 }
5445 
5446 /* Emit code to save/restore call-saved local and in registers.  */
5447 
5448 static void
5449 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5450 {
5451   if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5452     {
5453       base = emit_adjust_base_to_offset (base, offset);
5454       offset = 0;
5455     }
5456 
5457   emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5458 			     save_local_or_in_reg_p, action, SORR_ADVANCE);
5459 }
5460 
5461 /* Emit a window_save insn.  */
5462 
5463 static rtx_insn *
5464 emit_window_save (rtx increment)
5465 {
5466   rtx_insn *insn = emit_insn (gen_window_save (increment));
5467   RTX_FRAME_RELATED_P (insn) = 1;
5468 
5469   /* The incoming return address (%o7) is saved in %i7.  */
5470   add_reg_note (insn, REG_CFA_REGISTER,
5471 		gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5472 			     gen_rtx_REG (Pmode,
5473 					  INCOMING_RETURN_ADDR_REGNUM)));
5474 
5475   /* The window save event.  */
5476   add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5477 
5478   /* The CFA is %fp, the hard frame pointer.  */
5479   add_reg_note (insn, REG_CFA_DEF_CFA,
5480 		plus_constant (Pmode, hard_frame_pointer_rtx,
5481 			       INCOMING_FRAME_SP_OFFSET));
5482 
5483   return insn;
5484 }
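
/* The insn emitted above ultimately assembles to "save %sp, -SIZE, %sp",
   which both allocates the frame and rotates the register window: the
   caller's out registers become the callee's in registers, which is why the
   incoming return address (%o7) reappears as %i7.  */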
5485 
5486 /* Generate an increment for the stack pointer.  */
5487 
5488 static rtx
5489 gen_stack_pointer_inc (rtx increment)
5490 {
5491   return gen_rtx_SET (stack_pointer_rtx,
5492 		      gen_rtx_PLUS (Pmode,
5493 				    stack_pointer_rtx,
5494 				    increment));
5495 }
5496 
5497 /* Expand the function prologue.  The prologue is responsible for reserving
5498    storage for the frame, saving the call-saved registers and loading the
5499    GOT register if needed.  */
5500 
5501 void
5502 sparc_expand_prologue (void)
5503 {
5504   HOST_WIDE_INT size;
5505   rtx_insn *insn;
5506 
5507   /* Compute a snapshot of crtl->uses_only_leaf_regs.  Relying
5508      on the final value of the flag means deferring the prologue/epilogue
5509      expansion until just before the second scheduling pass, which is too
5510      late to emit multiple epilogues or return insns.
5511 
5512      Of course we are making the assumption that the value of the flag
5513      will not change between now and its final value.  Of the three parts
5514      of the formula, only the last one can reasonably vary.  Let's take a
5515      closer look, after assuming that the first two are set to true
5516      (otherwise the last value is effectively silenced).
5517 
5518      If only_leaf_regs_used returns false, the global predicate will also
5519      be false so the actual frame size calculated below will be positive.
5520      As a consequence, the save_register_window insn will be emitted in
5521      the instruction stream; now this insn explicitly references %fp
5522      which is not a leaf register so only_leaf_regs_used will always
5523      return false subsequently.
5524 
5525      If only_leaf_regs_used returns true, we hope that the subsequent
5526      optimization passes won't cause non-leaf registers to pop up.  For
5527      example, the regrename pass has special provisions to not rename to
5528      non-leaf registers in a leaf function.  */
5529   sparc_leaf_function_p
5530     = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5531 
5532   size = sparc_compute_frame_size (get_frame_size (), sparc_leaf_function_p);
5533 
5534   if (flag_stack_usage_info)
5535     current_function_static_stack_size = size;
5536 
5537   if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5538     {
5539       if (crtl->is_leaf && !cfun->calls_alloca)
5540 	{
5541 	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5542 	    sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5543 					  size - STACK_CHECK_PROTECT);
5544 	}
5545       else if (size > 0)
5546 	sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5547     }
5548 
5549   if (size == 0)
5550     ; /* do nothing.  */
5551   else if (sparc_leaf_function_p)
5552     {
5553       rtx size_int_rtx = GEN_INT (-size);
5554 
5555       if (size <= 4096)
5556 	insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5557       else if (size <= 8192)
5558 	{
5559 	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5560 	  RTX_FRAME_RELATED_P (insn) = 1;
5561 
5562 	  /* %sp is still the CFA register.  */
5563 	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5564 	}
5565       else
5566 	{
5567 	  rtx size_rtx = gen_rtx_REG (Pmode, 1);
5568 	  emit_move_insn (size_rtx, size_int_rtx);
5569 	  insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5570 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5571 			gen_stack_pointer_inc (size_int_rtx));
5572 	}
5573 
5574       RTX_FRAME_RELATED_P (insn) = 1;
5575     }
5576   else
5577     {
5578       rtx size_int_rtx = GEN_INT (-size);
5579 
5580       if (size <= 4096)
5581 	emit_window_save (size_int_rtx);
5582       else if (size <= 8192)
5583 	{
5584 	  emit_window_save (GEN_INT (-4096));
5585 
5586 	  /* %sp is not the CFA register anymore.  */
5587 	  emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5588 
5589 	  /* Make sure no %fp-based store is issued until after the frame is
5590 	     established.  The offset between the frame pointer and the stack
5591 	     pointer is calculated relative to the value of the stack pointer
5592 	     at the end of the function prologue, and moving instructions that
5593 	     access the stack via the frame pointer between the instructions
5594 	     that decrement the stack pointer could result in accessing the
5595 	     register window save area, which is volatile.  */
5596 	  emit_insn (gen_frame_blockage ());
5597 	}
5598       else
5599 	{
5600 	  rtx size_rtx = gen_rtx_REG (Pmode, 1);
5601 	  emit_move_insn (size_rtx, size_int_rtx);
5602 	  emit_window_save (size_rtx);
5603 	}
5604     }
5605 
5606   if (sparc_leaf_function_p)
5607     {
5608       sparc_frame_base_reg = stack_pointer_rtx;
5609       sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5610     }
5611   else
5612     {
5613       sparc_frame_base_reg = hard_frame_pointer_rtx;
5614       sparc_frame_base_offset = SPARC_STACK_BIAS;
5615     }
5616 
5617   if (sparc_n_global_fp_regs > 0)
5618     emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5619 				         sparc_frame_base_offset
5620 					   - sparc_apparent_frame_size,
5621 					 SORR_SAVE);
5622 
5623   /* Load the GOT register if needed.  */
5624   if (crtl->uses_pic_offset_table)
5625     load_got_register ();
5626 
5627   /* Advertise that the data calculated just above are now valid.  */
5628   sparc_prologue_data_valid_p = true;
5629 }
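
/* Note on the 4096/8192 cut-offs used above: the immediate field of a SPARC
   arithmetic instruction is a 13-bit signed value (simm13, range
   -4096..4095), so a single "add %sp, -N, %sp" or "save %sp, -N, %sp" can
   only materialize N up to 4096, two consecutive adjustments reach 8192,
   and anything larger must go through the %g1 scratch register.  */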
5630 
5631 /* Expand the function prologue.  The prologue is responsible for reserving
5632    storage for the frame, saving the call-saved registers and loading the
5633    GOT register if needed.  */
5634 
5635 void
5636 sparc_flat_expand_prologue (void)
5637 {
5638   HOST_WIDE_INT size;
5639   rtx_insn *insn;
5640 
5641   sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5642 
5643   size = sparc_compute_frame_size (get_frame_size (), sparc_leaf_function_p);
5644 
5645   if (flag_stack_usage_info)
5646     current_function_static_stack_size = size;
5647 
5648   if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5649     {
5650       if (crtl->is_leaf && !cfun->calls_alloca)
5651 	{
5652 	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5653 	    sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5654 					  size - STACK_CHECK_PROTECT);
5655 	}
5656       else if (size > 0)
5657 	sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5658     }
5659 
5660   if (sparc_save_local_in_regs_p)
5661     emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5662 					SORR_SAVE);
5663 
5664   if (size == 0)
5665     ; /* do nothing.  */
5666   else
5667     {
5668       rtx size_int_rtx, size_rtx;
5669 
5670       size_rtx = size_int_rtx = GEN_INT (-size);
5671 
5672       /* We establish the frame (i.e. decrement the stack pointer) first, even
5673 	 if we use a frame pointer, because we cannot clobber any call-saved
5674 	 registers, including the frame pointer, if we haven't created a new
5675 	 register save area, for the sake of compatibility with the ABI.  */
5676       if (size <= 4096)
5677 	insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5678       else if (size <= 8192 && !frame_pointer_needed)
5679 	{
5680 	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5681 	  RTX_FRAME_RELATED_P (insn) = 1;
5682 	  insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5683 	}
5684       else
5685 	{
5686 	  size_rtx = gen_rtx_REG (Pmode, 1);
5687 	  emit_move_insn (size_rtx, size_int_rtx);
5688 	  insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5689 	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
5690 			gen_stack_pointer_inc (size_int_rtx));
5691 	}
5692       RTX_FRAME_RELATED_P (insn) = 1;
5693 
5694       /* Ensure nothing is scheduled until after the frame is established.  */
5695       emit_insn (gen_blockage ());
5696 
5697       if (frame_pointer_needed)
5698 	{
5699 	  insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
5700 					 gen_rtx_MINUS (Pmode,
5701 							stack_pointer_rtx,
5702 							size_rtx)));
5703 	  RTX_FRAME_RELATED_P (insn) = 1;
5704 
5705 	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
5706 			gen_rtx_SET (hard_frame_pointer_rtx,
5707 				     plus_constant (Pmode, stack_pointer_rtx,
5708 						    size)));
5709 	}
5710 
5711       if (return_addr_reg_needed_p (sparc_leaf_function_p))
5712 	{
5713 	  rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5714 	  rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5715 
5716 	  insn = emit_move_insn (i7, o7);
5717 	  RTX_FRAME_RELATED_P (insn) = 1;
5718 
5719 	  add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
5720 
5721 	  /* Prevent this instruction from ever being considered dead,
5722 	     even if this function has no epilogue.  */
5723 	  emit_use (i7);
5724 	}
5725     }
5726 
5727   if (frame_pointer_needed)
5728     {
5729       sparc_frame_base_reg = hard_frame_pointer_rtx;
5730       sparc_frame_base_offset = SPARC_STACK_BIAS;
5731     }
5732   else
5733     {
5734       sparc_frame_base_reg = stack_pointer_rtx;
5735       sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5736     }
5737 
5738   if (sparc_n_global_fp_regs > 0)
5739     emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5740 				         sparc_frame_base_offset
5741 					   - sparc_apparent_frame_size,
5742 					 SORR_SAVE);
5743 
5744   /* Load the GOT register if needed.  */
5745   if (crtl->uses_pic_offset_table)
5746     load_got_register ();
5747 
5748   /* Advertise that the data calculated just above are now valid.  */
5749   sparc_prologue_data_valid_p = true;
5750 }
5751 
5752 /* This function generates the assembly code for function entry, which boils
5753    down to emitting the necessary .register directives.  */
5754 
5755 static void
5756 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5757 {
5758   /* Check that the assumption we made in sparc_expand_prologue is valid.  */
5759   if (!TARGET_FLAT)
5760     gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5761 
5762   sparc_output_scratch_registers (file);
5763 }
5764 
5765 /* Expand the function epilogue, either normal or part of a sibcall.
5766    We emit all the instructions except the return or the call.  */
5767 
5768 void
5769 sparc_expand_epilogue (bool for_eh)
5770 {
5771   HOST_WIDE_INT size = sparc_frame_size;
5772 
5773   if (cfun->calls_alloca)
5774     emit_insn (gen_frame_blockage ());
5775 
5776   if (sparc_n_global_fp_regs > 0)
5777     emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5778 				         sparc_frame_base_offset
5779 					   - sparc_apparent_frame_size,
5780 					 SORR_RESTORE);
5781 
5782   if (size == 0 || for_eh)
5783     ; /* do nothing.  */
5784   else if (sparc_leaf_function_p)
5785     {
5786       if (size <= 4096)
5787 	emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5788       else if (size <= 8192)
5789 	{
5790 	  emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5791 	  emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5792 	}
5793       else
5794 	{
5795 	  rtx reg = gen_rtx_REG (Pmode, 1);
5796 	  emit_move_insn (reg, GEN_INT (size));
5797 	  emit_insn (gen_stack_pointer_inc (reg));
5798 	}
5799     }
5800 }
5801 
5802 /* Expand the function epilogue, either normal or part of a sibcall.
5803    We emit all the instructions except the return or the call.  */
5804 
5805 void
5806 sparc_flat_expand_epilogue (bool for_eh)
5807 {
5808   HOST_WIDE_INT size = sparc_frame_size;
5809 
5810   if (sparc_n_global_fp_regs > 0)
5811     emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5812 				         sparc_frame_base_offset
5813 					   - sparc_apparent_frame_size,
5814 					 SORR_RESTORE);
5815 
5816   /* If we have a frame pointer, we'll need both to restore it before the
5817      frame is destroyed and to use its current value in destroying the frame.
5818      Since we don't have an atomic way to do that in the flat window model,
5819      we save the current value into a temporary register (%g1).  */
5820   if (frame_pointer_needed && !for_eh)
5821     emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
5822 
5823   if (return_addr_reg_needed_p (sparc_leaf_function_p))
5824     emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
5825 		    gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
5826 
5827   if (sparc_save_local_in_regs_p)
5828     emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
5829 					sparc_frame_base_offset,
5830 					SORR_RESTORE);
5831 
5832   if (size == 0 || for_eh)
5833     ; /* do nothing.  */
5834   else if (frame_pointer_needed)
5835     {
5836       /* Make sure the frame is destroyed after everything else is done.  */
5837       emit_insn (gen_blockage ());
5838 
5839       emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
5840     }
5841   else
5842     {
5843       /* Likewise.  */
5844       emit_insn (gen_blockage ());
5845 
5846       if (size <= 4096)
5847 	emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5848       else if (size <= 8192)
5849 	{
5850 	  emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5851 	  emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5852 	}
5853       else
5854 	{
5855 	  rtx reg = gen_rtx_REG (Pmode, 1);
5856 	  emit_move_insn (reg, GEN_INT (size));
5857 	  emit_insn (gen_stack_pointer_inc (reg));
5858 	}
5859     }
5860 }
5861 
5862 /* Return true if it is appropriate to emit `return' instructions in the
5863    body of a function.  */
5864 
5865 bool
5866 sparc_can_use_return_insn_p (void)
5867 {
5868   return sparc_prologue_data_valid_p
5869 	 && sparc_n_global_fp_regs == 0
5870 	 && TARGET_FLAT
5871 	    ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
5872 	    : (sparc_frame_size == 0 || !sparc_leaf_function_p);
5873 }
5874 
5875 /* This function generates the assembly code for function exit.  */
5876 
5877 static void
5878 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5879 {
5880   /* If the last two instructions of a function are "call foo; dslot;"
5881      the return address might point to the first instruction in the next
5882      function and we have to output a dummy nop for the sake of sane
5883      backtraces in such cases.  This is pointless for sibling calls since
5884      the return address is explicitly adjusted.  */
5885 
5886   rtx insn, last_real_insn;
5887 
5888   insn = get_last_insn ();
5889 
5890   last_real_insn = prev_real_insn (insn);
5891   if (last_real_insn
5892       && NONJUMP_INSN_P (last_real_insn)
5893       && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
5894     last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
5895 
5896   if (last_real_insn
5897       && CALL_P (last_real_insn)
5898       && !SIBLING_CALL_P (last_real_insn))
5899     fputs ("\tnop\n", file);
5900 
5901   sparc_output_deferred_case_vectors ();
5902 }
5903 
5904 /* Output a 'restore' instruction.  */
5905 
5906 static void
5907 output_restore (rtx pat)
5908 {
5909   rtx operands[3];
5910 
5911   if (! pat)
5912     {
5913       fputs ("\t restore\n", asm_out_file);
5914       return;
5915     }
5916 
5917   gcc_assert (GET_CODE (pat) == SET);
5918 
5919   operands[0] = SET_DEST (pat);
5920   pat = SET_SRC (pat);
5921 
5922   switch (GET_CODE (pat))
5923     {
5924       case PLUS:
5925 	operands[1] = XEXP (pat, 0);
5926 	operands[2] = XEXP (pat, 1);
5927 	output_asm_insn (" restore %r1, %2, %Y0", operands);
5928 	break;
5929       case LO_SUM:
5930 	operands[1] = XEXP (pat, 0);
5931 	operands[2] = XEXP (pat, 1);
5932 	output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
5933 	break;
5934       case ASHIFT:
5935 	operands[1] = XEXP (pat, 0);
5936 	gcc_assert (XEXP (pat, 1) == const1_rtx);
5937 	output_asm_insn (" restore %r1, %r1, %Y0", operands);
5938 	break;
5939       default:
5940 	operands[1] = pat;
5941 	output_asm_insn (" restore %%g0, %1, %Y0", operands);
5942 	break;
5943     }
5944 }
5945 
5946 /* Output a return.  */
5947 
5948 const char *
5949 output_return (rtx_insn *insn)
5950 {
5951   if (crtl->calls_eh_return)
5952     {
5953       /* If the function uses __builtin_eh_return, the eh_return
5954 	 machinery occupies the delay slot.  */
5955       gcc_assert (!final_sequence);
5956 
5957       if (flag_delayed_branch)
5958 	{
5959 	  if (!TARGET_FLAT && TARGET_V9)
5960 	    fputs ("\treturn\t%i7+8\n", asm_out_file);
5961 	  else
5962 	    {
5963 	      if (!TARGET_FLAT)
5964 		fputs ("\trestore\n", asm_out_file);
5965 
5966 	      fputs ("\tjmp\t%o7+8\n", asm_out_file);
5967 	    }
5968 
5969 	  fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
5970 	}
5971       else
5972 	{
5973 	  if (!TARGET_FLAT)
5974 	    fputs ("\trestore\n", asm_out_file);
5975 
5976 	  fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
5977 	  fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
5978 	}
5979     }
5980   else if (sparc_leaf_function_p || TARGET_FLAT)
5981     {
5982       /* This is a leaf or flat function so we don't have to bother restoring
5983 	 the register window, which frees us from dealing with the convoluted
5984 	 semantics of restore/return.  We simply output the jump to the
5985 	 return address and the insn in the delay slot (if any).  */
5986 
5987       return "jmp\t%%o7+%)%#";
5988     }
5989   else
5990     {
5991       /* This is a regular function so we have to restore the register window.
5992 	 We may have a pending insn for the delay slot, which will be either
5993 	 combined with the 'restore' instruction or put in the delay slot of
5994 	 the 'return' instruction.  */
5995 
5996       if (final_sequence)
5997 	{
5998 	  rtx delay, pat;
5999 
6000 	  delay = NEXT_INSN (insn);
6001 	  gcc_assert (delay);
6002 
6003 	  pat = PATTERN (delay);
6004 
6005 	  if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6006 	    {
6007 	      epilogue_renumber (&pat, 0);
6008 	      return "return\t%%i7+%)%#";
6009 	    }
6010 	  else
6011 	    {
6012 	      output_asm_insn ("jmp\t%%i7+%)", NULL);
6013 	      output_restore (pat);
6014 	      PATTERN (delay) = gen_blockage ();
6015 	      INSN_CODE (delay) = -1;
6016 	    }
6017 	}
6018       else
6019         {
6020 	  /* The delay slot is empty.  */
6021 	  if (TARGET_V9)
6022 	    return "return\t%%i7+%)\n\t nop";
6023 	  else if (flag_delayed_branch)
6024 	    return "jmp\t%%i7+%)\n\t restore";
6025 	  else
6026 	    return "restore\n\tjmp\t%%o7+%)\n\t nop";
6027 	}
6028     }
6029 
6030   return "";
6031 }
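
/* For example, a regular non-leaf V9 function with an empty delay slot gets
   "return %i7+8\n\t nop", and a pre-V9 one "jmp %i7+8\n\t restore"; the %)
   punctuation expands to the return offset, normally 8 but 12 for 32-bit
   functions returning an aggregate (to skip the unimp insn the caller
   places after the call).  */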
6032 
6033 /* Output a sibling call.  */
6034 
6035 const char *
6036 output_sibcall (rtx_insn *insn, rtx call_operand)
6037 {
6038   rtx operands[1];
6039 
6040   gcc_assert (flag_delayed_branch);
6041 
6042   operands[0] = call_operand;
6043 
6044   if (sparc_leaf_function_p || TARGET_FLAT)
6045     {
6046       /* This is a leaf or flat function so we don't have to bother restoring
6047 	 the register window.  We simply output the jump to the function and
6048 	 the insn in the delay slot (if any).  */
6049 
6050       gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6051 
6052       if (final_sequence)
6053 	output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6054 			 operands);
6055       else
6056 	/* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6057 	   it into a branch if possible.  */
6058 	output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6059 			 operands);
6060     }
6061   else
6062     {
6063       /* This is a regular function so we have to restore the register window.
6064 	 We may have a pending insn for the delay slot, which will be combined
6065 	 with the 'restore' instruction.  */
6066 
6067       output_asm_insn ("call\t%a0, 0", operands);
6068 
6069       if (final_sequence)
6070 	{
6071 	  rtx_insn *delay = NEXT_INSN (insn);
6072 	  gcc_assert (delay);
6073 
6074 	  output_restore (PATTERN (delay));
6075 
6076 	  PATTERN (delay) = gen_blockage ();
6077 	  INSN_CODE (delay) = -1;
6078 	}
6079       else
6080 	output_restore (NULL_RTX);
6081     }
6082 
6083   return "";
6084 }
6085 
6086 /* Functions for handling argument passing.
6087 
6088    For 32-bit, the first 6 args are normally in registers and the rest are
6089    pushed.  Any arg that starts within the first 6 words is at least
6090    partially passed in a register unless its data type forbids.
6091 
6092    For 64-bit, the argument registers are laid out as an array of 16 elements
6093    and arguments are added sequentially.  The first 6 int args and up to the
6094    first 16 fp args (depending on size) are passed in regs.
6095 
6096    Slot    Stack   Integral   Float   Float in structure   Double   Long Double
6097    ----    -----   --------   -----   ------------------   ------   -----------
6098     15   [SP+248]              %f31       %f30,%f31         %d30
6099     14   [SP+240]              %f29       %f28,%f29         %d28       %q28
6100     13   [SP+232]              %f27       %f26,%f27         %d26
6101     12   [SP+224]              %f25       %f24,%f25         %d24       %q24
6102     11   [SP+216]              %f23       %f22,%f23         %d22
6103     10   [SP+208]              %f21       %f20,%f21         %d20       %q20
6104      9   [SP+200]              %f19       %f18,%f19         %d18
6105      8   [SP+192]              %f17       %f16,%f17         %d16       %q16
6106      7   [SP+184]              %f15       %f14,%f15         %d14
6107      6   [SP+176]              %f13       %f12,%f13         %d12       %q12
6108      5   [SP+168]     %o5      %f11       %f10,%f11         %d10
6109      4   [SP+160]     %o4       %f9        %f8,%f9           %d8        %q8
6110      3   [SP+152]     %o3       %f7        %f6,%f7           %d6
6111      2   [SP+144]     %o2       %f5        %f4,%f5           %d4        %q4
6112      1   [SP+136]     %o1       %f3        %f2,%f3           %d2
6113      0   [SP+128]     %o0       %f1        %f0,%f1           %d0        %q0
6114 
6115    Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6116 
6117    Integral arguments are always passed as 64-bit quantities appropriately
6118    extended.
6119 
6120    Passing of floating point values is handled as follows.
6121    If a prototype is in scope:
6122      If the value is in a named argument (i.e. not a stdarg function or a
6123      value not part of the `...') then the value is passed in the appropriate
6124      fp reg.
6125      If the value is part of the `...' and is passed in one of the first 6
6126      slots then the value is passed in the appropriate int reg.
6127      If the value is part of the `...' and is not passed in one of the first 6
6128      slots then the value is passed in memory.
6129    If a prototype is not in scope:
6130      If the value is one of the first 6 arguments the value is passed in the
6131      appropriate integer reg and the appropriate fp reg.
6132      If the value is not one of the first 6 arguments the value is passed in
6133      the appropriate fp reg and in memory.
6134 
6135 
6136    Summary of the calling conventions implemented by GCC on the SPARC:
6137 
6138    32-bit ABI:
6139                                 size      argument     return value
6140 
6141       small integer              <4       int. reg.      int. reg.
6142       word                        4       int. reg.      int. reg.
6143       double word                 8       int. reg.      int. reg.
6144 
6145       _Complex small integer     <8       int. reg.      int. reg.
6146       _Complex word               8       int. reg.      int. reg.
6147       _Complex double word       16        memory        int. reg.
6148 
6149       vector integer            <=8       int. reg.       FP reg.
6150       vector integer             >8        memory         memory
6151 
6152       float                       4       int. reg.       FP reg.
6153       double                      8       int. reg.       FP reg.
6154       long double                16        memory         memory
6155 
6156       _Complex float              8        memory         FP reg.
6157       _Complex double            16        memory         FP reg.
6158       _Complex long double       32        memory         FP reg.
6159 
6160       vector float              any        memory         memory
6161 
6162       aggregate                 any        memory         memory
6163 
6164 
6165 
6166     64-bit ABI:
6167                                 size      argument     return value
6168 
6169       small integer              <8       int. reg.      int. reg.
6170       word                        8       int. reg.      int. reg.
6171       double word                16       int. reg.      int. reg.
6172 
6173       _Complex small integer    <16       int. reg.      int. reg.
6174       _Complex word              16       int. reg.      int. reg.
6175       _Complex double word       32        memory        int. reg.
6176 
6177       vector integer           <=16        FP reg.        FP reg.
6178       vector integer       16<s<=32        memory         FP reg.
6179       vector integer            >32        memory         memory
6180 
6181       float                       4        FP reg.        FP reg.
6182       double                      8        FP reg.        FP reg.
6183       long double                16        FP reg.        FP reg.
6184 
6185       _Complex float              8        FP reg.        FP reg.
6186       _Complex double            16        FP reg.        FP reg.
6187       _Complex long double       32        memory         FP reg.
6188 
6189       vector float             <=16        FP reg.        FP reg.
6190       vector float         16<s<=32        memory         FP reg.
6191       vector float              >32        memory         memory
6192 
6193       aggregate                <=16         reg.           reg.
6194       aggregate            16<s<=32        memory          reg.
6195       aggregate                 >32        memory         memory
6196 
6197 
6198 
6199 Note #1: complex floating-point types follow the extended SPARC ABIs as
6200 implemented by the Sun compiler.
6201 
6202 Note #2: integral vector types follow the scalar floating-point types
6203 conventions to match what is implemented by the Sun VIS SDK.
6204 
6205 Note #3: floating-point vector types follow the aggregate types
6206 conventions.  */
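
/* A few illustrative mappings onto the 64-bit conventions tabulated above,
   assuming named arguments with a prototype in scope (register names per
   the slot table; not compiled):

     double d;			    slot 0: passed in %d0
     struct { float x, y; } s;	    8 bytes: %f0,%f1 when in slot 0
     struct { double a; long b; }   16 bytes: %d0 plus %o1 in slots 0-1
     long double q;		    16 bytes: %q0, occupying slots 0-1  */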
6207 
6208 
6209 /* Maximum number of int regs for args.  */
6210 #define SPARC_INT_ARG_MAX 6
6211 /* Maximum number of fp regs for args.  */
6212 #define SPARC_FP_ARG_MAX 16
6213 /* Number of words (partially) occupied for a given size in units.  */
6214 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
6215 
6216 /* Handle the INIT_CUMULATIVE_ARGS macro.
6217    Initialize a variable CUM of type CUMULATIVE_ARGS
6218    for a call to a function whose data type is FNTYPE.
6219    For a library call, FNTYPE is 0.  */
6220 
6221 void
6222 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6223 {
6224   cum->words = 0;
6225   cum->prototype_p = fntype && prototype_p (fntype);
6226   cum->libcall_p = !fntype;
6227 }
6228 
6229 /* Handle promotion of pointer and integer arguments.  */
6230 
6231 static machine_mode
6232 sparc_promote_function_mode (const_tree type, machine_mode mode,
6233 			     int *punsignedp, const_tree, int)
6234 {
6235   if (type && POINTER_TYPE_P (type))
6236     {
6237       *punsignedp = POINTERS_EXTEND_UNSIGNED;
6238       return Pmode;
6239     }
6240 
6241   /* Integral arguments are passed as full words, as per the ABI.  */
6242   if (GET_MODE_CLASS (mode) == MODE_INT
6243       && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6244     return word_mode;
6245 
6246   return mode;
6247 }
6248 
6249 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook.  */
6250 
6251 static bool
6252 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6253 {
6254   return TARGET_ARCH64 ? true : false;
6255 }
6256 
6257 /* Traverse the record TYPE recursively and call FUNC on its fields.
6258    NAMED is true if this is for a named parameter.  DATA is passed
6259    to FUNC for each field.  OFFSET is the starting position and
6260    PACKED is true if we are inside a packed record.  */
6261 
6262 template <typename T, void Func (const_tree, HOST_WIDE_INT, bool, T*)>
6263 static void
6264 traverse_record_type (const_tree type, bool named, T *data,
6265 		      HOST_WIDE_INT offset = 0, bool packed = false)
6266 {
6267   /* The ABI obviously doesn't specify how packed structures are passed.
6268      These are passed in integer regs if possible, otherwise memory.  */
6269   if (!packed)
6270     for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6271       if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6272 	{
6273 	  packed = true;
6274 	  break;
6275 	}
6276 
6277   /* Walk the real fields, but skip those with no size or a zero size.
6278      ??? Fields with variable offset are handled as having zero offset.  */
6279   for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6280     if (TREE_CODE (field) == FIELD_DECL)
6281       {
6282 	if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6283 	  continue;
6284 
6285 	HOST_WIDE_INT bitpos = offset;
6286 	if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6287 	  bitpos += int_bit_position (field);
6288 
6289 	tree field_type = TREE_TYPE (field);
6290 	if (TREE_CODE (field_type) == RECORD_TYPE)
6291 	  traverse_record_type<T, Func> (field_type, named, data, bitpos,
6292 					 packed);
6293 	else
6294 	  {
6295 	    const bool fp_type
6296 	      = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6297 	    Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6298 		  data);
6299 	  }
6300       }
6301 }
6302 
6303 /* Handle recursive register classifying for structure layout.  */
6304 
6305 typedef struct
6306 {
6307   bool fp_regs;		/* true if field eligible to FP registers.  */
6308   bool fp_regs_in_first_word;	/* true if such field in first word.  */
6309 } classify_data_t;
6310 
6311 /* A subroutine of function_arg_slotno.  Classify the field.  */
6312 
6313 inline void
6314 classify_registers (const_tree, HOST_WIDE_INT bitpos, bool fp,
6315 		    classify_data_t *data)
6316 {
6317   if (fp)
6318     {
6319       data->fp_regs = true;
6320       if (bitpos < BITS_PER_WORD)
6321 	data->fp_regs_in_first_word = true;
6322     }
6323 }
6324 
6325 /* Compute the slot number to pass an argument in.
6326    Return the slot number or -1 if passing on the stack.
6327 
6328    CUM is a variable of type CUMULATIVE_ARGS which gives info about
6329     the preceding args and about the function being called.
6330    MODE is the argument's machine mode.
6331    TYPE is the data type of the argument (as a tree).
6332     This is null for libcalls where that information may
6333     not be available.
6334    NAMED is nonzero if this argument is a named parameter
6335     (otherwise it is an extra parameter matching an ellipsis).
6336    INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6337    *PREGNO records the register number to use if scalar type.
6338    *PPADDING records the amount of padding needed in words.  */
6339 
6340 static int
6341 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6342 		     const_tree type, bool named, bool incoming,
6343 		     int *pregno, int *ppadding)
6344 {
6345   int regbase = (incoming
6346 		 ? SPARC_INCOMING_INT_ARG_FIRST
6347 		 : SPARC_OUTGOING_INT_ARG_FIRST);
6348   int slotno = cum->words;
6349   enum mode_class mclass;
6350   int regno;
6351 
6352   *ppadding = 0;
6353 
6354   if (type && TREE_ADDRESSABLE (type))
6355     return -1;
6356 
6357   if (TARGET_ARCH32
6358       && mode == BLKmode
6359       && type
6360       && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6361     return -1;
6362 
6363   /* For SPARC64, objects requiring 16-byte alignment get it.  */
6364   if (TARGET_ARCH64
6365       && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6366       && (slotno & 1) != 0)
6367     slotno++, *ppadding = 1;
6368 
6369   mclass = GET_MODE_CLASS (mode);
6370   if (type && TREE_CODE (type) == VECTOR_TYPE)
6371     {
6372       /* Vector types deserve special treatment because they are
6373 	 polymorphic wrt their mode, depending upon whether VIS
6374 	 instructions are enabled.  */
6375       if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6376 	{
6377 	  /* The SPARC port defines no floating-point vector modes.  */
6378 	  gcc_assert (mode == BLKmode);
6379 	}
6380       else
6381 	{
6382 	  /* Integral vector types should either have a vector
6383 	     mode or an integral mode, because we are guaranteed
6384 	     by pass_by_reference that their size is not greater
6385 	     than 16 bytes and TImode is 16-byte wide.  */
6386 	  gcc_assert (mode != BLKmode);
6387 
6388 	  /* Vector integers are handled like floats according to
6389 	     the Sun VIS SDK.  */
6390 	  mclass = MODE_FLOAT;
6391 	}
6392     }
6393 
6394   switch (mclass)
6395     {
6396     case MODE_FLOAT:
6397     case MODE_COMPLEX_FLOAT:
6398     case MODE_VECTOR_INT:
6399       if (TARGET_ARCH64 && TARGET_FPU && named)
6400 	{
6401 	  /* If all arg slots are filled, then must pass on stack.  */
6402 	  if (slotno >= SPARC_FP_ARG_MAX)
6403 	    return -1;
6404 
6405 	  regno = SPARC_FP_ARG_FIRST + slotno * 2;
6406 	  /* Arguments filling only one single FP register are
6407 	     right-justified in the outer double FP register.  */
6408 	  if (GET_MODE_SIZE (mode) <= 4)
6409 	    regno++;
6410 	  break;
6411 	}
6412       /* fallthrough */
6413 
6414     case MODE_INT:
6415     case MODE_COMPLEX_INT:
6416       /* If all arg slots are filled, then must pass on stack.  */
6417       if (slotno >= SPARC_INT_ARG_MAX)
6418 	return -1;
6419 
6420       regno = regbase + slotno;
6421       break;
6422 
6423     case MODE_RANDOM:
6424       if (mode == VOIDmode)
6425 	/* MODE is VOIDmode when generating the actual call.  */
6426 	return -1;
6427 
6428       gcc_assert (mode == BLKmode);
6429 
6430       if (TARGET_ARCH32
6431 	  || !type
6432 	  || (TREE_CODE (type) != RECORD_TYPE
6433 	      && TREE_CODE (type) != VECTOR_TYPE))
6434 	{
6435 	  /* If all arg slots are filled, then must pass on stack.  */
6436 	  if (slotno >= SPARC_INT_ARG_MAX)
6437 	    return -1;
6438 
6439 	  regno = regbase + slotno;
6440 	}
6441       else  /* TARGET_ARCH64 && type */
6442 	{
6443 	  /* If all arg slots are filled, then must pass on stack.  */
6444 	  if (slotno >= SPARC_FP_ARG_MAX)
6445 	    return -1;
6446 
6447 	  if (TREE_CODE (type) == RECORD_TYPE)
6448 	    {
6449 	      classify_data_t data = { false, false };
6450 	      traverse_record_type<classify_data_t, classify_registers>
6451 		(type, named, &data);
6452 
6453 	      if (data.fp_regs)
6454 		{
6455 		  /* If all FP slots are filled except for the last one and
6456 		     there is no FP field in the first word, then must pass
6457 		     on stack.  */
6458 		  if (slotno >= SPARC_FP_ARG_MAX - 1
6459 		      && !data.fp_regs_in_first_word)
6460 		    return -1;
6461 		}
6462 	      else
6463 		{
6464 		  /* If all int slots are filled, then must pass on stack.  */
6465 		  if (slotno >= SPARC_INT_ARG_MAX)
6466 		    return -1;
6467 		}
6468 	    }
6469 
6470 	  /* PREGNO isn't set since both int and FP regs can be used.  */
6471 	  return slotno;
6472 	}
6473       break;
6474 
6475     default :
6476       gcc_unreachable ();
6477     }
6478 
6479   *pregno = regno;
6480   return slotno;
6481 }
6482 
6483 /* Handle recursive register counting/assigning for structure layout.  */
6484 
6485 typedef struct
6486 {
6487   int slotno;		/* slot number of the argument.  */
6488   int regbase;		/* regno of the base register.  */
6489   int intoffset;	/* offset of the first pending integer field.  */
6490   int nregs;		/* number of words passed in registers.  */
6491   bool stack;		/* true if part of the argument is on the stack.  */
6492   rtx ret;		/* return expression being built.  */
6493 } assign_data_t;
6494 
6495 /* A subroutine of function_arg_record_value.  Compute the number of integer
6496    registers to be assigned between PARMS->intoffset and BITPOS.  Return
6497    true if at least one integer register is assigned or false otherwise.  */
6498 
6499 static bool
6500 compute_int_layout (HOST_WIDE_INT bitpos, assign_data_t *data, int *pnregs)
6501 {
6502   if (data->intoffset < 0)
6503     return false;
6504 
6505   const int intoffset = data->intoffset;
6506   data->intoffset = -1;
6507 
6508   const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6509   const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
6510   const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
6511   int nregs = (endbit - startbit) / BITS_PER_WORD;
6512 
6513   if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
6514     {
6515       nregs = SPARC_INT_ARG_MAX - this_slotno;
6516 
6517       /* We need to pass this field (partly) on the stack.  */
6518       data->stack = 1;
6519     }
6520 
6521   if (nregs <= 0)
6522     return false;
6523 
6524   *pnregs = nregs;
6525   return true;
6526 }
6527 
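/* Worked example (illustrative only, not from the original source):
   assume a 64-bit target (BITS_PER_WORD == 64), pending integer fields
   starting at bit 16 (data->intoffset == 16) and the next FP field at
   BITPOS == 160.  Then

	startbit = ROUND_DOWN (16, 64)  =   0
	endbit   = ROUND_UP (160, 64)   = 192
	nregs    = (192 - 0) / 64       =   3

   so three integer registers are requested, clamped to the slots left
   before SPARC_INT_ARG_MAX, with the remainder going on the stack.  */
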
6528 /* A subroutine of function_arg_record_value.  Compute the number and the mode
6529    of the FP registers to be assigned for FIELD.  Return true if at least one
6530    FP register is assigned or false otherwise.  */
6531 
6532 static bool
6533 compute_fp_layout (const_tree field, HOST_WIDE_INT bitpos,
6534 		   assign_data_t *data,
6535 		   int *pnregs, machine_mode *pmode)
6536 {
6537   const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6538   machine_mode mode = DECL_MODE (field);
6539   int nregs, nslots;
6540 
6541   /* Slots are counted as words while regs are counted as having the size of
6542      the (inner) mode.  */
6543   if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE && mode == BLKmode)
6544     {
6545       mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6546       nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6547     }
6548   else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6549     {
6550       mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6551       nregs = 2;
6552     }
6553   else
6554     nregs = 1;
6555 
6556   nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
6557 
6558   if (nslots > SPARC_FP_ARG_MAX - this_slotno)
6559     {
6560       nslots = SPARC_FP_ARG_MAX - this_slotno;
6561       nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
6562 
6563       /* We need to pass this field (partly) on the stack.  */
6564       data->stack = 1;
6565 
6566       if (nregs <= 0)
6567 	return false;
6568     }
6569 
6570   *pnregs = nregs;
6571   *pmode = mode;
6572   return true;
6573 }
6574 
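/* Worked example (illustrative only): for a hypothetical BLKmode field
   of type 'float __attribute__ ((vector_size (16)))', the inner mode is
   SFmode (4 bytes), so

	nregs  = TYPE_VECTOR_SUBPARTS  = 4
	nslots = CEIL_NWORDS (4 * 4)   = 2	(8-byte words)

   that is, four SFmode registers spread over two argument slots.  */
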
6575 /* A subroutine of function_arg_record_value.  Count the number of registers
6576    to be assigned for FIELD and between PARMS->intoffset and BITPOS.  */
6577 
6578 inline void
6579 count_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6580 		 assign_data_t *data)
6581 {
6582   if (fp)
6583     {
6584       int nregs;
6585       machine_mode mode;
6586 
6587       if (compute_int_layout (bitpos, data, &nregs))
6588 	data->nregs += nregs;
6589 
6590       if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
6591 	data->nregs += nregs;
6592     }
6593   else
6594     {
6595       if (data->intoffset < 0)
6596 	data->intoffset = bitpos;
6597     }
6598 }
6599 
6600 /* A subroutine of function_arg_record_value.  Assign the bits of the
6601    structure between PARMS->intoffset and BITPOS to integer registers.  */
6602 
6603 static void
6604 assign_int_registers (HOST_WIDE_INT bitpos, assign_data_t *data)
6605 {
6606   int intoffset = data->intoffset;
6607   machine_mode mode;
6608   int nregs;
6609 
6610   if (!compute_int_layout (bitpos, data, &nregs))
6611     return;
6612 
6613   /* If this is the trailing part of a word, only load that much into
6614      the register.  Otherwise load the whole register.  Note that in
6615      the latter case we may pick up unwanted bits.  It's not a problem
6616      at the moment, but we may wish to revisit it.  */
6617   if (intoffset % BITS_PER_WORD != 0)
6618     mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
6619 			  	   MODE_INT);
6620   else
6621     mode = word_mode;
6622 
6623   const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6624   unsigned int regno = data->regbase + this_slotno;
6625   intoffset /= BITS_PER_UNIT;
6626 
6627   do
6628     {
6629       rtx reg = gen_rtx_REG (mode, regno);
6630       XVECEXP (data->ret, 0, data->stack + data->nregs)
6631 	= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6632       data->nregs += 1;
6633       mode = word_mode;
6634       regno += 1;
6635       intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
6636     }
6637   while (--nregs > 0);
6638 }
6639 
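/* Worked example (illustrative only): with 64-bit words, pending fields
   starting at bit 32 and extending to BITPOS == 128 yield two registers:
   the first iteration emits the trailing half of word 0 as
   (reg:SI regno) at byte offset 4, then rounds intoffset up to the next
   word boundary; the second emits (reg:DI regno + 1) at byte offset 8.  */
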
6640 /* A subroutine of function_arg_record_value.  Assign FIELD at position
6641    BITPOS to FP registers.  */
6642 
6643 static void
6644 assign_fp_registers (const_tree field, HOST_WIDE_INT bitpos,
6645 		     assign_data_t *data)
6646 {
6647   int nregs;
6648   machine_mode mode;
6649 
6650   if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
6651     return;
6652 
6653   const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6654   int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6655   if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6656     regno++;
6657   int pos = bitpos / BITS_PER_UNIT;
6658 
6659   do
6660     {
6661       rtx reg = gen_rtx_REG (mode, regno);
6662       XVECEXP (data->ret, 0, data->stack + data->nregs)
6663 	= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6664       data->nregs += 1;
6665       regno += GET_MODE_SIZE (mode) / 4;
6666       pos += GET_MODE_SIZE (mode);
6667     }
6668   while (--nregs > 0);
6669 }
6670 
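/* Worked example (illustrative only): a lone SFmode field at
   BITPOS == 32 has this_slotno == 0, so regno starts at
   SPARC_FP_ARG_FIRST; the field is 4 bytes wide and lies in the second
   half of the word (bitpos & 32), so regno is bumped to the odd
   register, e.g. %f1 rather than %f0.  */
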
6671 /* A subroutine of function_arg_record_value.  Assign FIELD and the bits of
6672    the structure between PARMS->intoffset and BITPOS to registers.  */
6673 
6674 inline void
6675 assign_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6676 		  assign_data_t *data)
6677 {
6678   if (fp)
6679     {
6680       assign_int_registers (bitpos, data);
6681 
6682       assign_fp_registers (field, bitpos, data);
6683     }
6684   else
6685     {
6686       if (data->intoffset < 0)
6687 	data->intoffset = bitpos;
6688     }
6689 }
6690 
6691 /* Used by function_arg and sparc_function_value_1 to implement the complex
6692    conventions of the 64-bit ABI for passing and returning structures.
6693    Return an expression valid as a return value for the FUNCTION_ARG
6694    and TARGET_FUNCTION_VALUE.
6695 
6696    TYPE is the data type of the argument (as a tree).
6697     This is null for libcalls where that information may
6698     not be available.
6699    MODE is the argument's machine mode.
6700    SLOTNO is the index number of the argument's slot in the parameter array.
6701    NAMED is true if this argument is a named parameter
6702     (otherwise it is an extra parameter matching an ellipsis).
6703    REGBASE is the regno of the base register for the parameter array.  */
6704 
6705 static rtx
6706 function_arg_record_value (const_tree type, machine_mode mode,
6707 			   int slotno, bool named, int regbase)
6708 {
6709   HOST_WIDE_INT typesize = int_size_in_bytes (type);
6710   assign_data_t data;
6711   int nregs;
6712 
6713   data.slotno = slotno;
6714   data.regbase = regbase;
6715 
6716   /* Count how many registers we need.  */
6717   data.nregs = 0;
6718   data.intoffset = 0;
6719   data.stack = false;
6720   traverse_record_type<assign_data_t, count_registers> (type, named, &data);
6721 
6722   /* Take into account pending integer fields.  */
6723   if (compute_int_layout (typesize * BITS_PER_UNIT, &data, &nregs))
6724     data.nregs += nregs;
6725 
6726   /* Allocate the vector and handle some annoying special cases.  */
6727   nregs = data.nregs;
6728 
6729   if (nregs == 0)
6730     {
6731       /* ??? Empty structure has no value?  Duh?  */
6732       if (typesize <= 0)
6733 	{
6734 	  /* Though there's nothing really to store, return a word register
6735 	     anyway so the rest of gcc doesn't go nuts.  Returning a PARALLEL
6736 	     leads to breakage due to the fact that there are zero bytes to
6737 	     load.  */
6738 	  return gen_rtx_REG (mode, regbase);
6739 	}
6740 
6741       /* ??? C++ has structures with no fields, and yet a size.  Give up
6742 	 for now and pass everything back in integer registers.  */
6743       nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6744       if (nregs + slotno > SPARC_INT_ARG_MAX)
6745 	nregs = SPARC_INT_ARG_MAX - slotno;
6746     }
6747 
6748   gcc_assert (nregs > 0);
6749 
6750   data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
6751 
6752   /* If at least one field must be passed on the stack, generate
6753      (parallel [(expr_list (nil) ...) ...]) so that all fields will
6754      also be passed on the stack.  We can't do much better because the
6755      semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6756      of structures for which the fields passed exclusively in registers
6757      are not at the beginning of the structure.  */
6758   if (data.stack)
6759     XVECEXP (data.ret, 0, 0)
6760       = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6761 
6762   /* Assign the registers.  */
6763   data.nregs = 0;
6764   data.intoffset = 0;
6765   traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
6766 
6767   /* Assign pending integer fields.  */
6768   assign_int_registers (typesize * BITS_PER_UNIT, &data);
6769 
6770   gcc_assert (data.nregs == nregs);
6771 
6772   return data.ret;
6773 }
6774 
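/* Worked example (illustrative only): for an outgoing first argument of
   type 'struct { int i; double d; }' on 64-bit (I in bits 0-31, D in
   bits 64-127), the two traversals produce approximately

	(parallel [(expr_list (reg:DI %o0) (const_int 0))
		   (expr_list (reg:DF %f2) (const_int 8))])

   i.e. the word containing I is passed in %o0 and D in %f2.  */
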
6775 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6776    of the 64-bit ABI for passing and returning unions.
6777    Return an expression valid as a return value for the FUNCTION_ARG
6778    and TARGET_FUNCTION_VALUE.
6779 
6780    SIZE is the size in bytes of the union.
6781    MODE is the argument's machine mode.
6782    SLOTNO is the index number of the argument's slot in the parameter array.
        REGNO is the hard register the union will be passed in.  */
6783 
6784 static rtx
6785 function_arg_union_value (int size, machine_mode mode, int slotno,
6786 			  int regno)
6787 {
6788   int nwords = CEIL_NWORDS (size), i;
6789   rtx regs;
6790 
6791   /* See comment in previous function for empty structures.  */
6792   if (nwords == 0)
6793     return gen_rtx_REG (mode, regno);
6794 
6795   if (slotno == SPARC_INT_ARG_MAX - 1)
6796     nwords = 1;
6797 
6798   regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
6799 
6800   for (i = 0; i < nwords; i++)
6801     {
6802       /* Unions are passed left-justified.  */
6803       XVECEXP (regs, 0, i)
6804 	= gen_rtx_EXPR_LIST (VOIDmode,
6805 			     gen_rtx_REG (word_mode, regno),
6806 			     GEN_INT (UNITS_PER_WORD * i));
6807       regno++;
6808     }
6809 
6810   return regs;
6811 }
6812 
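/* Worked example (illustrative only): a 12-byte union starting in
   slot 0 on 64-bit gives nwords = CEIL_NWORDS (12) = 2, hence roughly

	(parallel [(expr_list (reg:DI %o0) (const_int 0))
		   (expr_list (reg:DI %o1) (const_int 8))])

   left-justified, so only the high part of %o1 is significant.  */
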
6813 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6814    for passing and returning BLKmode vectors.
6815    Return an expression valid as a return value for the FUNCTION_ARG
6816    and TARGET_FUNCTION_VALUE.
6817 
6818    SIZE is the size in bytes of the vector.
6819    REGNO is the FP hard register the vector will be passed in.  */
6820 
6821 static rtx
6822 function_arg_vector_value (int size, int regno)
6823 {
6824   const int nregs = MAX (1, size / 8);
6825   rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
6826 
6827   if (size < 8)
6828     XVECEXP (regs, 0, 0)
6829       = gen_rtx_EXPR_LIST (VOIDmode,
6830 			   gen_rtx_REG (SImode, regno),
6831 			   const0_rtx);
6832   else
6833     for (int i = 0; i < nregs; i++)
6834       XVECEXP (regs, 0, i)
6835 	= gen_rtx_EXPR_LIST (VOIDmode,
6836 			     gen_rtx_REG (DImode, regno + 2*i),
6837 			     GEN_INT (i*8));
6838 
6839   return regs;
6840 }
6841 
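/* Worked example (illustrative only): a 16-byte vector passed starting
   at %f0 gives nregs = 2 and

	(parallel [(expr_list (reg:DI %f0) (const_int 0))
		   (expr_list (reg:DI %f2) (const_int 8))])

   while a 4-byte vector collapses to a single SImode piece in %f0.  */
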
6842 /* Determine where to put an argument to a function.
6843    Value is zero to push the argument on the stack,
6844    or a hard register in which to store the argument.
6845 
6846    CUM is a variable of type CUMULATIVE_ARGS which gives info about
6847     the preceding args and about the function being called.
6848    MODE is the argument's machine mode.
6849    TYPE is the data type of the argument (as a tree).
6850     This is null for libcalls where that information may
6851     not be available.
6852    NAMED is true if this argument is a named parameter
6853     (otherwise it is an extra parameter matching an ellipsis).
6854    INCOMING_P is false for TARGET_FUNCTION_ARG, true for
6855     TARGET_FUNCTION_INCOMING_ARG.  */
6856 
6857 static rtx
6858 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
6859 		      const_tree type, bool named, bool incoming)
6860 {
6861   const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6862 
6863   int regbase = (incoming
6864 		 ? SPARC_INCOMING_INT_ARG_FIRST
6865 		 : SPARC_OUTGOING_INT_ARG_FIRST);
6866   int slotno, regno, padding;
6867   enum mode_class mclass = GET_MODE_CLASS (mode);
6868 
6869   slotno = function_arg_slotno (cum, mode, type, named, incoming,
6870 				&regno, &padding);
6871   if (slotno == -1)
6872     return 0;
6873 
6874   /* Vector types deserve special treatment because they are polymorphic wrt
6875      their mode, depending upon whether VIS instructions are enabled.  */
6876   if (type && TREE_CODE (type) == VECTOR_TYPE)
6877     {
6878       HOST_WIDE_INT size = int_size_in_bytes (type);
6879       gcc_assert ((TARGET_ARCH32 && size <= 8)
6880 		  || (TARGET_ARCH64 && size <= 16));
6881 
6882       if (mode == BLKmode)
6883 	return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
6884 
6885       mclass = MODE_FLOAT;
6886     }
6887 
6888   if (TARGET_ARCH32)
6889     return gen_rtx_REG (mode, regno);
6890 
6891   /* Structures up to 16 bytes in size are passed in arg slots on the stack
6892      and are promoted to registers if possible.  */
6893   if (type && TREE_CODE (type) == RECORD_TYPE)
6894     {
6895       HOST_WIDE_INT size = int_size_in_bytes (type);
6896       gcc_assert (size <= 16);
6897 
6898       return function_arg_record_value (type, mode, slotno, named, regbase);
6899     }
6900 
6901   /* Unions up to 16 bytes in size are passed in integer registers.  */
6902   else if (type && TREE_CODE (type) == UNION_TYPE)
6903     {
6904       HOST_WIDE_INT size = int_size_in_bytes (type);
6905       gcc_assert (size <= 16);
6906 
6907       return function_arg_union_value (size, mode, slotno, regno);
6908     }
6909 
6910   /* v9 fp args in reg slots beyond the int reg slots get passed in regs
6911      but also have the slot allocated for them.
6912      If no prototype is in scope fp values in register slots get passed
6913      in two places, either fp regs and int regs or fp regs and memory.  */
6914   else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
6915 	   && SPARC_FP_REG_P (regno))
6916     {
6917       rtx reg = gen_rtx_REG (mode, regno);
6918       if (cum->prototype_p || cum->libcall_p)
6919 	return reg;
6920       else
6921 	{
6922 	  rtx v0, v1;
6923 
6924 	  if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
6925 	    {
6926 	      int intreg;
6927 
6928 	      /* On incoming, we don't need to know that the value
6929 		 is passed in %f0 and %i0, and it confuses other parts
6930 		 causing needless spillage even in the simplest cases.  */
6931 	      if (incoming)
6932 		return reg;
6933 
6934 	      intreg = (SPARC_OUTGOING_INT_ARG_FIRST
6935 			+ (regno - SPARC_FP_ARG_FIRST) / 2);
6936 
6937 	      v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6938 	      v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
6939 				      const0_rtx);
6940 	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6941 	    }
6942 	  else
6943 	    {
6944 	      v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6945 	      v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
6946 	      return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
6947 	    }
6948 	}
6949     }
6950 
6951   /* All other aggregate types are passed in an integer register in a mode
6952      corresponding to the size of the type.  */
6953   else if (type && AGGREGATE_TYPE_P (type))
6954     {
6955       HOST_WIDE_INT size = int_size_in_bytes (type);
6956       gcc_assert (size <= 16);
6957 
6958       mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6959     }
6960 
6961   return gen_rtx_REG (mode, regno);
6962 }
6963 
6964 /* Handle the TARGET_FUNCTION_ARG target hook.  */
6965 
6966 static rtx
6967 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
6968 		    const_tree type, bool named)
6969 {
6970   return sparc_function_arg_1 (cum, mode, type, named, false);
6971 }
6972 
6973 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook.  */
6974 
6975 static rtx
6976 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
6977 			     const_tree type, bool named)
6978 {
6979   return sparc_function_arg_1 (cum, mode, type, named, true);
6980 }
6981 
6982 /* For sparc64, objects requiring 16 byte alignment are passed that way.  */
6983 /* For sparc64, objects requiring 16-byte alignment are passed that way.  */
6984 static unsigned int
6985 sparc_function_arg_boundary (machine_mode mode, const_tree type)
6986 {
6987   return ((TARGET_ARCH64
6988 	   && (GET_MODE_ALIGNMENT (mode) == 128
6989 	       || (type && TYPE_ALIGN (type) == 128)))
6990 	  ? 128
6991 	  : PARM_BOUNDARY);
6992 }
6993 
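/* For instance (illustrative only): on 64-bit, 'long double' (TFmode,
   128-bit alignment) gets a 128-bit boundary and hence an even-numbered
   slot, while 'int' or an 8-byte struct stays at PARM_BOUNDARY.  */
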
6994 /* For an arg passed partly in registers and partly in memory,
6995    this is the number of bytes of registers used.
6996    For args passed entirely in registers or entirely in memory, zero.
6997 
6998    Any arg that starts in the first 6 regs but won't entirely fit in them
6999    needs partial registers on v8.  On v9, structures with integer
7000    values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7001    values that begin in the last fp reg [where "last fp reg" varies with the
7002    mode] will be split between that reg and memory.  */
7003 
7004 static int
7005 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
7006 			 tree type, bool named)
7007 {
7008   int slotno, regno, padding;
7009 
7010   /* We pass false for incoming here; it doesn't matter.  */
7011   slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
7012 				false, &regno, &padding);
7013 
7014   if (slotno == -1)
7015     return 0;
7016 
7017   if (TARGET_ARCH32)
7018     {
7019       if ((slotno + (mode == BLKmode
7020 		     ? CEIL_NWORDS (int_size_in_bytes (type))
7021 		     : CEIL_NWORDS (GET_MODE_SIZE (mode))))
7022 	  > SPARC_INT_ARG_MAX)
7023 	return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
7024     }
7025   else
7026     {
7027       /* We are guaranteed by pass_by_reference that the size of the
7028 	 argument is not greater than 16 bytes, so we only need to return
7029 	 one word if the argument is partially passed in registers.  */
7030 
7031       if (type && AGGREGATE_TYPE_P (type))
7032 	{
7033 	  int size = int_size_in_bytes (type);
7034 
7035 	  if (size > UNITS_PER_WORD
7036 	      && (slotno == SPARC_INT_ARG_MAX - 1
7037 		  || slotno == SPARC_FP_ARG_MAX - 1))
7038 	    return UNITS_PER_WORD;
7039 	}
7040       else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7041 	       || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7042 		   && ! (TARGET_FPU && named)))
7043 	{
7044 	  /* The complex types are passed as packed types.  */
7045 	  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7046 	      && slotno == SPARC_INT_ARG_MAX - 1)
7047 	    return UNITS_PER_WORD;
7048 	}
7049       else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7050 	{
7051 	  if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7052 	      > SPARC_FP_ARG_MAX)
7053 	    return UNITS_PER_WORD;
7054 	}
7055     }
7056 
7057   return 0;
7058 }
7059 
7060 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7061    Specify whether to pass the argument by reference.  */
7062 
7063 static bool
7064 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7065 			 machine_mode mode, const_tree type,
7066 			 bool named ATTRIBUTE_UNUSED)
7067 {
7068   if (TARGET_ARCH32)
7069     /* Original SPARC 32-bit ABI says that structures and unions,
7070        and quad-precision floats are passed by reference.  For Pascal,
7071        also pass arrays by reference.  All other base types are passed
7072        in registers.
7073 
7074        Extended ABI (as implemented by the Sun compiler) says that all
7075        complex floats are passed by reference.  Pass complex integers
7076        in registers up to 8 bytes.  More generally, enforce the 2-word
7077        cap for passing arguments in registers.
7078 
7079        Vector ABI (as implemented by the Sun VIS SDK) says that vector
7080        integers are passed like floats of the same size, that is in
7081        registers up to 8 bytes.  Pass all vector floats by reference
7082        like structure and unions.  */
7083     return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7084 	    || mode == SCmode
7085 	    /* Catch CDImode, TFmode, DCmode and TCmode.  */
7086 	    || GET_MODE_SIZE (mode) > 8
7087 	    || (type
7088 		&& TREE_CODE (type) == VECTOR_TYPE
7089 		&& (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7090   else
7091     /* Original SPARC 64-bit ABI says that structures and unions
7092        smaller than 16 bytes are passed in registers, as well as
7093        all other base types.
7094 
7095        Extended ABI (as implemented by the Sun compiler) says that
7096        complex floats are passed in registers up to 16 bytes.  Pass
7097        all complex integers in registers up to 16 bytes.  More generally,
7098        enforce the 2-word cap for passing arguments in registers.
7099 
7100        Vector ABI (as implemented by the Sun VIS SDK) says that vector
7101        integers are passed like floats of the same size, that is in
7102        registers (up to 16 bytes).  Pass all vector floats like structure
7103        and unions.  */
7104     return ((type
7105 	     && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7106 	     && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7107 	    /* Catch CTImode and TCmode.  */
7108 	    || GET_MODE_SIZE (mode) > 16);
7109 }
7110 
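/* A few illustrative consequences of the rules above (not from the
   original source): on 32-bit, 'struct { int x; }', 'long double'
   (TFmode, 16 bytes) and '_Complex float' (SCmode) all go by reference;
   on 64-bit they go by value, while a 32-byte structure or
   '_Complex long double' (TCmode, 32 bytes) still goes by reference.  */
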
7111 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7112    Update the data in CUM to advance over an argument
7113    of mode MODE and data type TYPE.
7114    TYPE is null for libcalls where that information may not be available.  */
7115 
7116 static void
7117 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7118 			    const_tree type, bool named)
7119 {
7120   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7121   int regno, padding;
7122 
7123   /* We pass false for incoming here; it doesn't matter.  */
7124   function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7125 
7126   /* If argument requires leading padding, add it.  */
7127   cum->words += padding;
7128 
7129   if (TARGET_ARCH32)
7130     cum->words += (mode == BLKmode
7131 		   ? CEIL_NWORDS (int_size_in_bytes (type))
7132 		   : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7133   else
7134     {
7135       if (type && AGGREGATE_TYPE_P (type))
7136 	{
7137 	  int size = int_size_in_bytes (type);
7138 
7139 	  if (size <= 8)
7140 	    ++cum->words;
7141 	  else if (size <= 16)
7142 	    cum->words += 2;
7143 	  else /* passed by reference */
7144 	    ++cum->words;
7145 	}
7146       else
7147 	cum->words += (mode == BLKmode
7148 		       ? CEIL_NWORDS (int_size_in_bytes (type))
7149 		       : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7150     }
7151 }
7152 
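/* Worked example (illustrative only): on 64-bit, a 'double' argument
   advances cum->words by CEIL_NWORDS (8) == 1; a 12-byte struct
   advances it by 2; a 32-byte struct by 1 only, since the latter is in
   fact passed by reference as a single pointer.  */
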
7153 /* Handle the FUNCTION_ARG_PADDING macro.
7154    For the 64-bit ABI, structs are always stored left-shifted in their
7155    argument slot.  */
7156 
7157 enum direction
7158 function_arg_padding (machine_mode mode, const_tree type)
7159 {
7160   if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7161     return upward;
7162 
7163   /* Fall back to the default.  */
7164   return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
7165 }
7166 
7167 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7168    Specify whether to return the return value in memory.  */
7169 
7170 static bool
7171 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7172 {
7173   if (TARGET_ARCH32)
7174     /* Original SPARC 32-bit ABI says that structures and unions,
7175        and quad-precision floats are returned in memory.  All other
7176        base types are returned in registers.
7177 
7178        Extended ABI (as implemented by the Sun compiler) says that
7179        all complex floats are returned in registers (8 FP registers
7180        at most for '_Complex long double').  Return all complex integers
7181        in registers (4 at most for '_Complex long long').
7182 
7183        Vector ABI (as implemented by the Sun VIS SDK) says that vector
7184        integers are returned like floats of the same size, that is in
7185        registers up to 8 bytes and in memory otherwise.  Return all
7186        vector floats in memory like structure and unions; note that
7187        they always have BLKmode like the latter.  */
7188     return (TYPE_MODE (type) == BLKmode
7189 	    || TYPE_MODE (type) == TFmode
7190 	    || (TREE_CODE (type) == VECTOR_TYPE
7191 		&& (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7192   else
7193     /* Original SPARC 64-bit ABI says that structures and unions
7194        smaller than 32 bytes are returned in registers, as well as
7195        all other base types.
7196 
7197        Extended ABI (as implemented by the Sun compiler) says that all
7198        complex floats are returned in registers (8 FP registers at most
7199        for '_Complex long double').  Return all complex integers in
7200        registers (4 at most for '_Complex TItype').
7201 
7202        Vector ABI (as implemented by the Sun VIS SDK) says that vector
7203        integers are returned like floats of the same size, that is in
7204        registers.  Return all vector floats like structure and unions;
7205        note that they always have BLKmode like the latter.  */
7206     return (TYPE_MODE (type) == BLKmode
7207 	    && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7208 }
7209 
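/* For instance (illustrative only): on 64-bit, a 24-byte struct
   (BLKmode) is returned in registers while a 40-byte one goes in
   memory; on 32-bit, any struct, as well as 'long double' (TFmode), is
   returned in memory.  */
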
7210 /* Handle the TARGET_STRUCT_VALUE target hook.
7211    Return where to find the structure return value address.  */
7212 
7213 static rtx
7214 sparc_struct_value_rtx (tree fndecl, int incoming)
7215 {
7216   if (TARGET_ARCH64)
7217     return 0;
7218   else
7219     {
7220       rtx mem;
7221 
7222       if (incoming)
7223 	mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7224 						   STRUCT_VALUE_OFFSET));
7225       else
7226 	mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7227 						   STRUCT_VALUE_OFFSET));
7228 
7229       /* Only follow the SPARC ABI for fixed-size structure returns.
7230          Variable-size structure returns are handled per the normal
7231          procedures in GCC.  This is enabled by -mstd-struct-return.  */
7232       if (incoming == 2
7233 	  && sparc_std_struct_return
7234 	  && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7235 	  && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7236 	{
7237 	  /* We must check and adjust the return address, as it is optional
7238 	     whether the return object is really provided.  */
7239 	  rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7240 	  rtx scratch = gen_reg_rtx (SImode);
7241 	  rtx_code_label *endlab = gen_label_rtx ();
7242 
7243 	  /* Calculate the return object size.  */
7244 	  tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7245 	  rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7246 	  /* Construct a temporary return value.  */
7247 	  rtx temp_val
7248 	    = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7249 
7250 	  /* Implement SPARC 32-bit psABI callee return struct checking:
7251 
7252 	     Fetch the instruction where we will return to and see if
7253 	     it's an unimp instruction (the most significant 10 bits
7254 	     will be zero).  */
7255 	  emit_move_insn (scratch, gen_rtx_MEM (SImode,
7256 						plus_constant (Pmode,
7257 							       ret_reg, 8)));
7258 	  /* Assume the size is valid and pre-adjust.  */
7259 	  emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7260 	  emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7261 				   0, endlab);
7262 	  emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7263 	  /* Write the address of the memory pointed to by temp_val into
7264 	     the memory pointed to by mem.  */
7265 	  emit_move_insn (mem, XEXP (temp_val, 0));
7266 	  emit_label (endlab);
7267 	}
7268 
7269       return mem;
7270     }
7271 }
7272 
7273 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7274    For v9, function return values are subject to the same rules as arguments,
7275    except that up to 32 bytes may be returned in registers.  */
7276 
7277 static rtx
7278 sparc_function_value_1 (const_tree type, machine_mode mode,
7279 			bool outgoing)
7280 {
7281   /* Beware that the two values are swapped here wrt function_arg.  */
7282   int regbase = (outgoing
7283 		 ? SPARC_INCOMING_INT_ARG_FIRST
7284 		 : SPARC_OUTGOING_INT_ARG_FIRST);
7285   enum mode_class mclass = GET_MODE_CLASS (mode);
7286   int regno;
7287 
7288   /* Vector types deserve special treatment because they are polymorphic wrt
7289      their mode, depending upon whether VIS instructions are enabled.  */
7290   if (type && TREE_CODE (type) == VECTOR_TYPE)
7291     {
7292       HOST_WIDE_INT size = int_size_in_bytes (type);
7293       gcc_assert ((TARGET_ARCH32 && size <= 8)
7294 		  || (TARGET_ARCH64 && size <= 32));
7295 
7296       if (mode == BLKmode)
7297 	return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);
7298 
7299       mclass = MODE_FLOAT;
7300     }
7301 
7302   if (TARGET_ARCH64 && type)
7303     {
7304       /* Structures up to 32 bytes in size are returned in registers.  */
7305       if (TREE_CODE (type) == RECORD_TYPE)
7306 	{
7307 	  HOST_WIDE_INT size = int_size_in_bytes (type);
7308 	  gcc_assert (size <= 32);
7309 
7310 	  return function_arg_record_value (type, mode, 0, 1, regbase);
7311 	}
7312 
7313       /* Unions up to 32 bytes in size are returned in integer registers.  */
7314       else if (TREE_CODE (type) == UNION_TYPE)
7315 	{
7316 	  HOST_WIDE_INT size = int_size_in_bytes (type);
7317 	  gcc_assert (size <= 32);
7318 
7319 	  return function_arg_union_value (size, mode, 0, regbase);
7320 	}
7321 
7322       /* Objects that require it are returned in FP registers.  */
7323       else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7324 	;
7325 
7326       /* All other aggregate types are returned in an integer register in a
7327 	 mode corresponding to the size of the type.  */
7328       else if (AGGREGATE_TYPE_P (type))
7329 	{
7332 	  HOST_WIDE_INT size = int_size_in_bytes (type);
7333 	  gcc_assert (size <= 32);
7334 
7335 	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
7336 
7337 	  /* ??? We probably should have made the same ABI change in
7338 	     3.4.0 as the one we made for unions.   The latter was
7339 	     required by the SCD though, while the former is not
7340 	     specified, so we favored compatibility and efficiency.
7341 
7342 	     Now we're stuck for aggregates larger than 16 bytes,
7343 	     because OImode vanished in the meantime.  Let's not
7344 	     try to be unduly clever, and simply follow the ABI
7345 	     for unions in that case.  */
7346 	  if (mode == BLKmode)
7347 	    return function_arg_union_value (size, mode, 0, regbase);
7348 	  else
7349 	    mclass = MODE_INT;
7350 	}
7351 
7352       /* We should only have pointer and integer types at this point.  This
7353 	 must match sparc_promote_function_mode.  */
7354       else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7355 	mode = word_mode;
7356     }
7357 
7358   /* We should only have pointer and integer types at this point, except with
7359      -freg-struct-return.  This must match sparc_promote_function_mode.  */
7360   else if (TARGET_ARCH32
7361 	   && !(type && AGGREGATE_TYPE_P (type))
7362 	   && mclass == MODE_INT
7363 	   && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7364     mode = word_mode;
7365 
7366   if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7367     regno = SPARC_FP_ARG_FIRST;
7368   else
7369     regno = regbase;
7370 
7371   return gen_rtx_REG (mode, regno);
7372 }
7373 
7374 /* Handle TARGET_FUNCTION_VALUE.
7375    On the SPARC, the value is found in the first "output" register, but the
7376    called function leaves it in the first "input" register.  */
7377 
7378 static rtx
7379 sparc_function_value (const_tree valtype,
7380 		      const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7381 		      bool outgoing)
7382 {
7383   return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7384 }
7385 
7386 /* Handle TARGET_LIBCALL_VALUE.  */
7387 
7388 static rtx
7389 sparc_libcall_value (machine_mode mode,
7390 		     const_rtx fun ATTRIBUTE_UNUSED)
7391 {
7392   return sparc_function_value_1 (NULL_TREE, mode, false);
7393 }
7394 
7395 /* Handle FUNCTION_VALUE_REGNO_P.
7396    On the SPARC, the first "output" reg is used for integer values, and the
7397    first floating point register is used for floating point values.  */
7398 
7399 static bool
7400 sparc_function_value_regno_p (const unsigned int regno)
7401 {
7402   return (regno == 8 || (TARGET_FPU && regno == 32));
7403 }
7404 
7405 /* Do what is necessary for `va_start'.  We look at the current function
7406    to determine if stdarg or varargs is used and return the address of
7407    the first unnamed parameter.  */
7408 
7409 static rtx
7410 sparc_builtin_saveregs (void)
7411 {
7412   int first_reg = crtl->args.info.words;
7413   rtx address;
7414   int regno;
7415 
7416   for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7417     emit_move_insn (gen_rtx_MEM (word_mode,
7418 				 gen_rtx_PLUS (Pmode,
7419 					       frame_pointer_rtx,
7420 					       GEN_INT (FIRST_PARM_OFFSET (0)
7421 							+ (UNITS_PER_WORD
7422 							   * regno)))),
7423 		    gen_rtx_REG (word_mode,
7424 				 SPARC_INCOMING_INT_ARG_FIRST + regno));
7425 
7426   address = gen_rtx_PLUS (Pmode,
7427 			  frame_pointer_rtx,
7428 			  GEN_INT (FIRST_PARM_OFFSET (0)
7429 				   + UNITS_PER_WORD * first_reg));
7430 
7431   return address;
7432 }
7433 
7434 /* Implement `va_start' for stdarg.  */
7435 
7436 static void
7437 sparc_va_start (tree valist, rtx nextarg)
7438 {
7439   nextarg = expand_builtin_saveregs ();
7440   std_expand_builtin_va_start (valist, nextarg);
7441 }
7442 
7443 /* Implement `va_arg' for stdarg.  */
7444 
7445 static tree
7446 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7447 		       gimple_seq *post_p)
7448 {
7449   HOST_WIDE_INT size, rsize, align;
7450   tree addr, incr;
7451   bool indirect;
7452   tree ptrtype = build_pointer_type (type);
7453 
7454   if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7455     {
7456       indirect = true;
7457       size = rsize = UNITS_PER_WORD;
7458       align = 0;
7459     }
7460   else
7461     {
7462       indirect = false;
7463       size = int_size_in_bytes (type);
7464       rsize = ROUND_UP (size, UNITS_PER_WORD);
7465       align = 0;
7466 
7467       if (TARGET_ARCH64)
7468 	{
7469 	  /* For SPARC64, objects requiring 16-byte alignment get it.  */
7470 	  if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7471 	    align = 2 * UNITS_PER_WORD;
7472 
7473 	  /* SPARC-V9 ABI states that structures up to 16 bytes in size
7474 	     are left-justified in their slots.  */
7475 	  if (AGGREGATE_TYPE_P (type))
7476 	    {
7477 	      if (size == 0)
7478 		size = rsize = UNITS_PER_WORD;
7479 	      else
7480 		size = rsize;
7481 	    }
7482 	}
7483     }
7484 
7485   incr = valist;
7486   if (align)
7487     {
7488       incr = fold_build_pointer_plus_hwi (incr, align - 1);
7489       incr = fold_convert (sizetype, incr);
7490       incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7491 			  size_int (-align));
7492       incr = fold_convert (ptr_type_node, incr);
7493     }
7494 
7495   gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7496   addr = incr;
7497 
7498   if (BYTES_BIG_ENDIAN && size < rsize)
7499     addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7500 
7501   if (indirect)
7502     {
7503       addr = fold_convert (build_pointer_type (ptrtype), addr);
7504       addr = build_va_arg_indirect_ref (addr);
7505     }
7506 
7507   /* If the address isn't aligned properly for the type, we need a temporary.
7508      FIXME: This is inefficient; usually we can do this in registers.  */
7509   else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7510     {
7511       tree tmp = create_tmp_var (type, "va_arg_tmp");
7512       tree dest_addr = build_fold_addr_expr (tmp);
7513       tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7514 				   3, dest_addr, addr, size_int (rsize));
7515       TREE_ADDRESSABLE (tmp) = 1;
7516       gimplify_and_add (copy, pre_p);
7517       addr = dest_addr;
7518     }
7519 
7520   else
7521     addr = fold_convert (ptrtype, addr);
7522 
7523   incr = fold_build_pointer_plus_hwi (incr, rsize);
7524   gimplify_assign (valist, incr, post_p);
7525 
7526   return build_va_arg_indirect_ref (addr);
7527 }
7528 
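/* Worked example (illustrative only) of the alignment step above: for
   a 16-byte-aligned type on 64-bit, the va_list pointer is rounded up
   as

	incr = (incr + 15) & -16;

   before the argument is fetched, matching the even-slot alignment used
   when the argument was passed.  */
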
7529 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7530    Specify whether the vector mode is supported by the hardware.  */
7531 
7532 static bool
7533 sparc_vector_mode_supported_p (machine_mode mode)
7534 {
7535   return TARGET_VIS && VECTOR_MODE_P (mode);
7536 }
7537 
7538 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook.  */
7539 
7540 static machine_mode
7541 sparc_preferred_simd_mode (machine_mode mode)
7542 {
7543   if (TARGET_VIS)
7544     switch (mode)
7545       {
7546       case SImode:
7547 	return V2SImode;
7548       case HImode:
7549 	return V4HImode;
7550       case QImode:
7551 	return V8QImode;
7552 
7553       default:;
7554       }
7555 
7556   return word_mode;
7557 }
7558 
7559 /* Return the string to output an unconditional branch to LABEL, which is
7560    the operand number of the label.
7561 
7562    DEST is the destination insn (i.e. the label), INSN is the source.  */
7563 
7564 const char *
7565 output_ubranch (rtx dest, rtx_insn *insn)
7566 {
7567   static char string[64];
7568   bool v9_form = false;
7569   int delta;
7570   char *p;
7571 
7572   /* Even if we are trying to use cbcond for this, evaluate
7573      whether we can use V9 branches as our backup plan.  */
7574 
7575   delta = 5000000;
7576   if (INSN_ADDRESSES_SET_P ())
7577     delta = (INSN_ADDRESSES (INSN_UID (dest))
7578 	     - INSN_ADDRESSES (INSN_UID (insn)));
7579 
7580   /* Leave some instructions for "slop".  */
7581   if (TARGET_V9 && delta >= -260000 && delta < 260000)
7582     v9_form = true;
7583 
7584   if (TARGET_CBCOND)
7585     {
7586       bool emit_nop = emit_cbcond_nop (insn);
7587       bool far = false;
7588       const char *rval;
7589 
7590       if (delta < -500 || delta > 500)
7591 	far = true;
7592 
7593       if (far)
7594 	{
7595 	  if (v9_form)
7596 	    rval = "ba,a,pt\t%%xcc, %l0";
7597 	  else
7598 	    rval = "b,a\t%l0";
7599 	}
7600       else
7601 	{
7602 	  if (emit_nop)
7603 	    rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7604 	  else
7605 	    rval = "cwbe\t%%g0, %%g0, %l0";
7606 	}
7607       return rval;
7608     }
7609 
7610   if (v9_form)
7611     strcpy (string, "ba%*,pt\t%%xcc, ");
7612   else
7613     strcpy (string, "b%*\t");
7614 
7615   p = strchr (string, '\0');
7616   *p++ = '%';
7617   *p++ = 'l';
7618   *p++ = '0';
7619   *p++ = '%';
7620   *p++ = '(';
7621   *p = '\0';
7622 
7623   return string;
7624 }
7625 
7626 /* Return the string to output a conditional branch to LABEL, which is
7627    the operand number of the label.  OP is the conditional expression.
7628    XEXP (OP, 0) is assumed to be a condition code register (integer or
7629    floating point) and its mode specifies what kind of comparison we made.
7630 
7631    DEST is the destination insn (i.e. the label), INSN is the source.
7632 
7633    REVERSED is nonzero if we should reverse the sense of the comparison.
7634 
7635    ANNUL is nonzero if we should generate an annulling branch.  */
7636 
7637 const char *
7638 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7639 		rtx_insn *insn)
7640 {
7641   static char string[64];
7642   enum rtx_code code = GET_CODE (op);
7643   rtx cc_reg = XEXP (op, 0);
7644   machine_mode mode = GET_MODE (cc_reg);
7645   const char *labelno, *branch;
7646   int spaces = 8, far;
7647   char *p;
7648 
7649   /* v9 branches are limited to +-1MB.  If it is too far away,
7650      change
7651 
7652      bne,pt %xcc, .LC30
7653 
7654      to
7655 
7656      be,pn %xcc, .+12
7657       nop
7658      ba .LC30
7659 
7660      and
7661 
7662      fbne,a,pn %fcc2, .LC29
7663 
7664      to
7665 
7666      fbe,pt %fcc2, .+16
7667       nop
7668      ba .LC29  */
7669 
7670   far = TARGET_V9 && (get_attr_length (insn) >= 3);
7671   if (reversed ^ far)
7672     {
7673       /* Reversal of FP compares takes care -- an ordered compare
7674 	 becomes an unordered compare and vice versa.  */
7675       if (mode == CCFPmode || mode == CCFPEmode)
7676 	code = reverse_condition_maybe_unordered (code);
7677       else
7678 	code = reverse_condition (code);
7679     }
7680 
7681   /* Start by writing the branch condition.  */
7682   if (mode == CCFPmode || mode == CCFPEmode)
7683     {
7684       switch (code)
7685 	{
7686 	case NE:
7687 	  branch = "fbne";
7688 	  break;
7689 	case EQ:
7690 	  branch = "fbe";
7691 	  break;
7692 	case GE:
7693 	  branch = "fbge";
7694 	  break;
7695 	case GT:
7696 	  branch = "fbg";
7697 	  break;
7698 	case LE:
7699 	  branch = "fble";
7700 	  break;
7701 	case LT:
7702 	  branch = "fbl";
7703 	  break;
7704 	case UNORDERED:
7705 	  branch = "fbu";
7706 	  break;
7707 	case ORDERED:
7708 	  branch = "fbo";
7709 	  break;
7710 	case UNGT:
7711 	  branch = "fbug";
7712 	  break;
7713 	case UNLT:
7714 	  branch = "fbul";
7715 	  break;
7716 	case UNEQ:
7717 	  branch = "fbue";
7718 	  break;
7719 	case UNGE:
7720 	  branch = "fbuge";
7721 	  break;
7722 	case UNLE:
7723 	  branch = "fbule";
7724 	  break;
7725 	case LTGT:
7726 	  branch = "fblg";
7727 	  break;
7728 
7729 	default:
7730 	  gcc_unreachable ();
7731 	}
7732 
7733       /* ??? !v9: FP branches cannot be preceded by another floating point
7734 	 insn.  Because there is currently no concept of pre-delay slots,
7735 	 we can fix this only by always emitting a nop before a floating
7736 	 point branch.  */
7737 
7738       string[0] = '\0';
7739       if (! TARGET_V9)
7740 	strcpy (string, "nop\n\t");
7741       strcat (string, branch);
7742     }
7743   else
7744     {
7745       switch (code)
7746 	{
7747 	case NE:
7748 	  branch = "bne";
7749 	  break;
7750 	case EQ:
7751 	  branch = "be";
7752 	  break;
7753 	case GE:
7754 	  if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7755 	    branch = "bpos";
7756 	  else
7757 	    branch = "bge";
7758 	  break;
7759 	case GT:
7760 	  branch = "bg";
7761 	  break;
7762 	case LE:
7763 	  branch = "ble";
7764 	  break;
7765 	case LT:
7766 	  if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
7767 	    branch = "bneg";
7768 	  else
7769 	    branch = "bl";
7770 	  break;
7771 	case GEU:
7772 	  branch = "bgeu";
7773 	  break;
7774 	case GTU:
7775 	  branch = "bgu";
7776 	  break;
7777 	case LEU:
7778 	  branch = "bleu";
7779 	  break;
7780 	case LTU:
7781 	  branch = "blu";
7782 	  break;
7783 
7784 	default:
7785 	  gcc_unreachable ();
7786 	}
7787       strcpy (string, branch);
7788     }
7789   spaces -= strlen (branch);
7790   p = strchr (string, '\0');
7791 
7792   /* Now add the annulling, the label, and a possible noop.  */
7793   if (annul && ! far)
7794     {
7795       strcpy (p, ",a");
7796       p += 2;
7797       spaces -= 2;
7798     }
7799 
7800   if (TARGET_V9)
7801     {
7802       rtx note;
7803       int v8 = 0;
7804 
7805       if (! far && insn && INSN_ADDRESSES_SET_P ())
7806 	{
7807 	  int delta = (INSN_ADDRESSES (INSN_UID (dest))
7808 		       - INSN_ADDRESSES (INSN_UID (insn)));
7809 	  /* Leave some instructions for "slop".  */
7810 	  if (delta < -260000 || delta >= 260000)
7811 	    v8 = 1;
7812 	}
7813 
7814       if (mode == CCFPmode || mode == CCFPEmode)
7815 	{
7816 	  static char v9_fcc_labelno[] = "%%fccX, ";
7817 	  /* Set the char indicating the number of the fcc reg to use.  */
7818 	  v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
7819 	  labelno = v9_fcc_labelno;
7820 	  if (v8)
7821 	    {
7822 	      gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
7823 	      labelno = "";
7824 	    }
7825 	}
7826       else if (mode == CCXmode || mode == CCX_NOOVmode)
7827 	{
7828 	  labelno = "%%xcc, ";
7829 	  gcc_assert (! v8);
7830 	}
7831       else
7832 	{
7833 	  labelno = "%%icc, ";
7834 	  if (v8)
7835 	    labelno = "";
7836 	}
7837 
7838       if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
7839 	{
7840 	  strcpy (p,
7841 		  ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
7842 		  ? ",pt" : ",pn");
7843 	  p += 3;
7844 	  spaces -= 3;
7845 	}
7846     }
7847   else
7848     labelno = "";
7849 
7850   if (spaces > 0)
7851     *p++ = '\t';
7852   else
7853     *p++ = ' ';
7854   strcpy (p, labelno);
7855   p = strchr (p, '\0');
7856   if (far)
7857     {
7858       strcpy (p, ".+12\n\t nop\n\tb\t");
7859       /* Skip the next insn if requested or
7860 	 if we know that it will be a nop.  */
7861       if (annul || ! final_sequence)
7862         p[3] = '6';
7863       p += 14;
7864     }
7865   *p++ = '%';
7866   *p++ = 'l';
7867   *p++ = label + '0';
7868   *p++ = '%';
7869   *p++ = '#';
7870   *p = '\0';
7871 
7872   return string;
7873 }
7874 
7875 /* Emit a library call comparison between floating point X and Y.
7876    COMPARISON is the operator to compare with (EQ, NE, GT, etc).
7877    Return the new operator to be used in the comparison sequence.
7878 
7879    TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
7880    values as arguments instead of the TFmode registers themselves;
7881    that's why we cannot call emit_float_lib_cmp.  */
7882 
7883 rtx
7884 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
7885 {
7886   const char *qpfunc;
7887   rtx slot0, slot1, result, tem, tem2, libfunc;
7888   machine_mode mode;
7889   enum rtx_code new_comparison;
7890 
7891   switch (comparison)
7892     {
7893     case EQ:
7894       qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
7895       break;
7896 
7897     case NE:
7898       qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
7899       break;
7900 
7901     case GT:
7902       qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
7903       break;
7904 
7905     case GE:
7906       qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
7907       break;
7908 
7909     case LT:
7910       qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
7911       break;
7912 
7913     case LE:
7914       qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
7915       break;
7916 
7917     case ORDERED:
7918     case UNORDERED:
7919     case UNGT:
7920     case UNLT:
7921     case UNEQ:
7922     case UNGE:
7923     case UNLE:
7924     case LTGT:
7925       qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
7926       break;
7927 
7928     default:
7929       gcc_unreachable ();
7930     }
7931 
7932   if (TARGET_ARCH64)
7933     {
7934       if (MEM_P (x))
7935 	{
7936 	  tree expr = MEM_EXPR (x);
7937 	  if (expr)
7938 	    mark_addressable (expr);
7939 	  slot0 = x;
7940 	}
7941       else
7942 	{
7943 	  slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7944 	  emit_move_insn (slot0, x);
7945 	}
7946 
7947       if (MEM_P (y))
7948 	{
7949 	  tree expr = MEM_EXPR (y);
7950 	  if (expr)
7951 	    mark_addressable (expr);
7952 	  slot1 = y;
7953 	}
7954       else
7955 	{
7956 	  slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
7957 	  emit_move_insn (slot1, y);
7958 	}
7959 
7960       libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7961       emit_library_call (libfunc, LCT_NORMAL,
7962 			 DImode, 2,
7963 			 XEXP (slot0, 0), Pmode,
7964 			 XEXP (slot1, 0), Pmode);
7965       mode = DImode;
7966     }
7967   else
7968     {
7969       libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
7970       emit_library_call (libfunc, LCT_NORMAL,
7971 			 SImode, 2,
7972 			 x, TFmode, y, TFmode);
7973       mode = SImode;
7974     }
7975 
7976 
7977   /* Immediately move the result of the libcall into a pseudo
7978      register so reload doesn't clobber the value if it needs
7979      the return register for a spill reg.  */
7980   result = gen_reg_rtx (mode);
7981   emit_move_insn (result, hard_libcall_value (mode, libfunc));
7982 
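  /* Decode the libcall result.  The Sun software floating-point
     comparison routines (_Q_cmp/_Qp_cmp) are assumed to return 0 for
     equal, 1 for less, 2 for greater and 3 for unordered; the magic
     constants below rely on that encoding.  */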
7983   switch (comparison)
7984     {
7985     default:
7986       return gen_rtx_NE (VOIDmode, result, const0_rtx);
7987     case ORDERED:
7988     case UNORDERED:
7989       new_comparison = (comparison == UNORDERED ? EQ : NE);
7990       return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
7991     case UNGT:
7992     case UNGE:
7993       new_comparison = (comparison == UNGT ? GT : NE);
7994       return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
7995     case UNLE:
7996       return gen_rtx_NE (VOIDmode, result, const2_rtx);
7997     case UNLT:
7998       tem = gen_reg_rtx (mode);
7999       if (TARGET_ARCH32)
8000 	emit_insn (gen_andsi3 (tem, result, const1_rtx));
8001       else
8002 	emit_insn (gen_anddi3 (tem, result, const1_rtx));
8003       return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8004     case UNEQ:
8005     case LTGT:
8006       tem = gen_reg_rtx (mode);
8007       if (TARGET_ARCH32)
8008 	emit_insn (gen_addsi3 (tem, result, const1_rtx));
8009       else
8010 	emit_insn (gen_adddi3 (tem, result, const1_rtx));
8011       tem2 = gen_reg_rtx (mode);
8012       if (TARGET_ARCH32)
8013 	emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8014       else
8015 	emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8016       new_comparison = (comparison == UNEQ ? EQ : NE);
8017       return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8018     }
8019 
8020   gcc_unreachable ();
8021 }
8022 
8023 /* Generate an unsigned DImode to FP conversion.  This is the same code
8024    optabs would emit if we didn't have TFmode patterns.  */
8025 
8026 void
8027 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8028 {
8029   rtx i0, i1, f0, in, out;
8030 
8031   out = operands[0];
8032   in = force_reg (DImode, operands[1]);
8033   rtx_code_label *neglab = gen_label_rtx ();
8034   rtx_code_label *donelab = gen_label_rtx ();
8035   i0 = gen_reg_rtx (DImode);
8036   i1 = gen_reg_rtx (DImode);
8037   f0 = gen_reg_rtx (mode);
8038 
8039   emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8040 
8041   emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8042   emit_jump_insn (gen_jump (donelab));
8043   emit_barrier ();
8044 
8045   emit_label (neglab);
8046 
8047   emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8048   emit_insn (gen_anddi3 (i1, in, const1_rtx));
8049   emit_insn (gen_iordi3 (i0, i0, i1));
8050   emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8051   emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8052 
8053   emit_label (donelab);
8054 }
8055 
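/* A sketch of the negative path above (illustrative only): when IN has
   its top bit set it cannot be converted as a signed value directly, so
   it is halved with the low bit folded back in as a "sticky" bit for
   correct rounding, converted, then doubled again:

	i0  = (in >> 1) | (in & 1);
	f0  = (FP) (long long) i0;
	out = f0 + f0;

   e.g. in == 2^63 yields i0 == 2^62 and out == 2^63 exactly.  */
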
8056 /* Generate an FP to unsigned DImode conversion.  This is the same code
8057    optabs would emit if we didn't have TFmode patterns.  */
8058 
8059 void
8060 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8061 {
8062   rtx i0, i1, f0, in, out, limit;
8063 
8064   out = operands[0];
8065   in = force_reg (mode, operands[1]);
8066   rtx_code_label *neglab = gen_label_rtx ();
8067   rtx_code_label *donelab = gen_label_rtx ();
8068   i0 = gen_reg_rtx (DImode);
8069   i1 = gen_reg_rtx (DImode);
8070   limit = gen_reg_rtx (mode);
8071   f0 = gen_reg_rtx (mode);
8072 
8073   emit_move_insn (limit,
8074 		  const_double_from_real_value (
8075 		    REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8076   emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8077 
8078   emit_insn (gen_rtx_SET (out,
8079 			  gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8080   emit_jump_insn (gen_jump (donelab));
8081   emit_barrier ();
8082 
8083   emit_label (neglab);
8084 
8085   emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8086   emit_insn (gen_rtx_SET (i0,
8087 			  gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8088   emit_insn (gen_movdi (i1, const1_rtx));
8089   emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8090   emit_insn (gen_xordi3 (out, i0, i1));
8091 
8092   emit_label (donelab);
8093 }
8094 
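/* A sketch of the negative path above (illustrative only): values
   >= 2^63 do not fit a signed fix, so 2^63 is subtracted first, the
   difference converted, and the top bit added back with an XOR:

	f0  = in - 9223372036854775808.0;	(in - 2^63)
	i0  = (long long) f0;
	out = i0 ^ ((unsigned long long) 1 << 63);

   e.g. in == 2^63 yields f0 == 0.0 and out == 0x8000000000000000.  */
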
8095 /* Return the string to output a compare and branch instruction to DEST.
8096    DEST is the destination insn (i.e. the label), INSN is the source,
8097    and OP is the conditional expression.  */
8098 
8099 const char *
8100 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8101 {
8102   machine_mode mode = GET_MODE (XEXP (op, 0));
8103   enum rtx_code code = GET_CODE (op);
8104   const char *cond_str, *tmpl;
8105   int far, emit_nop, len;
8106   static char string[64];
8107   char size_char;
8108 
8109   /* Compare-and-branch instructions are limited to +-2KB.  If the
8110      target is too far away, change
8111 
8112      cxbne X, Y, .LC30
8113 
8114      to
8115 
8116      cxbe X, Y, .+16
8117      nop
8118      ba,pt xcc, .LC30
8119       nop  */
8120 
8121   len = get_attr_length (insn);
8122 
8123   far = len == 4;
8124   emit_nop = len == 2;
8125 
8126   if (far)
8127     code = reverse_condition (code);
8128 
8129   size_char = ((mode == SImode) ? 'w' : 'x');
8130 
8131   switch (code)
8132     {
8133     case NE:
8134       cond_str = "ne";
8135       break;
8136 
8137     case EQ:
8138       cond_str = "e";
8139       break;
8140 
8141     case GE:
8142       if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8143 	cond_str = "pos";
8144       else
8145 	cond_str = "ge";
8146       break;
8147 
8148     case GT:
8149       cond_str = "g";
8150       break;
8151 
8152     case LE:
8153       cond_str = "le";
8154       break;
8155 
8156     case LT:
8157       if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
8158 	cond_str = "neg";
8159       else
8160 	cond_str = "l";
8161       break;
8162 
8163     case GEU:
8164       cond_str = "cc";
8165       break;
8166 
8167     case GTU:
8168       cond_str = "gu";
8169       break;
8170 
8171     case LEU:
8172       cond_str = "leu";
8173       break;
8174 
8175     case LTU:
8176       cond_str = "cs";
8177       break;
8178 
8179     default:
8180       gcc_unreachable ();
8181     }
8182 
8183   if (far)
8184     {
8185       int veryfar = 1, delta;
8186 
8187       if (INSN_ADDRESSES_SET_P ())
8188 	{
8189 	  delta = (INSN_ADDRESSES (INSN_UID (dest))
8190 		   - INSN_ADDRESSES (INSN_UID (insn)));
8191 	  /* Leave some instructions for "slop".  */
8192 	  if (delta >= -260000 && delta < 260000)
8193 	    veryfar = 0;
8194 	}
8195 
8196       if (veryfar)
8197 	tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8198       else
8199 	tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8200     }
8201   else
8202     {
8203       if (emit_nop)
8204 	tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8205       else
8206 	tmpl = "c%cb%s\t%%1, %%2, %%3";
8207     }
8208 
8209   snprintf (string, sizeof (string), tmpl, size_char, cond_str);
8210 
8211   return string;
8212 }
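
/* For example, with an SImode NE comparison and a filled delay slot,
   the template above expands to

     cwbne	%1, %2, %3

   while the same test in DImode with an empty slot gets a trailing nop:

     cxbne	%1, %2, %3
     nop  */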
8213 
8214 /* Return the string to output a conditional branch to LABEL, testing
8215    register REG.  LABEL is the operand number of the label; REG is the
8216    operand number of the reg.  OP is the conditional expression.  The mode
8217    of REG says what kind of comparison we made.
8218 
8219    DEST is the destination insn (i.e. the label), INSN is the source.
8220 
8221    REVERSED is nonzero if we should reverse the sense of the comparison.
8222 
8223    ANNUL is nonzero if we should generate an annulling branch.  */
8224 
8225 const char *
8226 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8227 		 int annul, rtx_insn *insn)
8228 {
8229   static char string[64];
8230   enum rtx_code code = GET_CODE (op);
8231   machine_mode mode = GET_MODE (XEXP (op, 0));
8232   rtx note;
8233   int far;
8234   char *p;
8235 
8236   /* Branch-on-register instructions are limited to +-128KB.  If the
8237      target is too far away, change
8238 
8239      brnz,pt %g1, .LC30
8240 
8241      to
8242 
8243      brz,pn %g1, .+12
8244       nop
8245      ba,pt %xcc, .LC30
8246 
8247      and
8248 
8249      brgez,a,pn %o1, .LC29
8250 
8251      to
8252 
8253      brlz,pt %o1, .+16
8254       nop
8255      ba,pt %xcc, .LC29  */
8256 
8257   far = get_attr_length (insn) >= 3;
8258 
8259   /* These are integer register branches, so we can always reverse the code.  */
8260   if (reversed ^ far)
8261     code = reverse_condition (code);
8262 
8263   /* Only 64 bit versions of these instructions exist.  */
8264   gcc_assert (mode == DImode);
8265 
8266   /* Start by writing the branch condition.  */
8267 
8268   switch (code)
8269     {
8270     case NE:
8271       strcpy (string, "brnz");
8272       break;
8273 
8274     case EQ:
8275       strcpy (string, "brz");
8276       break;
8277 
8278     case GE:
8279       strcpy (string, "brgez");
8280       break;
8281 
8282     case LT:
8283       strcpy (string, "brlz");
8284       break;
8285 
8286     case LE:
8287       strcpy (string, "brlez");
8288       break;
8289 
8290     case GT:
8291       strcpy (string, "brgz");
8292       break;
8293 
8294     default:
8295       gcc_unreachable ();
8296     }
8297 
8298   p = strchr (string, '\0');
8299 
8300   /* Now add the annulling, reg, label, and nop.  */
8301   if (annul && ! far)
8302     {
8303       strcpy (p, ",a");
8304       p += 2;
8305     }
8306 
8307   if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8308     {
8309       strcpy (p,
8310 	      ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
8311 	      ? ",pt" : ",pn");
8312       p += 3;
8313     }
8314 
8315   *p = p < string + 8 ? '\t' : ' ';
8316   p++;
8317   *p++ = '%';
8318   *p++ = '0' + reg;
8319   *p++ = ',';
8320   *p++ = ' ';
8321   if (far)
8322     {
8323       int veryfar = 1, delta;
8324 
8325       if (INSN_ADDRESSES_SET_P ())
8326 	{
8327 	  delta = (INSN_ADDRESSES (INSN_UID (dest))
8328 		   - INSN_ADDRESSES (INSN_UID (insn)));
8329 	  /* Leave some instructions for "slop".  */
8330 	  if (delta >= -260000 && delta < 260000)
8331 	    veryfar = 0;
8332 	}
8333 
8334       strcpy (p, ".+12\n\t nop\n\t");
8335       /* Skip the next insn if requested or if we know that it will
8336 	 be a nop, i.e. turn the ".+12" offset into ".+16".  */
8337       if (annul || ! final_sequence)
8338         p[3] = '6';
8339       p += 12;
8340       if (veryfar)
8341 	{
8342 	  strcpy (p, "b\t");
8343 	  p += 2;
8344 	}
8345       else
8346 	{
8347 	  strcpy (p, "ba,pt\t%%xcc, ");
8348 	  p += 13;
8349 	}
8350     }
8351   *p++ = '%';
8352   *p++ = 'l';
8353   *p++ = '0' + label;
8354   *p++ = '%';
8355   *p++ = '#';
8356   *p = '\0';
8357 
8358   return string;
8359 }
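
/* As an example, a near NE branch with ANNUL set, a taken-branch
   probability note, REG == 1 and LABEL == 0 is assembled above as

     brnz,a,pt %1, %l0%#

   where the trailing %# lets final print the delay-slot insn or a
   nop.  */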
8360 
8361 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8362    Such instructions cannot be used in the delay slot of a return insn on V9.
8363    If TEST is 0, also rename all %i[0-7] registers to their %o[0-7]
8364    counterparts.  */
8365 
8366 static int
8367 epilogue_renumber (register rtx *where, int test)
8368 {
8369   register const char *fmt;
8370   register int i;
8371   register enum rtx_code code;
8372 
8373   if (*where == 0)
8374     return 0;
8375 
8376   code = GET_CODE (*where);
8377 
8378   switch (code)
8379     {
8380     case REG:
8381       if (REGNO (*where) >= 8 && REGNO (*where) < 24)      /* oX or lX */
8382 	return 1;
8383       if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8384 	*where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
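      /* Otherwise fall through: the remaining registers are acceptable.  */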
8385     case SCRATCH:
8386     case CC0:
8387     case PC:
8388     case CONST_INT:
8389     case CONST_WIDE_INT:
8390     case CONST_DOUBLE:
8391       return 0;
8392 
8393       /* Do not replace the frame pointer with the stack pointer because
8394 	 it can cause the delayed instruction to load below the stack.
8395 	 This occurs when instructions like:
8396 
8397 	 (set (reg/i:SI 24 %i0)
8398 	     (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8399                        (const_int -20 [0xffffffec])) 0))
8400 
8401 	 are in the return delay slot.  */
8402     case PLUS:
8403       if (GET_CODE (XEXP (*where, 0)) == REG
8404 	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8405 	  && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8406 	      || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8407 	return 1;
8408       break;
8409 
8410     case MEM:
8411       if (SPARC_STACK_BIAS
8412 	  && GET_CODE (XEXP (*where, 0)) == REG
8413 	  && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8414 	return 1;
8415       break;
8416 
8417     default:
8418       break;
8419     }
8420 
8421   fmt = GET_RTX_FORMAT (code);
8422 
8423   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8424     {
8425       if (fmt[i] == 'E')
8426 	{
8427 	  register int j;
8428 	  for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8429 	    if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8430 	      return 1;
8431 	}
8432       else if (fmt[i] == 'e'
8433 	       && epilogue_renumber (&(XEXP (*where, i)), test))
8434 	return 1;
8435     }
8436   return 0;
8437 }
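
/* The %i -> %o renaming above relies on the fixed numbering of the
   SPARC integer registers: 0-7 are %g0-%g7, 8-15 are %o0-%o7, 16-23
   are %l0-%l7 and 24-31 are %i0-%i7, so OUTGOING_REGNO maps registers
   in that last range down by 16, e.g. (reg:SI 24 %i0) becomes
   (reg:SI 8 %o0).  */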
8438 
8439 /* Leaf functions and non-leaf functions have different needs.  */
8440 
8441 static const int
8442 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8443 
8444 static const int
8445 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8446 
8447 static const int *const reg_alloc_orders[] = {
8448   reg_leaf_alloc_order,
8449   reg_nonleaf_alloc_order};
8450 
8451 void
8452 order_regs_for_local_alloc (void)
8453 {
8454   static int last_order_nonleaf = 1;
8455 
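  /* Hard register 15 is %o7, which every call instruction clobbers with
     the return address, so it is recorded as ever-live in functions
     that make calls; that is what distinguishes non-leaf functions.  */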
8456   if (df_regs_ever_live_p (15) != last_order_nonleaf)
8457     {
8458       last_order_nonleaf = !last_order_nonleaf;
8459       memcpy ((char *) reg_alloc_order,
8460 	      (const char *) reg_alloc_orders[last_order_nonleaf],
8461 	      FIRST_PSEUDO_REGISTER * sizeof (int));
8462     }
8463 }
8464 
8465 /* Return 1 if REG and MEM are legitimate enough to allow the various
8466    mem<-->reg splits to be run.  */
8467 
8468 int
8469 sparc_splitdi_legitimate (rtx reg, rtx mem)
8470 {
8471   /* Punt if we are here by mistake.  */
8472   gcc_assert (reload_completed);
8473 
8474   /* We must have an offsettable memory reference.  */
8475   if (! offsettable_memref_p (mem))
8476     return 0;
8477 
8478   /* If we have legitimate args for ldd/std, we do not want
8479      the split to happen.  */
8480   if ((REGNO (reg) % 2) == 0
8481       && mem_min_alignment (mem, 8))
8482     return 0;
8483 
8484   /* Success.  */
8485   return 1;
8486 }
8487 
8488 /* Like sparc_splitdi_legitimate but for REG <--> REG moves.  */
8489 
8490 int
8491 sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
8492 {
8493   int regno1, regno2;
8494 
8495   if (GET_CODE (reg1) == SUBREG)
8496     reg1 = SUBREG_REG (reg1);
8497   if (GET_CODE (reg1) != REG)
8498     return 0;
8499   regno1 = REGNO (reg1);
8500 
8501   if (GET_CODE (reg2) == SUBREG)
8502     reg2 = SUBREG_REG (reg2);
8503   if (GET_CODE (reg2) != REG)
8504     return 0;
8505   regno2 = REGNO (reg2);
8506 
8507   if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8508     return 1;
8509 
8510   if (TARGET_VIS3)
8511     {
8512       if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8513 	  || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8514 	return 1;
8515     }
8516 
8517   return 0;
8518 }
8519 
8520 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8521    This makes them candidates for using ldd and std insns.
8522 
8523    Note reg1 and reg2 *must* be hard registers.  */
8524 
8525 int
8526 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8527 {
8528   /* We might have been passed a SUBREG.  */
8529   if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8530     return 0;
8531 
8532   if (REGNO (reg1) % 2 != 0)
8533     return 0;
8534 
8535   /* Integer ldd is deprecated in SPARC V9.  */
8536   if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8537     return 0;
8538 
8539   return (REGNO (reg1) == REGNO (reg2) - 1);
8540 }
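
/* For example, on a pre-V9 target the pair (%o0, %o1), i.e. hard
   registers 8 and 9, qualifies, while (%o1, %o2) fails the evenness
   test and any integer pair is rejected on V9.  */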
8541 
8542 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8543    an ldd or std insn.
8544 
8545    This can only happen when addr1 and addr2, the addresses in mem1
8546    and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8547    addr1 must also be aligned on a 64-bit boundary.
8548 
8549    Also iff dependent_reg_rtx is not null it should not be used to
8550    compute the address for mem1, i.e. we cannot optimize a sequence
8551    like:
8552    	ld [%o0], %o0
8553 	ld [%o0 + 4], %o1
8554    to
8555    	ldd [%o0], %o0
8556    nor:
8557 	ld [%g3 + 4], %g3
8558 	ld [%g3], %g2
8559    to
8560         ldd [%g3], %g2
8561 
8562    But, note that the transformation from:
8563 	ld [%g2 + 4], %g3
8564         ld [%g2], %g2
8565    to
8566 	ldd [%g2], %g2
8567    is perfectly fine.  Thus, the peephole2 patterns always pass us
8568    the destination register of the first load, never the second one.
8569 
8570    For stores we don't have a similar problem, so dependent_reg_rtx is
8571    NULL_RTX.  */
8572 
8573 int
8574 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8575 {
8576   rtx addr1, addr2;
8577   unsigned int reg1;
8578   HOST_WIDE_INT offset1;
8579 
8580   /* The mems cannot be volatile.  */
8581   if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8582     return 0;
8583 
8584   /* MEM1 should be aligned on a 64-bit boundary.  */
8585   if (MEM_ALIGN (mem1) < 64)
8586     return 0;
8587 
8588   addr1 = XEXP (mem1, 0);
8589   addr2 = XEXP (mem2, 0);
8590 
8591   /* Extract a register number and offset (if used) from the first addr.  */
8592   if (GET_CODE (addr1) == PLUS)
8593     {
8594       /* If not a REG, return zero.  */
8595       if (GET_CODE (XEXP (addr1, 0)) != REG)
8596 	return 0;
8597       else
8598 	{
8599           reg1 = REGNO (XEXP (addr1, 0));
8600 	  /* The offset must be constant!  */
8601 	  if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8602             return 0;
8603           offset1 = INTVAL (XEXP (addr1, 1));
8604 	}
8605     }
8606   else if (GET_CODE (addr1) != REG)
8607     return 0;
8608   else
8609     {
8610       reg1 = REGNO (addr1);
8611       /* This was a simple (mem (reg)) expression.  Offset is 0.  */
8612       offset1 = 0;
8613     }
8614 
8615   /* Make sure the second address is of the form (plus (reg) (const_int)).  */
8616   if (GET_CODE (addr2) != PLUS)
8617     return 0;
8618 
8619   if (GET_CODE (XEXP (addr2, 0)) != REG
8620       || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8621     return 0;
8622 
8623   if (reg1 != REGNO (XEXP (addr2, 0)))
8624     return 0;
8625 
8626   if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8627     return 0;
8628 
8629   /* The first offset must be evenly divisible by 8 to ensure the
8630      address is 64 bit aligned.  */
8631   if (offset1 % 8 != 0)
8632     return 0;
8633 
8634   /* The offset for the second addr must be 4 more than the first addr.  */
8635   if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8636     return 0;
8637 
8638   /* All the tests passed.  addr1 and addr2 are valid for ldd and std
8639      instructions.  */
8640   return 1;
8641 }
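
/* To illustrate: [%o0 + 8] followed by [%o0 + 12] passes all of the
   tests above (same base register, 8-byte-aligned first offset, second
   offset 4 higher), whereas [%o0 + 4] followed by [%o0 + 8] fails the
   divisibility check on the first offset.  */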
8642 
8643 /* Return the widened memory access made of MEM1 and MEM2 in MODE.  */
8644 
8645 rtx
8646 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
8647 {
8648   rtx x = widen_memory_access (mem1, mode, 0);
8649   MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8650   return x;
8651 }
8652 
8653 /* Return 1 if reg is a pseudo, or is the first register in
8654    a hard register pair.  This makes it suitable for use in
8655    ldd and std insns.  */
8656 
8657 int
8658 register_ok_for_ldd (rtx reg)
8659 {
8660   /* We might have been passed a SUBREG.  */
8661   if (!REG_P (reg))
8662     return 0;
8663 
8664   if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8665     return (REGNO (reg) % 2 == 0);
8666 
8667   return 1;
8668 }
8669 
8670 /* Return 1 if OP, a MEM, has an address which is known to be
8671    aligned to an 8-byte boundary.  */
8672 
8673 int
8674 memory_ok_for_ldd (rtx op)
8675 {
8676   /* In 64-bit mode, we assume that the address is word-aligned.  */
8677   if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8678     return 0;
8679 
8680   if (! can_create_pseudo_p ()
8681       && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8682     return 0;
8683 
8684   return 1;
8685 }
8686 
8687 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P.  */
8688 
8689 static bool
8690 sparc_print_operand_punct_valid_p (unsigned char code)
8691 {
8692   if (code == '#'
8693       || code == '*'
8694       || code == '('
8695       || code == ')'
8696       || code == '_'
8697       || code == '&')
8698     return true;
8699 
8700   return false;
8701 }
8702 
8703 /* Implement TARGET_PRINT_OPERAND.
8704    Print operand X (an rtx) in assembler syntax to file FILE.
8705    CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8706    For `%' followed by punctuation, CODE is the punctuation and X is null.  */
8707 
8708 static void
8709 sparc_print_operand (FILE *file, rtx x, int code)
8710 {
8711   switch (code)
8712     {
8713     case '#':
8714       /* Output an insn in a delay slot.  */
8715       if (final_sequence)
8716         sparc_indent_opcode = 1;
8717       else
8718 	fputs ("\n\t nop", file);
8719       return;
8720     case '*':
8721       /* Output an annul flag if there's nothing for the delay slot and we
8722 	 are optimizing.  This is always used with '(' below.
8723          Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8724 	 this is a dbx bug.  So, we only do this when optimizing.
8725          On UltraSPARC, a branch in a delay slot causes a pipeline flush.
8726 	 Always emit a nop in case the next instruction is a branch.  */
8727       if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
8728 	fputs (",a", file);
8729       return;
8730     case '(':
8731       /* Output a 'nop' if there's nothing for the delay slot and we are
8732 	 not optimizing.  This is always used with '*' above.  */
8733       if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
8734 	fputs ("\n\t nop", file);
8735       else if (final_sequence)
8736         sparc_indent_opcode = 1;
8737       return;
8738     case ')':
8739       /* Output the right displacement from the saved PC on function return.
8740 	 The caller may have placed an "unimp" insn immediately after the call
8741 	 so we have to account for it.  This insn is used in the 32-bit ABI
8742 	 when calling a function that returns a non zero-sized structure.  The
8743 	 64-bit ABI doesn't have it.  Be careful to have this test be the same
8744 	 as that for the call.  The exception is when sparc_std_struct_return
8745 	 is enabled, the psABI is followed exactly and the adjustment is made
8746 	 by the code in sparc_struct_value_rtx.  The call emitted is the same
8747 	 when sparc_std_struct_return is enabled. */
8748      if (!TARGET_ARCH64
8749 	 && cfun->returns_struct
8750 	 && !sparc_std_struct_return
8751 	 && DECL_SIZE (DECL_RESULT (current_function_decl))
8752 	 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
8753 	     == INTEGER_CST
8754 	 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
8755 	fputs ("12", file);
8756       else
8757         fputc ('8', file);
8758       return;
8759     case '_':
8760       /* Output the Embedded Medium/Anywhere code model base register.  */
8761       fputs (EMBMEDANY_BASE_REG, file);
8762       return;
8763     case '&':
8764       /* Print some local dynamic TLS name.  */
8765       if (const char *name = get_some_local_dynamic_name ())
8766 	assemble_name (file, name);
8767       else
8768 	output_operand_lossage ("'%%&' used without any "
8769 				"local dynamic TLS references");
8770       return;
8771 
8772     case 'Y':
8773       /* Adjust the operand to take into account a RESTORE operation.  */
8774       if (GET_CODE (x) == CONST_INT)
8775 	break;
8776       else if (GET_CODE (x) != REG)
8777 	output_operand_lossage ("invalid %%Y operand");
8778       else if (REGNO (x) < 8)
8779 	fputs (reg_names[REGNO (x)], file);
8780       else if (REGNO (x) >= 24 && REGNO (x) < 32)
8781 	fputs (reg_names[REGNO (x)-16], file);
8782       else
8783 	output_operand_lossage ("invalid %%Y operand");
8784       return;
8785     case 'L':
8786       /* Print out the low order register name of a register pair.  */
8787       if (WORDS_BIG_ENDIAN)
8788 	fputs (reg_names[REGNO (x)+1], file);
8789       else
8790 	fputs (reg_names[REGNO (x)], file);
8791       return;
8792     case 'H':
8793       /* Print out the high order register name of a register pair.  */
8794       if (WORDS_BIG_ENDIAN)
8795 	fputs (reg_names[REGNO (x)], file);
8796       else
8797 	fputs (reg_names[REGNO (x)+1], file);
8798       return;
8799     case 'R':
8800       /* Print out the second register name of a register pair or quad.
8801 	 I.e., R (%o0) => %o1.  */
8802       fputs (reg_names[REGNO (x)+1], file);
8803       return;
8804     case 'S':
8805       /* Print out the third register name of a register quad.
8806 	 I.e., S (%o0) => %o2.  */
8807       fputs (reg_names[REGNO (x)+2], file);
8808       return;
8809     case 'T':
8810       /* Print out the fourth register name of a register quad.
8811 	 I.e., T (%o0) => %o3.  */
8812       fputs (reg_names[REGNO (x)+3], file);
8813       return;
8814     case 'x':
8815       /* Print a condition code register.  */
8816       if (REGNO (x) == SPARC_ICC_REG)
8817 	{
8818 	  /* We don't handle CC[X]_NOOVmode because they're not supposed
8819 	     to occur here.  */
8820 	  if (GET_MODE (x) == CCmode)
8821 	    fputs ("%icc", file);
8822 	  else if (GET_MODE (x) == CCXmode)
8823 	    fputs ("%xcc", file);
8824 	  else
8825 	    gcc_unreachable ();
8826 	}
8827       else
8828 	/* %fccN register */
8829 	fputs (reg_names[REGNO (x)], file);
8830       return;
8831     case 'm':
8832       /* Print the operand's address only.  */
8833       output_address (GET_MODE (x), XEXP (x, 0));
8834       return;
8835     case 'r':
8836       /* In this case we need a register.  Use %g0 if the
8837 	 operand is const0_rtx.  */
8838       if (x == const0_rtx
8839 	  || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
8840 	{
8841 	  fputs ("%g0", file);
8842 	  return;
8843 	}
8844       else
8845 	break;
8846 
8847     case 'A':
8848       switch (GET_CODE (x))
8849 	{
8850 	case IOR: fputs ("or", file); break;
8851 	case AND: fputs ("and", file); break;
8852 	case XOR: fputs ("xor", file); break;
8853 	default: output_operand_lossage ("invalid %%A operand");
8854 	}
8855       return;
8856 
8857     case 'B':
8858       switch (GET_CODE (x))
8859 	{
8860 	case IOR: fputs ("orn", file); break;
8861 	case AND: fputs ("andn", file); break;
8862 	case XOR: fputs ("xnor", file); break;
8863 	default: output_operand_lossage ("invalid %%B operand");
8864 	}
8865       return;
8866 
8867       /* This is used by the conditional move instructions.  */
8868     case 'C':
8869       {
8870 	enum rtx_code rc = GET_CODE (x);
8871 
8872 	switch (rc)
8873 	  {
8874 	  case NE: fputs ("ne", file); break;
8875 	  case EQ: fputs ("e", file); break;
8876 	  case GE: fputs ("ge", file); break;
8877 	  case GT: fputs ("g", file); break;
8878 	  case LE: fputs ("le", file); break;
8879 	  case LT: fputs ("l", file); break;
8880 	  case GEU: fputs ("geu", file); break;
8881 	  case GTU: fputs ("gu", file); break;
8882 	  case LEU: fputs ("leu", file); break;
8883 	  case LTU: fputs ("lu", file); break;
8884 	  case LTGT: fputs ("lg", file); break;
8885 	  case UNORDERED: fputs ("u", file); break;
8886 	  case ORDERED: fputs ("o", file); break;
8887 	  case UNLT: fputs ("ul", file); break;
8888 	  case UNLE: fputs ("ule", file); break;
8889 	  case UNGT: fputs ("ug", file); break;
8890 	  case UNGE: fputs ("uge", file); break;
8891 	  case UNEQ: fputs ("ue", file); break;
8892 	  default: output_operand_lossage ("invalid %%C operand");
8893 	  }
8894 	return;
8895       }
8896 
8897       /* These are used by the movr instruction pattern.  */
8898     case 'D':
8899       {
8900 	enum rtx_code rc = GET_CODE (x);
8901 	switch (rc)
8902 	  {
8903 	  case NE: fputs ("ne", file); break;
8904 	  case EQ: fputs ("e", file); break;
8905 	  case GE: fputs ("gez", file); break;
8906 	  case LT: fputs ("lz", file); break;
8907 	  case LE: fputs ("lez", file); break;
8908 	  case GT: fputs ("gz", file); break;
8909 	  default: output_operand_lossage ("invalid %%D operand");
8910 	  }
8911 	return;
8912       }
8913 
8914     case 'b':
8915       {
8916 	/* Print a sign-extended character.  */
8917 	int i = trunc_int_for_mode (INTVAL (x), QImode);
8918 	fprintf (file, "%d", i);
8919 	return;
8920       }
8921 
8922     case 'f':
8923       /* Operand must be a MEM; write its address.  */
8924       if (GET_CODE (x) != MEM)
8925 	output_operand_lossage ("invalid %%f operand");
8926       output_address (GET_MODE (x), XEXP (x, 0));
8927       return;
8928 
8929     case 's':
8930       {
8931 	/* Print a sign-extended 32-bit value.  */
8932 	HOST_WIDE_INT i;
8933 	if (GET_CODE (x) == CONST_INT)
8934 	  i = INTVAL (x);
8935 	else
8936 	  {
8937 	    output_operand_lossage ("invalid %%s operand");
8938 	    return;
8939 	  }
8940 	i = trunc_int_for_mode (i, SImode);
8941 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
8942 	return;
8943       }
8944 
8945     case 0:
8946       /* Do nothing special.  */
8947       break;
8948 
8949     default:
8950       /* Undocumented flag.  */
8951       output_operand_lossage ("invalid operand output code");
8952     }
8953 
8954   if (GET_CODE (x) == REG)
8955     fputs (reg_names[REGNO (x)], file);
8956   else if (GET_CODE (x) == MEM)
8957     {
8958       fputc ('[', file);
8959       /* Poor Sun assembler doesn't understand absolute addressing.  */
8960       if (CONSTANT_P (XEXP (x, 0)))
8961 	fputs ("%g0+", file);
8962       output_address (GET_MODE (x), XEXP (x, 0));
8963       fputc (']', file);
8964     }
8965   else if (GET_CODE (x) == HIGH)
8966     {
8967       fputs ("%hi(", file);
8968       output_addr_const (file, XEXP (x, 0));
8969       fputc (')', file);
8970     }
8971   else if (GET_CODE (x) == LO_SUM)
8972     {
8973       sparc_print_operand (file, XEXP (x, 0), 0);
8974       if (TARGET_CM_MEDMID)
8975 	fputs ("+%l44(", file);
8976       else
8977 	fputs ("+%lo(", file);
8978       output_addr_const (file, XEXP (x, 1));
8979       fputc (')', file);
8980     }
8981   else if (GET_CODE (x) == CONST_DOUBLE)
8982     output_operand_lossage ("floating-point constant not a valid immediate operand");
8983   else
8984     output_addr_const (file, x);
8985 }
8986 
8987 /* Implement TARGET_PRINT_OPERAND_ADDRESS.  */
8988 
8989 static void
8990 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
8991 {
8992   register rtx base, index = 0;
8993   int offset = 0;
8994   register rtx addr = x;
8995 
8996   if (REG_P (addr))
8997     fputs (reg_names[REGNO (addr)], file);
8998   else if (GET_CODE (addr) == PLUS)
8999     {
9000       if (CONST_INT_P (XEXP (addr, 0)))
9001 	offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9002       else if (CONST_INT_P (XEXP (addr, 1)))
9003 	offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9004       else
9005 	base = XEXP (addr, 0), index = XEXP (addr, 1);
9006       if (GET_CODE (base) == LO_SUM)
9007 	{
9008 	  gcc_assert (USE_AS_OFFSETABLE_LO10
9009 		      && TARGET_ARCH64
9010 		      && ! TARGET_CM_MEDMID);
9011 	  output_operand (XEXP (base, 0), 0);
9012 	  fputs ("+%lo(", file);
9013 	  output_address (VOIDmode, XEXP (base, 1));
9014 	  fprintf (file, ")+%d", offset);
9015 	}
9016       else
9017 	{
9018 	  fputs (reg_names[REGNO (base)], file);
9019 	  if (index == 0)
9020 	    fprintf (file, "%+d", offset);
9021 	  else if (REG_P (index))
9022 	    fprintf (file, "+%s", reg_names[REGNO (index)]);
9023 	  else if (GET_CODE (index) == SYMBOL_REF
9024 		   || GET_CODE (index) == LABEL_REF
9025 		   || GET_CODE (index) == CONST)
9026 	    fputc ('+', file), output_addr_const (file, index);
9027 	  else gcc_unreachable ();
9028 	}
9029     }
9030   else if (GET_CODE (addr) == MINUS
9031 	   && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9032     {
9033       output_addr_const (file, XEXP (addr, 0));
9034       fputs ("-(", file);
9035       output_addr_const (file, XEXP (addr, 1));
9036       fputs ("-.)", file);
9037     }
9038   else if (GET_CODE (addr) == LO_SUM)
9039     {
9040       output_operand (XEXP (addr, 0), 0);
9041       if (TARGET_CM_MEDMID)
9042         fputs ("+%l44(", file);
9043       else
9044         fputs ("+%lo(", file);
9045       output_address (VOIDmode, XEXP (addr, 1));
9046       fputc (')', file);
9047     }
9048   else if (flag_pic
9049 	   && GET_CODE (addr) == CONST
9050 	   && GET_CODE (XEXP (addr, 0)) == MINUS
9051 	   && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9052 	   && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9053 	   && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9054     {
9055       addr = XEXP (addr, 0);
9056       output_addr_const (file, XEXP (addr, 0));
9057       /* Group the args of the second CONST in parenthesis.  */
9058       fputs ("-(", file);
9059       /* Skip past the second CONST--it does nothing for us.  */
9060       output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9061       /* Close the parenthesis.  */
9062       fputc (')', file);
9063     }
9064   else
9065     {
9066       output_addr_const (file, addr);
9067     }
9068 }
9069 
9070 /* Target hook for assembling integer objects.  The sparc version has
9071    special handling for aligned DI-mode objects.  */
9072 
9073 static bool
9074 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9075 {
9076   /* ??? We only output .xword's for symbols and only then in environments
9077      where the assembler can handle them.  */
9078   if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9079     {
9080       if (TARGET_V9)
9081 	{
9082 	  assemble_integer_with_op ("\t.xword\t", x);
9083 	  return true;
9084 	}
9085       else
9086 	{
9087 	  assemble_aligned_integer (4, const0_rtx);
9088 	  assemble_aligned_integer (4, x);
9089 	  return true;
9090 	}
9091     }
9092   return default_assemble_integer (x, size, aligned_p);
9093 }
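
/* Concretely, a V9 target emits the symbolic case above as

     .xword	sym

   while a V8 target stores the same value as a big-endian pair

     .word	0
     .word	sym  */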
9094 
9095 /* Return the value of a code used in the .proc pseudo-op that says
9096    what kind of result this function returns.  For non-C types, we pick
9097    the closest C type.  */
9098 
9099 #ifndef SHORT_TYPE_SIZE
9100 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9101 #endif
9102 
9103 #ifndef INT_TYPE_SIZE
9104 #define INT_TYPE_SIZE BITS_PER_WORD
9105 #endif
9106 
9107 #ifndef LONG_TYPE_SIZE
9108 #define LONG_TYPE_SIZE BITS_PER_WORD
9109 #endif
9110 
9111 #ifndef LONG_LONG_TYPE_SIZE
9112 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9113 #endif
9114 
9115 #ifndef FLOAT_TYPE_SIZE
9116 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9117 #endif
9118 
9119 #ifndef DOUBLE_TYPE_SIZE
9120 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9121 #endif
9122 
9123 #ifndef LONG_DOUBLE_TYPE_SIZE
9124 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9125 #endif
9126 
9127 unsigned long
9128 sparc_type_code (register tree type)
9129 {
9130   register unsigned long qualifiers = 0;
9131   register unsigned shift;
9132 
9133   /* Only the first 30 bits of the qualifier are valid.  We must refrain from
9134      setting more, since some assemblers will give an error for this.  Also,
9135      we must be careful to avoid shifts of 32 bits or more to avoid getting
9136      unpredictable results.  */
9137 
9138   for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9139     {
9140       switch (TREE_CODE (type))
9141 	{
9142 	case ERROR_MARK:
9143 	  return qualifiers;
9144 
9145 	case ARRAY_TYPE:
9146 	  qualifiers |= (3 << shift);
9147 	  break;
9148 
9149 	case FUNCTION_TYPE:
9150 	case METHOD_TYPE:
9151 	  qualifiers |= (2 << shift);
9152 	  break;
9153 
9154 	case POINTER_TYPE:
9155 	case REFERENCE_TYPE:
9156 	case OFFSET_TYPE:
9157 	  qualifiers |= (1 << shift);
9158 	  break;
9159 
9160 	case RECORD_TYPE:
9161 	  return (qualifiers | 8);
9162 
9163 	case UNION_TYPE:
9164 	case QUAL_UNION_TYPE:
9165 	  return (qualifiers | 9);
9166 
9167 	case ENUMERAL_TYPE:
9168 	  return (qualifiers | 10);
9169 
9170 	case VOID_TYPE:
9171 	  return (qualifiers | 16);
9172 
9173 	case INTEGER_TYPE:
9174 	  /* If this is a range type, consider it to be the underlying
9175 	     type.  */
9176 	  if (TREE_TYPE (type) != 0)
9177 	    break;
9178 
9179 	  /* Carefully distinguish all the standard types of C,
9180 	     without messing up if the language is not C.  We do this by
9181 	     testing TYPE_PRECISION and TYPE_UNSIGNED.  The old code used to
9182 	     look at both the names and the above fields, but that's redundant.
9183 	     Any type whose size is between two C types will be considered
9184 	     to be the wider of the two types.  Also, we do not have a
9185 	     special code to use for "long long", so anything wider than
9186 	     long is treated the same.  Note that we can't distinguish
9187 	     between "int" and "long" in this code if they are the same
9188 	     size, but that's fine, since neither can the assembler.  */
9189 
9190 	  if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9191 	    return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9192 
9193 	  else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9194 	    return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9195 
9196 	  else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9197 	    return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9198 
9199 	  else
9200 	    return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9201 
9202 	case REAL_TYPE:
9203 	  /* If this is a range type, consider it to be the underlying
9204 	     type.  */
9205 	  if (TREE_TYPE (type) != 0)
9206 	    break;
9207 
9208 	  /* Carefully distinguish all the standard types of C,
9209 	     without messing up if the language is not C.  */
9210 
9211 	  if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9212 	    return (qualifiers | 6);
9213 
9214 	  else
9215 	    return (qualifiers | 7);
9216 
9217 	case COMPLEX_TYPE:	/* GNU Fortran COMPLEX type.  */
9218 	  /* ??? We need to distinguish between double and float complex types,
9219 	     but I don't know how yet because I can't reach this code from
9220 	     existing front-ends.  */
9221 	  return (qualifiers | 7);	/* Who knows? */
9222 
9223 	case VECTOR_TYPE:
9224 	case BOOLEAN_TYPE:	/* Boolean truth value type.  */
9225 	case LANG_TYPE:
9226 	case NULLPTR_TYPE:
9227 	  return qualifiers;
9228 
9229 	default:
9230 	  gcc_unreachable ();		/* Not a type! */
9231         }
9232     }
9233 
9234   return qualifiers;
9235 }
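
/* A worked example, assuming 32-bit int: for the C type "unsigned
   int **" the loop records the pointer code 1 at shifts 6 and 8, then
   returns the unsigned-int code 14, i.e.

     (1 << 6) | (1 << 8) | 14 == 0x14e.  */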
9236 
9237 /* Nested function support.  */
9238 
9239 /* Emit RTL insns to initialize the variable parts of a trampoline.
9240    FNADDR is an RTX for the address of the function's pure code.
9241    CXT is an RTX for the static chain value for the function.
9242 
9243    This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9244    (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9245    (to store insns).  This is a bit excessive.  Perhaps a different
9246    mechanism would be better here.
9247 
9248    Emit enough FLUSH insns to synchronize the data and instruction caches.  */
9249 
9250 static void
9251 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9252 {
9253   /* SPARC 32-bit trampoline:
9254 
9255  	sethi	%hi(fn), %g1
9256  	sethi	%hi(static), %g2
9257  	jmp	%g1+%lo(fn)
9258  	or	%g2, %lo(static), %g2
9259 
9260     SETHI i,r  = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9261     JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9262    */
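
  /* Filling in the register fields gives the constants used below:
     0x03000000 is "sethi %hi(0), %g1", 0x05000000 is "sethi %hi(0), %g2",
     0x81c06000 is "jmpl %g1+0, %g0" and 0x8410a000 is "or %g2, 0, %g2";
     the %hi and low-10-bit parts of FNADDR and CXT are then OR-ed in.  */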
9263 
9264   emit_move_insn
9265     (adjust_address (m_tramp, SImode, 0),
9266      expand_binop (SImode, ior_optab,
9267 		   expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9268 		   GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9269 		   NULL_RTX, 1, OPTAB_DIRECT));
9270 
9271   emit_move_insn
9272     (adjust_address (m_tramp, SImode, 4),
9273      expand_binop (SImode, ior_optab,
9274 		   expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9275 		   GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9276 		   NULL_RTX, 1, OPTAB_DIRECT));
9277 
9278   emit_move_insn
9279     (adjust_address (m_tramp, SImode, 8),
9280      expand_binop (SImode, ior_optab,
9281 		   expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9282 		   GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9283 		   NULL_RTX, 1, OPTAB_DIRECT));
9284 
9285   emit_move_insn
9286     (adjust_address (m_tramp, SImode, 12),
9287      expand_binop (SImode, ior_optab,
9288 		   expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9289 		   GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9290 		   NULL_RTX, 1, OPTAB_DIRECT));
9291 
9292   /* On UltraSPARC a flush flushes an entire cache line.  The trampoline is
9293      aligned on a 16 byte boundary so one flush clears it all.  */
9294   emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9295   if (sparc_cpu != PROCESSOR_ULTRASPARC
9296       && sparc_cpu != PROCESSOR_ULTRASPARC3
9297       && sparc_cpu != PROCESSOR_NIAGARA
9298       && sparc_cpu != PROCESSOR_NIAGARA2
9299       && sparc_cpu != PROCESSOR_NIAGARA3
9300       && sparc_cpu != PROCESSOR_NIAGARA4
9301       && sparc_cpu != PROCESSOR_NIAGARA7)
9302     emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9303 
9304   /* Call __enable_execute_stack after writing onto the stack to make sure
9305      the stack address is accessible.  */
9306 #ifdef HAVE_ENABLE_EXECUTE_STACK
9307   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9308                      LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9309 #endif
9310 
9311 }
9312 
9313 /* The 64-bit version is simpler because it makes more sense to load the
9314    values as "immediate" data out of the trampoline.  It's also easier since
9315    we can read the PC without clobbering a register.  */
9316 
9317 static void
9318 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9319 {
9320   /* SPARC 64-bit trampoline:
9321 
9322 	rd	%pc, %g1
9323 	ldx	[%g1+24], %g5
9324 	jmp	%g5
9325 	ldx	[%g1+16], %g5
9326 	+16 bytes data
9327    */
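
  /* Decoded, the four instruction words stored below are 0x83414000
     "rd %pc, %g1", 0xca586018 "ldx [%g1+24], %g5", 0x81c14000 "jmp %g5"
     and 0xca586010 "ldx [%g1+16], %g5"; CXT and FNADDR follow as
     immediate data at offsets 16 and 24.  */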
9328 
9329   emit_move_insn (adjust_address (m_tramp, SImode, 0),
9330 		  GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9331   emit_move_insn (adjust_address (m_tramp, SImode, 4),
9332 		  GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9333   emit_move_insn (adjust_address (m_tramp, SImode, 8),
9334 		  GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9335   emit_move_insn (adjust_address (m_tramp, SImode, 12),
9336 		  GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9337   emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9338   emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9339   emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9340 
9341   if (sparc_cpu != PROCESSOR_ULTRASPARC
9342       && sparc_cpu != PROCESSOR_ULTRASPARC3
9343       && sparc_cpu != PROCESSOR_NIAGARA
9344       && sparc_cpu != PROCESSOR_NIAGARA2
9345       && sparc_cpu != PROCESSOR_NIAGARA3
9346       && sparc_cpu != PROCESSOR_NIAGARA4
9347       && sparc_cpu != PROCESSOR_NIAGARA7)
9348     emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9349 
9350   /* Call __enable_execute_stack after writing onto the stack to make sure
9351      the stack address is accessible.  */
9352 #ifdef HAVE_ENABLE_EXECUTE_STACK
9353   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9354                      LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9355 #endif
9356 }
9357 
9358 /* Worker for TARGET_TRAMPOLINE_INIT.  */
9359 
9360 static void
9361 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9362 {
9363   rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9364   cxt = force_reg (Pmode, cxt);
9365   if (TARGET_ARCH64)
9366     sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9367   else
9368     sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9369 }
9370 
9371 /* Adjust the cost of a scheduling dependency.  Return the new cost of
9372    the dependency LINK of INSN on DEP_INSN.  COST is the current cost.  */
9373 
9374 static int
9375 supersparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
9376 {
9377   enum attr_type insn_type;
9378 
9379   if (recog_memoized (insn) < 0)
9380     return cost;
9381 
9382   insn_type = get_attr_type (insn);
9383 
9384   if (REG_NOTE_KIND (link) == 0)
9385     {
9386       /* Data dependency; DEP_INSN writes a register that INSN reads some
9387 	 cycles later.  */
9388 
9389       /* If a load, then the dependence must be on the memory address;
9390 	 add an extra "cycle".  Note that the cost could be two cycles
9391 	 if the reg was written late in an instruction group; we cannot
9392 	 tell here.  */
9393       if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9394 	return cost + 3;
9395 
9396       /* Get the delay only if the address of the store is the dependence.  */
9397       if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9398 	{
9399 	  rtx pat = PATTERN (insn);
9400 	  rtx dep_pat = PATTERN (dep_insn);
9401 
9402 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9403 	    return cost;  /* This should not happen!  */
9404 
9405 	  /* The dependency between the two instructions was on the data that
9406 	     is being stored.  Assume that this implies that the address of the
9407 	     store is not dependent.  */
9408 	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9409 	    return cost;
9410 
9411 	  return cost + 3;  /* An approximation.  */
9412 	}
9413 
9414       /* A shift instruction cannot receive its data from an instruction
9415 	 in the same cycle; add a one cycle penalty.  */
9416       if (insn_type == TYPE_SHIFT)
9417 	return cost + 3;   /* Split before cascade into shift.  */
9418     }
9419   else
9420     {
9421       /* Anti- or output dependency; DEP_INSN reads/writes a register that
9422 	 INSN writes some cycles later.  */
9423 
9424       /* These are only significant for the fpu unit; writing a fp reg before
9425          the fpu has finished with it stalls the processor.  */
9426 
9427       /* Reusing an integer register causes no problems.  */
9428       if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9429 	return 0;
9430     }
9431 
9432   return cost;
9433 }
9434 
9435 static int
9436 hypersparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
9437 {
9438   enum attr_type insn_type, dep_type;
9439   rtx pat = PATTERN (insn);
9440   rtx dep_pat = PATTERN (dep_insn);
9441 
9442   if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9443     return cost;
9444 
9445   insn_type = get_attr_type (insn);
9446   dep_type = get_attr_type (dep_insn);
9447 
9448   switch (REG_NOTE_KIND (link))
9449     {
9450     case 0:
9451       /* Data dependency; DEP_INSN writes a register that INSN reads some
9452 	 cycles later.  */
9453 
9454       switch (insn_type)
9455 	{
9456 	case TYPE_STORE:
9457 	case TYPE_FPSTORE:
9458 	  /* Get the delay iff the address of the store is the dependence.  */
9459 	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9460 	    return cost;
9461 
9462 	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9463 	    return cost;
9464 	  return cost + 3;
9465 
9466 	case TYPE_LOAD:
9467 	case TYPE_SLOAD:
9468 	case TYPE_FPLOAD:
9469 	  /* If a load, then the dependence must be on the memory address.  If
9470 	     the addresses aren't equal, then it might be a false dependency.  */
9471 	  if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9472 	    {
9473 	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9474 		  || GET_CODE (SET_DEST (dep_pat)) != MEM
9475 		  || GET_CODE (SET_SRC (pat)) != MEM
9476 		  || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9477 				    XEXP (SET_SRC (pat), 0)))
9478 		return cost + 2;
9479 
9480 	      return cost + 8;
9481 	    }
9482 	  break;
9483 
9484 	case TYPE_BRANCH:
9485 	  /* Compare to branch latency is 0.  There is no benefit from
9486 	     separating compare and branch.  */
9487 	  if (dep_type == TYPE_COMPARE)
9488 	    return 0;
9489 	  /* Floating point compare to branch latency is less than
9490 	     compare to conditional move.  */
9491 	  if (dep_type == TYPE_FPCMP)
9492 	    return cost - 1;
9493 	  break;
9494 	default:
9495 	  break;
9496 	}
9497 	break;
9498 
9499     case REG_DEP_ANTI:
9500       /* Anti-dependencies only penalize the fpu unit.  */
9501       if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9502         return 0;
9503       break;
9504 
9505     default:
9506       break;
9507     }
9508 
9509   return cost;
9510 }
9511 
9512 static int
9513 sparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
9514 {
9515   switch (sparc_cpu)
9516     {
9517     case PROCESSOR_SUPERSPARC:
9518       cost = supersparc_adjust_cost (insn, link, dep, cost);
9519       break;
9520     case PROCESSOR_HYPERSPARC:
9521     case PROCESSOR_SPARCLITE86X:
9522       cost = hypersparc_adjust_cost (insn, link, dep, cost);
9523       break;
9524     default:
9525       break;
9526     }
9527   return cost;
9528 }
9529 
9530 static void
9531 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9532 		  int sched_verbose ATTRIBUTE_UNUSED,
9533 		  int max_ready ATTRIBUTE_UNUSED)
9534 {}
9535 
9536 static int
9537 sparc_use_sched_lookahead (void)
9538 {
9539   if (sparc_cpu == PROCESSOR_NIAGARA
9540       || sparc_cpu == PROCESSOR_NIAGARA2
9541       || sparc_cpu == PROCESSOR_NIAGARA3)
9542     return 0;
9543   if (sparc_cpu == PROCESSOR_NIAGARA4
9544       || sparc_cpu == PROCESSOR_NIAGARA7)
9545     return 2;
9546   if (sparc_cpu == PROCESSOR_ULTRASPARC
9547       || sparc_cpu == PROCESSOR_ULTRASPARC3)
9548     return 4;
9549   if ((1 << sparc_cpu) &
9550       ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9551        (1 << PROCESSOR_SPARCLITE86X)))
9552     return 3;
9553   return 0;
9554 }
9555 
9556 static int
9557 sparc_issue_rate (void)
9558 {
9559   switch (sparc_cpu)
9560     {
9561     case PROCESSOR_NIAGARA:
9562     case PROCESSOR_NIAGARA2:
9563     case PROCESSOR_NIAGARA3:
9564     default:
9565       return 1;
9566     case PROCESSOR_NIAGARA4:
9567     case PROCESSOR_NIAGARA7:
9568     case PROCESSOR_V9:
9569       /* Assume V9 processors are capable of at least dual-issue.  */
9570       return 2;
9571     case PROCESSOR_SUPERSPARC:
9572       return 3;
9573     case PROCESSOR_HYPERSPARC:
9574     case PROCESSOR_SPARCLITE86X:
9575       return 2;
9576     case PROCESSOR_ULTRASPARC:
9577     case PROCESSOR_ULTRASPARC3:
9578       return 4;
9579     }
9580 }
9581 
9582 static int
9583 set_extends (rtx_insn *insn)
9584 {
9585   register rtx pat = PATTERN (insn);
9586 
9587   switch (GET_CODE (SET_SRC (pat)))
9588     {
9589       /* Load and some shift instructions zero extend.  */
9590     case MEM:
9591     case ZERO_EXTEND:
9592       /* sethi clears the high bits.  */
9593     case HIGH:
9594       /* LO_SUM is used with sethi; sethi clears the high bits and the
9595 	 values used with lo_sum are positive.  */
9596     case LO_SUM:
9597       /* Store flag stores 0 or 1.  */
9598     case LT: case LTU:
9599     case GT: case GTU:
9600     case LE: case LEU:
9601     case GE: case GEU:
9602     case EQ:
9603     case NE:
9604       return 1;
9605     case AND:
9606       {
9607 	rtx op0 = XEXP (SET_SRC (pat), 0);
9608 	rtx op1 = XEXP (SET_SRC (pat), 1);
9609 	if (GET_CODE (op1) == CONST_INT)
9610 	  return INTVAL (op1) >= 0;
9611 	if (GET_CODE (op0) != REG)
9612 	  return 0;
9613 	if (sparc_check_64 (op0, insn) == 1)
9614 	  return 1;
9615 	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9616       }
9617     case IOR:
9618     case XOR:
9619       {
9620 	rtx op0 = XEXP (SET_SRC (pat), 0);
9621 	rtx op1 = XEXP (SET_SRC (pat), 1);
9622 	if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
9623 	  return 0;
9624 	if (GET_CODE (op1) == CONST_INT)
9625 	  return INTVAL (op1) >= 0;
9626 	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
9627       }
9628     case LSHIFTRT:
9629       return GET_MODE (SET_SRC (pat)) == SImode;
9630       /* Positive integers leave the high bits zero.  */
9631     case CONST_INT:
9632       return !(INTVAL (SET_SRC (pat)) & 0x80000000);
9633     case ASHIFTRT:
9634     case SIGN_EXTEND:
9635       return - (GET_MODE (SET_SRC (pat)) == SImode);
9636     case REG:
9637       return sparc_check_64 (SET_SRC (pat), insn);
9638     default:
9639       return 0;
9640     }
9641 }
9642 
9643 /* We _ought_ to have only one kind per function, but...  */
9644 static GTY(()) rtx sparc_addr_diff_list;
9645 static GTY(()) rtx sparc_addr_list;
9646 
9647 void
9648 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
9649 {
9650   vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
9651   if (diff)
9652     sparc_addr_diff_list
9653       = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
9654   else
9655     sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
9656 }
9657 
9658 static void
9659 sparc_output_addr_vec (rtx vec)
9660 {
9661   rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9662   int idx, vlen = XVECLEN (body, 0);
9663 
9664 #ifdef ASM_OUTPUT_ADDR_VEC_START
9665   ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9666 #endif
9667 
9668 #ifdef ASM_OUTPUT_CASE_LABEL
9669   ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9670 			 NEXT_INSN (lab));
9671 #else
9672   (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9673 #endif
9674 
9675   for (idx = 0; idx < vlen; idx++)
9676     {
9677       ASM_OUTPUT_ADDR_VEC_ELT
9678 	(asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
9679     }
9680 
9681 #ifdef ASM_OUTPUT_ADDR_VEC_END
9682   ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9683 #endif
9684 }
9685 
9686 static void
9687 sparc_output_addr_diff_vec (rtx vec)
9688 {
9689   rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
9690   rtx base = XEXP (XEXP (body, 0), 0);
9691   int idx, vlen = XVECLEN (body, 1);
9692 
9693 #ifdef ASM_OUTPUT_ADDR_VEC_START
9694   ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
9695 #endif
9696 
9697 #ifdef ASM_OUTPUT_CASE_LABEL
9698   ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
9699 			 NEXT_INSN (lab));
9700 #else
9701   (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
9702 #endif
9703 
9704   for (idx = 0; idx < vlen; idx++)
9705     {
9706       ASM_OUTPUT_ADDR_DIFF_ELT
9707         (asm_out_file,
9708          body,
9709          CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
9710          CODE_LABEL_NUMBER (base));
9711     }
9712 
9713 #ifdef ASM_OUTPUT_ADDR_VEC_END
9714   ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
9715 #endif
9716 }
9717 
9718 static void
9719 sparc_output_deferred_case_vectors (void)
9720 {
9721   rtx t;
9722   int align;
9723 
9724   if (sparc_addr_list == NULL_RTX
9725       && sparc_addr_diff_list == NULL_RTX)
9726     return;
9727 
9728   /* Align to cache line in the function's code section.  */
9729   switch_to_section (current_function_section ());
9730 
9731   align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
9732   if (align > 0)
9733     ASM_OUTPUT_ALIGN (asm_out_file, align);
9734 
9735   for (t = sparc_addr_list; t ; t = XEXP (t, 1))
9736     sparc_output_addr_vec (XEXP (t, 0));
9737   for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
9738     sparc_output_addr_diff_vec (XEXP (t, 0));
9739 
9740   sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
9741 }
9742 
9743 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
9744    unknown.  Return 1 if the high bits are zero, -1 if the register is
9745    sign extended.  */
9746 int
9747 sparc_check_64 (rtx x, rtx_insn *insn)
9748 {
9749   /* If a register is set only once it is safe to ignore insns this
9750      code does not know how to handle.  The loop will either recognize
9751      the single set and return the correct value or fail to recognize
9752      it and return 0.  */
9753   int set_once = 0;
9754   rtx y = x;
9755 
9756   gcc_assert (GET_CODE (x) == REG);
9757 
9758   if (GET_MODE (x) == DImode)
9759     y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
9760 
9761   if (flag_expensive_optimizations
9762       && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
9763     set_once = 1;
9764 
9765   if (insn == 0)
9766     {
9767       if (set_once)
9768 	insn = get_last_insn_anywhere ();
9769       else
9770 	return 0;
9771     }
9772 
9773   while ((insn = PREV_INSN (insn)))
9774     {
9775       switch (GET_CODE (insn))
9776 	{
9777 	case JUMP_INSN:
9778 	case NOTE:
9779 	  break;
9780 	case CODE_LABEL:
9781 	case CALL_INSN:
9782 	default:
9783 	  if (! set_once)
9784 	    return 0;
9785 	  break;
9786 	case INSN:
9787 	  {
9788 	    rtx pat = PATTERN (insn);
9789 	    if (GET_CODE (pat) != SET)
9790 	      return 0;
9791 	    if (rtx_equal_p (x, SET_DEST (pat)))
9792 	      return set_extends (insn);
9793 	    if (y && rtx_equal_p (y, SET_DEST (pat)))
9794 	      return set_extends (insn);
9795 	    if (reg_overlap_mentioned_p (SET_DEST (pat), y))
9796 	      return 0;
9797 	  }
9798 	}
9799     }
9800   return 0;
9801 }
9802 
9803 /* Output a wide shift instruction in V8+ mode.  INSN is the instruction,
9804    OPERANDS are its operands and OPCODE is the mnemonic to be used.  */
9805 
9806 const char *
9807 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
9808 {
9809   static char asm_code[60];
9810 
9811   /* The scratch register is only required when the destination
9812      register is not a 64-bit global or out register.  */
9813   if (which_alternative != 2)
9814     operands[3] = operands[0];
9815 
9816   /* We can only shift by constants <= 63.  */
9817   if (GET_CODE (operands[2]) == CONST_INT)
9818     operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
9819 
9820   if (GET_CODE (operands[1]) == CONST_INT)
9821     {
9822       output_asm_insn ("mov\t%1, %3", operands);
9823     }
9824   else
9825     {
9826       output_asm_insn ("sllx\t%H1, 32, %3", operands);
9827       if (sparc_check_64 (operands[1], insn) <= 0)
9828 	output_asm_insn ("srl\t%L1, 0, %L1", operands);
9829       output_asm_insn ("or\t%L1, %3, %3", operands);
9830     }
9831 
9832   strcpy (asm_code, opcode);
9833 
9834   if (which_alternative != 2)
9835     return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
9836   else
9837     return
9838       strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
9839 }
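
/* With OPCODE "sllx" and the destination in a low register pair
   (alternatives 0 and 1), the code above typically emits

     sllx	%H1, 32, %0
     srl	%L1, 0, %L1
     or	%L1, %0, %0
     sllx	%0, %2, %L0
     srlx	%L0, 32, %H0  */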
9840 
9841 /* Output rtl to increment the profiler label LABELNO
9842    for profiling a function entry.  */
9843 
9844 void
9845 sparc_profile_hook (int labelno)
9846 {
9847   char buf[32];
9848   rtx lab, fun;
9849 
9850   fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
9851   if (NO_PROFILE_COUNTERS)
9852     {
9853       emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
9854     }
9855   else
9856     {
9857       ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9858       lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
9859       emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
9860     }
9861 }
9862 
9863 #ifdef TARGET_SOLARIS
9864 /* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
9865 
9866 static void
9867 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
9868 				     tree decl ATTRIBUTE_UNUSED)
9869 {
9870   if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
9871     {
9872       solaris_elf_asm_comdat_section (name, flags, decl);
9873       return;
9874     }
9875 
9876   fprintf (asm_out_file, "\t.section\t\"%s\"", name);
9877 
9878   if (!(flags & SECTION_DEBUG))
9879     fputs (",#alloc", asm_out_file);
9880   if (flags & SECTION_WRITE)
9881     fputs (",#write", asm_out_file);
9882   if (flags & SECTION_TLS)
9883     fputs (",#tls", asm_out_file);
9884   if (flags & SECTION_CODE)
9885     fputs (",#execinstr", asm_out_file);
9886 
9887   if (flags & SECTION_NOTYPE)
9888     ;
9889   else if (flags & SECTION_BSS)
9890     fputs (",#nobits", asm_out_file);
9891   else
9892     fputs (",#progbits", asm_out_file);
9893 
9894   fputc ('\n', asm_out_file);
9895 }
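
/* For example, a writable data section comes out as

	.section	".data",#alloc,#write,#progbits

   whereas a debug section gets no #alloc flag.  */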
9896 #endif /* TARGET_SOLARIS */
9897 
9898 /* We do not allow indirect calls to be optimized into sibling calls.
9899 
9900    We cannot use sibling calls when delayed branches are disabled
9901    because they will likely require the call delay slot to be filled.
9902 
9903    Also, on SPARC 32-bit we cannot emit a sibling call when the
9904    current function returns a structure.  This is because the "unimp
9905    after call" convention would cause the callee to return to the
9906    wrong place.  The generic code already disallows cases where the
9907    function being called returns a structure.
9908 
9909    It may seem strange how this last case could occur.  Usually there
9910    is code after the call which jumps to epilogue code which dumps the
9911    return value into the struct return area.  That ought to invalidate
9912    the return value into the struct return area.  That ought to invalidate
9913    the sibling call, right?  Well, in the C++ case we can end up passing
9914    the pointer to the struct return area to a constructor (which returns
9915    void) and then nothing else happens.  Such a sibling call would look
9916 
9917    VxWorks PIC PLT entries require the global pointer to be initialized
9918    on entry.  We therefore can't emit sibling calls to them.  */
9919 static bool
9920 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9921 {
9922   return (decl
9923 	  && flag_delayed_branch
9924 	  && (TARGET_ARCH64 || ! cfun->returns_struct)
9925 	  && !(TARGET_VXWORKS_RTP
9926 	       && flag_pic
9927 	       && !targetm.binds_local_p (decl)));
9928 }
9929 
9930 /* libfunc renaming.  */
9931 
9932 static void
9933 sparc_init_libfuncs (void)
9934 {
9935   if (TARGET_ARCH32)
9936     {
9937       /* Use the subroutines that Sun's library provides for integer
9938 	 multiply and divide.  The `*' prevents an underscore from
9939 	 being prepended by the compiler. .umul is a little faster
9940 	 than .mul.  */
9941       set_optab_libfunc (smul_optab, SImode, "*.umul");
9942       set_optab_libfunc (sdiv_optab, SImode, "*.div");
9943       set_optab_libfunc (udiv_optab, SImode, "*.udiv");
9944       set_optab_libfunc (smod_optab, SImode, "*.rem");
9945       set_optab_libfunc (umod_optab, SImode, "*.urem");
9946 
9947       /* TFmode arithmetic.  These names are part of the SPARC 32bit ABI.  */
9948       set_optab_libfunc (add_optab, TFmode, "_Q_add");
9949       set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
9950       set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
9951       set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
9952       set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
9953 
9954       /* We can define the TFmode sqrt optab only if TARGET_FPU.  This
9955 	 is because with soft-float, the SFmode and DFmode sqrt
9956 	 instructions will be absent, and the compiler will notice and
9957 	 try to use the TFmode sqrt instruction for calls to the
9958 	 builtin function sqrt, but this fails.  */
9959       if (TARGET_FPU)
9960 	set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
9961 
9962       set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
9963       set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
9964       set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
9965       set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
9966       set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
9967       set_optab_libfunc (le_optab, TFmode, "_Q_fle");
9968 
9969       set_conv_libfunc (sext_optab,   TFmode, SFmode, "_Q_stoq");
9970       set_conv_libfunc (sext_optab,   TFmode, DFmode, "_Q_dtoq");
9971       set_conv_libfunc (trunc_optab,  SFmode, TFmode, "_Q_qtos");
9972       set_conv_libfunc (trunc_optab,  DFmode, TFmode, "_Q_qtod");
9973 
9974       set_conv_libfunc (sfix_optab,   SImode, TFmode, "_Q_qtoi");
9975       set_conv_libfunc (ufix_optab,   SImode, TFmode, "_Q_qtou");
9976       set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
9977       set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
9978 
9979       if (DITF_CONVERSION_LIBFUNCS)
9980 	{
9981 	  set_conv_libfunc (sfix_optab,   DImode, TFmode, "_Q_qtoll");
9982 	  set_conv_libfunc (ufix_optab,   DImode, TFmode, "_Q_qtoull");
9983 	  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
9984 	  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
9985 	}
9986 
9987       if (SUN_CONVERSION_LIBFUNCS)
9988 	{
9989 	  set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
9990 	  set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
9991 	  set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
9992 	  set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
9993 	}
9994     }
9995   if (TARGET_ARCH64)
9996     {
9997       /* In the SPARC 64bit ABI, SImode multiply and divide functions
9998 	 do not exist in the library.  Make sure the compiler does not
9999 	 emit calls to them by accident.  (It should always use the
10000          hardware instructions.)  */
10001       set_optab_libfunc (smul_optab, SImode, 0);
10002       set_optab_libfunc (sdiv_optab, SImode, 0);
10003       set_optab_libfunc (udiv_optab, SImode, 0);
10004       set_optab_libfunc (smod_optab, SImode, 0);
10005       set_optab_libfunc (umod_optab, SImode, 0);
10006 
10007       if (SUN_INTEGER_MULTIPLY_64)
10008 	{
10009 	  set_optab_libfunc (smul_optab, DImode, "__mul64");
10010 	  set_optab_libfunc (sdiv_optab, DImode, "__div64");
10011 	  set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10012 	  set_optab_libfunc (smod_optab, DImode, "__rem64");
10013 	  set_optab_libfunc (umod_optab, DImode, "__urem64");
10014 	}
10015 
10016       if (SUN_CONVERSION_LIBFUNCS)
10017 	{
10018 	  set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10019 	  set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10020 	  set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10021 	  set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10022 	}
10023     }
10024 }
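
/* As an illustration, with the renaming above a 32-bit long double
   addition such as "a + b" expands to a call to _Q_add, and a 32-bit
   integer multiplication to .umul, instead of the default __addtf3 and
   __mulsi3 library names.  */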
10025 
10026 /* SPARC builtins.  */
10027 enum sparc_builtins
10028 {
10029   /* FPU builtins.  */
10030   SPARC_BUILTIN_LDFSR,
10031   SPARC_BUILTIN_STFSR,
10032 
10033   /* VIS 1.0 builtins.  */
10034   SPARC_BUILTIN_FPACK16,
10035   SPARC_BUILTIN_FPACK32,
10036   SPARC_BUILTIN_FPACKFIX,
10037   SPARC_BUILTIN_FEXPAND,
10038   SPARC_BUILTIN_FPMERGE,
10039   SPARC_BUILTIN_FMUL8X16,
10040   SPARC_BUILTIN_FMUL8X16AU,
10041   SPARC_BUILTIN_FMUL8X16AL,
10042   SPARC_BUILTIN_FMUL8SUX16,
10043   SPARC_BUILTIN_FMUL8ULX16,
10044   SPARC_BUILTIN_FMULD8SUX16,
10045   SPARC_BUILTIN_FMULD8ULX16,
10046   SPARC_BUILTIN_FALIGNDATAV4HI,
10047   SPARC_BUILTIN_FALIGNDATAV8QI,
10048   SPARC_BUILTIN_FALIGNDATAV2SI,
10049   SPARC_BUILTIN_FALIGNDATADI,
10050   SPARC_BUILTIN_WRGSR,
10051   SPARC_BUILTIN_RDGSR,
10052   SPARC_BUILTIN_ALIGNADDR,
10053   SPARC_BUILTIN_ALIGNADDRL,
10054   SPARC_BUILTIN_PDIST,
10055   SPARC_BUILTIN_EDGE8,
10056   SPARC_BUILTIN_EDGE8L,
10057   SPARC_BUILTIN_EDGE16,
10058   SPARC_BUILTIN_EDGE16L,
10059   SPARC_BUILTIN_EDGE32,
10060   SPARC_BUILTIN_EDGE32L,
10061   SPARC_BUILTIN_FCMPLE16,
10062   SPARC_BUILTIN_FCMPLE32,
10063   SPARC_BUILTIN_FCMPNE16,
10064   SPARC_BUILTIN_FCMPNE32,
10065   SPARC_BUILTIN_FCMPGT16,
10066   SPARC_BUILTIN_FCMPGT32,
10067   SPARC_BUILTIN_FCMPEQ16,
10068   SPARC_BUILTIN_FCMPEQ32,
10069   SPARC_BUILTIN_FPADD16,
10070   SPARC_BUILTIN_FPADD16S,
10071   SPARC_BUILTIN_FPADD32,
10072   SPARC_BUILTIN_FPADD32S,
10073   SPARC_BUILTIN_FPSUB16,
10074   SPARC_BUILTIN_FPSUB16S,
10075   SPARC_BUILTIN_FPSUB32,
10076   SPARC_BUILTIN_FPSUB32S,
10077   SPARC_BUILTIN_ARRAY8,
10078   SPARC_BUILTIN_ARRAY16,
10079   SPARC_BUILTIN_ARRAY32,
10080 
10081   /* VIS 2.0 builtins.  */
10082   SPARC_BUILTIN_EDGE8N,
10083   SPARC_BUILTIN_EDGE8LN,
10084   SPARC_BUILTIN_EDGE16N,
10085   SPARC_BUILTIN_EDGE16LN,
10086   SPARC_BUILTIN_EDGE32N,
10087   SPARC_BUILTIN_EDGE32LN,
10088   SPARC_BUILTIN_BMASK,
10089   SPARC_BUILTIN_BSHUFFLEV4HI,
10090   SPARC_BUILTIN_BSHUFFLEV8QI,
10091   SPARC_BUILTIN_BSHUFFLEV2SI,
10092   SPARC_BUILTIN_BSHUFFLEDI,
10093 
10094   /* VIS 3.0 builtins.  */
10095   SPARC_BUILTIN_CMASK8,
10096   SPARC_BUILTIN_CMASK16,
10097   SPARC_BUILTIN_CMASK32,
10098   SPARC_BUILTIN_FCHKSM16,
10099   SPARC_BUILTIN_FSLL16,
10100   SPARC_BUILTIN_FSLAS16,
10101   SPARC_BUILTIN_FSRL16,
10102   SPARC_BUILTIN_FSRA16,
10103   SPARC_BUILTIN_FSLL32,
10104   SPARC_BUILTIN_FSLAS32,
10105   SPARC_BUILTIN_FSRL32,
10106   SPARC_BUILTIN_FSRA32,
10107   SPARC_BUILTIN_PDISTN,
10108   SPARC_BUILTIN_FMEAN16,
10109   SPARC_BUILTIN_FPADD64,
10110   SPARC_BUILTIN_FPSUB64,
10111   SPARC_BUILTIN_FPADDS16,
10112   SPARC_BUILTIN_FPADDS16S,
10113   SPARC_BUILTIN_FPSUBS16,
10114   SPARC_BUILTIN_FPSUBS16S,
10115   SPARC_BUILTIN_FPADDS32,
10116   SPARC_BUILTIN_FPADDS32S,
10117   SPARC_BUILTIN_FPSUBS32,
10118   SPARC_BUILTIN_FPSUBS32S,
10119   SPARC_BUILTIN_FUCMPLE8,
10120   SPARC_BUILTIN_FUCMPNE8,
10121   SPARC_BUILTIN_FUCMPGT8,
10122   SPARC_BUILTIN_FUCMPEQ8,
10123   SPARC_BUILTIN_FHADDS,
10124   SPARC_BUILTIN_FHADDD,
10125   SPARC_BUILTIN_FHSUBS,
10126   SPARC_BUILTIN_FHSUBD,
10127   SPARC_BUILTIN_FNHADDS,
10128   SPARC_BUILTIN_FNHADDD,
10129   SPARC_BUILTIN_UMULXHI,
10130   SPARC_BUILTIN_XMULX,
10131   SPARC_BUILTIN_XMULXHI,
10132 
10133   /* VIS 4.0 builtins.  */
10134   SPARC_BUILTIN_FPADD8,
10135   SPARC_BUILTIN_FPADDS8,
10136   SPARC_BUILTIN_FPADDUS8,
10137   SPARC_BUILTIN_FPADDUS16,
10138   SPARC_BUILTIN_FPCMPLE8,
10139   SPARC_BUILTIN_FPCMPGT8,
10140   SPARC_BUILTIN_FPCMPULE16,
10141   SPARC_BUILTIN_FPCMPUGT16,
10142   SPARC_BUILTIN_FPCMPULE32,
10143   SPARC_BUILTIN_FPCMPUGT32,
10144   SPARC_BUILTIN_FPMAX8,
10145   SPARC_BUILTIN_FPMAX16,
10146   SPARC_BUILTIN_FPMAX32,
10147   SPARC_BUILTIN_FPMAXU8,
10148   SPARC_BUILTIN_FPMAXU16,
10149   SPARC_BUILTIN_FPMAXU32,
10150   SPARC_BUILTIN_FPMIN8,
10151   SPARC_BUILTIN_FPMIN16,
10152   SPARC_BUILTIN_FPMIN32,
10153   SPARC_BUILTIN_FPMINU8,
10154   SPARC_BUILTIN_FPMINU16,
10155   SPARC_BUILTIN_FPMINU32,
10156   SPARC_BUILTIN_FPSUB8,
10157   SPARC_BUILTIN_FPSUBS8,
10158   SPARC_BUILTIN_FPSUBUS8,
10159   SPARC_BUILTIN_FPSUBUS16,
10160 
10161   SPARC_BUILTIN_MAX
10162 };
10163 
10164 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10165 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10166 
10167 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE.  Return the
10168    function decl or NULL_TREE if the builtin was not added.  */
10169 
10170 static tree
10171 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10172 	     tree type)
10173 {
10174   tree t
10175     = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10176 
10177   if (t)
10178     {
10179       sparc_builtins[code] = t;
10180       sparc_builtins_icode[code] = icode;
10181     }
10182 
10183   return t;
10184 }
10185 
10186 /* Likewise, but also marks the function as "const".  */
10187 
10188 static tree
10189 def_builtin_const (const char *name, enum insn_code icode,
10190 		   enum sparc_builtins code, tree type)
10191 {
10192   tree t = def_builtin (name, icode, code, type);
10193 
10194   if (t)
10195     TREE_READONLY (t) = 1;
10196 
10197   return t;
10198 }
10199 
10200 /* Implement the TARGET_INIT_BUILTINS target hook.
10201    Create builtin functions for special SPARC instructions.  */
10202 
10203 static void
10204 sparc_init_builtins (void)
10205 {
10206   if (TARGET_FPU)
10207     sparc_fpu_init_builtins ();
10208 
10209   if (TARGET_VIS)
10210     sparc_vis_init_builtins ();
10211 }
10212 
10213 /* Create builtin functions for FPU instructions.  */
10214 
10215 static void
10216 sparc_fpu_init_builtins (void)
10217 {
10218   tree ftype
10219     = build_function_type_list (void_type_node,
10220 				build_pointer_type (unsigned_type_node), 0);
10221   def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10222 	       SPARC_BUILTIN_LDFSR, ftype);
10223   def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10224 	       SPARC_BUILTIN_STFSR, ftype);
10225 }
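
/* With these definitions, user code can read and write the FSR through
   memory, e.g. (a sketch of the intended usage):

     unsigned int fsr;
     __builtin_store_fsr (&fsr);	// read the FSR into fsr
     // ... twiddle rounding/trap bits in fsr ...
     __builtin_load_fsr (&fsr);		// write fsr back to the FSR
*/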
10226 
10227 /* Create builtin functions for VIS instructions.  */
10228 
10229 static void
10230 sparc_vis_init_builtins (void)
10231 {
10232   tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10233   tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10234   tree v4hi = build_vector_type (intHI_type_node, 4);
10235   tree v2hi = build_vector_type (intHI_type_node, 2);
10236   tree v2si = build_vector_type (intSI_type_node, 2);
10237   tree v1si = build_vector_type (intSI_type_node, 1);
10238 
10239   tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10240   tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10241   tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10242   tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10243   tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10244   tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10245   tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10246   tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10247   tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10248   tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10249   tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10250   tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10251   tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10252   tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10253   tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10254 							 v8qi, v8qi,
10255 							 intDI_type_node, 0);
10256   tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10257 						      v8qi, v8qi, 0);
10258   tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10259 						      v8qi, v8qi, 0);
10260   tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10261 						  intDI_type_node,
10262 						  intDI_type_node, 0);
10263   tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10264 						  intSI_type_node,
10265 						  intSI_type_node, 0);
10266   tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10267 						    ptr_type_node,
10268 						    intSI_type_node, 0);
10269   tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10270 						    ptr_type_node,
10271 						    intDI_type_node, 0);
10272   tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10273 						    ptr_type_node,
10274 						    ptr_type_node, 0);
10275   tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10276 						    ptr_type_node,
10277 						    ptr_type_node, 0);
10278   tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10279 						      v4hi, v4hi, 0);
10280   tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10281 						      v2si, v2si, 0);
10282   tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10283 						      v4hi, v4hi, 0);
10284   tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10285 						      v2si, v2si, 0);
10286   tree void_ftype_di = build_function_type_list (void_type_node,
10287 						 intDI_type_node, 0);
10288   tree di_ftype_void = build_function_type_list (intDI_type_node,
10289 						 void_type_node, 0);
10290   tree void_ftype_si = build_function_type_list (void_type_node,
10291 						 intSI_type_node, 0);
10292   tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10293 						  float_type_node,
10294 						  float_type_node, 0);
10295   tree df_ftype_df_df = build_function_type_list (double_type_node,
10296 						  double_type_node,
10297 						  double_type_node, 0);
10298 
10299   /* Packing and expanding vectors.  */
10300   def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10301 	       SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10302   def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10303 	       SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10304   def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10305 	       SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10306   def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10307 		     SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10308   def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10309 		     SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10310 
10311   /* Multiplications.  */
10312   def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10313 		     SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10314   def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10315 		     SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10316   def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10317 		     SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10318   def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10319 		     SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10320   def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10321 		     SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10322   def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10323 		     SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10324   def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10325 		     SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10326 
10327   /* Data aligning.  */
10328   def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10329 	       SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
10330   def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10331 	       SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
10332   def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10333 	       SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
10334   def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10335 	       SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
10336 
10337   def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10338 	       SPARC_BUILTIN_WRGSR, void_ftype_di);
10339   def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10340 	       SPARC_BUILTIN_RDGSR, di_ftype_void);
10341 
10342   if (TARGET_ARCH64)
10343     {
10344       def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10345 		   SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
10346       def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10347 		   SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
10348     }
10349   else
10350     {
10351       def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10352 		   SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
10353       def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10354 		   SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
10355     }
10356 
10357   /* Pixel distance.  */
10358   def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10359 		     SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
10360 
10361   /* Edge handling.  */
10362   if (TARGET_ARCH64)
10363     {
10364       def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10365 			 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
10366       def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10367 			 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
10368       def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10369 			 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
10370       def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10371 			 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
10372       def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10373 			 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
10374       def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10375 			 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
10376     }
10377   else
10378     {
10379       def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10380 			 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
10381       def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10382 			 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
10383       def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10384 			 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
10385       def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10386 			 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
10387       def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10388 			 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
10389       def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10390 			 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
10391     }
10392 
10393   /* Pixel compare.  */
10394   if (TARGET_ARCH64)
10395     {
10396       def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10397 			 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
10398       def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10399 			 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
10400       def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10401 			 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
10402       def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10403 			 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
10404       def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10405 			 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
10406       def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10407 			 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
10408       def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10409 			 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
10410       def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10411 			 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
10412     }
10413   else
10414     {
10415       def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10416 			 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
10417       def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10418 			 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
10419       def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10420 			 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
10421       def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10422 			 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
10423       def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10424 			 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
10425       def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10426 			 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
10427       def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10428 			 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
10429       def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10430 			 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
10431     }
10432 
10433   /* Addition and subtraction.  */
10434   def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10435 		     SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
10436   def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10437 		     SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
10438   def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10439 		     SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
10440   def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10441 		     SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
10442   def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10443 		     SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
10444   def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10445 		     SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
10446   def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10447 		     SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
10448   def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10449 		     SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
10450 
10451   /* Three-dimensional array addressing.  */
10452   if (TARGET_ARCH64)
10453     {
10454       def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10455 			 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
10456       def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10457 			 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
10458       def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10459 			 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
10460     }
10461   else
10462     {
10463       def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10464 			 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
10465       def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10466 			 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
10467       def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10468 			 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
10469     }
10470 
10471   if (TARGET_VIS2)
10472     {
10473       /* Edge handling.  */
10474       if (TARGET_ARCH64)
10475 	{
10476 	  def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10477 			     SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
10478 	  def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10479 			     SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
10480 	  def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10481 			     SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
10482 	  def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10483 			     SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
10484 	  def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10485 			     SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
10486 	  def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10487 			     SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
10488 	}
10489       else
10490 	{
10491 	  def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10492 			     SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
10493 	  def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10494 			     SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
10495 	  def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10496 			     SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
10497 	  def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10498 			     SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
10499 	  def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10500 			     SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
10501 	  def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10502 			     SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
10503 	}
10504 
10505       /* Byte mask and shuffle.  */
10506       if (TARGET_ARCH64)
10507 	def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10508 		     SPARC_BUILTIN_BMASK, di_ftype_di_di);
10509       else
10510 	def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10511 		     SPARC_BUILTIN_BMASK, si_ftype_si_si);
10512       def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10513 		   SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
10514       def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10515 		   SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
10516       def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10517 		   SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
10518       def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10519 		   SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
10520     }
10521 
10522   if (TARGET_VIS3)
10523     {
10524       if (TARGET_ARCH64)
10525 	{
10526 	  def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10527 		       SPARC_BUILTIN_CMASK8, void_ftype_di);
10528 	  def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10529 		       SPARC_BUILTIN_CMASK16, void_ftype_di);
10530 	  def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10531 		       SPARC_BUILTIN_CMASK32, void_ftype_di);
10532 	}
10533       else
10534 	{
10535 	  def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10536 		       SPARC_BUILTIN_CMASK8, void_ftype_si);
10537 	  def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10538 		       SPARC_BUILTIN_CMASK16, void_ftype_si);
10539 	  def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10540 		       SPARC_BUILTIN_CMASK32, void_ftype_si);
10541 	}
10542 
10543       def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
10544 			 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
10545 
10546       def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
10547 			 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
10548       def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
10549 			 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
10550       def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
10551 			 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
10552       def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
10553 			 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
10554       def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
10555 			 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
10556       def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
10557 			 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
10558       def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
10559 			 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
10560       def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
10561 			 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
10562 
10563       if (TARGET_ARCH64)
10564 	def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
10565 			   SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
10566       else
10567 	def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
10568 			   SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
10569 
10570       def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
10571 			 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
10572       def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
10573 			 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
10574       def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
10575 			 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
10576 
10577       def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
10578 			 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
10579       def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
10580 			 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
10581       def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
10582 			 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
10583       def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
10584 			 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
10585       def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
10586 			 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
10587       def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
10588 			 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
10589       def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
10590 			 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
10591       def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
10592 			 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
10593 
10594       if (TARGET_ARCH64)
10595 	{
10596 	  def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
10597 			     SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
10598 	  def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
10599 			     SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
10600 	  def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
10601 			     SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
10602 	  def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
10603 			     SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
10604 	}
10605       else
10606 	{
10607 	  def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
10608 			     SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
10609 	  def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
10610 			     SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
10611 	  def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
10612 			     SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
10613 	  def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
10614 			     SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
10615 	}
10616 
10617       def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
10618 			 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
10619       def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
10620 			 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
10621       def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
10622 			 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
10623       def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
10624 			 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
10625       def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
10626 			 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
10627       def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
10628 			 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
10629 
10630       def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
10631 			 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
10632       def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
10633 			 SPARC_BUILTIN_XMULX, di_ftype_di_di);
10634       def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
10635 			 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
10636     }
10637 
10638   if (TARGET_VIS4)
10639     {
10640       def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
10641 			 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
10642       def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
10643 			 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
10644       def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
10645 			 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
10646       def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
10647 			 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
10648 
10649 
10650       if (TARGET_ARCH64)
10651 	{
10652 	  def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
10653 			     SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
10654 	  def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
10655 			     SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
10656 	  def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
10657 			     SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
10658 	  def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
10659 			     SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
10660 	  def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
10661 			     SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
10662 	  def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
10663 			     SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
10664 	}
10665       else
10666 	{
10667 	  def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
10668 			     SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
10669 	  def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
10670 			     SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
10671 	  def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
10672 			     SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
10673 	  def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
10674 			     SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
10675 	  def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
10676 			     SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
10677 	  def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
10678 			     SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
10679 	}
10680 
10681       def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
10682 			 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
10683       def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
10684 			 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
10685       def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
10686 			 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
10687       def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
10688 			 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
10689       def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
10690 			 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
10691       def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
10692 			 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
10693       def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
10694 			 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
10695       def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
10696 			 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
10697       def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
10698 			 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
10699       def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
10700 			 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
10701       def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
10702 			 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
10703       def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
10704 			 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
10705       def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
10706 			 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
10707       def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
10708 			 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
10709       def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
10710 			 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
10711       def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
10712 			 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
10713     }
10714 }
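
/* Once initialized, the VIS builtins are directly callable from C,
   e.g. (a sketch; requires compiling with -mvis):

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi sum (v4hi a, v4hi b)
     {
       return __builtin_vis_fpadd16 (a, b);	// partitioned 16-bit add
     }
*/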
10715 
10716 /* Implement TARGET_BUILTIN_DECL hook.  */
10717 
10718 static tree
10719 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10720 {
10721   if (code >= SPARC_BUILTIN_MAX)
10722     return error_mark_node;
10723 
10724   return sparc_builtins[code];
10725 }
10726 
10727 /* Implement TARGET_EXPAND_BUILTIN hook.  */
10728 
10729 static rtx
10730 sparc_expand_builtin (tree exp, rtx target,
10731 		      rtx subtarget ATTRIBUTE_UNUSED,
10732 		      machine_mode tmode ATTRIBUTE_UNUSED,
10733 		      int ignore ATTRIBUTE_UNUSED)
10734 {
10735   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10736   enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10737   enum insn_code icode = sparc_builtins_icode[code];
10738   bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
10739   call_expr_arg_iterator iter;
10740   int arg_count = 0;
10741   rtx pat, op[4];
10742   tree arg;
10743 
10744   if (nonvoid)
10745     {
10746       machine_mode tmode = insn_data[icode].operand[0].mode;
10747       if (!target
10748 	  || GET_MODE (target) != tmode
10749 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10750 	op[0] = gen_reg_rtx (tmode);
10751       else
10752 	op[0] = target;
10753     }
10754 
10755   FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
10756     {
10757       const struct insn_operand_data *insn_op;
10758       int idx;
10759 
10760       if (arg == error_mark_node)
10761 	return NULL_RTX;
10762 
10763       arg_count++;
10764       idx = arg_count - !nonvoid;
10765       insn_op = &insn_data[icode].operand[idx];
10766       op[arg_count] = expand_normal (arg);
10767 
10768       if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
10769 	{
10770 	  if (!address_operand (op[arg_count], SImode))
10771 	    {
10772 	      op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
10773 	      op[arg_count] = copy_addr_to_reg (op[arg_count]);
10774 	    }
10775 	  op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
10776 	}
10777 
10778       else if (insn_op->mode == V1DImode
10779 	       && GET_MODE (op[arg_count]) == DImode)
10780 	op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
10781 
10782       else if (insn_op->mode == V1SImode
10783 	       && GET_MODE (op[arg_count]) == SImode)
10784 	op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
10785 
10786       if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
10787 							insn_op->mode))
10788 	op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
10789     }
10790 
10791   switch (arg_count)
10792     {
10793     case 0:
10794       pat = GEN_FCN (icode) (op[0]);
10795       break;
10796     case 1:
10797       if (nonvoid)
10798 	pat = GEN_FCN (icode) (op[0], op[1]);
10799       else
10800 	pat = GEN_FCN (icode) (op[1]);
10801       break;
10802     case 2:
10803       pat = GEN_FCN (icode) (op[0], op[1], op[2]);
10804       break;
10805     case 3:
10806       pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
10807       break;
10808     default:
10809       gcc_unreachable ();
10810     }
10811 
10812   if (!pat)
10813     return NULL_RTX;
10814 
10815   emit_insn (pat);
10816 
10817   return (nonvoid ? op[0] : const0_rtx);
10818 }
10819 
10820 /* Return the upper 16 bits of the 8x16 multiplication.  */
10821 
10822 static int
10823 sparc_vis_mul8x16 (int e8, int e16)
10824 {
10825   return (e8 * e16 + 128) / 256;
10826 }
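
/* For example, sparc_vis_mul8x16 (2, 0x1234) yields
   (2 * 0x1234 + 128) / 256 = (9320 + 128) / 256 = 36 (0x24),
   i.e. the rounded high-order bits of the 8x16 product.  */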
10827 
10828 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
10829    the result into the array N_ELTS, whose elements are of INNER_TYPE.  */
10830 
10831 static void
10832 sparc_handle_vis_mul8x16 (tree *n_elts, enum sparc_builtins fncode,
10833 			  tree inner_type, tree cst0, tree cst1)
10834 {
10835   unsigned i, num = VECTOR_CST_NELTS (cst0);
10836   int scale;
10837 
10838   switch (fncode)
10839     {
10840     case SPARC_BUILTIN_FMUL8X16:
10841       for (i = 0; i < num; ++i)
10842 	{
10843 	  int val
10844 	    = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10845 				 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
10846 	  n_elts[i] = build_int_cst (inner_type, val);
10847 	}
10848       break;
10849 
10850     case SPARC_BUILTIN_FMUL8X16AU:
10851       scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
10852 
10853       for (i = 0; i < num; ++i)
10854 	{
10855 	  int val
10856 	    = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10857 				 scale);
10858 	  n_elts[i] = build_int_cst (inner_type, val);
10859 	}
10860       break;
10861 
10862     case SPARC_BUILTIN_FMUL8X16AL:
10863       scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
10864 
10865       for (i = 0; i < num; ++i)
10866 	{
10867 	  int val
10868 	    = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
10869 				 scale);
10870 	  n_elts[i] = build_int_cst (inner_type, val);
10871 	}
10872       break;
10873 
10874     default:
10875       gcc_unreachable ();
10876     }
10877 }
10878 
10879 /* Implement TARGET_FOLD_BUILTIN hook.
10880 
10881    Fold builtin functions for SPARC intrinsics.  If IGNORE is true the
10882    result of the function call is ignored.  NULL_TREE is returned if the
10883    function could not be folded.  */
10884 
10885 static tree
10886 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10887 		    tree *args, bool ignore)
10888 {
10889   enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
10890   tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
10891   tree arg0, arg1, arg2;
10892 
10893   if (ignore)
10894     switch (code)
10895       {
10896       case SPARC_BUILTIN_LDFSR:
10897       case SPARC_BUILTIN_STFSR:
10898       case SPARC_BUILTIN_ALIGNADDR:
10899       case SPARC_BUILTIN_WRGSR:
10900       case SPARC_BUILTIN_BMASK:
10901       case SPARC_BUILTIN_CMASK8:
10902       case SPARC_BUILTIN_CMASK16:
10903       case SPARC_BUILTIN_CMASK32:
10904 	break;
10905 
10906       default:
10907 	return build_zero_cst (rtype);
10908       }
10909 
10910   switch (code)
10911     {
10912     case SPARC_BUILTIN_FEXPAND:
10913       arg0 = args[0];
10914       STRIP_NOPS (arg0);
10915 
10916       if (TREE_CODE (arg0) == VECTOR_CST)
10917 	{
10918 	  tree inner_type = TREE_TYPE (rtype);
10919 	  tree *n_elts;
10920 	  unsigned i;
10921 
10922 	  n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10923 	  for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10924 	    n_elts[i] = build_int_cst (inner_type,
10925 				       TREE_INT_CST_LOW
10926 				         (VECTOR_CST_ELT (arg0, i)) << 4);
10927 	  return build_vector (rtype, n_elts);
10928 	}
10929       break;
10930 
10931     case SPARC_BUILTIN_FMUL8X16:
10932     case SPARC_BUILTIN_FMUL8X16AU:
10933     case SPARC_BUILTIN_FMUL8X16AL:
10934       arg0 = args[0];
10935       arg1 = args[1];
10936       STRIP_NOPS (arg0);
10937       STRIP_NOPS (arg1);
10938 
10939       if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10940 	{
10941 	  tree inner_type = TREE_TYPE (rtype);
10942 	  tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0));
10943 	  sparc_handle_vis_mul8x16 (n_elts, code, inner_type, arg0, arg1);
10944 	  return build_vector (rtype, n_elts);
10945 	}
10946       break;
10947 
10948     case SPARC_BUILTIN_FPMERGE:
10949       arg0 = args[0];
10950       arg1 = args[1];
10951       STRIP_NOPS (arg0);
10952       STRIP_NOPS (arg1);
10953 
10954       if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
10955 	{
10956 	  tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0));
10957 	  unsigned i;
10958 	  for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10959 	    {
10960 	      n_elts[2*i] = VECTOR_CST_ELT (arg0, i);
10961 	      n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i);
10962 	    }
10963 
10964 	  return build_vector (rtype, n_elts);
10965 	}
10966       break;
10967 
10968     case SPARC_BUILTIN_PDIST:
10969     case SPARC_BUILTIN_PDISTN:
10970       arg0 = args[0];
10971       arg1 = args[1];
10972       STRIP_NOPS (arg0);
10973       STRIP_NOPS (arg1);
10974       if (code == SPARC_BUILTIN_PDIST)
10975 	{
10976 	  arg2 = args[2];
10977 	  STRIP_NOPS (arg2);
10978 	}
10979       else
10980 	arg2 = integer_zero_node;
10981 
10982       if (TREE_CODE (arg0) == VECTOR_CST
10983 	  && TREE_CODE (arg1) == VECTOR_CST
10984 	  && TREE_CODE (arg2) == INTEGER_CST)
10985 	{
10986 	  bool overflow = false;
10987 	  widest_int result = wi::to_widest (arg2);
10988 	  widest_int tmp;
10989 	  unsigned i;
10990 
10991 	  for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
10992 	    {
10993 	      tree e0 = VECTOR_CST_ELT (arg0, i);
10994 	      tree e1 = VECTOR_CST_ELT (arg1, i);
10995 
10996 	      bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
10997 
10998 	      tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
10999 	      tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11000 	      if (wi::neg_p (tmp))
11001 		tmp = wi::neg (tmp, &neg2_ovf);
11002 	      else
11003 		neg2_ovf = false;
11004 	      result = wi::add (result, tmp, SIGNED, &add2_ovf);
11005 	      overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
11006 	    }
11007 
11008 	  gcc_assert (!overflow);
11009 
11010 	  return wide_int_to_tree (rtype, result);
11011 	}
11012 
11013     default:
11014       break;
11015     }
11016 
11017   return NULL_TREE;
11018 }
11019 
11020 /* ??? This duplicates information provided to the compiler by the
11021    ??? scheduler description.  Some day, teach genautomata to output
11022    ??? the latencies and then CSE will just use that.  */
11023 
11024 static bool
11025 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
11026 		 int opno ATTRIBUTE_UNUSED,
11027 		 int *total, bool speed ATTRIBUTE_UNUSED)
11028 {
11029   int code = GET_CODE (x);
11030   bool float_mode_p = FLOAT_MODE_P (mode);
11031 
11032   switch (code)
11033     {
11034     case CONST_INT:
11035       if (SMALL_INT (x))
11036 	*total = 0;
11037       else
11038 	*total = 2;
11039       return true;
11040 
11041     case CONST_WIDE_INT:
11042       *total = 0;
11043       if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
11044 	*total += 2;
11045       if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
11046 	*total += 2;
11047       return true;
11048 
11049     case HIGH:
11050       *total = 2;
11051       return true;
11052 
11053     case CONST:
11054     case LABEL_REF:
11055     case SYMBOL_REF:
11056       *total = 4;
11057       return true;
11058 
11059     case CONST_DOUBLE:
11060       *total = 8;
11061       return true;
11062 
11063     case MEM:
11064       /* If outer-code was a sign or zero extension, a cost
11065 	 of COSTS_N_INSNS (1) was already added in.  This is
11066 	 why we are subtracting it back out.  */
11067       if (outer_code == ZERO_EXTEND)
11068 	{
11069 	  *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11070 	}
11071       else if (outer_code == SIGN_EXTEND)
11072 	{
11073 	  *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11074 	}
11075       else if (float_mode_p)
11076 	{
11077 	  *total = sparc_costs->float_load;
11078 	}
11079       else
11080 	{
11081 	  *total = sparc_costs->int_load;
11082 	}
11083 
11084       return true;
11085 
11086     case PLUS:
11087     case MINUS:
11088       if (float_mode_p)
11089 	*total = sparc_costs->float_plusminus;
11090       else
11091 	*total = COSTS_N_INSNS (1);
11092       return false;
11093 
11094     case FMA:
11095       {
11096 	rtx sub;
11097 
11098 	gcc_assert (float_mode_p);
11099 	*total = sparc_costs->float_mul;
11100 
11101 	sub = XEXP (x, 0);
11102 	if (GET_CODE (sub) == NEG)
11103 	  sub = XEXP (sub, 0);
11104 	*total += rtx_cost (sub, mode, FMA, 0, speed);
11105 
11106 	sub = XEXP (x, 2);
11107 	if (GET_CODE (sub) == NEG)
11108 	  sub = XEXP (sub, 0);
11109 	*total += rtx_cost (sub, mode, FMA, 2, speed);
11110 	return true;
11111       }
11112 
11113     case MULT:
11114       if (float_mode_p)
11115 	*total = sparc_costs->float_mul;
11116       else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
11117 	*total = COSTS_N_INSNS (25);
11118       else
11119 	{
11120 	  int bit_cost;
11121 
11122 	  bit_cost = 0;
11123 	  if (sparc_costs->int_mul_bit_factor)
11124 	    {
11125 	      int nbits;
11126 
11127 	      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11128 		{
11129 		  unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
11130 		  for (nbits = 0; value != 0; value &= value - 1)
11131 		    nbits++;
11132 		}
11133 	      else
11134 		nbits = 7;
11135 
11136 	      if (nbits < 3)
11137 		nbits = 3;
11138 	      bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11139 	      bit_cost = COSTS_N_INSNS (bit_cost);
11140 	    }
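	  /* For example, with int_mul_bit_factor == 2, multiplying by a
	     constant with 7 bits set adds
	     bit_cost = COSTS_N_INSNS ((7 - 3) / 2) = COSTS_N_INSNS (2).  */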
11141 
11142 	  if (mode == DImode || !TARGET_HARD_MUL)
11143 	    *total = sparc_costs->int_mulX + bit_cost;
11144 	  else
11145 	    *total = sparc_costs->int_mul + bit_cost;
11146 	}
11147       return false;
11148 
11149     case ASHIFT:
11150     case ASHIFTRT:
11151     case LSHIFTRT:
11152       *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11153       return false;
11154 
11155     case DIV:
11156     case UDIV:
11157     case MOD:
11158     case UMOD:
11159       if (float_mode_p)
11160 	{
11161 	  if (mode == DFmode)
11162 	    *total = sparc_costs->float_div_df;
11163 	  else
11164 	    *total = sparc_costs->float_div_sf;
11165 	}
11166       else
11167 	{
11168 	  if (mode == DImode)
11169 	    *total = sparc_costs->int_divX;
11170 	  else
11171 	    *total = sparc_costs->int_div;
11172 	}
11173       return false;
11174 
11175     case NEG:
11176       if (! float_mode_p)
11177 	{
11178 	  *total = COSTS_N_INSNS (1);
11179 	  return false;
11180 	}
11181       /* FALLTHRU */
11182 
11183     case ABS:
11184     case FLOAT:
11185     case UNSIGNED_FLOAT:
11186     case FIX:
11187     case UNSIGNED_FIX:
11188     case FLOAT_EXTEND:
11189     case FLOAT_TRUNCATE:
11190       *total = sparc_costs->float_move;
11191       return false;
11192 
11193     case SQRT:
11194       if (mode == DFmode)
11195 	*total = sparc_costs->float_sqrt_df;
11196       else
11197 	*total = sparc_costs->float_sqrt_sf;
11198       return false;
11199 
11200     case COMPARE:
11201       if (float_mode_p)
11202 	*total = sparc_costs->float_cmp;
11203       else
11204 	*total = COSTS_N_INSNS (1);
11205       return false;
11206 
11207     case IF_THEN_ELSE:
11208       if (float_mode_p)
11209 	*total = sparc_costs->float_cmove;
11210       else
11211 	*total = sparc_costs->int_cmove;
11212       return false;
11213 
11214     case IOR:
11215       /* Handle the NAND vector patterns.  */
11216       if (sparc_vector_mode_supported_p (mode)
11217 	  && GET_CODE (XEXP (x, 0)) == NOT
11218 	  && GET_CODE (XEXP (x, 1)) == NOT)
11219 	{
11220 	  *total = COSTS_N_INSNS (1);
11221 	  return true;
11222 	}
11223       else
11224         return false;
11225 
11226     default:
11227       return false;
11228     }
11229 }
11230 
11231 /* Return true if CLASS is either GENERAL_REGS or I64_REGS.  */
11232 
11233 static inline bool
11234 general_or_i64_p (reg_class_t rclass)
11235 {
11236   return (rclass == GENERAL_REGS || rclass == I64_REGS);
11237 }
11238 
11239 /* Implement TARGET_REGISTER_MOVE_COST.  */
11240 
11241 static int
11242 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11243 			  reg_class_t from, reg_class_t to)
11244 {
11245   bool need_memory = false;
11246 
11247   if (from == FPCC_REGS || to == FPCC_REGS)
11248     need_memory = true;
11249   else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
11250 	   || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
11251     {
11252       if (TARGET_VIS3)
11253 	{
11254 	  int size = GET_MODE_SIZE (mode);
11255 	  if (size == 8 || size == 4)
11256 	    {
11257 	      if (! TARGET_ARCH32 || size == 4)
11258 		return 4;
11259 	      else
11260 		return 6;
11261 	    }
11262 	}
11263       need_memory = true;
11264     }
11265 
11266   if (need_memory)
11267     {
11268       if (sparc_cpu == PROCESSOR_ULTRASPARC
11269 	  || sparc_cpu == PROCESSOR_ULTRASPARC3
11270 	  || sparc_cpu == PROCESSOR_NIAGARA
11271 	  || sparc_cpu == PROCESSOR_NIAGARA2
11272 	  || sparc_cpu == PROCESSOR_NIAGARA3
11273 	  || sparc_cpu == PROCESSOR_NIAGARA4
11274 	  || sparc_cpu == PROCESSOR_NIAGARA7)
11275 	return 12;
11276 
11277       return 6;
11278     }
11279 
11280   return 2;
11281 }
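
/* For concreteness, a trace of the costs above: with VIS3, a 4-byte
   (or, on 64-bit, 8-byte) move between the integer and FP register
   files costs 4, and an 8-byte move split into 4-byte pieces on
   32-bit costs 6; without VIS3 such moves go through memory, costing
   12 on the UltraSPARC/Niagara families and 6 elsewhere.  These
   figures simply restate the code above, they are not independent
   measurements.  */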

/* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
   This is achieved by means of a manual dynamic stack space allocation in
   the current frame.  We make the assumption that SEQ doesn't contain any
   function calls, with the possible exception of calls to the GOT helper.  */

static void
emit_and_preserve (rtx seq, rtx reg, rtx reg2)
{
  /* We must preserve the lowest 16 words for the register save area.  */
  HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
  /* We really need only 2 words of fresh stack space.  */
  HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);

  rtx slot
    = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
					     SPARC_STACK_BIAS + offset));

  emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
  emit_insn (gen_rtx_SET (slot, reg));
  if (reg2)
    emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
			    reg2));
  emit_insn (seq);
  if (reg2)
    emit_insn (gen_rtx_SET (reg2,
			    adjust_address (slot, word_mode, UNITS_PER_WORD)));
  emit_insn (gen_rtx_SET (reg, slot));
  emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
}
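
/* Schematically, the sequence emitted above looks like this (register
   names and the store/load widths are illustrative, not literal
   output):

	add	%sp, -SIZE, %sp
	stx	REG, [%sp+BIAS+OFFSET]
	stx	REG2, [%sp+BIAS+OFFSET+UNITS_PER_WORD]	! if REG2
	...SEQ...
	ldx	[%sp+BIAS+OFFSET+UNITS_PER_WORD], REG2	! if REG2
	ldx	[%sp+BIAS+OFFSET], REG
	sub	%sp, -SIZE, %sp  */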

/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at address
   (*THIS + VCALL_OFFSET) should be additionally added to THIS.  */

static void
sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
		       HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		       tree function)
{
  rtx this_rtx, funexp;
  rtx_insn *insn;
  unsigned int int_arg_first;

  reload_completed = 1;
  epilogue_completed = 1;

  emit_note (NOTE_INSN_PROLOGUE_END);

  if (TARGET_FLAT)
    {
      sparc_leaf_function_p = 1;

      int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
    }
  else if (flag_delayed_branch)
    {
      /* We will emit a regular sibcall below, so we need to instruct
	 output_sibcall that we are in a leaf function.  */
      sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;

      /* This will cause final.c to invoke leaf_renumber_regs so we
	 must behave as if we were in a not-yet-leafified function.  */
      int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
    }
  else
    {
      /* We will emit the sibcall manually below, so we will need to
	 manually spill non-leaf registers.  */
      sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;

      /* We really are in a leaf function.  */
      int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
    }

  /* Find the "this" pointer.  Normally in %o0, but in ARCH64 if the function
     returns a structure, the structure return pointer is there instead.  */
  if (TARGET_ARCH64
      && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
  else
    this_rtx = gen_rtx_REG (Pmode, int_arg_first);

  /* Add DELTA.  When possible use a plain add, otherwise load it into
     a register first.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);

      if (! SPARC_SIMM13_P (delta))
	{
	  rtx scratch = gen_rtx_REG (Pmode, 1);
	  emit_move_insn (scratch, delta_rtx);
	  delta_rtx = scratch;
	}

      /* THIS_RTX += DELTA.  */
      emit_insn (gen_add2_insn (this_rtx, delta_rtx));
    }

  /* Add the word at address (*THIS_RTX + VCALL_OFFSET).  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx scratch = gen_rtx_REG (Pmode, 1);

      gcc_assert (vcall_offset < 0);

      /* SCRATCH = *THIS_RTX.  */
      emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));

      /* Prepare for adding VCALL_OFFSET.  The difficulty is that we
	 may not have any available scratch register at this point.  */
      if (SPARC_SIMM13_P (vcall_offset))
	;
      /* This is the case if ARCH64 (unless -ffixed-g5 is passed).  */
      else if (! fixed_regs[5]
	       /* The below sequence is made up of at least 2 insns,
		  while the default method may need only one.  */
	       && vcall_offset < -8192)
	{
	  rtx scratch2 = gen_rtx_REG (Pmode, 5);
	  emit_move_insn (scratch2, vcall_offset_rtx);
	  vcall_offset_rtx = scratch2;
	}
      else
	{
	  rtx increment = GEN_INT (-4096);

	  /* VCALL_OFFSET is a negative number whose typical range can be
	     estimated as -32768..0 in 32-bit mode.  In almost all cases
	     it is therefore cheaper to emit multiple add insns than
	     spilling and loading the constant into a register (at least
	     6 insns).  */
	  while (! SPARC_SIMM13_P (vcall_offset))
	    {
	      emit_insn (gen_add2_insn (scratch, increment));
	      vcall_offset += 4096;
	    }
	  vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
	}
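
      /* As a worked example, a hypothetical VCALL_OFFSET of -20000
	 would be handled by the loop above with four add insns
	 (-20000 + 4*4096 = -3616, which satisfies SPARC_SIMM13_P),
	 leaving -3616 as the immediate for the load below.  */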

      /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET).  */
      emit_move_insn (scratch, gen_rtx_MEM (Pmode,
					    gen_rtx_PLUS (Pmode,
							  scratch,
							  vcall_offset_rtx)));

      /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET).  */
      emit_insn (gen_add2_insn (this_rtx, scratch));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);

  if (flag_delayed_branch)
    {
      funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
      insn = emit_call_insn (gen_sibcall (funexp));
      SIBLING_CALL_P (insn) = 1;
    }
  else
    {
      /* The hoops we have to jump through in order to generate a sibcall
	 without using delay slots...  */
      rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);

      if (flag_pic)
	{
	  spill_reg = gen_rtx_REG (word_mode, 15);  /* %o7 */
	  start_sequence ();
	  load_got_register ();  /* clobbers %o7 */
	  scratch = sparc_legitimize_pic_address (funexp, scratch);
	  seq = get_insns ();
	  end_sequence ();
	  emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
	}
      else if (TARGET_ARCH32)
	{
	  emit_insn (gen_rtx_SET (scratch,
				  gen_rtx_HIGH (SImode, funexp)));
	  emit_insn (gen_rtx_SET (scratch,
				  gen_rtx_LO_SUM (SImode, scratch, funexp)));
	}
      else  /* TARGET_ARCH64 */
	{
	  switch (sparc_cmodel)
	    {
	    case CM_MEDLOW:
	    case CM_MEDMID:
	      /* The destination can serve as a temporary.  */
	      sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
	      break;

	    case CM_MEDANY:
	    case CM_EMBMEDANY:
	      /* The destination cannot serve as a temporary.  */
	      spill_reg = gen_rtx_REG (DImode, 15);  /* %o7 */
	      start_sequence ();
	      sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
	      seq = get_insns ();
	      end_sequence ();
	      emit_and_preserve (seq, spill_reg, 0);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}

      emit_jump_insn (gen_indirect_jump (scratch));
    }

  emit_barrier ();

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worthwhile.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */
  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  reload_completed = 0;
  epilogue_completed = 0;
}
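
/* In C terms, the thunk emitted above is roughly equivalent to the
   following illustrative function, where DELTA, VCALL_OFFSET and
   FUNCTION stand for the actual arguments:

     thunk (char *this, ...)
     {
       this += DELTA;
       if (VCALL_OFFSET)
	 this += *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);
       return FUNCTION (this, ...);   // emitted as a tail call
     }  */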

/* Return true if sparc_output_mi_thunk would be able to output the
   assembler code for the thunk function specified by the arguments
   it is passed, and false otherwise.  */
static bool
sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
			   HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
			   HOST_WIDE_INT vcall_offset,
			   const_tree function ATTRIBUTE_UNUSED)
{
  /* Bound the loop used in the default method above.  */
  return (vcall_offset >= -32768 || ! fixed_regs[5]);
}

/* How to allocate a 'struct machine_function'.  */

static struct machine_function *
sparc_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void
sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  switch (size)
    {
    case 4:
      fputs ("\t.word\t%r_tls_dtpoff32(", file);
      break;
    case 8:
      fputs ("\t.xword\t%r_tls_dtpoff64(", file);
      break;
    default:
      gcc_unreachable ();
    }
  output_addr_const (file, x);
  fputs (")", file);
}
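
/* For instance, with SIZE == 4 and X a reference to a symbol "sym",
   the function above prints:

	.word	%r_tls_dtpoff32(sym)  */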

/* Do whatever processing is required at the end of a file.  */

static void
sparc_file_end (void)
{
  /* If we need to emit the special GOT helper function, do so now.  */
  if (got_helper_rtx)
    {
      const char *name = XSTR (got_helper_rtx, 0);
      const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
#ifdef DWARF2_UNWIND_INFO
      bool do_cfi;
#endif

      if (USE_HIDDEN_LINKONCE)
	{
	  tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
				  get_identifier (name),
				  build_function_type_list (void_type_node,
							    NULL_TREE));
	  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
					   NULL_TREE, void_type_node);
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
	  DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
	  DECL_VISIBILITY_SPECIFIED (decl) = 1;
	  resolve_unique_section (decl, 0, flag_function_sections);
	  allocate_struct_function (decl, true);
	  cfun->is_thunk = 1;
	  current_function_decl = decl;
	  init_varasm_status ();
	  assemble_start_function (decl, name);
	}
      else
	{
	  const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
	  switch_to_section (text_section);
	  if (align > 0)
	    ASM_OUTPUT_ALIGN (asm_out_file, align);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

#ifdef DWARF2_UNWIND_INFO
      do_cfi = dwarf2out_do_cfi_asm ();
      if (do_cfi)
	fprintf (asm_out_file, "\t.cfi_startproc\n");
#endif
      if (flag_delayed_branch)
	fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
		 reg_name, reg_name);
      else
	fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
		 reg_name, reg_name);
#ifdef DWARF2_UNWIND_INFO
      if (do_cfi)
	fprintf (asm_out_file, "\t.cfi_endproc\n");
#endif
    }

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();

#ifdef TARGET_SOLARIS
  solaris_file_end ();
#endif
}

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
/* Implement TARGET_MANGLE_TYPE.  */

static const char *
sparc_mangle_type (const_tree type)
{
  if (!TARGET_64BIT
      && TYPE_MAIN_VARIANT (type) == long_double_type_node
      && TARGET_LONG_DOUBLE_128)
    return "g";

  /* For all other types, use normal C++ mangling.  */
  return NULL;
}
#endif

/* Expand a membar instruction for various use cases.  Both the LOAD_STORE
   and BEFORE_AFTER arguments are of the form X_Y.  They are two-bit masks
   where bit 0 indicates that X is true, and bit 1 indicates that Y is
   true.  */

void
sparc_emit_membar_for_model (enum memmodel model,
			     int load_store, int before_after)
{
  /* Bits for the MEMBAR mmask field.  */
  const int LoadLoad = 1;
  const int StoreLoad = 2;
  const int LoadStore = 4;
  const int StoreStore = 8;

  int mm = 0, implied = 0;

  switch (sparc_memory_model)
    {
    case SMM_SC:
      /* Sequential Consistency.  All memory transactions are immediately
	 visible in sequential execution order.  No barriers needed.  */
      implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
      break;

    case SMM_TSO:
      /* Total Store Ordering: all memory transactions with store semantics
	 are followed by an implied StoreStore.  */
      implied |= StoreStore;

      /* If we're not looking for a raw barrier (before+after), then atomic
	 operations get the benefit of being both load and store.  */
      if (load_store == 3 && before_after == 1)
	implied |= StoreLoad;
      /* FALLTHRU */

    case SMM_PSO:
      /* Partial Store Ordering: all memory transactions with load semantics
	 are followed by an implied LoadLoad | LoadStore.  */
      implied |= LoadLoad | LoadStore;

      /* If we're not looking for a raw barrier (before+after), then atomic
	 operations get the benefit of being both load and store.  */
      if (load_store == 3 && before_after == 2)
	implied |= StoreLoad | StoreStore;
      /* FALLTHRU */

    case SMM_RMO:
      /* Relaxed Memory Ordering: no implicit bits.  */
      break;

    default:
      gcc_unreachable ();
    }

  if (before_after & 1)
    {
      if (is_mm_release (model) || is_mm_acq_rel (model)
	  || is_mm_seq_cst (model))
	{
	  if (load_store & 1)
	    mm |= LoadLoad | StoreLoad;
	  if (load_store & 2)
	    mm |= LoadStore | StoreStore;
	}
    }
  if (before_after & 2)
    {
      if (is_mm_acquire (model) || is_mm_acq_rel (model)
	  || is_mm_seq_cst (model))
	{
	  if (load_store & 1)
	    mm |= LoadLoad | LoadStore;
	  if (load_store & 2)
	    mm |= StoreLoad | StoreStore;
	}
    }

  /* Remove the bits implied by the system memory model.  */
  mm &= ~implied;

  /* For raw barriers (before+after), always emit a barrier.
     This will become a compile-time barrier if needed.  */
  if (mm || before_after == 3)
    emit_insn (gen_membar (GEN_INT (mm)));
}
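
/* Tracing the logic above for the TSO model: a seq_cst atomic
   operation (LOAD_STORE == 3) requires no membar on either side,
   because every bit requested for the memory model is already implied
   by TSO plus the load/store semantics of the atomic itself, so MM
   ends up zero.  Under RMO, by contrast, nothing is implied and the
   full LoadLoad|StoreLoad|LoadStore|StoreStore barrier is emitted.  */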

/* Expand code to perform an 8- or 16-bit compare and swap by doing 32-bit
   compare and swap on the word containing the byte or half-word.  */

static void
sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
				  rtx oldval, rtx newval)
{
  rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
  rtx addr = gen_reg_rtx (Pmode);
  rtx off = gen_reg_rtx (SImode);
  rtx oldv = gen_reg_rtx (SImode);
  rtx newv = gen_reg_rtx (SImode);
  rtx oldvalue = gen_reg_rtx (SImode);
  rtx newvalue = gen_reg_rtx (SImode);
  rtx res = gen_reg_rtx (SImode);
  rtx resv = gen_reg_rtx (SImode);
  rtx memsi, val, mask, cc;

  emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));

  if (Pmode != SImode)
    addr1 = gen_lowpart (SImode, addr1);
  emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));

  memsi = gen_rtx_MEM (SImode, addr);
  set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
  MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);

  val = copy_to_reg (memsi);

  emit_insn (gen_rtx_SET (off,
			  gen_rtx_XOR (SImode, off,
				       GEN_INT (GET_MODE (mem) == QImode
						? 3 : 2))));

  emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));

  if (GET_MODE (mem) == QImode)
    mask = force_reg (SImode, GEN_INT (0xff));
  else
    mask = force_reg (SImode, GEN_INT (0xffff));

  emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));

  emit_insn (gen_rtx_SET (val,
			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
				       val)));

  oldval = gen_lowpart (SImode, oldval);
  emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));

  newval = gen_lowpart_common (SImode, newval);
  emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));

  emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));

  emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));

  rtx_code_label *end_label = gen_label_rtx ();
  rtx_code_label *loop_label = gen_label_rtx ();
  emit_label (loop_label);

  emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));

  emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));

  emit_move_insn (bool_result, const1_rtx);

  emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));

  emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);

  emit_insn (gen_rtx_SET (resv,
			  gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
				       res)));

  emit_move_insn (bool_result, const0_rtx);

  cc = gen_compare_reg_1 (NE, resv, val);
  emit_insn (gen_rtx_SET (val, resv));

  /* Use cbranchcc4 to separate the compare and branch!  */
  emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
				  cc, const0_rtx, loop_label));

  emit_label (end_label);

  emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));

  emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));

  emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
}
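
/* A rough C-level rendition of the loop generated above (illustrative
   only; the real sequence is emitted as RTL):

     background = *word_addr & ~mask;
     for (;;)
       {
	 expected = background | (oldval << off);
	 desired  = background | (newval << off);
	 observed = CAS32 (word_addr, expected, desired);
	 if (observed == expected)
	   break;				// success
	 if ((observed & ~mask) == background)
	   break;				// real mismatch: failure
	 background = observed & ~mask;		// bystander bytes changed
       }  */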

/* Expand code to perform a compare-and-swap.  */

void
sparc_expand_compare_and_swap (rtx operands[])
{
  rtx bval, retval, mem, oldval, newval;
  machine_mode mode;
  enum memmodel model;

  bval = operands[0];
  retval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  model = (enum memmodel) INTVAL (operands[6]);
  mode = GET_MODE (mem);

  sparc_emit_membar_for_model (model, 3, 1);

  if (reg_overlap_mentioned_p (retval, oldval))
    oldval = copy_to_reg (oldval);

  if (mode == QImode || mode == HImode)
    sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
  else
    {
      rtx (*gen) (rtx, rtx, rtx, rtx);
      rtx x;

      if (mode == SImode)
	gen = gen_atomic_compare_and_swapsi_1;
      else
	gen = gen_atomic_compare_and_swapdi_1;
      emit_insn (gen (retval, mem, oldval, newval));

      x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
      if (x != bval)
	convert_move (bval, x, 1);
    }

  sparc_emit_membar_for_model (model, 3, 2);
}

void
sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
{
  rtx t_1, t_2, t_3;

  sel = gen_lowpart (DImode, sel);
  switch (vmode)
    {
    case V2SImode:
      /* inp = xxxxxxxAxxxxxxxB */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ....xxxxxxxAxxx. */
      sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
				 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
      t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
				 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .......B */
      /* t_1 = ...A.... */
      sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
      /* sel = ...A...B */
      sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
      /* sel = AAAABBBB * 4 */
      t_1 = force_reg (SImode, GEN_INT (0x01230123));
      /* sel = { A*4, A*4+1, A*4+2, ... } */
      break;

    case V4HImode:
      /* inp = xxxAxxxBxxxCxxxD */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ..xxxAxxxBxxxCxx */
      /* t_2 = ....xxxAxxxBxxxC */
      /* t_3 = ......xxxAxxxBxx */
      sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
				 GEN_INT (0x07),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
				 GEN_INT (0x0700),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
				 GEN_INT (0x070000),
				 NULL_RTX, 1, OPTAB_DIRECT);
      t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
				 GEN_INT (0x07000000),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .......D */
      /* t_1 = .....C.. */
      /* t_2 = ...B.... */
      /* t_3 = .A...... */
      sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
      sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
      /* sel = .A.B.C.D */
      sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
      /* sel = AABBCCDD * 2 */
      t_1 = force_reg (SImode, GEN_INT (0x01010101));
      /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
      break;

    case V8QImode:
      /* input = xAxBxCxDxExFxGxH */
      sel = expand_simple_binop (DImode, AND, sel,
				 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
					  | 0x0f0f0f0f),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .A.B.C.D.E.F.G.H */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ..A.B.C.D.E.F.G. */
      sel = expand_simple_binop (DImode, IOR, sel, t_1,
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .AABBCCDDEEFFGGH */
      sel = expand_simple_binop (DImode, AND, sel,
				 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
					  | 0xff00ff),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ..AB..CD..EF..GH */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ....AB..CD..EF.. */
      sel = expand_simple_binop (DImode, IOR, sel, t_1,
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ..ABABCDCDEFEFGH */
      sel = expand_simple_binop (DImode, AND, sel,
				 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ....ABCD....EFGH */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
				 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ........ABCD.... */
      sel = gen_lowpart (SImode, sel);
      t_1 = gen_lowpart (SImode, t_1);
      break;

    default:
      gcc_unreachable ();
    }

  /* Always perform the final addition/merge within the bmask insn.  */
  emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
}
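
/* Put differently: for V4HImode, a selector element J ends up
   requesting source bytes 2*J and 2*J+1.  The multiply by 0x22 places
   2*J in both nibbles of the corresponding byte, and the 0x01010101
   operand added by the bmask insn turns the low nibbles into 2*J+1.
   This reading just restates the transformations above.  */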

/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
sparc_frame_pointer_required (void)
{
  /* If the stack pointer is dynamically modified in the function, it cannot
     serve as the frame pointer.  */
  if (cfun->calls_alloca)
    return true;

  /* If the function receives nonlocal gotos, it needs to save the frame
     pointer in the nonlocal_goto_save_area object.  */
  if (cfun->has_nonlocal_label)
    return true;

  /* In flat mode, that's it.  */
  if (TARGET_FLAT)
    return false;

  /* Otherwise, the frame pointer is required if the function isn't leaf, but
     we cannot use sparc_leaf_function_p since it hasn't been computed yet.  */
  return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
}

/* The way this is structured, we can't eliminate SFP in favor of SP
   if the frame pointer is required: we want to use the SFP->HFP elimination
   in that case.  But the test in update_eliminables doesn't know we are
   assuming below that we only do the former elimination.  */

static bool
sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
{
  return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
}

/* Return the hard frame pointer directly to bypass the stack bias.  */

static rtx
sparc_builtin_setjmp_frame_value (void)
{
  return hard_frame_pointer_rtx;
}

/* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
   they won't be allocated.  */

static void
sparc_conditional_register_usage (void)
{
  if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  /* If the user has passed -f{fixed,call-{used,saved}}-g5, then honor it.  */
  if (TARGET_ARCH32 && fixed_regs[5])
    fixed_regs[5] = 1;
  else if (TARGET_ARCH64 && fixed_regs[5] == 2)
    fixed_regs[5] = 0;
  if (! TARGET_V9)
    {
      int regno;
      for (regno = SPARC_FIRST_V9_FP_REG;
	   regno <= SPARC_LAST_V9_FP_REG;
	   regno++)
	fixed_regs[regno] = 1;
      /* %fcc0 is used by v8 and v9.  */
      for (regno = SPARC_FIRST_V9_FCC_REG + 1;
	   regno <= SPARC_LAST_V9_FCC_REG;
	   regno++)
	fixed_regs[regno] = 1;
    }
  if (! TARGET_FPU)
    {
      int regno;
      for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
	fixed_regs[regno] = 1;
    }
  /* If the user has passed -f{fixed,call-{used,saved}}-g2, then honor it.
     Likewise with g3 and g4.  */
  if (fixed_regs[2] == 2)
    fixed_regs[2] = ! TARGET_APP_REGS;
  if (fixed_regs[3] == 2)
    fixed_regs[3] = ! TARGET_APP_REGS;
  if (TARGET_ARCH32 && fixed_regs[4] == 2)
    fixed_regs[4] = ! TARGET_APP_REGS;
  else if (TARGET_CM_EMBMEDANY)
    fixed_regs[4] = 1;
  else if (fixed_regs[4] == 2)
    fixed_regs[4] = 0;
  if (TARGET_FLAT)
    {
      int regno;
      /* Disable leaf functions.  */
      memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	leaf_reg_remap [regno] = regno;
    }
  if (TARGET_VIS)
    global_regs[SPARC_GSR_REG] = 1;
}

/* Implement TARGET_PREFERRED_RELOAD_CLASS:

   - We can't load constants into FP registers.
   - We can't load FP constants into integer registers when soft-float,
     because there is no soft-float pattern with a r/F constraint.
   - We can't load FP constants into integer registers for TFmode unless
     it is 0.0L, because there is no movtf pattern with a r/F constraint.
   - Try to reload integer constants (symbolic or otherwise) back into
     registers directly, rather than having them dumped to memory.  */

static reg_class_t
sparc_preferred_reload_class (rtx x, reg_class_t rclass)
{
  machine_mode mode = GET_MODE (x);
  if (CONSTANT_P (x))
    {
      if (FP_REG_CLASS_P (rclass)
	  || rclass == GENERAL_OR_FP_REGS
	  || rclass == GENERAL_OR_EXTRA_FP_REGS
	  || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
	  || (mode == TFmode && ! const_zero_operand (x, mode)))
	return NO_REGS;

      if (GET_MODE_CLASS (mode) == MODE_INT)
	return GENERAL_REGS;

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  if (! FP_REG_CLASS_P (rclass)
	      || !(const_zero_operand (x, mode)
		   || const_all_ones_operand (x, mode)))
	    return NO_REGS;
	}
    }

  if (TARGET_VIS3
      && ! TARGET_ARCH64
      && (rclass == EXTRA_FP_REGS
	  || rclass == GENERAL_OR_EXTRA_FP_REGS))
    {
      int regno = true_regnum (x);

      if (SPARC_INT_REG_P (regno))
	return (rclass == EXTRA_FP_REGS
		? FP_REGS : GENERAL_OR_FP_REGS);
    }

  return rclass;
}

/* Output a wide multiply instruction in V8+ mode.  INSN is the instruction,
   OPERANDS are its operands and OPCODE is the mnemonic to be used.  */

const char *
output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
{
  char mulstr[32];

  gcc_assert (! TARGET_ARCH64);

  if (sparc_check_64 (operands[1], insn) <= 0)
    output_asm_insn ("srl\t%L1, 0, %L1", operands);
  if (which_alternative == 1)
    output_asm_insn ("sllx\t%H1, 32, %H1", operands);
  if (GET_CODE (operands[2]) == CONST_INT)
    {
      if (which_alternative == 1)
	{
	  output_asm_insn ("or\t%L1, %H1, %H1", operands);
	  sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
	  output_asm_insn (mulstr, operands);
	  return "srlx\t%L0, 32, %H0";
	}
      else
	{
	  output_asm_insn ("sllx\t%H1, 32, %3", operands);
	  output_asm_insn ("or\t%L1, %3, %3", operands);
	  sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
	  output_asm_insn (mulstr, operands);
	  output_asm_insn ("srlx\t%3, 32, %H0", operands);
	  return "mov\t%3, %L0";
	}
    }
  else if (rtx_equal_p (operands[1], operands[2]))
    {
      if (which_alternative == 1)
	{
	  output_asm_insn ("or\t%L1, %H1, %H1", operands);
	  sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
	  output_asm_insn (mulstr, operands);
	  return "srlx\t%L0, 32, %H0";
	}
      else
	{
	  output_asm_insn ("sllx\t%H1, 32, %3", operands);
	  output_asm_insn ("or\t%L1, %3, %3", operands);
	  sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
	  output_asm_insn (mulstr, operands);
	  output_asm_insn ("srlx\t%3, 32, %H0", operands);
	  return "mov\t%3, %L0";
	}
    }
  if (sparc_check_64 (operands[2], insn) <= 0)
    output_asm_insn ("srl\t%L2, 0, %L2", operands);
  if (which_alternative == 1)
    {
      output_asm_insn ("or\t%L1, %H1, %H1", operands);
      output_asm_insn ("sllx\t%H2, 32, %L1", operands);
      output_asm_insn ("or\t%L2, %L1, %L1", operands);
      sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
      output_asm_insn (mulstr, operands);
      return "srlx\t%L0, 32, %H0";
    }
  else
    {
      output_asm_insn ("sllx\t%H1, 32, %3", operands);
      output_asm_insn ("sllx\t%H2, 32, %4", operands);
      output_asm_insn ("or\t%L1, %3, %3", operands);
      output_asm_insn ("or\t%L2, %4, %4", operands);
      sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
      output_asm_insn (mulstr, operands);
      output_asm_insn ("srlx\t%3, 32, %H0", operands);
      return "mov\t%3, %L0";
    }
}

/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
   all fields of TARGET to ELT by means of the VIS2 BSHUFFLE insn.  MODE
   and INNER_MODE are the modes describing TARGET.  */

static void
vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
		      machine_mode inner_mode)
{
  rtx t1, final_insn, sel;
  int bmask;

  t1 = gen_reg_rtx (mode);

  elt = convert_modes (SImode, inner_mode, elt, true);
  emit_move_insn (gen_lowpart (SImode, t1), elt);

  switch (mode)
    {
    case V2SImode:
      final_insn = gen_bshufflev2si_vis (target, t1, t1);
      bmask = 0x45674567;
      break;
    case V4HImode:
      final_insn = gen_bshufflev4hi_vis (target, t1, t1);
      bmask = 0x67676767;
      break;
    case V8QImode:
      final_insn = gen_bshufflev8qi_vis (target, t1, t1);
      bmask = 0x77777777;
      break;
    default:
      gcc_unreachable ();
    }

  sel = force_reg (SImode, GEN_INT (bmask));
  emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
  emit_insn (final_insn);
}

/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
   all fields of TARGET to ELT in V8QI by means of the VIS FPMERGE insn.  */

static void
vector_init_fpmerge (rtx target, rtx elt)
{
  rtx t1, t2, t2_low, t3, t3_low;

  t1 = gen_reg_rtx (V4QImode);
  elt = convert_modes (SImode, QImode, elt, true);
  emit_move_insn (gen_lowpart (SImode, t1), elt);

  t2 = gen_reg_rtx (V8QImode);
  t2_low = gen_lowpart (V4QImode, t2);
  emit_insn (gen_fpmerge_vis (t2, t1, t1));

  t3 = gen_reg_rtx (V8QImode);
  t3_low = gen_lowpart (V4QImode, t3);
  emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));

  emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
}

/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
   all fields of TARGET to ELT in V4HI by means of the VIS FALIGNDATA insn.  */

static void
vector_init_faligndata (rtx target, rtx elt)
{
  rtx t1 = gen_reg_rtx (V4HImode);
  int i;

  elt = convert_modes (SImode, HImode, elt, true);
  emit_move_insn (gen_lowpart (SImode, t1), elt);

  emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
				  force_reg (SImode, GEN_INT (6)),
				  const0_rtx));

  for (i = 0; i < 4; i++)
    emit_insn (gen_faligndatav4hi_vis (target, t1, target));
}
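
/* The alignaddr above sets the GSR alignment field to 6, so each
   faligndata extracts bytes 6..13 of the 16-byte concatenation
   T1:TARGET, i.e. it shifts the replicated halfword in from T1 two
   bytes at a time; after four iterations every halfword of TARGET
   equals ELT.  This description merely paraphrases the loop above.  */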

/* Emit code to initialize TARGET to values for individual fields VALS.  */

void
sparc_expand_vector_init (rtx target, rtx vals)
{
  const machine_mode mode = GET_MODE (target);
  const machine_mode inner_mode = GET_MODE_INNER (mode);
  const int n_elts = GET_MODE_NUNITS (mode);
  int i, n_var = 0;
  bool all_same = true;
  rtx mem;

  for (i = 0; i < n_elts; i++)
    {
      rtx x = XVECEXP (vals, 0, i);
      if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
	n_var++;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
    {
      if (GET_MODE_SIZE (inner_mode) == 4)
	{
	  emit_move_insn (gen_lowpart (SImode, target),
			  gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
	  return;
	}
      else if (GET_MODE_SIZE (inner_mode) == 8)
	{
	  emit_move_insn (gen_lowpart (DImode, target),
			  gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
	  return;
	}
    }
  else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
	   && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
    {
      emit_move_insn (gen_highpart (word_mode, target),
		      gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
      emit_move_insn (gen_lowpart (word_mode, target),
		      gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
      return;
    }

  if (all_same && GET_MODE_SIZE (mode) == 8)
    {
      if (TARGET_VIS2)
	{
	  vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
	  return;
	}
      if (mode == V8QImode)
	{
	  vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
	  return;
	}
      if (mode == V4HImode)
	{
	  vector_init_faligndata (target, XVECEXP (vals, 0, 0));
	  return;
	}
    }

  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}

/* Implement TARGET_SECONDARY_RELOAD.  */

static reg_class_t
sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
			machine_mode mode, secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;

  sri->icode = CODE_FOR_nothing;
  sri->extra_cost = 0;

  /* We need a temporary when loading/storing a HImode/QImode value
     between memory and the FPU registers.  This can happen when combine puts
     a paradoxical subreg in a float/fix conversion insn.  */
  if (FP_REG_CLASS_P (rclass)
      && (mode == HImode || mode == QImode)
      && (GET_CODE (x) == MEM
	  || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
	      && true_regnum (x) == -1)))
    return GENERAL_REGS;

  /* On 32-bit we need a temporary when loading/storing a DFmode value
     between unaligned memory and the upper FPU registers.  */
  if (TARGET_ARCH32
      && rclass == EXTRA_FP_REGS
      && mode == DFmode
      && GET_CODE (x) == MEM
      && ! mem_min_alignment (x, 8))
    return FP_REGS;

  if (((TARGET_CM_MEDANY
	&& symbolic_operand (x, mode))
       || (TARGET_CM_EMBMEDANY
	   && text_segment_operand (x, mode)))
      && ! flag_pic)
    {
      if (in_p)
	sri->icode = direct_optab_handler (reload_in_optab, mode);
      else
	sri->icode = direct_optab_handler (reload_out_optab, mode);
      return NO_REGS;
    }

  if (TARGET_VIS3 && TARGET_ARCH32)
    {
      int regno = true_regnum (x);

      /* When using VIS3 fp<-->int register moves, on 32-bit we have
	 to move 8-byte values in 4-byte pieces.  This only works via
	 FP_REGS, and not via EXTRA_FP_REGS.  Therefore if we try to
	 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
	 an FP_REGS intermediate move.  */
      if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
	  || ((general_or_i64_p (rclass)
	       || rclass == GENERAL_OR_FP_REGS)
	      && SPARC_FP_REG_P (regno)))
	{
	  sri->extra_cost = 2;
	  return FP_REGS;
	}
    }

  return NO_REGS;
}
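
/* For example (hypothetical register choices): with VIS3 on 32-bit, a
   DImode reload between a GENERAL_REGS pseudo and %f32..%f62 cannot be
   done directly, so the code above routes it through FP_REGS and adds
   an extra cost of 2.  */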

/* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
   OPERANDS[0] in MODE.  OPERANDS[1] is the operator of the condition.  */

bool
sparc_expand_conditional_move (machine_mode mode, rtx *operands)
{
  enum rtx_code rc = GET_CODE (operands[1]);
  machine_mode cmp_mode;
  rtx cc_reg, dst, cmp;

  cmp = operands[1];
  if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
    return false;

  if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
    cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);

  cmp_mode = GET_MODE (XEXP (cmp, 0));
  rc = GET_CODE (cmp);

  dst = operands[0];
  if (! rtx_equal_p (operands[2], dst)
      && ! rtx_equal_p (operands[3], dst))
    {
      if (reg_overlap_mentioned_p (dst, cmp))
	dst = gen_reg_rtx (mode);

      emit_move_insn (dst, operands[3]);
    }
  else if (operands[2] == dst)
    {
      operands[2] = operands[3];

      if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
	rc = reverse_condition_maybe_unordered (rc);
      else
	rc = reverse_condition (rc);
    }

  if (XEXP (cmp, 1) == const0_rtx
      && GET_CODE (XEXP (cmp, 0)) == REG
      && cmp_mode == DImode
      && v9_regcmp_p (rc))
    cc_reg = XEXP (cmp, 0);
  else
    cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));

  cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);

  emit_insn (gen_rtx_SET (dst,
			  gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));

  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  return true;
}

/* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
   into OPERANDS[0] in MODE, depending on the outcome of the comparison of
   OPERANDS[4] and OPERANDS[5].  OPERANDS[3] is the operator of the condition.
   FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
   code to be used for the condition mask.  */

void
sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
{
  rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
  enum rtx_code code = GET_CODE (operands[3]);

  mask = gen_reg_rtx (Pmode);
  cop0 = operands[4];
  cop1 = operands[5];
  if (code == LT || code == GE)
    {
      rtx t;

      code = swap_condition (code);
      t = cop0; cop0 = cop1; cop1 = t;
    }

  gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);

  fcmp = gen_rtx_UNSPEC (Pmode,
			 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
			 fcode);

  cmask = gen_rtx_UNSPEC (DImode,
			  gen_rtvec (2, mask, gsr),
			  ccode);

  bshuf = gen_rtx_UNSPEC (mode,
			  gen_rtvec (3, operands[1], operands[2], gsr),
			  UNSPEC_BSHUFFLE);

  emit_insn (gen_rtx_SET (mask, fcmp));
  emit_insn (gen_rtx_SET (gsr, cmask));

  emit_insn (gen_rtx_SET (operands[0], bshuf));
}

/* On sparc, any mode which naturally allocates into the float
   registers should return 4 here.  */

unsigned int
sparc_regmode_natural_size (machine_mode mode)
{
  int size = UNITS_PER_WORD;

  if (TARGET_ARCH64)
    {
      enum mode_class mclass = GET_MODE_CLASS (mode);

      if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
	size = 4;
    }

  return size;
}

/* Return TRUE if it is a good idea to tie two pseudo registers
   when one has mode MODE1 and one has mode MODE2.
   If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
   for any hard reg, then this must be FALSE for correct output.

   For V9 we have to deal with the fact that only the lower 32 floating
   point registers are 32-bit addressable.  */

bool
sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  enum mode_class mclass1, mclass2;
  unsigned short size1, size2;

  if (mode1 == mode2)
    return true;

  mclass1 = GET_MODE_CLASS (mode1);
  mclass2 = GET_MODE_CLASS (mode2);
  if (mclass1 != mclass2)
    return false;

  if (! TARGET_V9)
    return true;

  /* Classes are the same and we are V9 so we have to deal with upper
     vs. lower floating point registers.  If one of the modes is a
     4-byte mode, and the other is not, we have to mark them as not
     tieable because only the lower 32 floating point registers are
     addressable 32-bits at a time.

     We can't just test explicitly for SFmode, otherwise we won't
     cover the vector mode cases properly.  */

  if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
    return true;

  size1 = GET_MODE_SIZE (mode1);
  size2 = GET_MODE_SIZE (mode2);
  if ((size1 > 4 && size2 == 4)
      || (size2 > 4 && size1 == 4))
    return false;

  return true;
}

/* Implement TARGET_CSTORE_MODE.  */

static machine_mode
sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
{
  return (TARGET_ARCH64 ? DImode : SImode);
}

/* Return the compound expression made of T1 and T2.  */

static inline tree
compound_expr (tree t1, tree t2)
{
  return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
}

/* Implement the TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_FPU)
    return;

  const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
  const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;

  /* We generate the equivalent of feholdexcept (&fenv_var):

       unsigned int fenv_var;
       __builtin_store_fsr (&fenv_var);

       unsigned int tmp1_var;
       tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);

       __builtin_load_fsr (&tmp1_var);  */

  tree fenv_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (fenv_var) = 1;
  tree fenv_addr = build_fold_addr_expr (fenv_var);
  tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
  tree hold_stfsr
    = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
	      build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);

  tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (tmp1_var) = 1;
  tree masked_fenv_var
    = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
	      build_int_cst (unsigned_type_node,
			     ~(accrued_exception_mask | trap_enable_mask)));
  tree hold_mask
    = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
	      NULL_TREE, NULL_TREE);

  tree tmp1_addr = build_fold_addr_expr (tmp1_var);
  tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
  tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);

  *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);

  /* We reload the value of tmp1_var to clear the exceptions:

       __builtin_load_fsr (&tmp1_var);  */

  *clear = build_call_expr (ldfsr, 1, tmp1_addr);

  /* We generate the equivalent of feupdateenv (&fenv_var):

       unsigned int tmp2_var;
       __builtin_store_fsr (&tmp2_var);

       __builtin_load_fsr (&fenv_var);

       if (SPARC_LOW_FE_EXCEPT_VALUES)
	 tmp2_var >>= 5;
       __atomic_feraiseexcept ((int) tmp2_var);  */

  tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (tmp2_var) = 1;
  tree tmp2_addr = build_fold_addr_expr (tmp2_var);
  tree update_stfsr
    = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
	      build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);

  tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);

  tree atomic_feraiseexcept
    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  tree update_call
    = build_call_expr (atomic_feraiseexcept, 1,
		       fold_convert (integer_type_node, tmp2_var));

  if (SPARC_LOW_FE_EXCEPT_VALUES)
    {
      tree shifted_tmp2_var
	= build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
		  build_int_cst (unsigned_type_node, 5));
      tree update_shift
	= build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
      update_call = compound_expr (update_shift, update_call);
    }

  *update
    = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
}

#include "gt-sparc.h"