1 /* Copyright (C) 2006-2018 Free Software Foundation, Inc.
2 
3    This file is free software; you can redistribute it and/or modify it under
4    the terms of the GNU General Public License as published by the Free
5    Software Foundation; either version 3 of the License, or (at your option)
6    any later version.
7 
8    This file is distributed in the hope that it will be useful, but WITHOUT
9    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
11    for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with GCC; see the file COPYING3.  If not see
15    <http://www.gnu.org/licenses/>.  */
16 
17 #define IN_TARGET_CODE 1
18 
19 #include "config.h"
20 #include "system.h"
21 #include "coretypes.h"
22 #include "backend.h"
23 #include "target.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "cfghooks.h"
28 #include "cfgloop.h"
29 #include "df.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "stringpool.h"
33 #include "attribs.h"
34 #include "expmed.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "diagnostic-core.h"
40 #include "insn-attr.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "explow.h"
47 #include "expr.h"
48 #include "output.h"
49 #include "cfgrtl.h"
50 #include "cfgbuild.h"
51 #include "langhooks.h"
52 #include "reload.h"
53 #include "sched-int.h"
54 #include "params.h"
55 #include "gimplify.h"
56 #include "tm-constrs.h"
57 #include "ddg.h"
58 #include "dumpfile.h"
59 #include "builtins.h"
60 #include "rtl-iter.h"
61 
62 /* This file should be included last.  */
63 #include "target-def.h"
64 
65 /* Builtin types, data and prototypes. */
66 
67 enum spu_builtin_type_index
68 {
69   SPU_BTI_END_OF_PARAMS,
70 
71   /* We create new type nodes for these. */
72   SPU_BTI_V16QI,
73   SPU_BTI_V8HI,
74   SPU_BTI_V4SI,
75   SPU_BTI_V2DI,
76   SPU_BTI_V4SF,
77   SPU_BTI_V2DF,
78   SPU_BTI_UV16QI,
79   SPU_BTI_UV8HI,
80   SPU_BTI_UV4SI,
81   SPU_BTI_UV2DI,
82 
83   /* A 16-byte type. (Implemented with V16QI_type_node) */
84   SPU_BTI_QUADWORD,
85 
86   /* These all correspond to intSI_type_node */
87   SPU_BTI_7,
88   SPU_BTI_S7,
89   SPU_BTI_U7,
90   SPU_BTI_S10,
91   SPU_BTI_S10_4,
92   SPU_BTI_U14,
93   SPU_BTI_16,
94   SPU_BTI_S16,
95   SPU_BTI_S16_2,
96   SPU_BTI_U16,
97   SPU_BTI_U16_2,
98   SPU_BTI_U18,
99 
100   /* These correspond to the standard types */
101   SPU_BTI_INTQI,
102   SPU_BTI_INTHI,
103   SPU_BTI_INTSI,
104   SPU_BTI_INTDI,
105 
106   SPU_BTI_UINTQI,
107   SPU_BTI_UINTHI,
108   SPU_BTI_UINTSI,
109   SPU_BTI_UINTDI,
110 
111   SPU_BTI_FLOAT,
112   SPU_BTI_DOUBLE,
113 
114   SPU_BTI_VOID,
115   SPU_BTI_PTR,
116 
117   SPU_BTI_MAX
118 };
119 
120 #define V16QI_type_node               (spu_builtin_types[SPU_BTI_V16QI])
121 #define V8HI_type_node                (spu_builtin_types[SPU_BTI_V8HI])
122 #define V4SI_type_node                (spu_builtin_types[SPU_BTI_V4SI])
123 #define V2DI_type_node                (spu_builtin_types[SPU_BTI_V2DI])
124 #define V4SF_type_node                (spu_builtin_types[SPU_BTI_V4SF])
125 #define V2DF_type_node                (spu_builtin_types[SPU_BTI_V2DF])
126 #define unsigned_V16QI_type_node      (spu_builtin_types[SPU_BTI_UV16QI])
127 #define unsigned_V8HI_type_node       (spu_builtin_types[SPU_BTI_UV8HI])
128 #define unsigned_V4SI_type_node       (spu_builtin_types[SPU_BTI_UV4SI])
129 #define unsigned_V2DI_type_node       (spu_builtin_types[SPU_BTI_UV2DI])
130 
131 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
132 
133 struct spu_builtin_range
134 {
135   int low, high;
136 };
137 
138 static struct spu_builtin_range spu_builtin_range[] = {
139   {-0x40ll, 0x7fll},		/* SPU_BTI_7     */
140   {-0x40ll, 0x3fll},		/* SPU_BTI_S7    */
141   {0ll, 0x7fll},		/* SPU_BTI_U7    */
142   {-0x200ll, 0x1ffll},		/* SPU_BTI_S10   */
143   {-0x2000ll, 0x1fffll},	/* SPU_BTI_S10_4 */
144   {0ll, 0x3fffll},		/* SPU_BTI_U14   */
145   {-0x8000ll, 0xffffll},	/* SPU_BTI_16    */
146   {-0x8000ll, 0x7fffll},	/* SPU_BTI_S16   */
147   {-0x20000ll, 0x1ffffll},	/* SPU_BTI_S16_2 */
148   {0ll, 0xffffll},		/* SPU_BTI_U16   */
149   {0ll, 0x3ffffll},		/* SPU_BTI_U16_2 */
150   {0ll, 0x3ffffll},		/* SPU_BTI_U18   */
151 };
152 
153 
154 /*  Target specific attribute specifications.  */
155 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
156 
157 /*  Prototypes and external defs.  */
158 static int get_pipe (rtx_insn *insn);
159 static int spu_naked_function_p (tree func);
160 static int mem_is_padded_component_ref (rtx x);
161 static void fix_range (const char *);
162 static rtx spu_expand_load (rtx, rtx, rtx, int);
163 
164 /* Which instruction set architecture to use.  */
165 int spu_arch;
166 /* Which cpu are we tuning for.  */
167 int spu_tune;
168 
169 /* The hardware requires 8 insns between a hint and the branch it
170    affects.  This variable describes how many rtl instructions the
171    compiler needs to see before inserting a hint, and then the compiler
172    will insert enough nops to make it at least 8 insns.  The default is
173    for the compiler to allow up to 2 nops to be emitted.  The nops are
174    inserted in pairs, so we round down. */
175 int spu_hint_dist = (8*4) - (2*4);
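/* For example, with the default of two nops allowed this is
   (8 insns * 4 bytes) - (2 nops * 4 bytes) = 24 bytes; the value is
   recomputed from spu_max_nops in spu_option_override.  */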
176 
177 enum spu_immediate {
178   SPU_NONE,
179   SPU_IL,
180   SPU_ILA,
181   SPU_ILH,
182   SPU_ILHU,
183   SPU_ORI,
184   SPU_ORHI,
185   SPU_ORBI,
186   SPU_IOHL
187 };
188 enum immediate_class
189 {
190   IC_POOL,			/* constant pool */
191   IC_IL1,			/* one il* instruction */
192   IC_IL2,			/* both ilhu and iohl instructions */
193   IC_IL1s,			/* one il* instruction */
194   IC_IL2s,			/* both ilhu and iohl instructions */
195   IC_FSMBI,			/* the fsmbi instruction */
196   IC_CPAT,			/* one of the c*d instructions */
197   IC_FSMBI2			/* fsmbi plus 1 other instruction */
198 };
199 
200 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
201 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
202 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
203 static enum immediate_class classify_immediate (rtx op,
204 						machine_mode mode);
205 
206 /* Pointer mode for __ea references.  */
207 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
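/* E.g. building with -mea64 sets spu_ea_model to 64, so __ea pointers are
   handled in DImode; under the default -mea32 they stay in SImode.  */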
208 
209 
210 /* Define the structure for the machine field in struct function.  */
211 struct GTY(()) machine_function
212 {
213   /* Register to use for PIC accesses.  */
214   rtx pic_reg;
215 };
216 
217 /* How to allocate a 'struct machine_function'.  */
218 static struct machine_function *
219 spu_init_machine_status (void)
220 {
221   return ggc_cleared_alloc<machine_function> ();
222 }
223 
224 /* Implement TARGET_OPTION_OVERRIDE.  */
225 static void
226 spu_option_override (void)
227 {
228   /* Set up function hooks.  */
229   init_machine_status = spu_init_machine_status;
230 
231   /* Small loops will be unpeeled at -O3.  For SPU it is more important
232      to keep code small by default.  */
233   if (!flag_unroll_loops && !flag_peel_loops)
234     maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
235 			   global_options.x_param_values,
236 			   global_options_set.x_param_values);
237 
238   flag_omit_frame_pointer = 1;
239 
240   /* Functions must be 8-byte aligned so we correctly handle dual issue.  */
241   if (align_functions < 8)
242     align_functions = 8;
243 
244   spu_hint_dist = 8*4 - spu_max_nops*4;
245   if (spu_hint_dist < 0)
246     spu_hint_dist = 0;
247 
248   if (spu_fixed_range_string)
249     fix_range (spu_fixed_range_string);
250 
251   /* Determine processor architectural level.  */
252   if (spu_arch_string)
253     {
254       if (strcmp (&spu_arch_string[0], "cell") == 0)
255         spu_arch = PROCESSOR_CELL;
256       else if (strcmp (&spu_arch_string[0], "celledp") == 0)
257         spu_arch = PROCESSOR_CELLEDP;
258       else
259         error ("bad value (%s) for -march= switch", spu_arch_string);
260     }
261 
262   /* Determine processor to tune for.  */
263   if (spu_tune_string)
264     {
265       if (strcmp (&spu_tune_string[0], "cell") == 0)
266         spu_tune = PROCESSOR_CELL;
267       else if (strcmp (&spu_tune_string[0], "celledp") == 0)
268         spu_tune = PROCESSOR_CELLEDP;
269       else
270         error ("bad value (%s) for -mtune= switch", spu_tune_string);
271     }
272 
273   /* Change defaults according to the processor architecture.  */
274   if (spu_arch == PROCESSOR_CELLEDP)
275     {
276       /* If no command line option has been otherwise specified, change
277 	 the default to -mno-safe-hints on celledp -- only the original
278 	 Cell/B.E. processors require this workaround.  */
279       if (!(target_flags_explicit & MASK_SAFE_HINTS))
280 	target_flags &= ~MASK_SAFE_HINTS;
281     }
282 
283   REAL_MODE_FORMAT (SFmode) = &spu_single_format;
284 }
285 
286 /* Implement TARGET_HARD_REGNO_NREGS.  */
287 
288 static unsigned int
289 spu_hard_regno_nregs (unsigned int, machine_mode mode)
290 {
291   return CEIL (GET_MODE_BITSIZE (mode), MAX_FIXED_MODE_SIZE);
292 }
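/* Assuming 128-bit SPU registers (MAX_FIXED_MODE_SIZE of 128), every
   scalar or vector mode of up to 128 bits therefore occupies a single
   hard register here.  */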
293 
294 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
295    struct attribute_spec.handler.  */
296 
297 /* True if MODE is valid for the target.  By "valid", we mean able to
298    be manipulated in non-trivial ways.  In particular, this means all
299    the arithmetic is supported.  */
300 static bool
301 spu_scalar_mode_supported_p (scalar_mode mode)
302 {
303   switch (mode)
304     {
305     case E_QImode:
306     case E_HImode:
307     case E_SImode:
308     case E_SFmode:
309     case E_DImode:
310     case E_TImode:
311     case E_DFmode:
312       return true;
313 
314     default:
315       return false;
316     }
317 }
318 
319 /* Similarly for vector modes.  "Supported" here is less strict.  At
320    least some operations are supported; need to check optabs or builtins
321    for further details.  */
322 static bool
323 spu_vector_mode_supported_p (machine_mode mode)
324 {
325   switch (mode)
326     {
327     case E_V16QImode:
328     case E_V8HImode:
329     case E_V4SImode:
330     case E_V2DImode:
331     case E_V4SFmode:
332     case E_V2DFmode:
333       return true;
334 
335     default:
336       return false;
337     }
338 }
339 
340 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
341    least significant bytes of the outer mode.  This function returns
342    TRUE for the SUBREGs where this is correct.  */
343 int
344 valid_subreg (rtx op)
345 {
346   machine_mode om = GET_MODE (op);
347   machine_mode im = GET_MODE (SUBREG_REG (op));
348   return om != VOIDmode && im != VOIDmode
349     && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
350 	|| (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
351 	|| (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
352 }
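/* For example, (subreg:SI (reg:HI ...)) passes because both modes fit in
   4 bytes, and a 16-byte (subreg:V4SI (reg:TI ...)) passes because the
   sizes match; a paradoxical (subreg:DI (reg:SI ...)) is rejected since
   the SImode value need not occupy the low bytes of the DImode view.  */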
353 
354 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
355    and adjust the start offset.  */
356 static rtx
357 adjust_operand (rtx op, HOST_WIDE_INT * start)
358 {
359   machine_mode mode;
360   int op_size;
361   /* Strip any paradoxical SUBREG.  */
362   if (GET_CODE (op) == SUBREG
363       && (GET_MODE_BITSIZE (GET_MODE (op))
364 	  > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
365     {
366       if (start)
367 	*start -=
368 	  GET_MODE_BITSIZE (GET_MODE (op)) -
369 	  GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
370       op = SUBREG_REG (op);
371     }
372   /* If it is smaller than SI, ensure we end up with a SUBREG.  */
373   op_size = GET_MODE_BITSIZE (GET_MODE (op));
374   if (op_size < 32)
375     {
376       if (start)
377 	*start += 32 - op_size;
378       op_size = 32;
379     }
380   /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
381   mode = int_mode_for_size (op_size, 0).require ();
382   if (mode != GET_MODE (op))
383     op = gen_rtx_SUBREG (mode, op, 0);
384   return op;
385 }
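/* E.g. a QImode register comes back wrapped in an SImode SUBREG with
   *start increased by 24, reflecting where the byte sits within the
   32-bit view.  */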
386 
387 void
388 spu_expand_extv (rtx ops[], int unsignedp)
389 {
390   rtx dst = ops[0], src = ops[1];
391   HOST_WIDE_INT width = INTVAL (ops[2]);
392   HOST_WIDE_INT start = INTVAL (ops[3]);
393   HOST_WIDE_INT align_mask;
394   rtx s0, s1, mask, r0;
395 
396   gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
397 
398   if (MEM_P (src))
399     {
400       /* First, determine if we need 1 TImode load or 2.  We need only 1
401          if the bits being extracted do not cross the alignment boundary
402          as determined by the MEM and its address. */
403 
404       align_mask = -MEM_ALIGN (src);
405       if ((start & align_mask) == ((start + width - 1) & align_mask))
406 	{
407 	  /* Alignment is sufficient for 1 load. */
408 	  s0 = gen_reg_rtx (TImode);
409 	  r0 = spu_expand_load (s0, 0, src, start / 8);
410 	  start &= 7;
411 	  if (r0)
412 	    emit_insn (gen_rotqby_ti (s0, s0, r0));
413 	}
414       else
415 	{
416 	  /* Need 2 loads. */
417 	  s0 = gen_reg_rtx (TImode);
418 	  s1 = gen_reg_rtx (TImode);
419 	  r0 = spu_expand_load (s0, s1, src, start / 8);
420 	  start &= 7;
421 
422 	  gcc_assert (start + width <= 128);
423 	  if (r0)
424 	    {
425 	      rtx r1 = gen_reg_rtx (SImode);
426 	      mask = gen_reg_rtx (TImode);
427 	      emit_move_insn (mask, GEN_INT (-1));
428 	      emit_insn (gen_rotqby_ti (s0, s0, r0));
429 	      emit_insn (gen_rotqby_ti (s1, s1, r0));
430 	      if (GET_CODE (r0) == CONST_INT)
431 		r1 = GEN_INT (INTVAL (r0) & 15);
432 	      else
433 		emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
434 	      emit_insn (gen_shlqby_ti (mask, mask, r1));
435 	      emit_insn (gen_selb (s0, s1, s0, mask));
436 	    }
437 	}
438 
439     }
440   else if (GET_CODE (src) == SUBREG)
441     {
442       rtx r = SUBREG_REG (src);
443       gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
444       s0 = gen_reg_rtx (TImode);
445       if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
446 	emit_insn (gen_rtx_SET (s0, gen_rtx_ZERO_EXTEND (TImode, r)));
447       else
448 	emit_move_insn (s0, src);
449     }
450   else
451     {
452       gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
453       s0 = gen_reg_rtx (TImode);
454       emit_move_insn (s0, src);
455     }
456 
457   /* Now s0 is TImode and contains the bits to extract at start. */
458 
459   if (start)
460     emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
461 
462   if (128 - width)
463     s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
464 
465   emit_move_insn (dst, s0);
466 }
467 
468 void
469 spu_expand_insv (rtx ops[])
470 {
471   HOST_WIDE_INT width = INTVAL (ops[1]);
472   HOST_WIDE_INT start = INTVAL (ops[2]);
473   unsigned HOST_WIDE_INT maskbits;
474   machine_mode dst_mode;
475   rtx dst = ops[0], src = ops[3];
476   int dst_size;
477   rtx mask;
478   rtx shift_reg;
479   int shift;
480 
481 
482   if (GET_CODE (ops[0]) == MEM)
483     dst = gen_reg_rtx (TImode);
484   else
485     dst = adjust_operand (dst, &start);
486   dst_mode = GET_MODE (dst);
487   dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
488 
489   if (CONSTANT_P (src))
490     {
491       machine_mode m =
492 	(width <= 32 ? SImode : width <= 64 ? DImode : TImode);
493       src = force_reg (m, convert_to_mode (m, src, 0));
494     }
495   src = adjust_operand (src, 0);
496 
497   mask = gen_reg_rtx (dst_mode);
498   shift_reg = gen_reg_rtx (dst_mode);
499   shift = dst_size - start - width;
500 
501   /* It's not safe to use subreg here because the compiler assumes
502      that the SUBREG_REG is right justified in the SUBREG. */
503   convert_move (shift_reg, src, 1);
504 
505   if (shift > 0)
506     {
507       switch (dst_mode)
508 	{
509 	case E_SImode:
510 	  emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
511 	  break;
512 	case E_DImode:
513 	  emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
514 	  break;
515 	case E_TImode:
516 	  emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
517 	  break;
518 	default:
519 	  abort ();
520 	}
521     }
522   else if (shift < 0)
523     abort ();
524 
525   switch (dst_size)
526     {
527     case 32:
528       maskbits = (~(unsigned HOST_WIDE_INT)0 << (32 - width - start));
529       if (start)
530 	maskbits += ((unsigned HOST_WIDE_INT)1 << (32 - start));
531       emit_move_insn (mask, GEN_INT (maskbits));
532       break;
533     case 64:
534       maskbits = (~(unsigned HOST_WIDE_INT)0 << (64 - width - start));
535       if (start)
536 	maskbits += ((unsigned HOST_WIDE_INT)1 << (64 - start));
537       emit_move_insn (mask, GEN_INT (maskbits));
538       break;
539     case 128:
540       {
541 	unsigned char arr[16];
542 	int i = start / 8;
543 	memset (arr, 0, sizeof (arr));
544 	arr[i] = 0xff >> (start & 7);
545 	for (i++; i <= (start + width - 1) / 8; i++)
546 	  arr[i] = 0xff;
547 	arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
548 	emit_move_insn (mask, array_to_constant (TImode, arr));
549       }
550       break;
551     default:
552       abort ();
553     }
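  /* Worked example for the 32-bit case above: with start == 4 and
     width == 8, ~0 << 20 sets bits 20 and up, and adding 1 << 28 carries
     through and clears everything above bit 27, leaving 0x0ff00000, the
     eight mask bits that begin 4 bits in from the MSB.  */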
554   if (GET_CODE (ops[0]) == MEM)
555     {
556       rtx low = gen_reg_rtx (SImode);
557       rtx rotl = gen_reg_rtx (SImode);
558       rtx mask0 = gen_reg_rtx (TImode);
559       rtx addr;
560       rtx addr0;
561       rtx addr1;
562       rtx mem;
563 
564       addr = force_reg (Pmode, XEXP (ops[0], 0));
565       addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
566       emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
567       emit_insn (gen_negsi2 (rotl, low));
568       emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
569       emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
570       mem = change_address (ops[0], TImode, addr0);
571       set_mem_alias_set (mem, 0);
572       emit_move_insn (dst, mem);
573       emit_insn (gen_selb (dst, dst, shift_reg, mask0));
574       if (start + width > MEM_ALIGN (ops[0]))
575 	{
576 	  rtx shl = gen_reg_rtx (SImode);
577 	  rtx mask1 = gen_reg_rtx (TImode);
578 	  rtx dst1 = gen_reg_rtx (TImode);
579 	  rtx mem1;
580 	  addr1 = plus_constant (Pmode, addr, 16);
581 	  addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
582 	  emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
583 	  emit_insn (gen_shlqby_ti (mask1, mask, shl));
584 	  mem1 = change_address (ops[0], TImode, addr1);
585 	  set_mem_alias_set (mem1, 0);
586 	  emit_move_insn (dst1, mem1);
587 	  emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
588 	  emit_move_insn (mem1, dst1);
589 	}
590       emit_move_insn (mem, dst);
591     }
592   else
593     emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
594 }
595 
596 
597 int
598 spu_expand_block_move (rtx ops[])
599 {
600   HOST_WIDE_INT bytes, align, offset;
601   rtx src, dst, sreg, dreg, target;
602   int i;
603   if (GET_CODE (ops[2]) != CONST_INT
604       || GET_CODE (ops[3]) != CONST_INT
605       || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
606     return 0;
607 
608   bytes = INTVAL (ops[2]);
609   align = INTVAL (ops[3]);
610 
611   if (bytes <= 0)
612     return 1;
613 
614   dst = ops[0];
615   src = ops[1];
616 
617   if (align == 16)
618     {
619       for (offset = 0; offset + 16 <= bytes; offset += 16)
620 	{
621 	  dst = adjust_address (ops[0], V16QImode, offset);
622 	  src = adjust_address (ops[1], V16QImode, offset);
623 	  emit_move_insn (dst, src);
624 	}
625       if (offset < bytes)
626 	{
627 	  rtx mask;
628 	  unsigned char arr[16] = { 0 };
629 	  for (i = 0; i < bytes - offset; i++)
630 	    arr[i] = 0xff;
631 	  dst = adjust_address (ops[0], V16QImode, offset);
632 	  src = adjust_address (ops[1], V16QImode, offset);
633 	  mask = gen_reg_rtx (V16QImode);
634 	  sreg = gen_reg_rtx (V16QImode);
635 	  dreg = gen_reg_rtx (V16QImode);
636 	  target = gen_reg_rtx (V16QImode);
637 	  emit_move_insn (mask, array_to_constant (V16QImode, arr));
638 	  emit_move_insn (dreg, dst);
639 	  emit_move_insn (sreg, src);
640 	  emit_insn (gen_selb (target, dreg, sreg, mask));
641 	  emit_move_insn (dst, target);
642 	}
643       return 1;
644     }
645   return 0;
646 }
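/* For example, a 35-byte copy at 16-byte alignment becomes two whole
   V16QImode moves plus a final selb under a mask whose first three bytes
   are 0xff, merging the remaining 3 source bytes into the destination
   quadword.  */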
647 
648 enum spu_comp_code
649 { SPU_EQ, SPU_GT, SPU_GTU };
650 
651 int spu_comp_icode[12][3] = {
652  {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
653  {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
654  {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
655  {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
656  {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
657  {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
658  {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
659  {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
660  {CODE_FOR_ceq_v8hi,  CODE_FOR_cgt_v8hi,  CODE_FOR_clgt_v8hi},
661  {CODE_FOR_ceq_v4si,  CODE_FOR_cgt_v4si,  CODE_FOR_clgt_v4si},
662  {CODE_FOR_ceq_v4sf,  CODE_FOR_cgt_v4sf, 0},
663  {CODE_FOR_ceq_v2df,  CODE_FOR_cgt_v2df, 0},
664 };
665 
666 /* Generate a compare for CODE.  Return a brand-new rtx that represents
667    the result of the compare.  GCC can figure this out too if we don't
668    provide all variations of compares, but since GCC always wants to use
669    WORD_MODE, we can generate better code in most cases if we do it
670    ourselves.  */
671 void
672 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
673 {
674   int reverse_compare = 0;
675   int reverse_test = 0;
676   rtx compare_result, eq_result;
677   rtx comp_rtx, eq_rtx;
678   machine_mode comp_mode;
679   machine_mode op_mode;
680   enum spu_comp_code scode, eq_code;
681   enum insn_code ior_code;
682   enum rtx_code code = GET_CODE (cmp);
683   rtx op0 = XEXP (cmp, 0);
684   rtx op1 = XEXP (cmp, 1);
685   int index;
686   int eq_test = 0;
687 
688   /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
689      and so on, to keep the constant in operand 1. */
690   if (GET_CODE (op1) == CONST_INT)
691     {
692       HOST_WIDE_INT val = INTVAL (op1) - 1;
693       if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
694 	switch (code)
695 	  {
696 	  case GE:
697 	    op1 = GEN_INT (val);
698 	    code = GT;
699 	    break;
700 	  case LT:
701 	    op1 = GEN_INT (val);
702 	    code = LE;
703 	    break;
704 	  case GEU:
705 	    op1 = GEN_INT (val);
706 	    code = GTU;
707 	    break;
708 	  case LTU:
709 	    op1 = GEN_INT (val);
710 	    code = LEU;
711 	    break;
712 	  default:
713 	    break;
714 	  }
715     }
716 
717   /* However, if we generate an integer result, performing a reverse test
718      would require an extra negation, so avoid that where possible.  */
719   if (GET_CODE (op1) == CONST_INT && is_set == 1)
720     {
721       HOST_WIDE_INT val = INTVAL (op1) + 1;
722       if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
723 	switch (code)
724 	  {
725 	  case LE:
726 	    op1 = GEN_INT (val);
727 	    code = LT;
728 	    break;
729 	  case LEU:
730 	    op1 = GEN_INT (val);
731 	    code = LTU;
732 	    break;
733 	  default:
734 	    break;
735 	  }
736     }
737 
738   comp_mode = SImode;
739   op_mode = GET_MODE (op0);
740 
741   switch (code)
742     {
743     case GE:
744       scode = SPU_GT;
745       if (HONOR_NANS (op_mode))
746 	{
747 	  reverse_compare = 0;
748 	  reverse_test = 0;
749 	  eq_test = 1;
750 	  eq_code = SPU_EQ;
751 	}
752       else
753 	{
754 	  reverse_compare = 1;
755 	  reverse_test = 1;
756 	}
757       break;
758     case LE:
759       scode = SPU_GT;
760       if (HONOR_NANS (op_mode))
761 	{
762 	  reverse_compare = 1;
763 	  reverse_test = 0;
764 	  eq_test = 1;
765 	  eq_code = SPU_EQ;
766 	}
767       else
768 	{
769 	  reverse_compare = 0;
770 	  reverse_test = 1;
771 	}
772       break;
773     case LT:
774       reverse_compare = 1;
775       reverse_test = 0;
776       scode = SPU_GT;
777       break;
778     case GEU:
779       reverse_compare = 1;
780       reverse_test = 1;
781       scode = SPU_GTU;
782       break;
783     case LEU:
784       reverse_compare = 0;
785       reverse_test = 1;
786       scode = SPU_GTU;
787       break;
788     case LTU:
789       reverse_compare = 1;
790       reverse_test = 0;
791       scode = SPU_GTU;
792       break;
793     case NE:
794       reverse_compare = 0;
795       reverse_test = 1;
796       scode = SPU_EQ;
797       break;
798 
799     case EQ:
800       scode = SPU_EQ;
801       break;
802     case GT:
803       scode = SPU_GT;
804       break;
805     case GTU:
806       scode = SPU_GTU;
807       break;
808     default:
809       scode = SPU_EQ;
810       break;
811     }
812 
813   switch (op_mode)
814     {
815     case E_QImode:
816       index = 0;
817       comp_mode = QImode;
818       break;
819     case E_HImode:
820       index = 1;
821       comp_mode = HImode;
822       break;
823     case E_SImode:
824       index = 2;
825       break;
826     case E_DImode:
827       index = 3;
828       break;
829     case E_TImode:
830       index = 4;
831       break;
832     case E_SFmode:
833       index = 5;
834       break;
835     case E_DFmode:
836       index = 6;
837       break;
838     case E_V16QImode:
839       index = 7;
840       comp_mode = op_mode;
841       break;
842     case E_V8HImode:
843       index = 8;
844       comp_mode = op_mode;
845       break;
846     case E_V4SImode:
847       index = 9;
848       comp_mode = op_mode;
849       break;
850     case E_V4SFmode:
851       index = 10;
852       comp_mode = V4SImode;
853       break;
854     case E_V2DFmode:
855       index = 11;
856       comp_mode = V2DImode;
857       break;
858     case E_V2DImode:
859     default:
860       abort ();
861     }
862 
863   if (GET_MODE (op1) == DFmode
864       && (scode != SPU_GT && scode != SPU_EQ))
865     abort ();
866 
867   if (is_set == 0 && op1 == const0_rtx
868       && (GET_MODE (op0) == SImode
869 	  || GET_MODE (op0) == HImode
870 	  || GET_MODE (op0) == QImode) && scode == SPU_EQ)
871     {
872       /* Don't need to set a register with the result when we are
873          comparing against zero and branching. */
874       reverse_test = !reverse_test;
875       compare_result = op0;
876     }
877   else
878     {
879       compare_result = gen_reg_rtx (comp_mode);
880 
881       if (reverse_compare)
882 	{
883 	  rtx t = op1;
884 	  op1 = op0;
885 	  op0 = t;
886 	}
887 
888       if (spu_comp_icode[index][scode] == 0)
889 	abort ();
890 
891       if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
892 	  (op0, op_mode))
893 	op0 = force_reg (op_mode, op0);
894       if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
895 	  (op1, op_mode))
896 	op1 = force_reg (op_mode, op1);
897       comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
898 							 op0, op1);
899       if (comp_rtx == 0)
900 	abort ();
901       emit_insn (comp_rtx);
902 
903       if (eq_test)
904         {
905           eq_result = gen_reg_rtx (comp_mode);
906           eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
907 							     op0, op1);
908           if (eq_rtx == 0)
909 	    abort ();
910           emit_insn (eq_rtx);
911           ior_code = optab_handler (ior_optab, comp_mode);
912           gcc_assert (ior_code != CODE_FOR_nothing);
913           emit_insn (GEN_FCN (ior_code)
914 		     (compare_result, compare_result, eq_result));
915         }
916     }
917 
918   if (is_set == 0)
919     {
920       rtx bcomp;
921       rtx loc_ref;
922 
923       /* We don't have branch on QI compare insns, so we convert the
924          QI compare result to a HI result. */
925       if (comp_mode == QImode)
926 	{
927 	  rtx old_res = compare_result;
928 	  compare_result = gen_reg_rtx (HImode);
929 	  comp_mode = HImode;
930 	  emit_insn (gen_extendqihi2 (compare_result, old_res));
931 	}
932 
933       if (reverse_test)
934 	bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
935       else
936 	bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
937 
938       loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
939       emit_jump_insn (gen_rtx_SET (pc_rtx,
940 				   gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
941 							 loc_ref, pc_rtx)));
942     }
943   else if (is_set == 2)
944     {
945       rtx target = operands[0];
946       int compare_size = GET_MODE_BITSIZE (comp_mode);
947       int target_size = GET_MODE_BITSIZE (GET_MODE (target));
948       machine_mode mode = int_mode_for_size (target_size, 0).require ();
949       rtx select_mask;
950       rtx op_t = operands[2];
951       rtx op_f = operands[3];
952 
953       /* The result of the comparison can be SI, HI or QI mode.  Create a
954          mask based on that result. */
955       if (target_size > compare_size)
956 	{
957 	  select_mask = gen_reg_rtx (mode);
958 	  emit_insn (gen_extend_compare (select_mask, compare_result));
959 	}
960       else if (target_size < compare_size)
961 	select_mask =
962 	  gen_rtx_SUBREG (mode, compare_result,
963 			  (compare_size - target_size) / BITS_PER_UNIT);
964       else if (comp_mode != mode)
965 	select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
966       else
967 	select_mask = compare_result;
968 
969       if (GET_MODE (target) != GET_MODE (op_t)
970 	  || GET_MODE (target) != GET_MODE (op_f))
971 	abort ();
972 
973       if (reverse_test)
974 	emit_insn (gen_selb (target, op_t, op_f, select_mask));
975       else
976 	emit_insn (gen_selb (target, op_f, op_t, select_mask));
977     }
978   else
979     {
980       rtx target = operands[0];
981       if (reverse_test)
982 	emit_insn (gen_rtx_SET (compare_result,
983 				gen_rtx_NOT (comp_mode, compare_result)));
984       if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
985 	emit_insn (gen_extendhisi2 (target, compare_result));
986       else if (GET_MODE (target) == SImode
987 	       && GET_MODE (compare_result) == QImode)
988 	emit_insn (gen_extend_compare (target, compare_result));
989       else
990 	emit_move_insn (target, compare_result);
991     }
992 }
993 
994 HOST_WIDE_INT
995 const_double_to_hwint (rtx x)
996 {
997   HOST_WIDE_INT val;
998   if (GET_MODE (x) == SFmode)
999     REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), val);
1000   else if (GET_MODE (x) == DFmode)
1001     {
1002       long l[2];
1003       REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
1004       val = l[0];
1005       val = (val << 32) | (l[1] & 0xffffffff);
1006     }
1007   else
1008     abort ();
1009   return val;
1010 }
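/* Note that for DFmode the most significant target word arrives in l[0],
   so it ends up in the high half of the returned HOST_WIDE_INT.  */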
1011 
1012 rtx
1013 hwint_to_const_double (machine_mode mode, HOST_WIDE_INT v)
1014 {
1015   long tv[2];
1016   REAL_VALUE_TYPE rv;
1017   gcc_assert (mode == SFmode || mode == DFmode);
1018 
1019   if (mode == SFmode)
1020     tv[0] = (v << 32) >> 32;
1021   else if (mode == DFmode)
1022     {
1023       tv[1] = (v << 32) >> 32;
1024       tv[0] = v >> 32;
1025     }
1026   real_from_target (&rv, tv, mode);
1027   return const_double_from_real_value (rv, mode);
1028 }
1029 
1030 void
1031 print_operand_address (FILE * file, register rtx addr)
1032 {
1033   rtx reg;
1034   rtx offset;
1035 
1036   if (GET_CODE (addr) == AND
1037       && GET_CODE (XEXP (addr, 1)) == CONST_INT
1038       && INTVAL (XEXP (addr, 1)) == -16)
1039     addr = XEXP (addr, 0);
1040 
1041   switch (GET_CODE (addr))
1042     {
1043     case REG:
1044       fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1045       break;
1046 
1047     case PLUS:
1048       reg = XEXP (addr, 0);
1049       offset = XEXP (addr, 1);
1050       if (GET_CODE (offset) == REG)
1051 	{
1052 	  fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1053 		   reg_names[REGNO (offset)]);
1054 	}
1055       else if (GET_CODE (offset) == CONST_INT)
1056 	{
1057 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1058 		   INTVAL (offset), reg_names[REGNO (reg)]);
1059 	}
1060       else
1061 	abort ();
1062       break;
1063 
1064     case CONST:
1065     case LABEL_REF:
1066     case SYMBOL_REF:
1067     case CONST_INT:
1068       output_addr_const (file, addr);
1069       break;
1070 
1071     default:
1072       debug_rtx (addr);
1073       abort ();
1074     }
1075 }
1076 
1077 void
1078 print_operand (FILE * file, rtx x, int code)
1079 {
1080   machine_mode mode = GET_MODE (x);
1081   HOST_WIDE_INT val;
1082   unsigned char arr[16];
1083   int xcode = GET_CODE (x);
1084   int i, info;
1085   if (GET_MODE (x) == VOIDmode)
1086     switch (code)
1087       {
1088       case 'L':			/* 128 bits, signed */
1089       case 'm':			/* 128 bits, signed */
1090       case 'T':			/* 128 bits, signed */
1091       case 't':			/* 128 bits, signed */
1092 	mode = TImode;
1093 	break;
1094       case 'K':			/* 64 bits, signed */
1095       case 'k':			/* 64 bits, signed */
1096       case 'D':			/* 64 bits, signed */
1097       case 'd':			/* 64 bits, signed */
1098 	mode = DImode;
1099 	break;
1100       case 'J':			/* 32 bits, signed */
1101       case 'j':			/* 32 bits, signed */
1102       case 's':			/* 32 bits, signed */
1103       case 'S':			/* 32 bits, signed */
1104 	mode = SImode;
1105 	break;
1106       }
1107   switch (code)
1108     {
1109 
1110     case 'j':			/* 32 bits, signed */
1111     case 'k':			/* 64 bits, signed */
1112     case 'm':			/* 128 bits, signed */
1113       if (xcode == CONST_INT
1114 	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1115 	{
1116 	  gcc_assert (logical_immediate_p (x, mode));
1117 	  constant_to_array (mode, x, arr);
1118 	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1119 	  val = trunc_int_for_mode (val, SImode);
1120 	  switch (which_logical_immediate (val))
1121 	  {
1122 	  case SPU_ORI:
1123 	    break;
1124 	  case SPU_ORHI:
1125 	    fprintf (file, "h");
1126 	    break;
1127 	  case SPU_ORBI:
1128 	    fprintf (file, "b");
1129 	    break;
1130 	  default:
1131 	    gcc_unreachable();
1132 	  }
1133 	}
1134       else
1135 	gcc_unreachable();
1136       return;
1137 
1138     case 'J':			/* 32 bits, signed */
1139     case 'K':			/* 64 bits, signed */
1140     case 'L':			/* 128 bits, signed */
1141       if (xcode == CONST_INT
1142 	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1143 	{
1144 	  gcc_assert (logical_immediate_p (x, mode)
1145 		      || iohl_immediate_p (x, mode));
1146 	  constant_to_array (mode, x, arr);
1147 	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1148 	  val = trunc_int_for_mode (val, SImode);
1149 	  switch (which_logical_immediate (val))
1150 	  {
1151 	  case SPU_ORI:
1152 	  case SPU_IOHL:
1153 	    break;
1154 	  case SPU_ORHI:
1155 	    val = trunc_int_for_mode (val, HImode);
1156 	    break;
1157 	  case SPU_ORBI:
1158 	    val = trunc_int_for_mode (val, QImode);
1159 	    break;
1160 	  default:
1161 	    gcc_unreachable();
1162 	  }
1163 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1164 	}
1165       else
1166 	gcc_unreachable();
1167       return;
1168 
1169     case 't':			/* 128 bits, signed */
1170     case 'd':			/* 64 bits, signed */
1171     case 's':			/* 32 bits, signed */
1172       if (CONSTANT_P (x))
1173 	{
1174 	  enum immediate_class c = classify_immediate (x, mode);
1175 	  switch (c)
1176 	    {
1177 	    case IC_IL1:
1178 	      constant_to_array (mode, x, arr);
1179 	      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1180 	      val = trunc_int_for_mode (val, SImode);
1181 	      switch (which_immediate_load (val))
1182 		{
1183 		case SPU_IL:
1184 		  break;
1185 		case SPU_ILA:
1186 		  fprintf (file, "a");
1187 		  break;
1188 		case SPU_ILH:
1189 		  fprintf (file, "h");
1190 		  break;
1191 		case SPU_ILHU:
1192 		  fprintf (file, "hu");
1193 		  break;
1194 		default:
1195 		  gcc_unreachable ();
1196 		}
1197 	      break;
1198 	    case IC_CPAT:
1199 	      constant_to_array (mode, x, arr);
1200 	      cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1201 	      if (info == 1)
1202 		fprintf (file, "b");
1203 	      else if (info == 2)
1204 		fprintf (file, "h");
1205 	      else if (info == 4)
1206 		fprintf (file, "w");
1207 	      else if (info == 8)
1208 		fprintf (file, "d");
1209 	      break;
1210 	    case IC_IL1s:
1211 	      if (xcode == CONST_VECTOR)
1212 		{
1213 		  x = CONST_VECTOR_ELT (x, 0);
1214 		  xcode = GET_CODE (x);
1215 		}
1216 	      if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1217 		fprintf (file, "a");
1218 	      else if (xcode == HIGH)
1219 		fprintf (file, "hu");
1220 	      break;
1221 	    case IC_FSMBI:
1222 	    case IC_FSMBI2:
1223 	    case IC_IL2:
1224 	    case IC_IL2s:
1225 	    case IC_POOL:
1226 	      abort ();
1227 	    }
1228 	}
1229       else
1230 	gcc_unreachable ();
1231       return;
1232 
1233     case 'T':			/* 128 bits, signed */
1234     case 'D':			/* 64 bits, signed */
1235     case 'S':			/* 32 bits, signed */
1236       if (CONSTANT_P (x))
1237 	{
1238 	  enum immediate_class c = classify_immediate (x, mode);
1239 	  switch (c)
1240 	    {
1241 	    case IC_IL1:
1242 	      constant_to_array (mode, x, arr);
1243 	      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1244 	      val = trunc_int_for_mode (val, SImode);
1245 	      switch (which_immediate_load (val))
1246 		{
1247 		case SPU_IL:
1248 		case SPU_ILA:
1249 		  break;
1250 		case SPU_ILH:
1251 		case SPU_ILHU:
1252 		  val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1253 		  break;
1254 		default:
1255 		  gcc_unreachable ();
1256 		}
1257 	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1258 	      break;
1259 	    case IC_FSMBI:
1260 	      constant_to_array (mode, x, arr);
1261 	      val = 0;
1262 	      for (i = 0; i < 16; i++)
1263 		{
1264 		  val <<= 1;
1265 		  val |= arr[i] & 1;
1266 		}
1267 	      print_operand (file, GEN_INT (val), 0);
1268 	      break;
1269 	    case IC_CPAT:
1270 	      constant_to_array (mode, x, arr);
1271 	      cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1272 	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1273 	      break;
1274 	    case IC_IL1s:
1275 	      if (xcode == HIGH)
1276 		x = XEXP (x, 0);
1277 	      if (GET_CODE (x) == CONST_VECTOR)
1278 		x = CONST_VECTOR_ELT (x, 0);
1279 	      output_addr_const (file, x);
1280 	      if (xcode == HIGH)
1281 		fprintf (file, "@h");
1282 	      break;
1283 	    case IC_IL2:
1284 	    case IC_IL2s:
1285 	    case IC_FSMBI2:
1286 	    case IC_POOL:
1287 	      abort ();
1288 	    }
1289 	}
1290       else
1291 	gcc_unreachable ();
1292       return;
1293 
1294     case 'C':
1295       if (xcode == CONST_INT)
1296 	{
1297 	  /* Only the 4 least significant bits are relevant for the generate
1298 	     control word instructions. */
1299 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1300 	  return;
1301 	}
1302       break;
1303 
1304     case 'M':			/* print code for c*d */
1305       if (GET_CODE (x) == CONST_INT)
1306 	switch (INTVAL (x))
1307 	  {
1308 	  case 1:
1309 	    fprintf (file, "b");
1310 	    break;
1311 	  case 2:
1312 	    fprintf (file, "h");
1313 	    break;
1314 	  case 4:
1315 	    fprintf (file, "w");
1316 	    break;
1317 	  case 8:
1318 	    fprintf (file, "d");
1319 	    break;
1320 	  default:
1321 	    gcc_unreachable();
1322 	  }
1323       else
1324 	gcc_unreachable();
1325       return;
1326 
1327     case 'N':			/* Negate the operand */
1328       if (xcode == CONST_INT)
1329 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1330       else if (xcode == CONST_VECTOR)
1331 	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1332 		 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1333       return;
1334 
1335     case 'I':			/* enable/disable interrupts */
1336       if (xcode == CONST_INT)
1337 	fprintf (file, "%s",  INTVAL (x) == 0 ? "d" : "e");
1338       return;
1339 
1340     case 'b':			/* branch modifiers */
1341       if (xcode == REG)
1342 	fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1343       else if (COMPARISON_P (x))
1344 	fprintf (file, "%s", xcode == NE ? "n" : "");
1345       return;
1346 
1347     case 'i':			/* indirect call */
1348       if (xcode == MEM)
1349 	{
1350 	  if (GET_CODE (XEXP (x, 0)) == REG)
1351 	    /* Used in indirect function calls. */
1352 	    fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1353 	  else
1354 	    output_address (GET_MODE (x), XEXP (x, 0));
1355 	}
1356       return;
1357 
1358     case 'p':			/* load/store */
1359       if (xcode == MEM)
1360 	{
1361 	  x = XEXP (x, 0);
1362 	  xcode = GET_CODE (x);
1363 	}
1364       if (xcode == AND)
1365 	{
1366 	  x = XEXP (x, 0);
1367 	  xcode = GET_CODE (x);
1368 	}
1369       if (xcode == REG)
1370 	fprintf (file, "d");
1371       else if (xcode == CONST_INT)
1372 	fprintf (file, "a");
1373       else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1374 	fprintf (file, "r");
1375       else if (xcode == PLUS || xcode == LO_SUM)
1376 	{
1377 	  if (GET_CODE (XEXP (x, 1)) == REG)
1378 	    fprintf (file, "x");
1379 	  else
1380 	    fprintf (file, "d");
1381 	}
1382       return;
1383 
1384     case 'e':
1385       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1386       val &= 0x7;
1387       output_addr_const (file, GEN_INT (val));
1388       return;
1389 
1390     case 'f':
1391       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1392       val &= 0x1f;
1393       output_addr_const (file, GEN_INT (val));
1394       return;
1395 
1396     case 'g':
1397       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1398       val &= 0x3f;
1399       output_addr_const (file, GEN_INT (val));
1400       return;
1401 
1402     case 'h':
1403       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1404       val = (val >> 3) & 0x1f;
1405       output_addr_const (file, GEN_INT (val));
1406       return;
1407 
1408     case 'E':
1409       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1410       val = -val;
1411       val &= 0x7;
1412       output_addr_const (file, GEN_INT (val));
1413       return;
1414 
1415     case 'F':
1416       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1417       val = -val;
1418       val &= 0x1f;
1419       output_addr_const (file, GEN_INT (val));
1420       return;
1421 
1422     case 'G':
1423       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1424       val = -val;
1425       val &= 0x3f;
1426       output_addr_const (file, GEN_INT (val));
1427       return;
1428 
1429     case 'H':
1430       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1431       val = -(val & -8ll);
1432       val = (val >> 3) & 0x1f;
1433       output_addr_const (file, GEN_INT (val));
1434       return;
1435 
1436     case 'v':
1437     case 'w':
1438       constant_to_array (mode, x, arr);
1439       val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1440       output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1441       return;
1442 
1443     case 0:
1444       if (xcode == REG)
1445 	fprintf (file, "%s", reg_names[REGNO (x)]);
1446       else if (xcode == MEM)
1447 	output_address (GET_MODE (x), XEXP (x, 0));
1448       else if (xcode == CONST_VECTOR)
1449 	print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1450       else
1451 	output_addr_const (file, x);
1452       return;
1453 
1454       /* unused letters
1455 	              o qr  u   yz
1456 	AB            OPQR  UVWXYZ */
1457     default:
1458       output_operand_lossage ("invalid %%xn code");
1459     }
1460   gcc_unreachable ();
1461 }
1462 
1463 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1464    caller saved register.  For leaf functions it is more efficient to
1465    use a volatile register because we won't need to save and restore the
1466    pic register.  This routine is only valid after register allocation
1467    is completed, so we can pick an unused register.  */
1468 static rtx
1469 get_pic_reg (void)
1470 {
1471   if (!reload_completed && !reload_in_progress)
1472     abort ();
1473 
1474   /* If we've already made the decision, we need to keep with it.  Once we've
1475      decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1476      return true since the register is now live; this should not cause us to
1477      "switch back" to using pic_offset_table_rtx.  */
1478   if (!cfun->machine->pic_reg)
1479     {
1480       if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1481 	cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1482       else
1483 	cfun->machine->pic_reg = pic_offset_table_rtx;
1484     }
1485 
1486   return cfun->machine->pic_reg;
1487 }
1488 
1489 /* Split constant addresses to handle cases that are too large.
1490    Add in the pic register when in PIC mode.
1491    Split immediates that require more than 1 instruction. */
1492 int
1493 spu_split_immediate (rtx * ops)
1494 {
1495   machine_mode mode = GET_MODE (ops[0]);
1496   enum immediate_class c = classify_immediate (ops[1], mode);
1497 
1498   switch (c)
1499     {
1500     case IC_IL2:
1501       {
1502 	unsigned char arrhi[16];
1503 	unsigned char arrlo[16];
1504 	rtx to, temp, hi, lo;
1505 	int i;
1506 	/* We need to do reals as ints because the constant used in the
1507 	   IOR might not be a legitimate real constant. */
1508 	scalar_int_mode imode = int_mode_for_mode (mode).require ();
1509 	constant_to_array (mode, ops[1], arrhi);
1510 	if (imode != mode)
1511 	  to = simplify_gen_subreg (imode, ops[0], mode, 0);
1512 	else
1513 	  to = ops[0];
1514 	temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1515 	for (i = 0; i < 16; i += 4)
1516 	  {
1517 	    arrlo[i + 2] = arrhi[i + 2];
1518 	    arrlo[i + 3] = arrhi[i + 3];
1519 	    arrlo[i + 0] = arrlo[i + 1] = 0;
1520 	    arrhi[i + 2] = arrhi[i + 3] = 0;
1521 	  }
1522 	hi = array_to_constant (imode, arrhi);
1523 	lo = array_to_constant (imode, arrlo);
1524 	emit_move_insn (temp, hi);
1525 	emit_insn (gen_rtx_SET (to, gen_rtx_IOR (imode, temp, lo)));
1526 	return 1;
1527       }
1528     case IC_FSMBI2:
1529       {
1530 	unsigned char arr_fsmbi[16];
1531 	unsigned char arr_andbi[16];
1532 	rtx to, reg_fsmbi, reg_and;
1533 	int i;
1534 	/* We need to do reals as ints because the constant used in the
1535 	 * AND might not be a legitimate real constant. */
1536 	scalar_int_mode imode = int_mode_for_mode (mode).require ();
1537 	constant_to_array (mode, ops[1], arr_fsmbi);
1538 	if (imode != mode)
1539 	  to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1540 	else
1541 	  to = ops[0];
1542 	for (i = 0; i < 16; i++)
1543 	  if (arr_fsmbi[i] != 0)
1544 	    {
1545 	      arr_andbi[0] = arr_fsmbi[i];
1546 	      arr_fsmbi[i] = 0xff;
1547 	    }
1548 	for (i = 1; i < 16; i++)
1549 	  arr_andbi[i] = arr_andbi[0];
1550 	reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1551 	reg_and = array_to_constant (imode, arr_andbi);
1552 	emit_move_insn (to, reg_fsmbi);
1553 	emit_insn (gen_rtx_SET (to, gen_rtx_AND (imode, to, reg_and)));
1554 	return 1;
1555       }
1556     case IC_POOL:
1557       if (reload_in_progress || reload_completed)
1558 	{
1559 	  rtx mem = force_const_mem (mode, ops[1]);
1560 	  if (TARGET_LARGE_MEM)
1561 	    {
1562 	      rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1563 	      emit_move_insn (addr, XEXP (mem, 0));
1564 	      mem = replace_equiv_address (mem, addr);
1565 	    }
1566 	  emit_move_insn (ops[0], mem);
1567 	  return 1;
1568 	}
1569       break;
1570     case IC_IL1s:
1571     case IC_IL2s:
1572       if (reload_completed && GET_CODE (ops[1]) != HIGH)
1573 	{
1574 	  if (c == IC_IL2s)
1575 	    {
1576 	      emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1577 	      emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1578 	    }
1579 	  else if (flag_pic)
1580 	    emit_insn (gen_pic (ops[0], ops[1]));
1581 	  if (flag_pic)
1582 	    {
1583 	      rtx pic_reg = get_pic_reg ();
1584 	      emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1585 	    }
1586 	  return flag_pic || c == IC_IL2s;
1587 	}
1588       break;
1589     case IC_IL1:
1590     case IC_FSMBI:
1591     case IC_CPAT:
1592       break;
1593     }
1594   return 0;
1595 }
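/* As an example of the IC_IL2 path above: a word constant such as
   0x12345678 cannot be loaded by a single il/ilh/ilhu/ila, so it is
   emitted as an ilhu of the high halfwords followed by an iohl of the
   low halfwords, which is exactly the hi/lo split built there.  */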
1596 
1597 /* SAVING is TRUE when we are generating the actual load and store
1598    instructions for REGNO.  When determining the size of the stack
1599    needed for saving registers we must allocate enough space for the
1600    worst case, because we don't always have the information early enough
1601    to not allocate it.  But we can at least eliminate the actual loads
1602    and stores during the prologue/epilogue.  */
1603 static int
1604 need_to_save_reg (int regno, int saving)
1605 {
1606   if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1607     return 1;
1608   if (flag_pic
1609       && regno == PIC_OFFSET_TABLE_REGNUM
1610       && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
1611     return 1;
1612   return 0;
1613 }
1614 
1615 /* This function is only correct starting with local register
1616    allocation */
1617 int
1618 spu_saved_regs_size (void)
1619 {
1620   int reg_save_size = 0;
1621   int regno;
1622 
1623   for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1624     if (need_to_save_reg (regno, 0))
1625       reg_save_size += 0x10;
1626   return reg_save_size;
1627 }
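/* Each register saved gets a full 16-byte slot, so a function that needs
   to save three call-saved registers reserves 0x30 bytes here.  */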
1628 
1629 static rtx_insn *
1630 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1631 {
1632   rtx reg = gen_rtx_REG (V4SImode, regno);
1633   rtx mem =
1634     gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1635   return emit_insn (gen_movv4si (mem, reg));
1636 }
1637 
1638 static rtx_insn *
1639 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1640 {
1641   rtx reg = gen_rtx_REG (V4SImode, regno);
1642   rtx mem =
1643     gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1644   return emit_insn (gen_movv4si (reg, mem));
1645 }
1646 
1647 /* This happens after reload, so we need to expand it.  */
1648 static rtx_insn *
1649 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1650 {
1651   rtx_insn *insn;
1652   if (satisfies_constraint_K (GEN_INT (imm)))
1653     {
1654       insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1655     }
1656   else
1657     {
1658       emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1659       insn = emit_insn (gen_addsi3 (dst, src, scratch));
1660       if (REGNO (src) == REGNO (scratch))
1661 	abort ();
1662     }
1663   return insn;
1664 }
1665 
1666 /* Return nonzero if this function is known to have a null epilogue.  */
1667 
1668 int
1669 direct_return (void)
1670 {
1671   if (reload_completed)
1672     {
1673       if (cfun->static_chain_decl == 0
1674 	  && (spu_saved_regs_size ()
1675 	      + get_frame_size ()
1676 	      + crtl->outgoing_args_size
1677 	      + crtl->args.pretend_args_size == 0)
1678 	  && crtl->is_leaf)
1679 	return 1;
1680     }
1681   return 0;
1682 }
1683 
1684 /*
1685    The stack frame looks like this:
1686          +-------------+
1687          |  incoming   |
1688          |    args     |
1689    AP -> +-------------+
1690          | $lr save    |
1691          +-------------+
1692  prev SP | back chain  |
1693          +-------------+
1694          |  var args   |
1695          |  reg save   | crtl->args.pretend_args_size bytes
1696          +-------------+
1697          |    ...      |
1698          | saved regs  | spu_saved_regs_size() bytes
1699    FP -> +-------------+
1700          |    ...      |
1701          |   vars      | get_frame_size()  bytes
1702   HFP -> +-------------+
1703          |    ...      |
1704          |  outgoing   |
1705          |    args     | crtl->outgoing_args_size bytes
1706          +-------------+
1707          | $lr of next |
1708          |   frame     |
1709          +-------------+
1710          | back chain  |
1711    SP -> +-------------+
1712 
1713 */
1714 void
1715 spu_expand_prologue (void)
1716 {
1717   HOST_WIDE_INT size = get_frame_size (), offset, regno;
1718   HOST_WIDE_INT total_size;
1719   HOST_WIDE_INT saved_regs_size;
1720   rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1721   rtx scratch_reg_0, scratch_reg_1;
1722   rtx_insn *insn;
1723   rtx real;
1724 
1725   if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1726     cfun->machine->pic_reg = pic_offset_table_rtx;
1727 
1728   if (spu_naked_function_p (current_function_decl))
1729     return;
1730 
1731   scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1732   scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1733 
1734   saved_regs_size = spu_saved_regs_size ();
1735   total_size = size + saved_regs_size
1736     + crtl->outgoing_args_size
1737     + crtl->args.pretend_args_size;
1738 
1739   if (!crtl->is_leaf
1740       || cfun->calls_alloca || total_size > 0)
1741     total_size += STACK_POINTER_OFFSET;
1742 
1743   /* Save this first because code after this might use the link
1744      register as a scratch register. */
1745   if (!crtl->is_leaf)
1746     {
1747       insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1748       RTX_FRAME_RELATED_P (insn) = 1;
1749     }
1750 
1751   if (total_size > 0)
1752     {
1753       offset = -crtl->args.pretend_args_size;
1754       for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1755 	if (need_to_save_reg (regno, 1))
1756 	  {
1757 	    offset -= 16;
1758 	    insn = frame_emit_store (regno, sp_reg, offset);
1759 	    RTX_FRAME_RELATED_P (insn) = 1;
1760 	  }
1761     }
1762 
1763   if (flag_pic && cfun->machine->pic_reg)
1764     {
1765       rtx pic_reg = cfun->machine->pic_reg;
1766       insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1767       insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1768     }
1769 
1770   if (total_size > 0)
1771     {
1772       if (flag_stack_check || flag_stack_clash_protection)
1773 	{
1774 	  /* We compare against total_size-1 because
1775 	     ($sp >= total_size) <=> ($sp > total_size-1) */
1776 	  rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1777 	  rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1778 	  rtx size_v4si = spu_const (V4SImode, total_size - 1);
1779 	  if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1780 	    {
1781 	      emit_move_insn (scratch_v4si, size_v4si);
1782 	      size_v4si = scratch_v4si;
1783 	    }
1784 	  emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1785 	  emit_insn (gen_vec_extractv4sisi
1786 		     (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1787 	  emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1788 	}
1789 
1790       /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1791          the value of the previous $sp because we save it as the back
1792          chain. */
1793       if (total_size <= 2000)
1794 	{
1795 	  /* In this case we save the back chain first. */
1796 	  insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1797 	  insn =
1798 	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1799 	}
1800       else
1801 	{
1802 	  insn = emit_move_insn (scratch_reg_0, sp_reg);
1803 	  insn =
1804 	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1805 	}
1806       RTX_FRAME_RELATED_P (insn) = 1;
1807       real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1808       add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1809 
1810       if (total_size > 2000)
1811 	{
1812 	  /* Save the back chain ptr */
1813 	  insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1814 	}
1815 
1816       if (frame_pointer_needed)
1817 	{
1818 	  rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1819 	  HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1820 	    + crtl->outgoing_args_size;
1821 	  /* Set the new frame_pointer */
1822 	  insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1823 	  RTX_FRAME_RELATED_P (insn) = 1;
1824 	  real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1825 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1826           REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1827 	}
1828     }
1829 
1830   if (flag_stack_usage_info)
1831     current_function_static_stack_size = total_size;
1832 }
1833 
1834 void
1835 spu_expand_epilogue (bool sibcall_p)
1836 {
1837   int size = get_frame_size (), offset, regno;
1838   HOST_WIDE_INT saved_regs_size, total_size;
1839   rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1840   rtx scratch_reg_0;
1841 
1842   if (spu_naked_function_p (current_function_decl))
1843     return;
1844 
1845   scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1846 
1847   saved_regs_size = spu_saved_regs_size ();
1848   total_size = size + saved_regs_size
1849     + crtl->outgoing_args_size
1850     + crtl->args.pretend_args_size;
1851 
1852   if (!crtl->is_leaf
1853       || cfun->calls_alloca || total_size > 0)
1854     total_size += STACK_POINTER_OFFSET;
1855 
1856   if (total_size > 0)
1857     {
1858       if (cfun->calls_alloca)
1859 	frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1860       else
1861 	frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1862 
1863 
1864       if (saved_regs_size > 0)
1865 	{
1866 	  offset = -crtl->args.pretend_args_size;
1867 	  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1868 	    if (need_to_save_reg (regno, 1))
1869 	      {
1870 		offset -= 0x10;
1871 		frame_emit_load (regno, sp_reg, offset);
1872 	      }
1873 	}
1874     }
1875 
1876   if (!crtl->is_leaf)
1877     frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1878 
1879   if (!sibcall_p)
1880     {
1881       emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
1882       emit_jump_insn (gen__return ());
1883     }
1884 }
1885 
1886 rtx
1887 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1888 {
1889   if (count != 0)
1890     return 0;
1891   /* This is inefficient because it ends up copying to a save-register
1892      which then gets saved even though $lr has already been saved.  But
1893      it does generate better code for leaf functions and we don't need
1894      to use RETURN_ADDRESS_POINTER_REGNUM to get it working.  It's only
1895      used for __builtin_return_address anyway, so maybe we don't care if
1896      it's inefficient. */
1897   return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1898 }
1899 
1900 
1901 /* Given VAL, generate a constant appropriate for MODE.
1902    If MODE is a vector mode, every element will be VAL.
1903    For TImode, VAL will be zero extended to 128 bits. */
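/* For example (illustrative only):
     spu_const (V4SImode, 7)        -> CONST_VECTOR of four SImode 7s
     spu_const (SFmode, 0x3f800000) -> 1.0f, since VAL is interpreted as
                                       the float's bit pattern.  */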
1904 rtx
1905 spu_const (machine_mode mode, HOST_WIDE_INT val)
1906 {
1907   rtx inner;
1908 
1909   gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1910 	      || GET_MODE_CLASS (mode) == MODE_FLOAT
1911 	      || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1912 	      || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1913 
1914   if (GET_MODE_CLASS (mode) == MODE_INT)
1915     return immed_double_const (val, 0, mode);
1916 
1917   /* val is the bit representation of the float */
1918   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1919     return hwint_to_const_double (mode, val);
1920 
1921   if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1922     inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1923   else
1924     inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1925 
1926   return gen_const_vec_duplicate (mode, inner);
1927 }
1928 
1929 /* Create a MODE vector constant from 4 ints. */
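/* The packing below is big-endian within each word; e.g. (illustrative)
   A = 0x00010203 fills arr[0..3] with 0x00, 0x01, 0x02, 0x03 before the
   array is turned into a constant.  */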
1930 rtx
1931 spu_const_from_ints(machine_mode mode, int a, int b, int c, int d)
1932 {
1933   unsigned char arr[16];
1934   arr[0] = (a >> 24) & 0xff;
1935   arr[1] = (a >> 16) & 0xff;
1936   arr[2] = (a >> 8) & 0xff;
1937   arr[3] = (a >> 0) & 0xff;
1938   arr[4] = (b >> 24) & 0xff;
1939   arr[5] = (b >> 16) & 0xff;
1940   arr[6] = (b >> 8) & 0xff;
1941   arr[7] = (b >> 0) & 0xff;
1942   arr[8] = (c >> 24) & 0xff;
1943   arr[9] = (c >> 16) & 0xff;
1944   arr[10] = (c >> 8) & 0xff;
1945   arr[11] = (c >> 0) & 0xff;
1946   arr[12] = (d >> 24) & 0xff;
1947   arr[13] = (d >> 16) & 0xff;
1948   arr[14] = (d >> 8) & 0xff;
1949   arr[15] = (d >> 0) & 0xff;
1950   return array_to_constant(mode, arr);
1951 }
1952 
1953 /* branch hint stuff */
1954 
1955 /* An array of these is used to propagate hints to predecessor blocks. */
1956 struct spu_bb_info
1957 {
1958   rtx_insn *prop_jump; /* propagated from another block */
1959   int bb_index;  /* the original block. */
1960 };
1961 static struct spu_bb_info *spu_bb_info;
1962 
1963 #define STOP_HINT_P(INSN) \
1964 		(CALL_P(INSN) \
1965 		 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1966 		 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1967 
1968 /* 1 when RTX is a hinted branch or its target.  We keep track of
1969    what has been hinted so the safe-hint code can test it easily.  */
1970 #define HINTED_P(RTX)						\
1971   (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
1972 
1973 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
1974 #define SCHED_ON_EVEN_P(RTX)						\
1975   (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
1976 
1977 /* Emit a nop for INSN such that the two will dual issue.  This assumes
1978    INSN is 8-byte aligned.  When INSN is inline asm we emit an lnop.
1979    We check for TImode to handle a MULTI1 insn which has dual issued its
1980    first instruction.  get_pipe returns -1 for MULTI0 or inline asm.  */
1981 static void
1982 emit_nop_for_insn (rtx_insn *insn)
1983 {
1984   int p;
1985   rtx_insn *new_insn;
1986 
1987   /* We need to handle JUMP_TABLE_DATA separately.  */
1988   if (JUMP_TABLE_DATA_P (insn))
1989     {
1990       new_insn = emit_insn_after (gen_lnop(), insn);
1991       recog_memoized (new_insn);
1992       INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
1993       return;
1994     }
1995 
1996   p = get_pipe (insn);
1997   if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
1998     new_insn = emit_insn_after (gen_lnop (), insn);
1999   else if (p == 1 && GET_MODE (insn) == TImode)
2000     {
2001       new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2002       PUT_MODE (new_insn, TImode);
2003       PUT_MODE (insn, VOIDmode);
2004     }
2005   else
2006     new_insn = emit_insn_after (gen_lnop (), insn);
2007   recog_memoized (new_insn);
2008   INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
2009 }
2010 
2011 /* Insert nops in basic blocks to meet dual issue alignment
2012    requirements.  Also make sure hbrp and hint instructions are at least
2013    one cycle apart, possibly inserting a nop.  */
2014 static void
2015 pad_bb(void)
2016 {
2017   rtx_insn *insn, *next_insn, *prev_insn, *hbr_insn = 0;
2018   int length;
2019   int addr;
2020 
2021   /* This sets up INSN_ADDRESSES. */
2022   shorten_branches (get_insns ());
2023 
2024   /* Keep track of length added by nops. */
2025   length = 0;
2026 
2027   prev_insn = 0;
2028   insn = get_insns ();
2029   if (!active_insn_p (insn))
2030     insn = next_active_insn (insn);
2031   for (; insn; insn = next_insn)
2032     {
2033       next_insn = next_active_insn (insn);
2034       if (INSN_P (insn)
2035           && (INSN_CODE (insn) == CODE_FOR_iprefetch
2036 	      || INSN_CODE (insn) == CODE_FOR_hbr))
2037 	{
2038 	  if (hbr_insn)
2039 	    {
2040 	      int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2041 	      int a1 = INSN_ADDRESSES (INSN_UID (insn));
2042 	      if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2043 		  || (a1 - a0 == 4))
2044 		{
2045 		  prev_insn = emit_insn_before (gen_lnop (), insn);
2046 		  PUT_MODE (prev_insn, GET_MODE (insn));
2047 		  PUT_MODE (insn, TImode);
2048 		  INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
2049 		  length += 4;
2050 		}
2051 	    }
2052 	  hbr_insn = insn;
2053 	}
2054       if (INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_blockage && next_insn)
2055 	{
2056 	  if (GET_MODE (insn) == TImode)
2057 	    PUT_MODE (next_insn, TImode);
2058 	  insn = next_insn;
2059 	  next_insn = next_active_insn (insn);
2060 	}
2061       addr = INSN_ADDRESSES (INSN_UID (insn));
2062       if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2063 	{
2064 	  if (((addr + length) & 7) != 0)
2065 	    {
2066 	      emit_nop_for_insn (prev_insn);
2067 	      length += 4;
2068 	    }
2069 	}
2070       else if (GET_MODE (insn) == TImode
2071 	       && ((next_insn && GET_MODE (next_insn) != TImode)
2072 		   || get_attr_type (insn) == TYPE_MULTI0)
2073 	       && ((addr + length) & 7) != 0)
2074 	{
2075 	  /* prev_insn will always be set because the first insn is
2076 	     always 8-byte aligned. */
2077 	  emit_nop_for_insn (prev_insn);
2078 	  length += 4;
2079 	}
2080       prev_insn = insn;
2081     }
2082 }
2083 
2084 
2085 /* Routines for branch hints. */
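/* Working constraints observed by the code below (a summary for the
   reader, not a normative hardware statement): an hbr reaches its branch
   through a short signed offset, so hints are only emitted within 600
   bytes of the branch, and at least 8 insns (32 bytes) of separation are
   kept so the hint has time to take effect before the branch issues.  */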
2086 
2087 static void
2088 spu_emit_branch_hint (rtx_insn *before, rtx_insn *branch, rtx target,
2089 		      int distance, sbitmap blocks)
2090 {
2091   rtx_insn *hint;
2092   rtx_insn *insn;
2093   rtx_jump_table_data *table;
2094 
2095   if (before == 0 || branch == 0 || target == 0)
2096     return;
2097 
2098   /* While scheduling we require hints to be no further than 600 bytes
2099      from the branch, so we need to enforce that here too.  */
2100   if (distance > 600)
2101     return;
2102 
2103   /* If BEFORE is a basic block note, emit the hint after the note.  */
2104   if (NOTE_INSN_BASIC_BLOCK_P (before))
2105     before = NEXT_INSN (before);
2106 
2107   rtx_code_label *branch_label = gen_label_rtx ();
2108   LABEL_NUSES (branch_label)++;
2109   LABEL_PRESERVE_P (branch_label) = 1;
2110   insn = emit_label_before (branch_label, branch);
2111   rtx branch_label_ref = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2112   bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
2113 
2114   hint = emit_insn_before (gen_hbr (branch_label_ref, target), before);
2115   recog_memoized (hint);
2116   INSN_LOCATION (hint) = INSN_LOCATION (branch);
2117   HINTED_P (branch) = 1;
2118 
2119   if (GET_CODE (target) == LABEL_REF)
2120     HINTED_P (XEXP (target, 0)) = 1;
2121   else if (tablejump_p (branch, 0, &table))
2122     {
2123       rtvec vec;
2124       int j;
2125       if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2126 	vec = XVEC (PATTERN (table), 0);
2127       else
2128 	vec = XVEC (PATTERN (table), 1);
2129       for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2130 	HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2131     }
2132 
2133   if (distance >= 588)
2134     {
2135       /* Make sure the hint isn't scheduled any earlier than this point,
2136 	     which could make it too far for the branch offset to fit.  */
2137       insn = emit_insn_before (gen_blockage (), hint);
2138       recog_memoized (insn);
2139       INSN_LOCATION (insn) = INSN_LOCATION (hint);
2140     }
2141   else if (distance <= 8 * 4)
2142     {
2143       /* To guarantee at least 8 insns between the hint and branch we
2144          insert nops. */
2145       int d;
2146       for (d = distance; d < 8 * 4; d += 4)
2147 	{
2148 	  insn =
2149 	    emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2150 	  recog_memoized (insn);
2151 	  INSN_LOCATION (insn) = INSN_LOCATION (hint);
2152 	}
2153 
2154       /* Make sure any nops inserted aren't scheduled before the hint. */
2155       insn = emit_insn_after (gen_blockage (), hint);
2156       recog_memoized (insn);
2157       INSN_LOCATION (insn) = INSN_LOCATION (hint);
2158 
2159       /* Make sure any nops inserted aren't scheduled after the call. */
2160       if (CALL_P (branch) && distance < 8 * 4)
2161 	{
2162 	  insn = emit_insn_before (gen_blockage (), branch);
2163 	  recog_memoized (insn);
2164 	  INSN_LOCATION (insn) = INSN_LOCATION (branch);
2165 	}
2166     }
2167 }
2168 
2169 /* Returns 0 if we don't want a hint for this branch.  Otherwise return
2170    the rtx for the branch target. */
2171 static rtx
2172 get_branch_target (rtx_insn *branch)
2173 {
2174   if (JUMP_P (branch))
2175     {
2176       rtx set, src;
2177 
2178       /* Return statements */
2179       if (GET_CODE (PATTERN (branch)) == RETURN)
2180 	return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2181 
2182      /* ASM GOTOs. */
2183      if (extract_asm_operands (PATTERN (branch)) != NULL)
2184 	return NULL;
2185 
2186       set = single_set (branch);
2187       src = SET_SRC (set);
2188       if (GET_CODE (SET_DEST (set)) != PC)
2189 	abort ();
2190 
2191       if (GET_CODE (src) == IF_THEN_ELSE)
2192 	{
2193 	  rtx lab = 0;
2194 	  rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2195 	  if (note)
2196 	    {
2197 	      /* If the more probable case is not a fall through, then
2198 	         try a branch hint.  */
2199 	      int prob = profile_probability::from_reg_br_prob_note
2200 			    (XINT (note, 0)).to_reg_br_prob_base ();
2201 	      if (prob > (REG_BR_PROB_BASE * 6 / 10)
2202 		  && GET_CODE (XEXP (src, 1)) != PC)
2203 		lab = XEXP (src, 1);
2204 	      else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2205 		       && GET_CODE (XEXP (src, 2)) != PC)
2206 		lab = XEXP (src, 2);
2207 	    }
2208 	  if (lab)
2209 	    {
2210 	      if (GET_CODE (lab) == RETURN)
2211 		return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2212 	      return lab;
2213 	    }
2214 	  return 0;
2215 	}
2216 
2217       return src;
2218     }
2219   else if (CALL_P (branch))
2220     {
2221       rtx call;
2222       /* All of our call patterns are in a PARALLEL and the CALL is
2223          the first pattern in the PARALLEL. */
2224       if (GET_CODE (PATTERN (branch)) != PARALLEL)
2225 	abort ();
2226       call = XVECEXP (PATTERN (branch), 0, 0);
2227       if (GET_CODE (call) == SET)
2228 	call = SET_SRC (call);
2229       if (GET_CODE (call) != CALL)
2230 	abort ();
2231       return XEXP (XEXP (call, 0), 0);
2232     }
2233   return 0;
2234 }
2235 
2236 /* The special $hbr register is used to prevent the insn scheduler from
2237    moving hbr insns across instructions which invalidate them.  It
2238    should only be used in a clobber, and this function searches for
2239    insns which clobber it.  */
2240 static bool
2241 insn_clobbers_hbr (rtx_insn *insn)
2242 {
2243   if (INSN_P (insn)
2244       && GET_CODE (PATTERN (insn)) == PARALLEL)
2245     {
2246       rtx parallel = PATTERN (insn);
2247       rtx clobber;
2248       int j;
2249       for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2250 	{
2251 	  clobber = XVECEXP (parallel, 0, j);
2252 	  if (GET_CODE (clobber) == CLOBBER
2253 	      && GET_CODE (XEXP (clobber, 0)) == REG
2254 	      && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2255 	    return 1;
2256 	}
2257     }
2258   return 0;
2259 }
2260 
2261 /* Search up to 32 insns starting at FIRST:
2262    - at any kind of hinted branch, just return
2263    - at any unconditional branch in the first 15 insns, just return
2264    - at a call or indirect branch, after the first 15 insns, force it to
2265      an even address and return
2266    - at any unconditional branch, after the first 15 insns, force it to
2267      an even address.
2268    At the end of the search, insert an hbrp within 4 insns of FIRST,
2269    and an hbrp within 16 instructions of FIRST.
2270  */
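/* For example (illustrative): if a hinted branch targets label L, the loop
   below remembers the insn roughly 4 insns after L (before_4) and the one
   roughly 14 insns after L (before_16) so the two hbrps can be placed just
   before those points.  */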
2271 static void
2272 insert_hbrp_for_ilb_runout (rtx_insn *first)
2273 {
2274   rtx_insn *insn, *before_4 = 0, *before_16 = 0;
2275   int addr = 0, length, first_addr = -1;
2276   int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2277   int insert_lnop_after = 0;
2278   for (insn = first; insn; insn = NEXT_INSN (insn))
2279     if (INSN_P (insn))
2280       {
2281 	if (first_addr == -1)
2282 	  first_addr = INSN_ADDRESSES (INSN_UID (insn));
2283 	addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2284 	length = get_attr_length (insn);
2285 
2286 	if (before_4 == 0 && addr + length >= 4 * 4)
2287 	  before_4 = insn;
2288 	/* We test for 14 instructions because the first hbrp will add
2289 	   up to 2 instructions. */
2290 	if (before_16 == 0 && addr + length >= 14 * 4)
2291 	  before_16 = insn;
2292 
2293 	if (INSN_CODE (insn) == CODE_FOR_hbr)
2294 	  {
2295 	    /* Make sure an hbrp is at least 2 cycles away from a hint.
2296 	       Insert an lnop after the hbrp when necessary. */
2297 	    if (before_4 == 0 && addr > 0)
2298 	      {
2299 		before_4 = insn;
2300 		insert_lnop_after |= 1;
2301 	      }
2302 	    else if (before_4 && addr <= 4 * 4)
2303 	      insert_lnop_after |= 1;
2304 	    if (before_16 == 0 && addr > 10 * 4)
2305 	      {
2306 		before_16 = insn;
2307 		insert_lnop_after |= 2;
2308 	      }
2309 	    else if (before_16 && addr <= 14 * 4)
2310 	      insert_lnop_after |= 2;
2311 	  }
2312 
2313 	if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2314 	  {
2315 	    if (addr < hbrp_addr0)
2316 	      hbrp_addr0 = addr;
2317 	    else if (addr < hbrp_addr1)
2318 	      hbrp_addr1 = addr;
2319 	  }
2320 
2321 	if (CALL_P (insn) || JUMP_P (insn))
2322 	  {
2323 	    if (HINTED_P (insn))
2324 	      return;
2325 
2326 	    /* Any branch after the first 15 insns should be on an even
2327 	       address to avoid a special case branch.  There might be
2328 	       some nops and/or hbrps inserted, so we test after 10
2329 	       insns. */
2330 	    if (addr > 10 * 4)
2331 	      SCHED_ON_EVEN_P (insn) = 1;
2332 	  }
2333 
2334 	if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2335 	  return;
2336 
2337 
2338 	if (addr + length >= 32 * 4)
2339 	  {
2340 	    gcc_assert (before_4 && before_16);
2341 	    if (hbrp_addr0 > 4 * 4)
2342 	      {
2343 		insn =
2344 		  emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2345 		recog_memoized (insn);
2346 		INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2347 		INSN_ADDRESSES_NEW (insn,
2348 				    INSN_ADDRESSES (INSN_UID (before_4)));
2349 		PUT_MODE (insn, GET_MODE (before_4));
2350 		PUT_MODE (before_4, TImode);
2351 		if (insert_lnop_after & 1)
2352 		  {
2353 		    insn = emit_insn_before (gen_lnop (), before_4);
2354 		    recog_memoized (insn);
2355 		    INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2356 		    INSN_ADDRESSES_NEW (insn,
2357 					INSN_ADDRESSES (INSN_UID (before_4)));
2358 		    PUT_MODE (insn, TImode);
2359 		  }
2360 	      }
2361 	    if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2362 		&& hbrp_addr1 > 16 * 4)
2363 	      {
2364 		insn =
2365 		  emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2366 		recog_memoized (insn);
2367 		INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2368 		INSN_ADDRESSES_NEW (insn,
2369 				    INSN_ADDRESSES (INSN_UID (before_16)));
2370 		PUT_MODE (insn, GET_MODE (before_16));
2371 		PUT_MODE (before_16, TImode);
2372 		if (insert_lnop_after & 2)
2373 		  {
2374 		    insn = emit_insn_before (gen_lnop (), before_16);
2375 		    recog_memoized (insn);
2376 		    INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2377 		    INSN_ADDRESSES_NEW (insn,
2378 					INSN_ADDRESSES (INSN_UID
2379 							(before_16)));
2380 		    PUT_MODE (insn, TImode);
2381 		  }
2382 	      }
2383 	    return;
2384 	  }
2385       }
2386     else if (BARRIER_P (insn))
2387       return;
2388 
2389 }
2390 
2391 /* The SPU might hang when it executes 48 inline instructions after a
2392    hinted branch jumps to its hinted target.  The beginning of a
2393    function and the return from a call might have been hinted, and
2394    must be handled as well.  To prevent a hang we insert 2 hbrps.  The
2395    first should be within 6 insns of the branch target.  The second
2396    should be within 22 insns of the branch target.  When determining
2397    if hbrps are necessary, we look for only 32 inline instructions,
2398    because up to 12 nops and 4 hbrps could be inserted.  Similarly,
2399    when inserting new hbrps, we insert them within 4 and 16 insns of
2400    the target.  */
2401 static void
2402 insert_hbrp (void)
2403 {
2404   rtx_insn *insn;
2405   if (TARGET_SAFE_HINTS)
2406     {
2407       shorten_branches (get_insns ());
2408       /* Insert hbrp at beginning of function */
2409       insn = next_active_insn (get_insns ());
2410       if (insn)
2411 	insert_hbrp_for_ilb_runout (insn);
2412       /* Insert hbrp after hinted targets. */
2413       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2414 	if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2415 	  insert_hbrp_for_ilb_runout (next_active_insn (insn));
2416     }
2417 }
2418 
2419 static int in_spu_reorg;
2420 
2421 static void
2422 spu_var_tracking (void)
2423 {
2424   if (flag_var_tracking)
2425     {
2426       df_analyze ();
2427       timevar_push (TV_VAR_TRACKING);
2428       variable_tracking_main ();
2429       timevar_pop (TV_VAR_TRACKING);
2430       df_finish_pass (false);
2431     }
2432 }
2433 
2434 /* Insert branch hints.  There are no branch optimizations after this
2435    pass, so it's safe to set our branch hints now. */
2436 static void
2437 spu_machine_dependent_reorg (void)
2438 {
2439   sbitmap blocks;
2440   basic_block bb;
2441   rtx_insn *branch, *insn;
2442   rtx branch_target = 0;
2443   int branch_addr = 0, insn_addr, required_dist = 0;
2444   int i;
2445   unsigned int j;
2446 
2447   if (!TARGET_BRANCH_HINTS || optimize == 0)
2448     {
2449       /* We still do it for unoptimized code because an external
2450          function might have hinted a call or return. */
2451       compute_bb_for_insn ();
2452       insert_hbrp ();
2453       pad_bb ();
2454       spu_var_tracking ();
2455       free_bb_for_insn ();
2456       return;
2457     }
2458 
2459   blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
2460   bitmap_clear (blocks);
2461 
2462   in_spu_reorg = 1;
2463   compute_bb_for_insn ();
2464 
2465   /* (Re-)discover loops so that bb->loop_father can be used
2466      in the analysis below.  */
2467   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2468 
2469   compact_blocks ();
2470 
2471   spu_bb_info =
2472     (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
2473 				    sizeof (struct spu_bb_info));
2474 
2475   /* We need exact insn addresses and lengths.  */
2476   shorten_branches (get_insns ());
2477 
2478   for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
2479     {
2480       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2481       branch = 0;
2482       if (spu_bb_info[i].prop_jump)
2483 	{
2484 	  branch = spu_bb_info[i].prop_jump;
2485 	  branch_target = get_branch_target (branch);
2486 	  branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2487 	  required_dist = spu_hint_dist;
2488 	}
2489       /* Search from the end of a block to the beginning.  In this loop,
2490          find jumps which need a branch hint and emit one only when:
2491          - it's an indirect branch and we're at the insn which sets
2492          the register
2493          - we're at an insn that will invalidate the hint. e.g., a
2494          call, another hint insn, inline asm that clobbers $hbr, and
2495          some inlined operations (divmodsi4).  Don't consider jumps
2496          because they are only at the end of a block and are
2497          considered when we are deciding whether to propagate
2498          - we're getting too far away from the branch.  The hbr insns
2499          only have a signed 10 bit offset
2500          We go back as far as possible so the branch will be considered
2501          for propagation when we get to the beginning of the block.  */
2502       for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2503 	{
2504 	  if (INSN_P (insn))
2505 	    {
2506 	      insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2507 	      if (branch
2508 		  && ((GET_CODE (branch_target) == REG
2509 		       && set_of (branch_target, insn) != NULL_RTX)
2510 		      || insn_clobbers_hbr (insn)
2511 		      || branch_addr - insn_addr > 600))
2512 		{
2513 		  rtx_insn *next = NEXT_INSN (insn);
2514 		  int next_addr = INSN_ADDRESSES (INSN_UID (next));
2515 		  if (insn != BB_END (bb)
2516 		      && branch_addr - next_addr >= required_dist)
2517 		    {
2518 		      if (dump_file)
2519 			fprintf (dump_file,
2520 				 "hint for %i in block %i before %i\n",
2521 				 INSN_UID (branch), bb->index,
2522 				 INSN_UID (next));
2523 		      spu_emit_branch_hint (next, branch, branch_target,
2524 					    branch_addr - next_addr, blocks);
2525 		    }
2526 		  branch = 0;
2527 		}
2528 
2529 	      /* JUMP_P will only be true at the end of a block.  When
2530 	         branch is already set it means we've previously decided
2531 	         to propagate a hint for that branch into this block. */
2532 	      if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2533 		{
2534 		  branch = 0;
2535 		  if ((branch_target = get_branch_target (insn)))
2536 		    {
2537 		      branch = insn;
2538 		      branch_addr = insn_addr;
2539 		      required_dist = spu_hint_dist;
2540 		    }
2541 		}
2542 	    }
2543 	  if (insn == BB_HEAD (bb))
2544 	    break;
2545 	}
2546 
2547       if (branch)
2548 	{
2549 	  /* If we haven't emitted a hint for this branch yet, it might
2550 	     be profitable to emit it in one of the predecessor blocks,
2551 	     especially for loops.  */
2552 	  rtx_insn *bbend;
2553 	  basic_block prev = 0, prop = 0, prev2 = 0;
2554 	  int loop_exit = 0, simple_loop = 0;
2555 	  int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2556 
2557 	  for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2558 	    if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2559 	      prev = EDGE_PRED (bb, j)->src;
2560 	    else
2561 	      prev2 = EDGE_PRED (bb, j)->src;
2562 
2563 	  for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2564 	    if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2565 	      loop_exit = 1;
2566 	    else if (EDGE_SUCC (bb, j)->dest == bb)
2567 	      simple_loop = 1;
2568 
2569 	  /* If this branch is a loop exit then propagate to previous
2570 	     fallthru block. This catches the cases when it is a simple
2571 	     loop or when there is an initial branch into the loop. */
2572 	  if (prev && (loop_exit || simple_loop)
2573 	      && bb_loop_depth (prev) <= bb_loop_depth (bb))
2574 	    prop = prev;
2575 
2576 	  /* If there is only one adjacent predecessor, don't propagate
2577 	     outside this loop.  */
2578 	  else if (prev && single_pred_p (bb)
2579 		   && prev->loop_father == bb->loop_father)
2580 	    prop = prev;
2581 
2582 	  /* If this is the JOIN block of a simple IF-THEN then
2583 	     propagate the hint to the HEADER block. */
2584 	  else if (prev && prev2
2585 		   && EDGE_COUNT (bb->preds) == 2
2586 		   && EDGE_COUNT (prev->preds) == 1
2587 		   && EDGE_PRED (prev, 0)->src == prev2
2588 		   && prev2->loop_father == bb->loop_father
2589 		   && GET_CODE (branch_target) != REG)
2590 	    prop = prev;
2591 
2592 	  /* Don't propagate when:
2593 	     - this is a simple loop and the hint would be too far
2594 	     - this is not a simple loop and there are 16 insns in
2595 	     this block already
2596 	     - the predecessor block ends in a branch that will be
2597 	     hinted
2598 	     - the predecessor block ends in an insn that invalidates
2599 	     the hint */
2600 	  if (prop
2601 	      && prop->index >= 0
2602 	      && (bbend = BB_END (prop))
2603 	      && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2604 	      (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2605 	      && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2606 	    {
2607 	      if (dump_file)
2608 		fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2609 			 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2610 			 bb->index, prop->index, bb_loop_depth (bb),
2611 			 INSN_UID (branch), loop_exit, simple_loop,
2612 			 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2613 
2614 	      spu_bb_info[prop->index].prop_jump = branch;
2615 	      spu_bb_info[prop->index].bb_index = i;
2616 	    }
2617 	  else if (branch_addr - next_addr >= required_dist)
2618 	    {
2619 	      if (dump_file)
2620 		fprintf (dump_file, "hint for %i in block %i before %i\n",
2621 			 INSN_UID (branch), bb->index,
2622 			 INSN_UID (NEXT_INSN (insn)));
2623 	      spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2624 				    branch_addr - next_addr, blocks);
2625 	    }
2626 	  branch = 0;
2627 	}
2628     }
2629   free (spu_bb_info);
2630 
2631   if (!bitmap_empty_p (blocks))
2632     find_many_sub_basic_blocks (blocks);
2633 
2634   /* We have to schedule to make sure alignment is ok. */
2635   FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE;
2636 
2637   /* The hints need to be scheduled, so call it again. */
2638   schedule_insns ();
2639   df_finish_pass (true);
2640 
2641   insert_hbrp ();
2642 
2643   pad_bb ();
2644 
2645   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2646     if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2647       {
2648 	/* Adjust the LABEL_REF in a hint when we have inserted a nop
2649 	   between its branch label and the branch.  We don't move the
2650 	   label because GCC expects it at the beginning of the block. */
2651 	rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2652 	rtx label_ref = XVECEXP (unspec, 0, 0);
2653 	rtx_insn *label = as_a <rtx_insn *> (XEXP (label_ref, 0));
2654 	rtx_insn *branch;
2655 	int offset = 0;
2656 	for (branch = NEXT_INSN (label);
2657 	     !JUMP_P (branch) && !CALL_P (branch);
2658 	     branch = NEXT_INSN (branch))
2659 	  if (NONJUMP_INSN_P (branch))
2660 	    offset += get_attr_length (branch);
2661 	if (offset > 0)
2662 	  XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
2663       }
2664 
2665   spu_var_tracking ();
2666 
2667   loop_optimizer_finalize ();
2668 
2669   free_bb_for_insn ();
2670 
2671   in_spu_reorg = 0;
2672 }
2673 
2674 
2675 /* Insn scheduling routines, primarily for dual issue. */
2676 static int
2677 spu_sched_issue_rate (void)
2678 {
2679   return 2;
2680 }
2681 
2682 static int
2683 uses_ls_unit(rtx_insn *insn)
2684 {
2685   rtx set = single_set (insn);
2686   if (set != 0
2687       && (GET_CODE (SET_DEST (set)) == MEM
2688 	  || GET_CODE (SET_SRC (set)) == MEM))
2689     return 1;
2690   return 0;
2691 }
2692 
2693 static int
2694 get_pipe (rtx_insn *insn)
2695 {
2696   enum attr_type t;
2697   /* Handle inline asm */
2698   if (INSN_CODE (insn) == -1)
2699     return -1;
2700   t = get_attr_type (insn);
2701   switch (t)
2702     {
2703     case TYPE_CONVERT:
2704       return -2;
2705     case TYPE_MULTI0:
2706       return -1;
2707 
2708     case TYPE_FX2:
2709     case TYPE_FX3:
2710     case TYPE_SPR:
2711     case TYPE_NOP:
2712     case TYPE_FXB:
2713     case TYPE_FPD:
2714     case TYPE_FP6:
2715     case TYPE_FP7:
2716       return 0;
2717 
2718     case TYPE_LNOP:
2719     case TYPE_SHUF:
2720     case TYPE_LOAD:
2721     case TYPE_STORE:
2722     case TYPE_BR:
2723     case TYPE_MULTI1:
2724     case TYPE_HBR:
2725     case TYPE_IPREFETCH:
2726       return 1;
2727     default:
2728       abort ();
2729     }
2730 }
2731 
2732 
2733 /* haifa-sched.c has a static variable that keeps track of the current
2734    cycle.  It is passed to spu_sched_reorder, and we record it here for
2735    use by spu_sched_variable_issue.  It won't be accurate if the
2736    scheduler updates its clock_var between the two calls. */
2737 static int clock_var;
2738 
2739 /* This is used to keep track of insn alignment.  Set to 0 at the
2740    beginning of each block and increased by the "length" attr of each
2741    insn scheduled. */
2742 static int spu_sched_length;
2743 
2744 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2745    ready list appropriately in spu_sched_reorder(). */
2746 static int pipe0_clock;
2747 static int pipe1_clock;
2748 
2749 static int prev_clock_var;
2750 
2751 static int prev_priority;
2752 
2753 /* The SPU needs to load the next ilb sometime during the execution of
2754    the previous ilb.  There is a potential conflict if every cycle has a
2755    load or store.  To avoid the conflict we make sure the load/store
2756    unit is free for at least one cycle during the execution of insns in
2757    the previous ilb. */
2758 static int spu_ls_first;
2759 static int prev_ls_clock;
2760 
2761 static void
2762 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2763 		       int max_ready ATTRIBUTE_UNUSED)
2764 {
2765   spu_sched_length = 0;
2766 }
2767 
2768 static void
2769 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2770 		int max_ready ATTRIBUTE_UNUSED)
2771 {
2772   if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2773     {
2774       /* When any block might be at least 8-byte aligned, assume they
2775          will all be at least 8-byte aligned to make sure dual issue
2776          works out correctly. */
2777       spu_sched_length = 0;
2778     }
2779   spu_ls_first = INT_MAX;
2780   clock_var = -1;
2781   prev_ls_clock = -1;
2782   pipe0_clock = -1;
2783   pipe1_clock = -1;
2784   prev_clock_var = -1;
2785   prev_priority = -1;
2786 }
2787 
2788 static int
2789 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2790 			  int verbose ATTRIBUTE_UNUSED,
2791 			  rtx_insn *insn, int more)
2792 {
2793   int len;
2794   int p;
2795   if (GET_CODE (PATTERN (insn)) == USE
2796       || GET_CODE (PATTERN (insn)) == CLOBBER
2797       || (len = get_attr_length (insn)) == 0)
2798     return more;
2799 
2800   spu_sched_length += len;
2801 
2802   /* Reset on inline asm */
2803   if (INSN_CODE (insn) == -1)
2804     {
2805       spu_ls_first = INT_MAX;
2806       pipe0_clock = -1;
2807       pipe1_clock = -1;
2808       return 0;
2809     }
2810   p = get_pipe (insn);
2811   if (p == 0)
2812     pipe0_clock = clock_var;
2813   else
2814     pipe1_clock = clock_var;
2815 
2816   if (in_spu_reorg)
2817     {
2818       if (clock_var - prev_ls_clock > 1
2819 	  || INSN_CODE (insn) == CODE_FOR_iprefetch)
2820 	spu_ls_first = INT_MAX;
2821       if (uses_ls_unit (insn))
2822 	{
2823 	  if (spu_ls_first == INT_MAX)
2824 	    spu_ls_first = spu_sched_length;
2825 	  prev_ls_clock = clock_var;
2826 	}
2827 
2828       /* The scheduler hasn't inserted the nop, but we will later on.
2829          Include those nops in spu_sched_length. */
2830       if (prev_clock_var == clock_var && (spu_sched_length & 7))
2831 	spu_sched_length += 4;
2832       prev_clock_var = clock_var;
2833 
2834       /* more is -1 when called from spu_sched_reorder for new insns
2835          that don't have INSN_PRIORITY */
2836       if (more >= 0)
2837 	prev_priority = INSN_PRIORITY (insn);
2838     }
2839 
2840   /* Always try issuing more insns.  spu_sched_reorder will decide
2841      when the cycle should be advanced. */
2842   return 1;
2843 }
2844 
2845 /* This function is called for both TARGET_SCHED_REORDER and
2846    TARGET_SCHED_REORDER2.  */
2847 static int
2848 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2849 		   rtx_insn **ready, int *nreadyp, int clock)
2850 {
2851   int i, nready = *nreadyp;
2852   int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2853   rtx_insn *insn;
2854 
2855   clock_var = clock;
2856 
2857   if (nready <= 0 || pipe1_clock >= clock)
2858     return 0;
2859 
2860   /* Find any rtl insns that don't generate assembly insns and schedule
2861      them first. */
2862   for (i = nready - 1; i >= 0; i--)
2863     {
2864       insn = ready[i];
2865       if (INSN_CODE (insn) == -1
2866 	  || INSN_CODE (insn) == CODE_FOR_blockage
2867 	  || (INSN_P (insn) && get_attr_length (insn) == 0))
2868 	{
2869 	  ready[i] = ready[nready - 1];
2870 	  ready[nready - 1] = insn;
2871 	  return 1;
2872 	}
2873     }
2874 
2875   pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2876   for (i = 0; i < nready; i++)
2877     if (INSN_CODE (ready[i]) != -1)
2878       {
2879 	insn = ready[i];
2880 	switch (get_attr_type (insn))
2881 	  {
2882 	  default:
2883 	  case TYPE_MULTI0:
2884 	  case TYPE_CONVERT:
2885 	  case TYPE_FX2:
2886 	  case TYPE_FX3:
2887 	  case TYPE_SPR:
2888 	  case TYPE_NOP:
2889 	  case TYPE_FXB:
2890 	  case TYPE_FPD:
2891 	  case TYPE_FP6:
2892 	  case TYPE_FP7:
2893 	    pipe_0 = i;
2894 	    break;
2895 	  case TYPE_LOAD:
2896 	  case TYPE_STORE:
2897 	    pipe_ls = i;
2898 	    /* FALLTHRU */
2899 	  case TYPE_LNOP:
2900 	  case TYPE_SHUF:
2901 	  case TYPE_BR:
2902 	  case TYPE_MULTI1:
2903 	  case TYPE_HBR:
2904 	    pipe_1 = i;
2905 	    break;
2906 	  case TYPE_IPREFETCH:
2907 	    pipe_hbrp = i;
2908 	    break;
2909 	  }
2910       }
2911 
2912   /* In the first scheduling phase, schedule loads and stores together
2913      to increase the chance they will get merged during postreload CSE. */
2914   if (!reload_completed && pipe_ls >= 0)
2915     {
2916       insn = ready[pipe_ls];
2917       ready[pipe_ls] = ready[nready - 1];
2918       ready[nready - 1] = insn;
2919       return 1;
2920     }
2921 
2922   /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2923   if (pipe_hbrp >= 0)
2924     pipe_1 = pipe_hbrp;
2925 
2926   /* When we have loads/stores in every cycle of the last 15 insns and
2927      we are about to schedule another load/store, emit an hbrp insn
2928      instead. */
2929   if (in_spu_reorg
2930       && spu_sched_length - spu_ls_first >= 4 * 15
2931       && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2932     {
2933       insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2934       recog_memoized (insn);
2935       if (pipe0_clock < clock)
2936 	PUT_MODE (insn, TImode);
2937       spu_sched_variable_issue (file, verbose, insn, -1);
2938       return 0;
2939     }
2940 
2941   /* In general, we want to emit nops to increase dual issue, but dual
2942      issue isn't faster when one of the insns could be scheduled later
2943      without affecting the critical path.  We look at INSN_PRIORITY to
2944      make a good guess, but it isn't perfect so -mdual-nops=n can be
2945      used to affect it. */
2946   if (in_spu_reorg && spu_dual_nops < 10)
2947     {
2948       /* When we are at an even address and we are not issuing nops to
2949          improve scheduling then we need to advance the cycle.  */
2950       if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2951 	  && (spu_dual_nops == 0
2952 	      || (pipe_1 != -1
2953 		  && prev_priority >
2954 		  INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2955 	return 0;
2956 
2957       /* When at an odd address, schedule the highest priority insn
2958          without considering pipeline. */
2959       if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2960 	  && (spu_dual_nops == 0
2961 	      || (prev_priority >
2962 		  INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2963 	return 1;
2964     }
2965 
2966 
2967   /* If we haven't issued a pipe0 insn yet this cycle and there is a
2968      pipe0 insn in the ready list, schedule it. */
2969   if (pipe0_clock < clock && pipe_0 >= 0)
2970     schedule_i = pipe_0;
2971 
2972   /* Either we've scheduled a pipe0 insn already or there is no pipe0
2973      insn to schedule.  Put a pipe1 insn at the front of the ready list. */
2974   else
2975     schedule_i = pipe_1;
2976 
2977   if (schedule_i > -1)
2978     {
2979       insn = ready[schedule_i];
2980       ready[schedule_i] = ready[nready - 1];
2981       ready[nready - 1] = insn;
2982       return 1;
2983     }
2984   return 0;
2985 }
2986 
2987 /* INSN is dependent on DEP_INSN. */
2988 static int
2989 spu_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
2990 		       int cost, unsigned int)
2991 {
2992   rtx set;
2993 
2994   /* The blockage pattern is used to prevent instructions from being
2995      moved across it and has no cost. */
2996   if (INSN_CODE (insn) == CODE_FOR_blockage
2997       || INSN_CODE (dep_insn) == CODE_FOR_blockage)
2998     return 0;
2999 
3000   if ((INSN_P (insn) && get_attr_length (insn) == 0)
3001       || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3002     return 0;
3003 
3004   /* Make sure hbrps are spread out. */
3005   if (INSN_CODE (insn) == CODE_FOR_iprefetch
3006       && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3007     return 8;
3008 
3009   /* Make sure hints and hbrps are 2 cycles apart. */
3010   if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3011        || INSN_CODE (insn) == CODE_FOR_hbr)
3012        && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3013 	   || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3014     return 2;
3015 
3016   /* An hbrp has no real dependency on other insns. */
3017   if (INSN_CODE (insn) == CODE_FOR_iprefetch
3018       || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3019     return 0;
3020 
3021   /* Assuming that it is unlikely an argument register will be used in
3022      the first cycle of the called function, we reduce the cost for
3023      slightly better scheduling of dep_insn.  When not hinted, the
3024      mispredicted branch would hide the cost as well.  */
3025   if (CALL_P (insn))
3026   {
3027     rtx target = get_branch_target (insn);
3028     if (GET_CODE (target) != REG || !set_of (target, insn))
3029       return cost - 2;
3030     return cost;
3031   }
3032 
3033   /* And when returning from a function, let's assume the return values
3034      are completed sooner too. */
3035   if (CALL_P (dep_insn))
3036     return cost - 2;
3037 
3038   /* Make sure an instruction that loads from the back chain is scheduled
3039      away from the return instruction so a hint is more likely to get
3040      issued. */
3041   if (INSN_CODE (insn) == CODE_FOR__return
3042       && (set = single_set (dep_insn))
3043       && GET_CODE (SET_DEST (set)) == REG
3044       && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3045     return 20;
3046 
3047   /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3048      scheduler makes every insn in a block anti-dependent on the final
3049      jump_insn.  We adjust here so higher cost insns will get scheduled
3050      earlier. */
3051   if (JUMP_P (insn) && dep_type == REG_DEP_ANTI)
3052     return insn_sched_cost (dep_insn) - 3;
3053 
3054   return cost;
3055 }
3056 
3057 /* Create a CONST_DOUBLE from a string.  */
3058 rtx
3059 spu_float_const (const char *string, machine_mode mode)
3060 {
3061   REAL_VALUE_TYPE value;
3062   value = REAL_VALUE_ATOF (string, mode);
3063   return const_double_from_real_value (value, mode);
3064 }
3065 
3066 int
3067 spu_constant_address_p (rtx x)
3068 {
3069   return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3070 	  || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3071 	  || GET_CODE (x) == HIGH);
3072 }
3073 
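/* Examples of how the checks below classify a value (illustrative, derived
   from the ranges tested here rather than from the ISA manual):
     0x00001234 -> SPU_IL    (fits the signed 16-bit il field)
     0x00012345 -> SPU_ILA   (fits the 18-bit unsigned ila field)
     0x12341234 -> SPU_ILH   (both halfwords identical)
     0x12340000 -> SPU_ILHU  (low halfword zero)
     0x12345678 -> SPU_NONE  (needs an ilhu/iohl pair).  */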
3074 static enum spu_immediate
3075 which_immediate_load (HOST_WIDE_INT val)
3076 {
3077   gcc_assert (val == trunc_int_for_mode (val, SImode));
3078 
3079   if (val >= -0x8000 && val <= 0x7fff)
3080     return SPU_IL;
3081   if (val >= 0 && val <= 0x3ffff)
3082     return SPU_ILA;
3083   if ((val & 0xffff) == ((val >> 16) & 0xffff))
3084     return SPU_ILH;
3085   if ((val & 0xffff) == 0)
3086     return SPU_ILHU;
3087 
3088   return SPU_NONE;
3089 }
3090 
3091 /* Return true when OP can be loaded by one of the il instructions, or
3092    when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3093 int
3094 immediate_load_p (rtx op, machine_mode mode)
3095 {
3096   if (CONSTANT_P (op))
3097     {
3098       enum immediate_class c = classify_immediate (op, mode);
3099       return c == IC_IL1 || c == IC_IL1s
3100 	     || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3101     }
3102   return 0;
3103 }
3104 
3105 /* Return true if the first SIZE bytes of ARR are a constant that can be
3106    generated with cbd, chd, cwd or cdd.  When non-NULL, PRUN and PSTART
3107    represent the size and offset of the instruction to use. */
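/* Illustrative pattern (matching the checks below): a cwd-style control
   word for inserting a word at byte offset 4 looks like
     {16,17,18,19, 0,1,2,3, 24,25,26,27, 28,29,30,31}
   i.e. identity bytes i+16 everywhere except a run 0..3 at the insertion
   slot; chd uses the run {2,3}, cbd just {3}, and cdd a run of 0..7.  */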
3108 static int
3109 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3110 {
3111   int cpat, run, i, start;
3112   cpat = 1;
3113   run = 0;
3114   start = -1;
3115   for (i = 0; i < size && cpat; i++)
3116     if (arr[i] != i+16)
3117       {
3118 	if (!run)
3119 	  {
3120 	    start = i;
3121 	    if (arr[i] == 3)
3122 	      run = 1;
3123 	    else if (arr[i] == 2 && arr[i+1] == 3)
3124 	      run = 2;
3125 	    else if (arr[i] == 0)
3126 	      {
3127 		while (arr[i+run] == run && i+run < 16)
3128 		  run++;
3129 		if (run != 4 && run != 8)
3130 		  cpat = 0;
3131 	      }
3132 	    else
3133 	      cpat = 0;
3134 	    if ((i & (run-1)) != 0)
3135 	      cpat = 0;
3136 	    i += run;
3137 	  }
3138 	else
3139 	  cpat = 0;
3140       }
3141   if (cpat && (run || size < 16))
3142     {
3143       if (run == 0)
3144 	run = 1;
3145       if (prun)
3146 	*prun = run;
3147       if (pstart)
3148 	*pstart = start == -1 ? 16-run : start;
3149       return 1;
3150     }
3151   return 0;
3152 }
3153 
3154 /* OP is a CONSTANT_P.  Determine what instructions can be used to load
3155    it into a register.  MODE is only valid when OP is a CONST_INT. */
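/* A few illustrative classifications (derived from the logic below, and
   assuming constant_to_array splats scalar constants across the array):
   SImode 0x12341234 repeats in every halfword and is IC_IL1 (one ilh);
   0x12345678 fits no single il form and becomes IC_IL2 (ilhu + iohl);
   constants whose bytes are all 0x00 or 0xff, and which no il form
   covers, come back as IC_FSMBI.  */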
3156 static enum immediate_class
3157 classify_immediate (rtx op, machine_mode mode)
3158 {
3159   HOST_WIDE_INT val;
3160   unsigned char arr[16];
3161   int i, j, repeated, fsmbi, repeat;
3162 
3163   gcc_assert (CONSTANT_P (op));
3164 
3165   if (GET_MODE (op) != VOIDmode)
3166     mode = GET_MODE (op);
3167 
3168   /* A V4SI const_vector with all identical symbols is ok. */
3169   if (!flag_pic
3170       && mode == V4SImode
3171       && GET_CODE (op) == CONST_VECTOR
3172       && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3173       && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE)
3174     op = unwrap_const_vec_duplicate (op);
3175 
3176   switch (GET_CODE (op))
3177     {
3178     case SYMBOL_REF:
3179     case LABEL_REF:
3180       return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3181 
3182     case CONST:
3183       /* We can never know if the resulting address fits in 18 bits and can be
3184 	 loaded with ila.  For now, assume the address will not overflow if
3185 	 the displacement is "small" (fits 'K' constraint).  */
3186       if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3187 	{
3188 	  rtx sym = XEXP (XEXP (op, 0), 0);
3189 	  rtx cst = XEXP (XEXP (op, 0), 1);
3190 
3191 	  if (GET_CODE (sym) == SYMBOL_REF
3192 	      && GET_CODE (cst) == CONST_INT
3193 	      && satisfies_constraint_K (cst))
3194 	    return IC_IL1s;
3195 	}
3196       return IC_IL2s;
3197 
3198     case HIGH:
3199       return IC_IL1s;
3200 
3201     case CONST_VECTOR:
3202       for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3203 	if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3204 	    && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3205 	  return IC_POOL;
3206       /* Fall through. */
3207 
3208     case CONST_INT:
3209     case CONST_DOUBLE:
3210       constant_to_array (mode, op, arr);
3211 
3212       /* Check that each 4-byte slot is identical. */
3213       repeated = 1;
3214       for (i = 4; i < 16; i += 4)
3215 	for (j = 0; j < 4; j++)
3216 	  if (arr[j] != arr[i + j])
3217 	    repeated = 0;
3218 
3219       if (repeated)
3220 	{
3221 	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3222 	  val = trunc_int_for_mode (val, SImode);
3223 
3224 	  if (which_immediate_load (val) != SPU_NONE)
3225 	    return IC_IL1;
3226 	}
3227 
3228       /* Any mode of 2 bytes or smaller can be loaded with an il
3229          instruction. */
3230       gcc_assert (GET_MODE_SIZE (mode) > 2);
3231 
3232       fsmbi = 1;
3233       repeat = 0;
3234       for (i = 0; i < 16 && fsmbi; i++)
3235 	if (arr[i] != 0 && repeat == 0)
3236 	  repeat = arr[i];
3237 	else if (arr[i] != 0 && arr[i] != repeat)
3238 	  fsmbi = 0;
3239       if (fsmbi)
3240 	return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3241 
3242       if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3243 	return IC_CPAT;
3244 
3245       if (repeated)
3246 	return IC_IL2;
3247 
3248       return IC_POOL;
3249     default:
3250       break;
3251     }
3252   gcc_unreachable ();
3253 }
3254 
3255 static enum spu_immediate
3256 which_logical_immediate (HOST_WIDE_INT val)
3257 {
3258   gcc_assert (val == trunc_int_for_mode (val, SImode));
3259 
3260   if (val >= -0x200 && val <= 0x1ff)
3261     return SPU_ORI;
3262   if (val >= 0 && val <= 0xffff)
3263     return SPU_IOHL;
3264   if ((val & 0xffff) == ((val >> 16) & 0xffff))
3265     {
3266       val = trunc_int_for_mode (val, HImode);
3267       if (val >= -0x200 && val <= 0x1ff)
3268 	return SPU_ORHI;
3269       if ((val & 0xff) == ((val >> 8) & 0xff))
3270 	{
3271 	  val = trunc_int_for_mode (val, QImode);
3272 	  if (val >= -0x200 && val <= 0x1ff)
3273 	    return SPU_ORBI;
3274 	}
3275     }
3276   return SPU_NONE;
3277 }
3278 
3279 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3280    CONST_DOUBLEs. */
3281 static int
3282 const_vector_immediate_p (rtx x)
3283 {
3284   int i;
3285   gcc_assert (GET_CODE (x) == CONST_VECTOR);
3286   for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3287     if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3288 	&& GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3289       return 0;
3290   return 1;
3291 }
3292 
3293 int
3294 logical_immediate_p (rtx op, machine_mode mode)
3295 {
3296   HOST_WIDE_INT val;
3297   unsigned char arr[16];
3298   int i, j;
3299 
3300   gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3301 	      || GET_CODE (op) == CONST_VECTOR);
3302 
3303   if (GET_CODE (op) == CONST_VECTOR
3304       && !const_vector_immediate_p (op))
3305     return 0;
3306 
3307   if (GET_MODE (op) != VOIDmode)
3308     mode = GET_MODE (op);
3309 
3310   constant_to_array (mode, op, arr);
3311 
3312   /* Check that bytes are repeated. */
3313   for (i = 4; i < 16; i += 4)
3314     for (j = 0; j < 4; j++)
3315       if (arr[j] != arr[i + j])
3316 	return 0;
3317 
3318   val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3319   val = trunc_int_for_mode (val, SImode);
3320 
3321   i = which_logical_immediate (val);
3322   return i != SPU_NONE && i != SPU_IOHL;
3323 }
3324 
3325 int
3326 iohl_immediate_p (rtx op, machine_mode mode)
3327 {
3328   HOST_WIDE_INT val;
3329   unsigned char arr[16];
3330   int i, j;
3331 
3332   gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3333 	      || GET_CODE (op) == CONST_VECTOR);
3334 
3335   if (GET_CODE (op) == CONST_VECTOR
3336       && !const_vector_immediate_p (op))
3337     return 0;
3338 
3339   if (GET_MODE (op) != VOIDmode)
3340     mode = GET_MODE (op);
3341 
3342   constant_to_array (mode, op, arr);
3343 
3344   /* Check that bytes are repeated. */
3345   for (i = 4; i < 16; i += 4)
3346     for (j = 0; j < 4; j++)
3347       if (arr[j] != arr[i + j])
3348 	return 0;
3349 
3350   val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3351   val = trunc_int_for_mode (val, SImode);
3352 
3353   return val >= 0 && val <= 0xffff;
3354 }
3355 
3356 int
3357 arith_immediate_p (rtx op, machine_mode mode,
3358 		   HOST_WIDE_INT low, HOST_WIDE_INT high)
3359 {
3360   HOST_WIDE_INT val;
3361   unsigned char arr[16];
3362   int bytes, i, j;
3363 
3364   gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3365 	      || GET_CODE (op) == CONST_VECTOR);
3366 
3367   if (GET_CODE (op) == CONST_VECTOR
3368       && !const_vector_immediate_p (op))
3369     return 0;
3370 
3371   if (GET_MODE (op) != VOIDmode)
3372     mode = GET_MODE (op);
3373 
3374   constant_to_array (mode, op, arr);
3375 
3376   bytes = GET_MODE_UNIT_SIZE (mode);
3377   mode = int_mode_for_mode (GET_MODE_INNER (mode)).require ();
3378 
3379   /* Check that bytes are repeated. */
3380   for (i = bytes; i < 16; i += bytes)
3381     for (j = 0; j < bytes; j++)
3382       if (arr[j] != arr[i + j])
3383 	return 0;
3384 
3385   val = arr[0];
3386   for (j = 1; j < bytes; j++)
3387     val = (val << 8) | arr[j];
3388 
3389   val = trunc_int_for_mode (val, mode);
3390 
3391   return val >= low && val <= high;
3392 }
3393 
3394 /* TRUE when op is an immediate and an exact power of 2, and given that
3395    OP is 2^scale, scale >= LOW && scale <= HIGH.  When OP is a vector,
3396    all entries must be the same. */
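/* Worked example (illustrative): 8.0f has the bit pattern 0x41000000; the
   mantissa bits are zero and ((0x41000000 >> 23) - 127) == 3, so the
   constant is accepted as 2^3 whenever 3 lies within [LOW, HIGH].  */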
3397 bool
3398 exp2_immediate_p (rtx op, machine_mode mode, int low, int high)
3399 {
3400   machine_mode int_mode;
3401   HOST_WIDE_INT val;
3402   unsigned char arr[16];
3403   int bytes, i, j;
3404 
3405   gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3406 	      || GET_CODE (op) == CONST_VECTOR);
3407 
3408   if (GET_CODE (op) == CONST_VECTOR
3409       && !const_vector_immediate_p (op))
3410     return 0;
3411 
3412   if (GET_MODE (op) != VOIDmode)
3413     mode = GET_MODE (op);
3414 
3415   constant_to_array (mode, op, arr);
3416 
3417   mode = GET_MODE_INNER (mode);
3418 
3419   bytes = GET_MODE_SIZE (mode);
3420   int_mode = int_mode_for_mode (mode).require ();
3421 
3422   /* Check that bytes are repeated. */
3423   for (i = bytes; i < 16; i += bytes)
3424     for (j = 0; j < bytes; j++)
3425       if (arr[j] != arr[i + j])
3426 	return 0;
3427 
3428   val = arr[0];
3429   for (j = 1; j < bytes; j++)
3430     val = (val << 8) | arr[j];
3431 
3432   val = trunc_int_for_mode (val, int_mode);
3433 
3434   /* Currently, we only handle SFmode */
3435   gcc_assert (mode == SFmode);
3436   if (mode == SFmode)
3437     {
3438       int exp = (val >> 23) - 127;
3439       return val > 0 && (val & 0x007fffff) == 0
3440 	     &&  exp >= low && exp <= high;
3441     }
3442   return FALSE;
3443 }
3444 
3445 /* Return true if X is a SYMBOL_REF to an __ea qualified variable.  */
3446 
3447 static bool
3448 ea_symbol_ref_p (const_rtx x)
3449 {
3450   tree decl;
3451 
3452   if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3453     {
3454       rtx plus = XEXP (x, 0);
3455       rtx op0 = XEXP (plus, 0);
3456       rtx op1 = XEXP (plus, 1);
3457       if (GET_CODE (op1) == CONST_INT)
3458 	x = op0;
3459     }
3460 
3461   return (GET_CODE (x) == SYMBOL_REF
3462  	  && (decl = SYMBOL_REF_DECL (x)) != 0
3463  	  && TREE_CODE (decl) == VAR_DECL
3464  	  && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3465 }
3466 
3467 /* We accept:
3468    - any 32-bit constant (SImode, SFmode)
3469    - any constant that can be generated with fsmbi (any mode)
3470    - a 64-bit constant where the high and low bits are identical
3471      (DImode, DFmode)
3472    - a 128-bit constant where the four 32-bit words match.  */
3473 bool
3474 spu_legitimate_constant_p (machine_mode mode, rtx x)
3475 {
3476   subrtx_iterator::array_type array;
3477   if (GET_CODE (x) == HIGH)
3478     x = XEXP (x, 0);
3479 
3480   /* Reject any __ea qualified reference.  These can't appear in
3481      instructions but must be forced to the constant pool.  */
3482   FOR_EACH_SUBRTX (iter, array, x, ALL)
3483     if (ea_symbol_ref_p (*iter))
3484       return 0;
3485 
3486   /* V4SI with all identical symbols is valid. */
3487   if (!flag_pic
3488       && mode == V4SImode
3489       && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3490 	  || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3491 	  || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3492     return const_vec_duplicate_p (x);
3493 
3494   if (GET_CODE (x) == CONST_VECTOR
3495       && !const_vector_immediate_p (x))
3496     return 0;
3497   return 1;
3498 }
3499 
3500 /* Valid addresses are:
3501    - symbol_ref, label_ref, const
3502    - reg
3503    - reg + const_int, where const_int is 16 byte aligned
3504    - reg + reg, alignment doesn't matter
3505   The alignment matters in the reg+const case because lqd and stqd
3506   ignore the 4 least significant bits of the const.  We only care about
3507   16 byte modes because the expand phase will change all smaller MEM
3508   references to TImode.  */
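/* For example (illustrative operands), with a 16-byte mode
   (plus (reg) (const_int 48)) is accepted but (plus (reg) (const_int 52))
   is not, since lqd/stqd would silently drop the low 4 bits of the
   displacement.  */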
3509 static bool
3510 spu_legitimate_address_p (machine_mode mode,
3511 			  rtx x, bool reg_ok_strict)
3512 {
3513   int aligned = GET_MODE_SIZE (mode) >= 16;
3514   if (aligned
3515       && GET_CODE (x) == AND
3516       && GET_CODE (XEXP (x, 1)) == CONST_INT
3517       && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3518     x = XEXP (x, 0);
3519   switch (GET_CODE (x))
3520     {
3521     case LABEL_REF:
3522       return !TARGET_LARGE_MEM;
3523 
3524     case SYMBOL_REF:
3525     case CONST:
3526       /* Keep __ea references until reload so that spu_expand_mov can see them
3527 	 in MEMs.  */
3528       if (ea_symbol_ref_p (x))
3529 	return !reload_in_progress && !reload_completed;
3530       return !TARGET_LARGE_MEM;
3531 
3532     case CONST_INT:
3533       return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3534 
3535     case SUBREG:
3536       x = XEXP (x, 0);
3537       if (!REG_P (x))
3538 	return 0;
3539       /* FALLTHRU */
3540 
3541     case REG:
3542       return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3543 
3544     case PLUS:
3545     case LO_SUM:
3546       {
3547 	rtx op0 = XEXP (x, 0);
3548 	rtx op1 = XEXP (x, 1);
3549 	if (GET_CODE (op0) == SUBREG)
3550 	  op0 = XEXP (op0, 0);
3551 	if (GET_CODE (op1) == SUBREG)
3552 	  op1 = XEXP (op1, 0);
3553 	if (GET_CODE (op0) == REG
3554 	    && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3555 	    && GET_CODE (op1) == CONST_INT
3556 	    && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3557 		/* If virtual registers are involved, the displacement will
3558 		   change later on anyway, so checking would be premature.
3559 		   Reload will make sure the final displacement after
3560 		   register elimination is OK.  */
3561 		|| op0 == arg_pointer_rtx
3562 		|| op0 == frame_pointer_rtx
3563 		|| op0 == virtual_stack_vars_rtx)
3564 	    && (!aligned || (INTVAL (op1) & 15) == 0))
3565 	  return TRUE;
3566 	if (GET_CODE (op0) == REG
3567 	    && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3568 	    && GET_CODE (op1) == REG
3569 	    && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3570 	  return TRUE;
3571       }
3572       break;
3573 
3574     default:
3575       break;
3576     }
3577   return FALSE;
3578 }
3579 
3580 /* Like spu_legitimate_address_p, except with named addresses.  */
3581 static bool
3582 spu_addr_space_legitimate_address_p (machine_mode mode, rtx x,
3583 				     bool reg_ok_strict, addr_space_t as)
3584 {
3585   if (as == ADDR_SPACE_EA)
3586     return (REG_P (x) && (GET_MODE (x) == EAmode));
3587 
3588   else if (as != ADDR_SPACE_GENERIC)
3589     gcc_unreachable ();
3590 
3591   return spu_legitimate_address_p (mode, x, reg_ok_strict);
3592 }
3593 
3594 /* When the address is reg + const_int, force the const_int into a
3595    register.  */
3596 static rtx
3597 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3598 			machine_mode mode ATTRIBUTE_UNUSED)
3599 {
3600   rtx op0, op1;
3601   /* Make sure both operands are registers.  */
3602   if (GET_CODE (x) == PLUS)
3603     {
3604       op0 = XEXP (x, 0);
3605       op1 = XEXP (x, 1);
3606       if (ALIGNED_SYMBOL_REF_P (op0))
3607 	{
3608 	  op0 = force_reg (Pmode, op0);
3609 	  mark_reg_pointer (op0, 128);
3610 	}
3611       else if (GET_CODE (op0) != REG)
3612 	op0 = force_reg (Pmode, op0);
3613       if (ALIGNED_SYMBOL_REF_P (op1))
3614 	{
3615 	  op1 = force_reg (Pmode, op1);
3616 	  mark_reg_pointer (op1, 128);
3617 	}
3618       else if (GET_CODE (op1) != REG)
3619 	op1 = force_reg (Pmode, op1);
3620       x = gen_rtx_PLUS (Pmode, op0, op1);
3621     }
3622   return x;
3623 }
3624 
3625 /* Like spu_legitimate_address, except with named address support.  */
3626 static rtx
3627 spu_addr_space_legitimize_address (rtx x, rtx oldx, machine_mode mode,
3628 				   addr_space_t as)
3629 {
3630   if (as != ADDR_SPACE_GENERIC)
3631     return x;
3632 
3633   return spu_legitimize_address (x, oldx, mode);
3634 }
3635 
3636 /* Reload reg + const_int for out-of-range displacements.  */
3637 rtx
3638 spu_legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
3639 			       int opnum, int type)
3640 {
3641   bool removed_and = false;
3642 
3643   if (GET_CODE (ad) == AND
3644       && CONST_INT_P (XEXP (ad, 1))
3645       && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3646     {
3647       ad = XEXP (ad, 0);
3648       removed_and = true;
3649     }
3650 
3651   if (GET_CODE (ad) == PLUS
3652       && REG_P (XEXP (ad, 0))
3653       && CONST_INT_P (XEXP (ad, 1))
3654       && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3655 	   && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3656     {
3657       /* Unshare the sum.  */
3658       ad = copy_rtx (ad);
3659 
3660       /* Reload the displacement.  */
3661       push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3662 		   BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3663 		   opnum, (enum reload_type) type);
3664 
3665       /* Add back AND for alignment if we stripped it.  */
3666       if (removed_and)
3667 	ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3668 
3669       return ad;
3670     }
3671 
3672   return NULL_RTX;
3673 }
3674 
3675 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3676    struct attribute_spec.handler.  */
3677 static tree
3678 spu_handle_fndecl_attribute (tree * node,
3679 			     tree name,
3680 			     tree args ATTRIBUTE_UNUSED,
3681 			     int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3682 {
3683   if (TREE_CODE (*node) != FUNCTION_DECL)
3684     {
3685       warning (0, "%qE attribute only applies to functions",
3686 	       name);
3687       *no_add_attrs = true;
3688     }
3689 
3690   return NULL_TREE;
3691 }
3692 
3693 /* Handle the "vector" attribute.  */
3694 static tree
3695 spu_handle_vector_attribute (tree * node, tree name,
3696 			     tree args ATTRIBUTE_UNUSED,
3697 			     int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3698 {
3699   tree type = *node, result = NULL_TREE;
3700   machine_mode mode;
3701   int unsigned_p;
3702 
3703   while (POINTER_TYPE_P (type)
3704 	 || TREE_CODE (type) == FUNCTION_TYPE
3705 	 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3706     type = TREE_TYPE (type);
3707 
3708   mode = TYPE_MODE (type);
3709 
3710   unsigned_p = TYPE_UNSIGNED (type);
3711   switch (mode)
3712     {
3713     case E_DImode:
3714       result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3715       break;
3716     case E_SImode:
3717       result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3718       break;
3719     case E_HImode:
3720       result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3721       break;
3722     case E_QImode:
3723       result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3724       break;
3725     case E_SFmode:
3726       result = V4SF_type_node;
3727       break;
3728     case E_DFmode:
3729       result = V2DF_type_node;
3730       break;
3731     default:
3732       break;
3733     }
3734 
3735   /* Propagate qualifiers attached to the element type
3736      onto the vector type.  */
3737   if (result && result != type && TYPE_QUALS (type))
3738     result = build_qualified_type (result, TYPE_QUALS (type));
3739 
3740   *no_add_attrs = true;		/* No need to hang on to the attribute.  */
3741 
3742   if (!result)
3743     warning (0, "%qE attribute ignored", name);
3744   else
3745     *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3746 
3747   return NULL_TREE;
3748 }
3749 
3750 /* Return nonzero if FUNC is a naked function.  */
3751 static int
3752 spu_naked_function_p (tree func)
3753 {
3754   tree a;
3755 
3756   if (TREE_CODE (func) != FUNCTION_DECL)
3757     abort ();
3758 
3759   a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3760   return a != NULL_TREE;
3761 }
3762 
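/* Return the offset (in bytes) between eliminable register FROM and its
   replacement register TO, for use during register elimination.  */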
3763 int
3764 spu_initial_elimination_offset (int from, int to)
3765 {
3766   int saved_regs_size = spu_saved_regs_size ();
3767   int sp_offset = 0;
3768   if (!crtl->is_leaf || crtl->outgoing_args_size
3769       || get_frame_size () || saved_regs_size)
3770     sp_offset = STACK_POINTER_OFFSET;
3771   if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3772     return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3773   else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3774     return get_frame_size ();
3775   else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3776     return sp_offset + crtl->outgoing_args_size
3777       + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3778   else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3779     return get_frame_size () + saved_regs_size + sp_offset;
3780   else
3781     gcc_unreachable ();
3782 }
3783 
3784 rtx
3785 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3786 {
3787   machine_mode mode = TYPE_MODE (type);
3788   int byte_size = ((mode == BLKmode)
3789 		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3790 
3791   /* Make sure small structs are left justified in a register. */
3792   if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3793       && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3794     {
3795       machine_mode smode;
3796       rtvec v;
3797       int i;
3798       int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3799       int n = byte_size / UNITS_PER_WORD;
3800       v = rtvec_alloc (nregs);
3801       for (i = 0; i < n; i++)
3802 	{
3803 	  RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3804 						gen_rtx_REG (TImode,
3805 							     FIRST_RETURN_REGNUM
3806 							     + i),
3807 						GEN_INT (UNITS_PER_WORD * i));
3808 	  byte_size -= UNITS_PER_WORD;
3809 	}
3810 
3811       if (n < nregs)
3812 	{
3813 	  if (byte_size < 4)
3814 	    byte_size = 4;
3815 	  smode = smallest_int_mode_for_size (byte_size * BITS_PER_UNIT);
3816 	  RTVEC_ELT (v, n) =
3817 	    gen_rtx_EXPR_LIST (VOIDmode,
3818 			       gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3819 			       GEN_INT (UNITS_PER_WORD * n));
3820 	}
3821       return gen_rtx_PARALLEL (mode, v);
3822     }
3823   return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3824 }
3825 
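/* Implement TARGET_FUNCTION_ARG.  Return the register (or PARALLEL) in
   which to pass the next argument of mode MODE and type TYPE, or zero when
   the argument must be passed on the stack.  */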
3826 static rtx
3827 spu_function_arg (cumulative_args_t cum_v,
3828 		  machine_mode mode,
3829 		  const_tree type, bool named ATTRIBUTE_UNUSED)
3830 {
3831   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3832   int byte_size;
3833 
3834   if (*cum >= MAX_REGISTER_ARGS)
3835     return 0;
3836 
3837   byte_size = ((mode == BLKmode)
3838 	       ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3839 
3840   /* The ABI does not allow parameters to be passed partially in a
3841      register and partially on the stack. */
3842   if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3843     return 0;
3844 
3845   /* Make sure small structs are left justified in a register. */
3846   if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3847       && byte_size < UNITS_PER_WORD && byte_size > 0)
3848     {
3849       machine_mode smode;
3850       rtx gr_reg;
3851       if (byte_size < 4)
3852 	byte_size = 4;
3853       smode = smallest_int_mode_for_size (byte_size * BITS_PER_UNIT);
3854       gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3855 				  gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
3856 				  const0_rtx);
3857       return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3858     }
3859   else
3860     return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
3861 }
3862 
3863 static void
3864 spu_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
3865 			  const_tree type, bool named ATTRIBUTE_UNUSED)
3866 {
3867   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3868 
3869   *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3870 	   ? 1
3871 	   : mode == BLKmode
3872 	   ? ((int_size_in_bytes (type) + 15) / 16)
3873 	   : mode == VOIDmode
3874 	   ? 1
3875 	   : spu_hard_regno_nregs (FIRST_ARG_REGNUM, mode));
3876 }
3877 
3878 /* Implement TARGET_FUNCTION_ARG_OFFSET.  The SPU ABI wants 32/64-bit
3879    types at offset 0 in the quad-word on the stack.  8/16-bit types
3880    should be at offsets 3/2 respectively.  */
3881 
3882 static HOST_WIDE_INT
3883 spu_function_arg_offset (machine_mode mode, const_tree type)
3884 {
3885   if (type && INTEGRAL_TYPE_P (type) && GET_MODE_SIZE (mode) < 4)
3886     return 4 - GET_MODE_SIZE (mode);
3887   return 0;
3888 }
3889 
3890 /* Implement TARGET_FUNCTION_ARG_PADDING.  */
3891 
3892 static pad_direction
3893 spu_function_arg_padding (machine_mode, const_tree)
3894 {
3895   return PAD_UPWARD;
3896 }
3897 
3898 /* Variable sized types are passed by reference.  */
3899 static bool
3900 spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
3901 		       machine_mode mode ATTRIBUTE_UNUSED,
3902 		       const_tree type, bool named ATTRIBUTE_UNUSED)
3903 {
3904   return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3905 }
3906 
3907 
3908 /* Var args. */
3909 
3910 /* Create and return the va_list datatype.
3911 
3912    On SPU, va_list is an array type equivalent to
3913 
3914       typedef struct __va_list_tag
3915         {
3916             void *__args __attribute__((__aligned(16)));
3917             void *__skip __attribute__((__aligned(16)));
3918 
3919         } va_list[1];
3920 
3921    where __args points to the arg that will be returned by the next
3922    va_arg(), and __skip points to the previous stack frame such that
3923    when __args == __skip we should advance __args by 32 bytes. */
3924 static tree
3925 spu_build_builtin_va_list (void)
3926 {
3927   tree f_args, f_skip, record, type_decl;
3928   bool owp;
3929 
3930   record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3931 
3932   type_decl =
3933     build_decl (BUILTINS_LOCATION,
3934 		TYPE_DECL, get_identifier ("__va_list_tag"), record);
3935 
3936   f_args = build_decl (BUILTINS_LOCATION,
3937 		       FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3938   f_skip = build_decl (BUILTINS_LOCATION,
3939 		       FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3940 
3941   DECL_FIELD_CONTEXT (f_args) = record;
3942   SET_DECL_ALIGN (f_args, 128);
3943   DECL_USER_ALIGN (f_args) = 1;
3944 
3945   DECL_FIELD_CONTEXT (f_skip) = record;
3946   SET_DECL_ALIGN (f_skip, 128);
3947   DECL_USER_ALIGN (f_skip) = 1;
3948 
3949   TYPE_STUB_DECL (record) = type_decl;
3950   TYPE_NAME (record) = type_decl;
3951   TYPE_FIELDS (record) = f_args;
3952   DECL_CHAIN (f_args) = f_skip;
3953 
3954   /* We know this is being padded and we want it that way.  It is an
3955      internal type, so hide the warnings from the user. */
3956   owp = warn_padded;
3957   warn_padded = false;
3958 
3959   layout_type (record);
3960 
3961   warn_padded = owp;
3962 
3963   /* The correct type is an array type of one element.  */
3964   return build_array_type (record, build_index_type (size_zero_node));
3965 }
3966 
3967 /* Implement va_start by filling the va_list structure VALIST.
3968    NEXTARG points to the first anonymous stack argument.
3969 
3970    The following global variables are used to initialize
3971    the va_list structure:
3972 
3973      crtl->args.info;
3974        the CUMULATIVE_ARGS for this function
3975 
3976      crtl->args.arg_offset_rtx:
3977        holds the offset of the first anonymous stack argument
3978        (relative to the virtual arg pointer).  */
3979 
3980 static void
3981 spu_va_start (tree valist, rtx nextarg)
3982 {
3983   tree f_args, f_skip;
3984   tree args, skip, t;
3985 
3986   f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3987   f_skip = DECL_CHAIN (f_args);
3988 
3989   valist = build_simple_mem_ref (valist);
3990   args =
3991     build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3992   skip =
3993     build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3994 
3995   /* Find the __args area.  */
3996   t = make_tree (TREE_TYPE (args), nextarg);
3997   if (crtl->args.pretend_args_size > 0)
3998     t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
3999   t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
4000   TREE_SIDE_EFFECTS (t) = 1;
4001   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4002 
4003   /* Find the __skip area.  */
4004   t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4005   t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
4006 				       - STACK_POINTER_OFFSET));
4007   t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4008   TREE_SIDE_EFFECTS (t) = 1;
4009   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4010 }
4011 
4012 /* Gimplify va_arg by updating the va_list structure
4013    VALIST as required to retrieve an argument of type
4014    TYPE, and returning that argument.
4015 
4016    ret = va_arg(VALIST, TYPE);
4017 
4018    generates code equivalent to:
4019 
4020     paddedsize = (sizeof(TYPE) + 15) & -16;
4021     if (VALIST.__args + paddedsize > VALIST.__skip
4022 	&& VALIST.__args <= VALIST.__skip)
4023       addr = VALIST.__skip + 32;
4024     else
4025       addr = VALIST.__args;
4026     VALIST.__args = addr + paddedsize;
4027     ret = *(TYPE *)addr;
4028  */
4029 static tree
4030 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4031 			  gimple_seq * post_p ATTRIBUTE_UNUSED)
4032 {
4033   tree f_args, f_skip;
4034   tree args, skip;
4035   HOST_WIDE_INT size, rsize;
4036   tree addr, tmp;
4037   bool pass_by_reference_p;
4038 
4039   f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4040   f_skip = DECL_CHAIN (f_args);
4041 
4042   args =
4043     build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4044   skip =
4045     build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4046 
4047   addr = create_tmp_var (ptr_type_node, "va_arg");
4048 
4049   /* if an object is dynamically sized, a pointer to it is passed
4050      instead of the object itself. */
4051   pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4052 					   false);
4053   if (pass_by_reference_p)
4054     type = build_pointer_type (type);
4055   size = int_size_in_bytes (type);
4056   rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4057 
4058   /* build conditional expression to calculate addr. The expression
4059      will be gimplified later. */
4060   tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
4061   tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4062 		build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4063 		build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4064 		unshare_expr (skip)));
4065 
4066   tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4067 		fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4068 		unshare_expr (args));
4069 
4070   gimplify_assign (addr, tmp, pre_p);
4071 
4072   /* update VALIST.__args */
4073   tmp = fold_build_pointer_plus_hwi (addr, rsize);
4074   gimplify_assign (unshare_expr (args), tmp, pre_p);
4075 
4076   addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4077 		       addr);
4078 
4079   if (pass_by_reference_p)
4080     addr = build_va_arg_indirect_ref (addr);
4081 
4082   return build_va_arg_indirect_ref (addr);
4083 }
4084 
4085 /* Save parameter registers starting with the register that corresponds
4086    to the first unnamed parameter.  If the first unnamed parameter is
4087    in the stack then save no registers.  Set pretend_args_size to the
4088    amount of space needed to save the registers. */
4089 static void
4090 spu_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
4091 			    tree type, int *pretend_size, int no_rtl)
4092 {
4093   if (!no_rtl)
4094     {
4095       rtx tmp;
4096       int regno;
4097       int offset;
4098       int ncum = *get_cumulative_args (cum);
4099 
4100       /* CUM currently points to the last named argument; we want to
4101          start at the next argument. */
4102       spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
4103 
4104       offset = -STACK_POINTER_OFFSET;
4105       for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4106 	{
4107 	  tmp = gen_frame_mem (V4SImode,
4108 			       plus_constant (Pmode, virtual_incoming_args_rtx,
4109 					      offset));
4110 	  emit_move_insn (tmp,
4111 			  gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4112 	  offset += 16;
4113 	}
4114       *pretend_size = offset + STACK_POINTER_OFFSET;
4115     }
4116 }
4117 
4118 static void
4119 spu_conditional_register_usage (void)
4120 {
4121   if (flag_pic)
4122     {
4123       fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4124       call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4125     }
4126 }
4127 
4128 /* This is called any time we inspect the alignment of a register for
4129    addresses.  */
4130 static int
4131 reg_aligned_for_addr (rtx x)
4132 {
4133   int regno =
4134     REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4135   return REGNO_POINTER_ALIGN (regno) >= 128;
4136 }
4137 
4138 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4139    into its SYMBOL_REF_FLAGS.  */
4140 static void
4141 spu_encode_section_info (tree decl, rtx rtl, int first)
4142 {
4143   default_encode_section_info (decl, rtl, first);
4144 
4145   /* If a variable has a forced alignment to < 16 bytes, mark it with
4146      SYMBOL_FLAG_ALIGN1.  */
4147   if (TREE_CODE (decl) == VAR_DECL
4148       && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4149     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4150 }
4151 
4152 /* Return TRUE if we are certain the mem refers to a complete object
4153    which is both 16-byte aligned and padded to a 16-byte boundary.  This
4154    would make it safe to store with a single instruction.
4155    We guarantee the alignment and padding for static objects by aligning
4156    all of them to 16 bytes.  (DATA_ALIGNMENT and TARGET_CONSTANT_ALIGNMENT.)
4157    FIXME: We currently cannot guarantee this for objects on the stack
4158    because assign_parm_setup_stack calls assign_stack_local with the
4159    alignment of the parameter mode and in that case the alignment never
4160    gets adjusted by LOCAL_ALIGNMENT. */
4161 static int
4162 store_with_one_insn_p (rtx mem)
4163 {
4164   machine_mode mode = GET_MODE (mem);
4165   rtx addr = XEXP (mem, 0);
4166   if (mode == BLKmode)
4167     return 0;
4168   if (GET_MODE_SIZE (mode) >= 16)
4169     return 1;
4170   /* Only static objects. */
4171   if (GET_CODE (addr) == SYMBOL_REF)
4172     {
4173       /* We use the associated declaration to make sure the access is
4174          referring to the whole object.
4175          We check both MEM_EXPR and SYMBOL_REF_DECL.  I'm not sure
4176          if it is necessary.  Will there be cases where one exists, and
4177          the other does not?  Will there be cases where both exist, but
4178          have different types?  */
4179       tree decl = MEM_EXPR (mem);
4180       if (decl
4181 	  && TREE_CODE (decl) == VAR_DECL
4182 	  && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4183 	return 1;
4184       decl = SYMBOL_REF_DECL (addr);
4185       if (decl
4186 	  && TREE_CODE (decl) == VAR_DECL
4187 	  && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4188 	return 1;
4189     }
4190   return 0;
4191 }
4192 
4193 /* Return 1 when the address is not valid for a simple load and store as
4194    required by the '_mov*' patterns.  We could make this less strict
4195    for loads, but we prefer MEMs to look the same so they are more
4196    likely to be merged.  */
4197 static int
4198 address_needs_split (rtx mem)
4199 {
4200   if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4201       && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4202 	  || !(store_with_one_insn_p (mem)
4203 	       || mem_is_padded_component_ref (mem))))
4204     return 1;
4205 
4206   return 0;
4207 }
4208 
4209 static GTY(()) rtx cache_fetch;		  /* __cache_fetch function */
4210 static GTY(()) rtx cache_fetch_dirty;	  /* __cache_fetch_dirty function */
4211 static alias_set_type ea_alias_set = -1;  /* alias set for __ea memory */
4212 
4213 /* MEM is known to be an __ea qualified memory access.  Emit a call to
4214    fetch the ppu memory to local store, and return its address in local
4215    store.  */
4216 
4217 static void
4218 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4219 {
4220   if (is_store)
4221     {
4222       rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4223       if (!cache_fetch_dirty)
4224 	cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4225       emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4226 			       ea_addr, EAmode, ndirty, SImode);
4227     }
4228   else
4229     {
4230       if (!cache_fetch)
4231 	cache_fetch = init_one_libfunc ("__cache_fetch");
4232       emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4233 			       ea_addr, EAmode);
4234     }
4235 }
4236 
4237 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4238    dirty bit marking, inline.
4239 
4240    The cache control data structure is an array of
4241 
4242    struct __cache_tag_array
4243      {
4244         unsigned int tag_lo[4];
4245         unsigned int tag_hi[4];
4246         void *data_pointer[4];
4247         int reserved[4];
4248         vector unsigned short dirty_bits[4];
4249      }  */
4250 
4251 static void
4252 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4253 {
4254   rtx ea_addr_si;
4255   HOST_WIDE_INT v;
4256   rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4257   rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4258   rtx index_mask = gen_reg_rtx (SImode);
4259   rtx tag_arr = gen_reg_rtx (Pmode);
4260   rtx splat_mask = gen_reg_rtx (TImode);
4261   rtx splat = gen_reg_rtx (V4SImode);
4262   rtx splat_hi = NULL_RTX;
4263   rtx tag_index = gen_reg_rtx (Pmode);
4264   rtx block_off = gen_reg_rtx (SImode);
4265   rtx tag_addr = gen_reg_rtx (Pmode);
4266   rtx tag = gen_reg_rtx (V4SImode);
4267   rtx cache_tag = gen_reg_rtx (V4SImode);
4268   rtx cache_tag_hi = NULL_RTX;
4269   rtx cache_ptrs = gen_reg_rtx (TImode);
4270   rtx cache_ptrs_si = gen_reg_rtx (SImode);
4271   rtx tag_equal = gen_reg_rtx (V4SImode);
4272   rtx tag_equal_hi = NULL_RTX;
4273   rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4274   rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4275   rtx eq_index = gen_reg_rtx (SImode);
4276   rtx bcomp, hit_label, hit_ref, cont_label;
4277   rtx_insn *insn;
4278 
4279   if (spu_ea_model != 32)
4280     {
4281       splat_hi = gen_reg_rtx (V4SImode);
4282       cache_tag_hi = gen_reg_rtx (V4SImode);
4283       tag_equal_hi = gen_reg_rtx (V4SImode);
4284     }
4285 
4286   emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
4287   emit_move_insn (tag_arr, tag_arr_sym);
4288   v = 0x0001020300010203LL;
4289   emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4290   ea_addr_si = ea_addr;
4291   if (spu_ea_model != 32)
4292     ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4293 
4294   /* tag_index = ea_addr & (tag_array_size - 128)  */
4295   emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4296 
4297   /* splat ea_addr to all 4 slots.  */
4298   emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4299   /* Similarly for high 32 bits of ea_addr.  */
4300   if (spu_ea_model != 32)
4301     emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4302 
4303   /* block_off = ea_addr & 127  */
4304   emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4305 
4306   /* tag_addr = tag_arr + tag_index  */
4307   emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4308 
4309   /* Read cache tags.  */
4310   emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4311   if (spu_ea_model != 32)
4312     emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4313 					       plus_constant (Pmode,
4314 							      tag_addr, 16)));
4315 
4316   /* tag = ea_addr & -128  */
4317   emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4318 
4319   /* Read all four cache data pointers.  */
4320   emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4321 					   plus_constant (Pmode,
4322 							  tag_addr, 32)));
4323 
4324   /* Compare tags.  */
4325   emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4326   if (spu_ea_model != 32)
4327     {
4328       emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4329       emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4330     }
4331 
4332   /* At most one of the tags compare equal, so tag_equal has one
4333      32-bit slot set to all 1's, with the other slots all zero.
4334      gbb picks off low bit from each byte in the 128-bit registers,
4335      so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4336      we have a hit.  */
4337   emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4338   emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4339 
4340   /* So counting leading zeros will set eq_index to 16, 20, 24 or 28.  */
4341   emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4342 
4343   /* This allows us to rotate the corresponding cache data pointer into
4344      slot 0 (rotating by eq_index mod 16 bytes).  */
4345   emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4346   emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4347 
4348   /* Add block offset to form final data address.  */
4349   emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4350 
4351   /* Check that we did hit.  */
4352   hit_label = gen_label_rtx ();
4353   hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4354   bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4355   insn = emit_jump_insn (gen_rtx_SET (pc_rtx,
4356 				      gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4357 							    hit_ref, pc_rtx)));
4358   /* Say that this branch is very likely to happen.  */
4359   add_reg_br_prob_note (insn, profile_probability::very_likely ());
4360 
4361   ea_load_store (mem, is_store, ea_addr, data_addr);
4362   cont_label = gen_label_rtx ();
4363   emit_jump_insn (gen_jump (cont_label));
4364   emit_barrier ();
4365 
4366   emit_label (hit_label);
4367 
4368   if (is_store)
4369     {
4370       HOST_WIDE_INT v_hi;
4371       rtx dirty_bits = gen_reg_rtx (TImode);
4372       rtx dirty_off = gen_reg_rtx (SImode);
4373       rtx dirty_128 = gen_reg_rtx (TImode);
4374       rtx neg_block_off = gen_reg_rtx (SImode);
4375 
4376       /* Set up mask with one dirty bit per byte of the mem we are
4377 	 writing, starting from top bit.  */
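      /* For a 4-byte (SImode) store, for instance, this leaves only the
	 top four of the 128 mask bits set.  */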
4378       v_hi = v = -1;
4379       v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4380       if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4381 	{
4382 	  v_hi = v;
4383 	  v = 0;
4384 	}
4385       emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4386 
4387       /* Form index into cache dirty_bits.  eq_index is one of
4388 	 0x10, 0x14, 0x18 or 0x1c.  Multiplying by 4 gives us
4389 	 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4390 	 offset to each of the four dirty_bits elements.  */
4391       emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4392 
4393       emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4394 
4395       /* Rotate bit mask to proper bit.  */
4396       emit_insn (gen_negsi2 (neg_block_off, block_off));
4397       emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4398       emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4399 
4400       /* Or in the new dirty bits.  */
4401       emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4402 
4403       /* Store.  */
4404       emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4405     }
4406 
4407   emit_label (cont_label);
4408 }
4409 
4410 static rtx
4411 expand_ea_mem (rtx mem, bool is_store)
4412 {
4413   rtx ea_addr;
4414   rtx data_addr = gen_reg_rtx (Pmode);
4415   rtx new_mem;
4416 
4417   ea_addr = force_reg (EAmode, XEXP (mem, 0));
4418   if (optimize_size || optimize == 0)
4419     ea_load_store (mem, is_store, ea_addr, data_addr);
4420   else
4421     ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4422 
4423   if (ea_alias_set == -1)
4424     ea_alias_set = new_alias_set ();
4425 
4426   /* We generate a new MEM RTX to refer to the copy of the data
4427      in the cache.  We do not copy memory attributes (except the
4428      alignment) from the original MEM, as they may no longer apply
4429      to the cache copy.  */
4430   new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4431   set_mem_alias_set (new_mem, ea_alias_set);
4432   set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4433 
4434   return new_mem;
4435 }
4436 
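/* Expand a move of mode MODE between ops[0] and ops[1].  Return nonzero if
   this function emitted all the necessary insns itself, zero if the caller
   should emit the ordinary move pattern.  */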
4437 int
4438 spu_expand_mov (rtx * ops, machine_mode mode)
4439 {
4440   if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4441     {
4442       /* Perform the move in the destination SUBREG's inner mode.  */
4443       ops[0] = SUBREG_REG (ops[0]);
4444       mode = GET_MODE (ops[0]);
4445       ops[1] = gen_lowpart_common (mode, ops[1]);
4446       gcc_assert (ops[1]);
4447     }
4448 
4449   if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4450     {
4451       rtx from = SUBREG_REG (ops[1]);
4452       scalar_int_mode imode = int_mode_for_mode (GET_MODE (from)).require ();
4453 
4454       gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4455 		  && GET_MODE_CLASS (imode) == MODE_INT
4456 		  && subreg_lowpart_p (ops[1]));
4457 
4458       if (GET_MODE_SIZE (imode) < 4)
4459 	imode = SImode;
4460       if (imode != GET_MODE (from))
4461 	from = gen_rtx_SUBREG (imode, from, 0);
4462 
4463       if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4464 	{
4465 	  enum insn_code icode = convert_optab_handler (trunc_optab,
4466 							mode, imode);
4467 	  emit_insn (GEN_FCN (icode) (ops[0], from));
4468 	}
4469       else
4470 	emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4471       return 1;
4472     }
4473 
4474   /* At least one of the operands needs to be a register. */
4475   if ((reload_in_progress | reload_completed) == 0
4476       && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4477     {
4478       rtx temp = force_reg (mode, ops[1]);
4479       emit_move_insn (ops[0], temp);
4480       return 1;
4481     }
4482   if (reload_in_progress || reload_completed)
4483     {
4484       if (CONSTANT_P (ops[1]))
4485 	return spu_split_immediate (ops);
4486       return 0;
4487     }
4488 
4489   /* Catch the SImode immediates greater than 0x7fffffff, and sign
4490      extend them. */
4491   if (GET_CODE (ops[1]) == CONST_INT)
4492     {
4493       HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4494       if (val != INTVAL (ops[1]))
4495 	{
4496 	  emit_move_insn (ops[0], GEN_INT (val));
4497 	  return 1;
4498 	}
4499     }
4500   if (MEM_P (ops[0]))
4501     {
4502       if (MEM_ADDR_SPACE (ops[0]))
4503 	ops[0] = expand_ea_mem (ops[0], true);
4504       return spu_split_store (ops);
4505     }
4506   if (MEM_P (ops[1]))
4507     {
4508       if (MEM_ADDR_SPACE (ops[1]))
4509 	ops[1] = expand_ea_mem (ops[1], false);
4510       return spu_split_load (ops);
4511     }
4512 
4513   return 0;
4514 }
4515 
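/* Copy the scalar value held in the TImode register SRC into DST, whose
   mode is narrower than 16 bytes.  The value is extracted from the top of
   SRC (shifted down by 64 bits for 8-byte values, 96 bits otherwise) and
   truncated to DST's mode.  */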
4516 static void
4517 spu_convert_move (rtx dst, rtx src)
4518 {
4519   machine_mode mode = GET_MODE (dst);
4520   machine_mode int_mode = int_mode_for_mode (mode).require ();
4521   rtx reg;
4522   gcc_assert (GET_MODE (src) == TImode);
4523   reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4524   emit_insn (gen_rtx_SET (reg,
4525 	       gen_rtx_TRUNCATE (int_mode,
4526 		 gen_rtx_LSHIFTRT (TImode, src,
4527 		   GEN_INT (int_mode == DImode ? 64 : 96)))));
4528   if (int_mode != mode)
4529     {
4530       reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4531       emit_move_insn (dst, reg);
4532     }
4533 }
4534 
4535 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4536    the address from SRC and SRC+16.  Return a REG or CONST_INT that
4537    specifies how many bytes to rotate the loaded registers, plus any
4538    extra from EXTRA_ROTQBY.  The address and rotate amounts are
4539    normalized to improve merging of loads and rotate computations. */
4540 static rtx
4541 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4542 {
4543   rtx addr = XEXP (src, 0);
4544   rtx p0, p1, rot, addr0, addr1;
4545   int rot_amt;
4546 
4547   rot = 0;
4548   rot_amt = 0;
4549 
4550   if (MEM_ALIGN (src) >= 128)
4551     /* Address is already aligned; simply perform a TImode load.  */ ;
4552   else if (GET_CODE (addr) == PLUS)
4553     {
4554       /* 8 cases:
4555          aligned reg   + aligned reg     => lqx
4556          aligned reg   + unaligned reg   => lqx, rotqby
4557          aligned reg   + aligned const   => lqd
4558          aligned reg   + unaligned const => lqd, rotqbyi
4559          unaligned reg + aligned reg     => lqx, rotqby
4560          unaligned reg + unaligned reg   => lqx, a, rotqby (1 scratch)
4561          unaligned reg + aligned const   => lqd, rotqby
4562          unaligned reg + unaligned const -> not allowed by legitimate address
4563        */
4564       p0 = XEXP (addr, 0);
4565       p1 = XEXP (addr, 1);
4566       if (!reg_aligned_for_addr (p0))
4567 	{
4568 	  if (REG_P (p1) && !reg_aligned_for_addr (p1))
4569 	    {
4570 	      rot = gen_reg_rtx (SImode);
4571 	      emit_insn (gen_addsi3 (rot, p0, p1));
4572 	    }
4573 	  else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4574 	    {
4575 	      if (INTVAL (p1) > 0
4576 		  && REG_POINTER (p0)
4577 		  && INTVAL (p1) * BITS_PER_UNIT
4578 		     < REGNO_POINTER_ALIGN (REGNO (p0)))
4579 		{
4580 		  rot = gen_reg_rtx (SImode);
4581 		  emit_insn (gen_addsi3 (rot, p0, p1));
4582 		  addr = p0;
4583 		}
4584 	      else
4585 		{
4586 		  rtx x = gen_reg_rtx (SImode);
4587 		  emit_move_insn (x, p1);
4588 		  if (!spu_arith_operand (p1, SImode))
4589 		    p1 = x;
4590 		  rot = gen_reg_rtx (SImode);
4591 		  emit_insn (gen_addsi3 (rot, p0, p1));
4592 		  addr = gen_rtx_PLUS (Pmode, p0, x);
4593 		}
4594 	    }
4595 	  else
4596 	    rot = p0;
4597 	}
4598       else
4599 	{
4600 	  if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4601 	    {
4602 	      rot_amt = INTVAL (p1) & 15;
4603 	      if (INTVAL (p1) & -16)
4604 		{
4605 		  p1 = GEN_INT (INTVAL (p1) & -16);
4606 		  addr = gen_rtx_PLUS (SImode, p0, p1);
4607 		}
4608 	      else
4609 		addr = p0;
4610 	    }
4611 	  else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4612 	    rot = p1;
4613 	}
4614     }
4615   else if (REG_P (addr))
4616     {
4617       if (!reg_aligned_for_addr (addr))
4618 	rot = addr;
4619     }
4620   else if (GET_CODE (addr) == CONST)
4621     {
4622       if (GET_CODE (XEXP (addr, 0)) == PLUS
4623 	  && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4624 	  && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4625 	{
4626 	  rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4627 	  if (rot_amt & -16)
4628 	    addr = gen_rtx_CONST (Pmode,
4629 				  gen_rtx_PLUS (Pmode,
4630 						XEXP (XEXP (addr, 0), 0),
4631 						GEN_INT (rot_amt & -16)));
4632 	  else
4633 	    addr = XEXP (XEXP (addr, 0), 0);
4634 	}
4635       else
4636 	{
4637 	  rot = gen_reg_rtx (Pmode);
4638 	  emit_move_insn (rot, addr);
4639 	}
4640     }
4641   else if (GET_CODE (addr) == CONST_INT)
4642     {
4643       rot_amt = INTVAL (addr);
4644       addr = GEN_INT (rot_amt & -16);
4645     }
4646   else if (!ALIGNED_SYMBOL_REF_P (addr))
4647     {
4648       rot = gen_reg_rtx (Pmode);
4649       emit_move_insn (rot, addr);
4650     }
4651 
4652   rot_amt += extra_rotby;
4653 
4654   rot_amt &= 15;
4655 
4656   if (rot && rot_amt)
4657     {
4658       rtx x = gen_reg_rtx (SImode);
4659       emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4660       rot = x;
4661       rot_amt = 0;
4662     }
4663   if (!rot && rot_amt)
4664     rot = GEN_INT (rot_amt);
4665 
4666   addr0 = copy_rtx (addr);
4667   addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4668   emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4669 
4670   if (dst1)
4671     {
4672       addr1 = plus_constant (SImode, copy_rtx (addr), 16);
4673       addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4674       emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4675     }
4676 
4677   return rot;
4678 }
4679 
4680 int
4681 spu_split_load (rtx * ops)
4682 {
4683   machine_mode mode = GET_MODE (ops[0]);
4684   rtx addr, load, rot;
4685   int rot_amt;
4686 
4687   if (GET_MODE_SIZE (mode) >= 16)
4688     return 0;
4689 
4690   addr = XEXP (ops[1], 0);
4691   gcc_assert (GET_CODE (addr) != AND);
4692 
4693   if (!address_needs_split (ops[1]))
4694     {
4695       ops[1] = change_address (ops[1], TImode, addr);
4696       load = gen_reg_rtx (TImode);
4697       emit_insn (gen__movti (load, ops[1]));
4698       spu_convert_move (ops[0], load);
4699       return 1;
4700     }
4701 
4702   rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4703 
4704   load = gen_reg_rtx (TImode);
4705   rot = spu_expand_load (load, 0, ops[1], rot_amt);
4706 
4707   if (rot)
4708     emit_insn (gen_rotqby_ti (load, load, rot));
4709 
4710   spu_convert_move (ops[0], load);
4711   return 1;
4712 }
4713 
4714 int
4715 spu_split_store (rtx * ops)
4716 {
4717   machine_mode mode = GET_MODE (ops[0]);
4718   rtx reg;
4719   rtx addr, p0, p1, p1_lo, smem;
4720   int aform;
4721   int scalar;
4722 
4723   if (GET_MODE_SIZE (mode) >= 16)
4724     return 0;
4725 
4726   addr = XEXP (ops[0], 0);
4727   gcc_assert (GET_CODE (addr) != AND);
4728 
4729   if (!address_needs_split (ops[0]))
4730     {
4731       reg = gen_reg_rtx (TImode);
4732       emit_insn (gen_spu_convert (reg, ops[1]));
4733       ops[0] = change_address (ops[0], TImode, addr);
4734       emit_move_insn (ops[0], reg);
4735       return 1;
4736     }
4737 
4738   if (GET_CODE (addr) == PLUS)
4739     {
4740       /* 8 cases:
4741          aligned reg   + aligned reg     => lqx, c?x, shuf, stqx
4742          aligned reg   + unaligned reg   => lqx, c?x, shuf, stqx
4743          aligned reg   + aligned const   => lqd, c?d, shuf, stqx
4744          aligned reg   + unaligned const => lqd, c?d, shuf, stqx
4745          unaligned reg + aligned reg     => lqx, c?x, shuf, stqx
4746          unaligned reg + unaligned reg   => lqx, c?x, shuf, stqx
4747          unaligned reg + aligned const   => lqd, c?d, shuf, stqx
4748          unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
4749        */
4750       aform = 0;
4751       p0 = XEXP (addr, 0);
4752       p1 = p1_lo = XEXP (addr, 1);
4753       if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4754 	{
4755 	  p1_lo = GEN_INT (INTVAL (p1) & 15);
4756 	  if (reg_aligned_for_addr (p0))
4757 	    {
4758 	      p1 = GEN_INT (INTVAL (p1) & -16);
4759 	      if (p1 == const0_rtx)
4760 		addr = p0;
4761 	      else
4762 		addr = gen_rtx_PLUS (SImode, p0, p1);
4763 	    }
4764 	  else
4765 	    {
4766 	      rtx x = gen_reg_rtx (SImode);
4767 	      emit_move_insn (x, p1);
4768 	      addr = gen_rtx_PLUS (SImode, p0, x);
4769 	    }
4770 	}
4771     }
4772   else if (REG_P (addr))
4773     {
4774       aform = 0;
4775       p0 = addr;
4776       p1 = p1_lo = const0_rtx;
4777     }
4778   else
4779     {
4780       aform = 1;
4781       p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4782       p1 = 0;			/* aform doesn't use p1 */
4783       p1_lo = addr;
4784       if (ALIGNED_SYMBOL_REF_P (addr))
4785 	p1_lo = const0_rtx;
4786       else if (GET_CODE (addr) == CONST
4787 	       && GET_CODE (XEXP (addr, 0)) == PLUS
4788 	       && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4789 	       && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4790 	{
4791 	  HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4792 	  if ((v & -16) != 0)
4793 	    addr = gen_rtx_CONST (Pmode,
4794 				  gen_rtx_PLUS (Pmode,
4795 						XEXP (XEXP (addr, 0), 0),
4796 						GEN_INT (v & -16)));
4797 	  else
4798 	    addr = XEXP (XEXP (addr, 0), 0);
4799 	  p1_lo = GEN_INT (v & 15);
4800 	}
4801       else if (GET_CODE (addr) == CONST_INT)
4802 	{
4803 	  p1_lo = GEN_INT (INTVAL (addr) & 15);
4804 	  addr = GEN_INT (INTVAL (addr) & -16);
4805 	}
4806       else
4807 	{
4808 	  p1_lo = gen_reg_rtx (SImode);
4809 	  emit_move_insn (p1_lo, addr);
4810 	}
4811     }
4812 
4813   gcc_assert (aform == 0 || aform == 1);
4814   reg = gen_reg_rtx (TImode);
4815 
4816   scalar = store_with_one_insn_p (ops[0]);
4817   if (!scalar)
4818     {
4819       /* We could copy the flags from the ops[0] MEM to mem here.
4820          We don't, because we want this load to be optimized away if
4821          possible, and copying the flags will prevent that in certain
4822          cases, e.g. consider the volatile flag. */
4823 
4824       rtx pat = gen_reg_rtx (TImode);
4825       rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4826       set_mem_alias_set (lmem, 0);
4827       emit_insn (gen_movti (reg, lmem));
4828 
4829       if (!p0 || reg_aligned_for_addr (p0))
4830 	p0 = stack_pointer_rtx;
4831       if (!p1_lo)
4832 	p1_lo = const0_rtx;
4833 
4834       emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4835       emit_insn (gen_shufb (reg, ops[1], reg, pat));
4836     }
4837   else
4838     {
4839       if (GET_CODE (ops[1]) == REG)
4840 	emit_insn (gen_spu_convert (reg, ops[1]));
4841       else if (GET_CODE (ops[1]) == SUBREG)
4842 	emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4843       else
4844 	abort ();
4845     }
4846 
4847   if (GET_MODE_SIZE (mode) < 4 && scalar)
4848     emit_insn (gen_ashlti3
4849 	       (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
4850 
4851   smem = change_address (ops[0], TImode, copy_rtx (addr));
4852   /* We can't use the previous alias set because the memory has changed
4853      size and can potentially overlap objects of other types.  */
4854   set_mem_alias_set (smem, 0);
4855 
4856   emit_insn (gen_movti (smem, reg));
4857   return 1;
4858 }
4859 
4860 /* Return TRUE if X is MEM which is a struct member reference
4861    and the member can safely be loaded and stored with a single
4862    instruction because it is padded. */
4863 static int
4864 mem_is_padded_component_ref (rtx x)
4865 {
4866   tree t = MEM_EXPR (x);
4867   tree r;
4868   if (!t || TREE_CODE (t) != COMPONENT_REF)
4869     return 0;
4870   t = TREE_OPERAND (t, 1);
4871   if (!t || TREE_CODE (t) != FIELD_DECL
4872       || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4873     return 0;
4874   /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4875   r = DECL_FIELD_CONTEXT (t);
4876   if (!r || TREE_CODE (r) != RECORD_TYPE)
4877     return 0;
4878   /* Make sure they are the same mode */
4879   if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4880     return 0;
4881   /* If there are no following fields then the field alignment assures
4882      the structure is padded to the alignment which means this field is
4883      padded too.  */
4884   if (TREE_CHAIN (t) == 0)
4885     return 1;
4886   /* If the following field is also aligned then this field will be
4887      padded. */
4888   t = TREE_CHAIN (t);
4889   if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4890     return 1;
4891   return 0;
4892 }
4893 
4894 /* Parse the -mfixed-range= option string.  */
4895 static void
4896 fix_range (const char *const_str)
4897 {
4898   int i, first, last;
4899   char *str, *dash, *comma;
4900 
4901   /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4902      REG2 are either register names or register numbers.  The effect
4903      of this option is to mark the registers in the range from REG1 to
4904      REG2 as ``fixed'' so they won't be used by the compiler.  */
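  /* For example (register numbers purely illustrative),
     "-mfixed-range=80-85,90-92" marks registers 80..85 and 90..92 as
     fixed.  */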
4905 
4906   i = strlen (const_str);
4907   str = (char *) alloca (i + 1);
4908   memcpy (str, const_str, i + 1);
4909 
4910   while (1)
4911     {
4912       dash = strchr (str, '-');
4913       if (!dash)
4914 	{
4915 	  warning (0, "value of -mfixed-range must have form REG1-REG2");
4916 	  return;
4917 	}
4918       *dash = '\0';
4919       comma = strchr (dash + 1, ',');
4920       if (comma)
4921 	*comma = '\0';
4922 
4923       first = decode_reg_name (str);
4924       if (first < 0)
4925 	{
4926 	  warning (0, "unknown register name: %s", str);
4927 	  return;
4928 	}
4929 
4930       last = decode_reg_name (dash + 1);
4931       if (last < 0)
4932 	{
4933 	  warning (0, "unknown register name: %s", dash + 1);
4934 	  return;
4935 	}
4936 
4937       *dash = '-';
4938 
4939       if (first > last)
4940 	{
4941 	  warning (0, "%s-%s is an empty range", str, dash + 1);
4942 	  return;
4943 	}
4944 
4945       for (i = first; i <= last; ++i)
4946 	fixed_regs[i] = call_used_regs[i] = 1;
4947 
4948       if (!comma)
4949 	break;
4950 
4951       *comma = ',';
4952       str = comma + 1;
4953     }
4954 }
4955 
4956 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4957    can be generated using the fsmbi instruction. */
4958 int
4959 fsmbi_const_p (rtx x)
4960 {
4961   if (CONSTANT_P (x))
4962     {
4963       /* We can always choose TImode for CONST_INT because the high bits
4964          of an SImode will always be all 1s, i.e., valid for fsmbi. */
4965       enum immediate_class c = classify_immediate (x, TImode);
4966       return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4967     }
4968   return 0;
4969 }
4970 
4971 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4972    can be generated using the cbd, chd, cwd or cdd instruction. */
4973 int
4974 cpat_const_p (rtx x, machine_mode mode)
4975 {
4976   if (CONSTANT_P (x))
4977     {
4978       enum immediate_class c = classify_immediate (x, mode);
4979       return c == IC_CPAT;
4980     }
4981   return 0;
4982 }
4983 
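/* Build the 16-byte shuffle-control constant for a cpat operation.  ops[1]
   is the base (a 16-byte-aligned REG or a CONST_INT), ops[2] the constant
   offset and ops[3] the access size in bytes.  Every byte defaults to
   0x10 + its index; the ops[3] bytes at (base + offset) & 15 are replaced
   by an ascending run starting at 3, 2 or 0 for sizes 1, 2 and larger
   respectively.  For example, size 4 at offset 0 yields
   { 0x00, 0x01, 0x02, 0x03, 0x14, 0x15, ..., 0x1f }.  */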
4984 rtx
4985 gen_cpat_const (rtx * ops)
4986 {
4987   unsigned char dst[16];
4988   int i, offset, shift, isize;
4989   if (GET_CODE (ops[3]) != CONST_INT
4990       || GET_CODE (ops[2]) != CONST_INT
4991       || (GET_CODE (ops[1]) != CONST_INT
4992 	  && GET_CODE (ops[1]) != REG))
4993     return 0;
4994   if (GET_CODE (ops[1]) == REG
4995       && (!REG_POINTER (ops[1])
4996 	  || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4997     return 0;
4998 
4999   for (i = 0; i < 16; i++)
5000     dst[i] = i + 16;
5001   isize = INTVAL (ops[3]);
5002   if (isize == 1)
5003     shift = 3;
5004   else if (isize == 2)
5005     shift = 2;
5006   else
5007     shift = 0;
5008   offset = (INTVAL (ops[2]) +
5009 	    (GET_CODE (ops[1]) ==
5010 	     CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5011   for (i = 0; i < isize; i++)
5012     dst[offset + i] = i + shift;
5013   return array_to_constant (TImode, dst);
5014 }
5015 
5016 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5017    array.  Use MODE for CONST_INT's.  When the constant's mode is smaller
5018    than 16 bytes, the value is repeated across the rest of the array. */
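/* For example, an SImode CONST_INT of 0x12345678 fills ARR with
   { 0x12, 0x34, 0x56, 0x78 } repeated four times.  */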
5019 void
5020 constant_to_array (machine_mode mode, rtx x, unsigned char arr[16])
5021 {
5022   HOST_WIDE_INT val;
5023   int i, j, first;
5024 
5025   memset (arr, 0, 16);
5026   mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5027   if (GET_CODE (x) == CONST_INT
5028       || (GET_CODE (x) == CONST_DOUBLE
5029 	  && (mode == SFmode || mode == DFmode)))
5030     {
5031       gcc_assert (mode != VOIDmode && mode != BLKmode);
5032 
5033       if (GET_CODE (x) == CONST_DOUBLE)
5034 	val = const_double_to_hwint (x);
5035       else
5036 	val = INTVAL (x);
5037       first = GET_MODE_SIZE (mode) - 1;
5038       for (i = first; i >= 0; i--)
5039 	{
5040 	  arr[i] = val & 0xff;
5041 	  val >>= 8;
5042 	}
5043       /* Splat the constant across the whole array. */
5044       for (j = 0, i = first + 1; i < 16; i++)
5045 	{
5046 	  arr[i] = arr[j];
5047 	  j = (j == first) ? 0 : j + 1;
5048 	}
5049     }
5050   else if (GET_CODE (x) == CONST_DOUBLE)
5051     {
5052       val = CONST_DOUBLE_LOW (x);
5053       for (i = 15; i >= 8; i--)
5054 	{
5055 	  arr[i] = val & 0xff;
5056 	  val >>= 8;
5057 	}
5058       val = CONST_DOUBLE_HIGH (x);
5059       for (i = 7; i >= 0; i--)
5060 	{
5061 	  arr[i] = val & 0xff;
5062 	  val >>= 8;
5063 	}
5064     }
5065   else if (GET_CODE (x) == CONST_VECTOR)
5066     {
5067       int units;
5068       rtx elt;
5069       mode = GET_MODE_INNER (mode);
5070       units = CONST_VECTOR_NUNITS (x);
5071       for (i = 0; i < units; i++)
5072 	{
5073 	  elt = CONST_VECTOR_ELT (x, i);
5074 	  if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5075 	    {
5076 	      if (GET_CODE (elt) == CONST_DOUBLE)
5077 		val = const_double_to_hwint (elt);
5078 	      else
5079 		val = INTVAL (elt);
5080 	      first = GET_MODE_SIZE (mode) - 1;
5081 	      if (first + i * GET_MODE_SIZE (mode) > 16)
5082 		abort ();
5083 	      for (j = first; j >= 0; j--)
5084 		{
5085 		  arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5086 		  val >>= 8;
5087 		}
5088 	    }
5089 	}
5090     }
5091   else
5092     gcc_unreachable();
5093 }
5094 
5095 /* Convert a 16 byte array to a constant of mode MODE.  When MODE is
5096    smaller than 16 bytes, use the bytes that would represent that value
5097    in a register, e.g., for QImode return the value of arr[3].  */
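/* For example, the byte array { 00 01 02 ... 0f } interpreted in V4SImode
   yields the CONST_VECTOR { 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f },
   each element assembled big-endian from its four bytes.  */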
5098 rtx
5099 array_to_constant (machine_mode mode, const unsigned char arr[16])
5100 {
5101   machine_mode inner_mode;
5102   rtvec v;
5103   int units, size, i, j, k;
5104   HOST_WIDE_INT val;
5105 
5106   if (GET_MODE_CLASS (mode) == MODE_INT
5107       && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5108     {
5109       j = GET_MODE_SIZE (mode);
5110       i = j < 4 ? 4 - j : 0;
5111       for (val = 0; i < j; i++)
5112 	val = (val << 8) | arr[i];
5113       val = trunc_int_for_mode (val, mode);
5114       return GEN_INT (val);
5115     }
5116 
5117   if (mode == TImode)
5118     {
5119       HOST_WIDE_INT high;
5120       for (i = high = 0; i < 8; i++)
5121 	high = (high << 8) | arr[i];
5122       for (i = 8, val = 0; i < 16; i++)
5123 	val = (val << 8) | arr[i];
5124       return immed_double_const (val, high, TImode);
5125     }
5126   if (mode == SFmode)
5127     {
5128       val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5129       val = trunc_int_for_mode (val, SImode);
5130       return hwint_to_const_double (SFmode, val);
5131     }
5132   if (mode == DFmode)
5133     {
5134       for (i = 0, val = 0; i < 8; i++)
5135 	val = (val << 8) | arr[i];
5136       return hwint_to_const_double (DFmode, val);
5137     }
5138 
5139   if (!VECTOR_MODE_P (mode))
5140     abort ();
5141 
5142   units = GET_MODE_NUNITS (mode);
5143   size = GET_MODE_UNIT_SIZE (mode);
5144   inner_mode = GET_MODE_INNER (mode);
5145   v = rtvec_alloc (units);
5146 
5147   for (k = i = 0; i < units; ++i)
5148     {
5149       val = 0;
5150       for (j = 0; j < size; j++, k++)
5151 	val = (val << 8) | arr[k];
5152 
5153       if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5154 	RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5155       else
5156 	RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5157     }
5158   if (k > 16)
5159     abort ();
5160 
5161   return gen_rtx_CONST_VECTOR (mode, v);
5162 }
5163 
5164 static void
5165 reloc_diagnostic (rtx x)
5166 {
5167   tree decl = 0;
5168   if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5169     return;
5170 
5171   if (GET_CODE (x) == SYMBOL_REF)
5172     decl = SYMBOL_REF_DECL (x);
5173   else if (GET_CODE (x) == CONST
5174 	   && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5175     decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5176 
5177   /* SYMBOL_REF_DECL is not necessarily a DECL. */
5178   if (decl && !DECL_P (decl))
5179     decl = 0;
5180 
5181   /* The decl could be a string constant.  */
5182   if (decl && DECL_P (decl))
5183     {
5184       location_t loc;
5185       /* We use last_assemble_variable_decl to get line information.  It's
5186 	 not always going to be right and might not even be close, but will
5187 	 be right for the more common cases. */
5188       if (!last_assemble_variable_decl || in_section == ctors_section)
5189 	loc = DECL_SOURCE_LOCATION (decl);
5190       else
5191 	loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5192 
5193       if (TARGET_WARN_RELOC)
5194 	warning_at (loc, 0,
5195 		    "creating run-time relocation for %qD", decl);
5196       else
5197 	error_at (loc,
5198 		  "creating run-time relocation for %qD", decl);
5199     }
5200   else
5201     {
5202       if (TARGET_WARN_RELOC)
5203 	warning_at (input_location, 0, "creating run-time relocation");
5204       else
5205 	error_at (input_location, "creating run-time relocation");
5206     }
5207 }
5208 
5209 /* Hook into assemble_integer so we can generate an error for run-time
5210    relocations.  The SPU ABI disallows them. */
5211 static bool
5212 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5213 {
5214   /* By default run-time relocations aren't supported, but we allow them
5215      in case users support them in their own run-time loader, and we provide
5216      a warning for those users that don't.  */
5217   if ((GET_CODE (x) == SYMBOL_REF)
5218       || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5219     reloc_diagnostic (x);
5220 
5221   return default_assemble_integer (x, size, aligned_p);
5222 }
5223 
5224 static void
5225 spu_asm_globalize_label (FILE * file, const char *name)
5226 {
5227   fputs ("\t.global\t", file);
5228   assemble_name (file, name);
5229   fputs ("\n", file);
5230 }
5231 
5232 static bool
5233 spu_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
5234 	       int opno ATTRIBUTE_UNUSED, int *total,
5235 	       bool speed ATTRIBUTE_UNUSED)
5236 {
5237   int code = GET_CODE (x);
5238   int cost = COSTS_N_INSNS (2);
5239 
5240   /* Folding to a CONST_VECTOR will use extra space but there might
5241      be only a small savings in cycles.  We'd like to use a CONST_VECTOR
5242      only if it allows us to fold away multiple insns.  Changing the cost
5243      of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5244      because this cost will only be compared against a single insn.
5245      if (code == CONST_VECTOR)
5246        return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5247    */
5248 
5249   /* Use defaults for float operations.  Not accurate but good enough. */
5250   if (mode == DFmode)
5251     {
5252       *total = COSTS_N_INSNS (13);
5253       return true;
5254     }
5255   if (mode == SFmode)
5256     {
5257       *total = COSTS_N_INSNS (6);
5258       return true;
5259     }
5260   switch (code)
5261     {
5262     case CONST_INT:
5263       if (satisfies_constraint_K (x))
5264 	*total = 0;
5265       else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5266 	*total = COSTS_N_INSNS (1);
5267       else
5268 	*total = COSTS_N_INSNS (3);
5269       return true;
5270 
5271     case CONST:
5272       *total = COSTS_N_INSNS (3);
5273       return true;
5274 
5275     case LABEL_REF:
5276     case SYMBOL_REF:
5277       *total = COSTS_N_INSNS (0);
5278       return true;
5279 
5280     case CONST_DOUBLE:
5281       *total = COSTS_N_INSNS (5);
5282       return true;
5283 
5284     case FLOAT_EXTEND:
5285     case FLOAT_TRUNCATE:
5286     case FLOAT:
5287     case UNSIGNED_FLOAT:
5288     case FIX:
5289     case UNSIGNED_FIX:
5290       *total = COSTS_N_INSNS (7);
5291       return true;
5292 
5293     case PLUS:
5294       if (mode == TImode)
5295 	{
5296 	  *total = COSTS_N_INSNS (9);
5297 	  return true;
5298 	}
5299       break;
5300 
5301     case MULT:
5302       cost =
5303 	GET_CODE (XEXP (x, 0)) ==
5304 	REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5305       if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5306 	{
5307 	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5308 	    {
5309 	      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5310 	      cost = COSTS_N_INSNS (14);
5311 	      if ((val & 0xffff) == 0)
5312 		cost = COSTS_N_INSNS (9);
5313 	      else if (val > 0 && val < 0x10000)
5314 		cost = COSTS_N_INSNS (11);
5315 	    }
5316 	}
5317       *total = cost;
5318       return true;
5319     case DIV:
5320     case UDIV:
5321     case MOD:
5322     case UMOD:
5323       *total = COSTS_N_INSNS (20);
5324       return true;
5325     case ROTATE:
5326     case ROTATERT:
5327     case ASHIFT:
5328     case ASHIFTRT:
5329     case LSHIFTRT:
5330       *total = COSTS_N_INSNS (4);
5331       return true;
5332     case UNSPEC:
5333       if (XINT (x, 1) == UNSPEC_CONVERT)
5334 	*total = COSTS_N_INSNS (0);
5335       else
5336 	*total = COSTS_N_INSNS (4);
5337       return true;
5338     }
5339   /* Scale cost by mode size.  Except when initializing (cfun->decl == 0). */
5340   if (GET_MODE_CLASS (mode) == MODE_INT
5341       && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5342     cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5343       * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5344   *total = cost;
5345   return true;
5346 }
5347 
5348 static scalar_int_mode
5349 spu_unwind_word_mode (void)
5350 {
5351   return SImode;
5352 }
5353 
5354 /* Decide whether we can make a sibling call to a function.  DECL is the
5355    declaration of the function being targeted by the call and EXP is the
5356    CALL_EXPR representing the call.  */
5357 static bool
5358 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5359 {
5360   return decl && !TARGET_LARGE_MEM;
5361 }
5362 
5363 /* We need to correctly update the back chain pointer and the Available
5364    Stack Size (which is in the second slot of the sp register).  */
5365 void
5366 spu_allocate_stack (rtx op0, rtx op1)
5367 {
5368   HOST_WIDE_INT v;
5369   rtx chain = gen_reg_rtx (V4SImode);
5370   rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5371   rtx sp = gen_reg_rtx (V4SImode);
5372   rtx splatted = gen_reg_rtx (V4SImode);
5373   rtx pat = gen_reg_rtx (TImode);
5374 
5375   /* copy the back chain so we can save it back again. */
5376   emit_move_insn (chain, stack_bot);
5377 
5378   op1 = force_reg (SImode, op1);
5379 
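  /* The shuffle pattern below replicates the 4-byte adjustment in op1 into
     every word slot, so the single vector subtract decrements the stack
     pointer in slot 0 and the Available Stack Size in slot 1 together.  */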
5380   v = 0x1020300010203ll;
5381   emit_move_insn (pat, immed_double_const (v, v, TImode));
5382   emit_insn (gen_shufb (splatted, op1, op1, pat));
5383 
5384   emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5385   emit_insn (gen_subv4si3 (sp, sp, splatted));
5386 
5387   if (flag_stack_check || flag_stack_clash_protection)
5388     {
5389       rtx avail = gen_reg_rtx(SImode);
5390       rtx result = gen_reg_rtx(SImode);
5391       emit_insn (gen_vec_extractv4sisi (avail, sp, GEN_INT (1)));
5392       emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5393       emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5394     }
5395 
5396   emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5397 
5398   emit_move_insn (stack_bot, chain);
5399 
5400   emit_move_insn (op0, virtual_stack_dynamic_rtx);
5401 }
5402 
5403 void
5404 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5405 {
5406   static unsigned char arr[16] =
5407     { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5408   rtx temp = gen_reg_rtx (SImode);
5409   rtx temp2 = gen_reg_rtx (SImode);
5410   rtx temp3 = gen_reg_rtx (V4SImode);
5411   rtx temp4 = gen_reg_rtx (V4SImode);
5412   rtx pat = gen_reg_rtx (TImode);
5413   rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5414 
5415   /* Restore the backchain from the first word, sp from the second.  */
5416   emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5417   emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5418 
5419   emit_move_insn (pat, array_to_constant (TImode, arr));
5420 
5421   /* Compute Available Stack Size for sp */
5422   emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5423   emit_insn (gen_shufb (temp3, temp, temp, pat));
5424 
5425   /* Compute Available Stack Size for back chain */
5426   emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5427   emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5428   emit_insn (gen_addv4si3 (temp4, sp, temp4));
5429 
5430   emit_insn (gen_addv4si3 (sp, sp, temp3));
5431   emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5432 }
5433 
5434 static void
5435 spu_init_libfuncs (void)
5436 {
5437   set_optab_libfunc (smul_optab, DImode, "__muldi3");
5438   set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5439   set_optab_libfunc (smod_optab, DImode, "__moddi3");
5440   set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5441   set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5442   set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5443   set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5444   set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5445   set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5446   set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
5447   set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5448   set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5449 
5450   set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5451   set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5452 
5453   set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5454   set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5455   set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5456   set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5457   set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5458   set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5459   set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5460   set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5461   set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5462   set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5463   set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5464   set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5465 
5466   set_optab_libfunc (smul_optab, TImode, "__multi3");
5467   set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5468   set_optab_libfunc (smod_optab, TImode, "__modti3");
5469   set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5470   set_optab_libfunc (umod_optab, TImode, "__umodti3");
5471   set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5472 }
5473 
5474 /* Make a subreg, stripping any existing subreg.  We could possibly just
5475    call simplify_subreg, but in this case we know what we want. */
5476 rtx
5477 spu_gen_subreg (machine_mode mode, rtx x)
5478 {
5479   if (GET_CODE (x) == SUBREG)
5480     x = SUBREG_REG (x);
5481   if (GET_MODE (x) == mode)
5482     return x;
5483   return gen_rtx_SUBREG (mode, x, 0);
5484 }
5485 
5486 static bool
5487 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5488 {
5489   return (TYPE_MODE (type) == BLKmode
5490 	  && ((type) == 0
5491 	      || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5492 	      || int_size_in_bytes (type) >
5493 	      (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5494 }
5495 
5496 /* Create the built-in types and functions */
5497 
5498 enum spu_function_code
5499 {
5500 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5501 #include "spu-builtins.def"
5502 #undef DEF_BUILTIN
5503    NUM_SPU_BUILTINS
5504 };
5505 
5506 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5507 
5508 struct spu_builtin_description spu_builtins[] = {
5509 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5510   {fcode, icode, name, type, params},
5511 #include "spu-builtins.def"
5512 #undef DEF_BUILTIN
5513 };
5514 
5515 static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5516 
5517 /* Returns the spu builtin decl for CODE.  */
5518 
5519 static tree
5520 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5521 {
5522   if (code >= NUM_SPU_BUILTINS)
5523     return error_mark_node;
5524 
5525   return spu_builtin_decls[code];
5526 }
5527 
5528 
5529 static void
5530 spu_init_builtins (void)
5531 {
5532   struct spu_builtin_description *d;
5533   unsigned int i;
5534 
5535   V16QI_type_node = build_vector_type (intQI_type_node, 16);
5536   V8HI_type_node = build_vector_type (intHI_type_node, 8);
5537   V4SI_type_node = build_vector_type (intSI_type_node, 4);
5538   V2DI_type_node = build_vector_type (intDI_type_node, 2);
5539   V4SF_type_node = build_vector_type (float_type_node, 4);
5540   V2DF_type_node = build_vector_type (double_type_node, 2);
5541 
5542   unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5543   unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5544   unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5545   unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5546 
5547   spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5548 
5549   spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5550   spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5551   spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5552   spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5553   spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5554   spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5555   spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5556   spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5557   spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5558   spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5559   spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5560   spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5561 
5562   spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5563   spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5564   spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5565   spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5566   spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5567   spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5568   spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5569   spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5570 
5571   spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5572   spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5573 
5574   spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5575 
5576   spu_builtin_types[SPU_BTI_PTR] =
5577     build_pointer_type (build_qualified_type
5578 			(void_type_node,
5579 			 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5580 
5581   /* For each builtin we build a new prototype.  The tree code will make
5582      sure nodes are shared. */
5583   for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5584     {
5585       tree p;
5586       char name[64];		/* build_function will make a copy. */
5587       int parm;
5588 
5589       if (d->name == 0)
5590 	continue;
5591 
5592       /* Find last parm.  */
5593       for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5594 	;
5595 
5596       p = void_list_node;
5597       while (parm > 1)
5598 	p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5599 
5600       p = build_function_type (spu_builtin_types[d->parm[0]], p);
5601 
5602       sprintf (name, "__builtin_%s", d->name);
5603       spu_builtin_decls[i] =
5604 	add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5605       if (d->fcode == SPU_MASK_FOR_LOAD)
5606 	TREE_READONLY (spu_builtin_decls[i]) = 1;
5607 
5608       /* These builtins don't throw.  */
5609       TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5610     }
5611 }
5612 
5613 void
5614 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5615 {
5616   static unsigned char arr[16] =
5617     { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5618 
5619   rtx temp = gen_reg_rtx (Pmode);
5620   rtx temp2 = gen_reg_rtx (V4SImode);
5621   rtx temp3 = gen_reg_rtx (V4SImode);
5622   rtx pat = gen_reg_rtx (TImode);
5623   rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5624 
5625   emit_move_insn (pat, array_to_constant (TImode, arr));
5626 
5627   /* Restore the sp.  */
5628   emit_move_insn (temp, op1);
5629   emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5630 
5631   /* Compute available stack size for sp.  */
5632   emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5633   emit_insn (gen_shufb (temp3, temp, temp, pat));
5634 
5635   emit_insn (gen_addv4si3 (sp, sp, temp3));
5636   emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5637 }
5638 
5639 int
5640 spu_safe_dma (HOST_WIDE_INT channel)
5641 {
5642   return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5643 }
5644 
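/* Splat a scalar (or a constant) into every element of a vector.  In the
   non-constant case the shufb control constants below replicate the
   preferred-slot bytes of ops[1] into each element, e.g. 00 01 02 03
   repeated four times for V4SI/V4SF and 02 03 repeated eight times for
   V8HI.  */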
5645 void
5646 spu_builtin_splats (rtx ops[])
5647 {
5648   machine_mode mode = GET_MODE (ops[0]);
5649   if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5650     {
5651       unsigned char arr[16];
5652       constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5653       emit_move_insn (ops[0], array_to_constant (mode, arr));
5654     }
5655   else
5656     {
5657       rtx reg = gen_reg_rtx (TImode);
5658       rtx shuf;
5659       if (GET_CODE (ops[1]) != REG
5660 	  && GET_CODE (ops[1]) != SUBREG)
5661 	ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5662       switch (mode)
5663 	{
5664 	case E_V2DImode:
5665 	case E_V2DFmode:
5666 	  shuf =
5667 	    immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5668 				TImode);
5669 	  break;
5670 	case E_V4SImode:
5671 	case E_V4SFmode:
5672 	  shuf =
5673 	    immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5674 				TImode);
5675 	  break;
5676 	case E_V8HImode:
5677 	  shuf =
5678 	    immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5679 				TImode);
5680 	  break;
5681 	case E_V16QImode:
5682 	  shuf =
5683 	    immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5684 				TImode);
5685 	  break;
5686 	default:
5687 	  abort ();
5688 	}
5689       emit_move_insn (reg, shuf);
5690       emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5691     }
5692 }
5693 
5694 void
5695 spu_builtin_extract (rtx ops[])
5696 {
5697   machine_mode mode;
5698   rtx rot, from, tmp;
5699 
5700   mode = GET_MODE (ops[1]);
5701 
5702   if (GET_CODE (ops[2]) == CONST_INT)
5703     {
5704       switch (mode)
5705 	{
5706 	case E_V16QImode:
5707 	  emit_insn (gen_vec_extractv16qiqi (ops[0], ops[1], ops[2]));
5708 	  break;
5709 	case E_V8HImode:
5710 	  emit_insn (gen_vec_extractv8hihi (ops[0], ops[1], ops[2]));
5711 	  break;
5712 	case E_V4SFmode:
5713 	  emit_insn (gen_vec_extractv4sfsf (ops[0], ops[1], ops[2]));
5714 	  break;
5715 	case E_V4SImode:
5716 	  emit_insn (gen_vec_extractv4sisi (ops[0], ops[1], ops[2]));
5717 	  break;
5718 	case E_V2DImode:
5719 	  emit_insn (gen_vec_extractv2didi (ops[0], ops[1], ops[2]));
5720 	  break;
5721 	case E_V2DFmode:
5722 	  emit_insn (gen_vec_extractv2dfdf (ops[0], ops[1], ops[2]));
5723 	  break;
5724 	default:
5725 	  abort ();
5726 	}
5727       return;
5728     }
5729 
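  /* Variable element index: compute a byte rotate count that moves the
     requested element into the scalar preferred slot (roughly, 4*N for a
     V4SI element N, 2*N - 2 for V8HI and N - 3 for V16QI; rotqby takes the
     count modulo 16).  */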
5730   from = spu_gen_subreg (TImode, ops[1]);
5731   rot = gen_reg_rtx (TImode);
5732   tmp = gen_reg_rtx (SImode);
5733 
5734   switch (mode)
5735     {
5736     case E_V16QImode:
5737       emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5738       break;
5739     case E_V8HImode:
5740       emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5741       emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5742       break;
5743     case E_V4SFmode:
5744     case E_V4SImode:
5745       emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5746       break;
5747     case E_V2DImode:
5748     case E_V2DFmode:
5749       emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5750       break;
5751     default:
5752       abort ();
5753     }
5754   emit_insn (gen_rotqby_ti (rot, from, tmp));
5755 
5756   emit_insn (gen_spu_convert (ops[0], rot));
5757 }
5758 
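/* Insert scalar ops[1] into element ops[3] of vector ops[2], leaving the
   result in ops[0].  The cpat mask selects the original vector everywhere
   except at the element's byte offset, where it takes the scalar from the
   preferred slot of ops[1], so one shufb performs the merge.  */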
5759 void
5760 spu_builtin_insert (rtx ops[])
5761 {
5762   machine_mode mode = GET_MODE (ops[0]);
5763   machine_mode imode = GET_MODE_INNER (mode);
5764   rtx mask = gen_reg_rtx (TImode);
5765   rtx offset;
5766 
5767   if (GET_CODE (ops[3]) == CONST_INT)
5768     offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5769   else
5770     {
5771       offset = gen_reg_rtx (SImode);
5772       emit_insn (gen_mulsi3
5773 		 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5774     }
5775   emit_insn (gen_cpat
5776 	     (mask, stack_pointer_rtx, offset,
5777 	      GEN_INT (GET_MODE_SIZE (imode))));
5778   emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5779 }
5780 
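/* Promote scalar ops[1] into element ops[2] of the result vector.  A TImode
   copy of the scalar (sitting in the preferred slot) is rotated so that its
   bytes land at the element's byte offset; the offset computations below are
   just that rotate count taken modulo 16.  */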
5781 void
5782 spu_builtin_promote (rtx ops[])
5783 {
5784   machine_mode mode, imode;
5785   rtx rot, from, offset;
5786   HOST_WIDE_INT pos;
5787 
5788   mode = GET_MODE (ops[0]);
5789   imode = GET_MODE_INNER (mode);
5790 
5791   from = gen_reg_rtx (TImode);
5792   rot = spu_gen_subreg (TImode, ops[0]);
5793 
5794   emit_insn (gen_spu_convert (from, ops[1]));
5795 
5796   if (GET_CODE (ops[2]) == CONST_INT)
5797     {
5798       pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5799       if (GET_MODE_SIZE (imode) < 4)
5800 	pos += 4 - GET_MODE_SIZE (imode);
5801       offset = GEN_INT (pos & 15);
5802     }
5803   else
5804     {
5805       offset = gen_reg_rtx (SImode);
5806       switch (mode)
5807 	{
5808 	case E_V16QImode:
5809 	  emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5810 	  break;
5811 	case E_V8HImode:
5812 	  emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5813 	  emit_insn (gen_addsi3 (offset, offset, offset));
5814 	  break;
5815 	case E_V4SFmode:
5816 	case E_V4SImode:
5817 	  emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5818 	  emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5819 	  break;
5820 	case E_V2DImode:
5821 	case E_V2DFmode:
5822 	  emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5823 	  break;
5824 	default:
5825 	  abort ();
5826 	}
5827     }
5828   emit_insn (gen_rotqby_ti (rot, from, offset));
5829 }
5830 
5831 static void
5832 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
5833 {
5834   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
5835   rtx shuf = gen_reg_rtx (V4SImode);
5836   rtx insn = gen_reg_rtx (V4SImode);
5837   rtx shufc;
5838   rtx insnc;
5839   rtx mem;
5840 
5841   fnaddr = force_reg (SImode, fnaddr);
5842   cxt = force_reg (SImode, cxt);
5843 
5844   if (TARGET_LARGE_MEM)
5845     {
5846       rtx rotl = gen_reg_rtx (V4SImode);
5847       rtx mask = gen_reg_rtx (V4SImode);
5848       rtx bi = gen_reg_rtx (SImode);
5849       static unsigned char const shufa[16] = {
5850 	2, 3, 0, 1, 18, 19, 16, 17,
5851 	0, 1, 2, 3, 16, 17, 18, 19
5852       };
5853       static unsigned char const insna[16] = {
5854 	0x41, 0, 0, 79,
5855 	0x41, 0, 0, STATIC_CHAIN_REGNUM,
5856 	0x60, 0x80, 0, 79,
5857 	0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5858       };
5859 
5860       shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5861       insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5862 
5863       emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5864       emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5865       emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5866       emit_insn (gen_selb (insn, insnc, rotl, mask));
5867 
5868       mem = adjust_address (m_tramp, V4SImode, 0);
5869       emit_move_insn (mem, insn);
5870 
5871       emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5872       mem = adjust_address (m_tramp, Pmode, 16);
5873       emit_move_insn (mem, bi);
5874     }
5875   else
5876     {
5877       rtx scxt = gen_reg_rtx (SImode);
5878       rtx sfnaddr = gen_reg_rtx (SImode);
5879       static unsigned char const insna[16] = {
5880 	0x42, 0, 0, STATIC_CHAIN_REGNUM,
5881 	0x30, 0, 0, 0,
5882 	0, 0, 0, 0,
5883 	0, 0, 0, 0
5884       };
5885 
5886       shufc = gen_reg_rtx (TImode);
5887       insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5888 
5889       /* By or'ing all of cxt with the ila opcode we are assuming cxt
5890 	 fits 18 bits and the last 4 are zeros.  This will be true if
5891 	 the stack pointer is initialized to 0x3fff0 at program start,
5892 	 otherwise the ila instruction will be garbage. */
5893 
5894       emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5895       emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5896       emit_insn (gen_cpat
5897 		 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5898       emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5899       emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5900 
5901       mem = adjust_address (m_tramp, V4SImode, 0);
5902       emit_move_insn (mem, insn);
5903     }
5904   emit_insn (gen_sync ());
5905 }
5906 
5907 static bool
5908 spu_warn_func_return (tree decl)
5909 {
5910   /* Naked functions are implemented entirely in assembly, including the
5911      return sequence, so suppress warnings about this.  */
5912   return !spu_naked_function_p (decl);
5913 }
5914 
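/* Sign extend the scalar in ops[1] into the wider mode of ops[0].  Roughly:
   the shufb control built below fills the high-order bytes with the
   replicated sign byte and places the original value in the low-order
   bytes.  */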
5915 void
5916 spu_expand_sign_extend (rtx ops[])
5917 {
5918   unsigned char arr[16];
5919   rtx pat = gen_reg_rtx (TImode);
5920   rtx sign, c;
5921   int i, last;
5922   last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5923   if (GET_MODE (ops[1]) == QImode)
5924     {
5925       sign = gen_reg_rtx (HImode);
5926       emit_insn (gen_extendqihi2 (sign, ops[1]));
5927       for (i = 0; i < 16; i++)
5928 	arr[i] = 0x12;
5929       arr[last] = 0x13;
5930     }
5931   else
5932     {
5933       for (i = 0; i < 16; i++)
5934 	arr[i] = 0x10;
5935       switch (GET_MODE (ops[1]))
5936 	{
5937 	case E_HImode:
5938 	  sign = gen_reg_rtx (SImode);
5939 	  emit_insn (gen_extendhisi2 (sign, ops[1]));
5940 	  arr[last] = 0x03;
5941 	  arr[last - 1] = 0x02;
5942 	  break;
5943 	case E_SImode:
5944 	  sign = gen_reg_rtx (SImode);
5945 	  emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5946 	  for (i = 0; i < 4; i++)
5947 	    arr[last - i] = 3 - i;
5948 	  break;
5949 	case E_DImode:
5950 	  sign = gen_reg_rtx (SImode);
5951 	  c = gen_reg_rtx (SImode);
5952 	  emit_insn (gen_spu_convert (c, ops[1]));
5953 	  emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5954 	  for (i = 0; i < 8; i++)
5955 	    arr[last - i] = 7 - i;
5956 	  break;
5957 	default:
5958 	  abort ();
5959 	}
5960     }
5961   emit_move_insn (pat, array_to_constant (TImode, arr));
5962   emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5963 }
5964 
5965 /* Expand vector initialization.  If there are any constant parts, load the
5966    constant parts first, then load any non-constant parts.  */
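/* Illustrative example: for { x, 1, 2, 3 } with non-constant x, the constant
   pass loads { 1, 1, 2, 3 } (variable slots are filled with the first
   constant to keep the splat path viable), and the second pass then inserts
   x into element 0.  */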
5967 void
5968 spu_expand_vector_init (rtx target, rtx vals)
5969 {
5970   machine_mode mode = GET_MODE (target);
5971   int n_elts = GET_MODE_NUNITS (mode);
5972   int n_var = 0;
5973   bool all_same = true;
5974   rtx first, x = NULL_RTX, first_constant = NULL_RTX;
5975   int i;
5976 
5977   first = XVECEXP (vals, 0, 0);
5978   for (i = 0; i < n_elts; ++i)
5979     {
5980       x = XVECEXP (vals, 0, i);
5981       if (!(CONST_INT_P (x)
5982 	    || GET_CODE (x) == CONST_DOUBLE
5983 	    || GET_CODE (x) == CONST_FIXED))
5984 	++n_var;
5985       else
5986 	{
5987 	  if (first_constant == NULL_RTX)
5988 	    first_constant = x;
5989 	}
5990       if (i > 0 && !rtx_equal_p (x, first))
5991 	all_same = false;
5992     }
5993 
5994   /* if all elements are the same, use splats to repeat elements */
5995   if (all_same)
5996     {
5997       if (!CONSTANT_P (first)
5998 	  && !register_operand (first, GET_MODE (x)))
5999 	first = force_reg (GET_MODE (first), first);
6000       emit_insn (gen_spu_splats (target, first));
6001       return;
6002     }
6003 
6004   /* load constant parts */
6005   if (n_var != n_elts)
6006     {
6007       if (n_var == 0)
6008 	{
6009 	  emit_move_insn (target,
6010 			  gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6011 	}
6012       else
6013 	{
6014 	  rtx constant_parts_rtx = copy_rtx (vals);
6015 
6016 	  gcc_assert (first_constant != NULL_RTX);
6017 	  /* fill empty slots with the first constant, this increases
6018 	     our chance of using splats in the recursive call below. */
6019 	  for (i = 0; i < n_elts; ++i)
6020 	    {
6021 	      x = XVECEXP (constant_parts_rtx, 0, i);
6022 	      if (!(CONST_INT_P (x)
6023 		    || GET_CODE (x) == CONST_DOUBLE
6024 		    || GET_CODE (x) == CONST_FIXED))
6025 		XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6026 	    }
6027 
6028 	  spu_expand_vector_init (target, constant_parts_rtx);
6029 	}
6030     }
6031 
6032   /* load variable parts */
6033   if (n_var != 0)
6034     {
6035       rtx insert_operands[4];
6036 
6037       insert_operands[0] = target;
6038       insert_operands[2] = target;
6039       for (i = 0; i < n_elts; ++i)
6040 	{
6041 	  x = XVECEXP (vals, 0, i);
6042 	  if (!(CONST_INT_P (x)
6043 		|| GET_CODE (x) == CONST_DOUBLE
6044 		|| GET_CODE (x) == CONST_FIXED))
6045 	    {
6046 	      if (!register_operand (x, GET_MODE (x)))
6047 		x = force_reg (GET_MODE (x), x);
6048 	      insert_operands[1] = x;
6049 	      insert_operands[3] = GEN_INT (i);
6050 	      spu_builtin_insert (insert_operands);
6051 	    }
6052 	}
6053     }
6054 }
6055 
6056 /* Return the insn code of the vector compare instruction for the given CODE,
6057    DEST_MODE and OP_MODE.  Return -1 if no valid insn is available.  */
6058 
6059 static int
6060 get_vec_cmp_insn (enum rtx_code code,
6061                   machine_mode dest_mode,
6062                   machine_mode op_mode)
6063 
6064 {
6065   switch (code)
6066     {
6067     case EQ:
6068       if (dest_mode == V16QImode && op_mode == V16QImode)
6069         return CODE_FOR_ceq_v16qi;
6070       if (dest_mode == V8HImode && op_mode == V8HImode)
6071         return CODE_FOR_ceq_v8hi;
6072       if (dest_mode == V4SImode && op_mode == V4SImode)
6073         return CODE_FOR_ceq_v4si;
6074       if (dest_mode == V4SImode && op_mode == V4SFmode)
6075         return CODE_FOR_ceq_v4sf;
6076       if (dest_mode == V2DImode && op_mode == V2DFmode)
6077         return CODE_FOR_ceq_v2df;
6078       break;
6079     case GT:
6080       if (dest_mode == V16QImode && op_mode == V16QImode)
6081         return CODE_FOR_cgt_v16qi;
6082       if (dest_mode == V8HImode && op_mode == V8HImode)
6083         return CODE_FOR_cgt_v8hi;
6084       if (dest_mode == V4SImode && op_mode == V4SImode)
6085         return CODE_FOR_cgt_v4si;
6086       if (dest_mode == V4SImode && op_mode == V4SFmode)
6087         return CODE_FOR_cgt_v4sf;
6088       if (dest_mode == V2DImode && op_mode == V2DFmode)
6089         return CODE_FOR_cgt_v2df;
6090       break;
6091     case GTU:
6092       if (dest_mode == V16QImode && op_mode == V16QImode)
6093         return CODE_FOR_clgt_v16qi;
6094       if (dest_mode == V8HImode && op_mode == V8HImode)
6095         return CODE_FOR_clgt_v8hi;
6096       if (dest_mode == V4SImode && op_mode == V4SImode)
6097         return CODE_FOR_clgt_v4si;
6098       break;
6099     default:
6100       break;
6101     }
6102   return -1;
6103 }
6104 
6105 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6106    DMODE is expected destination mode. This is a recursive function.  */
6107 
6108 static rtx
6109 spu_emit_vector_compare (enum rtx_code rcode,
6110                          rtx op0, rtx op1,
6111                          machine_mode dmode)
6112 {
6113   int vec_cmp_insn;
6114   rtx mask;
6115   machine_mode dest_mode;
6116   machine_mode op_mode = GET_MODE (op1);
6117 
6118   gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6119 
6120   /* Floating point vector compare instructions use destination V4SImode.
6121      Double floating point vector compare instructions use destination V2DImode.
6122      Move the destination to the appropriate mode later.  */
6123   if (dmode == V4SFmode)
6124     dest_mode = V4SImode;
6125   else if (dmode == V2DFmode)
6126     dest_mode = V2DImode;
6127   else
6128     dest_mode = dmode;
6129 
6130   mask = gen_reg_rtx (dest_mode);
6131   vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6132 
6133   if (vec_cmp_insn == -1)
6134     {
6135       bool swap_operands = false;
6136       bool try_again = false;
6137       switch (rcode)
6138         {
6139         case LT:
6140           rcode = GT;
6141           swap_operands = true;
6142           try_again = true;
6143           break;
6144         case LTU:
6145           rcode = GTU;
6146           swap_operands = true;
6147           try_again = true;
6148           break;
6149         case NE:
6150 	case UNEQ:
6151 	case UNLE:
6152 	case UNLT:
6153 	case UNGE:
6154 	case UNGT:
6155 	case UNORDERED:
6156           /* Treat A != B as ~(A==B).  */
6157           {
6158 	    enum rtx_code rev_code;
6159             enum insn_code nor_code;
6160 	    rtx rev_mask;
6161 
6162 	    rev_code = reverse_condition_maybe_unordered (rcode);
6163             rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6164 
6165             nor_code = optab_handler (one_cmpl_optab, dest_mode);
6166             gcc_assert (nor_code != CODE_FOR_nothing);
6167             emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
6168             if (dmode != dest_mode)
6169               {
6170                 rtx temp = gen_reg_rtx (dest_mode);
6171                 convert_move (temp, mask, 0);
6172                 return temp;
6173               }
6174             return mask;
6175           }
6176           break;
6177         case GE:
6178         case GEU:
6179         case LE:
6180         case LEU:
6181           /* Try GT/GTU/LT/LTU OR EQ */
6182           {
6183             rtx c_rtx, eq_rtx;
6184             enum insn_code ior_code;
6185             enum rtx_code new_code;
6186 
6187             switch (rcode)
6188               {
6189               case GE:  new_code = GT;  break;
6190               case GEU: new_code = GTU; break;
6191               case LE:  new_code = LT;  break;
6192               case LEU: new_code = LTU; break;
6193               default:
6194                 gcc_unreachable ();
6195               }
6196 
6197             c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6198             eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6199 
6200             ior_code = optab_handler (ior_optab, dest_mode);
6201             gcc_assert (ior_code != CODE_FOR_nothing);
6202             emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6203             if (dmode != dest_mode)
6204               {
6205                 rtx temp = gen_reg_rtx (dest_mode);
6206                 convert_move (temp, mask, 0);
6207                 return temp;
6208               }
6209             return mask;
6210           }
6211           break;
6212         case LTGT:
6213           /* Try LT OR GT */
6214           {
6215             rtx lt_rtx, gt_rtx;
6216             enum insn_code ior_code;
6217 
6218             lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6219             gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6220 
6221             ior_code = optab_handler (ior_optab, dest_mode);
6222             gcc_assert (ior_code != CODE_FOR_nothing);
6223             emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6224             if (dmode != dest_mode)
6225               {
6226                 rtx temp = gen_reg_rtx (dest_mode);
6227                 convert_move (temp, mask, 0);
6228                 return temp;
6229               }
6230             return mask;
6231           }
6232           break;
6233         case ORDERED:
6234           /* Implement as (A==A) & (B==B) */
6235           {
6236             rtx a_rtx, b_rtx;
6237             enum insn_code and_code;
6238 
6239             a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6240             b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6241 
6242             and_code = optab_handler (and_optab, dest_mode);
6243             gcc_assert (and_code != CODE_FOR_nothing);
6244             emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6245             if (dmode != dest_mode)
6246               {
6247                 rtx temp = gen_reg_rtx (dest_mode);
6248                 convert_move (temp, mask, 0);
6249                 return temp;
6250               }
6251             return mask;
6252           }
6253           break;
6254         default:
6255           gcc_unreachable ();
6256         }
6257 
6258       /* You only get two chances.  */
6259       if (try_again)
6260           vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6261 
6262       gcc_assert (vec_cmp_insn != -1);
6263 
6264       if (swap_operands)
6265         {
6266           rtx tmp;
6267           tmp = op0;
6268           op0 = op1;
6269           op1 = tmp;
6270         }
6271     }
6272 
6273   emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6274   if (dmode != dest_mode)
6275     {
6276       rtx temp = gen_reg_rtx (dest_mode);
6277       convert_move (temp, mask, 0);
6278       return temp;
6279     }
6280   return mask;
6281 }
6282 
6283 
6284 /* Emit vector conditional expression.
6285    DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6286    CC_OP0 and CC_OP1 are the two operands for the relation operation COND.  */
6287 
6288 int
6289 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6290                            rtx cond, rtx cc_op0, rtx cc_op1)
6291 {
6292   machine_mode dest_mode = GET_MODE (dest);
6293   enum rtx_code rcode = GET_CODE (cond);
6294   rtx mask;
6295 
6296   /* Get the vector mask for the given relational operations.  */
6297   mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6298 
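  /* selb picks bits from its second vector source where the mask is set, so
     passing (op2, op1, mask) yields op1 in the lanes where the comparison
     was true and op2 elsewhere, i.e. cond ? op1 : op2.  */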
6299   emit_insn(gen_selb (dest, op2, op1, mask));
6300 
6301   return 1;
6302 }
6303 
6304 static rtx
6305 spu_force_reg (machine_mode mode, rtx op)
6306 {
6307   rtx x, r;
6308   if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6309     {
6310       if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6311 	  || GET_MODE (op) == BLKmode)
6312 	return force_reg (mode, convert_to_mode (mode, op, 0));
6313       abort ();
6314     }
6315 
6316   r = force_reg (GET_MODE (op), op);
6317   if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6318     {
6319       x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6320       if (x)
6321 	return x;
6322     }
6323 
6324   x = gen_reg_rtx (mode);
6325   emit_insn (gen_spu_convert (x, r));
6326   return x;
6327 }
6328 
6329 static void
6330 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6331 {
6332   HOST_WIDE_INT v = 0;
6333   int lsbits;
6334   /* Check the range of immediate operands. */
6335   if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6336     {
6337       int range = p - SPU_BTI_7;
6338 
6339       if (!CONSTANT_P (op))
6340 	error ("%s expects an integer literal in the range [%d, %d]",
6341 	       d->name,
6342 	       spu_builtin_range[range].low, spu_builtin_range[range].high);
6343 
6344       if (GET_CODE (op) == CONST
6345 	  && (GET_CODE (XEXP (op, 0)) == PLUS
6346 	      || GET_CODE (XEXP (op, 0)) == MINUS))
6347 	{
6348 	  v = INTVAL (XEXP (XEXP (op, 0), 1));
6349 	  op = XEXP (XEXP (op, 0), 0);
6350 	}
6351       else if (GET_CODE (op) == CONST_INT)
6352 	v = INTVAL (op);
6353       else if (GET_CODE (op) == CONST_VECTOR
6354 	       && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6355 	v = INTVAL (CONST_VECTOR_ELT (op, 0));
6356 
6357       /* The default for v is 0 which is valid in every range. */
6358       if (v < spu_builtin_range[range].low
6359 	  || v > spu_builtin_range[range].high)
6360 	error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6361 	       d->name,
6362 	       spu_builtin_range[range].low, spu_builtin_range[range].high,
6363 	       v);
6364 
6365       switch (p)
6366 	{
6367 	case SPU_BTI_S10_4:
6368 	  lsbits = 4;
6369 	  break;
6370 	case SPU_BTI_U16_2:
6371 	  /* This is only used in lqa, and stqa.  Even though the insns
6372 	     encode 16 bits of the address (all but the 2 least
6373 	     significant), only 14 bits are used because it is masked to
6374 	     be 16 byte aligned. */
6375 	  lsbits = 4;
6376 	  break;
6377 	case SPU_BTI_S16_2:
6378 	  /* This is used for lqr and stqr. */
6379 	  lsbits = 2;
6380 	  break;
6381 	default:
6382 	  lsbits = 0;
6383 	}
6384 
6385       if (GET_CODE (op) == LABEL_REF
6386 	  || (GET_CODE (op) == SYMBOL_REF
6387 	      && SYMBOL_REF_FUNCTION_P (op))
6388 	  || (v & ((1 << lsbits) - 1)) != 0)
6389 	warning (0, "%d least significant bits of %s are ignored", lsbits,
6390 		 d->name);
6391     }
6392 }
6393 
6394 
6395 static int
6396 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6397 		     rtx target, rtx ops[])
6398 {
6399   enum insn_code icode = (enum insn_code) d->icode;
6400   int i = 0, a;
6401 
6402   /* Expand the arguments into rtl. */
6403 
6404   if (d->parm[0] != SPU_BTI_VOID)
6405     ops[i++] = target;
6406 
6407   for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6408     {
6409       tree arg = CALL_EXPR_ARG (exp, a);
6410       if (arg == 0)
6411 	abort ();
6412       ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6413     }
6414 
6415   gcc_assert (i == insn_data[icode].n_generator_args);
6416   return i;
6417 }
6418 
6419 static rtx
6420 spu_expand_builtin_1 (struct spu_builtin_description *d,
6421 		      tree exp, rtx target)
6422 {
6423   rtx pat;
6424   rtx ops[8];
6425   enum insn_code icode = (enum insn_code) d->icode;
6426   machine_mode mode, tmode;
6427   int i, p;
6428   int n_operands;
6429   tree return_type;
6430 
6431   /* Set up ops[] with values from arglist. */
6432   n_operands = expand_builtin_args (d, exp, target, ops);
6433 
6434   /* Handle the target operand which must be operand 0. */
6435   i = 0;
6436   if (d->parm[0] != SPU_BTI_VOID)
6437     {
6438 
6439       /* We prefer the mode specified for the match_operand otherwise
6440          use the mode from the builtin function prototype. */
6441       tmode = insn_data[d->icode].operand[0].mode;
6442       if (tmode == VOIDmode)
6443 	tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6444 
6445       /* Try to use target because not using it can lead to extra copies
6446          and when we are using all of the registers extra copies leads
6447          to extra spills.  */
6448       if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6449 	ops[0] = target;
6450       else
6451 	target = ops[0] = gen_reg_rtx (tmode);
6452 
6453       if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6454 	abort ();
6455 
6456       i++;
6457     }
6458 
6459   if (d->fcode == SPU_MASK_FOR_LOAD)
6460     {
6461       machine_mode mode = insn_data[icode].operand[1].mode;
6462       tree arg;
6463       rtx addr, op, pat;
6464 
6465       /* get addr */
6466       arg = CALL_EXPR_ARG (exp, 0);
6467       gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6468       op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6469       addr = memory_address (mode, op);
6470 
6471       /* negate addr */
6472       op = gen_reg_rtx (GET_MODE (addr));
6473       emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
6474       op = gen_rtx_MEM (mode, op);
6475 
6476       pat = GEN_FCN (icode) (target, op);
6477       if (!pat)
6478         return 0;
6479       emit_insn (pat);
6480       return target;
6481     }
6482 
6483   /* Ignore align_hint, but still expand its args in case they have
6484      side effects. */
6485   if (icode == CODE_FOR_spu_align_hint)
6486     return 0;
6487 
6488   /* Handle the rest of the operands. */
6489   for (p = 1; i < n_operands; i++, p++)
6490     {
6491       if (insn_data[d->icode].operand[i].mode != VOIDmode)
6492 	mode = insn_data[d->icode].operand[i].mode;
6493       else
6494 	mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6495 
6496       /* mode can be VOIDmode here for labels */
6497 
6498       /* For specific intrinsics with an immediate operand, e.g.,
6499          si_ai(), we sometimes need to convert the scalar argument to a
6500          vector argument by splatting the scalar. */
6501       if (VECTOR_MODE_P (mode)
6502 	  && (GET_CODE (ops[i]) == CONST_INT
6503 	      || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6504 	      || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6505 	{
6506 	  if (GET_CODE (ops[i]) == CONST_INT)
6507 	    ops[i] = spu_const (mode, INTVAL (ops[i]));
6508 	  else
6509 	    {
6510 	      rtx reg = gen_reg_rtx (mode);
6511 	      machine_mode imode = GET_MODE_INNER (mode);
6512 	      if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6513 		ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6514 	      if (imode != GET_MODE (ops[i]))
6515 		ops[i] = convert_to_mode (imode, ops[i],
6516 					  TYPE_UNSIGNED (spu_builtin_types
6517 							 [d->parm[i]]));
6518 	      emit_insn (gen_spu_splats (reg, ops[i]));
6519 	      ops[i] = reg;
6520 	    }
6521 	}
6522 
6523       spu_check_builtin_parm (d, ops[i], d->parm[p]);
6524 
6525       if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6526 	ops[i] = spu_force_reg (mode, ops[i]);
6527     }
6528 
6529   switch (n_operands)
6530     {
6531     case 0:
6532       pat = GEN_FCN (icode) (0);
6533       break;
6534     case 1:
6535       pat = GEN_FCN (icode) (ops[0]);
6536       break;
6537     case 2:
6538       pat = GEN_FCN (icode) (ops[0], ops[1]);
6539       break;
6540     case 3:
6541       pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6542       break;
6543     case 4:
6544       pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6545       break;
6546     case 5:
6547       pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6548       break;
6549     case 6:
6550       pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6551       break;
6552     default:
6553       abort ();
6554     }
6555 
6556   if (!pat)
6557     abort ();
6558 
6559   if (d->type == B_CALL || d->type == B_BISLED)
6560     emit_call_insn (pat);
6561   else if (d->type == B_JUMP)
6562     {
6563       emit_jump_insn (pat);
6564       emit_barrier ();
6565     }
6566   else
6567     emit_insn (pat);
6568 
6569   return_type = spu_builtin_types[d->parm[0]];
6570   if (d->parm[0] != SPU_BTI_VOID
6571       && GET_MODE (target) != TYPE_MODE (return_type))
6572     {
6573       /* target is the return value.  It should always have the mode of
6574          the builtin function prototype.  */
6575       target = spu_force_reg (TYPE_MODE (return_type), target);
6576     }
6577 
6578   return target;
6579 }
6580 
6581 rtx
6582 spu_expand_builtin (tree exp,
6583 		    rtx target,
6584 		    rtx subtarget ATTRIBUTE_UNUSED,
6585 		    machine_mode mode ATTRIBUTE_UNUSED,
6586 		    int ignore ATTRIBUTE_UNUSED)
6587 {
6588   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6589   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6590   struct spu_builtin_description *d;
6591 
6592   if (fcode < NUM_SPU_BUILTINS)
6593     {
6594       d = &spu_builtins[fcode];
6595 
6596       return spu_expand_builtin_1 (d, exp, target);
6597     }
6598   abort ();
6599 }
6600 
6601 /* Implement targetm.vectorize.builtin_mask_for_load.  */
6602 static tree
6603 spu_builtin_mask_for_load (void)
6604 {
6605   return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6606 }
6607 
6608 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
6609 static int
6610 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6611                                 tree vectype,
6612                                 int misalign ATTRIBUTE_UNUSED)
6613 {
6614   unsigned elements;
6615 
6616   switch (type_of_cost)
6617     {
6618       case scalar_stmt:
6619       case vector_stmt:
6620       case vector_load:
6621       case vector_store:
6622       case vec_to_scalar:
6623       case scalar_to_vec:
6624       case cond_branch_not_taken:
6625       case vec_perm:
6626       case vec_promote_demote:
6627         return 1;
6628 
6629       case scalar_store:
6630         return 10;
6631 
6632       case scalar_load:
6633         /* Load + rotate.  */
6634         return 2;
6635 
6636       case unaligned_load:
6637       case vector_gather_load:
6638       case vector_scatter_store:
6639         return 2;
6640 
6641       case cond_branch_taken:
6642         return 6;
6643 
6644       case vec_construct:
6645 	elements = TYPE_VECTOR_SUBPARTS (vectype);
6646 	return elements / 2 + 1;
6647 
6648       default:
6649         gcc_unreachable ();
6650     }
6651 }
6652 
6653 /* Implement targetm.vectorize.init_cost.  */
6654 
6655 static void *
6656 spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6657 {
6658   unsigned *cost = XNEWVEC (unsigned, 3);
6659   cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
6660   return cost;
6661 }
6662 
6663 /* Implement targetm.vectorize.add_stmt_cost.  */
6664 
6665 static unsigned
6666 spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6667 		   struct _stmt_vec_info *stmt_info, int misalign,
6668 		   enum vect_cost_model_location where)
6669 {
6670   unsigned *cost = (unsigned *) data;
6671   unsigned retval = 0;
6672 
6673   if (flag_vect_cost_model)
6674     {
6675       tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6676       int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6677 
6678       /* Statements in an inner loop relative to the loop being
6679 	 vectorized are weighted more heavily.  The value here is
6680 	 arbitrary and could potentially be improved with analysis.  */
6681       if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6682 	count *= 50;  /* FIXME.  */
6683 
6684       retval = (unsigned) (count * stmt_cost);
6685       cost[where] += retval;
6686     }
6687 
6688   return retval;
6689 }
6690 
6691 /* Implement targetm.vectorize.finish_cost.  */
6692 
6693 static void
6694 spu_finish_cost (void *data, unsigned *prologue_cost,
6695 		 unsigned *body_cost, unsigned *epilogue_cost)
6696 {
6697   unsigned *cost = (unsigned *) data;
6698   *prologue_cost = cost[vect_prologue];
6699   *body_cost     = cost[vect_body];
6700   *epilogue_cost = cost[vect_epilogue];
6701 }
6702 
6703 /* Implement targetm.vectorize.destroy_cost_data.  */
6704 
6705 static void
6706 spu_destroy_cost_data (void *data)
6707 {
6708   free (data);
6709 }
6710 
6711 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6712    after some number of iterations.  This routine does not determine how
6713    many iterations are required to reach the desired alignment.  */
6714 
6715 static bool
6716 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6717 {
6718   if (is_packed)
6719     return false;
6720 
6721   /* All other types are naturally aligned.  */
6722   return true;
6723 }
6724 
6725 /* Return the appropriate mode for a named address pointer.  */
6726 static scalar_int_mode
6727 spu_addr_space_pointer_mode (addr_space_t addrspace)
6728 {
6729   switch (addrspace)
6730     {
6731     case ADDR_SPACE_GENERIC:
6732       return ptr_mode;
6733     case ADDR_SPACE_EA:
6734       return EAmode;
6735     default:
6736       gcc_unreachable ();
6737     }
6738 }
6739 
6740 /* Return the appropriate mode for a named address address.  */
6741 static scalar_int_mode
6742 spu_addr_space_address_mode (addr_space_t addrspace)
6743 {
6744   switch (addrspace)
6745     {
6746     case ADDR_SPACE_GENERIC:
6747       return Pmode;
6748     case ADDR_SPACE_EA:
6749       return EAmode;
6750     default:
6751       gcc_unreachable ();
6752     }
6753 }
6754 
6755 /* Determine if one named address space is a subset of another.  */
6756 
6757 static bool
6758 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6759 {
6760   gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6761   gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6762 
6763   if (subset == superset)
6764     return true;
6765 
6766   /* If we have -mno-address-space-conversion, treat __ea and generic as not
6767      being subsets but instead as disjoint address spaces.  */
6768   else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6769     return false;
6770 
6771   else
6772     return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6773 }
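
/* Roughly speaking (illustrative note): with the default
   -maddress-space-conversion, this makes the generic (local store) space a
   subset of __ea, so a generic pointer can be implicitly converted to an
   __ea pointer but not vice versa; with -mno-address-space-conversion the
   two spaces are treated as disjoint and neither implicit conversion is
   considered safe.  */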
6774 
6775 /* Convert from one address space to another.  */
6776 static rtx
6777 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6778 {
6779   addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6780   addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6781 
6782   gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6783   gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6784 
6785   if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6786     {
6787       rtx result, ls;
6788 
6789       ls = gen_const_mem (DImode,
6790 			  gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6791       set_mem_align (ls, 128);
6792 
6793       result = gen_reg_rtx (Pmode);
6794       ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6795       op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6796       ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6797 					  ls, const0_rtx, Pmode, 1);
6798 
6799       emit_insn (gen_subsi3 (result, op, ls));
6800 
6801       return result;
6802     }
6803 
6804   else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6805     {
6806       rtx result, ls;
6807 
6808       ls = gen_const_mem (DImode,
6809 			  gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6810       set_mem_align (ls, 128);
6811 
6812       result = gen_reg_rtx (EAmode);
6813       ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6814       op = force_reg (Pmode, op);
6815       ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6816 					  ls, const0_rtx, EAmode, 1);
6817       op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6818 
6819       if (EAmode == SImode)
6820 	emit_insn (gen_addsi3 (result, op, ls));
6821       else
6822 	emit_insn (gen_adddi3 (result, op, ls));
6823 
6824       return result;
6825     }
6826 
6827   else
6828     gcc_unreachable ();
6829 }
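
/* Illustrative summary of the expansion above: an __ea pointer is converted
   to a generic (local store) pointer by subtracting the value of
   __ea_local_store, and a generic pointer is converted to an __ea pointer by
   adding it; in both directions a NULL pointer stays NULL, which is what the
   conditional move on the source operand arranges.  */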
6830 
6831 
6832 /* Count the total number of instructions in each pipe and return the
6833    maximum, which is used as the Minimum Iteration Interval (MII)
6834    in the modulo scheduler.  get_pipe() will return -2, -1, 0, or 1;
6835    -2 indicates an instruction that can go in either pipe0 or pipe1.  */
6836 static int
6837 spu_sms_res_mii (struct ddg *g)
6838 {
6839   int i;
6840   unsigned t[4] = {0, 0, 0, 0};
6841 
6842   for (i = 0; i < g->num_nodes; i++)
6843     {
6844       rtx_insn *insn = g->nodes[i].insn;
6845       int p = get_pipe (insn) + 2;
6846 
6847       gcc_assert (p >= 0);
6848       gcc_assert (p < 4);
6849 
6850       t[p]++;
6851       if (dump_file && INSN_P (insn))
6852             fprintf (dump_file, "i%d %s %d %d\n",
6853                      INSN_UID (insn),
6854                      insn_data[INSN_CODE(insn)].name,
6855                      p, t[p]);
6856     }
6857   if (dump_file)
6858     fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6859 
6860   return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6861 }
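
/* Worked example (illustrative): with t = {2, 0, 3, 4} -- two dual-pipe
   instructions, three pipe0 instructions and four pipe1 instructions --
   the result is MAX ((2 + 3 + 4 + 1) / 2, MAX (3, 4)) = MAX (5, 4) = 5,
   i.e. the modulo schedule needs at least five cycles per iteration.  */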
6862 
6863 
6864 void
6865 spu_init_expanders (void)
6866 {
6867   if (cfun)
6868     {
6869       rtx r0, r1;
6870       /* The hard frame pointer register is only 128-bit aligned when
6871          frame_pointer_needed is true.  We don't know that until we're
6872          expanding the prologue.  */
6873       REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6874 
6875       /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6876 	 LAST_VIRTUAL_REGISTER+2 to test the back-end.  We want them
6877 	 to be treated as aligned, so generate them here. */
6878       r0 = gen_reg_rtx (SImode);
6879       r1 = gen_reg_rtx (SImode);
6880       mark_reg_pointer (r0, 128);
6881       mark_reg_pointer (r1, 128);
6882       gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6883 		  && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6884     }
6885 }
6886 
6887 static scalar_int_mode
6888 spu_libgcc_cmp_return_mode (void)
6889 {
6890 
6891 /* For SPU, word mode is TImode, so it is better to use SImode
6892    for compare returns.  */
6893   return SImode;
6894 }
6895 
6896 static scalar_int_mode
6897 spu_libgcc_shift_count_mode (void)
6898 {
6899 /* For SPU, word mode is TImode, so it is better to use SImode
6900    for shift counts.  */
6901   return SImode;
6902 }
6903 
6904 /* Implement targetm.section_type_flags.  */
6905 static unsigned int
6906 spu_section_type_flags (tree decl, const char *name, int reloc)
6907 {
6908   /* .toe needs to have type @nobits.  */
6909   if (strcmp (name, ".toe") == 0)
6910     return SECTION_BSS;
6911   /* Don't load _ea into the current address space.  */
6912   if (strcmp (name, "._ea") == 0)
6913     return SECTION_WRITE | SECTION_DEBUG;
6914   return default_section_type_flags (decl, name, reloc);
6915 }
6916 
6917 /* Implement targetm.select_section.  */
6918 static section *
6919 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6920 {
6921   /* Variables and constants defined in the __ea address space
6922      go into a special section named "._ea".  */
6923   if (TREE_TYPE (decl) != error_mark_node
6924       && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6925     {
6926       /* We might get called with string constants, but get_named_section
6927 	 doesn't like them as they are not DECLs.  Also, we need to set
6928 	 flags in that case.  */
6929       if (!DECL_P (decl))
6930 	return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6931 
6932       return get_named_section (decl, "._ea", reloc);
6933     }
6934 
6935   return default_elf_select_section (decl, reloc, align);
6936 }
6937 
6938 /* Implement targetm.unique_section.  */
6939 static void
6940 spu_unique_section (tree decl, int reloc)
6941 {
6942   /* We don't support unique section names in the __ea address
6943      space for now.  */
6944   if (TREE_TYPE (decl) != error_mark_node
6945       && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6946     return;
6947 
6948   default_unique_section (decl, reloc);
6949 }
6950 
6951 /* Generate a constant or register which contains 2^SCALE.  We assume
6952    the result is valid for MODE.  Currently, MODE must be V4SFmode and
6953    SCALE must be an SImode register or a constant integer.  */
6954 rtx
6955 spu_gen_exp2 (machine_mode mode, rtx scale)
6956 {
6957   gcc_assert (mode == V4SFmode);
6958   gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6959   if (GET_CODE (scale) != CONST_INT)
6960     {
6961       /* unsigned int exp = (127 + scale) << 23;
6962 	__vector float m = (__vector float) spu_splats (exp); */
6963       rtx reg = force_reg (SImode, scale);
6964       rtx exp = gen_reg_rtx (SImode);
6965       rtx mul = gen_reg_rtx (mode);
6966       emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6967       emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6968       emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6969       return mul;
6970     }
6971   else
6972     {
6973       HOST_WIDE_INT exp = 127 + INTVAL (scale);
6974       unsigned char arr[16];
6975       arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6976       arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6977       arr[2] = arr[6] = arr[10] = arr[14] = 0;
6978       arr[3] = arr[7] = arr[11] = arr[15] = 0;
6979       return array_to_constant (mode, arr);
6980     }
6981 }
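
/* Worked example for the constant case above (illustrative): SCALE = 3 gives
   exp = 130, so arr[0] = 130 >> 1 = 0x41 and arr[1] = (130 << 7) & 0xff = 0,
   making each 32-bit element 0x41000000, which is the IEEE single-precision
   encoding of 8.0f = 2^3.  */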
6982 
6983 /* After reload, just change the convert into a move instruction
6984    or a dead instruction. */
6985 void
6986 spu_split_convert (rtx ops[])
6987 {
6988   if (REGNO (ops[0]) == REGNO (ops[1]))
6989     emit_note (NOTE_INSN_DELETED);
6990   else
6991     {
6992       /* Use TImode always as this might help hard reg copyprop.  */
6993       rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6994       rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
6995       emit_insn (gen_move_insn (op0, op1));
6996     }
6997 }
6998 
6999 void
7000 spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
7001 {
7002   fprintf (file, "# profile\n");
7003   fprintf (file, "brsl $75,  _mcount\n");
7004 }
7005 
7006 /* Implement targetm.ref_may_alias_errno.  */
7007 static bool
7008 spu_ref_may_alias_errno (ao_ref *ref)
7009 {
7010   tree base = ao_ref_base (ref);
7011 
7012   /* With SPU newlib, errno is defined as something like
7013          _impure_data._errno
7014      The default implementation of this target hook does not
7015      recognize such expressions, so we special-case it here.  */
7016 
7017   if (TREE_CODE (base) == VAR_DECL
7018       && !TREE_STATIC (base)
7019       && DECL_EXTERNAL (base)
7020       && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7021       && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7022 		 "_impure_data") == 0
7023       /* _errno is the first member of _impure_data.  */
7024       && ref->offset == 0)
7025     return true;
7026 
7027   return default_ref_may_alias_errno (ref);
7028 }
7029 
7030 /* Output thunk to FILE that implements a C++ virtual function call (with
7031    multiple inheritance) to FUNCTION.  The thunk adjusts the this pointer
7032    by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7033    stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7034    relative to the resulting this pointer.  */
7035 
7036 static void
7037 spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7038 		     HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7039 		     tree function)
7040 {
7041   rtx op[8];
7042 
7043   /* Make sure unwind info is emitted for the thunk if needed.  */
7044   final_start_function (emit_barrier (), file, 1);
7045 
7046   /* Operand 0 is the target function.  */
7047   op[0] = XEXP (DECL_RTL (function), 0);
7048 
7049   /* Operand 1 is the 'this' pointer.  */
7050   if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7051     op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7052   else
7053     op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7054 
7055   /* Operands 2/3 are the low/high halfwords of delta.  */
7056   op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7057   op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7058 
7059   /* Operands 4/5 are the low/high halfwords of vcall_offset.  */
7060   op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7061   op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7062 
7063   /* Operands 6/7 are temporary registers.  */
7064   op[6] = gen_rtx_REG (Pmode, 79);
7065   op[7] = gen_rtx_REG (Pmode, 78);
7066 
7067   /* Add DELTA to this pointer.  */
7068   if (delta)
7069     {
7070       if (delta >= -0x200 && delta < 0x200)
7071 	output_asm_insn ("ai\t%1,%1,%2", op);
7072       else if (delta >= -0x8000 && delta < 0x8000)
7073 	{
7074 	  output_asm_insn ("il\t%6,%2", op);
7075 	  output_asm_insn ("a\t%1,%1,%6", op);
7076 	}
7077       else
7078 	{
7079 	  output_asm_insn ("ilhu\t%6,%3", op);
7080 	  output_asm_insn ("iohl\t%6,%2", op);
7081 	  output_asm_insn ("a\t%1,%1,%6", op);
7082 	}
7083     }
7084 
7085   /* Perform vcall adjustment.  */
7086   if (vcall_offset)
7087     {
7088       output_asm_insn ("lqd\t%7,0(%1)", op);
7089       output_asm_insn ("rotqby\t%7,%7,%1", op);
7090 
7091       if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7092 	output_asm_insn ("ai\t%7,%7,%4", op);
7093       else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7094 	{
7095 	  output_asm_insn ("il\t%6,%4", op);
7096 	  output_asm_insn ("a\t%7,%7,%6", op);
7097 	}
7098       else
7099 	{
7100 	  output_asm_insn ("ilhu\t%6,%5", op);
7101 	  output_asm_insn ("iohl\t%6,%4", op);
7102 	  output_asm_insn ("a\t%7,%7,%6", op);
7103 	}
7104 
7105       output_asm_insn ("lqd\t%6,0(%7)", op);
7106       output_asm_insn ("rotqby\t%6,%6,%7", op);
7107       output_asm_insn ("a\t%1,%1,%6", op);
7108     }
7109 
7110   /* Jump to target.  */
7111   output_asm_insn ("br\t%0", op);
7112 
7113   final_end_function ();
7114 }
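
/* Illustrative output (assuming the 'this' pointer arrives in the first
   argument register, $3): for DELTA = 16 and VCALL_OFFSET = 0 the thunk is
   simply
         ai      $3,$3,16
         br      target
   while larger adjustments first build the constant in the temporary
   register with il or ilhu/iohl as shown above.  */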
7115 
7116 /* Canonicalize a comparison from one we don't have to one we do have.  */
7117 static void
7118 spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7119 			     bool op0_preserve_value)
7120 {
7121   if (!op0_preserve_value
7122       && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7123     {
7124       rtx tem = *op0;
7125       *op0 = *op1;
7126       *op1 = tem;
7127       *code = (int)swap_condition ((enum rtx_code)*code);
7128     }
7129 }
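
/* For example, when the value of OP0 does not need to be preserved, a
   comparison such as (lt a b) is rewritten as (gt b a), so only the
   GT/GE/GTU/GEU forms have to be handled by the compare patterns.  */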
7130 
7131 /* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
7132    to perform.  MEM is the memory on which to operate.  VAL is the second
7133    operand of the binary operator.  BEFORE and AFTER are optional locations to
7134    return the value of MEM either before or after the operation.  */
7135 void
7136 spu_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
7137 		      rtx orig_before, rtx orig_after)
7138 {
7139   machine_mode mode = GET_MODE (mem);
7140   rtx before = orig_before, after = orig_after;
7141 
7142   if (before == NULL_RTX)
7143     before = gen_reg_rtx (mode);
7144 
7145   emit_move_insn (before, mem);
7146 
7147   if (code == MULT)  /* NAND operation */
7148     {
7149       rtx x = expand_simple_binop (mode, AND, before, val,
7150 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
7151       after = expand_simple_unop (mode, NOT, x, after, 1);
7152     }
7153   else
7154     {
7155       after = expand_simple_binop (mode, code, before, val,
7156 				   after, 1, OPTAB_LIB_WIDEN);
7157     }
7158 
7159   emit_move_insn (mem, after);
7160 
7161   if (orig_after && after != orig_after)
7162     emit_move_insn (orig_after, after);
7163 }
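
/* Illustrative expansion (a summary, not the generated RTL itself): for a
   fetch-and-add, BEFORE receives the old value of MEM and AFTER = BEFORE + VAL
   is stored back to MEM; for the NAND case (CODE == MULT), AFTER is
   ~(BEFORE & VAL) instead.  */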
7164 
7165 /* Implement TARGET_MODES_TIEABLE_P.  */
7166 
7167 static bool
7168 spu_modes_tieable_p (machine_mode mode1, machine_mode mode2)
7169 {
7170   return (GET_MODE_BITSIZE (mode1) <= MAX_FIXED_MODE_SIZE
7171 	  && GET_MODE_BITSIZE (mode2) <= MAX_FIXED_MODE_SIZE);
7172 }
7173 
7174 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  GCC assumes that modes are
7175    in the lowpart of a register, which is only true for SPU.  */
7176 
7177 static bool
7178 spu_can_change_mode_class (machine_mode from, machine_mode to, reg_class_t)
7179 {
7180   return (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
7181 	  || (GET_MODE_SIZE (from) <= 4 && GET_MODE_SIZE (to) <= 4)
7182 	  || (GET_MODE_SIZE (from) >= 16 && GET_MODE_SIZE (to) >= 16));
7183 }
7184 
7185 /* Implement TARGET_TRULY_NOOP_TRUNCATION.  */
7186 
7187 static bool
7188 spu_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec)
7189 {
7190   return inprec <= 32 && outprec <= inprec;
7191 }
7192 
7193 /* Implement TARGET_STATIC_RTX_ALIGNMENT.
7194 
7195    Make all static objects 16-byte aligned.  This allows us to assume
7196    they are also padded to 16 bytes, which means we can use a single
7197    load or store instruction to access them.  */
7198 
7199 static HOST_WIDE_INT
7200 spu_static_rtx_alignment (machine_mode mode)
7201 {
7202   return MAX (GET_MODE_ALIGNMENT (mode), 128);
7203 }
7204 
7205 /* Implement TARGET_CONSTANT_ALIGNMENT.
7206 
7207    Make all static objects 16-byte aligned.  This allows us to assume
7208    they are also padded to 16 bytes, which means we can use a single
7209    load or store instruction to access them.  */
7210 
7211 static HOST_WIDE_INT
7212 spu_constant_alignment (const_tree, HOST_WIDE_INT align)
7213 {
7214   return MAX (align, 128);
7215 }
7216 
7217 /*  Table of machine attributes.  */
7218 static const struct attribute_spec spu_attribute_table[] =
7219 {
7220   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
7221        affects_type_identity, handler, exclude } */
7222   { "naked",          0, 0, true,  false, false, false,
7223     spu_handle_fndecl_attribute, NULL },
7224   { "spu_vector",     0, 0, false, true,  false, false,
7225     spu_handle_vector_attribute, NULL },
7226   { NULL,             0, 0, false, false, false, false, NULL, NULL }
7227 };
7228 
7229 /*  TARGET overrides.  */
7230 
7231 #undef TARGET_LRA_P
7232 #define TARGET_LRA_P hook_bool_void_false
7233 
7234 #undef TARGET_ADDR_SPACE_POINTER_MODE
7235 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7236 
7237 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
7238 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7239 
7240 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7241 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7242   spu_addr_space_legitimate_address_p
7243 
7244 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7245 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7246 
7247 #undef TARGET_ADDR_SPACE_SUBSET_P
7248 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7249 
7250 #undef TARGET_ADDR_SPACE_CONVERT
7251 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7252 
7253 #undef TARGET_INIT_BUILTINS
7254 #define TARGET_INIT_BUILTINS spu_init_builtins
7255 #undef TARGET_BUILTIN_DECL
7256 #define TARGET_BUILTIN_DECL spu_builtin_decl
7257 
7258 #undef TARGET_EXPAND_BUILTIN
7259 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
7260 
7261 #undef TARGET_UNWIND_WORD_MODE
7262 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7263 
7264 #undef TARGET_LEGITIMIZE_ADDRESS
7265 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7266 
7267 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7268    and .quad for the debugger.  Once the assembler is known to be fixed,
7269    these can be removed.  */
7270 #undef TARGET_ASM_UNALIGNED_SI_OP
7271 #define TARGET_ASM_UNALIGNED_SI_OP	"\t.long\t"
7272 
7273 #undef TARGET_ASM_ALIGNED_DI_OP
7274 #define TARGET_ASM_ALIGNED_DI_OP	"\t.quad\t"
7275 
7276 /* The .8byte directive doesn't seem to work well for a 32 bit
7277    architecture. */
7278 #undef TARGET_ASM_UNALIGNED_DI_OP
7279 #define TARGET_ASM_UNALIGNED_DI_OP NULL
7280 
7281 #undef TARGET_RTX_COSTS
7282 #define TARGET_RTX_COSTS spu_rtx_costs
7283 
7284 #undef TARGET_ADDRESS_COST
7285 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
7286 
7287 #undef TARGET_SCHED_ISSUE_RATE
7288 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7289 
7290 #undef TARGET_SCHED_INIT_GLOBAL
7291 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7292 
7293 #undef TARGET_SCHED_INIT
7294 #define TARGET_SCHED_INIT spu_sched_init
7295 
7296 #undef TARGET_SCHED_VARIABLE_ISSUE
7297 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7298 
7299 #undef TARGET_SCHED_REORDER
7300 #define TARGET_SCHED_REORDER spu_sched_reorder
7301 
7302 #undef TARGET_SCHED_REORDER2
7303 #define TARGET_SCHED_REORDER2 spu_sched_reorder
7304 
7305 #undef TARGET_SCHED_ADJUST_COST
7306 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7307 
7308 #undef  TARGET_ATTRIBUTE_TABLE
7309 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7310 
7311 #undef TARGET_ASM_INTEGER
7312 #define TARGET_ASM_INTEGER spu_assemble_integer
7313 
7314 #undef TARGET_SCALAR_MODE_SUPPORTED_P
7315 #define TARGET_SCALAR_MODE_SUPPORTED_P	spu_scalar_mode_supported_p
7316 
7317 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7318 #define TARGET_VECTOR_MODE_SUPPORTED_P	spu_vector_mode_supported_p
7319 
7320 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7321 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7322 
7323 #undef TARGET_ASM_GLOBALIZE_LABEL
7324 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7325 
7326 #undef TARGET_PASS_BY_REFERENCE
7327 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7328 
7329 #undef TARGET_FUNCTION_ARG
7330 #define TARGET_FUNCTION_ARG spu_function_arg
7331 
7332 #undef TARGET_FUNCTION_ARG_ADVANCE
7333 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7334 
7335 #undef TARGET_FUNCTION_ARG_OFFSET
7336 #define TARGET_FUNCTION_ARG_OFFSET spu_function_arg_offset
7337 
7338 #undef TARGET_FUNCTION_ARG_PADDING
7339 #define TARGET_FUNCTION_ARG_PADDING spu_function_arg_padding
7340 
7341 #undef TARGET_MUST_PASS_IN_STACK
7342 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7343 
7344 #undef TARGET_BUILD_BUILTIN_VA_LIST
7345 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7346 
7347 #undef TARGET_EXPAND_BUILTIN_VA_START
7348 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7349 
7350 #undef TARGET_SETUP_INCOMING_VARARGS
7351 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7352 
7353 #undef TARGET_MACHINE_DEPENDENT_REORG
7354 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7355 
7356 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7357 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7358 
7359 #undef TARGET_INIT_LIBFUNCS
7360 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7361 
7362 #undef TARGET_RETURN_IN_MEMORY
7363 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7364 
7365 #undef  TARGET_ENCODE_SECTION_INFO
7366 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7367 
7368 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7369 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7370 
7371 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7372 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7373 
7374 #undef TARGET_VECTORIZE_INIT_COST
7375 #define TARGET_VECTORIZE_INIT_COST spu_init_cost
7376 
7377 #undef TARGET_VECTORIZE_ADD_STMT_COST
7378 #define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7379 
7380 #undef TARGET_VECTORIZE_FINISH_COST
7381 #define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7382 
7383 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
7384 #define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7385 
7386 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7387 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7388 
7389 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7390 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7391 
7392 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7393 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7394 
7395 #undef TARGET_SCHED_SMS_RES_MII
7396 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7397 
7398 #undef TARGET_SECTION_TYPE_FLAGS
7399 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7400 
7401 #undef TARGET_ASM_SELECT_SECTION
7402 #define TARGET_ASM_SELECT_SECTION  spu_select_section
7403 
7404 #undef TARGET_ASM_UNIQUE_SECTION
7405 #define TARGET_ASM_UNIQUE_SECTION  spu_unique_section
7406 
7407 #undef TARGET_LEGITIMATE_ADDRESS_P
7408 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7409 
7410 #undef TARGET_LEGITIMATE_CONSTANT_P
7411 #define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7412 
7413 #undef TARGET_TRAMPOLINE_INIT
7414 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7415 
7416 #undef TARGET_WARN_FUNC_RETURN
7417 #define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7418 
7419 #undef TARGET_OPTION_OVERRIDE
7420 #define TARGET_OPTION_OVERRIDE spu_option_override
7421 
7422 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7423 #define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7424 
7425 #undef TARGET_REF_MAY_ALIAS_ERRNO
7426 #define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7427 
7428 #undef TARGET_ASM_OUTPUT_MI_THUNK
7429 #define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7430 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7431 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7432 
7433 /* Variable tracking should be run after all optimizations which
7434    change order of insns.  It also needs a valid CFG.  */
7435 #undef TARGET_DELAY_VARTRACK
7436 #define TARGET_DELAY_VARTRACK true
7437 
7438 #undef TARGET_CANONICALIZE_COMPARISON
7439 #define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7440 
7441 #undef TARGET_CAN_USE_DOLOOP_P
7442 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
7443 
7444 #undef TARGET_MODES_TIEABLE_P
7445 #define TARGET_MODES_TIEABLE_P spu_modes_tieable_p
7446 
7447 #undef TARGET_HARD_REGNO_NREGS
7448 #define TARGET_HARD_REGNO_NREGS spu_hard_regno_nregs
7449 
7450 #undef TARGET_CAN_CHANGE_MODE_CLASS
7451 #define TARGET_CAN_CHANGE_MODE_CLASS spu_can_change_mode_class
7452 
7453 #undef TARGET_TRULY_NOOP_TRUNCATION
7454 #define TARGET_TRULY_NOOP_TRUNCATION spu_truly_noop_truncation
7455 
7456 #undef TARGET_STATIC_RTX_ALIGNMENT
7457 #define TARGET_STATIC_RTX_ALIGNMENT spu_static_rtx_alignment
7458 #undef TARGET_CONSTANT_ALIGNMENT
7459 #define TARGET_CONSTANT_ALIGNMENT spu_constant_alignment
7460 
7461 struct gcc_target targetm = TARGET_INITIALIZER;
7462 
7463 #include "gt-spu.h"
7464