1 /* Copyright (C) 2006-2016 Free Software Foundation, Inc.
2 
3    This file is free software; you can redistribute it and/or modify it under
4    the terms of the GNU General Public License as published by the Free
5    Software Foundation; either version 3 of the License, or (at your option)
6    any later version.
7 
8    This file is distributed in the hope that it will be useful, but WITHOUT
9    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
11    for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with GCC; see the file COPYING3.  If not see
15    <http://www.gnu.org/licenses/>.  */
16 
17 #include "config.h"
18 #include "system.h"
19 #include "coretypes.h"
20 #include "backend.h"
21 #include "target.h"
22 #include "rtl.h"
23 #include "tree.h"
24 #include "gimple.h"
25 #include "cfghooks.h"
26 #include "cfgloop.h"
27 #include "df.h"
28 #include "tm_p.h"
29 #include "stringpool.h"
30 #include "expmed.h"
31 #include "optabs.h"
32 #include "regs.h"
33 #include "emit-rtl.h"
34 #include "recog.h"
35 #include "diagnostic-core.h"
36 #include "insn-attr.h"
37 #include "alias.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "calls.h"
41 #include "varasm.h"
42 #include "explow.h"
43 #include "expr.h"
44 #include "output.h"
45 #include "cfgrtl.h"
46 #include "cfgbuild.h"
47 #include "langhooks.h"
48 #include "reload.h"
49 #include "sched-int.h"
50 #include "params.h"
51 #include "gimplify.h"
52 #include "tm-constrs.h"
53 #include "ddg.h"
54 #include "dumpfile.h"
55 #include "builtins.h"
56 #include "rtl-iter.h"
57 
58 /* This file should be included last.  */
59 #include "target-def.h"
60 
61 /* Builtin types, data and prototypes. */
62 
63 enum spu_builtin_type_index
64 {
65   SPU_BTI_END_OF_PARAMS,
66 
67   /* We create new type nodes for these. */
68   SPU_BTI_V16QI,
69   SPU_BTI_V8HI,
70   SPU_BTI_V4SI,
71   SPU_BTI_V2DI,
72   SPU_BTI_V4SF,
73   SPU_BTI_V2DF,
74   SPU_BTI_UV16QI,
75   SPU_BTI_UV8HI,
76   SPU_BTI_UV4SI,
77   SPU_BTI_UV2DI,
78 
79   /* A 16-byte type. (Implemented with V16QI_type_node) */
80   SPU_BTI_QUADWORD,
81 
82   /* These all correspond to intSI_type_node */
83   SPU_BTI_7,
84   SPU_BTI_S7,
85   SPU_BTI_U7,
86   SPU_BTI_S10,
87   SPU_BTI_S10_4,
88   SPU_BTI_U14,
89   SPU_BTI_16,
90   SPU_BTI_S16,
91   SPU_BTI_S16_2,
92   SPU_BTI_U16,
93   SPU_BTI_U16_2,
94   SPU_BTI_U18,
95 
96   /* These correspond to the standard types */
97   SPU_BTI_INTQI,
98   SPU_BTI_INTHI,
99   SPU_BTI_INTSI,
100   SPU_BTI_INTDI,
101 
102   SPU_BTI_UINTQI,
103   SPU_BTI_UINTHI,
104   SPU_BTI_UINTSI,
105   SPU_BTI_UINTDI,
106 
107   SPU_BTI_FLOAT,
108   SPU_BTI_DOUBLE,
109 
110   SPU_BTI_VOID,
111   SPU_BTI_PTR,
112 
113   SPU_BTI_MAX
114 };
115 
116 #define V16QI_type_node               (spu_builtin_types[SPU_BTI_V16QI])
117 #define V8HI_type_node                (spu_builtin_types[SPU_BTI_V8HI])
118 #define V4SI_type_node                (spu_builtin_types[SPU_BTI_V4SI])
119 #define V2DI_type_node                (spu_builtin_types[SPU_BTI_V2DI])
120 #define V4SF_type_node                (spu_builtin_types[SPU_BTI_V4SF])
121 #define V2DF_type_node                (spu_builtin_types[SPU_BTI_V2DF])
122 #define unsigned_V16QI_type_node      (spu_builtin_types[SPU_BTI_UV16QI])
123 #define unsigned_V8HI_type_node       (spu_builtin_types[SPU_BTI_UV8HI])
124 #define unsigned_V4SI_type_node       (spu_builtin_types[SPU_BTI_UV4SI])
125 #define unsigned_V2DI_type_node       (spu_builtin_types[SPU_BTI_UV2DI])
126 
127 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
128 
129 struct spu_builtin_range
130 {
131   int low, high;
132 };
133 
134 static struct spu_builtin_range spu_builtin_range[] = {
135   {-0x40ll, 0x7fll},		/* SPU_BTI_7     */
136   {-0x40ll, 0x3fll},		/* SPU_BTI_S7    */
137   {0ll, 0x7fll},		/* SPU_BTI_U7    */
138   {-0x200ll, 0x1ffll},		/* SPU_BTI_S10   */
139   {-0x2000ll, 0x1fffll},	/* SPU_BTI_S10_4 */
140   {0ll, 0x3fffll},		/* SPU_BTI_U14   */
141   {-0x8000ll, 0xffffll},	/* SPU_BTI_16    */
142   {-0x8000ll, 0x7fffll},	/* SPU_BTI_S16   */
143   {-0x20000ll, 0x1ffffll},	/* SPU_BTI_S16_2 */
144   {0ll, 0xffffll},		/* SPU_BTI_U16   */
145   {0ll, 0x3ffffll},		/* SPU_BTI_U16_2 */
146   {0ll, 0x3ffffll},		/* SPU_BTI_U18   */
147 };
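/* Illustrative sketch (added; not part of the original source): the table
   above is consulted when validating constant builtin arguments,
   conceptually along the lines of

     struct spu_builtin_range *r = &spu_builtin_range[param - SPU_BTI_7];
     if (val < r->low || val > r->high)
       error ("constant argument out of range");

   where the index arithmetic is an assumption for illustration only; the
   real lookup lives in the builtin-checking code outside this excerpt.
   For example, SPU_BTI_U7 accepts constants in [0, 0x7f], SPU_BTI_16
   deliberately spans [-0x8000, 0xffff] so either a signed or an unsigned
   16-bit constant fits, and SPU_BTI_S10_4 spans [-0x2000, 0x1fff] because
   it is a signed 10-bit field scaled by 16.  */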
148 
149 
150 /*  Target specific attribute specifications.  */
151 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
152 
153 /*  Prototypes and external defs.  */
154 static int get_pipe (rtx_insn *insn);
155 static int spu_naked_function_p (tree func);
156 static int mem_is_padded_component_ref (rtx x);
157 static void fix_range (const char *);
158 static rtx spu_expand_load (rtx, rtx, rtx, int);
159 
160 /* Which instruction set architecture to use.  */
161 int spu_arch;
162 /* Which cpu are we tuning for.  */
163 int spu_tune;
164 
165 /* The hardware requires 8 insns between a hint and the branch it
166    affects.  This variable describes how many rtl instructions the
167    compiler needs to see before inserting a hint, and then the compiler
168    will insert enough nops to make it at least 8 insns.  The default is
169    for the compiler to allow up to 2 nops to be emitted.  The nops are
170    inserted in pairs, so we round down. */
171 int spu_hint_dist = (8*4) - (2*4);
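/* Worked example (added for clarity): with the default of 2 allowed nops,
   spu_hint_dist = 8*4 - 2*4 = 24 bytes, i.e. a hint is only inserted when
   it can be placed at least 6 instructions (24 bytes) before the branch;
   the remaining distance up to the required 8 instructions (32 bytes) is
   then covered by at most one dual-issued pair of nops.  */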
172 
173 enum spu_immediate {
174   SPU_NONE,
175   SPU_IL,
176   SPU_ILA,
177   SPU_ILH,
178   SPU_ILHU,
179   SPU_ORI,
180   SPU_ORHI,
181   SPU_ORBI,
182   SPU_IOHL
183 };
184 enum immediate_class
185 {
186   IC_POOL,			/* constant pool */
187   IC_IL1,			/* one il* instruction */
188   IC_IL2,			/* both ilhu and iohl instructions */
189   IC_IL1s,			/* one il* instruction */
190   IC_IL2s,			/* both ilhu and iohl instructions */
191   IC_FSMBI,			/* the fsmbi instruction */
192   IC_CPAT,			/* one of the c*d instructions */
193   IC_FSMBI2			/* fsmbi plus 1 other instruction */
194 };
195 
196 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
197 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
198 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
199 static enum immediate_class classify_immediate (rtx op,
200 						machine_mode mode);
201 
202 /* Pointer mode for __ea references.  */
203 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
204 
205 
206 /* Define the structure for the machine field in struct function.  */
207 struct GTY(()) machine_function
208 {
209   /* Register to use for PIC accesses.  */
210   rtx pic_reg;
211 };
212 
213 /* How to allocate a 'struct machine_function'.  */
214 static struct machine_function *
215 spu_init_machine_status (void)
216 {
217   return ggc_cleared_alloc<machine_function> ();
218 }
219 
220 /* Implement TARGET_OPTION_OVERRIDE.  */
221 static void
222 spu_option_override (void)
223 {
224   /* Set up function hooks.  */
225   init_machine_status = spu_init_machine_status;
226 
227   /* Small loops will be completely unrolled at -O3.  For SPU it is more
228      important to keep code small by default.  */
229   if (!flag_unroll_loops && !flag_peel_loops)
230     maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
231 			   global_options.x_param_values,
232 			   global_options_set.x_param_values);
233 
234   flag_omit_frame_pointer = 1;
235 
236   /* Functions must be 8-byte aligned so we correctly handle dual issue.  */
237   if (align_functions < 8)
238     align_functions = 8;
239 
240   spu_hint_dist = 8*4 - spu_max_nops*4;
241   if (spu_hint_dist < 0)
242     spu_hint_dist = 0;
243 
244   if (spu_fixed_range_string)
245     fix_range (spu_fixed_range_string);
246 
247   /* Determine processor architectural level.  */
248   if (spu_arch_string)
249     {
250       if (strcmp (&spu_arch_string[0], "cell") == 0)
251         spu_arch = PROCESSOR_CELL;
252       else if (strcmp (&spu_arch_string[0], "celledp") == 0)
253         spu_arch = PROCESSOR_CELLEDP;
254       else
255         error ("bad value (%s) for -march= switch", spu_arch_string);
256     }
257 
258   /* Determine processor to tune for.  */
259   if (spu_tune_string)
260     {
261       if (strcmp (&spu_tune_string[0], "cell") == 0)
262         spu_tune = PROCESSOR_CELL;
263       else if (strcmp (&spu_tune_string[0], "celledp") == 0)
264         spu_tune = PROCESSOR_CELLEDP;
265       else
266         error ("bad value (%s) for -mtune= switch", spu_tune_string);
267     }
268 
269   /* Change defaults according to the processor architecture.  */
270   if (spu_arch == PROCESSOR_CELLEDP)
271     {
272       /* If no command line option has been otherwise specified, change
273 	 the default to -mno-safe-hints on celledp -- only the original
274 	 Cell/B.E. processors require this workaround.  */
275       if (!(target_flags_explicit & MASK_SAFE_HINTS))
276 	target_flags &= ~MASK_SAFE_HINTS;
277     }
278 
279   REAL_MODE_FORMAT (SFmode) = &spu_single_format;
280 }
281 
282 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
283    struct attribute_spec.handler.  */
284 
285 /* True if MODE is valid for the target.  By "valid", we mean able to
286    be manipulated in non-trivial ways.  In particular, this means all
287    the arithmetic is supported.  */
288 static bool
289 spu_scalar_mode_supported_p (machine_mode mode)
290 {
291   switch (mode)
292     {
293     case QImode:
294     case HImode:
295     case SImode:
296     case SFmode:
297     case DImode:
298     case TImode:
299     case DFmode:
300       return true;
301 
302     default:
303       return false;
304     }
305 }
306 
307 /* Similarly for vector modes.  "Supported" here is less strict.  At
308    least some operations are supported; need to check optabs or builtins
309    for further details.  */
310 static bool
311 spu_vector_mode_supported_p (machine_mode mode)
312 {
313   switch (mode)
314     {
315     case V16QImode:
316     case V8HImode:
317     case V4SImode:
318     case V2DImode:
319     case V4SFmode:
320     case V2DFmode:
321       return true;
322 
323     default:
324       return false;
325     }
326 }
327 
328 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
329    least significant bytes of the outer mode.  This function returns
330    TRUE for the SUBREG's where this is correct.  */
331 int
332 valid_subreg (rtx op)
333 {
334   machine_mode om = GET_MODE (op);
335   machine_mode im = GET_MODE (SUBREG_REG (op));
336   return om != VOIDmode && im != VOIDmode
337     && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
338 	|| (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
339 	|| (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
340 }
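/* Illustrative examples (added; not part of the original source):
   (subreg:SI (reg:QI) 0) is accepted because both modes are at most 4
   bytes wide, and (subreg:TI (reg:V16QI) 0) is accepted because both are
   16 bytes wide; but (subreg:DI (reg:SI) 0) is rejected, since an 8-byte
   outer mode over a 4-byte inner register would not keep the inner value
   in the least significant bytes on this target.  */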
341 
342 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
343    and adjust the start offset.  */
344 static rtx
345 adjust_operand (rtx op, HOST_WIDE_INT * start)
346 {
347   machine_mode mode;
348   int op_size;
349   /* Strip any paradoxical SUBREG.  */
350   if (GET_CODE (op) == SUBREG
351       && (GET_MODE_BITSIZE (GET_MODE (op))
352 	  > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
353     {
354       if (start)
355 	*start -=
356 	  GET_MODE_BITSIZE (GET_MODE (op)) -
357 	  GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
358       op = SUBREG_REG (op);
359     }
360   /* If it is smaller than SI, ensure it is widened to SI via a SUBREG.  */
361   op_size = GET_MODE_BITSIZE (GET_MODE (op));
362   if (op_size < 32)
363     {
364       if (start)
365 	*start += 32 - op_size;
366       op_size = 32;
367     }
368   /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
369   mode = mode_for_size (op_size, MODE_INT, 0);
370   if (mode != GET_MODE (op))
371     op = gen_rtx_SUBREG (mode, op, 0);
372   return op;
373 }
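/* Worked example (added for clarity): given (subreg:TI (reg:SI r) 0) with
   *start == 100, the paradoxical SUBREG is stripped and *start becomes
   100 - (128 - 32) = 4; the remaining SImode register is already 32 bits
   wide, so nothing more is needed.  Given a bare QImode register with
   *start == 4, *start becomes 4 + (32 - 8) = 28 and the operand is
   wrapped in an SImode SUBREG.  */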
374 
375 void
376 spu_expand_extv (rtx ops[], int unsignedp)
377 {
378   rtx dst = ops[0], src = ops[1];
379   HOST_WIDE_INT width = INTVAL (ops[2]);
380   HOST_WIDE_INT start = INTVAL (ops[3]);
381   HOST_WIDE_INT align_mask;
382   rtx s0, s1, mask, r0;
383 
384   gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
385 
386   if (MEM_P (src))
387     {
388       /* First, determine if we need 1 TImode load or 2.  We need only 1
389          if the bits being extracted do not cross the alignment boundary
390          as determined by the MEM and its address. */
391 
392       align_mask = -MEM_ALIGN (src);
393       if ((start & align_mask) == ((start + width - 1) & align_mask))
394 	{
395 	  /* Alignment is sufficient for 1 load. */
396 	  s0 = gen_reg_rtx (TImode);
397 	  r0 = spu_expand_load (s0, 0, src, start / 8);
398 	  start &= 7;
399 	  if (r0)
400 	    emit_insn (gen_rotqby_ti (s0, s0, r0));
401 	}
402       else
403 	{
404 	  /* Need 2 loads. */
405 	  s0 = gen_reg_rtx (TImode);
406 	  s1 = gen_reg_rtx (TImode);
407 	  r0 = spu_expand_load (s0, s1, src, start / 8);
408 	  start &= 7;
409 
410 	  gcc_assert (start + width <= 128);
411 	  if (r0)
412 	    {
413 	      rtx r1 = gen_reg_rtx (SImode);
414 	      mask = gen_reg_rtx (TImode);
415 	      emit_move_insn (mask, GEN_INT (-1));
416 	      emit_insn (gen_rotqby_ti (s0, s0, r0));
417 	      emit_insn (gen_rotqby_ti (s1, s1, r0));
418 	      if (GET_CODE (r0) == CONST_INT)
419 		r1 = GEN_INT (INTVAL (r0) & 15);
420 	      else
421 		emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
422 	      emit_insn (gen_shlqby_ti (mask, mask, r1));
423 	      emit_insn (gen_selb (s0, s1, s0, mask));
424 	    }
425 	}
426 
427     }
428   else if (GET_CODE (src) == SUBREG)
429     {
430       rtx r = SUBREG_REG (src);
431       gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
432       s0 = gen_reg_rtx (TImode);
433       if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
434 	emit_insn (gen_rtx_SET (s0, gen_rtx_ZERO_EXTEND (TImode, r)));
435       else
436 	emit_move_insn (s0, src);
437     }
438   else
439     {
440       gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
441       s0 = gen_reg_rtx (TImode);
442       emit_move_insn (s0, src);
443     }
444 
445   /* Now s0 is TImode and contains the bits to extract at start. */
446 
447   if (start)
448     emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
449 
450   if (128 - width)
451     s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
452 
453   emit_move_insn (dst, s0);
454 }
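/* Example of the sequence generated above (added for illustration):
   extracting an 8-bit field that starts at bit 16 of a TImode register
   first rotates the quadword left by 16 bits so the field begins at bit 0,
   then shifts right by 128 - 8 = 120 bits (arithmetically for a signed
   extract, logically for an unsigned one), leaving the field
   right-justified in the TImode destination.  */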
455 
456 void
457 spu_expand_insv (rtx ops[])
458 {
459   HOST_WIDE_INT width = INTVAL (ops[1]);
460   HOST_WIDE_INT start = INTVAL (ops[2]);
461   unsigned HOST_WIDE_INT maskbits;
462   machine_mode dst_mode;
463   rtx dst = ops[0], src = ops[3];
464   int dst_size;
465   rtx mask;
466   rtx shift_reg;
467   int shift;
468 
469 
470   if (GET_CODE (ops[0]) == MEM)
471     dst = gen_reg_rtx (TImode);
472   else
473     dst = adjust_operand (dst, &start);
474   dst_mode = GET_MODE (dst);
475   dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
476 
477   if (CONSTANT_P (src))
478     {
479       machine_mode m =
480 	(width <= 32 ? SImode : width <= 64 ? DImode : TImode);
481       src = force_reg (m, convert_to_mode (m, src, 0));
482     }
483   src = adjust_operand (src, 0);
484 
485   mask = gen_reg_rtx (dst_mode);
486   shift_reg = gen_reg_rtx (dst_mode);
487   shift = dst_size - start - width;
488 
489   /* It's not safe to use subreg here because the compiler assumes
490      that the SUBREG_REG is right justified in the SUBREG. */
491   convert_move (shift_reg, src, 1);
492 
493   if (shift > 0)
494     {
495       switch (dst_mode)
496 	{
497 	case SImode:
498 	  emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
499 	  break;
500 	case DImode:
501 	  emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
502 	  break;
503 	case TImode:
504 	  emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
505 	  break;
506 	default:
507 	  abort ();
508 	}
509     }
510   else if (shift < 0)
511     abort ();
512 
513   switch (dst_size)
514     {
515     case 32:
516       maskbits = (~(unsigned HOST_WIDE_INT)0 << (32 - width - start));
517       if (start)
518 	maskbits += ((unsigned HOST_WIDE_INT)1 << (32 - start));
519       emit_move_insn (mask, GEN_INT (maskbits));
520       break;
521     case 64:
522       maskbits = (~(unsigned HOST_WIDE_INT)0 << (64 - width - start));
523       if (start)
524 	maskbits += ((unsigned HOST_WIDE_INT)1 << (64 - start));
525       emit_move_insn (mask, GEN_INT (maskbits));
526       break;
527     case 128:
528       {
529 	unsigned char arr[16];
530 	int i = start / 8;
531 	memset (arr, 0, sizeof (arr));
532 	arr[i] = 0xff >> (start & 7);
533 	for (i++; i <= (start + width - 1) / 8; i++)
534 	  arr[i] = 0xff;
535 	arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
536 	emit_move_insn (mask, array_to_constant (TImode, arr));
537       }
538       break;
539     default:
540       abort ();
541     }
542   if (GET_CODE (ops[0]) == MEM)
543     {
544       rtx low = gen_reg_rtx (SImode);
545       rtx rotl = gen_reg_rtx (SImode);
546       rtx mask0 = gen_reg_rtx (TImode);
547       rtx addr;
548       rtx addr0;
549       rtx addr1;
550       rtx mem;
551 
552       addr = force_reg (Pmode, XEXP (ops[0], 0));
553       addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
554       emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
555       emit_insn (gen_negsi2 (rotl, low));
556       emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
557       emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
558       mem = change_address (ops[0], TImode, addr0);
559       set_mem_alias_set (mem, 0);
560       emit_move_insn (dst, mem);
561       emit_insn (gen_selb (dst, dst, shift_reg, mask0));
562       if (start + width > MEM_ALIGN (ops[0]))
563 	{
564 	  rtx shl = gen_reg_rtx (SImode);
565 	  rtx mask1 = gen_reg_rtx (TImode);
566 	  rtx dst1 = gen_reg_rtx (TImode);
567 	  rtx mem1;
568 	  addr1 = plus_constant (Pmode, addr, 16);
569 	  addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
570 	  emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
571 	  emit_insn (gen_shlqby_ti (mask1, mask, shl));
572 	  mem1 = change_address (ops[0], TImode, addr1);
573 	  set_mem_alias_set (mem1, 0);
574 	  emit_move_insn (dst1, mem1);
575 	  emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
576 	  emit_move_insn (mem1, dst1);
577 	}
578       emit_move_insn (mem, dst);
579     }
580   else
581     emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
582 }
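/* Worked example (added for clarity): inserting an 8-bit field at bit 8 of
   a 32-bit register gives shift = 32 - 8 - 8 = 16, so the source is
   shifted left by 16.  The mask computation yields (~0 << 16) + (1 << 24),
   whose low 32 bits are 0x00ff0000, exactly the bits of the field counted
   from the most significant end; the final selb then merges the shifted
   source into the destination under that mask.  */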
583 
584 
585 int
586 spu_expand_block_move (rtx ops[])
587 {
588   HOST_WIDE_INT bytes, align, offset;
589   rtx src, dst, sreg, dreg, target;
590   int i;
591   if (GET_CODE (ops[2]) != CONST_INT
592       || GET_CODE (ops[3]) != CONST_INT
593       || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
594     return 0;
595 
596   bytes = INTVAL (ops[2]);
597   align = INTVAL (ops[3]);
598 
599   if (bytes <= 0)
600     return 1;
601 
602   dst = ops[0];
603   src = ops[1];
604 
605   if (align == 16)
606     {
607       for (offset = 0; offset + 16 <= bytes; offset += 16)
608 	{
609 	  dst = adjust_address (ops[0], V16QImode, offset);
610 	  src = adjust_address (ops[1], V16QImode, offset);
611 	  emit_move_insn (dst, src);
612 	}
613       if (offset < bytes)
614 	{
615 	  rtx mask;
616 	  unsigned char arr[16] = { 0 };
617 	  for (i = 0; i < bytes - offset; i++)
618 	    arr[i] = 0xff;
619 	  dst = adjust_address (ops[0], V16QImode, offset);
620 	  src = adjust_address (ops[1], V16QImode, offset);
621 	  mask = gen_reg_rtx (V16QImode);
622 	  sreg = gen_reg_rtx (V16QImode);
623 	  dreg = gen_reg_rtx (V16QImode);
624 	  target = gen_reg_rtx (V16QImode);
625 	  emit_move_insn (mask, array_to_constant (V16QImode, arr));
626 	  emit_move_insn (dreg, dst);
627 	  emit_move_insn (sreg, src);
628 	  emit_insn (gen_selb (target, dreg, sreg, mask));
629 	  emit_move_insn (dst, target);
630 	}
631       return 1;
632     }
633   return 0;
634 }
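/* Example (added for illustration): a 20-byte copy with 16-byte alignment
   becomes one full V16QImode register move for bytes 0-15, then a load of
   the source and destination quadwords at offset 16, a selb under a mask
   whose first 4 bytes are 0xff, and a store of the merged result, so only
   the 4 tail bytes of the destination are modified.  */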
635 
636 enum spu_comp_code
637 { SPU_EQ, SPU_GT, SPU_GTU };
638 
639 int spu_comp_icode[12][3] = {
640  {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
641  {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
642  {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
643  {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
644  {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
645  {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
646  {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
647  {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
648  {CODE_FOR_ceq_v8hi,  CODE_FOR_cgt_v8hi,  CODE_FOR_clgt_v8hi},
649  {CODE_FOR_ceq_v4si,  CODE_FOR_cgt_v4si,  CODE_FOR_clgt_v4si},
650  {CODE_FOR_ceq_v4sf,  CODE_FOR_cgt_v4sf, 0},
651  {CODE_FOR_ceq_v2df,  CODE_FOR_cgt_v2df, 0},
652 };
653 
654 /* Generate a compare for CODE.  Return a brand-new rtx that represents
655    the result of the compare.  GCC can figure this out too if we don't
656    provide all variations of compares, but since GCC always wants to use
657    WORD_MODE, we can generate better code in most cases if we do it
658    ourselves.  */
659 void
660 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
661 {
662   int reverse_compare = 0;
663   int reverse_test = 0;
664   rtx compare_result, eq_result;
665   rtx comp_rtx, eq_rtx;
666   machine_mode comp_mode;
667   machine_mode op_mode;
668   enum spu_comp_code scode, eq_code;
669   enum insn_code ior_code;
670   enum rtx_code code = GET_CODE (cmp);
671   rtx op0 = XEXP (cmp, 0);
672   rtx op1 = XEXP (cmp, 1);
673   int index;
674   int eq_test = 0;
675 
676   /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
677      and so on, to keep the constant in operand 1. */
678   if (GET_CODE (op1) == CONST_INT)
679     {
680       HOST_WIDE_INT val = INTVAL (op1) - 1;
681       if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
682 	switch (code)
683 	  {
684 	  case GE:
685 	    op1 = GEN_INT (val);
686 	    code = GT;
687 	    break;
688 	  case LT:
689 	    op1 = GEN_INT (val);
690 	    code = LE;
691 	    break;
692 	  case GEU:
693 	    op1 = GEN_INT (val);
694 	    code = GTU;
695 	    break;
696 	  case LTU:
697 	    op1 = GEN_INT (val);
698 	    code = LEU;
699 	    break;
700 	  default:
701 	    break;
702 	  }
703     }
704 
705   /* However, if we generate an integer result, performing a reverse test
706      would require an extra negation, so avoid that where possible.  */
707   if (GET_CODE (op1) == CONST_INT && is_set == 1)
708     {
709       HOST_WIDE_INT val = INTVAL (op1) + 1;
710       if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
711 	switch (code)
712 	  {
713 	  case LE:
714 	    op1 = GEN_INT (val);
715 	    code = LT;
716 	    break;
717 	  case LEU:
718 	    op1 = GEN_INT (val);
719 	    code = LTU;
720 	    break;
721 	  default:
722 	    break;
723 	  }
724     }
725 
726   comp_mode = SImode;
727   op_mode = GET_MODE (op0);
728 
729   switch (code)
730     {
731     case GE:
732       scode = SPU_GT;
733       if (HONOR_NANS (op_mode))
734 	{
735 	  reverse_compare = 0;
736 	  reverse_test = 0;
737 	  eq_test = 1;
738 	  eq_code = SPU_EQ;
739 	}
740       else
741 	{
742 	  reverse_compare = 1;
743 	  reverse_test = 1;
744 	}
745       break;
746     case LE:
747       scode = SPU_GT;
748       if (HONOR_NANS (op_mode))
749 	{
750 	  reverse_compare = 1;
751 	  reverse_test = 0;
752 	  eq_test = 1;
753 	  eq_code = SPU_EQ;
754 	}
755       else
756 	{
757 	  reverse_compare = 0;
758 	  reverse_test = 1;
759 	}
760       break;
761     case LT:
762       reverse_compare = 1;
763       reverse_test = 0;
764       scode = SPU_GT;
765       break;
766     case GEU:
767       reverse_compare = 1;
768       reverse_test = 1;
769       scode = SPU_GTU;
770       break;
771     case LEU:
772       reverse_compare = 0;
773       reverse_test = 1;
774       scode = SPU_GTU;
775       break;
776     case LTU:
777       reverse_compare = 1;
778       reverse_test = 0;
779       scode = SPU_GTU;
780       break;
781     case NE:
782       reverse_compare = 0;
783       reverse_test = 1;
784       scode = SPU_EQ;
785       break;
786 
787     case EQ:
788       scode = SPU_EQ;
789       break;
790     case GT:
791       scode = SPU_GT;
792       break;
793     case GTU:
794       scode = SPU_GTU;
795       break;
796     default:
797       scode = SPU_EQ;
798       break;
799     }
800 
801   switch (op_mode)
802     {
803     case QImode:
804       index = 0;
805       comp_mode = QImode;
806       break;
807     case HImode:
808       index = 1;
809       comp_mode = HImode;
810       break;
811     case SImode:
812       index = 2;
813       break;
814     case DImode:
815       index = 3;
816       break;
817     case TImode:
818       index = 4;
819       break;
820     case SFmode:
821       index = 5;
822       break;
823     case DFmode:
824       index = 6;
825       break;
826     case V16QImode:
827       index = 7;
828       comp_mode = op_mode;
829       break;
830     case V8HImode:
831       index = 8;
832       comp_mode = op_mode;
833       break;
834     case V4SImode:
835       index = 9;
836       comp_mode = op_mode;
837       break;
838     case V4SFmode:
839       index = 10;
840       comp_mode = V4SImode;
841       break;
842     case V2DFmode:
843       index = 11;
844       comp_mode = V2DImode;
845       break;
846     case V2DImode:
847     default:
848       abort ();
849     }
850 
851   if (GET_MODE (op1) == DFmode
852       && (scode != SPU_GT && scode != SPU_EQ))
853     abort ();
854 
855   if (is_set == 0 && op1 == const0_rtx
856       && (GET_MODE (op0) == SImode
857 	  || GET_MODE (op0) == HImode
858 	  || GET_MODE (op0) == QImode) && scode == SPU_EQ)
859     {
860       /* Don't need to set a register with the result when we are
861          comparing against zero and branching. */
862       reverse_test = !reverse_test;
863       compare_result = op0;
864     }
865   else
866     {
867       compare_result = gen_reg_rtx (comp_mode);
868 
869       if (reverse_compare)
870 	{
871 	  rtx t = op1;
872 	  op1 = op0;
873 	  op0 = t;
874 	}
875 
876       if (spu_comp_icode[index][scode] == 0)
877 	abort ();
878 
879       if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
880 	  (op0, op_mode))
881 	op0 = force_reg (op_mode, op0);
882       if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
883 	  (op1, op_mode))
884 	op1 = force_reg (op_mode, op1);
885       comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
886 							 op0, op1);
887       if (comp_rtx == 0)
888 	abort ();
889       emit_insn (comp_rtx);
890 
891       if (eq_test)
892         {
893           eq_result = gen_reg_rtx (comp_mode);
894           eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
895 							     op0, op1);
896           if (eq_rtx == 0)
897 	    abort ();
898           emit_insn (eq_rtx);
899           ior_code = optab_handler (ior_optab, comp_mode);
900           gcc_assert (ior_code != CODE_FOR_nothing);
901           emit_insn (GEN_FCN (ior_code)
902 		     (compare_result, compare_result, eq_result));
903         }
904     }
905 
906   if (is_set == 0)
907     {
908       rtx bcomp;
909       rtx loc_ref;
910 
911       /* We don't have branch on QI compare insns, so we convert the
912          QI compare result to a HI result. */
913       if (comp_mode == QImode)
914 	{
915 	  rtx old_res = compare_result;
916 	  compare_result = gen_reg_rtx (HImode);
917 	  comp_mode = HImode;
918 	  emit_insn (gen_extendqihi2 (compare_result, old_res));
919 	}
920 
921       if (reverse_test)
922 	bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
923       else
924 	bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
925 
926       loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
927       emit_jump_insn (gen_rtx_SET (pc_rtx,
928 				   gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
929 							 loc_ref, pc_rtx)));
930     }
931   else if (is_set == 2)
932     {
933       rtx target = operands[0];
934       int compare_size = GET_MODE_BITSIZE (comp_mode);
935       int target_size = GET_MODE_BITSIZE (GET_MODE (target));
936       machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
937       rtx select_mask;
938       rtx op_t = operands[2];
939       rtx op_f = operands[3];
940 
941       /* The result of the comparison can be SI, HI or QI mode.  Create a
942          mask based on that result. */
943       if (target_size > compare_size)
944 	{
945 	  select_mask = gen_reg_rtx (mode);
946 	  emit_insn (gen_extend_compare (select_mask, compare_result));
947 	}
948       else if (target_size < compare_size)
949 	select_mask =
950 	  gen_rtx_SUBREG (mode, compare_result,
951 			  (compare_size - target_size) / BITS_PER_UNIT);
952       else if (comp_mode != mode)
953 	select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
954       else
955 	select_mask = compare_result;
956 
957       if (GET_MODE (target) != GET_MODE (op_t)
958 	  || GET_MODE (target) != GET_MODE (op_f))
959 	abort ();
960 
961       if (reverse_test)
962 	emit_insn (gen_selb (target, op_t, op_f, select_mask));
963       else
964 	emit_insn (gen_selb (target, op_f, op_t, select_mask));
965     }
966   else
967     {
968       rtx target = operands[0];
969       if (reverse_test)
970 	emit_insn (gen_rtx_SET (compare_result,
971 				gen_rtx_NOT (comp_mode, compare_result)));
972       if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
973 	emit_insn (gen_extendhisi2 (target, compare_result));
974       else if (GET_MODE (target) == SImode
975 	       && GET_MODE (compare_result) == QImode)
976 	emit_insn (gen_extend_compare (target, compare_result));
977       else
978 	emit_move_insn (target, compare_result);
979     }
980 }
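/* Example (added for illustration): for a branch on (lt:SI a b) there is
   no "less than" entry in spu_comp_icode, so the operands are swapped and
   a cgt computes b > a; for (ge:SI a b) the compare is also swapped
   (again computing b > a) but the branch test is reversed instead, taking
   the branch when the compare result is zero.  Floating-point GE/LE
   additionally OR in an equality compare when NaNs must be honored.  */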
981 
982 HOST_WIDE_INT
983 const_double_to_hwint (rtx x)
984 {
985   HOST_WIDE_INT val;
986   if (GET_MODE (x) == SFmode)
987     REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), val);
988   else if (GET_MODE (x) == DFmode)
989     {
990       long l[2];
991       REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
992       val = l[0];
993       val = (val << 32) | (l[1] & 0xffffffff);
994     }
995   else
996     abort ();
997   return val;
998 }
999 
1000 rtx
1001 hwint_to_const_double (machine_mode mode, HOST_WIDE_INT v)
1002 {
1003   long tv[2];
1004   REAL_VALUE_TYPE rv;
1005   gcc_assert (mode == SFmode || mode == DFmode);
1006 
1007   if (mode == SFmode)
1008     tv[0] = (v << 32) >> 32;
1009   else if (mode == DFmode)
1010     {
1011       tv[1] = (v << 32) >> 32;
1012       tv[0] = v >> 32;
1013     }
1014   real_from_target (&rv, tv, mode);
1015   return const_double_from_real_value (rv, mode);
1016 }
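/* Example (added for illustration): const_double_to_hwint on the SFmode
   constant 1.0 yields 0x3f800000, the IEEE single-precision bit pattern,
   and hwint_to_const_double (SFmode, 0x3f800000) rebuilds the same
   CONST_DOUBLE; for DFmode the two 32-bit target words are packed into one
   64-bit HOST_WIDE_INT with the most significant word in the high half.  */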
1017 
1018 void
1019 print_operand_address (FILE * file, register rtx addr)
1020 {
1021   rtx reg;
1022   rtx offset;
1023 
1024   if (GET_CODE (addr) == AND
1025       && GET_CODE (XEXP (addr, 1)) == CONST_INT
1026       && INTVAL (XEXP (addr, 1)) == -16)
1027     addr = XEXP (addr, 0);
1028 
1029   switch (GET_CODE (addr))
1030     {
1031     case REG:
1032       fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1033       break;
1034 
1035     case PLUS:
1036       reg = XEXP (addr, 0);
1037       offset = XEXP (addr, 1);
1038       if (GET_CODE (offset) == REG)
1039 	{
1040 	  fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1041 		   reg_names[REGNO (offset)]);
1042 	}
1043       else if (GET_CODE (offset) == CONST_INT)
1044 	{
1045 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1046 		   INTVAL (offset), reg_names[REGNO (reg)]);
1047 	}
1048       else
1049 	abort ();
1050       break;
1051 
1052     case CONST:
1053     case LABEL_REF:
1054     case SYMBOL_REF:
1055     case CONST_INT:
1056       output_addr_const (file, addr);
1057       break;
1058 
1059     default:
1060       debug_rtx (addr);
1061       abort ();
1062     }
1063 }
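/* Examples of the address syntax emitted above (added for illustration):
   a plain register prints as "0($reg)", (plus (reg A) (reg B)) prints as
   "$A,$B" (the register-indexed form), and (plus (reg A) (const_int 32))
   prints as "32($A)"; an address wrapped in (and ... (const_int -16)) has
   the AND stripped first, since quadword loads and stores ignore the low
   four address bits anyway.  The register names shown are placeholders.  */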
1064 
1065 void
1066 print_operand (FILE * file, rtx x, int code)
1067 {
1068   machine_mode mode = GET_MODE (x);
1069   HOST_WIDE_INT val;
1070   unsigned char arr[16];
1071   int xcode = GET_CODE (x);
1072   int i, info;
1073   if (GET_MODE (x) == VOIDmode)
1074     switch (code)
1075       {
1076       case 'L':			/* 128 bits, signed */
1077       case 'm':			/* 128 bits, signed */
1078       case 'T':			/* 128 bits, signed */
1079       case 't':			/* 128 bits, signed */
1080 	mode = TImode;
1081 	break;
1082       case 'K':			/* 64 bits, signed */
1083       case 'k':			/* 64 bits, signed */
1084       case 'D':			/* 64 bits, signed */
1085       case 'd':			/* 64 bits, signed */
1086 	mode = DImode;
1087 	break;
1088       case 'J':			/* 32 bits, signed */
1089       case 'j':			/* 32 bits, signed */
1090       case 's':			/* 32 bits, signed */
1091       case 'S':			/* 32 bits, signed */
1092 	mode = SImode;
1093 	break;
1094       }
1095   switch (code)
1096     {
1097 
1098     case 'j':			/* 32 bits, signed */
1099     case 'k':			/* 64 bits, signed */
1100     case 'm':			/* 128 bits, signed */
1101       if (xcode == CONST_INT
1102 	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1103 	{
1104 	  gcc_assert (logical_immediate_p (x, mode));
1105 	  constant_to_array (mode, x, arr);
1106 	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1107 	  val = trunc_int_for_mode (val, SImode);
1108 	  switch (which_logical_immediate (val))
1109 	  {
1110 	  case SPU_ORI:
1111 	    break;
1112 	  case SPU_ORHI:
1113 	    fprintf (file, "h");
1114 	    break;
1115 	  case SPU_ORBI:
1116 	    fprintf (file, "b");
1117 	    break;
1118 	  default:
1119 	    gcc_unreachable();
1120 	  }
1121 	}
1122       else
1123 	gcc_unreachable();
1124       return;
1125 
1126     case 'J':			/* 32 bits, signed */
1127     case 'K':			/* 64 bits, signed */
1128     case 'L':			/* 128 bits, signed */
1129       if (xcode == CONST_INT
1130 	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1131 	{
1132 	  gcc_assert (logical_immediate_p (x, mode)
1133 		      || iohl_immediate_p (x, mode));
1134 	  constant_to_array (mode, x, arr);
1135 	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1136 	  val = trunc_int_for_mode (val, SImode);
1137 	  switch (which_logical_immediate (val))
1138 	  {
1139 	  case SPU_ORI:
1140 	  case SPU_IOHL:
1141 	    break;
1142 	  case SPU_ORHI:
1143 	    val = trunc_int_for_mode (val, HImode);
1144 	    break;
1145 	  case SPU_ORBI:
1146 	    val = trunc_int_for_mode (val, QImode);
1147 	    break;
1148 	  default:
1149 	    gcc_unreachable();
1150 	  }
1151 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1152 	}
1153       else
1154 	gcc_unreachable();
1155       return;
1156 
1157     case 't':			/* 128 bits, signed */
1158     case 'd':			/* 64 bits, signed */
1159     case 's':			/* 32 bits, signed */
1160       if (CONSTANT_P (x))
1161 	{
1162 	  enum immediate_class c = classify_immediate (x, mode);
1163 	  switch (c)
1164 	    {
1165 	    case IC_IL1:
1166 	      constant_to_array (mode, x, arr);
1167 	      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1168 	      val = trunc_int_for_mode (val, SImode);
1169 	      switch (which_immediate_load (val))
1170 		{
1171 		case SPU_IL:
1172 		  break;
1173 		case SPU_ILA:
1174 		  fprintf (file, "a");
1175 		  break;
1176 		case SPU_ILH:
1177 		  fprintf (file, "h");
1178 		  break;
1179 		case SPU_ILHU:
1180 		  fprintf (file, "hu");
1181 		  break;
1182 		default:
1183 		  gcc_unreachable ();
1184 		}
1185 	      break;
1186 	    case IC_CPAT:
1187 	      constant_to_array (mode, x, arr);
1188 	      cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1189 	      if (info == 1)
1190 		fprintf (file, "b");
1191 	      else if (info == 2)
1192 		fprintf (file, "h");
1193 	      else if (info == 4)
1194 		fprintf (file, "w");
1195 	      else if (info == 8)
1196 		fprintf (file, "d");
1197 	      break;
1198 	    case IC_IL1s:
1199 	      if (xcode == CONST_VECTOR)
1200 		{
1201 		  x = CONST_VECTOR_ELT (x, 0);
1202 		  xcode = GET_CODE (x);
1203 		}
1204 	      if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1205 		fprintf (file, "a");
1206 	      else if (xcode == HIGH)
1207 		fprintf (file, "hu");
1208 	      break;
1209 	    case IC_FSMBI:
1210 	    case IC_FSMBI2:
1211 	    case IC_IL2:
1212 	    case IC_IL2s:
1213 	    case IC_POOL:
1214 	      abort ();
1215 	    }
1216 	}
1217       else
1218 	gcc_unreachable ();
1219       return;
1220 
1221     case 'T':			/* 128 bits, signed */
1222     case 'D':			/* 64 bits, signed */
1223     case 'S':			/* 32 bits, signed */
1224       if (CONSTANT_P (x))
1225 	{
1226 	  enum immediate_class c = classify_immediate (x, mode);
1227 	  switch (c)
1228 	    {
1229 	    case IC_IL1:
1230 	      constant_to_array (mode, x, arr);
1231 	      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1232 	      val = trunc_int_for_mode (val, SImode);
1233 	      switch (which_immediate_load (val))
1234 		{
1235 		case SPU_IL:
1236 		case SPU_ILA:
1237 		  break;
1238 		case SPU_ILH:
1239 		case SPU_ILHU:
1240 		  val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1241 		  break;
1242 		default:
1243 		  gcc_unreachable ();
1244 		}
1245 	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1246 	      break;
1247 	    case IC_FSMBI:
1248 	      constant_to_array (mode, x, arr);
1249 	      val = 0;
1250 	      for (i = 0; i < 16; i++)
1251 		{
1252 		  val <<= 1;
1253 		  val |= arr[i] & 1;
1254 		}
1255 	      print_operand (file, GEN_INT (val), 0);
1256 	      break;
1257 	    case IC_CPAT:
1258 	      constant_to_array (mode, x, arr);
1259 	      cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1260 	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1261 	      break;
1262 	    case IC_IL1s:
1263 	      if (xcode == HIGH)
1264 		x = XEXP (x, 0);
1265 	      if (GET_CODE (x) == CONST_VECTOR)
1266 		x = CONST_VECTOR_ELT (x, 0);
1267 	      output_addr_const (file, x);
1268 	      if (xcode == HIGH)
1269 		fprintf (file, "@h");
1270 	      break;
1271 	    case IC_IL2:
1272 	    case IC_IL2s:
1273 	    case IC_FSMBI2:
1274 	    case IC_POOL:
1275 	      abort ();
1276 	    }
1277 	}
1278       else
1279 	gcc_unreachable ();
1280       return;
1281 
1282     case 'C':
1283       if (xcode == CONST_INT)
1284 	{
1285 	  /* Only the 4 least significant bits are relevant for generating
1286 	     control word instructions. */
1287 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1288 	  return;
1289 	}
1290       break;
1291 
1292     case 'M':			/* print code for c*d */
1293       if (GET_CODE (x) == CONST_INT)
1294 	switch (INTVAL (x))
1295 	  {
1296 	  case 1:
1297 	    fprintf (file, "b");
1298 	    break;
1299 	  case 2:
1300 	    fprintf (file, "h");
1301 	    break;
1302 	  case 4:
1303 	    fprintf (file, "w");
1304 	    break;
1305 	  case 8:
1306 	    fprintf (file, "d");
1307 	    break;
1308 	  default:
1309 	    gcc_unreachable();
1310 	  }
1311       else
1312 	gcc_unreachable();
1313       return;
1314 
1315     case 'N':			/* Negate the operand */
1316       if (xcode == CONST_INT)
1317 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1318       else if (xcode == CONST_VECTOR)
1319 	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1320 		 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1321       return;
1322 
1323     case 'I':			/* enable/disable interrupts */
1324       if (xcode == CONST_INT)
1325 	fprintf (file, "%s",  INTVAL (x) == 0 ? "d" : "e");
1326       return;
1327 
1328     case 'b':			/* branch modifiers */
1329       if (xcode == REG)
1330 	fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1331       else if (COMPARISON_P (x))
1332 	fprintf (file, "%s", xcode == NE ? "n" : "");
1333       return;
1334 
1335     case 'i':			/* indirect call */
1336       if (xcode == MEM)
1337 	{
1338 	  if (GET_CODE (XEXP (x, 0)) == REG)
1339 	    /* Used in indirect function calls. */
1340 	    fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1341 	  else
1342 	    output_address (GET_MODE (x), XEXP (x, 0));
1343 	}
1344       return;
1345 
1346     case 'p':			/* load/store */
1347       if (xcode == MEM)
1348 	{
1349 	  x = XEXP (x, 0);
1350 	  xcode = GET_CODE (x);
1351 	}
1352       if (xcode == AND)
1353 	{
1354 	  x = XEXP (x, 0);
1355 	  xcode = GET_CODE (x);
1356 	}
1357       if (xcode == REG)
1358 	fprintf (file, "d");
1359       else if (xcode == CONST_INT)
1360 	fprintf (file, "a");
1361       else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1362 	fprintf (file, "r");
1363       else if (xcode == PLUS || xcode == LO_SUM)
1364 	{
1365 	  if (GET_CODE (XEXP (x, 1)) == REG)
1366 	    fprintf (file, "x");
1367 	  else
1368 	    fprintf (file, "d");
1369 	}
1370       return;
1371 
1372     case 'e':
1373       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1374       val &= 0x7;
1375       output_addr_const (file, GEN_INT (val));
1376       return;
1377 
1378     case 'f':
1379       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1380       val &= 0x1f;
1381       output_addr_const (file, GEN_INT (val));
1382       return;
1383 
1384     case 'g':
1385       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1386       val &= 0x3f;
1387       output_addr_const (file, GEN_INT (val));
1388       return;
1389 
1390     case 'h':
1391       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1392       val = (val >> 3) & 0x1f;
1393       output_addr_const (file, GEN_INT (val));
1394       return;
1395 
1396     case 'E':
1397       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1398       val = -val;
1399       val &= 0x7;
1400       output_addr_const (file, GEN_INT (val));
1401       return;
1402 
1403     case 'F':
1404       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1405       val = -val;
1406       val &= 0x1f;
1407       output_addr_const (file, GEN_INT (val));
1408       return;
1409 
1410     case 'G':
1411       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1412       val = -val;
1413       val &= 0x3f;
1414       output_addr_const (file, GEN_INT (val));
1415       return;
1416 
1417     case 'H':
1418       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1419       val = -(val & -8ll);
1420       val = (val >> 3) & 0x1f;
1421       output_addr_const (file, GEN_INT (val));
1422       return;
1423 
1424     case 'v':
1425     case 'w':
1426       constant_to_array (mode, x, arr);
1427       val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1428       output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1429       return;
1430 
1431     case 0:
1432       if (xcode == REG)
1433 	fprintf (file, "%s", reg_names[REGNO (x)]);
1434       else if (xcode == MEM)
1435 	output_address (GET_MODE (x), XEXP (x, 0));
1436       else if (xcode == CONST_VECTOR)
1437 	print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1438       else
1439 	output_addr_const (file, x);
1440       return;
1441 
1442       /* unused letters
1443 	              o qr  u   yz
1444 	AB            OPQR  UVWXYZ */
1445     default:
1446       output_operand_lossage ("invalid %%xn code");
1447     }
1448   gcc_unreachable ();
1449 }
1450 
1451 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1452    caller saved register.  For leaf functions it is more efficient to
1453    use a volatile register because we won't need to save and restore the
1454    pic register.  This routine is only valid after register allocation
1455    is completed, so we can pick an unused register.  */
1456 static rtx
1457 get_pic_reg (void)
1458 {
1459   if (!reload_completed && !reload_in_progress)
1460     abort ();
1461 
1462   /* If we've already made the decision, we need to keep with it.  Once we've
1463      decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1464      return true since the register is now live; this should not cause us to
1465      "switch back" to using pic_offset_table_rtx.  */
1466   if (!cfun->machine->pic_reg)
1467     {
1468       if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1469 	cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1470       else
1471 	cfun->machine->pic_reg = pic_offset_table_rtx;
1472     }
1473 
1474   return cfun->machine->pic_reg;
1475 }
1476 
1477 /* Split constant addresses to handle cases that are too large.
1478    Add in the pic register when in PIC mode.
1479    Split immediates that require more than 1 instruction. */
1480 int
1481 spu_split_immediate (rtx * ops)
1482 {
1483   machine_mode mode = GET_MODE (ops[0]);
1484   enum immediate_class c = classify_immediate (ops[1], mode);
1485 
1486   switch (c)
1487     {
1488     case IC_IL2:
1489       {
1490 	unsigned char arrhi[16];
1491 	unsigned char arrlo[16];
1492 	rtx to, temp, hi, lo;
1493 	int i;
1494 	machine_mode imode = mode;
1495 	/* We need to do reals as ints because the constant used in the
1496 	   IOR might not be a legitimate real constant. */
1497 	imode = int_mode_for_mode (mode);
1498 	constant_to_array (mode, ops[1], arrhi);
1499 	if (imode != mode)
1500 	  to = simplify_gen_subreg (imode, ops[0], mode, 0);
1501 	else
1502 	  to = ops[0];
1503 	temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1504 	for (i = 0; i < 16; i += 4)
1505 	  {
1506 	    arrlo[i + 2] = arrhi[i + 2];
1507 	    arrlo[i + 3] = arrhi[i + 3];
1508 	    arrlo[i + 0] = arrlo[i + 1] = 0;
1509 	    arrhi[i + 2] = arrhi[i + 3] = 0;
1510 	  }
1511 	hi = array_to_constant (imode, arrhi);
1512 	lo = array_to_constant (imode, arrlo);
1513 	emit_move_insn (temp, hi);
1514 	emit_insn (gen_rtx_SET (to, gen_rtx_IOR (imode, temp, lo)));
1515 	return 1;
1516       }
1517     case IC_FSMBI2:
1518       {
1519 	unsigned char arr_fsmbi[16];
1520 	unsigned char arr_andbi[16];
1521 	rtx to, reg_fsmbi, reg_and;
1522 	int i;
1523 	machine_mode imode = mode;
1524 	/* We need to do reals as ints because the constant used in the
1525 	 * AND might not be a legitimate real constant. */
1526 	imode = int_mode_for_mode (mode);
1527 	constant_to_array (mode, ops[1], arr_fsmbi);
1528 	if (imode != mode)
1529 	  to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1530 	else
1531 	  to = ops[0];
1532 	for (i = 0; i < 16; i++)
1533 	  if (arr_fsmbi[i] != 0)
1534 	    {
1535 	      arr_andbi[0] = arr_fsmbi[i];
1536 	      arr_fsmbi[i] = 0xff;
1537 	    }
1538 	for (i = 1; i < 16; i++)
1539 	  arr_andbi[i] = arr_andbi[0];
1540 	reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1541 	reg_and = array_to_constant (imode, arr_andbi);
1542 	emit_move_insn (to, reg_fsmbi);
1543 	emit_insn (gen_rtx_SET (to, gen_rtx_AND (imode, to, reg_and)));
1544 	return 1;
1545       }
1546     case IC_POOL:
1547       if (reload_in_progress || reload_completed)
1548 	{
1549 	  rtx mem = force_const_mem (mode, ops[1]);
1550 	  if (TARGET_LARGE_MEM)
1551 	    {
1552 	      rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1553 	      emit_move_insn (addr, XEXP (mem, 0));
1554 	      mem = replace_equiv_address (mem, addr);
1555 	    }
1556 	  emit_move_insn (ops[0], mem);
1557 	  return 1;
1558 	}
1559       break;
1560     case IC_IL1s:
1561     case IC_IL2s:
1562       if (reload_completed && GET_CODE (ops[1]) != HIGH)
1563 	{
1564 	  if (c == IC_IL2s)
1565 	    {
1566 	      emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1567 	      emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1568 	    }
1569 	  else if (flag_pic)
1570 	    emit_insn (gen_pic (ops[0], ops[1]));
1571 	  if (flag_pic)
1572 	    {
1573 	      rtx pic_reg = get_pic_reg ();
1574 	      emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1575 	    }
1576 	  return flag_pic || c == IC_IL2s;
1577 	}
1578       break;
1579     case IC_IL1:
1580     case IC_FSMBI:
1581     case IC_CPAT:
1582       break;
1583     }
1584   return 0;
1585 }
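/* Worked example (added for illustration): an SImode constant such as
   0x12345678 cannot be loaded by a single il/ila/ilh/ilhu or fsmbi, so it
   classifies as IC_IL2; the split above zeroes the low halfword of each
   word to form the "hi" constant (loadable with ilhu 0x1234) and zeroes
   the high halfword to form the "lo" constant, which is then combined
   with an IOR, matching the iohl instruction.  */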
1586 
1587 /* SAVING is TRUE when we are generating the actual load and store
1588    instructions for REGNO.  When determining the size of the stack
1589    needed for saving registers we must allocate enough space for the
1590    worst case, because we don't always have the information early enough
1591    to not allocate it.  But we can at least eliminate the actual loads
1592    and stores during the prologue/epilogue.  */
1593 static int
1594 need_to_save_reg (int regno, int saving)
1595 {
1596   if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1597     return 1;
1598   if (flag_pic
1599       && regno == PIC_OFFSET_TABLE_REGNUM
1600       && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
1601     return 1;
1602   return 0;
1603 }
1604 
1605 /* This function is only correct starting with local register
1606    allocation */
1607 int
1608 spu_saved_regs_size (void)
1609 {
1610   int reg_save_size = 0;
1611   int regno;
1612 
1613   for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1614     if (need_to_save_reg (regno, 0))
1615       reg_save_size += 0x10;
1616   return reg_save_size;
1617 }
1618 
1619 static rtx_insn *
1620 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1621 {
1622   rtx reg = gen_rtx_REG (V4SImode, regno);
1623   rtx mem =
1624     gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1625   return emit_insn (gen_movv4si (mem, reg));
1626 }
1627 
1628 static rtx_insn *
1629 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1630 {
1631   rtx reg = gen_rtx_REG (V4SImode, regno);
1632   rtx mem =
1633     gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1634   return emit_insn (gen_movv4si (reg, mem));
1635 }
1636 
1637 /* This happens after reload, so we need to expand it.  */
1638 static rtx_insn *
1639 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1640 {
1641   rtx_insn *insn;
1642   if (satisfies_constraint_K (GEN_INT (imm)))
1643     {
1644       insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1645     }
1646   else
1647     {
1648       emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1649       insn = emit_insn (gen_addsi3 (dst, src, scratch));
1650       if (REGNO (src) == REGNO (scratch))
1651 	abort ();
1652     }
1653   return insn;
1654 }
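/* Example (added for illustration; treating constraint "K" as the signed
   10-bit immediate of the ai instruction, which is an assumption here):
   a small adjustment such as -368 is emitted as a single
   "ai $sp,$sp,-368", while a large frame offset that does not fit is
   first loaded into the scratch register and then added, which is why the
   scratch register must differ from the source register.  */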
1655 
1656 /* Return nonzero if this function is known to have a null epilogue.  */
1657 
1658 int
1659 direct_return (void)
1660 {
1661   if (reload_completed)
1662     {
1663       if (cfun->static_chain_decl == 0
1664 	  && (spu_saved_regs_size ()
1665 	      + get_frame_size ()
1666 	      + crtl->outgoing_args_size
1667 	      + crtl->args.pretend_args_size == 0)
1668 	  && crtl->is_leaf)
1669 	return 1;
1670     }
1671   return 0;
1672 }
1673 
1674 /*
1675    The stack frame looks like this:
1676          +-------------+
1677          |  incoming   |
1678          |    args     |
1679    AP -> +-------------+
1680          | $lr save    |
1681          +-------------+
1682  prev SP | back chain  |
1683          +-------------+
1684          |  var args   |
1685          |  reg save   | crtl->args.pretend_args_size bytes
1686          +-------------+
1687          |    ...      |
1688          | saved regs  | spu_saved_regs_size() bytes
1689    FP -> +-------------+
1690          |    ...      |
1691          |   vars      | get_frame_size()  bytes
1692   HFP -> +-------------+
1693          |    ...      |
1694          |  outgoing   |
1695          |    args     | crtl->outgoing_args_size bytes
1696          +-------------+
1697          | $lr of next |
1698          |   frame     |
1699          +-------------+
1700          | back chain  |
1701    SP -> +-------------+
1702 
1703 */
1704 void
1705 spu_expand_prologue (void)
1706 {
1707   HOST_WIDE_INT size = get_frame_size (), offset, regno;
1708   HOST_WIDE_INT total_size;
1709   HOST_WIDE_INT saved_regs_size;
1710   rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1711   rtx scratch_reg_0, scratch_reg_1;
1712   rtx_insn *insn;
1713   rtx real;
1714 
1715   if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1716     cfun->machine->pic_reg = pic_offset_table_rtx;
1717 
1718   if (spu_naked_function_p (current_function_decl))
1719     return;
1720 
1721   scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1722   scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1723 
1724   saved_regs_size = spu_saved_regs_size ();
1725   total_size = size + saved_regs_size
1726     + crtl->outgoing_args_size
1727     + crtl->args.pretend_args_size;
1728 
1729   if (!crtl->is_leaf
1730       || cfun->calls_alloca || total_size > 0)
1731     total_size += STACK_POINTER_OFFSET;
1732 
1733   /* Save this first because code after this might use the link
1734      register as a scratch register. */
1735   if (!crtl->is_leaf)
1736     {
1737       insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1738       RTX_FRAME_RELATED_P (insn) = 1;
1739     }
1740 
1741   if (total_size > 0)
1742     {
1743       offset = -crtl->args.pretend_args_size;
1744       for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1745 	if (need_to_save_reg (regno, 1))
1746 	  {
1747 	    offset -= 16;
1748 	    insn = frame_emit_store (regno, sp_reg, offset);
1749 	    RTX_FRAME_RELATED_P (insn) = 1;
1750 	  }
1751     }
1752 
1753   if (flag_pic && cfun->machine->pic_reg)
1754     {
1755       rtx pic_reg = cfun->machine->pic_reg;
1756       insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1757       insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1758     }
1759 
1760   if (total_size > 0)
1761     {
1762       if (flag_stack_check)
1763 	{
1764 	  /* We compare against total_size-1 because
1765 	     ($sp >= total_size) <=> ($sp > total_size-1) */
1766 	  rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1767 	  rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1768 	  rtx size_v4si = spu_const (V4SImode, total_size - 1);
1769 	  if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1770 	    {
1771 	      emit_move_insn (scratch_v4si, size_v4si);
1772 	      size_v4si = scratch_v4si;
1773 	    }
1774 	  emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1775 	  emit_insn (gen_vec_extractv4si
1776 		     (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1777 	  emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1778 	}
1779 
1780       /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1781          the value of the previous $sp because we save it as the back
1782          chain. */
1783       if (total_size <= 2000)
1784 	{
1785 	  /* In this case we save the back chain first. */
1786 	  insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1787 	  insn =
1788 	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1789 	}
1790       else
1791 	{
1792 	  insn = emit_move_insn (scratch_reg_0, sp_reg);
1793 	  insn =
1794 	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1795 	}
1796       RTX_FRAME_RELATED_P (insn) = 1;
1797       real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1798       add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1799 
1800       if (total_size > 2000)
1801 	{
1802 	  /* Save the back chain ptr */
1803 	  insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1804 	}
1805 
1806       if (frame_pointer_needed)
1807 	{
1808 	  rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1809 	  HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1810 	    + crtl->outgoing_args_size;
1811 	  /* Set the new frame_pointer */
1812 	  insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1813 	  RTX_FRAME_RELATED_P (insn) = 1;
1814 	  real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1815 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1816           REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1817 	}
1818     }
1819 
1820   if (flag_stack_usage_info)
1821     current_function_static_stack_size = total_size;
1822 }
1823 
1824 void
1825 spu_expand_epilogue (bool sibcall_p)
1826 {
1827   int size = get_frame_size (), offset, regno;
1828   HOST_WIDE_INT saved_regs_size, total_size;
1829   rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1830   rtx scratch_reg_0;
1831 
1832   if (spu_naked_function_p (current_function_decl))
1833     return;
1834 
1835   scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1836 
1837   saved_regs_size = spu_saved_regs_size ();
1838   total_size = size + saved_regs_size
1839     + crtl->outgoing_args_size
1840     + crtl->args.pretend_args_size;
1841 
1842   if (!crtl->is_leaf
1843       || cfun->calls_alloca || total_size > 0)
1844     total_size += STACK_POINTER_OFFSET;
1845 
1846   if (total_size > 0)
1847     {
1848       if (cfun->calls_alloca)
1849 	frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1850       else
1851 	frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1852 
1853 
1854       if (saved_regs_size > 0)
1855 	{
1856 	  offset = -crtl->args.pretend_args_size;
1857 	  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1858 	    if (need_to_save_reg (regno, 1))
1859 	      {
1860 		offset -= 0x10;
1861 		frame_emit_load (regno, sp_reg, offset);
1862 	      }
1863 	}
1864     }
1865 
1866   if (!crtl->is_leaf)
1867     frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1868 
1869   if (!sibcall_p)
1870     {
1871       emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
1872       emit_jump_insn (gen__return ());
1873     }
1874 }
1875 
1876 rtx
1877 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1878 {
1879   if (count != 0)
1880     return 0;
1881   /* This is inefficient because it ends up copying to a save-register
1882      which then gets saved even though $lr has already been saved.  But
1883      it does generate better code for leaf functions and we don't need
1884      to use RETURN_ADDRESS_POINTER_REGNUM to get it working.  It's only
1885      used for __builtin_return_address anyway, so maybe we don't care if
1886      it's inefficient. */
1887   return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1888 }
1889 
1890 
1891 /* Given VAL, generate a constant appropriate for MODE.
1892    If MODE is a vector mode, every element will be VAL.
1893    For TImode, VAL will be zero extended to 128 bits. */
1894 rtx
1895 spu_const (machine_mode mode, HOST_WIDE_INT val)
1896 {
1897   rtx inner;
1898   rtvec v;
1899   int units, i;
1900 
1901   gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1902 	      || GET_MODE_CLASS (mode) == MODE_FLOAT
1903 	      || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1904 	      || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1905 
1906   if (GET_MODE_CLASS (mode) == MODE_INT)
1907     return immed_double_const (val, 0, mode);
1908 
1909   /* val is the bit representation of the float */
1910   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1911     return hwint_to_const_double (mode, val);
1912 
1913   if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1914     inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1915   else
1916     inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1917 
1918   units = GET_MODE_NUNITS (mode);
1919 
1920   v = rtvec_alloc (units);
1921 
1922   for (i = 0; i < units; ++i)
1923     RTVEC_ELT (v, i) = inner;
1924 
1925   return gen_rtx_CONST_VECTOR (mode, v);
1926 }
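/* For illustration: spu_const (SImode, 1) is simply (const_int 1),
   while spu_const (V4SImode, 1) replicates the same inner constant
   into every element, yielding (const_vector:V4SI [1 1 1 1]).  */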
1927 
1928 /* Create a MODE vector constant from 4 ints. */
1929 rtx
1930 spu_const_from_ints(machine_mode mode, int a, int b, int c, int d)
1931 {
1932   unsigned char arr[16];
1933   arr[0] = (a >> 24) & 0xff;
1934   arr[1] = (a >> 16) & 0xff;
1935   arr[2] = (a >> 8) & 0xff;
1936   arr[3] = (a >> 0) & 0xff;
1937   arr[4] = (b >> 24) & 0xff;
1938   arr[5] = (b >> 16) & 0xff;
1939   arr[6] = (b >> 8) & 0xff;
1940   arr[7] = (b >> 0) & 0xff;
1941   arr[8] = (c >> 24) & 0xff;
1942   arr[9] = (c >> 16) & 0xff;
1943   arr[10] = (c >> 8) & 0xff;
1944   arr[11] = (c >> 0) & 0xff;
1945   arr[12] = (d >> 24) & 0xff;
1946   arr[13] = (d >> 16) & 0xff;
1947   arr[14] = (d >> 8) & 0xff;
1948   arr[15] = (d >> 0) & 0xff;
1949   return array_to_constant(mode, arr);
1950 }
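/* The four ints are packed most-significant byte first, first argument
   first: A occupies bytes 0-3, B bytes 4-7, C bytes 8-11 and D bytes
   12-15 of the 16-byte constant.  */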
1951 
1952 /* Branch hint support.  */
1953 
1954 /* An array of these is used to propagate hints to predecessor blocks. */
1955 struct spu_bb_info
1956 {
1957   rtx_insn *prop_jump; /* propagated from another block */
1958   int bb_index;  /* the original block. */
1959 };
1960 static struct spu_bb_info *spu_bb_info;
1961 
1962 #define STOP_HINT_P(INSN) \
1963 		(CALL_P(INSN) \
1964 		 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1965 		 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1966 
1967 /* 1 when RTX is a hinted branch or its target.  We keep track of
1968    what has been hinted so the safe-hint code can test it easily.  */
1969 #define HINTED_P(RTX)						\
1970   (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
1971 
1972 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
1973 #define SCHED_ON_EVEN_P(RTX)						\
1974   (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
1975 
1976 /* Emit a nop for INSN such that the two will dual issue.  This assumes
1977    INSN is 8-byte aligned.  When INSN is inline asm we emit an lnop.
1978    We check for TImode to handle a MULTI1 insn which has dual issued its
1979    first instruction.  get_pipe returns -1 for MULTI0 or inline asm.  */
1980 static void
1981 emit_nop_for_insn (rtx_insn *insn)
1982 {
1983   int p;
1984   rtx_insn *new_insn;
1985 
1986   /* We need to handle JUMP_TABLE_DATA separately.  */
1987   if (JUMP_TABLE_DATA_P (insn))
1988     {
1989       new_insn = emit_insn_after (gen_lnop(), insn);
1990       recog_memoized (new_insn);
1991       INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
1992       return;
1993     }
1994 
1995   p = get_pipe (insn);
1996   if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
1997     new_insn = emit_insn_after (gen_lnop (), insn);
1998   else if (p == 1 && GET_MODE (insn) == TImode)
1999     {
2000       new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2001       PUT_MODE (new_insn, TImode);
2002       PUT_MODE (insn, VOIDmode);
2003     }
2004   else
2005     new_insn = emit_insn_after (gen_lnop (), insn);
2006   recog_memoized (new_insn);
2007   INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
2008 }
2009 
2010 /* Insert nops in basic blocks to meet dual issue alignment
2011    requirements.  Also make sure hbrp and hint instructions are at least
2012    one cycle apart, possibly inserting a nop.  */
2013 static void
2014 pad_bb(void)
2015 {
2016   rtx_insn *insn, *next_insn, *prev_insn, *hbr_insn = 0;
2017   int length;
2018   int addr;
2019 
2020   /* This sets up INSN_ADDRESSES. */
2021   shorten_branches (get_insns ());
2022 
2023   /* Keep track of length added by nops. */
2024   length = 0;
2025 
2026   prev_insn = 0;
2027   insn = get_insns ();
2028   if (!active_insn_p (insn))
2029     insn = next_active_insn (insn);
2030   for (; insn; insn = next_insn)
2031     {
2032       next_insn = next_active_insn (insn);
2033       if (INSN_CODE (insn) == CODE_FOR_iprefetch
2034 	  || INSN_CODE (insn) == CODE_FOR_hbr)
2035 	{
2036 	  if (hbr_insn)
2037 	    {
2038 	      int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2039 	      int a1 = INSN_ADDRESSES (INSN_UID (insn));
2040 	      if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2041 		  || (a1 - a0 == 4))
2042 		{
2043 		  prev_insn = emit_insn_before (gen_lnop (), insn);
2044 		  PUT_MODE (prev_insn, GET_MODE (insn));
2045 		  PUT_MODE (insn, TImode);
2046 		  INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
2047 		  length += 4;
2048 		}
2049 	    }
2050 	  hbr_insn = insn;
2051 	}
2052       if (INSN_CODE (insn) == CODE_FOR_blockage && next_insn)
2053 	{
2054 	  if (GET_MODE (insn) == TImode)
2055 	    PUT_MODE (next_insn, TImode);
2056 	  insn = next_insn;
2057 	  next_insn = next_active_insn (insn);
2058 	}
2059       addr = INSN_ADDRESSES (INSN_UID (insn));
2060       if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2061 	{
2062 	  if (((addr + length) & 7) != 0)
2063 	    {
2064 	      emit_nop_for_insn (prev_insn);
2065 	      length += 4;
2066 	    }
2067 	}
2068       else if (GET_MODE (insn) == TImode
2069 	       && ((next_insn && GET_MODE (next_insn) != TImode)
2070 		   || get_attr_type (insn) == TYPE_MULTI0)
2071 	       && ((addr + length) & 7) != 0)
2072 	{
2073 	  /* prev_insn will always be set because the first insn is
2074 	     always 8-byte aligned. */
2075 	  emit_nop_for_insn (prev_insn);
2076 	  length += 4;
2077 	}
2078       prev_insn = insn;
2079     }
2080 }
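/* For example, when a call or jump marked SCHED_ON_EVEN_P would end up
   at an address that is not a multiple of 8 (counting the nops already
   added), a 4-byte nop or lnop is emitted next to the preceding insn so
   the branch slides onto an 8-byte boundary.  */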
2081 
2082 
2083 /* Routines for branch hints. */
2084 
2085 static void
2086 spu_emit_branch_hint (rtx_insn *before, rtx_insn *branch, rtx target,
2087 		      int distance, sbitmap blocks)
2088 {
2089   rtx branch_label = 0;
2090   rtx_insn *hint;
2091   rtx_insn *insn;
2092   rtx_jump_table_data *table;
2093 
2094   if (before == 0 || branch == 0 || target == 0)
2095     return;
2096 
2097   /* While scheduling we require hints to be no further than 600, so
2098      we need to enforce that here too */
2099   if (distance > 600)
2100     return;
2101 
2102   /* If BEFORE is a basic block note, emit the hint after the note.  */
2103   if (NOTE_INSN_BASIC_BLOCK_P (before))
2104     before = NEXT_INSN (before);
2105 
2106   branch_label = gen_label_rtx ();
2107   LABEL_NUSES (branch_label)++;
2108   LABEL_PRESERVE_P (branch_label) = 1;
2109   insn = emit_label_before (branch_label, branch);
2110   branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2111   bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
2112 
2113   hint = emit_insn_before (gen_hbr (branch_label, target), before);
2114   recog_memoized (hint);
2115   INSN_LOCATION (hint) = INSN_LOCATION (branch);
2116   HINTED_P (branch) = 1;
2117 
2118   if (GET_CODE (target) == LABEL_REF)
2119     HINTED_P (XEXP (target, 0)) = 1;
2120   else if (tablejump_p (branch, 0, &table))
2121     {
2122       rtvec vec;
2123       int j;
2124       if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2125 	vec = XVEC (PATTERN (table), 0);
2126       else
2127 	vec = XVEC (PATTERN (table), 1);
2128       for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2129 	HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2130     }
2131 
2132   if (distance >= 588)
2133     {
2134       /* Make sure the hint isn't scheduled any earlier than this point,
2135          which could make it too far for the branch offset to fit */
2136       insn = emit_insn_before (gen_blockage (), hint);
2137       recog_memoized (insn);
2138       INSN_LOCATION (insn) = INSN_LOCATION (hint);
2139     }
2140   else if (distance <= 8 * 4)
2141     {
2142       /* To guarantee at least 8 insns between the hint and branch we
2143          insert nops. */
2144       int d;
2145       for (d = distance; d < 8 * 4; d += 4)
2146 	{
2147 	  insn =
2148 	    emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2149 	  recog_memoized (insn);
2150 	  INSN_LOCATION (insn) = INSN_LOCATION (hint);
2151 	}
2152 
2153       /* Make sure any nops inserted aren't scheduled before the hint. */
2154       insn = emit_insn_after (gen_blockage (), hint);
2155       recog_memoized (insn);
2156       INSN_LOCATION (insn) = INSN_LOCATION (hint);
2157 
2158       /* Make sure any nops inserted aren't scheduled after the call. */
2159       if (CALL_P (branch) && distance < 8 * 4)
2160 	{
2161 	  insn = emit_insn_before (gen_blockage (), branch);
2162 	  recog_memoized (insn);
2163 	  INSN_LOCATION (insn) = INSN_LOCATION (branch);
2164 	}
2165     }
2166 }
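/* A worked example of the distances above: for a branch only 12 bytes
   (3 insns) past the hint, the loop inserts nops at d = 12, 16, 20, 24
   and 28, i.e. five nops, so at least 8 insns (32 bytes) separate the
   hbr from its branch.  Branches more than 600 bytes away are not
   hinted at all.  */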
2167 
2168 /* Returns 0 if we don't want a hint for this branch.  Otherwise return
2169    the rtx for the branch target. */
2170 static rtx
2171 get_branch_target (rtx_insn *branch)
2172 {
2173   if (JUMP_P (branch))
2174     {
2175       rtx set, src;
2176 
2177       /* Return statements */
2178       if (GET_CODE (PATTERN (branch)) == RETURN)
2179 	return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2180 
2181       /* ASM GOTOs. */
2182       if (extract_asm_operands (PATTERN (branch)) != NULL)
2183 	return NULL;
2184 
2185       set = single_set (branch);
2186       src = SET_SRC (set);
2187       if (GET_CODE (SET_DEST (set)) != PC)
2188 	abort ();
2189 
2190       if (GET_CODE (src) == IF_THEN_ELSE)
2191 	{
2192 	  rtx lab = 0;
2193 	  rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2194 	  if (note)
2195 	    {
2196 	      /* If the more probable case is not a fall through, then
2197 	         try a branch hint.  */
2198 	      int prob = XINT (note, 0);
2199 	      if (prob > (REG_BR_PROB_BASE * 6 / 10)
2200 		  && GET_CODE (XEXP (src, 1)) != PC)
2201 		lab = XEXP (src, 1);
2202 	      else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2203 		       && GET_CODE (XEXP (src, 2)) != PC)
2204 		lab = XEXP (src, 2);
2205 	    }
2206 	  if (lab)
2207 	    {
2208 	      if (GET_CODE (lab) == RETURN)
2209 		return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2210 	      return lab;
2211 	    }
2212 	  return 0;
2213 	}
2214 
2215       return src;
2216     }
2217   else if (CALL_P (branch))
2218     {
2219       rtx call;
2220       /* All of our call patterns are in a PARALLEL and the CALL is
2221          the first pattern in the PARALLEL. */
2222       if (GET_CODE (PATTERN (branch)) != PARALLEL)
2223 	abort ();
2224       call = XVECEXP (PATTERN (branch), 0, 0);
2225       if (GET_CODE (call) == SET)
2226 	call = SET_SRC (call);
2227       if (GET_CODE (call) != CALL)
2228 	abort ();
2229       return XEXP (XEXP (call, 0), 0);
2230     }
2231   return 0;
2232 }
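/* Note on the REG_BR_PROB handling above: with REG_BR_PROB_BASE at
   10000, a target is chosen only when the recorded probability is above
   6000 (60%) for the first arm or below 4000 (40%) for the second arm
   of the IF_THEN_ELSE; conditional branches predicted in between return
   0 and get no hint.  */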
2233 
2234 /* The special $hbr register is used to prevent the insn scheduler from
2235    moving hbr insns across instructions which invalidate them.  It
2236    should only be used in a clobber, and this function searches for
2237    insns which clobber it.  */
2238 static bool
2239 insn_clobbers_hbr (rtx_insn *insn)
2240 {
2241   if (INSN_P (insn)
2242       && GET_CODE (PATTERN (insn)) == PARALLEL)
2243     {
2244       rtx parallel = PATTERN (insn);
2245       rtx clobber;
2246       int j;
2247       for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2248 	{
2249 	  clobber = XVECEXP (parallel, 0, j);
2250 	  if (GET_CODE (clobber) == CLOBBER
2251 	      && GET_CODE (XEXP (clobber, 0)) == REG
2252 	      && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2253 	    return 1;
2254 	}
2255     }
2256   return 0;
2257 }
2258 
2259 /* Search up to 32 insns starting at FIRST:
2260    - at any kind of hinted branch, just return
2261    - at any unconditional branch in the first 15 insns, just return
2262    - at a call or indirect branch, after the first 15 insns, force it to
2263      an even address and return
2264    - at any unconditional branch, after the first 15 insns, force it to
2265      an even address.
2266    At the end of the search, insert an hbrp within 4 insns of FIRST,
2267    and an hbrp within 16 instructions of FIRST.
2268  */
2269 static void
2270 insert_hbrp_for_ilb_runout (rtx_insn *first)
2271 {
2272   rtx_insn *insn, *before_4 = 0, *before_16 = 0;
2273   int addr = 0, length, first_addr = -1;
2274   int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2275   int insert_lnop_after = 0;
2276   for (insn = first; insn; insn = NEXT_INSN (insn))
2277     if (INSN_P (insn))
2278       {
2279 	if (first_addr == -1)
2280 	  first_addr = INSN_ADDRESSES (INSN_UID (insn));
2281 	addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2282 	length = get_attr_length (insn);
2283 
2284 	if (before_4 == 0 && addr + length >= 4 * 4)
2285 	  before_4 = insn;
2286 	/* We test for 14 instructions because the first hbrp will add
2287 	   up to 2 instructions. */
2288 	if (before_16 == 0 && addr + length >= 14 * 4)
2289 	  before_16 = insn;
2290 
2291 	if (INSN_CODE (insn) == CODE_FOR_hbr)
2292 	  {
2293 	    /* Make sure an hbrp is at least 2 cycles away from a hint.
2294 	       Insert an lnop after the hbrp when necessary. */
2295 	    if (before_4 == 0 && addr > 0)
2296 	      {
2297 		before_4 = insn;
2298 		insert_lnop_after |= 1;
2299 	      }
2300 	    else if (before_4 && addr <= 4 * 4)
2301 	      insert_lnop_after |= 1;
2302 	    if (before_16 == 0 && addr > 10 * 4)
2303 	      {
2304 		before_16 = insn;
2305 		insert_lnop_after |= 2;
2306 	      }
2307 	    else if (before_16 && addr <= 14 * 4)
2308 	      insert_lnop_after |= 2;
2309 	  }
2310 
2311 	if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2312 	  {
2313 	    if (addr < hbrp_addr0)
2314 	      hbrp_addr0 = addr;
2315 	    else if (addr < hbrp_addr1)
2316 	      hbrp_addr1 = addr;
2317 	  }
2318 
2319 	if (CALL_P (insn) || JUMP_P (insn))
2320 	  {
2321 	    if (HINTED_P (insn))
2322 	      return;
2323 
2324 	    /* Any branch after the first 15 insns should be on an even
2325 	       address to avoid a special case branch.  There might be
2326 	       some nops and/or hbrps inserted, so we test after 10
2327 	       insns. */
2328 	    if (addr > 10 * 4)
2329 	      SCHED_ON_EVEN_P (insn) = 1;
2330 	  }
2331 
2332 	if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2333 	  return;
2334 
2335 
2336 	if (addr + length >= 32 * 4)
2337 	  {
2338 	    gcc_assert (before_4 && before_16);
2339 	    if (hbrp_addr0 > 4 * 4)
2340 	      {
2341 		insn =
2342 		  emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2343 		recog_memoized (insn);
2344 		INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2345 		INSN_ADDRESSES_NEW (insn,
2346 				    INSN_ADDRESSES (INSN_UID (before_4)));
2347 		PUT_MODE (insn, GET_MODE (before_4));
2348 		PUT_MODE (before_4, TImode);
2349 		if (insert_lnop_after & 1)
2350 		  {
2351 		    insn = emit_insn_before (gen_lnop (), before_4);
2352 		    recog_memoized (insn);
2353 		    INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2354 		    INSN_ADDRESSES_NEW (insn,
2355 					INSN_ADDRESSES (INSN_UID (before_4)));
2356 		    PUT_MODE (insn, TImode);
2357 		  }
2358 	      }
2359 	    if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2360 		&& hbrp_addr1 > 16 * 4)
2361 	      {
2362 		insn =
2363 		  emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2364 		recog_memoized (insn);
2365 		INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2366 		INSN_ADDRESSES_NEW (insn,
2367 				    INSN_ADDRESSES (INSN_UID (before_16)));
2368 		PUT_MODE (insn, GET_MODE (before_16));
2369 		PUT_MODE (before_16, TImode);
2370 		if (insert_lnop_after & 2)
2371 		  {
2372 		    insn = emit_insn_before (gen_lnop (), before_16);
2373 		    recog_memoized (insn);
2374 		    INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2375 		    INSN_ADDRESSES_NEW (insn,
2376 					INSN_ADDRESSES (INSN_UID
2377 							(before_16)));
2378 		    PUT_MODE (insn, TImode);
2379 		  }
2380 	      }
2381 	    return;
2382 	  }
2383       }
2384     else if (BARRIER_P (insn))
2385       return;
2386 
2387 }
2388 
2389 /* The SPU might hang when it executes 48 inline instructions after a
2390    hinted branch jumps to its hinted target.  The beginning of a
2391    function and the return from a call might have been hinted, and
2392    must be handled as well.  To prevent a hang we insert 2 hbrps.  The
2393    first should be within 6 insns of the branch target.  The second
2394    should be within 22 insns of the branch target.  When determining
2395    if hbrps are necessary, we look for only 32 inline instructions,
2396    because up to 12 nops and 4 hbrps could be inserted.  Similarly,
2397    when inserting new hbrps, we insert them within 4 and 16 insns of
2398    the target.  */
2399 static void
2400 insert_hbrp (void)
2401 {
2402   rtx_insn *insn;
2403   if (TARGET_SAFE_HINTS)
2404     {
2405       shorten_branches (get_insns ());
2406       /* Insert hbrp at beginning of function */
2407       insn = next_active_insn (get_insns ());
2408       if (insn)
2409 	insert_hbrp_for_ilb_runout (insn);
2410       /* Insert hbrp after hinted targets. */
2411       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2412 	if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2413 	  insert_hbrp_for_ilb_runout (next_active_insn (insn));
2414     }
2415 }
2416 
2417 static int in_spu_reorg;
2418 
2419 static void
2420 spu_var_tracking (void)
2421 {
2422   if (flag_var_tracking)
2423     {
2424       df_analyze ();
2425       timevar_push (TV_VAR_TRACKING);
2426       variable_tracking_main ();
2427       timevar_pop (TV_VAR_TRACKING);
2428       df_finish_pass (false);
2429     }
2430 }
2431 
2432 /* Insert branch hints.  There are no branch optimizations after this
2433    pass, so it's safe to set our branch hints now. */
2434 static void
2435 spu_machine_dependent_reorg (void)
2436 {
2437   sbitmap blocks;
2438   basic_block bb;
2439   rtx_insn *branch, *insn;
2440   rtx branch_target = 0;
2441   int branch_addr = 0, insn_addr, required_dist = 0;
2442   int i;
2443   unsigned int j;
2444 
2445   if (!TARGET_BRANCH_HINTS || optimize == 0)
2446     {
2447       /* We still do it for unoptimized code because an external
2448          function might have hinted a call or return. */
2449       compute_bb_for_insn ();
2450       insert_hbrp ();
2451       pad_bb ();
2452       spu_var_tracking ();
2453       free_bb_for_insn ();
2454       return;
2455     }
2456 
2457   blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
2458   bitmap_clear (blocks);
2459 
2460   in_spu_reorg = 1;
2461   compute_bb_for_insn ();
2462 
2463   /* (Re-)discover loops so that bb->loop_father can be used
2464      in the analysis below.  */
2465   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2466 
2467   compact_blocks ();
2468 
2469   spu_bb_info =
2470     (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
2471 				    sizeof (struct spu_bb_info));
2472 
2473   /* We need exact insn addresses and lengths.  */
2474   shorten_branches (get_insns ());
2475 
2476   for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
2477     {
2478       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2479       branch = 0;
2480       if (spu_bb_info[i].prop_jump)
2481 	{
2482 	  branch = spu_bb_info[i].prop_jump;
2483 	  branch_target = get_branch_target (branch);
2484 	  branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2485 	  required_dist = spu_hint_dist;
2486 	}
2487       /* Search from the end of a block to the beginning.  In this loop, find
2488          jumps which need a branch hint and emit the hint only when:
2489          - it's an indirect branch and we're at the insn which sets
2490          the register
2491          - we're at an insn that will invalidate the hint. e.g., a
2492          call, another hint insn, inline asm that clobbers $hbr, and
2493          some inlined operations (divmodsi4).  Don't consider jumps
2494          because they are only at the end of a block and are
2495          considered when we are deciding whether to propagate
2496          - we're getting too far away from the branch.  The hbr insns
2497          only have a signed 10 bit offset
2498          We go back as far as possible so the branch will be considered
2499          for propagation when we get to the beginning of the block.  */
2500       for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2501 	{
2502 	  if (INSN_P (insn))
2503 	    {
2504 	      insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2505 	      if (branch
2506 		  && ((GET_CODE (branch_target) == REG
2507 		       && set_of (branch_target, insn) != NULL_RTX)
2508 		      || insn_clobbers_hbr (insn)
2509 		      || branch_addr - insn_addr > 600))
2510 		{
2511 		  rtx_insn *next = NEXT_INSN (insn);
2512 		  int next_addr = INSN_ADDRESSES (INSN_UID (next));
2513 		  if (insn != BB_END (bb)
2514 		      && branch_addr - next_addr >= required_dist)
2515 		    {
2516 		      if (dump_file)
2517 			fprintf (dump_file,
2518 				 "hint for %i in block %i before %i\n",
2519 				 INSN_UID (branch), bb->index,
2520 				 INSN_UID (next));
2521 		      spu_emit_branch_hint (next, branch, branch_target,
2522 					    branch_addr - next_addr, blocks);
2523 		    }
2524 		  branch = 0;
2525 		}
2526 
2527 	      /* JUMP_P will only be true at the end of a block.  When
2528 	         branch is already set it means we've previously decided
2529 	         to propagate a hint for that branch into this block. */
2530 	      if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2531 		{
2532 		  branch = 0;
2533 		  if ((branch_target = get_branch_target (insn)))
2534 		    {
2535 		      branch = insn;
2536 		      branch_addr = insn_addr;
2537 		      required_dist = spu_hint_dist;
2538 		    }
2539 		}
2540 	    }
2541 	  if (insn == BB_HEAD (bb))
2542 	    break;
2543 	}
2544 
2545       if (branch)
2546 	{
2547 	  /* If we haven't emitted a hint for this branch yet, it might
2548 	     be profitable to emit it in one of the predecessor blocks,
2549 	     especially for loops.  */
2550 	  rtx_insn *bbend;
2551 	  basic_block prev = 0, prop = 0, prev2 = 0;
2552 	  int loop_exit = 0, simple_loop = 0;
2553 	  int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2554 
2555 	  for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2556 	    if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2557 	      prev = EDGE_PRED (bb, j)->src;
2558 	    else
2559 	      prev2 = EDGE_PRED (bb, j)->src;
2560 
2561 	  for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2562 	    if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2563 	      loop_exit = 1;
2564 	    else if (EDGE_SUCC (bb, j)->dest == bb)
2565 	      simple_loop = 1;
2566 
2567 	  /* If this branch is a loop exit then propagate to previous
2568 	     fallthru block. This catches the cases when it is a simple
2569 	     loop or when there is an initial branch into the loop. */
2570 	  if (prev && (loop_exit || simple_loop)
2571 	      && bb_loop_depth (prev) <= bb_loop_depth (bb))
2572 	    prop = prev;
2573 
2574 	  /* If there is only one adjacent predecessor, don't propagate
2575 	     outside this loop.  */
2576 	  else if (prev && single_pred_p (bb)
2577 		   && prev->loop_father == bb->loop_father)
2578 	    prop = prev;
2579 
2580 	  /* If this is the JOIN block of a simple IF-THEN then
2581 	     propagate the hint to the HEADER block. */
2582 	  else if (prev && prev2
2583 		   && EDGE_COUNT (bb->preds) == 2
2584 		   && EDGE_COUNT (prev->preds) == 1
2585 		   && EDGE_PRED (prev, 0)->src == prev2
2586 		   && prev2->loop_father == bb->loop_father
2587 		   && GET_CODE (branch_target) != REG)
2588 	    prop = prev;
2589 
2590 	  /* Don't propagate when:
2591 	     - this is a simple loop and the hint would be too far
2592 	     - this is not a simple loop and there are 16 insns in
2593 	     this block already
2594 	     - the predecessor block ends in a branch that will be
2595 	     hinted
2596 	     - the predecessor block ends in an insn that invalidates
2597 	     the hint */
2598 	  if (prop
2599 	      && prop->index >= 0
2600 	      && (bbend = BB_END (prop))
2601 	      && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2602 	      (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2603 	      && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2604 	    {
2605 	      if (dump_file)
2606 		fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2607 			 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2608 			 bb->index, prop->index, bb_loop_depth (bb),
2609 			 INSN_UID (branch), loop_exit, simple_loop,
2610 			 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2611 
2612 	      spu_bb_info[prop->index].prop_jump = branch;
2613 	      spu_bb_info[prop->index].bb_index = i;
2614 	    }
2615 	  else if (branch_addr - next_addr >= required_dist)
2616 	    {
2617 	      if (dump_file)
2618 		fprintf (dump_file, "hint for %i in block %i before %i\n",
2619 			 INSN_UID (branch), bb->index,
2620 			 INSN_UID (NEXT_INSN (insn)));
2621 	      spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2622 				    branch_addr - next_addr, blocks);
2623 	    }
2624 	  branch = 0;
2625 	}
2626     }
2627   free (spu_bb_info);
2628 
2629   if (!bitmap_empty_p (blocks))
2630     find_many_sub_basic_blocks (blocks);
2631 
2632   /* We have to schedule to make sure alignment is ok. */
2633   FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE;
2634 
2635   /* The hints need to be scheduled, so call it again. */
2636   schedule_insns ();
2637   df_finish_pass (true);
2638 
2639   insert_hbrp ();
2640 
2641   pad_bb ();
2642 
2643   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2644     if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2645       {
2646 	/* Adjust the LABEL_REF in a hint when we have inserted a nop
2647 	   between its branch label and the branch.  We don't move the
2648 	   label because GCC expects it at the beginning of the block. */
2649 	rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2650 	rtx label_ref = XVECEXP (unspec, 0, 0);
2651 	rtx_insn *label = as_a <rtx_insn *> (XEXP (label_ref, 0));
2652 	rtx_insn *branch;
2653 	int offset = 0;
2654 	for (branch = NEXT_INSN (label);
2655 	     !JUMP_P (branch) && !CALL_P (branch);
2656 	     branch = NEXT_INSN (branch))
2657 	  if (NONJUMP_INSN_P (branch))
2658 	    offset += get_attr_length (branch);
2659 	if (offset > 0)
2660 	  XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
2661       }
2662 
2663   spu_var_tracking ();
2664 
2665   loop_optimizer_finalize ();
2666 
2667   free_bb_for_insn ();
2668 
2669   in_spu_reorg = 0;
2670 }
2671 
2672 
2673 /* Insn scheduling routines, primarily for dual issue. */
2674 static int
2675 spu_sched_issue_rate (void)
2676 {
2677   return 2;
2678 }
2679 
2680 static int
2681 uses_ls_unit(rtx_insn *insn)
2682 {
2683   rtx set = single_set (insn);
2684   if (set != 0
2685       && (GET_CODE (SET_DEST (set)) == MEM
2686 	  || GET_CODE (SET_SRC (set)) == MEM))
2687     return 1;
2688   return 0;
2689 }
2690 
2691 static int
2692 get_pipe (rtx_insn *insn)
2693 {
2694   enum attr_type t;
2695   /* Handle inline asm */
2696   if (INSN_CODE (insn) == -1)
2697     return -1;
2698   t = get_attr_type (insn);
2699   switch (t)
2700     {
2701     case TYPE_CONVERT:
2702       return -2;
2703     case TYPE_MULTI0:
2704       return -1;
2705 
2706     case TYPE_FX2:
2707     case TYPE_FX3:
2708     case TYPE_SPR:
2709     case TYPE_NOP:
2710     case TYPE_FXB:
2711     case TYPE_FPD:
2712     case TYPE_FP6:
2713     case TYPE_FP7:
2714       return 0;
2715 
2716     case TYPE_LNOP:
2717     case TYPE_SHUF:
2718     case TYPE_LOAD:
2719     case TYPE_STORE:
2720     case TYPE_BR:
2721     case TYPE_MULTI1:
2722     case TYPE_HBR:
2723     case TYPE_IPREFETCH:
2724       return 1;
2725     default:
2726       abort ();
2727     }
2728 }
2729 
2730 
2731 /* haifa-sched.c has a static variable that keeps track of the current
2732    cycle.  It is passed to spu_sched_reorder, and we record it here for
2733    use by spu_sched_variable_issue.  It won't be accurate if the
2734    scheduler updates its clock_var between the two calls. */
2735 static int clock_var;
2736 
2737 /* This is used to keep track of insn alignment.  Set to 0 at the
2738    beginning of each block and increased by the "length" attr of each
2739    insn scheduled. */
2740 static int spu_sched_length;
2741 
2742 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2743    ready list appropriately in spu_sched_reorder(). */
2744 static int pipe0_clock;
2745 static int pipe1_clock;
2746 
2747 static int prev_clock_var;
2748 
2749 static int prev_priority;
2750 
2751 /* The SPU needs to load the next ilb sometime during the execution of
2752    the previous ilb.  There is a potential conflict if every cycle has a
2753    load or store.  To avoid the conflict we make sure the load/store
2754    unit is free for at least one cycle during the execution of insns in
2755    the previous ilb. */
2756 static int spu_ls_first;
2757 static int prev_ls_clock;
2758 
2759 static void
2760 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2761 		       int max_ready ATTRIBUTE_UNUSED)
2762 {
2763   spu_sched_length = 0;
2764 }
2765 
2766 static void
2767 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2768 		int max_ready ATTRIBUTE_UNUSED)
2769 {
2770   if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2771     {
2772       /* When any block might be at least 8-byte aligned, assume they
2773          will all be at least 8-byte aligned to make sure dual issue
2774          works out correctly. */
2775       spu_sched_length = 0;
2776     }
2777   spu_ls_first = INT_MAX;
2778   clock_var = -1;
2779   prev_ls_clock = -1;
2780   pipe0_clock = -1;
2781   pipe1_clock = -1;
2782   prev_clock_var = -1;
2783   prev_priority = -1;
2784 }
2785 
2786 static int
2787 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2788 			  int verbose ATTRIBUTE_UNUSED,
2789 			  rtx_insn *insn, int more)
2790 {
2791   int len;
2792   int p;
2793   if (GET_CODE (PATTERN (insn)) == USE
2794       || GET_CODE (PATTERN (insn)) == CLOBBER
2795       || (len = get_attr_length (insn)) == 0)
2796     return more;
2797 
2798   spu_sched_length += len;
2799 
2800   /* Reset on inline asm */
2801   if (INSN_CODE (insn) == -1)
2802     {
2803       spu_ls_first = INT_MAX;
2804       pipe0_clock = -1;
2805       pipe1_clock = -1;
2806       return 0;
2807     }
2808   p = get_pipe (insn);
2809   if (p == 0)
2810     pipe0_clock = clock_var;
2811   else
2812     pipe1_clock = clock_var;
2813 
2814   if (in_spu_reorg)
2815     {
2816       if (clock_var - prev_ls_clock > 1
2817 	  || INSN_CODE (insn) == CODE_FOR_iprefetch)
2818 	spu_ls_first = INT_MAX;
2819       if (uses_ls_unit (insn))
2820 	{
2821 	  if (spu_ls_first == INT_MAX)
2822 	    spu_ls_first = spu_sched_length;
2823 	  prev_ls_clock = clock_var;
2824 	}
2825 
2826       /* The scheduler hasn't inserted the nop, but we will later on.
2827          Include those nops in spu_sched_length. */
2828       if (prev_clock_var == clock_var && (spu_sched_length & 7))
2829 	spu_sched_length += 4;
2830       prev_clock_var = clock_var;
2831 
2832       /* more is -1 when called from spu_sched_reorder for new insns
2833          that don't have INSN_PRIORITY */
2834       if (more >= 0)
2835 	prev_priority = INSN_PRIORITY (insn);
2836     }
2837 
2838   /* Always try issuing more insns.  spu_sched_reorder will decide
2839      when the cycle should be advanced. */
2840   return 1;
2841 }
2842 
2843 /* This function is called for both TARGET_SCHED_REORDER and
2844    TARGET_SCHED_REORDER2.  */
2845 static int
2846 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2847 		   rtx_insn **ready, int *nreadyp, int clock)
2848 {
2849   int i, nready = *nreadyp;
2850   int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2851   rtx_insn *insn;
2852 
2853   clock_var = clock;
2854 
2855   if (nready <= 0 || pipe1_clock >= clock)
2856     return 0;
2857 
2858   /* Find any rtl insns that don't generate assembly insns and schedule
2859      them first. */
2860   for (i = nready - 1; i >= 0; i--)
2861     {
2862       insn = ready[i];
2863       if (INSN_CODE (insn) == -1
2864 	  || INSN_CODE (insn) == CODE_FOR_blockage
2865 	  || (INSN_P (insn) && get_attr_length (insn) == 0))
2866 	{
2867 	  ready[i] = ready[nready - 1];
2868 	  ready[nready - 1] = insn;
2869 	  return 1;
2870 	}
2871     }
2872 
2873   pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2874   for (i = 0; i < nready; i++)
2875     if (INSN_CODE (ready[i]) != -1)
2876       {
2877 	insn = ready[i];
2878 	switch (get_attr_type (insn))
2879 	  {
2880 	  default:
2881 	  case TYPE_MULTI0:
2882 	  case TYPE_CONVERT:
2883 	  case TYPE_FX2:
2884 	  case TYPE_FX3:
2885 	  case TYPE_SPR:
2886 	  case TYPE_NOP:
2887 	  case TYPE_FXB:
2888 	  case TYPE_FPD:
2889 	  case TYPE_FP6:
2890 	  case TYPE_FP7:
2891 	    pipe_0 = i;
2892 	    break;
2893 	  case TYPE_LOAD:
2894 	  case TYPE_STORE:
2895 	    pipe_ls = i;
2896 	  case TYPE_LNOP:
2897 	  case TYPE_SHUF:
2898 	  case TYPE_BR:
2899 	  case TYPE_MULTI1:
2900 	  case TYPE_HBR:
2901 	    pipe_1 = i;
2902 	    break;
2903 	  case TYPE_IPREFETCH:
2904 	    pipe_hbrp = i;
2905 	    break;
2906 	  }
2907       }
2908 
2909   /* In the first scheduling phase, schedule loads and stores together
2910      to increase the chance they will get merged during postreload CSE. */
2911   if (!reload_completed && pipe_ls >= 0)
2912     {
2913       insn = ready[pipe_ls];
2914       ready[pipe_ls] = ready[nready - 1];
2915       ready[nready - 1] = insn;
2916       return 1;
2917     }
2918 
2919   /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2920   if (pipe_hbrp >= 0)
2921     pipe_1 = pipe_hbrp;
2922 
2923   /* When we have loads/stores in every cycle of the last 15 insns and
2924      we are about to schedule another load/store, emit an hbrp insn
2925      instead. */
2926   if (in_spu_reorg
2927       && spu_sched_length - spu_ls_first >= 4 * 15
2928       && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2929     {
2930       insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2931       recog_memoized (insn);
2932       if (pipe0_clock < clock)
2933 	PUT_MODE (insn, TImode);
2934       spu_sched_variable_issue (file, verbose, insn, -1);
2935       return 0;
2936     }
2937 
2938   /* In general, we want to emit nops to increase dual issue, but dual
2939      issue isn't faster when one of the insns could be scheduled later
2940      without affecting the critical path.  We look at INSN_PRIORITY to
2941      make a good guess, but it isn't perfect so -mdual-nops=n can be
2942      used to affect it. */
2943   if (in_spu_reorg && spu_dual_nops < 10)
2944     {
2945       /* When we are at an even address and are not issuing nops to
2946          improve scheduling, we need to advance the cycle.  */
2947       if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2948 	  && (spu_dual_nops == 0
2949 	      || (pipe_1 != -1
2950 		  && prev_priority >
2951 		  INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2952 	return 0;
2953 
2954       /* When at an odd address, schedule the highest priority insn
2955          without considering pipeline. */
2956       if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2957 	  && (spu_dual_nops == 0
2958 	      || (prev_priority >
2959 		  INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2960 	return 1;
2961     }
2962 
2963 
2964   /* We haven't issued a pipe0 insn yet this cycle, if there is a
2965      pipe0 insn in the ready list, schedule it. */
2966   if (pipe0_clock < clock && pipe_0 >= 0)
2967     schedule_i = pipe_0;
2968 
2969   /* Either we've scheduled a pipe0 insn already or there is no pipe0
2970      insn to schedule.  Put a pipe1 insn at the front of the ready list. */
2971   else
2972     schedule_i = pipe_1;
2973 
2974   if (schedule_i > -1)
2975     {
2976       insn = ready[schedule_i];
2977       ready[schedule_i] = ready[nready - 1];
2978       ready[nready - 1] = insn;
2979       return 1;
2980     }
2981   return 0;
2982 }
2983 
2984 /* INSN is dependent on DEP_INSN. */
2985 static int
2986 spu_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
2987 {
2988   rtx set;
2989 
2990   /* The blockage pattern is used to prevent instructions from being
2991      moved across it and has no cost. */
2992   if (INSN_CODE (insn) == CODE_FOR_blockage
2993       || INSN_CODE (dep_insn) == CODE_FOR_blockage)
2994     return 0;
2995 
2996   if ((INSN_P (insn) && get_attr_length (insn) == 0)
2997       || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
2998     return 0;
2999 
3000   /* Make sure hbrps are spread out. */
3001   if (INSN_CODE (insn) == CODE_FOR_iprefetch
3002       && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3003     return 8;
3004 
3005   /* Make sure hints and hbrps are 2 cycles apart. */
3006   if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3007        || INSN_CODE (insn) == CODE_FOR_hbr)
3008        && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3009 	   || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3010     return 2;
3011 
3012   /* An hbrp has no real dependency on other insns. */
3013   if (INSN_CODE (insn) == CODE_FOR_iprefetch
3014       || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3015     return 0;
3016 
3017   /* Assuming that it is unlikely an argument register will be used in
3018      the first cycle of the called function, we reduce the cost for
3019      slightly better scheduling of dep_insn.  When not hinted, the
3020      mispredicted branch would hide the cost as well.  */
3021   if (CALL_P (insn))
3022   {
3023     rtx target = get_branch_target (insn);
3024     if (GET_CODE (target) != REG || !set_of (target, insn))
3025       return cost - 2;
3026     return cost;
3027   }
3028 
3029   /* And when returning from a function, let's assume the return values
3030      are completed sooner too. */
3031   if (CALL_P (dep_insn))
3032     return cost - 2;
3033 
3034   /* Make sure an instruction that loads from the back chain is scheduled
3035      away from the return instruction so a hint is more likely to get
3036      issued. */
3037   if (INSN_CODE (insn) == CODE_FOR__return
3038       && (set = single_set (dep_insn))
3039       && GET_CODE (SET_DEST (set)) == REG
3040       && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3041     return 20;
3042 
3043   /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3044      scheduler makes every insn in a block anti-dependent on the final
3045      jump_insn.  We adjust here so higher cost insns will get scheduled
3046      earlier. */
3047   if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3048     return insn_cost (dep_insn) - 3;
3049 
3050   return cost;
3051 }
3052 
3053 /* Create a CONST_DOUBLE from a string.  */
3054 rtx
3055 spu_float_const (const char *string, machine_mode mode)
3056 {
3057   REAL_VALUE_TYPE value;
3058   value = REAL_VALUE_ATOF (string, mode);
3059   return const_double_from_real_value (value, mode);
3060 }
3061 
3062 int
3063 spu_constant_address_p (rtx x)
3064 {
3065   return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3066 	  || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3067 	  || GET_CODE (x) == HIGH);
3068 }
3069 
3070 static enum spu_immediate
3071 which_immediate_load (HOST_WIDE_INT val)
3072 {
3073   gcc_assert (val == trunc_int_for_mode (val, SImode));
3074 
3075   if (val >= -0x8000 && val <= 0x7fff)
3076     return SPU_IL;
3077   if (val >= 0 && val <= 0x3ffff)
3078     return SPU_ILA;
3079   if ((val & 0xffff) == ((val >> 16) & 0xffff))
3080     return SPU_ILH;
3081   if ((val & 0xffff) == 0)
3082     return SPU_ILHU;
3083 
3084   return SPU_NONE;
3085 }
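/* Examples of the classification above: 0x1234 fits the signed 16-bit
   il range (SPU_IL), 0x12345 fits the unsigned 18-bit ila range
   (SPU_ILA), 0x00050005 repeats its low halfword (SPU_ILH), and 0x50000
   has a zero low halfword (SPU_ILHU).  */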
3086 
3087 /* Return true when OP can be loaded by one of the il instructions, or
3088    when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3089 int
3090 immediate_load_p (rtx op, machine_mode mode)
3091 {
3092   if (CONSTANT_P (op))
3093     {
3094       enum immediate_class c = classify_immediate (op, mode);
3095       return c == IC_IL1 || c == IC_IL1s
3096 	     || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3097     }
3098   return 0;
3099 }
3100 
3101 /* Return true if the first SIZE bytes of ARR form a constant that can be
3102    generated with cbd, chd, cwd or cdd.  When non-NULL, PRUN and PSTART
3103    represent the size and offset of the instruction to use. */
3104 static int
3105 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3106 {
3107   int cpat, run, i, start;
3108   cpat = 1;
3109   run = 0;
3110   start = -1;
3111   for (i = 0; i < size && cpat; i++)
3112     if (arr[i] != i+16)
3113       {
3114 	if (!run)
3115 	  {
3116 	    start = i;
3117 	    if (arr[i] == 3)
3118 	      run = 1;
3119 	    else if (arr[i] == 2 && arr[i+1] == 3)
3120 	      run = 2;
3121 	    else if (arr[i] == 0)
3122 	      {
3123 		while (arr[i+run] == run && i+run < 16)
3124 		  run++;
3125 		if (run != 4 && run != 8)
3126 		  cpat = 0;
3127 	      }
3128 	    else
3129 	      cpat = 0;
3130 	    if ((i & (run-1)) != 0)
3131 	      cpat = 0;
3132 	    i += run;
3133 	  }
3134 	else
3135 	  cpat = 0;
3136       }
3137   if (cpat && (run || size < 16))
3138     {
3139       if (run == 0)
3140 	run = 1;
3141       if (prun)
3142 	*prun = run;
3143       if (pstart)
3144 	*pstart = start == -1 ? 16-run : start;
3145       return 1;
3146     }
3147   return 0;
3148 }
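/* A concrete example (assuming the usual cwd-style shuffle pattern):
   the bytes 10 11 12 13 00 01 02 03 18 19 1a 1b 1c 1d 1e 1f (hex)
   deviate from the i+16 identity only at offset 4, where the run
   00 01 02 03 has length 4, so cpat_info returns 1 with *prun == 4 and
   *pstart == 4 -- the pattern a cwd with offset 4 would produce.  */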
3149 
3150 /* OP is a CONSTANT_P.  Determine what instructions can be used to load
3151    it into a register.  MODE is only valid when OP is a CONST_INT. */
3152 static enum immediate_class
3153 classify_immediate (rtx op, machine_mode mode)
3154 {
3155   HOST_WIDE_INT val;
3156   unsigned char arr[16];
3157   int i, j, repeated, fsmbi, repeat;
3158 
3159   gcc_assert (CONSTANT_P (op));
3160 
3161   if (GET_MODE (op) != VOIDmode)
3162     mode = GET_MODE (op);
3163 
3164   /* A V4SI const_vector with all identical symbols is ok. */
3165   if (!flag_pic
3166       && mode == V4SImode
3167       && GET_CODE (op) == CONST_VECTOR
3168       && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3169       && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE)
3170     op = unwrap_const_vec_duplicate (op);
3171 
3172   switch (GET_CODE (op))
3173     {
3174     case SYMBOL_REF:
3175     case LABEL_REF:
3176       return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3177 
3178     case CONST:
3179       /* We can never know if the resulting address fits in 18 bits and can be
3180 	 loaded with ila.  For now, assume the address will not overflow if
3181 	 the displacement is "small" (fits 'K' constraint).  */
3182       if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3183 	{
3184 	  rtx sym = XEXP (XEXP (op, 0), 0);
3185 	  rtx cst = XEXP (XEXP (op, 0), 1);
3186 
3187 	  if (GET_CODE (sym) == SYMBOL_REF
3188 	      && GET_CODE (cst) == CONST_INT
3189 	      && satisfies_constraint_K (cst))
3190 	    return IC_IL1s;
3191 	}
3192       return IC_IL2s;
3193 
3194     case HIGH:
3195       return IC_IL1s;
3196 
3197     case CONST_VECTOR:
3198       for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3199 	if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3200 	    && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3201 	  return IC_POOL;
3202       /* Fall through. */
3203 
3204     case CONST_INT:
3205     case CONST_DOUBLE:
3206       constant_to_array (mode, op, arr);
3207 
3208       /* Check that each 4-byte slot is identical. */
3209       repeated = 1;
3210       for (i = 4; i < 16; i += 4)
3211 	for (j = 0; j < 4; j++)
3212 	  if (arr[j] != arr[i + j])
3213 	    repeated = 0;
3214 
3215       if (repeated)
3216 	{
3217 	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3218 	  val = trunc_int_for_mode (val, SImode);
3219 
3220 	  if (which_immediate_load (val) != SPU_NONE)
3221 	    return IC_IL1;
3222 	}
3223 
3224       /* Any mode of 2 bytes or smaller can be loaded with an il
3225          instruction. */
3226       gcc_assert (GET_MODE_SIZE (mode) > 2);
3227 
3228       fsmbi = 1;
3229       repeat = 0;
3230       for (i = 0; i < 16 && fsmbi; i++)
3231 	if (arr[i] != 0 && repeat == 0)
3232 	  repeat = arr[i];
3233 	else if (arr[i] != 0 && arr[i] != repeat)
3234 	  fsmbi = 0;
3235       if (fsmbi)
3236 	return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3237 
3238       if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3239 	return IC_CPAT;
3240 
3241       if (repeated)
3242 	return IC_IL2;
3243 
3244       return IC_POOL;
3245     default:
3246       break;
3247     }
3248   gcc_unreachable ();
3249 }
3250 
3251 static enum spu_immediate
3252 which_logical_immediate (HOST_WIDE_INT val)
3253 {
3254   gcc_assert (val == trunc_int_for_mode (val, SImode));
3255 
3256   if (val >= -0x200 && val <= 0x1ff)
3257     return SPU_ORI;
3258   if (val >= 0 && val <= 0xffff)
3259     return SPU_IOHL;
3260   if ((val & 0xffff) == ((val >> 16) & 0xffff))
3261     {
3262       val = trunc_int_for_mode (val, HImode);
3263       if (val >= -0x200 && val <= 0x1ff)
3264 	return SPU_ORHI;
3265       if ((val & 0xff) == ((val >> 8) & 0xff))
3266 	{
3267 	  val = trunc_int_for_mode (val, QImode);
3268 	  if (val >= -0x200 && val <= 0x1ff)
3269 	    return SPU_ORBI;
3270 	}
3271     }
3272   return SPU_NONE;
3273 }
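/* For example, 0x000a000a repeats its low halfword and the truncated
   HImode value 0x000a lies in the 10-bit signed range, so it is
   classified as SPU_ORHI, while 0x00001234 falls in the unsigned 16-bit
   range and is classified as SPU_IOHL.  */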
3274 
3275 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3276    CONST_DOUBLEs. */
3277 static int
3278 const_vector_immediate_p (rtx x)
3279 {
3280   int i;
3281   gcc_assert (GET_CODE (x) == CONST_VECTOR);
3282   for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3283     if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3284 	&& GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3285       return 0;
3286   return 1;
3287 }
3288 
3289 int
3290 logical_immediate_p (rtx op, machine_mode mode)
3291 {
3292   HOST_WIDE_INT val;
3293   unsigned char arr[16];
3294   int i, j;
3295 
3296   gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3297 	      || GET_CODE (op) == CONST_VECTOR);
3298 
3299   if (GET_CODE (op) == CONST_VECTOR
3300       && !const_vector_immediate_p (op))
3301     return 0;
3302 
3303   if (GET_MODE (op) != VOIDmode)
3304     mode = GET_MODE (op);
3305 
3306   constant_to_array (mode, op, arr);
3307 
3308   /* Check that bytes are repeated. */
3309   for (i = 4; i < 16; i += 4)
3310     for (j = 0; j < 4; j++)
3311       if (arr[j] != arr[i + j])
3312 	return 0;
3313 
3314   val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3315   val = trunc_int_for_mode (val, SImode);
3316 
3317   i = which_logical_immediate (val);
3318   return i != SPU_NONE && i != SPU_IOHL;
3319 }
3320 
3321 int
3322 iohl_immediate_p (rtx op, machine_mode mode)
3323 {
3324   HOST_WIDE_INT val;
3325   unsigned char arr[16];
3326   int i, j;
3327 
3328   gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3329 	      || GET_CODE (op) == CONST_VECTOR);
3330 
3331   if (GET_CODE (op) == CONST_VECTOR
3332       && !const_vector_immediate_p (op))
3333     return 0;
3334 
3335   if (GET_MODE (op) != VOIDmode)
3336     mode = GET_MODE (op);
3337 
3338   constant_to_array (mode, op, arr);
3339 
3340   /* Check that bytes are repeated. */
3341   for (i = 4; i < 16; i += 4)
3342     for (j = 0; j < 4; j++)
3343       if (arr[j] != arr[i + j])
3344 	return 0;
3345 
3346   val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3347   val = trunc_int_for_mode (val, SImode);
3348 
3349   return val >= 0 && val <= 0xffff;
3350 }
3351 
3352 int
3353 arith_immediate_p (rtx op, machine_mode mode,
3354 		   HOST_WIDE_INT low, HOST_WIDE_INT high)
3355 {
3356   HOST_WIDE_INT val;
3357   unsigned char arr[16];
3358   int bytes, i, j;
3359 
3360   gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3361 	      || GET_CODE (op) == CONST_VECTOR);
3362 
3363   if (GET_CODE (op) == CONST_VECTOR
3364       && !const_vector_immediate_p (op))
3365     return 0;
3366 
3367   if (GET_MODE (op) != VOIDmode)
3368     mode = GET_MODE (op);
3369 
3370   constant_to_array (mode, op, arr);
3371 
3372   bytes = GET_MODE_UNIT_SIZE (mode);
3373   mode = mode_for_size (GET_MODE_UNIT_BITSIZE (mode), MODE_INT, 0);
3374 
3375   /* Check that bytes are repeated. */
3376   for (i = bytes; i < 16; i += bytes)
3377     for (j = 0; j < bytes; j++)
3378       if (arr[j] != arr[i + j])
3379 	return 0;
3380 
3381   val = arr[0];
3382   for (j = 1; j < bytes; j++)
3383     val = (val << 8) | arr[j];
3384 
3385   val = trunc_int_for_mode (val, mode);
3386 
3387   return val >= low && val <= high;
3388 }
3389 
3390 /* TRUE when op is an immediate and an exact power of 2, and given that
3391    OP is 2^scale, scale >= LOW && scale <= HIGH.  When OP is a vector,
3392    all entries must be the same. */
3393 bool
3394 exp2_immediate_p (rtx op, machine_mode mode, int low, int high)
3395 {
3396   machine_mode int_mode;
3397   HOST_WIDE_INT val;
3398   unsigned char arr[16];
3399   int bytes, i, j;
3400 
3401   gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3402 	      || GET_CODE (op) == CONST_VECTOR);
3403 
3404   if (GET_CODE (op) == CONST_VECTOR
3405       && !const_vector_immediate_p (op))
3406     return 0;
3407 
3408   if (GET_MODE (op) != VOIDmode)
3409     mode = GET_MODE (op);
3410 
3411   constant_to_array (mode, op, arr);
3412 
3413   mode = GET_MODE_INNER (mode);
3414 
3415   bytes = GET_MODE_SIZE (mode);
3416   int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3417 
3418   /* Check that bytes are repeated. */
3419   for (i = bytes; i < 16; i += bytes)
3420     for (j = 0; j < bytes; j++)
3421       if (arr[j] != arr[i + j])
3422 	return 0;
3423 
3424   val = arr[0];
3425   for (j = 1; j < bytes; j++)
3426     val = (val << 8) | arr[j];
3427 
3428   val = trunc_int_for_mode (val, int_mode);
3429 
3430   /* Currently, we only handle SFmode */
3431   gcc_assert (mode == SFmode);
3432   if (mode == SFmode)
3433     {
3434       int exp = (val >> 23) - 127;
3435       return val > 0 && (val & 0x007fffff) == 0
3436 	     &&  exp >= low && exp <= high;
3437     }
3438   return FALSE;
3439 }
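/* Worked example for the SFmode case: 8.0f has the bit pattern
   0x41000000, so the mantissa bits (val & 0x007fffff) are zero and the
   biased exponent 0x82 gives exp == 130 - 127 == 3; the constant is
   accepted whenever 3 lies within [LOW, HIGH].  */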
3440 
3441 /* Return true if X is a SYMBOL_REF to an __ea qualified variable.  */
3442 
3443 static bool
3444 ea_symbol_ref_p (const_rtx x)
3445 {
3446   tree decl;
3447 
3448   if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3449     {
3450       rtx plus = XEXP (x, 0);
3451       rtx op0 = XEXP (plus, 0);
3452       rtx op1 = XEXP (plus, 1);
3453       if (GET_CODE (op1) == CONST_INT)
3454 	x = op0;
3455     }
3456 
3457   return (GET_CODE (x) == SYMBOL_REF
3458  	  && (decl = SYMBOL_REF_DECL (x)) != 0
3459  	  && TREE_CODE (decl) == VAR_DECL
3460  	  && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3461 }
3462 
3463 /* We accept:
3464    - any 32-bit constant (SImode, SFmode)
3465    - any constant that can be generated with fsmbi (any mode)
3466    - a 64-bit constant where the high and low bits are identical
3467      (DImode, DFmode)
3468    - a 128-bit constant where the four 32-bit words match.  */
3469 bool
3470 spu_legitimate_constant_p (machine_mode mode, rtx x)
3471 {
3472   subrtx_iterator::array_type array;
3473   if (GET_CODE (x) == HIGH)
3474     x = XEXP (x, 0);
3475 
3476   /* Reject any __ea qualified reference.  These can't appear in
3477      instructions but must be forced to the constant pool.  */
3478   FOR_EACH_SUBRTX (iter, array, x, ALL)
3479     if (ea_symbol_ref_p (*iter))
3480       return 0;
3481 
3482   /* V4SI with all identical symbols is valid. */
3483   if (!flag_pic
3484       && mode == V4SImode
3485       && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3486 	  || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3487 	  || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3488     return const_vec_duplicate_p (x);
3489 
3490   if (GET_CODE (x) == CONST_VECTOR
3491       && !const_vector_immediate_p (x))
3492     return 0;
3493   return 1;
3494 }
3495 
3496 /* Valid addresses are:
3497    - symbol_ref, label_ref, const
3498    - reg
3499    - reg + const_int, where const_int is 16 byte aligned
3500    - reg + reg, alignment doesn't matter
3501   The alignment matters in the reg+const case because lqd and stqd
3502   ignore the 4 least significant bits of the const.  We only care about
3503   16 byte modes because the expand phase will change all smaller MEM
3504   references to TImode.  */
3505 static bool
3506 spu_legitimate_address_p (machine_mode mode,
3507 			  rtx x, bool reg_ok_strict)
3508 {
3509   int aligned = GET_MODE_SIZE (mode) >= 16;
3510   if (aligned
3511       && GET_CODE (x) == AND
3512       && GET_CODE (XEXP (x, 1)) == CONST_INT
3513       && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3514     x = XEXP (x, 0);
3515   switch (GET_CODE (x))
3516     {
3517     case LABEL_REF:
3518       return !TARGET_LARGE_MEM;
3519 
3520     case SYMBOL_REF:
3521     case CONST:
3522       /* Keep __ea references until reload so that spu_expand_mov can see them
3523 	 in MEMs.  */
3524       if (ea_symbol_ref_p (x))
3525 	return !reload_in_progress && !reload_completed;
3526       return !TARGET_LARGE_MEM;
3527 
3528     case CONST_INT:
3529       return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3530 
3531     case SUBREG:
3532       x = XEXP (x, 0);
3533       if (REG_P (x))
3534 	return 0;
3535 
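      /* Fall through.  */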
3536     case REG:
3537       return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3538 
3539     case PLUS:
3540     case LO_SUM:
3541       {
3542 	rtx op0 = XEXP (x, 0);
3543 	rtx op1 = XEXP (x, 1);
3544 	if (GET_CODE (op0) == SUBREG)
3545 	  op0 = XEXP (op0, 0);
3546 	if (GET_CODE (op1) == SUBREG)
3547 	  op1 = XEXP (op1, 0);
3548 	if (GET_CODE (op0) == REG
3549 	    && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3550 	    && GET_CODE (op1) == CONST_INT
3551 	    && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3552 		/* If virtual registers are involved, the displacement will
3553 		   change later on anyway, so checking would be premature.
3554 		   Reload will make sure the final displacement after
3555 		   register elimination is OK.  */
3556 		|| op0 == arg_pointer_rtx
3557 		|| op0 == frame_pointer_rtx
3558 		|| op0 == virtual_stack_vars_rtx)
3559 	    && (!aligned || (INTVAL (op1) & 15) == 0))
3560 	  return TRUE;
3561 	if (GET_CODE (op0) == REG
3562 	    && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3563 	    && GET_CODE (op1) == REG
3564 	    && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3565 	  return TRUE;
3566       }
3567       break;
3568 
3569     default:
3570       break;
3571     }
3572   return FALSE;
3573 }
3574 
3575 /* Like spu_legitimate_address_p, except with named addresses.  */
3576 static bool
3577 spu_addr_space_legitimate_address_p (machine_mode mode, rtx x,
3578 				     bool reg_ok_strict, addr_space_t as)
3579 {
3580   if (as == ADDR_SPACE_EA)
3581     return (REG_P (x) && (GET_MODE (x) == EAmode));
3582 
3583   else if (as != ADDR_SPACE_GENERIC)
3584     gcc_unreachable ();
3585 
3586   return spu_legitimate_address_p (mode, x, reg_ok_strict);
3587 }
3588 
3589 /* When the address is reg + const_int, force the const_int into a
3590    register.  */
3591 static rtx
3592 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3593 			machine_mode mode ATTRIBUTE_UNUSED)
3594 {
3595   rtx op0, op1;
3596   /* Make sure both operands are registers.  */
3597   if (GET_CODE (x) == PLUS)
3598     {
3599       op0 = XEXP (x, 0);
3600       op1 = XEXP (x, 1);
3601       if (ALIGNED_SYMBOL_REF_P (op0))
3602 	{
3603 	  op0 = force_reg (Pmode, op0);
3604 	  mark_reg_pointer (op0, 128);
3605 	}
3606       else if (GET_CODE (op0) != REG)
3607 	op0 = force_reg (Pmode, op0);
3608       if (ALIGNED_SYMBOL_REF_P (op1))
3609 	{
3610 	  op1 = force_reg (Pmode, op1);
3611 	  mark_reg_pointer (op1, 128);
3612 	}
3613       else if (GET_CODE (op1) != REG)
3614 	op1 = force_reg (Pmode, op1);
3615       x = gen_rtx_PLUS (Pmode, op0, op1);
3616     }
3617   return x;
3618 }
3619 
3620 /* Like spu_legitimize_address, except with named address support.  */
3621 static rtx
3622 spu_addr_space_legitimize_address (rtx x, rtx oldx, machine_mode mode,
3623 				   addr_space_t as)
3624 {
3625   if (as != ADDR_SPACE_GENERIC)
3626     return x;
3627 
3628   return spu_legitimize_address (x, oldx, mode);
3629 }
3630 
3631 /* Reload reg + const_int for out-of-range displacements.  */
3632 rtx
3633 spu_legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
3634 			       int opnum, int type)
3635 {
3636   bool removed_and = false;
3637 
3638   if (GET_CODE (ad) == AND
3639       && CONST_INT_P (XEXP (ad, 1))
3640       && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3641     {
3642       ad = XEXP (ad, 0);
3643       removed_and = true;
3644     }
3645 
3646   if (GET_CODE (ad) == PLUS
3647       && REG_P (XEXP (ad, 0))
3648       && CONST_INT_P (XEXP (ad, 1))
3649       && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3650 	   && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3651     {
3652       /* Unshare the sum.  */
3653       ad = copy_rtx (ad);
3654 
3655       /* Reload the displacement.  */
3656       push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3657 		   BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3658 		   opnum, (enum reload_type) type);
3659 
3660       /* Add back AND for alignment if we stripped it.  */
3661       if (removed_and)
3662 	ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3663 
3664       return ad;
3665     }
3666 
3667   return NULL_RTX;
3668 }
3669 
3670 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3671    struct attribute_spec.handler.  */
3672 static tree
3673 spu_handle_fndecl_attribute (tree * node,
3674 			     tree name,
3675 			     tree args ATTRIBUTE_UNUSED,
3676 			     int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3677 {
3678   if (TREE_CODE (*node) != FUNCTION_DECL)
3679     {
3680       warning (0, "%qE attribute only applies to functions",
3681 	       name);
3682       *no_add_attrs = true;
3683     }
3684 
3685   return NULL_TREE;
3686 }
3687 
3688 /* Handle the "vector" attribute.  */
3689 static tree
3690 spu_handle_vector_attribute (tree * node, tree name,
3691 			     tree args ATTRIBUTE_UNUSED,
3692 			     int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3693 {
3694   tree type = *node, result = NULL_TREE;
3695   machine_mode mode;
3696   int unsigned_p;
3697 
3698   while (POINTER_TYPE_P (type)
3699 	 || TREE_CODE (type) == FUNCTION_TYPE
3700 	 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3701     type = TREE_TYPE (type);
3702 
3703   mode = TYPE_MODE (type);
3704 
3705   unsigned_p = TYPE_UNSIGNED (type);
3706   switch (mode)
3707     {
3708     case DImode:
3709       result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3710       break;
3711     case SImode:
3712       result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3713       break;
3714     case HImode:
3715       result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3716       break;
3717     case QImode:
3718       result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3719       break;
3720     case SFmode:
3721       result = V4SF_type_node;
3722       break;
3723     case DFmode:
3724       result = V2DF_type_node;
3725       break;
3726     default:
3727       break;
3728     }
3729 
3730   /* Propagate qualifiers attached to the element type
3731      onto the vector type.  */
3732   if (result && result != type && TYPE_QUALS (type))
3733     result = build_qualified_type (result, TYPE_QUALS (type));
3734 
3735   *no_add_attrs = true;		/* No need to hang on to the attribute.  */
3736 
3737   if (!result)
3738     warning (0, "%qE attribute ignored", name);
3739   else
3740     *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3741 
3742   return NULL_TREE;
3743 }
3744 
3745 /* Return nonzero if FUNC is a naked function.  */
3746 static int
3747 spu_naked_function_p (tree func)
3748 {
3749   tree a;
3750 
3751   if (TREE_CODE (func) != FUNCTION_DECL)
3752     abort ();
3753 
3754   a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3755   return a != NULL_TREE;
3756 }
3757 
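/* Return the offset to add so that a reference through elimination
   register FROM refers to the same location through register TO.  The
   result is computed from the sizes of the local frame, the outgoing
   argument area and the saved-register area, plus STACK_POINTER_OFFSET
   when the function needs a stack frame.  */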
3758 int
3759 spu_initial_elimination_offset (int from, int to)
3760 {
3761   int saved_regs_size = spu_saved_regs_size ();
3762   int sp_offset = 0;
3763   if (!crtl->is_leaf || crtl->outgoing_args_size
3764       || get_frame_size () || saved_regs_size)
3765     sp_offset = STACK_POINTER_OFFSET;
3766   if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3767     return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3768   else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3769     return get_frame_size ();
3770   else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3771     return sp_offset + crtl->outgoing_args_size
3772       + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3773   else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3774     return get_frame_size () + saved_regs_size + sp_offset;
3775   else
3776     gcc_unreachable ();
3777 }
3778 
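/* Compute where a value of type TYPE is returned.  Scalars come back in
   the first return register; small aggregates are wrapped in a PARALLEL
   so that they end up left-justified across consecutive return
   registers.  */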
3779 rtx
3780 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3781 {
3782   machine_mode mode = TYPE_MODE (type);
3783   int byte_size = ((mode == BLKmode)
3784 		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3785 
3786   /* Make sure small structs are left justified in a register. */
3787   if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3788       && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3789     {
3790       machine_mode smode;
3791       rtvec v;
3792       int i;
3793       int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3794       int n = byte_size / UNITS_PER_WORD;
3795       v = rtvec_alloc (nregs);
3796       for (i = 0; i < n; i++)
3797 	{
3798 	  RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3799 						gen_rtx_REG (TImode,
3800 							     FIRST_RETURN_REGNUM
3801 							     + i),
3802 						GEN_INT (UNITS_PER_WORD * i));
3803 	  byte_size -= UNITS_PER_WORD;
3804 	}
3805 
3806       if (n < nregs)
3807 	{
3808 	  if (byte_size < 4)
3809 	    byte_size = 4;
3810 	  smode =
3811 	    smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3812 	  RTVEC_ELT (v, n) =
3813 	    gen_rtx_EXPR_LIST (VOIDmode,
3814 			       gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3815 			       GEN_INT (UNITS_PER_WORD * n));
3816 	}
3817       return gen_rtx_PARALLEL (mode, v);
3818     }
3819   return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3820 }
3821 
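/* Return the register (or PARALLEL) in which the current argument is
   passed, or zero when it must go on the stack.  Arguments are never
   split between registers and the stack, and small aggregates are
   wrapped in a PARALLEL so they stay left-justified in their register.  */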
3822 static rtx
3823 spu_function_arg (cumulative_args_t cum_v,
3824 		  machine_mode mode,
3825 		  const_tree type, bool named ATTRIBUTE_UNUSED)
3826 {
3827   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3828   int byte_size;
3829 
3830   if (*cum >= MAX_REGISTER_ARGS)
3831     return 0;
3832 
3833   byte_size = ((mode == BLKmode)
3834 	       ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3835 
3836   /* The ABI does not allow parameters to be passed partly in
3837      registers and partly on the stack. */
3838   if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3839     return 0;
3840 
3841   /* Make sure small structs are left justified in a register. */
3842   if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3843       && byte_size < UNITS_PER_WORD && byte_size > 0)
3844     {
3845       machine_mode smode;
3846       rtx gr_reg;
3847       if (byte_size < 4)
3848 	byte_size = 4;
3849       smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3850       gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3851 				  gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
3852 				  const0_rtx);
3853       return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3854     }
3855   else
3856     return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
3857 }
3858 
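/* Advance the argument cursor past the current argument.  Every argument
   occupies whole 16-byte register slots: variable-sized types (passed by
   reference) and VOIDmode count as one slot, BLKmode aggregates use one
   slot per 16 bytes, and everything else uses HARD_REGNO_NREGS slots.  */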
3859 static void
3860 spu_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
3861 			  const_tree type, bool named ATTRIBUTE_UNUSED)
3862 {
3863   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3864 
3865   *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3866 	   ? 1
3867 	   : mode == BLKmode
3868 	   ? ((int_size_in_bytes (type) + 15) / 16)
3869 	   : mode == VOIDmode
3870 	   ? 1
3871 	   : HARD_REGNO_NREGS (cum, mode));
3872 }
3873 
3874 /* Variable sized types are passed by reference.  */
3875 static bool
3876 spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
3877 		       machine_mode mode ATTRIBUTE_UNUSED,
3878 		       const_tree type, bool named ATTRIBUTE_UNUSED)
3879 {
3880   return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3881 }
3882 
3883 
3884 /* Var args. */
3885 
3886 /* Create and return the va_list datatype.
3887 
3888    On SPU, va_list is an array type equivalent to
3889 
3890       typedef struct __va_list_tag
3891         {
3892             void *__args __attribute__((__aligned(16)));
3893             void *__skip __attribute__((__aligned(16)));
3894 
3895         } va_list[1];
3896 
3897    where __args points to the arg that will be returned by the next
3898    va_arg(), and __skip points to the previous stack frame such that
3899    when __args == __skip we should advance __args by 32 bytes. */
3900 static tree
3901 spu_build_builtin_va_list (void)
3902 {
3903   tree f_args, f_skip, record, type_decl;
3904   bool owp;
3905 
3906   record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3907 
3908   type_decl =
3909     build_decl (BUILTINS_LOCATION,
3910 		TYPE_DECL, get_identifier ("__va_list_tag"), record);
3911 
3912   f_args = build_decl (BUILTINS_LOCATION,
3913 		       FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3914   f_skip = build_decl (BUILTINS_LOCATION,
3915 		       FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3916 
3917   DECL_FIELD_CONTEXT (f_args) = record;
3918   DECL_ALIGN (f_args) = 128;
3919   DECL_USER_ALIGN (f_args) = 1;
3920 
3921   DECL_FIELD_CONTEXT (f_skip) = record;
3922   DECL_ALIGN (f_skip) = 128;
3923   DECL_USER_ALIGN (f_skip) = 1;
3924 
3925   TYPE_STUB_DECL (record) = type_decl;
3926   TYPE_NAME (record) = type_decl;
3927   TYPE_FIELDS (record) = f_args;
3928   DECL_CHAIN (f_args) = f_skip;
3929 
3930   /* We know this is being padded and we want it that way.  It is an
3931      internal type, so hide the warnings from the user. */
3932   owp = warn_padded;
3933   warn_padded = false;
3934 
3935   layout_type (record);
3936 
3937   warn_padded = owp;
3938 
3939   /* The correct type is an array type of one element.  */
3940   return build_array_type (record, build_index_type (size_zero_node));
3941 }
3942 
3943 /* Implement va_start by filling the va_list structure VALIST.
3944    NEXTARG points to the first anonymous stack argument.
3945 
3946    The following global variables are used to initialize
3947    the va_list structure:
3948 
3949      crtl->args.info;
3950        the CUMULATIVE_ARGS for this function
3951 
3952      crtl->args.arg_offset_rtx:
3953        holds the offset of the first anonymous stack argument
3954        (relative to the virtual arg pointer).  */
3955 
3956 static void
3957 spu_va_start (tree valist, rtx nextarg)
3958 {
3959   tree f_args, f_skip;
3960   tree args, skip, t;
3961 
3962   f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3963   f_skip = DECL_CHAIN (f_args);
3964 
3965   valist = build_simple_mem_ref (valist);
3966   args =
3967     build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3968   skip =
3969     build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3970 
3971   /* Find the __args area.  */
3972   t = make_tree (TREE_TYPE (args), nextarg);
3973   if (crtl->args.pretend_args_size > 0)
3974     t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
3975   t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
3976   TREE_SIDE_EFFECTS (t) = 1;
3977   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3978 
3979   /* Find the __skip area.  */
3980   t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
3981   t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
3982 				       - STACK_POINTER_OFFSET));
3983   t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
3984   TREE_SIDE_EFFECTS (t) = 1;
3985   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3986 }
3987 
3988 /* Gimplify va_arg by updating the va_list structure
3989    VALIST as required to retrieve an argument of type
3990    TYPE, and returning that argument.
3991 
3992    ret = va_arg(VALIST, TYPE);
3993 
3994    generates code equivalent to:
3995 
3996     paddedsize = (sizeof(TYPE) + 15) & -16;
3997     if (VALIST.__args + paddedsize > VALIST.__skip
3998 	&& VALIST.__args <= VALIST.__skip)
3999       addr = VALIST.__skip + 32;
4000     else
4001       addr = VALIST.__args;
4002     VALIST.__args = addr + paddedsize;
4003     ret = *(TYPE *)addr;
4004  */
4005 static tree
4006 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4007 			  gimple_seq * post_p ATTRIBUTE_UNUSED)
4008 {
4009   tree f_args, f_skip;
4010   tree args, skip;
4011   HOST_WIDE_INT size, rsize;
4012   tree addr, tmp;
4013   bool pass_by_reference_p;
4014 
4015   f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4016   f_skip = DECL_CHAIN (f_args);
4017 
4018   args =
4019     build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4020   skip =
4021     build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4022 
4023   addr = create_tmp_var (ptr_type_node, "va_arg");
4024 
4025   /* if an object is dynamically sized, a pointer to it is passed
4026      instead of the object itself. */
4027   pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4028 					   false);
4029   if (pass_by_reference_p)
4030     type = build_pointer_type (type);
4031   size = int_size_in_bytes (type);
4032   rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4033 
4034   /* build conditional expression to calculate addr. The expression
4035      will be gimplified later. */
4036   tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
4037   tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4038 		build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4039 		build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4040 		unshare_expr (skip)));
4041 
4042   tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4043 		fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4044 		unshare_expr (args));
4045 
4046   gimplify_assign (addr, tmp, pre_p);
4047 
4048   /* update VALIST.__args */
4049   tmp = fold_build_pointer_plus_hwi (addr, rsize);
4050   gimplify_assign (unshare_expr (args), tmp, pre_p);
4051 
4052   addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4053 		       addr);
4054 
4055   if (pass_by_reference_p)
4056     addr = build_va_arg_indirect_ref (addr);
4057 
4058   return build_va_arg_indirect_ref (addr);
4059 }
4060 
4061 /* Save parameter registers starting with the register that corresponds
4062    to the first unnamed parameters.  If the first unnamed parameter is
4063    in the stack then save no registers.  Set pretend_args_size to the
4064    amount of space needed to save the registers. */
4065 static void
4066 spu_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
4067 			    tree type, int *pretend_size, int no_rtl)
4068 {
4069   if (!no_rtl)
4070     {
4071       rtx tmp;
4072       int regno;
4073       int offset;
4074       int ncum = *get_cumulative_args (cum);
4075 
4076       /* cum currently points to the last named argument; we want to
4077          start at the next argument. */
4078       spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
4079 
4080       offset = -STACK_POINTER_OFFSET;
4081       for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4082 	{
4083 	  tmp = gen_frame_mem (V4SImode,
4084 			       plus_constant (Pmode, virtual_incoming_args_rtx,
4085 					      offset));
4086 	  emit_move_insn (tmp,
4087 			  gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4088 	  offset += 16;
4089 	}
4090       *pretend_size = offset + STACK_POINTER_OFFSET;
4091     }
4092 }
4093 
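/* Adjust register usage for flags only known at compile time: when
   generating PIC code, the PIC offset table register must stay fixed.  */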
4094 static void
4095 spu_conditional_register_usage (void)
4096 {
4097   if (flag_pic)
4098     {
4099       fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4100       call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4101     }
4102 }
4103 
4104 /* This is called any time we inspect the alignment of a register for
4105    addresses.  */
4106 static int
4107 reg_aligned_for_addr (rtx x)
4108 {
4109   int regno =
4110     REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4111   return REGNO_POINTER_ALIGN (regno) >= 128;
4112 }
4113 
4114 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4115    into its SYMBOL_REF_FLAGS.  */
4116 static void
4117 spu_encode_section_info (tree decl, rtx rtl, int first)
4118 {
4119   default_encode_section_info (decl, rtl, first);
4120 
4121   /* If a variable has a forced alignment to < 16 bytes, mark it with
4122      SYMBOL_FLAG_ALIGN1.  */
4123   if (TREE_CODE (decl) == VAR_DECL
4124       && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4125     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4126 }
4127 
4128 /* Return TRUE if we are certain the mem refers to a complete object
4129    which is both 16-byte aligned and padded to a 16-byte boundary.  This
4130    would make it safe to store with a single instruction.
4131    We guarantee the alignment and padding for static objects by aligning
4132    all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4133    FIXME: We currently cannot guarantee this for objects on the stack
4134    because assign_parm_setup_stack calls assign_stack_local with the
4135    alignment of the parameter mode and in that case the alignment never
4136    gets adjusted by LOCAL_ALIGNMENT. */
4137 static int
4138 store_with_one_insn_p (rtx mem)
4139 {
4140   machine_mode mode = GET_MODE (mem);
4141   rtx addr = XEXP (mem, 0);
4142   if (mode == BLKmode)
4143     return 0;
4144   if (GET_MODE_SIZE (mode) >= 16)
4145     return 1;
4146   /* Only static objects. */
4147   if (GET_CODE (addr) == SYMBOL_REF)
4148     {
4149       /* We use the associated declaration to make sure the access is
4150          referring to the whole object.
4151          We check both MEM_EXPR and SYMBOL_REF_DECL.  I'm not sure
4152          if it is necessary.  Will there be cases where one exists, and
4153          the other does not?  Will there be cases where both exist, but
4154          have different types?  */
4155       tree decl = MEM_EXPR (mem);
4156       if (decl
4157 	  && TREE_CODE (decl) == VAR_DECL
4158 	  && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4159 	return 1;
4160       decl = SYMBOL_REF_DECL (addr);
4161       if (decl
4162 	  && TREE_CODE (decl) == VAR_DECL
4163 	  && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4164 	return 1;
4165     }
4166   return 0;
4167 }
4168 
4169 /* Return 1 when the address is not valid for a simple load and store as
4170    required by the '_mov*' patterns.   We could make this less strict
4171    for loads, but we prefer MEMs to look the same so they are more
4172    likely to be merged.  */
4173 static int
4174 address_needs_split (rtx mem)
4175 {
4176   if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4177       && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4178 	  || !(store_with_one_insn_p (mem)
4179 	       || mem_is_padded_component_ref (mem))))
4180     return 1;
4181 
4182   return 0;
4183 }
4184 
4185 static GTY(()) rtx cache_fetch;		  /* __cache_fetch function */
4186 static GTY(()) rtx cache_fetch_dirty;	  /* __cache_fetch_dirty function */
4187 static alias_set_type ea_alias_set = -1;  /* alias set for __ea memory */
4188 
4189 /* MEM is known to be an __ea qualified memory access.  Emit a call to
4190    fetch the PPU memory into local store, and return its address in local
4191    store.  */
4192 
4193 static void
4194 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4195 {
4196   if (is_store)
4197     {
4198       rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4199       if (!cache_fetch_dirty)
4200 	cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4201       emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4202 			       2, ea_addr, EAmode, ndirty, SImode);
4203     }
4204   else
4205     {
4206       if (!cache_fetch)
4207 	cache_fetch = init_one_libfunc ("__cache_fetch");
4208       emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4209 			       1, ea_addr, EAmode);
4210     }
4211 }
4212 
4213 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4214    dirty bit marking, inline.
4215 
4216    The cache control data structure is an array of
4217 
4218    struct __cache_tag_array
4219      {
4220         unsigned int tag_lo[4];
4221         unsigned int tag_hi[4];
4222         void *data_pointer[4];
4223         int reserved[4];
4224         vector unsigned short dirty_bits[4];
4225      }  */
4226 
4227 static void
4228 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4229 {
4230   rtx ea_addr_si;
4231   HOST_WIDE_INT v;
4232   rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4233   rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4234   rtx index_mask = gen_reg_rtx (SImode);
4235   rtx tag_arr = gen_reg_rtx (Pmode);
4236   rtx splat_mask = gen_reg_rtx (TImode);
4237   rtx splat = gen_reg_rtx (V4SImode);
4238   rtx splat_hi = NULL_RTX;
4239   rtx tag_index = gen_reg_rtx (Pmode);
4240   rtx block_off = gen_reg_rtx (SImode);
4241   rtx tag_addr = gen_reg_rtx (Pmode);
4242   rtx tag = gen_reg_rtx (V4SImode);
4243   rtx cache_tag = gen_reg_rtx (V4SImode);
4244   rtx cache_tag_hi = NULL_RTX;
4245   rtx cache_ptrs = gen_reg_rtx (TImode);
4246   rtx cache_ptrs_si = gen_reg_rtx (SImode);
4247   rtx tag_equal = gen_reg_rtx (V4SImode);
4248   rtx tag_equal_hi = NULL_RTX;
4249   rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4250   rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4251   rtx eq_index = gen_reg_rtx (SImode);
4252   rtx bcomp, hit_label, hit_ref, cont_label;
4253   rtx_insn *insn;
4254 
4255   if (spu_ea_model != 32)
4256     {
4257       splat_hi = gen_reg_rtx (V4SImode);
4258       cache_tag_hi = gen_reg_rtx (V4SImode);
4259       tag_equal_hi = gen_reg_rtx (V4SImode);
4260     }
4261 
4262   emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
4263   emit_move_insn (tag_arr, tag_arr_sym);
4264   v = 0x0001020300010203LL;
4265   emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4266   ea_addr_si = ea_addr;
4267   if (spu_ea_model != 32)
4268     ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4269 
4270   /* tag_index = ea_addr & (tag_array_size - 128)  */
4271   emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4272 
4273   /* splat ea_addr to all 4 slots.  */
4274   emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4275   /* Similarly for high 32 bits of ea_addr.  */
4276   if (spu_ea_model != 32)
4277     emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4278 
4279   /* block_off = ea_addr & 127  */
4280   emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4281 
4282   /* tag_addr = tag_arr + tag_index  */
4283   emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4284 
4285   /* Read cache tags.  */
4286   emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4287   if (spu_ea_model != 32)
4288     emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4289 					       plus_constant (Pmode,
4290 							      tag_addr, 16)));
4291 
4292   /* tag = ea_addr & -128  */
4293   emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4294 
4295   /* Read all four cache data pointers.  */
4296   emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4297 					   plus_constant (Pmode,
4298 							  tag_addr, 32)));
4299 
4300   /* Compare tags.  */
4301   emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4302   if (spu_ea_model != 32)
4303     {
4304       emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4305       emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4306     }
4307 
4308   /* At most one of the tags compare equal, so tag_equal has one
4309      32-bit slot set to all 1's, with the other slots all zero.
4310      gbb picks off low bit from each byte in the 128-bit registers,
4311      so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4312      we have a hit.  */
4313   emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4314   emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4315 
4316   /* So counting leading zeros will set eq_index to 16, 20, 24 or 28.  */
4317   emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4318 
4319   /* Allowing us to rotate the corresponding cache data pointer to slot0.
4320      (rotating eq_index mod 16 bytes).  */
4321   emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4322   emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4323 
4324   /* Add block offset to form final data address.  */
4325   emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4326 
4327   /* Check that we did hit.  */
4328   hit_label = gen_label_rtx ();
4329   hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4330   bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4331   insn = emit_jump_insn (gen_rtx_SET (pc_rtx,
4332 				      gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4333 							    hit_ref, pc_rtx)));
4334   /* Say that this branch is very likely to happen.  */
4335   v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
4336   add_int_reg_note (insn, REG_BR_PROB, v);
4337 
4338   ea_load_store (mem, is_store, ea_addr, data_addr);
4339   cont_label = gen_label_rtx ();
4340   emit_jump_insn (gen_jump (cont_label));
4341   emit_barrier ();
4342 
4343   emit_label (hit_label);
4344 
4345   if (is_store)
4346     {
4347       HOST_WIDE_INT v_hi;
4348       rtx dirty_bits = gen_reg_rtx (TImode);
4349       rtx dirty_off = gen_reg_rtx (SImode);
4350       rtx dirty_128 = gen_reg_rtx (TImode);
4351       rtx neg_block_off = gen_reg_rtx (SImode);
4352 
4353       /* Set up mask with one dirty bit per byte of the mem we are
4354 	 writing, starting from top bit.  */
4355       v_hi = v = -1;
4356       v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4357       if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4358 	{
4359 	  v_hi = v;
4360 	  v = 0;
4361 	}
4362       emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4363 
4364       /* Form index into cache dirty_bits.  eq_index is one of
4365 	 0x10, 0x14, 0x18 or 0x1c.  Multiplying by 4 gives us
4366 	 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4367 	 offset to each of the four dirty_bits elements.  */
4368       emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4369 
4370       emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4371 
4372       /* Rotate bit mask to proper bit.  */
4373       emit_insn (gen_negsi2 (neg_block_off, block_off));
4374       emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4375       emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4376 
4377       /* Or in the new dirty bits.  */
4378       emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4379 
4380       /* Store.  */
4381       emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4382     }
4383 
4384   emit_label (cont_label);
4385 }
4386 
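/* MEM is an __ea qualified memory access.  Copy the referenced data into
   the software cache in local store (through a library call when
   optimizing for size or at -O0, inline otherwise) and return a new MEM
   referring to the cached copy.  */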
4387 static rtx
4388 expand_ea_mem (rtx mem, bool is_store)
4389 {
4390   rtx ea_addr;
4391   rtx data_addr = gen_reg_rtx (Pmode);
4392   rtx new_mem;
4393 
4394   ea_addr = force_reg (EAmode, XEXP (mem, 0));
4395   if (optimize_size || optimize == 0)
4396     ea_load_store (mem, is_store, ea_addr, data_addr);
4397   else
4398     ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4399 
4400   if (ea_alias_set == -1)
4401     ea_alias_set = new_alias_set ();
4402 
4403   /* We generate a new MEM RTX to refer to the copy of the data
4404      in the cache.  We do not copy memory attributes (except the
4405      alignment) from the original MEM, as they may no longer apply
4406      to the cache copy.  */
4407   new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4408   set_mem_alias_set (new_mem, ea_alias_set);
4409   set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4410 
4411   return new_mem;
4412 }
4413 
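/* Expand a move of MODE between ops[0] and ops[1].  Handles invalid
   subregs, __ea memory accesses, loads and stores that must be split
   into quadword operations, and SImode immediates that need sign
   extension.  Return nonzero when the move has been fully expanded here,
   zero when the caller should emit the ordinary move pattern.  */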
4414 int
4415 spu_expand_mov (rtx * ops, machine_mode mode)
4416 {
4417   if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4418     {
4419       /* Perform the move in the destination SUBREG's inner mode.  */
4420       ops[0] = SUBREG_REG (ops[0]);
4421       mode = GET_MODE (ops[0]);
4422       ops[1] = gen_lowpart_common (mode, ops[1]);
4423       gcc_assert (ops[1]);
4424     }
4425 
4426   if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4427     {
4428       rtx from = SUBREG_REG (ops[1]);
4429       machine_mode imode = int_mode_for_mode (GET_MODE (from));
4430 
4431       gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4432 		  && GET_MODE_CLASS (imode) == MODE_INT
4433 		  && subreg_lowpart_p (ops[1]));
4434 
4435       if (GET_MODE_SIZE (imode) < 4)
4436 	imode = SImode;
4437       if (imode != GET_MODE (from))
4438 	from = gen_rtx_SUBREG (imode, from, 0);
4439 
4440       if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4441 	{
4442 	  enum insn_code icode = convert_optab_handler (trunc_optab,
4443 							mode, imode);
4444 	  emit_insn (GEN_FCN (icode) (ops[0], from));
4445 	}
4446       else
4447 	emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4448       return 1;
4449     }
4450 
4451   /* At least one of the operands needs to be a register. */
4452   if ((reload_in_progress | reload_completed) == 0
4453       && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4454     {
4455       rtx temp = force_reg (mode, ops[1]);
4456       emit_move_insn (ops[0], temp);
4457       return 1;
4458     }
4459   if (reload_in_progress || reload_completed)
4460     {
4461       if (CONSTANT_P (ops[1]))
4462 	return spu_split_immediate (ops);
4463       return 0;
4464     }
4465 
4466   /* Catch the SImode immediates greater than 0x7fffffff, and sign
4467      extend them. */
4468   if (GET_CODE (ops[1]) == CONST_INT)
4469     {
4470       HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4471       if (val != INTVAL (ops[1]))
4472 	{
4473 	  emit_move_insn (ops[0], GEN_INT (val));
4474 	  return 1;
4475 	}
4476     }
4477   if (MEM_P (ops[0]))
4478     {
4479       if (MEM_ADDR_SPACE (ops[0]))
4480 	ops[0] = expand_ea_mem (ops[0], true);
4481       return spu_split_store (ops);
4482     }
4483   if (MEM_P (ops[1]))
4484     {
4485       if (MEM_ADDR_SPACE (ops[1]))
4486 	ops[1] = expand_ea_mem (ops[1], false);
4487       return spu_split_load (ops);
4488     }
4489 
4490   return 0;
4491 }
4492 
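/* Copy SRC, a TImode register, into DST, whose mode is narrower than a
   quadword: shift the preferred-slot bytes down to the low end, truncate
   to the matching integer mode, and subreg the result into DST's mode
   when DST is not integral.  */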
4493 static void
4494 spu_convert_move (rtx dst, rtx src)
4495 {
4496   machine_mode mode = GET_MODE (dst);
4497   machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4498   rtx reg;
4499   gcc_assert (GET_MODE (src) == TImode);
4500   reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4501   emit_insn (gen_rtx_SET (reg,
4502 	       gen_rtx_TRUNCATE (int_mode,
4503 		 gen_rtx_LSHIFTRT (TImode, src,
4504 		   GEN_INT (int_mode == DImode ? 64 : 96)))));
4505   if (int_mode != mode)
4506     {
4507       reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4508       emit_move_insn (dst, reg);
4509     }
4510 }
4511 
4512 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4513    the address from SRC and SRC+16.  Return a REG or CONST_INT that
4514    specifies how many bytes to rotate the loaded registers, plus any
4515    extra from EXTRA_ROTQBY.  The address and rotate amounts are
4516    normalized to improve merging of loads and rotate computations. */
4517 static rtx
4518 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4519 {
4520   rtx addr = XEXP (src, 0);
4521   rtx p0, p1, rot, addr0, addr1;
4522   int rot_amt;
4523 
4524   rot = 0;
4525   rot_amt = 0;
4526 
4527   if (MEM_ALIGN (src) >= 128)
4528     /* Address is already aligned; simply perform a TImode load.  */ ;
4529   else if (GET_CODE (addr) == PLUS)
4530     {
4531       /* 8 cases:
4532          aligned reg   + aligned reg     => lqx
4533          aligned reg   + unaligned reg   => lqx, rotqby
4534          aligned reg   + aligned const   => lqd
4535          aligned reg   + unaligned const => lqd, rotqbyi
4536          unaligned reg + aligned reg     => lqx, rotqby
4537          unaligned reg + unaligned reg   => lqx, a, rotqby (1 scratch)
4538          unaligned reg + aligned const   => lqd, rotqby
4539          unaligned reg + unaligned const -> not allowed by legitimate address
4540        */
4541       p0 = XEXP (addr, 0);
4542       p1 = XEXP (addr, 1);
4543       if (!reg_aligned_for_addr (p0))
4544 	{
4545 	  if (REG_P (p1) && !reg_aligned_for_addr (p1))
4546 	    {
4547 	      rot = gen_reg_rtx (SImode);
4548 	      emit_insn (gen_addsi3 (rot, p0, p1));
4549 	    }
4550 	  else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4551 	    {
4552 	      if (INTVAL (p1) > 0
4553 		  && REG_POINTER (p0)
4554 		  && INTVAL (p1) * BITS_PER_UNIT
4555 		     < REGNO_POINTER_ALIGN (REGNO (p0)))
4556 		{
4557 		  rot = gen_reg_rtx (SImode);
4558 		  emit_insn (gen_addsi3 (rot, p0, p1));
4559 		  addr = p0;
4560 		}
4561 	      else
4562 		{
4563 		  rtx x = gen_reg_rtx (SImode);
4564 		  emit_move_insn (x, p1);
4565 		  if (!spu_arith_operand (p1, SImode))
4566 		    p1 = x;
4567 		  rot = gen_reg_rtx (SImode);
4568 		  emit_insn (gen_addsi3 (rot, p0, p1));
4569 		  addr = gen_rtx_PLUS (Pmode, p0, x);
4570 		}
4571 	    }
4572 	  else
4573 	    rot = p0;
4574 	}
4575       else
4576 	{
4577 	  if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4578 	    {
4579 	      rot_amt = INTVAL (p1) & 15;
4580 	      if (INTVAL (p1) & -16)
4581 		{
4582 		  p1 = GEN_INT (INTVAL (p1) & -16);
4583 		  addr = gen_rtx_PLUS (SImode, p0, p1);
4584 		}
4585 	      else
4586 		addr = p0;
4587 	    }
4588 	  else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4589 	    rot = p1;
4590 	}
4591     }
4592   else if (REG_P (addr))
4593     {
4594       if (!reg_aligned_for_addr (addr))
4595 	rot = addr;
4596     }
4597   else if (GET_CODE (addr) == CONST)
4598     {
4599       if (GET_CODE (XEXP (addr, 0)) == PLUS
4600 	  && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4601 	  && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4602 	{
4603 	  rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4604 	  if (rot_amt & -16)
4605 	    addr = gen_rtx_CONST (Pmode,
4606 				  gen_rtx_PLUS (Pmode,
4607 						XEXP (XEXP (addr, 0), 0),
4608 						GEN_INT (rot_amt & -16)));
4609 	  else
4610 	    addr = XEXP (XEXP (addr, 0), 0);
4611 	}
4612       else
4613 	{
4614 	  rot = gen_reg_rtx (Pmode);
4615 	  emit_move_insn (rot, addr);
4616 	}
4617     }
4618   else if (GET_CODE (addr) == CONST_INT)
4619     {
4620       rot_amt = INTVAL (addr);
4621       addr = GEN_INT (rot_amt & -16);
4622     }
4623   else if (!ALIGNED_SYMBOL_REF_P (addr))
4624     {
4625       rot = gen_reg_rtx (Pmode);
4626       emit_move_insn (rot, addr);
4627     }
4628 
4629   rot_amt += extra_rotby;
4630 
4631   rot_amt &= 15;
4632 
4633   if (rot && rot_amt)
4634     {
4635       rtx x = gen_reg_rtx (SImode);
4636       emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4637       rot = x;
4638       rot_amt = 0;
4639     }
4640   if (!rot && rot_amt)
4641     rot = GEN_INT (rot_amt);
4642 
4643   addr0 = copy_rtx (addr);
4644   addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4645   emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4646 
4647   if (dst1)
4648     {
4649       addr1 = plus_constant (SImode, copy_rtx (addr), 16);
4650       addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4651       emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4652     }
4653 
4654   return rot;
4655 }
4656 
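/* Split a load of MEM ops[1] into ops[0] when the mode is narrower than
   16 bytes: load the containing quadword, rotate the requested bytes
   into the preferred slot when the address is not known to be aligned,
   and convert into ops[0].  Return 0 when the mode is at least 16 bytes
   wide so the caller emits the move itself.  */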
4657 int
4658 spu_split_load (rtx * ops)
4659 {
4660   machine_mode mode = GET_MODE (ops[0]);
4661   rtx addr, load, rot;
4662   int rot_amt;
4663 
4664   if (GET_MODE_SIZE (mode) >= 16)
4665     return 0;
4666 
4667   addr = XEXP (ops[1], 0);
4668   gcc_assert (GET_CODE (addr) != AND);
4669 
4670   if (!address_needs_split (ops[1]))
4671     {
4672       ops[1] = change_address (ops[1], TImode, addr);
4673       load = gen_reg_rtx (TImode);
4674       emit_insn (gen__movti (load, ops[1]));
4675       spu_convert_move (ops[0], load);
4676       return 1;
4677     }
4678 
4679   rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4680 
4681   load = gen_reg_rtx (TImode);
4682   rot = spu_expand_load (load, 0, ops[1], rot_amt);
4683 
4684   if (rot)
4685     emit_insn (gen_rotqby_ti (load, load, rot));
4686 
4687   spu_convert_move (ops[0], load);
4688   return 1;
4689 }
4690 
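/* Split a store of ops[1] into MEM ops[0] when the mode is narrower than
   16 bytes.  When the access is known to cover a whole padded, aligned
   object, the value is simply positioned and stored; otherwise the
   surrounding quadword is loaded, the new bytes are merged in with a
   generated shuffle (cpat) mask, and the quadword is written back.
   Return 0 when the mode is at least 16 bytes wide.  */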
4691 int
4692 spu_split_store (rtx * ops)
4693 {
4694   machine_mode mode = GET_MODE (ops[0]);
4695   rtx reg;
4696   rtx addr, p0, p1, p1_lo, smem;
4697   int aform;
4698   int scalar;
4699 
4700   if (GET_MODE_SIZE (mode) >= 16)
4701     return 0;
4702 
4703   addr = XEXP (ops[0], 0);
4704   gcc_assert (GET_CODE (addr) != AND);
4705 
4706   if (!address_needs_split (ops[0]))
4707     {
4708       reg = gen_reg_rtx (TImode);
4709       emit_insn (gen_spu_convert (reg, ops[1]));
4710       ops[0] = change_address (ops[0], TImode, addr);
4711       emit_move_insn (ops[0], reg);
4712       return 1;
4713     }
4714 
4715   if (GET_CODE (addr) == PLUS)
4716     {
4717       /* 8 cases:
4718          aligned reg   + aligned reg     => lqx, c?x, shuf, stqx
4719          aligned reg   + unaligned reg   => lqx, c?x, shuf, stqx
4720          aligned reg   + aligned const   => lqd, c?d, shuf, stqx
4721          aligned reg   + unaligned const => lqd, c?d, shuf, stqx
4722          unaligned reg + aligned reg     => lqx, c?x, shuf, stqx
4723          unaligned reg + unaligned reg   => lqx, c?x, shuf, stqx
4724          unaligned reg + aligned const   => lqd, c?d, shuf, stqx
4725          unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
4726        */
4727       aform = 0;
4728       p0 = XEXP (addr, 0);
4729       p1 = p1_lo = XEXP (addr, 1);
4730       if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4731 	{
4732 	  p1_lo = GEN_INT (INTVAL (p1) & 15);
4733 	  if (reg_aligned_for_addr (p0))
4734 	    {
4735 	      p1 = GEN_INT (INTVAL (p1) & -16);
4736 	      if (p1 == const0_rtx)
4737 		addr = p0;
4738 	      else
4739 		addr = gen_rtx_PLUS (SImode, p0, p1);
4740 	    }
4741 	  else
4742 	    {
4743 	      rtx x = gen_reg_rtx (SImode);
4744 	      emit_move_insn (x, p1);
4745 	      addr = gen_rtx_PLUS (SImode, p0, x);
4746 	    }
4747 	}
4748     }
4749   else if (REG_P (addr))
4750     {
4751       aform = 0;
4752       p0 = addr;
4753       p1 = p1_lo = const0_rtx;
4754     }
4755   else
4756     {
4757       aform = 1;
4758       p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4759       p1 = 0;			/* aform doesn't use p1 */
4760       p1_lo = addr;
4761       if (ALIGNED_SYMBOL_REF_P (addr))
4762 	p1_lo = const0_rtx;
4763       else if (GET_CODE (addr) == CONST
4764 	       && GET_CODE (XEXP (addr, 0)) == PLUS
4765 	       && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4766 	       && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4767 	{
4768 	  HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4769 	  if ((v & -16) != 0)
4770 	    addr = gen_rtx_CONST (Pmode,
4771 				  gen_rtx_PLUS (Pmode,
4772 						XEXP (XEXP (addr, 0), 0),
4773 						GEN_INT (v & -16)));
4774 	  else
4775 	    addr = XEXP (XEXP (addr, 0), 0);
4776 	  p1_lo = GEN_INT (v & 15);
4777 	}
4778       else if (GET_CODE (addr) == CONST_INT)
4779 	{
4780 	  p1_lo = GEN_INT (INTVAL (addr) & 15);
4781 	  addr = GEN_INT (INTVAL (addr) & -16);
4782 	}
4783       else
4784 	{
4785 	  p1_lo = gen_reg_rtx (SImode);
4786 	  emit_move_insn (p1_lo, addr);
4787 	}
4788     }
4789 
4790   gcc_assert (aform == 0 || aform == 1);
4791   reg = gen_reg_rtx (TImode);
4792 
4793   scalar = store_with_one_insn_p (ops[0]);
4794   if (!scalar)
4795     {
4796       /* We could copy the flags from the ops[0] MEM to lmem here.
4797          We don't, because we want this load to be optimized away if
4798          possible, and copying the flags would prevent that in certain
4799          cases, e.g. consider the volatile flag. */
4800 
4801       rtx pat = gen_reg_rtx (TImode);
4802       rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4803       set_mem_alias_set (lmem, 0);
4804       emit_insn (gen_movti (reg, lmem));
4805 
4806       if (!p0 || reg_aligned_for_addr (p0))
4807 	p0 = stack_pointer_rtx;
4808       if (!p1_lo)
4809 	p1_lo = const0_rtx;
4810 
4811       emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4812       emit_insn (gen_shufb (reg, ops[1], reg, pat));
4813     }
4814   else
4815     {
4816       if (GET_CODE (ops[1]) == REG)
4817 	emit_insn (gen_spu_convert (reg, ops[1]));
4818       else if (GET_CODE (ops[1]) == SUBREG)
4819 	emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4820       else
4821 	abort ();
4822     }
4823 
4824   if (GET_MODE_SIZE (mode) < 4 && scalar)
4825     emit_insn (gen_ashlti3
4826 	       (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
4827 
4828   smem = change_address (ops[0], TImode, copy_rtx (addr));
4829   /* We can't use the previous alias set because the memory has changed
4830      size and can potentially overlap objects of other types.  */
4831   set_mem_alias_set (smem, 0);
4832 
4833   emit_insn (gen_movti (smem, reg));
4834   return 1;
4835 }
4836 
4837 /* Return TRUE if X is MEM which is a struct member reference
4838    and the member can safely be loaded and stored with a single
4839    instruction because it is padded. */
4840 static int
4841 mem_is_padded_component_ref (rtx x)
4842 {
4843   tree t = MEM_EXPR (x);
4844   tree r;
4845   if (!t || TREE_CODE (t) != COMPONENT_REF)
4846     return 0;
4847   t = TREE_OPERAND (t, 1);
4848   if (!t || TREE_CODE (t) != FIELD_DECL
4849       || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4850     return 0;
4851   /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4852   r = DECL_FIELD_CONTEXT (t);
4853   if (!r || TREE_CODE (r) != RECORD_TYPE)
4854     return 0;
4855   /* Make sure they are the same mode */
4856   if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4857     return 0;
4858   /* If there are no following fields then the field alignment assures
4859      the structure is padded to the alignment which means this field is
4860      padded too.  */
4861   if (TREE_CHAIN (t) == 0)
4862     return 1;
4863   /* If the following field is also aligned then this field will be
4864      padded. */
4865   t = TREE_CHAIN (t);
4866   if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4867     return 1;
4868   return 0;
4869 }
4870 
4871 /* Parse the -mfixed-range= option string.  */
4872 static void
4873 fix_range (const char *const_str)
4874 {
4875   int i, first, last;
4876   char *str, *dash, *comma;
4877 
4878   /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4879      REG2 are either register names or register numbers.  The effect
4880      of this option is to mark the registers in the range from REG1 to
4881      REG2 as ``fixed'' so they won't be used by the compiler.  */
4882 
4883   i = strlen (const_str);
4884   str = (char *) alloca (i + 1);
4885   memcpy (str, const_str, i + 1);
4886 
4887   while (1)
4888     {
4889       dash = strchr (str, '-');
4890       if (!dash)
4891 	{
4892 	  warning (0, "value of -mfixed-range must have form REG1-REG2");
4893 	  return;
4894 	}
4895       *dash = '\0';
4896       comma = strchr (dash + 1, ',');
4897       if (comma)
4898 	*comma = '\0';
4899 
4900       first = decode_reg_name (str);
4901       if (first < 0)
4902 	{
4903 	  warning (0, "unknown register name: %s", str);
4904 	  return;
4905 	}
4906 
4907       last = decode_reg_name (dash + 1);
4908       if (last < 0)
4909 	{
4910 	  warning (0, "unknown register name: %s", dash + 1);
4911 	  return;
4912 	}
4913 
4914       *dash = '-';
4915 
4916       if (first > last)
4917 	{
4918 	  warning (0, "%s-%s is an empty range", str, dash + 1);
4919 	  return;
4920 	}
4921 
4922       for (i = first; i <= last; ++i)
4923 	fixed_regs[i] = call_used_regs[i] = 1;
4924 
4925       if (!comma)
4926 	break;
4927 
4928       *comma = ',';
4929       str = comma + 1;
4930     }
4931 }
4932 
4933 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4934    can be generated using the fsmbi instruction. */
4935 int
4936 fsmbi_const_p (rtx x)
4937 {
4938   if (CONSTANT_P (x))
4939     {
4940       /* We can always choose TImode for CONST_INT because the high bits
4941          of an SImode will always be all 1s, i.e., valid for fsmbi. */
4942       enum immediate_class c = classify_immediate (x, TImode);
4943       return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4944     }
4945   return 0;
4946 }
4947 
4948 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4949    can be generated using the cbd, chd, cwd or cdd instruction. */
4950 int
4951 cpat_const_p (rtx x, machine_mode mode)
4952 {
4953   if (CONSTANT_P (x))
4954     {
4955       enum immediate_class c = classify_immediate (x, mode);
4956       return c == IC_CPAT;
4957     }
4958   return 0;
4959 }
4960 
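/* Build the TImode shuffle-control constant used by the cpat patterns
   when the insertion offset is known: every byte selects the original
   quadword (0x10..0x1f) except for the ISIZE bytes at the computed
   offset, which select the preferred-slot bytes of the value being
   inserted.  ops[1] and ops[2] are the address operands and ops[3] is
   the access size; return 0 when the mask cannot be determined.  */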
4961 rtx
4962 gen_cpat_const (rtx * ops)
4963 {
4964   unsigned char dst[16];
4965   int i, offset, shift, isize;
4966   if (GET_CODE (ops[3]) != CONST_INT
4967       || GET_CODE (ops[2]) != CONST_INT
4968       || (GET_CODE (ops[1]) != CONST_INT
4969 	  && GET_CODE (ops[1]) != REG))
4970     return 0;
4971   if (GET_CODE (ops[1]) == REG
4972       && (!REG_POINTER (ops[1])
4973 	  || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4974     return 0;
4975 
4976   for (i = 0; i < 16; i++)
4977     dst[i] = i + 16;
4978   isize = INTVAL (ops[3]);
4979   if (isize == 1)
4980     shift = 3;
4981   else if (isize == 2)
4982     shift = 2;
4983   else
4984     shift = 0;
4985   offset = (INTVAL (ops[2]) +
4986 	    (GET_CODE (ops[1]) ==
4987 	     CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
4988   for (i = 0; i < isize; i++)
4989     dst[offset + i] = i + shift;
4990   return array_to_constant (TImode, dst);
4991 }
4992 
4993 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
4994    array.  Use MODE for CONST_INT's.  When the constant's mode is smaller
4995    than 16 bytes, the value is repeated across the rest of the array. */
4996 void
4997 constant_to_array (machine_mode mode, rtx x, unsigned char arr[16])
4998 {
4999   HOST_WIDE_INT val;
5000   int i, j, first;
5001 
5002   memset (arr, 0, 16);
5003   mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5004   if (GET_CODE (x) == CONST_INT
5005       || (GET_CODE (x) == CONST_DOUBLE
5006 	  && (mode == SFmode || mode == DFmode)))
5007     {
5008       gcc_assert (mode != VOIDmode && mode != BLKmode);
5009 
5010       if (GET_CODE (x) == CONST_DOUBLE)
5011 	val = const_double_to_hwint (x);
5012       else
5013 	val = INTVAL (x);
5014       first = GET_MODE_SIZE (mode) - 1;
5015       for (i = first; i >= 0; i--)
5016 	{
5017 	  arr[i] = val & 0xff;
5018 	  val >>= 8;
5019 	}
5020       /* Splat the constant across the whole array. */
5021       for (j = 0, i = first + 1; i < 16; i++)
5022 	{
5023 	  arr[i] = arr[j];
5024 	  j = (j == first) ? 0 : j + 1;
5025 	}
5026     }
5027   else if (GET_CODE (x) == CONST_DOUBLE)
5028     {
5029       val = CONST_DOUBLE_LOW (x);
5030       for (i = 15; i >= 8; i--)
5031 	{
5032 	  arr[i] = val & 0xff;
5033 	  val >>= 8;
5034 	}
5035       val = CONST_DOUBLE_HIGH (x);
5036       for (i = 7; i >= 0; i--)
5037 	{
5038 	  arr[i] = val & 0xff;
5039 	  val >>= 8;
5040 	}
5041     }
5042   else if (GET_CODE (x) == CONST_VECTOR)
5043     {
5044       int units;
5045       rtx elt;
5046       mode = GET_MODE_INNER (mode);
5047       units = CONST_VECTOR_NUNITS (x);
5048       for (i = 0; i < units; i++)
5049 	{
5050 	  elt = CONST_VECTOR_ELT (x, i);
5051 	  if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5052 	    {
5053 	      if (GET_CODE (elt) == CONST_DOUBLE)
5054 		val = const_double_to_hwint (elt);
5055 	      else
5056 		val = INTVAL (elt);
5057 	      first = GET_MODE_SIZE (mode) - 1;
5058 	      if (first + i * GET_MODE_SIZE (mode) > 16)
5059 		abort ();
5060 	      for (j = first; j >= 0; j--)
5061 		{
5062 		  arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5063 		  val >>= 8;
5064 		}
5065 	    }
5066 	}
5067     }
5068   else
5069     gcc_unreachable();
5070 }
5071 
5072 /* Convert a 16 byte array to a constant of mode MODE.  When MODE is
5073    smaller than 16 bytes, use the bytes that would represent that value
5074    in a register, e.g., for QImode return the value of arr[3].  */
5075 rtx
5076 array_to_constant (machine_mode mode, const unsigned char arr[16])
5077 {
5078   machine_mode inner_mode;
5079   rtvec v;
5080   int units, size, i, j, k;
5081   HOST_WIDE_INT val;
5082 
5083   if (GET_MODE_CLASS (mode) == MODE_INT
5084       && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5085     {
5086       j = GET_MODE_SIZE (mode);
5087       i = j < 4 ? 4 - j : 0;
5088       for (val = 0; i < j; i++)
5089 	val = (val << 8) | arr[i];
5090       val = trunc_int_for_mode (val, mode);
5091       return GEN_INT (val);
5092     }
5093 
5094   if (mode == TImode)
5095     {
5096       HOST_WIDE_INT high;
5097       for (i = high = 0; i < 8; i++)
5098 	high = (high << 8) | arr[i];
5099       for (i = 8, val = 0; i < 16; i++)
5100 	val = (val << 8) | arr[i];
5101       return immed_double_const (val, high, TImode);
5102     }
5103   if (mode == SFmode)
5104     {
5105       val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5106       val = trunc_int_for_mode (val, SImode);
5107       return hwint_to_const_double (SFmode, val);
5108     }
5109   if (mode == DFmode)
5110     {
5111       for (i = 0, val = 0; i < 8; i++)
5112 	val = (val << 8) | arr[i];
5113       return hwint_to_const_double (DFmode, val);
5114     }
5115 
5116   if (!VECTOR_MODE_P (mode))
5117     abort ();
5118 
5119   units = GET_MODE_NUNITS (mode);
5120   size = GET_MODE_UNIT_SIZE (mode);
5121   inner_mode = GET_MODE_INNER (mode);
5122   v = rtvec_alloc (units);
5123 
5124   for (k = i = 0; i < units; ++i)
5125     {
5126       val = 0;
5127       for (j = 0; j < size; j++, k++)
5128 	val = (val << 8) | arr[k];
5129 
5130       if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5131 	RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5132       else
5133 	RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5134     }
5135   if (k > 16)
5136     abort ();
5137 
5138   return gen_rtx_CONST_VECTOR (mode, v);
5139 }
5140 
5141 static void
5142 reloc_diagnostic (rtx x)
5143 {
5144   tree decl = 0;
5145   if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5146     return;
5147 
5148   if (GET_CODE (x) == SYMBOL_REF)
5149     decl = SYMBOL_REF_DECL (x);
5150   else if (GET_CODE (x) == CONST
5151 	   && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5152     decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5153 
5154   /* SYMBOL_REF_DECL is not necessarily a DECL. */
5155   if (decl && !DECL_P (decl))
5156     decl = 0;
5157 
5158   /* The decl could be a string constant.  */
5159   if (decl && DECL_P (decl))
5160     {
5161       location_t loc;
5162       /* We use last_assemble_variable_decl to get line information.  It's
5163 	 not always going to be right and might not even be close, but will
5164 	 be right for the more common cases. */
5165       if (!last_assemble_variable_decl || in_section == ctors_section)
5166 	loc = DECL_SOURCE_LOCATION (decl);
5167       else
5168 	loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5169 
5170       if (TARGET_WARN_RELOC)
5171 	warning_at (loc, 0,
5172 		    "creating run-time relocation for %qD", decl);
5173       else
5174 	error_at (loc,
5175 		  "creating run-time relocation for %qD", decl);
5176     }
5177   else
5178     {
5179       if (TARGET_WARN_RELOC)
5180 	warning_at (input_location, 0, "creating run-time relocation");
5181       else
5182 	error_at (input_location, "creating run-time relocation");
5183     }
5184 }
5185 
5186 /* Hook into assemble_integer so we can generate an error for run-time
5187    relocations.  The SPU ABI disallows them. */
5188 static bool
5189 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5190 {
5191   /* By default run-time relocations aren't supported, but we allow them
5192      in case users support them in their own run-time loader, and we provide
5193      a warning for those users who don't.  */
5194   if ((GET_CODE (x) == SYMBOL_REF)
5195       || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5196     reloc_diagnostic (x);
5197 
5198   return default_assemble_integer (x, size, aligned_p);
5199 }
5200 
5201 static void
5202 spu_asm_globalize_label (FILE * file, const char *name)
5203 {
5204   fputs ("\t.global\t", file);
5205   assemble_name (file, name);
5206   fputs ("\n", file);
5207 }
5208 
5209 static bool
5210 spu_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
5211 	       int opno ATTRIBUTE_UNUSED, int *total,
5212 	       bool speed ATTRIBUTE_UNUSED)
5213 {
5214   int code = GET_CODE (x);
5215   int cost = COSTS_N_INSNS (2);
5216 
5217   /* Folding to a CONST_VECTOR will use extra space but there might
5218      be only a small savings in cycles.  We'd like to use a CONST_VECTOR
5219      only if it allows us to fold away multiple insns.  Changing the cost
5220      of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5221      because this cost will only be compared against a single insn.
5222      if (code == CONST_VECTOR)
5223        return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5224    */
5225 
5226   /* Use defaults for float operations.  Not accurate but good enough. */
5227   if (mode == DFmode)
5228     {
5229       *total = COSTS_N_INSNS (13);
5230       return true;
5231     }
5232   if (mode == SFmode)
5233     {
5234       *total = COSTS_N_INSNS (6);
5235       return true;
5236     }
5237   switch (code)
5238     {
5239     case CONST_INT:
5240       if (satisfies_constraint_K (x))
5241 	*total = 0;
5242       else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5243 	*total = COSTS_N_INSNS (1);
5244       else
5245 	*total = COSTS_N_INSNS (3);
5246       return true;
5247 
5248     case CONST:
5249       *total = COSTS_N_INSNS (3);
5250       return true;
5251 
5252     case LABEL_REF:
5253     case SYMBOL_REF:
5254       *total = COSTS_N_INSNS (0);
5255       return true;
5256 
5257     case CONST_DOUBLE:
5258       *total = COSTS_N_INSNS (5);
5259       return true;
5260 
5261     case FLOAT_EXTEND:
5262     case FLOAT_TRUNCATE:
5263     case FLOAT:
5264     case UNSIGNED_FLOAT:
5265     case FIX:
5266     case UNSIGNED_FIX:
5267       *total = COSTS_N_INSNS (7);
5268       return true;
5269 
5270     case PLUS:
5271       if (mode == TImode)
5272 	{
5273 	  *total = COSTS_N_INSNS (9);
5274 	  return true;
5275 	}
5276       break;
5277 
5278     case MULT:
5279       cost = (GET_CODE (XEXP (x, 0)) == REG
5280 	      ? COSTS_N_INSNS (12)
5281 	      : COSTS_N_INSNS (7));
5282       if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5283 	{
5284 	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5285 	    {
5286 	      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5287 	      cost = COSTS_N_INSNS (14);
5288 	      if ((val & 0xffff) == 0)
5289 		cost = COSTS_N_INSNS (9);
5290 	      else if (val > 0 && val < 0x10000)
5291 		cost = COSTS_N_INSNS (11);
5292 	    }
5293 	}
5294       *total = cost;
5295       return true;
5296     case DIV:
5297     case UDIV:
5298     case MOD:
5299     case UMOD:
5300       *total = COSTS_N_INSNS (20);
5301       return true;
5302     case ROTATE:
5303     case ROTATERT:
5304     case ASHIFT:
5305     case ASHIFTRT:
5306     case LSHIFTRT:
5307       *total = COSTS_N_INSNS (4);
5308       return true;
5309     case UNSPEC:
5310       if (XINT (x, 1) == UNSPEC_CONVERT)
5311 	*total = COSTS_N_INSNS (0);
5312       else
5313 	*total = COSTS_N_INSNS (4);
5314       return true;
5315     }
5316   /* Scale cost by mode size.  Except when initializing (cfun->decl == 0). */
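  /* For example, a DImode integer op (twice the width of SImode) ends up
     costing 4x the base cost and a TImode op 16x, roughly reflecting the
     word-at-a-time expansion of wide operations.  */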
5317   if (GET_MODE_CLASS (mode) == MODE_INT
5318       && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5319     cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5320       * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5321   *total = cost;
5322   return true;
5323 }
5324 
5325 static machine_mode
5326 spu_unwind_word_mode (void)
5327 {
5328   return SImode;
5329 }
5330 
5331 /* Decide whether we can make a sibling call to a function.  DECL is the
5332    declaration of the function being targeted by the call and EXP is the
5333    CALL_EXPR representing the call.  */
5334 static bool
5335 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5336 {
5337   return decl && !TARGET_LARGE_MEM;
5338 }
5339 
5340 /* We need to correctly update the back chain pointer and the Available
5341    Stack Size (which is in the second slot of the sp register).  */
5342 void
5343 spu_allocate_stack (rtx op0, rtx op1)
5344 {
5345   HOST_WIDE_INT v;
5346   rtx chain = gen_reg_rtx (V4SImode);
5347   rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5348   rtx sp = gen_reg_rtx (V4SImode);
5349   rtx splatted = gen_reg_rtx (V4SImode);
5350   rtx pat = gen_reg_rtx (TImode);
5351 
5352   /* copy the back chain so we can save it back again. */
5353   emit_move_insn (chain, stack_bot);
5354 
5355   op1 = force_reg (SImode, op1);
5356 
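  /* The TImode pattern built below is the shufb control
     {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3}, which replicates the SImode
     allocation size in OP1 into every word of SPLATTED so it can be
     subtracted from both the stack pointer and the Available Stack Size.  */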
5357   v = 0x1020300010203ll;
5358   emit_move_insn (pat, immed_double_const (v, v, TImode));
5359   emit_insn (gen_shufb (splatted, op1, op1, pat));
5360 
5361   emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5362   emit_insn (gen_subv4si3 (sp, sp, splatted));
5363 
5364   if (flag_stack_check)
5365     {
5366       rtx avail = gen_reg_rtx (SImode);
5367       rtx result = gen_reg_rtx (SImode);
5368       emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5369       emit_insn (gen_cgt_si (result, avail, GEN_INT (-1)));
5370       emit_insn (gen_spu_heq (result, GEN_INT (0)));
5371     }
5372 
5373   emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5374 
5375   emit_move_insn (stack_bot, chain);
5376 
5377   emit_move_insn (op0, virtual_stack_dynamic_rtx);
5378 }
5379 
5380 void
5381 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5382 {
5383   static unsigned char arr[16] =
5384     { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
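  /* ARR encodes a shufb control that copies bytes 0-3 (an SImode value in
     the preferred slot) into every word, i.e. it splats SImode to
     V4SImode.  */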
5385   rtx temp = gen_reg_rtx (SImode);
5386   rtx temp2 = gen_reg_rtx (SImode);
5387   rtx temp3 = gen_reg_rtx (V4SImode);
5388   rtx temp4 = gen_reg_rtx (V4SImode);
5389   rtx pat = gen_reg_rtx (TImode);
5390   rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5391 
5392   /* Restore the backchain from the first word, sp from the second.  */
5393   emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5394   emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5395 
5396   emit_move_insn (pat, array_to_constant (TImode, arr));
5397 
5398   /* Compute Available Stack Size for sp */
5399   emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5400   emit_insn (gen_shufb (temp3, temp, temp, pat));
5401 
5402   /* Compute Available Stack Size for back chain */
5403   emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5404   emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5405   emit_insn (gen_addv4si3 (temp4, sp, temp4));
5406 
5407   emit_insn (gen_addv4si3 (sp, sp, temp3));
5408   emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5409 }
5410 
5411 static void
5412 spu_init_libfuncs (void)
5413 {
5414   set_optab_libfunc (smul_optab, DImode, "__muldi3");
5415   set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5416   set_optab_libfunc (smod_optab, DImode, "__moddi3");
5417   set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5418   set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5419   set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5420   set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5421   set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5422   set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5423   set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
5424   set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5425   set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5426 
5427   set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5428   set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5429 
5430   set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5431   set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5432   set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5433   set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5434   set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5435   set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5436   set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5437   set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5438   set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5439   set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5440   set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5441   set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5442 
5443   set_optab_libfunc (smul_optab, TImode, "__multi3");
5444   set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5445   set_optab_libfunc (smod_optab, TImode, "__modti3");
5446   set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5447   set_optab_libfunc (umod_optab, TImode, "__umodti3");
5448   set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5449 }
5450 
5451 /* Make a subreg, stripping any existing subreg.  We could possibly just
5452    call simplify_subreg, but in this case we know what we want. */
5453 rtx
5454 spu_gen_subreg (machine_mode mode, rtx x)
5455 {
5456   if (GET_CODE (x) == SUBREG)
5457     x = SUBREG_REG (x);
5458   if (GET_MODE (x) == mode)
5459     return x;
5460   return gen_rtx_SUBREG (mode, x, 0);
5461 }
5462 
5463 static bool
5464 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5465 {
5466   return (TYPE_MODE (type) == BLKmode
5467 	  && ((type) == 0
5468 	      || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5469 	      || int_size_in_bytes (type) >
5470 	      (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5471 }
5472 
5473 /* Create the built-in types and functions */
5474 
5475 enum spu_function_code
5476 {
5477 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5478 #include "spu-builtins.def"
5479 #undef DEF_BUILTIN
5480    NUM_SPU_BUILTINS
5481 };
5482 
5483 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5484 
5485 struct spu_builtin_description spu_builtins[] = {
5486 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5487   {fcode, icode, name, type, params},
5488 #include "spu-builtins.def"
5489 #undef DEF_BUILTIN
5490 };
5491 
5492 static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5493 
5494 /* Returns the spu builtin decl for CODE.  */
5495 
5496 static tree
5497 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5498 {
5499   if (code >= NUM_SPU_BUILTINS)
5500     return error_mark_node;
5501 
5502   return spu_builtin_decls[code];
5503 }
5504 
5505 
5506 static void
5507 spu_init_builtins (void)
5508 {
5509   struct spu_builtin_description *d;
5510   unsigned int i;
5511 
5512   V16QI_type_node = build_vector_type (intQI_type_node, 16);
5513   V8HI_type_node = build_vector_type (intHI_type_node, 8);
5514   V4SI_type_node = build_vector_type (intSI_type_node, 4);
5515   V2DI_type_node = build_vector_type (intDI_type_node, 2);
5516   V4SF_type_node = build_vector_type (float_type_node, 4);
5517   V2DF_type_node = build_vector_type (double_type_node, 2);
5518 
5519   unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5520   unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5521   unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5522   unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5523 
5524   spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5525 
5526   spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5527   spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5528   spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5529   spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5530   spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5531   spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5532   spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5533   spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5534   spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5535   spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5536   spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5537   spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5538 
5539   spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5540   spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5541   spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5542   spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5543   spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5544   spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5545   spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5546   spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5547 
5548   spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5549   spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5550 
5551   spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5552 
5553   spu_builtin_types[SPU_BTI_PTR] =
5554     build_pointer_type (build_qualified_type
5555 			(void_type_node,
5556 			 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5557 
5558   /* For each builtin we build a new prototype.  The tree code will make
5559      sure nodes are shared. */
5560   for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5561     {
5562       tree p;
5563       char name[64];		/* build_function will make a copy. */
5564       int parm;
5565 
5566       if (d->name == 0)
5567 	continue;
5568 
5569       /* Find last parm.  */
5570       for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5571 	;
5572 
5573       p = void_list_node;
5574       while (parm > 1)
5575 	p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5576 
5577       p = build_function_type (spu_builtin_types[d->parm[0]], p);
5578 
5579       sprintf (name, "__builtin_%s", d->name);
5580       spu_builtin_decls[i] =
5581 	add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5582       if (d->fcode == SPU_MASK_FOR_LOAD)
5583 	TREE_READONLY (spu_builtin_decls[i]) = 1;
5584 
5585       /* These builtins don't throw.  */
5586       TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5587     }
5588 }
5589 
5590 void
5591 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5592 {
5593   static unsigned char arr[16] =
5594     { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5595 
5596   rtx temp = gen_reg_rtx (Pmode);
5597   rtx temp2 = gen_reg_rtx (V4SImode);
5598   rtx temp3 = gen_reg_rtx (V4SImode);
5599   rtx pat = gen_reg_rtx (TImode);
5600   rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5601 
5602   emit_move_insn (pat, array_to_constant (TImode, arr));
5603 
5604   /* Restore the sp.  */
5605   emit_move_insn (temp, op1);
5606   emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5607 
5608   /* Compute available stack size for sp.  */
5609   emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5610   emit_insn (gen_shufb (temp3, temp, temp, pat));
5611 
5612   emit_insn (gen_addv4si3 (sp, sp, temp3));
5613   emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5614 }
5615 
5616 int
5617 spu_safe_dma (HOST_WIDE_INT channel)
5618 {
5619   return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5620 }
5621 
5622 void
5623 spu_builtin_splats (rtx ops[])
5624 {
5625   machine_mode mode = GET_MODE (ops[0]);
5626   if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5627     {
5628       unsigned char arr[16];
5629       constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5630       emit_move_insn (ops[0], array_to_constant (mode, arr));
5631     }
5632   else
5633     {
5634       rtx reg = gen_reg_rtx (TImode);
5635       rtx shuf;
5636       if (GET_CODE (ops[1]) != REG
5637 	  && GET_CODE (ops[1]) != SUBREG)
5638 	ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5639       switch (mode)
5640 	{
5641 	case V2DImode:
5642 	case V2DFmode:
5643 	  shuf =
5644 	    immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5645 				TImode);
5646 	  break;
5647 	case V4SImode:
5648 	case V4SFmode:
5649 	  shuf =
5650 	    immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5651 				TImode);
5652 	  break;
5653 	case V8HImode:
5654 	  shuf =
5655 	    immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5656 				TImode);
5657 	  break;
5658 	case V16QImode:
5659 	  shuf =
5660 	    immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5661 				TImode);
5662 	  break;
5663 	default:
5664 	  abort ();
5665 	}
5666       emit_move_insn (reg, shuf);
5667       emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5668     }
5669 }
5670 
5671 void
5672 spu_builtin_extract (rtx ops[])
5673 {
5674   machine_mode mode;
5675   rtx rot, from, tmp;
5676 
5677   mode = GET_MODE (ops[1]);
5678 
5679   if (GET_CODE (ops[2]) == CONST_INT)
5680     {
5681       switch (mode)
5682 	{
5683 	case V16QImode:
5684 	  emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5685 	  break;
5686 	case V8HImode:
5687 	  emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5688 	  break;
5689 	case V4SFmode:
5690 	  emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5691 	  break;
5692 	case V4SImode:
5693 	  emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5694 	  break;
5695 	case V2DImode:
5696 	  emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5697 	  break;
5698 	case V2DFmode:
5699 	  emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5700 	  break;
5701 	default:
5702 	  abort ();
5703 	}
5704       return;
5705     }
5706 
5707   from = spu_gen_subreg (TImode, ops[1]);
5708   rot = gen_reg_rtx (TImode);
5709   tmp = gen_reg_rtx (SImode);
5710 
5711   switch (mode)
5712     {
5713     case V16QImode:
5714       emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5715       break;
5716     case V8HImode:
5717       emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5718       emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5719       break;
5720     case V4SFmode:
5721     case V4SImode:
5722       emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5723       break;
5724     case V2DImode:
5725     case V2DFmode:
5726       emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5727       break;
5728     default:
5729       abort ();
5730     }
5731   emit_insn (gen_rotqby_ti (rot, from, tmp));
5732 
5733   emit_insn (gen_spu_convert (ops[0], rot));
5734 }
5735 
5736 void
5737 spu_builtin_insert (rtx ops[])
5738 {
5739   machine_mode mode = GET_MODE (ops[0]);
5740   machine_mode imode = GET_MODE_INNER (mode);
5741   rtx mask = gen_reg_rtx (TImode);
5742   rtx offset;
5743 
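  /* The cpat pattern computed below is used as the shufb control: it takes
     GET_MODE_SIZE (imode) bytes from ops[1] (the new element) at the byte
     offset of element ops[3] and every other byte from ops[2] (the vector),
     the usual SPU insert-into-quadword idiom.  */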
5744   if (GET_CODE (ops[3]) == CONST_INT)
5745     offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5746   else
5747     {
5748       offset = gen_reg_rtx (SImode);
5749       emit_insn (gen_mulsi3
5750 		 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5751     }
5752   emit_insn (gen_cpat
5753 	     (mask, stack_pointer_rtx, offset,
5754 	      GEN_INT (GET_MODE_SIZE (imode))));
5755   emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5756 }
5757 
5758 void
5759 spu_builtin_promote (rtx ops[])
5760 {
5761   machine_mode mode, imode;
5762   rtx rot, from, offset;
5763   HOST_WIDE_INT pos;
5764 
5765   mode = GET_MODE (ops[0]);
5766   imode = GET_MODE_INNER (mode);
5767 
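  /* Rotate the quadword so that element ops[2] of the source lands in the
     preferred slot of the result; the byte rotate count is either a
     compile-time constant or computed with a short insn sequence below.  */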
5768   from = gen_reg_rtx (TImode);
5769   rot = spu_gen_subreg (TImode, ops[0]);
5770 
5771   emit_insn (gen_spu_convert (from, ops[1]));
5772 
5773   if (GET_CODE (ops[2]) == CONST_INT)
5774     {
5775       pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5776       if (GET_MODE_SIZE (imode) < 4)
5777 	pos += 4 - GET_MODE_SIZE (imode);
5778       offset = GEN_INT (pos & 15);
5779     }
5780   else
5781     {
5782       offset = gen_reg_rtx (SImode);
5783       switch (mode)
5784 	{
5785 	case V16QImode:
5786 	  emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5787 	  break;
5788 	case V8HImode:
5789 	  emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5790 	  emit_insn (gen_addsi3 (offset, offset, offset));
5791 	  break;
5792 	case V4SFmode:
5793 	case V4SImode:
5794 	  emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5795 	  emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5796 	  break;
5797 	case V2DImode:
5798 	case V2DFmode:
5799 	  emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5800 	  break;
5801 	default:
5802 	  abort ();
5803 	}
5804     }
5805   emit_insn (gen_rotqby_ti (rot, from, offset));
5806 }
5807 
5808 static void
5809 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
5810 {
5811   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
5812   rtx shuf = gen_reg_rtx (V4SImode);
5813   rtx insn = gen_reg_rtx (V4SImode);
5814   rtx shufc;
5815   rtx insnc;
5816   rtx mem;
5817 
5818   fnaddr = force_reg (SImode, fnaddr);
5819   cxt = force_reg (SImode, cxt);
5820 
5821   if (TARGET_LARGE_MEM)
5822     {
5823       rtx rotl = gen_reg_rtx (V4SImode);
5824       rtx mask = gen_reg_rtx (V4SImode);
5825       rtx bi = gen_reg_rtx (SImode);
5826       static unsigned char const shufa[16] = {
5827 	2, 3, 0, 1, 18, 19, 16, 17,
5828 	0, 1, 2, 3, 16, 17, 18, 19
5829       };
5830       static unsigned char const insna[16] = {
5831 	0x41, 0, 0, 79,
5832 	0x41, 0, 0, STATIC_CHAIN_REGNUM,
5833 	0x60, 0x80, 0, 79,
5834 	0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5835       };
5836 
5837       shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5838       insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5839 
5840       emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5841       emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5842       emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5843       emit_insn (gen_selb (insn, insnc, rotl, mask));
5844 
5845       mem = adjust_address (m_tramp, V4SImode, 0);
5846       emit_move_insn (mem, insn);
5847 
5848       emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5849       mem = adjust_address (m_tramp, Pmode, 16);
5850       emit_move_insn (mem, bi);
5851     }
5852   else
5853     {
5854       rtx scxt = gen_reg_rtx (SImode);
5855       rtx sfnaddr = gen_reg_rtx (SImode);
5856       static unsigned char const insna[16] = {
5857 	0x42, 0, 0, STATIC_CHAIN_REGNUM,
5858 	0x30, 0, 0, 0,
5859 	0, 0, 0, 0,
5860 	0, 0, 0, 0
5861       };
5862 
5863       shufc = gen_reg_rtx (TImode);
5864       insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5865 
5866       /* By or'ing all of cxt with the ila opcode we are assuming cxt
5867 	 fits 18 bits and the last 4 are zeros.  This will be true if
5868 	 the stack pointer is initialized to 0x3fff0 at program start;
5869 	 otherwise the ila instruction will be garbage.  */
5870 
5871       emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5872       emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5873       emit_insn (gen_cpat
5874 		 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5875       emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5876       emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5877 
5878       mem = adjust_address (m_tramp, V4SImode, 0);
5879       emit_move_insn (mem, insn);
5880     }
5881   emit_insn (gen_sync ());
5882 }
5883 
5884 static bool
5885 spu_warn_func_return (tree decl)
5886 {
5887   /* Naked functions are implemented entirely in assembly, including the
5888      return sequence, so suppress warnings about this.  */
5889   return !spu_naked_function_p (decl);
5890 }
5891 
5892 void
5893 spu_expand_sign_extend (rtx ops[])
5894 {
5895   unsigned char arr[16];
5896   rtx pat = gen_reg_rtx (TImode);
5897   rtx sign, c;
5898   int i, last;
5899   last = GET_MODE (ops[0]) == DImode ? 7 : 15;
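  /* ARR becomes a shufb control: byte values 0x00-0x0f select bytes of
     ops[1] and 0x10-0x1f select bytes of SIGN, so the result's high bytes
     are filled with copies of the sign while its low bytes carry the
     (possibly already extended) source value.  */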
5900   if (GET_MODE (ops[1]) == QImode)
5901     {
5902       sign = gen_reg_rtx (HImode);
5903       emit_insn (gen_extendqihi2 (sign, ops[1]));
5904       for (i = 0; i < 16; i++)
5905 	arr[i] = 0x12;
5906       arr[last] = 0x13;
5907     }
5908   else
5909     {
5910       for (i = 0; i < 16; i++)
5911 	arr[i] = 0x10;
5912       switch (GET_MODE (ops[1]))
5913 	{
5914 	case HImode:
5915 	  sign = gen_reg_rtx (SImode);
5916 	  emit_insn (gen_extendhisi2 (sign, ops[1]));
5917 	  arr[last] = 0x03;
5918 	  arr[last - 1] = 0x02;
5919 	  break;
5920 	case SImode:
5921 	  sign = gen_reg_rtx (SImode);
5922 	  emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5923 	  for (i = 0; i < 4; i++)
5924 	    arr[last - i] = 3 - i;
5925 	  break;
5926 	case DImode:
5927 	  sign = gen_reg_rtx (SImode);
5928 	  c = gen_reg_rtx (SImode);
5929 	  emit_insn (gen_spu_convert (c, ops[1]));
5930 	  emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5931 	  for (i = 0; i < 8; i++)
5932 	    arr[last - i] = 7 - i;
5933 	  break;
5934 	default:
5935 	  abort ();
5936 	}
5937     }
5938   emit_move_insn (pat, array_to_constant (TImode, arr));
5939   emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5940 }
5941 
5942 /* Expand vector initialization.  If there are any constant parts,
5943    load the constant parts first, then load any non-constant parts.  */
5944 void
5945 spu_expand_vector_init (rtx target, rtx vals)
5946 {
5947   machine_mode mode = GET_MODE (target);
5948   int n_elts = GET_MODE_NUNITS (mode);
5949   int n_var = 0;
5950   bool all_same = true;
5951   rtx first, x = NULL_RTX, first_constant = NULL_RTX;
5952   int i;
5953 
5954   first = XVECEXP (vals, 0, 0);
5955   for (i = 0; i < n_elts; ++i)
5956     {
5957       x = XVECEXP (vals, 0, i);
5958       if (!(CONST_INT_P (x)
5959 	    || GET_CODE (x) == CONST_DOUBLE
5960 	    || GET_CODE (x) == CONST_FIXED))
5961 	++n_var;
5962       else
5963 	{
5964 	  if (first_constant == NULL_RTX)
5965 	    first_constant = x;
5966 	}
5967       if (i > 0 && !rtx_equal_p (x, first))
5968 	all_same = false;
5969     }
5970 
5971   /* if all elements are the same, use splats to repeat elements */
5972   if (all_same)
5973     {
5974       if (!CONSTANT_P (first)
5975 	  && !register_operand (first, GET_MODE (x)))
5976 	first = force_reg (GET_MODE (first), first);
5977       emit_insn (gen_spu_splats (target, first));
5978       return;
5979     }
5980 
5981   /* load constant parts */
5982   if (n_var != n_elts)
5983     {
5984       if (n_var == 0)
5985 	{
5986 	  emit_move_insn (target,
5987 			  gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5988 	}
5989       else
5990 	{
5991 	  rtx constant_parts_rtx = copy_rtx (vals);
5992 
5993 	  gcc_assert (first_constant != NULL_RTX);
5994 	  /* Fill empty slots with the first constant; this increases
5995 	     our chance of using splats in the recursive call below.  */
5996 	  for (i = 0; i < n_elts; ++i)
5997 	    {
5998 	      x = XVECEXP (constant_parts_rtx, 0, i);
5999 	      if (!(CONST_INT_P (x)
6000 		    || GET_CODE (x) == CONST_DOUBLE
6001 		    || GET_CODE (x) == CONST_FIXED))
6002 		XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6003 	    }
6004 
6005 	  spu_expand_vector_init (target, constant_parts_rtx);
6006 	}
6007     }
6008 
6009   /* load variable parts */
6010   if (n_var != 0)
6011     {
6012       rtx insert_operands[4];
6013 
6014       insert_operands[0] = target;
6015       insert_operands[2] = target;
6016       for (i = 0; i < n_elts; ++i)
6017 	{
6018 	  x = XVECEXP (vals, 0, i);
6019 	  if (!(CONST_INT_P (x)
6020 		|| GET_CODE (x) == CONST_DOUBLE
6021 		|| GET_CODE (x) == CONST_FIXED))
6022 	    {
6023 	      if (!register_operand (x, GET_MODE (x)))
6024 		x = force_reg (GET_MODE (x), x);
6025 	      insert_operands[1] = x;
6026 	      insert_operands[3] = GEN_INT (i);
6027 	      spu_builtin_insert (insert_operands);
6028 	    }
6029 	}
6030     }
6031 }
6032 
6033 /* Return the insn index for the vector compare instruction for the given
6034    CODE, DEST_MODE and OP_MODE.  Return -1 if no valid insn is available.  */
6035 
6036 static int
6037 get_vec_cmp_insn (enum rtx_code code,
6038                   machine_mode dest_mode,
6039                   machine_mode op_mode)
6040 
6041 {
6042   switch (code)
6043     {
6044     case EQ:
6045       if (dest_mode == V16QImode && op_mode == V16QImode)
6046         return CODE_FOR_ceq_v16qi;
6047       if (dest_mode == V8HImode && op_mode == V8HImode)
6048         return CODE_FOR_ceq_v8hi;
6049       if (dest_mode == V4SImode && op_mode == V4SImode)
6050         return CODE_FOR_ceq_v4si;
6051       if (dest_mode == V4SImode && op_mode == V4SFmode)
6052         return CODE_FOR_ceq_v4sf;
6053       if (dest_mode == V2DImode && op_mode == V2DFmode)
6054         return CODE_FOR_ceq_v2df;
6055       break;
6056     case GT:
6057       if (dest_mode == V16QImode && op_mode == V16QImode)
6058         return CODE_FOR_cgt_v16qi;
6059       if (dest_mode == V8HImode && op_mode == V8HImode)
6060         return CODE_FOR_cgt_v8hi;
6061       if (dest_mode == V4SImode && op_mode == V4SImode)
6062         return CODE_FOR_cgt_v4si;
6063       if (dest_mode == V4SImode && op_mode == V4SFmode)
6064         return CODE_FOR_cgt_v4sf;
6065       if (dest_mode == V2DImode && op_mode == V2DFmode)
6066         return CODE_FOR_cgt_v2df;
6067       break;
6068     case GTU:
6069       if (dest_mode == V16QImode && op_mode == V16QImode)
6070         return CODE_FOR_clgt_v16qi;
6071       if (dest_mode == V8HImode && op_mode == V8HImode)
6072         return CODE_FOR_clgt_v8hi;
6073       if (dest_mode == V4SImode && op_mode == V4SImode)
6074         return CODE_FOR_clgt_v4si;
6075       break;
6076     default:
6077       break;
6078     }
6079   return -1;
6080 }
6081 
6082 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6083    DMODE is the expected destination mode.  This is a recursive function.  */
6084 
6085 static rtx
6086 spu_emit_vector_compare (enum rtx_code rcode,
6087                          rtx op0, rtx op1,
6088                          machine_mode dmode)
6089 {
6090   int vec_cmp_insn;
6091   rtx mask;
6092   machine_mode dest_mode;
6093   machine_mode op_mode = GET_MODE (op1);
6094 
6095   gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6096 
6097   /* Single-precision vector compare instructions use a V4SImode destination.
6098      Double-precision vector compare instructions use a V2DImode destination.
6099      Move the destination to the appropriate mode later.  */
6100   if (dmode == V4SFmode)
6101     dest_mode = V4SImode;
6102   else if (dmode == V2DFmode)
6103     dest_mode = V2DImode;
6104   else
6105     dest_mode = dmode;
6106 
6107   mask = gen_reg_rtx (dest_mode);
6108   vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6109 
6110   if (vec_cmp_insn == -1)
6111     {
6112       bool swap_operands = false;
6113       bool try_again = false;
6114       switch (rcode)
6115         {
6116         case LT:
6117           rcode = GT;
6118           swap_operands = true;
6119           try_again = true;
6120           break;
6121         case LTU:
6122           rcode = GTU;
6123           swap_operands = true;
6124           try_again = true;
6125           break;
6126         case NE:
6127 	case UNEQ:
6128 	case UNLE:
6129 	case UNLT:
6130 	case UNGE:
6131 	case UNGT:
6132 	case UNORDERED:
6133           /* Treat A != B as ~(A==B).  */
6134           {
6135 	    enum rtx_code rev_code;
6136             enum insn_code nor_code;
6137 	    rtx rev_mask;
6138 
6139 	    rev_code = reverse_condition_maybe_unordered (rcode);
6140             rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6141 
6142             nor_code = optab_handler (one_cmpl_optab, dest_mode);
6143             gcc_assert (nor_code != CODE_FOR_nothing);
6144             emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
6145             if (dmode != dest_mode)
6146               {
6147                 rtx temp = gen_reg_rtx (dest_mode);
6148                 convert_move (temp, mask, 0);
6149                 return temp;
6150               }
6151             return mask;
6152           }
6153           break;
6154         case GE:
6155         case GEU:
6156         case LE:
6157         case LEU:
6158           /* Try GT/GTU/LT/LTU OR EQ */
6159           {
6160             rtx c_rtx, eq_rtx;
6161             enum insn_code ior_code;
6162             enum rtx_code new_code;
6163 
6164             switch (rcode)
6165               {
6166               case GE:  new_code = GT;  break;
6167               case GEU: new_code = GTU; break;
6168               case LE:  new_code = LT;  break;
6169               case LEU: new_code = LTU; break;
6170               default:
6171                 gcc_unreachable ();
6172               }
6173 
6174             c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6175             eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6176 
6177             ior_code = optab_handler (ior_optab, dest_mode);
6178             gcc_assert (ior_code != CODE_FOR_nothing);
6179             emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6180             if (dmode != dest_mode)
6181               {
6182                 rtx temp = gen_reg_rtx (dest_mode);
6183                 convert_move (temp, mask, 0);
6184                 return temp;
6185               }
6186             return mask;
6187           }
6188           break;
6189         case LTGT:
6190           /* Try LT OR GT */
6191           {
6192             rtx lt_rtx, gt_rtx;
6193             enum insn_code ior_code;
6194 
6195             lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6196             gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6197 
6198             ior_code = optab_handler (ior_optab, dest_mode);
6199             gcc_assert (ior_code != CODE_FOR_nothing);
6200             emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6201             if (dmode != dest_mode)
6202               {
6203                 rtx temp = gen_reg_rtx (dest_mode);
6204                 convert_move (temp, mask, 0);
6205                 return temp;
6206               }
6207             return mask;
6208           }
6209           break;
6210         case ORDERED:
6211           /* Implement as (A==A) & (B==B) */
6212           {
6213             rtx a_rtx, b_rtx;
6214             enum insn_code and_code;
6215 
6216             a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6217             b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6218 
6219             and_code = optab_handler (and_optab, dest_mode);
6220             gcc_assert (and_code != CODE_FOR_nothing);
6221             emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6222             if (dmode != dest_mode)
6223               {
6224                 rtx temp = gen_reg_rtx (dest_mode);
6225                 convert_move (temp, mask, 0);
6226                 return temp;
6227               }
6228             return mask;
6229           }
6230           break;
6231         default:
6232           gcc_unreachable ();
6233         }
6234 
6235       /* You only get two chances.  */
6236       if (try_again)
6237           vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6238 
6239       gcc_assert (vec_cmp_insn != -1);
6240 
6241       if (swap_operands)
6242         {
6243           rtx tmp;
6244           tmp = op0;
6245           op0 = op1;
6246           op1 = tmp;
6247         }
6248     }
6249 
6250   emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6251   if (dmode != dest_mode)
6252     {
6253       rtx temp = gen_reg_rtx (dest_mode);
6254       convert_move (temp, mask, 0);
6255       return temp;
6256     }
6257   return mask;
6258 }
6259 
6260 
6261 /* Emit a vector conditional expression.
6262    DEST is the destination.  OP1 and OP2 are the two VEC_COND_EXPR operands.
6263    CC_OP0 and CC_OP1 are the two operands for the relational operation COND.  */
6264 
6265 int
6266 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6267                            rtx cond, rtx cc_op0, rtx cc_op1)
6268 {
6269   machine_mode dest_mode = GET_MODE (dest);
6270   enum rtx_code rcode = GET_CODE (cond);
6271   rtx mask;
6272 
6273   /* Get the vector mask for the given relational operation.  */
6274   mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6275 
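  /* selb takes bits of OP1 where the comparison mask is set and bits of
     OP2 where it is clear, giving cond ? op1 : op2 per element.  */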
6276   emit_insn (gen_selb (dest, op2, op1, mask));
6277 
6278   return 1;
6279 }
6280 
6281 static rtx
6282 spu_force_reg (machine_mode mode, rtx op)
6283 {
6284   rtx x, r;
6285   if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6286     {
6287       if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6288 	  || GET_MODE (op) == BLKmode)
6289 	return force_reg (mode, convert_to_mode (mode, op, 0));
6290       abort ();
6291     }
6292 
6293   r = force_reg (GET_MODE (op), op);
6294   if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6295     {
6296       x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6297       if (x)
6298 	return x;
6299     }
6300 
6301   x = gen_reg_rtx (mode);
6302   emit_insn (gen_spu_convert (x, r));
6303   return x;
6304 }
6305 
6306 static void
6307 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6308 {
6309   HOST_WIDE_INT v = 0;
6310   int lsbits;
6311   /* Check the range of immediate operands. */
6312   if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6313     {
6314       int range = p - SPU_BTI_7;
6315 
6316       if (!CONSTANT_P (op))
6317 	error ("%s expects an integer literal in the range [%d, %d]",
6318 	       d->name,
6319 	       spu_builtin_range[range].low, spu_builtin_range[range].high);
6320 
6321       if (GET_CODE (op) == CONST
6322 	  && (GET_CODE (XEXP (op, 0)) == PLUS
6323 	      || GET_CODE (XEXP (op, 0)) == MINUS))
6324 	{
6325 	  v = INTVAL (XEXP (XEXP (op, 0), 1));
6326 	  op = XEXP (XEXP (op, 0), 0);
6327 	}
6328       else if (GET_CODE (op) == CONST_INT)
6329 	v = INTVAL (op);
6330       else if (GET_CODE (op) == CONST_VECTOR
6331 	       && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6332 	v = INTVAL (CONST_VECTOR_ELT (op, 0));
6333 
6334       /* The default for v is 0 which is valid in every range. */
6335       if (v < spu_builtin_range[range].low
6336 	  || v > spu_builtin_range[range].high)
6337 	error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6338 	       d->name,
6339 	       spu_builtin_range[range].low, spu_builtin_range[range].high,
6340 	       v);
6341 
6342       switch (p)
6343 	{
6344 	case SPU_BTI_S10_4:
6345 	  lsbits = 4;
6346 	  break;
6347 	case SPU_BTI_U16_2:
6348 	  /* This is only used in lqa and stqa.  Even though the insns
6349 	     encode 16 bits of the address (all but the 2 least
6350 	     significant), only 14 bits are used because the address is
6351 	     masked to be 16-byte aligned.  */
6352 	  lsbits = 4;
6353 	  break;
6354 	case SPU_BTI_S16_2:
6355 	  /* This is used for lqr and stqr. */
6356 	  lsbits = 2;
6357 	  break;
6358 	default:
6359 	  lsbits = 0;
6360 	}
6361 
6362       if (GET_CODE (op) == LABEL_REF
6363 	  || (GET_CODE (op) == SYMBOL_REF
6364 	      && SYMBOL_REF_FUNCTION_P (op))
6365 	  || (v & ((1 << lsbits) - 1)) != 0)
6366 	warning (0, "%d least significant bits of %s are ignored", lsbits,
6367 		 d->name);
6368     }
6369 }
6370 
6371 
6372 static int
6373 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6374 		     rtx target, rtx ops[])
6375 {
6376   enum insn_code icode = (enum insn_code) d->icode;
6377   int i = 0, a;
6378 
6379   /* Expand the arguments into rtl. */
6380 
6381   if (d->parm[0] != SPU_BTI_VOID)
6382     ops[i++] = target;
6383 
6384   for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6385     {
6386       tree arg = CALL_EXPR_ARG (exp, a);
6387       if (arg == 0)
6388 	abort ();
6389       ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6390     }
6391 
6392   gcc_assert (i == insn_data[icode].n_generator_args);
6393   return i;
6394 }
6395 
6396 static rtx
6397 spu_expand_builtin_1 (struct spu_builtin_description *d,
6398 		      tree exp, rtx target)
6399 {
6400   rtx pat;
6401   rtx ops[8];
6402   enum insn_code icode = (enum insn_code) d->icode;
6403   machine_mode mode, tmode;
6404   int i, p;
6405   int n_operands;
6406   tree return_type;
6407 
6408   /* Set up ops[] with values from arglist. */
6409   n_operands = expand_builtin_args (d, exp, target, ops);
6410 
6411   /* Handle the target operand which must be operand 0. */
6412   i = 0;
6413   if (d->parm[0] != SPU_BTI_VOID)
6414     {
6415 
6416       /* We prefer the mode specified for the match_operand; otherwise
6417          use the mode from the builtin function prototype.  */
6418       tmode = insn_data[d->icode].operand[0].mode;
6419       if (tmode == VOIDmode)
6420 	tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6421 
6422       /* Try to use TARGET, because not using it can lead to extra copies,
6423          and when all of the registers are in use those extra copies lead
6424          to extra spills.  */
6425       if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6426 	ops[0] = target;
6427       else
6428 	target = ops[0] = gen_reg_rtx (tmode);
6429 
6430       if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6431 	abort ();
6432 
6433       i++;
6434     }
6435 
6436   if (d->fcode == SPU_MASK_FOR_LOAD)
6437     {
6438       machine_mode mode = insn_data[icode].operand[1].mode;
6439       tree arg;
6440       rtx addr, op, pat;
6441 
6442       /* get addr */
6443       arg = CALL_EXPR_ARG (exp, 0);
6444       gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6445       op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6446       addr = memory_address (mode, op);
6447 
6448       /* negate addr */
6449       op = gen_reg_rtx (GET_MODE (addr));
6450       emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
6451       op = gen_rtx_MEM (mode, op);
6452 
6453       pat = GEN_FCN (icode) (target, op);
6454       if (!pat)
6455         return 0;
6456       emit_insn (pat);
6457       return target;
6458     }
6459 
6460   /* Ignore align_hint, but still expand its args in case they have
6461      side effects.  */
6462   if (icode == CODE_FOR_spu_align_hint)
6463     return 0;
6464 
6465   /* Handle the rest of the operands. */
6466   for (p = 1; i < n_operands; i++, p++)
6467     {
6468       if (insn_data[d->icode].operand[i].mode != VOIDmode)
6469 	mode = insn_data[d->icode].operand[i].mode;
6470       else
6471 	mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6472 
6473       /* mode can be VOIDmode here for labels */
6474 
6475       /* For specific intrinsics with an immediate operand, e.g.,
6476          si_ai(), we sometimes need to convert the scalar argument to a
6477          vector argument by splatting the scalar. */
6478       if (VECTOR_MODE_P (mode)
6479 	  && (GET_CODE (ops[i]) == CONST_INT
6480 	      || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6481 	      || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6482 	{
6483 	  if (GET_CODE (ops[i]) == CONST_INT)
6484 	    ops[i] = spu_const (mode, INTVAL (ops[i]));
6485 	  else
6486 	    {
6487 	      rtx reg = gen_reg_rtx (mode);
6488 	      machine_mode imode = GET_MODE_INNER (mode);
6489 	      if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6490 		ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6491 	      if (imode != GET_MODE (ops[i]))
6492 		ops[i] = convert_to_mode (imode, ops[i],
6493 					  TYPE_UNSIGNED (spu_builtin_types
6494 							 [d->parm[i]]));
6495 	      emit_insn (gen_spu_splats (reg, ops[i]));
6496 	      ops[i] = reg;
6497 	    }
6498 	}
6499 
6500       spu_check_builtin_parm (d, ops[i], d->parm[p]);
6501 
6502       if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6503 	ops[i] = spu_force_reg (mode, ops[i]);
6504     }
6505 
6506   switch (n_operands)
6507     {
6508     case 0:
6509       pat = GEN_FCN (icode) (0);
6510       break;
6511     case 1:
6512       pat = GEN_FCN (icode) (ops[0]);
6513       break;
6514     case 2:
6515       pat = GEN_FCN (icode) (ops[0], ops[1]);
6516       break;
6517     case 3:
6518       pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6519       break;
6520     case 4:
6521       pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6522       break;
6523     case 5:
6524       pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6525       break;
6526     case 6:
6527       pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6528       break;
6529     default:
6530       abort ();
6531     }
6532 
6533   if (!pat)
6534     abort ();
6535 
6536   if (d->type == B_CALL || d->type == B_BISLED)
6537     emit_call_insn (pat);
6538   else if (d->type == B_JUMP)
6539     {
6540       emit_jump_insn (pat);
6541       emit_barrier ();
6542     }
6543   else
6544     emit_insn (pat);
6545 
6546   return_type = spu_builtin_types[d->parm[0]];
6547   if (d->parm[0] != SPU_BTI_VOID
6548       && GET_MODE (target) != TYPE_MODE (return_type))
6549     {
6550       /* TARGET is the return value.  It should always have the mode of
6551          the builtin function prototype.  */
6552       target = spu_force_reg (TYPE_MODE (return_type), target);
6553     }
6554 
6555   return target;
6556 }
6557 
6558 rtx
6559 spu_expand_builtin (tree exp,
6560 		    rtx target,
6561 		    rtx subtarget ATTRIBUTE_UNUSED,
6562 		    machine_mode mode ATTRIBUTE_UNUSED,
6563 		    int ignore ATTRIBUTE_UNUSED)
6564 {
6565   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6566   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6567   struct spu_builtin_description *d;
6568 
6569   if (fcode < NUM_SPU_BUILTINS)
6570     {
6571       d = &spu_builtins[fcode];
6572 
6573       return spu_expand_builtin_1 (d, exp, target);
6574     }
6575   abort ();
6576 }
6577 
6578 /* Implement targetm.vectorize.builtin_mask_for_load.  */
6579 static tree
6580 spu_builtin_mask_for_load (void)
6581 {
6582   return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6583 }
6584 
6585 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
6586 static int
6587 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6588                                 tree vectype,
6589                                 int misalign ATTRIBUTE_UNUSED)
6590 {
6591   unsigned elements;
6592 
6593   switch (type_of_cost)
6594     {
6595       case scalar_stmt:
6596       case vector_stmt:
6597       case vector_load:
6598       case vector_store:
6599       case vec_to_scalar:
6600       case scalar_to_vec:
6601       case cond_branch_not_taken:
6602       case vec_perm:
6603       case vec_promote_demote:
6604         return 1;
6605 
6606       case scalar_store:
6607         return 10;
6608 
6609       case scalar_load:
6610         /* Load + rotate.  */
6611         return 2;
6612 
6613       case unaligned_load:
6614         return 2;
6615 
6616       case cond_branch_taken:
6617         return 6;
6618 
6619       case vec_construct:
6620 	elements = TYPE_VECTOR_SUBPARTS (vectype);
6621 	return elements / 2 + 1;
6622 
6623       default:
6624         gcc_unreachable ();
6625     }
6626 }
6627 
6628 /* Implement targetm.vectorize.init_cost.  */
6629 
6630 static void *
6631 spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6632 {
6633   unsigned *cost = XNEWVEC (unsigned, 3);
6634   cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
6635   return cost;
6636 }
6637 
6638 /* Implement targetm.vectorize.add_stmt_cost.  */
6639 
6640 static unsigned
6641 spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6642 		   struct _stmt_vec_info *stmt_info, int misalign,
6643 		   enum vect_cost_model_location where)
6644 {
6645   unsigned *cost = (unsigned *) data;
6646   unsigned retval = 0;
6647 
6648   if (flag_vect_cost_model)
6649     {
6650       tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6651       int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6652 
6653       /* Statements in an inner loop relative to the loop being
6654 	 vectorized are weighted more heavily.  The value here is
6655 	 arbitrary and could potentially be improved with analysis.  */
6656       if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6657 	count *= 50;  /* FIXME.  */
6658 
6659       retval = (unsigned) (count * stmt_cost);
6660       cost[where] += retval;
6661     }
6662 
6663   return retval;
6664 }
6665 
6666 /* Implement targetm.vectorize.finish_cost.  */
6667 
6668 static void
6669 spu_finish_cost (void *data, unsigned *prologue_cost,
6670 		 unsigned *body_cost, unsigned *epilogue_cost)
6671 {
6672   unsigned *cost = (unsigned *) data;
6673   *prologue_cost = cost[vect_prologue];
6674   *body_cost     = cost[vect_body];
6675   *epilogue_cost = cost[vect_epilogue];
6676 }
6677 
6678 /* Implement targetm.vectorize.destroy_cost_data.  */
6679 
6680 static void
6681 spu_destroy_cost_data (void *data)
6682 {
6683   free (data);
6684 }
6685 
6686 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6687    after applying N iterations.  This routine does not determine how many
6688    iterations are required to reach the desired alignment.  */
6689 
6690 static bool
6691 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6692 {
6693   if (is_packed)
6694     return false;
6695 
6696   /* All other types are naturally aligned.  */
6697   return true;
6698 }
6699 
6700 /* Return the appropriate mode for a named address pointer.  */
6701 static machine_mode
6702 spu_addr_space_pointer_mode (addr_space_t addrspace)
6703 {
6704   switch (addrspace)
6705     {
6706     case ADDR_SPACE_GENERIC:
6707       return ptr_mode;
6708     case ADDR_SPACE_EA:
6709       return EAmode;
6710     default:
6711       gcc_unreachable ();
6712     }
6713 }
6714 
6715 /* Return the appropriate mode for a named address address.  */
6716 static machine_mode
6717 spu_addr_space_address_mode (addr_space_t addrspace)
6718 {
6719   switch (addrspace)
6720     {
6721     case ADDR_SPACE_GENERIC:
6722       return Pmode;
6723     case ADDR_SPACE_EA:
6724       return EAmode;
6725     default:
6726       gcc_unreachable ();
6727     }
6728 }
6729 
6730 /* Determine if one named address space is a subset of another.  */
6731 
6732 static bool
6733 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6734 {
6735   gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6736   gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6737 
6738   if (subset == superset)
6739     return true;
6740 
6741   /* If we have -mno-address-space-conversion, treat __ea and generic as not
6742      being subsets but instead as disjoint address spaces.  */
6743   else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6744     return false;
6745 
6746   else
6747     return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6748 }
6749 
6750 /* Convert from one address space to another.  */
6751 static rtx
6752 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6753 {
6754   addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6755   addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6756 
6757   gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6758   gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6759 
6760   if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6761     {
6762       rtx result, ls;
6763 
6764       ls = gen_const_mem (DImode,
6765 			  gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6766       set_mem_align (ls, 128);
6767 
6768       result = gen_reg_rtx (Pmode);
6769       ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6770       op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
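      /* Clear the local-store base when OP is a NULL __ea pointer so that
	 NULL converts to NULL; otherwise the base is subtracted below to
	 obtain the local-store address.  */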
6771       ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6772 					  ls, const0_rtx, Pmode, 1);
6773 
6774       emit_insn (gen_subsi3 (result, op, ls));
6775 
6776       return result;
6777     }
6778 
6779   else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6780     {
6781       rtx result, ls;
6782 
6783       ls = gen_const_mem (DImode,
6784 			  gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6785       set_mem_align (ls, 128);
6786 
6787       result = gen_reg_rtx (EAmode);
6788       ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6789       op = force_reg (Pmode, op);
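      /* As above, keep a NULL generic pointer NULL: only add the
	 local-store base when OP is non-zero.  */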
6790       ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6791 					  ls, const0_rtx, EAmode, 1);
6792       op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6793 
6794       if (EAmode == SImode)
6795 	emit_insn (gen_addsi3 (result, op, ls));
6796       else
6797 	emit_insn (gen_adddi3 (result, op, ls));
6798 
6799       return result;
6800     }
6801 
6802   else
6803     gcc_unreachable ();
6804 }
6805 
6806 
6807 /* Count the total number of instructions in each pipe and return the
6808    maximum, which is used as the Minimum Iteration Interval (MII)
6809    in the modulo scheduler.  get_pipe() will return -2, -1, 0, or 1.
6810    -2 means an instruction can go in either pipe0 or pipe1.  */
6811 static int
6812 spu_sms_res_mii (struct ddg *g)
6813 {
6814   int i;
6815   unsigned t[4] = {0, 0, 0, 0};
6816 
6817   for (i = 0; i < g->num_nodes; i++)
6818     {
6819       rtx_insn *insn = g->nodes[i].insn;
6820       int p = get_pipe (insn) + 2;
6821 
6822       gcc_assert (p >= 0);
6823       gcc_assert (p < 4);
6824 
6825       t[p]++;
6826       if (dump_file && INSN_P (insn))
6827 	fprintf (dump_file, "i%d %s %d %d\n",
6828 		 INSN_UID (insn),
6829 		 insn_data[INSN_CODE (insn)].name,
6830 		 p, t[p]);
6831     }
6832   if (dump_file)
6833     fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6834 
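  /* The resource MII is the larger of the per-pipe bound (the busier of
     pipe0 and pipe1) and the dual-issue bound, i.e. the ceiling of
     (pipe0 + pipe1 + either-pipe) / 2.  Instructions for which get_pipe ()
     returned -1 (counted in t[1]) do not contribute to either bound.  */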
6835   return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6836 }
6837 
6838 
6839 void
6840 spu_init_expanders (void)
6841 {
6842   if (cfun)
6843     {
6844       rtx r0, r1;
6845       /* HARD_FRAME_POINTER_REGNUM is only 128-bit aligned when
6846          frame_pointer_needed is true.  We don't know that until we're
6847          expanding the prologue.  */
6848       REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6849 
6850       /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6851 	 LAST_VIRTUAL_REGISTER+2 to test the back-end.  We want them
6852 	 to be treated as aligned, so generate them here. */
6853       r0 = gen_reg_rtx (SImode);
6854       r1 = gen_reg_rtx (SImode);
6855       mark_reg_pointer (r0, 128);
6856       mark_reg_pointer (r1, 128);
6857       gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6858 		  && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6859     }
6860 }
6861 
6862 static machine_mode
6863 spu_libgcc_cmp_return_mode (void)
6864 {
6865 
6866 /* For the SPU, word mode is TImode, so it is better to use SImode
6867    for compare returns.  */
6868   return SImode;
6869 }
6870 
6871 static machine_mode
6872 spu_libgcc_shift_count_mode (void)
6873 {
6874 /* For the SPU, word mode is TImode, so it is better to use SImode
6875    for shift counts.  */
6876   return SImode;
6877 }
6878 
6879 /* Implement targetm.section_type_flags.  */
6880 static unsigned int
6881 spu_section_type_flags (tree decl, const char *name, int reloc)
6882 {
6883   /* .toe needs to have type @nobits.  */
6884   if (strcmp (name, ".toe") == 0)
6885     return SECTION_BSS;
6886   /* Don't load _ea into the current address space.  */
6887   if (strcmp (name, "._ea") == 0)
6888     return SECTION_WRITE | SECTION_DEBUG;
6889   return default_section_type_flags (decl, name, reloc);
6890 }
6891 
6892 /* Implement targetm.select_section.  */
6893 static section *
6894 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6895 {
6896   /* Variables and constants defined in the __ea address space
6897      go into a special section named "._ea".  */
6898   if (TREE_TYPE (decl) != error_mark_node
6899       && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6900     {
6901       /* We might get called with string constants, but get_named_section
6902 	 doesn't like them as they are not DECLs.  Also, we need to set
6903 	 flags in that case.  */
6904       if (!DECL_P (decl))
6905 	return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6906 
6907       return get_named_section (decl, "._ea", reloc);
6908     }
6909 
6910   return default_elf_select_section (decl, reloc, align);
6911 }
6912 
6913 /* Implement targetm.unique_section.  */
6914 static void
6915 spu_unique_section (tree decl, int reloc)
6916 {
6917   /* We don't support unique section names in the __ea address
6918      space for now.  */
6919   if (TREE_TYPE (decl) != error_mark_node
6920       && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6921     return;
6922 
6923   default_unique_section (decl, reloc);
6924 }
6925 
6926 /* Generate a constant or register which contains 2^SCALE.  We assume
6927    the result is valid for MODE.  Currently, MODE must be V4SFmode and
6928    SCALE must be SImode. */
6929 rtx
6930 spu_gen_exp2 (machine_mode mode, rtx scale)
6931 {
6932   gcc_assert (mode == V4SFmode);
6933   gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6934   if (GET_CODE (scale) != CONST_INT)
6935     {
6936       /* unsigned int exp = (127 + scale) << 23;
6937 	__vector float m = (__vector float) spu_splats (exp); */
6938       rtx reg = force_reg (SImode, scale);
6939       rtx exp = gen_reg_rtx (SImode);
6940       rtx mul = gen_reg_rtx (mode);
6941       emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6942       emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6943       emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6944       return mul;
6945     }
6946   else
6947     {
6948       HOST_WIDE_INT exp = 127 + INTVAL (scale);
6949       unsigned char arr[16];
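      /* 2**SCALE as an IEEE single has exponent field 127 + SCALE and a
	 zero mantissa, i.e. the bit pattern (127 + SCALE) << 23.  Build
	 that pattern in each of the four 32-bit lanes: arr[0] holds the
	 top byte (exp >> 1) and arr[1] the next byte (the low eight bits
	 of exp << 7).  For example, SCALE == 3 gives exp == 130 and the
	 lane value 0x41000000 == 8.0f.  */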
6950       arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6951       arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6952       arr[2] = arr[6] = arr[10] = arr[14] = 0;
6953       arr[3] = arr[7] = arr[11] = arr[15] = 0;
6954       return array_to_constant (mode, arr);
6955     }
6956 }
6957 
6958 /* After reload, just change the convert into a move instruction
6959    or a dead instruction. */
6960 void
6961 spu_split_convert (rtx ops[])
6962 {
6963   if (REGNO (ops[0]) == REGNO (ops[1]))
6964     emit_note (NOTE_INSN_DELETED);
6965   else
6966     {
6967       /* Use TImode always as this might help hard reg copyprop.  */
6968       rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6969       rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
6970       emit_insn (gen_move_insn (op0, op1));
6971     }
6972 }
6973 
6974 void
6975 spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
6976 {
6977   fprintf (file, "# profile\n");
6978   fprintf (file, "brsl $75,  _mcount\n");
6979 }
6980 
6981 /* Implement targetm.ref_may_alias_errno.  */
6982 static bool
6983 spu_ref_may_alias_errno (ao_ref *ref)
6984 {
6985   tree base = ao_ref_base (ref);
6986 
6987   /* With SPU newlib, errno is defined as something like
6988          _impure_data._errno
6989      The default implementation of this target hook does not
6990      recognize such expressions, so special-case it here.  */
6991 
6992   if (TREE_CODE (base) == VAR_DECL
6993       && !TREE_STATIC (base)
6994       && DECL_EXTERNAL (base)
6995       && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
6996       && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
6997 		 "_impure_data") == 0
6998       /* _errno is the first member of _impure_data.  */
6999       && ref->offset == 0)
7000     return true;
7001 
7002   return default_ref_may_alias_errno (ref);
7003 }
7004 
7005 /* Output thunk to FILE that implements a C++ virtual function call (with
7006    multiple inheritance) to FUNCTION.  The thunk adjusts the this pointer
7007    by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7008    stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7009    relative to the resulting this pointer.  */
7010 
7011 static void
7012 spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7013 		     HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7014 		     tree function)
7015 {
7016   rtx op[8];
7017 
7018   /* Make sure unwind info is emitted for the thunk if needed.  */
7019   final_start_function (emit_barrier (), file, 1);
7020 
7021   /* Operand 0 is the target function.  */
7022   op[0] = XEXP (DECL_RTL (function), 0);
7023 
7024   /* Operand 1 is the 'this' pointer.  */
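  /* When the function returns an aggregate in memory, the hidden
     struct-return pointer occupies the first argument register and
     'this' is passed in the following one.  */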
7025   if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7026     op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7027   else
7028     op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7029 
7030   /* Operands 2/3 are the low/high halfwords of delta.  */
7031   op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7032   op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7033 
7034   /* Operands 4/5 are the low/high halfwords of vcall_offset.  */
7035   op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7036   op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7037 
7038   /* Operands 6/7 are temporary registers.  */
7039   op[6] = gen_rtx_REG (Pmode, 79);
7040   op[7] = gen_rtx_REG (Pmode, 78);
7041 
7042   /* Add DELTA to this pointer.  */
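  /* Pick the shortest sequence the immediate allows: 'ai' takes a 10-bit
     signed immediate, 'il' a 16-bit signed immediate, and 'ilhu'/'iohl'
     together build an arbitrary 32-bit constant.  */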
7043   if (delta)
7044     {
7045       if (delta >= -0x200 && delta < 0x200)
7046 	output_asm_insn ("ai\t%1,%1,%2", op);
7047       else if (delta >= -0x8000 && delta < 0x8000)
7048 	{
7049 	  output_asm_insn ("il\t%6,%2", op);
7050 	  output_asm_insn ("a\t%1,%1,%6", op);
7051 	}
7052       else
7053 	{
7054 	  output_asm_insn ("ilhu\t%6,%3", op);
7055 	  output_asm_insn ("iohl\t%6,%2", op);
7056 	  output_asm_insn ("a\t%1,%1,%6", op);
7057 	}
7058     }
7059 
7060   /* Perform vcall adjustment.  */
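  /* SPU loads always fetch an aligned 16-byte quadword, so load the
     quadword containing the vtable pointer and rotate it by the low
     address bits to bring the pointer into the preferred slot.  */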
7061   if (vcall_offset)
7062     {
7063       output_asm_insn ("lqd\t%7,0(%1)", op);
7064       output_asm_insn ("rotqby\t%7,%7,%1", op);
7065 
7066       if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7067 	output_asm_insn ("ai\t%7,%7,%4", op);
7068       else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7069 	{
7070 	  output_asm_insn ("il\t%6,%4", op);
7071 	  output_asm_insn ("a\t%7,%7,%6", op);
7072 	}
7073       else
7074 	{
7075 	  output_asm_insn ("ilhu\t%6,%5", op);
7076 	  output_asm_insn ("iohl\t%6,%4", op);
7077 	  output_asm_insn ("a\t%7,%7,%6", op);
7078 	}
7079 
7080       output_asm_insn ("lqd\t%6,0(%7)", op);
7081       output_asm_insn ("rotqby\t%6,%6,%7", op);
7082       output_asm_insn ("a\t%1,%1,%6", op);
7083     }
7084 
7085   /* Jump to target.  */
7086   output_asm_insn ("br\t%0", op);
7087 
7088   final_end_function ();
7089 }
7090 
7091 /* Canonicalize a comparison from one we don't have to one we do have.  */
7092 static void
7093 spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7094 			     bool op0_preserve_value)
7095 {
7096   if (!op0_preserve_value
7097       && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7098     {
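      /* The SPU only provides 'greater than' and 'equal' compares, so
	 rewrite LT/LE (and their unsigned forms) as the swapped GT/GE
	 comparison.  */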
7099       rtx tem = *op0;
7100       *op0 = *op1;
7101       *op1 = tem;
7102       *code = (int)swap_condition ((enum rtx_code)*code);
7103     }
7104 }
7105 
7106 /* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
7107    to perform.  MEM is the memory on which to operate.  VAL is the second
7108    operand of the binary operator.  BEFORE and AFTER are optional locations to
7109    return the value of MEM either before or after the operation.  */
7110 void
7111 spu_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
7112 		      rtx orig_before, rtx orig_after)
7113 {
7114   machine_mode mode = GET_MODE (mem);
7115   rtx before = orig_before, after = orig_after;
7116 
7117   if (before == NULL_RTX)
7118     before = gen_reg_rtx (mode);
7119 
7120   emit_move_insn (before, mem);
7121 
7122   if (code == MULT)  /* NAND operation */
7123     {
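      /* Atomic NAND computes ~(BEFORE & VAL); MULT is used here only as a
	 stand-in code, since RTL has no NAND binary code.  */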
7124       rtx x = expand_simple_binop (mode, AND, before, val,
7125 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
7126       after = expand_simple_unop (mode, NOT, x, after, 1);
7127     }
7128   else
7129     {
7130       after = expand_simple_binop (mode, code, before, val,
7131 				   after, 1, OPTAB_LIB_WIDEN);
7132     }
7133 
7134   emit_move_insn (mem, after);
7135 
7136   if (orig_after && after != orig_after)
7137     emit_move_insn (orig_after, after);
7138 }
7139 
7140 
7141 /*  Table of machine attributes.  */
7142 static const struct attribute_spec spu_attribute_table[] =
7143 {
7144   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7145        affects_type_identity } */
7146   { "naked",          0, 0, true,  false, false, spu_handle_fndecl_attribute,
7147     false },
7148   { "spu_vector",     0, 0, false, true,  false, spu_handle_vector_attribute,
7149     false },
7150   { NULL,             0, 0, false, false, false, NULL, false }
7151 };
7152 
7153 /*  TARGET overrides.  */
7154 
7155 #undef TARGET_ADDR_SPACE_POINTER_MODE
7156 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7157 
7158 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
7159 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7160 
7161 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7162 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7163   spu_addr_space_legitimate_address_p
7164 
7165 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7166 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7167 
7168 #undef TARGET_ADDR_SPACE_SUBSET_P
7169 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7170 
7171 #undef TARGET_ADDR_SPACE_CONVERT
7172 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7173 
7174 #undef TARGET_INIT_BUILTINS
7175 #define TARGET_INIT_BUILTINS spu_init_builtins
7176 #undef TARGET_BUILTIN_DECL
7177 #define TARGET_BUILTIN_DECL spu_builtin_decl
7178 
7179 #undef TARGET_EXPAND_BUILTIN
7180 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
7181 
7182 #undef TARGET_UNWIND_WORD_MODE
7183 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7184 
7185 #undef TARGET_LEGITIMIZE_ADDRESS
7186 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7187 
7188 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7189    and .quad for the debugger.  When it is known that the assembler is fixed,
7190    these can be removed.  */
7191 #undef TARGET_ASM_UNALIGNED_SI_OP
7192 #define TARGET_ASM_UNALIGNED_SI_OP	"\t.long\t"
7193 
7194 #undef TARGET_ASM_ALIGNED_DI_OP
7195 #define TARGET_ASM_ALIGNED_DI_OP	"\t.quad\t"
7196 
7197 /* The .8byte directive doesn't seem to work well for a 32-bit
7198    architecture. */
7199 #undef TARGET_ASM_UNALIGNED_DI_OP
7200 #define TARGET_ASM_UNALIGNED_DI_OP NULL
7201 
7202 #undef TARGET_RTX_COSTS
7203 #define TARGET_RTX_COSTS spu_rtx_costs
7204 
7205 #undef TARGET_ADDRESS_COST
7206 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
7207 
7208 #undef TARGET_SCHED_ISSUE_RATE
7209 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7210 
7211 #undef TARGET_SCHED_INIT_GLOBAL
7212 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7213 
7214 #undef TARGET_SCHED_INIT
7215 #define TARGET_SCHED_INIT spu_sched_init
7216 
7217 #undef TARGET_SCHED_VARIABLE_ISSUE
7218 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7219 
7220 #undef TARGET_SCHED_REORDER
7221 #define TARGET_SCHED_REORDER spu_sched_reorder
7222 
7223 #undef TARGET_SCHED_REORDER2
7224 #define TARGET_SCHED_REORDER2 spu_sched_reorder
7225 
7226 #undef TARGET_SCHED_ADJUST_COST
7227 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7228 
7229 #undef  TARGET_ATTRIBUTE_TABLE
7230 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7231 
7232 #undef TARGET_ASM_INTEGER
7233 #define TARGET_ASM_INTEGER spu_assemble_integer
7234 
7235 #undef TARGET_SCALAR_MODE_SUPPORTED_P
7236 #define TARGET_SCALAR_MODE_SUPPORTED_P	spu_scalar_mode_supported_p
7237 
7238 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7239 #define TARGET_VECTOR_MODE_SUPPORTED_P	spu_vector_mode_supported_p
7240 
7241 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7242 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7243 
7244 #undef TARGET_ASM_GLOBALIZE_LABEL
7245 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7246 
7247 #undef TARGET_PASS_BY_REFERENCE
7248 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7249 
7250 #undef TARGET_FUNCTION_ARG
7251 #define TARGET_FUNCTION_ARG spu_function_arg
7252 
7253 #undef TARGET_FUNCTION_ARG_ADVANCE
7254 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7255 
7256 #undef TARGET_MUST_PASS_IN_STACK
7257 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7258 
7259 #undef TARGET_BUILD_BUILTIN_VA_LIST
7260 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7261 
7262 #undef TARGET_EXPAND_BUILTIN_VA_START
7263 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7264 
7265 #undef TARGET_SETUP_INCOMING_VARARGS
7266 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7267 
7268 #undef TARGET_MACHINE_DEPENDENT_REORG
7269 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7270 
7271 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7272 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7273 
7274 #undef TARGET_INIT_LIBFUNCS
7275 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7276 
7277 #undef TARGET_RETURN_IN_MEMORY
7278 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7279 
7280 #undef  TARGET_ENCODE_SECTION_INFO
7281 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7282 
7283 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7284 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7285 
7286 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7287 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7288 
7289 #undef TARGET_VECTORIZE_INIT_COST
7290 #define TARGET_VECTORIZE_INIT_COST spu_init_cost
7291 
7292 #undef TARGET_VECTORIZE_ADD_STMT_COST
7293 #define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7294 
7295 #undef TARGET_VECTORIZE_FINISH_COST
7296 #define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7297 
7298 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
7299 #define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7300 
7301 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7302 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7303 
7304 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7305 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7306 
7307 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7308 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7309 
7310 #undef TARGET_SCHED_SMS_RES_MII
7311 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7312 
7313 #undef TARGET_SECTION_TYPE_FLAGS
7314 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7315 
7316 #undef TARGET_ASM_SELECT_SECTION
7317 #define TARGET_ASM_SELECT_SECTION  spu_select_section
7318 
7319 #undef TARGET_ASM_UNIQUE_SECTION
7320 #define TARGET_ASM_UNIQUE_SECTION  spu_unique_section
7321 
7322 #undef TARGET_LEGITIMATE_ADDRESS_P
7323 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7324 
7325 #undef TARGET_LEGITIMATE_CONSTANT_P
7326 #define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7327 
7328 #undef TARGET_TRAMPOLINE_INIT
7329 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7330 
7331 #undef TARGET_WARN_FUNC_RETURN
7332 #define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7333 
7334 #undef TARGET_OPTION_OVERRIDE
7335 #define TARGET_OPTION_OVERRIDE spu_option_override
7336 
7337 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7338 #define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7339 
7340 #undef TARGET_REF_MAY_ALIAS_ERRNO
7341 #define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7342 
7343 #undef TARGET_ASM_OUTPUT_MI_THUNK
7344 #define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7345 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7346 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7347 
7348 /* Variable tracking should be run after all optimizations which
7349    change order of insns.  It also needs a valid CFG.  */
7350 #undef TARGET_DELAY_VARTRACK
7351 #define TARGET_DELAY_VARTRACK true
7352 
7353 #undef TARGET_CANONICALIZE_COMPARISON
7354 #define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7355 
7356 #undef TARGET_CAN_USE_DOLOOP_P
7357 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
7358 
7359 struct gcc_target targetm = TARGET_INITIALIZER;
7360 
7361 #include "gt-spu.h"
7362