1 /* Copyright (C) 2006-2019 Free Software Foundation, Inc.
2 
3    This file is free software; you can redistribute it and/or modify it under
4    the terms of the GNU General Public License as published by the Free
5    Software Foundation; either version 3 of the License, or (at your option)
6    any later version.
7 
8    This file is distributed in the hope that it will be useful, but WITHOUT
9    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
11    for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with GCC; see the file COPYING3.  If not see
15    <http://www.gnu.org/licenses/>.  */
16 
17 #define IN_TARGET_CODE 1
18 
19 #include "config.h"
20 #include "system.h"
21 #include "coretypes.h"
22 #include "backend.h"
23 #include "target.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "cfghooks.h"
28 #include "cfgloop.h"
29 #include "df.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "stringpool.h"
33 #include "attribs.h"
34 #include "expmed.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "diagnostic-core.h"
40 #include "insn-attr.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "explow.h"
47 #include "expr.h"
48 #include "output.h"
49 #include "cfgrtl.h"
50 #include "cfgbuild.h"
51 #include "langhooks.h"
52 #include "reload.h"
53 #include "sched-int.h"
54 #include "params.h"
55 #include "gimplify.h"
56 #include "tm-constrs.h"
57 #include "ddg.h"
58 #include "dumpfile.h"
59 #include "builtins.h"
60 #include "rtl-iter.h"
61 #include "flags.h"
62 #include "toplev.h"
63 
64 /* This file should be included last.  */
65 #include "target-def.h"
66 
67 /* Builtin types, data and prototypes. */
68 
69 enum spu_builtin_type_index
70 {
71   SPU_BTI_END_OF_PARAMS,
72 
73   /* We create new type nodes for these. */
74   SPU_BTI_V16QI,
75   SPU_BTI_V8HI,
76   SPU_BTI_V4SI,
77   SPU_BTI_V2DI,
78   SPU_BTI_V4SF,
79   SPU_BTI_V2DF,
80   SPU_BTI_UV16QI,
81   SPU_BTI_UV8HI,
82   SPU_BTI_UV4SI,
83   SPU_BTI_UV2DI,
84 
85   /* A 16-byte type. (Implemented with V16QI_type_node) */
86   SPU_BTI_QUADWORD,
87 
88   /* These all correspond to intSI_type_node */
89   SPU_BTI_7,
90   SPU_BTI_S7,
91   SPU_BTI_U7,
92   SPU_BTI_S10,
93   SPU_BTI_S10_4,
94   SPU_BTI_U14,
95   SPU_BTI_16,
96   SPU_BTI_S16,
97   SPU_BTI_S16_2,
98   SPU_BTI_U16,
99   SPU_BTI_U16_2,
100   SPU_BTI_U18,
101 
102   /* These correspond to the standard types */
103   SPU_BTI_INTQI,
104   SPU_BTI_INTHI,
105   SPU_BTI_INTSI,
106   SPU_BTI_INTDI,
107 
108   SPU_BTI_UINTQI,
109   SPU_BTI_UINTHI,
110   SPU_BTI_UINTSI,
111   SPU_BTI_UINTDI,
112 
113   SPU_BTI_FLOAT,
114   SPU_BTI_DOUBLE,
115 
116   SPU_BTI_VOID,
117   SPU_BTI_PTR,
118 
119   SPU_BTI_MAX
120 };
121 
122 #define V16QI_type_node               (spu_builtin_types[SPU_BTI_V16QI])
123 #define V8HI_type_node                (spu_builtin_types[SPU_BTI_V8HI])
124 #define V4SI_type_node                (spu_builtin_types[SPU_BTI_V4SI])
125 #define V2DI_type_node                (spu_builtin_types[SPU_BTI_V2DI])
126 #define V4SF_type_node                (spu_builtin_types[SPU_BTI_V4SF])
127 #define V2DF_type_node                (spu_builtin_types[SPU_BTI_V2DF])
128 #define unsigned_V16QI_type_node      (spu_builtin_types[SPU_BTI_UV16QI])
129 #define unsigned_V8HI_type_node       (spu_builtin_types[SPU_BTI_UV8HI])
130 #define unsigned_V4SI_type_node       (spu_builtin_types[SPU_BTI_UV4SI])
131 #define unsigned_V2DI_type_node       (spu_builtin_types[SPU_BTI_UV2DI])
132 
133 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
134 
135 struct spu_builtin_range
136 {
137   int low, high;
138 };
139 
140 static struct spu_builtin_range spu_builtin_range[] = {
141   {-0x40ll, 0x7fll},		/* SPU_BTI_7     */
142   {-0x40ll, 0x3fll},		/* SPU_BTI_S7    */
143   {0ll, 0x7fll},		/* SPU_BTI_U7    */
144   {-0x200ll, 0x1ffll},		/* SPU_BTI_S10   */
145   {-0x2000ll, 0x1fffll},	/* SPU_BTI_S10_4 */
146   {0ll, 0x3fffll},		/* SPU_BTI_U14   */
147   {-0x8000ll, 0xffffll},	/* SPU_BTI_16    */
148   {-0x8000ll, 0x7fffll},	/* SPU_BTI_S16   */
149   {-0x20000ll, 0x1ffffll},	/* SPU_BTI_S16_2 */
150   {0ll, 0xffffll},		/* SPU_BTI_U16   */
151   {0ll, 0x3ffffll},		/* SPU_BTI_U16_2 */
152   {0ll, 0x3ffffll},		/* SPU_BTI_U18   */
153 };
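/* Each entry above is the inclusive [low, high] range accepted for the
   immediate kind named in its comment; e.g. SPU_BTI_S10 is the signed
   10-bit range -512..511 and SPU_BTI_U7 the unsigned 7-bit range 0..127.  */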
154 
155 
156 /*  Target specific attribute specifications.  */
157 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
158 
159 /*  Prototypes and external defs.  */
160 static int get_pipe (rtx_insn *insn);
161 static int spu_naked_function_p (tree func);
162 static int mem_is_padded_component_ref (rtx x);
163 static void fix_range (const char *);
164 static rtx spu_expand_load (rtx, rtx, rtx, int);
165 
166 /* Which instruction set architecture to use.  */
167 int spu_arch;
168 /* Which cpu are we tuning for.  */
169 int spu_tune;
170 
171 /* The hardware requires 8 insns between a hint and the branch it
172    affects.  This variable describes how many rtl instructions the
173    compiler needs to see before inserting a hint, and then the compiler
174    will insert enough nops to make it at least 8 insns.  The default is
175    for the compiler to allow up to 2 nops to be emitted.  The nops are
176    inserted in pairs, so we round down. */
177 int spu_hint_dist = (8*4) - (2*4);
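/* With the defaults above, a hint must appear 8*4 == 32 bytes before the
   branch; allowing 2 nops (2*4 == 8 bytes) of padding means the compiler
   only needs to find a slot at least 24 bytes (6 insns) ahead.  */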
178 
179 enum spu_immediate {
180   SPU_NONE,
181   SPU_IL,
182   SPU_ILA,
183   SPU_ILH,
184   SPU_ILHU,
185   SPU_ORI,
186   SPU_ORHI,
187   SPU_ORBI,
188   SPU_IOHL
189 };
190 enum immediate_class
191 {
192   IC_POOL,			/* constant pool */
193   IC_IL1,			/* one il* instruction */
194   IC_IL2,			/* both ilhu and iohl instructions */
195   IC_IL1s,			/* one il* instruction */
196   IC_IL2s,			/* both ilhu and iohl instructions */
197   IC_FSMBI,			/* the fsmbi instruction */
198   IC_CPAT,			/* one of the c*d instructions */
199   IC_FSMBI2			/* fsmbi plus 1 other instruction */
200 };
201 
202 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
203 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
204 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
205 static enum immediate_class classify_immediate (rtx op,
206 						machine_mode mode);
207 
208 /* Pointer mode for __ea references.  */
209 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
210 
211 
212 /* Define the structure for the machine field in struct function.  */
213 struct GTY(()) machine_function
214 {
215   /* Register to use for PIC accesses.  */
216   rtx pic_reg;
217 };
218 
219 /* How to allocate a 'struct machine_function'.  */
220 static struct machine_function *
221 spu_init_machine_status (void)
222 {
223   return ggc_cleared_alloc<machine_function> ();
224 }
225 
226 /* Implement TARGET_OPTION_OVERRIDE.  */
227 static void
228 spu_option_override (void)
229 {
230   /* Set up function hooks.  */
231   init_machine_status = spu_init_machine_status;
232 
233   /* Small loops will be completely peeled (unrolled) at -O3.  For SPU
234      it is more important to keep code small by default.  */
235   if (!flag_unroll_loops && !flag_peel_loops)
236     maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
237 			   global_options.x_param_values,
238 			   global_options_set.x_param_values);
239 
240   flag_omit_frame_pointer = 1;
241 
242   /* Functions must be 8 byte aligned so we correctly handle dual issue */
243   parse_alignment_opts ();
244   if (align_functions.levels[0].get_value () < 8)
245     str_align_functions = "8";
246 
247   spu_hint_dist = 8*4 - spu_max_nops*4;
248   if (spu_hint_dist < 0)
249     spu_hint_dist = 0;
250 
251   if (spu_fixed_range_string)
252     fix_range (spu_fixed_range_string);
253 
254   /* Determine processor architectural level.  */
255   if (spu_arch_string)
256     {
257       if (strcmp (&spu_arch_string[0], "cell") == 0)
258         spu_arch = PROCESSOR_CELL;
259       else if (strcmp (&spu_arch_string[0], "celledp") == 0)
260         spu_arch = PROCESSOR_CELLEDP;
261       else
262 	error ("bad value (%s) for %<-march=%> switch", spu_arch_string);
263     }
264 
265   /* Determine processor to tune for.  */
266   if (spu_tune_string)
267     {
268       if (strcmp (&spu_tune_string[0], "cell") == 0)
269         spu_tune = PROCESSOR_CELL;
270       else if (strcmp (&spu_tune_string[0], "celledp") == 0)
271         spu_tune = PROCESSOR_CELLEDP;
272       else
273 	error ("bad value (%s) for %<-mtune=%> switch", spu_tune_string);
274     }
275 
276   /* Change defaults according to the processor architecture.  */
277   if (spu_arch == PROCESSOR_CELLEDP)
278     {
279       /* If no command line option has been otherwise specified, change
280 	 the default to -mno-safe-hints on celledp -- only the original
281 	 Cell/B.E. processors require this workaround.  */
282       if (!(target_flags_explicit & MASK_SAFE_HINTS))
283 	target_flags &= ~MASK_SAFE_HINTS;
284     }
285 
286   REAL_MODE_FORMAT (SFmode) = &spu_single_format;
287 }
288 
289 /* Implement TARGET_HARD_REGNO_NREGS.  */
290 
291 static unsigned int
292 spu_hard_regno_nregs (unsigned int, machine_mode mode)
293 {
294   return CEIL (GET_MODE_BITSIZE (mode), MAX_FIXED_MODE_SIZE);
295 }
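/* SPU registers are 128 bits wide and MAX_FIXED_MODE_SIZE matches that
   width, so the computation above yields 1 for every supported mode:
   e.g. CEIL (32, 128) == CEIL (128, 128) == 1.  */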
296 
297 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
298    struct attribute_spec.handler.  */
299 
300 /* True if MODE is valid for the target.  By "valid", we mean able to
301    be manipulated in non-trivial ways.  In particular, this means all
302    the arithmetic is supported.  */
303 static bool
304 spu_scalar_mode_supported_p (scalar_mode mode)
305 {
306   switch (mode)
307     {
308     case E_QImode:
309     case E_HImode:
310     case E_SImode:
311     case E_SFmode:
312     case E_DImode:
313     case E_TImode:
314     case E_DFmode:
315       return true;
316 
317     default:
318       return false;
319     }
320 }
321 
322 /* Similarly for vector modes.  "Supported" here is less strict.  At
323    least some operations are supported; need to check optabs or builtins
324    for further details.  */
325 static bool
326 spu_vector_mode_supported_p (machine_mode mode)
327 {
328   switch (mode)
329     {
330     case E_V16QImode:
331     case E_V8HImode:
332     case E_V4SImode:
333     case E_V2DImode:
334     case E_V4SFmode:
335     case E_V2DFmode:
336       return true;
337 
338     default:
339       return false;
340     }
341 }
342 
343 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
344    least significant bytes of the outer mode.  This function returns
345    TRUE for the SUBREGs where this is correct.  */
346 int
347 valid_subreg (rtx op)
348 {
349   machine_mode om = GET_MODE (op);
350   machine_mode im = GET_MODE (SUBREG_REG (op));
351   return om != VOIDmode && im != VOIDmode
352     && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
353 	|| (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
354 	|| (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
355 }
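/* For example, (subreg:SI (reg:QI)) passes (both sizes <= 4 bytes) and
   (subreg:V4SI (reg:TI)) passes (both sizes are 16 bytes), but
   (subreg:TI (reg:SI)) fails all three size checks and is rejected.  */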
356 
357 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
358    and adjust the start offset.  */
359 static rtx
360 adjust_operand (rtx op, HOST_WIDE_INT * start)
361 {
362   machine_mode mode;
363   int op_size;
364   /* Strip any paradoxical SUBREG.  */
365   if (GET_CODE (op) == SUBREG
366       && (GET_MODE_BITSIZE (GET_MODE (op))
367 	  > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
368     {
369       if (start)
370 	*start -=
371 	  GET_MODE_BITSIZE (GET_MODE (op)) -
372 	  GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
373       op = SUBREG_REG (op);
374     }
375   /* If it is smaller than SI, make sure we end up with an SImode SUBREG.  */
376   op_size = GET_MODE_BITSIZE (GET_MODE (op));
377   if (op_size < 32)
378     {
379       if (start)
380 	*start += 32 - op_size;
381       op_size = 32;
382     }
383   /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
384   mode = int_mode_for_size (op_size, 0).require ();
385   if (mode != GET_MODE (op))
386     op = gen_rtx_SUBREG (mode, op, 0);
387   return op;
388 }
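/* Example: for a paradoxical (subreg:TI (reg:QI)) with *START == 125,
   stripping the SUBREG subtracts 128 - 8 == 120 (giving 5), and widening
   QImode to SImode then adds 32 - 8 == 24, so the caller ends up with an
   SImode SUBREG and *START == 29.  */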
389 
390 void
391 spu_expand_extv (rtx ops[], int unsignedp)
392 {
393   rtx dst = ops[0], src = ops[1];
394   HOST_WIDE_INT width = INTVAL (ops[2]);
395   HOST_WIDE_INT start = INTVAL (ops[3]);
396   HOST_WIDE_INT align_mask;
397   rtx s0, s1, mask, r0;
398 
399   gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
400 
401   if (MEM_P (src))
402     {
403       /* First, determine if we need 1 TImode load or 2.  We need only 1
404          if the bits being extracted do not cross the alignment boundary
405          as determined by the MEM and its address. */
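      /* For example, with a 128-bit-aligned MEM, align_mask is -128:
         extracting 32 bits starting at bit 64 stays in one quadword
         (64 & -128 == 95 & -128 == 0) and needs a single load, while
         starting at bit 112 crosses into the next quadword
         (112 & -128 == 0 but 143 & -128 == 128) and needs two.  */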
406 
407       align_mask = -MEM_ALIGN (src);
408       if ((start & align_mask) == ((start + width - 1) & align_mask))
409 	{
410 	  /* Alignment is sufficient for 1 load. */
411 	  s0 = gen_reg_rtx (TImode);
412 	  r0 = spu_expand_load (s0, 0, src, start / 8);
413 	  start &= 7;
414 	  if (r0)
415 	    emit_insn (gen_rotqby_ti (s0, s0, r0));
416 	}
417       else
418 	{
419 	  /* Need 2 loads. */
420 	  s0 = gen_reg_rtx (TImode);
421 	  s1 = gen_reg_rtx (TImode);
422 	  r0 = spu_expand_load (s0, s1, src, start / 8);
423 	  start &= 7;
424 
425 	  gcc_assert (start + width <= 128);
426 	  if (r0)
427 	    {
428 	      rtx r1 = gen_reg_rtx (SImode);
429 	      mask = gen_reg_rtx (TImode);
430 	      emit_move_insn (mask, GEN_INT (-1));
431 	      emit_insn (gen_rotqby_ti (s0, s0, r0));
432 	      emit_insn (gen_rotqby_ti (s1, s1, r0));
433 	      if (GET_CODE (r0) == CONST_INT)
434 		r1 = GEN_INT (INTVAL (r0) & 15);
435 	      else
436 		emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
437 	      emit_insn (gen_shlqby_ti (mask, mask, r1));
438 	      emit_insn (gen_selb (s0, s1, s0, mask));
439 	    }
440 	}
441 
442     }
443   else if (GET_CODE (src) == SUBREG)
444     {
445       rtx r = SUBREG_REG (src);
446       gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
447       s0 = gen_reg_rtx (TImode);
448       if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
449 	emit_insn (gen_rtx_SET (s0, gen_rtx_ZERO_EXTEND (TImode, r)));
450       else
451 	emit_move_insn (s0, src);
452     }
453   else
454     {
455       gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
456       s0 = gen_reg_rtx (TImode);
457       emit_move_insn (s0, src);
458     }
459 
460   /* Now s0 is TImode and contains the bits to extract at start. */
461 
462   if (start)
463     emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
464 
465   if (128 - width)
466     s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
467 
468   emit_move_insn (dst, s0);
469 }
470 
471 void
472 spu_expand_insv (rtx ops[])
473 {
474   HOST_WIDE_INT width = INTVAL (ops[1]);
475   HOST_WIDE_INT start = INTVAL (ops[2]);
476   unsigned HOST_WIDE_INT maskbits;
477   machine_mode dst_mode;
478   rtx dst = ops[0], src = ops[3];
479   int dst_size;
480   rtx mask;
481   rtx shift_reg;
482   int shift;
483 
484 
485   if (GET_CODE (ops[0]) == MEM)
486     dst = gen_reg_rtx (TImode);
487   else
488     dst = adjust_operand (dst, &start);
489   dst_mode = GET_MODE (dst);
490   dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
491 
492   if (CONSTANT_P (src))
493     {
494       machine_mode m =
495 	(width <= 32 ? SImode : width <= 64 ? DImode : TImode);
496       src = force_reg (m, convert_to_mode (m, src, 0));
497     }
498   src = adjust_operand (src, 0);
499 
500   mask = gen_reg_rtx (dst_mode);
501   shift_reg = gen_reg_rtx (dst_mode);
502   shift = dst_size - start - width;
503 
504   /* It's not safe to use subreg here because the compiler assumes
505      that the SUBREG_REG is right justified in the SUBREG. */
506   convert_move (shift_reg, src, 1);
507 
508   if (shift > 0)
509     {
510       switch (dst_mode)
511 	{
512 	case E_SImode:
513 	  emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
514 	  break;
515 	case E_DImode:
516 	  emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
517 	  break;
518 	case E_TImode:
519 	  emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
520 	  break;
521 	default:
522 	  abort ();
523 	}
524     }
525   else if (shift < 0)
526     abort ();
527 
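  /* Build a mask with WIDTH one-bits starting START bits from the MSB of
     the destination.  E.g. inserting width 8 at start 4 into a 32-bit
     destination gives maskbits 0x0ff00000.  */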
528   switch (dst_size)
529     {
530     case 32:
531       maskbits = (~(unsigned HOST_WIDE_INT)0 << (32 - width - start));
532       if (start)
533 	maskbits += ((unsigned HOST_WIDE_INT)1 << (32 - start));
534       emit_move_insn (mask, GEN_INT (maskbits));
535       break;
536     case 64:
537       maskbits = (~(unsigned HOST_WIDE_INT)0 << (64 - width - start));
538       if (start)
539 	maskbits += ((unsigned HOST_WIDE_INT)1 << (64 - start));
540       emit_move_insn (mask, GEN_INT (maskbits));
541       break;
542     case 128:
543       {
544 	unsigned char arr[16];
545 	int i = start / 8;
546 	memset (arr, 0, sizeof (arr));
547 	arr[i] = 0xff >> (start & 7);
548 	for (i++; i <= (start + width - 1) / 8; i++)
549 	  arr[i] = 0xff;
550 	arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
551 	emit_move_insn (mask, array_to_constant (TImode, arr));
552       }
553       break;
554     default:
555       abort ();
556     }
557   if (GET_CODE (ops[0]) == MEM)
558     {
559       rtx low = gen_reg_rtx (SImode);
560       rtx rotl = gen_reg_rtx (SImode);
561       rtx mask0 = gen_reg_rtx (TImode);
562       rtx addr;
563       rtx addr0;
564       rtx addr1;
565       rtx mem;
566 
567       addr = force_reg (Pmode, XEXP (ops[0], 0));
568       addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
569       emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
570       emit_insn (gen_negsi2 (rotl, low));
571       emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
572       emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
573       mem = change_address (ops[0], TImode, addr0);
574       set_mem_alias_set (mem, 0);
575       emit_move_insn (dst, mem);
576       emit_insn (gen_selb (dst, dst, shift_reg, mask0));
577       if (start + width > MEM_ALIGN (ops[0]))
578 	{
579 	  rtx shl = gen_reg_rtx (SImode);
580 	  rtx mask1 = gen_reg_rtx (TImode);
581 	  rtx dst1 = gen_reg_rtx (TImode);
582 	  rtx mem1;
583 	  addr1 = plus_constant (Pmode, addr, 16);
584 	  addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
585 	  emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
586 	  emit_insn (gen_shlqby_ti (mask1, mask, shl));
587 	  mem1 = change_address (ops[0], TImode, addr1);
588 	  set_mem_alias_set (mem1, 0);
589 	  emit_move_insn (dst1, mem1);
590 	  emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
591 	  emit_move_insn (mem1, dst1);
592 	}
593       emit_move_insn (mem, dst);
594     }
595   else
596     emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
597 }
598 
599 
600 int
601 spu_expand_block_move (rtx ops[])
602 {
603   HOST_WIDE_INT bytes, align, offset;
604   rtx src, dst, sreg, dreg, target;
605   int i;
606   if (GET_CODE (ops[2]) != CONST_INT
607       || GET_CODE (ops[3]) != CONST_INT
608       || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
609     return 0;
610 
611   bytes = INTVAL (ops[2]);
612   align = INTVAL (ops[3]);
613 
614   if (bytes <= 0)
615     return 1;
616 
617   dst = ops[0];
618   src = ops[1];
619 
620   if (align == 16)
621     {
622       for (offset = 0; offset + 16 <= bytes; offset += 16)
623 	{
624 	  dst = adjust_address (ops[0], V16QImode, offset);
625 	  src = adjust_address (ops[1], V16QImode, offset);
626 	  emit_move_insn (dst, src);
627 	}
628       if (offset < bytes)
629 	{
630 	  rtx mask;
631 	  unsigned char arr[16] = { 0 };
632 	  for (i = 0; i < bytes - offset; i++)
633 	    arr[i] = 0xff;
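	  /* selb takes a byte from SREG where the mask byte is 0xff and
	     keeps the DREG byte where it is 0x00, so only the first
	     (bytes - offset) bytes of the final quadword come from the
	     source; the rest of the destination is preserved.  */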
634 	  dst = adjust_address (ops[0], V16QImode, offset);
635 	  src = adjust_address (ops[1], V16QImode, offset);
636 	  mask = gen_reg_rtx (V16QImode);
637 	  sreg = gen_reg_rtx (V16QImode);
638 	  dreg = gen_reg_rtx (V16QImode);
639 	  target = gen_reg_rtx (V16QImode);
640 	  emit_move_insn (mask, array_to_constant (V16QImode, arr));
641 	  emit_move_insn (dreg, dst);
642 	  emit_move_insn (sreg, src);
643 	  emit_insn (gen_selb (target, dreg, sreg, mask));
644 	  emit_move_insn (dst, target);
645 	}
646       return 1;
647     }
648   return 0;
649 }
650 
651 enum spu_comp_code
652 { SPU_EQ, SPU_GT, SPU_GTU };
653 
654 int spu_comp_icode[12][3] = {
655  {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
656  {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
657  {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
658  {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
659  {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
660  {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
661  {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
662  {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
663  {CODE_FOR_ceq_v8hi,  CODE_FOR_cgt_v8hi,  CODE_FOR_clgt_v8hi},
664  {CODE_FOR_ceq_v4si,  CODE_FOR_cgt_v4si,  CODE_FOR_clgt_v4si},
665  {CODE_FOR_ceq_v4sf,  CODE_FOR_cgt_v4sf, 0},
666  {CODE_FOR_ceq_v2df,  CODE_FOR_cgt_v2df, 0},
667 };
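/* Rows are indexed by operand mode in the order used by
   spu_emit_branch_or_set below (QI, HI, SI, DI, TI, SF, DF, V16QI, V8HI,
   V4SI, V4SF, V2DF); columns by spu_comp_code.  A zero entry means there
   is no such compare instruction (e.g. no unsigned compare for the
   floating-point modes).  */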
668 
669 /* Generate a compare for CODE and emit either a branch or a set of the
670    result, depending on IS_SET.  GCC can figure this out too if we don't
671    provide all variations of compares, but since GCC always wants to use
672    WORD_MODE, we can generate better code in most cases if we do it
673    ourselves.  */
674 void
675 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
676 {
677   int reverse_compare = 0;
678   int reverse_test = 0;
679   rtx compare_result, eq_result;
680   rtx comp_rtx, eq_rtx;
681   machine_mode comp_mode;
682   machine_mode op_mode;
683   enum spu_comp_code scode, eq_code;
684   enum insn_code ior_code;
685   enum rtx_code code = GET_CODE (cmp);
686   rtx op0 = XEXP (cmp, 0);
687   rtx op1 = XEXP (cmp, 1);
688   int index;
689   int eq_test = 0;
690 
691   /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
692      and so on, to keep the constant in operand 1. */
693   if (GET_CODE (op1) == CONST_INT)
694     {
695       HOST_WIDE_INT val = INTVAL (op1) - 1;
696       if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
697 	switch (code)
698 	  {
699 	  case GE:
700 	    op1 = GEN_INT (val);
701 	    code = GT;
702 	    break;
703 	  case LT:
704 	    op1 = GEN_INT (val);
705 	    code = LE;
706 	    break;
707 	  case GEU:
708 	    op1 = GEN_INT (val);
709 	    code = GTU;
710 	    break;
711 	  case LTU:
712 	    op1 = GEN_INT (val);
713 	    code = LEU;
714 	    break;
715 	  default:
716 	    break;
717 	  }
718     }
719 
720   /* However, if we generate an integer result, performing a reverse test
721      would require an extra negation, so avoid that where possible.  */
722   if (GET_CODE (op1) == CONST_INT && is_set == 1)
723     {
724       HOST_WIDE_INT val = INTVAL (op1) + 1;
725       if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
726 	switch (code)
727 	  {
728 	  case LE:
729 	    op1 = GEN_INT (val);
730 	    code = LT;
731 	    break;
732 	  case LEU:
733 	    op1 = GEN_INT (val);
734 	    code = LTU;
735 	    break;
736 	  default:
737 	    break;
738 	  }
739     }
740 
741   comp_mode = SImode;
742   op_mode = GET_MODE (op0);
743 
744   switch (code)
745     {
746     case GE:
747       scode = SPU_GT;
748       if (HONOR_NANS (op_mode))
749 	{
750 	  reverse_compare = 0;
751 	  reverse_test = 0;
752 	  eq_test = 1;
753 	  eq_code = SPU_EQ;
754 	}
755       else
756 	{
757 	  reverse_compare = 1;
758 	  reverse_test = 1;
759 	}
760       break;
761     case LE:
762       scode = SPU_GT;
763       if (HONOR_NANS (op_mode))
764 	{
765 	  reverse_compare = 1;
766 	  reverse_test = 0;
767 	  eq_test = 1;
768 	  eq_code = SPU_EQ;
769 	}
770       else
771 	{
772 	  reverse_compare = 0;
773 	  reverse_test = 1;
774 	}
775       break;
776     case LT:
777       reverse_compare = 1;
778       reverse_test = 0;
779       scode = SPU_GT;
780       break;
781     case GEU:
782       reverse_compare = 1;
783       reverse_test = 1;
784       scode = SPU_GTU;
785       break;
786     case LEU:
787       reverse_compare = 0;
788       reverse_test = 1;
789       scode = SPU_GTU;
790       break;
791     case LTU:
792       reverse_compare = 1;
793       reverse_test = 0;
794       scode = SPU_GTU;
795       break;
796     case NE:
797       reverse_compare = 0;
798       reverse_test = 1;
799       scode = SPU_EQ;
800       break;
801 
802     case EQ:
803       scode = SPU_EQ;
804       break;
805     case GT:
806       scode = SPU_GT;
807       break;
808     case GTU:
809       scode = SPU_GTU;
810       break;
811     default:
812       scode = SPU_EQ;
813       break;
814     }
815 
816   switch (op_mode)
817     {
818     case E_QImode:
819       index = 0;
820       comp_mode = QImode;
821       break;
822     case E_HImode:
823       index = 1;
824       comp_mode = HImode;
825       break;
826     case E_SImode:
827       index = 2;
828       break;
829     case E_DImode:
830       index = 3;
831       break;
832     case E_TImode:
833       index = 4;
834       break;
835     case E_SFmode:
836       index = 5;
837       break;
838     case E_DFmode:
839       index = 6;
840       break;
841     case E_V16QImode:
842       index = 7;
843       comp_mode = op_mode;
844       break;
845     case E_V8HImode:
846       index = 8;
847       comp_mode = op_mode;
848       break;
849     case E_V4SImode:
850       index = 9;
851       comp_mode = op_mode;
852       break;
853     case E_V4SFmode:
854       index = 10;
855       comp_mode = V4SImode;
856       break;
857     case E_V2DFmode:
858       index = 11;
859       comp_mode = V2DImode;
860       break;
861     case E_V2DImode:
862     default:
863       abort ();
864     }
865 
866   if (GET_MODE (op1) == DFmode
867       && (scode != SPU_GT && scode != SPU_EQ))
868     abort ();
869 
870   if (is_set == 0 && op1 == const0_rtx
871       && (GET_MODE (op0) == SImode
872 	  || GET_MODE (op0) == HImode
873 	  || GET_MODE (op0) == QImode) && scode == SPU_EQ)
874     {
875       /* Don't need to set a register with the result when we are
876          comparing against zero and branching. */
877       reverse_test = !reverse_test;
878       compare_result = op0;
879     }
880   else
881     {
882       compare_result = gen_reg_rtx (comp_mode);
883 
884       if (reverse_compare)
885 	{
886 	  rtx t = op1;
887 	  op1 = op0;
888 	  op0 = t;
889 	}
890 
891       if (spu_comp_icode[index][scode] == 0)
892 	abort ();
893 
894       if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
895 	  (op0, op_mode))
896 	op0 = force_reg (op_mode, op0);
897       if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
898 	  (op1, op_mode))
899 	op1 = force_reg (op_mode, op1);
900       comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
901 							 op0, op1);
902       if (comp_rtx == 0)
903 	abort ();
904       emit_insn (comp_rtx);
905 
906       if (eq_test)
907         {
908           eq_result = gen_reg_rtx (comp_mode);
909           eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
910 							     op0, op1);
911           if (eq_rtx == 0)
912 	    abort ();
913           emit_insn (eq_rtx);
914           ior_code = optab_handler (ior_optab, comp_mode);
915           gcc_assert (ior_code != CODE_FOR_nothing);
916           emit_insn (GEN_FCN (ior_code)
917 		     (compare_result, compare_result, eq_result));
918         }
919     }
920 
921   if (is_set == 0)
922     {
923       rtx bcomp;
924       rtx loc_ref;
925 
926       /* We don't have branch on QI compare insns, so we convert the
927          QI compare result to a HI result. */
928       if (comp_mode == QImode)
929 	{
930 	  rtx old_res = compare_result;
931 	  compare_result = gen_reg_rtx (HImode);
932 	  comp_mode = HImode;
933 	  emit_insn (gen_extendqihi2 (compare_result, old_res));
934 	}
935 
936       if (reverse_test)
937 	bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
938       else
939 	bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
940 
941       loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
942       emit_jump_insn (gen_rtx_SET (pc_rtx,
943 				   gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
944 							 loc_ref, pc_rtx)));
945     }
946   else if (is_set == 2)
947     {
948       rtx target = operands[0];
949       int compare_size = GET_MODE_BITSIZE (comp_mode);
950       int target_size = GET_MODE_BITSIZE (GET_MODE (target));
951       machine_mode mode = int_mode_for_size (target_size, 0).require ();
952       rtx select_mask;
953       rtx op_t = operands[2];
954       rtx op_f = operands[3];
955 
956       /* The result of the comparison can be SI, HI or QI mode.  Create a
957          mask based on that result. */
958       if (target_size > compare_size)
959 	{
960 	  select_mask = gen_reg_rtx (mode);
961 	  emit_insn (gen_extend_compare (select_mask, compare_result));
962 	}
963       else if (target_size < compare_size)
964 	select_mask =
965 	  gen_rtx_SUBREG (mode, compare_result,
966 			  (compare_size - target_size) / BITS_PER_UNIT);
967       else if (comp_mode != mode)
968 	select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
969       else
970 	select_mask = compare_result;
971 
972       if (GET_MODE (target) != GET_MODE (op_t)
973 	  || GET_MODE (target) != GET_MODE (op_f))
974 	abort ();
975 
976       if (reverse_test)
977 	emit_insn (gen_selb (target, op_t, op_f, select_mask));
978       else
979 	emit_insn (gen_selb (target, op_f, op_t, select_mask));
980     }
981   else
982     {
983       rtx target = operands[0];
984       if (reverse_test)
985 	emit_insn (gen_rtx_SET (compare_result,
986 				gen_rtx_NOT (comp_mode, compare_result)));
987       if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
988 	emit_insn (gen_extendhisi2 (target, compare_result));
989       else if (GET_MODE (target) == SImode
990 	       && GET_MODE (compare_result) == QImode)
991 	emit_insn (gen_extend_compare (target, compare_result));
992       else
993 	emit_move_insn (target, compare_result);
994     }
995 }
996 
997 HOST_WIDE_INT
998 const_double_to_hwint (rtx x)
999 {
1000   HOST_WIDE_INT val;
1001   if (GET_MODE (x) == SFmode)
1002     REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), val);
1003   else if (GET_MODE (x) == DFmode)
1004     {
1005       long l[2];
1006       REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
1007       val = l[0];
1008       val = (val << 32) | (l[1] & 0xffffffff);
1009     }
1010   else
1011     abort ();
1012   return val;
1013 }
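/* E.g. the SFmode constant 1.0 yields the IEEE single-precision image
   0x3f800000.  */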
1014 
1015 rtx
1016 hwint_to_const_double (machine_mode mode, HOST_WIDE_INT v)
1017 {
1018   long tv[2];
1019   REAL_VALUE_TYPE rv;
1020   gcc_assert (mode == SFmode || mode == DFmode);
1021 
1022   if (mode == SFmode)
1023     tv[0] = (v << 32) >> 32;
1024   else if (mode == DFmode)
1025     {
1026       tv[1] = (v << 32) >> 32;
1027       tv[0] = v >> 32;
1028     }
1029   real_from_target (&rv, tv, mode);
1030   return const_double_from_real_value (rv, mode);
1031 }
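/* The (v << 32) >> 32 idiom extracts the low 32 bits of V (sign-extended
   into the long) for use as a 32-bit target word; for DFmode the high
   half of V goes in tv[0] and the low half in tv[1] before being
   reassembled by real_from_target.  */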
1032 
1033 void
1034 print_operand_address (FILE * file, register rtx addr)
1035 {
1036   rtx reg;
1037   rtx offset;
1038 
1039   if (GET_CODE (addr) == AND
1040       && GET_CODE (XEXP (addr, 1)) == CONST_INT
1041       && INTVAL (XEXP (addr, 1)) == -16)
1042     addr = XEXP (addr, 0);
1043 
1044   switch (GET_CODE (addr))
1045     {
1046     case REG:
1047       fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1048       break;
1049 
1050     case PLUS:
1051       reg = XEXP (addr, 0);
1052       offset = XEXP (addr, 1);
1053       if (GET_CODE (offset) == REG)
1054 	{
1055 	  fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1056 		   reg_names[REGNO (offset)]);
1057 	}
1058       else if (GET_CODE (offset) == CONST_INT)
1059 	{
1060 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1061 		   INTVAL (offset), reg_names[REGNO (reg)]);
1062 	}
1063       else
1064 	abort ();
1065       break;
1066 
1067     case CONST:
1068     case LABEL_REF:
1069     case SYMBOL_REF:
1070     case CONST_INT:
1071       output_addr_const (file, addr);
1072       break;
1073 
1074     default:
1075       debug_rtx (addr);
1076       abort ();
1077     }
1078 }
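/* Example output, assuming the usual "$n" register names: a plain
   register address prints as "0($3)", and (plus (reg) (const_int 32))
   prints as "32($3)".  */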
1079 
1080 void
1081 print_operand (FILE * file, rtx x, int code)
1082 {
1083   machine_mode mode = GET_MODE (x);
1084   HOST_WIDE_INT val;
1085   unsigned char arr[16];
1086   int xcode = GET_CODE (x);
1087   int i, info;
1088   if (GET_MODE (x) == VOIDmode)
1089     switch (code)
1090       {
1091       case 'L':			/* 128 bits, signed */
1092       case 'm':			/* 128 bits, signed */
1093       case 'T':			/* 128 bits, signed */
1094       case 't':			/* 128 bits, signed */
1095 	mode = TImode;
1096 	break;
1097       case 'K':			/* 64 bits, signed */
1098       case 'k':			/* 64 bits, signed */
1099       case 'D':			/* 64 bits, signed */
1100       case 'd':			/* 64 bits, signed */
1101 	mode = DImode;
1102 	break;
1103       case 'J':			/* 32 bits, signed */
1104       case 'j':			/* 32 bits, signed */
1105       case 's':			/* 32 bits, signed */
1106       case 'S':			/* 32 bits, signed */
1107 	mode = SImode;
1108 	break;
1109       }
1110   switch (code)
1111     {
1112 
1113     case 'j':			/* 32 bits, signed */
1114     case 'k':			/* 64 bits, signed */
1115     case 'm':			/* 128 bits, signed */
1116       if (xcode == CONST_INT
1117 	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1118 	{
1119 	  gcc_assert (logical_immediate_p (x, mode));
1120 	  constant_to_array (mode, x, arr);
1121 	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1122 	  val = trunc_int_for_mode (val, SImode);
1123 	  switch (which_logical_immediate (val))
1124 	  {
1125 	  case SPU_ORI:
1126 	    break;
1127 	  case SPU_ORHI:
1128 	    fprintf (file, "h");
1129 	    break;
1130 	  case SPU_ORBI:
1131 	    fprintf (file, "b");
1132 	    break;
1133 	  default:
1134 	    gcc_unreachable();
1135 	  }
1136 	}
1137       else
1138 	gcc_unreachable();
1139       return;
1140 
1141     case 'J':			/* 32 bits, signed */
1142     case 'K':			/* 64 bits, signed */
1143     case 'L':			/* 128 bits, signed */
1144       if (xcode == CONST_INT
1145 	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1146 	{
1147 	  gcc_assert (logical_immediate_p (x, mode)
1148 		      || iohl_immediate_p (x, mode));
1149 	  constant_to_array (mode, x, arr);
1150 	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1151 	  val = trunc_int_for_mode (val, SImode);
1152 	  switch (which_logical_immediate (val))
1153 	  {
1154 	  case SPU_ORI:
1155 	  case SPU_IOHL:
1156 	    break;
1157 	  case SPU_ORHI:
1158 	    val = trunc_int_for_mode (val, HImode);
1159 	    break;
1160 	  case SPU_ORBI:
1161 	    val = trunc_int_for_mode (val, QImode);
1162 	    break;
1163 	  default:
1164 	    gcc_unreachable();
1165 	  }
1166 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1167 	}
1168       else
1169 	gcc_unreachable();
1170       return;
1171 
1172     case 't':			/* 128 bits, signed */
1173     case 'd':			/* 64 bits, signed */
1174     case 's':			/* 32 bits, signed */
1175       if (CONSTANT_P (x))
1176 	{
1177 	  enum immediate_class c = classify_immediate (x, mode);
1178 	  switch (c)
1179 	    {
1180 	    case IC_IL1:
1181 	      constant_to_array (mode, x, arr);
1182 	      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1183 	      val = trunc_int_for_mode (val, SImode);
1184 	      switch (which_immediate_load (val))
1185 		{
1186 		case SPU_IL:
1187 		  break;
1188 		case SPU_ILA:
1189 		  fprintf (file, "a");
1190 		  break;
1191 		case SPU_ILH:
1192 		  fprintf (file, "h");
1193 		  break;
1194 		case SPU_ILHU:
1195 		  fprintf (file, "hu");
1196 		  break;
1197 		default:
1198 		  gcc_unreachable ();
1199 		}
1200 	      break;
1201 	    case IC_CPAT:
1202 	      constant_to_array (mode, x, arr);
1203 	      cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1204 	      if (info == 1)
1205 		fprintf (file, "b");
1206 	      else if (info == 2)
1207 		fprintf (file, "h");
1208 	      else if (info == 4)
1209 		fprintf (file, "w");
1210 	      else if (info == 8)
1211 		fprintf (file, "d");
1212 	      break;
1213 	    case IC_IL1s:
1214 	      if (xcode == CONST_VECTOR)
1215 		{
1216 		  x = CONST_VECTOR_ELT (x, 0);
1217 		  xcode = GET_CODE (x);
1218 		}
1219 	      if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1220 		fprintf (file, "a");
1221 	      else if (xcode == HIGH)
1222 		fprintf (file, "hu");
1223 	      break;
1224 	    case IC_FSMBI:
1225 	    case IC_FSMBI2:
1226 	    case IC_IL2:
1227 	    case IC_IL2s:
1228 	    case IC_POOL:
1229 	      abort ();
1230 	    }
1231 	}
1232       else
1233 	gcc_unreachable ();
1234       return;
1235 
1236     case 'T':			/* 128 bits, signed */
1237     case 'D':			/* 64 bits, signed */
1238     case 'S':			/* 32 bits, signed */
1239       if (CONSTANT_P (x))
1240 	{
1241 	  enum immediate_class c = classify_immediate (x, mode);
1242 	  switch (c)
1243 	    {
1244 	    case IC_IL1:
1245 	      constant_to_array (mode, x, arr);
1246 	      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1247 	      val = trunc_int_for_mode (val, SImode);
1248 	      switch (which_immediate_load (val))
1249 		{
1250 		case SPU_IL:
1251 		case SPU_ILA:
1252 		  break;
1253 		case SPU_ILH:
1254 		case SPU_ILHU:
1255 		  val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1256 		  break;
1257 		default:
1258 		  gcc_unreachable ();
1259 		}
1260 	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1261 	      break;
1262 	    case IC_FSMBI:
1263 	      constant_to_array (mode, x, arr);
1264 	      val = 0;
1265 	      for (i = 0; i < 16; i++)
1266 		{
1267 		  val <<= 1;
1268 		  val |= arr[i] & 1;
1269 		}
1270 	      print_operand (file, GEN_INT (val), 0);
1271 	      break;
1272 	    case IC_CPAT:
1273 	      constant_to_array (mode, x, arr);
1274 	      cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1275 	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1276 	      break;
1277 	    case IC_IL1s:
1278 	      if (xcode == HIGH)
1279 		x = XEXP (x, 0);
1280 	      if (GET_CODE (x) == CONST_VECTOR)
1281 		x = CONST_VECTOR_ELT (x, 0);
1282 	      output_addr_const (file, x);
1283 	      if (xcode == HIGH)
1284 		fprintf (file, "@h");
1285 	      break;
1286 	    case IC_IL2:
1287 	    case IC_IL2s:
1288 	    case IC_FSMBI2:
1289 	    case IC_POOL:
1290 	      abort ();
1291 	    }
1292 	}
1293       else
1294 	gcc_unreachable ();
1295       return;
1296 
1297     case 'C':
1298       if (xcode == CONST_INT)
1299 	{
1300 	  /* Only the 4 least significant bits are relevant for generating
1301 	     control word instructions. */
1302 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1303 	  return;
1304 	}
1305       break;
1306 
1307     case 'M':			/* print code for c*d */
1308       if (GET_CODE (x) == CONST_INT)
1309 	switch (INTVAL (x))
1310 	  {
1311 	  case 1:
1312 	    fprintf (file, "b");
1313 	    break;
1314 	  case 2:
1315 	    fprintf (file, "h");
1316 	    break;
1317 	  case 4:
1318 	    fprintf (file, "w");
1319 	    break;
1320 	  case 8:
1321 	    fprintf (file, "d");
1322 	    break;
1323 	  default:
1324 	    gcc_unreachable();
1325 	  }
1326       else
1327 	gcc_unreachable();
1328       return;
1329 
1330     case 'N':			/* Negate the operand */
1331       if (xcode == CONST_INT)
1332 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1333       else if (xcode == CONST_VECTOR)
1334 	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1335 		 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1336       return;
1337 
1338     case 'I':			/* enable/disable interrupts */
1339       if (xcode == CONST_INT)
1340 	fprintf (file, "%s",  INTVAL (x) == 0 ? "d" : "e");
1341       return;
1342 
1343     case 'b':			/* branch modifiers */
1344       if (xcode == REG)
1345 	fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1346       else if (COMPARISON_P (x))
1347 	fprintf (file, "%s", xcode == NE ? "n" : "");
1348       return;
1349 
1350     case 'i':			/* indirect call */
1351       if (xcode == MEM)
1352 	{
1353 	  if (GET_CODE (XEXP (x, 0)) == REG)
1354 	    /* Used in indirect function calls. */
1355 	    fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1356 	  else
1357 	    output_address (GET_MODE (x), XEXP (x, 0));
1358 	}
1359       return;
1360 
1361     case 'p':			/* load/store */
1362       if (xcode == MEM)
1363 	{
1364 	  x = XEXP (x, 0);
1365 	  xcode = GET_CODE (x);
1366 	}
1367       if (xcode == AND)
1368 	{
1369 	  x = XEXP (x, 0);
1370 	  xcode = GET_CODE (x);
1371 	}
1372       if (xcode == REG)
1373 	fprintf (file, "d");
1374       else if (xcode == CONST_INT)
1375 	fprintf (file, "a");
1376       else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1377 	fprintf (file, "r");
1378       else if (xcode == PLUS || xcode == LO_SUM)
1379 	{
1380 	  if (GET_CODE (XEXP (x, 1)) == REG)
1381 	    fprintf (file, "x");
1382 	  else
1383 	    fprintf (file, "d");
1384 	}
1385       return;
1386 
1387     case 'e':
1388       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1389       val &= 0x7;
1390       output_addr_const (file, GEN_INT (val));
1391       return;
1392 
1393     case 'f':
1394       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1395       val &= 0x1f;
1396       output_addr_const (file, GEN_INT (val));
1397       return;
1398 
1399     case 'g':
1400       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1401       val &= 0x3f;
1402       output_addr_const (file, GEN_INT (val));
1403       return;
1404 
1405     case 'h':
1406       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1407       val = (val >> 3) & 0x1f;
1408       output_addr_const (file, GEN_INT (val));
1409       return;
1410 
1411     case 'E':
1412       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1413       val = -val;
1414       val &= 0x7;
1415       output_addr_const (file, GEN_INT (val));
1416       return;
1417 
1418     case 'F':
1419       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1420       val = -val;
1421       val &= 0x1f;
1422       output_addr_const (file, GEN_INT (val));
1423       return;
1424 
1425     case 'G':
1426       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1427       val = -val;
1428       val &= 0x3f;
1429       output_addr_const (file, GEN_INT (val));
1430       return;
1431 
1432     case 'H':
1433       val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1434       val = -(val & -8ll);
1435       val = (val >> 3) & 0x1f;
1436       output_addr_const (file, GEN_INT (val));
1437       return;
1438 
1439     case 'v':
1440     case 'w':
1441       constant_to_array (mode, x, arr);
1442       val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1443       output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1444       return;
1445 
1446     case 0:
1447       if (xcode == REG)
1448 	fprintf (file, "%s", reg_names[REGNO (x)]);
1449       else if (xcode == MEM)
1450 	output_address (GET_MODE (x), XEXP (x, 0));
1451       else if (xcode == CONST_VECTOR)
1452 	print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1453       else
1454 	output_addr_const (file, x);
1455       return;
1456 
1457       /* unused letters
1458 	              o qr  u   yz
1459 	AB            OPQR  UVWXYZ */
1460     default:
1461       output_operand_lossage ("invalid %%xn code");
1462     }
1463   gcc_unreachable ();
1464 }
1465 
1466 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1467    caller saved register.  For leaf functions it is more efficient to
1468    use a volatile register because we won't need to save and restore the
1469    pic register.  This routine is only valid after register allocation
1470    is completed, so we can pick an unused register.  */
1471 static rtx
1472 get_pic_reg (void)
1473 {
1474   if (!reload_completed && !reload_in_progress)
1475     abort ();
1476 
1477   /* If we've already made the decision, we need to keep with it.  Once we've
1478      decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1479      return true since the register is now live; this should not cause us to
1480      "switch back" to using pic_offset_table_rtx.  */
1481   if (!cfun->machine->pic_reg)
1482     {
1483       if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1484 	cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1485       else
1486 	cfun->machine->pic_reg = pic_offset_table_rtx;
1487     }
1488 
1489   return cfun->machine->pic_reg;
1490 }
1491 
1492 /* Split constant addresses to handle cases that are too large.
1493    Add in the pic register when in PIC mode.
1494    Split immediates that require more than 1 instruction. */
1495 int
1496 spu_split_immediate (rtx * ops)
1497 {
1498   machine_mode mode = GET_MODE (ops[0]);
1499   enum immediate_class c = classify_immediate (ops[1], mode);
1500 
1501   switch (c)
1502     {
1503     case IC_IL2:
1504       {
1505 	unsigned char arrhi[16];
1506 	unsigned char arrlo[16];
1507 	rtx to, temp, hi, lo;
1508 	int i;
1509 	/* We need to do reals as ints because the constant used in the
1510 	   IOR might not be a legitimate real constant. */
1511 	scalar_int_mode imode = int_mode_for_mode (mode).require ();
1512 	constant_to_array (mode, ops[1], arrhi);
1513 	if (imode != mode)
1514 	  to = simplify_gen_subreg (imode, ops[0], mode, 0);
1515 	else
1516 	  to = ops[0];
1517 	temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1518 	for (i = 0; i < 16; i += 4)
1519 	  {
1520 	    arrlo[i + 2] = arrhi[i + 2];
1521 	    arrlo[i + 3] = arrhi[i + 3];
1522 	    arrlo[i + 0] = arrlo[i + 1] = 0;
1523 	    arrhi[i + 2] = arrhi[i + 3] = 0;
1524 	  }
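	/* E.g. for the SImode constant 0x12345678 this produces
	   hi = 0x12340000 and lo = 0x00005678; the move of HI becomes an
	   ilhu and the IOR with LO an iohl (see IC_IL2 above).  */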
1525 	hi = array_to_constant (imode, arrhi);
1526 	lo = array_to_constant (imode, arrlo);
1527 	emit_move_insn (temp, hi);
1528 	emit_insn (gen_rtx_SET (to, gen_rtx_IOR (imode, temp, lo)));
1529 	return 1;
1530       }
1531     case IC_FSMBI2:
1532       {
1533 	unsigned char arr_fsmbi[16];
1534 	unsigned char arr_andbi[16];
1535 	rtx to, reg_fsmbi, reg_and;
1536 	int i;
1537 	/* We need to do reals as ints because the constant used in the
1538 	 * AND might not be a legitimate real constant. */
1539 	scalar_int_mode imode = int_mode_for_mode (mode).require ();
1540 	constant_to_array (mode, ops[1], arr_fsmbi);
1541 	if (imode != mode)
1542 	  to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1543 	else
1544 	  to = ops[0];
1545 	for (i = 0; i < 16; i++)
1546 	  if (arr_fsmbi[i] != 0)
1547 	    {
1548 	      arr_andbi[0] = arr_fsmbi[i];
1549 	      arr_fsmbi[i] = 0xff;
1550 	    }
1551 	for (i = 1; i < 16; i++)
1552 	  arr_andbi[i] = arr_andbi[0];
1553 	reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1554 	reg_and = array_to_constant (imode, arr_andbi);
1555 	emit_move_insn (to, reg_fsmbi);
1556 	emit_insn (gen_rtx_SET (to, gen_rtx_AND (imode, to, reg_and)));
1557 	return 1;
1558       }
1559     case IC_POOL:
1560       if (reload_in_progress || reload_completed)
1561 	{
1562 	  rtx mem = force_const_mem (mode, ops[1]);
1563 	  if (TARGET_LARGE_MEM)
1564 	    {
1565 	      rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1566 	      emit_move_insn (addr, XEXP (mem, 0));
1567 	      mem = replace_equiv_address (mem, addr);
1568 	    }
1569 	  emit_move_insn (ops[0], mem);
1570 	  return 1;
1571 	}
1572       break;
1573     case IC_IL1s:
1574     case IC_IL2s:
1575       if (reload_completed && GET_CODE (ops[1]) != HIGH)
1576 	{
1577 	  if (c == IC_IL2s)
1578 	    {
1579 	      emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1580 	      emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1581 	    }
1582 	  else if (flag_pic)
1583 	    emit_insn (gen_pic (ops[0], ops[1]));
1584 	  if (flag_pic)
1585 	    {
1586 	      rtx pic_reg = get_pic_reg ();
1587 	      emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1588 	    }
1589 	  return flag_pic || c == IC_IL2s;
1590 	}
1591       break;
1592     case IC_IL1:
1593     case IC_FSMBI:
1594     case IC_CPAT:
1595       break;
1596     }
1597   return 0;
1598 }
1599 
1600 /* SAVING is TRUE when we are generating the actual load and store
1601    instructions for REGNO.  When determining the size of the stack
1602    needed for saving registers we must allocate enough space for the
1603    worst case, because we don't always have the information early enough
1604    to avoid allocating it.  But we can at least eliminate the actual loads
1605    and stores during the prologue/epilogue.  */
1606 static int
1607 need_to_save_reg (int regno, int saving)
1608 {
1609   if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1610     return 1;
1611   if (flag_pic
1612       && regno == PIC_OFFSET_TABLE_REGNUM
1613       && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
1614     return 1;
1615   return 0;
1616 }
1617 
1618 /* This function is only correct starting with local register
1619    allocation */
1620 int
1621 spu_saved_regs_size (void)
1622 {
1623   int reg_save_size = 0;
1624   int regno;
1625 
1626   for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1627     if (need_to_save_reg (regno, 0))
1628       reg_save_size += 0x10;
1629   return reg_save_size;
1630 }
1631 
1632 static rtx_insn *
1633 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1634 {
1635   rtx reg = gen_rtx_REG (V4SImode, regno);
1636   rtx mem =
1637     gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1638   return emit_insn (gen_movv4si (mem, reg));
1639 }
1640 
1641 static rtx_insn *
1642 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1643 {
1644   rtx reg = gen_rtx_REG (V4SImode, regno);
1645   rtx mem =
1646     gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1647   return emit_insn (gen_movv4si (reg, mem));
1648 }
1649 
1650 /* This happens after reload, so we need to expand it.  */
1651 static rtx_insn *
1652 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1653 {
1654   rtx_insn *insn;
1655   if (satisfies_constraint_K (GEN_INT (imm)))
1656     {
1657       insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1658     }
1659   else
1660     {
1661       emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1662       insn = emit_insn (gen_addsi3 (dst, src, scratch));
1663       if (REGNO (src) == REGNO (scratch))
1664 	abort ();
1665     }
1666   return insn;
1667 }
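/* IMMs that satisfy constraint K are added directly; any other value is
   first loaded into SCRATCH, which must therefore be a different register
   from SRC (checked by the abort above).  */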
1668 
1669 /* Return nonzero if this function is known to have a null epilogue.  */
1670 
1671 int
1672 direct_return (void)
1673 {
1674   if (reload_completed)
1675     {
1676       if (cfun->static_chain_decl == 0
1677 	  && (spu_saved_regs_size ()
1678 	      + get_frame_size ()
1679 	      + crtl->outgoing_args_size
1680 	      + crtl->args.pretend_args_size == 0)
1681 	  && crtl->is_leaf)
1682 	return 1;
1683     }
1684   return 0;
1685 }
1686 
1687 /*
1688    The stack frame looks like this:
1689          +-------------+
1690          |  incoming   |
1691          |    args     |
1692    AP -> +-------------+
1693          | $lr save    |
1694          +-------------+
1695  prev SP | back chain  |
1696          +-------------+
1697          |  var args   |
1698          |  reg save   | crtl->args.pretend_args_size bytes
1699          +-------------+
1700          |    ...      |
1701          | saved regs  | spu_saved_regs_size() bytes
1702    FP -> +-------------+
1703          |    ...      |
1704          |   vars      | get_frame_size()  bytes
1705   HFP -> +-------------+
1706          |    ...      |
1707          |  outgoing   |
1708          |    args     | crtl->outgoing_args_size bytes
1709          +-------------+
1710          | $lr of next |
1711          |   frame     |
1712          +-------------+
1713          | back chain  |
1714    SP -> +-------------+
1715 
1716 */
1717 void
1718 spu_expand_prologue (void)
1719 {
1720   HOST_WIDE_INT size = get_frame_size (), offset, regno;
1721   HOST_WIDE_INT total_size;
1722   HOST_WIDE_INT saved_regs_size;
1723   rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1724   rtx scratch_reg_0, scratch_reg_1;
1725   rtx_insn *insn;
1726   rtx real;
1727 
1728   if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1729     cfun->machine->pic_reg = pic_offset_table_rtx;
1730 
1731   if (spu_naked_function_p (current_function_decl))
1732     return;
1733 
1734   scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1735   scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1736 
1737   saved_regs_size = spu_saved_regs_size ();
1738   total_size = size + saved_regs_size
1739     + crtl->outgoing_args_size
1740     + crtl->args.pretend_args_size;
1741 
1742   if (!crtl->is_leaf
1743       || cfun->calls_alloca || total_size > 0)
1744     total_size += STACK_POINTER_OFFSET;
1745 
1746   /* Save this first because code after this might use the link
1747      register as a scratch register. */
1748   if (!crtl->is_leaf)
1749     {
1750       insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1751       RTX_FRAME_RELATED_P (insn) = 1;
1752     }
1753 
1754   if (total_size > 0)
1755     {
1756       offset = -crtl->args.pretend_args_size;
1757       for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1758 	if (need_to_save_reg (regno, 1))
1759 	  {
1760 	    offset -= 16;
1761 	    insn = frame_emit_store (regno, sp_reg, offset);
1762 	    RTX_FRAME_RELATED_P (insn) = 1;
1763 	  }
1764     }
1765 
1766   if (flag_pic && cfun->machine->pic_reg)
1767     {
1768       rtx pic_reg = cfun->machine->pic_reg;
1769       insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1770       insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1771     }
1772 
1773   if (total_size > 0)
1774     {
1775       if (flag_stack_check || flag_stack_clash_protection)
1776 	{
1777 	  /* We compare against total_size-1 because
1778 	     ($sp >= total_size) <=> ($sp > total_size-1) */
1779 	  rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1780 	  rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1781 	  rtx size_v4si = spu_const (V4SImode, total_size - 1);
1782 	  if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1783 	    {
1784 	      emit_move_insn (scratch_v4si, size_v4si);
1785 	      size_v4si = scratch_v4si;
1786 	    }
1787 	  emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1788 	  emit_insn (gen_vec_extractv4sisi
1789 		     (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1790 	  emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
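	  /* Word 1 of the $sp register holds the number of bytes of stack
	     space still available (per the SPU ABI), so the heq above traps
	     when that amount is not greater than total_size - 1.  */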
1791 	}
1792 
1793       /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1794          the value of the previous $sp because we save it as the back
1795          chain. */
1796       if (total_size <= 2000)
1797 	{
1798 	  /* In this case we save the back chain first. */
1799 	  insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1800 	  insn =
1801 	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1802 	}
1803       else
1804 	{
1805 	  insn = emit_move_insn (scratch_reg_0, sp_reg);
1806 	  insn =
1807 	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1808 	}
1809       RTX_FRAME_RELATED_P (insn) = 1;
1810       real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1811       add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1812 
1813       if (total_size > 2000)
1814 	{
1815 	  /* Save the back chain ptr */
1816 	  insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1817 	}
1818 
1819       if (frame_pointer_needed)
1820 	{
1821 	  rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1822 	  HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1823 	    + crtl->outgoing_args_size;
1824 	  /* Set the new frame_pointer */
1825 	  insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1826 	  RTX_FRAME_RELATED_P (insn) = 1;
1827 	  real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1828 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1829           REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1830 	}
1831     }
1832 
1833   if (flag_stack_usage_info)
1834     current_function_static_stack_size = total_size;
1835 }
1836 
1837 void
1838 spu_expand_epilogue (bool sibcall_p)
1839 {
1840   int size = get_frame_size (), offset, regno;
1841   HOST_WIDE_INT saved_regs_size, total_size;
1842   rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1843   rtx scratch_reg_0;
1844 
1845   if (spu_naked_function_p (current_function_decl))
1846     return;
1847 
1848   scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1849 
1850   saved_regs_size = spu_saved_regs_size ();
1851   total_size = size + saved_regs_size
1852     + crtl->outgoing_args_size
1853     + crtl->args.pretend_args_size;
1854 
1855   if (!crtl->is_leaf
1856       || cfun->calls_alloca || total_size > 0)
1857     total_size += STACK_POINTER_OFFSET;
1858 
1859   if (total_size > 0)
1860     {
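      /* If alloca was used, $sp may have moved by an unknown amount, so
	 reload it from the back chain slot at offset 0; otherwise simply
	 add the frame size back on.  */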
1861       if (cfun->calls_alloca)
1862 	frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1863       else
1864 	frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1865 
1866 
1867       if (saved_regs_size > 0)
1868 	{
1869 	  offset = -crtl->args.pretend_args_size;
1870 	  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1871 	    if (need_to_save_reg (regno, 1))
1872 	      {
1873 		offset -= 0x10;
1874 		frame_emit_load (regno, sp_reg, offset);
1875 	      }
1876 	}
1877     }
1878 
1879   if (!crtl->is_leaf)
1880     frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1881 
1882   if (!sibcall_p)
1883     {
1884       emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
1885       emit_jump_insn (gen__return ());
1886     }
1887 }
1888 
1889 rtx
1890 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1891 {
1892   if (count != 0)
1893     return 0;
1894   /* This is inefficient because it ends up copying to a save-register
1895      which then gets saved even though $lr has already been saved.  But
1896      it does generate better code for leaf functions and we don't need
1897      to use RETURN_ADDRESS_POINTER_REGNUM to get it working.  It's only
1898      used for __builtin_return_address anyway, so maybe we don't care if
1899      it's inefficient. */
1900   return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1901 }
1902 
1903 
1904 /* Given VAL, generate a constant appropriate for MODE.
1905    If MODE is a vector mode, every element will be VAL.
1906    For TImode, VAL will be zero extended to 128 bits. */
1907 rtx
1908 spu_const (machine_mode mode, HOST_WIDE_INT val)
1909 {
1910   rtx inner;
1911 
1912   gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1913 	      || GET_MODE_CLASS (mode) == MODE_FLOAT
1914 	      || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1915 	      || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1916 
1917   if (GET_MODE_CLASS (mode) == MODE_INT)
1918     return immed_double_const (val, 0, mode);
1919 
1920   /* val is the bit representation of the float */
1921   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1922     return hwint_to_const_double (mode, val);
1923 
1924   if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1925     inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1926   else
1927     inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1928 
1929   return gen_const_vec_duplicate (mode, inner);
1930 }
1931 
1932 /* Create a MODE vector constant from 4 ints. */
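/* For example, spu_const_from_ints (V4SImode, 0x00010203, 0x04050607,
   0x08090a0b, 0x0c0d0e0f) lays the four ints out big-endian, so byte 0
   of the constant holds the most significant byte of A.  */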
1933 rtx
1934 spu_const_from_ints(machine_mode mode, int a, int b, int c, int d)
1935 {
1936   unsigned char arr[16];
1937   arr[0] = (a >> 24) & 0xff;
1938   arr[1] = (a >> 16) & 0xff;
1939   arr[2] = (a >> 8) & 0xff;
1940   arr[3] = (a >> 0) & 0xff;
1941   arr[4] = (b >> 24) & 0xff;
1942   arr[5] = (b >> 16) & 0xff;
1943   arr[6] = (b >> 8) & 0xff;
1944   arr[7] = (b >> 0) & 0xff;
1945   arr[8] = (c >> 24) & 0xff;
1946   arr[9] = (c >> 16) & 0xff;
1947   arr[10] = (c >> 8) & 0xff;
1948   arr[11] = (c >> 0) & 0xff;
1949   arr[12] = (d >> 24) & 0xff;
1950   arr[13] = (d >> 16) & 0xff;
1951   arr[14] = (d >> 8) & 0xff;
1952   arr[15] = (d >> 0) & 0xff;
1953   return array_to_constant(mode, arr);
1954 }
1955 
1956 /* branch hint stuff */
1957 
1958 /* An array of these is used to propagate hints to predecessor blocks. */
1959 struct spu_bb_info
1960 {
1961   rtx_insn *prop_jump; /* propagated from another block */
1962   int bb_index;  /* the original block. */
1963 };
1964 static struct spu_bb_info *spu_bb_info;
1965 
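/* 1 when INSN is one after which a branch hint is no longer valid:
   a call or one of the expanded divmod sequences.  */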
1966 #define STOP_HINT_P(INSN) \
1967 		(CALL_P(INSN) \
1968 		 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1969 		 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1970 
1971 /* 1 when RTX is a hinted branch or its target.  We keep track of
1972    what has been hinted so the safe-hint code can test it easily.  */
1973 #define HINTED_P(RTX)						\
1974   (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
1975 
1976 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
1977 #define SCHED_ON_EVEN_P(RTX)						\
1978   (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
1979 
1980 /* Emit a nop for INSN such that the two will dual issue.  This assumes
1981    INSN is 8-byte aligned.  When INSN is inline asm we emit an lnop.
1982    We check for TImode to handle a MULTI1 insn which has dual issued its
1983    first instruction.  get_pipe returns -1 for MULTI0 or inline asm.  */
1984 static void
1985 emit_nop_for_insn (rtx_insn *insn)
1986 {
1987   int p;
1988   rtx_insn *new_insn;
1989 
1990   /* We need to handle JUMP_TABLE_DATA separately.  */
1991   if (JUMP_TABLE_DATA_P (insn))
1992     {
1993       new_insn = emit_insn_after (gen_lnop(), insn);
1994       recog_memoized (new_insn);
1995       INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
1996       return;
1997     }
1998 
1999   p = get_pipe (insn);
2000   if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2001     new_insn = emit_insn_after (gen_lnop (), insn);
2002   else if (p == 1 && GET_MODE (insn) == TImode)
2003     {
2004       new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2005       PUT_MODE (new_insn, TImode);
2006       PUT_MODE (insn, VOIDmode);
2007     }
2008   else
2009     new_insn = emit_insn_after (gen_lnop (), insn);
2010   recog_memoized (new_insn);
2011   INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
2012 }
2013 
2014 /* Insert nops in basic blocks to meet dual issue alignment
2015    requirements.  Also make sure hbrp and hint instructions are at least
2016    one cycle apart, possibly inserting a nop.  */
2017 static void
2018 pad_bb(void)
2019 {
2020   rtx_insn *insn, *next_insn, *prev_insn, *hbr_insn = 0;
2021   int length;
2022   int addr;
2023 
2024   /* This sets up INSN_ADDRESSES. */
2025   shorten_branches (get_insns ());
2026 
2027   /* Keep track of length added by nops. */
2028   length = 0;
2029 
2030   prev_insn = 0;
2031   insn = get_insns ();
2032   if (!active_insn_p (insn))
2033     insn = next_active_insn (insn);
2034   for (; insn; insn = next_insn)
2035     {
2036       next_insn = next_active_insn (insn);
2037       if (INSN_P (insn)
2038           && (INSN_CODE (insn) == CODE_FOR_iprefetch
2039 	      || INSN_CODE (insn) == CODE_FOR_hbr))
2040 	{
2041 	  if (hbr_insn)
2042 	    {
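	      /* The previous hint/hbrp would issue less than a cycle
		 before this one; insert an lnop so they end up at least
		 one cycle apart.  */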
2043 	      int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2044 	      int a1 = INSN_ADDRESSES (INSN_UID (insn));
2045 	      if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2046 		  || (a1 - a0 == 4))
2047 		{
2048 		  prev_insn = emit_insn_before (gen_lnop (), insn);
2049 		  PUT_MODE (prev_insn, GET_MODE (insn));
2050 		  PUT_MODE (insn, TImode);
2051 		  INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
2052 		  length += 4;
2053 		}
2054 	    }
2055 	  hbr_insn = insn;
2056 	}
2057       if (INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_blockage && next_insn)
2058 	{
2059 	  if (GET_MODE (insn) == TImode)
2060 	    PUT_MODE (next_insn, TImode);
2061 	  insn = next_insn;
2062 	  next_insn = next_active_insn (insn);
2063 	}
2064       addr = INSN_ADDRESSES (INSN_UID (insn));
2065       if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2066 	{
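	  /* Branches marked SCHED_ON_EVEN_P must start on an 8-byte
	     boundary, so pad with a nop when they would land on an odd
	     word.  */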
2067 	  if (((addr + length) & 7) != 0)
2068 	    {
2069 	      emit_nop_for_insn (prev_insn);
2070 	      length += 4;
2071 	    }
2072 	}
2073       else if (GET_MODE (insn) == TImode
2074 	       && ((next_insn && GET_MODE (next_insn) != TImode)
2075 		   || get_attr_type (insn) == TYPE_MULTI0)
2076 	       && ((addr + length) & 7) != 0)
2077 	{
2078 	  /* prev_insn will always be set because the first insn is
2079 	     always 8-byte aligned. */
2080 	  emit_nop_for_insn (prev_insn);
2081 	  length += 4;
2082 	}
2083       prev_insn = insn;
2084     }
2085 }
2086 
2087 
2088 /* Routines for branch hints. */
2089 
2090 static void
2091 spu_emit_branch_hint (rtx_insn *before, rtx_insn *branch, rtx target,
2092 		      int distance, sbitmap blocks)
2093 {
2094   rtx_insn *hint;
2095   rtx_insn *insn;
2096   rtx_jump_table_data *table;
2097 
2098   if (before == 0 || branch == 0 || target == 0)
2099     return;
2100 
2101   /* While scheduling we require hints to be no further than 600, so
2102      we need to enforce that here too */
2103   if (distance > 600)
2104     return;
2105 
2106   /* If BEFORE is a basic block note, emit the hint after the note.  */
2107   if (NOTE_INSN_BASIC_BLOCK_P (before))
2108     before = NEXT_INSN (before);
2109 
2110   rtx_code_label *branch_label = gen_label_rtx ();
2111   LABEL_NUSES (branch_label)++;
2112   LABEL_PRESERVE_P (branch_label) = 1;
2113   insn = emit_label_before (branch_label, branch);
2114   rtx branch_label_ref = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2115   bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
2116 
2117   hint = emit_insn_before (gen_hbr (branch_label_ref, target), before);
2118   recog_memoized (hint);
2119   INSN_LOCATION (hint) = INSN_LOCATION (branch);
2120   HINTED_P (branch) = 1;
2121 
2122   if (GET_CODE (target) == LABEL_REF)
2123     HINTED_P (XEXP (target, 0)) = 1;
2124   else if (tablejump_p (branch, 0, &table))
2125     {
2126       rtvec vec;
2127       int j;
2128       if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2129 	vec = XVEC (PATTERN (table), 0);
2130       else
2131 	vec = XVEC (PATTERN (table), 1);
2132       for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2133 	HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2134     }
2135 
2136   if (distance >= 588)
2137     {
2138       /* Make sure the hint isn't scheduled any earlier than this point,
2139 	     which could make it too far for the branch offset to fit */
2140       insn = emit_insn_before (gen_blockage (), hint);
2141       recog_memoized (insn);
2142       INSN_LOCATION (insn) = INSN_LOCATION (hint);
2143     }
2144   else if (distance <= 8 * 4)
2145     {
2146       /* To guarantee at least 8 insns between the hint and branch we
2147          insert nops. */
2148       int d;
2149       for (d = distance; d < 8 * 4; d += 4)
2150 	{
2151 	  insn =
2152 	    emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2153 	  recog_memoized (insn);
2154 	  INSN_LOCATION (insn) = INSN_LOCATION (hint);
2155 	}
2156 
2157       /* Make sure any nops inserted aren't scheduled before the hint. */
2158       insn = emit_insn_after (gen_blockage (), hint);
2159       recog_memoized (insn);
2160       INSN_LOCATION (insn) = INSN_LOCATION (hint);
2161 
2162       /* Make sure any nops inserted aren't scheduled after the call. */
2163       if (CALL_P (branch) && distance < 8 * 4)
2164 	{
2165 	  insn = emit_insn_before (gen_blockage (), branch);
2166 	  recog_memoized (insn);
2167 	  INSN_LOCATION (insn) = INSN_LOCATION (branch);
2168 	}
2169     }
2170 }
2171 
2172 /* Returns 0 if we don't want a hint for this branch.  Otherwise return
2173    the rtx for the branch target. */
2174 static rtx
2175 get_branch_target (rtx_insn *branch)
2176 {
2177   if (JUMP_P (branch))
2178     {
2179       rtx set, src;
2180 
2181       /* Return statements */
2182       if (GET_CODE (PATTERN (branch)) == RETURN)
2183 	return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2184 
2185      /* ASM GOTOs. */
2186      if (extract_asm_operands (PATTERN (branch)) != NULL)
2187 	return NULL;
2188 
2189       set = single_set (branch);
2190       src = SET_SRC (set);
2191       if (GET_CODE (SET_DEST (set)) != PC)
2192 	abort ();
2193 
2194       if (GET_CODE (src) == IF_THEN_ELSE)
2195 	{
2196 	  rtx lab = 0;
2197 	  rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2198 	  if (note)
2199 	    {
2200 	      /* If the more probable case is not a fall through, then
2201 	         try a branch hint.  */
2202 	      int prob = profile_probability::from_reg_br_prob_note
2203 			    (XINT (note, 0)).to_reg_br_prob_base ();
2204 	      if (prob > (REG_BR_PROB_BASE * 6 / 10)
2205 		  && GET_CODE (XEXP (src, 1)) != PC)
2206 		lab = XEXP (src, 1);
2207 	      else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2208 		       && GET_CODE (XEXP (src, 2)) != PC)
2209 		lab = XEXP (src, 2);
2210 	    }
2211 	  if (lab)
2212 	    {
2213 	      if (GET_CODE (lab) == RETURN)
2214 		return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2215 	      return lab;
2216 	    }
2217 	  return 0;
2218 	}
2219 
2220       return src;
2221     }
2222   else if (CALL_P (branch))
2223     {
2224       rtx call;
2225       /* All of our call patterns are in a PARALLEL and the CALL is
2226          the first pattern in the PARALLEL. */
2227       if (GET_CODE (PATTERN (branch)) != PARALLEL)
2228 	abort ();
2229       call = XVECEXP (PATTERN (branch), 0, 0);
2230       if (GET_CODE (call) == SET)
2231 	call = SET_SRC (call);
2232       if (GET_CODE (call) != CALL)
2233 	abort ();
2234       return XEXP (XEXP (call, 0), 0);
2235     }
2236   return 0;
2237 }
2238 
2239 /* The special $hbr register is used to prevent the insn scheduler from
2240    moving hbr insns across instructions which invalidate them.  It
2241    should only be used in a clobber, and this function searches for
2242    insns which clobber it.  */
2243 static bool
2244 insn_clobbers_hbr (rtx_insn *insn)
2245 {
2246   if (INSN_P (insn)
2247       && GET_CODE (PATTERN (insn)) == PARALLEL)
2248     {
2249       rtx parallel = PATTERN (insn);
2250       rtx clobber;
2251       int j;
2252       for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2253 	{
2254 	  clobber = XVECEXP (parallel, 0, j);
2255 	  if (GET_CODE (clobber) == CLOBBER
2256 	      && GET_CODE (XEXP (clobber, 0)) == REG
2257 	      && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2258 	    return 1;
2259 	}
2260     }
2261   return 0;
2262 }
2263 
2264 /* Search up to 32 insns starting at FIRST:
2265    - at any kind of hinted branch, just return
2266    - at any unconditional branch in the first 15 insns, just return
2267    - at a call or indirect branch, after the first 15 insns, force it to
2268      an even address and return
2269    - at any unconditional branch, after the first 15 insns, force it to
2270      an even address.
2271    At the end of the search, insert an hbrp within 4 insns of FIRST,
2272    and an hbrp within 16 instructions of FIRST.
2273  */
2274 static void
2275 insert_hbrp_for_ilb_runout (rtx_insn *first)
2276 {
2277   rtx_insn *insn, *before_4 = 0, *before_16 = 0;
2278   int addr = 0, length, first_addr = -1;
2279   int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2280   int insert_lnop_after = 0;
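  /* INSERT_LNOP_AFTER is a bitmask: bit 0 means an lnop is needed with
     the hbrp emitted at BEFORE_4, bit 1 with the one at BEFORE_16.  */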
2281   for (insn = first; insn; insn = NEXT_INSN (insn))
2282     if (INSN_P (insn))
2283       {
2284 	if (first_addr == -1)
2285 	  first_addr = INSN_ADDRESSES (INSN_UID (insn));
2286 	addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2287 	length = get_attr_length (insn);
2288 
2289 	if (before_4 == 0 && addr + length >= 4 * 4)
2290 	  before_4 = insn;
2291 	/* We test for 14 instructions because the first hbrp will add
2292 	   up to 2 instructions. */
2293 	if (before_16 == 0 && addr + length >= 14 * 4)
2294 	  before_16 = insn;
2295 
2296 	if (INSN_CODE (insn) == CODE_FOR_hbr)
2297 	  {
2298 	    /* Make sure an hbrp is at least 2 cycles away from a hint.
2299 	       Insert an lnop after the hbrp when necessary. */
2300 	    if (before_4 == 0 && addr > 0)
2301 	      {
2302 		before_4 = insn;
2303 		insert_lnop_after |= 1;
2304 	      }
2305 	    else if (before_4 && addr <= 4 * 4)
2306 	      insert_lnop_after |= 1;
2307 	    if (before_16 == 0 && addr > 10 * 4)
2308 	      {
2309 		before_16 = insn;
2310 		insert_lnop_after |= 2;
2311 	      }
2312 	    else if (before_16 && addr <= 14 * 4)
2313 	      insert_lnop_after |= 2;
2314 	  }
2315 
2316 	if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2317 	  {
2318 	    if (addr < hbrp_addr0)
2319 	      hbrp_addr0 = addr;
2320 	    else if (addr < hbrp_addr1)
2321 	      hbrp_addr1 = addr;
2322 	  }
2323 
2324 	if (CALL_P (insn) || JUMP_P (insn))
2325 	  {
2326 	    if (HINTED_P (insn))
2327 	      return;
2328 
2329 	    /* Any branch after the first 15 insns should be on an even
2330 	       address to avoid a special case branch.  There might be
2331 	       some nops and/or hbrps inserted, so we test after 10
2332 	       insns. */
2333 	    if (addr > 10 * 4)
2334 	      SCHED_ON_EVEN_P (insn) = 1;
2335 	  }
2336 
2337 	if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2338 	  return;
2339 
2340 
2341 	if (addr + length >= 32 * 4)
2342 	  {
2343 	    gcc_assert (before_4 && before_16);
2344 	    if (hbrp_addr0 > 4 * 4)
2345 	      {
2346 		insn =
2347 		  emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2348 		recog_memoized (insn);
2349 		INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2350 		INSN_ADDRESSES_NEW (insn,
2351 				    INSN_ADDRESSES (INSN_UID (before_4)));
2352 		PUT_MODE (insn, GET_MODE (before_4));
2353 		PUT_MODE (before_4, TImode);
2354 		if (insert_lnop_after & 1)
2355 		  {
2356 		    insn = emit_insn_before (gen_lnop (), before_4);
2357 		    recog_memoized (insn);
2358 		    INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2359 		    INSN_ADDRESSES_NEW (insn,
2360 					INSN_ADDRESSES (INSN_UID (before_4)));
2361 		    PUT_MODE (insn, TImode);
2362 		  }
2363 	      }
2364 	    if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2365 		&& hbrp_addr1 > 16 * 4)
2366 	      {
2367 		insn =
2368 		  emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2369 		recog_memoized (insn);
2370 		INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2371 		INSN_ADDRESSES_NEW (insn,
2372 				    INSN_ADDRESSES (INSN_UID (before_16)));
2373 		PUT_MODE (insn, GET_MODE (before_16));
2374 		PUT_MODE (before_16, TImode);
2375 		if (insert_lnop_after & 2)
2376 		  {
2377 		    insn = emit_insn_before (gen_lnop (), before_16);
2378 		    recog_memoized (insn);
2379 		    INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2380 		    INSN_ADDRESSES_NEW (insn,
2381 					INSN_ADDRESSES (INSN_UID
2382 							(before_16)));
2383 		    PUT_MODE (insn, TImode);
2384 		  }
2385 	      }
2386 	    return;
2387 	  }
2388       }
2389     else if (BARRIER_P (insn))
2390       return;
2391 
2392 }
2393 
2394 /* The SPU might hang when it executes 48 inline instructions after a
2395    hinted branch jumps to its hinted target.  The beginning of a
2396    function and the return from a call might have been hinted, and
2397    must be handled as well.  To prevent a hang we insert 2 hbrps.  The
2398    first should be within 6 insns of the branch target.  The second
2399    should be within 22 insns of the branch target.  When determining
2400    if hbrps are necessary, we look for only 32 inline instructions,
2401    because up to 12 nops and 4 hbrps could be inserted.  Similarly,
2402    when inserting new hbrps, we insert them within 4 and 16 insns of
2403    the target.  */
2404 static void
2405 insert_hbrp (void)
2406 {
2407   rtx_insn *insn;
2408   if (TARGET_SAFE_HINTS)
2409     {
2410       shorten_branches (get_insns ());
2411       /* Insert hbrp at beginning of function */
2412       insn = next_active_insn (get_insns ());
2413       if (insn)
2414 	insert_hbrp_for_ilb_runout (insn);
2415       /* Insert hbrp after hinted targets. */
2416       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2417 	if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2418 	  insert_hbrp_for_ilb_runout (next_active_insn (insn));
2419     }
2420 }
2421 
2422 static int in_spu_reorg;
2423 
2424 static void
2425 spu_var_tracking (void)
2426 {
2427   if (flag_var_tracking)
2428     {
2429       df_analyze ();
2430       timevar_push (TV_VAR_TRACKING);
2431       variable_tracking_main ();
2432       timevar_pop (TV_VAR_TRACKING);
2433       df_finish_pass (false);
2434     }
2435 }
2436 
2437 /* Insert branch hints.  There are no branch optimizations after this
2438    pass, so it's safe to set our branch hints now. */
2439 static void
2440 spu_machine_dependent_reorg (void)
2441 {
2442   sbitmap blocks;
2443   basic_block bb;
2444   rtx_insn *branch, *insn;
2445   rtx branch_target = 0;
2446   int branch_addr = 0, insn_addr, required_dist = 0;
2447   int i;
2448   unsigned int j;
2449 
2450   if (!TARGET_BRANCH_HINTS || optimize == 0)
2451     {
2452       /* We still do it for unoptimized code because an external
2453          function might have hinted a call or return. */
2454       compute_bb_for_insn ();
2455       insert_hbrp ();
2456       pad_bb ();
2457       spu_var_tracking ();
2458       free_bb_for_insn ();
2459       return;
2460     }
2461 
2462   blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
2463   bitmap_clear (blocks);
2464 
2465   in_spu_reorg = 1;
2466   compute_bb_for_insn ();
2467 
2468   /* (Re-)discover loops so that bb->loop_father can be used
2469      in the analysis below.  */
2470   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2471 
2472   compact_blocks ();
2473 
2474   spu_bb_info =
2475     (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
2476 				    sizeof (struct spu_bb_info));
2477 
2478   /* We need exact insn addresses and lengths.  */
2479   shorten_branches (get_insns ());
2480 
2481   for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
2482     {
2483       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2484       branch = 0;
2485       if (spu_bb_info[i].prop_jump)
2486 	{
2487 	  branch = spu_bb_info[i].prop_jump;
2488 	  branch_target = get_branch_target (branch);
2489 	  branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2490 	  required_dist = spu_hint_dist;
2491 	}
2492       /* Search from end of a block to beginning.   In this loop, find
2493          jumps which need a hint and emit the hint only when:
2494          - it's an indirect branch and we're at the insn which sets
2495          the register
2496          - we're at an insn that will invalidate the hint. e.g., a
2497          call, another hint insn, inline asm that clobbers $hbr, and
2498          some inlined operations (divmodsi4).  Don't consider jumps
2499          because they are only at the end of a block and are
2500          considered when we are deciding whether to propagate
2501          - we're getting too far away from the branch.  The hbr insns
2502          only have a signed 10 bit offset
2503          We go back as far as possible so the branch will be considered
2504          for propagation when we get to the beginning of the block.  */
2505       for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2506 	{
2507 	  if (INSN_P (insn))
2508 	    {
2509 	      insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2510 	      if (branch
2511 		  && ((GET_CODE (branch_target) == REG
2512 		       && set_of (branch_target, insn) != NULL_RTX)
2513 		      || insn_clobbers_hbr (insn)
2514 		      || branch_addr - insn_addr > 600))
2515 		{
2516 		  rtx_insn *next = NEXT_INSN (insn);
2517 		  int next_addr = INSN_ADDRESSES (INSN_UID (next));
2518 		  if (insn != BB_END (bb)
2519 		      && branch_addr - next_addr >= required_dist)
2520 		    {
2521 		      if (dump_file)
2522 			fprintf (dump_file,
2523 				 "hint for %i in block %i before %i\n",
2524 				 INSN_UID (branch), bb->index,
2525 				 INSN_UID (next));
2526 		      spu_emit_branch_hint (next, branch, branch_target,
2527 					    branch_addr - next_addr, blocks);
2528 		    }
2529 		  branch = 0;
2530 		}
2531 
2532 	      /* JUMP_P will only be true at the end of a block.  When
2533 	         branch is already set it means we've previously decided
2534 	         to propagate a hint for that branch into this block. */
2535 	      if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2536 		{
2537 		  branch = 0;
2538 		  if ((branch_target = get_branch_target (insn)))
2539 		    {
2540 		      branch = insn;
2541 		      branch_addr = insn_addr;
2542 		      required_dist = spu_hint_dist;
2543 		    }
2544 		}
2545 	    }
2546 	  if (insn == BB_HEAD (bb))
2547 	    break;
2548 	}
2549 
2550       if (branch)
2551 	{
2552 	  /* If we haven't emitted a hint for this branch yet, it might
2553 	     be profitable to emit it in one of the predecessor blocks,
2554 	     especially for loops.  */
2555 	  rtx_insn *bbend;
2556 	  basic_block prev = 0, prop = 0, prev2 = 0;
2557 	  int loop_exit = 0, simple_loop = 0;
2558 	  int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2559 
2560 	  for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2561 	    if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2562 	      prev = EDGE_PRED (bb, j)->src;
2563 	    else
2564 	      prev2 = EDGE_PRED (bb, j)->src;
2565 
2566 	  for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2567 	    if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2568 	      loop_exit = 1;
2569 	    else if (EDGE_SUCC (bb, j)->dest == bb)
2570 	      simple_loop = 1;
2571 
2572 	  /* If this branch is a loop exit then propagate to previous
2573 	     fallthru block. This catches the cases when it is a simple
2574 	     loop or when there is an initial branch into the loop. */
2575 	  if (prev && (loop_exit || simple_loop)
2576 	      && bb_loop_depth (prev) <= bb_loop_depth (bb))
2577 	    prop = prev;
2578 
2579 	  /* If there is only one adjacent predecessor, don't propagate
2580 	     outside this loop.  */
2581 	  else if (prev && single_pred_p (bb)
2582 		   && prev->loop_father == bb->loop_father)
2583 	    prop = prev;
2584 
2585 	  /* If this is the JOIN block of a simple IF-THEN then
2586 	     propagate the hint to the HEADER block. */
2587 	  else if (prev && prev2
2588 		   && EDGE_COUNT (bb->preds) == 2
2589 		   && EDGE_COUNT (prev->preds) == 1
2590 		   && EDGE_PRED (prev, 0)->src == prev2
2591 		   && prev2->loop_father == bb->loop_father
2592 		   && GET_CODE (branch_target) != REG)
2593 	    prop = prev;
2594 
2595 	  /* Don't propagate when:
2596 	     - this is a simple loop and the hint would be too far
2597 	     - this is not a simple loop and there are 16 insns in
2598 	     this block already
2599 	     - the predecessor block ends in a branch that will be
2600 	     hinted
2601 	     - the predecessor block ends in an insn that invalidates
2602 	     the hint */
2603 	  if (prop
2604 	      && prop->index >= 0
2605 	      && (bbend = BB_END (prop))
2606 	      && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2607 	      (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2608 	      && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2609 	    {
2610 	      if (dump_file)
2611 		fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2612 			 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2613 			 bb->index, prop->index, bb_loop_depth (bb),
2614 			 INSN_UID (branch), loop_exit, simple_loop,
2615 			 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2616 
2617 	      spu_bb_info[prop->index].prop_jump = branch;
2618 	      spu_bb_info[prop->index].bb_index = i;
2619 	    }
2620 	  else if (branch_addr - next_addr >= required_dist)
2621 	    {
2622 	      if (dump_file)
2623 		fprintf (dump_file, "hint for %i in block %i before %i\n",
2624 			 INSN_UID (branch), bb->index,
2625 			 INSN_UID (NEXT_INSN (insn)));
2626 	      spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2627 				    branch_addr - next_addr, blocks);
2628 	    }
2629 	  branch = 0;
2630 	}
2631     }
2632   free (spu_bb_info);
2633 
2634   if (!bitmap_empty_p (blocks))
2635     find_many_sub_basic_blocks (blocks);
2636 
2637   /* We have to schedule to make sure alignment is ok. */
2638   FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE;
2639 
2640   /* The hints need to be scheduled, so call it again. */
2641   schedule_insns ();
2642   df_finish_pass (true);
2643 
2644   insert_hbrp ();
2645 
2646   pad_bb ();
2647 
2648   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2649     if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2650       {
2651 	/* Adjust the LABEL_REF in a hint when we have inserted a nop
2652 	   between its branch label and the branch.  We don't move the
2653 	   label because GCC expects it at the beginning of the block. */
2654 	rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2655 	rtx label_ref = XVECEXP (unspec, 0, 0);
2656 	rtx_insn *label = as_a <rtx_insn *> (XEXP (label_ref, 0));
2657 	rtx_insn *branch;
2658 	int offset = 0;
2659 	for (branch = NEXT_INSN (label);
2660 	     !JUMP_P (branch) && !CALL_P (branch);
2661 	     branch = NEXT_INSN (branch))
2662 	  if (NONJUMP_INSN_P (branch))
2663 	    offset += get_attr_length (branch);
2664 	if (offset > 0)
2665 	  XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
2666       }
2667 
2668   spu_var_tracking ();
2669 
2670   loop_optimizer_finalize ();
2671 
2672   free_bb_for_insn ();
2673 
2674   in_spu_reorg = 0;
2675 }
2676 
2677 
2678 /* Insn scheduling routines, primarily for dual issue. */
2679 static int
2680 spu_sched_issue_rate (void)
2681 {
2682   return 2;
2683 }
2684 
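/* Return 1 when INSN is a load or store, i.e. it will need the
   load/store unit.  */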
2685 static int
2686 uses_ls_unit(rtx_insn *insn)
2687 {
2688   rtx set = single_set (insn);
2689   if (set != 0
2690       && (GET_CODE (SET_DEST (set)) == MEM
2691 	  || GET_CODE (SET_SRC (set)) == MEM))
2692     return 1;
2693   return 0;
2694 }
2695 
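/* Return which pipeline INSN issues on: 0 for pipe 0 (even), 1 for
   pipe 1 (odd), -1 for inline asm and MULTI0, -2 for TYPE_CONVERT.  */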
2696 static int
2697 get_pipe (rtx_insn *insn)
2698 {
2699   enum attr_type t;
2700   /* Handle inline asm */
2701   if (INSN_CODE (insn) == -1)
2702     return -1;
2703   t = get_attr_type (insn);
2704   switch (t)
2705     {
2706     case TYPE_CONVERT:
2707       return -2;
2708     case TYPE_MULTI0:
2709       return -1;
2710 
2711     case TYPE_FX2:
2712     case TYPE_FX3:
2713     case TYPE_SPR:
2714     case TYPE_NOP:
2715     case TYPE_FXB:
2716     case TYPE_FPD:
2717     case TYPE_FP6:
2718     case TYPE_FP7:
2719       return 0;
2720 
2721     case TYPE_LNOP:
2722     case TYPE_SHUF:
2723     case TYPE_LOAD:
2724     case TYPE_STORE:
2725     case TYPE_BR:
2726     case TYPE_MULTI1:
2727     case TYPE_HBR:
2728     case TYPE_IPREFETCH:
2729       return 1;
2730     default:
2731       abort ();
2732     }
2733 }
2734 
2735 
2736 /* haifa-sched.c has a static variable that keeps track of the current
2737    cycle.  It is passed to spu_sched_reorder, and we record it here for
2738    use by spu_sched_variable_issue.  It won't be accurate if the
2739    scheduler updates its clock_var between the two calls. */
2740 static int clock_var;
2741 
2742 /* This is used to keep track of insn alignment.  Set to 0 at the
2743    beginning of each block and increased by the "length" attr of each
2744    insn scheduled. */
2745 static int spu_sched_length;
2746 
2747 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2748    ready list appropriately in spu_sched_reorder(). */
2749 static int pipe0_clock;
2750 static int pipe1_clock;
2751 
2752 static int prev_clock_var;
2753 
2754 static int prev_priority;
2755 
2756 /* The SPU needs to load the next ilb sometime during the execution of
2757    the previous ilb.  There is a potential conflict if every cycle has a
2758    load or store.  To avoid the conflict we make sure the load/store
2759    unit is free for at least one cycle during the execution of insns in
2760    the previous ilb. */
2761 static int spu_ls_first;
2762 static int prev_ls_clock;
2763 
2764 static void
2765 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2766 		       int max_ready ATTRIBUTE_UNUSED)
2767 {
2768   spu_sched_length = 0;
2769 }
2770 
2771 static void
2772 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2773 		int max_ready ATTRIBUTE_UNUSED)
2774 {
2775   if (align_labels.levels[0].get_value () > 4
2776       || align_loops.levels[0].get_value () > 4
2777       || align_jumps.levels[0].get_value () > 4)
2778     {
2779       /* When any block might be at least 8-byte aligned, assume they
2780          will all be at least 8-byte aligned to make sure dual issue
2781          works out correctly. */
2782       spu_sched_length = 0;
2783     }
2784   spu_ls_first = INT_MAX;
2785   clock_var = -1;
2786   prev_ls_clock = -1;
2787   pipe0_clock = -1;
2788   pipe1_clock = -1;
2789   prev_clock_var = -1;
2790   prev_priority = -1;
2791 }
2792 
2793 static int
2794 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2795 			  int verbose ATTRIBUTE_UNUSED,
2796 			  rtx_insn *insn, int more)
2797 {
2798   int len;
2799   int p;
2800   if (GET_CODE (PATTERN (insn)) == USE
2801       || GET_CODE (PATTERN (insn)) == CLOBBER
2802       || (len = get_attr_length (insn)) == 0)
2803     return more;
2804 
2805   spu_sched_length += len;
2806 
2807   /* Reset on inline asm */
2808   if (INSN_CODE (insn) == -1)
2809     {
2810       spu_ls_first = INT_MAX;
2811       pipe0_clock = -1;
2812       pipe1_clock = -1;
2813       return 0;
2814     }
2815   p = get_pipe (insn);
2816   if (p == 0)
2817     pipe0_clock = clock_var;
2818   else
2819     pipe1_clock = clock_var;
2820 
2821   if (in_spu_reorg)
2822     {
2823       if (clock_var - prev_ls_clock > 1
2824 	  || INSN_CODE (insn) == CODE_FOR_iprefetch)
2825 	spu_ls_first = INT_MAX;
2826       if (uses_ls_unit (insn))
2827 	{
2828 	  if (spu_ls_first == INT_MAX)
2829 	    spu_ls_first = spu_sched_length;
2830 	  prev_ls_clock = clock_var;
2831 	}
2832 
2833       /* The scheduler hasn't inserted the nop, but we will later on.
2834          Include those nops in spu_sched_length. */
2835       if (prev_clock_var == clock_var && (spu_sched_length & 7))
2836 	spu_sched_length += 4;
2837       prev_clock_var = clock_var;
2838 
2839       /* more is -1 when called from spu_sched_reorder for new insns
2840          that don't have INSN_PRIORITY */
2841       if (more >= 0)
2842 	prev_priority = INSN_PRIORITY (insn);
2843     }
2844 
2845   /* Always try issuing more insns.  spu_sched_reorder will decide
2846      when the cycle should be advanced. */
2847   return 1;
2848 }
2849 
2850 /* This function is called for both TARGET_SCHED_REORDER and
2851    TARGET_SCHED_REORDER2.  */
2852 static int
2853 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2854 		   rtx_insn **ready, int *nreadyp, int clock)
2855 {
2856   int i, nready = *nreadyp;
2857   int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2858   rtx_insn *insn;
2859 
2860   clock_var = clock;
2861 
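  /* Once a pipe 1 insn has issued this cycle the issue group is
     complete, so return 0 and let the scheduler advance the cycle.  */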
2862   if (nready <= 0 || pipe1_clock >= clock)
2863     return 0;
2864 
2865   /* Find any rtl insns that don't generate assembly insns and schedule
2866      them first. */
2867   for (i = nready - 1; i >= 0; i--)
2868     {
2869       insn = ready[i];
2870       if (INSN_CODE (insn) == -1
2871 	  || INSN_CODE (insn) == CODE_FOR_blockage
2872 	  || (INSN_P (insn) && get_attr_length (insn) == 0))
2873 	{
2874 	  ready[i] = ready[nready - 1];
2875 	  ready[nready - 1] = insn;
2876 	  return 1;
2877 	}
2878     }
2879 
2880   pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2881   for (i = 0; i < nready; i++)
2882     if (INSN_CODE (ready[i]) != -1)
2883       {
2884 	insn = ready[i];
2885 	switch (get_attr_type (insn))
2886 	  {
2887 	  default:
2888 	  case TYPE_MULTI0:
2889 	  case TYPE_CONVERT:
2890 	  case TYPE_FX2:
2891 	  case TYPE_FX3:
2892 	  case TYPE_SPR:
2893 	  case TYPE_NOP:
2894 	  case TYPE_FXB:
2895 	  case TYPE_FPD:
2896 	  case TYPE_FP6:
2897 	  case TYPE_FP7:
2898 	    pipe_0 = i;
2899 	    break;
2900 	  case TYPE_LOAD:
2901 	  case TYPE_STORE:
2902 	    pipe_ls = i;
2903 	    /* FALLTHRU */
2904 	  case TYPE_LNOP:
2905 	  case TYPE_SHUF:
2906 	  case TYPE_BR:
2907 	  case TYPE_MULTI1:
2908 	  case TYPE_HBR:
2909 	    pipe_1 = i;
2910 	    break;
2911 	  case TYPE_IPREFETCH:
2912 	    pipe_hbrp = i;
2913 	    break;
2914 	  }
2915       }
2916 
2917   /* In the first scheduling phase, schedule loads and stores together
2918      to increase the chance they will get merged during postreload CSE. */
2919   if (!reload_completed && pipe_ls >= 0)
2920     {
2921       insn = ready[pipe_ls];
2922       ready[pipe_ls] = ready[nready - 1];
2923       ready[nready - 1] = insn;
2924       return 1;
2925     }
2926 
2927   /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2928   if (pipe_hbrp >= 0)
2929     pipe_1 = pipe_hbrp;
2930 
2931   /* When we have loads/stores in every cycle of the last 15 insns and
2932      we are about to schedule another load/store, emit an hbrp insn
2933      instead. */
2934   if (in_spu_reorg
2935       && spu_sched_length - spu_ls_first >= 4 * 15
2936       && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2937     {
2938       insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2939       recog_memoized (insn);
2940       if (pipe0_clock < clock)
2941 	PUT_MODE (insn, TImode);
2942       spu_sched_variable_issue (file, verbose, insn, -1);
2943       return 0;
2944     }
2945 
2946   /* In general, we want to emit nops to increase dual issue, but dual
2947      issue isn't faster when one of the insns could be scheduled later
2948      without affecting the critical path.  We look at INSN_PRIORITY to
2949      make a good guess, but it isn't perfect so -mdual-nops=n can be
2950      used to affect it. */
2951   if (in_spu_reorg && spu_dual_nops < 10)
2952     {
2953       /* When we are at an even address and we are not issuing nops to
2954          improve scheduling then we need to advance the cycle.  */
2955       if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2956 	  && (spu_dual_nops == 0
2957 	      || (pipe_1 != -1
2958 		  && prev_priority >
2959 		  INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2960 	return 0;
2961 
2962       /* When at an odd address, schedule the highest priority insn
2963          without considering pipeline. */
2964       if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2965 	  && (spu_dual_nops == 0
2966 	      || (prev_priority >
2967 		  INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2968 	return 1;
2969     }
2970 
2971 
2972   /* We haven't issued a pipe0 insn yet this cycle, if there is a
2973      pipe0 insn in the ready list, schedule it. */
2974   if (pipe0_clock < clock && pipe_0 >= 0)
2975     schedule_i = pipe_0;
2976 
2977   /* Either we've scheduled a pipe0 insn already or there is no pipe0
2978      insn to schedule.  Put a pipe1 insn at the front of the ready list. */
2979   else
2980     schedule_i = pipe_1;
2981 
2982   if (schedule_i > -1)
2983     {
2984       insn = ready[schedule_i];
2985       ready[schedule_i] = ready[nready - 1];
2986       ready[nready - 1] = insn;
2987       return 1;
2988     }
2989   return 0;
2990 }
2991 
2992 /* INSN is dependent on DEP_INSN. */
2993 static int
2994 spu_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
2995 		       int cost, unsigned int)
2996 {
2997   rtx set;
2998 
2999   /* The blockage pattern is used to prevent instructions from being
3000      moved across it and has no cost. */
3001   if (INSN_CODE (insn) == CODE_FOR_blockage
3002       || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3003     return 0;
3004 
3005   if ((INSN_P (insn) && get_attr_length (insn) == 0)
3006       || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3007     return 0;
3008 
3009   /* Make sure hbrps are spread out. */
3010   if (INSN_CODE (insn) == CODE_FOR_iprefetch
3011       && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3012     return 8;
3013 
3014   /* Make sure hints and hbrps are 2 cycles apart. */
3015   if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3016        || INSN_CODE (insn) == CODE_FOR_hbr)
3017        && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3018 	   || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3019     return 2;
3020 
3021   /* An hbrp has no real dependency on other insns. */
3022   if (INSN_CODE (insn) == CODE_FOR_iprefetch
3023       || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3024     return 0;
3025 
3026   /* Assuming that it is unlikely an argument register will be used in
3027      the first cycle of the called function, we reduce the cost for
3028      slightly better scheduling of dep_insn.  When not hinted, the
3029      mispredicted branch would hide the cost as well.  */
3030   if (CALL_P (insn))
3031   {
3032     rtx target = get_branch_target (insn);
3033     if (GET_CODE (target) != REG || !set_of (target, insn))
3034       return cost - 2;
3035     return cost;
3036   }
3037 
3038   /* And when returning from a function, let's assume the return values
3039      are completed sooner too. */
3040   if (CALL_P (dep_insn))
3041     return cost - 2;
3042 
3043   /* Make sure an instruction that loads from the back chain is scheduled
3044      away from the return instruction so a hint is more likely to get
3045      issued. */
3046   if (INSN_CODE (insn) == CODE_FOR__return
3047       && (set = single_set (dep_insn))
3048       && GET_CODE (SET_DEST (set)) == REG
3049       && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3050     return 20;
3051 
3052   /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3053      scheduler makes every insn in a block anti-dependent on the final
3054      jump_insn.  We adjust here so higher cost insns will get scheduled
3055      earlier. */
3056   if (JUMP_P (insn) && dep_type == REG_DEP_ANTI)
3057     return insn_sched_cost (dep_insn) - 3;
3058 
3059   return cost;
3060 }
3061 
3062 /* Create a CONST_DOUBLE from a string.  */
3063 rtx
3064 spu_float_const (const char *string, machine_mode mode)
3065 {
3066   REAL_VALUE_TYPE value;
3067   value = REAL_VALUE_ATOF (string, mode);
3068   return const_double_from_real_value (value, mode);
3069 }
3070 
3071 int
3072 spu_constant_address_p (rtx x)
3073 {
3074   return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3075 	  || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3076 	  || GET_CODE (x) == HIGH);
3077 }
3078 
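/* Return which immediate-load instruction can materialize VAL: il for
   16-bit signed values, ila for 18-bit unsigned values, ilh when both
   halfwords are equal, ilhu when the low 16 bits are zero, or SPU_NONE
   when no single instruction will do.  */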
3079 static enum spu_immediate
3080 which_immediate_load (HOST_WIDE_INT val)
3081 {
3082   gcc_assert (val == trunc_int_for_mode (val, SImode));
3083 
3084   if (val >= -0x8000 && val <= 0x7fff)
3085     return SPU_IL;
3086   if (val >= 0 && val <= 0x3ffff)
3087     return SPU_ILA;
3088   if ((val & 0xffff) == ((val >> 16) & 0xffff))
3089     return SPU_ILH;
3090   if ((val & 0xffff) == 0)
3091     return SPU_ILHU;
3092 
3093   return SPU_NONE;
3094 }
3095 
3096 /* Return true when OP can be loaded by one of the il instructions, or
3097    when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3098 int
3099 immediate_load_p (rtx op, machine_mode mode)
3100 {
3101   if (CONSTANT_P (op))
3102     {
3103       enum immediate_class c = classify_immediate (op, mode);
3104       return c == IC_IL1 || c == IC_IL1s
3105 	     || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3106     }
3107   return 0;
3108 }
3109 
3110 /* Return true if the first SIZE bytes of ARR form a constant that can be
3111    generated with cbd, chd, cwd or cdd.  When non-NULL, PRUN and PSTART
3112    represent the size and offset of the instruction to use. */
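/* For example, a cwd-style pattern with the word slot at offset 4 looks
   like { 16 17 18 19  0 1 2 3  24 25 26 27  28 29 30 31 }: byte I holds
   I+16 everywhere except for the run of 0..3 at the insertion point.  */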
3113 static int
3114 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3115 {
3116   int cpat, run, i, start;
3117   cpat = 1;
3118   run = 0;
3119   start = -1;
3120   for (i = 0; i < size && cpat; i++)
3121     if (arr[i] != i+16)
3122       {
3123 	if (!run)
3124 	  {
3125 	    start = i;
3126 	    if (arr[i] == 3)
3127 	      run = 1;
3128 	    else if (arr[i] == 2 && arr[i+1] == 3)
3129 	      run = 2;
3130 	    else if (arr[i] == 0)
3131 	      {
3132 		while (arr[i+run] == run && i+run < 16)
3133 		  run++;
3134 		if (run != 4 && run != 8)
3135 		  cpat = 0;
3136 	      }
3137 	    else
3138 	      cpat = 0;
3139 	    if ((i & (run-1)) != 0)
3140 	      cpat = 0;
3141 	    i += run;
3142 	  }
3143 	else
3144 	  cpat = 0;
3145       }
3146   if (cpat && (run || size < 16))
3147     {
3148       if (run == 0)
3149 	run = 1;
3150       if (prun)
3151 	*prun = run;
3152       if (pstart)
3153 	*pstart = start == -1 ? 16-run : start;
3154       return 1;
3155     }
3156   return 0;
3157 }
3158 
3159 /* OP is a CONSTANT_P.  Determine what instructions can be used to load
3160    it into a register.  MODE is only valid when OP is a CONST_INT. */
3161 static enum immediate_class
3162 classify_immediate (rtx op, machine_mode mode)
3163 {
3164   HOST_WIDE_INT val;
3165   unsigned char arr[16];
3166   int i, j, repeated, fsmbi, repeat;
3167 
3168   gcc_assert (CONSTANT_P (op));
3169 
3170   if (GET_MODE (op) != VOIDmode)
3171     mode = GET_MODE (op);
3172 
3173   /* A V4SI const_vector with all identical symbols is ok. */
3174   if (!flag_pic
3175       && mode == V4SImode
3176       && GET_CODE (op) == CONST_VECTOR
3177       && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3178       && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE)
3179     op = unwrap_const_vec_duplicate (op);
3180 
3181   switch (GET_CODE (op))
3182     {
3183     case SYMBOL_REF:
3184     case LABEL_REF:
3185       return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3186 
3187     case CONST:
3188       /* We can never know if the resulting address fits in 18 bits and can be
3189 	 loaded with ila.  For now, assume the address will not overflow if
3190 	 the displacement is "small" (fits 'K' constraint).  */
3191       if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3192 	{
3193 	  rtx sym = XEXP (XEXP (op, 0), 0);
3194 	  rtx cst = XEXP (XEXP (op, 0), 1);
3195 
3196 	  if (GET_CODE (sym) == SYMBOL_REF
3197 	      && GET_CODE (cst) == CONST_INT
3198 	      && satisfies_constraint_K (cst))
3199 	    return IC_IL1s;
3200 	}
3201       return IC_IL2s;
3202 
3203     case HIGH:
3204       return IC_IL1s;
3205 
3206     case CONST_VECTOR:
3207       for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3208 	if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3209 	    && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3210 	  return IC_POOL;
3211       /* Fall through. */
3212 
3213     case CONST_INT:
3214     case CONST_DOUBLE:
3215       constant_to_array (mode, op, arr);
3216 
3217       /* Check that each 4-byte slot is identical. */
3218       repeated = 1;
3219       for (i = 4; i < 16; i += 4)
3220 	for (j = 0; j < 4; j++)
3221 	  if (arr[j] != arr[i + j])
3222 	    repeated = 0;
3223 
3224       if (repeated)
3225 	{
3226 	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3227 	  val = trunc_int_for_mode (val, SImode);
3228 
3229 	  if (which_immediate_load (val) != SPU_NONE)
3230 	    return IC_IL1;
3231 	}
3232 
3233       /* Any mode of 2 bytes or smaller can be loaded with an il
3234          instruction. */
3235       gcc_assert (GET_MODE_SIZE (mode) > 2);
3236 
3237       fsmbi = 1;
3238       repeat = 0;
3239       for (i = 0; i < 16 && fsmbi; i++)
3240 	if (arr[i] != 0 && repeat == 0)
3241 	  repeat = arr[i];
3242 	else if (arr[i] != 0 && arr[i] != repeat)
3243 	  fsmbi = 0;
3244       if (fsmbi)
3245 	return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3246 
3247       if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3248 	return IC_CPAT;
3249 
3250       if (repeated)
3251 	return IC_IL2;
3252 
3253       return IC_POOL;
3254     default:
3255       break;
3256     }
3257   gcc_unreachable ();
3258 }
3259 
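/* Like which_immediate_load, but classify VAL for the immediate forms
   of the logical instructions: ori for 10-bit signed words, orhi/orbi
   when the value repeats per halfword or byte, and iohl for 16-bit
   unsigned values.  */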
3260 static enum spu_immediate
3261 which_logical_immediate (HOST_WIDE_INT val)
3262 {
3263   gcc_assert (val == trunc_int_for_mode (val, SImode));
3264 
3265   if (val >= -0x200 && val <= 0x1ff)
3266     return SPU_ORI;
3267   if (val >= 0 && val <= 0xffff)
3268     return SPU_IOHL;
3269   if ((val & 0xffff) == ((val >> 16) & 0xffff))
3270     {
3271       val = trunc_int_for_mode (val, HImode);
3272       if (val >= -0x200 && val <= 0x1ff)
3273 	return SPU_ORHI;
3274       if ((val & 0xff) == ((val >> 8) & 0xff))
3275 	{
3276 	  val = trunc_int_for_mode (val, QImode);
3277 	  if (val >= -0x200 && val <= 0x1ff)
3278 	    return SPU_ORBI;
3279 	}
3280     }
3281   return SPU_NONE;
3282 }
3283 
3284 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3285    CONST_DOUBLEs. */
3286 static int
3287 const_vector_immediate_p (rtx x)
3288 {
3289   int i;
3290   gcc_assert (GET_CODE (x) == CONST_VECTOR);
3291   for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3292     if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3293 	&& GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3294       return 0;
3295   return 1;
3296 }
3297 
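/* Return 1 when OP is a constant usable as the immediate operand of the
   logical instructions, i.e. anything which_logical_immediate accepts
   except SPU_IOHL.  */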
3298 int
3299 logical_immediate_p (rtx op, machine_mode mode)
3300 {
3301   HOST_WIDE_INT val;
3302   unsigned char arr[16];
3303   int i, j;
3304 
3305   gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3306 	      || GET_CODE (op) == CONST_VECTOR);
3307 
3308   if (GET_CODE (op) == CONST_VECTOR
3309       && !const_vector_immediate_p (op))
3310     return 0;
3311 
3312   if (GET_MODE (op) != VOIDmode)
3313     mode = GET_MODE (op);
3314 
3315   constant_to_array (mode, op, arr);
3316 
3317   /* Check that bytes are repeated. */
3318   for (i = 4; i < 16; i += 4)
3319     for (j = 0; j < 4; j++)
3320       if (arr[j] != arr[i + j])
3321 	return 0;
3322 
3323   val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3324   val = trunc_int_for_mode (val, SImode);
3325 
3326   i = which_logical_immediate (val);
3327   return i != SPU_NONE && i != SPU_IOHL;
3328 }
3329 
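/* Return 1 when OP is a constant whose repeated 32-bit value fits the
   16-bit unsigned immediate field of iohl.  */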
3330 int
3331 iohl_immediate_p (rtx op, machine_mode mode)
3332 {
3333   HOST_WIDE_INT val;
3334   unsigned char arr[16];
3335   int i, j;
3336 
3337   gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3338 	      || GET_CODE (op) == CONST_VECTOR);
3339 
3340   if (GET_CODE (op) == CONST_VECTOR
3341       && !const_vector_immediate_p (op))
3342     return 0;
3343 
3344   if (GET_MODE (op) != VOIDmode)
3345     mode = GET_MODE (op);
3346 
3347   constant_to_array (mode, op, arr);
3348 
3349   /* Check that bytes are repeated. */
3350   for (i = 4; i < 16; i += 4)
3351     for (j = 0; j < 4; j++)
3352       if (arr[j] != arr[i + j])
3353 	return 0;
3354 
3355   val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3356   val = trunc_int_for_mode (val, SImode);
3357 
3358   return val >= 0 && val <= 0xffff;
3359 }
3360 
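/* Return 1 when OP is a constant whose repeated element value, truncated
   to its element mode, lies in the range [LOW, HIGH], making it usable
   as an arithmetic immediate.  */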
3361 int
3362 arith_immediate_p (rtx op, machine_mode mode,
3363 		   HOST_WIDE_INT low, HOST_WIDE_INT high)
3364 {
3365   HOST_WIDE_INT val;
3366   unsigned char arr[16];
3367   int bytes, i, j;
3368 
3369   gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3370 	      || GET_CODE (op) == CONST_VECTOR);
3371 
3372   if (GET_CODE (op) == CONST_VECTOR
3373       && !const_vector_immediate_p (op))
3374     return 0;
3375 
3376   if (GET_MODE (op) != VOIDmode)
3377     mode = GET_MODE (op);
3378 
3379   constant_to_array (mode, op, arr);
3380 
3381   bytes = GET_MODE_UNIT_SIZE (mode);
3382   mode = int_mode_for_mode (GET_MODE_INNER (mode)).require ();
3383 
3384   /* Check that bytes are repeated. */
3385   for (i = bytes; i < 16; i += bytes)
3386     for (j = 0; j < bytes; j++)
3387       if (arr[j] != arr[i + j])
3388 	return 0;
3389 
3390   val = arr[0];
3391   for (j = 1; j < bytes; j++)
3392     val = (val << 8) | arr[j];
3393 
3394   val = trunc_int_for_mode (val, mode);
3395 
3396   return val >= low && val <= high;
3397 }
3398 
3399 /* TRUE when op is an immediate and an exact power of 2, and given that
3400    OP is 2^scale, scale >= LOW && scale <= HIGH.  When OP is a vector,
3401    all entries must be the same. */
3402 bool
3403 exp2_immediate_p (rtx op, machine_mode mode, int low, int high)
3404 {
3405   machine_mode int_mode;
3406   HOST_WIDE_INT val;
3407   unsigned char arr[16];
3408   int bytes, i, j;
3409 
3410   gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3411 	      || GET_CODE (op) == CONST_VECTOR);
3412 
3413   if (GET_CODE (op) == CONST_VECTOR
3414       && !const_vector_immediate_p (op))
3415     return 0;
3416 
3417   if (GET_MODE (op) != VOIDmode)
3418     mode = GET_MODE (op);
3419 
3420   constant_to_array (mode, op, arr);
3421 
3422   mode = GET_MODE_INNER (mode);
3423 
3424   bytes = GET_MODE_SIZE (mode);
3425   int_mode = int_mode_for_mode (mode).require ();
3426 
3427   /* Check that bytes are repeated. */
3428   for (i = bytes; i < 16; i += bytes)
3429     for (j = 0; j < bytes; j++)
3430       if (arr[j] != arr[i + j])
3431 	return 0;
3432 
3433   val = arr[0];
3434   for (j = 1; j < bytes; j++)
3435     val = (val << 8) | arr[j];
3436 
3437   val = trunc_int_for_mode (val, int_mode);
3438 
3439   /* Currently, we only handle SFmode */
3440   gcc_assert (mode == SFmode);
3441   if (mode == SFmode)
3442     {
3443       int exp = (val >> 23) - 127;
3444       return val > 0 && (val & 0x007fffff) == 0
3445 	     &&  exp >= low && exp <= high;
3446     }
3447   return FALSE;
3448 }
3449 
3450 /* Return true if X is a SYMBOL_REF to an __ea qualified variable.  */
3451 
3452 static bool
3453 ea_symbol_ref_p (const_rtx x)
3454 {
3455   tree decl;
3456 
3457   if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3458     {
3459       rtx plus = XEXP (x, 0);
3460       rtx op0 = XEXP (plus, 0);
3461       rtx op1 = XEXP (plus, 1);
3462       if (GET_CODE (op1) == CONST_INT)
3463 	x = op0;
3464     }
3465 
3466   return (GET_CODE (x) == SYMBOL_REF
3467  	  && (decl = SYMBOL_REF_DECL (x)) != 0
3468  	  && TREE_CODE (decl) == VAR_DECL
3469  	  && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3470 }
3471 
3472 /* We accept:
3473    - any 32-bit constant (SImode, SFmode)
3474    - any constant that can be generated with fsmbi (any mode)
3475    - a 64-bit constant where the high and low bits are identical
3476      (DImode, DFmode)
3477    - a 128-bit constant where the four 32-bit words match.  */
3478 bool
3479 spu_legitimate_constant_p (machine_mode mode, rtx x)
3480 {
3481   subrtx_iterator::array_type array;
3482   if (GET_CODE (x) == HIGH)
3483     x = XEXP (x, 0);
3484 
3485   /* Reject any __ea qualified reference.  These can't appear in
3486      instructions but must be forced to the constant pool.  */
3487   FOR_EACH_SUBRTX (iter, array, x, ALL)
3488     if (ea_symbol_ref_p (*iter))
3489       return 0;
3490 
3491   /* V4SI with all identical symbols is valid. */
3492   if (!flag_pic
3493       && mode == V4SImode
3494       && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3495 	  || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3496 	  || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3497     return const_vec_duplicate_p (x);
3498 
3499   if (GET_CODE (x) == CONST_VECTOR
3500       && !const_vector_immediate_p (x))
3501     return 0;
3502   return 1;
3503 }
3504 
3505 /* Valid addresses are:
3506    - symbol_ref, label_ref, const
3507    - reg
3508    - reg + const_int, where const_int is 16 byte aligned
3509    - reg + reg, alignment doesn't matter
3510   The alignment matters in the reg+const case because lqd and stqd
3511   ignore the 4 least significant bits of the const.  We only care about
3512   16 byte modes because the expand phase will change all smaller MEM
3513   references to TImode.  */
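/* Examples (offsets are illustrative only):
     (plus (reg) (const_int 32))  - valid for a 16-byte access; the
                                    offset is a multiple of 16
     (plus (reg) (const_int 4))   - rejected for a 16-byte access, since
                                    lqd/stqd would silently drop the low
                                    4 bits of the offset
     (plus (reg) (reg))           - valid regardless of alignment  */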
3514 static bool
3515 spu_legitimate_address_p (machine_mode mode,
3516 			  rtx x, bool reg_ok_strict)
3517 {
3518   int aligned = GET_MODE_SIZE (mode) >= 16;
3519   if (aligned
3520       && GET_CODE (x) == AND
3521       && GET_CODE (XEXP (x, 1)) == CONST_INT
3522       && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3523     x = XEXP (x, 0);
3524   switch (GET_CODE (x))
3525     {
3526     case LABEL_REF:
3527       return !TARGET_LARGE_MEM;
3528 
3529     case SYMBOL_REF:
3530     case CONST:
3531       /* Keep __ea references until reload so that spu_expand_mov can see them
3532 	 in MEMs.  */
3533       if (ea_symbol_ref_p (x))
3534 	return !reload_in_progress && !reload_completed;
3535       return !TARGET_LARGE_MEM;
3536 
3537     case CONST_INT:
3538       return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3539 
3540     case SUBREG:
3541       x = XEXP (x, 0);
3542       if (!REG_P (x))
3543 	return 0;
3544       /* FALLTHRU */
3545 
3546     case REG:
3547       return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3548 
3549     case PLUS:
3550     case LO_SUM:
3551       {
3552 	rtx op0 = XEXP (x, 0);
3553 	rtx op1 = XEXP (x, 1);
3554 	if (GET_CODE (op0) == SUBREG)
3555 	  op0 = XEXP (op0, 0);
3556 	if (GET_CODE (op1) == SUBREG)
3557 	  op1 = XEXP (op1, 0);
3558 	if (GET_CODE (op0) == REG
3559 	    && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3560 	    && GET_CODE (op1) == CONST_INT
3561 	    && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3562 		/* If virtual registers are involved, the displacement will
3563 		   change later on anyway, so checking would be premature.
3564 		   Reload will make sure the final displacement after
3565 		   register elimination is OK.  */
3566 		|| op0 == arg_pointer_rtx
3567 		|| op0 == frame_pointer_rtx
3568 		|| op0 == virtual_stack_vars_rtx)
3569 	    && (!aligned || (INTVAL (op1) & 15) == 0))
3570 	  return TRUE;
3571 	if (GET_CODE (op0) == REG
3572 	    && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3573 	    && GET_CODE (op1) == REG
3574 	    && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3575 	  return TRUE;
3576       }
3577       break;
3578 
3579     default:
3580       break;
3581     }
3582   return FALSE;
3583 }
3584 
3585 /* Like spu_legitimate_address_p, except with named addresses.  */
3586 static bool
3587 spu_addr_space_legitimate_address_p (machine_mode mode, rtx x,
3588 				     bool reg_ok_strict, addr_space_t as)
3589 {
3590   if (as == ADDR_SPACE_EA)
3591     return (REG_P (x) && (GET_MODE (x) == EAmode));
3592 
3593   else if (as != ADDR_SPACE_GENERIC)
3594     gcc_unreachable ();
3595 
3596   return spu_legitimate_address_p (mode, x, reg_ok_strict);
3597 }
3598 
3599 /* When the address is reg + const_int, force the const_int into a
3600    register.  */
3601 static rtx
3602 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3603 			machine_mode mode ATTRIBUTE_UNUSED)
3604 {
3605   rtx op0, op1;
3606   /* Make sure both operands are registers.  */
3607   if (GET_CODE (x) == PLUS)
3608     {
3609       op0 = XEXP (x, 0);
3610       op1 = XEXP (x, 1);
3611       if (ALIGNED_SYMBOL_REF_P (op0))
3612 	{
3613 	  op0 = force_reg (Pmode, op0);
3614 	  mark_reg_pointer (op0, 128);
3615 	}
3616       else if (GET_CODE (op0) != REG)
3617 	op0 = force_reg (Pmode, op0);
3618       if (ALIGNED_SYMBOL_REF_P (op1))
3619 	{
3620 	  op1 = force_reg (Pmode, op1);
3621 	  mark_reg_pointer (op1, 128);
3622 	}
3623       else if (GET_CODE (op1) != REG)
3624 	op1 = force_reg (Pmode, op1);
3625       x = gen_rtx_PLUS (Pmode, op0, op1);
3626     }
3627   return x;
3628 }
3629 
3630 /* Like spu_legitimize_address, except with named address support.  */
3631 static rtx
3632 spu_addr_space_legitimize_address (rtx x, rtx oldx, machine_mode mode,
3633 				   addr_space_t as)
3634 {
3635   if (as != ADDR_SPACE_GENERIC)
3636     return x;
3637 
3638   return spu_legitimize_address (x, oldx, mode);
3639 }
3640 
3641 /* Reload reg + const_int for out-of-range displacements.  */
3642 rtx
3643 spu_legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
3644 			       int opnum, int type)
3645 {
3646   bool removed_and = false;
3647 
3648   if (GET_CODE (ad) == AND
3649       && CONST_INT_P (XEXP (ad, 1))
3650       && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3651     {
3652       ad = XEXP (ad, 0);
3653       removed_and = true;
3654     }
3655 
3656   if (GET_CODE (ad) == PLUS
3657       && REG_P (XEXP (ad, 0))
3658       && CONST_INT_P (XEXP (ad, 1))
3659       && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3660 	   && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3661     {
3662       /* Unshare the sum.  */
3663       ad = copy_rtx (ad);
3664 
3665       /* Reload the displacement.  */
3666       push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3667 		   BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3668 		   opnum, (enum reload_type) type);
3669 
3670       /* Add back AND for alignment if we stripped it.  */
3671       if (removed_and)
3672 	ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3673 
3674       return ad;
3675     }
3676 
3677   return NULL_RTX;
3678 }
3679 
3680 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3681    struct attribute_spec.handler.  */
3682 static tree
3683 spu_handle_fndecl_attribute (tree * node,
3684 			     tree name,
3685 			     tree args ATTRIBUTE_UNUSED,
3686 			     int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3687 {
3688   if (TREE_CODE (*node) != FUNCTION_DECL)
3689     {
3690       warning (0, "%qE attribute only applies to functions",
3691 	       name);
3692       *no_add_attrs = true;
3693     }
3694 
3695   return NULL_TREE;
3696 }
3697 
3698 /* Handle the "vector" attribute.  */
3699 static tree
3700 spu_handle_vector_attribute (tree * node, tree name,
3701 			     tree args ATTRIBUTE_UNUSED,
3702 			     int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3703 {
3704   tree type = *node, result = NULL_TREE;
3705   machine_mode mode;
3706   int unsigned_p;
3707 
3708   while (POINTER_TYPE_P (type)
3709 	 || TREE_CODE (type) == FUNCTION_TYPE
3710 	 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3711     type = TREE_TYPE (type);
3712 
3713   mode = TYPE_MODE (type);
3714 
3715   unsigned_p = TYPE_UNSIGNED (type);
3716   switch (mode)
3717     {
3718     case E_DImode:
3719       result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3720       break;
3721     case E_SImode:
3722       result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3723       break;
3724     case E_HImode:
3725       result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3726       break;
3727     case E_QImode:
3728       result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3729       break;
3730     case E_SFmode:
3731       result = V4SF_type_node;
3732       break;
3733     case E_DFmode:
3734       result = V2DF_type_node;
3735       break;
3736     default:
3737       break;
3738     }
3739 
3740   /* Propagate qualifiers attached to the element type
3741      onto the vector type.  */
3742   if (result && result != type && TYPE_QUALS (type))
3743     result = build_qualified_type (result, TYPE_QUALS (type));
3744 
3745   *no_add_attrs = true;		/* No need to hang on to the attribute.  */
3746 
3747   if (!result)
3748     warning (0, "%qE attribute ignored", name);
3749   else
3750     *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3751 
3752   return NULL_TREE;
3753 }
3754 
3755 /* Return nonzero if FUNC is a naked function.  */
3756 static int
3757 spu_naked_function_p (tree func)
3758 {
3759   tree a;
3760 
3761   if (TREE_CODE (func) != FUNCTION_DECL)
3762     abort ();
3763 
3764   a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3765   return a != NULL_TREE;
3766 }
3767 
3768 int
3769 spu_initial_elimination_offset (int from, int to)
3770 {
3771   int saved_regs_size = spu_saved_regs_size ();
3772   int sp_offset = 0;
3773   if (!crtl->is_leaf || crtl->outgoing_args_size
3774       || get_frame_size () || saved_regs_size)
3775     sp_offset = STACK_POINTER_OFFSET;
3776   if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3777     return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3778   else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3779     return get_frame_size ();
3780   else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3781     return sp_offset + crtl->outgoing_args_size
3782       + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3783   else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3784     return get_frame_size () + saved_regs_size + sp_offset;
3785   else
3786     gcc_unreachable ();
3787 }
3788 
3789 rtx
3790 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3791 {
3792   machine_mode mode = TYPE_MODE (type);
3793   int byte_size = ((mode == BLKmode)
3794 		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3795 
3796   /* Make sure small structs are left justified in a register. */
3797   if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3798       && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3799     {
3800       machine_mode smode;
3801       rtvec v;
3802       int i;
3803       int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3804       int n = byte_size / UNITS_PER_WORD;
3805       v = rtvec_alloc (nregs);
3806       for (i = 0; i < n; i++)
3807 	{
3808 	  RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3809 						gen_rtx_REG (TImode,
3810 							     FIRST_RETURN_REGNUM
3811 							     + i),
3812 						GEN_INT (UNITS_PER_WORD * i));
3813 	  byte_size -= UNITS_PER_WORD;
3814 	}
3815 
3816       if (n < nregs)
3817 	{
3818 	  if (byte_size < 4)
3819 	    byte_size = 4;
3820 	  smode = smallest_int_mode_for_size (byte_size * BITS_PER_UNIT);
3821 	  RTVEC_ELT (v, n) =
3822 	    gen_rtx_EXPR_LIST (VOIDmode,
3823 			       gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3824 			       GEN_INT (UNITS_PER_WORD * n));
3825 	}
3826       return gen_rtx_PARALLEL (mode, v);
3827     }
3828   return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3829 }
3830 
3831 static rtx
3832 spu_function_arg (cumulative_args_t cum_v,
3833 		  machine_mode mode,
3834 		  const_tree type, bool named ATTRIBUTE_UNUSED)
3835 {
3836   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3837   int byte_size;
3838 
3839   if (*cum >= MAX_REGISTER_ARGS)
3840     return 0;
3841 
3842   byte_size = ((mode == BLKmode)
3843 	       ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3844 
3845   /* The ABI does not allow parameters to be passed partially in
3846      reg and partially in stack. */
3847   if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3848     return 0;
3849 
3850   /* Make sure small structs are left justified in a register. */
3851   if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3852       && byte_size < UNITS_PER_WORD && byte_size > 0)
3853     {
3854       machine_mode smode;
3855       rtx gr_reg;
3856       if (byte_size < 4)
3857 	byte_size = 4;
3858       smode = smallest_int_mode_for_size (byte_size * BITS_PER_UNIT);
3859       gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3860 				  gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
3861 				  const0_rtx);
3862       return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3863     }
3864   else
3865     return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
3866 }
3867 
3868 static void
3869 spu_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
3870 			  const_tree type, bool named ATTRIBUTE_UNUSED)
3871 {
3872   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3873 
3874   *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3875 	   ? 1
3876 	   : mode == BLKmode
3877 	   ? ((int_size_in_bytes (type) + 15) / 16)
3878 	   : mode == VOIDmode
3879 	   ? 1
3880 	   : spu_hard_regno_nregs (FIRST_ARG_REGNUM, mode));
3881 }
3882 
3883 /* Implement TARGET_FUNCTION_ARG_OFFSET.  The SPU ABI wants 32/64-bit
3884    types at offset 0 in the quad-word on the stack.  8/16-bit types
3885    should be at offsets 3/2 respectively.  */
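/* For example, a QImode argument (1 byte) goes at byte offset 3 and an
   HImode argument (2 bytes) at byte offset 2, so each occupies the
   low-order bytes of the first 4-byte word.  */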
3886 
3887 static HOST_WIDE_INT
3888 spu_function_arg_offset (machine_mode mode, const_tree type)
3889 {
3890   if (type && INTEGRAL_TYPE_P (type) && GET_MODE_SIZE (mode) < 4)
3891     return 4 - GET_MODE_SIZE (mode);
3892   return 0;
3893 }
3894 
3895 /* Implement TARGET_FUNCTION_ARG_PADDING.  */
3896 
3897 static pad_direction
3898 spu_function_arg_padding (machine_mode, const_tree)
3899 {
3900   return PAD_UPWARD;
3901 }
3902 
3903 /* Variable sized types are passed by reference.  */
3904 static bool
3905 spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
3906 		       machine_mode mode ATTRIBUTE_UNUSED,
3907 		       const_tree type, bool named ATTRIBUTE_UNUSED)
3908 {
3909   return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3910 }
3911 
3912 
3913 /* Var args. */
3914 
3915 /* Create and return the va_list datatype.
3916 
3917    On SPU, va_list is an array type equivalent to
3918 
3919       typedef struct __va_list_tag
3920         {
3921             void *__args __attribute__((__aligned(16)));
3922             void *__skip __attribute__((__aligned(16)));
3923 
3924         } va_list[1];
3925 
3926    where __args points to the arg that will be returned by the next
3927    va_arg(), and __skip points to the previous stack frame such that
3928    when __args == __skip we should advance __args by 32 bytes. */
3929 static tree
3930 spu_build_builtin_va_list (void)
3931 {
3932   tree f_args, f_skip, record, type_decl;
3933   bool owp;
3934 
3935   record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3936 
3937   type_decl =
3938     build_decl (BUILTINS_LOCATION,
3939 		TYPE_DECL, get_identifier ("__va_list_tag"), record);
3940 
3941   f_args = build_decl (BUILTINS_LOCATION,
3942 		       FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3943   f_skip = build_decl (BUILTINS_LOCATION,
3944 		       FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3945 
3946   DECL_FIELD_CONTEXT (f_args) = record;
3947   SET_DECL_ALIGN (f_args, 128);
3948   DECL_USER_ALIGN (f_args) = 1;
3949 
3950   DECL_FIELD_CONTEXT (f_skip) = record;
3951   SET_DECL_ALIGN (f_skip, 128);
3952   DECL_USER_ALIGN (f_skip) = 1;
3953 
3954   TYPE_STUB_DECL (record) = type_decl;
3955   TYPE_NAME (record) = type_decl;
3956   TYPE_FIELDS (record) = f_args;
3957   DECL_CHAIN (f_args) = f_skip;
3958 
3959   /* We know this is being padded and we want it that way.  It is an internal
3960      type so hide the warnings from the user. */
3961   owp = warn_padded;
3962   warn_padded = false;
3963 
3964   layout_type (record);
3965 
3966   warn_padded = owp;
3967 
3968   /* The correct type is an array type of one element.  */
3969   return build_array_type (record, build_index_type (size_zero_node));
3970 }
3971 
3972 /* Implement va_start by filling the va_list structure VALIST.
3973    NEXTARG points to the first anonymous stack argument.
3974 
3975    The following global variables are used to initialize
3976    the va_list structure:
3977 
3978      crtl->args.info;
3979        the CUMULATIVE_ARGS for this function
3980 
3981      crtl->args.arg_offset_rtx:
3982        holds the offset of the first anonymous stack argument
3983        (relative to the virtual arg pointer).  */
3984 
3985 static void
3986 spu_va_start (tree valist, rtx nextarg)
3987 {
3988   tree f_args, f_skip;
3989   tree args, skip, t;
3990 
3991   f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3992   f_skip = DECL_CHAIN (f_args);
3993 
3994   valist = build_simple_mem_ref (valist);
3995   args =
3996     build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3997   skip =
3998     build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3999 
4000   /* Find the __args area.  */
4001   t = make_tree (TREE_TYPE (args), nextarg);
4002   if (crtl->args.pretend_args_size > 0)
4003     t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
4004   t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
4005   TREE_SIDE_EFFECTS (t) = 1;
4006   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4007 
4008   /* Find the __skip area.  */
4009   t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4010   t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
4011 				       - STACK_POINTER_OFFSET));
4012   t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4013   TREE_SIDE_EFFECTS (t) = 1;
4014   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4015 }
4016 
4017 /* Gimplify va_arg by updating the va_list structure
4018    VALIST as required to retrieve an argument of type
4019    TYPE, and returning that argument.
4020 
4021    ret = va_arg(VALIST, TYPE);
4022 
4023    generates code equivalent to:
4024 
4025     paddedsize = (sizeof(TYPE) + 15) & -16;
4026     if (VALIST.__args + paddedsize > VALIST.__skip
4027 	&& VALIST.__args <= VALIST.__skip)
4028       addr = VALIST.__skip + 32;
4029     else
4030       addr = VALIST.__args;
4031     VALIST.__args = addr + paddedsize;
4032     ret = *(TYPE *)addr;
4033  */
4034 static tree
4035 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4036 			  gimple_seq * post_p ATTRIBUTE_UNUSED)
4037 {
4038   tree f_args, f_skip;
4039   tree args, skip;
4040   HOST_WIDE_INT size, rsize;
4041   tree addr, tmp;
4042   bool pass_by_reference_p;
4043 
4044   f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4045   f_skip = DECL_CHAIN (f_args);
4046 
4047   args =
4048     build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4049   skip =
4050     build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4051 
4052   addr = create_tmp_var (ptr_type_node, "va_arg");
4053 
4054   /* if an object is dynamically sized, a pointer to it is passed
4055      instead of the object itself. */
4056   pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4057 					   false);
4058   if (pass_by_reference_p)
4059     type = build_pointer_type (type);
4060   size = int_size_in_bytes (type);
4061   rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4062 
4063   /* build conditional expression to calculate addr. The expression
4064      will be gimplified later. */
4065   tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
4066   tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4067 		build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4068 		build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4069 		unshare_expr (skip)));
4070 
4071   tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4072 		fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4073 		unshare_expr (args));
4074 
4075   gimplify_assign (addr, tmp, pre_p);
4076 
4077   /* update VALIST.__args */
4078   tmp = fold_build_pointer_plus_hwi (addr, rsize);
4079   gimplify_assign (unshare_expr (args), tmp, pre_p);
4080 
4081   addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4082 		       addr);
4083 
4084   if (pass_by_reference_p)
4085     addr = build_va_arg_indirect_ref (addr);
4086 
4087   return build_va_arg_indirect_ref (addr);
4088 }
4089 
4090 /* Save parameter registers starting with the register that corresponds
4091    to the first unnamed parameter.  If the first unnamed parameter is
4092    in the stack then save no registers.  Set pretend_args_size to the
4093    amount of space needed to save the registers. */
4094 static void
4095 spu_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
4096 			    tree type, int *pretend_size, int no_rtl)
4097 {
4098   if (!no_rtl)
4099     {
4100       rtx tmp;
4101       int regno;
4102       int offset;
4103       int ncum = *get_cumulative_args (cum);
4104 
4105       /* cum currently points to the last named argument; we want to
4106          start at the next argument. */
4107       spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
4108 
4109       offset = -STACK_POINTER_OFFSET;
4110       for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4111 	{
4112 	  tmp = gen_frame_mem (V4SImode,
4113 			       plus_constant (Pmode, virtual_incoming_args_rtx,
4114 					      offset));
4115 	  emit_move_insn (tmp,
4116 			  gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4117 	  offset += 16;
4118 	}
4119       *pretend_size = offset + STACK_POINTER_OFFSET;
4120     }
4121 }
4122 
4123 static void
4124 spu_conditional_register_usage (void)
4125 {
4126   if (flag_pic)
4127     {
4128       fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4129       call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4130     }
4131 }
4132 
4133 /* This is called any time we inspect the alignment of a register for
4134    addresses.  */
4135 static int
4136 reg_aligned_for_addr (rtx x)
4137 {
4138   int regno =
4139     REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4140   return REGNO_POINTER_ALIGN (regno) >= 128;
4141 }
4142 
4143 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4144    into its SYMBOL_REF_FLAGS.  */
4145 static void
4146 spu_encode_section_info (tree decl, rtx rtl, int first)
4147 {
4148   default_encode_section_info (decl, rtl, first);
4149 
4150   /* If a variable has a forced alignment to < 16 bytes, mark it with
4151      SYMBOL_FLAG_ALIGN1.  */
4152   if (TREE_CODE (decl) == VAR_DECL
4153       && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4154     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4155 }
4156 
4157 /* Return TRUE if we are certain the mem refers to a complete object
4158    which is both 16-byte aligned and padded to a 16-byte boundary.  This
4159    would make it safe to store with a single instruction.
4160    We guarantee the alignment and padding for static objects by aligning
4161    all of them to 16 bytes.  (DATA_ALIGNMENT and TARGET_CONSTANT_ALIGNMENT.)
4162    FIXME: We currently cannot guarantee this for objects on the stack
4163    because assign_parm_setup_stack calls assign_stack_local with the
4164    alignment of the parameter mode and in that case the alignment never
4165    gets adjusted by LOCAL_ALIGNMENT. */
4166 static int
4167 store_with_one_insn_p (rtx mem)
4168 {
4169   machine_mode mode = GET_MODE (mem);
4170   rtx addr = XEXP (mem, 0);
4171   if (mode == BLKmode)
4172     return 0;
4173   if (GET_MODE_SIZE (mode) >= 16)
4174     return 1;
4175   /* Only static objects. */
4176   if (GET_CODE (addr) == SYMBOL_REF)
4177     {
4178       /* We use the associated declaration to make sure the access is
4179          referring to the whole object.
4180          We check both MEM_EXPR and SYMBOL_REF_DECL.  I'm not sure
4181          if it is necessary.  Will there be cases where one exists, and
4182          the other does not?  Will there be cases where both exist, but
4183          have different types?  */
4184       tree decl = MEM_EXPR (mem);
4185       if (decl
4186 	  && TREE_CODE (decl) == VAR_DECL
4187 	  && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4188 	return 1;
4189       decl = SYMBOL_REF_DECL (addr);
4190       if (decl
4191 	  && TREE_CODE (decl) == VAR_DECL
4192 	  && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4193 	return 1;
4194     }
4195   return 0;
4196 }
4197 
4198 /* Return 1 when the address is not valid for a simple load and store as
4199    required by the '_mov*' patterns.  We could make this less strict
4200    for loads, but we prefer MEMs to look the same so they are more
4201    likely to be merged.  */
4202 static int
4203 address_needs_split (rtx mem)
4204 {
4205   if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4206       && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4207 	  || !(store_with_one_insn_p (mem)
4208 	       || mem_is_padded_component_ref (mem))))
4209     return 1;
4210 
4211   return 0;
4212 }
4213 
4214 static GTY(()) rtx cache_fetch;		  /* __cache_fetch function */
4215 static GTY(()) rtx cache_fetch_dirty;	  /* __cache_fetch_dirty function */
4216 static alias_set_type ea_alias_set = -1;  /* alias set for __ea memory */
4217 
4218 /* MEM is known to be an __ea qualified memory access.  Emit a call to
4219    fetch the ppu memory to local store, and return its address in local
4220    store.  */
4221 
4222 static void
4223 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4224 {
4225   if (is_store)
4226     {
4227       rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4228       if (!cache_fetch_dirty)
4229 	cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4230       emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4231 			       ea_addr, EAmode, ndirty, SImode);
4232     }
4233   else
4234     {
4235       if (!cache_fetch)
4236 	cache_fetch = init_one_libfunc ("__cache_fetch");
4237       emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4238 			       ea_addr, EAmode);
4239     }
4240 }
4241 
4242 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4243    dirty bit marking, inline.
4244 
4245    The cache control data structure is an array of
4246 
4247    struct __cache_tag_array
4248      {
4249         unsigned int tag_lo[4];
4250         unsigned int tag_hi[4];
4251         void *data_pointer[4];
4252         int reserved[4];
4253         vector unsigned short dirty_bits[4];
4254      }  */
4255 
4256 static void
4257 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4258 {
4259   rtx ea_addr_si;
4260   HOST_WIDE_INT v;
4261   rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4262   rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4263   rtx index_mask = gen_reg_rtx (SImode);
4264   rtx tag_arr = gen_reg_rtx (Pmode);
4265   rtx splat_mask = gen_reg_rtx (TImode);
4266   rtx splat = gen_reg_rtx (V4SImode);
4267   rtx splat_hi = NULL_RTX;
4268   rtx tag_index = gen_reg_rtx (Pmode);
4269   rtx block_off = gen_reg_rtx (SImode);
4270   rtx tag_addr = gen_reg_rtx (Pmode);
4271   rtx tag = gen_reg_rtx (V4SImode);
4272   rtx cache_tag = gen_reg_rtx (V4SImode);
4273   rtx cache_tag_hi = NULL_RTX;
4274   rtx cache_ptrs = gen_reg_rtx (TImode);
4275   rtx cache_ptrs_si = gen_reg_rtx (SImode);
4276   rtx tag_equal = gen_reg_rtx (V4SImode);
4277   rtx tag_equal_hi = NULL_RTX;
4278   rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4279   rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4280   rtx eq_index = gen_reg_rtx (SImode);
4281   rtx bcomp, hit_label, hit_ref, cont_label;
4282   rtx_insn *insn;
4283 
4284   if (spu_ea_model != 32)
4285     {
4286       splat_hi = gen_reg_rtx (V4SImode);
4287       cache_tag_hi = gen_reg_rtx (V4SImode);
4288       tag_equal_hi = gen_reg_rtx (V4SImode);
4289     }
4290 
4291   emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
4292   emit_move_insn (tag_arr, tag_arr_sym);
4293   v = 0x0001020300010203LL;
4294   emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4295   ea_addr_si = ea_addr;
4296   if (spu_ea_model != 32)
4297     ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4298 
4299   /* tag_index = ea_addr & (tag_array_size - 128)  */
4300   emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4301 
4302   /* splat ea_addr to all 4 slots.  */
4303   emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4304   /* Similarly for high 32 bits of ea_addr.  */
4305   if (spu_ea_model != 32)
4306     emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4307 
4308   /* block_off = ea_addr & 127  */
4309   emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4310 
4311   /* tag_addr = tag_arr + tag_index  */
4312   emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4313 
4314   /* Read cache tags.  */
4315   emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4316   if (spu_ea_model != 32)
4317     emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4318 					       plus_constant (Pmode,
4319 							      tag_addr, 16)));
4320 
4321   /* tag = ea_addr & -128  */
4322   emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4323 
4324   /* Read all four cache data pointers.  */
4325   emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4326 					   plus_constant (Pmode,
4327 							  tag_addr, 32)));
4328 
4329   /* Compare tags.  */
4330   emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4331   if (spu_ea_model != 32)
4332     {
4333       emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4334       emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4335     }
4336 
4337   /* At most one of the tags compare equal, so tag_equal has one
4338      32-bit slot set to all 1's, with the other slots all zero.
4339      gbb picks off low bit from each byte in the 128-bit registers,
4340      so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4341      we have a hit.  */
4342   emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4343   emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4344 
4345   /* So counting leading zeros will set eq_index to 16, 20, 24 or 28.  */
4346   emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4347 
4348   /* This allows us to rotate the corresponding cache data pointer to
4349      slot 0 (rotating by eq_index mod 16 bytes).  */
4350   emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4351   emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4352 
4353   /* Add block offset to form final data address.  */
4354   emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4355 
4356   /* Check that we did hit.  */
4357   hit_label = gen_label_rtx ();
4358   hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4359   bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4360   insn = emit_jump_insn (gen_rtx_SET (pc_rtx,
4361 				      gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4362 							    hit_ref, pc_rtx)));
4363   /* Say that this branch is very likely to happen.  */
4364   add_reg_br_prob_note (insn, profile_probability::very_likely ());
4365 
4366   ea_load_store (mem, is_store, ea_addr, data_addr);
4367   cont_label = gen_label_rtx ();
4368   emit_jump_insn (gen_jump (cont_label));
4369   emit_barrier ();
4370 
4371   emit_label (hit_label);
4372 
4373   if (is_store)
4374     {
4375       HOST_WIDE_INT v_hi;
4376       rtx dirty_bits = gen_reg_rtx (TImode);
4377       rtx dirty_off = gen_reg_rtx (SImode);
4378       rtx dirty_128 = gen_reg_rtx (TImode);
4379       rtx neg_block_off = gen_reg_rtx (SImode);
4380 
4381       /* Set up mask with one dirty bit per byte of the mem we are
4382 	 writing, starting from top bit.  */
4383       v_hi = v = -1;
4384       v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4385       if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4386 	{
4387 	  v_hi = v;
4388 	  v = 0;
4389 	}
4390       emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4391 
4392       /* Form index into cache dirty_bits.  eq_index is one of
4393 	 0x10, 0x14, 0x18 or 0x1c.  Multiplying by 4 gives us
4394 	 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4395 	 offset to each of the four dirty_bits elements.  */
4396       emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4397 
4398       emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4399 
4400       /* Rotate bit mask to proper bit.  */
4401       emit_insn (gen_negsi2 (neg_block_off, block_off));
4402       emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4403       emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4404 
4405       /* Or in the new dirty bits.  */
4406       emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4407 
4408       /* Store.  */
4409       emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4410     }
4411 
4412   emit_label (cont_label);
4413 }
4414 
4415 static rtx
4416 expand_ea_mem (rtx mem, bool is_store)
4417 {
4418   rtx ea_addr;
4419   rtx data_addr = gen_reg_rtx (Pmode);
4420   rtx new_mem;
4421 
4422   ea_addr = force_reg (EAmode, XEXP (mem, 0));
4423   if (optimize_size || optimize == 0)
4424     ea_load_store (mem, is_store, ea_addr, data_addr);
4425   else
4426     ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4427 
4428   if (ea_alias_set == -1)
4429     ea_alias_set = new_alias_set ();
4430 
4431   /* We generate a new MEM RTX to refer to the copy of the data
4432      in the cache.  We do not copy memory attributes (except the
4433      alignment) from the original MEM, as they may no longer apply
4434      to the cache copy.  */
4435   new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4436   set_mem_alias_set (new_mem, ea_alias_set);
4437   set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4438 
4439   return new_mem;
4440 }
4441 
4442 int
4443 spu_expand_mov (rtx * ops, machine_mode mode)
4444 {
4445   if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4446     {
4447       /* Perform the move in the destination SUBREG's inner mode.  */
4448       ops[0] = SUBREG_REG (ops[0]);
4449       mode = GET_MODE (ops[0]);
4450       ops[1] = gen_lowpart_common (mode, ops[1]);
4451       gcc_assert (ops[1]);
4452     }
4453 
4454   if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4455     {
4456       rtx from = SUBREG_REG (ops[1]);
4457       scalar_int_mode imode = int_mode_for_mode (GET_MODE (from)).require ();
4458 
4459       gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4460 		  && GET_MODE_CLASS (imode) == MODE_INT
4461 		  && subreg_lowpart_p (ops[1]));
4462 
4463       if (GET_MODE_SIZE (imode) < 4)
4464 	imode = SImode;
4465       if (imode != GET_MODE (from))
4466 	from = gen_rtx_SUBREG (imode, from, 0);
4467 
4468       if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4469 	{
4470 	  enum insn_code icode = convert_optab_handler (trunc_optab,
4471 							mode, imode);
4472 	  emit_insn (GEN_FCN (icode) (ops[0], from));
4473 	}
4474       else
4475 	emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4476       return 1;
4477     }
4478 
4479   /* At least one of the operands needs to be a register. */
4480   if ((reload_in_progress | reload_completed) == 0
4481       && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4482     {
4483       rtx temp = force_reg (mode, ops[1]);
4484       emit_move_insn (ops[0], temp);
4485       return 1;
4486     }
4487   if (reload_in_progress || reload_completed)
4488     {
4489       if (CONSTANT_P (ops[1]))
4490 	return spu_split_immediate (ops);
4491       return 0;
4492     }
4493 
4494   /* Catch the SImode immediates greater than 0x7fffffff, and sign
4495      extend them. */
4496   if (GET_CODE (ops[1]) == CONST_INT)
4497     {
4498       HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4499       if (val != INTVAL (ops[1]))
4500 	{
4501 	  emit_move_insn (ops[0], GEN_INT (val));
4502 	  return 1;
4503 	}
4504     }
4505   if (MEM_P (ops[0]))
4506     {
4507       if (MEM_ADDR_SPACE (ops[0]))
4508 	ops[0] = expand_ea_mem (ops[0], true);
4509       return spu_split_store (ops);
4510     }
4511   if (MEM_P (ops[1]))
4512     {
4513       if (MEM_ADDR_SPACE (ops[1]))
4514 	ops[1] = expand_ea_mem (ops[1], false);
4515       return spu_split_load (ops);
4516     }
4517 
4518   return 0;
4519 }
4520 
4521 static void
4522 spu_convert_move (rtx dst, rtx src)
4523 {
4524   machine_mode mode = GET_MODE (dst);
4525   machine_mode int_mode = int_mode_for_mode (mode).require ();
4526   rtx reg;
4527   gcc_assert (GET_MODE (src) == TImode);
4528   reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4529   emit_insn (gen_rtx_SET (reg,
4530 	       gen_rtx_TRUNCATE (int_mode,
4531 		 gen_rtx_LSHIFTRT (TImode, src,
4532 		   GEN_INT (int_mode == DImode ? 64 : 96)))));
4533   if (int_mode != mode)
4534     {
4535       reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4536       emit_move_insn (dst, reg);
4537     }
4538 }
4539 
4540 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4541    the address from SRC and SRC+16.  Return a REG or CONST_INT that
4542    specifies how many bytes to rotate the loaded registers, plus any
4543    extra from EXTRA_ROTQBY.  The address and rotate amounts are
4544    normalized to improve merging of loads and rotate computations. */
4545 static rtx
4546 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4547 {
4548   rtx addr = XEXP (src, 0);
4549   rtx p0, p1, rot, addr0, addr1;
4550   int rot_amt;
4551 
4552   rot = 0;
4553   rot_amt = 0;
4554 
4555   if (MEM_ALIGN (src) >= 128)
4556     /* Address is already aligned; simply perform a TImode load.  */ ;
4557   else if (GET_CODE (addr) == PLUS)
4558     {
4559       /* 8 cases:
4560          aligned reg   + aligned reg     => lqx
4561          aligned reg   + unaligned reg   => lqx, rotqby
4562          aligned reg   + aligned const   => lqd
4563          aligned reg   + unaligned const => lqd, rotqbyi
4564          unaligned reg + aligned reg     => lqx, rotqby
4565          unaligned reg + unaligned reg   => lqx, a, rotqby (1 scratch)
4566          unaligned reg + aligned const   => lqd, rotqby
4567          unaligned reg + unaligned const -> not allowed by legitimate address
4568        */
4569       p0 = XEXP (addr, 0);
4570       p1 = XEXP (addr, 1);
4571       if (!reg_aligned_for_addr (p0))
4572 	{
4573 	  if (REG_P (p1) && !reg_aligned_for_addr (p1))
4574 	    {
4575 	      rot = gen_reg_rtx (SImode);
4576 	      emit_insn (gen_addsi3 (rot, p0, p1));
4577 	    }
4578 	  else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4579 	    {
4580 	      if (INTVAL (p1) > 0
4581 		  && REG_POINTER (p0)
4582 		  && INTVAL (p1) * BITS_PER_UNIT
4583 		     < REGNO_POINTER_ALIGN (REGNO (p0)))
4584 		{
4585 		  rot = gen_reg_rtx (SImode);
4586 		  emit_insn (gen_addsi3 (rot, p0, p1));
4587 		  addr = p0;
4588 		}
4589 	      else
4590 		{
4591 		  rtx x = gen_reg_rtx (SImode);
4592 		  emit_move_insn (x, p1);
4593 		  if (!spu_arith_operand (p1, SImode))
4594 		    p1 = x;
4595 		  rot = gen_reg_rtx (SImode);
4596 		  emit_insn (gen_addsi3 (rot, p0, p1));
4597 		  addr = gen_rtx_PLUS (Pmode, p0, x);
4598 		}
4599 	    }
4600 	  else
4601 	    rot = p0;
4602 	}
4603       else
4604 	{
4605 	  if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4606 	    {
4607 	      rot_amt = INTVAL (p1) & 15;
4608 	      if (INTVAL (p1) & -16)
4609 		{
4610 		  p1 = GEN_INT (INTVAL (p1) & -16);
4611 		  addr = gen_rtx_PLUS (SImode, p0, p1);
4612 		}
4613 	      else
4614 		addr = p0;
4615 	    }
4616 	  else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4617 	    rot = p1;
4618 	}
4619     }
4620   else if (REG_P (addr))
4621     {
4622       if (!reg_aligned_for_addr (addr))
4623 	rot = addr;
4624     }
4625   else if (GET_CODE (addr) == CONST)
4626     {
4627       if (GET_CODE (XEXP (addr, 0)) == PLUS
4628 	  && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4629 	  && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4630 	{
4631 	  rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4632 	  if (rot_amt & -16)
4633 	    addr = gen_rtx_CONST (Pmode,
4634 				  gen_rtx_PLUS (Pmode,
4635 						XEXP (XEXP (addr, 0), 0),
4636 						GEN_INT (rot_amt & -16)));
4637 	  else
4638 	    addr = XEXP (XEXP (addr, 0), 0);
4639 	}
4640       else
4641 	{
4642 	  rot = gen_reg_rtx (Pmode);
4643 	  emit_move_insn (rot, addr);
4644 	}
4645     }
4646   else if (GET_CODE (addr) == CONST_INT)
4647     {
4648       rot_amt = INTVAL (addr);
4649       addr = GEN_INT (rot_amt & -16);
4650     }
4651   else if (!ALIGNED_SYMBOL_REF_P (addr))
4652     {
4653       rot = gen_reg_rtx (Pmode);
4654       emit_move_insn (rot, addr);
4655     }
4656 
4657   rot_amt += extra_rotby;
4658 
4659   rot_amt &= 15;
4660 
4661   if (rot && rot_amt)
4662     {
4663       rtx x = gen_reg_rtx (SImode);
4664       emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4665       rot = x;
4666       rot_amt = 0;
4667     }
4668   if (!rot && rot_amt)
4669     rot = GEN_INT (rot_amt);
4670 
4671   addr0 = copy_rtx (addr);
4672   addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4673   emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4674 
4675   if (dst1)
4676     {
4677       addr1 = plus_constant (SImode, copy_rtx (addr), 16);
4678       addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4679       emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4680     }
4681 
4682   return rot;
4683 }
4684 
4685 int
4686 spu_split_load (rtx * ops)
4687 {
4688   machine_mode mode = GET_MODE (ops[0]);
4689   rtx addr, load, rot;
4690   int rot_amt;
4691 
4692   if (GET_MODE_SIZE (mode) >= 16)
4693     return 0;
4694 
4695   addr = XEXP (ops[1], 0);
4696   gcc_assert (GET_CODE (addr) != AND);
4697 
4698   if (!address_needs_split (ops[1]))
4699     {
4700       ops[1] = change_address (ops[1], TImode, addr);
4701       load = gen_reg_rtx (TImode);
4702       emit_insn (gen__movti (load, ops[1]));
4703       spu_convert_move (ops[0], load);
4704       return 1;
4705     }
4706 
4707   rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4708 
4709   load = gen_reg_rtx (TImode);
4710   rot = spu_expand_load (load, 0, ops[1], rot_amt);
4711 
4712   if (rot)
4713     emit_insn (gen_rotqby_ti (load, load, rot));
4714 
4715   spu_convert_move (ops[0], load);
4716   return 1;
4717 }
4718 
4719 int
4720 spu_split_store (rtx * ops)
4721 {
4722   machine_mode mode = GET_MODE (ops[0]);
4723   rtx reg;
4724   rtx addr, p0, p1, p1_lo, smem;
4725   int aform;
4726   int scalar;
4727 
4728   if (GET_MODE_SIZE (mode) >= 16)
4729     return 0;
4730 
4731   addr = XEXP (ops[0], 0);
4732   gcc_assert (GET_CODE (addr) != AND);
4733 
4734   if (!address_needs_split (ops[0]))
4735     {
4736       reg = gen_reg_rtx (TImode);
4737       emit_insn (gen_spu_convert (reg, ops[1]));
4738       ops[0] = change_address (ops[0], TImode, addr);
4739       emit_move_insn (ops[0], reg);
4740       return 1;
4741     }
4742 
4743   if (GET_CODE (addr) == PLUS)
4744     {
4745       /* 8 cases:
4746          aligned reg   + aligned reg     => lqx, c?x, shuf, stqx
4747          aligned reg   + unaligned reg   => lqx, c?x, shuf, stqx
4748          aligned reg   + aligned const   => lqd, c?d, shuf, stqx
4749          aligned reg   + unaligned const => lqd, c?d, shuf, stqx
4750          unaligned reg + aligned reg     => lqx, c?x, shuf, stqx
4751          unaligned reg + unaligned reg   => lqx, c?x, shuf, stqx
4752          unaligned reg + aligned const   => lqd, c?d, shuf, stqx
4753          unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
4754        */
4755       aform = 0;
4756       p0 = XEXP (addr, 0);
4757       p1 = p1_lo = XEXP (addr, 1);
4758       if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4759 	{
4760 	  p1_lo = GEN_INT (INTVAL (p1) & 15);
4761 	  if (reg_aligned_for_addr (p0))
4762 	    {
4763 	      p1 = GEN_INT (INTVAL (p1) & -16);
4764 	      if (p1 == const0_rtx)
4765 		addr = p0;
4766 	      else
4767 		addr = gen_rtx_PLUS (SImode, p0, p1);
4768 	    }
4769 	  else
4770 	    {
4771 	      rtx x = gen_reg_rtx (SImode);
4772 	      emit_move_insn (x, p1);
4773 	      addr = gen_rtx_PLUS (SImode, p0, x);
4774 	    }
4775 	}
4776     }
4777   else if (REG_P (addr))
4778     {
4779       aform = 0;
4780       p0 = addr;
4781       p1 = p1_lo = const0_rtx;
4782     }
4783   else
4784     {
4785       aform = 1;
4786       p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4787       p1 = 0;			/* aform doesn't use p1 */
4788       p1_lo = addr;
4789       if (ALIGNED_SYMBOL_REF_P (addr))
4790 	p1_lo = const0_rtx;
4791       else if (GET_CODE (addr) == CONST
4792 	       && GET_CODE (XEXP (addr, 0)) == PLUS
4793 	       && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4794 	       && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4795 	{
4796 	  HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4797 	  if ((v & -16) != 0)
4798 	    addr = gen_rtx_CONST (Pmode,
4799 				  gen_rtx_PLUS (Pmode,
4800 						XEXP (XEXP (addr, 0), 0),
4801 						GEN_INT (v & -16)));
4802 	  else
4803 	    addr = XEXP (XEXP (addr, 0), 0);
4804 	  p1_lo = GEN_INT (v & 15);
4805 	}
4806       else if (GET_CODE (addr) == CONST_INT)
4807 	{
4808 	  p1_lo = GEN_INT (INTVAL (addr) & 15);
4809 	  addr = GEN_INT (INTVAL (addr) & -16);
4810 	}
4811       else
4812 	{
4813 	  p1_lo = gen_reg_rtx (SImode);
4814 	  emit_move_insn (p1_lo, addr);
4815 	}
4816     }
4817 
4818   gcc_assert (aform == 0 || aform == 1);
4819   reg = gen_reg_rtx (TImode);
4820 
4821   scalar = store_with_one_insn_p (ops[0]);
4822   if (!scalar)
4823     {
4824       /* We could copy the flags from the ops[0] MEM to mem here.
4825          We don't because we want this load to be optimized away if
4826          possible, and copying the flags will prevent that in certain
4827          cases, e.g. consider the volatile flag. */
4828 
4829       rtx pat = gen_reg_rtx (TImode);
4830       rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4831       set_mem_alias_set (lmem, 0);
4832       emit_insn (gen_movti (reg, lmem));
4833 
4834       if (!p0 || reg_aligned_for_addr (p0))
4835 	p0 = stack_pointer_rtx;
4836       if (!p1_lo)
4837 	p1_lo = const0_rtx;
4838 
4839       emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4840       emit_insn (gen_shufb (reg, ops[1], reg, pat));
4841     }
4842   else
4843     {
4844       if (GET_CODE (ops[1]) == REG)
4845 	emit_insn (gen_spu_convert (reg, ops[1]));
4846       else if (GET_CODE (ops[1]) == SUBREG)
4847 	emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4848       else
4849 	abort ();
4850     }
4851 
4852   if (GET_MODE_SIZE (mode) < 4 && scalar)
4853     emit_insn (gen_ashlti3
4854 	       (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
4855 
4856   smem = change_address (ops[0], TImode, copy_rtx (addr));
4857   /* We can't use the previous alias set because the memory has changed
4858      size and can potentially overlap objects of other types.  */
4859   set_mem_alias_set (smem, 0);
4860 
4861   emit_insn (gen_movti (smem, reg));
4862   return 1;
4863 }
4864 
4865 /* Return TRUE if X is a MEM which is a struct member reference
4866    and the member can safely be loaded and stored with a single
4867    instruction because it is padded. */
4868 static int
4869 mem_is_padded_component_ref (rtx x)
4870 {
4871   tree t = MEM_EXPR (x);
4872   tree r;
4873   if (!t || TREE_CODE (t) != COMPONENT_REF)
4874     return 0;
4875   t = TREE_OPERAND (t, 1);
4876   if (!t || TREE_CODE (t) != FIELD_DECL
4877       || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4878     return 0;
4879   /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4880   r = DECL_FIELD_CONTEXT (t);
4881   if (!r || TREE_CODE (r) != RECORD_TYPE)
4882     return 0;
4883   /* Make sure they are the same mode */
4884   if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4885     return 0;
4886   /* If there are no following fields, then the field's alignment ensures
4887      the structure is padded to that alignment, which means this field is
4888      padded too.  */
4889   if (TREE_CHAIN (t) == 0)
4890     return 1;
4891   /* If the following field is also aligned then this field will be
4892      padded. */
4893   t = TREE_CHAIN (t);
4894   if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4895     return 1;
4896   return 0;
4897 }
4898 
4899 /* Parse the -mfixed-range= option string.  */
4900 static void
4901 fix_range (const char *const_str)
4902 {
4903   int i, first, last;
4904   char *str, *dash, *comma;
4905 
4906   /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4907      REG2 are either register names or register numbers.  The effect
4908      of this option is to mark the registers in the range from REG1 to
4909      REG2 as ``fixed'' so they won't be used by the compiler.  */
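  /* For example, -mfixed-range=80-90,100-110 (register numbers chosen
     purely for illustration) marks registers 80 through 90 and 100
     through 110 as fixed and call-used.  */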
4910 
4911   i = strlen (const_str);
4912   str = (char *) alloca (i + 1);
4913   memcpy (str, const_str, i + 1);
4914 
4915   while (1)
4916     {
4917       dash = strchr (str, '-');
4918       if (!dash)
4919 	{
4920 	  warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
4921 	  return;
4922 	}
4923       *dash = '\0';
4924       comma = strchr (dash + 1, ',');
4925       if (comma)
4926 	*comma = '\0';
4927 
4928       first = decode_reg_name (str);
4929       if (first < 0)
4930 	{
4931 	  warning (0, "unknown register name: %s", str);
4932 	  return;
4933 	}
4934 
4935       last = decode_reg_name (dash + 1);
4936       if (last < 0)
4937 	{
4938 	  warning (0, "unknown register name: %s", dash + 1);
4939 	  return;
4940 	}
4941 
4942       *dash = '-';
4943 
4944       if (first > last)
4945 	{
4946 	  warning (0, "%s-%s is an empty range", str, dash + 1);
4947 	  return;
4948 	}
4949 
4950       for (i = first; i <= last; ++i)
4951 	fixed_regs[i] = call_used_regs[i] = 1;
4952 
4953       if (!comma)
4954 	break;
4955 
4956       *comma = ',';
4957       str = comma + 1;
4958     }
4959 }
4960 
4961 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4962    can be generated using the fsmbi instruction. */
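/* (fsmbi expands each bit of its 16-bit immediate into a byte of 0x00
   or 0xff, so, as an illustrative case, any constant whose 16 bytes
   are each either 0x00 or 0xff falls into this class.)  */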
4963 int
4964 fsmbi_const_p (rtx x)
4965 {
4966   if (CONSTANT_P (x))
4967     {
4968       /* We can always choose TImode for CONST_INT because the high bits
4969          of an SImode will always be all 1s, i.e., valid for fsmbi. */
4970       enum immediate_class c = classify_immediate (x, TImode);
4971       return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4972     }
4973   return 0;
4974 }
4975 
4976 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4977    can be generated using the cbd, chd, cwd or cdd instruction. */
4978 int
4979 cpat_const_p (rtx x, machine_mode mode)
4980 {
4981   if (CONSTANT_P (x))
4982     {
4983       enum immediate_class c = classify_immediate (x, mode);
4984       return c == IC_CPAT;
4985     }
4986   return 0;
4987 }
4988 
4989 rtx
4990 gen_cpat_const (rtx * ops)
4991 {
4992   unsigned char dst[16];
4993   int i, offset, shift, isize;
4994   if (GET_CODE (ops[3]) != CONST_INT
4995       || GET_CODE (ops[2]) != CONST_INT
4996       || (GET_CODE (ops[1]) != CONST_INT
4997 	  && GET_CODE (ops[1]) != REG))
4998     return 0;
4999   if (GET_CODE (ops[1]) == REG
5000       && (!REG_POINTER (ops[1])
5001 	  || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5002     return 0;
5003 
5004   for (i = 0; i < 16; i++)
5005     dst[i] = i + 16;
5006   isize = INTVAL (ops[3]);
5007   if (isize == 1)
5008     shift = 3;
5009   else if (isize == 2)
5010     shift = 2;
5011   else
5012     shift = 0;
5013   offset = (INTVAL (ops[2]) +
5014 	    (GET_CODE (ops[1]) ==
5015 	     CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5016   for (i = 0; i < isize; i++)
5017     dst[offset + i] = i + shift;
5018   return array_to_constant (TImode, dst);
5019 }
5020 
5021 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5022    array.  Use MODE for CONST_INTs.  When the constant's mode is smaller
5023    than 16 bytes, the value is repeated across the rest of the array. */
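/* E.g. (value chosen for illustration) the HImode constant 0x1234
   yields arr[] = { 0x12, 0x34, 0x12, 0x34, ... }, with the two bytes
   repeated across all 16 positions.  */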
5024 void
5025 constant_to_array (machine_mode mode, rtx x, unsigned char arr[16])
5026 {
5027   HOST_WIDE_INT val;
5028   int i, j, first;
5029 
5030   memset (arr, 0, 16);
5031   mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5032   if (GET_CODE (x) == CONST_INT
5033       || (GET_CODE (x) == CONST_DOUBLE
5034 	  && (mode == SFmode || mode == DFmode)))
5035     {
5036       gcc_assert (mode != VOIDmode && mode != BLKmode);
5037 
5038       if (GET_CODE (x) == CONST_DOUBLE)
5039 	val = const_double_to_hwint (x);
5040       else
5041 	val = INTVAL (x);
5042       first = GET_MODE_SIZE (mode) - 1;
5043       for (i = first; i >= 0; i--)
5044 	{
5045 	  arr[i] = val & 0xff;
5046 	  val >>= 8;
5047 	}
5048       /* Splat the constant across the whole array. */
5049       for (j = 0, i = first + 1; i < 16; i++)
5050 	{
5051 	  arr[i] = arr[j];
5052 	  j = (j == first) ? 0 : j + 1;
5053 	}
5054     }
5055   else if (GET_CODE (x) == CONST_DOUBLE)
5056     {
5057       val = CONST_DOUBLE_LOW (x);
5058       for (i = 15; i >= 8; i--)
5059 	{
5060 	  arr[i] = val & 0xff;
5061 	  val >>= 8;
5062 	}
5063       val = CONST_DOUBLE_HIGH (x);
5064       for (i = 7; i >= 0; i--)
5065 	{
5066 	  arr[i] = val & 0xff;
5067 	  val >>= 8;
5068 	}
5069     }
5070   else if (GET_CODE (x) == CONST_VECTOR)
5071     {
5072       int units;
5073       rtx elt;
5074       mode = GET_MODE_INNER (mode);
5075       units = CONST_VECTOR_NUNITS (x);
5076       for (i = 0; i < units; i++)
5077 	{
5078 	  elt = CONST_VECTOR_ELT (x, i);
5079 	  if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5080 	    {
5081 	      if (GET_CODE (elt) == CONST_DOUBLE)
5082 		val = const_double_to_hwint (elt);
5083 	      else
5084 		val = INTVAL (elt);
5085 	      first = GET_MODE_SIZE (mode) - 1;
5086 	      if (first + i * GET_MODE_SIZE (mode) > 16)
5087 		abort ();
5088 	      for (j = first; j >= 0; j--)
5089 		{
5090 		  arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5091 		  val >>= 8;
5092 		}
5093 	    }
5094 	}
5095     }
5096   else
5097     gcc_unreachable();
5098 }
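
/* A worked example of the splat above (illustrative): for the SImode
   constant 0x12345678 the first loop stores arr[0..3] =
   { 0x12, 0x34, 0x56, 0x78 } (most significant byte first), and the
   second loop repeats that 4-byte pattern so the full array becomes

     { 0x12,0x34,0x56,0x78, 0x12,0x34,0x56,0x78,
       0x12,0x34,0x56,0x78, 0x12,0x34,0x56,0x78 }.  */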
5099 
5100 /* Convert a 16 byte array to a constant of mode MODE.  When MODE is
5101    smaller than 16 bytes, use the bytes that would represent that value
5102    in a register, e.g., for QImode return the value of arr[3].  */
5103 rtx
5104 array_to_constant (machine_mode mode, const unsigned char arr[16])
5105 {
5106   machine_mode inner_mode;
5107   rtvec v;
5108   int units, size, i, j, k;
5109   HOST_WIDE_INT val;
5110 
5111   if (GET_MODE_CLASS (mode) == MODE_INT
5112       && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5113     {
5114       j = GET_MODE_SIZE (mode);
5115       i = j < 4 ? 4 - j : 0;
5116       for (val = 0; i < j; i++)
5117 	val = (val << 8) | arr[i];
5118       val = trunc_int_for_mode (val, mode);
5119       return GEN_INT (val);
5120     }
5121 
5122   if (mode == TImode)
5123     {
5124       HOST_WIDE_INT high;
5125       for (i = high = 0; i < 8; i++)
5126 	high = (high << 8) | arr[i];
5127       for (i = 8, val = 0; i < 16; i++)
5128 	val = (val << 8) | arr[i];
5129       return immed_double_const (val, high, TImode);
5130     }
5131   if (mode == SFmode)
5132     {
5133       val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5134       val = trunc_int_for_mode (val, SImode);
5135       return hwint_to_const_double (SFmode, val);
5136     }
5137   if (mode == DFmode)
5138     {
5139       for (i = 0, val = 0; i < 8; i++)
5140 	val = (val << 8) | arr[i];
5141       return hwint_to_const_double (DFmode, val);
5142     }
5143 
5144   if (!VECTOR_MODE_P (mode))
5145     abort ();
5146 
5147   units = GET_MODE_NUNITS (mode);
5148   size = GET_MODE_UNIT_SIZE (mode);
5149   inner_mode = GET_MODE_INNER (mode);
5150   v = rtvec_alloc (units);
5151 
5152   for (k = i = 0; i < units; ++i)
5153     {
5154       val = 0;
5155       for (j = 0; j < size; j++, k++)
5156 	val = (val << 8) | arr[k];
5157 
5158       if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5159 	RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5160       else
5161 	RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5162     }
5163   if (k > 16)
5164     abort ();
5165 
5166   return gen_rtx_CONST_VECTOR (mode, v);
5167 }
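
/* Illustrative example: this is the inverse of constant_to_array.  For
   MODE == HImode the integer loop above starts at i = 2, producing
   (arr[2] << 8) | arr[3], the two bytes an HImode value occupies in its
   preferred slot; for QImode only arr[3] is used, as noted in the
   comment before the function.  */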
5168 
5169 static void
5170 reloc_diagnostic (rtx x)
5171 {
5172   tree decl = 0;
5173   if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5174     return;
5175 
5176   if (GET_CODE (x) == SYMBOL_REF)
5177     decl = SYMBOL_REF_DECL (x);
5178   else if (GET_CODE (x) == CONST
5179 	   && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5180     decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5181 
5182   /* SYMBOL_REF_DECL is not necessarily a DECL. */
5183   if (decl && !DECL_P (decl))
5184     decl = 0;
5185 
5186   /* The decl could be a string constant.  */
5187   if (decl && DECL_P (decl))
5188     {
5189       location_t loc;
5190       /* We use last_assemble_variable_decl to get line information.  It's
5191 	 not always going to be right and might not even be close, but will
5192 	 be right for the more common cases. */
5193       if (!last_assemble_variable_decl || in_section == ctors_section)
5194 	loc = DECL_SOURCE_LOCATION (decl);
5195       else
5196 	loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5197 
5198       if (TARGET_WARN_RELOC)
5199 	warning_at (loc, 0,
5200 		    "creating run-time relocation for %qD", decl);
5201       else
5202 	error_at (loc,
5203 		  "creating run-time relocation for %qD", decl);
5204     }
5205   else
5206     {
5207       if (TARGET_WARN_RELOC)
5208 	warning_at (input_location, 0, "creating run-time relocation");
5209       else
5210 	error_at (input_location, "creating run-time relocation");
5211     }
5212 }
5213 
5214 /* Hook into assemble_integer so we can generate an error for run-time
5215    relocations.  The SPU ABI disallows them. */
5216 static bool
5217 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5218 {
5219   /* By default run-time relocations aren't supported, but we allow them
5220      in case users support them in their own run-time loader.  And we provide
5221      a warning for those users that don't.  */
5222   if ((GET_CODE (x) == SYMBOL_REF)
5223       || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5224     reloc_diagnostic (x);
5225 
5226   return default_assemble_integer (x, size, aligned_p);
5227 }
5228 
5229 static void
5230 spu_asm_globalize_label (FILE * file, const char *name)
5231 {
5232   fputs ("\t.global\t", file);
5233   assemble_name (file, name);
5234   fputs ("\n", file);
5235 }
5236 
5237 static bool
5238 spu_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
5239 	       int opno ATTRIBUTE_UNUSED, int *total,
5240 	       bool speed ATTRIBUTE_UNUSED)
5241 {
5242   int code = GET_CODE (x);
5243   int cost = COSTS_N_INSNS (2);
5244 
5245   /* Folding to a CONST_VECTOR will use extra space but there might
5246      be only a small savings in cycles.  We'd like to use a CONST_VECTOR
5247      only if it allows us to fold away multiple insns.  Changing the cost
5248      of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5249      because this cost will only be compared against a single insn.
5250      if (code == CONST_VECTOR)
5251        return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5252    */
5253 
5254   /* Use defaults for float operations.  Not accurate but good enough. */
5255   if (mode == DFmode)
5256     {
5257       *total = COSTS_N_INSNS (13);
5258       return true;
5259     }
5260   if (mode == SFmode)
5261     {
5262       *total = COSTS_N_INSNS (6);
5263       return true;
5264     }
5265   switch (code)
5266     {
5267     case CONST_INT:
5268       if (satisfies_constraint_K (x))
5269 	*total = 0;
5270       else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5271 	*total = COSTS_N_INSNS (1);
5272       else
5273 	*total = COSTS_N_INSNS (3);
5274       return true;
5275 
5276     case CONST:
5277       *total = COSTS_N_INSNS (3);
5278       return true;
5279 
5280     case LABEL_REF:
5281     case SYMBOL_REF:
5282       *total = COSTS_N_INSNS (0);
5283       return true;
5284 
5285     case CONST_DOUBLE:
5286       *total = COSTS_N_INSNS (5);
5287       return true;
5288 
5289     case FLOAT_EXTEND:
5290     case FLOAT_TRUNCATE:
5291     case FLOAT:
5292     case UNSIGNED_FLOAT:
5293     case FIX:
5294     case UNSIGNED_FIX:
5295       *total = COSTS_N_INSNS (7);
5296       return true;
5297 
5298     case PLUS:
5299       if (mode == TImode)
5300 	{
5301 	  *total = COSTS_N_INSNS (9);
5302 	  return true;
5303 	}
5304       break;
5305 
5306     case MULT:
5307       cost =
5308 	GET_CODE (XEXP (x, 0)) ==
5309 	REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5310       if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5311 	{
5312 	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5313 	    {
5314 	      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5315 	      cost = COSTS_N_INSNS (14);
5316 	      if ((val & 0xffff) == 0)
5317 		cost = COSTS_N_INSNS (9);
5318 	      else if (val > 0 && val < 0x10000)
5319 		cost = COSTS_N_INSNS (11);
5320 	    }
5321 	}
5322       *total = cost;
5323       return true;
5324     case DIV:
5325     case UDIV:
5326     case MOD:
5327     case UMOD:
5328       *total = COSTS_N_INSNS (20);
5329       return true;
5330     case ROTATE:
5331     case ROTATERT:
5332     case ASHIFT:
5333     case ASHIFTRT:
5334     case LSHIFTRT:
5335       *total = COSTS_N_INSNS (4);
5336       return true;
5337     case UNSPEC:
5338       if (XINT (x, 1) == UNSPEC_CONVERT)
5339 	*total = COSTS_N_INSNS (0);
5340       else
5341 	*total = COSTS_N_INSNS (4);
5342       return true;
5343     }
5344   /* Scale cost by mode size, except when initializing (cfun->decl == 0). */
5345   if (GET_MODE_CLASS (mode) == MODE_INT
5346       && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5347     cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5348       * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5349   *total = cost;
5350   return true;
5351 }
5352 
5353 static scalar_int_mode
5354 spu_unwind_word_mode (void)
5355 {
5356   return SImode;
5357 }
5358 
5359 /* Decide whether we can make a sibling call to a function.  DECL is the
5360    declaration of the function being targeted by the call and EXP is the
5361    CALL_EXPR representing the call.  */
5362 static bool
5363 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5364 {
5365   return decl && !TARGET_LARGE_MEM;
5366 }
5367 
5368 /* We need to correctly update the back chain pointer and the Available
5369    Stack Size (which is in the second slot of the sp register).  */
5370 void
5371 spu_allocate_stack (rtx op0, rtx op1)
5372 {
5373   HOST_WIDE_INT v;
5374   rtx chain = gen_reg_rtx (V4SImode);
5375   rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5376   rtx sp = gen_reg_rtx (V4SImode);
5377   rtx splatted = gen_reg_rtx (V4SImode);
5378   rtx pat = gen_reg_rtx (TImode);
5379 
5380   /* copy the back chain so we can save it back again. */
5381   emit_move_insn (chain, stack_bot);
5382 
5383   op1 = force_reg (SImode, op1);
5384 
5385   v = 0x1020300010203ll;
5386   emit_move_insn (pat, immed_double_const (v, v, TImode));
5387   emit_insn (gen_shufb (splatted, op1, op1, pat));
5388 
5389   emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5390   emit_insn (gen_subv4si3 (sp, sp, splatted));
5391 
5392   if (flag_stack_check || flag_stack_clash_protection)
5393     {
5394       rtx avail = gen_reg_rtx(SImode);
5395       rtx result = gen_reg_rtx(SImode);
5396       emit_insn (gen_vec_extractv4sisi (avail, sp, GEN_INT (1)));
5397       emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5398       emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5399     }
5400 
5401   emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5402 
5403   emit_move_insn (stack_bot, chain);
5404 
5405   emit_move_insn (op0, virtual_stack_dynamic_rtx);
5406 }
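
/* Sketch of the sequence above (illustrative): the shufb pattern
   { 0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3 } splats the SImode allocation
   size in OP1 into all four word slots, so a single subv4si3 decrements
   both the stack pointer (word 0) and the Available Stack Size (word 1)
   at once; the saved back chain is then stored at the new bottom of the
   stack.  */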
5407 
5408 void
5409 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5410 {
5411   static unsigned char arr[16] =
5412     { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5413   rtx temp = gen_reg_rtx (SImode);
5414   rtx temp2 = gen_reg_rtx (SImode);
5415   rtx temp3 = gen_reg_rtx (V4SImode);
5416   rtx temp4 = gen_reg_rtx (V4SImode);
5417   rtx pat = gen_reg_rtx (TImode);
5418   rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5419 
5420   /* Restore the backchain from the first word, sp from the second.  */
5421   emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5422   emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5423 
5424   emit_move_insn (pat, array_to_constant (TImode, arr));
5425 
5426   /* Compute Available Stack Size for sp */
5427   emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5428   emit_insn (gen_shufb (temp3, temp, temp, pat));
5429 
5430   /* Compute Available Stack Size for back chain */
5431   emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5432   emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5433   emit_insn (gen_addv4si3 (temp4, sp, temp4));
5434 
5435   emit_insn (gen_addv4si3 (sp, sp, temp3));
5436   emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5437 }
5438 
5439 static void
5440 spu_init_libfuncs (void)
5441 {
5442   set_optab_libfunc (smul_optab, DImode, "__muldi3");
5443   set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5444   set_optab_libfunc (smod_optab, DImode, "__moddi3");
5445   set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5446   set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5447   set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5448   set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5449   set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5450   set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5451   set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
5452   set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5453   set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5454 
5455   set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5456   set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5457 
5458   set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5459   set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5460   set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5461   set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5462   set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5463   set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5464   set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5465   set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5466   set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5467   set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5468   set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5469   set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5470 
5471   set_optab_libfunc (smul_optab, TImode, "__multi3");
5472   set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5473   set_optab_libfunc (smod_optab, TImode, "__modti3");
5474   set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5475   set_optab_libfunc (umod_optab, TImode, "__umodti3");
5476   set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5477 }
5478 
5479 /* Make a subreg, stripping any existing subreg.  We could possibly just
5480    call simplify_subreg, but in this case we know what we want. */
5481 rtx
5482 spu_gen_subreg (machine_mode mode, rtx x)
5483 {
5484   if (GET_CODE (x) == SUBREG)
5485     x = SUBREG_REG (x);
5486   if (GET_MODE (x) == mode)
5487     return x;
5488   return gen_rtx_SUBREG (mode, x, 0);
5489 }
5490 
5491 static bool
5492 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5493 {
5494   return (TYPE_MODE (type) == BLKmode
5495 	  && ((type) == 0
5496 	      || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5497 	      || int_size_in_bytes (type) >
5498 	      (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5499 }
5500 
5501 /* Create the built-in types and functions */
5502 
5503 enum spu_function_code
5504 {
5505 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5506 #include "spu-builtins.def"
5507 #undef DEF_BUILTIN
5508    NUM_SPU_BUILTINS
5509 };
5510 
5511 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5512 
5513 struct spu_builtin_description spu_builtins[] = {
5514 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5515   {fcode, icode, name, type, params},
5516 #include "spu-builtins.def"
5517 #undef DEF_BUILTIN
5518 };
5519 
5520 static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5521 
5522 /* Returns the spu builtin decl for CODE.  */
5523 
5524 static tree
5525 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5526 {
5527   if (code >= NUM_SPU_BUILTINS)
5528     return error_mark_node;
5529 
5530   return spu_builtin_decls[code];
5531 }
5532 
5533 
5534 static void
5535 spu_init_builtins (void)
5536 {
5537   struct spu_builtin_description *d;
5538   unsigned int i;
5539 
5540   V16QI_type_node = build_vector_type (intQI_type_node, 16);
5541   V8HI_type_node = build_vector_type (intHI_type_node, 8);
5542   V4SI_type_node = build_vector_type (intSI_type_node, 4);
5543   V2DI_type_node = build_vector_type (intDI_type_node, 2);
5544   V4SF_type_node = build_vector_type (float_type_node, 4);
5545   V2DF_type_node = build_vector_type (double_type_node, 2);
5546 
5547   unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5548   unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5549   unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5550   unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5551 
5552   spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5553 
5554   spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5555   spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5556   spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5557   spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5558   spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5559   spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5560   spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5561   spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5562   spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5563   spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5564   spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5565   spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5566 
5567   spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5568   spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5569   spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5570   spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5571   spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5572   spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5573   spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5574   spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5575 
5576   spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5577   spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5578 
5579   spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5580 
5581   spu_builtin_types[SPU_BTI_PTR] =
5582     build_pointer_type (build_qualified_type
5583 			(void_type_node,
5584 			 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5585 
5586   /* For each builtin we build a new prototype.  The tree code will make
5587      sure nodes are shared. */
5588   for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5589     {
5590       tree p;
5591       char name[64];		/* build_function will make a copy. */
5592       int parm;
5593 
5594       if (d->name == 0)
5595 	continue;
5596 
5597       /* Find last parm.  */
5598       for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5599 	;
5600 
5601       p = void_list_node;
5602       while (parm > 1)
5603 	p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5604 
5605       p = build_function_type (spu_builtin_types[d->parm[0]], p);
5606 
5607       sprintf (name, "__builtin_%s", d->name);
5608       spu_builtin_decls[i] =
5609 	add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5610       if (d->fcode == SPU_MASK_FOR_LOAD)
5611 	TREE_READONLY (spu_builtin_decls[i]) = 1;
5612 
5613       /* These builtins don't throw.  */
5614       TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5615     }
5616 }
5617 
5618 void
5619 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5620 {
5621   static unsigned char arr[16] =
5622     { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5623 
5624   rtx temp = gen_reg_rtx (Pmode);
5625   rtx temp2 = gen_reg_rtx (V4SImode);
5626   rtx temp3 = gen_reg_rtx (V4SImode);
5627   rtx pat = gen_reg_rtx (TImode);
5628   rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5629 
5630   emit_move_insn (pat, array_to_constant (TImode, arr));
5631 
5632   /* Restore the sp.  */
5633   emit_move_insn (temp, op1);
5634   emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5635 
5636   /* Compute available stack size for sp.  */
5637   emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5638   emit_insn (gen_shufb (temp3, temp, temp, pat));
5639 
5640   emit_insn (gen_addv4si3 (sp, sp, temp3));
5641   emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5642 }
5643 
5644 int
5645 spu_safe_dma (HOST_WIDE_INT channel)
5646 {
5647   return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5648 }
5649 
5650 void
5651 spu_builtin_splats (rtx ops[])
5652 {
5653   machine_mode mode = GET_MODE (ops[0]);
5654   if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5655     {
5656       unsigned char arr[16];
5657       constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5658       emit_move_insn (ops[0], array_to_constant (mode, arr));
5659     }
5660   else
5661     {
5662       rtx reg = gen_reg_rtx (TImode);
5663       rtx shuf;
5664       if (GET_CODE (ops[1]) != REG
5665 	  && GET_CODE (ops[1]) != SUBREG)
5666 	ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5667       switch (mode)
5668 	{
5669 	case E_V2DImode:
5670 	case E_V2DFmode:
5671 	  shuf =
5672 	    immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5673 				TImode);
5674 	  break;
5675 	case E_V4SImode:
5676 	case E_V4SFmode:
5677 	  shuf =
5678 	    immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5679 				TImode);
5680 	  break;
5681 	case E_V8HImode:
5682 	  shuf =
5683 	    immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5684 				TImode);
5685 	  break;
5686 	case E_V16QImode:
5687 	  shuf =
5688 	    immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5689 				TImode);
5690 	  break;
5691 	default:
5692 	  abort ();
5693 	}
5694       emit_move_insn (reg, shuf);
5695       emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5696     }
5697 }
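
/* Illustrative note on the shuffle controls above: each one copies the
   scalar's preferred slot into every element.  For E_V4SImode the byte
   pattern is { 0,1,2,3 } repeated four times, so the word in bytes 0-3
   of OPS[1] lands in all four word slots; for E_V8HImode the repeated
   pair { 2,3 } picks up the halfword from its preferred slot, and for
   E_V16QImode byte 3 is broadcast to all sixteen positions.  */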
5698 
5699 void
5700 spu_builtin_extract (rtx ops[])
5701 {
5702   machine_mode mode;
5703   rtx rot, from, tmp;
5704 
5705   mode = GET_MODE (ops[1]);
5706 
5707   if (GET_CODE (ops[2]) == CONST_INT)
5708     {
5709       switch (mode)
5710 	{
5711 	case E_V16QImode:
5712 	  emit_insn (gen_vec_extractv16qiqi (ops[0], ops[1], ops[2]));
5713 	  break;
5714 	case E_V8HImode:
5715 	  emit_insn (gen_vec_extractv8hihi (ops[0], ops[1], ops[2]));
5716 	  break;
5717 	case E_V4SFmode:
5718 	  emit_insn (gen_vec_extractv4sfsf (ops[0], ops[1], ops[2]));
5719 	  break;
5720 	case E_V4SImode:
5721 	  emit_insn (gen_vec_extractv4sisi (ops[0], ops[1], ops[2]));
5722 	  break;
5723 	case E_V2DImode:
5724 	  emit_insn (gen_vec_extractv2didi (ops[0], ops[1], ops[2]));
5725 	  break;
5726 	case E_V2DFmode:
5727 	  emit_insn (gen_vec_extractv2dfdf (ops[0], ops[1], ops[2]));
5728 	  break;
5729 	default:
5730 	  abort ();
5731 	}
5732       return;
5733     }
5734 
5735   from = spu_gen_subreg (TImode, ops[1]);
5736   rot = gen_reg_rtx (TImode);
5737   tmp = gen_reg_rtx (SImode);
5738 
5739   switch (mode)
5740     {
5741     case E_V16QImode:
5742       emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5743       break;
5744     case E_V8HImode:
5745       emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5746       emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5747       break;
5748     case E_V4SFmode:
5749     case E_V4SImode:
5750       emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5751       break;
5752     case E_V2DImode:
5753     case E_V2DFmode:
5754       emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5755       break;
5756     default:
5757       abort ();
5758     }
5759   emit_insn (gen_rotqby_ti (rot, from, tmp));
5760 
5761   emit_insn (gen_spu_convert (ops[0], rot));
5762 }
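
/* Sketch of the variable-index path above (illustrative): rotqby
   rotates the quadword left by TMP bytes so the requested element ends
   up in the scalar's preferred slot.  For example, extracting element 5
   of a V8HImode vector (bytes 10-11) uses TMP = 2*5 - 2 = 8, and an
   8-byte rotate moves those bytes into positions 2-3, exactly where
   spu_convert expects an HImode value.  */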
5763 
5764 void
5765 spu_builtin_insert (rtx ops[])
5766 {
5767   machine_mode mode = GET_MODE (ops[0]);
5768   machine_mode imode = GET_MODE_INNER (mode);
5769   rtx mask = gen_reg_rtx (TImode);
5770   rtx offset;
5771 
5772   if (GET_CODE (ops[3]) == CONST_INT)
5773     offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5774   else
5775     {
5776       offset = gen_reg_rtx (SImode);
5777       emit_insn (gen_mulsi3
5778 		 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5779     }
5780   emit_insn (gen_cpat
5781 	     (mask, stack_pointer_rtx, offset,
5782 	      GEN_INT (GET_MODE_SIZE (imode))));
5783   emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5784 }
5785 
5786 void
5787 spu_builtin_promote (rtx ops[])
5788 {
5789   machine_mode mode, imode;
5790   rtx rot, from, offset;
5791   HOST_WIDE_INT pos;
5792 
5793   mode = GET_MODE (ops[0]);
5794   imode = GET_MODE_INNER (mode);
5795 
5796   from = gen_reg_rtx (TImode);
5797   rot = spu_gen_subreg (TImode, ops[0]);
5798 
5799   emit_insn (gen_spu_convert (from, ops[1]));
5800 
5801   if (GET_CODE (ops[2]) == CONST_INT)
5802     {
5803       pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5804       if (GET_MODE_SIZE (imode) < 4)
5805 	pos += 4 - GET_MODE_SIZE (imode);
5806       offset = GEN_INT (pos & 15);
5807     }
5808   else
5809     {
5810       offset = gen_reg_rtx (SImode);
5811       switch (mode)
5812 	{
5813 	case E_V16QImode:
5814 	  emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5815 	  break;
5816 	case E_V8HImode:
5817 	  emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5818 	  emit_insn (gen_addsi3 (offset, offset, offset));
5819 	  break;
5820 	case E_V4SFmode:
5821 	case E_V4SImode:
5822 	  emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5823 	  emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5824 	  break;
5825 	case E_V2DImode:
5826 	case E_V2DFmode:
5827 	  emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5828 	  break;
5829 	default:
5830 	  abort ();
5831 	}
5832     }
5833   emit_insn (gen_rotqby_ti (rot, from, offset));
5834 }
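
/* Worked example for the constant-index case above (illustrative):
   promoting a scalar into element 2 of a V4SImode vector gives
   POS = -4*2, so the rotate count is (-8 & 15) = 8 and rotqby moves the
   scalar from its preferred slot (bytes 0-3) into bytes 8-11, i.e. word
   element 2 of the result.  */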
5835 
5836 static void
5837 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
5838 {
5839   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
5840   rtx shuf = gen_reg_rtx (V4SImode);
5841   rtx insn = gen_reg_rtx (V4SImode);
5842   rtx shufc;
5843   rtx insnc;
5844   rtx mem;
5845 
5846   fnaddr = force_reg (SImode, fnaddr);
5847   cxt = force_reg (SImode, cxt);
5848 
5849   if (TARGET_LARGE_MEM)
5850     {
5851       rtx rotl = gen_reg_rtx (V4SImode);
5852       rtx mask = gen_reg_rtx (V4SImode);
5853       rtx bi = gen_reg_rtx (SImode);
5854       static unsigned char const shufa[16] = {
5855 	2, 3, 0, 1, 18, 19, 16, 17,
5856 	0, 1, 2, 3, 16, 17, 18, 19
5857       };
5858       static unsigned char const insna[16] = {
5859 	0x41, 0, 0, 79,
5860 	0x41, 0, 0, STATIC_CHAIN_REGNUM,
5861 	0x60, 0x80, 0, 79,
5862 	0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5863       };
5864 
5865       shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5866       insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5867 
5868       emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5869       emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5870       emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5871       emit_insn (gen_selb (insn, insnc, rotl, mask));
5872 
5873       mem = adjust_address (m_tramp, V4SImode, 0);
5874       emit_move_insn (mem, insn);
5875 
5876       emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5877       mem = adjust_address (m_tramp, Pmode, 16);
5878       emit_move_insn (mem, bi);
5879     }
5880   else
5881     {
5882       rtx scxt = gen_reg_rtx (SImode);
5883       rtx sfnaddr = gen_reg_rtx (SImode);
5884       static unsigned char const insna[16] = {
5885 	0x42, 0, 0, STATIC_CHAIN_REGNUM,
5886 	0x30, 0, 0, 0,
5887 	0, 0, 0, 0,
5888 	0, 0, 0, 0
5889       };
5890 
5891       shufc = gen_reg_rtx (TImode);
5892       insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5893 
5894       /* By or'ing all of cxt with the ila opcode we are assuming cxt
5895 	 fits 18 bits and the last 4 are zeros.  This will be true if
5896 	 the stack pointer is initialized to 0x3fff0 at program start,
5897 	 otherwise the ila instruction will be garbage. */
5898 
5899       emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5900       emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5901       emit_insn (gen_cpat
5902 		 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5903       emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5904       emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5905 
5906       mem = adjust_address (m_tramp, V4SImode, 0);
5907       emit_move_insn (mem, insn);
5908     }
5909   emit_insn (gen_sync ());
5910 }
5911 
5912 static bool
5913 spu_warn_func_return (tree decl)
5914 {
5915   /* Naked functions are implemented entirely in assembly, including the
5916      return sequence, so suppress warnings about this.  */
5917   return !spu_naked_function_p (decl);
5918 }
5919 
5920 void
5921 spu_expand_sign_extend (rtx ops[])
5922 {
5923   unsigned char arr[16];
5924   rtx pat = gen_reg_rtx (TImode);
5925   rtx sign, c;
5926   int i, last;
5927   last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5928   if (GET_MODE (ops[1]) == QImode)
5929     {
5930       sign = gen_reg_rtx (HImode);
5931       emit_insn (gen_extendqihi2 (sign, ops[1]));
5932       for (i = 0; i < 16; i++)
5933 	arr[i] = 0x12;
5934       arr[last] = 0x13;
5935     }
5936   else
5937     {
5938       for (i = 0; i < 16; i++)
5939 	arr[i] = 0x10;
5940       switch (GET_MODE (ops[1]))
5941 	{
5942 	case E_HImode:
5943 	  sign = gen_reg_rtx (SImode);
5944 	  emit_insn (gen_extendhisi2 (sign, ops[1]));
5945 	  arr[last] = 0x03;
5946 	  arr[last - 1] = 0x02;
5947 	  break;
5948 	case E_SImode:
5949 	  sign = gen_reg_rtx (SImode);
5950 	  emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5951 	  for (i = 0; i < 4; i++)
5952 	    arr[last - i] = 3 - i;
5953 	  break;
5954 	case E_DImode:
5955 	  sign = gen_reg_rtx (SImode);
5956 	  c = gen_reg_rtx (SImode);
5957 	  emit_insn (gen_spu_convert (c, ops[1]));
5958 	  emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5959 	  for (i = 0; i < 8; i++)
5960 	    arr[last - i] = 7 - i;
5961 	  break;
5962 	default:
5963 	  abort ();
5964 	}
5965     }
5966   emit_move_insn (pat, array_to_constant (TImode, arr));
5967   emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5968 }
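
/* Illustrative example: sign-extending SImode to DImode builds the
   shuffle pattern { 0x10,0x10,0x10,0x10, 0x00,0x01,0x02,0x03, ... },
   so shufb fills bytes 0-3 of the result from the sign word (all 0x00
   or 0xff) and bytes 4-7 with the original SImode value, leaving the
   extended DImode in its preferred slot.  */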
5969 
5970 /* Expand vector initialization.  If there are any constant parts,
5971    load the constant parts first.  Then load any non-constant parts.  */
5972 void
5973 spu_expand_vector_init (rtx target, rtx vals)
5974 {
5975   machine_mode mode = GET_MODE (target);
5976   int n_elts = GET_MODE_NUNITS (mode);
5977   int n_var = 0;
5978   bool all_same = true;
5979   rtx first, x = NULL_RTX, first_constant = NULL_RTX;
5980   int i;
5981 
5982   first = XVECEXP (vals, 0, 0);
5983   for (i = 0; i < n_elts; ++i)
5984     {
5985       x = XVECEXP (vals, 0, i);
5986       if (!(CONST_INT_P (x)
5987 	    || GET_CODE (x) == CONST_DOUBLE
5988 	    || GET_CODE (x) == CONST_FIXED))
5989 	++n_var;
5990       else
5991 	{
5992 	  if (first_constant == NULL_RTX)
5993 	    first_constant = x;
5994 	}
5995       if (i > 0 && !rtx_equal_p (x, first))
5996 	all_same = false;
5997     }
5998 
5999   /* if all elements are the same, use splats to repeat elements */
6000   if (all_same)
6001     {
6002       if (!CONSTANT_P (first)
6003 	  && !register_operand (first, GET_MODE (x)))
6004 	first = force_reg (GET_MODE (first), first);
6005       emit_insn (gen_spu_splats (target, first));
6006       return;
6007     }
6008 
6009   /* load constant parts */
6010   if (n_var != n_elts)
6011     {
6012       if (n_var == 0)
6013 	{
6014 	  emit_move_insn (target,
6015 			  gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6016 	}
6017       else
6018 	{
6019 	  rtx constant_parts_rtx = copy_rtx (vals);
6020 
6021 	  gcc_assert (first_constant != NULL_RTX);
6022 	  /* fill empty slots with the first constant, this increases
6023 	     our chance of using splats in the recursive call below. */
6024 	  for (i = 0; i < n_elts; ++i)
6025 	    {
6026 	      x = XVECEXP (constant_parts_rtx, 0, i);
6027 	      if (!(CONST_INT_P (x)
6028 		    || GET_CODE (x) == CONST_DOUBLE
6029 		    || GET_CODE (x) == CONST_FIXED))
6030 		XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6031 	    }
6032 
6033 	  spu_expand_vector_init (target, constant_parts_rtx);
6034 	}
6035     }
6036 
6037   /* load variable parts */
6038   if (n_var != 0)
6039     {
6040       rtx insert_operands[4];
6041 
6042       insert_operands[0] = target;
6043       insert_operands[2] = target;
6044       for (i = 0; i < n_elts; ++i)
6045 	{
6046 	  x = XVECEXP (vals, 0, i);
6047 	  if (!(CONST_INT_P (x)
6048 		|| GET_CODE (x) == CONST_DOUBLE
6049 		|| GET_CODE (x) == CONST_FIXED))
6050 	    {
6051 	      if (!register_operand (x, GET_MODE (x)))
6052 		x = force_reg (GET_MODE (x), x);
6053 	      insert_operands[1] = x;
6054 	      insert_operands[3] = GEN_INT (i);
6055 	      spu_builtin_insert (insert_operands);
6056 	    }
6057 	}
6058     }
6059 }
6060 
6061 /* Return the insn code for the vector compare instruction for the given
6062    CODE, DEST_MODE and OP_MODE.  Return -1 if no valid insn is available.  */
6063 
6064 static int
6065 get_vec_cmp_insn (enum rtx_code code,
6066                   machine_mode dest_mode,
6067                   machine_mode op_mode)
6068 
6069 {
6070   switch (code)
6071     {
6072     case EQ:
6073       if (dest_mode == V16QImode && op_mode == V16QImode)
6074         return CODE_FOR_ceq_v16qi;
6075       if (dest_mode == V8HImode && op_mode == V8HImode)
6076         return CODE_FOR_ceq_v8hi;
6077       if (dest_mode == V4SImode && op_mode == V4SImode)
6078         return CODE_FOR_ceq_v4si;
6079       if (dest_mode == V4SImode && op_mode == V4SFmode)
6080         return CODE_FOR_ceq_v4sf;
6081       if (dest_mode == V2DImode && op_mode == V2DFmode)
6082         return CODE_FOR_ceq_v2df;
6083       break;
6084     case GT:
6085       if (dest_mode == V16QImode && op_mode == V16QImode)
6086         return CODE_FOR_cgt_v16qi;
6087       if (dest_mode == V8HImode && op_mode == V8HImode)
6088         return CODE_FOR_cgt_v8hi;
6089       if (dest_mode == V4SImode && op_mode == V4SImode)
6090         return CODE_FOR_cgt_v4si;
6091       if (dest_mode == V4SImode && op_mode == V4SFmode)
6092         return CODE_FOR_cgt_v4sf;
6093       if (dest_mode == V2DImode && op_mode == V2DFmode)
6094         return CODE_FOR_cgt_v2df;
6095       break;
6096     case GTU:
6097       if (dest_mode == V16QImode && op_mode == V16QImode)
6098         return CODE_FOR_clgt_v16qi;
6099       if (dest_mode == V8HImode && op_mode == V8HImode)
6100         return CODE_FOR_clgt_v8hi;
6101       if (dest_mode == V4SImode && op_mode == V4SImode)
6102         return CODE_FOR_clgt_v4si;
6103       break;
6104     default:
6105       break;
6106     }
6107   return -1;
6108 }
6109 
6110 /* Emit a vector compare for operands OP0 and OP1 using code RCODE.
6111    DMODE is the expected destination mode.  This is a recursive function.  */
6112 
6113 static rtx
6114 spu_emit_vector_compare (enum rtx_code rcode,
6115                          rtx op0, rtx op1,
6116                          machine_mode dmode)
6117 {
6118   int vec_cmp_insn;
6119   rtx mask;
6120   machine_mode dest_mode;
6121   machine_mode op_mode = GET_MODE (op1);
6122 
6123   gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6124 
6125   /* Floating point vector compare instructions use V4SImode as the
6126      destination.  Double precision vector compares use V2DImode.
6127      Move the result to the appropriate mode later.  */
6128   if (dmode == V4SFmode)
6129     dest_mode = V4SImode;
6130   else if (dmode == V2DFmode)
6131     dest_mode = V2DImode;
6132   else
6133     dest_mode = dmode;
6134 
6135   mask = gen_reg_rtx (dest_mode);
6136   vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6137 
6138   if (vec_cmp_insn == -1)
6139     {
6140       bool swap_operands = false;
6141       bool try_again = false;
6142       switch (rcode)
6143         {
6144         case LT:
6145           rcode = GT;
6146           swap_operands = true;
6147           try_again = true;
6148           break;
6149         case LTU:
6150           rcode = GTU;
6151           swap_operands = true;
6152           try_again = true;
6153           break;
6154         case NE:
6155 	case UNEQ:
6156 	case UNLE:
6157 	case UNLT:
6158 	case UNGE:
6159 	case UNGT:
6160 	case UNORDERED:
6161           /* Treat A != B as ~(A==B).  */
6162           {
6163 	    enum rtx_code rev_code;
6164             enum insn_code nor_code;
6165 	    rtx rev_mask;
6166 
6167 	    rev_code = reverse_condition_maybe_unordered (rcode);
6168             rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6169 
6170             nor_code = optab_handler (one_cmpl_optab, dest_mode);
6171             gcc_assert (nor_code != CODE_FOR_nothing);
6172             emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
6173             if (dmode != dest_mode)
6174               {
6175                 rtx temp = gen_reg_rtx (dest_mode);
6176                 convert_move (temp, mask, 0);
6177                 return temp;
6178               }
6179             return mask;
6180           }
6181           break;
6182         case GE:
6183         case GEU:
6184         case LE:
6185         case LEU:
6186           /* Try GT/GTU/LT/LTU OR EQ */
6187           {
6188             rtx c_rtx, eq_rtx;
6189             enum insn_code ior_code;
6190             enum rtx_code new_code;
6191 
6192             switch (rcode)
6193               {
6194               case GE:  new_code = GT;  break;
6195               case GEU: new_code = GTU; break;
6196               case LE:  new_code = LT;  break;
6197               case LEU: new_code = LTU; break;
6198               default:
6199                 gcc_unreachable ();
6200               }
6201 
6202             c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6203             eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6204 
6205             ior_code = optab_handler (ior_optab, dest_mode);
6206             gcc_assert (ior_code != CODE_FOR_nothing);
6207             emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6208             if (dmode != dest_mode)
6209               {
6210                 rtx temp = gen_reg_rtx (dest_mode);
6211                 convert_move (temp, mask, 0);
6212                 return temp;
6213               }
6214             return mask;
6215           }
6216           break;
6217         case LTGT:
6218           /* Try LT OR GT */
6219           {
6220             rtx lt_rtx, gt_rtx;
6221             enum insn_code ior_code;
6222 
6223             lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6224             gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6225 
6226             ior_code = optab_handler (ior_optab, dest_mode);
6227             gcc_assert (ior_code != CODE_FOR_nothing);
6228             emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6229             if (dmode != dest_mode)
6230               {
6231                 rtx temp = gen_reg_rtx (dest_mode);
6232                 convert_move (temp, mask, 0);
6233                 return temp;
6234               }
6235             return mask;
6236           }
6237           break;
6238         case ORDERED:
6239           /* Implement as (A==A) & (B==B) */
6240           {
6241             rtx a_rtx, b_rtx;
6242             enum insn_code and_code;
6243 
6244             a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6245             b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6246 
6247             and_code = optab_handler (and_optab, dest_mode);
6248             gcc_assert (and_code != CODE_FOR_nothing);
6249             emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6250             if (dmode != dest_mode)
6251               {
6252                 rtx temp = gen_reg_rtx (dest_mode);
6253                 convert_move (temp, mask, 0);
6254                 return temp;
6255               }
6256             return mask;
6257           }
6258           break;
6259         default:
6260           gcc_unreachable ();
6261         }
6262 
6263       /* You only get two chances.  */
6264       if (try_again)
6265           vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6266 
6267       gcc_assert (vec_cmp_insn != -1);
6268 
6269       if (swap_operands)
6270         {
6271           rtx tmp;
6272           tmp = op0;
6273           op0 = op1;
6274           op1 = tmp;
6275         }
6276     }
6277 
6278   emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6279   if (dmode != dest_mode)
6280     {
6281       rtx temp = gen_reg_rtx (dest_mode);
6282       convert_move (temp, mask, 0);
6283       return temp;
6284     }
6285   return mask;
6286 }
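
/* Summary of the fallbacks above (illustrative): comparisons without a
   direct SPU pattern are synthesized from the ones that exist.  A GE
   compare becomes (GT | EQ) using two compares and an ior, NE becomes
   the one's complement of EQ, LTGT becomes (LT | GT), ORDERED becomes
   (A==A) & (B==B), and LT/LTU simply swap the operands and retry as
   GT/GTU.  */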
6287 
6288 
6289 /* Emit vector conditional expression.
6290    DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6291    CC_OP0 and CC_OP1 are the two operands for the relation operation COND.  */
6292 
6293 int
6294 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6295                            rtx cond, rtx cc_op0, rtx cc_op1)
6296 {
6297   machine_mode dest_mode = GET_MODE (dest);
6298   enum rtx_code rcode = GET_CODE (cond);
6299   rtx mask;
6300 
6301   /* Get the vector mask for the given relational operations.  */
6302   mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6303 
6304   emit_insn(gen_selb (dest, op2, op1, mask));
6305 
6306   return 1;
6307 }
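
/* Note on the operand order above (an illustrative reading): selb takes
   bits from its second source operand where the mask bit is 1, so
   passing (OP2, OP1, MASK) yields OP1 in lanes where the comparison was
   true and OP2 elsewhere, matching the VEC_COND_EXPR semantics.  */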
6308 
6309 static rtx
6310 spu_force_reg (machine_mode mode, rtx op)
6311 {
6312   rtx x, r;
6313   if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6314     {
6315       if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6316 	  || GET_MODE (op) == BLKmode)
6317 	return force_reg (mode, convert_to_mode (mode, op, 0));
6318       abort ();
6319     }
6320 
6321   r = force_reg (GET_MODE (op), op);
6322   if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6323     {
6324       x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6325       if (x)
6326 	return x;
6327     }
6328 
6329   x = gen_reg_rtx (mode);
6330   emit_insn (gen_spu_convert (x, r));
6331   return x;
6332 }
6333 
6334 static void
6335 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6336 {
6337   HOST_WIDE_INT v = 0;
6338   int lsbits;
6339   /* Check the range of immediate operands. */
6340   if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6341     {
6342       int range = p - SPU_BTI_7;
6343 
6344       if (!CONSTANT_P (op))
6345 	error ("%s expects an integer literal in the range [%d, %d]",
6346 	       d->name,
6347 	       spu_builtin_range[range].low, spu_builtin_range[range].high);
6348 
6349       if (GET_CODE (op) == CONST
6350 	  && (GET_CODE (XEXP (op, 0)) == PLUS
6351 	      || GET_CODE (XEXP (op, 0)) == MINUS))
6352 	{
6353 	  v = INTVAL (XEXP (XEXP (op, 0), 1));
6354 	  op = XEXP (XEXP (op, 0), 0);
6355 	}
6356       else if (GET_CODE (op) == CONST_INT)
6357 	v = INTVAL (op);
6358       else if (GET_CODE (op) == CONST_VECTOR
6359 	       && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6360 	v = INTVAL (CONST_VECTOR_ELT (op, 0));
6361 
6362       /* The default for v is 0 which is valid in every range. */
6363       if (v < spu_builtin_range[range].low
6364 	  || v > spu_builtin_range[range].high)
6365 	error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6366 	       d->name,
6367 	       spu_builtin_range[range].low, spu_builtin_range[range].high,
6368 	       v);
6369 
6370       switch (p)
6371 	{
6372 	case SPU_BTI_S10_4:
6373 	  lsbits = 4;
6374 	  break;
6375 	case SPU_BTI_U16_2:
6376 	  /* This is only used in lqa, and stqa.  Even though the insns
6377 	     encode 16 bits of the address (all but the 2 least
6378 	     significant), only 14 bits are used because it is masked to
6379 	     be 16 byte aligned. */
6380 	  lsbits = 4;
6381 	  break;
6382 	case SPU_BTI_S16_2:
6383 	  /* This is used for lqr and stqr. */
6384 	  lsbits = 2;
6385 	  break;
6386 	default:
6387 	  lsbits = 0;
6388 	}
6389 
6390       if (GET_CODE (op) == LABEL_REF
6391 	  || (GET_CODE (op) == SYMBOL_REF
6392 	      && SYMBOL_REF_FUNCTION_P (op))
6393 	  || (v & ((1 << lsbits) - 1)) != 0)
6394 	warning (0, "%d least significant bits of %s are ignored", lsbits,
6395 		 d->name);
6396     }
6397 }
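
/* Worked example (a sketch): an SPU_BTI_S10_4 operand is a signed
   10-bit field scaled by 16, so LSBITS is 4 and a literal such as 17
   triggers the "4 least significant bits ... are ignored" warning,
   while a literal outside the range table produces the range error
   above.  */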
6398 
6399 
6400 static int
6401 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6402 		     rtx target, rtx ops[])
6403 {
6404   enum insn_code icode = (enum insn_code) d->icode;
6405   int i = 0, a;
6406 
6407   /* Expand the arguments into rtl. */
6408 
6409   if (d->parm[0] != SPU_BTI_VOID)
6410     ops[i++] = target;
6411 
6412   for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6413     {
6414       tree arg = CALL_EXPR_ARG (exp, a);
6415       if (arg == 0)
6416 	abort ();
6417       ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6418     }
6419 
6420   gcc_assert (i == insn_data[icode].n_generator_args);
6421   return i;
6422 }
6423 
6424 static rtx
6425 spu_expand_builtin_1 (struct spu_builtin_description *d,
6426 		      tree exp, rtx target)
6427 {
6428   rtx pat;
6429   rtx ops[8];
6430   enum insn_code icode = (enum insn_code) d->icode;
6431   machine_mode mode, tmode;
6432   int i, p;
6433   int n_operands;
6434   tree return_type;
6435 
6436   /* Set up ops[] with values from arglist. */
6437   n_operands = expand_builtin_args (d, exp, target, ops);
6438 
6439   /* Handle the target operand, which must be operand 0. */
6440   i = 0;
6441   if (d->parm[0] != SPU_BTI_VOID)
6442     {
6443 
6444       /* We prefer the mode specified for the match_operand; otherwise
6445          use the mode from the builtin function prototype. */
6446       tmode = insn_data[d->icode].operand[0].mode;
6447       if (tmode == VOIDmode)
6448 	tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6449 
6450       /* Try to use target because not using it can lead to extra copies,
6451          and when all of the registers are in use, extra copies lead
6452          to extra spills.  */
6453       if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6454 	ops[0] = target;
6455       else
6456 	target = ops[0] = gen_reg_rtx (tmode);
6457 
6458       if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6459 	abort ();
6460 
6461       i++;
6462     }
6463 
6464   if (d->fcode == SPU_MASK_FOR_LOAD)
6465     {
6466       machine_mode mode = insn_data[icode].operand[1].mode;
6467       tree arg;
6468       rtx addr, op, pat;
6469 
6470       /* get addr */
6471       arg = CALL_EXPR_ARG (exp, 0);
6472       gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6473       op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6474       addr = memory_address (mode, op);
6475 
6476       /* negate addr */
6477       op = gen_reg_rtx (GET_MODE (addr));
6478       emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
6479       op = gen_rtx_MEM (mode, op);
6480 
6481       pat = GEN_FCN (icode) (target, op);
6482       if (!pat)
6483         return 0;
6484       emit_insn (pat);
6485       return target;
6486     }
6487 
6488   /* Ignore align_hint, but still expand its args in case they have
6489      side effects. */
6490   if (icode == CODE_FOR_spu_align_hint)
6491     return 0;
6492 
6493   /* Handle the rest of the operands. */
6494   for (p = 1; i < n_operands; i++, p++)
6495     {
6496       if (insn_data[d->icode].operand[i].mode != VOIDmode)
6497 	mode = insn_data[d->icode].operand[i].mode;
6498       else
6499 	mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6500 
6501       /* mode can be VOIDmode here for labels */
6502 
6503       /* For specific intrinsics with an immediate operand, e.g.,
6504          si_ai(), we sometimes need to convert the scalar argument to a
6505          vector argument by splatting the scalar. */
6506       if (VECTOR_MODE_P (mode)
6507 	  && (GET_CODE (ops[i]) == CONST_INT
6508 	      || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6509 	      || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6510 	{
6511 	  if (GET_CODE (ops[i]) == CONST_INT)
6512 	    ops[i] = spu_const (mode, INTVAL (ops[i]));
6513 	  else
6514 	    {
6515 	      rtx reg = gen_reg_rtx (mode);
6516 	      machine_mode imode = GET_MODE_INNER (mode);
6517 	      if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6518 		ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6519 	      if (imode != GET_MODE (ops[i]))
6520 		ops[i] = convert_to_mode (imode, ops[i],
6521 					  TYPE_UNSIGNED (spu_builtin_types
6522 							 [d->parm[i]]));
6523 	      emit_insn (gen_spu_splats (reg, ops[i]));
6524 	      ops[i] = reg;
6525 	    }
6526 	}
6527 
6528       spu_check_builtin_parm (d, ops[i], d->parm[p]);
6529 
6530       if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6531 	ops[i] = spu_force_reg (mode, ops[i]);
6532     }
6533 
6534   switch (n_operands)
6535     {
6536     case 0:
6537       pat = GEN_FCN (icode) (0);
6538       break;
6539     case 1:
6540       pat = GEN_FCN (icode) (ops[0]);
6541       break;
6542     case 2:
6543       pat = GEN_FCN (icode) (ops[0], ops[1]);
6544       break;
6545     case 3:
6546       pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6547       break;
6548     case 4:
6549       pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6550       break;
6551     case 5:
6552       pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6553       break;
6554     case 6:
6555       pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6556       break;
6557     default:
6558       abort ();
6559     }
6560 
6561   if (!pat)
6562     abort ();
6563 
6564   if (d->type == B_CALL || d->type == B_BISLED)
6565     emit_call_insn (pat);
6566   else if (d->type == B_JUMP)
6567     {
6568       emit_jump_insn (pat);
6569       emit_barrier ();
6570     }
6571   else
6572     emit_insn (pat);
6573 
6574   return_type = spu_builtin_types[d->parm[0]];
6575   if (d->parm[0] != SPU_BTI_VOID
6576       && GET_MODE (target) != TYPE_MODE (return_type))
6577     {
6578       /* target is the return value.  It should always be in the mode of
6579          the builtin function prototype. */
6580       target = spu_force_reg (TYPE_MODE (return_type), target);
6581     }
6582 
6583   return target;
6584 }
6585 
6586 rtx
6587 spu_expand_builtin (tree exp,
6588 		    rtx target,
6589 		    rtx subtarget ATTRIBUTE_UNUSED,
6590 		    machine_mode mode ATTRIBUTE_UNUSED,
6591 		    int ignore ATTRIBUTE_UNUSED)
6592 {
6593   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6594   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6595   struct spu_builtin_description *d;
6596 
6597   if (fcode < NUM_SPU_BUILTINS)
6598     {
6599       d = &spu_builtins[fcode];
6600 
6601       return spu_expand_builtin_1 (d, exp, target);
6602     }
6603   abort ();
6604 }
6605 
6606 /* Implement targetm.vectorize.builtin_mask_for_load.  */
6607 static tree
6608 spu_builtin_mask_for_load (void)
6609 {
6610   return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6611 }
6612 
6613 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
6614 static int
6615 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6616                                 tree vectype,
6617                                 int misalign ATTRIBUTE_UNUSED)
6618 {
6619   unsigned elements;
6620 
6621   switch (type_of_cost)
6622     {
6623       case scalar_stmt:
6624       case vector_stmt:
6625       case vector_load:
6626       case vector_store:
6627       case vec_to_scalar:
6628       case scalar_to_vec:
6629       case cond_branch_not_taken:
6630       case vec_perm:
6631       case vec_promote_demote:
6632         return 1;
6633 
6634       case scalar_store:
6635         return 10;
6636 
6637       case scalar_load:
6638         /* Load + rotate.  */
6639         return 2;
6640 
6641       case unaligned_load:
6642       case vector_gather_load:
6643       case vector_scatter_store:
6644         return 2;
6645 
6646       case cond_branch_taken:
6647         return 6;
6648 
6649       case vec_construct:
6650 	elements = TYPE_VECTOR_SUBPARTS (vectype);
6651 	return elements / 2 + 1;
6652 
6653       default:
6654         gcc_unreachable ();
6655     }
6656 }
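
/* Worked example (illustrative): building a V4SImode vector from
   scalars (vec_construct with 4 subparts) is costed at 4/2 + 1 = 3
   units by the formula above, while an unaligned or gather/scatter
   access is a flat 2 and a scalar store is the most expensive at 10.  */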
6657 
6658 /* Implement targetm.vectorize.init_cost.  */
6659 
6660 static void *
6661 spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6662 {
6663   unsigned *cost = XNEWVEC (unsigned, 3);
6664   cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
6665   return cost;
6666 }
6667 
6668 /* Implement targetm.vectorize.add_stmt_cost.  */
6669 
6670 static unsigned
6671 spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6672 		   struct _stmt_vec_info *stmt_info, int misalign,
6673 		   enum vect_cost_model_location where)
6674 {
6675   unsigned *cost = (unsigned *) data;
6676   unsigned retval = 0;
6677 
6678   if (flag_vect_cost_model)
6679     {
6680       tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6681       int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6682 
6683       /* Statements in an inner loop relative to the loop being
6684 	 vectorized are weighted more heavily.  The value here is
6685 	 arbitrary and could potentially be improved with analysis.  */
6686       if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6687 	count *= 50;  /* FIXME.  */
6688 
6689       retval = (unsigned) (count * stmt_cost);
6690       cost[where] += retval;
6691     }
6692 
6693   return retval;
6694 }
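
/* Worked example (illustrative): two vector_stmt statements (unit cost
   1 each) that belong to an inner loop of the loop being vectorized are
   recorded as 2 * 50 * 1 = 100 units in the vect_body bucket because of
   the weighting above.  */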
6695 
6696 /* Implement targetm.vectorize.finish_cost.  */
6697 
6698 static void
6699 spu_finish_cost (void *data, unsigned *prologue_cost,
6700 		 unsigned *body_cost, unsigned *epilogue_cost)
6701 {
6702   unsigned *cost = (unsigned *) data;
6703   *prologue_cost = cost[vect_prologue];
6704   *body_cost     = cost[vect_body];
6705   *epilogue_cost = cost[vect_epilogue];
6706 }
6707 
6708 /* Implement targetm.vectorize.destroy_cost_data.  */
6709 
6710 static void
6711 spu_destroy_cost_data (void *data)
6712 {
6713   free (data);
6714 }
6715 
6716 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6717    after applying N iterations.  This routine does not determine
6718    how many iterations are required to reach the desired alignment.  */
6719 
6720 static bool
6721 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6722 {
6723   if (is_packed)
6724     return false;
6725 
6726   /* All other types are naturally aligned.  */
6727   return true;
6728 }
6729 
6730 /* Return the appropriate mode for a named address pointer.  */
6731 static scalar_int_mode
6732 spu_addr_space_pointer_mode (addr_space_t addrspace)
6733 {
6734   switch (addrspace)
6735     {
6736     case ADDR_SPACE_GENERIC:
6737       return ptr_mode;
6738     case ADDR_SPACE_EA:
6739       return EAmode;
6740     default:
6741       gcc_unreachable ();
6742     }
6743 }
6744 
6745 /* Return the appropriate address mode for a named address space.  */
6746 static scalar_int_mode
6747 spu_addr_space_address_mode (addr_space_t addrspace)
6748 {
6749   switch (addrspace)
6750     {
6751     case ADDR_SPACE_GENERIC:
6752       return Pmode;
6753     case ADDR_SPACE_EA:
6754       return EAmode;
6755     default:
6756       gcc_unreachable ();
6757     }
6758 }
6759 
6760 /* Determine if one named address space is a subset of another.  */
6761 
6762 static bool
6763 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6764 {
6765   gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6766   gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6767 
6768   if (subset == superset)
6769     return true;
6770 
6771   /* If we have -mno-address-space-conversion, treat __ea and generic as not
6772      being subsets but instead as disjoint address spaces.  */
6773   else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6774     return false;
6775 
6776   else
6777     return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6778 }
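/* Illustrative example (hypothetical user-level SPU code, not part of
   this file): because the generic local-store space is a subset of __ea,
   a generic pointer converts implicitly to an __ea pointer, while the
   reverse direction needs an explicit cast; with
   -mno-address-space-conversion both spaces are treated as disjoint.  */
#if 0
extern __ea int *ea_ptr;
extern int *ls_ptr;

void
example_pointer_conversions (void)
{
  ea_ptr = ls_ptr;            /* OK: generic is a subset of __ea.  */
  ls_ptr = (int *) ea_ptr;    /* Needs a cast: __ea is not a subset of
				 the generic space.  */
}
#endif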
6779 
6780 /* Convert from one address space to another.  */
6781 static rtx
6782 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6783 {
6784   addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6785   addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6786 
6787   gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6788   gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6789 
6790   if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6791     {
6792       rtx result, ls;
6793 
6794       ls = gen_const_mem (DImode,
6795 			  gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6796       set_mem_align (ls, 128);
6797 
6798       result = gen_reg_rtx (Pmode);
6799       ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6800       op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6801       ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6802 					  ls, const0_rtx, Pmode, 1);
6803 
6804       emit_insn (gen_subsi3 (result, op, ls));
6805 
6806       return result;
6807     }
6808 
6809   else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6810     {
6811       rtx result, ls;
6812 
6813       ls = gen_const_mem (DImode,
6814 			  gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6815       set_mem_align (ls, 128);
6816 
6817       result = gen_reg_rtx (EAmode);
6818       ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6819       op = force_reg (Pmode, op);
6820       ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6821 					  ls, const0_rtx, EAmode, 1);
6822       op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6823 
6824       if (EAmode == SImode)
6825 	emit_insn (gen_addsi3 (result, op, ls));
6826       else
6827 	emit_insn (gen_adddi3 (result, op, ls));
6828 
6829       return result;
6830     }
6831 
6832   else
6833     gcc_unreachable ();
6834 }
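/* C-level sketch of the two conversions above (illustration only):
   __ea_local_store holds the 64-bit effective address of the local
   store, and the conditional moves preserve a NULL pointer.  */
#if 0
extern unsigned long long __ea_local_store;

static unsigned int
example_ea_to_generic (unsigned long long ea)
{
  return ea ? (unsigned int) (ea - __ea_local_store) : 0;
}

static unsigned long long
example_generic_to_ea (unsigned int lsa)
{
  return lsa ? lsa + __ea_local_store : 0;
}
#endif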
6835 
6836 
6837 /* Count the total number of instructions in each pipe and return the
6838    maximum, which is used as the Minimum Iteration Interval (MII)
6839    in the modulo scheduler.  get_pipe() returns -2, -1, 0, or 1;
6840    -2 means the instruction can go in either pipe0 or pipe1.  */
6841 static int
6842 spu_sms_res_mii (struct ddg *g)
6843 {
6844   int i;
6845   unsigned t[4] = {0, 0, 0, 0};
6846 
6847   for (i = 0; i < g->num_nodes; i++)
6848     {
6849       rtx_insn *insn = g->nodes[i].insn;
6850       int p = get_pipe (insn) + 2;
6851 
6852       gcc_assert (p >= 0);
6853       gcc_assert (p < 4);
6854 
6855       t[p]++;
6856       if (dump_file && INSN_P (insn))
6857             fprintf (dump_file, "i%d %s %d %d\n",
6858                      INSN_UID (insn),
6859                      insn_data[INSN_CODE(insn)].name,
6860                      p, t[p]);
6861     }
6862   if (dump_file)
6863     fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6864 
6865   return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6866 }
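/* Worked example with made-up counts: for t = {3, 0, 2, 1}, i.e. three
   dual-issue instructions, two pipe0 instructions and one pipe1
   instruction, the result is
   MAX ((3 + 2 + 1 + 1) / 2, MAX (2, 1)) = MAX (3, 2) = 3,
   so the modulo scheduler starts with an MII of three cycles.  */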
6867 
6868 
6869 void
6870 spu_init_expanders (void)
6871 {
6872   if (cfun)
6873     {
6874       rtx r0, r1;
6875       /* HARD_FRAME_POINTER_REGNUM is only 128-bit aligned when
6876          frame_pointer_needed is true.  We don't know that until we're
6877          expanding the prologue. */
6878       REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6879 
6880       /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6881 	 LAST_VIRTUAL_REGISTER+2 to test the back-end.  We want them
6882 	 to be treated as aligned, so generate them here. */
6883       r0 = gen_reg_rtx (SImode);
6884       r1 = gen_reg_rtx (SImode);
6885       mark_reg_pointer (r0, 128);
6886       mark_reg_pointer (r1, 128);
6887       gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6888 		  && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6889     }
6890 }
6891 
6892 static scalar_int_mode
6893 spu_libgcc_cmp_return_mode (void)
6894 {
6895 
6896   /* On SPU, word_mode is TImode, so it is better to use SImode
6897      for compare returns.  */
6898   return SImode;
6899 }
6900 
6901 static scalar_int_mode
6902 spu_libgcc_shift_count_mode (void)
6903 {
6904   /* On SPU, word_mode is TImode, so it is better to use SImode
6905      for shift counts.  */
6906   return SImode;
6907 }
6908 
6909 /* Implement targetm.section_type_flags.  */
6910 static unsigned int
6911 spu_section_type_flags (tree decl, const char *name, int reloc)
6912 {
6913   /* .toe needs to have type @nobits.  */
6914   if (strcmp (name, ".toe") == 0)
6915     return SECTION_BSS;
6916   /* Don't load _ea into the current address space.  */
6917   if (strcmp (name, "._ea") == 0)
6918     return SECTION_WRITE | SECTION_DEBUG;
6919   return default_section_type_flags (decl, name, reloc);
6920 }
6921 
6922 /* Implement targetm.select_section.  */
6923 static section *
6924 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6925 {
6926   /* Variables and constants defined in the __ea address space
6927      go into a special section named "._ea".  */
6928   if (TREE_TYPE (decl) != error_mark_node
6929       && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6930     {
6931       /* We might get called with string constants, but get_named_section
6932 	 doesn't like them as they are not DECLs.  Also, we need to set
6933 	 flags in that case.  */
6934       if (!DECL_P (decl))
6935 	return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6936 
6937       return get_named_section (decl, "._ea", reloc);
6938     }
6939 
6940   return default_elf_select_section (decl, reloc, align);
6941 }
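/* Illustrative example (hypothetical user-level SPU code, not part of
   this file): a definition qualified with __ea ends up in the "._ea"
   section with the flags chosen by spu_section_type_flags above, so it
   is kept out of the local store image.  */
#if 0
__ea int shared_counter;    /* emitted into section "._ea"  */
int local_counter;          /* emitted into .bss/.data as usual  */
#endif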
6942 
6943 /* Implement targetm.unique_section.  */
6944 static void
6945 spu_unique_section (tree decl, int reloc)
6946 {
6947   /* We don't support unique section names in the __ea address
6948      space for now.  */
6949   if (TREE_TYPE (decl) != error_mark_node
6950       && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6951     return;
6952 
6953   default_unique_section (decl, reloc);
6954 }
6955 
6956 /* Generate a constant or register which contains 2^SCALE.  We assume
6957    the result is valid for MODE.  Currently, MODE must be V4SFmode and
6958    SCALE must be SImode or a constant integer.  */
6959 rtx
6960 spu_gen_exp2 (machine_mode mode, rtx scale)
6961 {
6962   gcc_assert (mode == V4SFmode);
6963   gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6964   if (GET_CODE (scale) != CONST_INT)
6965     {
6966       /* unsigned int exp = (127 + scale) << 23;
6967 	__vector float m = (__vector float) spu_splats (exp); */
6968       rtx reg = force_reg (SImode, scale);
6969       rtx exp = gen_reg_rtx (SImode);
6970       rtx mul = gen_reg_rtx (mode);
6971       emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6972       emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6973       emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6974       return mul;
6975     }
6976   else
6977     {
6978       HOST_WIDE_INT exp = 127 + INTVAL (scale);
6979       unsigned char arr[16];
6980       arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6981       arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6982       arr[2] = arr[6] = arr[10] = arr[14] = 0;
6983       arr[3] = arr[7] = arr[11] = arr[15] = 0;
6984       return array_to_constant (mode, arr);
6985     }
6986 }
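/* Worked example for the constant case above: SCALE = 3 gives
   exp = 127 + 3 = 130 = 0x82, so each word is built from the bytes
   { 0x82 >> 1, low byte of 0x82 << 7, 0, 0 } = { 0x41, 0x00, 0x00, 0x00 },
   i.e. the pattern 0x41000000, which is 8.0f = 2^3 in every V4SF slot.  */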
6987 
6988 /* After reload, just change the convert into a move instruction
6989    or a dead instruction. */
6990 void
6991 spu_split_convert (rtx ops[])
6992 {
6993   if (REGNO (ops[0]) == REGNO (ops[1]))
6994     emit_note (NOTE_INSN_DELETED);
6995   else
6996     {
6997       /* Use TImode always as this might help hard reg copyprop.  */
6998       rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6999       rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7000       emit_insn (gen_move_insn (op0, op1));
7001     }
7002 }
7003 
7004 void
7005 spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
7006 {
7007   fprintf (file, "# profile\n");
7008   fprintf (file, "brsl $75,  _mcount\n");
7009 }
7010 
7011 /* Implement targetm.ref_may_alias_errno.  */
7012 static bool
7013 spu_ref_may_alias_errno (ao_ref *ref)
7014 {
7015   tree base = ao_ref_base (ref);
7016 
7017   /* With SPU newlib, errno is defined as something like
7018          _impure_data._errno
7019      The default implementation of this target macro does not
7020      recognize such expressions, so special-case it here.  */
7021 
7022   if (TREE_CODE (base) == VAR_DECL
7023       && !TREE_STATIC (base)
7024       && DECL_EXTERNAL (base)
7025       && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7026       && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7027 		 "_impure_data") == 0
7028       /* _errno is the first member of _impure_data.  */
7029       && ref->offset == 0)
7030     return true;
7031 
7032   return default_ref_may_alias_errno (ref);
7033 }
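/* Roughly the newlib definition the check above is matching (sketch
   only; the real newlib declarations differ in detail):

     struct _reent { int _errno; ... };
     extern struct _reent _impure_data;
     #define errno (_impure_data._errno)

   i.e. an external RECORD_TYPE variable named "_impure_data" whose
   first member, at offset 0, is _errno.  */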
7034 
7035 /* Output thunk to FILE that implements a C++ virtual function call (with
7036    multiple inheritance) to FUNCTION.  The thunk adjusts the this pointer
7037    by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7038    stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7039    relative to the resulting this pointer.  */
7040 
7041 static void
7042 spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7043 		     HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7044 		     tree function)
7045 {
7046   rtx op[8];
7047 
7048   /* Make sure unwind info is emitted for the thunk if needed.  */
7049   final_start_function (emit_barrier (), file, 1);
7050 
7051   /* Operand 0 is the target function.  */
7052   op[0] = XEXP (DECL_RTL (function), 0);
7053 
7054   /* Operand 1 is the 'this' pointer.  */
7055   if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7056     op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7057   else
7058     op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7059 
7060   /* Operands 2/3 are the low/high halfwords of delta.  */
7061   op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7062   op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7063 
7064   /* Operands 4/5 are the low/high halfwords of vcall_offset.  */
7065   op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7066   op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7067 
7068   /* Operands 6/7 are temporary registers.  */
7069   op[6] = gen_rtx_REG (Pmode, 79);
7070   op[7] = gen_rtx_REG (Pmode, 78);
7071 
7072   /* Add DELTA to this pointer.  */
7073   if (delta)
7074     {
7075       if (delta >= -0x200 && delta < 0x200)
7076 	output_asm_insn ("ai\t%1,%1,%2", op);
7077       else if (delta >= -0x8000 && delta < 0x8000)
7078 	{
7079 	  output_asm_insn ("il\t%6,%2", op);
7080 	  output_asm_insn ("a\t%1,%1,%6", op);
7081 	}
7082       else
7083 	{
7084 	  output_asm_insn ("ilhu\t%6,%3", op);
7085 	  output_asm_insn ("iohl\t%6,%2", op);
7086 	  output_asm_insn ("a\t%1,%1,%6", op);
7087 	}
7088     }
7089 
7090   /* Perform vcall adjustment.  */
7091   if (vcall_offset)
7092     {
7093       output_asm_insn ("lqd\t%7,0(%1)", op);
7094       output_asm_insn ("rotqby\t%7,%7,%1", op);
7095 
7096       if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7097 	output_asm_insn ("ai\t%7,%7,%4", op);
7098       else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7099 	{
7100 	  output_asm_insn ("il\t%6,%4", op);
7101 	  output_asm_insn ("a\t%7,%7,%6", op);
7102 	}
7103       else
7104 	{
7105 	  output_asm_insn ("ilhu\t%6,%5", op);
7106 	  output_asm_insn ("iohl\t%6,%4", op);
7107 	  output_asm_insn ("a\t%7,%7,%6", op);
7108 	}
7109 
7110       output_asm_insn ("lqd\t%6,0(%7)", op);
7111       output_asm_insn ("rotqby\t%6,%6,%7", op);
7112       output_asm_insn ("a\t%1,%1,%6", op);
7113     }
7114 
7115   /* Jump to target.  */
7116   output_asm_insn ("br\t%0", op);
7117 
7118   final_end_function ();
7119 }
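/* Illustrative output for a small DELTA and VCALL_OFFSET (assuming the
   usual case where the this pointer arrives in $3; register numbers are
   for illustration only).  With DELTA = 4 and VCALL_OFFSET = 8 the thunk
   comes out roughly as:

       ai      $3,$3,4        # this += DELTA
       lqd     $78,0($3)      # load the quadword holding the vtable ptr
       rotqby  $78,$78,$3     # rotate it into the preferred slot
       ai      $78,$78,8      # add VCALL_OFFSET
       lqd     $79,0($78)     # load the adjustment stored in the vtable
       rotqby  $79,$79,$78
       a       $3,$3,$79      # this += adjustment
       br      function       # tail-jump to the target  */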
7120 
7121 /* Canonicalize a comparison from one we don't have to one we do have.  */
7122 static void
7123 spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7124 			     bool op0_preserve_value)
7125 {
7126   if (!op0_preserve_value
7127       && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7128     {
7129       rtx tem = *op0;
7130       *op0 = *op1;
7131       *op1 = tem;
7132       *code = (int)swap_condition ((enum rtx_code)*code);
7133     }
7134 }
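/* Example of the effect: when the value of *OP0 need not be preserved,
   (lt a b) becomes (gt b a), and likewise LE -> GE, LTU -> GTU and
   LEU -> GEU, matching the SPU compare instructions (ceq, cgt, clgt),
   which only test equality and greater-than.  */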
7135 
7136 /* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
7137    to perform.  MEM is the memory on which to operate.  VAL is the second
7138    operand of the binary operator.  BEFORE and AFTER are optional locations to
7139    return the value of MEM either before or after the operation.  */
7140 void
7141 spu_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
7142 		      rtx orig_before, rtx orig_after)
7143 {
7144   machine_mode mode = GET_MODE (mem);
7145   rtx before = orig_before, after = orig_after;
7146 
7147   if (before == NULL_RTX)
7148     before = gen_reg_rtx (mode);
7149 
7150   emit_move_insn (before, mem);
7151 
7152   if (code == MULT)  /* NAND operation */
7153     {
7154       rtx x = expand_simple_binop (mode, AND, before, val,
7155 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
7156       after = expand_simple_unop (mode, NOT, x, after, 1);
7157     }
7158   else
7159     {
7160       after = expand_simple_binop (mode, code, before, val,
7161 				   after, 1, OPTAB_LIB_WIDEN);
7162     }
7163 
7164   emit_move_insn (mem, after);
7165 
7166   if (orig_after && after != orig_after)
7167     emit_move_insn (orig_after, after);
7168 }
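/* C-level model of the value computation performed by the expansion
   above (sketch only; the real code emits RTL, and MULT stands for
   NAND as noted in the code).  */
#if 0
static unsigned int
example_fetch_and_op (unsigned int *mem, unsigned int val, int is_nand)
{
  unsigned int before = *mem;
  unsigned int after = is_nand ? ~(before & val)
			       : before + val;  /* or AND, IOR, XOR, ...  */
  *mem = after;
  return before;    /* Both BEFORE and AFTER can be returned to callers.  */
}
#endif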
7169 
7170 /* Implement TARGET_MODES_TIEABLE_P.  */
7171 
7172 static bool
7173 spu_modes_tieable_p (machine_mode mode1, machine_mode mode2)
7174 {
7175   return (GET_MODE_BITSIZE (mode1) <= MAX_FIXED_MODE_SIZE
7176 	  && GET_MODE_BITSIZE (mode2) <= MAX_FIXED_MODE_SIZE);
7177 }
7178 
7179 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  GCC assumes that modes are
7180    in the lowpart of a register, which is only true for SPU.  */
7181 
7182 static bool
7183 spu_can_change_mode_class (machine_mode from, machine_mode to, reg_class_t)
7184 {
7185   return (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
7186 	  || (GET_MODE_SIZE (from) <= 4 && GET_MODE_SIZE (to) <= 4)
7187 	  || (GET_MODE_SIZE (from) >= 16 && GET_MODE_SIZE (to) >= 16));
7188 }
7189 
7190 /* Implement TARGET_TRULY_NOOP_TRUNCATION.  */
7191 
7192 static bool
7193 spu_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec)
7194 {
7195   return inprec <= 32 && outprec <= inprec;
7196 }
7197 
7198 /* Implement TARGET_STATIC_RTX_ALIGNMENT.
7199 
7200    Make all static objects 16-byte aligned.  This allows us to assume
7201    they are also padded to 16 bytes, which means we can use a single
7202    load or store instruction to access them.  */
7203 
7204 static HOST_WIDE_INT
7205 spu_static_rtx_alignment (machine_mode mode)
7206 {
7207   return MAX (GET_MODE_ALIGNMENT (mode), 128);
7208 }
7209 
7210 /* Implement TARGET_CONSTANT_ALIGNMENT.
7211 
7212    Make all static objects 16-byte aligned.  This allows us to assume
7213    they are also padded to 16 bytes, which means we can use a single
7214    load or store instruction to access them.  */
7215 
7216 static HOST_WIDE_INT
7217 spu_constant_alignment (const_tree, HOST_WIDE_INT align)
7218 {
7219   return MAX (align, 128);
7220 }
7221 
7222 /*  Table of machine attributes.  */
7223 static const struct attribute_spec spu_attribute_table[] =
7224 {
7225   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
7226        affects_type_identity, handler, exclude } */
7227   { "naked",          0, 0, true,  false, false, false,
7228     spu_handle_fndecl_attribute, NULL },
7229   { "spu_vector",     0, 0, false, true,  false, false,
7230     spu_handle_vector_attribute, NULL },
7231   { NULL,             0, 0, false, false, false, false, NULL, NULL }
7232 };
7233 
7234 /*  TARGET overrides.  */
7235 
7236 #undef TARGET_LRA_P
7237 #define TARGET_LRA_P hook_bool_void_false
7238 
7239 #undef TARGET_ADDR_SPACE_POINTER_MODE
7240 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7241 
7242 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
7243 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7244 
7245 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7246 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7247   spu_addr_space_legitimate_address_p
7248 
7249 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7250 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7251 
7252 #undef TARGET_ADDR_SPACE_SUBSET_P
7253 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7254 
7255 #undef TARGET_ADDR_SPACE_CONVERT
7256 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7257 
7258 #undef TARGET_INIT_BUILTINS
7259 #define TARGET_INIT_BUILTINS spu_init_builtins
7260 #undef TARGET_BUILTIN_DECL
7261 #define TARGET_BUILTIN_DECL spu_builtin_decl
7262 
7263 #undef TARGET_EXPAND_BUILTIN
7264 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
7265 
7266 #undef TARGET_UNWIND_WORD_MODE
7267 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7268 
7269 #undef TARGET_LEGITIMIZE_ADDRESS
7270 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7271 
7272 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7273    and .quad for the debugger.  When it is known that the assembler is fixed,
7274    these can be removed.  */
7275 #undef TARGET_ASM_UNALIGNED_SI_OP
7276 #define TARGET_ASM_UNALIGNED_SI_OP	"\t.long\t"
7277 
7278 #undef TARGET_ASM_ALIGNED_DI_OP
7279 #define TARGET_ASM_ALIGNED_DI_OP	"\t.quad\t"
7280 
7281 /* The .8byte directive doesn't seem to work well for a 32-bit
7282    architecture.  */
7283 #undef TARGET_ASM_UNALIGNED_DI_OP
7284 #define TARGET_ASM_UNALIGNED_DI_OP NULL
7285 
7286 #undef TARGET_RTX_COSTS
7287 #define TARGET_RTX_COSTS spu_rtx_costs
7288 
7289 #undef TARGET_ADDRESS_COST
7290 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
7291 
7292 #undef TARGET_SCHED_ISSUE_RATE
7293 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7294 
7295 #undef TARGET_SCHED_INIT_GLOBAL
7296 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7297 
7298 #undef TARGET_SCHED_INIT
7299 #define TARGET_SCHED_INIT spu_sched_init
7300 
7301 #undef TARGET_SCHED_VARIABLE_ISSUE
7302 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7303 
7304 #undef TARGET_SCHED_REORDER
7305 #define TARGET_SCHED_REORDER spu_sched_reorder
7306 
7307 #undef TARGET_SCHED_REORDER2
7308 #define TARGET_SCHED_REORDER2 spu_sched_reorder
7309 
7310 #undef TARGET_SCHED_ADJUST_COST
7311 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7312 
7313 #undef  TARGET_ATTRIBUTE_TABLE
7314 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7315 
7316 #undef TARGET_ASM_INTEGER
7317 #define TARGET_ASM_INTEGER spu_assemble_integer
7318 
7319 #undef TARGET_SCALAR_MODE_SUPPORTED_P
7320 #define TARGET_SCALAR_MODE_SUPPORTED_P	spu_scalar_mode_supported_p
7321 
7322 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7323 #define TARGET_VECTOR_MODE_SUPPORTED_P	spu_vector_mode_supported_p
7324 
7325 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7326 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7327 
7328 #undef TARGET_ASM_GLOBALIZE_LABEL
7329 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7330 
7331 #undef TARGET_PASS_BY_REFERENCE
7332 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7333 
7334 #undef TARGET_FUNCTION_ARG
7335 #define TARGET_FUNCTION_ARG spu_function_arg
7336 
7337 #undef TARGET_FUNCTION_ARG_ADVANCE
7338 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7339 
7340 #undef TARGET_FUNCTION_ARG_OFFSET
7341 #define TARGET_FUNCTION_ARG_OFFSET spu_function_arg_offset
7342 
7343 #undef TARGET_FUNCTION_ARG_PADDING
7344 #define TARGET_FUNCTION_ARG_PADDING spu_function_arg_padding
7345 
7346 #undef TARGET_MUST_PASS_IN_STACK
7347 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7348 
7349 #undef TARGET_BUILD_BUILTIN_VA_LIST
7350 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7351 
7352 #undef TARGET_EXPAND_BUILTIN_VA_START
7353 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7354 
7355 #undef TARGET_SETUP_INCOMING_VARARGS
7356 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7357 
7358 #undef TARGET_MACHINE_DEPENDENT_REORG
7359 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7360 
7361 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7362 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7363 
7364 #undef TARGET_INIT_LIBFUNCS
7365 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7366 
7367 #undef TARGET_RETURN_IN_MEMORY
7368 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7369 
7370 #undef  TARGET_ENCODE_SECTION_INFO
7371 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7372 
7373 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7374 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7375 
7376 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7377 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7378 
7379 #undef TARGET_VECTORIZE_INIT_COST
7380 #define TARGET_VECTORIZE_INIT_COST spu_init_cost
7381 
7382 #undef TARGET_VECTORIZE_ADD_STMT_COST
7383 #define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7384 
7385 #undef TARGET_VECTORIZE_FINISH_COST
7386 #define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7387 
7388 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
7389 #define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7390 
7391 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7392 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7393 
7394 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7395 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7396 
7397 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7398 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7399 
7400 #undef TARGET_SCHED_SMS_RES_MII
7401 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7402 
7403 #undef TARGET_SECTION_TYPE_FLAGS
7404 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7405 
7406 #undef TARGET_ASM_SELECT_SECTION
7407 #define TARGET_ASM_SELECT_SECTION  spu_select_section
7408 
7409 #undef TARGET_ASM_UNIQUE_SECTION
7410 #define TARGET_ASM_UNIQUE_SECTION  spu_unique_section
7411 
7412 #undef TARGET_LEGITIMATE_ADDRESS_P
7413 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7414 
7415 #undef TARGET_LEGITIMATE_CONSTANT_P
7416 #define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7417 
7418 #undef TARGET_TRAMPOLINE_INIT
7419 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7420 
7421 #undef TARGET_WARN_FUNC_RETURN
7422 #define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7423 
7424 #undef TARGET_OPTION_OVERRIDE
7425 #define TARGET_OPTION_OVERRIDE spu_option_override
7426 
7427 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7428 #define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7429 
7430 #undef TARGET_REF_MAY_ALIAS_ERRNO
7431 #define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7432 
7433 #undef TARGET_ASM_OUTPUT_MI_THUNK
7434 #define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7435 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7436 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7437 
7438 /* Variable tracking should be run after all optimizations which
7439    change order of insns.  It also needs a valid CFG.  */
7440 #undef TARGET_DELAY_VARTRACK
7441 #define TARGET_DELAY_VARTRACK true
7442 
7443 #undef TARGET_CANONICALIZE_COMPARISON
7444 #define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7445 
7446 #undef TARGET_CAN_USE_DOLOOP_P
7447 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
7448 
7449 #undef TARGET_MODES_TIEABLE_P
7450 #define TARGET_MODES_TIEABLE_P spu_modes_tieable_p
7451 
7452 #undef TARGET_HARD_REGNO_NREGS
7453 #define TARGET_HARD_REGNO_NREGS spu_hard_regno_nregs
7454 
7455 #undef TARGET_CAN_CHANGE_MODE_CLASS
7456 #define TARGET_CAN_CHANGE_MODE_CLASS spu_can_change_mode_class
7457 
7458 #undef TARGET_TRULY_NOOP_TRUNCATION
7459 #define TARGET_TRULY_NOOP_TRUNCATION spu_truly_noop_truncation
7460 
7461 #undef TARGET_STATIC_RTX_ALIGNMENT
7462 #define TARGET_STATIC_RTX_ALIGNMENT spu_static_rtx_alignment
7463 #undef TARGET_CONSTANT_ALIGNMENT
7464 #define TARGET_CONSTANT_ALIGNMENT spu_constant_alignment
7465 
7466 #undef  TARGET_HAVE_SPECULATION_SAFE_VALUE
7467 #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
7468 
7469 struct gcc_target targetm = TARGET_INITIALIZER;
7470 
7471 #include "gt-spu.h"
7472